{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.33537352226041756, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.353735222604176e-05, "grad_norm": 0.4128517832619569, "learning_rate": 2e-05, "loss": 5.4752, "step": 1 }, { "epoch": 6.707470445208351e-05, "grad_norm": 0.4213411384490361, "learning_rate": 2e-05, "loss": 5.6285, "step": 2 }, { "epoch": 0.00010061205667812527, "grad_norm": 0.4472221238051605, "learning_rate": 2e-05, "loss": 5.425, "step": 3 }, { "epoch": 0.00013414940890416703, "grad_norm": 0.4102533291977229, "learning_rate": 2e-05, "loss": 5.5957, "step": 4 }, { "epoch": 0.00016768676113020878, "grad_norm": 0.4424409451366419, "learning_rate": 2e-05, "loss": 5.6039, "step": 5 }, { "epoch": 0.00020122411335625053, "grad_norm": 0.4205517647838499, "learning_rate": 2e-05, "loss": 5.654, "step": 6 }, { "epoch": 0.00023476146558229228, "grad_norm": 0.4352406283397576, "learning_rate": 2e-05, "loss": 5.496, "step": 7 }, { "epoch": 0.00026829881780833406, "grad_norm": 0.44431380603662085, "learning_rate": 2e-05, "loss": 5.5897, "step": 8 }, { "epoch": 0.0003018361700343758, "grad_norm": 0.4445263995807613, "learning_rate": 2e-05, "loss": 5.6387, "step": 9 }, { "epoch": 0.00033537352226041756, "grad_norm": 0.441627379785802, "learning_rate": 2e-05, "loss": 5.9561, "step": 10 }, { "epoch": 0.0003689108744864593, "grad_norm": 0.43330630922754443, "learning_rate": 2e-05, "loss": 5.6135, "step": 11 }, { "epoch": 0.00040244822671250106, "grad_norm": 0.4329188492917598, "learning_rate": 2e-05, "loss": 5.4026, "step": 12 }, { "epoch": 0.0004359855789385428, "grad_norm": 0.42967623587541504, "learning_rate": 2e-05, "loss": 5.5951, "step": 13 }, { "epoch": 0.00046952293116458456, "grad_norm": 0.4532434601648619, "learning_rate": 2e-05, "loss": 5.7968, "step": 14 }, { "epoch": 0.0005030602833906263, "grad_norm": 0.43024309214125744, "learning_rate": 2e-05, "loss": 5.618, "step": 15 }, { "epoch": 0.0005365976356166681, "grad_norm": 0.42521935069211, "learning_rate": 2e-05, "loss": 5.6895, "step": 16 }, { "epoch": 0.0005701349878427098, "grad_norm": 0.4197107898536531, "learning_rate": 2e-05, "loss": 5.7603, "step": 17 }, { "epoch": 0.0006036723400687516, "grad_norm": 0.46404583628064217, "learning_rate": 2e-05, "loss": 5.7377, "step": 18 }, { "epoch": 0.0006372096922947933, "grad_norm": 0.42379700210686955, "learning_rate": 2e-05, "loss": 5.7783, "step": 19 }, { "epoch": 0.0006707470445208351, "grad_norm": 0.41056672534524147, "learning_rate": 2e-05, "loss": 5.7051, "step": 20 }, { "epoch": 0.0007042843967468768, "grad_norm": 0.42415614348881514, "learning_rate": 2e-05, "loss": 5.4357, "step": 21 }, { "epoch": 0.0007378217489729186, "grad_norm": 0.41832917052478297, "learning_rate": 2e-05, "loss": 5.6135, "step": 22 }, { "epoch": 0.0007713591011989603, "grad_norm": 0.40042653850299076, "learning_rate": 2e-05, "loss": 5.6019, "step": 23 }, { "epoch": 0.0008048964534250021, "grad_norm": 0.3931821460365853, "learning_rate": 2e-05, "loss": 5.5871, "step": 24 }, { "epoch": 0.0008384338056510439, "grad_norm": 0.38830245410367714, "learning_rate": 2e-05, "loss": 5.6155, "step": 25 }, { "epoch": 0.0008719711578770856, "grad_norm": 0.40149706182373207, "learning_rate": 2e-05, "loss": 5.5148, "step": 26 }, { "epoch": 0.0009055085101031273, "grad_norm": 0.4077418219131963, "learning_rate": 2e-05, "loss": 5.4332, "step": 27 }, { "epoch": 0.0009390458623291691, "grad_norm": 0.3978143770935279, "learning_rate": 2e-05, "loss": 5.8234, "step": 28 }, { "epoch": 0.0009725832145552109, "grad_norm": 0.3928629987402962, "learning_rate": 2e-05, "loss": 5.5375, "step": 29 }, { "epoch": 0.0010061205667812527, "grad_norm": 0.4038483217694067, "learning_rate": 2e-05, "loss": 5.6159, "step": 30 }, { "epoch": 0.0010396579190072943, "grad_norm": 0.4149860939008008, "learning_rate": 2e-05, "loss": 5.7213, "step": 31 }, { "epoch": 0.0010731952712333362, "grad_norm": 0.41728125319377013, "learning_rate": 2e-05, "loss": 5.5503, "step": 32 }, { "epoch": 0.001106732623459378, "grad_norm": 0.3938768142868132, "learning_rate": 2e-05, "loss": 5.348, "step": 33 }, { "epoch": 0.0011402699756854196, "grad_norm": 0.41349162965566966, "learning_rate": 2e-05, "loss": 5.6308, "step": 34 }, { "epoch": 0.0011738073279114615, "grad_norm": 0.40364426029721295, "learning_rate": 2e-05, "loss": 5.5964, "step": 35 }, { "epoch": 0.0012073446801375031, "grad_norm": 0.4157479414727291, "learning_rate": 2e-05, "loss": 5.6076, "step": 36 }, { "epoch": 0.0012408820323635448, "grad_norm": 0.4508641823309127, "learning_rate": 2e-05, "loss": 5.685, "step": 37 }, { "epoch": 0.0012744193845895867, "grad_norm": 0.4122553327522678, "learning_rate": 2e-05, "loss": 5.5835, "step": 38 }, { "epoch": 0.0013079567368156284, "grad_norm": 0.39945705253928754, "learning_rate": 2e-05, "loss": 5.5752, "step": 39 }, { "epoch": 0.0013414940890416702, "grad_norm": 0.4832569644076449, "learning_rate": 2e-05, "loss": 5.6461, "step": 40 }, { "epoch": 0.001375031441267712, "grad_norm": 0.3993060452074629, "learning_rate": 2e-05, "loss": 5.4497, "step": 41 }, { "epoch": 0.0014085687934937536, "grad_norm": 0.38934432559556326, "learning_rate": 2e-05, "loss": 5.5096, "step": 42 }, { "epoch": 0.0014421061457197955, "grad_norm": 0.4082480046499007, "learning_rate": 2e-05, "loss": 5.4593, "step": 43 }, { "epoch": 0.0014756434979458371, "grad_norm": 0.40623785378647803, "learning_rate": 2e-05, "loss": 5.7549, "step": 44 }, { "epoch": 0.001509180850171879, "grad_norm": 0.4091322540050125, "learning_rate": 2e-05, "loss": 5.6464, "step": 45 }, { "epoch": 0.0015427182023979207, "grad_norm": 0.3904216238316471, "learning_rate": 2e-05, "loss": 5.5251, "step": 46 }, { "epoch": 0.0015762555546239624, "grad_norm": 0.42061421104703434, "learning_rate": 2e-05, "loss": 5.5994, "step": 47 }, { "epoch": 0.0016097929068500042, "grad_norm": 0.4611972167210989, "learning_rate": 2e-05, "loss": 5.5938, "step": 48 }, { "epoch": 0.001643330259076046, "grad_norm": 0.40390061492661455, "learning_rate": 2e-05, "loss": 5.6095, "step": 49 }, { "epoch": 0.0016768676113020878, "grad_norm": 0.46730487274405896, "learning_rate": 2e-05, "loss": 5.5906, "step": 50 }, { "epoch": 0.0017104049635281295, "grad_norm": 0.40081497753349593, "learning_rate": 2e-05, "loss": 5.6857, "step": 51 }, { "epoch": 0.0017439423157541711, "grad_norm": 0.3934775070627544, "learning_rate": 2e-05, "loss": 5.5756, "step": 52 }, { "epoch": 0.001777479667980213, "grad_norm": 0.4326328557506038, "learning_rate": 2e-05, "loss": 5.5614, "step": 53 }, { "epoch": 0.0018110170202062547, "grad_norm": 0.4028878224007471, "learning_rate": 2e-05, "loss": 5.4948, "step": 54 }, { "epoch": 0.0018445543724322966, "grad_norm": 0.3898416397417778, "learning_rate": 2e-05, "loss": 5.7536, "step": 55 }, { "epoch": 0.0018780917246583382, "grad_norm": 0.4038925885282522, "learning_rate": 2e-05, "loss": 5.638, "step": 56 }, { "epoch": 0.00191162907688438, "grad_norm": 0.42667770937747573, "learning_rate": 2e-05, "loss": 5.5974, "step": 57 }, { "epoch": 0.0019451664291104218, "grad_norm": 0.42600197258877215, "learning_rate": 2e-05, "loss": 5.6113, "step": 58 }, { "epoch": 0.0019787037813364635, "grad_norm": 0.41690105873717115, "learning_rate": 2e-05, "loss": 5.7118, "step": 59 }, { "epoch": 0.0020122411335625054, "grad_norm": 0.41339772457715024, "learning_rate": 2e-05, "loss": 5.6634, "step": 60 }, { "epoch": 0.002045778485788547, "grad_norm": 0.515100004849172, "learning_rate": 2e-05, "loss": 5.6702, "step": 61 }, { "epoch": 0.0020793158380145887, "grad_norm": 0.4074102776977574, "learning_rate": 2e-05, "loss": 5.4656, "step": 62 }, { "epoch": 0.0021128531902406306, "grad_norm": 0.4429548911318261, "learning_rate": 2e-05, "loss": 5.6542, "step": 63 }, { "epoch": 0.0021463905424666725, "grad_norm": 0.5035475897742773, "learning_rate": 2e-05, "loss": 5.5101, "step": 64 }, { "epoch": 0.002179927894692714, "grad_norm": 0.3967705558783199, "learning_rate": 2e-05, "loss": 5.4615, "step": 65 }, { "epoch": 0.002213465246918756, "grad_norm": 0.48075906883696645, "learning_rate": 2e-05, "loss": 5.585, "step": 66 }, { "epoch": 0.0022470025991447977, "grad_norm": 0.4728645018274066, "learning_rate": 2e-05, "loss": 5.319, "step": 67 }, { "epoch": 0.002280539951370839, "grad_norm": 0.4239680005565426, "learning_rate": 2e-05, "loss": 5.4659, "step": 68 }, { "epoch": 0.002314077303596881, "grad_norm": 0.4368899378174936, "learning_rate": 2e-05, "loss": 5.5951, "step": 69 }, { "epoch": 0.002347614655822923, "grad_norm": 0.5032332756156006, "learning_rate": 2e-05, "loss": 5.4409, "step": 70 }, { "epoch": 0.0023811520080489644, "grad_norm": 0.40436525562329195, "learning_rate": 2e-05, "loss": 5.7054, "step": 71 }, { "epoch": 0.0024146893602750063, "grad_norm": 0.3943647208970959, "learning_rate": 2e-05, "loss": 5.4986, "step": 72 }, { "epoch": 0.002448226712501048, "grad_norm": 0.43326208619068546, "learning_rate": 2e-05, "loss": 5.3021, "step": 73 }, { "epoch": 0.0024817640647270896, "grad_norm": 0.4092769623433174, "learning_rate": 2e-05, "loss": 5.5505, "step": 74 }, { "epoch": 0.0025153014169531315, "grad_norm": 0.4159991141099823, "learning_rate": 2e-05, "loss": 5.5004, "step": 75 }, { "epoch": 0.0025488387691791734, "grad_norm": 0.431426313872149, "learning_rate": 2e-05, "loss": 5.6854, "step": 76 }, { "epoch": 0.0025823761214052153, "grad_norm": 0.3964975103873677, "learning_rate": 2e-05, "loss": 5.6305, "step": 77 }, { "epoch": 0.0026159134736312567, "grad_norm": 0.3964556770428695, "learning_rate": 2e-05, "loss": 5.7331, "step": 78 }, { "epoch": 0.0026494508258572986, "grad_norm": 0.4265900410907477, "learning_rate": 2e-05, "loss": 5.6304, "step": 79 }, { "epoch": 0.0026829881780833405, "grad_norm": 0.4397023693812907, "learning_rate": 2e-05, "loss": 5.6158, "step": 80 }, { "epoch": 0.002716525530309382, "grad_norm": 0.45594228413419047, "learning_rate": 2e-05, "loss": 5.5984, "step": 81 }, { "epoch": 0.002750062882535424, "grad_norm": 0.41760227024038166, "learning_rate": 2e-05, "loss": 5.9309, "step": 82 }, { "epoch": 0.0027836002347614657, "grad_norm": 0.45101153974260966, "learning_rate": 2e-05, "loss": 5.6423, "step": 83 }, { "epoch": 0.002817137586987507, "grad_norm": 0.4303956657992236, "learning_rate": 2e-05, "loss": 5.5362, "step": 84 }, { "epoch": 0.002850674939213549, "grad_norm": 0.4086603723071665, "learning_rate": 2e-05, "loss": 5.7167, "step": 85 }, { "epoch": 0.002884212291439591, "grad_norm": 0.4255350364195062, "learning_rate": 2e-05, "loss": 5.5717, "step": 86 }, { "epoch": 0.002917749643665633, "grad_norm": 0.40621288893233953, "learning_rate": 2e-05, "loss": 5.4348, "step": 87 }, { "epoch": 0.0029512869958916743, "grad_norm": 0.40380686196867066, "learning_rate": 2e-05, "loss": 5.4821, "step": 88 }, { "epoch": 0.002984824348117716, "grad_norm": 0.41646169450699333, "learning_rate": 2e-05, "loss": 5.69, "step": 89 }, { "epoch": 0.003018361700343758, "grad_norm": 0.4353637265087102, "learning_rate": 2e-05, "loss": 5.4575, "step": 90 }, { "epoch": 0.0030518990525697995, "grad_norm": 0.4111443876294096, "learning_rate": 2e-05, "loss": 5.5598, "step": 91 }, { "epoch": 0.0030854364047958414, "grad_norm": 0.4185421816271673, "learning_rate": 2e-05, "loss": 5.6693, "step": 92 }, { "epoch": 0.0031189737570218833, "grad_norm": 0.42409392610483754, "learning_rate": 2e-05, "loss": 5.5563, "step": 93 }, { "epoch": 0.0031525111092479247, "grad_norm": 0.4257251412020725, "learning_rate": 2e-05, "loss": 5.5507, "step": 94 }, { "epoch": 0.0031860484614739666, "grad_norm": 0.3882926001181396, "learning_rate": 2e-05, "loss": 5.4755, "step": 95 }, { "epoch": 0.0032195858137000085, "grad_norm": 0.44522942115216296, "learning_rate": 2e-05, "loss": 5.7663, "step": 96 }, { "epoch": 0.00325312316592605, "grad_norm": 0.44787994801055964, "learning_rate": 2e-05, "loss": 5.6307, "step": 97 }, { "epoch": 0.003286660518152092, "grad_norm": 0.4047508276747668, "learning_rate": 2e-05, "loss": 5.605, "step": 98 }, { "epoch": 0.0033201978703781337, "grad_norm": 0.4110716069227767, "learning_rate": 2e-05, "loss": 5.4912, "step": 99 }, { "epoch": 0.0033537352226041756, "grad_norm": 0.43234145826004716, "learning_rate": 2e-05, "loss": 5.5379, "step": 100 }, { "epoch": 0.003387272574830217, "grad_norm": 0.42362939175433295, "learning_rate": 2e-05, "loss": 5.5025, "step": 101 }, { "epoch": 0.003420809927056259, "grad_norm": 0.4115764092588376, "learning_rate": 2e-05, "loss": 5.3692, "step": 102 }, { "epoch": 0.003454347279282301, "grad_norm": 0.43941473116043855, "learning_rate": 2e-05, "loss": 5.5963, "step": 103 }, { "epoch": 0.0034878846315083423, "grad_norm": 0.4522689780374432, "learning_rate": 2e-05, "loss": 5.5727, "step": 104 }, { "epoch": 0.003521421983734384, "grad_norm": 0.4192491213250186, "learning_rate": 2e-05, "loss": 5.6995, "step": 105 }, { "epoch": 0.003554959335960426, "grad_norm": 0.427072558846705, "learning_rate": 2e-05, "loss": 5.7553, "step": 106 }, { "epoch": 0.0035884966881864675, "grad_norm": 0.44694771218907925, "learning_rate": 2e-05, "loss": 5.6591, "step": 107 }, { "epoch": 0.0036220340404125094, "grad_norm": 0.4380605890667054, "learning_rate": 2e-05, "loss": 5.5089, "step": 108 }, { "epoch": 0.0036555713926385513, "grad_norm": 0.44989160548122265, "learning_rate": 2e-05, "loss": 5.3848, "step": 109 }, { "epoch": 0.003689108744864593, "grad_norm": 0.4291011020495119, "learning_rate": 2e-05, "loss": 5.5768, "step": 110 }, { "epoch": 0.0037226460970906346, "grad_norm": 0.44611968960578036, "learning_rate": 2e-05, "loss": 5.5671, "step": 111 }, { "epoch": 0.0037561834493166765, "grad_norm": 0.5066468174284108, "learning_rate": 2e-05, "loss": 5.4952, "step": 112 }, { "epoch": 0.0037897208015427184, "grad_norm": 0.4081051223241444, "learning_rate": 2e-05, "loss": 5.4056, "step": 113 }, { "epoch": 0.00382325815376876, "grad_norm": 0.3953294026370148, "learning_rate": 2e-05, "loss": 5.5239, "step": 114 }, { "epoch": 0.0038567955059948017, "grad_norm": 0.3994495279360735, "learning_rate": 2e-05, "loss": 5.5914, "step": 115 }, { "epoch": 0.0038903328582208436, "grad_norm": 0.4176310866900785, "learning_rate": 2e-05, "loss": 5.6593, "step": 116 }, { "epoch": 0.003923870210446885, "grad_norm": 0.4111798591703112, "learning_rate": 2e-05, "loss": 5.6275, "step": 117 }, { "epoch": 0.003957407562672927, "grad_norm": 0.4005605431497667, "learning_rate": 2e-05, "loss": 5.3548, "step": 118 }, { "epoch": 0.003990944914898969, "grad_norm": 0.44600304413783043, "learning_rate": 2e-05, "loss": 5.5314, "step": 119 }, { "epoch": 0.004024482267125011, "grad_norm": 0.4465688314773722, "learning_rate": 2e-05, "loss": 5.3714, "step": 120 }, { "epoch": 0.004058019619351053, "grad_norm": 0.4473094056023053, "learning_rate": 2e-05, "loss": 5.6593, "step": 121 }, { "epoch": 0.004091556971577094, "grad_norm": 0.418529376028409, "learning_rate": 2e-05, "loss": 5.5527, "step": 122 }, { "epoch": 0.0041250943238031355, "grad_norm": 0.4547762120582528, "learning_rate": 2e-05, "loss": 5.4279, "step": 123 }, { "epoch": 0.004158631676029177, "grad_norm": 0.43939362923506153, "learning_rate": 2e-05, "loss": 5.6617, "step": 124 }, { "epoch": 0.004192169028255219, "grad_norm": 0.40697978542740815, "learning_rate": 2e-05, "loss": 5.5931, "step": 125 }, { "epoch": 0.004225706380481261, "grad_norm": 0.3936000090551644, "learning_rate": 2e-05, "loss": 5.5617, "step": 126 }, { "epoch": 0.004259243732707303, "grad_norm": 0.3971998465254591, "learning_rate": 2e-05, "loss": 5.5551, "step": 127 }, { "epoch": 0.004292781084933345, "grad_norm": 0.41952653198565065, "learning_rate": 2e-05, "loss": 5.456, "step": 128 }, { "epoch": 0.004326318437159386, "grad_norm": 0.4301983130786015, "learning_rate": 2e-05, "loss": 5.5623, "step": 129 }, { "epoch": 0.004359855789385428, "grad_norm": 0.41820627853231357, "learning_rate": 2e-05, "loss": 5.5715, "step": 130 }, { "epoch": 0.00439339314161147, "grad_norm": 0.3897079945549203, "learning_rate": 2e-05, "loss": 5.6423, "step": 131 }, { "epoch": 0.004426930493837512, "grad_norm": 0.4407392481660704, "learning_rate": 2e-05, "loss": 5.6665, "step": 132 }, { "epoch": 0.0044604678460635535, "grad_norm": 0.40314436707479534, "learning_rate": 2e-05, "loss": 5.6073, "step": 133 }, { "epoch": 0.004494005198289595, "grad_norm": 0.39742169584511206, "learning_rate": 2e-05, "loss": 5.7563, "step": 134 }, { "epoch": 0.004527542550515636, "grad_norm": 0.43362519393508997, "learning_rate": 2e-05, "loss": 5.704, "step": 135 }, { "epoch": 0.004561079902741678, "grad_norm": 0.4295947682900309, "learning_rate": 2e-05, "loss": 5.5329, "step": 136 }, { "epoch": 0.00459461725496772, "grad_norm": 0.3955339637150672, "learning_rate": 2e-05, "loss": 5.5541, "step": 137 }, { "epoch": 0.004628154607193762, "grad_norm": 0.4059629641054568, "learning_rate": 2e-05, "loss": 5.6651, "step": 138 }, { "epoch": 0.004661691959419804, "grad_norm": 0.3938323113858081, "learning_rate": 2e-05, "loss": 5.6397, "step": 139 }, { "epoch": 0.004695229311645846, "grad_norm": 0.39370337292192087, "learning_rate": 2e-05, "loss": 5.4025, "step": 140 }, { "epoch": 0.004728766663871888, "grad_norm": 0.41747284718722666, "learning_rate": 2e-05, "loss": 5.6714, "step": 141 }, { "epoch": 0.004762304016097929, "grad_norm": 0.40973910940343633, "learning_rate": 2e-05, "loss": 5.3671, "step": 142 }, { "epoch": 0.004795841368323971, "grad_norm": 0.39977638936819754, "learning_rate": 2e-05, "loss": 5.6352, "step": 143 }, { "epoch": 0.0048293787205500125, "grad_norm": 0.39208237160419646, "learning_rate": 2e-05, "loss": 5.6674, "step": 144 }, { "epoch": 0.004862916072776054, "grad_norm": 0.39184018762328887, "learning_rate": 2e-05, "loss": 5.5451, "step": 145 }, { "epoch": 0.004896453425002096, "grad_norm": 0.4061295385087274, "learning_rate": 2e-05, "loss": 5.5949, "step": 146 }, { "epoch": 0.004929990777228138, "grad_norm": 0.4297261986048883, "learning_rate": 2e-05, "loss": 5.5987, "step": 147 }, { "epoch": 0.004963528129454179, "grad_norm": 0.43178520278358024, "learning_rate": 2e-05, "loss": 5.7148, "step": 148 }, { "epoch": 0.004997065481680221, "grad_norm": 0.40979985776582184, "learning_rate": 2e-05, "loss": 5.638, "step": 149 }, { "epoch": 0.005030602833906263, "grad_norm": 0.3996657155977001, "learning_rate": 2e-05, "loss": 5.4722, "step": 150 }, { "epoch": 0.005064140186132305, "grad_norm": 0.4314038996854644, "learning_rate": 2e-05, "loss": 5.4869, "step": 151 }, { "epoch": 0.005097677538358347, "grad_norm": 0.42453088273356404, "learning_rate": 2e-05, "loss": 5.6995, "step": 152 }, { "epoch": 0.005131214890584389, "grad_norm": 0.4100512590907224, "learning_rate": 2e-05, "loss": 5.5312, "step": 153 }, { "epoch": 0.0051647522428104305, "grad_norm": 0.4694861977617328, "learning_rate": 2e-05, "loss": 5.4138, "step": 154 }, { "epoch": 0.0051982895950364715, "grad_norm": 0.41427961613059366, "learning_rate": 2e-05, "loss": 5.688, "step": 155 }, { "epoch": 0.005231826947262513, "grad_norm": 0.4119501500374492, "learning_rate": 2e-05, "loss": 5.5168, "step": 156 }, { "epoch": 0.005265364299488555, "grad_norm": 0.4338618599237253, "learning_rate": 2e-05, "loss": 5.3425, "step": 157 }, { "epoch": 0.005298901651714597, "grad_norm": 0.4299856426287997, "learning_rate": 2e-05, "loss": 5.5496, "step": 158 }, { "epoch": 0.005332439003940639, "grad_norm": 0.40537078362657764, "learning_rate": 2e-05, "loss": 5.7385, "step": 159 }, { "epoch": 0.005365976356166681, "grad_norm": 0.45061415317778303, "learning_rate": 2e-05, "loss": 5.5875, "step": 160 }, { "epoch": 0.005399513708392722, "grad_norm": 0.41060648341614203, "learning_rate": 2e-05, "loss": 5.7011, "step": 161 }, { "epoch": 0.005433051060618764, "grad_norm": 0.3999213051991609, "learning_rate": 2e-05, "loss": 5.5916, "step": 162 }, { "epoch": 0.005466588412844806, "grad_norm": 0.4445263524943175, "learning_rate": 2e-05, "loss": 5.4867, "step": 163 }, { "epoch": 0.005500125765070848, "grad_norm": 0.43154533157777997, "learning_rate": 2e-05, "loss": 5.5741, "step": 164 }, { "epoch": 0.0055336631172968895, "grad_norm": 0.4135174859109835, "learning_rate": 2e-05, "loss": 5.5195, "step": 165 }, { "epoch": 0.005567200469522931, "grad_norm": 0.39139773769832337, "learning_rate": 2e-05, "loss": 5.5354, "step": 166 }, { "epoch": 0.005600737821748973, "grad_norm": 0.45944818791863146, "learning_rate": 2e-05, "loss": 5.6447, "step": 167 }, { "epoch": 0.005634275173975014, "grad_norm": 0.43073699589227843, "learning_rate": 2e-05, "loss": 5.6184, "step": 168 }, { "epoch": 0.005667812526201056, "grad_norm": 0.3941284861073409, "learning_rate": 2e-05, "loss": 5.7505, "step": 169 }, { "epoch": 0.005701349878427098, "grad_norm": 0.4248290447502161, "learning_rate": 2e-05, "loss": 5.4997, "step": 170 }, { "epoch": 0.00573488723065314, "grad_norm": 0.4407523369221954, "learning_rate": 2e-05, "loss": 5.3855, "step": 171 }, { "epoch": 0.005768424582879182, "grad_norm": 0.40846518695441975, "learning_rate": 2e-05, "loss": 5.5658, "step": 172 }, { "epoch": 0.005801961935105224, "grad_norm": 0.38934757703614187, "learning_rate": 2e-05, "loss": 5.5596, "step": 173 }, { "epoch": 0.005835499287331266, "grad_norm": 0.4032317083358054, "learning_rate": 2e-05, "loss": 5.6006, "step": 174 }, { "epoch": 0.005869036639557307, "grad_norm": 0.42501294614138796, "learning_rate": 2e-05, "loss": 5.6626, "step": 175 }, { "epoch": 0.0059025739917833485, "grad_norm": 0.41335608962761927, "learning_rate": 2e-05, "loss": 5.6551, "step": 176 }, { "epoch": 0.00593611134400939, "grad_norm": 0.40525960428744046, "learning_rate": 2e-05, "loss": 5.5393, "step": 177 }, { "epoch": 0.005969648696235432, "grad_norm": 0.42441020068666796, "learning_rate": 2e-05, "loss": 5.4983, "step": 178 }, { "epoch": 0.006003186048461474, "grad_norm": 0.4101562927522332, "learning_rate": 2e-05, "loss": 5.5372, "step": 179 }, { "epoch": 0.006036723400687516, "grad_norm": 0.4286945671817976, "learning_rate": 2e-05, "loss": 5.5464, "step": 180 }, { "epoch": 0.006070260752913557, "grad_norm": 0.40802245269057813, "learning_rate": 2e-05, "loss": 5.2705, "step": 181 }, { "epoch": 0.006103798105139599, "grad_norm": 0.3976514336402166, "learning_rate": 2e-05, "loss": 5.8092, "step": 182 }, { "epoch": 0.006137335457365641, "grad_norm": 0.40360547684356696, "learning_rate": 2e-05, "loss": 5.5354, "step": 183 }, { "epoch": 0.006170872809591683, "grad_norm": 0.4194894270517626, "learning_rate": 2e-05, "loss": 5.468, "step": 184 }, { "epoch": 0.006204410161817725, "grad_norm": 0.4043461728806453, "learning_rate": 2e-05, "loss": 5.6872, "step": 185 }, { "epoch": 0.0062379475140437665, "grad_norm": 0.39838006143098986, "learning_rate": 2e-05, "loss": 5.315, "step": 186 }, { "epoch": 0.006271484866269808, "grad_norm": 0.42228702949509883, "learning_rate": 2e-05, "loss": 5.6447, "step": 187 }, { "epoch": 0.006305022218495849, "grad_norm": 0.3918059515114469, "learning_rate": 2e-05, "loss": 5.5102, "step": 188 }, { "epoch": 0.006338559570721891, "grad_norm": 0.44111661966245613, "learning_rate": 2e-05, "loss": 5.8413, "step": 189 }, { "epoch": 0.006372096922947933, "grad_norm": 0.440035485887414, "learning_rate": 2e-05, "loss": 5.5947, "step": 190 }, { "epoch": 0.006405634275173975, "grad_norm": 0.4201449245789015, "learning_rate": 2e-05, "loss": 5.5543, "step": 191 }, { "epoch": 0.006439171627400017, "grad_norm": 0.45238052138182716, "learning_rate": 2e-05, "loss": 5.6428, "step": 192 }, { "epoch": 0.006472708979626059, "grad_norm": 0.4201175201127823, "learning_rate": 2e-05, "loss": 5.6906, "step": 193 }, { "epoch": 0.0065062463318521, "grad_norm": 0.4351730828279368, "learning_rate": 2e-05, "loss": 5.7039, "step": 194 }, { "epoch": 0.006539783684078142, "grad_norm": 0.4383918105698068, "learning_rate": 2e-05, "loss": 5.5125, "step": 195 }, { "epoch": 0.006573321036304184, "grad_norm": 0.42454539149806647, "learning_rate": 2e-05, "loss": 5.6215, "step": 196 }, { "epoch": 0.0066068583885302255, "grad_norm": 0.41188520164618975, "learning_rate": 2e-05, "loss": 5.7701, "step": 197 }, { "epoch": 0.006640395740756267, "grad_norm": 0.418625245823402, "learning_rate": 2e-05, "loss": 5.4911, "step": 198 }, { "epoch": 0.006673933092982309, "grad_norm": 0.3898335503613395, "learning_rate": 2e-05, "loss": 5.6324, "step": 199 }, { "epoch": 0.006707470445208351, "grad_norm": 0.3989128757642201, "learning_rate": 2e-05, "loss": 5.5926, "step": 200 }, { "epoch": 0.006741007797434392, "grad_norm": 0.4454832494111782, "learning_rate": 2e-05, "loss": 5.563, "step": 201 }, { "epoch": 0.006774545149660434, "grad_norm": 0.42509736281166594, "learning_rate": 2e-05, "loss": 5.6052, "step": 202 }, { "epoch": 0.006808082501886476, "grad_norm": 0.4524885865944321, "learning_rate": 2e-05, "loss": 5.7361, "step": 203 }, { "epoch": 0.006841619854112518, "grad_norm": 0.44953049070297446, "learning_rate": 2e-05, "loss": 5.5562, "step": 204 }, { "epoch": 0.00687515720633856, "grad_norm": 0.4090890116791131, "learning_rate": 2e-05, "loss": 5.6451, "step": 205 }, { "epoch": 0.006908694558564602, "grad_norm": 0.41729839490229803, "learning_rate": 2e-05, "loss": 5.759, "step": 206 }, { "epoch": 0.006942231910790643, "grad_norm": 0.4219497230581399, "learning_rate": 2e-05, "loss": 5.6211, "step": 207 }, { "epoch": 0.0069757692630166845, "grad_norm": 0.4285805276798519, "learning_rate": 2e-05, "loss": 5.6648, "step": 208 }, { "epoch": 0.007009306615242726, "grad_norm": 0.46170382041170693, "learning_rate": 2e-05, "loss": 5.6154, "step": 209 }, { "epoch": 0.007042843967468768, "grad_norm": 0.42077353967606007, "learning_rate": 2e-05, "loss": 5.5566, "step": 210 }, { "epoch": 0.00707638131969481, "grad_norm": 0.3904402931094, "learning_rate": 2e-05, "loss": 5.5195, "step": 211 }, { "epoch": 0.007109918671920852, "grad_norm": 0.4091208054968495, "learning_rate": 2e-05, "loss": 5.6191, "step": 212 }, { "epoch": 0.007143456024146894, "grad_norm": 0.4033860235223257, "learning_rate": 2e-05, "loss": 5.665, "step": 213 }, { "epoch": 0.007176993376372935, "grad_norm": 0.41395649938165, "learning_rate": 2e-05, "loss": 5.594, "step": 214 }, { "epoch": 0.007210530728598977, "grad_norm": 0.41493892640311913, "learning_rate": 2e-05, "loss": 5.448, "step": 215 }, { "epoch": 0.007244068080825019, "grad_norm": 0.4110965629379078, "learning_rate": 2e-05, "loss": 5.5328, "step": 216 }, { "epoch": 0.007277605433051061, "grad_norm": 0.41750591386172164, "learning_rate": 2e-05, "loss": 5.4606, "step": 217 }, { "epoch": 0.0073111427852771025, "grad_norm": 0.4052665517463922, "learning_rate": 2e-05, "loss": 5.5708, "step": 218 }, { "epoch": 0.007344680137503144, "grad_norm": 0.41835888565907664, "learning_rate": 2e-05, "loss": 5.6331, "step": 219 }, { "epoch": 0.007378217489729186, "grad_norm": 0.41504864231383515, "learning_rate": 2e-05, "loss": 5.7216, "step": 220 }, { "epoch": 0.007411754841955227, "grad_norm": 0.40466609833214134, "learning_rate": 2e-05, "loss": 5.5633, "step": 221 }, { "epoch": 0.007445292194181269, "grad_norm": 0.39325701697546805, "learning_rate": 2e-05, "loss": 5.6341, "step": 222 }, { "epoch": 0.007478829546407311, "grad_norm": 0.42100102307503534, "learning_rate": 2e-05, "loss": 5.4596, "step": 223 }, { "epoch": 0.007512366898633353, "grad_norm": 0.45876234720412906, "learning_rate": 2e-05, "loss": 5.4215, "step": 224 }, { "epoch": 0.007545904250859395, "grad_norm": 0.4101062785607263, "learning_rate": 2e-05, "loss": 5.49, "step": 225 }, { "epoch": 0.007579441603085437, "grad_norm": 0.4052655338512672, "learning_rate": 2e-05, "loss": 5.4991, "step": 226 }, { "epoch": 0.007612978955311478, "grad_norm": 0.4269510621785581, "learning_rate": 2e-05, "loss": 5.5395, "step": 227 }, { "epoch": 0.00764651630753752, "grad_norm": 0.41140239688128905, "learning_rate": 2e-05, "loss": 5.5483, "step": 228 }, { "epoch": 0.0076800536597635615, "grad_norm": 0.40854769841859767, "learning_rate": 2e-05, "loss": 5.6522, "step": 229 }, { "epoch": 0.007713591011989603, "grad_norm": 0.4257486653320879, "learning_rate": 2e-05, "loss": 5.5912, "step": 230 }, { "epoch": 0.007747128364215645, "grad_norm": 0.4300290936992129, "learning_rate": 2e-05, "loss": 5.6559, "step": 231 }, { "epoch": 0.007780665716441687, "grad_norm": 0.4112178493048699, "learning_rate": 2e-05, "loss": 5.6605, "step": 232 }, { "epoch": 0.007814203068667728, "grad_norm": 0.422109835226696, "learning_rate": 2e-05, "loss": 5.6388, "step": 233 }, { "epoch": 0.00784774042089377, "grad_norm": 0.41890430044821375, "learning_rate": 2e-05, "loss": 5.5997, "step": 234 }, { "epoch": 0.007881277773119812, "grad_norm": 0.4326356241454007, "learning_rate": 2e-05, "loss": 5.4567, "step": 235 }, { "epoch": 0.007914815125345854, "grad_norm": 0.46484757151180933, "learning_rate": 2e-05, "loss": 5.5264, "step": 236 }, { "epoch": 0.007948352477571896, "grad_norm": 0.4405521868602718, "learning_rate": 2e-05, "loss": 5.602, "step": 237 }, { "epoch": 0.007981889829797938, "grad_norm": 0.40056386027624424, "learning_rate": 2e-05, "loss": 5.5247, "step": 238 }, { "epoch": 0.00801542718202398, "grad_norm": 0.4256903922572069, "learning_rate": 2e-05, "loss": 5.6349, "step": 239 }, { "epoch": 0.008048964534250021, "grad_norm": 0.41916683700827106, "learning_rate": 2e-05, "loss": 5.7016, "step": 240 }, { "epoch": 0.008082501886476063, "grad_norm": 0.41798722638581287, "learning_rate": 2e-05, "loss": 5.5519, "step": 241 }, { "epoch": 0.008116039238702105, "grad_norm": 0.4154247193336292, "learning_rate": 2e-05, "loss": 5.4048, "step": 242 }, { "epoch": 0.008149576590928147, "grad_norm": 0.44240889981956005, "learning_rate": 2e-05, "loss": 5.4543, "step": 243 }, { "epoch": 0.008183113943154187, "grad_norm": 0.43750847434293627, "learning_rate": 2e-05, "loss": 5.7863, "step": 244 }, { "epoch": 0.008216651295380229, "grad_norm": 0.41586419044461936, "learning_rate": 2e-05, "loss": 5.5277, "step": 245 }, { "epoch": 0.008250188647606271, "grad_norm": 0.4100774864884577, "learning_rate": 2e-05, "loss": 5.4608, "step": 246 }, { "epoch": 0.008283725999832313, "grad_norm": 0.43533879149877147, "learning_rate": 2e-05, "loss": 5.5069, "step": 247 }, { "epoch": 0.008317263352058355, "grad_norm": 0.43544322867001095, "learning_rate": 2e-05, "loss": 5.6092, "step": 248 }, { "epoch": 0.008350800704284397, "grad_norm": 0.4173911261785612, "learning_rate": 2e-05, "loss": 5.586, "step": 249 }, { "epoch": 0.008384338056510439, "grad_norm": 0.43430905062051656, "learning_rate": 2e-05, "loss": 5.6178, "step": 250 }, { "epoch": 0.00841787540873648, "grad_norm": 0.42581719213831914, "learning_rate": 2e-05, "loss": 5.3553, "step": 251 }, { "epoch": 0.008451412760962522, "grad_norm": 0.4030483085578603, "learning_rate": 2e-05, "loss": 5.723, "step": 252 }, { "epoch": 0.008484950113188564, "grad_norm": 0.3932180067029237, "learning_rate": 2e-05, "loss": 5.5302, "step": 253 }, { "epoch": 0.008518487465414606, "grad_norm": 0.44061154225146837, "learning_rate": 2e-05, "loss": 5.5846, "step": 254 }, { "epoch": 0.008552024817640648, "grad_norm": 0.43180532679974853, "learning_rate": 2e-05, "loss": 5.3845, "step": 255 }, { "epoch": 0.00858556216986669, "grad_norm": 0.4006864095565577, "learning_rate": 2e-05, "loss": 5.7311, "step": 256 }, { "epoch": 0.00861909952209273, "grad_norm": 0.4706600715743998, "learning_rate": 2e-05, "loss": 5.6288, "step": 257 }, { "epoch": 0.008652636874318772, "grad_norm": 0.4383327543147816, "learning_rate": 2e-05, "loss": 5.5382, "step": 258 }, { "epoch": 0.008686174226544814, "grad_norm": 0.3944405324353416, "learning_rate": 2e-05, "loss": 5.5441, "step": 259 }, { "epoch": 0.008719711578770856, "grad_norm": 0.4300509209978646, "learning_rate": 2e-05, "loss": 5.7021, "step": 260 }, { "epoch": 0.008753248930996898, "grad_norm": 0.46453589179591137, "learning_rate": 2e-05, "loss": 5.5529, "step": 261 }, { "epoch": 0.00878678628322294, "grad_norm": 0.4027951808636551, "learning_rate": 2e-05, "loss": 5.5424, "step": 262 }, { "epoch": 0.008820323635448981, "grad_norm": 0.4307321940698793, "learning_rate": 2e-05, "loss": 5.5839, "step": 263 }, { "epoch": 0.008853860987675023, "grad_norm": 0.4093314267982799, "learning_rate": 2e-05, "loss": 5.6806, "step": 264 }, { "epoch": 0.008887398339901065, "grad_norm": 0.41675530548083306, "learning_rate": 2e-05, "loss": 5.5454, "step": 265 }, { "epoch": 0.008920935692127107, "grad_norm": 0.4025893193079304, "learning_rate": 2e-05, "loss": 5.4106, "step": 266 }, { "epoch": 0.008954473044353149, "grad_norm": 0.4309204337417145, "learning_rate": 2e-05, "loss": 5.6103, "step": 267 }, { "epoch": 0.00898801039657919, "grad_norm": 0.40795049486813084, "learning_rate": 2e-05, "loss": 5.5547, "step": 268 }, { "epoch": 0.009021547748805233, "grad_norm": 0.4070576113097532, "learning_rate": 2e-05, "loss": 5.5972, "step": 269 }, { "epoch": 0.009055085101031273, "grad_norm": 0.4148586414669435, "learning_rate": 2e-05, "loss": 5.6097, "step": 270 }, { "epoch": 0.009088622453257315, "grad_norm": 0.411816346394655, "learning_rate": 2e-05, "loss": 5.6812, "step": 271 }, { "epoch": 0.009122159805483357, "grad_norm": 0.449201167226067, "learning_rate": 2e-05, "loss": 5.632, "step": 272 }, { "epoch": 0.009155697157709398, "grad_norm": 0.4170172837404563, "learning_rate": 2e-05, "loss": 5.6422, "step": 273 }, { "epoch": 0.00918923450993544, "grad_norm": 0.42344698956648563, "learning_rate": 2e-05, "loss": 5.534, "step": 274 }, { "epoch": 0.009222771862161482, "grad_norm": 0.40485839922864847, "learning_rate": 2e-05, "loss": 5.5321, "step": 275 }, { "epoch": 0.009256309214387524, "grad_norm": 0.4440560649620361, "learning_rate": 2e-05, "loss": 5.5981, "step": 276 }, { "epoch": 0.009289846566613566, "grad_norm": 0.40520075428341223, "learning_rate": 2e-05, "loss": 5.5377, "step": 277 }, { "epoch": 0.009323383918839608, "grad_norm": 0.3753670457187734, "learning_rate": 2e-05, "loss": 5.5143, "step": 278 }, { "epoch": 0.00935692127106565, "grad_norm": 0.41585527814305845, "learning_rate": 2e-05, "loss": 5.6945, "step": 279 }, { "epoch": 0.009390458623291692, "grad_norm": 0.4251128695728642, "learning_rate": 2e-05, "loss": 5.5199, "step": 280 }, { "epoch": 0.009423995975517734, "grad_norm": 0.41273520446696843, "learning_rate": 2e-05, "loss": 5.6003, "step": 281 }, { "epoch": 0.009457533327743775, "grad_norm": 0.40588161937106426, "learning_rate": 2e-05, "loss": 5.4964, "step": 282 }, { "epoch": 0.009491070679969816, "grad_norm": 0.422516417506846, "learning_rate": 2e-05, "loss": 5.6473, "step": 283 }, { "epoch": 0.009524608032195857, "grad_norm": 0.4330265805864069, "learning_rate": 2e-05, "loss": 5.6537, "step": 284 }, { "epoch": 0.0095581453844219, "grad_norm": 0.4381693539047342, "learning_rate": 2e-05, "loss": 5.5514, "step": 285 }, { "epoch": 0.009591682736647941, "grad_norm": 0.4371125251900529, "learning_rate": 2e-05, "loss": 5.4794, "step": 286 }, { "epoch": 0.009625220088873983, "grad_norm": 0.43048411950720145, "learning_rate": 2e-05, "loss": 5.5301, "step": 287 }, { "epoch": 0.009658757441100025, "grad_norm": 0.4171698785307164, "learning_rate": 2e-05, "loss": 5.4307, "step": 288 }, { "epoch": 0.009692294793326067, "grad_norm": 0.4149672011785158, "learning_rate": 2e-05, "loss": 5.6651, "step": 289 }, { "epoch": 0.009725832145552109, "grad_norm": 0.4466233411033108, "learning_rate": 2e-05, "loss": 5.6324, "step": 290 }, { "epoch": 0.00975936949777815, "grad_norm": 0.4535285456072735, "learning_rate": 2e-05, "loss": 5.5775, "step": 291 }, { "epoch": 0.009792906850004193, "grad_norm": 0.4026614697493199, "learning_rate": 2e-05, "loss": 5.5323, "step": 292 }, { "epoch": 0.009826444202230234, "grad_norm": 0.45053836377063106, "learning_rate": 2e-05, "loss": 5.5866, "step": 293 }, { "epoch": 0.009859981554456276, "grad_norm": 0.4496980820421004, "learning_rate": 2e-05, "loss": 5.6812, "step": 294 }, { "epoch": 0.009893518906682318, "grad_norm": 0.442976007456296, "learning_rate": 2e-05, "loss": 5.5804, "step": 295 }, { "epoch": 0.009927056258908358, "grad_norm": 0.4407491773241772, "learning_rate": 2e-05, "loss": 5.524, "step": 296 }, { "epoch": 0.0099605936111344, "grad_norm": 0.4435295958393341, "learning_rate": 2e-05, "loss": 5.7361, "step": 297 }, { "epoch": 0.009994130963360442, "grad_norm": 0.4468664871594879, "learning_rate": 2e-05, "loss": 5.4967, "step": 298 }, { "epoch": 0.010027668315586484, "grad_norm": 0.4480394220813889, "learning_rate": 2e-05, "loss": 5.6413, "step": 299 }, { "epoch": 0.010061205667812526, "grad_norm": 0.4398685641197282, "learning_rate": 2e-05, "loss": 5.7069, "step": 300 }, { "epoch": 0.010094743020038568, "grad_norm": 0.4529334799832976, "learning_rate": 2e-05, "loss": 5.4007, "step": 301 }, { "epoch": 0.01012828037226461, "grad_norm": 0.4636673838917556, "learning_rate": 2e-05, "loss": 5.8333, "step": 302 }, { "epoch": 0.010161817724490652, "grad_norm": 0.4535006789732892, "learning_rate": 2e-05, "loss": 5.6628, "step": 303 }, { "epoch": 0.010195355076716693, "grad_norm": 0.464069332641398, "learning_rate": 2e-05, "loss": 5.7919, "step": 304 }, { "epoch": 0.010228892428942735, "grad_norm": 0.4476400071771559, "learning_rate": 2e-05, "loss": 5.6671, "step": 305 }, { "epoch": 0.010262429781168777, "grad_norm": 0.4398663933242504, "learning_rate": 2e-05, "loss": 5.6925, "step": 306 }, { "epoch": 0.010295967133394819, "grad_norm": 0.4360331588271897, "learning_rate": 2e-05, "loss": 5.5356, "step": 307 }, { "epoch": 0.010329504485620861, "grad_norm": 0.4345940278093373, "learning_rate": 2e-05, "loss": 5.6642, "step": 308 }, { "epoch": 0.010363041837846901, "grad_norm": 0.4115403097300204, "learning_rate": 2e-05, "loss": 5.7305, "step": 309 }, { "epoch": 0.010396579190072943, "grad_norm": 0.40720148599794254, "learning_rate": 2e-05, "loss": 5.5019, "step": 310 }, { "epoch": 0.010430116542298985, "grad_norm": 0.42670446363838566, "learning_rate": 2e-05, "loss": 5.5486, "step": 311 }, { "epoch": 0.010463653894525027, "grad_norm": 0.442026646248303, "learning_rate": 2e-05, "loss": 5.5445, "step": 312 }, { "epoch": 0.010497191246751069, "grad_norm": 0.4292305304057498, "learning_rate": 2e-05, "loss": 5.6943, "step": 313 }, { "epoch": 0.01053072859897711, "grad_norm": 0.4344917138673437, "learning_rate": 2e-05, "loss": 5.6975, "step": 314 }, { "epoch": 0.010564265951203152, "grad_norm": 0.4022428844378193, "learning_rate": 2e-05, "loss": 5.3023, "step": 315 }, { "epoch": 0.010597803303429194, "grad_norm": 0.4738593052186973, "learning_rate": 2e-05, "loss": 5.6353, "step": 316 }, { "epoch": 0.010631340655655236, "grad_norm": 0.4624934993071807, "learning_rate": 2e-05, "loss": 5.6523, "step": 317 }, { "epoch": 0.010664878007881278, "grad_norm": 0.4251922098639976, "learning_rate": 2e-05, "loss": 5.5813, "step": 318 }, { "epoch": 0.01069841536010732, "grad_norm": 0.39751638045543286, "learning_rate": 2e-05, "loss": 5.5859, "step": 319 }, { "epoch": 0.010731952712333362, "grad_norm": 0.4303127268256086, "learning_rate": 2e-05, "loss": 5.5781, "step": 320 }, { "epoch": 0.010765490064559404, "grad_norm": 0.4507753716716485, "learning_rate": 2e-05, "loss": 5.4769, "step": 321 }, { "epoch": 0.010799027416785444, "grad_norm": 0.44336841706257785, "learning_rate": 2e-05, "loss": 5.725, "step": 322 }, { "epoch": 0.010832564769011486, "grad_norm": 0.44030709739385754, "learning_rate": 2e-05, "loss": 5.4302, "step": 323 }, { "epoch": 0.010866102121237528, "grad_norm": 0.42885599105430683, "learning_rate": 2e-05, "loss": 5.7514, "step": 324 }, { "epoch": 0.01089963947346357, "grad_norm": 0.42786988417494265, "learning_rate": 2e-05, "loss": 5.7649, "step": 325 }, { "epoch": 0.010933176825689611, "grad_norm": 0.4034397146937999, "learning_rate": 2e-05, "loss": 5.5463, "step": 326 }, { "epoch": 0.010966714177915653, "grad_norm": 0.4150506273141199, "learning_rate": 2e-05, "loss": 5.7065, "step": 327 }, { "epoch": 0.011000251530141695, "grad_norm": 0.40538997605494903, "learning_rate": 2e-05, "loss": 5.5643, "step": 328 }, { "epoch": 0.011033788882367737, "grad_norm": 0.423426609481288, "learning_rate": 2e-05, "loss": 5.641, "step": 329 }, { "epoch": 0.011067326234593779, "grad_norm": 0.4131075087645795, "learning_rate": 2e-05, "loss": 5.5638, "step": 330 }, { "epoch": 0.011100863586819821, "grad_norm": 0.4315632016600327, "learning_rate": 2e-05, "loss": 5.3679, "step": 331 }, { "epoch": 0.011134400939045863, "grad_norm": 0.4523062473532256, "learning_rate": 2e-05, "loss": 5.4538, "step": 332 }, { "epoch": 0.011167938291271905, "grad_norm": 0.4042354463934684, "learning_rate": 2e-05, "loss": 5.7865, "step": 333 }, { "epoch": 0.011201475643497947, "grad_norm": 0.4072430416008952, "learning_rate": 2e-05, "loss": 5.6849, "step": 334 }, { "epoch": 0.011235012995723988, "grad_norm": 0.44159651219449025, "learning_rate": 2e-05, "loss": 5.5565, "step": 335 }, { "epoch": 0.011268550347950029, "grad_norm": 0.42177427762954356, "learning_rate": 2e-05, "loss": 5.37, "step": 336 }, { "epoch": 0.01130208770017607, "grad_norm": 0.4030016402925038, "learning_rate": 2e-05, "loss": 5.657, "step": 337 }, { "epoch": 0.011335625052402112, "grad_norm": 0.4330702613196526, "learning_rate": 2e-05, "loss": 5.6006, "step": 338 }, { "epoch": 0.011369162404628154, "grad_norm": 0.4087306517829942, "learning_rate": 2e-05, "loss": 5.5106, "step": 339 }, { "epoch": 0.011402699756854196, "grad_norm": 0.4073062701032165, "learning_rate": 2e-05, "loss": 5.6065, "step": 340 }, { "epoch": 0.011436237109080238, "grad_norm": 0.4327630303329693, "learning_rate": 2e-05, "loss": 5.6611, "step": 341 }, { "epoch": 0.01146977446130628, "grad_norm": 0.4091708718833069, "learning_rate": 2e-05, "loss": 5.7848, "step": 342 }, { "epoch": 0.011503311813532322, "grad_norm": 0.4004732178524433, "learning_rate": 2e-05, "loss": 5.5077, "step": 343 }, { "epoch": 0.011536849165758364, "grad_norm": 0.4276056794042364, "learning_rate": 2e-05, "loss": 5.5397, "step": 344 }, { "epoch": 0.011570386517984406, "grad_norm": 0.4160025764134318, "learning_rate": 2e-05, "loss": 5.71, "step": 345 }, { "epoch": 0.011603923870210447, "grad_norm": 0.4359147982616995, "learning_rate": 2e-05, "loss": 5.4763, "step": 346 }, { "epoch": 0.01163746122243649, "grad_norm": 0.4393589305166445, "learning_rate": 2e-05, "loss": 5.3897, "step": 347 }, { "epoch": 0.011670998574662531, "grad_norm": 0.40495352948984953, "learning_rate": 2e-05, "loss": 5.4999, "step": 348 }, { "epoch": 0.011704535926888571, "grad_norm": 0.42768469278060334, "learning_rate": 2e-05, "loss": 5.4529, "step": 349 }, { "epoch": 0.011738073279114613, "grad_norm": 0.4499741999902791, "learning_rate": 2e-05, "loss": 5.5622, "step": 350 }, { "epoch": 0.011771610631340655, "grad_norm": 0.4372633429137295, "learning_rate": 2e-05, "loss": 5.5627, "step": 351 }, { "epoch": 0.011805147983566697, "grad_norm": 0.4322745301540221, "learning_rate": 2e-05, "loss": 5.5354, "step": 352 }, { "epoch": 0.011838685335792739, "grad_norm": 0.4118573067594274, "learning_rate": 2e-05, "loss": 5.6257, "step": 353 }, { "epoch": 0.01187222268801878, "grad_norm": 0.4107751070679113, "learning_rate": 2e-05, "loss": 5.6965, "step": 354 }, { "epoch": 0.011905760040244823, "grad_norm": 0.4144779029778103, "learning_rate": 2e-05, "loss": 5.6455, "step": 355 }, { "epoch": 0.011939297392470865, "grad_norm": 0.4265828309851393, "learning_rate": 2e-05, "loss": 5.3436, "step": 356 }, { "epoch": 0.011972834744696906, "grad_norm": 0.4233505768687414, "learning_rate": 2e-05, "loss": 5.5841, "step": 357 }, { "epoch": 0.012006372096922948, "grad_norm": 0.39861455743153296, "learning_rate": 2e-05, "loss": 5.6313, "step": 358 }, { "epoch": 0.01203990944914899, "grad_norm": 0.3832400055840583, "learning_rate": 2e-05, "loss": 5.763, "step": 359 }, { "epoch": 0.012073446801375032, "grad_norm": 0.39297028234997855, "learning_rate": 2e-05, "loss": 5.6124, "step": 360 }, { "epoch": 0.012106984153601074, "grad_norm": 0.39920260072004965, "learning_rate": 2e-05, "loss": 5.581, "step": 361 }, { "epoch": 0.012140521505827114, "grad_norm": 0.41902408402560454, "learning_rate": 2e-05, "loss": 5.5399, "step": 362 }, { "epoch": 0.012174058858053156, "grad_norm": 0.4052979517419349, "learning_rate": 2e-05, "loss": 5.5122, "step": 363 }, { "epoch": 0.012207596210279198, "grad_norm": 0.424441087516899, "learning_rate": 2e-05, "loss": 5.5492, "step": 364 }, { "epoch": 0.01224113356250524, "grad_norm": 0.40956469320367445, "learning_rate": 2e-05, "loss": 5.7041, "step": 365 }, { "epoch": 0.012274670914731282, "grad_norm": 0.406162356900767, "learning_rate": 2e-05, "loss": 5.5549, "step": 366 }, { "epoch": 0.012308208266957324, "grad_norm": 0.4293587954272353, "learning_rate": 2e-05, "loss": 5.5622, "step": 367 }, { "epoch": 0.012341745619183365, "grad_norm": 0.42422598547738266, "learning_rate": 2e-05, "loss": 5.5192, "step": 368 }, { "epoch": 0.012375282971409407, "grad_norm": 0.40604945265415465, "learning_rate": 2e-05, "loss": 5.5951, "step": 369 }, { "epoch": 0.01240882032363545, "grad_norm": 0.41574400400956935, "learning_rate": 2e-05, "loss": 5.6871, "step": 370 }, { "epoch": 0.012442357675861491, "grad_norm": 0.42872718220163947, "learning_rate": 2e-05, "loss": 5.4001, "step": 371 }, { "epoch": 0.012475895028087533, "grad_norm": 0.4117206597204732, "learning_rate": 2e-05, "loss": 5.5746, "step": 372 }, { "epoch": 0.012509432380313575, "grad_norm": 0.48669152707153995, "learning_rate": 2e-05, "loss": 5.5593, "step": 373 }, { "epoch": 0.012542969732539617, "grad_norm": 0.40733014224532565, "learning_rate": 2e-05, "loss": 5.743, "step": 374 }, { "epoch": 0.012576507084765657, "grad_norm": 0.4468757673132273, "learning_rate": 2e-05, "loss": 5.5145, "step": 375 }, { "epoch": 0.012610044436991699, "grad_norm": 0.4224967598112687, "learning_rate": 2e-05, "loss": 5.6447, "step": 376 }, { "epoch": 0.01264358178921774, "grad_norm": 0.43709354087121227, "learning_rate": 2e-05, "loss": 5.5841, "step": 377 }, { "epoch": 0.012677119141443783, "grad_norm": 0.43912224056895816, "learning_rate": 2e-05, "loss": 5.4497, "step": 378 }, { "epoch": 0.012710656493669825, "grad_norm": 0.4475142474112815, "learning_rate": 2e-05, "loss": 5.5691, "step": 379 }, { "epoch": 0.012744193845895866, "grad_norm": 0.45197220521352693, "learning_rate": 2e-05, "loss": 5.4571, "step": 380 }, { "epoch": 0.012777731198121908, "grad_norm": 0.4063375049611474, "learning_rate": 2e-05, "loss": 5.3242, "step": 381 }, { "epoch": 0.01281126855034795, "grad_norm": 0.4344034283670724, "learning_rate": 2e-05, "loss": 5.7005, "step": 382 }, { "epoch": 0.012844805902573992, "grad_norm": 0.4353804523177557, "learning_rate": 2e-05, "loss": 5.4673, "step": 383 }, { "epoch": 0.012878343254800034, "grad_norm": 0.4245401203172088, "learning_rate": 2e-05, "loss": 5.6902, "step": 384 }, { "epoch": 0.012911880607026076, "grad_norm": 0.4498039082649302, "learning_rate": 2e-05, "loss": 5.5609, "step": 385 }, { "epoch": 0.012945417959252118, "grad_norm": 0.44669300522041605, "learning_rate": 2e-05, "loss": 5.6151, "step": 386 }, { "epoch": 0.01297895531147816, "grad_norm": 0.42167846862887415, "learning_rate": 2e-05, "loss": 5.4526, "step": 387 }, { "epoch": 0.0130124926637042, "grad_norm": 0.41279935557853237, "learning_rate": 2e-05, "loss": 5.5236, "step": 388 }, { "epoch": 0.013046030015930242, "grad_norm": 0.4207934750188208, "learning_rate": 2e-05, "loss": 5.5346, "step": 389 }, { "epoch": 0.013079567368156284, "grad_norm": 0.4216236099014103, "learning_rate": 2e-05, "loss": 5.6407, "step": 390 }, { "epoch": 0.013113104720382325, "grad_norm": 0.4236373343683017, "learning_rate": 2e-05, "loss": 5.437, "step": 391 }, { "epoch": 0.013146642072608367, "grad_norm": 0.4104581887741771, "learning_rate": 2e-05, "loss": 5.5406, "step": 392 }, { "epoch": 0.01318017942483441, "grad_norm": 0.414653503474058, "learning_rate": 2e-05, "loss": 5.4846, "step": 393 }, { "epoch": 0.013213716777060451, "grad_norm": 0.4274181182640965, "learning_rate": 2e-05, "loss": 5.6099, "step": 394 }, { "epoch": 0.013247254129286493, "grad_norm": 0.4320949255724117, "learning_rate": 2e-05, "loss": 5.6526, "step": 395 }, { "epoch": 0.013280791481512535, "grad_norm": 0.4394811806580034, "learning_rate": 2e-05, "loss": 5.5913, "step": 396 }, { "epoch": 0.013314328833738577, "grad_norm": 0.412758010795716, "learning_rate": 2e-05, "loss": 5.7281, "step": 397 }, { "epoch": 0.013347866185964619, "grad_norm": 0.4054281324017042, "learning_rate": 2e-05, "loss": 5.548, "step": 398 }, { "epoch": 0.01338140353819066, "grad_norm": 0.4003618626753835, "learning_rate": 2e-05, "loss": 5.5784, "step": 399 }, { "epoch": 0.013414940890416702, "grad_norm": 0.44352935831719337, "learning_rate": 2e-05, "loss": 5.584, "step": 400 }, { "epoch": 0.013448478242642743, "grad_norm": 0.3963423068450899, "learning_rate": 2e-05, "loss": 5.7361, "step": 401 }, { "epoch": 0.013482015594868784, "grad_norm": 0.41957751976066804, "learning_rate": 2e-05, "loss": 5.6086, "step": 402 }, { "epoch": 0.013515552947094826, "grad_norm": 0.4139826837625454, "learning_rate": 2e-05, "loss": 5.5073, "step": 403 }, { "epoch": 0.013549090299320868, "grad_norm": 0.41237033511806565, "learning_rate": 2e-05, "loss": 5.6718, "step": 404 }, { "epoch": 0.01358262765154691, "grad_norm": 0.43467506312401877, "learning_rate": 2e-05, "loss": 5.5073, "step": 405 }, { "epoch": 0.013616165003772952, "grad_norm": 0.43856608326147617, "learning_rate": 2e-05, "loss": 5.6134, "step": 406 }, { "epoch": 0.013649702355998994, "grad_norm": 0.44457437777702136, "learning_rate": 2e-05, "loss": 5.4124, "step": 407 }, { "epoch": 0.013683239708225036, "grad_norm": 0.38538455477156125, "learning_rate": 2e-05, "loss": 5.6619, "step": 408 }, { "epoch": 0.013716777060451078, "grad_norm": 0.4278747597292341, "learning_rate": 2e-05, "loss": 5.5882, "step": 409 }, { "epoch": 0.01375031441267712, "grad_norm": 0.45553269773056504, "learning_rate": 2e-05, "loss": 5.6823, "step": 410 }, { "epoch": 0.013783851764903161, "grad_norm": 0.4353309094267988, "learning_rate": 2e-05, "loss": 5.6278, "step": 411 }, { "epoch": 0.013817389117129203, "grad_norm": 0.42578360128333037, "learning_rate": 2e-05, "loss": 5.5776, "step": 412 }, { "epoch": 0.013850926469355245, "grad_norm": 0.41203230541196095, "learning_rate": 2e-05, "loss": 5.5199, "step": 413 }, { "epoch": 0.013884463821581285, "grad_norm": 0.4323028122388386, "learning_rate": 2e-05, "loss": 5.7043, "step": 414 }, { "epoch": 0.013918001173807327, "grad_norm": 0.4300437573165062, "learning_rate": 2e-05, "loss": 5.4702, "step": 415 }, { "epoch": 0.013951538526033369, "grad_norm": 0.43753994289465636, "learning_rate": 2e-05, "loss": 5.5594, "step": 416 }, { "epoch": 0.013985075878259411, "grad_norm": 0.4251685587083065, "learning_rate": 2e-05, "loss": 5.5527, "step": 417 }, { "epoch": 0.014018613230485453, "grad_norm": 0.4495444631849476, "learning_rate": 2e-05, "loss": 5.5556, "step": 418 }, { "epoch": 0.014052150582711495, "grad_norm": 0.4303196914224069, "learning_rate": 2e-05, "loss": 5.5749, "step": 419 }, { "epoch": 0.014085687934937537, "grad_norm": 0.40885748807259964, "learning_rate": 2e-05, "loss": 5.823, "step": 420 }, { "epoch": 0.014119225287163579, "grad_norm": 0.4658000573207415, "learning_rate": 2e-05, "loss": 5.4617, "step": 421 }, { "epoch": 0.01415276263938962, "grad_norm": 0.4198403844365712, "learning_rate": 2e-05, "loss": 5.6648, "step": 422 }, { "epoch": 0.014186299991615662, "grad_norm": 0.4921835534623332, "learning_rate": 2e-05, "loss": 5.5704, "step": 423 }, { "epoch": 0.014219837343841704, "grad_norm": 0.4401660122271907, "learning_rate": 2e-05, "loss": 5.6435, "step": 424 }, { "epoch": 0.014253374696067746, "grad_norm": 0.41800382024684185, "learning_rate": 2e-05, "loss": 5.5882, "step": 425 }, { "epoch": 0.014286912048293788, "grad_norm": 0.41076055239221365, "learning_rate": 2e-05, "loss": 5.6506, "step": 426 }, { "epoch": 0.014320449400519828, "grad_norm": 0.4250626644196589, "learning_rate": 2e-05, "loss": 5.5436, "step": 427 }, { "epoch": 0.01435398675274587, "grad_norm": 0.4718869744065829, "learning_rate": 2e-05, "loss": 5.7446, "step": 428 }, { "epoch": 0.014387524104971912, "grad_norm": 0.4218055128806829, "learning_rate": 2e-05, "loss": 5.7078, "step": 429 }, { "epoch": 0.014421061457197954, "grad_norm": 0.4036311319242929, "learning_rate": 2e-05, "loss": 5.4789, "step": 430 }, { "epoch": 0.014454598809423996, "grad_norm": 0.42902955471680204, "learning_rate": 2e-05, "loss": 5.6281, "step": 431 }, { "epoch": 0.014488136161650038, "grad_norm": 0.40661199774370355, "learning_rate": 2e-05, "loss": 5.7895, "step": 432 }, { "epoch": 0.01452167351387608, "grad_norm": 0.39583721684203793, "learning_rate": 2e-05, "loss": 5.5781, "step": 433 }, { "epoch": 0.014555210866102121, "grad_norm": 0.4014932626992647, "learning_rate": 2e-05, "loss": 5.5747, "step": 434 }, { "epoch": 0.014588748218328163, "grad_norm": 0.419840126414194, "learning_rate": 2e-05, "loss": 5.765, "step": 435 }, { "epoch": 0.014622285570554205, "grad_norm": 0.38048489450813894, "learning_rate": 2e-05, "loss": 5.5594, "step": 436 }, { "epoch": 0.014655822922780247, "grad_norm": 0.41012263002977406, "learning_rate": 2e-05, "loss": 5.584, "step": 437 }, { "epoch": 0.014689360275006289, "grad_norm": 0.4299636118674375, "learning_rate": 2e-05, "loss": 5.6118, "step": 438 }, { "epoch": 0.01472289762723233, "grad_norm": 0.40930832986349586, "learning_rate": 2e-05, "loss": 5.5014, "step": 439 }, { "epoch": 0.014756434979458373, "grad_norm": 0.40660030919719103, "learning_rate": 2e-05, "loss": 5.4776, "step": 440 }, { "epoch": 0.014789972331684413, "grad_norm": 0.3954620292428574, "learning_rate": 2e-05, "loss": 5.5633, "step": 441 }, { "epoch": 0.014823509683910455, "grad_norm": 0.43789776351450693, "learning_rate": 2e-05, "loss": 5.6624, "step": 442 }, { "epoch": 0.014857047036136497, "grad_norm": 0.42303887648557603, "learning_rate": 2e-05, "loss": 5.6942, "step": 443 }, { "epoch": 0.014890584388362538, "grad_norm": 0.4194120330573958, "learning_rate": 2e-05, "loss": 5.5553, "step": 444 }, { "epoch": 0.01492412174058858, "grad_norm": 0.4288094619403621, "learning_rate": 2e-05, "loss": 5.7605, "step": 445 }, { "epoch": 0.014957659092814622, "grad_norm": 0.4109841186028746, "learning_rate": 2e-05, "loss": 5.4236, "step": 446 }, { "epoch": 0.014991196445040664, "grad_norm": 0.4231572435249166, "learning_rate": 2e-05, "loss": 5.4934, "step": 447 }, { "epoch": 0.015024733797266706, "grad_norm": 0.43916937419776153, "learning_rate": 2e-05, "loss": 5.6461, "step": 448 }, { "epoch": 0.015058271149492748, "grad_norm": 0.4326833510360944, "learning_rate": 2e-05, "loss": 5.5613, "step": 449 }, { "epoch": 0.01509180850171879, "grad_norm": 0.4530758874483376, "learning_rate": 2e-05, "loss": 5.632, "step": 450 }, { "epoch": 0.015125345853944832, "grad_norm": 0.4225011019389023, "learning_rate": 2e-05, "loss": 5.6609, "step": 451 }, { "epoch": 0.015158883206170874, "grad_norm": 0.42271874542957333, "learning_rate": 2e-05, "loss": 5.5734, "step": 452 }, { "epoch": 0.015192420558396915, "grad_norm": 0.45241638544467233, "learning_rate": 2e-05, "loss": 5.4434, "step": 453 }, { "epoch": 0.015225957910622956, "grad_norm": 0.44385794486441865, "learning_rate": 2e-05, "loss": 5.3063, "step": 454 }, { "epoch": 0.015259495262848997, "grad_norm": 0.4274604196125459, "learning_rate": 2e-05, "loss": 5.6207, "step": 455 }, { "epoch": 0.01529303261507504, "grad_norm": 0.46699144744592613, "learning_rate": 2e-05, "loss": 5.6521, "step": 456 }, { "epoch": 0.015326569967301081, "grad_norm": 0.41699264484686255, "learning_rate": 2e-05, "loss": 5.6828, "step": 457 }, { "epoch": 0.015360107319527123, "grad_norm": 0.42317120272596365, "learning_rate": 2e-05, "loss": 5.4388, "step": 458 }, { "epoch": 0.015393644671753165, "grad_norm": 0.46519649712498945, "learning_rate": 2e-05, "loss": 5.6433, "step": 459 }, { "epoch": 0.015427182023979207, "grad_norm": 0.42909660769304747, "learning_rate": 2e-05, "loss": 5.4445, "step": 460 }, { "epoch": 0.015460719376205249, "grad_norm": 0.4119339547610405, "learning_rate": 2e-05, "loss": 5.5536, "step": 461 }, { "epoch": 0.01549425672843129, "grad_norm": 0.4222358085777864, "learning_rate": 2e-05, "loss": 5.5994, "step": 462 }, { "epoch": 0.015527794080657333, "grad_norm": 0.4461489463817139, "learning_rate": 2e-05, "loss": 5.6022, "step": 463 }, { "epoch": 0.015561331432883374, "grad_norm": 0.4381760347544924, "learning_rate": 2e-05, "loss": 5.6091, "step": 464 }, { "epoch": 0.015594868785109416, "grad_norm": 0.42784336450041716, "learning_rate": 2e-05, "loss": 5.4463, "step": 465 }, { "epoch": 0.015628406137335456, "grad_norm": 0.42287268035406855, "learning_rate": 2e-05, "loss": 5.6179, "step": 466 }, { "epoch": 0.0156619434895615, "grad_norm": 0.3965464167438903, "learning_rate": 2e-05, "loss": 5.5607, "step": 467 }, { "epoch": 0.01569548084178754, "grad_norm": 0.3865306056896102, "learning_rate": 2e-05, "loss": 5.6709, "step": 468 }, { "epoch": 0.015729018194013584, "grad_norm": 0.4138190836513581, "learning_rate": 2e-05, "loss": 5.4609, "step": 469 }, { "epoch": 0.015762555546239624, "grad_norm": 0.4161240861541937, "learning_rate": 2e-05, "loss": 5.7748, "step": 470 }, { "epoch": 0.015796092898465668, "grad_norm": 0.4291550275907177, "learning_rate": 2e-05, "loss": 5.7338, "step": 471 }, { "epoch": 0.015829630250691708, "grad_norm": 0.43572531481298593, "learning_rate": 2e-05, "loss": 5.4563, "step": 472 }, { "epoch": 0.015863167602917748, "grad_norm": 0.40272481714106706, "learning_rate": 2e-05, "loss": 5.7256, "step": 473 }, { "epoch": 0.01589670495514379, "grad_norm": 0.42175841040022566, "learning_rate": 2e-05, "loss": 5.5341, "step": 474 }, { "epoch": 0.01593024230736983, "grad_norm": 0.4055736254028285, "learning_rate": 2e-05, "loss": 5.5135, "step": 475 }, { "epoch": 0.015963779659595875, "grad_norm": 0.39973030062993803, "learning_rate": 2e-05, "loss": 5.5371, "step": 476 }, { "epoch": 0.015997317011821915, "grad_norm": 0.4597791228732287, "learning_rate": 2e-05, "loss": 5.615, "step": 477 }, { "epoch": 0.01603085436404796, "grad_norm": 0.4305108837766133, "learning_rate": 2e-05, "loss": 5.4012, "step": 478 }, { "epoch": 0.016064391716274, "grad_norm": 0.42498617469480693, "learning_rate": 2e-05, "loss": 5.5706, "step": 479 }, { "epoch": 0.016097929068500043, "grad_norm": 0.46317828697854796, "learning_rate": 2e-05, "loss": 5.6859, "step": 480 }, { "epoch": 0.016131466420726083, "grad_norm": 0.4143750196666772, "learning_rate": 2e-05, "loss": 5.7189, "step": 481 }, { "epoch": 0.016165003772952127, "grad_norm": 0.41463600981633086, "learning_rate": 2e-05, "loss": 5.6724, "step": 482 }, { "epoch": 0.016198541125178167, "grad_norm": 0.4547674463186271, "learning_rate": 2e-05, "loss": 5.3132, "step": 483 }, { "epoch": 0.01623207847740421, "grad_norm": 0.46395751533632196, "learning_rate": 2e-05, "loss": 5.4757, "step": 484 }, { "epoch": 0.01626561582963025, "grad_norm": 0.40254754464686404, "learning_rate": 2e-05, "loss": 5.3687, "step": 485 }, { "epoch": 0.016299153181856294, "grad_norm": 0.4311778096334001, "learning_rate": 2e-05, "loss": 5.4752, "step": 486 }, { "epoch": 0.016332690534082334, "grad_norm": 0.4706257709241986, "learning_rate": 2e-05, "loss": 5.5658, "step": 487 }, { "epoch": 0.016366227886308374, "grad_norm": 0.427542817464514, "learning_rate": 2e-05, "loss": 5.5473, "step": 488 }, { "epoch": 0.016399765238534418, "grad_norm": 0.4135191095164774, "learning_rate": 2e-05, "loss": 5.7051, "step": 489 }, { "epoch": 0.016433302590760458, "grad_norm": 0.44257823894175535, "learning_rate": 2e-05, "loss": 5.6621, "step": 490 }, { "epoch": 0.016466839942986502, "grad_norm": 0.4129135265927595, "learning_rate": 2e-05, "loss": 5.5846, "step": 491 }, { "epoch": 0.016500377295212542, "grad_norm": 0.40415467918403214, "learning_rate": 2e-05, "loss": 5.7095, "step": 492 }, { "epoch": 0.016533914647438586, "grad_norm": 0.43418745894353, "learning_rate": 2e-05, "loss": 5.3713, "step": 493 }, { "epoch": 0.016567451999664626, "grad_norm": 0.4099281666201981, "learning_rate": 2e-05, "loss": 5.5261, "step": 494 }, { "epoch": 0.01660098935189067, "grad_norm": 0.43638680253230455, "learning_rate": 2e-05, "loss": 5.6082, "step": 495 }, { "epoch": 0.01663452670411671, "grad_norm": 0.38974368909097795, "learning_rate": 2e-05, "loss": 5.6451, "step": 496 }, { "epoch": 0.016668064056342753, "grad_norm": 0.44591644243860445, "learning_rate": 2e-05, "loss": 5.4976, "step": 497 }, { "epoch": 0.016701601408568793, "grad_norm": 0.4070570140565169, "learning_rate": 2e-05, "loss": 5.5678, "step": 498 }, { "epoch": 0.016735138760794837, "grad_norm": 0.412258274577658, "learning_rate": 2e-05, "loss": 5.5475, "step": 499 }, { "epoch": 0.016768676113020877, "grad_norm": 0.4441490481928355, "learning_rate": 2e-05, "loss": 5.4685, "step": 500 }, { "epoch": 0.016802213465246917, "grad_norm": 0.4229033736591035, "learning_rate": 2e-05, "loss": 5.6819, "step": 501 }, { "epoch": 0.01683575081747296, "grad_norm": 0.4134236908020161, "learning_rate": 2e-05, "loss": 5.6597, "step": 502 }, { "epoch": 0.016869288169699, "grad_norm": 0.4499667405716228, "learning_rate": 2e-05, "loss": 5.6992, "step": 503 }, { "epoch": 0.016902825521925045, "grad_norm": 0.409455264119457, "learning_rate": 2e-05, "loss": 5.7107, "step": 504 }, { "epoch": 0.016936362874151085, "grad_norm": 0.4552826957179042, "learning_rate": 2e-05, "loss": 5.5527, "step": 505 }, { "epoch": 0.01696990022637713, "grad_norm": 0.3834304353716223, "learning_rate": 2e-05, "loss": 5.4517, "step": 506 }, { "epoch": 0.01700343757860317, "grad_norm": 0.41372104287201067, "learning_rate": 2e-05, "loss": 5.708, "step": 507 }, { "epoch": 0.017036974930829212, "grad_norm": 0.45066807098238915, "learning_rate": 2e-05, "loss": 5.5435, "step": 508 }, { "epoch": 0.017070512283055252, "grad_norm": 0.4168648409494088, "learning_rate": 2e-05, "loss": 5.419, "step": 509 }, { "epoch": 0.017104049635281296, "grad_norm": 0.42728822993640764, "learning_rate": 2e-05, "loss": 5.4575, "step": 510 }, { "epoch": 0.017137586987507336, "grad_norm": 0.45484251188660335, "learning_rate": 2e-05, "loss": 5.594, "step": 511 }, { "epoch": 0.01717112433973338, "grad_norm": 0.45126374873528985, "learning_rate": 2e-05, "loss": 5.5906, "step": 512 }, { "epoch": 0.01720466169195942, "grad_norm": 0.42547114385301443, "learning_rate": 2e-05, "loss": 5.7436, "step": 513 }, { "epoch": 0.01723819904418546, "grad_norm": 0.44757546757587524, "learning_rate": 2e-05, "loss": 5.3901, "step": 514 }, { "epoch": 0.017271736396411504, "grad_norm": 0.4407425295473879, "learning_rate": 2e-05, "loss": 5.327, "step": 515 }, { "epoch": 0.017305273748637544, "grad_norm": 0.42319048809443877, "learning_rate": 2e-05, "loss": 5.4751, "step": 516 }, { "epoch": 0.017338811100863587, "grad_norm": 0.40699386480246885, "learning_rate": 2e-05, "loss": 5.7119, "step": 517 }, { "epoch": 0.017372348453089628, "grad_norm": 0.42199788039511194, "learning_rate": 2e-05, "loss": 5.3964, "step": 518 }, { "epoch": 0.01740588580531567, "grad_norm": 0.4083872270739887, "learning_rate": 2e-05, "loss": 5.4152, "step": 519 }, { "epoch": 0.01743942315754171, "grad_norm": 0.41996898008731504, "learning_rate": 2e-05, "loss": 5.5938, "step": 520 }, { "epoch": 0.017472960509767755, "grad_norm": 0.4266246493726718, "learning_rate": 2e-05, "loss": 5.7551, "step": 521 }, { "epoch": 0.017506497861993795, "grad_norm": 0.3999946609908331, "learning_rate": 2e-05, "loss": 5.5576, "step": 522 }, { "epoch": 0.01754003521421984, "grad_norm": 0.3988062093127345, "learning_rate": 2e-05, "loss": 5.6105, "step": 523 }, { "epoch": 0.01757357256644588, "grad_norm": 0.4226643506300435, "learning_rate": 2e-05, "loss": 5.4333, "step": 524 }, { "epoch": 0.017607109918671923, "grad_norm": 0.4312775032960121, "learning_rate": 2e-05, "loss": 5.6645, "step": 525 }, { "epoch": 0.017640647270897963, "grad_norm": 0.42243253565089867, "learning_rate": 2e-05, "loss": 5.6333, "step": 526 }, { "epoch": 0.017674184623124003, "grad_norm": 0.40445237572309667, "learning_rate": 2e-05, "loss": 5.5625, "step": 527 }, { "epoch": 0.017707721975350046, "grad_norm": 0.4239104769319564, "learning_rate": 2e-05, "loss": 5.4538, "step": 528 }, { "epoch": 0.017741259327576087, "grad_norm": 0.44485613350328446, "learning_rate": 2e-05, "loss": 5.4763, "step": 529 }, { "epoch": 0.01777479667980213, "grad_norm": 0.43567915689176195, "learning_rate": 2e-05, "loss": 5.7093, "step": 530 }, { "epoch": 0.01780833403202817, "grad_norm": 0.41862304602557354, "learning_rate": 2e-05, "loss": 5.5601, "step": 531 }, { "epoch": 0.017841871384254214, "grad_norm": 0.44714798603958655, "learning_rate": 2e-05, "loss": 5.5446, "step": 532 }, { "epoch": 0.017875408736480254, "grad_norm": 0.44059271196894106, "learning_rate": 2e-05, "loss": 5.5288, "step": 533 }, { "epoch": 0.017908946088706298, "grad_norm": 0.43282185253035876, "learning_rate": 2e-05, "loss": 5.3509, "step": 534 }, { "epoch": 0.017942483440932338, "grad_norm": 0.428549814975039, "learning_rate": 2e-05, "loss": 5.684, "step": 535 }, { "epoch": 0.01797602079315838, "grad_norm": 0.4191340411918717, "learning_rate": 2e-05, "loss": 5.4997, "step": 536 }, { "epoch": 0.01800955814538442, "grad_norm": 0.4133055748551066, "learning_rate": 2e-05, "loss": 5.4589, "step": 537 }, { "epoch": 0.018043095497610465, "grad_norm": 0.4430961790316608, "learning_rate": 2e-05, "loss": 5.5868, "step": 538 }, { "epoch": 0.018076632849836505, "grad_norm": 0.41900464921730995, "learning_rate": 2e-05, "loss": 5.6417, "step": 539 }, { "epoch": 0.018110170202062546, "grad_norm": 0.39932227015259264, "learning_rate": 2e-05, "loss": 5.5094, "step": 540 }, { "epoch": 0.01814370755428859, "grad_norm": 0.4095095759118628, "learning_rate": 2e-05, "loss": 5.5338, "step": 541 }, { "epoch": 0.01817724490651463, "grad_norm": 0.42893162440088156, "learning_rate": 2e-05, "loss": 5.448, "step": 542 }, { "epoch": 0.018210782258740673, "grad_norm": 0.42779168520434796, "learning_rate": 2e-05, "loss": 5.6717, "step": 543 }, { "epoch": 0.018244319610966713, "grad_norm": 0.4068444293613382, "learning_rate": 2e-05, "loss": 5.6498, "step": 544 }, { "epoch": 0.018277856963192757, "grad_norm": 0.42532348604944326, "learning_rate": 2e-05, "loss": 5.4668, "step": 545 }, { "epoch": 0.018311394315418797, "grad_norm": 0.4014271383494071, "learning_rate": 2e-05, "loss": 5.6154, "step": 546 }, { "epoch": 0.01834493166764484, "grad_norm": 0.42301939800308735, "learning_rate": 2e-05, "loss": 5.7274, "step": 547 }, { "epoch": 0.01837846901987088, "grad_norm": 0.4152385288482206, "learning_rate": 2e-05, "loss": 5.4314, "step": 548 }, { "epoch": 0.018412006372096924, "grad_norm": 0.43108532765438196, "learning_rate": 2e-05, "loss": 5.5523, "step": 549 }, { "epoch": 0.018445543724322964, "grad_norm": 0.392621826394365, "learning_rate": 2e-05, "loss": 5.5236, "step": 550 }, { "epoch": 0.018479081076549008, "grad_norm": 0.41901707039825625, "learning_rate": 2e-05, "loss": 5.4977, "step": 551 }, { "epoch": 0.018512618428775048, "grad_norm": 0.43317526330524464, "learning_rate": 2e-05, "loss": 5.5534, "step": 552 }, { "epoch": 0.01854615578100109, "grad_norm": 0.4561667249068886, "learning_rate": 2e-05, "loss": 5.352, "step": 553 }, { "epoch": 0.018579693133227132, "grad_norm": 0.40743228318012, "learning_rate": 2e-05, "loss": 5.4921, "step": 554 }, { "epoch": 0.018613230485453172, "grad_norm": 0.4374236864687467, "learning_rate": 2e-05, "loss": 5.6976, "step": 555 }, { "epoch": 0.018646767837679216, "grad_norm": 0.4296922502253949, "learning_rate": 2e-05, "loss": 5.615, "step": 556 }, { "epoch": 0.018680305189905256, "grad_norm": 0.4121593179098675, "learning_rate": 2e-05, "loss": 5.4717, "step": 557 }, { "epoch": 0.0187138425421313, "grad_norm": 0.4085497583543573, "learning_rate": 2e-05, "loss": 5.656, "step": 558 }, { "epoch": 0.01874737989435734, "grad_norm": 0.4128569389335424, "learning_rate": 2e-05, "loss": 5.5503, "step": 559 }, { "epoch": 0.018780917246583383, "grad_norm": 0.43118064174590404, "learning_rate": 2e-05, "loss": 5.3022, "step": 560 }, { "epoch": 0.018814454598809423, "grad_norm": 0.40873244626354305, "learning_rate": 2e-05, "loss": 5.6018, "step": 561 }, { "epoch": 0.018847991951035467, "grad_norm": 0.39841799328450134, "learning_rate": 2e-05, "loss": 5.6399, "step": 562 }, { "epoch": 0.018881529303261507, "grad_norm": 0.4161442131542257, "learning_rate": 2e-05, "loss": 5.5208, "step": 563 }, { "epoch": 0.01891506665548755, "grad_norm": 0.404920629126935, "learning_rate": 2e-05, "loss": 5.6669, "step": 564 }, { "epoch": 0.01894860400771359, "grad_norm": 0.41145463757974954, "learning_rate": 2e-05, "loss": 5.4989, "step": 565 }, { "epoch": 0.01898214135993963, "grad_norm": 0.3978493566798284, "learning_rate": 2e-05, "loss": 5.6081, "step": 566 }, { "epoch": 0.019015678712165675, "grad_norm": 0.4130281618849602, "learning_rate": 2e-05, "loss": 5.7223, "step": 567 }, { "epoch": 0.019049216064391715, "grad_norm": 0.42545508435565704, "learning_rate": 2e-05, "loss": 5.7055, "step": 568 }, { "epoch": 0.01908275341661776, "grad_norm": 0.4341267358471366, "learning_rate": 2e-05, "loss": 5.4056, "step": 569 }, { "epoch": 0.0191162907688438, "grad_norm": 0.4074008036171017, "learning_rate": 2e-05, "loss": 5.4263, "step": 570 }, { "epoch": 0.019149828121069842, "grad_norm": 0.40715936644897704, "learning_rate": 2e-05, "loss": 5.6482, "step": 571 }, { "epoch": 0.019183365473295882, "grad_norm": 0.4150959702741359, "learning_rate": 2e-05, "loss": 5.7144, "step": 572 }, { "epoch": 0.019216902825521926, "grad_norm": 0.4215311416448878, "learning_rate": 2e-05, "loss": 5.581, "step": 573 }, { "epoch": 0.019250440177747966, "grad_norm": 0.4219222172893356, "learning_rate": 2e-05, "loss": 5.7456, "step": 574 }, { "epoch": 0.01928397752997401, "grad_norm": 0.4356746541233306, "learning_rate": 2e-05, "loss": 5.5715, "step": 575 }, { "epoch": 0.01931751488220005, "grad_norm": 0.4075805768611969, "learning_rate": 2e-05, "loss": 5.6881, "step": 576 }, { "epoch": 0.019351052234426094, "grad_norm": 0.4179254529414178, "learning_rate": 2e-05, "loss": 5.4303, "step": 577 }, { "epoch": 0.019384589586652134, "grad_norm": 0.48887295881895937, "learning_rate": 2e-05, "loss": 5.527, "step": 578 }, { "epoch": 0.019418126938878174, "grad_norm": 0.4036499058164531, "learning_rate": 2e-05, "loss": 5.4887, "step": 579 }, { "epoch": 0.019451664291104218, "grad_norm": 0.4239872735641646, "learning_rate": 2e-05, "loss": 5.4534, "step": 580 }, { "epoch": 0.019485201643330258, "grad_norm": 0.4476450691368829, "learning_rate": 2e-05, "loss": 5.5854, "step": 581 }, { "epoch": 0.0195187389955563, "grad_norm": 0.42515803132397734, "learning_rate": 2e-05, "loss": 5.3952, "step": 582 }, { "epoch": 0.01955227634778234, "grad_norm": 0.4404581082155295, "learning_rate": 2e-05, "loss": 5.5695, "step": 583 }, { "epoch": 0.019585813700008385, "grad_norm": 0.44521910932938846, "learning_rate": 2e-05, "loss": 5.6469, "step": 584 }, { "epoch": 0.019619351052234425, "grad_norm": 0.40089386778353325, "learning_rate": 2e-05, "loss": 5.6146, "step": 585 }, { "epoch": 0.01965288840446047, "grad_norm": 0.4376625261706152, "learning_rate": 2e-05, "loss": 5.5752, "step": 586 }, { "epoch": 0.01968642575668651, "grad_norm": 0.44266805041421614, "learning_rate": 2e-05, "loss": 5.6104, "step": 587 }, { "epoch": 0.019719963108912553, "grad_norm": 0.41545600782117575, "learning_rate": 2e-05, "loss": 5.3647, "step": 588 }, { "epoch": 0.019753500461138593, "grad_norm": 0.41352772568257806, "learning_rate": 2e-05, "loss": 5.6104, "step": 589 }, { "epoch": 0.019787037813364636, "grad_norm": 0.46683173358879176, "learning_rate": 2e-05, "loss": 5.5931, "step": 590 }, { "epoch": 0.019820575165590677, "grad_norm": 0.39341251927546267, "learning_rate": 2e-05, "loss": 5.848, "step": 591 }, { "epoch": 0.019854112517816717, "grad_norm": 0.398623375723077, "learning_rate": 2e-05, "loss": 5.5288, "step": 592 }, { "epoch": 0.01988764987004276, "grad_norm": 0.3946796675079741, "learning_rate": 2e-05, "loss": 5.7432, "step": 593 }, { "epoch": 0.0199211872222688, "grad_norm": 0.43912195215340966, "learning_rate": 2e-05, "loss": 5.5514, "step": 594 }, { "epoch": 0.019954724574494844, "grad_norm": 0.43654228290893154, "learning_rate": 2e-05, "loss": 5.6045, "step": 595 }, { "epoch": 0.019988261926720884, "grad_norm": 0.41181779119029716, "learning_rate": 2e-05, "loss": 5.5198, "step": 596 }, { "epoch": 0.020021799278946928, "grad_norm": 0.41521253926520396, "learning_rate": 2e-05, "loss": 5.7161, "step": 597 }, { "epoch": 0.020055336631172968, "grad_norm": 0.41947227073099136, "learning_rate": 2e-05, "loss": 5.4261, "step": 598 }, { "epoch": 0.02008887398339901, "grad_norm": 0.4078337038402258, "learning_rate": 2e-05, "loss": 5.5155, "step": 599 }, { "epoch": 0.020122411335625052, "grad_norm": 0.40956927569517754, "learning_rate": 2e-05, "loss": 5.7284, "step": 600 }, { "epoch": 0.020155948687851095, "grad_norm": 0.40414052364635117, "learning_rate": 2e-05, "loss": 5.56, "step": 601 }, { "epoch": 0.020189486040077136, "grad_norm": 0.4293535842706912, "learning_rate": 2e-05, "loss": 5.6222, "step": 602 }, { "epoch": 0.02022302339230318, "grad_norm": 0.41553618491856087, "learning_rate": 2e-05, "loss": 5.6366, "step": 603 }, { "epoch": 0.02025656074452922, "grad_norm": 0.4362792010078424, "learning_rate": 2e-05, "loss": 5.3183, "step": 604 }, { "epoch": 0.02029009809675526, "grad_norm": 0.4192472701623581, "learning_rate": 2e-05, "loss": 5.642, "step": 605 }, { "epoch": 0.020323635448981303, "grad_norm": 0.4263510024014498, "learning_rate": 2e-05, "loss": 5.3733, "step": 606 }, { "epoch": 0.020357172801207343, "grad_norm": 0.4013955388434123, "learning_rate": 2e-05, "loss": 5.4351, "step": 607 }, { "epoch": 0.020390710153433387, "grad_norm": 0.40170894641715316, "learning_rate": 2e-05, "loss": 5.4604, "step": 608 }, { "epoch": 0.020424247505659427, "grad_norm": 0.40772436009819457, "learning_rate": 2e-05, "loss": 5.373, "step": 609 }, { "epoch": 0.02045778485788547, "grad_norm": 0.394563518246566, "learning_rate": 2e-05, "loss": 5.4242, "step": 610 }, { "epoch": 0.02049132221011151, "grad_norm": 0.40427033533152945, "learning_rate": 2e-05, "loss": 5.6747, "step": 611 }, { "epoch": 0.020524859562337554, "grad_norm": 0.40670657344694333, "learning_rate": 2e-05, "loss": 5.4727, "step": 612 }, { "epoch": 0.020558396914563595, "grad_norm": 0.41744883075569655, "learning_rate": 2e-05, "loss": 5.5746, "step": 613 }, { "epoch": 0.020591934266789638, "grad_norm": 0.43464120005309614, "learning_rate": 2e-05, "loss": 5.5574, "step": 614 }, { "epoch": 0.02062547161901568, "grad_norm": 0.42310700128066264, "learning_rate": 2e-05, "loss": 5.4916, "step": 615 }, { "epoch": 0.020659008971241722, "grad_norm": 0.4248728024126493, "learning_rate": 2e-05, "loss": 5.7493, "step": 616 }, { "epoch": 0.020692546323467762, "grad_norm": 0.4459377668350255, "learning_rate": 2e-05, "loss": 5.6277, "step": 617 }, { "epoch": 0.020726083675693802, "grad_norm": 0.3868060690696678, "learning_rate": 2e-05, "loss": 5.8099, "step": 618 }, { "epoch": 0.020759621027919846, "grad_norm": 0.40518679536019553, "learning_rate": 2e-05, "loss": 5.4566, "step": 619 }, { "epoch": 0.020793158380145886, "grad_norm": 0.4209358347834212, "learning_rate": 2e-05, "loss": 5.6507, "step": 620 }, { "epoch": 0.02082669573237193, "grad_norm": 0.40181293186189393, "learning_rate": 2e-05, "loss": 5.6014, "step": 621 }, { "epoch": 0.02086023308459797, "grad_norm": 0.4014750757925521, "learning_rate": 2e-05, "loss": 5.3253, "step": 622 }, { "epoch": 0.020893770436824013, "grad_norm": 0.43796842764362703, "learning_rate": 2e-05, "loss": 5.6241, "step": 623 }, { "epoch": 0.020927307789050054, "grad_norm": 0.40355770237688554, "learning_rate": 2e-05, "loss": 5.3451, "step": 624 }, { "epoch": 0.020960845141276097, "grad_norm": 0.40862736040837183, "learning_rate": 2e-05, "loss": 5.475, "step": 625 }, { "epoch": 0.020994382493502137, "grad_norm": 0.43434805233404217, "learning_rate": 2e-05, "loss": 5.6097, "step": 626 }, { "epoch": 0.02102791984572818, "grad_norm": 0.39197019954475937, "learning_rate": 2e-05, "loss": 5.6035, "step": 627 }, { "epoch": 0.02106145719795422, "grad_norm": 0.4236980184032063, "learning_rate": 2e-05, "loss": 5.6878, "step": 628 }, { "epoch": 0.021094994550180265, "grad_norm": 0.4612922452190859, "learning_rate": 2e-05, "loss": 5.4611, "step": 629 }, { "epoch": 0.021128531902406305, "grad_norm": 0.41182818839244206, "learning_rate": 2e-05, "loss": 5.6414, "step": 630 }, { "epoch": 0.021162069254632345, "grad_norm": 0.40887162315705927, "learning_rate": 2e-05, "loss": 5.6394, "step": 631 }, { "epoch": 0.02119560660685839, "grad_norm": 0.42928139097389273, "learning_rate": 2e-05, "loss": 5.7923, "step": 632 }, { "epoch": 0.02122914395908443, "grad_norm": 0.46154917020362957, "learning_rate": 2e-05, "loss": 5.5314, "step": 633 }, { "epoch": 0.021262681311310472, "grad_norm": 0.4175415850777575, "learning_rate": 2e-05, "loss": 5.4905, "step": 634 }, { "epoch": 0.021296218663536513, "grad_norm": 0.4123085886180074, "learning_rate": 2e-05, "loss": 5.6099, "step": 635 }, { "epoch": 0.021329756015762556, "grad_norm": 0.4421697808523377, "learning_rate": 2e-05, "loss": 5.5959, "step": 636 }, { "epoch": 0.021363293367988596, "grad_norm": 0.41628564539736507, "learning_rate": 2e-05, "loss": 5.6877, "step": 637 }, { "epoch": 0.02139683072021464, "grad_norm": 0.41828670869283163, "learning_rate": 2e-05, "loss": 5.461, "step": 638 }, { "epoch": 0.02143036807244068, "grad_norm": 0.41308449463980834, "learning_rate": 2e-05, "loss": 5.5321, "step": 639 }, { "epoch": 0.021463905424666724, "grad_norm": 0.4127013930733782, "learning_rate": 2e-05, "loss": 5.4526, "step": 640 }, { "epoch": 0.021497442776892764, "grad_norm": 0.4039418120527476, "learning_rate": 2e-05, "loss": 5.3061, "step": 641 }, { "epoch": 0.021530980129118808, "grad_norm": 0.4278792595129204, "learning_rate": 2e-05, "loss": 5.8162, "step": 642 }, { "epoch": 0.021564517481344848, "grad_norm": 0.42629652979863003, "learning_rate": 2e-05, "loss": 5.5371, "step": 643 }, { "epoch": 0.021598054833570888, "grad_norm": 0.43505909137516, "learning_rate": 2e-05, "loss": 5.4665, "step": 644 }, { "epoch": 0.02163159218579693, "grad_norm": 0.4078761984576198, "learning_rate": 2e-05, "loss": 5.6809, "step": 645 }, { "epoch": 0.02166512953802297, "grad_norm": 0.42787800686354827, "learning_rate": 2e-05, "loss": 5.4686, "step": 646 }, { "epoch": 0.021698666890249015, "grad_norm": 0.40787982997533223, "learning_rate": 2e-05, "loss": 5.5984, "step": 647 }, { "epoch": 0.021732204242475055, "grad_norm": 0.42544255681328264, "learning_rate": 2e-05, "loss": 5.5486, "step": 648 }, { "epoch": 0.0217657415947011, "grad_norm": 0.44130773317529454, "learning_rate": 2e-05, "loss": 5.6736, "step": 649 }, { "epoch": 0.02179927894692714, "grad_norm": 0.4150100947754703, "learning_rate": 2e-05, "loss": 5.5006, "step": 650 }, { "epoch": 0.021832816299153183, "grad_norm": 0.41988701791241073, "learning_rate": 2e-05, "loss": 5.6834, "step": 651 }, { "epoch": 0.021866353651379223, "grad_norm": 0.44982606529212793, "learning_rate": 2e-05, "loss": 5.576, "step": 652 }, { "epoch": 0.021899891003605267, "grad_norm": 0.39528309203347384, "learning_rate": 2e-05, "loss": 5.3805, "step": 653 }, { "epoch": 0.021933428355831307, "grad_norm": 0.4618693289009438, "learning_rate": 2e-05, "loss": 5.714, "step": 654 }, { "epoch": 0.02196696570805735, "grad_norm": 0.42766608246784926, "learning_rate": 2e-05, "loss": 5.5043, "step": 655 }, { "epoch": 0.02200050306028339, "grad_norm": 0.4116098888295802, "learning_rate": 2e-05, "loss": 5.6286, "step": 656 }, { "epoch": 0.02203404041250943, "grad_norm": 0.40843372648085374, "learning_rate": 2e-05, "loss": 5.64, "step": 657 }, { "epoch": 0.022067577764735474, "grad_norm": 0.41155330934479656, "learning_rate": 2e-05, "loss": 5.4956, "step": 658 }, { "epoch": 0.022101115116961514, "grad_norm": 0.40606920835958327, "learning_rate": 2e-05, "loss": 5.5945, "step": 659 }, { "epoch": 0.022134652469187558, "grad_norm": 0.4425131117355856, "learning_rate": 2e-05, "loss": 5.7508, "step": 660 }, { "epoch": 0.022168189821413598, "grad_norm": 0.4052037705801766, "learning_rate": 2e-05, "loss": 5.5315, "step": 661 }, { "epoch": 0.022201727173639642, "grad_norm": 0.43729101695907385, "learning_rate": 2e-05, "loss": 5.5755, "step": 662 }, { "epoch": 0.022235264525865682, "grad_norm": 0.3902986671986653, "learning_rate": 2e-05, "loss": 5.5082, "step": 663 }, { "epoch": 0.022268801878091726, "grad_norm": 0.407165251745804, "learning_rate": 2e-05, "loss": 5.5667, "step": 664 }, { "epoch": 0.022302339230317766, "grad_norm": 0.42325532203721006, "learning_rate": 2e-05, "loss": 5.4835, "step": 665 }, { "epoch": 0.02233587658254381, "grad_norm": 0.43652089318382314, "learning_rate": 2e-05, "loss": 5.5758, "step": 666 }, { "epoch": 0.02236941393476985, "grad_norm": 0.42407366132100904, "learning_rate": 2e-05, "loss": 5.7718, "step": 667 }, { "epoch": 0.022402951286995893, "grad_norm": 0.4250446029651102, "learning_rate": 2e-05, "loss": 5.7146, "step": 668 }, { "epoch": 0.022436488639221933, "grad_norm": 0.4096880813539283, "learning_rate": 2e-05, "loss": 5.4574, "step": 669 }, { "epoch": 0.022470025991447977, "grad_norm": 0.4356248869351444, "learning_rate": 2e-05, "loss": 5.5813, "step": 670 }, { "epoch": 0.022503563343674017, "grad_norm": 0.42264390495741855, "learning_rate": 2e-05, "loss": 5.4829, "step": 671 }, { "epoch": 0.022537100695900057, "grad_norm": 0.43493386145763874, "learning_rate": 2e-05, "loss": 5.5632, "step": 672 }, { "epoch": 0.0225706380481261, "grad_norm": 0.4159241077105589, "learning_rate": 2e-05, "loss": 5.5652, "step": 673 }, { "epoch": 0.02260417540035214, "grad_norm": 0.43287102708981134, "learning_rate": 2e-05, "loss": 5.3484, "step": 674 }, { "epoch": 0.022637712752578185, "grad_norm": 0.42953164453452575, "learning_rate": 2e-05, "loss": 5.5636, "step": 675 }, { "epoch": 0.022671250104804225, "grad_norm": 0.41667182472154557, "learning_rate": 2e-05, "loss": 5.5014, "step": 676 }, { "epoch": 0.02270478745703027, "grad_norm": 0.4548973740564331, "learning_rate": 2e-05, "loss": 5.47, "step": 677 }, { "epoch": 0.02273832480925631, "grad_norm": 0.4081531057234237, "learning_rate": 2e-05, "loss": 5.6862, "step": 678 }, { "epoch": 0.022771862161482352, "grad_norm": 0.40662995367254007, "learning_rate": 2e-05, "loss": 5.3758, "step": 679 }, { "epoch": 0.022805399513708392, "grad_norm": 0.417432130844658, "learning_rate": 2e-05, "loss": 5.6084, "step": 680 }, { "epoch": 0.022838936865934436, "grad_norm": 0.43875654063317865, "learning_rate": 2e-05, "loss": 5.5388, "step": 681 }, { "epoch": 0.022872474218160476, "grad_norm": 0.4086082273858201, "learning_rate": 2e-05, "loss": 5.6188, "step": 682 }, { "epoch": 0.02290601157038652, "grad_norm": 0.4503370544586092, "learning_rate": 2e-05, "loss": 5.4654, "step": 683 }, { "epoch": 0.02293954892261256, "grad_norm": 0.44964618157999464, "learning_rate": 2e-05, "loss": 5.543, "step": 684 }, { "epoch": 0.0229730862748386, "grad_norm": 0.4019957663300842, "learning_rate": 2e-05, "loss": 5.4878, "step": 685 }, { "epoch": 0.023006623627064644, "grad_norm": 0.39831364887660453, "learning_rate": 2e-05, "loss": 5.559, "step": 686 }, { "epoch": 0.023040160979290684, "grad_norm": 0.4335320064378855, "learning_rate": 2e-05, "loss": 5.7406, "step": 687 }, { "epoch": 0.023073698331516727, "grad_norm": 0.4950771308603925, "learning_rate": 2e-05, "loss": 5.3764, "step": 688 }, { "epoch": 0.023107235683742768, "grad_norm": 0.424919869099644, "learning_rate": 2e-05, "loss": 5.6262, "step": 689 }, { "epoch": 0.02314077303596881, "grad_norm": 0.40346893031314546, "learning_rate": 2e-05, "loss": 5.5865, "step": 690 }, { "epoch": 0.02317431038819485, "grad_norm": 0.43040151264456605, "learning_rate": 2e-05, "loss": 5.5796, "step": 691 }, { "epoch": 0.023207847740420895, "grad_norm": 0.4305846709058541, "learning_rate": 2e-05, "loss": 5.5232, "step": 692 }, { "epoch": 0.023241385092646935, "grad_norm": 0.4682647645138506, "learning_rate": 2e-05, "loss": 5.5512, "step": 693 }, { "epoch": 0.02327492244487298, "grad_norm": 0.4232430010805816, "learning_rate": 2e-05, "loss": 5.4754, "step": 694 }, { "epoch": 0.02330845979709902, "grad_norm": 0.4393018538569311, "learning_rate": 2e-05, "loss": 5.5754, "step": 695 }, { "epoch": 0.023341997149325062, "grad_norm": 0.4147195305086349, "learning_rate": 2e-05, "loss": 5.4752, "step": 696 }, { "epoch": 0.023375534501551103, "grad_norm": 0.42530514102711425, "learning_rate": 2e-05, "loss": 5.4867, "step": 697 }, { "epoch": 0.023409071853777143, "grad_norm": 0.41042357370684557, "learning_rate": 2e-05, "loss": 5.5888, "step": 698 }, { "epoch": 0.023442609206003186, "grad_norm": 0.40848148948188145, "learning_rate": 2e-05, "loss": 5.541, "step": 699 }, { "epoch": 0.023476146558229227, "grad_norm": 0.4492940335393255, "learning_rate": 2e-05, "loss": 5.6147, "step": 700 }, { "epoch": 0.02350968391045527, "grad_norm": 0.445190995992448, "learning_rate": 2e-05, "loss": 5.4171, "step": 701 }, { "epoch": 0.02354322126268131, "grad_norm": 0.48029742103712253, "learning_rate": 2e-05, "loss": 5.5057, "step": 702 }, { "epoch": 0.023576758614907354, "grad_norm": 0.449339560857633, "learning_rate": 2e-05, "loss": 5.5046, "step": 703 }, { "epoch": 0.023610295967133394, "grad_norm": 0.43565493919818077, "learning_rate": 2e-05, "loss": 5.2955, "step": 704 }, { "epoch": 0.023643833319359438, "grad_norm": 0.4672352019199247, "learning_rate": 2e-05, "loss": 5.6958, "step": 705 }, { "epoch": 0.023677370671585478, "grad_norm": 0.4443371954076224, "learning_rate": 2e-05, "loss": 5.6935, "step": 706 }, { "epoch": 0.02371090802381152, "grad_norm": 0.40768043464709214, "learning_rate": 2e-05, "loss": 5.2905, "step": 707 }, { "epoch": 0.02374444537603756, "grad_norm": 0.4403351978713901, "learning_rate": 2e-05, "loss": 5.7546, "step": 708 }, { "epoch": 0.023777982728263605, "grad_norm": 0.4260932579745095, "learning_rate": 2e-05, "loss": 5.3963, "step": 709 }, { "epoch": 0.023811520080489645, "grad_norm": 0.4020183396066679, "learning_rate": 2e-05, "loss": 5.5601, "step": 710 }, { "epoch": 0.023845057432715686, "grad_norm": 0.4538015480162957, "learning_rate": 2e-05, "loss": 5.3555, "step": 711 }, { "epoch": 0.02387859478494173, "grad_norm": 0.413051771872417, "learning_rate": 2e-05, "loss": 5.5965, "step": 712 }, { "epoch": 0.02391213213716777, "grad_norm": 0.42367453238667113, "learning_rate": 2e-05, "loss": 5.7199, "step": 713 }, { "epoch": 0.023945669489393813, "grad_norm": 0.3936722225345979, "learning_rate": 2e-05, "loss": 5.3955, "step": 714 }, { "epoch": 0.023979206841619853, "grad_norm": 0.4035742394141793, "learning_rate": 2e-05, "loss": 5.3738, "step": 715 }, { "epoch": 0.024012744193845897, "grad_norm": 0.4241214419914999, "learning_rate": 2e-05, "loss": 5.6625, "step": 716 }, { "epoch": 0.024046281546071937, "grad_norm": 0.3963261727007952, "learning_rate": 2e-05, "loss": 5.4491, "step": 717 }, { "epoch": 0.02407981889829798, "grad_norm": 0.41805622233007594, "learning_rate": 2e-05, "loss": 5.6262, "step": 718 }, { "epoch": 0.02411335625052402, "grad_norm": 0.41693919811479935, "learning_rate": 2e-05, "loss": 5.5962, "step": 719 }, { "epoch": 0.024146893602750064, "grad_norm": 0.399749263154528, "learning_rate": 2e-05, "loss": 5.52, "step": 720 }, { "epoch": 0.024180430954976104, "grad_norm": 0.43802079814727635, "learning_rate": 2e-05, "loss": 5.438, "step": 721 }, { "epoch": 0.024213968307202148, "grad_norm": 0.4254392288104108, "learning_rate": 2e-05, "loss": 5.4817, "step": 722 }, { "epoch": 0.024247505659428188, "grad_norm": 0.39640585524657695, "learning_rate": 2e-05, "loss": 5.6076, "step": 723 }, { "epoch": 0.02428104301165423, "grad_norm": 0.40546582232961653, "learning_rate": 2e-05, "loss": 5.6422, "step": 724 }, { "epoch": 0.024314580363880272, "grad_norm": 0.4185048497045189, "learning_rate": 2e-05, "loss": 5.6698, "step": 725 }, { "epoch": 0.024348117716106312, "grad_norm": 0.4264302061678442, "learning_rate": 2e-05, "loss": 5.6951, "step": 726 }, { "epoch": 0.024381655068332356, "grad_norm": 0.39784450353977463, "learning_rate": 2e-05, "loss": 5.5187, "step": 727 }, { "epoch": 0.024415192420558396, "grad_norm": 0.432894642157944, "learning_rate": 2e-05, "loss": 5.5928, "step": 728 }, { "epoch": 0.02444872977278444, "grad_norm": 0.42211244747208654, "learning_rate": 2e-05, "loss": 5.6165, "step": 729 }, { "epoch": 0.02448226712501048, "grad_norm": 0.4403065747959638, "learning_rate": 2e-05, "loss": 5.6708, "step": 730 }, { "epoch": 0.024515804477236523, "grad_norm": 0.41690437533918956, "learning_rate": 2e-05, "loss": 5.5019, "step": 731 }, { "epoch": 0.024549341829462563, "grad_norm": 0.4611721737812778, "learning_rate": 2e-05, "loss": 5.6649, "step": 732 }, { "epoch": 0.024582879181688607, "grad_norm": 0.4422719760949317, "learning_rate": 2e-05, "loss": 5.5447, "step": 733 }, { "epoch": 0.024616416533914647, "grad_norm": 0.42707846608794153, "learning_rate": 2e-05, "loss": 5.6332, "step": 734 }, { "epoch": 0.02464995388614069, "grad_norm": 0.4299981906969668, "learning_rate": 2e-05, "loss": 5.702, "step": 735 }, { "epoch": 0.02468349123836673, "grad_norm": 0.44899626938985326, "learning_rate": 2e-05, "loss": 5.5403, "step": 736 }, { "epoch": 0.02471702859059277, "grad_norm": 0.4823168678269713, "learning_rate": 2e-05, "loss": 5.6152, "step": 737 }, { "epoch": 0.024750565942818815, "grad_norm": 0.4213316902261505, "learning_rate": 2e-05, "loss": 5.56, "step": 738 }, { "epoch": 0.024784103295044855, "grad_norm": 0.46712586746602236, "learning_rate": 2e-05, "loss": 5.5953, "step": 739 }, { "epoch": 0.0248176406472709, "grad_norm": 0.4553524886062282, "learning_rate": 2e-05, "loss": 5.516, "step": 740 }, { "epoch": 0.02485117799949694, "grad_norm": 0.43317762006582794, "learning_rate": 2e-05, "loss": 5.5519, "step": 741 }, { "epoch": 0.024884715351722982, "grad_norm": 0.42358458229371165, "learning_rate": 2e-05, "loss": 5.7729, "step": 742 }, { "epoch": 0.024918252703949022, "grad_norm": 0.40784768971911634, "learning_rate": 2e-05, "loss": 5.6769, "step": 743 }, { "epoch": 0.024951790056175066, "grad_norm": 0.4481993367975484, "learning_rate": 2e-05, "loss": 5.4934, "step": 744 }, { "epoch": 0.024985327408401106, "grad_norm": 0.4388456993300487, "learning_rate": 2e-05, "loss": 5.64, "step": 745 }, { "epoch": 0.02501886476062715, "grad_norm": 0.4855036707892644, "learning_rate": 2e-05, "loss": 5.6637, "step": 746 }, { "epoch": 0.02505240211285319, "grad_norm": 0.455418958890624, "learning_rate": 2e-05, "loss": 5.5234, "step": 747 }, { "epoch": 0.025085939465079234, "grad_norm": 0.4263136177867645, "learning_rate": 2e-05, "loss": 5.3995, "step": 748 }, { "epoch": 0.025119476817305274, "grad_norm": 0.40037917698870534, "learning_rate": 2e-05, "loss": 5.6265, "step": 749 }, { "epoch": 0.025153014169531314, "grad_norm": 0.4203391208960984, "learning_rate": 2e-05, "loss": 5.4732, "step": 750 }, { "epoch": 0.025186551521757358, "grad_norm": 0.44533438675154774, "learning_rate": 2e-05, "loss": 5.4854, "step": 751 }, { "epoch": 0.025220088873983398, "grad_norm": 0.42688733821547575, "learning_rate": 2e-05, "loss": 5.5764, "step": 752 }, { "epoch": 0.02525362622620944, "grad_norm": 0.4211691197822205, "learning_rate": 2e-05, "loss": 5.5197, "step": 753 }, { "epoch": 0.02528716357843548, "grad_norm": 0.45361029046843543, "learning_rate": 2e-05, "loss": 5.5502, "step": 754 }, { "epoch": 0.025320700930661525, "grad_norm": 0.3978420785408352, "learning_rate": 2e-05, "loss": 5.3815, "step": 755 }, { "epoch": 0.025354238282887565, "grad_norm": 0.40747313658335427, "learning_rate": 2e-05, "loss": 5.6309, "step": 756 }, { "epoch": 0.02538777563511361, "grad_norm": 0.3991760061675898, "learning_rate": 2e-05, "loss": 5.5616, "step": 757 }, { "epoch": 0.02542131298733965, "grad_norm": 0.3981240526466294, "learning_rate": 2e-05, "loss": 5.6148, "step": 758 }, { "epoch": 0.025454850339565693, "grad_norm": 0.40178432306962975, "learning_rate": 2e-05, "loss": 5.4586, "step": 759 }, { "epoch": 0.025488387691791733, "grad_norm": 0.414273367306798, "learning_rate": 2e-05, "loss": 5.5946, "step": 760 }, { "epoch": 0.025521925044017776, "grad_norm": 0.44147594733920126, "learning_rate": 2e-05, "loss": 5.6887, "step": 761 }, { "epoch": 0.025555462396243817, "grad_norm": 0.4244581922176654, "learning_rate": 2e-05, "loss": 5.5441, "step": 762 }, { "epoch": 0.025588999748469857, "grad_norm": 0.4079252140606453, "learning_rate": 2e-05, "loss": 5.5595, "step": 763 }, { "epoch": 0.0256225371006959, "grad_norm": 0.42111716443726344, "learning_rate": 2e-05, "loss": 5.4427, "step": 764 }, { "epoch": 0.02565607445292194, "grad_norm": 0.41562104215775125, "learning_rate": 2e-05, "loss": 5.5689, "step": 765 }, { "epoch": 0.025689611805147984, "grad_norm": 0.4054470984783288, "learning_rate": 2e-05, "loss": 5.4912, "step": 766 }, { "epoch": 0.025723149157374024, "grad_norm": 0.3913183320913171, "learning_rate": 2e-05, "loss": 5.734, "step": 767 }, { "epoch": 0.025756686509600068, "grad_norm": 0.44138614355861927, "learning_rate": 2e-05, "loss": 5.3941, "step": 768 }, { "epoch": 0.025790223861826108, "grad_norm": 0.40391563934927516, "learning_rate": 2e-05, "loss": 5.7034, "step": 769 }, { "epoch": 0.02582376121405215, "grad_norm": 0.39974162167472127, "learning_rate": 2e-05, "loss": 5.4056, "step": 770 }, { "epoch": 0.025857298566278192, "grad_norm": 0.411231148805563, "learning_rate": 2e-05, "loss": 5.4886, "step": 771 }, { "epoch": 0.025890835918504235, "grad_norm": 0.3964017311330782, "learning_rate": 2e-05, "loss": 5.5259, "step": 772 }, { "epoch": 0.025924373270730276, "grad_norm": 0.43479190486540564, "learning_rate": 2e-05, "loss": 5.7888, "step": 773 }, { "epoch": 0.02595791062295632, "grad_norm": 0.41218058046497913, "learning_rate": 2e-05, "loss": 5.6749, "step": 774 }, { "epoch": 0.02599144797518236, "grad_norm": 0.41652131923614377, "learning_rate": 2e-05, "loss": 5.696, "step": 775 }, { "epoch": 0.0260249853274084, "grad_norm": 0.40802650858235767, "learning_rate": 2e-05, "loss": 5.5129, "step": 776 }, { "epoch": 0.026058522679634443, "grad_norm": 0.40430087318675517, "learning_rate": 2e-05, "loss": 5.469, "step": 777 }, { "epoch": 0.026092060031860483, "grad_norm": 0.4052947064222916, "learning_rate": 2e-05, "loss": 5.5821, "step": 778 }, { "epoch": 0.026125597384086527, "grad_norm": 0.4018458300948854, "learning_rate": 2e-05, "loss": 5.6027, "step": 779 }, { "epoch": 0.026159134736312567, "grad_norm": 0.43122575281938064, "learning_rate": 2e-05, "loss": 5.5319, "step": 780 }, { "epoch": 0.02619267208853861, "grad_norm": 0.40771701050026166, "learning_rate": 2e-05, "loss": 5.5213, "step": 781 }, { "epoch": 0.02622620944076465, "grad_norm": 0.41847886273204155, "learning_rate": 2e-05, "loss": 5.3938, "step": 782 }, { "epoch": 0.026259746792990694, "grad_norm": 0.43873315142978536, "learning_rate": 2e-05, "loss": 5.512, "step": 783 }, { "epoch": 0.026293284145216735, "grad_norm": 0.4434472241183101, "learning_rate": 2e-05, "loss": 5.6319, "step": 784 }, { "epoch": 0.026326821497442778, "grad_norm": 0.4616951341796811, "learning_rate": 2e-05, "loss": 5.4925, "step": 785 }, { "epoch": 0.02636035884966882, "grad_norm": 0.41366956617214806, "learning_rate": 2e-05, "loss": 5.3532, "step": 786 }, { "epoch": 0.026393896201894862, "grad_norm": 0.4784837741837409, "learning_rate": 2e-05, "loss": 5.6823, "step": 787 }, { "epoch": 0.026427433554120902, "grad_norm": 0.4091541251894986, "learning_rate": 2e-05, "loss": 5.6608, "step": 788 }, { "epoch": 0.026460970906346942, "grad_norm": 0.44951706524641666, "learning_rate": 2e-05, "loss": 5.7029, "step": 789 }, { "epoch": 0.026494508258572986, "grad_norm": 0.4396039727736856, "learning_rate": 2e-05, "loss": 5.5252, "step": 790 }, { "epoch": 0.026528045610799026, "grad_norm": 0.4386740716176094, "learning_rate": 2e-05, "loss": 5.4894, "step": 791 }, { "epoch": 0.02656158296302507, "grad_norm": 0.4202084918305655, "learning_rate": 2e-05, "loss": 5.4746, "step": 792 }, { "epoch": 0.02659512031525111, "grad_norm": 0.4066123999264822, "learning_rate": 2e-05, "loss": 5.5698, "step": 793 }, { "epoch": 0.026628657667477153, "grad_norm": 0.41819450785775814, "learning_rate": 2e-05, "loss": 5.5917, "step": 794 }, { "epoch": 0.026662195019703194, "grad_norm": 0.40848132664802356, "learning_rate": 2e-05, "loss": 5.6682, "step": 795 }, { "epoch": 0.026695732371929237, "grad_norm": 0.45690139782824263, "learning_rate": 2e-05, "loss": 5.6511, "step": 796 }, { "epoch": 0.026729269724155277, "grad_norm": 0.43758974194776623, "learning_rate": 2e-05, "loss": 5.4914, "step": 797 }, { "epoch": 0.02676280707638132, "grad_norm": 0.43030896180050865, "learning_rate": 2e-05, "loss": 5.5653, "step": 798 }, { "epoch": 0.02679634442860736, "grad_norm": 0.42950066662764846, "learning_rate": 2e-05, "loss": 5.6365, "step": 799 }, { "epoch": 0.026829881780833405, "grad_norm": 0.4318852350703131, "learning_rate": 2e-05, "loss": 5.5453, "step": 800 }, { "epoch": 0.026863419133059445, "grad_norm": 0.432466184841351, "learning_rate": 2e-05, "loss": 5.4459, "step": 801 }, { "epoch": 0.026896956485285485, "grad_norm": 0.39758412460796344, "learning_rate": 2e-05, "loss": 5.9302, "step": 802 }, { "epoch": 0.02693049383751153, "grad_norm": 0.4247625772890498, "learning_rate": 2e-05, "loss": 5.6255, "step": 803 }, { "epoch": 0.02696403118973757, "grad_norm": 0.4333126908980824, "learning_rate": 2e-05, "loss": 5.7368, "step": 804 }, { "epoch": 0.026997568541963612, "grad_norm": 0.40241802819870937, "learning_rate": 2e-05, "loss": 5.5902, "step": 805 }, { "epoch": 0.027031105894189653, "grad_norm": 0.4481886278807051, "learning_rate": 2e-05, "loss": 5.7614, "step": 806 }, { "epoch": 0.027064643246415696, "grad_norm": 0.43526514492264323, "learning_rate": 2e-05, "loss": 5.1483, "step": 807 }, { "epoch": 0.027098180598641736, "grad_norm": 0.404403857744165, "learning_rate": 2e-05, "loss": 5.4409, "step": 808 }, { "epoch": 0.02713171795086778, "grad_norm": 0.3981984888186688, "learning_rate": 2e-05, "loss": 5.5764, "step": 809 }, { "epoch": 0.02716525530309382, "grad_norm": 0.45207643640945466, "learning_rate": 2e-05, "loss": 5.6015, "step": 810 }, { "epoch": 0.027198792655319864, "grad_norm": 0.4253905508580101, "learning_rate": 2e-05, "loss": 5.5532, "step": 811 }, { "epoch": 0.027232330007545904, "grad_norm": 0.42886445553442987, "learning_rate": 2e-05, "loss": 5.6767, "step": 812 }, { "epoch": 0.027265867359771948, "grad_norm": 0.41853637855601145, "learning_rate": 2e-05, "loss": 5.8181, "step": 813 }, { "epoch": 0.027299404711997988, "grad_norm": 0.42713393847054043, "learning_rate": 2e-05, "loss": 5.5184, "step": 814 }, { "epoch": 0.027332942064224028, "grad_norm": 0.42949532447081795, "learning_rate": 2e-05, "loss": 5.6227, "step": 815 }, { "epoch": 0.02736647941645007, "grad_norm": 0.41124750651503506, "learning_rate": 2e-05, "loss": 5.9823, "step": 816 }, { "epoch": 0.02740001676867611, "grad_norm": 0.425435606166942, "learning_rate": 2e-05, "loss": 5.594, "step": 817 }, { "epoch": 0.027433554120902155, "grad_norm": 0.48263801192502365, "learning_rate": 2e-05, "loss": 5.4435, "step": 818 }, { "epoch": 0.027467091473128195, "grad_norm": 0.4255040918832769, "learning_rate": 2e-05, "loss": 5.4174, "step": 819 }, { "epoch": 0.02750062882535424, "grad_norm": 0.4326861804700855, "learning_rate": 2e-05, "loss": 5.5896, "step": 820 }, { "epoch": 0.02753416617758028, "grad_norm": 0.422202865296924, "learning_rate": 2e-05, "loss": 5.2406, "step": 821 }, { "epoch": 0.027567703529806323, "grad_norm": 0.4133999459260381, "learning_rate": 2e-05, "loss": 5.4584, "step": 822 }, { "epoch": 0.027601240882032363, "grad_norm": 0.4397149315951972, "learning_rate": 2e-05, "loss": 5.5354, "step": 823 }, { "epoch": 0.027634778234258407, "grad_norm": 0.4513797237411649, "learning_rate": 2e-05, "loss": 5.5877, "step": 824 }, { "epoch": 0.027668315586484447, "grad_norm": 0.4573516656701256, "learning_rate": 2e-05, "loss": 5.5069, "step": 825 }, { "epoch": 0.02770185293871049, "grad_norm": 0.42189070346594126, "learning_rate": 2e-05, "loss": 5.508, "step": 826 }, { "epoch": 0.02773539029093653, "grad_norm": 0.45790415925937, "learning_rate": 2e-05, "loss": 5.4629, "step": 827 }, { "epoch": 0.02776892764316257, "grad_norm": 0.4242147816711353, "learning_rate": 2e-05, "loss": 5.731, "step": 828 }, { "epoch": 0.027802464995388614, "grad_norm": 0.432033046936258, "learning_rate": 2e-05, "loss": 5.7076, "step": 829 }, { "epoch": 0.027836002347614654, "grad_norm": 0.5010638251124502, "learning_rate": 2e-05, "loss": 5.5287, "step": 830 }, { "epoch": 0.027869539699840698, "grad_norm": 0.41850711134387114, "learning_rate": 2e-05, "loss": 5.5058, "step": 831 }, { "epoch": 0.027903077052066738, "grad_norm": 0.436535209316193, "learning_rate": 2e-05, "loss": 5.3496, "step": 832 }, { "epoch": 0.027936614404292782, "grad_norm": 0.42689636407206377, "learning_rate": 2e-05, "loss": 5.6642, "step": 833 }, { "epoch": 0.027970151756518822, "grad_norm": 0.41724381730141763, "learning_rate": 2e-05, "loss": 5.451, "step": 834 }, { "epoch": 0.028003689108744866, "grad_norm": 0.4760376351305013, "learning_rate": 2e-05, "loss": 5.6762, "step": 835 }, { "epoch": 0.028037226460970906, "grad_norm": 0.5095833848894636, "learning_rate": 2e-05, "loss": 5.2633, "step": 836 }, { "epoch": 0.02807076381319695, "grad_norm": 0.41544176024468765, "learning_rate": 2e-05, "loss": 5.561, "step": 837 }, { "epoch": 0.02810430116542299, "grad_norm": 0.48142780560872817, "learning_rate": 2e-05, "loss": 5.5825, "step": 838 }, { "epoch": 0.028137838517649033, "grad_norm": 0.4500068094049138, "learning_rate": 2e-05, "loss": 5.4592, "step": 839 }, { "epoch": 0.028171375869875073, "grad_norm": 0.4334166710154749, "learning_rate": 2e-05, "loss": 5.5871, "step": 840 }, { "epoch": 0.028204913222101113, "grad_norm": 0.44477336170102727, "learning_rate": 2e-05, "loss": 5.4692, "step": 841 }, { "epoch": 0.028238450574327157, "grad_norm": 0.4354909064953738, "learning_rate": 2e-05, "loss": 5.773, "step": 842 }, { "epoch": 0.028271987926553197, "grad_norm": 0.4548969870543898, "learning_rate": 2e-05, "loss": 5.5227, "step": 843 }, { "epoch": 0.02830552527877924, "grad_norm": 0.4296877107851359, "learning_rate": 2e-05, "loss": 5.5371, "step": 844 }, { "epoch": 0.02833906263100528, "grad_norm": 0.45754276200821253, "learning_rate": 2e-05, "loss": 5.7995, "step": 845 }, { "epoch": 0.028372599983231325, "grad_norm": 0.4261340648516032, "learning_rate": 2e-05, "loss": 5.6374, "step": 846 }, { "epoch": 0.028406137335457365, "grad_norm": 0.4182032291612399, "learning_rate": 2e-05, "loss": 5.5783, "step": 847 }, { "epoch": 0.02843967468768341, "grad_norm": 0.45130222537284986, "learning_rate": 2e-05, "loss": 5.71, "step": 848 }, { "epoch": 0.02847321203990945, "grad_norm": 0.4221978550571099, "learning_rate": 2e-05, "loss": 5.5403, "step": 849 }, { "epoch": 0.028506749392135492, "grad_norm": 0.4132680452956112, "learning_rate": 2e-05, "loss": 5.8722, "step": 850 }, { "epoch": 0.028540286744361532, "grad_norm": 0.4515661583127905, "learning_rate": 2e-05, "loss": 5.4862, "step": 851 }, { "epoch": 0.028573824096587576, "grad_norm": 0.42795878341182797, "learning_rate": 2e-05, "loss": 5.3962, "step": 852 }, { "epoch": 0.028607361448813616, "grad_norm": 0.4200547819541424, "learning_rate": 2e-05, "loss": 5.3737, "step": 853 }, { "epoch": 0.028640898801039656, "grad_norm": 0.436168150316166, "learning_rate": 2e-05, "loss": 5.7444, "step": 854 }, { "epoch": 0.0286744361532657, "grad_norm": 0.4199491779641979, "learning_rate": 2e-05, "loss": 5.3662, "step": 855 }, { "epoch": 0.02870797350549174, "grad_norm": 0.399756234197091, "learning_rate": 2e-05, "loss": 5.6785, "step": 856 }, { "epoch": 0.028741510857717784, "grad_norm": 0.45552986543132257, "learning_rate": 2e-05, "loss": 5.4184, "step": 857 }, { "epoch": 0.028775048209943824, "grad_norm": 0.42283879417942266, "learning_rate": 2e-05, "loss": 5.4359, "step": 858 }, { "epoch": 0.028808585562169867, "grad_norm": 0.4254962374669987, "learning_rate": 2e-05, "loss": 5.697, "step": 859 }, { "epoch": 0.028842122914395908, "grad_norm": 0.46142181553188827, "learning_rate": 2e-05, "loss": 5.5704, "step": 860 }, { "epoch": 0.02887566026662195, "grad_norm": 0.4157382967217542, "learning_rate": 2e-05, "loss": 5.3791, "step": 861 }, { "epoch": 0.02890919761884799, "grad_norm": 0.3971900562094068, "learning_rate": 2e-05, "loss": 5.7292, "step": 862 }, { "epoch": 0.028942734971074035, "grad_norm": 0.44822710976592184, "learning_rate": 2e-05, "loss": 5.6044, "step": 863 }, { "epoch": 0.028976272323300075, "grad_norm": 0.42289842262976296, "learning_rate": 2e-05, "loss": 5.5765, "step": 864 }, { "epoch": 0.02900980967552612, "grad_norm": 0.4042553402896645, "learning_rate": 2e-05, "loss": 5.6171, "step": 865 }, { "epoch": 0.02904334702775216, "grad_norm": 0.41694447057692274, "learning_rate": 2e-05, "loss": 5.4772, "step": 866 }, { "epoch": 0.029076884379978202, "grad_norm": 0.4448727065949896, "learning_rate": 2e-05, "loss": 5.6636, "step": 867 }, { "epoch": 0.029110421732204243, "grad_norm": 0.42340119197532494, "learning_rate": 2e-05, "loss": 5.4042, "step": 868 }, { "epoch": 0.029143959084430283, "grad_norm": 0.40266252281864334, "learning_rate": 2e-05, "loss": 5.5655, "step": 869 }, { "epoch": 0.029177496436656326, "grad_norm": 0.3973822857946696, "learning_rate": 2e-05, "loss": 5.4404, "step": 870 }, { "epoch": 0.029211033788882367, "grad_norm": 0.40119457749073384, "learning_rate": 2e-05, "loss": 5.7423, "step": 871 }, { "epoch": 0.02924457114110841, "grad_norm": 0.40025966022606196, "learning_rate": 2e-05, "loss": 5.5191, "step": 872 }, { "epoch": 0.02927810849333445, "grad_norm": 0.4059350719212385, "learning_rate": 2e-05, "loss": 5.4899, "step": 873 }, { "epoch": 0.029311645845560494, "grad_norm": 0.44273613021245345, "learning_rate": 2e-05, "loss": 5.4721, "step": 874 }, { "epoch": 0.029345183197786534, "grad_norm": 0.3997713942777997, "learning_rate": 2e-05, "loss": 5.4227, "step": 875 }, { "epoch": 0.029378720550012578, "grad_norm": 0.39948281362299976, "learning_rate": 2e-05, "loss": 5.692, "step": 876 }, { "epoch": 0.029412257902238618, "grad_norm": 0.46177849996113113, "learning_rate": 2e-05, "loss": 5.592, "step": 877 }, { "epoch": 0.02944579525446466, "grad_norm": 0.4238579809182363, "learning_rate": 2e-05, "loss": 5.479, "step": 878 }, { "epoch": 0.0294793326066907, "grad_norm": 0.42182222198083313, "learning_rate": 2e-05, "loss": 5.4682, "step": 879 }, { "epoch": 0.029512869958916745, "grad_norm": 0.39949824760602953, "learning_rate": 2e-05, "loss": 5.549, "step": 880 }, { "epoch": 0.029546407311142785, "grad_norm": 0.4194615055913997, "learning_rate": 2e-05, "loss": 5.7177, "step": 881 }, { "epoch": 0.029579944663368826, "grad_norm": 0.43079506320291777, "learning_rate": 2e-05, "loss": 5.5433, "step": 882 }, { "epoch": 0.02961348201559487, "grad_norm": 0.4371218744382129, "learning_rate": 2e-05, "loss": 5.7208, "step": 883 }, { "epoch": 0.02964701936782091, "grad_norm": 0.4584937877857817, "learning_rate": 2e-05, "loss": 5.6517, "step": 884 }, { "epoch": 0.029680556720046953, "grad_norm": 0.4481809844436113, "learning_rate": 2e-05, "loss": 5.6769, "step": 885 }, { "epoch": 0.029714094072272993, "grad_norm": 0.46910318593332684, "learning_rate": 2e-05, "loss": 5.4786, "step": 886 }, { "epoch": 0.029747631424499037, "grad_norm": 0.4008730126184101, "learning_rate": 2e-05, "loss": 5.6092, "step": 887 }, { "epoch": 0.029781168776725077, "grad_norm": 0.3932003564352598, "learning_rate": 2e-05, "loss": 5.7769, "step": 888 }, { "epoch": 0.02981470612895112, "grad_norm": 0.4462499492917155, "learning_rate": 2e-05, "loss": 5.6191, "step": 889 }, { "epoch": 0.02984824348117716, "grad_norm": 0.3878410663645352, "learning_rate": 2e-05, "loss": 5.7825, "step": 890 }, { "epoch": 0.029881780833403204, "grad_norm": 0.44182448397636914, "learning_rate": 2e-05, "loss": 5.3467, "step": 891 }, { "epoch": 0.029915318185629244, "grad_norm": 0.473744923333879, "learning_rate": 2e-05, "loss": 5.5802, "step": 892 }, { "epoch": 0.029948855537855288, "grad_norm": 0.4082302216088547, "learning_rate": 2e-05, "loss": 5.3877, "step": 893 }, { "epoch": 0.029982392890081328, "grad_norm": 0.4002706703940132, "learning_rate": 2e-05, "loss": 5.5869, "step": 894 }, { "epoch": 0.03001593024230737, "grad_norm": 0.4602849440758754, "learning_rate": 2e-05, "loss": 5.439, "step": 895 }, { "epoch": 0.030049467594533412, "grad_norm": 0.4117897671203696, "learning_rate": 2e-05, "loss": 5.6602, "step": 896 }, { "epoch": 0.030083004946759452, "grad_norm": 0.42409777769121565, "learning_rate": 2e-05, "loss": 5.5336, "step": 897 }, { "epoch": 0.030116542298985496, "grad_norm": 0.43446171678122547, "learning_rate": 2e-05, "loss": 5.5079, "step": 898 }, { "epoch": 0.030150079651211536, "grad_norm": 0.42690089402122017, "learning_rate": 2e-05, "loss": 5.5294, "step": 899 }, { "epoch": 0.03018361700343758, "grad_norm": 0.42382322994748617, "learning_rate": 2e-05, "loss": 5.4352, "step": 900 }, { "epoch": 0.03021715435566362, "grad_norm": 0.4419259715713497, "learning_rate": 2e-05, "loss": 5.4928, "step": 901 }, { "epoch": 0.030250691707889663, "grad_norm": 0.4407519647273817, "learning_rate": 2e-05, "loss": 5.4726, "step": 902 }, { "epoch": 0.030284229060115703, "grad_norm": 0.4075735049375084, "learning_rate": 2e-05, "loss": 5.6368, "step": 903 }, { "epoch": 0.030317766412341747, "grad_norm": 0.43856547345795893, "learning_rate": 2e-05, "loss": 5.618, "step": 904 }, { "epoch": 0.030351303764567787, "grad_norm": 0.4336959687497202, "learning_rate": 2e-05, "loss": 5.8077, "step": 905 }, { "epoch": 0.03038484111679383, "grad_norm": 0.4221319882048276, "learning_rate": 2e-05, "loss": 5.5047, "step": 906 }, { "epoch": 0.03041837846901987, "grad_norm": 0.4215758246545226, "learning_rate": 2e-05, "loss": 5.6257, "step": 907 }, { "epoch": 0.03045191582124591, "grad_norm": 0.4536104119670365, "learning_rate": 2e-05, "loss": 5.4292, "step": 908 }, { "epoch": 0.030485453173471955, "grad_norm": 0.41971169273924125, "learning_rate": 2e-05, "loss": 5.4286, "step": 909 }, { "epoch": 0.030518990525697995, "grad_norm": 0.42806235309080676, "learning_rate": 2e-05, "loss": 5.7677, "step": 910 }, { "epoch": 0.03055252787792404, "grad_norm": 0.45351467767956827, "learning_rate": 2e-05, "loss": 5.4748, "step": 911 }, { "epoch": 0.03058606523015008, "grad_norm": 0.4265396937909115, "learning_rate": 2e-05, "loss": 5.5419, "step": 912 }, { "epoch": 0.030619602582376122, "grad_norm": 0.4284479250631923, "learning_rate": 2e-05, "loss": 5.5935, "step": 913 }, { "epoch": 0.030653139934602162, "grad_norm": 0.40158131459943547, "learning_rate": 2e-05, "loss": 5.5259, "step": 914 }, { "epoch": 0.030686677286828206, "grad_norm": 0.42954323834254526, "learning_rate": 2e-05, "loss": 5.7013, "step": 915 }, { "epoch": 0.030720214639054246, "grad_norm": 0.43251150396297144, "learning_rate": 2e-05, "loss": 5.5423, "step": 916 }, { "epoch": 0.03075375199128029, "grad_norm": 0.412030994479639, "learning_rate": 2e-05, "loss": 5.4855, "step": 917 }, { "epoch": 0.03078728934350633, "grad_norm": 0.4492218586417852, "learning_rate": 2e-05, "loss": 5.443, "step": 918 }, { "epoch": 0.030820826695732374, "grad_norm": 0.4212149428563001, "learning_rate": 2e-05, "loss": 5.4686, "step": 919 }, { "epoch": 0.030854364047958414, "grad_norm": 0.43702039810560195, "learning_rate": 2e-05, "loss": 5.5422, "step": 920 }, { "epoch": 0.030887901400184454, "grad_norm": 0.4598987015050067, "learning_rate": 2e-05, "loss": 5.6851, "step": 921 }, { "epoch": 0.030921438752410497, "grad_norm": 0.4369461098808438, "learning_rate": 2e-05, "loss": 5.6225, "step": 922 }, { "epoch": 0.030954976104636538, "grad_norm": 0.4233596981029442, "learning_rate": 2e-05, "loss": 5.551, "step": 923 }, { "epoch": 0.03098851345686258, "grad_norm": 0.4164914927256043, "learning_rate": 2e-05, "loss": 5.6032, "step": 924 }, { "epoch": 0.03102205080908862, "grad_norm": 0.4307792732923377, "learning_rate": 2e-05, "loss": 5.5283, "step": 925 }, { "epoch": 0.031055588161314665, "grad_norm": 0.4193569369465107, "learning_rate": 2e-05, "loss": 5.5194, "step": 926 }, { "epoch": 0.031089125513540705, "grad_norm": 0.4354559561459747, "learning_rate": 2e-05, "loss": 5.5531, "step": 927 }, { "epoch": 0.03112266286576675, "grad_norm": 0.4169854567757154, "learning_rate": 2e-05, "loss": 5.4878, "step": 928 }, { "epoch": 0.03115620021799279, "grad_norm": 0.4483707758519607, "learning_rate": 2e-05, "loss": 5.6251, "step": 929 }, { "epoch": 0.031189737570218833, "grad_norm": 0.42125634768249076, "learning_rate": 2e-05, "loss": 5.6821, "step": 930 }, { "epoch": 0.031223274922444873, "grad_norm": 0.4285694856483595, "learning_rate": 2e-05, "loss": 5.6455, "step": 931 }, { "epoch": 0.03125681227467091, "grad_norm": 0.4210755684031081, "learning_rate": 2e-05, "loss": 5.4615, "step": 932 }, { "epoch": 0.03129034962689695, "grad_norm": 0.4179616027444164, "learning_rate": 2e-05, "loss": 5.5246, "step": 933 }, { "epoch": 0.031323886979123, "grad_norm": 0.44204409553764884, "learning_rate": 2e-05, "loss": 5.7666, "step": 934 }, { "epoch": 0.03135742433134904, "grad_norm": 0.45349211589712213, "learning_rate": 2e-05, "loss": 5.714, "step": 935 }, { "epoch": 0.03139096168357508, "grad_norm": 0.47188992278007247, "learning_rate": 2e-05, "loss": 5.4484, "step": 936 }, { "epoch": 0.03142449903580112, "grad_norm": 0.42647594806125655, "learning_rate": 2e-05, "loss": 5.523, "step": 937 }, { "epoch": 0.03145803638802717, "grad_norm": 0.44259033126506286, "learning_rate": 2e-05, "loss": 5.3359, "step": 938 }, { "epoch": 0.03149157374025321, "grad_norm": 0.4334240793018991, "learning_rate": 2e-05, "loss": 5.5199, "step": 939 }, { "epoch": 0.03152511109247925, "grad_norm": 0.4169158341730006, "learning_rate": 2e-05, "loss": 5.5205, "step": 940 }, { "epoch": 0.03155864844470529, "grad_norm": 0.4346625594441761, "learning_rate": 2e-05, "loss": 5.6512, "step": 941 }, { "epoch": 0.031592185796931335, "grad_norm": 0.4331022762475555, "learning_rate": 2e-05, "loss": 5.4624, "step": 942 }, { "epoch": 0.031625723149157375, "grad_norm": 0.4216573922821869, "learning_rate": 2e-05, "loss": 5.5184, "step": 943 }, { "epoch": 0.031659260501383416, "grad_norm": 0.432555304406387, "learning_rate": 2e-05, "loss": 5.4325, "step": 944 }, { "epoch": 0.031692797853609456, "grad_norm": 0.4174463587299153, "learning_rate": 2e-05, "loss": 5.5325, "step": 945 }, { "epoch": 0.031726335205835496, "grad_norm": 0.40065921117003506, "learning_rate": 2e-05, "loss": 5.7218, "step": 946 }, { "epoch": 0.03175987255806154, "grad_norm": 0.4022374614010489, "learning_rate": 2e-05, "loss": 5.4637, "step": 947 }, { "epoch": 0.03179340991028758, "grad_norm": 0.43784300946361393, "learning_rate": 2e-05, "loss": 5.5029, "step": 948 }, { "epoch": 0.03182694726251362, "grad_norm": 0.40175052006065287, "learning_rate": 2e-05, "loss": 5.4999, "step": 949 }, { "epoch": 0.03186048461473966, "grad_norm": 0.39818349697828986, "learning_rate": 2e-05, "loss": 5.6367, "step": 950 }, { "epoch": 0.03189402196696571, "grad_norm": 0.418286431054498, "learning_rate": 2e-05, "loss": 5.534, "step": 951 }, { "epoch": 0.03192755931919175, "grad_norm": 0.41613974487650457, "learning_rate": 2e-05, "loss": 5.4586, "step": 952 }, { "epoch": 0.03196109667141779, "grad_norm": 0.4359976365298922, "learning_rate": 2e-05, "loss": 5.6639, "step": 953 }, { "epoch": 0.03199463402364383, "grad_norm": 0.4117274284890158, "learning_rate": 2e-05, "loss": 5.5316, "step": 954 }, { "epoch": 0.03202817137586988, "grad_norm": 0.386382077505715, "learning_rate": 2e-05, "loss": 5.6451, "step": 955 }, { "epoch": 0.03206170872809592, "grad_norm": 0.41697748284763564, "learning_rate": 2e-05, "loss": 5.5817, "step": 956 }, { "epoch": 0.03209524608032196, "grad_norm": 0.419714825777515, "learning_rate": 2e-05, "loss": 5.7134, "step": 957 }, { "epoch": 0.032128783432548, "grad_norm": 0.4305596285556291, "learning_rate": 2e-05, "loss": 5.3637, "step": 958 }, { "epoch": 0.032162320784774046, "grad_norm": 0.40256673788969305, "learning_rate": 2e-05, "loss": 5.3881, "step": 959 }, { "epoch": 0.032195858137000086, "grad_norm": 0.40914348640007897, "learning_rate": 2e-05, "loss": 5.7731, "step": 960 }, { "epoch": 0.032229395489226126, "grad_norm": 0.41194797130011096, "learning_rate": 2e-05, "loss": 5.4655, "step": 961 }, { "epoch": 0.032262932841452166, "grad_norm": 0.3806608882298344, "learning_rate": 2e-05, "loss": 5.4117, "step": 962 }, { "epoch": 0.032296470193678206, "grad_norm": 0.41910069904533853, "learning_rate": 2e-05, "loss": 5.7521, "step": 963 }, { "epoch": 0.03233000754590425, "grad_norm": 0.40734032391026326, "learning_rate": 2e-05, "loss": 5.6711, "step": 964 }, { "epoch": 0.03236354489813029, "grad_norm": 0.43737135271491523, "learning_rate": 2e-05, "loss": 5.687, "step": 965 }, { "epoch": 0.032397082250356334, "grad_norm": 0.40185666766225336, "learning_rate": 2e-05, "loss": 5.606, "step": 966 }, { "epoch": 0.032430619602582374, "grad_norm": 0.43453325449212116, "learning_rate": 2e-05, "loss": 5.6641, "step": 967 }, { "epoch": 0.03246415695480842, "grad_norm": 0.39884507016945864, "learning_rate": 2e-05, "loss": 5.5551, "step": 968 }, { "epoch": 0.03249769430703446, "grad_norm": 0.4257111568539708, "learning_rate": 2e-05, "loss": 5.4885, "step": 969 }, { "epoch": 0.0325312316592605, "grad_norm": 0.4213273669273586, "learning_rate": 2e-05, "loss": 5.7295, "step": 970 }, { "epoch": 0.03256476901148654, "grad_norm": 0.3944678873589624, "learning_rate": 2e-05, "loss": 5.6276, "step": 971 }, { "epoch": 0.03259830636371259, "grad_norm": 0.41721260907248736, "learning_rate": 2e-05, "loss": 5.4723, "step": 972 }, { "epoch": 0.03263184371593863, "grad_norm": 0.4129947507978762, "learning_rate": 2e-05, "loss": 5.2786, "step": 973 }, { "epoch": 0.03266538106816467, "grad_norm": 0.40729237713490035, "learning_rate": 2e-05, "loss": 5.5488, "step": 974 }, { "epoch": 0.03269891842039071, "grad_norm": 0.4126432735426872, "learning_rate": 2e-05, "loss": 5.4341, "step": 975 }, { "epoch": 0.03273245577261675, "grad_norm": 0.4019912739542967, "learning_rate": 2e-05, "loss": 5.6141, "step": 976 }, { "epoch": 0.032765993124842796, "grad_norm": 0.4121839993991228, "learning_rate": 2e-05, "loss": 5.4815, "step": 977 }, { "epoch": 0.032799530477068836, "grad_norm": 0.3997609323970459, "learning_rate": 2e-05, "loss": 5.5432, "step": 978 }, { "epoch": 0.032833067829294876, "grad_norm": 0.4013860765418876, "learning_rate": 2e-05, "loss": 5.6358, "step": 979 }, { "epoch": 0.032866605181520916, "grad_norm": 0.3985808152536372, "learning_rate": 2e-05, "loss": 5.6712, "step": 980 }, { "epoch": 0.032900142533746964, "grad_norm": 0.43488130357749855, "learning_rate": 2e-05, "loss": 5.4635, "step": 981 }, { "epoch": 0.032933679885973004, "grad_norm": 0.4102315507203811, "learning_rate": 2e-05, "loss": 5.5776, "step": 982 }, { "epoch": 0.032967217238199044, "grad_norm": 0.41228684021728845, "learning_rate": 2e-05, "loss": 5.718, "step": 983 }, { "epoch": 0.033000754590425084, "grad_norm": 0.43458046630439245, "learning_rate": 2e-05, "loss": 5.4932, "step": 984 }, { "epoch": 0.03303429194265113, "grad_norm": 0.38546735113066477, "learning_rate": 2e-05, "loss": 5.4622, "step": 985 }, { "epoch": 0.03306782929487717, "grad_norm": 0.39836505048380083, "learning_rate": 2e-05, "loss": 5.6818, "step": 986 }, { "epoch": 0.03310136664710321, "grad_norm": 0.4279933388087391, "learning_rate": 2e-05, "loss": 5.5195, "step": 987 }, { "epoch": 0.03313490399932925, "grad_norm": 0.4226576387742568, "learning_rate": 2e-05, "loss": 5.5514, "step": 988 }, { "epoch": 0.03316844135155529, "grad_norm": 0.41939424353047206, "learning_rate": 2e-05, "loss": 5.563, "step": 989 }, { "epoch": 0.03320197870378134, "grad_norm": 0.4094750409928893, "learning_rate": 2e-05, "loss": 5.5837, "step": 990 }, { "epoch": 0.03323551605600738, "grad_norm": 0.4321165908144895, "learning_rate": 2e-05, "loss": 5.6369, "step": 991 }, { "epoch": 0.03326905340823342, "grad_norm": 0.41549440656291753, "learning_rate": 2e-05, "loss": 5.6079, "step": 992 }, { "epoch": 0.03330259076045946, "grad_norm": 0.404245946569752, "learning_rate": 2e-05, "loss": 5.627, "step": 993 }, { "epoch": 0.033336128112685506, "grad_norm": 0.4129650791301771, "learning_rate": 2e-05, "loss": 5.424, "step": 994 }, { "epoch": 0.033369665464911547, "grad_norm": 0.41588533993095966, "learning_rate": 2e-05, "loss": 5.5193, "step": 995 }, { "epoch": 0.03340320281713759, "grad_norm": 0.415230151155317, "learning_rate": 2e-05, "loss": 5.5335, "step": 996 }, { "epoch": 0.03343674016936363, "grad_norm": 0.39394948023670706, "learning_rate": 2e-05, "loss": 5.4536, "step": 997 }, { "epoch": 0.033470277521589674, "grad_norm": 0.4279110853081807, "learning_rate": 2e-05, "loss": 5.5323, "step": 998 }, { "epoch": 0.033503814873815714, "grad_norm": 0.4367673452569409, "learning_rate": 2e-05, "loss": 5.389, "step": 999 }, { "epoch": 0.033537352226041754, "grad_norm": 0.4053946301671585, "learning_rate": 2e-05, "loss": 5.394, "step": 1000 }, { "epoch": 0.033570889578267794, "grad_norm": 0.4212563273129882, "learning_rate": 2e-05, "loss": 5.5395, "step": 1001 }, { "epoch": 0.033604426930493835, "grad_norm": 0.4045964961576838, "learning_rate": 2e-05, "loss": 5.7515, "step": 1002 }, { "epoch": 0.03363796428271988, "grad_norm": 0.4195153787874383, "learning_rate": 2e-05, "loss": 5.4726, "step": 1003 }, { "epoch": 0.03367150163494592, "grad_norm": 0.4245469620602287, "learning_rate": 2e-05, "loss": 5.4015, "step": 1004 }, { "epoch": 0.03370503898717196, "grad_norm": 0.4154724788626123, "learning_rate": 2e-05, "loss": 5.6315, "step": 1005 }, { "epoch": 0.033738576339398, "grad_norm": 0.4266918948376994, "learning_rate": 2e-05, "loss": 5.5079, "step": 1006 }, { "epoch": 0.03377211369162405, "grad_norm": 0.42537120931329425, "learning_rate": 2e-05, "loss": 5.541, "step": 1007 }, { "epoch": 0.03380565104385009, "grad_norm": 0.41852671159639987, "learning_rate": 2e-05, "loss": 5.5729, "step": 1008 }, { "epoch": 0.03383918839607613, "grad_norm": 0.46767789785054453, "learning_rate": 2e-05, "loss": 5.4673, "step": 1009 }, { "epoch": 0.03387272574830217, "grad_norm": 0.4329239443824087, "learning_rate": 2e-05, "loss": 5.4871, "step": 1010 }, { "epoch": 0.03390626310052822, "grad_norm": 0.44025383280747976, "learning_rate": 2e-05, "loss": 5.6816, "step": 1011 }, { "epoch": 0.03393980045275426, "grad_norm": 0.4086095896508643, "learning_rate": 2e-05, "loss": 5.6568, "step": 1012 }, { "epoch": 0.0339733378049803, "grad_norm": 0.4183314324344563, "learning_rate": 2e-05, "loss": 5.3616, "step": 1013 }, { "epoch": 0.03400687515720634, "grad_norm": 0.4384539780734469, "learning_rate": 2e-05, "loss": 5.4146, "step": 1014 }, { "epoch": 0.03404041250943238, "grad_norm": 0.4556256174353531, "learning_rate": 2e-05, "loss": 5.5206, "step": 1015 }, { "epoch": 0.034073949861658424, "grad_norm": 0.4095526789699891, "learning_rate": 2e-05, "loss": 5.446, "step": 1016 }, { "epoch": 0.034107487213884465, "grad_norm": 0.45084402298353354, "learning_rate": 2e-05, "loss": 5.4944, "step": 1017 }, { "epoch": 0.034141024566110505, "grad_norm": 0.4353964799505757, "learning_rate": 2e-05, "loss": 5.6401, "step": 1018 }, { "epoch": 0.034174561918336545, "grad_norm": 0.4330098019622049, "learning_rate": 2e-05, "loss": 5.5238, "step": 1019 }, { "epoch": 0.03420809927056259, "grad_norm": 0.45614549357985057, "learning_rate": 2e-05, "loss": 5.5492, "step": 1020 }, { "epoch": 0.03424163662278863, "grad_norm": 0.4081420945718711, "learning_rate": 2e-05, "loss": 5.4361, "step": 1021 }, { "epoch": 0.03427517397501467, "grad_norm": 0.40315623269537687, "learning_rate": 2e-05, "loss": 5.305, "step": 1022 }, { "epoch": 0.03430871132724071, "grad_norm": 0.41447491004048703, "learning_rate": 2e-05, "loss": 5.5456, "step": 1023 }, { "epoch": 0.03434224867946676, "grad_norm": 0.4490892698251445, "learning_rate": 2e-05, "loss": 5.7176, "step": 1024 }, { "epoch": 0.0343757860316928, "grad_norm": 0.38774678988027733, "learning_rate": 2e-05, "loss": 5.7794, "step": 1025 }, { "epoch": 0.03440932338391884, "grad_norm": 0.39501905522799496, "learning_rate": 2e-05, "loss": 5.6684, "step": 1026 }, { "epoch": 0.03444286073614488, "grad_norm": 0.399732897236457, "learning_rate": 2e-05, "loss": 5.4356, "step": 1027 }, { "epoch": 0.03447639808837092, "grad_norm": 0.4180559555203049, "learning_rate": 2e-05, "loss": 5.7498, "step": 1028 }, { "epoch": 0.03450993544059697, "grad_norm": 0.39757854635301654, "learning_rate": 2e-05, "loss": 5.6983, "step": 1029 }, { "epoch": 0.03454347279282301, "grad_norm": 0.40955138601246094, "learning_rate": 2e-05, "loss": 5.6571, "step": 1030 }, { "epoch": 0.03457701014504905, "grad_norm": 0.4201078426004234, "learning_rate": 2e-05, "loss": 5.3286, "step": 1031 }, { "epoch": 0.03461054749727509, "grad_norm": 0.4021692088714854, "learning_rate": 2e-05, "loss": 5.5769, "step": 1032 }, { "epoch": 0.034644084849501135, "grad_norm": 0.42088482041830105, "learning_rate": 2e-05, "loss": 5.5248, "step": 1033 }, { "epoch": 0.034677622201727175, "grad_norm": 0.42653384631605196, "learning_rate": 2e-05, "loss": 5.5104, "step": 1034 }, { "epoch": 0.034711159553953215, "grad_norm": 0.4041372235692399, "learning_rate": 2e-05, "loss": 5.7599, "step": 1035 }, { "epoch": 0.034744696906179255, "grad_norm": 0.4073957406597048, "learning_rate": 2e-05, "loss": 5.4477, "step": 1036 }, { "epoch": 0.0347782342584053, "grad_norm": 0.42723556137958246, "learning_rate": 2e-05, "loss": 5.5273, "step": 1037 }, { "epoch": 0.03481177161063134, "grad_norm": 0.3977470022565537, "learning_rate": 2e-05, "loss": 5.4342, "step": 1038 }, { "epoch": 0.03484530896285738, "grad_norm": 0.4181977197504892, "learning_rate": 2e-05, "loss": 5.6264, "step": 1039 }, { "epoch": 0.03487884631508342, "grad_norm": 0.39737746244757444, "learning_rate": 2e-05, "loss": 5.6005, "step": 1040 }, { "epoch": 0.03491238366730946, "grad_norm": 0.40075325145855345, "learning_rate": 2e-05, "loss": 5.5023, "step": 1041 }, { "epoch": 0.03494592101953551, "grad_norm": 0.42608575592226133, "learning_rate": 2e-05, "loss": 5.6162, "step": 1042 }, { "epoch": 0.03497945837176155, "grad_norm": 0.4126808831802974, "learning_rate": 2e-05, "loss": 5.4371, "step": 1043 }, { "epoch": 0.03501299572398759, "grad_norm": 0.4264123723622933, "learning_rate": 2e-05, "loss": 5.596, "step": 1044 }, { "epoch": 0.03504653307621363, "grad_norm": 0.4144289490916169, "learning_rate": 2e-05, "loss": 5.5031, "step": 1045 }, { "epoch": 0.03508007042843968, "grad_norm": 0.41233984739203944, "learning_rate": 2e-05, "loss": 5.3651, "step": 1046 }, { "epoch": 0.03511360778066572, "grad_norm": 0.44468535562620215, "learning_rate": 2e-05, "loss": 5.5521, "step": 1047 }, { "epoch": 0.03514714513289176, "grad_norm": 0.43030399465997426, "learning_rate": 2e-05, "loss": 5.382, "step": 1048 }, { "epoch": 0.0351806824851178, "grad_norm": 0.42761094682140877, "learning_rate": 2e-05, "loss": 5.7102, "step": 1049 }, { "epoch": 0.035214219837343845, "grad_norm": 0.4461283134103825, "learning_rate": 2e-05, "loss": 5.4422, "step": 1050 }, { "epoch": 0.035247757189569885, "grad_norm": 0.41865098745857227, "learning_rate": 2e-05, "loss": 5.4436, "step": 1051 }, { "epoch": 0.035281294541795925, "grad_norm": 0.4161550099565823, "learning_rate": 2e-05, "loss": 5.5307, "step": 1052 }, { "epoch": 0.035314831894021965, "grad_norm": 0.42627769590141174, "learning_rate": 2e-05, "loss": 5.3834, "step": 1053 }, { "epoch": 0.035348369246248006, "grad_norm": 0.40245125500046325, "learning_rate": 2e-05, "loss": 5.4877, "step": 1054 }, { "epoch": 0.03538190659847405, "grad_norm": 0.4095742558008761, "learning_rate": 2e-05, "loss": 5.5517, "step": 1055 }, { "epoch": 0.03541544395070009, "grad_norm": 0.45014272055568055, "learning_rate": 2e-05, "loss": 5.4818, "step": 1056 }, { "epoch": 0.03544898130292613, "grad_norm": 0.4199543095560614, "learning_rate": 2e-05, "loss": 5.4381, "step": 1057 }, { "epoch": 0.03548251865515217, "grad_norm": 0.41688428536130406, "learning_rate": 2e-05, "loss": 5.5251, "step": 1058 }, { "epoch": 0.03551605600737822, "grad_norm": 0.4164734888273927, "learning_rate": 2e-05, "loss": 5.6862, "step": 1059 }, { "epoch": 0.03554959335960426, "grad_norm": 0.4468521079219813, "learning_rate": 2e-05, "loss": 5.5071, "step": 1060 }, { "epoch": 0.0355831307118303, "grad_norm": 0.4112656050917503, "learning_rate": 2e-05, "loss": 5.5214, "step": 1061 }, { "epoch": 0.03561666806405634, "grad_norm": 0.42046968921845607, "learning_rate": 2e-05, "loss": 5.5442, "step": 1062 }, { "epoch": 0.03565020541628239, "grad_norm": 0.4248940027916247, "learning_rate": 2e-05, "loss": 5.4696, "step": 1063 }, { "epoch": 0.03568374276850843, "grad_norm": 0.4614753515993786, "learning_rate": 2e-05, "loss": 5.5978, "step": 1064 }, { "epoch": 0.03571728012073447, "grad_norm": 0.42148977493398476, "learning_rate": 2e-05, "loss": 5.538, "step": 1065 }, { "epoch": 0.03575081747296051, "grad_norm": 0.41140419438274367, "learning_rate": 2e-05, "loss": 5.7502, "step": 1066 }, { "epoch": 0.03578435482518655, "grad_norm": 0.4531150414364575, "learning_rate": 2e-05, "loss": 5.7356, "step": 1067 }, { "epoch": 0.035817892177412596, "grad_norm": 0.4505619199981449, "learning_rate": 2e-05, "loss": 5.5111, "step": 1068 }, { "epoch": 0.035851429529638636, "grad_norm": 0.427662890422276, "learning_rate": 2e-05, "loss": 5.5607, "step": 1069 }, { "epoch": 0.035884966881864676, "grad_norm": 0.4388569444892725, "learning_rate": 2e-05, "loss": 5.3881, "step": 1070 }, { "epoch": 0.035918504234090716, "grad_norm": 0.4240366152095682, "learning_rate": 2e-05, "loss": 5.3634, "step": 1071 }, { "epoch": 0.03595204158631676, "grad_norm": 0.4070637940311095, "learning_rate": 2e-05, "loss": 5.5051, "step": 1072 }, { "epoch": 0.0359855789385428, "grad_norm": 0.4398174428113001, "learning_rate": 2e-05, "loss": 5.4189, "step": 1073 }, { "epoch": 0.03601911629076884, "grad_norm": 0.446594005653742, "learning_rate": 2e-05, "loss": 5.6414, "step": 1074 }, { "epoch": 0.036052653642994884, "grad_norm": 0.436107396951582, "learning_rate": 2e-05, "loss": 5.6761, "step": 1075 }, { "epoch": 0.03608619099522093, "grad_norm": 0.4619841723311078, "learning_rate": 2e-05, "loss": 5.7371, "step": 1076 }, { "epoch": 0.03611972834744697, "grad_norm": 0.47245032970747053, "learning_rate": 2e-05, "loss": 5.6104, "step": 1077 }, { "epoch": 0.03615326569967301, "grad_norm": 0.41084241154680273, "learning_rate": 2e-05, "loss": 5.3641, "step": 1078 }, { "epoch": 0.03618680305189905, "grad_norm": 0.4030344082092604, "learning_rate": 2e-05, "loss": 5.4057, "step": 1079 }, { "epoch": 0.03622034040412509, "grad_norm": 0.4120533865565568, "learning_rate": 2e-05, "loss": 5.5882, "step": 1080 }, { "epoch": 0.03625387775635114, "grad_norm": 0.45650951030456627, "learning_rate": 2e-05, "loss": 5.3642, "step": 1081 }, { "epoch": 0.03628741510857718, "grad_norm": 0.4438363373334843, "learning_rate": 2e-05, "loss": 5.5835, "step": 1082 }, { "epoch": 0.03632095246080322, "grad_norm": 0.41609876432570897, "learning_rate": 2e-05, "loss": 5.3317, "step": 1083 }, { "epoch": 0.03635448981302926, "grad_norm": 0.46654456069487216, "learning_rate": 2e-05, "loss": 5.48, "step": 1084 }, { "epoch": 0.036388027165255306, "grad_norm": 0.4384607367197028, "learning_rate": 2e-05, "loss": 5.4505, "step": 1085 }, { "epoch": 0.036421564517481346, "grad_norm": 0.44575630303025493, "learning_rate": 2e-05, "loss": 5.5195, "step": 1086 }, { "epoch": 0.036455101869707386, "grad_norm": 0.47198920284968343, "learning_rate": 2e-05, "loss": 5.4558, "step": 1087 }, { "epoch": 0.036488639221933426, "grad_norm": 0.46220315177248245, "learning_rate": 2e-05, "loss": 5.4985, "step": 1088 }, { "epoch": 0.03652217657415947, "grad_norm": 0.4303721616912088, "learning_rate": 2e-05, "loss": 5.4123, "step": 1089 }, { "epoch": 0.036555713926385514, "grad_norm": 0.4286471947725366, "learning_rate": 2e-05, "loss": 5.405, "step": 1090 }, { "epoch": 0.036589251278611554, "grad_norm": 0.4305684383887757, "learning_rate": 2e-05, "loss": 5.6239, "step": 1091 }, { "epoch": 0.036622788630837594, "grad_norm": 0.4156415704390821, "learning_rate": 2e-05, "loss": 5.6719, "step": 1092 }, { "epoch": 0.036656325983063634, "grad_norm": 0.48227227106779336, "learning_rate": 2e-05, "loss": 5.5296, "step": 1093 }, { "epoch": 0.03668986333528968, "grad_norm": 0.44713985287843955, "learning_rate": 2e-05, "loss": 5.7273, "step": 1094 }, { "epoch": 0.03672340068751572, "grad_norm": 0.436192474520801, "learning_rate": 2e-05, "loss": 5.6272, "step": 1095 }, { "epoch": 0.03675693803974176, "grad_norm": 0.4949502275719085, "learning_rate": 2e-05, "loss": 5.4962, "step": 1096 }, { "epoch": 0.0367904753919678, "grad_norm": 0.43348252034277057, "learning_rate": 2e-05, "loss": 5.3251, "step": 1097 }, { "epoch": 0.03682401274419385, "grad_norm": 0.43397677351864244, "learning_rate": 2e-05, "loss": 5.8159, "step": 1098 }, { "epoch": 0.03685755009641989, "grad_norm": 0.4361027316373658, "learning_rate": 2e-05, "loss": 5.6204, "step": 1099 }, { "epoch": 0.03689108744864593, "grad_norm": 0.4456497926307576, "learning_rate": 2e-05, "loss": 5.4552, "step": 1100 }, { "epoch": 0.03692462480087197, "grad_norm": 0.4101257965952601, "learning_rate": 2e-05, "loss": 5.6381, "step": 1101 }, { "epoch": 0.036958162153098016, "grad_norm": 0.4583685218100234, "learning_rate": 2e-05, "loss": 5.7345, "step": 1102 }, { "epoch": 0.036991699505324056, "grad_norm": 0.43042082669588017, "learning_rate": 2e-05, "loss": 5.7527, "step": 1103 }, { "epoch": 0.037025236857550096, "grad_norm": 0.409139787371369, "learning_rate": 2e-05, "loss": 5.5287, "step": 1104 }, { "epoch": 0.03705877420977614, "grad_norm": 0.4737158218295241, "learning_rate": 2e-05, "loss": 5.5741, "step": 1105 }, { "epoch": 0.03709231156200218, "grad_norm": 0.41011800268279197, "learning_rate": 2e-05, "loss": 5.505, "step": 1106 }, { "epoch": 0.037125848914228224, "grad_norm": 0.4384294681351142, "learning_rate": 2e-05, "loss": 5.6272, "step": 1107 }, { "epoch": 0.037159386266454264, "grad_norm": 0.4987782512450664, "learning_rate": 2e-05, "loss": 5.7154, "step": 1108 }, { "epoch": 0.037192923618680304, "grad_norm": 0.4446320607722715, "learning_rate": 2e-05, "loss": 5.449, "step": 1109 }, { "epoch": 0.037226460970906344, "grad_norm": 0.4156306060169619, "learning_rate": 2e-05, "loss": 5.8689, "step": 1110 }, { "epoch": 0.03725999832313239, "grad_norm": 0.42403311647634434, "learning_rate": 2e-05, "loss": 5.5263, "step": 1111 }, { "epoch": 0.03729353567535843, "grad_norm": 0.45718976517888016, "learning_rate": 2e-05, "loss": 5.5853, "step": 1112 }, { "epoch": 0.03732707302758447, "grad_norm": 0.4188031344465533, "learning_rate": 2e-05, "loss": 5.5609, "step": 1113 }, { "epoch": 0.03736061037981051, "grad_norm": 0.41349254801316254, "learning_rate": 2e-05, "loss": 5.4672, "step": 1114 }, { "epoch": 0.03739414773203656, "grad_norm": 0.4610957144311079, "learning_rate": 2e-05, "loss": 5.5993, "step": 1115 }, { "epoch": 0.0374276850842626, "grad_norm": 0.5130779230789693, "learning_rate": 2e-05, "loss": 5.5971, "step": 1116 }, { "epoch": 0.03746122243648864, "grad_norm": 0.4468745154482893, "learning_rate": 2e-05, "loss": 5.5009, "step": 1117 }, { "epoch": 0.03749475978871468, "grad_norm": 0.4854683905677972, "learning_rate": 2e-05, "loss": 5.7954, "step": 1118 }, { "epoch": 0.03752829714094072, "grad_norm": 0.5207187250209628, "learning_rate": 2e-05, "loss": 5.557, "step": 1119 }, { "epoch": 0.03756183449316677, "grad_norm": 0.43158326927717144, "learning_rate": 2e-05, "loss": 5.5145, "step": 1120 }, { "epoch": 0.03759537184539281, "grad_norm": 0.4672846994314373, "learning_rate": 2e-05, "loss": 5.6619, "step": 1121 }, { "epoch": 0.03762890919761885, "grad_norm": 0.501778277263435, "learning_rate": 2e-05, "loss": 5.4271, "step": 1122 }, { "epoch": 0.03766244654984489, "grad_norm": 0.4457244512096768, "learning_rate": 2e-05, "loss": 5.5138, "step": 1123 }, { "epoch": 0.037695983902070934, "grad_norm": 0.49554366844811754, "learning_rate": 2e-05, "loss": 5.7624, "step": 1124 }, { "epoch": 0.037729521254296974, "grad_norm": 0.46933353681660456, "learning_rate": 2e-05, "loss": 5.5993, "step": 1125 }, { "epoch": 0.037763058606523014, "grad_norm": 0.44992886459249093, "learning_rate": 2e-05, "loss": 5.546, "step": 1126 }, { "epoch": 0.037796595958749055, "grad_norm": 0.45534603840863463, "learning_rate": 2e-05, "loss": 5.4691, "step": 1127 }, { "epoch": 0.0378301333109751, "grad_norm": 0.4318875932630509, "learning_rate": 2e-05, "loss": 5.4683, "step": 1128 }, { "epoch": 0.03786367066320114, "grad_norm": 0.46045035352404506, "learning_rate": 2e-05, "loss": 5.5443, "step": 1129 }, { "epoch": 0.03789720801542718, "grad_norm": 0.487503866946629, "learning_rate": 2e-05, "loss": 5.5582, "step": 1130 }, { "epoch": 0.03793074536765322, "grad_norm": 0.42198539054943873, "learning_rate": 2e-05, "loss": 5.5678, "step": 1131 }, { "epoch": 0.03796428271987926, "grad_norm": 0.44409492742094625, "learning_rate": 2e-05, "loss": 5.6224, "step": 1132 }, { "epoch": 0.03799782007210531, "grad_norm": 0.4246982413889079, "learning_rate": 2e-05, "loss": 5.3429, "step": 1133 }, { "epoch": 0.03803135742433135, "grad_norm": 0.4615549507672432, "learning_rate": 2e-05, "loss": 5.3842, "step": 1134 }, { "epoch": 0.03806489477655739, "grad_norm": 0.42363994274334815, "learning_rate": 2e-05, "loss": 5.5448, "step": 1135 }, { "epoch": 0.03809843212878343, "grad_norm": 0.406371188341706, "learning_rate": 2e-05, "loss": 5.7452, "step": 1136 }, { "epoch": 0.03813196948100948, "grad_norm": 0.42960203400111807, "learning_rate": 2e-05, "loss": 5.4968, "step": 1137 }, { "epoch": 0.03816550683323552, "grad_norm": 0.4524681041497334, "learning_rate": 2e-05, "loss": 5.5411, "step": 1138 }, { "epoch": 0.03819904418546156, "grad_norm": 0.46171498576479403, "learning_rate": 2e-05, "loss": 5.5906, "step": 1139 }, { "epoch": 0.0382325815376876, "grad_norm": 0.4258588478690374, "learning_rate": 2e-05, "loss": 5.6376, "step": 1140 }, { "epoch": 0.038266118889913645, "grad_norm": 0.4580736044203936, "learning_rate": 2e-05, "loss": 5.6913, "step": 1141 }, { "epoch": 0.038299656242139685, "grad_norm": 0.4181206945923947, "learning_rate": 2e-05, "loss": 5.2506, "step": 1142 }, { "epoch": 0.038333193594365725, "grad_norm": 0.423692812238238, "learning_rate": 2e-05, "loss": 5.5966, "step": 1143 }, { "epoch": 0.038366730946591765, "grad_norm": 0.4765578334595021, "learning_rate": 2e-05, "loss": 5.4923, "step": 1144 }, { "epoch": 0.038400268298817805, "grad_norm": 0.4304264015618771, "learning_rate": 2e-05, "loss": 5.4084, "step": 1145 }, { "epoch": 0.03843380565104385, "grad_norm": 0.43172115214888923, "learning_rate": 2e-05, "loss": 5.5719, "step": 1146 }, { "epoch": 0.03846734300326989, "grad_norm": 0.4513539357343437, "learning_rate": 2e-05, "loss": 5.6335, "step": 1147 }, { "epoch": 0.03850088035549593, "grad_norm": 0.47494636876095075, "learning_rate": 2e-05, "loss": 5.5698, "step": 1148 }, { "epoch": 0.03853441770772197, "grad_norm": 0.42387923731993354, "learning_rate": 2e-05, "loss": 5.4782, "step": 1149 }, { "epoch": 0.03856795505994802, "grad_norm": 0.4464696156126329, "learning_rate": 2e-05, "loss": 5.5752, "step": 1150 }, { "epoch": 0.03860149241217406, "grad_norm": 0.4111127570787795, "learning_rate": 2e-05, "loss": 5.5969, "step": 1151 }, { "epoch": 0.0386350297644001, "grad_norm": 0.4344628382101121, "learning_rate": 2e-05, "loss": 5.7056, "step": 1152 }, { "epoch": 0.03866856711662614, "grad_norm": 0.4358275804312892, "learning_rate": 2e-05, "loss": 5.7107, "step": 1153 }, { "epoch": 0.03870210446885219, "grad_norm": 0.42349703060375704, "learning_rate": 2e-05, "loss": 5.7241, "step": 1154 }, { "epoch": 0.03873564182107823, "grad_norm": 0.48691049688136573, "learning_rate": 2e-05, "loss": 5.5232, "step": 1155 }, { "epoch": 0.03876917917330427, "grad_norm": 0.4269342272538043, "learning_rate": 2e-05, "loss": 5.3337, "step": 1156 }, { "epoch": 0.03880271652553031, "grad_norm": 0.4329217518217585, "learning_rate": 2e-05, "loss": 5.7053, "step": 1157 }, { "epoch": 0.03883625387775635, "grad_norm": 0.4748521515492113, "learning_rate": 2e-05, "loss": 5.505, "step": 1158 }, { "epoch": 0.038869791229982395, "grad_norm": 0.47433479773773846, "learning_rate": 2e-05, "loss": 5.5417, "step": 1159 }, { "epoch": 0.038903328582208435, "grad_norm": 0.4544562927971842, "learning_rate": 2e-05, "loss": 5.4387, "step": 1160 }, { "epoch": 0.038936865934434475, "grad_norm": 0.4663842105597143, "learning_rate": 2e-05, "loss": 5.4696, "step": 1161 }, { "epoch": 0.038970403286660515, "grad_norm": 0.42689609814575186, "learning_rate": 2e-05, "loss": 5.5892, "step": 1162 }, { "epoch": 0.03900394063888656, "grad_norm": 0.43096977418951593, "learning_rate": 2e-05, "loss": 5.5445, "step": 1163 }, { "epoch": 0.0390374779911126, "grad_norm": 0.4204386724944783, "learning_rate": 2e-05, "loss": 5.5828, "step": 1164 }, { "epoch": 0.03907101534333864, "grad_norm": 0.41960197436392366, "learning_rate": 2e-05, "loss": 5.5397, "step": 1165 }, { "epoch": 0.03910455269556468, "grad_norm": 0.40022693301208234, "learning_rate": 2e-05, "loss": 5.4219, "step": 1166 }, { "epoch": 0.03913809004779073, "grad_norm": 0.3996516760755346, "learning_rate": 2e-05, "loss": 5.6433, "step": 1167 }, { "epoch": 0.03917162740001677, "grad_norm": 0.4235051898303664, "learning_rate": 2e-05, "loss": 5.4866, "step": 1168 }, { "epoch": 0.03920516475224281, "grad_norm": 0.4112163513030538, "learning_rate": 2e-05, "loss": 5.4136, "step": 1169 }, { "epoch": 0.03923870210446885, "grad_norm": 0.40847570137046485, "learning_rate": 2e-05, "loss": 5.2761, "step": 1170 }, { "epoch": 0.03927223945669489, "grad_norm": 0.4290072541306505, "learning_rate": 2e-05, "loss": 5.7455, "step": 1171 }, { "epoch": 0.03930577680892094, "grad_norm": 0.44209759628005585, "learning_rate": 2e-05, "loss": 5.6637, "step": 1172 }, { "epoch": 0.03933931416114698, "grad_norm": 0.4108761856838168, "learning_rate": 2e-05, "loss": 5.4803, "step": 1173 }, { "epoch": 0.03937285151337302, "grad_norm": 0.41307614377167257, "learning_rate": 2e-05, "loss": 5.3573, "step": 1174 }, { "epoch": 0.03940638886559906, "grad_norm": 0.41487421305902006, "learning_rate": 2e-05, "loss": 5.2796, "step": 1175 }, { "epoch": 0.039439926217825105, "grad_norm": 0.4094414808710572, "learning_rate": 2e-05, "loss": 5.4771, "step": 1176 }, { "epoch": 0.039473463570051145, "grad_norm": 0.424204794990529, "learning_rate": 2e-05, "loss": 5.7565, "step": 1177 }, { "epoch": 0.039507000922277186, "grad_norm": 0.4185625933892392, "learning_rate": 2e-05, "loss": 5.5489, "step": 1178 }, { "epoch": 0.039540538274503226, "grad_norm": 0.4016398699316955, "learning_rate": 2e-05, "loss": 5.2997, "step": 1179 }, { "epoch": 0.03957407562672927, "grad_norm": 0.44836238351657093, "learning_rate": 2e-05, "loss": 5.6212, "step": 1180 }, { "epoch": 0.03960761297895531, "grad_norm": 0.4142418234517627, "learning_rate": 2e-05, "loss": 5.6926, "step": 1181 }, { "epoch": 0.03964115033118135, "grad_norm": 0.4008731840325694, "learning_rate": 2e-05, "loss": 5.5231, "step": 1182 }, { "epoch": 0.03967468768340739, "grad_norm": 0.4220185824246394, "learning_rate": 2e-05, "loss": 5.4188, "step": 1183 }, { "epoch": 0.039708225035633433, "grad_norm": 0.4378183518071253, "learning_rate": 2e-05, "loss": 5.63, "step": 1184 }, { "epoch": 0.03974176238785948, "grad_norm": 0.3952023097854801, "learning_rate": 2e-05, "loss": 5.5913, "step": 1185 }, { "epoch": 0.03977529974008552, "grad_norm": 0.4164097496114302, "learning_rate": 2e-05, "loss": 5.7194, "step": 1186 }, { "epoch": 0.03980883709231156, "grad_norm": 0.4316103961486783, "learning_rate": 2e-05, "loss": 5.433, "step": 1187 }, { "epoch": 0.0398423744445376, "grad_norm": 0.43042963799307854, "learning_rate": 2e-05, "loss": 5.4941, "step": 1188 }, { "epoch": 0.03987591179676365, "grad_norm": 0.3894131585179421, "learning_rate": 2e-05, "loss": 5.477, "step": 1189 }, { "epoch": 0.03990944914898969, "grad_norm": 0.4512315785715694, "learning_rate": 2e-05, "loss": 5.5351, "step": 1190 }, { "epoch": 0.03994298650121573, "grad_norm": 0.40932554221014344, "learning_rate": 2e-05, "loss": 5.6063, "step": 1191 }, { "epoch": 0.03997652385344177, "grad_norm": 0.40424591211564154, "learning_rate": 2e-05, "loss": 5.7637, "step": 1192 }, { "epoch": 0.040010061205667816, "grad_norm": 0.420784162064768, "learning_rate": 2e-05, "loss": 5.5349, "step": 1193 }, { "epoch": 0.040043598557893856, "grad_norm": 0.42506280156769527, "learning_rate": 2e-05, "loss": 5.3871, "step": 1194 }, { "epoch": 0.040077135910119896, "grad_norm": 0.42148618740240396, "learning_rate": 2e-05, "loss": 5.6631, "step": 1195 }, { "epoch": 0.040110673262345936, "grad_norm": 0.4284918319960121, "learning_rate": 2e-05, "loss": 5.6824, "step": 1196 }, { "epoch": 0.040144210614571976, "grad_norm": 0.4780903655566722, "learning_rate": 2e-05, "loss": 5.422, "step": 1197 }, { "epoch": 0.04017774796679802, "grad_norm": 0.41462348615571076, "learning_rate": 2e-05, "loss": 5.6001, "step": 1198 }, { "epoch": 0.040211285319024064, "grad_norm": 0.40613687093395723, "learning_rate": 2e-05, "loss": 5.657, "step": 1199 }, { "epoch": 0.040244822671250104, "grad_norm": 0.4550231692014375, "learning_rate": 2e-05, "loss": 5.621, "step": 1200 }, { "epoch": 0.040278360023476144, "grad_norm": 0.4379294436503036, "learning_rate": 2e-05, "loss": 5.5524, "step": 1201 }, { "epoch": 0.04031189737570219, "grad_norm": 0.4304961942433935, "learning_rate": 2e-05, "loss": 5.6896, "step": 1202 }, { "epoch": 0.04034543472792823, "grad_norm": 0.46091113977482034, "learning_rate": 2e-05, "loss": 5.513, "step": 1203 }, { "epoch": 0.04037897208015427, "grad_norm": 0.4360444310311269, "learning_rate": 2e-05, "loss": 5.6299, "step": 1204 }, { "epoch": 0.04041250943238031, "grad_norm": 0.43680463031075545, "learning_rate": 2e-05, "loss": 5.4554, "step": 1205 }, { "epoch": 0.04044604678460636, "grad_norm": 0.438023726116959, "learning_rate": 2e-05, "loss": 5.5229, "step": 1206 }, { "epoch": 0.0404795841368324, "grad_norm": 0.438441884698179, "learning_rate": 2e-05, "loss": 5.4892, "step": 1207 }, { "epoch": 0.04051312148905844, "grad_norm": 0.49632370466724807, "learning_rate": 2e-05, "loss": 5.505, "step": 1208 }, { "epoch": 0.04054665884128448, "grad_norm": 0.4197799093905322, "learning_rate": 2e-05, "loss": 5.4347, "step": 1209 }, { "epoch": 0.04058019619351052, "grad_norm": 0.4133822587595759, "learning_rate": 2e-05, "loss": 5.5244, "step": 1210 }, { "epoch": 0.040613733545736566, "grad_norm": 0.4307563901069197, "learning_rate": 2e-05, "loss": 5.7589, "step": 1211 }, { "epoch": 0.040647270897962606, "grad_norm": 0.40752692550396735, "learning_rate": 2e-05, "loss": 5.4671, "step": 1212 }, { "epoch": 0.040680808250188646, "grad_norm": 0.4215731248873446, "learning_rate": 2e-05, "loss": 5.4618, "step": 1213 }, { "epoch": 0.04071434560241469, "grad_norm": 0.426922043913769, "learning_rate": 2e-05, "loss": 5.6462, "step": 1214 }, { "epoch": 0.040747882954640734, "grad_norm": 0.3980784699807016, "learning_rate": 2e-05, "loss": 5.5086, "step": 1215 }, { "epoch": 0.040781420306866774, "grad_norm": 0.38373382228556074, "learning_rate": 2e-05, "loss": 5.5036, "step": 1216 }, { "epoch": 0.040814957659092814, "grad_norm": 0.43359870553782043, "learning_rate": 2e-05, "loss": 5.7389, "step": 1217 }, { "epoch": 0.040848495011318854, "grad_norm": 0.4162142783031599, "learning_rate": 2e-05, "loss": 5.6966, "step": 1218 }, { "epoch": 0.0408820323635449, "grad_norm": 0.43085612111453353, "learning_rate": 2e-05, "loss": 5.6017, "step": 1219 }, { "epoch": 0.04091556971577094, "grad_norm": 0.4087556321771782, "learning_rate": 2e-05, "loss": 5.6814, "step": 1220 }, { "epoch": 0.04094910706799698, "grad_norm": 0.40999279856427256, "learning_rate": 2e-05, "loss": 5.6237, "step": 1221 }, { "epoch": 0.04098264442022302, "grad_norm": 0.43932102531085004, "learning_rate": 2e-05, "loss": 5.5921, "step": 1222 }, { "epoch": 0.04101618177244906, "grad_norm": 0.39164766029970255, "learning_rate": 2e-05, "loss": 5.5649, "step": 1223 }, { "epoch": 0.04104971912467511, "grad_norm": 0.4427873145050048, "learning_rate": 2e-05, "loss": 5.672, "step": 1224 }, { "epoch": 0.04108325647690115, "grad_norm": 0.4604917679305682, "learning_rate": 2e-05, "loss": 5.7621, "step": 1225 }, { "epoch": 0.04111679382912719, "grad_norm": 0.4454070355936132, "learning_rate": 2e-05, "loss": 5.4546, "step": 1226 }, { "epoch": 0.04115033118135323, "grad_norm": 0.4573482375204566, "learning_rate": 2e-05, "loss": 5.6705, "step": 1227 }, { "epoch": 0.041183868533579276, "grad_norm": 0.4575344602305951, "learning_rate": 2e-05, "loss": 5.6017, "step": 1228 }, { "epoch": 0.04121740588580532, "grad_norm": 0.4917089122828211, "learning_rate": 2e-05, "loss": 5.5275, "step": 1229 }, { "epoch": 0.04125094323803136, "grad_norm": 0.4253975183175092, "learning_rate": 2e-05, "loss": 5.4202, "step": 1230 }, { "epoch": 0.0412844805902574, "grad_norm": 0.4205808561527561, "learning_rate": 2e-05, "loss": 5.4969, "step": 1231 }, { "epoch": 0.041318017942483444, "grad_norm": 0.42158236687960426, "learning_rate": 2e-05, "loss": 5.5807, "step": 1232 }, { "epoch": 0.041351555294709484, "grad_norm": 0.43803500311907834, "learning_rate": 2e-05, "loss": 5.4867, "step": 1233 }, { "epoch": 0.041385092646935524, "grad_norm": 0.4112990440380791, "learning_rate": 2e-05, "loss": 5.4902, "step": 1234 }, { "epoch": 0.041418629999161564, "grad_norm": 0.399330989403226, "learning_rate": 2e-05, "loss": 5.6231, "step": 1235 }, { "epoch": 0.041452167351387605, "grad_norm": 0.4281142628158818, "learning_rate": 2e-05, "loss": 5.4989, "step": 1236 }, { "epoch": 0.04148570470361365, "grad_norm": 0.4377375854391518, "learning_rate": 2e-05, "loss": 5.6542, "step": 1237 }, { "epoch": 0.04151924205583969, "grad_norm": 0.3947650532966134, "learning_rate": 2e-05, "loss": 5.6032, "step": 1238 }, { "epoch": 0.04155277940806573, "grad_norm": 0.4233930482437434, "learning_rate": 2e-05, "loss": 5.6644, "step": 1239 }, { "epoch": 0.04158631676029177, "grad_norm": 0.4431306404259925, "learning_rate": 2e-05, "loss": 5.6987, "step": 1240 }, { "epoch": 0.04161985411251782, "grad_norm": 0.43438439091583825, "learning_rate": 2e-05, "loss": 5.7706, "step": 1241 }, { "epoch": 0.04165339146474386, "grad_norm": 0.4054345721813111, "learning_rate": 2e-05, "loss": 5.5383, "step": 1242 }, { "epoch": 0.0416869288169699, "grad_norm": 0.4176558735106743, "learning_rate": 2e-05, "loss": 5.4481, "step": 1243 }, { "epoch": 0.04172046616919594, "grad_norm": 0.44236151598115786, "learning_rate": 2e-05, "loss": 5.6675, "step": 1244 }, { "epoch": 0.04175400352142199, "grad_norm": 0.43564245311786476, "learning_rate": 2e-05, "loss": 5.4832, "step": 1245 }, { "epoch": 0.04178754087364803, "grad_norm": 0.394236198681534, "learning_rate": 2e-05, "loss": 5.5485, "step": 1246 }, { "epoch": 0.04182107822587407, "grad_norm": 0.4093671913119599, "learning_rate": 2e-05, "loss": 5.5573, "step": 1247 }, { "epoch": 0.04185461557810011, "grad_norm": 0.4132601005981471, "learning_rate": 2e-05, "loss": 5.591, "step": 1248 }, { "epoch": 0.04188815293032615, "grad_norm": 0.4530477668052692, "learning_rate": 2e-05, "loss": 5.5075, "step": 1249 }, { "epoch": 0.041921690282552194, "grad_norm": 0.38712315008495585, "learning_rate": 2e-05, "loss": 5.5946, "step": 1250 }, { "epoch": 0.041955227634778235, "grad_norm": 0.4653588771891122, "learning_rate": 2e-05, "loss": 5.5849, "step": 1251 }, { "epoch": 0.041988764987004275, "grad_norm": 0.4162373520906531, "learning_rate": 2e-05, "loss": 5.3696, "step": 1252 }, { "epoch": 0.042022302339230315, "grad_norm": 0.4106036242372799, "learning_rate": 2e-05, "loss": 5.5434, "step": 1253 }, { "epoch": 0.04205583969145636, "grad_norm": 0.4176656443859146, "learning_rate": 2e-05, "loss": 5.7817, "step": 1254 }, { "epoch": 0.0420893770436824, "grad_norm": 0.41510605017797086, "learning_rate": 2e-05, "loss": 5.5262, "step": 1255 }, { "epoch": 0.04212291439590844, "grad_norm": 0.43073612403167333, "learning_rate": 2e-05, "loss": 5.6579, "step": 1256 }, { "epoch": 0.04215645174813448, "grad_norm": 0.4027470308635825, "learning_rate": 2e-05, "loss": 5.3955, "step": 1257 }, { "epoch": 0.04218998910036053, "grad_norm": 0.42032596624469837, "learning_rate": 2e-05, "loss": 5.6123, "step": 1258 }, { "epoch": 0.04222352645258657, "grad_norm": 0.4403492521389937, "learning_rate": 2e-05, "loss": 5.7192, "step": 1259 }, { "epoch": 0.04225706380481261, "grad_norm": 0.4458079561412784, "learning_rate": 2e-05, "loss": 5.5136, "step": 1260 }, { "epoch": 0.04229060115703865, "grad_norm": 0.41620015728146287, "learning_rate": 2e-05, "loss": 5.6556, "step": 1261 }, { "epoch": 0.04232413850926469, "grad_norm": 0.44233570047180865, "learning_rate": 2e-05, "loss": 5.8037, "step": 1262 }, { "epoch": 0.04235767586149074, "grad_norm": 0.4287401500776288, "learning_rate": 2e-05, "loss": 5.7547, "step": 1263 }, { "epoch": 0.04239121321371678, "grad_norm": 0.42073725301110154, "learning_rate": 2e-05, "loss": 5.74, "step": 1264 }, { "epoch": 0.04242475056594282, "grad_norm": 0.41519196366498023, "learning_rate": 2e-05, "loss": 5.8064, "step": 1265 }, { "epoch": 0.04245828791816886, "grad_norm": 0.4157930560145068, "learning_rate": 2e-05, "loss": 5.5536, "step": 1266 }, { "epoch": 0.042491825270394905, "grad_norm": 0.40657560142035476, "learning_rate": 2e-05, "loss": 5.3472, "step": 1267 }, { "epoch": 0.042525362622620945, "grad_norm": 0.4124669524972923, "learning_rate": 2e-05, "loss": 5.6388, "step": 1268 }, { "epoch": 0.042558899974846985, "grad_norm": 0.4342495746153314, "learning_rate": 2e-05, "loss": 5.5106, "step": 1269 }, { "epoch": 0.042592437327073025, "grad_norm": 0.418928094636035, "learning_rate": 2e-05, "loss": 5.6342, "step": 1270 }, { "epoch": 0.04262597467929907, "grad_norm": 0.4258846507762033, "learning_rate": 2e-05, "loss": 5.4717, "step": 1271 }, { "epoch": 0.04265951203152511, "grad_norm": 0.4352825649199081, "learning_rate": 2e-05, "loss": 5.614, "step": 1272 }, { "epoch": 0.04269304938375115, "grad_norm": 0.4346723339058844, "learning_rate": 2e-05, "loss": 5.646, "step": 1273 }, { "epoch": 0.04272658673597719, "grad_norm": 0.43558266000949947, "learning_rate": 2e-05, "loss": 5.6179, "step": 1274 }, { "epoch": 0.04276012408820323, "grad_norm": 0.41605614752952963, "learning_rate": 2e-05, "loss": 5.5388, "step": 1275 }, { "epoch": 0.04279366144042928, "grad_norm": 0.40189476827010423, "learning_rate": 2e-05, "loss": 5.6682, "step": 1276 }, { "epoch": 0.04282719879265532, "grad_norm": 0.4182446342438898, "learning_rate": 2e-05, "loss": 5.7243, "step": 1277 }, { "epoch": 0.04286073614488136, "grad_norm": 0.43384679868296366, "learning_rate": 2e-05, "loss": 5.5884, "step": 1278 }, { "epoch": 0.0428942734971074, "grad_norm": 0.4041930582405221, "learning_rate": 2e-05, "loss": 5.4173, "step": 1279 }, { "epoch": 0.04292781084933345, "grad_norm": 0.44765960086949813, "learning_rate": 2e-05, "loss": 5.6183, "step": 1280 }, { "epoch": 0.04296134820155949, "grad_norm": 0.42298896533019414, "learning_rate": 2e-05, "loss": 5.7545, "step": 1281 }, { "epoch": 0.04299488555378553, "grad_norm": 0.4466379012887922, "learning_rate": 2e-05, "loss": 5.439, "step": 1282 }, { "epoch": 0.04302842290601157, "grad_norm": 0.4364812178952803, "learning_rate": 2e-05, "loss": 5.6652, "step": 1283 }, { "epoch": 0.043061960258237615, "grad_norm": 0.4341971439312575, "learning_rate": 2e-05, "loss": 5.4533, "step": 1284 }, { "epoch": 0.043095497610463655, "grad_norm": 0.4077338034954507, "learning_rate": 2e-05, "loss": 5.619, "step": 1285 }, { "epoch": 0.043129034962689695, "grad_norm": 0.42786214928833505, "learning_rate": 2e-05, "loss": 5.6042, "step": 1286 }, { "epoch": 0.043162572314915736, "grad_norm": 0.41672907080204863, "learning_rate": 2e-05, "loss": 5.5714, "step": 1287 }, { "epoch": 0.043196109667141776, "grad_norm": 0.4225381511006214, "learning_rate": 2e-05, "loss": 5.469, "step": 1288 }, { "epoch": 0.04322964701936782, "grad_norm": 0.41764870904386775, "learning_rate": 2e-05, "loss": 5.5193, "step": 1289 }, { "epoch": 0.04326318437159386, "grad_norm": 0.419269715843823, "learning_rate": 2e-05, "loss": 5.4913, "step": 1290 }, { "epoch": 0.0432967217238199, "grad_norm": 0.4593870052951466, "learning_rate": 2e-05, "loss": 5.3018, "step": 1291 }, { "epoch": 0.04333025907604594, "grad_norm": 0.4086946782184082, "learning_rate": 2e-05, "loss": 5.4749, "step": 1292 }, { "epoch": 0.04336379642827199, "grad_norm": 0.4092788560752716, "learning_rate": 2e-05, "loss": 5.5809, "step": 1293 }, { "epoch": 0.04339733378049803, "grad_norm": 0.4476933704138071, "learning_rate": 2e-05, "loss": 5.4582, "step": 1294 }, { "epoch": 0.04343087113272407, "grad_norm": 0.43015941355834264, "learning_rate": 2e-05, "loss": 5.3598, "step": 1295 }, { "epoch": 0.04346440848495011, "grad_norm": 0.426029492116592, "learning_rate": 2e-05, "loss": 5.45, "step": 1296 }, { "epoch": 0.04349794583717616, "grad_norm": 0.4118396762242395, "learning_rate": 2e-05, "loss": 5.5778, "step": 1297 }, { "epoch": 0.0435314831894022, "grad_norm": 0.42721709971267113, "learning_rate": 2e-05, "loss": 5.6245, "step": 1298 }, { "epoch": 0.04356502054162824, "grad_norm": 0.427797072241741, "learning_rate": 2e-05, "loss": 5.4724, "step": 1299 }, { "epoch": 0.04359855789385428, "grad_norm": 0.40334208509197533, "learning_rate": 2e-05, "loss": 5.772, "step": 1300 }, { "epoch": 0.04363209524608032, "grad_norm": 0.4270386905791663, "learning_rate": 2e-05, "loss": 5.59, "step": 1301 }, { "epoch": 0.043665632598306366, "grad_norm": 0.41634192013164123, "learning_rate": 2e-05, "loss": 5.3694, "step": 1302 }, { "epoch": 0.043699169950532406, "grad_norm": 0.41558120760154377, "learning_rate": 2e-05, "loss": 5.4043, "step": 1303 }, { "epoch": 0.043732707302758446, "grad_norm": 0.4317022766112117, "learning_rate": 2e-05, "loss": 5.5043, "step": 1304 }, { "epoch": 0.043766244654984486, "grad_norm": 0.43344651705477194, "learning_rate": 2e-05, "loss": 5.4384, "step": 1305 }, { "epoch": 0.04379978200721053, "grad_norm": 0.41933363554487674, "learning_rate": 2e-05, "loss": 5.6471, "step": 1306 }, { "epoch": 0.04383331935943657, "grad_norm": 0.41833933832023723, "learning_rate": 2e-05, "loss": 5.725, "step": 1307 }, { "epoch": 0.04386685671166261, "grad_norm": 0.4485765882793724, "learning_rate": 2e-05, "loss": 5.791, "step": 1308 }, { "epoch": 0.043900394063888654, "grad_norm": 0.4148495734481283, "learning_rate": 2e-05, "loss": 5.5197, "step": 1309 }, { "epoch": 0.0439339314161147, "grad_norm": 0.449085238339387, "learning_rate": 2e-05, "loss": 5.6091, "step": 1310 }, { "epoch": 0.04396746876834074, "grad_norm": 0.4564644986753283, "learning_rate": 2e-05, "loss": 5.7096, "step": 1311 }, { "epoch": 0.04400100612056678, "grad_norm": 0.41034423590894786, "learning_rate": 2e-05, "loss": 5.6894, "step": 1312 }, { "epoch": 0.04403454347279282, "grad_norm": 0.43398508601876706, "learning_rate": 2e-05, "loss": 5.4101, "step": 1313 }, { "epoch": 0.04406808082501886, "grad_norm": 0.4361550498870271, "learning_rate": 2e-05, "loss": 5.6278, "step": 1314 }, { "epoch": 0.04410161817724491, "grad_norm": 0.43702248834078306, "learning_rate": 2e-05, "loss": 5.6682, "step": 1315 }, { "epoch": 0.04413515552947095, "grad_norm": 0.40763989736188183, "learning_rate": 2e-05, "loss": 5.5491, "step": 1316 }, { "epoch": 0.04416869288169699, "grad_norm": 0.4125870482119394, "learning_rate": 2e-05, "loss": 5.4345, "step": 1317 }, { "epoch": 0.04420223023392303, "grad_norm": 0.43019802382872674, "learning_rate": 2e-05, "loss": 5.5794, "step": 1318 }, { "epoch": 0.044235767586149076, "grad_norm": 0.4282323982035443, "learning_rate": 2e-05, "loss": 5.6607, "step": 1319 }, { "epoch": 0.044269304938375116, "grad_norm": 0.42515145882130156, "learning_rate": 2e-05, "loss": 5.4672, "step": 1320 }, { "epoch": 0.044302842290601156, "grad_norm": 0.4669312614037001, "learning_rate": 2e-05, "loss": 5.6221, "step": 1321 }, { "epoch": 0.044336379642827196, "grad_norm": 0.4317732171553968, "learning_rate": 2e-05, "loss": 5.7827, "step": 1322 }, { "epoch": 0.044369916995053243, "grad_norm": 0.40920587862367686, "learning_rate": 2e-05, "loss": 5.7092, "step": 1323 }, { "epoch": 0.044403454347279284, "grad_norm": 0.41959993804512424, "learning_rate": 2e-05, "loss": 5.4342, "step": 1324 }, { "epoch": 0.044436991699505324, "grad_norm": 0.43744440059565, "learning_rate": 2e-05, "loss": 5.4988, "step": 1325 }, { "epoch": 0.044470529051731364, "grad_norm": 0.4271400008525316, "learning_rate": 2e-05, "loss": 5.5766, "step": 1326 }, { "epoch": 0.044504066403957404, "grad_norm": 0.41390160181686586, "learning_rate": 2e-05, "loss": 5.5158, "step": 1327 }, { "epoch": 0.04453760375618345, "grad_norm": 0.4100629357335492, "learning_rate": 2e-05, "loss": 5.5995, "step": 1328 }, { "epoch": 0.04457114110840949, "grad_norm": 0.43327835693184796, "learning_rate": 2e-05, "loss": 5.5494, "step": 1329 }, { "epoch": 0.04460467846063553, "grad_norm": 0.41524346943377577, "learning_rate": 2e-05, "loss": 5.4948, "step": 1330 }, { "epoch": 0.04463821581286157, "grad_norm": 0.4131755500263766, "learning_rate": 2e-05, "loss": 5.5866, "step": 1331 }, { "epoch": 0.04467175316508762, "grad_norm": 0.45684813444215694, "learning_rate": 2e-05, "loss": 5.3731, "step": 1332 }, { "epoch": 0.04470529051731366, "grad_norm": 0.4028079529778214, "learning_rate": 2e-05, "loss": 5.6399, "step": 1333 }, { "epoch": 0.0447388278695397, "grad_norm": 0.4515120957556248, "learning_rate": 2e-05, "loss": 5.8813, "step": 1334 }, { "epoch": 0.04477236522176574, "grad_norm": 0.4219507253589975, "learning_rate": 2e-05, "loss": 5.6575, "step": 1335 }, { "epoch": 0.044805902573991786, "grad_norm": 0.41106922880037183, "learning_rate": 2e-05, "loss": 5.6362, "step": 1336 }, { "epoch": 0.044839439926217826, "grad_norm": 0.4940325483410389, "learning_rate": 2e-05, "loss": 5.6728, "step": 1337 }, { "epoch": 0.04487297727844387, "grad_norm": 0.4506381885304596, "learning_rate": 2e-05, "loss": 5.3622, "step": 1338 }, { "epoch": 0.04490651463066991, "grad_norm": 0.3934647153253231, "learning_rate": 2e-05, "loss": 5.6108, "step": 1339 }, { "epoch": 0.044940051982895954, "grad_norm": 0.439250988975825, "learning_rate": 2e-05, "loss": 5.5409, "step": 1340 }, { "epoch": 0.044973589335121994, "grad_norm": 0.4485323373604621, "learning_rate": 2e-05, "loss": 5.3746, "step": 1341 }, { "epoch": 0.045007126687348034, "grad_norm": 0.4121228035040473, "learning_rate": 2e-05, "loss": 5.6194, "step": 1342 }, { "epoch": 0.045040664039574074, "grad_norm": 0.40525553186064023, "learning_rate": 2e-05, "loss": 5.662, "step": 1343 }, { "epoch": 0.045074201391800114, "grad_norm": 0.44459240255392435, "learning_rate": 2e-05, "loss": 5.4974, "step": 1344 }, { "epoch": 0.04510773874402616, "grad_norm": 0.4007483017312091, "learning_rate": 2e-05, "loss": 5.6672, "step": 1345 }, { "epoch": 0.0451412760962522, "grad_norm": 0.40428044966759996, "learning_rate": 2e-05, "loss": 5.5131, "step": 1346 }, { "epoch": 0.04517481344847824, "grad_norm": 0.444344452818634, "learning_rate": 2e-05, "loss": 5.6108, "step": 1347 }, { "epoch": 0.04520835080070428, "grad_norm": 0.4068814809383715, "learning_rate": 2e-05, "loss": 5.6106, "step": 1348 }, { "epoch": 0.04524188815293033, "grad_norm": 0.41193967087097827, "learning_rate": 2e-05, "loss": 5.6536, "step": 1349 }, { "epoch": 0.04527542550515637, "grad_norm": 0.41307937131897315, "learning_rate": 2e-05, "loss": 5.4657, "step": 1350 }, { "epoch": 0.04530896285738241, "grad_norm": 0.40794898743395724, "learning_rate": 2e-05, "loss": 5.5145, "step": 1351 }, { "epoch": 0.04534250020960845, "grad_norm": 0.41405015120818356, "learning_rate": 2e-05, "loss": 5.4811, "step": 1352 }, { "epoch": 0.0453760375618345, "grad_norm": 0.4189577839726422, "learning_rate": 2e-05, "loss": 5.4315, "step": 1353 }, { "epoch": 0.04540957491406054, "grad_norm": 0.39931007552470665, "learning_rate": 2e-05, "loss": 5.7029, "step": 1354 }, { "epoch": 0.04544311226628658, "grad_norm": 0.4393884833654123, "learning_rate": 2e-05, "loss": 5.5908, "step": 1355 }, { "epoch": 0.04547664961851262, "grad_norm": 0.452972130010981, "learning_rate": 2e-05, "loss": 5.5273, "step": 1356 }, { "epoch": 0.04551018697073866, "grad_norm": 0.397241559326721, "learning_rate": 2e-05, "loss": 5.6123, "step": 1357 }, { "epoch": 0.045543724322964704, "grad_norm": 0.4414199830054394, "learning_rate": 2e-05, "loss": 5.6653, "step": 1358 }, { "epoch": 0.045577261675190744, "grad_norm": 0.4355776205412131, "learning_rate": 2e-05, "loss": 5.5285, "step": 1359 }, { "epoch": 0.045610799027416785, "grad_norm": 0.4149460066897663, "learning_rate": 2e-05, "loss": 5.7197, "step": 1360 }, { "epoch": 0.045644336379642825, "grad_norm": 0.42715178465406484, "learning_rate": 2e-05, "loss": 5.514, "step": 1361 }, { "epoch": 0.04567787373186887, "grad_norm": 0.4223956608997509, "learning_rate": 2e-05, "loss": 5.4842, "step": 1362 }, { "epoch": 0.04571141108409491, "grad_norm": 0.41580171728040194, "learning_rate": 2e-05, "loss": 5.6611, "step": 1363 }, { "epoch": 0.04574494843632095, "grad_norm": 0.4295143013789495, "learning_rate": 2e-05, "loss": 5.5795, "step": 1364 }, { "epoch": 0.04577848578854699, "grad_norm": 0.417306228945085, "learning_rate": 2e-05, "loss": 5.6415, "step": 1365 }, { "epoch": 0.04581202314077304, "grad_norm": 0.4274419336398622, "learning_rate": 2e-05, "loss": 5.6453, "step": 1366 }, { "epoch": 0.04584556049299908, "grad_norm": 0.45111119490331764, "learning_rate": 2e-05, "loss": 5.4763, "step": 1367 }, { "epoch": 0.04587909784522512, "grad_norm": 0.43303111358429325, "learning_rate": 2e-05, "loss": 5.5218, "step": 1368 }, { "epoch": 0.04591263519745116, "grad_norm": 0.48603084917316913, "learning_rate": 2e-05, "loss": 5.392, "step": 1369 }, { "epoch": 0.0459461725496772, "grad_norm": 0.4035584110845077, "learning_rate": 2e-05, "loss": 5.5302, "step": 1370 }, { "epoch": 0.04597970990190325, "grad_norm": 0.42468052811681267, "learning_rate": 2e-05, "loss": 5.7021, "step": 1371 }, { "epoch": 0.04601324725412929, "grad_norm": 0.4387643342083084, "learning_rate": 2e-05, "loss": 5.6171, "step": 1372 }, { "epoch": 0.04604678460635533, "grad_norm": 0.43792964949982693, "learning_rate": 2e-05, "loss": 5.6516, "step": 1373 }, { "epoch": 0.04608032195858137, "grad_norm": 0.4107980559031863, "learning_rate": 2e-05, "loss": 5.5463, "step": 1374 }, { "epoch": 0.046113859310807415, "grad_norm": 0.40910895128197755, "learning_rate": 2e-05, "loss": 5.5436, "step": 1375 }, { "epoch": 0.046147396663033455, "grad_norm": 0.41476690974899877, "learning_rate": 2e-05, "loss": 5.6335, "step": 1376 }, { "epoch": 0.046180934015259495, "grad_norm": 0.3985257577913402, "learning_rate": 2e-05, "loss": 5.7601, "step": 1377 }, { "epoch": 0.046214471367485535, "grad_norm": 0.4149895195832963, "learning_rate": 2e-05, "loss": 5.3762, "step": 1378 }, { "epoch": 0.04624800871971158, "grad_norm": 0.410028980615095, "learning_rate": 2e-05, "loss": 5.3803, "step": 1379 }, { "epoch": 0.04628154607193762, "grad_norm": 0.40420105674981, "learning_rate": 2e-05, "loss": 5.5547, "step": 1380 }, { "epoch": 0.04631508342416366, "grad_norm": 0.40409107504584807, "learning_rate": 2e-05, "loss": 5.5948, "step": 1381 }, { "epoch": 0.0463486207763897, "grad_norm": 0.418228707980076, "learning_rate": 2e-05, "loss": 5.48, "step": 1382 }, { "epoch": 0.04638215812861574, "grad_norm": 0.4080367601359207, "learning_rate": 2e-05, "loss": 5.5086, "step": 1383 }, { "epoch": 0.04641569548084179, "grad_norm": 0.4305506982174508, "learning_rate": 2e-05, "loss": 5.3943, "step": 1384 }, { "epoch": 0.04644923283306783, "grad_norm": 0.4287160770809177, "learning_rate": 2e-05, "loss": 5.6314, "step": 1385 }, { "epoch": 0.04648277018529387, "grad_norm": 0.40160896374309824, "learning_rate": 2e-05, "loss": 5.6696, "step": 1386 }, { "epoch": 0.04651630753751991, "grad_norm": 0.42634952093850476, "learning_rate": 2e-05, "loss": 5.5965, "step": 1387 }, { "epoch": 0.04654984488974596, "grad_norm": 0.41946640442224764, "learning_rate": 2e-05, "loss": 5.5763, "step": 1388 }, { "epoch": 0.046583382241972, "grad_norm": 0.470074224675147, "learning_rate": 2e-05, "loss": 5.6237, "step": 1389 }, { "epoch": 0.04661691959419804, "grad_norm": 0.41702518556403495, "learning_rate": 2e-05, "loss": 5.5876, "step": 1390 }, { "epoch": 0.04665045694642408, "grad_norm": 0.42422546871482886, "learning_rate": 2e-05, "loss": 5.3798, "step": 1391 }, { "epoch": 0.046683994298650125, "grad_norm": 0.4160108808912796, "learning_rate": 2e-05, "loss": 5.5147, "step": 1392 }, { "epoch": 0.046717531650876165, "grad_norm": 0.41367848253550155, "learning_rate": 2e-05, "loss": 5.6637, "step": 1393 }, { "epoch": 0.046751069003102205, "grad_norm": 0.40903200048340566, "learning_rate": 2e-05, "loss": 5.6442, "step": 1394 }, { "epoch": 0.046784606355328245, "grad_norm": 0.41818787541425073, "learning_rate": 2e-05, "loss": 5.5609, "step": 1395 }, { "epoch": 0.046818143707554286, "grad_norm": 0.4341680254088455, "learning_rate": 2e-05, "loss": 5.6455, "step": 1396 }, { "epoch": 0.04685168105978033, "grad_norm": 0.42538462005168887, "learning_rate": 2e-05, "loss": 5.6083, "step": 1397 }, { "epoch": 0.04688521841200637, "grad_norm": 0.4529861837824776, "learning_rate": 2e-05, "loss": 5.595, "step": 1398 }, { "epoch": 0.04691875576423241, "grad_norm": 0.41219692849110146, "learning_rate": 2e-05, "loss": 5.7518, "step": 1399 }, { "epoch": 0.04695229311645845, "grad_norm": 0.39889139074942653, "learning_rate": 2e-05, "loss": 5.5984, "step": 1400 }, { "epoch": 0.0469858304686845, "grad_norm": 0.38495794557263674, "learning_rate": 2e-05, "loss": 5.5345, "step": 1401 }, { "epoch": 0.04701936782091054, "grad_norm": 0.4098104494521997, "learning_rate": 2e-05, "loss": 5.5484, "step": 1402 }, { "epoch": 0.04705290517313658, "grad_norm": 0.4222443746303844, "learning_rate": 2e-05, "loss": 5.6147, "step": 1403 }, { "epoch": 0.04708644252536262, "grad_norm": 0.412849535862194, "learning_rate": 2e-05, "loss": 5.701, "step": 1404 }, { "epoch": 0.04711997987758867, "grad_norm": 0.41521554957816387, "learning_rate": 2e-05, "loss": 5.5217, "step": 1405 }, { "epoch": 0.04715351722981471, "grad_norm": 0.45144139773379877, "learning_rate": 2e-05, "loss": 5.6782, "step": 1406 }, { "epoch": 0.04718705458204075, "grad_norm": 0.4141854678340044, "learning_rate": 2e-05, "loss": 5.6454, "step": 1407 }, { "epoch": 0.04722059193426679, "grad_norm": 0.4158165096540621, "learning_rate": 2e-05, "loss": 5.8019, "step": 1408 }, { "epoch": 0.04725412928649283, "grad_norm": 0.40537766539855286, "learning_rate": 2e-05, "loss": 5.6053, "step": 1409 }, { "epoch": 0.047287666638718875, "grad_norm": 0.4180071077402622, "learning_rate": 2e-05, "loss": 5.568, "step": 1410 }, { "epoch": 0.047321203990944916, "grad_norm": 0.4260177880804716, "learning_rate": 2e-05, "loss": 5.6024, "step": 1411 }, { "epoch": 0.047354741343170956, "grad_norm": 0.4157727358419012, "learning_rate": 2e-05, "loss": 5.5363, "step": 1412 }, { "epoch": 0.047388278695396996, "grad_norm": 0.4116690040610287, "learning_rate": 2e-05, "loss": 5.4725, "step": 1413 }, { "epoch": 0.04742181604762304, "grad_norm": 0.3888285296670207, "learning_rate": 2e-05, "loss": 5.4793, "step": 1414 }, { "epoch": 0.04745535339984908, "grad_norm": 0.4199846698150925, "learning_rate": 2e-05, "loss": 5.5801, "step": 1415 }, { "epoch": 0.04748889075207512, "grad_norm": 0.3953098714414189, "learning_rate": 2e-05, "loss": 5.4342, "step": 1416 }, { "epoch": 0.04752242810430116, "grad_norm": 0.44808097747282316, "learning_rate": 2e-05, "loss": 5.465, "step": 1417 }, { "epoch": 0.04755596545652721, "grad_norm": 0.42085543594619784, "learning_rate": 2e-05, "loss": 5.5578, "step": 1418 }, { "epoch": 0.04758950280875325, "grad_norm": 0.3996814691367169, "learning_rate": 2e-05, "loss": 5.4945, "step": 1419 }, { "epoch": 0.04762304016097929, "grad_norm": 0.4158496841039851, "learning_rate": 2e-05, "loss": 5.4364, "step": 1420 }, { "epoch": 0.04765657751320533, "grad_norm": 0.4399717284421344, "learning_rate": 2e-05, "loss": 5.5079, "step": 1421 }, { "epoch": 0.04769011486543137, "grad_norm": 0.4376772425456542, "learning_rate": 2e-05, "loss": 5.5631, "step": 1422 }, { "epoch": 0.04772365221765742, "grad_norm": 0.39793327945221835, "learning_rate": 2e-05, "loss": 5.5764, "step": 1423 }, { "epoch": 0.04775718956988346, "grad_norm": 0.3940772428861867, "learning_rate": 2e-05, "loss": 5.693, "step": 1424 }, { "epoch": 0.0477907269221095, "grad_norm": 0.43115129486269593, "learning_rate": 2e-05, "loss": 5.5974, "step": 1425 }, { "epoch": 0.04782426427433554, "grad_norm": 0.4557042694042442, "learning_rate": 2e-05, "loss": 5.48, "step": 1426 }, { "epoch": 0.047857801626561586, "grad_norm": 0.47221307117216194, "learning_rate": 2e-05, "loss": 5.5249, "step": 1427 }, { "epoch": 0.047891338978787626, "grad_norm": 0.43592109753709607, "learning_rate": 2e-05, "loss": 5.411, "step": 1428 }, { "epoch": 0.047924876331013666, "grad_norm": 0.482343346598498, "learning_rate": 2e-05, "loss": 5.5983, "step": 1429 }, { "epoch": 0.047958413683239706, "grad_norm": 0.42019391083914664, "learning_rate": 2e-05, "loss": 5.5374, "step": 1430 }, { "epoch": 0.04799195103546575, "grad_norm": 0.4326010413145501, "learning_rate": 2e-05, "loss": 5.6236, "step": 1431 }, { "epoch": 0.04802548838769179, "grad_norm": 0.4202667553262686, "learning_rate": 2e-05, "loss": 5.66, "step": 1432 }, { "epoch": 0.048059025739917834, "grad_norm": 0.3984081825834539, "learning_rate": 2e-05, "loss": 5.5848, "step": 1433 }, { "epoch": 0.048092563092143874, "grad_norm": 0.44616357340339724, "learning_rate": 2e-05, "loss": 5.56, "step": 1434 }, { "epoch": 0.048126100444369914, "grad_norm": 0.40271765341291543, "learning_rate": 2e-05, "loss": 5.693, "step": 1435 }, { "epoch": 0.04815963779659596, "grad_norm": 0.42306964427445914, "learning_rate": 2e-05, "loss": 5.5593, "step": 1436 }, { "epoch": 0.048193175148822, "grad_norm": 0.41842736147393245, "learning_rate": 2e-05, "loss": 5.7073, "step": 1437 }, { "epoch": 0.04822671250104804, "grad_norm": 0.4208477902790598, "learning_rate": 2e-05, "loss": 5.5957, "step": 1438 }, { "epoch": 0.04826024985327408, "grad_norm": 0.4108780882605941, "learning_rate": 2e-05, "loss": 5.4168, "step": 1439 }, { "epoch": 0.04829378720550013, "grad_norm": 0.5293726776520606, "learning_rate": 2e-05, "loss": 5.725, "step": 1440 }, { "epoch": 0.04832732455772617, "grad_norm": 0.4367770195042297, "learning_rate": 2e-05, "loss": 5.4321, "step": 1441 }, { "epoch": 0.04836086190995221, "grad_norm": 0.426069502594961, "learning_rate": 2e-05, "loss": 5.72, "step": 1442 }, { "epoch": 0.04839439926217825, "grad_norm": 0.41237353834551904, "learning_rate": 2e-05, "loss": 5.6463, "step": 1443 }, { "epoch": 0.048427936614404296, "grad_norm": 0.4154562193872237, "learning_rate": 2e-05, "loss": 5.5304, "step": 1444 }, { "epoch": 0.048461473966630336, "grad_norm": 0.44826049334928497, "learning_rate": 2e-05, "loss": 5.4306, "step": 1445 }, { "epoch": 0.048495011318856376, "grad_norm": 0.41995568226725755, "learning_rate": 2e-05, "loss": 5.5198, "step": 1446 }, { "epoch": 0.04852854867108242, "grad_norm": 0.42076315054974794, "learning_rate": 2e-05, "loss": 5.5617, "step": 1447 }, { "epoch": 0.04856208602330846, "grad_norm": 0.4842273057733559, "learning_rate": 2e-05, "loss": 5.499, "step": 1448 }, { "epoch": 0.048595623375534504, "grad_norm": 0.44278397645494194, "learning_rate": 2e-05, "loss": 5.5226, "step": 1449 }, { "epoch": 0.048629160727760544, "grad_norm": 0.4142212538725314, "learning_rate": 2e-05, "loss": 5.5231, "step": 1450 }, { "epoch": 0.048662698079986584, "grad_norm": 0.423660782287731, "learning_rate": 2e-05, "loss": 5.464, "step": 1451 }, { "epoch": 0.048696235432212624, "grad_norm": 0.4049520620024788, "learning_rate": 2e-05, "loss": 5.6249, "step": 1452 }, { "epoch": 0.04872977278443867, "grad_norm": 0.4518770390457579, "learning_rate": 2e-05, "loss": 5.4728, "step": 1453 }, { "epoch": 0.04876331013666471, "grad_norm": 0.41153834878332324, "learning_rate": 2e-05, "loss": 5.4829, "step": 1454 }, { "epoch": 0.04879684748889075, "grad_norm": 0.4479778982271905, "learning_rate": 2e-05, "loss": 5.4433, "step": 1455 }, { "epoch": 0.04883038484111679, "grad_norm": 0.4163313355763931, "learning_rate": 2e-05, "loss": 5.5445, "step": 1456 }, { "epoch": 0.04886392219334284, "grad_norm": 0.6073666549999985, "learning_rate": 2e-05, "loss": 5.5767, "step": 1457 }, { "epoch": 0.04889745954556888, "grad_norm": 0.43114577251277975, "learning_rate": 2e-05, "loss": 5.4014, "step": 1458 }, { "epoch": 0.04893099689779492, "grad_norm": 0.44864861466448375, "learning_rate": 2e-05, "loss": 5.6575, "step": 1459 }, { "epoch": 0.04896453425002096, "grad_norm": 0.4308118747997688, "learning_rate": 2e-05, "loss": 5.4031, "step": 1460 }, { "epoch": 0.048998071602247, "grad_norm": 0.44889565701023426, "learning_rate": 2e-05, "loss": 5.6243, "step": 1461 }, { "epoch": 0.04903160895447305, "grad_norm": 0.4293312920644442, "learning_rate": 2e-05, "loss": 5.5464, "step": 1462 }, { "epoch": 0.04906514630669909, "grad_norm": 0.4877418400215325, "learning_rate": 2e-05, "loss": 5.3503, "step": 1463 }, { "epoch": 0.04909868365892513, "grad_norm": 0.3945345065395941, "learning_rate": 2e-05, "loss": 5.762, "step": 1464 }, { "epoch": 0.04913222101115117, "grad_norm": 0.44485605294977226, "learning_rate": 2e-05, "loss": 5.659, "step": 1465 }, { "epoch": 0.049165758363377214, "grad_norm": 0.4716008655719468, "learning_rate": 2e-05, "loss": 5.4186, "step": 1466 }, { "epoch": 0.049199295715603254, "grad_norm": 0.42974525809713376, "learning_rate": 2e-05, "loss": 5.6841, "step": 1467 }, { "epoch": 0.049232833067829294, "grad_norm": 0.5132587829632138, "learning_rate": 2e-05, "loss": 5.5343, "step": 1468 }, { "epoch": 0.049266370420055335, "grad_norm": 0.44070107915585327, "learning_rate": 2e-05, "loss": 5.5519, "step": 1469 }, { "epoch": 0.04929990777228138, "grad_norm": 0.49044506010765543, "learning_rate": 2e-05, "loss": 5.7061, "step": 1470 }, { "epoch": 0.04933344512450742, "grad_norm": 0.4161727556485931, "learning_rate": 2e-05, "loss": 5.4406, "step": 1471 }, { "epoch": 0.04936698247673346, "grad_norm": 0.44311770321143934, "learning_rate": 2e-05, "loss": 5.404, "step": 1472 }, { "epoch": 0.0494005198289595, "grad_norm": 0.4273790927258203, "learning_rate": 2e-05, "loss": 5.6315, "step": 1473 }, { "epoch": 0.04943405718118554, "grad_norm": 0.4060157127609856, "learning_rate": 2e-05, "loss": 5.6224, "step": 1474 }, { "epoch": 0.04946759453341159, "grad_norm": 0.4596232705770731, "learning_rate": 2e-05, "loss": 5.5633, "step": 1475 }, { "epoch": 0.04950113188563763, "grad_norm": 0.4114845952830872, "learning_rate": 2e-05, "loss": 5.7668, "step": 1476 }, { "epoch": 0.04953466923786367, "grad_norm": 0.40826967754238064, "learning_rate": 2e-05, "loss": 5.5661, "step": 1477 }, { "epoch": 0.04956820659008971, "grad_norm": 0.4183160107730872, "learning_rate": 2e-05, "loss": 5.3923, "step": 1478 }, { "epoch": 0.04960174394231576, "grad_norm": 0.4147748902777679, "learning_rate": 2e-05, "loss": 5.6524, "step": 1479 }, { "epoch": 0.0496352812945418, "grad_norm": 0.4573592822677254, "learning_rate": 2e-05, "loss": 5.4806, "step": 1480 }, { "epoch": 0.04966881864676784, "grad_norm": 0.42270203417342406, "learning_rate": 2e-05, "loss": 5.6712, "step": 1481 }, { "epoch": 0.04970235599899388, "grad_norm": 0.42951285710412296, "learning_rate": 2e-05, "loss": 5.5936, "step": 1482 }, { "epoch": 0.049735893351219924, "grad_norm": 0.4105811256460354, "learning_rate": 2e-05, "loss": 5.6804, "step": 1483 }, { "epoch": 0.049769430703445965, "grad_norm": 0.4461051013598289, "learning_rate": 2e-05, "loss": 5.433, "step": 1484 }, { "epoch": 0.049802968055672005, "grad_norm": 0.39942516457266103, "learning_rate": 2e-05, "loss": 5.6499, "step": 1485 }, { "epoch": 0.049836505407898045, "grad_norm": 0.4424814479422008, "learning_rate": 2e-05, "loss": 5.5536, "step": 1486 }, { "epoch": 0.049870042760124085, "grad_norm": 0.44784419530339664, "learning_rate": 2e-05, "loss": 5.4371, "step": 1487 }, { "epoch": 0.04990358011235013, "grad_norm": 0.4040541692075054, "learning_rate": 2e-05, "loss": 5.4442, "step": 1488 }, { "epoch": 0.04993711746457617, "grad_norm": 0.4021763721050591, "learning_rate": 2e-05, "loss": 5.6747, "step": 1489 }, { "epoch": 0.04997065481680221, "grad_norm": 0.42270401553857034, "learning_rate": 2e-05, "loss": 5.5489, "step": 1490 }, { "epoch": 0.05000419216902825, "grad_norm": 0.43446358043810873, "learning_rate": 2e-05, "loss": 5.5671, "step": 1491 }, { "epoch": 0.0500377295212543, "grad_norm": 0.4021771058708482, "learning_rate": 2e-05, "loss": 5.5711, "step": 1492 }, { "epoch": 0.05007126687348034, "grad_norm": 0.4151594693888377, "learning_rate": 2e-05, "loss": 5.5572, "step": 1493 }, { "epoch": 0.05010480422570638, "grad_norm": 0.4161387064931662, "learning_rate": 2e-05, "loss": 5.6085, "step": 1494 }, { "epoch": 0.05013834157793242, "grad_norm": 0.39545667661710626, "learning_rate": 2e-05, "loss": 5.5971, "step": 1495 }, { "epoch": 0.05017187893015847, "grad_norm": 0.411426996998055, "learning_rate": 2e-05, "loss": 5.7361, "step": 1496 }, { "epoch": 0.05020541628238451, "grad_norm": 0.4424234322419829, "learning_rate": 2e-05, "loss": 5.4878, "step": 1497 }, { "epoch": 0.05023895363461055, "grad_norm": 0.4196654093801157, "learning_rate": 2e-05, "loss": 5.6895, "step": 1498 }, { "epoch": 0.05027249098683659, "grad_norm": 0.4082478818314615, "learning_rate": 2e-05, "loss": 5.6675, "step": 1499 }, { "epoch": 0.05030602833906263, "grad_norm": 0.458171567558835, "learning_rate": 2e-05, "loss": 5.7415, "step": 1500 }, { "epoch": 0.050339565691288675, "grad_norm": 0.4408607495765974, "learning_rate": 2e-05, "loss": 5.4329, "step": 1501 }, { "epoch": 0.050373103043514715, "grad_norm": 0.39414952288409777, "learning_rate": 2e-05, "loss": 5.6306, "step": 1502 }, { "epoch": 0.050406640395740755, "grad_norm": 0.4260215899036007, "learning_rate": 2e-05, "loss": 5.6556, "step": 1503 }, { "epoch": 0.050440177747966795, "grad_norm": 0.4054283495948364, "learning_rate": 2e-05, "loss": 5.6448, "step": 1504 }, { "epoch": 0.05047371510019284, "grad_norm": 0.42479165893868004, "learning_rate": 2e-05, "loss": 5.7236, "step": 1505 }, { "epoch": 0.05050725245241888, "grad_norm": 0.40717731079615666, "learning_rate": 2e-05, "loss": 5.5247, "step": 1506 }, { "epoch": 0.05054078980464492, "grad_norm": 0.43068679064047066, "learning_rate": 2e-05, "loss": 5.7846, "step": 1507 }, { "epoch": 0.05057432715687096, "grad_norm": 0.4528756468196915, "learning_rate": 2e-05, "loss": 5.4649, "step": 1508 }, { "epoch": 0.05060786450909701, "grad_norm": 0.4392438972866838, "learning_rate": 2e-05, "loss": 5.5421, "step": 1509 }, { "epoch": 0.05064140186132305, "grad_norm": 0.3916493824888253, "learning_rate": 2e-05, "loss": 5.7913, "step": 1510 }, { "epoch": 0.05067493921354909, "grad_norm": 0.45715787847788975, "learning_rate": 2e-05, "loss": 5.5249, "step": 1511 }, { "epoch": 0.05070847656577513, "grad_norm": 0.45405445388976756, "learning_rate": 2e-05, "loss": 5.5161, "step": 1512 }, { "epoch": 0.05074201391800117, "grad_norm": 0.4323624762165546, "learning_rate": 2e-05, "loss": 5.4855, "step": 1513 }, { "epoch": 0.05077555127022722, "grad_norm": 0.4056398236805148, "learning_rate": 2e-05, "loss": 5.5091, "step": 1514 }, { "epoch": 0.05080908862245326, "grad_norm": 0.4451701957472455, "learning_rate": 2e-05, "loss": 5.4226, "step": 1515 }, { "epoch": 0.0508426259746793, "grad_norm": 0.4218440399809524, "learning_rate": 2e-05, "loss": 5.683, "step": 1516 }, { "epoch": 0.05087616332690534, "grad_norm": 0.4205087804928213, "learning_rate": 2e-05, "loss": 5.7223, "step": 1517 }, { "epoch": 0.050909700679131385, "grad_norm": 0.4369181959600633, "learning_rate": 2e-05, "loss": 5.6238, "step": 1518 }, { "epoch": 0.050943238031357425, "grad_norm": 0.41710138978555467, "learning_rate": 2e-05, "loss": 5.5962, "step": 1519 }, { "epoch": 0.050976775383583466, "grad_norm": 0.41247879836204876, "learning_rate": 2e-05, "loss": 5.5543, "step": 1520 }, { "epoch": 0.051010312735809506, "grad_norm": 0.4091552381564034, "learning_rate": 2e-05, "loss": 5.6411, "step": 1521 }, { "epoch": 0.05104385008803555, "grad_norm": 0.43868268789000425, "learning_rate": 2e-05, "loss": 5.5215, "step": 1522 }, { "epoch": 0.05107738744026159, "grad_norm": 0.4456293036262632, "learning_rate": 2e-05, "loss": 5.6752, "step": 1523 }, { "epoch": 0.05111092479248763, "grad_norm": 0.44663696022453764, "learning_rate": 2e-05, "loss": 5.4879, "step": 1524 }, { "epoch": 0.05114446214471367, "grad_norm": 0.41634963372384076, "learning_rate": 2e-05, "loss": 5.5756, "step": 1525 }, { "epoch": 0.05117799949693971, "grad_norm": 0.4183576090824153, "learning_rate": 2e-05, "loss": 5.5693, "step": 1526 }, { "epoch": 0.05121153684916576, "grad_norm": 0.46366172310556103, "learning_rate": 2e-05, "loss": 5.5059, "step": 1527 }, { "epoch": 0.0512450742013918, "grad_norm": 0.41768603484027006, "learning_rate": 2e-05, "loss": 5.5868, "step": 1528 }, { "epoch": 0.05127861155361784, "grad_norm": 0.3815833839775615, "learning_rate": 2e-05, "loss": 5.509, "step": 1529 }, { "epoch": 0.05131214890584388, "grad_norm": 0.4127418231753248, "learning_rate": 2e-05, "loss": 5.5937, "step": 1530 }, { "epoch": 0.05134568625806993, "grad_norm": 0.41840423646447084, "learning_rate": 2e-05, "loss": 5.6567, "step": 1531 }, { "epoch": 0.05137922361029597, "grad_norm": 0.41179205427036175, "learning_rate": 2e-05, "loss": 5.4143, "step": 1532 }, { "epoch": 0.05141276096252201, "grad_norm": 0.4217936422096918, "learning_rate": 2e-05, "loss": 5.6274, "step": 1533 }, { "epoch": 0.05144629831474805, "grad_norm": 0.4229709262001858, "learning_rate": 2e-05, "loss": 5.6083, "step": 1534 }, { "epoch": 0.051479835666974096, "grad_norm": 0.4347863023580049, "learning_rate": 2e-05, "loss": 5.6099, "step": 1535 }, { "epoch": 0.051513373019200136, "grad_norm": 0.40711227489649787, "learning_rate": 2e-05, "loss": 5.6695, "step": 1536 }, { "epoch": 0.051546910371426176, "grad_norm": 0.41680613517763626, "learning_rate": 2e-05, "loss": 5.616, "step": 1537 }, { "epoch": 0.051580447723652216, "grad_norm": 0.4438454581531692, "learning_rate": 2e-05, "loss": 5.5146, "step": 1538 }, { "epoch": 0.051613985075878256, "grad_norm": 0.47233520081555747, "learning_rate": 2e-05, "loss": 5.489, "step": 1539 }, { "epoch": 0.0516475224281043, "grad_norm": 0.44068303969349987, "learning_rate": 2e-05, "loss": 5.7209, "step": 1540 }, { "epoch": 0.05168105978033034, "grad_norm": 0.4735968106832045, "learning_rate": 2e-05, "loss": 5.6185, "step": 1541 }, { "epoch": 0.051714597132556384, "grad_norm": 0.44395644533146855, "learning_rate": 2e-05, "loss": 5.5924, "step": 1542 }, { "epoch": 0.051748134484782424, "grad_norm": 0.4531685178893733, "learning_rate": 2e-05, "loss": 5.488, "step": 1543 }, { "epoch": 0.05178167183700847, "grad_norm": 0.41138767488429556, "learning_rate": 2e-05, "loss": 5.602, "step": 1544 }, { "epoch": 0.05181520918923451, "grad_norm": 0.41389310042550637, "learning_rate": 2e-05, "loss": 5.5279, "step": 1545 }, { "epoch": 0.05184874654146055, "grad_norm": 0.43181700597538436, "learning_rate": 2e-05, "loss": 5.7554, "step": 1546 }, { "epoch": 0.05188228389368659, "grad_norm": 0.4034674496764022, "learning_rate": 2e-05, "loss": 5.5012, "step": 1547 }, { "epoch": 0.05191582124591264, "grad_norm": 0.4546293567992173, "learning_rate": 2e-05, "loss": 5.5041, "step": 1548 }, { "epoch": 0.05194935859813868, "grad_norm": 0.4779730046902839, "learning_rate": 2e-05, "loss": 5.6418, "step": 1549 }, { "epoch": 0.05198289595036472, "grad_norm": 0.4228075922872243, "learning_rate": 2e-05, "loss": 5.5366, "step": 1550 }, { "epoch": 0.05201643330259076, "grad_norm": 0.43179921840121027, "learning_rate": 2e-05, "loss": 5.6656, "step": 1551 }, { "epoch": 0.0520499706548168, "grad_norm": 0.4243249736011095, "learning_rate": 2e-05, "loss": 5.6167, "step": 1552 }, { "epoch": 0.052083508007042846, "grad_norm": 0.41507677339889265, "learning_rate": 2e-05, "loss": 5.6074, "step": 1553 }, { "epoch": 0.052117045359268886, "grad_norm": 0.4196846495752159, "learning_rate": 2e-05, "loss": 5.3874, "step": 1554 }, { "epoch": 0.052150582711494926, "grad_norm": 0.4279728388745262, "learning_rate": 2e-05, "loss": 5.5043, "step": 1555 }, { "epoch": 0.052184120063720967, "grad_norm": 0.4243114454061082, "learning_rate": 2e-05, "loss": 5.5253, "step": 1556 }, { "epoch": 0.052217657415947014, "grad_norm": 0.4600310523722522, "learning_rate": 2e-05, "loss": 5.4162, "step": 1557 }, { "epoch": 0.052251194768173054, "grad_norm": 0.4486061099069727, "learning_rate": 2e-05, "loss": 5.3618, "step": 1558 }, { "epoch": 0.052284732120399094, "grad_norm": 0.44068762842162984, "learning_rate": 2e-05, "loss": 5.685, "step": 1559 }, { "epoch": 0.052318269472625134, "grad_norm": 0.5158830750483858, "learning_rate": 2e-05, "loss": 5.2993, "step": 1560 }, { "epoch": 0.05235180682485118, "grad_norm": 0.4132591504671653, "learning_rate": 2e-05, "loss": 5.6556, "step": 1561 }, { "epoch": 0.05238534417707722, "grad_norm": 0.4234754558968086, "learning_rate": 2e-05, "loss": 5.818, "step": 1562 }, { "epoch": 0.05241888152930326, "grad_norm": 0.44766902507305095, "learning_rate": 2e-05, "loss": 5.5051, "step": 1563 }, { "epoch": 0.0524524188815293, "grad_norm": 0.4489575226989655, "learning_rate": 2e-05, "loss": 5.5508, "step": 1564 }, { "epoch": 0.05248595623375534, "grad_norm": 0.4047291298168611, "learning_rate": 2e-05, "loss": 5.3372, "step": 1565 }, { "epoch": 0.05251949358598139, "grad_norm": 0.4553634537739686, "learning_rate": 2e-05, "loss": 5.4215, "step": 1566 }, { "epoch": 0.05255303093820743, "grad_norm": 0.44592445344621995, "learning_rate": 2e-05, "loss": 5.4548, "step": 1567 }, { "epoch": 0.05258656829043347, "grad_norm": 0.43205927271877387, "learning_rate": 2e-05, "loss": 5.5163, "step": 1568 }, { "epoch": 0.05262010564265951, "grad_norm": 0.4123645623875295, "learning_rate": 2e-05, "loss": 5.572, "step": 1569 }, { "epoch": 0.052653642994885556, "grad_norm": 0.4387710818475739, "learning_rate": 2e-05, "loss": 5.544, "step": 1570 }, { "epoch": 0.0526871803471116, "grad_norm": 0.41175288514082203, "learning_rate": 2e-05, "loss": 5.434, "step": 1571 }, { "epoch": 0.05272071769933764, "grad_norm": 0.42978373293815575, "learning_rate": 2e-05, "loss": 5.4595, "step": 1572 }, { "epoch": 0.05275425505156368, "grad_norm": 0.4252812100229396, "learning_rate": 2e-05, "loss": 5.5932, "step": 1573 }, { "epoch": 0.052787792403789724, "grad_norm": 0.44281011774686874, "learning_rate": 2e-05, "loss": 5.573, "step": 1574 }, { "epoch": 0.052821329756015764, "grad_norm": 0.4222201796568237, "learning_rate": 2e-05, "loss": 5.6058, "step": 1575 }, { "epoch": 0.052854867108241804, "grad_norm": 0.4534705070481436, "learning_rate": 2e-05, "loss": 5.433, "step": 1576 }, { "epoch": 0.052888404460467844, "grad_norm": 0.41569485825784047, "learning_rate": 2e-05, "loss": 5.532, "step": 1577 }, { "epoch": 0.052921941812693885, "grad_norm": 0.41197319725638926, "learning_rate": 2e-05, "loss": 5.6626, "step": 1578 }, { "epoch": 0.05295547916491993, "grad_norm": 0.4686286355799608, "learning_rate": 2e-05, "loss": 5.4652, "step": 1579 }, { "epoch": 0.05298901651714597, "grad_norm": 0.40611547953934174, "learning_rate": 2e-05, "loss": 5.6755, "step": 1580 }, { "epoch": 0.05302255386937201, "grad_norm": 0.3943883966770888, "learning_rate": 2e-05, "loss": 5.7597, "step": 1581 }, { "epoch": 0.05305609122159805, "grad_norm": 0.4242606302810661, "learning_rate": 2e-05, "loss": 5.4119, "step": 1582 }, { "epoch": 0.0530896285738241, "grad_norm": 0.4570151795875608, "learning_rate": 2e-05, "loss": 5.6319, "step": 1583 }, { "epoch": 0.05312316592605014, "grad_norm": 0.4212196653192262, "learning_rate": 2e-05, "loss": 5.505, "step": 1584 }, { "epoch": 0.05315670327827618, "grad_norm": 0.40631219155151255, "learning_rate": 2e-05, "loss": 5.1401, "step": 1585 }, { "epoch": 0.05319024063050222, "grad_norm": 0.4768663033489213, "learning_rate": 2e-05, "loss": 5.3917, "step": 1586 }, { "epoch": 0.05322377798272827, "grad_norm": 0.4362852406970359, "learning_rate": 2e-05, "loss": 5.6201, "step": 1587 }, { "epoch": 0.05325731533495431, "grad_norm": 0.47189376622886886, "learning_rate": 2e-05, "loss": 5.7886, "step": 1588 }, { "epoch": 0.05329085268718035, "grad_norm": 0.4399909368440869, "learning_rate": 2e-05, "loss": 5.7006, "step": 1589 }, { "epoch": 0.05332439003940639, "grad_norm": 0.40928486307808404, "learning_rate": 2e-05, "loss": 5.7232, "step": 1590 }, { "epoch": 0.05335792739163243, "grad_norm": 0.416850247990341, "learning_rate": 2e-05, "loss": 5.4355, "step": 1591 }, { "epoch": 0.053391464743858474, "grad_norm": 0.4520981803311844, "learning_rate": 2e-05, "loss": 5.6533, "step": 1592 }, { "epoch": 0.053425002096084515, "grad_norm": 0.4230346814903579, "learning_rate": 2e-05, "loss": 5.5309, "step": 1593 }, { "epoch": 0.053458539448310555, "grad_norm": 0.4158215914181802, "learning_rate": 2e-05, "loss": 5.5099, "step": 1594 }, { "epoch": 0.053492076800536595, "grad_norm": 0.40305791535731506, "learning_rate": 2e-05, "loss": 5.5185, "step": 1595 }, { "epoch": 0.05352561415276264, "grad_norm": 0.4525581384580287, "learning_rate": 2e-05, "loss": 5.4882, "step": 1596 }, { "epoch": 0.05355915150498868, "grad_norm": 0.3985619721733492, "learning_rate": 2e-05, "loss": 5.465, "step": 1597 }, { "epoch": 0.05359268885721472, "grad_norm": 0.42893216590904093, "learning_rate": 2e-05, "loss": 5.5403, "step": 1598 }, { "epoch": 0.05362622620944076, "grad_norm": 0.39861335916463786, "learning_rate": 2e-05, "loss": 5.6851, "step": 1599 }, { "epoch": 0.05365976356166681, "grad_norm": 0.4447805433916951, "learning_rate": 2e-05, "loss": 5.5342, "step": 1600 }, { "epoch": 0.05369330091389285, "grad_norm": 0.4312175768984327, "learning_rate": 2e-05, "loss": 5.4302, "step": 1601 }, { "epoch": 0.05372683826611889, "grad_norm": 0.432856493372872, "learning_rate": 2e-05, "loss": 5.4086, "step": 1602 }, { "epoch": 0.05376037561834493, "grad_norm": 0.4557089682218737, "learning_rate": 2e-05, "loss": 5.5228, "step": 1603 }, { "epoch": 0.05379391297057097, "grad_norm": 0.3986429979796909, "learning_rate": 2e-05, "loss": 5.4138, "step": 1604 }, { "epoch": 0.05382745032279702, "grad_norm": 0.4394630964289513, "learning_rate": 2e-05, "loss": 5.6306, "step": 1605 }, { "epoch": 0.05386098767502306, "grad_norm": 0.42921748304694973, "learning_rate": 2e-05, "loss": 5.5877, "step": 1606 }, { "epoch": 0.0538945250272491, "grad_norm": 0.41363322443155753, "learning_rate": 2e-05, "loss": 5.5714, "step": 1607 }, { "epoch": 0.05392806237947514, "grad_norm": 0.41025216478353427, "learning_rate": 2e-05, "loss": 5.3528, "step": 1608 }, { "epoch": 0.053961599731701185, "grad_norm": 0.41021434033644594, "learning_rate": 2e-05, "loss": 5.5927, "step": 1609 }, { "epoch": 0.053995137083927225, "grad_norm": 0.4154659507314166, "learning_rate": 2e-05, "loss": 5.3876, "step": 1610 }, { "epoch": 0.054028674436153265, "grad_norm": 0.43364623319676737, "learning_rate": 2e-05, "loss": 5.5368, "step": 1611 }, { "epoch": 0.054062211788379305, "grad_norm": 0.4096881218616092, "learning_rate": 2e-05, "loss": 5.4382, "step": 1612 }, { "epoch": 0.05409574914060535, "grad_norm": 0.3759667736169754, "learning_rate": 2e-05, "loss": 5.435, "step": 1613 }, { "epoch": 0.05412928649283139, "grad_norm": 0.4566929128250595, "learning_rate": 2e-05, "loss": 5.489, "step": 1614 }, { "epoch": 0.05416282384505743, "grad_norm": 0.45836483801733113, "learning_rate": 2e-05, "loss": 5.5254, "step": 1615 }, { "epoch": 0.05419636119728347, "grad_norm": 0.40534133471299827, "learning_rate": 2e-05, "loss": 5.711, "step": 1616 }, { "epoch": 0.05422989854950951, "grad_norm": 0.43111452431250213, "learning_rate": 2e-05, "loss": 5.4892, "step": 1617 }, { "epoch": 0.05426343590173556, "grad_norm": 0.422048875885329, "learning_rate": 2e-05, "loss": 5.5774, "step": 1618 }, { "epoch": 0.0542969732539616, "grad_norm": 0.44910504322437916, "learning_rate": 2e-05, "loss": 5.6569, "step": 1619 }, { "epoch": 0.05433051060618764, "grad_norm": 0.43735004829045554, "learning_rate": 2e-05, "loss": 5.4857, "step": 1620 }, { "epoch": 0.05436404795841368, "grad_norm": 0.4117432431535603, "learning_rate": 2e-05, "loss": 5.6018, "step": 1621 }, { "epoch": 0.05439758531063973, "grad_norm": 0.44429044033153725, "learning_rate": 2e-05, "loss": 5.5162, "step": 1622 }, { "epoch": 0.05443112266286577, "grad_norm": 0.46378164893841833, "learning_rate": 2e-05, "loss": 5.6079, "step": 1623 }, { "epoch": 0.05446466001509181, "grad_norm": 0.41294493183873415, "learning_rate": 2e-05, "loss": 5.5978, "step": 1624 }, { "epoch": 0.05449819736731785, "grad_norm": 0.4223201118013197, "learning_rate": 2e-05, "loss": 5.5469, "step": 1625 }, { "epoch": 0.054531734719543895, "grad_norm": 0.41626917219397874, "learning_rate": 2e-05, "loss": 5.4366, "step": 1626 }, { "epoch": 0.054565272071769935, "grad_norm": 0.4432131448461524, "learning_rate": 2e-05, "loss": 5.5956, "step": 1627 }, { "epoch": 0.054598809423995975, "grad_norm": 0.40679574734514723, "learning_rate": 2e-05, "loss": 5.6077, "step": 1628 }, { "epoch": 0.054632346776222016, "grad_norm": 0.42554651375867786, "learning_rate": 2e-05, "loss": 5.5292, "step": 1629 }, { "epoch": 0.054665884128448056, "grad_norm": 0.43776563875803637, "learning_rate": 2e-05, "loss": 5.6875, "step": 1630 }, { "epoch": 0.0546994214806741, "grad_norm": 0.453146803282779, "learning_rate": 2e-05, "loss": 5.5011, "step": 1631 }, { "epoch": 0.05473295883290014, "grad_norm": 0.40697060666041074, "learning_rate": 2e-05, "loss": 5.6398, "step": 1632 }, { "epoch": 0.05476649618512618, "grad_norm": 0.41607361771662715, "learning_rate": 2e-05, "loss": 5.4136, "step": 1633 }, { "epoch": 0.05480003353735222, "grad_norm": 0.47253627965842837, "learning_rate": 2e-05, "loss": 5.6304, "step": 1634 }, { "epoch": 0.05483357088957827, "grad_norm": 0.39087036695011207, "learning_rate": 2e-05, "loss": 5.6894, "step": 1635 }, { "epoch": 0.05486710824180431, "grad_norm": 0.4316655358976948, "learning_rate": 2e-05, "loss": 5.5153, "step": 1636 }, { "epoch": 0.05490064559403035, "grad_norm": 0.42727412852165897, "learning_rate": 2e-05, "loss": 5.6027, "step": 1637 }, { "epoch": 0.05493418294625639, "grad_norm": 0.38731031096176405, "learning_rate": 2e-05, "loss": 5.6629, "step": 1638 }, { "epoch": 0.05496772029848244, "grad_norm": 0.40347660527434137, "learning_rate": 2e-05, "loss": 5.4593, "step": 1639 }, { "epoch": 0.05500125765070848, "grad_norm": 0.4271930376268255, "learning_rate": 2e-05, "loss": 5.5841, "step": 1640 }, { "epoch": 0.05503479500293452, "grad_norm": 0.4213959309135827, "learning_rate": 2e-05, "loss": 5.6231, "step": 1641 }, { "epoch": 0.05506833235516056, "grad_norm": 0.42388485041759655, "learning_rate": 2e-05, "loss": 5.5254, "step": 1642 }, { "epoch": 0.0551018697073866, "grad_norm": 0.4423181311717867, "learning_rate": 2e-05, "loss": 5.2587, "step": 1643 }, { "epoch": 0.055135407059612646, "grad_norm": 0.39308778813000517, "learning_rate": 2e-05, "loss": 5.4151, "step": 1644 }, { "epoch": 0.055168944411838686, "grad_norm": 0.45026159728210674, "learning_rate": 2e-05, "loss": 5.5401, "step": 1645 }, { "epoch": 0.055202481764064726, "grad_norm": 0.40925116467486106, "learning_rate": 2e-05, "loss": 5.5957, "step": 1646 }, { "epoch": 0.055236019116290766, "grad_norm": 0.4123992209944024, "learning_rate": 2e-05, "loss": 5.5913, "step": 1647 }, { "epoch": 0.05526955646851681, "grad_norm": 0.4023628363480531, "learning_rate": 2e-05, "loss": 5.5308, "step": 1648 }, { "epoch": 0.05530309382074285, "grad_norm": 0.42395205814494596, "learning_rate": 2e-05, "loss": 5.6117, "step": 1649 }, { "epoch": 0.05533663117296889, "grad_norm": 0.4019800578049424, "learning_rate": 2e-05, "loss": 5.6353, "step": 1650 }, { "epoch": 0.055370168525194934, "grad_norm": 0.4462174085116599, "learning_rate": 2e-05, "loss": 5.6365, "step": 1651 }, { "epoch": 0.05540370587742098, "grad_norm": 0.3943158033666955, "learning_rate": 2e-05, "loss": 5.5206, "step": 1652 }, { "epoch": 0.05543724322964702, "grad_norm": 0.4002930400504884, "learning_rate": 2e-05, "loss": 5.4441, "step": 1653 }, { "epoch": 0.05547078058187306, "grad_norm": 0.3985921433501662, "learning_rate": 2e-05, "loss": 5.5797, "step": 1654 }, { "epoch": 0.0555043179340991, "grad_norm": 0.4352200639689115, "learning_rate": 2e-05, "loss": 5.442, "step": 1655 }, { "epoch": 0.05553785528632514, "grad_norm": 0.4277288743454112, "learning_rate": 2e-05, "loss": 5.5093, "step": 1656 }, { "epoch": 0.05557139263855119, "grad_norm": 0.40275943951717236, "learning_rate": 2e-05, "loss": 5.5119, "step": 1657 }, { "epoch": 0.05560492999077723, "grad_norm": 0.4089306604838737, "learning_rate": 2e-05, "loss": 5.6097, "step": 1658 }, { "epoch": 0.05563846734300327, "grad_norm": 0.4375556158505098, "learning_rate": 2e-05, "loss": 5.5731, "step": 1659 }, { "epoch": 0.05567200469522931, "grad_norm": 0.4477132104937723, "learning_rate": 2e-05, "loss": 5.5484, "step": 1660 }, { "epoch": 0.055705542047455356, "grad_norm": 0.43228140390129643, "learning_rate": 2e-05, "loss": 5.6958, "step": 1661 }, { "epoch": 0.055739079399681396, "grad_norm": 0.42859627377998405, "learning_rate": 2e-05, "loss": 5.4639, "step": 1662 }, { "epoch": 0.055772616751907436, "grad_norm": 0.41626608229740675, "learning_rate": 2e-05, "loss": 5.6247, "step": 1663 }, { "epoch": 0.055806154104133476, "grad_norm": 0.43302665236799615, "learning_rate": 2e-05, "loss": 5.4424, "step": 1664 }, { "epoch": 0.05583969145635952, "grad_norm": 0.4578392156261691, "learning_rate": 2e-05, "loss": 5.6962, "step": 1665 }, { "epoch": 0.055873228808585564, "grad_norm": 0.48749349662481173, "learning_rate": 2e-05, "loss": 5.551, "step": 1666 }, { "epoch": 0.055906766160811604, "grad_norm": 0.4578226439083857, "learning_rate": 2e-05, "loss": 5.5031, "step": 1667 }, { "epoch": 0.055940303513037644, "grad_norm": 0.4407969815491592, "learning_rate": 2e-05, "loss": 5.3998, "step": 1668 }, { "epoch": 0.055973840865263684, "grad_norm": 0.4415804409311198, "learning_rate": 2e-05, "loss": 5.5789, "step": 1669 }, { "epoch": 0.05600737821748973, "grad_norm": 0.45556978376966306, "learning_rate": 2e-05, "loss": 5.4967, "step": 1670 }, { "epoch": 0.05604091556971577, "grad_norm": 0.42360907950496457, "learning_rate": 2e-05, "loss": 5.7073, "step": 1671 }, { "epoch": 0.05607445292194181, "grad_norm": 0.42963364575906743, "learning_rate": 2e-05, "loss": 5.5225, "step": 1672 }, { "epoch": 0.05610799027416785, "grad_norm": 0.4147530631623081, "learning_rate": 2e-05, "loss": 5.6428, "step": 1673 }, { "epoch": 0.0561415276263939, "grad_norm": 0.4307378350890284, "learning_rate": 2e-05, "loss": 5.747, "step": 1674 }, { "epoch": 0.05617506497861994, "grad_norm": 0.4174482301333323, "learning_rate": 2e-05, "loss": 5.6997, "step": 1675 }, { "epoch": 0.05620860233084598, "grad_norm": 0.4186998263179712, "learning_rate": 2e-05, "loss": 5.4294, "step": 1676 }, { "epoch": 0.05624213968307202, "grad_norm": 0.4255376235282392, "learning_rate": 2e-05, "loss": 5.6623, "step": 1677 }, { "epoch": 0.056275677035298066, "grad_norm": 0.41857153802021196, "learning_rate": 2e-05, "loss": 5.5381, "step": 1678 }, { "epoch": 0.056309214387524106, "grad_norm": 0.4119336521113655, "learning_rate": 2e-05, "loss": 5.6753, "step": 1679 }, { "epoch": 0.056342751739750147, "grad_norm": 0.41845173323855, "learning_rate": 2e-05, "loss": 5.6659, "step": 1680 }, { "epoch": 0.05637628909197619, "grad_norm": 0.4486018868318074, "learning_rate": 2e-05, "loss": 5.4717, "step": 1681 }, { "epoch": 0.05640982644420223, "grad_norm": 0.440922654372197, "learning_rate": 2e-05, "loss": 5.485, "step": 1682 }, { "epoch": 0.056443363796428274, "grad_norm": 0.4406379215551537, "learning_rate": 2e-05, "loss": 5.4736, "step": 1683 }, { "epoch": 0.056476901148654314, "grad_norm": 0.41517593134321534, "learning_rate": 2e-05, "loss": 5.596, "step": 1684 }, { "epoch": 0.056510438500880354, "grad_norm": 0.40168019548232947, "learning_rate": 2e-05, "loss": 5.6099, "step": 1685 }, { "epoch": 0.056543975853106394, "grad_norm": 0.45143842501038484, "learning_rate": 2e-05, "loss": 5.3349, "step": 1686 }, { "epoch": 0.05657751320533244, "grad_norm": 0.4165078799157152, "learning_rate": 2e-05, "loss": 5.6387, "step": 1687 }, { "epoch": 0.05661105055755848, "grad_norm": 0.4441584928427762, "learning_rate": 2e-05, "loss": 5.3932, "step": 1688 }, { "epoch": 0.05664458790978452, "grad_norm": 0.4289759282265463, "learning_rate": 2e-05, "loss": 5.5669, "step": 1689 }, { "epoch": 0.05667812526201056, "grad_norm": 0.4175240979463292, "learning_rate": 2e-05, "loss": 5.6568, "step": 1690 }, { "epoch": 0.05671166261423661, "grad_norm": 0.43034252418564994, "learning_rate": 2e-05, "loss": 5.4123, "step": 1691 }, { "epoch": 0.05674519996646265, "grad_norm": 0.4238414983221301, "learning_rate": 2e-05, "loss": 5.4944, "step": 1692 }, { "epoch": 0.05677873731868869, "grad_norm": 0.4305206901326903, "learning_rate": 2e-05, "loss": 5.5175, "step": 1693 }, { "epoch": 0.05681227467091473, "grad_norm": 0.3869648082815582, "learning_rate": 2e-05, "loss": 5.5304, "step": 1694 }, { "epoch": 0.05684581202314077, "grad_norm": 0.442223485101039, "learning_rate": 2e-05, "loss": 5.3361, "step": 1695 }, { "epoch": 0.05687934937536682, "grad_norm": 0.4493544791359708, "learning_rate": 2e-05, "loss": 5.5929, "step": 1696 }, { "epoch": 0.05691288672759286, "grad_norm": 0.4366902839253107, "learning_rate": 2e-05, "loss": 5.4888, "step": 1697 }, { "epoch": 0.0569464240798189, "grad_norm": 0.39535885806893806, "learning_rate": 2e-05, "loss": 5.4204, "step": 1698 }, { "epoch": 0.05697996143204494, "grad_norm": 0.4147493149434055, "learning_rate": 2e-05, "loss": 5.51, "step": 1699 }, { "epoch": 0.057013498784270984, "grad_norm": 0.44828892241621676, "learning_rate": 2e-05, "loss": 5.5323, "step": 1700 }, { "epoch": 0.057047036136497024, "grad_norm": 0.444414277798278, "learning_rate": 2e-05, "loss": 5.3884, "step": 1701 }, { "epoch": 0.057080573488723065, "grad_norm": 0.413518835980229, "learning_rate": 2e-05, "loss": 5.5903, "step": 1702 }, { "epoch": 0.057114110840949105, "grad_norm": 0.42420169583855394, "learning_rate": 2e-05, "loss": 5.553, "step": 1703 }, { "epoch": 0.05714764819317515, "grad_norm": 0.46053968112183863, "learning_rate": 2e-05, "loss": 5.5368, "step": 1704 }, { "epoch": 0.05718118554540119, "grad_norm": 0.42328150380537627, "learning_rate": 2e-05, "loss": 5.7801, "step": 1705 }, { "epoch": 0.05721472289762723, "grad_norm": 0.4266329289061583, "learning_rate": 2e-05, "loss": 5.5104, "step": 1706 }, { "epoch": 0.05724826024985327, "grad_norm": 0.3970107058379827, "learning_rate": 2e-05, "loss": 5.6869, "step": 1707 }, { "epoch": 0.05728179760207931, "grad_norm": 0.44129888324680394, "learning_rate": 2e-05, "loss": 5.6623, "step": 1708 }, { "epoch": 0.05731533495430536, "grad_norm": 0.4270311185002145, "learning_rate": 2e-05, "loss": 5.8361, "step": 1709 }, { "epoch": 0.0573488723065314, "grad_norm": 0.4453694839175154, "learning_rate": 2e-05, "loss": 5.6462, "step": 1710 }, { "epoch": 0.05738240965875744, "grad_norm": 0.43042683641320467, "learning_rate": 2e-05, "loss": 5.406, "step": 1711 }, { "epoch": 0.05741594701098348, "grad_norm": 0.4373588758342027, "learning_rate": 2e-05, "loss": 5.4308, "step": 1712 }, { "epoch": 0.05744948436320953, "grad_norm": 0.44265754221067716, "learning_rate": 2e-05, "loss": 5.4556, "step": 1713 }, { "epoch": 0.05748302171543557, "grad_norm": 0.4017894025269547, "learning_rate": 2e-05, "loss": 5.5516, "step": 1714 }, { "epoch": 0.05751655906766161, "grad_norm": 0.404445602702674, "learning_rate": 2e-05, "loss": 5.4745, "step": 1715 }, { "epoch": 0.05755009641988765, "grad_norm": 0.4224917613102455, "learning_rate": 2e-05, "loss": 5.4893, "step": 1716 }, { "epoch": 0.057583633772113695, "grad_norm": 0.4011744364650739, "learning_rate": 2e-05, "loss": 5.6479, "step": 1717 }, { "epoch": 0.057617171124339735, "grad_norm": 0.44046156058157987, "learning_rate": 2e-05, "loss": 5.7867, "step": 1718 }, { "epoch": 0.057650708476565775, "grad_norm": 0.410652398644407, "learning_rate": 2e-05, "loss": 5.5463, "step": 1719 }, { "epoch": 0.057684245828791815, "grad_norm": 0.4440390210428921, "learning_rate": 2e-05, "loss": 5.6325, "step": 1720 }, { "epoch": 0.05771778318101786, "grad_norm": 0.4490788491630594, "learning_rate": 2e-05, "loss": 5.6488, "step": 1721 }, { "epoch": 0.0577513205332439, "grad_norm": 0.45760961173768466, "learning_rate": 2e-05, "loss": 5.4375, "step": 1722 }, { "epoch": 0.05778485788546994, "grad_norm": 0.443544789895527, "learning_rate": 2e-05, "loss": 5.5351, "step": 1723 }, { "epoch": 0.05781839523769598, "grad_norm": 0.49533706061798666, "learning_rate": 2e-05, "loss": 5.5736, "step": 1724 }, { "epoch": 0.05785193258992202, "grad_norm": 0.41629450798077905, "learning_rate": 2e-05, "loss": 5.5962, "step": 1725 }, { "epoch": 0.05788546994214807, "grad_norm": 0.4060851324573195, "learning_rate": 2e-05, "loss": 5.6093, "step": 1726 }, { "epoch": 0.05791900729437411, "grad_norm": 0.42644746948785184, "learning_rate": 2e-05, "loss": 5.6406, "step": 1727 }, { "epoch": 0.05795254464660015, "grad_norm": 0.5088148931441095, "learning_rate": 2e-05, "loss": 5.2286, "step": 1728 }, { "epoch": 0.05798608199882619, "grad_norm": 0.4592641512185941, "learning_rate": 2e-05, "loss": 5.3939, "step": 1729 }, { "epoch": 0.05801961935105224, "grad_norm": 0.47679389214591184, "learning_rate": 2e-05, "loss": 5.532, "step": 1730 }, { "epoch": 0.05805315670327828, "grad_norm": 0.41185191569349705, "learning_rate": 2e-05, "loss": 5.5895, "step": 1731 }, { "epoch": 0.05808669405550432, "grad_norm": 0.43769859599646627, "learning_rate": 2e-05, "loss": 5.482, "step": 1732 }, { "epoch": 0.05812023140773036, "grad_norm": 0.4489390409000117, "learning_rate": 2e-05, "loss": 5.4449, "step": 1733 }, { "epoch": 0.058153768759956405, "grad_norm": 0.4239686126533435, "learning_rate": 2e-05, "loss": 5.6331, "step": 1734 }, { "epoch": 0.058187306112182445, "grad_norm": 0.4143565495542223, "learning_rate": 2e-05, "loss": 5.5644, "step": 1735 }, { "epoch": 0.058220843464408485, "grad_norm": 0.48554021176202417, "learning_rate": 2e-05, "loss": 5.7518, "step": 1736 }, { "epoch": 0.058254380816634525, "grad_norm": 0.4192967959103135, "learning_rate": 2e-05, "loss": 5.4749, "step": 1737 }, { "epoch": 0.058287918168860565, "grad_norm": 0.41222531424820774, "learning_rate": 2e-05, "loss": 5.6425, "step": 1738 }, { "epoch": 0.05832145552108661, "grad_norm": 0.4123752014738508, "learning_rate": 2e-05, "loss": 5.5914, "step": 1739 }, { "epoch": 0.05835499287331265, "grad_norm": 0.461500198668421, "learning_rate": 2e-05, "loss": 5.4974, "step": 1740 }, { "epoch": 0.05838853022553869, "grad_norm": 0.4189856931458365, "learning_rate": 2e-05, "loss": 5.4226, "step": 1741 }, { "epoch": 0.05842206757776473, "grad_norm": 0.4131874303085142, "learning_rate": 2e-05, "loss": 5.4518, "step": 1742 }, { "epoch": 0.05845560492999078, "grad_norm": 0.444536929084537, "learning_rate": 2e-05, "loss": 5.8832, "step": 1743 }, { "epoch": 0.05848914228221682, "grad_norm": 0.42020457702811287, "learning_rate": 2e-05, "loss": 5.5513, "step": 1744 }, { "epoch": 0.05852267963444286, "grad_norm": 0.4042632071257216, "learning_rate": 2e-05, "loss": 5.7225, "step": 1745 }, { "epoch": 0.0585562169866689, "grad_norm": 0.43090063450914273, "learning_rate": 2e-05, "loss": 5.4312, "step": 1746 }, { "epoch": 0.05858975433889495, "grad_norm": 0.4265314715740753, "learning_rate": 2e-05, "loss": 5.4556, "step": 1747 }, { "epoch": 0.05862329169112099, "grad_norm": 0.4277033485685496, "learning_rate": 2e-05, "loss": 5.6018, "step": 1748 }, { "epoch": 0.05865682904334703, "grad_norm": 0.4236512214110221, "learning_rate": 2e-05, "loss": 5.4395, "step": 1749 }, { "epoch": 0.05869036639557307, "grad_norm": 0.4279617636624599, "learning_rate": 2e-05, "loss": 5.6288, "step": 1750 }, { "epoch": 0.05872390374779911, "grad_norm": 0.4156357653217103, "learning_rate": 2e-05, "loss": 5.6851, "step": 1751 }, { "epoch": 0.058757441100025155, "grad_norm": 0.4011126132316362, "learning_rate": 2e-05, "loss": 5.598, "step": 1752 }, { "epoch": 0.058790978452251196, "grad_norm": 0.42927307506659257, "learning_rate": 2e-05, "loss": 5.5798, "step": 1753 }, { "epoch": 0.058824515804477236, "grad_norm": 0.4122419414295883, "learning_rate": 2e-05, "loss": 5.5711, "step": 1754 }, { "epoch": 0.058858053156703276, "grad_norm": 0.42146722647917895, "learning_rate": 2e-05, "loss": 5.5994, "step": 1755 }, { "epoch": 0.05889159050892932, "grad_norm": 0.40974195902349714, "learning_rate": 2e-05, "loss": 5.4754, "step": 1756 }, { "epoch": 0.05892512786115536, "grad_norm": 0.4301022653444274, "learning_rate": 2e-05, "loss": 5.7293, "step": 1757 }, { "epoch": 0.0589586652133814, "grad_norm": 0.4228917844899465, "learning_rate": 2e-05, "loss": 5.5737, "step": 1758 }, { "epoch": 0.05899220256560744, "grad_norm": 0.39558012492003586, "learning_rate": 2e-05, "loss": 5.5151, "step": 1759 }, { "epoch": 0.05902573991783349, "grad_norm": 0.43146853460880474, "learning_rate": 2e-05, "loss": 5.6074, "step": 1760 }, { "epoch": 0.05905927727005953, "grad_norm": 0.4526231477976885, "learning_rate": 2e-05, "loss": 5.5058, "step": 1761 }, { "epoch": 0.05909281462228557, "grad_norm": 0.39499137042554566, "learning_rate": 2e-05, "loss": 5.866, "step": 1762 }, { "epoch": 0.05912635197451161, "grad_norm": 0.4351715866676901, "learning_rate": 2e-05, "loss": 5.5772, "step": 1763 }, { "epoch": 0.05915988932673765, "grad_norm": 0.43509960937923353, "learning_rate": 2e-05, "loss": 5.6575, "step": 1764 }, { "epoch": 0.0591934266789637, "grad_norm": 0.4541106418484366, "learning_rate": 2e-05, "loss": 5.5693, "step": 1765 }, { "epoch": 0.05922696403118974, "grad_norm": 0.4243618447723217, "learning_rate": 2e-05, "loss": 5.8081, "step": 1766 }, { "epoch": 0.05926050138341578, "grad_norm": 0.4592558883655336, "learning_rate": 2e-05, "loss": 5.6147, "step": 1767 }, { "epoch": 0.05929403873564182, "grad_norm": 0.418786881650311, "learning_rate": 2e-05, "loss": 5.6824, "step": 1768 }, { "epoch": 0.059327576087867866, "grad_norm": 0.43760613563842254, "learning_rate": 2e-05, "loss": 5.6026, "step": 1769 }, { "epoch": 0.059361113440093906, "grad_norm": 0.4378605607755759, "learning_rate": 2e-05, "loss": 5.7242, "step": 1770 }, { "epoch": 0.059394650792319946, "grad_norm": 0.42807010848692495, "learning_rate": 2e-05, "loss": 5.6384, "step": 1771 }, { "epoch": 0.059428188144545986, "grad_norm": 0.4440621153708752, "learning_rate": 2e-05, "loss": 5.4751, "step": 1772 }, { "epoch": 0.05946172549677203, "grad_norm": 0.4370699129923196, "learning_rate": 2e-05, "loss": 5.6156, "step": 1773 }, { "epoch": 0.05949526284899807, "grad_norm": 0.4022827049524976, "learning_rate": 2e-05, "loss": 5.4875, "step": 1774 }, { "epoch": 0.059528800201224114, "grad_norm": 0.4194972513808625, "learning_rate": 2e-05, "loss": 5.4671, "step": 1775 }, { "epoch": 0.059562337553450154, "grad_norm": 0.44657563852809573, "learning_rate": 2e-05, "loss": 5.5557, "step": 1776 }, { "epoch": 0.059595874905676194, "grad_norm": 0.42249439017828544, "learning_rate": 2e-05, "loss": 5.3457, "step": 1777 }, { "epoch": 0.05962941225790224, "grad_norm": 0.45951519888041575, "learning_rate": 2e-05, "loss": 5.6386, "step": 1778 }, { "epoch": 0.05966294961012828, "grad_norm": 0.40879388434681907, "learning_rate": 2e-05, "loss": 5.656, "step": 1779 }, { "epoch": 0.05969648696235432, "grad_norm": 0.4143261229414972, "learning_rate": 2e-05, "loss": 5.6072, "step": 1780 }, { "epoch": 0.05973002431458036, "grad_norm": 0.41572953219803366, "learning_rate": 2e-05, "loss": 5.5466, "step": 1781 }, { "epoch": 0.05976356166680641, "grad_norm": 0.4144498892503121, "learning_rate": 2e-05, "loss": 5.7183, "step": 1782 }, { "epoch": 0.05979709901903245, "grad_norm": 0.441663398856212, "learning_rate": 2e-05, "loss": 5.6036, "step": 1783 }, { "epoch": 0.05983063637125849, "grad_norm": 0.42749683931348476, "learning_rate": 2e-05, "loss": 5.4685, "step": 1784 }, { "epoch": 0.05986417372348453, "grad_norm": 0.47210132140774935, "learning_rate": 2e-05, "loss": 5.3709, "step": 1785 }, { "epoch": 0.059897711075710576, "grad_norm": 0.4197937701322212, "learning_rate": 2e-05, "loss": 5.3654, "step": 1786 }, { "epoch": 0.059931248427936616, "grad_norm": 0.4076453045555602, "learning_rate": 2e-05, "loss": 5.7235, "step": 1787 }, { "epoch": 0.059964785780162656, "grad_norm": 0.41988521233273846, "learning_rate": 2e-05, "loss": 5.6416, "step": 1788 }, { "epoch": 0.059998323132388696, "grad_norm": 0.4624383333029714, "learning_rate": 2e-05, "loss": 5.3937, "step": 1789 }, { "epoch": 0.06003186048461474, "grad_norm": 0.42926554621700275, "learning_rate": 2e-05, "loss": 5.5554, "step": 1790 }, { "epoch": 0.060065397836840784, "grad_norm": 0.4156612643933816, "learning_rate": 2e-05, "loss": 5.5087, "step": 1791 }, { "epoch": 0.060098935189066824, "grad_norm": 0.4666772622715812, "learning_rate": 2e-05, "loss": 5.29, "step": 1792 }, { "epoch": 0.060132472541292864, "grad_norm": 0.4267790543565527, "learning_rate": 2e-05, "loss": 5.5312, "step": 1793 }, { "epoch": 0.060166009893518904, "grad_norm": 0.4465174799466562, "learning_rate": 2e-05, "loss": 5.5435, "step": 1794 }, { "epoch": 0.06019954724574495, "grad_norm": 0.45074257376113513, "learning_rate": 2e-05, "loss": 5.3833, "step": 1795 }, { "epoch": 0.06023308459797099, "grad_norm": 0.47991295443296145, "learning_rate": 2e-05, "loss": 5.7083, "step": 1796 }, { "epoch": 0.06026662195019703, "grad_norm": 0.41437547764604266, "learning_rate": 2e-05, "loss": 5.4636, "step": 1797 }, { "epoch": 0.06030015930242307, "grad_norm": 0.43216774188233825, "learning_rate": 2e-05, "loss": 5.5763, "step": 1798 }, { "epoch": 0.06033369665464912, "grad_norm": 0.45945200925275903, "learning_rate": 2e-05, "loss": 5.6225, "step": 1799 }, { "epoch": 0.06036723400687516, "grad_norm": 0.42298856155916076, "learning_rate": 2e-05, "loss": 5.4924, "step": 1800 }, { "epoch": 0.0604007713591012, "grad_norm": 0.4150780586990937, "learning_rate": 2e-05, "loss": 5.5024, "step": 1801 }, { "epoch": 0.06043430871132724, "grad_norm": 0.41482007473739074, "learning_rate": 2e-05, "loss": 5.554, "step": 1802 }, { "epoch": 0.06046784606355328, "grad_norm": 0.43316657427499916, "learning_rate": 2e-05, "loss": 5.6443, "step": 1803 }, { "epoch": 0.060501383415779326, "grad_norm": 0.4223965268014388, "learning_rate": 2e-05, "loss": 5.4656, "step": 1804 }, { "epoch": 0.06053492076800537, "grad_norm": 0.4112688136072763, "learning_rate": 2e-05, "loss": 5.6097, "step": 1805 }, { "epoch": 0.06056845812023141, "grad_norm": 0.41261708069469966, "learning_rate": 2e-05, "loss": 5.5261, "step": 1806 }, { "epoch": 0.06060199547245745, "grad_norm": 0.42771423196648795, "learning_rate": 2e-05, "loss": 5.5538, "step": 1807 }, { "epoch": 0.060635532824683494, "grad_norm": 0.4067462894376807, "learning_rate": 2e-05, "loss": 5.5558, "step": 1808 }, { "epoch": 0.060669070176909534, "grad_norm": 0.3969577317318216, "learning_rate": 2e-05, "loss": 5.5915, "step": 1809 }, { "epoch": 0.060702607529135574, "grad_norm": 0.4504517836198495, "learning_rate": 2e-05, "loss": 5.6996, "step": 1810 }, { "epoch": 0.060736144881361614, "grad_norm": 0.4288546428148933, "learning_rate": 2e-05, "loss": 5.7134, "step": 1811 }, { "epoch": 0.06076968223358766, "grad_norm": 0.44049598799771733, "learning_rate": 2e-05, "loss": 5.4885, "step": 1812 }, { "epoch": 0.0608032195858137, "grad_norm": 0.45084484320303386, "learning_rate": 2e-05, "loss": 5.3636, "step": 1813 }, { "epoch": 0.06083675693803974, "grad_norm": 0.4042088122129456, "learning_rate": 2e-05, "loss": 5.4146, "step": 1814 }, { "epoch": 0.06087029429026578, "grad_norm": 0.39266032868081435, "learning_rate": 2e-05, "loss": 5.5053, "step": 1815 }, { "epoch": 0.06090383164249182, "grad_norm": 0.42113482442037825, "learning_rate": 2e-05, "loss": 5.6542, "step": 1816 }, { "epoch": 0.06093736899471787, "grad_norm": 0.4285283908814687, "learning_rate": 2e-05, "loss": 5.4277, "step": 1817 }, { "epoch": 0.06097090634694391, "grad_norm": 0.41087756385494695, "learning_rate": 2e-05, "loss": 5.7194, "step": 1818 }, { "epoch": 0.06100444369916995, "grad_norm": 0.4044321861755282, "learning_rate": 2e-05, "loss": 5.6769, "step": 1819 }, { "epoch": 0.06103798105139599, "grad_norm": 0.4147016601919414, "learning_rate": 2e-05, "loss": 5.554, "step": 1820 }, { "epoch": 0.06107151840362204, "grad_norm": 0.42014345620601246, "learning_rate": 2e-05, "loss": 5.6952, "step": 1821 }, { "epoch": 0.06110505575584808, "grad_norm": 0.41096617730216733, "learning_rate": 2e-05, "loss": 5.6602, "step": 1822 }, { "epoch": 0.06113859310807412, "grad_norm": 0.4173990429623904, "learning_rate": 2e-05, "loss": 5.3385, "step": 1823 }, { "epoch": 0.06117213046030016, "grad_norm": 0.3950503519210559, "learning_rate": 2e-05, "loss": 5.3608, "step": 1824 }, { "epoch": 0.061205667812526204, "grad_norm": 0.4019077192738869, "learning_rate": 2e-05, "loss": 5.5773, "step": 1825 }, { "epoch": 0.061239205164752245, "grad_norm": 0.431841368723832, "learning_rate": 2e-05, "loss": 5.5367, "step": 1826 }, { "epoch": 0.061272742516978285, "grad_norm": 0.39736137520834425, "learning_rate": 2e-05, "loss": 5.5036, "step": 1827 }, { "epoch": 0.061306279869204325, "grad_norm": 0.40920337453376465, "learning_rate": 2e-05, "loss": 5.6314, "step": 1828 }, { "epoch": 0.061339817221430365, "grad_norm": 0.4163545059571355, "learning_rate": 2e-05, "loss": 5.6103, "step": 1829 }, { "epoch": 0.06137335457365641, "grad_norm": 0.38464917842875734, "learning_rate": 2e-05, "loss": 5.4303, "step": 1830 }, { "epoch": 0.06140689192588245, "grad_norm": 0.41780101168946365, "learning_rate": 2e-05, "loss": 5.6558, "step": 1831 }, { "epoch": 0.06144042927810849, "grad_norm": 0.4603290908629396, "learning_rate": 2e-05, "loss": 5.4491, "step": 1832 }, { "epoch": 0.06147396663033453, "grad_norm": 0.41181987430787326, "learning_rate": 2e-05, "loss": 5.5212, "step": 1833 }, { "epoch": 0.06150750398256058, "grad_norm": 0.4179577802207844, "learning_rate": 2e-05, "loss": 5.4409, "step": 1834 }, { "epoch": 0.06154104133478662, "grad_norm": 0.3982605691968611, "learning_rate": 2e-05, "loss": 5.7897, "step": 1835 }, { "epoch": 0.06157457868701266, "grad_norm": 0.3999340660943425, "learning_rate": 2e-05, "loss": 5.5688, "step": 1836 }, { "epoch": 0.0616081160392387, "grad_norm": 0.38108100518432286, "learning_rate": 2e-05, "loss": 5.6724, "step": 1837 }, { "epoch": 0.06164165339146475, "grad_norm": 0.41730108556445267, "learning_rate": 2e-05, "loss": 5.5216, "step": 1838 }, { "epoch": 0.06167519074369079, "grad_norm": 0.43623800459101403, "learning_rate": 2e-05, "loss": 5.6398, "step": 1839 }, { "epoch": 0.06170872809591683, "grad_norm": 0.41876369774313993, "learning_rate": 2e-05, "loss": 5.6627, "step": 1840 }, { "epoch": 0.06174226544814287, "grad_norm": 0.42933327308534025, "learning_rate": 2e-05, "loss": 5.5136, "step": 1841 }, { "epoch": 0.06177580280036891, "grad_norm": 0.4096303807556728, "learning_rate": 2e-05, "loss": 5.4922, "step": 1842 }, { "epoch": 0.061809340152594955, "grad_norm": 0.41526386019017997, "learning_rate": 2e-05, "loss": 5.5952, "step": 1843 }, { "epoch": 0.061842877504820995, "grad_norm": 0.43010471613266804, "learning_rate": 2e-05, "loss": 5.5449, "step": 1844 }, { "epoch": 0.061876414857047035, "grad_norm": 0.4333103809466212, "learning_rate": 2e-05, "loss": 5.5148, "step": 1845 }, { "epoch": 0.061909952209273075, "grad_norm": 0.39769956793421335, "learning_rate": 2e-05, "loss": 5.5093, "step": 1846 }, { "epoch": 0.06194348956149912, "grad_norm": 0.4371291101957964, "learning_rate": 2e-05, "loss": 5.6422, "step": 1847 }, { "epoch": 0.06197702691372516, "grad_norm": 0.4221517542325892, "learning_rate": 2e-05, "loss": 5.6774, "step": 1848 }, { "epoch": 0.0620105642659512, "grad_norm": 0.431157600883706, "learning_rate": 2e-05, "loss": 5.5797, "step": 1849 }, { "epoch": 0.06204410161817724, "grad_norm": 0.4216362603220314, "learning_rate": 2e-05, "loss": 5.5249, "step": 1850 }, { "epoch": 0.06207763897040329, "grad_norm": 0.44052852943790877, "learning_rate": 2e-05, "loss": 5.6732, "step": 1851 }, { "epoch": 0.06211117632262933, "grad_norm": 0.4152100045552918, "learning_rate": 2e-05, "loss": 5.5834, "step": 1852 }, { "epoch": 0.06214471367485537, "grad_norm": 0.4396780047980715, "learning_rate": 2e-05, "loss": 5.5753, "step": 1853 }, { "epoch": 0.06217825102708141, "grad_norm": 0.4093138504991826, "learning_rate": 2e-05, "loss": 5.7157, "step": 1854 }, { "epoch": 0.06221178837930745, "grad_norm": 0.43112616873792053, "learning_rate": 2e-05, "loss": 5.4985, "step": 1855 }, { "epoch": 0.0622453257315335, "grad_norm": 0.4490216834223934, "learning_rate": 2e-05, "loss": 5.7625, "step": 1856 }, { "epoch": 0.06227886308375954, "grad_norm": 0.4142164135308356, "learning_rate": 2e-05, "loss": 5.4779, "step": 1857 }, { "epoch": 0.06231240043598558, "grad_norm": 0.43315087282433157, "learning_rate": 2e-05, "loss": 5.5727, "step": 1858 }, { "epoch": 0.06234593778821162, "grad_norm": 0.4974508722799907, "learning_rate": 2e-05, "loss": 5.4219, "step": 1859 }, { "epoch": 0.062379475140437665, "grad_norm": 0.46469051325989347, "learning_rate": 2e-05, "loss": 5.5865, "step": 1860 }, { "epoch": 0.062413012492663705, "grad_norm": 0.4120167547214799, "learning_rate": 2e-05, "loss": 5.6564, "step": 1861 }, { "epoch": 0.062446549844889745, "grad_norm": 0.40504691328127046, "learning_rate": 2e-05, "loss": 5.693, "step": 1862 }, { "epoch": 0.062480087197115786, "grad_norm": 0.47218148174004937, "learning_rate": 2e-05, "loss": 5.4483, "step": 1863 }, { "epoch": 0.06251362454934183, "grad_norm": 0.4214093089224714, "learning_rate": 2e-05, "loss": 5.5667, "step": 1864 }, { "epoch": 0.06254716190156787, "grad_norm": 0.43320492555021584, "learning_rate": 2e-05, "loss": 5.65, "step": 1865 }, { "epoch": 0.0625806992537939, "grad_norm": 0.425776678252642, "learning_rate": 2e-05, "loss": 5.5188, "step": 1866 }, { "epoch": 0.06261423660601996, "grad_norm": 0.4117081896893446, "learning_rate": 2e-05, "loss": 5.5964, "step": 1867 }, { "epoch": 0.062647773958246, "grad_norm": 0.41081436313039754, "learning_rate": 2e-05, "loss": 5.6376, "step": 1868 }, { "epoch": 0.06268131131047204, "grad_norm": 0.4434083036680339, "learning_rate": 2e-05, "loss": 5.7508, "step": 1869 }, { "epoch": 0.06271484866269808, "grad_norm": 0.4100960207388628, "learning_rate": 2e-05, "loss": 5.4677, "step": 1870 }, { "epoch": 0.06274838601492412, "grad_norm": 0.4472480238777528, "learning_rate": 2e-05, "loss": 5.5382, "step": 1871 }, { "epoch": 0.06278192336715016, "grad_norm": 0.41768436841010065, "learning_rate": 2e-05, "loss": 5.5522, "step": 1872 }, { "epoch": 0.0628154607193762, "grad_norm": 0.4100260908218848, "learning_rate": 2e-05, "loss": 5.5294, "step": 1873 }, { "epoch": 0.06284899807160224, "grad_norm": 0.4192716782818729, "learning_rate": 2e-05, "loss": 5.6495, "step": 1874 }, { "epoch": 0.0628825354238283, "grad_norm": 0.4414060481924191, "learning_rate": 2e-05, "loss": 5.5877, "step": 1875 }, { "epoch": 0.06291607277605434, "grad_norm": 0.4074609325575004, "learning_rate": 2e-05, "loss": 5.4674, "step": 1876 }, { "epoch": 0.06294961012828038, "grad_norm": 0.4316975811450394, "learning_rate": 2e-05, "loss": 5.4637, "step": 1877 }, { "epoch": 0.06298314748050642, "grad_norm": 0.43139973798439396, "learning_rate": 2e-05, "loss": 5.5598, "step": 1878 }, { "epoch": 0.06301668483273246, "grad_norm": 0.4091843146410598, "learning_rate": 2e-05, "loss": 5.5638, "step": 1879 }, { "epoch": 0.0630502221849585, "grad_norm": 0.39025625802956415, "learning_rate": 2e-05, "loss": 5.5269, "step": 1880 }, { "epoch": 0.06308375953718454, "grad_norm": 0.4336411420327458, "learning_rate": 2e-05, "loss": 5.8214, "step": 1881 }, { "epoch": 0.06311729688941058, "grad_norm": 0.4081134658474253, "learning_rate": 2e-05, "loss": 5.606, "step": 1882 }, { "epoch": 0.06315083424163662, "grad_norm": 0.42213991015426083, "learning_rate": 2e-05, "loss": 5.6449, "step": 1883 }, { "epoch": 0.06318437159386267, "grad_norm": 0.46841747542446366, "learning_rate": 2e-05, "loss": 5.5544, "step": 1884 }, { "epoch": 0.06321790894608871, "grad_norm": 0.42395072087062413, "learning_rate": 2e-05, "loss": 5.5127, "step": 1885 }, { "epoch": 0.06325144629831475, "grad_norm": 0.4051858862253517, "learning_rate": 2e-05, "loss": 5.1824, "step": 1886 }, { "epoch": 0.06328498365054079, "grad_norm": 0.41758902949099697, "learning_rate": 2e-05, "loss": 5.6147, "step": 1887 }, { "epoch": 0.06331852100276683, "grad_norm": 0.39387700388188107, "learning_rate": 2e-05, "loss": 5.6973, "step": 1888 }, { "epoch": 0.06335205835499287, "grad_norm": 0.41937610812562015, "learning_rate": 2e-05, "loss": 5.6892, "step": 1889 }, { "epoch": 0.06338559570721891, "grad_norm": 0.4343184949971351, "learning_rate": 2e-05, "loss": 5.4225, "step": 1890 }, { "epoch": 0.06341913305944495, "grad_norm": 0.4325534811578418, "learning_rate": 2e-05, "loss": 5.5715, "step": 1891 }, { "epoch": 0.06345267041167099, "grad_norm": 0.4185847342620314, "learning_rate": 2e-05, "loss": 5.4689, "step": 1892 }, { "epoch": 0.06348620776389705, "grad_norm": 0.4390066509392112, "learning_rate": 2e-05, "loss": 5.8407, "step": 1893 }, { "epoch": 0.06351974511612309, "grad_norm": 0.45328945017698696, "learning_rate": 2e-05, "loss": 5.5991, "step": 1894 }, { "epoch": 0.06355328246834913, "grad_norm": 0.40447699481752664, "learning_rate": 2e-05, "loss": 5.4569, "step": 1895 }, { "epoch": 0.06358681982057517, "grad_norm": 0.394823606337954, "learning_rate": 2e-05, "loss": 5.4163, "step": 1896 }, { "epoch": 0.0636203571728012, "grad_norm": 0.4278083580993777, "learning_rate": 2e-05, "loss": 5.577, "step": 1897 }, { "epoch": 0.06365389452502725, "grad_norm": 0.4167764074995744, "learning_rate": 2e-05, "loss": 5.3111, "step": 1898 }, { "epoch": 0.06368743187725329, "grad_norm": 0.43350010597412403, "learning_rate": 2e-05, "loss": 5.4743, "step": 1899 }, { "epoch": 0.06372096922947933, "grad_norm": 0.4155222642103456, "learning_rate": 2e-05, "loss": 5.7564, "step": 1900 }, { "epoch": 0.06375450658170538, "grad_norm": 0.41972147776775015, "learning_rate": 2e-05, "loss": 5.3626, "step": 1901 }, { "epoch": 0.06378804393393142, "grad_norm": 0.3896500221076837, "learning_rate": 2e-05, "loss": 5.5675, "step": 1902 }, { "epoch": 0.06382158128615746, "grad_norm": 0.4306417308837477, "learning_rate": 2e-05, "loss": 5.8112, "step": 1903 }, { "epoch": 0.0638551186383835, "grad_norm": 0.4379244847755419, "learning_rate": 2e-05, "loss": 5.7675, "step": 1904 }, { "epoch": 0.06388865599060954, "grad_norm": 0.4408213982401105, "learning_rate": 2e-05, "loss": 5.381, "step": 1905 }, { "epoch": 0.06392219334283558, "grad_norm": 0.3846943236412202, "learning_rate": 2e-05, "loss": 5.5189, "step": 1906 }, { "epoch": 0.06395573069506162, "grad_norm": 0.4718167726483313, "learning_rate": 2e-05, "loss": 5.5516, "step": 1907 }, { "epoch": 0.06398926804728766, "grad_norm": 0.4197758542708754, "learning_rate": 2e-05, "loss": 5.6322, "step": 1908 }, { "epoch": 0.0640228053995137, "grad_norm": 0.40065352375124, "learning_rate": 2e-05, "loss": 5.4789, "step": 1909 }, { "epoch": 0.06405634275173976, "grad_norm": 0.4194979517632503, "learning_rate": 2e-05, "loss": 5.5347, "step": 1910 }, { "epoch": 0.0640898801039658, "grad_norm": 0.42485292945076186, "learning_rate": 2e-05, "loss": 5.7733, "step": 1911 }, { "epoch": 0.06412341745619184, "grad_norm": 0.4145805811971153, "learning_rate": 2e-05, "loss": 5.6862, "step": 1912 }, { "epoch": 0.06415695480841788, "grad_norm": 0.40354419217791715, "learning_rate": 2e-05, "loss": 5.4783, "step": 1913 }, { "epoch": 0.06419049216064392, "grad_norm": 0.4359061334822483, "learning_rate": 2e-05, "loss": 5.473, "step": 1914 }, { "epoch": 0.06422402951286996, "grad_norm": 0.4038345924669843, "learning_rate": 2e-05, "loss": 5.5725, "step": 1915 }, { "epoch": 0.064257566865096, "grad_norm": 0.39557875437496715, "learning_rate": 2e-05, "loss": 5.4644, "step": 1916 }, { "epoch": 0.06429110421732204, "grad_norm": 0.4335285915553954, "learning_rate": 2e-05, "loss": 5.5299, "step": 1917 }, { "epoch": 0.06432464156954809, "grad_norm": 0.4260355487627575, "learning_rate": 2e-05, "loss": 5.55, "step": 1918 }, { "epoch": 0.06435817892177413, "grad_norm": 0.4045907645371416, "learning_rate": 2e-05, "loss": 5.7344, "step": 1919 }, { "epoch": 0.06439171627400017, "grad_norm": 0.4346407214463528, "learning_rate": 2e-05, "loss": 5.6603, "step": 1920 }, { "epoch": 0.06442525362622621, "grad_norm": 0.4136727817580632, "learning_rate": 2e-05, "loss": 5.7493, "step": 1921 }, { "epoch": 0.06445879097845225, "grad_norm": 0.37686037842282427, "learning_rate": 2e-05, "loss": 5.5253, "step": 1922 }, { "epoch": 0.06449232833067829, "grad_norm": 0.43118189236059795, "learning_rate": 2e-05, "loss": 5.4486, "step": 1923 }, { "epoch": 0.06452586568290433, "grad_norm": 0.4324868923914285, "learning_rate": 2e-05, "loss": 5.6321, "step": 1924 }, { "epoch": 0.06455940303513037, "grad_norm": 0.41201314457878346, "learning_rate": 2e-05, "loss": 5.526, "step": 1925 }, { "epoch": 0.06459294038735641, "grad_norm": 0.40707916919705844, "learning_rate": 2e-05, "loss": 5.6968, "step": 1926 }, { "epoch": 0.06462647773958247, "grad_norm": 0.3998040656928954, "learning_rate": 2e-05, "loss": 5.6498, "step": 1927 }, { "epoch": 0.0646600150918085, "grad_norm": 0.41409983783298576, "learning_rate": 2e-05, "loss": 5.6797, "step": 1928 }, { "epoch": 0.06469355244403455, "grad_norm": 0.4057800367605242, "learning_rate": 2e-05, "loss": 5.4773, "step": 1929 }, { "epoch": 0.06472708979626059, "grad_norm": 0.41461314411015043, "learning_rate": 2e-05, "loss": 5.6955, "step": 1930 }, { "epoch": 0.06476062714848663, "grad_norm": 0.4317594424776622, "learning_rate": 2e-05, "loss": 5.5264, "step": 1931 }, { "epoch": 0.06479416450071267, "grad_norm": 0.4075113279169639, "learning_rate": 2e-05, "loss": 5.517, "step": 1932 }, { "epoch": 0.06482770185293871, "grad_norm": 0.40341122247466726, "learning_rate": 2e-05, "loss": 5.4816, "step": 1933 }, { "epoch": 0.06486123920516475, "grad_norm": 0.4214644701744155, "learning_rate": 2e-05, "loss": 5.563, "step": 1934 }, { "epoch": 0.06489477655739079, "grad_norm": 0.4457114634244247, "learning_rate": 2e-05, "loss": 5.3575, "step": 1935 }, { "epoch": 0.06492831390961684, "grad_norm": 0.40419917254825105, "learning_rate": 2e-05, "loss": 5.3967, "step": 1936 }, { "epoch": 0.06496185126184288, "grad_norm": 0.42664078863938065, "learning_rate": 2e-05, "loss": 5.7517, "step": 1937 }, { "epoch": 0.06499538861406892, "grad_norm": 0.4142758300573428, "learning_rate": 2e-05, "loss": 5.4221, "step": 1938 }, { "epoch": 0.06502892596629496, "grad_norm": 0.414902227981499, "learning_rate": 2e-05, "loss": 5.6452, "step": 1939 }, { "epoch": 0.065062463318521, "grad_norm": 0.4096405779456615, "learning_rate": 2e-05, "loss": 5.6586, "step": 1940 }, { "epoch": 0.06509600067074704, "grad_norm": 0.4118700768849862, "learning_rate": 2e-05, "loss": 5.5859, "step": 1941 }, { "epoch": 0.06512953802297308, "grad_norm": 0.46721701554697903, "learning_rate": 2e-05, "loss": 5.4526, "step": 1942 }, { "epoch": 0.06516307537519912, "grad_norm": 0.433438584215684, "learning_rate": 2e-05, "loss": 5.5531, "step": 1943 }, { "epoch": 0.06519661272742518, "grad_norm": 0.4099487059151592, "learning_rate": 2e-05, "loss": 5.5572, "step": 1944 }, { "epoch": 0.06523015007965122, "grad_norm": 0.4351885613203795, "learning_rate": 2e-05, "loss": 5.5961, "step": 1945 }, { "epoch": 0.06526368743187726, "grad_norm": 0.4401832656063046, "learning_rate": 2e-05, "loss": 5.4425, "step": 1946 }, { "epoch": 0.0652972247841033, "grad_norm": 0.4206133583041218, "learning_rate": 2e-05, "loss": 5.4917, "step": 1947 }, { "epoch": 0.06533076213632934, "grad_norm": 0.46693233570541437, "learning_rate": 2e-05, "loss": 5.5452, "step": 1948 }, { "epoch": 0.06536429948855538, "grad_norm": 0.42794293480183476, "learning_rate": 2e-05, "loss": 5.6148, "step": 1949 }, { "epoch": 0.06539783684078142, "grad_norm": 0.4304423133564218, "learning_rate": 2e-05, "loss": 5.6221, "step": 1950 }, { "epoch": 0.06543137419300746, "grad_norm": 0.4266009622958264, "learning_rate": 2e-05, "loss": 5.6204, "step": 1951 }, { "epoch": 0.0654649115452335, "grad_norm": 0.43968402738593854, "learning_rate": 2e-05, "loss": 5.5617, "step": 1952 }, { "epoch": 0.06549844889745955, "grad_norm": 0.44236653334451254, "learning_rate": 2e-05, "loss": 5.593, "step": 1953 }, { "epoch": 0.06553198624968559, "grad_norm": 0.41614398854737167, "learning_rate": 2e-05, "loss": 5.6117, "step": 1954 }, { "epoch": 0.06556552360191163, "grad_norm": 0.39697278983992784, "learning_rate": 2e-05, "loss": 5.6663, "step": 1955 }, { "epoch": 0.06559906095413767, "grad_norm": 0.43667012602741023, "learning_rate": 2e-05, "loss": 5.3689, "step": 1956 }, { "epoch": 0.06563259830636371, "grad_norm": 0.40865634131789197, "learning_rate": 2e-05, "loss": 5.5687, "step": 1957 }, { "epoch": 0.06566613565858975, "grad_norm": 0.4443828294429042, "learning_rate": 2e-05, "loss": 5.6831, "step": 1958 }, { "epoch": 0.06569967301081579, "grad_norm": 0.4563801369148046, "learning_rate": 2e-05, "loss": 5.5959, "step": 1959 }, { "epoch": 0.06573321036304183, "grad_norm": 0.40313330825505467, "learning_rate": 2e-05, "loss": 5.6563, "step": 1960 }, { "epoch": 0.06576674771526787, "grad_norm": 0.4261978055875075, "learning_rate": 2e-05, "loss": 5.3116, "step": 1961 }, { "epoch": 0.06580028506749393, "grad_norm": 0.45696811785175906, "learning_rate": 2e-05, "loss": 5.5568, "step": 1962 }, { "epoch": 0.06583382241971997, "grad_norm": 0.42567335293004266, "learning_rate": 2e-05, "loss": 5.6226, "step": 1963 }, { "epoch": 0.06586735977194601, "grad_norm": 0.4370089949434856, "learning_rate": 2e-05, "loss": 5.5634, "step": 1964 }, { "epoch": 0.06590089712417205, "grad_norm": 0.4439480247122002, "learning_rate": 2e-05, "loss": 5.5192, "step": 1965 }, { "epoch": 0.06593443447639809, "grad_norm": 0.41362543429853493, "learning_rate": 2e-05, "loss": 5.7942, "step": 1966 }, { "epoch": 0.06596797182862413, "grad_norm": 0.43642821976105095, "learning_rate": 2e-05, "loss": 5.5215, "step": 1967 }, { "epoch": 0.06600150918085017, "grad_norm": 0.40303639939332875, "learning_rate": 2e-05, "loss": 5.5459, "step": 1968 }, { "epoch": 0.06603504653307621, "grad_norm": 0.4292532318217379, "learning_rate": 2e-05, "loss": 5.6503, "step": 1969 }, { "epoch": 0.06606858388530226, "grad_norm": 0.4702407190812591, "learning_rate": 2e-05, "loss": 5.3866, "step": 1970 }, { "epoch": 0.0661021212375283, "grad_norm": 0.41317075083122146, "learning_rate": 2e-05, "loss": 5.6677, "step": 1971 }, { "epoch": 0.06613565858975434, "grad_norm": 0.41965103828706857, "learning_rate": 2e-05, "loss": 5.883, "step": 1972 }, { "epoch": 0.06616919594198038, "grad_norm": 0.46297976485879383, "learning_rate": 2e-05, "loss": 5.4488, "step": 1973 }, { "epoch": 0.06620273329420642, "grad_norm": 0.44298414901963384, "learning_rate": 2e-05, "loss": 5.6707, "step": 1974 }, { "epoch": 0.06623627064643246, "grad_norm": 0.43194282050727234, "learning_rate": 2e-05, "loss": 5.3809, "step": 1975 }, { "epoch": 0.0662698079986585, "grad_norm": 0.4139368032283427, "learning_rate": 2e-05, "loss": 5.4879, "step": 1976 }, { "epoch": 0.06630334535088454, "grad_norm": 0.39893978631070376, "learning_rate": 2e-05, "loss": 5.4478, "step": 1977 }, { "epoch": 0.06633688270311058, "grad_norm": 0.45928649710016134, "learning_rate": 2e-05, "loss": 5.6892, "step": 1978 }, { "epoch": 0.06637042005533664, "grad_norm": 0.4344338816211198, "learning_rate": 2e-05, "loss": 5.5756, "step": 1979 }, { "epoch": 0.06640395740756268, "grad_norm": 0.4267381096990058, "learning_rate": 2e-05, "loss": 5.5251, "step": 1980 }, { "epoch": 0.06643749475978872, "grad_norm": 0.436929828904449, "learning_rate": 2e-05, "loss": 5.4603, "step": 1981 }, { "epoch": 0.06647103211201476, "grad_norm": 0.43725841864445614, "learning_rate": 2e-05, "loss": 5.4646, "step": 1982 }, { "epoch": 0.0665045694642408, "grad_norm": 0.3982632002149861, "learning_rate": 2e-05, "loss": 5.7306, "step": 1983 }, { "epoch": 0.06653810681646684, "grad_norm": 0.44228518897711444, "learning_rate": 2e-05, "loss": 5.4468, "step": 1984 }, { "epoch": 0.06657164416869288, "grad_norm": 0.4476942848512525, "learning_rate": 2e-05, "loss": 5.5325, "step": 1985 }, { "epoch": 0.06660518152091892, "grad_norm": 0.41369113886787034, "learning_rate": 2e-05, "loss": 5.4582, "step": 1986 }, { "epoch": 0.06663871887314496, "grad_norm": 0.4332606107966644, "learning_rate": 2e-05, "loss": 5.6999, "step": 1987 }, { "epoch": 0.06667225622537101, "grad_norm": 0.43925825036312544, "learning_rate": 2e-05, "loss": 5.6371, "step": 1988 }, { "epoch": 0.06670579357759705, "grad_norm": 0.4175679899813386, "learning_rate": 2e-05, "loss": 5.3769, "step": 1989 }, { "epoch": 0.06673933092982309, "grad_norm": 0.42255855270651643, "learning_rate": 2e-05, "loss": 5.3729, "step": 1990 }, { "epoch": 0.06677286828204913, "grad_norm": 0.4429131412156498, "learning_rate": 2e-05, "loss": 5.5535, "step": 1991 }, { "epoch": 0.06680640563427517, "grad_norm": 0.43152087726440097, "learning_rate": 2e-05, "loss": 5.6096, "step": 1992 }, { "epoch": 0.06683994298650121, "grad_norm": 0.44140180517825417, "learning_rate": 2e-05, "loss": 5.3908, "step": 1993 }, { "epoch": 0.06687348033872725, "grad_norm": 0.41995705592243254, "learning_rate": 2e-05, "loss": 5.6154, "step": 1994 }, { "epoch": 0.0669070176909533, "grad_norm": 0.41584467325816626, "learning_rate": 2e-05, "loss": 5.6242, "step": 1995 }, { "epoch": 0.06694055504317935, "grad_norm": 0.45835129037100747, "learning_rate": 2e-05, "loss": 5.587, "step": 1996 }, { "epoch": 0.06697409239540539, "grad_norm": 0.4164507824692495, "learning_rate": 2e-05, "loss": 5.6752, "step": 1997 }, { "epoch": 0.06700762974763143, "grad_norm": 0.41769150630471474, "learning_rate": 2e-05, "loss": 5.4471, "step": 1998 }, { "epoch": 0.06704116709985747, "grad_norm": 0.429479377504974, "learning_rate": 2e-05, "loss": 5.4681, "step": 1999 }, { "epoch": 0.06707470445208351, "grad_norm": 0.4072991154528607, "learning_rate": 2e-05, "loss": 5.7413, "step": 2000 }, { "epoch": 0.06710824180430955, "grad_norm": 0.4396621324691173, "learning_rate": 2e-05, "loss": 5.5758, "step": 2001 }, { "epoch": 0.06714177915653559, "grad_norm": 0.4331275041910707, "learning_rate": 2e-05, "loss": 5.5435, "step": 2002 }, { "epoch": 0.06717531650876163, "grad_norm": 0.4233435923946044, "learning_rate": 2e-05, "loss": 5.4431, "step": 2003 }, { "epoch": 0.06720885386098767, "grad_norm": 0.418986736729053, "learning_rate": 2e-05, "loss": 5.5637, "step": 2004 }, { "epoch": 0.06724239121321372, "grad_norm": 0.41993079558390006, "learning_rate": 2e-05, "loss": 5.4219, "step": 2005 }, { "epoch": 0.06727592856543976, "grad_norm": 0.4014940776089685, "learning_rate": 2e-05, "loss": 5.6175, "step": 2006 }, { "epoch": 0.0673094659176658, "grad_norm": 0.429533371929493, "learning_rate": 2e-05, "loss": 5.517, "step": 2007 }, { "epoch": 0.06734300326989184, "grad_norm": 0.43534412725433713, "learning_rate": 2e-05, "loss": 5.4791, "step": 2008 }, { "epoch": 0.06737654062211788, "grad_norm": 0.4186224783455672, "learning_rate": 2e-05, "loss": 5.6031, "step": 2009 }, { "epoch": 0.06741007797434392, "grad_norm": 0.39536341968313987, "learning_rate": 2e-05, "loss": 5.6981, "step": 2010 }, { "epoch": 0.06744361532656996, "grad_norm": 0.4418183785622666, "learning_rate": 2e-05, "loss": 5.5997, "step": 2011 }, { "epoch": 0.067477152678796, "grad_norm": 0.41138919824645154, "learning_rate": 2e-05, "loss": 5.7407, "step": 2012 }, { "epoch": 0.06751069003102204, "grad_norm": 0.41219792454627735, "learning_rate": 2e-05, "loss": 5.6356, "step": 2013 }, { "epoch": 0.0675442273832481, "grad_norm": 0.43465129809883396, "learning_rate": 2e-05, "loss": 5.4116, "step": 2014 }, { "epoch": 0.06757776473547414, "grad_norm": 0.4076122014872989, "learning_rate": 2e-05, "loss": 5.424, "step": 2015 }, { "epoch": 0.06761130208770018, "grad_norm": 0.4192777210788033, "learning_rate": 2e-05, "loss": 5.6549, "step": 2016 }, { "epoch": 0.06764483943992622, "grad_norm": 0.40204887297804287, "learning_rate": 2e-05, "loss": 5.5616, "step": 2017 }, { "epoch": 0.06767837679215226, "grad_norm": 0.4014219428592607, "learning_rate": 2e-05, "loss": 5.506, "step": 2018 }, { "epoch": 0.0677119141443783, "grad_norm": 0.4312219176072835, "learning_rate": 2e-05, "loss": 5.5992, "step": 2019 }, { "epoch": 0.06774545149660434, "grad_norm": 0.4032436163139016, "learning_rate": 2e-05, "loss": 5.5144, "step": 2020 }, { "epoch": 0.06777898884883038, "grad_norm": 0.4115922957515368, "learning_rate": 2e-05, "loss": 5.6777, "step": 2021 }, { "epoch": 0.06781252620105643, "grad_norm": 0.4149772506753136, "learning_rate": 2e-05, "loss": 5.5245, "step": 2022 }, { "epoch": 0.06784606355328247, "grad_norm": 0.4139790607547382, "learning_rate": 2e-05, "loss": 5.6048, "step": 2023 }, { "epoch": 0.06787960090550851, "grad_norm": 0.4050519615795497, "learning_rate": 2e-05, "loss": 5.4195, "step": 2024 }, { "epoch": 0.06791313825773455, "grad_norm": 0.42768721746863314, "learning_rate": 2e-05, "loss": 5.3251, "step": 2025 }, { "epoch": 0.0679466756099606, "grad_norm": 0.45176086809105986, "learning_rate": 2e-05, "loss": 5.3939, "step": 2026 }, { "epoch": 0.06798021296218663, "grad_norm": 0.4245043632659925, "learning_rate": 2e-05, "loss": 5.5769, "step": 2027 }, { "epoch": 0.06801375031441267, "grad_norm": 0.4280202431945083, "learning_rate": 2e-05, "loss": 5.7048, "step": 2028 }, { "epoch": 0.06804728766663871, "grad_norm": 0.4150454612204075, "learning_rate": 2e-05, "loss": 5.6296, "step": 2029 }, { "epoch": 0.06808082501886475, "grad_norm": 0.3999978426621949, "learning_rate": 2e-05, "loss": 5.4212, "step": 2030 }, { "epoch": 0.06811436237109081, "grad_norm": 0.40287437824269934, "learning_rate": 2e-05, "loss": 5.4504, "step": 2031 }, { "epoch": 0.06814789972331685, "grad_norm": 0.40972788430582596, "learning_rate": 2e-05, "loss": 5.6693, "step": 2032 }, { "epoch": 0.06818143707554289, "grad_norm": 0.41426102051641883, "learning_rate": 2e-05, "loss": 5.6207, "step": 2033 }, { "epoch": 0.06821497442776893, "grad_norm": 0.44195622495774733, "learning_rate": 2e-05, "loss": 5.6565, "step": 2034 }, { "epoch": 0.06824851177999497, "grad_norm": 0.4135305637359423, "learning_rate": 2e-05, "loss": 5.2979, "step": 2035 }, { "epoch": 0.06828204913222101, "grad_norm": 0.413640142407332, "learning_rate": 2e-05, "loss": 5.5402, "step": 2036 }, { "epoch": 0.06831558648444705, "grad_norm": 0.4272861440821897, "learning_rate": 2e-05, "loss": 5.5709, "step": 2037 }, { "epoch": 0.06834912383667309, "grad_norm": 0.4067884935567941, "learning_rate": 2e-05, "loss": 5.7597, "step": 2038 }, { "epoch": 0.06838266118889913, "grad_norm": 0.4088966372985781, "learning_rate": 2e-05, "loss": 5.4511, "step": 2039 }, { "epoch": 0.06841619854112518, "grad_norm": 0.4373331257397585, "learning_rate": 2e-05, "loss": 5.6448, "step": 2040 }, { "epoch": 0.06844973589335122, "grad_norm": 0.44630816269727186, "learning_rate": 2e-05, "loss": 5.7085, "step": 2041 }, { "epoch": 0.06848327324557726, "grad_norm": 0.4170201765893619, "learning_rate": 2e-05, "loss": 5.4349, "step": 2042 }, { "epoch": 0.0685168105978033, "grad_norm": 0.39408760549187344, "learning_rate": 2e-05, "loss": 5.6674, "step": 2043 }, { "epoch": 0.06855034795002934, "grad_norm": 0.4269524510995337, "learning_rate": 2e-05, "loss": 5.532, "step": 2044 }, { "epoch": 0.06858388530225538, "grad_norm": 0.4302876263256542, "learning_rate": 2e-05, "loss": 5.6578, "step": 2045 }, { "epoch": 0.06861742265448142, "grad_norm": 0.41799660245852693, "learning_rate": 2e-05, "loss": 5.5843, "step": 2046 }, { "epoch": 0.06865096000670746, "grad_norm": 0.4758943602635671, "learning_rate": 2e-05, "loss": 5.6679, "step": 2047 }, { "epoch": 0.06868449735893352, "grad_norm": 0.43507486831954134, "learning_rate": 2e-05, "loss": 5.4793, "step": 2048 }, { "epoch": 0.06871803471115956, "grad_norm": 0.4111902358355468, "learning_rate": 2e-05, "loss": 5.5632, "step": 2049 }, { "epoch": 0.0687515720633856, "grad_norm": 0.4270088294579766, "learning_rate": 2e-05, "loss": 5.5602, "step": 2050 }, { "epoch": 0.06878510941561164, "grad_norm": 0.4422133363469093, "learning_rate": 2e-05, "loss": 5.4925, "step": 2051 }, { "epoch": 0.06881864676783768, "grad_norm": 0.42099209926307335, "learning_rate": 2e-05, "loss": 5.5758, "step": 2052 }, { "epoch": 0.06885218412006372, "grad_norm": 0.4398533955264389, "learning_rate": 2e-05, "loss": 5.6578, "step": 2053 }, { "epoch": 0.06888572147228976, "grad_norm": 0.4193048113365711, "learning_rate": 2e-05, "loss": 5.5081, "step": 2054 }, { "epoch": 0.0689192588245158, "grad_norm": 0.4366880450938622, "learning_rate": 2e-05, "loss": 5.6175, "step": 2055 }, { "epoch": 0.06895279617674184, "grad_norm": 0.4261309813230675, "learning_rate": 2e-05, "loss": 5.5922, "step": 2056 }, { "epoch": 0.0689863335289679, "grad_norm": 0.4195149747761014, "learning_rate": 2e-05, "loss": 5.71, "step": 2057 }, { "epoch": 0.06901987088119393, "grad_norm": 0.4462231008910975, "learning_rate": 2e-05, "loss": 5.4059, "step": 2058 }, { "epoch": 0.06905340823341997, "grad_norm": 0.47847560062621286, "learning_rate": 2e-05, "loss": 5.6176, "step": 2059 }, { "epoch": 0.06908694558564601, "grad_norm": 0.41199258851067877, "learning_rate": 2e-05, "loss": 5.4122, "step": 2060 }, { "epoch": 0.06912048293787205, "grad_norm": 0.4001470130376448, "learning_rate": 2e-05, "loss": 5.451, "step": 2061 }, { "epoch": 0.0691540202900981, "grad_norm": 0.47024990630485614, "learning_rate": 2e-05, "loss": 5.7101, "step": 2062 }, { "epoch": 0.06918755764232414, "grad_norm": 0.4352222257602121, "learning_rate": 2e-05, "loss": 5.4399, "step": 2063 }, { "epoch": 0.06922109499455018, "grad_norm": 0.4164577274720816, "learning_rate": 2e-05, "loss": 5.5916, "step": 2064 }, { "epoch": 0.06925463234677622, "grad_norm": 0.41662226459516594, "learning_rate": 2e-05, "loss": 5.5801, "step": 2065 }, { "epoch": 0.06928816969900227, "grad_norm": 0.44154804741531684, "learning_rate": 2e-05, "loss": 5.5084, "step": 2066 }, { "epoch": 0.06932170705122831, "grad_norm": 0.38774376285510964, "learning_rate": 2e-05, "loss": 5.6438, "step": 2067 }, { "epoch": 0.06935524440345435, "grad_norm": 0.4440181951376095, "learning_rate": 2e-05, "loss": 5.4875, "step": 2068 }, { "epoch": 0.06938878175568039, "grad_norm": 0.4141796460314648, "learning_rate": 2e-05, "loss": 5.6642, "step": 2069 }, { "epoch": 0.06942231910790643, "grad_norm": 0.42452285749850865, "learning_rate": 2e-05, "loss": 5.3975, "step": 2070 }, { "epoch": 0.06945585646013247, "grad_norm": 0.41078504619883244, "learning_rate": 2e-05, "loss": 5.5756, "step": 2071 }, { "epoch": 0.06948939381235851, "grad_norm": 0.41651778447677185, "learning_rate": 2e-05, "loss": 5.6731, "step": 2072 }, { "epoch": 0.06952293116458455, "grad_norm": 0.44199567257082556, "learning_rate": 2e-05, "loss": 5.5233, "step": 2073 }, { "epoch": 0.0695564685168106, "grad_norm": 0.41845148878088445, "learning_rate": 2e-05, "loss": 5.6175, "step": 2074 }, { "epoch": 0.06959000586903664, "grad_norm": 0.41718925148845926, "learning_rate": 2e-05, "loss": 5.5943, "step": 2075 }, { "epoch": 0.06962354322126268, "grad_norm": 0.4438486319495173, "learning_rate": 2e-05, "loss": 5.4439, "step": 2076 }, { "epoch": 0.06965708057348872, "grad_norm": 0.4103634486102282, "learning_rate": 2e-05, "loss": 5.5424, "step": 2077 }, { "epoch": 0.06969061792571477, "grad_norm": 0.3994421298151037, "learning_rate": 2e-05, "loss": 5.4729, "step": 2078 }, { "epoch": 0.0697241552779408, "grad_norm": 0.4035730297103482, "learning_rate": 2e-05, "loss": 5.4427, "step": 2079 }, { "epoch": 0.06975769263016685, "grad_norm": 0.4447692612819552, "learning_rate": 2e-05, "loss": 5.4865, "step": 2080 }, { "epoch": 0.06979122998239289, "grad_norm": 0.4697722412207597, "learning_rate": 2e-05, "loss": 5.6549, "step": 2081 }, { "epoch": 0.06982476733461893, "grad_norm": 0.40328767707730706, "learning_rate": 2e-05, "loss": 5.4426, "step": 2082 }, { "epoch": 0.06985830468684498, "grad_norm": 0.47401139106396684, "learning_rate": 2e-05, "loss": 5.4332, "step": 2083 }, { "epoch": 0.06989184203907102, "grad_norm": 0.49451905496130544, "learning_rate": 2e-05, "loss": 5.4036, "step": 2084 }, { "epoch": 0.06992537939129706, "grad_norm": 0.4202899849966635, "learning_rate": 2e-05, "loss": 5.6732, "step": 2085 }, { "epoch": 0.0699589167435231, "grad_norm": 0.43084446792259085, "learning_rate": 2e-05, "loss": 5.753, "step": 2086 }, { "epoch": 0.06999245409574914, "grad_norm": 0.4525800127501276, "learning_rate": 2e-05, "loss": 5.6694, "step": 2087 }, { "epoch": 0.07002599144797518, "grad_norm": 0.3988595103844281, "learning_rate": 2e-05, "loss": 5.581, "step": 2088 }, { "epoch": 0.07005952880020122, "grad_norm": 0.41901665993085097, "learning_rate": 2e-05, "loss": 5.5284, "step": 2089 }, { "epoch": 0.07009306615242726, "grad_norm": 0.462944122451869, "learning_rate": 2e-05, "loss": 5.5369, "step": 2090 }, { "epoch": 0.0701266035046533, "grad_norm": 0.38729570067011604, "learning_rate": 2e-05, "loss": 5.6178, "step": 2091 }, { "epoch": 0.07016014085687935, "grad_norm": 0.427180380091223, "learning_rate": 2e-05, "loss": 5.3881, "step": 2092 }, { "epoch": 0.0701936782091054, "grad_norm": 0.4277117192161666, "learning_rate": 2e-05, "loss": 5.7129, "step": 2093 }, { "epoch": 0.07022721556133144, "grad_norm": 0.4207823237017602, "learning_rate": 2e-05, "loss": 5.5929, "step": 2094 }, { "epoch": 0.07026075291355748, "grad_norm": 0.4390702638982757, "learning_rate": 2e-05, "loss": 5.7935, "step": 2095 }, { "epoch": 0.07029429026578352, "grad_norm": 0.42243904535029797, "learning_rate": 2e-05, "loss": 5.4189, "step": 2096 }, { "epoch": 0.07032782761800956, "grad_norm": 0.4086829581394726, "learning_rate": 2e-05, "loss": 5.5038, "step": 2097 }, { "epoch": 0.0703613649702356, "grad_norm": 0.4198984155134124, "learning_rate": 2e-05, "loss": 5.483, "step": 2098 }, { "epoch": 0.07039490232246164, "grad_norm": 0.46036905408191686, "learning_rate": 2e-05, "loss": 5.5793, "step": 2099 }, { "epoch": 0.07042843967468769, "grad_norm": 0.4606494169765942, "learning_rate": 2e-05, "loss": 5.5779, "step": 2100 }, { "epoch": 0.07046197702691373, "grad_norm": 0.4205477566216732, "learning_rate": 2e-05, "loss": 5.2871, "step": 2101 }, { "epoch": 0.07049551437913977, "grad_norm": 0.4473896316429227, "learning_rate": 2e-05, "loss": 5.6765, "step": 2102 }, { "epoch": 0.07052905173136581, "grad_norm": 0.4399930684061058, "learning_rate": 2e-05, "loss": 5.3846, "step": 2103 }, { "epoch": 0.07056258908359185, "grad_norm": 0.4233485539981247, "learning_rate": 2e-05, "loss": 5.4175, "step": 2104 }, { "epoch": 0.07059612643581789, "grad_norm": 0.41487838060011495, "learning_rate": 2e-05, "loss": 5.5566, "step": 2105 }, { "epoch": 0.07062966378804393, "grad_norm": 0.3985746773677656, "learning_rate": 2e-05, "loss": 5.3892, "step": 2106 }, { "epoch": 0.07066320114026997, "grad_norm": 0.43123590073298523, "learning_rate": 2e-05, "loss": 5.5032, "step": 2107 }, { "epoch": 0.07069673849249601, "grad_norm": 0.4264852454792201, "learning_rate": 2e-05, "loss": 5.3717, "step": 2108 }, { "epoch": 0.07073027584472207, "grad_norm": 0.398927805305974, "learning_rate": 2e-05, "loss": 5.6009, "step": 2109 }, { "epoch": 0.0707638131969481, "grad_norm": 0.45113350013069564, "learning_rate": 2e-05, "loss": 5.5421, "step": 2110 }, { "epoch": 0.07079735054917415, "grad_norm": 0.43921539330855475, "learning_rate": 2e-05, "loss": 5.6434, "step": 2111 }, { "epoch": 0.07083088790140019, "grad_norm": 0.4085968592458038, "learning_rate": 2e-05, "loss": 5.4223, "step": 2112 }, { "epoch": 0.07086442525362623, "grad_norm": 0.4543199930897221, "learning_rate": 2e-05, "loss": 5.4279, "step": 2113 }, { "epoch": 0.07089796260585227, "grad_norm": 0.4279652433253538, "learning_rate": 2e-05, "loss": 5.601, "step": 2114 }, { "epoch": 0.0709314999580783, "grad_norm": 0.4300247407861694, "learning_rate": 2e-05, "loss": 5.4398, "step": 2115 }, { "epoch": 0.07096503731030435, "grad_norm": 0.44976059649352135, "learning_rate": 2e-05, "loss": 5.6628, "step": 2116 }, { "epoch": 0.07099857466253039, "grad_norm": 0.4476896077030886, "learning_rate": 2e-05, "loss": 5.645, "step": 2117 }, { "epoch": 0.07103211201475644, "grad_norm": 0.4160462009636024, "learning_rate": 2e-05, "loss": 5.793, "step": 2118 }, { "epoch": 0.07106564936698248, "grad_norm": 0.4482096023117798, "learning_rate": 2e-05, "loss": 5.7061, "step": 2119 }, { "epoch": 0.07109918671920852, "grad_norm": 0.45066594850936204, "learning_rate": 2e-05, "loss": 5.7095, "step": 2120 }, { "epoch": 0.07113272407143456, "grad_norm": 0.40625573565733075, "learning_rate": 2e-05, "loss": 5.4518, "step": 2121 }, { "epoch": 0.0711662614236606, "grad_norm": 0.4011769772802175, "learning_rate": 2e-05, "loss": 5.482, "step": 2122 }, { "epoch": 0.07119979877588664, "grad_norm": 0.41557198413356694, "learning_rate": 2e-05, "loss": 5.581, "step": 2123 }, { "epoch": 0.07123333612811268, "grad_norm": 0.4224337283509843, "learning_rate": 2e-05, "loss": 5.5659, "step": 2124 }, { "epoch": 0.07126687348033872, "grad_norm": 0.4210850654195944, "learning_rate": 2e-05, "loss": 5.5703, "step": 2125 }, { "epoch": 0.07130041083256478, "grad_norm": 0.4121705592796871, "learning_rate": 2e-05, "loss": 5.4697, "step": 2126 }, { "epoch": 0.07133394818479082, "grad_norm": 0.4176578369374197, "learning_rate": 2e-05, "loss": 5.4978, "step": 2127 }, { "epoch": 0.07136748553701686, "grad_norm": 0.3917501354213043, "learning_rate": 2e-05, "loss": 5.7188, "step": 2128 }, { "epoch": 0.0714010228892429, "grad_norm": 0.4294539403024907, "learning_rate": 2e-05, "loss": 5.6482, "step": 2129 }, { "epoch": 0.07143456024146894, "grad_norm": 0.4052404669212931, "learning_rate": 2e-05, "loss": 5.6231, "step": 2130 }, { "epoch": 0.07146809759369498, "grad_norm": 0.41778488350785015, "learning_rate": 2e-05, "loss": 5.7943, "step": 2131 }, { "epoch": 0.07150163494592102, "grad_norm": 0.3880656910579795, "learning_rate": 2e-05, "loss": 5.5638, "step": 2132 }, { "epoch": 0.07153517229814706, "grad_norm": 0.4590680571278188, "learning_rate": 2e-05, "loss": 5.6799, "step": 2133 }, { "epoch": 0.0715687096503731, "grad_norm": 0.4393766312817034, "learning_rate": 2e-05, "loss": 5.5454, "step": 2134 }, { "epoch": 0.07160224700259915, "grad_norm": 0.406780983064134, "learning_rate": 2e-05, "loss": 5.6864, "step": 2135 }, { "epoch": 0.07163578435482519, "grad_norm": 0.40065225936061516, "learning_rate": 2e-05, "loss": 5.5248, "step": 2136 }, { "epoch": 0.07166932170705123, "grad_norm": 0.3938831890434465, "learning_rate": 2e-05, "loss": 5.5374, "step": 2137 }, { "epoch": 0.07170285905927727, "grad_norm": 0.4212217335786586, "learning_rate": 2e-05, "loss": 5.408, "step": 2138 }, { "epoch": 0.07173639641150331, "grad_norm": 0.44394383422509687, "learning_rate": 2e-05, "loss": 5.4602, "step": 2139 }, { "epoch": 0.07176993376372935, "grad_norm": 0.4318935158270158, "learning_rate": 2e-05, "loss": 5.5332, "step": 2140 }, { "epoch": 0.07180347111595539, "grad_norm": 0.4349652128935446, "learning_rate": 2e-05, "loss": 5.5662, "step": 2141 }, { "epoch": 0.07183700846818143, "grad_norm": 0.45260815127630394, "learning_rate": 2e-05, "loss": 5.5891, "step": 2142 }, { "epoch": 0.07187054582040747, "grad_norm": 0.4166727705772603, "learning_rate": 2e-05, "loss": 5.6141, "step": 2143 }, { "epoch": 0.07190408317263353, "grad_norm": 0.4014613907657115, "learning_rate": 2e-05, "loss": 5.6364, "step": 2144 }, { "epoch": 0.07193762052485957, "grad_norm": 0.39789127329899493, "learning_rate": 2e-05, "loss": 5.4808, "step": 2145 }, { "epoch": 0.0719711578770856, "grad_norm": 0.4045670835509802, "learning_rate": 2e-05, "loss": 5.5907, "step": 2146 }, { "epoch": 0.07200469522931165, "grad_norm": 0.3925689641712914, "learning_rate": 2e-05, "loss": 5.4191, "step": 2147 }, { "epoch": 0.07203823258153769, "grad_norm": 0.3960838333848315, "learning_rate": 2e-05, "loss": 5.5294, "step": 2148 }, { "epoch": 0.07207176993376373, "grad_norm": 0.4239846188525824, "learning_rate": 2e-05, "loss": 5.5857, "step": 2149 }, { "epoch": 0.07210530728598977, "grad_norm": 0.4208111812144923, "learning_rate": 2e-05, "loss": 5.5803, "step": 2150 }, { "epoch": 0.07213884463821581, "grad_norm": 0.4277537947089496, "learning_rate": 2e-05, "loss": 5.4993, "step": 2151 }, { "epoch": 0.07217238199044186, "grad_norm": 0.4398990240895716, "learning_rate": 2e-05, "loss": 5.4166, "step": 2152 }, { "epoch": 0.0722059193426679, "grad_norm": 0.4189041582660968, "learning_rate": 2e-05, "loss": 5.5429, "step": 2153 }, { "epoch": 0.07223945669489394, "grad_norm": 0.4228046712596033, "learning_rate": 2e-05, "loss": 5.6554, "step": 2154 }, { "epoch": 0.07227299404711998, "grad_norm": 0.44708046094883824, "learning_rate": 2e-05, "loss": 5.4381, "step": 2155 }, { "epoch": 0.07230653139934602, "grad_norm": 0.404641213958227, "learning_rate": 2e-05, "loss": 5.6694, "step": 2156 }, { "epoch": 0.07234006875157206, "grad_norm": 0.4386468807454095, "learning_rate": 2e-05, "loss": 5.5189, "step": 2157 }, { "epoch": 0.0723736061037981, "grad_norm": 0.4246631312484799, "learning_rate": 2e-05, "loss": 5.5549, "step": 2158 }, { "epoch": 0.07240714345602414, "grad_norm": 0.41074482838285237, "learning_rate": 2e-05, "loss": 5.7209, "step": 2159 }, { "epoch": 0.07244068080825018, "grad_norm": 0.434972689230749, "learning_rate": 2e-05, "loss": 5.414, "step": 2160 }, { "epoch": 0.07247421816047624, "grad_norm": 0.4583961560149038, "learning_rate": 2e-05, "loss": 5.5104, "step": 2161 }, { "epoch": 0.07250775551270228, "grad_norm": 0.38258612992035196, "learning_rate": 2e-05, "loss": 5.4934, "step": 2162 }, { "epoch": 0.07254129286492832, "grad_norm": 0.39620803736368027, "learning_rate": 2e-05, "loss": 5.6012, "step": 2163 }, { "epoch": 0.07257483021715436, "grad_norm": 0.4606218060960072, "learning_rate": 2e-05, "loss": 5.632, "step": 2164 }, { "epoch": 0.0726083675693804, "grad_norm": 0.40314816164300127, "learning_rate": 2e-05, "loss": 5.8992, "step": 2165 }, { "epoch": 0.07264190492160644, "grad_norm": 0.41352153717030626, "learning_rate": 2e-05, "loss": 5.5596, "step": 2166 }, { "epoch": 0.07267544227383248, "grad_norm": 0.39876832812650215, "learning_rate": 2e-05, "loss": 5.5574, "step": 2167 }, { "epoch": 0.07270897962605852, "grad_norm": 0.39637417719416007, "learning_rate": 2e-05, "loss": 5.7619, "step": 2168 }, { "epoch": 0.07274251697828456, "grad_norm": 0.40717004624962616, "learning_rate": 2e-05, "loss": 5.3772, "step": 2169 }, { "epoch": 0.07277605433051061, "grad_norm": 0.45369188442487735, "learning_rate": 2e-05, "loss": 5.4373, "step": 2170 }, { "epoch": 0.07280959168273665, "grad_norm": 0.39552506663130904, "learning_rate": 2e-05, "loss": 5.6133, "step": 2171 }, { "epoch": 0.07284312903496269, "grad_norm": 0.44551094493059124, "learning_rate": 2e-05, "loss": 5.5835, "step": 2172 }, { "epoch": 0.07287666638718873, "grad_norm": 0.4153106521797292, "learning_rate": 2e-05, "loss": 5.3619, "step": 2173 }, { "epoch": 0.07291020373941477, "grad_norm": 0.41324908080367956, "learning_rate": 2e-05, "loss": 5.5606, "step": 2174 }, { "epoch": 0.07294374109164081, "grad_norm": 0.4104227072116456, "learning_rate": 2e-05, "loss": 5.4052, "step": 2175 }, { "epoch": 0.07297727844386685, "grad_norm": 0.43784972169289993, "learning_rate": 2e-05, "loss": 5.6226, "step": 2176 }, { "epoch": 0.07301081579609289, "grad_norm": 0.41339699136915753, "learning_rate": 2e-05, "loss": 5.8219, "step": 2177 }, { "epoch": 0.07304435314831895, "grad_norm": 0.4271378987186215, "learning_rate": 2e-05, "loss": 5.6712, "step": 2178 }, { "epoch": 0.07307789050054499, "grad_norm": 0.4378711492111858, "learning_rate": 2e-05, "loss": 5.7269, "step": 2179 }, { "epoch": 0.07311142785277103, "grad_norm": 0.4048184128262175, "learning_rate": 2e-05, "loss": 5.3974, "step": 2180 }, { "epoch": 0.07314496520499707, "grad_norm": 0.4152156905469408, "learning_rate": 2e-05, "loss": 5.6381, "step": 2181 }, { "epoch": 0.07317850255722311, "grad_norm": 0.45141305340567933, "learning_rate": 2e-05, "loss": 5.2393, "step": 2182 }, { "epoch": 0.07321203990944915, "grad_norm": 0.4021727639458626, "learning_rate": 2e-05, "loss": 5.6112, "step": 2183 }, { "epoch": 0.07324557726167519, "grad_norm": 0.4301620375345319, "learning_rate": 2e-05, "loss": 5.5109, "step": 2184 }, { "epoch": 0.07327911461390123, "grad_norm": 0.4601118949769579, "learning_rate": 2e-05, "loss": 5.4716, "step": 2185 }, { "epoch": 0.07331265196612727, "grad_norm": 0.4232626532887407, "learning_rate": 2e-05, "loss": 5.5497, "step": 2186 }, { "epoch": 0.07334618931835332, "grad_norm": 0.39044550667713523, "learning_rate": 2e-05, "loss": 5.5328, "step": 2187 }, { "epoch": 0.07337972667057936, "grad_norm": 0.49735768408462466, "learning_rate": 2e-05, "loss": 5.4585, "step": 2188 }, { "epoch": 0.0734132640228054, "grad_norm": 0.41763444050615645, "learning_rate": 2e-05, "loss": 5.5022, "step": 2189 }, { "epoch": 0.07344680137503144, "grad_norm": 0.40779102138835, "learning_rate": 2e-05, "loss": 5.7095, "step": 2190 }, { "epoch": 0.07348033872725748, "grad_norm": 0.43603935289775436, "learning_rate": 2e-05, "loss": 5.5584, "step": 2191 }, { "epoch": 0.07351387607948352, "grad_norm": 0.4623938475739784, "learning_rate": 2e-05, "loss": 5.5415, "step": 2192 }, { "epoch": 0.07354741343170956, "grad_norm": 0.42761453273296146, "learning_rate": 2e-05, "loss": 5.5963, "step": 2193 }, { "epoch": 0.0735809507839356, "grad_norm": 0.41628581479747756, "learning_rate": 2e-05, "loss": 5.6908, "step": 2194 }, { "epoch": 0.07361448813616164, "grad_norm": 0.42504199200096804, "learning_rate": 2e-05, "loss": 5.5388, "step": 2195 }, { "epoch": 0.0736480254883877, "grad_norm": 0.40860670965486623, "learning_rate": 2e-05, "loss": 5.6137, "step": 2196 }, { "epoch": 0.07368156284061374, "grad_norm": 0.3894616676660043, "learning_rate": 2e-05, "loss": 5.452, "step": 2197 }, { "epoch": 0.07371510019283978, "grad_norm": 0.4405840066066374, "learning_rate": 2e-05, "loss": 5.5208, "step": 2198 }, { "epoch": 0.07374863754506582, "grad_norm": 0.4222179058395402, "learning_rate": 2e-05, "loss": 5.6622, "step": 2199 }, { "epoch": 0.07378217489729186, "grad_norm": 0.4023487971224155, "learning_rate": 2e-05, "loss": 5.5935, "step": 2200 }, { "epoch": 0.0738157122495179, "grad_norm": 0.41870558765060906, "learning_rate": 2e-05, "loss": 5.5346, "step": 2201 }, { "epoch": 0.07384924960174394, "grad_norm": 0.4330266800796842, "learning_rate": 2e-05, "loss": 5.5053, "step": 2202 }, { "epoch": 0.07388278695396998, "grad_norm": 0.4276068133181351, "learning_rate": 2e-05, "loss": 5.5682, "step": 2203 }, { "epoch": 0.07391632430619603, "grad_norm": 0.4070371573199137, "learning_rate": 2e-05, "loss": 5.4097, "step": 2204 }, { "epoch": 0.07394986165842207, "grad_norm": 0.453092964150827, "learning_rate": 2e-05, "loss": 5.4923, "step": 2205 }, { "epoch": 0.07398339901064811, "grad_norm": 0.41610992729840823, "learning_rate": 2e-05, "loss": 5.6847, "step": 2206 }, { "epoch": 0.07401693636287415, "grad_norm": 0.4155737026866814, "learning_rate": 2e-05, "loss": 5.4292, "step": 2207 }, { "epoch": 0.07405047371510019, "grad_norm": 0.4466712474745684, "learning_rate": 2e-05, "loss": 5.6672, "step": 2208 }, { "epoch": 0.07408401106732623, "grad_norm": 0.4117401951042844, "learning_rate": 2e-05, "loss": 5.652, "step": 2209 }, { "epoch": 0.07411754841955227, "grad_norm": 0.4226435711223404, "learning_rate": 2e-05, "loss": 5.5725, "step": 2210 }, { "epoch": 0.07415108577177831, "grad_norm": 0.41479364618748343, "learning_rate": 2e-05, "loss": 5.4821, "step": 2211 }, { "epoch": 0.07418462312400435, "grad_norm": 0.4249342170473214, "learning_rate": 2e-05, "loss": 5.6093, "step": 2212 }, { "epoch": 0.07421816047623041, "grad_norm": 0.4388414649280282, "learning_rate": 2e-05, "loss": 5.5609, "step": 2213 }, { "epoch": 0.07425169782845645, "grad_norm": 0.40067482679018085, "learning_rate": 2e-05, "loss": 5.5466, "step": 2214 }, { "epoch": 0.07428523518068249, "grad_norm": 0.41084498445047685, "learning_rate": 2e-05, "loss": 5.4932, "step": 2215 }, { "epoch": 0.07431877253290853, "grad_norm": 0.41595993793212804, "learning_rate": 2e-05, "loss": 5.4271, "step": 2216 }, { "epoch": 0.07435230988513457, "grad_norm": 0.43435432985512235, "learning_rate": 2e-05, "loss": 5.2577, "step": 2217 }, { "epoch": 0.07438584723736061, "grad_norm": 0.4424600616918409, "learning_rate": 2e-05, "loss": 5.581, "step": 2218 }, { "epoch": 0.07441938458958665, "grad_norm": 0.4095649540915621, "learning_rate": 2e-05, "loss": 5.3612, "step": 2219 }, { "epoch": 0.07445292194181269, "grad_norm": 0.4192132220182597, "learning_rate": 2e-05, "loss": 5.3183, "step": 2220 }, { "epoch": 0.07448645929403873, "grad_norm": 0.4364468321631441, "learning_rate": 2e-05, "loss": 5.6648, "step": 2221 }, { "epoch": 0.07451999664626478, "grad_norm": 0.4797407776305945, "learning_rate": 2e-05, "loss": 5.3981, "step": 2222 }, { "epoch": 0.07455353399849082, "grad_norm": 0.41156886688570454, "learning_rate": 2e-05, "loss": 5.7478, "step": 2223 }, { "epoch": 0.07458707135071686, "grad_norm": 0.3964119545122109, "learning_rate": 2e-05, "loss": 5.5434, "step": 2224 }, { "epoch": 0.0746206087029429, "grad_norm": 0.4266043816466125, "learning_rate": 2e-05, "loss": 5.5607, "step": 2225 }, { "epoch": 0.07465414605516894, "grad_norm": 0.4765487177488737, "learning_rate": 2e-05, "loss": 5.5563, "step": 2226 }, { "epoch": 0.07468768340739498, "grad_norm": 0.4100835334476101, "learning_rate": 2e-05, "loss": 5.5703, "step": 2227 }, { "epoch": 0.07472122075962102, "grad_norm": 0.43019853385990636, "learning_rate": 2e-05, "loss": 5.4225, "step": 2228 }, { "epoch": 0.07475475811184706, "grad_norm": 0.4810349171166455, "learning_rate": 2e-05, "loss": 5.6713, "step": 2229 }, { "epoch": 0.07478829546407312, "grad_norm": 0.42106445276053844, "learning_rate": 2e-05, "loss": 5.7582, "step": 2230 }, { "epoch": 0.07482183281629916, "grad_norm": 0.4219688836732234, "learning_rate": 2e-05, "loss": 5.6949, "step": 2231 }, { "epoch": 0.0748553701685252, "grad_norm": 0.43757188992682805, "learning_rate": 2e-05, "loss": 5.627, "step": 2232 }, { "epoch": 0.07488890752075124, "grad_norm": 0.4061771108823896, "learning_rate": 2e-05, "loss": 5.5238, "step": 2233 }, { "epoch": 0.07492244487297728, "grad_norm": 0.42911880913284917, "learning_rate": 2e-05, "loss": 5.6134, "step": 2234 }, { "epoch": 0.07495598222520332, "grad_norm": 0.4412899419974271, "learning_rate": 2e-05, "loss": 5.6881, "step": 2235 }, { "epoch": 0.07498951957742936, "grad_norm": 0.44318509448714716, "learning_rate": 2e-05, "loss": 5.6786, "step": 2236 }, { "epoch": 0.0750230569296554, "grad_norm": 0.4057048301739163, "learning_rate": 2e-05, "loss": 5.616, "step": 2237 }, { "epoch": 0.07505659428188144, "grad_norm": 0.4595176649782218, "learning_rate": 2e-05, "loss": 5.9773, "step": 2238 }, { "epoch": 0.0750901316341075, "grad_norm": 0.47435781815533246, "learning_rate": 2e-05, "loss": 5.5776, "step": 2239 }, { "epoch": 0.07512366898633353, "grad_norm": 0.417454975962046, "learning_rate": 2e-05, "loss": 5.6642, "step": 2240 }, { "epoch": 0.07515720633855957, "grad_norm": 0.4198406300398764, "learning_rate": 2e-05, "loss": 5.642, "step": 2241 }, { "epoch": 0.07519074369078561, "grad_norm": 0.4623705495677565, "learning_rate": 2e-05, "loss": 5.588, "step": 2242 }, { "epoch": 0.07522428104301165, "grad_norm": 0.4116723151540014, "learning_rate": 2e-05, "loss": 5.6714, "step": 2243 }, { "epoch": 0.0752578183952377, "grad_norm": 0.41032296557762893, "learning_rate": 2e-05, "loss": 5.6105, "step": 2244 }, { "epoch": 0.07529135574746373, "grad_norm": 0.44023120684370765, "learning_rate": 2e-05, "loss": 5.537, "step": 2245 }, { "epoch": 0.07532489309968977, "grad_norm": 0.44594203160437595, "learning_rate": 2e-05, "loss": 5.5452, "step": 2246 }, { "epoch": 0.07535843045191581, "grad_norm": 0.42176426825336544, "learning_rate": 2e-05, "loss": 5.4137, "step": 2247 }, { "epoch": 0.07539196780414187, "grad_norm": 0.49184599495848147, "learning_rate": 2e-05, "loss": 5.6235, "step": 2248 }, { "epoch": 0.07542550515636791, "grad_norm": 0.46969161984812474, "learning_rate": 2e-05, "loss": 5.5419, "step": 2249 }, { "epoch": 0.07545904250859395, "grad_norm": 0.44041515060220776, "learning_rate": 2e-05, "loss": 5.7167, "step": 2250 }, { "epoch": 0.07549257986081999, "grad_norm": 0.4715259954803985, "learning_rate": 2e-05, "loss": 5.5031, "step": 2251 }, { "epoch": 0.07552611721304603, "grad_norm": 0.42576315766752204, "learning_rate": 2e-05, "loss": 5.6498, "step": 2252 }, { "epoch": 0.07555965456527207, "grad_norm": 0.4164747470515665, "learning_rate": 2e-05, "loss": 5.552, "step": 2253 }, { "epoch": 0.07559319191749811, "grad_norm": 0.41161683918669806, "learning_rate": 2e-05, "loss": 5.5258, "step": 2254 }, { "epoch": 0.07562672926972415, "grad_norm": 0.4348159865890194, "learning_rate": 2e-05, "loss": 5.6124, "step": 2255 }, { "epoch": 0.0756602666219502, "grad_norm": 0.42484858047457136, "learning_rate": 2e-05, "loss": 5.4877, "step": 2256 }, { "epoch": 0.07569380397417624, "grad_norm": 0.4061714313478767, "learning_rate": 2e-05, "loss": 5.6973, "step": 2257 }, { "epoch": 0.07572734132640228, "grad_norm": 0.4737513470792058, "learning_rate": 2e-05, "loss": 5.549, "step": 2258 }, { "epoch": 0.07576087867862832, "grad_norm": 0.4321471667722047, "learning_rate": 2e-05, "loss": 5.4729, "step": 2259 }, { "epoch": 0.07579441603085436, "grad_norm": 0.41251980342202665, "learning_rate": 2e-05, "loss": 5.4349, "step": 2260 }, { "epoch": 0.0758279533830804, "grad_norm": 0.45966817160756757, "learning_rate": 2e-05, "loss": 5.5594, "step": 2261 }, { "epoch": 0.07586149073530644, "grad_norm": 0.4372794355862907, "learning_rate": 2e-05, "loss": 5.6271, "step": 2262 }, { "epoch": 0.07589502808753248, "grad_norm": 0.45200352093388313, "learning_rate": 2e-05, "loss": 5.4972, "step": 2263 }, { "epoch": 0.07592856543975852, "grad_norm": 0.4425019369191849, "learning_rate": 2e-05, "loss": 5.5809, "step": 2264 }, { "epoch": 0.07596210279198458, "grad_norm": 0.4254450416906938, "learning_rate": 2e-05, "loss": 5.6599, "step": 2265 }, { "epoch": 0.07599564014421062, "grad_norm": 0.39329750287214404, "learning_rate": 2e-05, "loss": 5.3916, "step": 2266 }, { "epoch": 0.07602917749643666, "grad_norm": 0.4103828695429188, "learning_rate": 2e-05, "loss": 5.5116, "step": 2267 }, { "epoch": 0.0760627148486627, "grad_norm": 0.3985613582501987, "learning_rate": 2e-05, "loss": 5.6968, "step": 2268 }, { "epoch": 0.07609625220088874, "grad_norm": 0.4078253723923642, "learning_rate": 2e-05, "loss": 5.7565, "step": 2269 }, { "epoch": 0.07612978955311478, "grad_norm": 0.40976883900304334, "learning_rate": 2e-05, "loss": 5.3571, "step": 2270 }, { "epoch": 0.07616332690534082, "grad_norm": 0.3955160904639242, "learning_rate": 2e-05, "loss": 5.558, "step": 2271 }, { "epoch": 0.07619686425756686, "grad_norm": 0.40632282239617395, "learning_rate": 2e-05, "loss": 5.6932, "step": 2272 }, { "epoch": 0.0762304016097929, "grad_norm": 0.39645392335344515, "learning_rate": 2e-05, "loss": 5.643, "step": 2273 }, { "epoch": 0.07626393896201895, "grad_norm": 0.4516809071973249, "learning_rate": 2e-05, "loss": 5.4307, "step": 2274 }, { "epoch": 0.076297476314245, "grad_norm": 0.41444624957346654, "learning_rate": 2e-05, "loss": 5.5983, "step": 2275 }, { "epoch": 0.07633101366647103, "grad_norm": 0.40527658396324207, "learning_rate": 2e-05, "loss": 5.5452, "step": 2276 }, { "epoch": 0.07636455101869707, "grad_norm": 0.4209943316476018, "learning_rate": 2e-05, "loss": 5.4853, "step": 2277 }, { "epoch": 0.07639808837092311, "grad_norm": 0.4628423205700783, "learning_rate": 2e-05, "loss": 5.4591, "step": 2278 }, { "epoch": 0.07643162572314915, "grad_norm": 0.39406321675998457, "learning_rate": 2e-05, "loss": 5.6201, "step": 2279 }, { "epoch": 0.0764651630753752, "grad_norm": 0.41750005879615193, "learning_rate": 2e-05, "loss": 5.7178, "step": 2280 }, { "epoch": 0.07649870042760124, "grad_norm": 0.4219431385112224, "learning_rate": 2e-05, "loss": 5.7212, "step": 2281 }, { "epoch": 0.07653223777982729, "grad_norm": 0.4302416352608855, "learning_rate": 2e-05, "loss": 5.6082, "step": 2282 }, { "epoch": 0.07656577513205333, "grad_norm": 0.40818597646443655, "learning_rate": 2e-05, "loss": 5.5471, "step": 2283 }, { "epoch": 0.07659931248427937, "grad_norm": 0.45099870984918344, "learning_rate": 2e-05, "loss": 5.4666, "step": 2284 }, { "epoch": 0.07663284983650541, "grad_norm": 0.3892051637504154, "learning_rate": 2e-05, "loss": 5.6087, "step": 2285 }, { "epoch": 0.07666638718873145, "grad_norm": 0.4186358442567866, "learning_rate": 2e-05, "loss": 5.6885, "step": 2286 }, { "epoch": 0.07669992454095749, "grad_norm": 0.4143915970492047, "learning_rate": 2e-05, "loss": 5.6635, "step": 2287 }, { "epoch": 0.07673346189318353, "grad_norm": 0.4120483641703096, "learning_rate": 2e-05, "loss": 5.461, "step": 2288 }, { "epoch": 0.07676699924540957, "grad_norm": 0.44169611789507696, "learning_rate": 2e-05, "loss": 5.5551, "step": 2289 }, { "epoch": 0.07680053659763561, "grad_norm": 0.4249010263464383, "learning_rate": 2e-05, "loss": 5.7773, "step": 2290 }, { "epoch": 0.07683407394986166, "grad_norm": 0.41303679669908067, "learning_rate": 2e-05, "loss": 5.4772, "step": 2291 }, { "epoch": 0.0768676113020877, "grad_norm": 0.4422516760790734, "learning_rate": 2e-05, "loss": 5.7523, "step": 2292 }, { "epoch": 0.07690114865431374, "grad_norm": 0.4034246918961536, "learning_rate": 2e-05, "loss": 5.7225, "step": 2293 }, { "epoch": 0.07693468600653978, "grad_norm": 0.424421574279226, "learning_rate": 2e-05, "loss": 5.5788, "step": 2294 }, { "epoch": 0.07696822335876582, "grad_norm": 0.41119060497706034, "learning_rate": 2e-05, "loss": 5.5331, "step": 2295 }, { "epoch": 0.07700176071099187, "grad_norm": 0.4259162943005596, "learning_rate": 2e-05, "loss": 5.4879, "step": 2296 }, { "epoch": 0.0770352980632179, "grad_norm": 0.3995270633731702, "learning_rate": 2e-05, "loss": 5.3963, "step": 2297 }, { "epoch": 0.07706883541544395, "grad_norm": 0.40438103698668726, "learning_rate": 2e-05, "loss": 5.6605, "step": 2298 }, { "epoch": 0.07710237276767, "grad_norm": 0.47283027962072366, "learning_rate": 2e-05, "loss": 5.5503, "step": 2299 }, { "epoch": 0.07713591011989604, "grad_norm": 0.41843931397124895, "learning_rate": 2e-05, "loss": 5.6381, "step": 2300 }, { "epoch": 0.07716944747212208, "grad_norm": 0.4299129264711115, "learning_rate": 2e-05, "loss": 5.6876, "step": 2301 }, { "epoch": 0.07720298482434812, "grad_norm": 0.40928887227877836, "learning_rate": 2e-05, "loss": 5.7315, "step": 2302 }, { "epoch": 0.07723652217657416, "grad_norm": 0.42884187679872887, "learning_rate": 2e-05, "loss": 5.4988, "step": 2303 }, { "epoch": 0.0772700595288002, "grad_norm": 0.40884662191417503, "learning_rate": 2e-05, "loss": 5.5219, "step": 2304 }, { "epoch": 0.07730359688102624, "grad_norm": 0.4124321283430422, "learning_rate": 2e-05, "loss": 5.5192, "step": 2305 }, { "epoch": 0.07733713423325228, "grad_norm": 0.40796456985613394, "learning_rate": 2e-05, "loss": 5.4471, "step": 2306 }, { "epoch": 0.07737067158547832, "grad_norm": 0.436107222424359, "learning_rate": 2e-05, "loss": 5.2525, "step": 2307 }, { "epoch": 0.07740420893770437, "grad_norm": 0.42031416928201887, "learning_rate": 2e-05, "loss": 5.6817, "step": 2308 }, { "epoch": 0.07743774628993041, "grad_norm": 0.4694193605944691, "learning_rate": 2e-05, "loss": 5.4577, "step": 2309 }, { "epoch": 0.07747128364215645, "grad_norm": 0.459327670503227, "learning_rate": 2e-05, "loss": 5.5533, "step": 2310 }, { "epoch": 0.0775048209943825, "grad_norm": 0.4119371464661352, "learning_rate": 2e-05, "loss": 5.7213, "step": 2311 }, { "epoch": 0.07753835834660854, "grad_norm": 0.43910321037485806, "learning_rate": 2e-05, "loss": 5.8645, "step": 2312 }, { "epoch": 0.07757189569883458, "grad_norm": 0.475890003710246, "learning_rate": 2e-05, "loss": 5.7036, "step": 2313 }, { "epoch": 0.07760543305106062, "grad_norm": 0.445965083012731, "learning_rate": 2e-05, "loss": 5.833, "step": 2314 }, { "epoch": 0.07763897040328666, "grad_norm": 0.42115300196229066, "learning_rate": 2e-05, "loss": 5.5483, "step": 2315 }, { "epoch": 0.0776725077555127, "grad_norm": 0.4309288655919566, "learning_rate": 2e-05, "loss": 5.4669, "step": 2316 }, { "epoch": 0.07770604510773875, "grad_norm": 0.42733220324036236, "learning_rate": 2e-05, "loss": 5.6609, "step": 2317 }, { "epoch": 0.07773958245996479, "grad_norm": 0.4190398059624189, "learning_rate": 2e-05, "loss": 5.5978, "step": 2318 }, { "epoch": 0.07777311981219083, "grad_norm": 0.4023230641754978, "learning_rate": 2e-05, "loss": 5.7014, "step": 2319 }, { "epoch": 0.07780665716441687, "grad_norm": 0.416490126081964, "learning_rate": 2e-05, "loss": 5.532, "step": 2320 }, { "epoch": 0.07784019451664291, "grad_norm": 0.4016993869722856, "learning_rate": 2e-05, "loss": 5.7168, "step": 2321 }, { "epoch": 0.07787373186886895, "grad_norm": 0.4044846708791753, "learning_rate": 2e-05, "loss": 5.388, "step": 2322 }, { "epoch": 0.07790726922109499, "grad_norm": 0.45361605498758867, "learning_rate": 2e-05, "loss": 5.5214, "step": 2323 }, { "epoch": 0.07794080657332103, "grad_norm": 0.41779297700295476, "learning_rate": 2e-05, "loss": 5.7178, "step": 2324 }, { "epoch": 0.07797434392554708, "grad_norm": 0.4055774601294754, "learning_rate": 2e-05, "loss": 5.4946, "step": 2325 }, { "epoch": 0.07800788127777313, "grad_norm": 0.38803340601589525, "learning_rate": 2e-05, "loss": 5.7183, "step": 2326 }, { "epoch": 0.07804141862999917, "grad_norm": 0.40010470762623457, "learning_rate": 2e-05, "loss": 5.5499, "step": 2327 }, { "epoch": 0.0780749559822252, "grad_norm": 0.47276625958876695, "learning_rate": 2e-05, "loss": 5.4209, "step": 2328 }, { "epoch": 0.07810849333445125, "grad_norm": 0.4005596032041073, "learning_rate": 2e-05, "loss": 5.5293, "step": 2329 }, { "epoch": 0.07814203068667729, "grad_norm": 0.42346422019150864, "learning_rate": 2e-05, "loss": 5.5647, "step": 2330 }, { "epoch": 0.07817556803890333, "grad_norm": 0.41208708724365223, "learning_rate": 2e-05, "loss": 5.6925, "step": 2331 }, { "epoch": 0.07820910539112937, "grad_norm": 0.41949922833628994, "learning_rate": 2e-05, "loss": 5.5695, "step": 2332 }, { "epoch": 0.0782426427433554, "grad_norm": 0.4196968753117631, "learning_rate": 2e-05, "loss": 5.5711, "step": 2333 }, { "epoch": 0.07827618009558146, "grad_norm": 0.39804864837662246, "learning_rate": 2e-05, "loss": 5.4923, "step": 2334 }, { "epoch": 0.0783097174478075, "grad_norm": 0.4283596473129377, "learning_rate": 2e-05, "loss": 5.4845, "step": 2335 }, { "epoch": 0.07834325480003354, "grad_norm": 0.39388822017055997, "learning_rate": 2e-05, "loss": 5.4013, "step": 2336 }, { "epoch": 0.07837679215225958, "grad_norm": 0.4304439164495861, "learning_rate": 2e-05, "loss": 5.5132, "step": 2337 }, { "epoch": 0.07841032950448562, "grad_norm": 0.40427495470013736, "learning_rate": 2e-05, "loss": 5.7326, "step": 2338 }, { "epoch": 0.07844386685671166, "grad_norm": 0.39957931355000487, "learning_rate": 2e-05, "loss": 5.707, "step": 2339 }, { "epoch": 0.0784774042089377, "grad_norm": 0.41932309483208385, "learning_rate": 2e-05, "loss": 5.5358, "step": 2340 }, { "epoch": 0.07851094156116374, "grad_norm": 0.39588319580346093, "learning_rate": 2e-05, "loss": 5.5173, "step": 2341 }, { "epoch": 0.07854447891338978, "grad_norm": 0.40220196706134426, "learning_rate": 2e-05, "loss": 5.4805, "step": 2342 }, { "epoch": 0.07857801626561584, "grad_norm": 0.4189380864630713, "learning_rate": 2e-05, "loss": 5.6225, "step": 2343 }, { "epoch": 0.07861155361784188, "grad_norm": 0.4322217376196091, "learning_rate": 2e-05, "loss": 5.6626, "step": 2344 }, { "epoch": 0.07864509097006792, "grad_norm": 0.4214297416142506, "learning_rate": 2e-05, "loss": 5.5764, "step": 2345 }, { "epoch": 0.07867862832229396, "grad_norm": 0.41325993949362994, "learning_rate": 2e-05, "loss": 5.3871, "step": 2346 }, { "epoch": 0.07871216567452, "grad_norm": 0.40126016132161535, "learning_rate": 2e-05, "loss": 5.4753, "step": 2347 }, { "epoch": 0.07874570302674604, "grad_norm": 0.40101268954433517, "learning_rate": 2e-05, "loss": 5.6487, "step": 2348 }, { "epoch": 0.07877924037897208, "grad_norm": 0.425889444014661, "learning_rate": 2e-05, "loss": 5.7883, "step": 2349 }, { "epoch": 0.07881277773119812, "grad_norm": 0.4262712072324991, "learning_rate": 2e-05, "loss": 5.6727, "step": 2350 }, { "epoch": 0.07884631508342417, "grad_norm": 0.4025772787414915, "learning_rate": 2e-05, "loss": 5.491, "step": 2351 }, { "epoch": 0.07887985243565021, "grad_norm": 0.4065769317631239, "learning_rate": 2e-05, "loss": 5.586, "step": 2352 }, { "epoch": 0.07891338978787625, "grad_norm": 0.4022497523678938, "learning_rate": 2e-05, "loss": 5.6169, "step": 2353 }, { "epoch": 0.07894692714010229, "grad_norm": 0.40791114353745284, "learning_rate": 2e-05, "loss": 5.4957, "step": 2354 }, { "epoch": 0.07898046449232833, "grad_norm": 0.4081260783048824, "learning_rate": 2e-05, "loss": 5.6008, "step": 2355 }, { "epoch": 0.07901400184455437, "grad_norm": 0.41129936607809575, "learning_rate": 2e-05, "loss": 5.4977, "step": 2356 }, { "epoch": 0.07904753919678041, "grad_norm": 0.4172356109297457, "learning_rate": 2e-05, "loss": 5.6868, "step": 2357 }, { "epoch": 0.07908107654900645, "grad_norm": 0.4004859170277205, "learning_rate": 2e-05, "loss": 5.4534, "step": 2358 }, { "epoch": 0.07911461390123249, "grad_norm": 0.44571063649409765, "learning_rate": 2e-05, "loss": 5.8628, "step": 2359 }, { "epoch": 0.07914815125345855, "grad_norm": 0.47086167049470373, "learning_rate": 2e-05, "loss": 5.5011, "step": 2360 }, { "epoch": 0.07918168860568459, "grad_norm": 0.4068067023635, "learning_rate": 2e-05, "loss": 5.5504, "step": 2361 }, { "epoch": 0.07921522595791063, "grad_norm": 0.43231257742234014, "learning_rate": 2e-05, "loss": 5.3788, "step": 2362 }, { "epoch": 0.07924876331013667, "grad_norm": 0.46504499700279234, "learning_rate": 2e-05, "loss": 5.4779, "step": 2363 }, { "epoch": 0.0792823006623627, "grad_norm": 0.41968550187135334, "learning_rate": 2e-05, "loss": 5.5707, "step": 2364 }, { "epoch": 0.07931583801458875, "grad_norm": 0.4169004008043454, "learning_rate": 2e-05, "loss": 5.4604, "step": 2365 }, { "epoch": 0.07934937536681479, "grad_norm": 0.4363500347760353, "learning_rate": 2e-05, "loss": 5.6702, "step": 2366 }, { "epoch": 0.07938291271904083, "grad_norm": 0.4479988226459135, "learning_rate": 2e-05, "loss": 5.525, "step": 2367 }, { "epoch": 0.07941645007126687, "grad_norm": 0.42097623416221863, "learning_rate": 2e-05, "loss": 5.5072, "step": 2368 }, { "epoch": 0.07944998742349292, "grad_norm": 0.42760860407761186, "learning_rate": 2e-05, "loss": 5.6045, "step": 2369 }, { "epoch": 0.07948352477571896, "grad_norm": 0.5456727919087091, "learning_rate": 2e-05, "loss": 5.6336, "step": 2370 }, { "epoch": 0.079517062127945, "grad_norm": 0.4251194810819205, "learning_rate": 2e-05, "loss": 5.7198, "step": 2371 }, { "epoch": 0.07955059948017104, "grad_norm": 0.45667117386342304, "learning_rate": 2e-05, "loss": 5.4511, "step": 2372 }, { "epoch": 0.07958413683239708, "grad_norm": 0.4986384526277875, "learning_rate": 2e-05, "loss": 5.4478, "step": 2373 }, { "epoch": 0.07961767418462312, "grad_norm": 0.43231353944406137, "learning_rate": 2e-05, "loss": 5.6274, "step": 2374 }, { "epoch": 0.07965121153684916, "grad_norm": 0.4082770161693387, "learning_rate": 2e-05, "loss": 5.6537, "step": 2375 }, { "epoch": 0.0796847488890752, "grad_norm": 0.46740443588544395, "learning_rate": 2e-05, "loss": 5.6397, "step": 2376 }, { "epoch": 0.07971828624130126, "grad_norm": 0.5020906228255194, "learning_rate": 2e-05, "loss": 5.5612, "step": 2377 }, { "epoch": 0.0797518235935273, "grad_norm": 0.40436786863416885, "learning_rate": 2e-05, "loss": 5.5203, "step": 2378 }, { "epoch": 0.07978536094575334, "grad_norm": 0.47507871542073005, "learning_rate": 2e-05, "loss": 5.4712, "step": 2379 }, { "epoch": 0.07981889829797938, "grad_norm": 0.4895343571017132, "learning_rate": 2e-05, "loss": 5.5399, "step": 2380 }, { "epoch": 0.07985243565020542, "grad_norm": 0.42344661581456833, "learning_rate": 2e-05, "loss": 5.687, "step": 2381 }, { "epoch": 0.07988597300243146, "grad_norm": 0.4395264462708443, "learning_rate": 2e-05, "loss": 5.5673, "step": 2382 }, { "epoch": 0.0799195103546575, "grad_norm": 0.4321241203692541, "learning_rate": 2e-05, "loss": 5.4941, "step": 2383 }, { "epoch": 0.07995304770688354, "grad_norm": 0.42222467392192786, "learning_rate": 2e-05, "loss": 5.5129, "step": 2384 }, { "epoch": 0.07998658505910958, "grad_norm": 0.430076114026804, "learning_rate": 2e-05, "loss": 5.5585, "step": 2385 }, { "epoch": 0.08002012241133563, "grad_norm": 0.4195894433618068, "learning_rate": 2e-05, "loss": 5.5967, "step": 2386 }, { "epoch": 0.08005365976356167, "grad_norm": 0.4122795235518905, "learning_rate": 2e-05, "loss": 5.6874, "step": 2387 }, { "epoch": 0.08008719711578771, "grad_norm": 0.42812225282132277, "learning_rate": 2e-05, "loss": 5.6591, "step": 2388 }, { "epoch": 0.08012073446801375, "grad_norm": 0.42118126406285855, "learning_rate": 2e-05, "loss": 5.7283, "step": 2389 }, { "epoch": 0.08015427182023979, "grad_norm": 0.48535706288825553, "learning_rate": 2e-05, "loss": 5.5785, "step": 2390 }, { "epoch": 0.08018780917246583, "grad_norm": 0.4263473713146213, "learning_rate": 2e-05, "loss": 5.5493, "step": 2391 }, { "epoch": 0.08022134652469187, "grad_norm": 0.41364760567089937, "learning_rate": 2e-05, "loss": 5.2769, "step": 2392 }, { "epoch": 0.08025488387691791, "grad_norm": 0.4316620567930647, "learning_rate": 2e-05, "loss": 5.6264, "step": 2393 }, { "epoch": 0.08028842122914395, "grad_norm": 0.41496161573471646, "learning_rate": 2e-05, "loss": 5.7196, "step": 2394 }, { "epoch": 0.08032195858137, "grad_norm": 0.4274708477618126, "learning_rate": 2e-05, "loss": 5.2407, "step": 2395 }, { "epoch": 0.08035549593359605, "grad_norm": 0.41206483232781155, "learning_rate": 2e-05, "loss": 5.7135, "step": 2396 }, { "epoch": 0.08038903328582209, "grad_norm": 0.4032516548718847, "learning_rate": 2e-05, "loss": 5.4717, "step": 2397 }, { "epoch": 0.08042257063804813, "grad_norm": 0.4135712539340665, "learning_rate": 2e-05, "loss": 5.5396, "step": 2398 }, { "epoch": 0.08045610799027417, "grad_norm": 0.4282410553554195, "learning_rate": 2e-05, "loss": 5.4753, "step": 2399 }, { "epoch": 0.08048964534250021, "grad_norm": 0.43885028957512634, "learning_rate": 2e-05, "loss": 5.4953, "step": 2400 }, { "epoch": 0.08052318269472625, "grad_norm": 0.43402943179135234, "learning_rate": 2e-05, "loss": 5.4525, "step": 2401 }, { "epoch": 0.08055672004695229, "grad_norm": 0.4087459216489701, "learning_rate": 2e-05, "loss": 5.5043, "step": 2402 }, { "epoch": 0.08059025739917834, "grad_norm": 0.40562190009887944, "learning_rate": 2e-05, "loss": 5.5114, "step": 2403 }, { "epoch": 0.08062379475140438, "grad_norm": 0.42996599682552067, "learning_rate": 2e-05, "loss": 5.6726, "step": 2404 }, { "epoch": 0.08065733210363042, "grad_norm": 0.4386846308367689, "learning_rate": 2e-05, "loss": 5.6447, "step": 2405 }, { "epoch": 0.08069086945585646, "grad_norm": 0.4201240912495839, "learning_rate": 2e-05, "loss": 5.7019, "step": 2406 }, { "epoch": 0.0807244068080825, "grad_norm": 0.4340112467694865, "learning_rate": 2e-05, "loss": 5.5129, "step": 2407 }, { "epoch": 0.08075794416030854, "grad_norm": 0.4133454557581971, "learning_rate": 2e-05, "loss": 5.5891, "step": 2408 }, { "epoch": 0.08079148151253458, "grad_norm": 0.39762326317126484, "learning_rate": 2e-05, "loss": 5.5829, "step": 2409 }, { "epoch": 0.08082501886476062, "grad_norm": 0.4299947768821509, "learning_rate": 2e-05, "loss": 5.5767, "step": 2410 }, { "epoch": 0.08085855621698666, "grad_norm": 0.43584939380655924, "learning_rate": 2e-05, "loss": 5.6842, "step": 2411 }, { "epoch": 0.08089209356921272, "grad_norm": 0.4596953708443452, "learning_rate": 2e-05, "loss": 5.5137, "step": 2412 }, { "epoch": 0.08092563092143876, "grad_norm": 0.4331912888511902, "learning_rate": 2e-05, "loss": 5.5962, "step": 2413 }, { "epoch": 0.0809591682736648, "grad_norm": 0.4256201632708061, "learning_rate": 2e-05, "loss": 5.5816, "step": 2414 }, { "epoch": 0.08099270562589084, "grad_norm": 0.45018585516540194, "learning_rate": 2e-05, "loss": 5.5034, "step": 2415 }, { "epoch": 0.08102624297811688, "grad_norm": 0.4320443626783137, "learning_rate": 2e-05, "loss": 5.7511, "step": 2416 }, { "epoch": 0.08105978033034292, "grad_norm": 0.4291784898247134, "learning_rate": 2e-05, "loss": 5.68, "step": 2417 }, { "epoch": 0.08109331768256896, "grad_norm": 0.43806619751472825, "learning_rate": 2e-05, "loss": 5.4592, "step": 2418 }, { "epoch": 0.081126855034795, "grad_norm": 0.4283604121659792, "learning_rate": 2e-05, "loss": 5.5382, "step": 2419 }, { "epoch": 0.08116039238702104, "grad_norm": 0.4068421606999187, "learning_rate": 2e-05, "loss": 5.8042, "step": 2420 }, { "epoch": 0.08119392973924709, "grad_norm": 0.44783116996579775, "learning_rate": 2e-05, "loss": 5.6755, "step": 2421 }, { "epoch": 0.08122746709147313, "grad_norm": 0.43772987552836934, "learning_rate": 2e-05, "loss": 5.5022, "step": 2422 }, { "epoch": 0.08126100444369917, "grad_norm": 0.4138206301002863, "learning_rate": 2e-05, "loss": 5.5709, "step": 2423 }, { "epoch": 0.08129454179592521, "grad_norm": 0.4176186761984329, "learning_rate": 2e-05, "loss": 5.4357, "step": 2424 }, { "epoch": 0.08132807914815125, "grad_norm": 0.4002323269085116, "learning_rate": 2e-05, "loss": 5.3508, "step": 2425 }, { "epoch": 0.08136161650037729, "grad_norm": 0.41314662698358967, "learning_rate": 2e-05, "loss": 5.5837, "step": 2426 }, { "epoch": 0.08139515385260333, "grad_norm": 0.4132862765004134, "learning_rate": 2e-05, "loss": 5.2124, "step": 2427 }, { "epoch": 0.08142869120482937, "grad_norm": 0.4299347616907789, "learning_rate": 2e-05, "loss": 5.5807, "step": 2428 }, { "epoch": 0.08146222855705543, "grad_norm": 0.40271868593130933, "learning_rate": 2e-05, "loss": 5.5075, "step": 2429 }, { "epoch": 0.08149576590928147, "grad_norm": 0.4044376795695435, "learning_rate": 2e-05, "loss": 5.6013, "step": 2430 }, { "epoch": 0.08152930326150751, "grad_norm": 0.40524550619668287, "learning_rate": 2e-05, "loss": 5.5017, "step": 2431 }, { "epoch": 0.08156284061373355, "grad_norm": 0.4070635016997474, "learning_rate": 2e-05, "loss": 5.3967, "step": 2432 }, { "epoch": 0.08159637796595959, "grad_norm": 0.41990183413027843, "learning_rate": 2e-05, "loss": 5.7526, "step": 2433 }, { "epoch": 0.08162991531818563, "grad_norm": 0.40595336861352865, "learning_rate": 2e-05, "loss": 5.5696, "step": 2434 }, { "epoch": 0.08166345267041167, "grad_norm": 0.3925455035425505, "learning_rate": 2e-05, "loss": 5.6308, "step": 2435 }, { "epoch": 0.08169699002263771, "grad_norm": 0.38932841756031683, "learning_rate": 2e-05, "loss": 5.3483, "step": 2436 }, { "epoch": 0.08173052737486375, "grad_norm": 0.39466900642645164, "learning_rate": 2e-05, "loss": 5.3963, "step": 2437 }, { "epoch": 0.0817640647270898, "grad_norm": 0.4162983761573419, "learning_rate": 2e-05, "loss": 5.57, "step": 2438 }, { "epoch": 0.08179760207931584, "grad_norm": 0.4158193759716838, "learning_rate": 2e-05, "loss": 5.5342, "step": 2439 }, { "epoch": 0.08183113943154188, "grad_norm": 0.4049402029918235, "learning_rate": 2e-05, "loss": 5.5101, "step": 2440 }, { "epoch": 0.08186467678376792, "grad_norm": 0.40184198256040415, "learning_rate": 2e-05, "loss": 5.507, "step": 2441 }, { "epoch": 0.08189821413599396, "grad_norm": 0.41862368321511484, "learning_rate": 2e-05, "loss": 5.5756, "step": 2442 }, { "epoch": 0.08193175148822, "grad_norm": 0.4072652576301845, "learning_rate": 2e-05, "loss": 5.6329, "step": 2443 }, { "epoch": 0.08196528884044604, "grad_norm": 0.3860415122854753, "learning_rate": 2e-05, "loss": 5.7781, "step": 2444 }, { "epoch": 0.08199882619267208, "grad_norm": 0.42489022445930563, "learning_rate": 2e-05, "loss": 5.7487, "step": 2445 }, { "epoch": 0.08203236354489812, "grad_norm": 0.4154115361910815, "learning_rate": 2e-05, "loss": 5.4711, "step": 2446 }, { "epoch": 0.08206590089712418, "grad_norm": 0.46275264679804445, "learning_rate": 2e-05, "loss": 5.5591, "step": 2447 }, { "epoch": 0.08209943824935022, "grad_norm": 0.4102770735060549, "learning_rate": 2e-05, "loss": 5.3843, "step": 2448 }, { "epoch": 0.08213297560157626, "grad_norm": 0.45021665393043414, "learning_rate": 2e-05, "loss": 5.5046, "step": 2449 }, { "epoch": 0.0821665129538023, "grad_norm": 0.4107235294627388, "learning_rate": 2e-05, "loss": 5.5932, "step": 2450 }, { "epoch": 0.08220005030602834, "grad_norm": 0.42082316370615747, "learning_rate": 2e-05, "loss": 5.6561, "step": 2451 }, { "epoch": 0.08223358765825438, "grad_norm": 0.42311303636983916, "learning_rate": 2e-05, "loss": 5.7825, "step": 2452 }, { "epoch": 0.08226712501048042, "grad_norm": 0.4915338643577724, "learning_rate": 2e-05, "loss": 5.4888, "step": 2453 }, { "epoch": 0.08230066236270646, "grad_norm": 0.409454934142751, "learning_rate": 2e-05, "loss": 5.6369, "step": 2454 }, { "epoch": 0.08233419971493251, "grad_norm": 0.43208980040869527, "learning_rate": 2e-05, "loss": 5.4, "step": 2455 }, { "epoch": 0.08236773706715855, "grad_norm": 0.4347240275041126, "learning_rate": 2e-05, "loss": 5.5486, "step": 2456 }, { "epoch": 0.0824012744193846, "grad_norm": 0.4184503862660213, "learning_rate": 2e-05, "loss": 5.4594, "step": 2457 }, { "epoch": 0.08243481177161063, "grad_norm": 0.4462061681875482, "learning_rate": 2e-05, "loss": 5.5682, "step": 2458 }, { "epoch": 0.08246834912383667, "grad_norm": 0.44696322477164985, "learning_rate": 2e-05, "loss": 5.755, "step": 2459 }, { "epoch": 0.08250188647606271, "grad_norm": 0.47656630316050025, "learning_rate": 2e-05, "loss": 5.3604, "step": 2460 }, { "epoch": 0.08253542382828875, "grad_norm": 0.4178709839249146, "learning_rate": 2e-05, "loss": 5.3375, "step": 2461 }, { "epoch": 0.0825689611805148, "grad_norm": 0.4275222240193147, "learning_rate": 2e-05, "loss": 5.5157, "step": 2462 }, { "epoch": 0.08260249853274083, "grad_norm": 0.46751101332303996, "learning_rate": 2e-05, "loss": 5.4847, "step": 2463 }, { "epoch": 0.08263603588496689, "grad_norm": 0.4345168745200726, "learning_rate": 2e-05, "loss": 5.691, "step": 2464 }, { "epoch": 0.08266957323719293, "grad_norm": 0.4478559613430801, "learning_rate": 2e-05, "loss": 5.3461, "step": 2465 }, { "epoch": 0.08270311058941897, "grad_norm": 0.42206778336135253, "learning_rate": 2e-05, "loss": 5.4134, "step": 2466 }, { "epoch": 0.08273664794164501, "grad_norm": 0.42559292463002707, "learning_rate": 2e-05, "loss": 5.6006, "step": 2467 }, { "epoch": 0.08277018529387105, "grad_norm": 0.45381692251937233, "learning_rate": 2e-05, "loss": 5.5095, "step": 2468 }, { "epoch": 0.08280372264609709, "grad_norm": 0.43292767268032495, "learning_rate": 2e-05, "loss": 5.5281, "step": 2469 }, { "epoch": 0.08283725999832313, "grad_norm": 0.4259308180002335, "learning_rate": 2e-05, "loss": 5.8089, "step": 2470 }, { "epoch": 0.08287079735054917, "grad_norm": 0.46534090073969425, "learning_rate": 2e-05, "loss": 5.3242, "step": 2471 }, { "epoch": 0.08290433470277521, "grad_norm": 0.4312358801557587, "learning_rate": 2e-05, "loss": 5.6376, "step": 2472 }, { "epoch": 0.08293787205500126, "grad_norm": 0.4105509362330573, "learning_rate": 2e-05, "loss": 5.4638, "step": 2473 }, { "epoch": 0.0829714094072273, "grad_norm": 0.3963887273169291, "learning_rate": 2e-05, "loss": 5.7457, "step": 2474 }, { "epoch": 0.08300494675945334, "grad_norm": 0.4580000042278343, "learning_rate": 2e-05, "loss": 5.7479, "step": 2475 }, { "epoch": 0.08303848411167938, "grad_norm": 0.3964785095991979, "learning_rate": 2e-05, "loss": 5.4502, "step": 2476 }, { "epoch": 0.08307202146390542, "grad_norm": 0.41898257205922906, "learning_rate": 2e-05, "loss": 5.5001, "step": 2477 }, { "epoch": 0.08310555881613146, "grad_norm": 0.4421877137963112, "learning_rate": 2e-05, "loss": 5.6034, "step": 2478 }, { "epoch": 0.0831390961683575, "grad_norm": 0.4467630654619899, "learning_rate": 2e-05, "loss": 5.4854, "step": 2479 }, { "epoch": 0.08317263352058354, "grad_norm": 0.4533432980771384, "learning_rate": 2e-05, "loss": 5.5674, "step": 2480 }, { "epoch": 0.0832061708728096, "grad_norm": 0.4650030164453931, "learning_rate": 2e-05, "loss": 5.5821, "step": 2481 }, { "epoch": 0.08323970822503564, "grad_norm": 0.4768007867095064, "learning_rate": 2e-05, "loss": 5.636, "step": 2482 }, { "epoch": 0.08327324557726168, "grad_norm": 0.45023084875334746, "learning_rate": 2e-05, "loss": 5.6696, "step": 2483 }, { "epoch": 0.08330678292948772, "grad_norm": 0.4302938986463003, "learning_rate": 2e-05, "loss": 5.5492, "step": 2484 }, { "epoch": 0.08334032028171376, "grad_norm": 0.4339731016469339, "learning_rate": 2e-05, "loss": 5.7211, "step": 2485 }, { "epoch": 0.0833738576339398, "grad_norm": 0.44101970583649824, "learning_rate": 2e-05, "loss": 5.5005, "step": 2486 }, { "epoch": 0.08340739498616584, "grad_norm": 0.4614047966949182, "learning_rate": 2e-05, "loss": 5.7385, "step": 2487 }, { "epoch": 0.08344093233839188, "grad_norm": 0.42060249155958074, "learning_rate": 2e-05, "loss": 5.5512, "step": 2488 }, { "epoch": 0.08347446969061792, "grad_norm": 0.45223312574207525, "learning_rate": 2e-05, "loss": 5.5787, "step": 2489 }, { "epoch": 0.08350800704284397, "grad_norm": 0.40358817140515607, "learning_rate": 2e-05, "loss": 5.8305, "step": 2490 }, { "epoch": 0.08354154439507001, "grad_norm": 0.4300693574893429, "learning_rate": 2e-05, "loss": 5.6198, "step": 2491 }, { "epoch": 0.08357508174729605, "grad_norm": 0.40836638033333694, "learning_rate": 2e-05, "loss": 5.7746, "step": 2492 }, { "epoch": 0.0836086190995221, "grad_norm": 0.4609514757470691, "learning_rate": 2e-05, "loss": 5.3699, "step": 2493 }, { "epoch": 0.08364215645174813, "grad_norm": 0.4464405297441713, "learning_rate": 2e-05, "loss": 5.3982, "step": 2494 }, { "epoch": 0.08367569380397417, "grad_norm": 0.39234638936581034, "learning_rate": 2e-05, "loss": 5.6494, "step": 2495 }, { "epoch": 0.08370923115620021, "grad_norm": 0.44987673475388457, "learning_rate": 2e-05, "loss": 5.729, "step": 2496 }, { "epoch": 0.08374276850842625, "grad_norm": 0.4571337590326227, "learning_rate": 2e-05, "loss": 5.6811, "step": 2497 }, { "epoch": 0.0837763058606523, "grad_norm": 0.44084532551402017, "learning_rate": 2e-05, "loss": 5.5707, "step": 2498 }, { "epoch": 0.08380984321287835, "grad_norm": 0.3980438222521777, "learning_rate": 2e-05, "loss": 5.463, "step": 2499 }, { "epoch": 0.08384338056510439, "grad_norm": 0.44101289276459893, "learning_rate": 2e-05, "loss": 5.5333, "step": 2500 }, { "epoch": 0.08387691791733043, "grad_norm": 0.44026135120869103, "learning_rate": 2e-05, "loss": 5.61, "step": 2501 }, { "epoch": 0.08391045526955647, "grad_norm": 0.38957088189458006, "learning_rate": 2e-05, "loss": 5.6315, "step": 2502 }, { "epoch": 0.08394399262178251, "grad_norm": 0.43379461174172557, "learning_rate": 2e-05, "loss": 5.5373, "step": 2503 }, { "epoch": 0.08397752997400855, "grad_norm": 0.41790742733368624, "learning_rate": 2e-05, "loss": 5.6746, "step": 2504 }, { "epoch": 0.08401106732623459, "grad_norm": 0.44703577599448807, "learning_rate": 2e-05, "loss": 5.5778, "step": 2505 }, { "epoch": 0.08404460467846063, "grad_norm": 0.43061164803234103, "learning_rate": 2e-05, "loss": 5.6768, "step": 2506 }, { "epoch": 0.08407814203068668, "grad_norm": 0.4186944604328967, "learning_rate": 2e-05, "loss": 5.734, "step": 2507 }, { "epoch": 0.08411167938291272, "grad_norm": 0.4270845407877142, "learning_rate": 2e-05, "loss": 5.4683, "step": 2508 }, { "epoch": 0.08414521673513876, "grad_norm": 0.3992715086503719, "learning_rate": 2e-05, "loss": 5.4906, "step": 2509 }, { "epoch": 0.0841787540873648, "grad_norm": 0.3979375433045787, "learning_rate": 2e-05, "loss": 5.5898, "step": 2510 }, { "epoch": 0.08421229143959084, "grad_norm": 0.4277374022888058, "learning_rate": 2e-05, "loss": 5.5364, "step": 2511 }, { "epoch": 0.08424582879181688, "grad_norm": 0.4414308360522602, "learning_rate": 2e-05, "loss": 5.7144, "step": 2512 }, { "epoch": 0.08427936614404292, "grad_norm": 0.41910978727529075, "learning_rate": 2e-05, "loss": 5.6941, "step": 2513 }, { "epoch": 0.08431290349626896, "grad_norm": 0.41584098404918757, "learning_rate": 2e-05, "loss": 5.6652, "step": 2514 }, { "epoch": 0.084346440848495, "grad_norm": 0.43768652285748, "learning_rate": 2e-05, "loss": 5.4969, "step": 2515 }, { "epoch": 0.08437997820072106, "grad_norm": 0.4139336405184854, "learning_rate": 2e-05, "loss": 5.7458, "step": 2516 }, { "epoch": 0.0844135155529471, "grad_norm": 0.45814841358945846, "learning_rate": 2e-05, "loss": 5.5838, "step": 2517 }, { "epoch": 0.08444705290517314, "grad_norm": 0.4528859669325055, "learning_rate": 2e-05, "loss": 5.4561, "step": 2518 }, { "epoch": 0.08448059025739918, "grad_norm": 0.4046141307333906, "learning_rate": 2e-05, "loss": 5.5284, "step": 2519 }, { "epoch": 0.08451412760962522, "grad_norm": 0.4031545655507818, "learning_rate": 2e-05, "loss": 5.4366, "step": 2520 }, { "epoch": 0.08454766496185126, "grad_norm": 0.42124927742933915, "learning_rate": 2e-05, "loss": 5.7064, "step": 2521 }, { "epoch": 0.0845812023140773, "grad_norm": 0.4334927635429006, "learning_rate": 2e-05, "loss": 5.6564, "step": 2522 }, { "epoch": 0.08461473966630334, "grad_norm": 0.3955471729589661, "learning_rate": 2e-05, "loss": 5.5572, "step": 2523 }, { "epoch": 0.08464827701852938, "grad_norm": 0.43655882999819823, "learning_rate": 2e-05, "loss": 5.5426, "step": 2524 }, { "epoch": 0.08468181437075543, "grad_norm": 0.43729357866743535, "learning_rate": 2e-05, "loss": 5.4884, "step": 2525 }, { "epoch": 0.08471535172298147, "grad_norm": 0.39831294851863513, "learning_rate": 2e-05, "loss": 5.5677, "step": 2526 }, { "epoch": 0.08474888907520751, "grad_norm": 0.40548034867888744, "learning_rate": 2e-05, "loss": 5.4031, "step": 2527 }, { "epoch": 0.08478242642743355, "grad_norm": 0.4142681672231289, "learning_rate": 2e-05, "loss": 5.7133, "step": 2528 }, { "epoch": 0.0848159637796596, "grad_norm": 0.46813193609574, "learning_rate": 2e-05, "loss": 5.4226, "step": 2529 }, { "epoch": 0.08484950113188564, "grad_norm": 0.40505155283989586, "learning_rate": 2e-05, "loss": 5.4518, "step": 2530 }, { "epoch": 0.08488303848411168, "grad_norm": 0.4108774684100158, "learning_rate": 2e-05, "loss": 5.7062, "step": 2531 }, { "epoch": 0.08491657583633772, "grad_norm": 0.4450421529869882, "learning_rate": 2e-05, "loss": 5.5016, "step": 2532 }, { "epoch": 0.08495011318856377, "grad_norm": 0.4393124315933481, "learning_rate": 2e-05, "loss": 5.4091, "step": 2533 }, { "epoch": 0.08498365054078981, "grad_norm": 0.4481814407618846, "learning_rate": 2e-05, "loss": 5.7764, "step": 2534 }, { "epoch": 0.08501718789301585, "grad_norm": 0.4075967722548517, "learning_rate": 2e-05, "loss": 5.5114, "step": 2535 }, { "epoch": 0.08505072524524189, "grad_norm": 0.4888460097613812, "learning_rate": 2e-05, "loss": 5.5088, "step": 2536 }, { "epoch": 0.08508426259746793, "grad_norm": 0.4621549973907172, "learning_rate": 2e-05, "loss": 5.3838, "step": 2537 }, { "epoch": 0.08511779994969397, "grad_norm": 0.4120655214919283, "learning_rate": 2e-05, "loss": 5.5814, "step": 2538 }, { "epoch": 0.08515133730192001, "grad_norm": 0.4226917957738573, "learning_rate": 2e-05, "loss": 5.5385, "step": 2539 }, { "epoch": 0.08518487465414605, "grad_norm": 0.45750880060219, "learning_rate": 2e-05, "loss": 5.6711, "step": 2540 }, { "epoch": 0.08521841200637209, "grad_norm": 0.4323681435512226, "learning_rate": 2e-05, "loss": 5.3915, "step": 2541 }, { "epoch": 0.08525194935859814, "grad_norm": 0.39649520943071664, "learning_rate": 2e-05, "loss": 5.5356, "step": 2542 }, { "epoch": 0.08528548671082418, "grad_norm": 0.41086852513753586, "learning_rate": 2e-05, "loss": 5.7678, "step": 2543 }, { "epoch": 0.08531902406305023, "grad_norm": 0.4302414962524038, "learning_rate": 2e-05, "loss": 5.5468, "step": 2544 }, { "epoch": 0.08535256141527627, "grad_norm": 0.46230381139372906, "learning_rate": 2e-05, "loss": 5.3468, "step": 2545 }, { "epoch": 0.0853860987675023, "grad_norm": 0.4216705048263822, "learning_rate": 2e-05, "loss": 5.6555, "step": 2546 }, { "epoch": 0.08541963611972835, "grad_norm": 0.40065662510951827, "learning_rate": 2e-05, "loss": 5.5372, "step": 2547 }, { "epoch": 0.08545317347195439, "grad_norm": 0.4240429412709641, "learning_rate": 2e-05, "loss": 5.5306, "step": 2548 }, { "epoch": 0.08548671082418043, "grad_norm": 0.4207951180596634, "learning_rate": 2e-05, "loss": 5.5735, "step": 2549 }, { "epoch": 0.08552024817640647, "grad_norm": 0.4154946259744252, "learning_rate": 2e-05, "loss": 5.4017, "step": 2550 }, { "epoch": 0.08555378552863252, "grad_norm": 0.4217022628551471, "learning_rate": 2e-05, "loss": 5.4866, "step": 2551 }, { "epoch": 0.08558732288085856, "grad_norm": 0.4711077487026605, "learning_rate": 2e-05, "loss": 5.5325, "step": 2552 }, { "epoch": 0.0856208602330846, "grad_norm": 0.4346281335210595, "learning_rate": 2e-05, "loss": 5.4983, "step": 2553 }, { "epoch": 0.08565439758531064, "grad_norm": 0.40807749950936356, "learning_rate": 2e-05, "loss": 5.5974, "step": 2554 }, { "epoch": 0.08568793493753668, "grad_norm": 0.43794874289712443, "learning_rate": 2e-05, "loss": 5.6774, "step": 2555 }, { "epoch": 0.08572147228976272, "grad_norm": 0.43457537907226373, "learning_rate": 2e-05, "loss": 5.4896, "step": 2556 }, { "epoch": 0.08575500964198876, "grad_norm": 0.4260943476371691, "learning_rate": 2e-05, "loss": 5.5533, "step": 2557 }, { "epoch": 0.0857885469942148, "grad_norm": 0.4447884862281528, "learning_rate": 2e-05, "loss": 5.6352, "step": 2558 }, { "epoch": 0.08582208434644086, "grad_norm": 0.47531414092475405, "learning_rate": 2e-05, "loss": 5.378, "step": 2559 }, { "epoch": 0.0858556216986669, "grad_norm": 0.44725890723135653, "learning_rate": 2e-05, "loss": 5.6344, "step": 2560 }, { "epoch": 0.08588915905089294, "grad_norm": 0.4396374344975392, "learning_rate": 2e-05, "loss": 5.6194, "step": 2561 }, { "epoch": 0.08592269640311898, "grad_norm": 0.41829789017751373, "learning_rate": 2e-05, "loss": 5.5235, "step": 2562 }, { "epoch": 0.08595623375534502, "grad_norm": 0.409752053950641, "learning_rate": 2e-05, "loss": 5.5101, "step": 2563 }, { "epoch": 0.08598977110757106, "grad_norm": 0.4212417826199785, "learning_rate": 2e-05, "loss": 5.685, "step": 2564 }, { "epoch": 0.0860233084597971, "grad_norm": 0.4263844202160119, "learning_rate": 2e-05, "loss": 5.6001, "step": 2565 }, { "epoch": 0.08605684581202314, "grad_norm": 0.41690231554563917, "learning_rate": 2e-05, "loss": 5.5594, "step": 2566 }, { "epoch": 0.08609038316424918, "grad_norm": 0.43355339232247186, "learning_rate": 2e-05, "loss": 5.4971, "step": 2567 }, { "epoch": 0.08612392051647523, "grad_norm": 0.42233167609442474, "learning_rate": 2e-05, "loss": 5.5779, "step": 2568 }, { "epoch": 0.08615745786870127, "grad_norm": 0.44013341431907593, "learning_rate": 2e-05, "loss": 5.5142, "step": 2569 }, { "epoch": 0.08619099522092731, "grad_norm": 0.4449523243067473, "learning_rate": 2e-05, "loss": 5.4343, "step": 2570 }, { "epoch": 0.08622453257315335, "grad_norm": 0.4213411312410689, "learning_rate": 2e-05, "loss": 5.5409, "step": 2571 }, { "epoch": 0.08625806992537939, "grad_norm": 0.43551114270873265, "learning_rate": 2e-05, "loss": 5.7156, "step": 2572 }, { "epoch": 0.08629160727760543, "grad_norm": 0.474790040409755, "learning_rate": 2e-05, "loss": 5.4191, "step": 2573 }, { "epoch": 0.08632514462983147, "grad_norm": 0.3811123831332694, "learning_rate": 2e-05, "loss": 5.3228, "step": 2574 }, { "epoch": 0.08635868198205751, "grad_norm": 0.4000255490842201, "learning_rate": 2e-05, "loss": 5.7923, "step": 2575 }, { "epoch": 0.08639221933428355, "grad_norm": 0.41785402026848595, "learning_rate": 2e-05, "loss": 5.4011, "step": 2576 }, { "epoch": 0.0864257566865096, "grad_norm": 0.4304258989467618, "learning_rate": 2e-05, "loss": 5.5573, "step": 2577 }, { "epoch": 0.08645929403873565, "grad_norm": 0.4507905392972353, "learning_rate": 2e-05, "loss": 5.4616, "step": 2578 }, { "epoch": 0.08649283139096169, "grad_norm": 0.4252743993767114, "learning_rate": 2e-05, "loss": 5.5078, "step": 2579 }, { "epoch": 0.08652636874318773, "grad_norm": 0.44451465073803464, "learning_rate": 2e-05, "loss": 5.6043, "step": 2580 }, { "epoch": 0.08655990609541377, "grad_norm": 0.41455547974072193, "learning_rate": 2e-05, "loss": 5.4818, "step": 2581 }, { "epoch": 0.0865934434476398, "grad_norm": 0.4382751806894003, "learning_rate": 2e-05, "loss": 5.478, "step": 2582 }, { "epoch": 0.08662698079986585, "grad_norm": 0.4387611017733642, "learning_rate": 2e-05, "loss": 5.5644, "step": 2583 }, { "epoch": 0.08666051815209189, "grad_norm": 0.39498632246277254, "learning_rate": 2e-05, "loss": 5.6197, "step": 2584 }, { "epoch": 0.08669405550431794, "grad_norm": 0.40481122255437146, "learning_rate": 2e-05, "loss": 5.6676, "step": 2585 }, { "epoch": 0.08672759285654398, "grad_norm": 0.4138365544675366, "learning_rate": 2e-05, "loss": 5.5531, "step": 2586 }, { "epoch": 0.08676113020877002, "grad_norm": 0.4301783511251046, "learning_rate": 2e-05, "loss": 5.6384, "step": 2587 }, { "epoch": 0.08679466756099606, "grad_norm": 0.40253688960141787, "learning_rate": 2e-05, "loss": 5.4618, "step": 2588 }, { "epoch": 0.0868282049132221, "grad_norm": 0.4214448475381854, "learning_rate": 2e-05, "loss": 5.4731, "step": 2589 }, { "epoch": 0.08686174226544814, "grad_norm": 0.4156553907446995, "learning_rate": 2e-05, "loss": 5.626, "step": 2590 }, { "epoch": 0.08689527961767418, "grad_norm": 0.4229681383080714, "learning_rate": 2e-05, "loss": 5.6531, "step": 2591 }, { "epoch": 0.08692881696990022, "grad_norm": 0.4108393594620596, "learning_rate": 2e-05, "loss": 5.6768, "step": 2592 }, { "epoch": 0.08696235432212626, "grad_norm": 0.43949275890930145, "learning_rate": 2e-05, "loss": 5.6237, "step": 2593 }, { "epoch": 0.08699589167435232, "grad_norm": 0.4252558328774147, "learning_rate": 2e-05, "loss": 5.5437, "step": 2594 }, { "epoch": 0.08702942902657836, "grad_norm": 0.4190254814201808, "learning_rate": 2e-05, "loss": 5.6418, "step": 2595 }, { "epoch": 0.0870629663788044, "grad_norm": 0.44678358456047096, "learning_rate": 2e-05, "loss": 5.6025, "step": 2596 }, { "epoch": 0.08709650373103044, "grad_norm": 0.4370508899194787, "learning_rate": 2e-05, "loss": 5.6383, "step": 2597 }, { "epoch": 0.08713004108325648, "grad_norm": 0.4623320085876256, "learning_rate": 2e-05, "loss": 5.5687, "step": 2598 }, { "epoch": 0.08716357843548252, "grad_norm": 0.4283047937630443, "learning_rate": 2e-05, "loss": 5.663, "step": 2599 }, { "epoch": 0.08719711578770856, "grad_norm": 0.428804033805402, "learning_rate": 2e-05, "loss": 5.6994, "step": 2600 }, { "epoch": 0.0872306531399346, "grad_norm": 0.4309523757676762, "learning_rate": 2e-05, "loss": 5.6068, "step": 2601 }, { "epoch": 0.08726419049216064, "grad_norm": 0.4530789294828186, "learning_rate": 2e-05, "loss": 5.591, "step": 2602 }, { "epoch": 0.08729772784438669, "grad_norm": 0.40443914113895957, "learning_rate": 2e-05, "loss": 5.7296, "step": 2603 }, { "epoch": 0.08733126519661273, "grad_norm": 0.41093546074065757, "learning_rate": 2e-05, "loss": 5.3036, "step": 2604 }, { "epoch": 0.08736480254883877, "grad_norm": 0.442852551337911, "learning_rate": 2e-05, "loss": 5.7318, "step": 2605 }, { "epoch": 0.08739833990106481, "grad_norm": 0.4265313252832951, "learning_rate": 2e-05, "loss": 5.4886, "step": 2606 }, { "epoch": 0.08743187725329085, "grad_norm": 0.4244911412149259, "learning_rate": 2e-05, "loss": 5.4005, "step": 2607 }, { "epoch": 0.08746541460551689, "grad_norm": 0.4028708377208089, "learning_rate": 2e-05, "loss": 5.5114, "step": 2608 }, { "epoch": 0.08749895195774293, "grad_norm": 0.38973704564285394, "learning_rate": 2e-05, "loss": 5.7182, "step": 2609 }, { "epoch": 0.08753248930996897, "grad_norm": 0.3980377354994678, "learning_rate": 2e-05, "loss": 5.5876, "step": 2610 }, { "epoch": 0.08756602666219503, "grad_norm": 0.4281174930844379, "learning_rate": 2e-05, "loss": 5.6328, "step": 2611 }, { "epoch": 0.08759956401442107, "grad_norm": 0.40254422018810043, "learning_rate": 2e-05, "loss": 5.569, "step": 2612 }, { "epoch": 0.0876331013666471, "grad_norm": 0.4185440964504567, "learning_rate": 2e-05, "loss": 5.4, "step": 2613 }, { "epoch": 0.08766663871887315, "grad_norm": 0.42346516809681545, "learning_rate": 2e-05, "loss": 5.8354, "step": 2614 }, { "epoch": 0.08770017607109919, "grad_norm": 0.41459359366209114, "learning_rate": 2e-05, "loss": 5.4544, "step": 2615 }, { "epoch": 0.08773371342332523, "grad_norm": 0.4129715087569862, "learning_rate": 2e-05, "loss": 5.619, "step": 2616 }, { "epoch": 0.08776725077555127, "grad_norm": 0.4286671282067519, "learning_rate": 2e-05, "loss": 5.5646, "step": 2617 }, { "epoch": 0.08780078812777731, "grad_norm": 0.4233554009190053, "learning_rate": 2e-05, "loss": 5.7167, "step": 2618 }, { "epoch": 0.08783432548000335, "grad_norm": 0.4394015853813469, "learning_rate": 2e-05, "loss": 5.6071, "step": 2619 }, { "epoch": 0.0878678628322294, "grad_norm": 0.4085774375668275, "learning_rate": 2e-05, "loss": 5.4022, "step": 2620 }, { "epoch": 0.08790140018445544, "grad_norm": 0.3954755982607332, "learning_rate": 2e-05, "loss": 5.6156, "step": 2621 }, { "epoch": 0.08793493753668148, "grad_norm": 0.4257767242087735, "learning_rate": 2e-05, "loss": 5.6275, "step": 2622 }, { "epoch": 0.08796847488890752, "grad_norm": 0.3983389450310594, "learning_rate": 2e-05, "loss": 5.7241, "step": 2623 }, { "epoch": 0.08800201224113356, "grad_norm": 0.4367132604503681, "learning_rate": 2e-05, "loss": 5.3415, "step": 2624 }, { "epoch": 0.0880355495933596, "grad_norm": 0.40853231817305985, "learning_rate": 2e-05, "loss": 5.6248, "step": 2625 }, { "epoch": 0.08806908694558564, "grad_norm": 0.4117481910512048, "learning_rate": 2e-05, "loss": 5.3852, "step": 2626 }, { "epoch": 0.08810262429781168, "grad_norm": 0.4407650699890468, "learning_rate": 2e-05, "loss": 5.6634, "step": 2627 }, { "epoch": 0.08813616165003772, "grad_norm": 0.4166152057793179, "learning_rate": 2e-05, "loss": 5.618, "step": 2628 }, { "epoch": 0.08816969900226378, "grad_norm": 0.41316923973954295, "learning_rate": 2e-05, "loss": 5.7104, "step": 2629 }, { "epoch": 0.08820323635448982, "grad_norm": 0.42085295966156133, "learning_rate": 2e-05, "loss": 5.4574, "step": 2630 }, { "epoch": 0.08823677370671586, "grad_norm": 0.4178408269387118, "learning_rate": 2e-05, "loss": 5.5425, "step": 2631 }, { "epoch": 0.0882703110589419, "grad_norm": 0.4491758554127899, "learning_rate": 2e-05, "loss": 5.374, "step": 2632 }, { "epoch": 0.08830384841116794, "grad_norm": 0.44273428671681286, "learning_rate": 2e-05, "loss": 5.5808, "step": 2633 }, { "epoch": 0.08833738576339398, "grad_norm": 0.422344769192301, "learning_rate": 2e-05, "loss": 5.7163, "step": 2634 }, { "epoch": 0.08837092311562002, "grad_norm": 0.4073502848082663, "learning_rate": 2e-05, "loss": 5.605, "step": 2635 }, { "epoch": 0.08840446046784606, "grad_norm": 0.4224832544212189, "learning_rate": 2e-05, "loss": 5.4806, "step": 2636 }, { "epoch": 0.08843799782007211, "grad_norm": 0.41708940979790243, "learning_rate": 2e-05, "loss": 5.6203, "step": 2637 }, { "epoch": 0.08847153517229815, "grad_norm": 0.41591605844087676, "learning_rate": 2e-05, "loss": 5.3905, "step": 2638 }, { "epoch": 0.08850507252452419, "grad_norm": 0.43050145314790667, "learning_rate": 2e-05, "loss": 5.7145, "step": 2639 }, { "epoch": 0.08853860987675023, "grad_norm": 0.43624799842579676, "learning_rate": 2e-05, "loss": 5.5959, "step": 2640 }, { "epoch": 0.08857214722897627, "grad_norm": 0.4158805655161243, "learning_rate": 2e-05, "loss": 5.6878, "step": 2641 }, { "epoch": 0.08860568458120231, "grad_norm": 0.46616226885897183, "learning_rate": 2e-05, "loss": 5.5127, "step": 2642 }, { "epoch": 0.08863922193342835, "grad_norm": 0.42457490070450343, "learning_rate": 2e-05, "loss": 5.4057, "step": 2643 }, { "epoch": 0.08867275928565439, "grad_norm": 0.41047822914985305, "learning_rate": 2e-05, "loss": 5.591, "step": 2644 }, { "epoch": 0.08870629663788043, "grad_norm": 0.494288938711152, "learning_rate": 2e-05, "loss": 5.6006, "step": 2645 }, { "epoch": 0.08873983399010649, "grad_norm": 0.45569698931259406, "learning_rate": 2e-05, "loss": 5.4272, "step": 2646 }, { "epoch": 0.08877337134233253, "grad_norm": 0.3901213986811219, "learning_rate": 2e-05, "loss": 5.5216, "step": 2647 }, { "epoch": 0.08880690869455857, "grad_norm": 0.41536285844772547, "learning_rate": 2e-05, "loss": 5.5639, "step": 2648 }, { "epoch": 0.08884044604678461, "grad_norm": 0.43437484783242536, "learning_rate": 2e-05, "loss": 5.5305, "step": 2649 }, { "epoch": 0.08887398339901065, "grad_norm": 0.4542781115840909, "learning_rate": 2e-05, "loss": 5.372, "step": 2650 }, { "epoch": 0.08890752075123669, "grad_norm": 0.3863603547476126, "learning_rate": 2e-05, "loss": 5.703, "step": 2651 }, { "epoch": 0.08894105810346273, "grad_norm": 0.3949800509956637, "learning_rate": 2e-05, "loss": 5.3992, "step": 2652 }, { "epoch": 0.08897459545568877, "grad_norm": 0.4165294038107742, "learning_rate": 2e-05, "loss": 5.5656, "step": 2653 }, { "epoch": 0.08900813280791481, "grad_norm": 0.3975174144191429, "learning_rate": 2e-05, "loss": 5.5139, "step": 2654 }, { "epoch": 0.08904167016014086, "grad_norm": 0.42709123435977303, "learning_rate": 2e-05, "loss": 5.5061, "step": 2655 }, { "epoch": 0.0890752075123669, "grad_norm": 0.4328501656833659, "learning_rate": 2e-05, "loss": 5.6751, "step": 2656 }, { "epoch": 0.08910874486459294, "grad_norm": 0.4077293304277042, "learning_rate": 2e-05, "loss": 5.4851, "step": 2657 }, { "epoch": 0.08914228221681898, "grad_norm": 0.4784299995799537, "learning_rate": 2e-05, "loss": 5.4621, "step": 2658 }, { "epoch": 0.08917581956904502, "grad_norm": 0.43186741979632953, "learning_rate": 2e-05, "loss": 5.6107, "step": 2659 }, { "epoch": 0.08920935692127106, "grad_norm": 0.42256243273831806, "learning_rate": 2e-05, "loss": 5.5297, "step": 2660 }, { "epoch": 0.0892428942734971, "grad_norm": 0.39931851591983597, "learning_rate": 2e-05, "loss": 5.4108, "step": 2661 }, { "epoch": 0.08927643162572314, "grad_norm": 0.3930735098494873, "learning_rate": 2e-05, "loss": 5.5303, "step": 2662 }, { "epoch": 0.0893099689779492, "grad_norm": 0.4195972765057035, "learning_rate": 2e-05, "loss": 5.6038, "step": 2663 }, { "epoch": 0.08934350633017524, "grad_norm": 0.3964522720145757, "learning_rate": 2e-05, "loss": 5.3094, "step": 2664 }, { "epoch": 0.08937704368240128, "grad_norm": 0.45654543987525886, "learning_rate": 2e-05, "loss": 5.4474, "step": 2665 }, { "epoch": 0.08941058103462732, "grad_norm": 0.41897755076564847, "learning_rate": 2e-05, "loss": 5.4323, "step": 2666 }, { "epoch": 0.08944411838685336, "grad_norm": 0.41808933776594215, "learning_rate": 2e-05, "loss": 5.498, "step": 2667 }, { "epoch": 0.0894776557390794, "grad_norm": 0.38853255001216713, "learning_rate": 2e-05, "loss": 5.5225, "step": 2668 }, { "epoch": 0.08951119309130544, "grad_norm": 0.4143534943453722, "learning_rate": 2e-05, "loss": 5.8203, "step": 2669 }, { "epoch": 0.08954473044353148, "grad_norm": 0.4275634749952005, "learning_rate": 2e-05, "loss": 5.7519, "step": 2670 }, { "epoch": 0.08957826779575752, "grad_norm": 0.43144309072266857, "learning_rate": 2e-05, "loss": 5.6673, "step": 2671 }, { "epoch": 0.08961180514798357, "grad_norm": 0.3981368752359143, "learning_rate": 2e-05, "loss": 5.674, "step": 2672 }, { "epoch": 0.08964534250020961, "grad_norm": 0.44760670092670446, "learning_rate": 2e-05, "loss": 5.6505, "step": 2673 }, { "epoch": 0.08967887985243565, "grad_norm": 0.45630932273969316, "learning_rate": 2e-05, "loss": 5.3798, "step": 2674 }, { "epoch": 0.08971241720466169, "grad_norm": 0.40212928720711355, "learning_rate": 2e-05, "loss": 5.5388, "step": 2675 }, { "epoch": 0.08974595455688773, "grad_norm": 0.4211817329469353, "learning_rate": 2e-05, "loss": 5.4111, "step": 2676 }, { "epoch": 0.08977949190911377, "grad_norm": 0.39987659242347906, "learning_rate": 2e-05, "loss": 5.7976, "step": 2677 }, { "epoch": 0.08981302926133981, "grad_norm": 0.407418668401979, "learning_rate": 2e-05, "loss": 5.6914, "step": 2678 }, { "epoch": 0.08984656661356585, "grad_norm": 0.4101114576996441, "learning_rate": 2e-05, "loss": 5.4086, "step": 2679 }, { "epoch": 0.08988010396579191, "grad_norm": 0.3848079569662703, "learning_rate": 2e-05, "loss": 5.6325, "step": 2680 }, { "epoch": 0.08991364131801795, "grad_norm": 0.411153358154906, "learning_rate": 2e-05, "loss": 5.6791, "step": 2681 }, { "epoch": 0.08994717867024399, "grad_norm": 0.41664935261063135, "learning_rate": 2e-05, "loss": 5.7927, "step": 2682 }, { "epoch": 0.08998071602247003, "grad_norm": 0.4176453995160944, "learning_rate": 2e-05, "loss": 5.7647, "step": 2683 }, { "epoch": 0.09001425337469607, "grad_norm": 0.40657615422283216, "learning_rate": 2e-05, "loss": 5.6337, "step": 2684 }, { "epoch": 0.09004779072692211, "grad_norm": 0.4178698049482947, "learning_rate": 2e-05, "loss": 5.5393, "step": 2685 }, { "epoch": 0.09008132807914815, "grad_norm": 0.43034972812276434, "learning_rate": 2e-05, "loss": 5.5144, "step": 2686 }, { "epoch": 0.09011486543137419, "grad_norm": 0.42162831774950915, "learning_rate": 2e-05, "loss": 5.6325, "step": 2687 }, { "epoch": 0.09014840278360023, "grad_norm": 0.43193553476178836, "learning_rate": 2e-05, "loss": 5.4979, "step": 2688 }, { "epoch": 0.09018194013582628, "grad_norm": 0.42863921208863076, "learning_rate": 2e-05, "loss": 5.4922, "step": 2689 }, { "epoch": 0.09021547748805232, "grad_norm": 0.4149961835537577, "learning_rate": 2e-05, "loss": 5.8313, "step": 2690 }, { "epoch": 0.09024901484027836, "grad_norm": 0.4481950496825044, "learning_rate": 2e-05, "loss": 5.4217, "step": 2691 }, { "epoch": 0.0902825521925044, "grad_norm": 0.3998821624831406, "learning_rate": 2e-05, "loss": 5.7107, "step": 2692 }, { "epoch": 0.09031608954473044, "grad_norm": 0.4243035520502888, "learning_rate": 2e-05, "loss": 5.5004, "step": 2693 }, { "epoch": 0.09034962689695648, "grad_norm": 0.4306624793685675, "learning_rate": 2e-05, "loss": 5.4182, "step": 2694 }, { "epoch": 0.09038316424918252, "grad_norm": 0.4001809687869063, "learning_rate": 2e-05, "loss": 5.5697, "step": 2695 }, { "epoch": 0.09041670160140856, "grad_norm": 0.49763291751440036, "learning_rate": 2e-05, "loss": 5.6566, "step": 2696 }, { "epoch": 0.0904502389536346, "grad_norm": 0.4370938765027772, "learning_rate": 2e-05, "loss": 5.6307, "step": 2697 }, { "epoch": 0.09048377630586066, "grad_norm": 0.39865225260809656, "learning_rate": 2e-05, "loss": 5.5675, "step": 2698 }, { "epoch": 0.0905173136580867, "grad_norm": 0.4440418219779946, "learning_rate": 2e-05, "loss": 5.6342, "step": 2699 }, { "epoch": 0.09055085101031274, "grad_norm": 0.42314435641590825, "learning_rate": 2e-05, "loss": 5.6258, "step": 2700 }, { "epoch": 0.09058438836253878, "grad_norm": 0.4394915670099517, "learning_rate": 2e-05, "loss": 5.5199, "step": 2701 }, { "epoch": 0.09061792571476482, "grad_norm": 0.40549066686728696, "learning_rate": 2e-05, "loss": 5.619, "step": 2702 }, { "epoch": 0.09065146306699086, "grad_norm": 0.39533736103779216, "learning_rate": 2e-05, "loss": 5.523, "step": 2703 }, { "epoch": 0.0906850004192169, "grad_norm": 0.4670300209065528, "learning_rate": 2e-05, "loss": 5.5367, "step": 2704 }, { "epoch": 0.09071853777144294, "grad_norm": 0.3919872996447147, "learning_rate": 2e-05, "loss": 5.5908, "step": 2705 }, { "epoch": 0.090752075123669, "grad_norm": 0.4053225775916703, "learning_rate": 2e-05, "loss": 5.5678, "step": 2706 }, { "epoch": 0.09078561247589503, "grad_norm": 0.42640238003637226, "learning_rate": 2e-05, "loss": 5.6441, "step": 2707 }, { "epoch": 0.09081914982812107, "grad_norm": 0.48657525384347455, "learning_rate": 2e-05, "loss": 5.2826, "step": 2708 }, { "epoch": 0.09085268718034711, "grad_norm": 0.3812355822254502, "learning_rate": 2e-05, "loss": 5.4844, "step": 2709 }, { "epoch": 0.09088622453257315, "grad_norm": 0.44102752295709613, "learning_rate": 2e-05, "loss": 5.5579, "step": 2710 }, { "epoch": 0.0909197618847992, "grad_norm": 0.4112492959621756, "learning_rate": 2e-05, "loss": 5.4443, "step": 2711 }, { "epoch": 0.09095329923702523, "grad_norm": 0.4020148006510752, "learning_rate": 2e-05, "loss": 5.5518, "step": 2712 }, { "epoch": 0.09098683658925127, "grad_norm": 0.4228105542099438, "learning_rate": 2e-05, "loss": 5.4897, "step": 2713 }, { "epoch": 0.09102037394147731, "grad_norm": 0.44756357671761204, "learning_rate": 2e-05, "loss": 5.4571, "step": 2714 }, { "epoch": 0.09105391129370337, "grad_norm": 0.4310254999207037, "learning_rate": 2e-05, "loss": 5.5895, "step": 2715 }, { "epoch": 0.09108744864592941, "grad_norm": 0.38766536076412106, "learning_rate": 2e-05, "loss": 5.4824, "step": 2716 }, { "epoch": 0.09112098599815545, "grad_norm": 0.44087400320933057, "learning_rate": 2e-05, "loss": 5.6191, "step": 2717 }, { "epoch": 0.09115452335038149, "grad_norm": 0.42534214873364096, "learning_rate": 2e-05, "loss": 5.6381, "step": 2718 }, { "epoch": 0.09118806070260753, "grad_norm": 0.4275315122969737, "learning_rate": 2e-05, "loss": 5.4564, "step": 2719 }, { "epoch": 0.09122159805483357, "grad_norm": 0.4367055526635165, "learning_rate": 2e-05, "loss": 5.6025, "step": 2720 }, { "epoch": 0.09125513540705961, "grad_norm": 0.4352769357882209, "learning_rate": 2e-05, "loss": 5.7049, "step": 2721 }, { "epoch": 0.09128867275928565, "grad_norm": 0.41886063567853843, "learning_rate": 2e-05, "loss": 5.5142, "step": 2722 }, { "epoch": 0.09132221011151169, "grad_norm": 0.42760804309663086, "learning_rate": 2e-05, "loss": 5.5458, "step": 2723 }, { "epoch": 0.09135574746373774, "grad_norm": 0.43839849225112065, "learning_rate": 2e-05, "loss": 5.6161, "step": 2724 }, { "epoch": 0.09138928481596378, "grad_norm": 0.39882830004129377, "learning_rate": 2e-05, "loss": 5.561, "step": 2725 }, { "epoch": 0.09142282216818982, "grad_norm": 0.40862972514912355, "learning_rate": 2e-05, "loss": 5.5662, "step": 2726 }, { "epoch": 0.09145635952041586, "grad_norm": 0.4291702898491968, "learning_rate": 2e-05, "loss": 5.5752, "step": 2727 }, { "epoch": 0.0914898968726419, "grad_norm": 0.39984098404110996, "learning_rate": 2e-05, "loss": 5.5029, "step": 2728 }, { "epoch": 0.09152343422486794, "grad_norm": 0.4523676313925786, "learning_rate": 2e-05, "loss": 5.3849, "step": 2729 }, { "epoch": 0.09155697157709398, "grad_norm": 0.411507851024758, "learning_rate": 2e-05, "loss": 5.5342, "step": 2730 }, { "epoch": 0.09159050892932002, "grad_norm": 0.40958903752476467, "learning_rate": 2e-05, "loss": 5.4951, "step": 2731 }, { "epoch": 0.09162404628154608, "grad_norm": 0.4193204267857471, "learning_rate": 2e-05, "loss": 5.6816, "step": 2732 }, { "epoch": 0.09165758363377212, "grad_norm": 0.4210775741094681, "learning_rate": 2e-05, "loss": 5.4317, "step": 2733 }, { "epoch": 0.09169112098599816, "grad_norm": 0.4363189537727431, "learning_rate": 2e-05, "loss": 5.4765, "step": 2734 }, { "epoch": 0.0917246583382242, "grad_norm": 0.4257048000773892, "learning_rate": 2e-05, "loss": 5.6059, "step": 2735 }, { "epoch": 0.09175819569045024, "grad_norm": 0.40748466655287985, "learning_rate": 2e-05, "loss": 5.375, "step": 2736 }, { "epoch": 0.09179173304267628, "grad_norm": 0.4137277633437279, "learning_rate": 2e-05, "loss": 5.4934, "step": 2737 }, { "epoch": 0.09182527039490232, "grad_norm": 0.41198933572385976, "learning_rate": 2e-05, "loss": 5.6814, "step": 2738 }, { "epoch": 0.09185880774712836, "grad_norm": 0.42535976936155445, "learning_rate": 2e-05, "loss": 5.5622, "step": 2739 }, { "epoch": 0.0918923450993544, "grad_norm": 0.4174879409166337, "learning_rate": 2e-05, "loss": 5.4931, "step": 2740 }, { "epoch": 0.09192588245158045, "grad_norm": 0.3953315818312676, "learning_rate": 2e-05, "loss": 5.61, "step": 2741 }, { "epoch": 0.0919594198038065, "grad_norm": 0.39166069240416035, "learning_rate": 2e-05, "loss": 5.365, "step": 2742 }, { "epoch": 0.09199295715603253, "grad_norm": 0.4014415152324404, "learning_rate": 2e-05, "loss": 5.4259, "step": 2743 }, { "epoch": 0.09202649450825857, "grad_norm": 0.40955430875334714, "learning_rate": 2e-05, "loss": 5.6204, "step": 2744 }, { "epoch": 0.09206003186048461, "grad_norm": 0.40828392834965743, "learning_rate": 2e-05, "loss": 5.5981, "step": 2745 }, { "epoch": 0.09209356921271065, "grad_norm": 0.4020945884749691, "learning_rate": 2e-05, "loss": 5.4538, "step": 2746 }, { "epoch": 0.0921271065649367, "grad_norm": 0.39944968167323486, "learning_rate": 2e-05, "loss": 5.6931, "step": 2747 }, { "epoch": 0.09216064391716274, "grad_norm": 0.41977891962640934, "learning_rate": 2e-05, "loss": 5.5277, "step": 2748 }, { "epoch": 0.09219418126938878, "grad_norm": 0.4160017729632526, "learning_rate": 2e-05, "loss": 5.5441, "step": 2749 }, { "epoch": 0.09222771862161483, "grad_norm": 0.41180344800207014, "learning_rate": 2e-05, "loss": 5.3448, "step": 2750 }, { "epoch": 0.09226125597384087, "grad_norm": 0.4051264697061689, "learning_rate": 2e-05, "loss": 5.4411, "step": 2751 }, { "epoch": 0.09229479332606691, "grad_norm": 0.4041716040513279, "learning_rate": 2e-05, "loss": 5.6159, "step": 2752 }, { "epoch": 0.09232833067829295, "grad_norm": 0.38413246138100426, "learning_rate": 2e-05, "loss": 5.6677, "step": 2753 }, { "epoch": 0.09236186803051899, "grad_norm": 0.4110674656699497, "learning_rate": 2e-05, "loss": 5.5833, "step": 2754 }, { "epoch": 0.09239540538274503, "grad_norm": 0.418448852960214, "learning_rate": 2e-05, "loss": 5.5509, "step": 2755 }, { "epoch": 0.09242894273497107, "grad_norm": 0.39684750214847836, "learning_rate": 2e-05, "loss": 5.5837, "step": 2756 }, { "epoch": 0.09246248008719711, "grad_norm": 0.42012161683926064, "learning_rate": 2e-05, "loss": 5.5799, "step": 2757 }, { "epoch": 0.09249601743942316, "grad_norm": 0.4253660585851816, "learning_rate": 2e-05, "loss": 5.6562, "step": 2758 }, { "epoch": 0.0925295547916492, "grad_norm": 0.3945642425336478, "learning_rate": 2e-05, "loss": 5.5116, "step": 2759 }, { "epoch": 0.09256309214387524, "grad_norm": 0.4054046666275997, "learning_rate": 2e-05, "loss": 5.5659, "step": 2760 }, { "epoch": 0.09259662949610128, "grad_norm": 0.4112214390703192, "learning_rate": 2e-05, "loss": 5.4505, "step": 2761 }, { "epoch": 0.09263016684832732, "grad_norm": 0.43394192174119156, "learning_rate": 2e-05, "loss": 5.6407, "step": 2762 }, { "epoch": 0.09266370420055337, "grad_norm": 0.39275514227671965, "learning_rate": 2e-05, "loss": 5.4704, "step": 2763 }, { "epoch": 0.0926972415527794, "grad_norm": 0.42362559511562203, "learning_rate": 2e-05, "loss": 5.4428, "step": 2764 }, { "epoch": 0.09273077890500545, "grad_norm": 0.44859793012892407, "learning_rate": 2e-05, "loss": 5.6099, "step": 2765 }, { "epoch": 0.09276431625723149, "grad_norm": 0.42305105666478443, "learning_rate": 2e-05, "loss": 5.5375, "step": 2766 }, { "epoch": 0.09279785360945754, "grad_norm": 0.4292339867128885, "learning_rate": 2e-05, "loss": 5.4491, "step": 2767 }, { "epoch": 0.09283139096168358, "grad_norm": 0.41587726258203334, "learning_rate": 2e-05, "loss": 5.5721, "step": 2768 }, { "epoch": 0.09286492831390962, "grad_norm": 0.4204612473690018, "learning_rate": 2e-05, "loss": 5.4978, "step": 2769 }, { "epoch": 0.09289846566613566, "grad_norm": 0.41000114600152504, "learning_rate": 2e-05, "loss": 5.4863, "step": 2770 }, { "epoch": 0.0929320030183617, "grad_norm": 0.3896961498016559, "learning_rate": 2e-05, "loss": 5.4826, "step": 2771 }, { "epoch": 0.09296554037058774, "grad_norm": 0.41527525845995966, "learning_rate": 2e-05, "loss": 5.4217, "step": 2772 }, { "epoch": 0.09299907772281378, "grad_norm": 0.4106777449075716, "learning_rate": 2e-05, "loss": 5.6456, "step": 2773 }, { "epoch": 0.09303261507503982, "grad_norm": 0.4276889646349103, "learning_rate": 2e-05, "loss": 5.4722, "step": 2774 }, { "epoch": 0.09306615242726586, "grad_norm": 0.40985304867660055, "learning_rate": 2e-05, "loss": 5.7032, "step": 2775 }, { "epoch": 0.09309968977949191, "grad_norm": 0.41372773912758676, "learning_rate": 2e-05, "loss": 5.6312, "step": 2776 }, { "epoch": 0.09313322713171795, "grad_norm": 0.41605655646769735, "learning_rate": 2e-05, "loss": 5.4827, "step": 2777 }, { "epoch": 0.093166764483944, "grad_norm": 0.459434919663685, "learning_rate": 2e-05, "loss": 5.4597, "step": 2778 }, { "epoch": 0.09320030183617004, "grad_norm": 0.43532918396628595, "learning_rate": 2e-05, "loss": 5.5254, "step": 2779 }, { "epoch": 0.09323383918839608, "grad_norm": 0.3984507720252649, "learning_rate": 2e-05, "loss": 5.5574, "step": 2780 }, { "epoch": 0.09326737654062212, "grad_norm": 0.4128207351192517, "learning_rate": 2e-05, "loss": 5.5383, "step": 2781 }, { "epoch": 0.09330091389284816, "grad_norm": 0.43944188711306537, "learning_rate": 2e-05, "loss": 5.6926, "step": 2782 }, { "epoch": 0.0933344512450742, "grad_norm": 0.41830591689752983, "learning_rate": 2e-05, "loss": 5.6473, "step": 2783 }, { "epoch": 0.09336798859730025, "grad_norm": 0.41488782819040015, "learning_rate": 2e-05, "loss": 5.489, "step": 2784 }, { "epoch": 0.09340152594952629, "grad_norm": 0.43487618584496357, "learning_rate": 2e-05, "loss": 5.6685, "step": 2785 }, { "epoch": 0.09343506330175233, "grad_norm": 0.4445027517736545, "learning_rate": 2e-05, "loss": 5.5405, "step": 2786 }, { "epoch": 0.09346860065397837, "grad_norm": 0.4281413060421138, "learning_rate": 2e-05, "loss": 5.6174, "step": 2787 }, { "epoch": 0.09350213800620441, "grad_norm": 0.41166182905148946, "learning_rate": 2e-05, "loss": 5.4993, "step": 2788 }, { "epoch": 0.09353567535843045, "grad_norm": 0.43000528874913224, "learning_rate": 2e-05, "loss": 5.602, "step": 2789 }, { "epoch": 0.09356921271065649, "grad_norm": 0.42926804734136853, "learning_rate": 2e-05, "loss": 5.5104, "step": 2790 }, { "epoch": 0.09360275006288253, "grad_norm": 0.39106810595142727, "learning_rate": 2e-05, "loss": 5.6373, "step": 2791 }, { "epoch": 0.09363628741510857, "grad_norm": 0.40567052311600166, "learning_rate": 2e-05, "loss": 5.5397, "step": 2792 }, { "epoch": 0.09366982476733463, "grad_norm": 0.4493159853423198, "learning_rate": 2e-05, "loss": 5.5203, "step": 2793 }, { "epoch": 0.09370336211956067, "grad_norm": 0.45648799067195783, "learning_rate": 2e-05, "loss": 5.4096, "step": 2794 }, { "epoch": 0.0937368994717867, "grad_norm": 0.41312217944930035, "learning_rate": 2e-05, "loss": 5.5953, "step": 2795 }, { "epoch": 0.09377043682401275, "grad_norm": 0.43784774144917704, "learning_rate": 2e-05, "loss": 5.7596, "step": 2796 }, { "epoch": 0.09380397417623879, "grad_norm": 0.41120049462786834, "learning_rate": 2e-05, "loss": 5.7605, "step": 2797 }, { "epoch": 0.09383751152846483, "grad_norm": 0.4858471628827332, "learning_rate": 2e-05, "loss": 5.6767, "step": 2798 }, { "epoch": 0.09387104888069087, "grad_norm": 0.42494340584640206, "learning_rate": 2e-05, "loss": 5.4442, "step": 2799 }, { "epoch": 0.0939045862329169, "grad_norm": 0.415219906676172, "learning_rate": 2e-05, "loss": 5.636, "step": 2800 }, { "epoch": 0.09393812358514295, "grad_norm": 0.43560455579834123, "learning_rate": 2e-05, "loss": 5.4847, "step": 2801 }, { "epoch": 0.093971660937369, "grad_norm": 0.4366026599752199, "learning_rate": 2e-05, "loss": 5.4713, "step": 2802 }, { "epoch": 0.09400519828959504, "grad_norm": 0.39789602020663456, "learning_rate": 2e-05, "loss": 5.7118, "step": 2803 }, { "epoch": 0.09403873564182108, "grad_norm": 0.407924903598722, "learning_rate": 2e-05, "loss": 5.4523, "step": 2804 }, { "epoch": 0.09407227299404712, "grad_norm": 0.4176392420349937, "learning_rate": 2e-05, "loss": 5.7016, "step": 2805 }, { "epoch": 0.09410581034627316, "grad_norm": 0.38308738000852766, "learning_rate": 2e-05, "loss": 5.3728, "step": 2806 }, { "epoch": 0.0941393476984992, "grad_norm": 0.42555245432148486, "learning_rate": 2e-05, "loss": 5.7202, "step": 2807 }, { "epoch": 0.09417288505072524, "grad_norm": 0.4245131952243316, "learning_rate": 2e-05, "loss": 5.5924, "step": 2808 }, { "epoch": 0.09420642240295128, "grad_norm": 0.4448109175134164, "learning_rate": 2e-05, "loss": 5.668, "step": 2809 }, { "epoch": 0.09423995975517734, "grad_norm": 0.4129509077947847, "learning_rate": 2e-05, "loss": 5.6458, "step": 2810 }, { "epoch": 0.09427349710740338, "grad_norm": 0.42673980847865634, "learning_rate": 2e-05, "loss": 5.5945, "step": 2811 }, { "epoch": 0.09430703445962942, "grad_norm": 0.4287474704366404, "learning_rate": 2e-05, "loss": 5.7003, "step": 2812 }, { "epoch": 0.09434057181185546, "grad_norm": 0.3919707792810164, "learning_rate": 2e-05, "loss": 5.5976, "step": 2813 }, { "epoch": 0.0943741091640815, "grad_norm": 0.45840139154485837, "learning_rate": 2e-05, "loss": 5.559, "step": 2814 }, { "epoch": 0.09440764651630754, "grad_norm": 0.43091096732772766, "learning_rate": 2e-05, "loss": 5.507, "step": 2815 }, { "epoch": 0.09444118386853358, "grad_norm": 0.39200004895371854, "learning_rate": 2e-05, "loss": 5.478, "step": 2816 }, { "epoch": 0.09447472122075962, "grad_norm": 0.4091867835519922, "learning_rate": 2e-05, "loss": 5.5629, "step": 2817 }, { "epoch": 0.09450825857298566, "grad_norm": 0.40417503586132464, "learning_rate": 2e-05, "loss": 5.4884, "step": 2818 }, { "epoch": 0.09454179592521171, "grad_norm": 0.4217350213431395, "learning_rate": 2e-05, "loss": 5.4175, "step": 2819 }, { "epoch": 0.09457533327743775, "grad_norm": 0.4219037205016199, "learning_rate": 2e-05, "loss": 5.3974, "step": 2820 }, { "epoch": 0.09460887062966379, "grad_norm": 0.39699482579246015, "learning_rate": 2e-05, "loss": 5.7235, "step": 2821 }, { "epoch": 0.09464240798188983, "grad_norm": 0.40829670921826366, "learning_rate": 2e-05, "loss": 5.4568, "step": 2822 }, { "epoch": 0.09467594533411587, "grad_norm": 0.396375681872691, "learning_rate": 2e-05, "loss": 5.6346, "step": 2823 }, { "epoch": 0.09470948268634191, "grad_norm": 0.41526325125210395, "learning_rate": 2e-05, "loss": 5.464, "step": 2824 }, { "epoch": 0.09474302003856795, "grad_norm": 0.4196638213958442, "learning_rate": 2e-05, "loss": 5.7466, "step": 2825 }, { "epoch": 0.09477655739079399, "grad_norm": 0.41597998351119886, "learning_rate": 2e-05, "loss": 5.5492, "step": 2826 }, { "epoch": 0.09481009474302003, "grad_norm": 0.39482074401772793, "learning_rate": 2e-05, "loss": 5.6458, "step": 2827 }, { "epoch": 0.09484363209524609, "grad_norm": 0.40848842738943686, "learning_rate": 2e-05, "loss": 5.782, "step": 2828 }, { "epoch": 0.09487716944747213, "grad_norm": 0.4141719583265217, "learning_rate": 2e-05, "loss": 5.4992, "step": 2829 }, { "epoch": 0.09491070679969817, "grad_norm": 0.4441729070664025, "learning_rate": 2e-05, "loss": 5.6054, "step": 2830 }, { "epoch": 0.0949442441519242, "grad_norm": 0.40330115505618486, "learning_rate": 2e-05, "loss": 5.6547, "step": 2831 }, { "epoch": 0.09497778150415025, "grad_norm": 0.39810694446885386, "learning_rate": 2e-05, "loss": 5.4703, "step": 2832 }, { "epoch": 0.09501131885637629, "grad_norm": 0.40773558092527484, "learning_rate": 2e-05, "loss": 5.5881, "step": 2833 }, { "epoch": 0.09504485620860233, "grad_norm": 0.4100072132731595, "learning_rate": 2e-05, "loss": 5.5894, "step": 2834 }, { "epoch": 0.09507839356082837, "grad_norm": 0.3991493535812128, "learning_rate": 2e-05, "loss": 5.5163, "step": 2835 }, { "epoch": 0.09511193091305442, "grad_norm": 0.4265854820140682, "learning_rate": 2e-05, "loss": 5.5059, "step": 2836 }, { "epoch": 0.09514546826528046, "grad_norm": 0.40342183385888997, "learning_rate": 2e-05, "loss": 5.6227, "step": 2837 }, { "epoch": 0.0951790056175065, "grad_norm": 0.41015779559272536, "learning_rate": 2e-05, "loss": 5.5101, "step": 2838 }, { "epoch": 0.09521254296973254, "grad_norm": 0.4103045893552474, "learning_rate": 2e-05, "loss": 5.6252, "step": 2839 }, { "epoch": 0.09524608032195858, "grad_norm": 0.4046698178733145, "learning_rate": 2e-05, "loss": 5.461, "step": 2840 }, { "epoch": 0.09527961767418462, "grad_norm": 0.3952334703011364, "learning_rate": 2e-05, "loss": 5.569, "step": 2841 }, { "epoch": 0.09531315502641066, "grad_norm": 0.4437194833827599, "learning_rate": 2e-05, "loss": 5.5148, "step": 2842 }, { "epoch": 0.0953466923786367, "grad_norm": 0.487468213295956, "learning_rate": 2e-05, "loss": 5.457, "step": 2843 }, { "epoch": 0.09538022973086274, "grad_norm": 0.4161174565123646, "learning_rate": 2e-05, "loss": 5.5519, "step": 2844 }, { "epoch": 0.0954137670830888, "grad_norm": 0.4146903552591448, "learning_rate": 2e-05, "loss": 5.3873, "step": 2845 }, { "epoch": 0.09544730443531484, "grad_norm": 0.44598114717330906, "learning_rate": 2e-05, "loss": 5.6029, "step": 2846 }, { "epoch": 0.09548084178754088, "grad_norm": 0.3921025739976117, "learning_rate": 2e-05, "loss": 5.4966, "step": 2847 }, { "epoch": 0.09551437913976692, "grad_norm": 0.4276105437785096, "learning_rate": 2e-05, "loss": 5.6771, "step": 2848 }, { "epoch": 0.09554791649199296, "grad_norm": 0.415736816419078, "learning_rate": 2e-05, "loss": 5.635, "step": 2849 }, { "epoch": 0.095581453844219, "grad_norm": 0.41169531981929985, "learning_rate": 2e-05, "loss": 5.6291, "step": 2850 }, { "epoch": 0.09561499119644504, "grad_norm": 0.4133575396280804, "learning_rate": 2e-05, "loss": 5.6056, "step": 2851 }, { "epoch": 0.09564852854867108, "grad_norm": 0.4317306175386621, "learning_rate": 2e-05, "loss": 5.4293, "step": 2852 }, { "epoch": 0.09568206590089712, "grad_norm": 0.42209911891387836, "learning_rate": 2e-05, "loss": 5.7534, "step": 2853 }, { "epoch": 0.09571560325312317, "grad_norm": 0.41302937005903295, "learning_rate": 2e-05, "loss": 5.7486, "step": 2854 }, { "epoch": 0.09574914060534921, "grad_norm": 0.40132989435674776, "learning_rate": 2e-05, "loss": 5.5979, "step": 2855 }, { "epoch": 0.09578267795757525, "grad_norm": 0.41312451105796505, "learning_rate": 2e-05, "loss": 5.6596, "step": 2856 }, { "epoch": 0.09581621530980129, "grad_norm": 0.4656766358880395, "learning_rate": 2e-05, "loss": 5.574, "step": 2857 }, { "epoch": 0.09584975266202733, "grad_norm": 0.42167190738265614, "learning_rate": 2e-05, "loss": 5.5401, "step": 2858 }, { "epoch": 0.09588329001425337, "grad_norm": 0.4471982306095222, "learning_rate": 2e-05, "loss": 5.7173, "step": 2859 }, { "epoch": 0.09591682736647941, "grad_norm": 0.4425102668980756, "learning_rate": 2e-05, "loss": 5.403, "step": 2860 }, { "epoch": 0.09595036471870545, "grad_norm": 0.4229048980245579, "learning_rate": 2e-05, "loss": 5.493, "step": 2861 }, { "epoch": 0.0959839020709315, "grad_norm": 0.4261910383251401, "learning_rate": 2e-05, "loss": 5.7825, "step": 2862 }, { "epoch": 0.09601743942315755, "grad_norm": 0.40702176035354626, "learning_rate": 2e-05, "loss": 5.5466, "step": 2863 }, { "epoch": 0.09605097677538359, "grad_norm": 0.43112657428636014, "learning_rate": 2e-05, "loss": 5.5844, "step": 2864 }, { "epoch": 0.09608451412760963, "grad_norm": 0.4424862988254143, "learning_rate": 2e-05, "loss": 5.516, "step": 2865 }, { "epoch": 0.09611805147983567, "grad_norm": 0.4188689907666262, "learning_rate": 2e-05, "loss": 5.8182, "step": 2866 }, { "epoch": 0.09615158883206171, "grad_norm": 0.42739665722523035, "learning_rate": 2e-05, "loss": 5.5761, "step": 2867 }, { "epoch": 0.09618512618428775, "grad_norm": 0.4118063198264148, "learning_rate": 2e-05, "loss": 5.5037, "step": 2868 }, { "epoch": 0.09621866353651379, "grad_norm": 0.3941317712864441, "learning_rate": 2e-05, "loss": 5.7633, "step": 2869 }, { "epoch": 0.09625220088873983, "grad_norm": 0.4471706621464006, "learning_rate": 2e-05, "loss": 5.2912, "step": 2870 }, { "epoch": 0.09628573824096588, "grad_norm": 0.4119453893379102, "learning_rate": 2e-05, "loss": 5.5719, "step": 2871 }, { "epoch": 0.09631927559319192, "grad_norm": 0.42126892111394976, "learning_rate": 2e-05, "loss": 5.5616, "step": 2872 }, { "epoch": 0.09635281294541796, "grad_norm": 0.39815872466583635, "learning_rate": 2e-05, "loss": 5.6226, "step": 2873 }, { "epoch": 0.096386350297644, "grad_norm": 0.43532109550401343, "learning_rate": 2e-05, "loss": 5.6216, "step": 2874 }, { "epoch": 0.09641988764987004, "grad_norm": 0.4100843390301648, "learning_rate": 2e-05, "loss": 5.6013, "step": 2875 }, { "epoch": 0.09645342500209608, "grad_norm": 0.41077288765925596, "learning_rate": 2e-05, "loss": 5.6985, "step": 2876 }, { "epoch": 0.09648696235432212, "grad_norm": 0.4255176436541862, "learning_rate": 2e-05, "loss": 5.4983, "step": 2877 }, { "epoch": 0.09652049970654816, "grad_norm": 0.4028593654198746, "learning_rate": 2e-05, "loss": 5.5558, "step": 2878 }, { "epoch": 0.0965540370587742, "grad_norm": 0.4263117284501489, "learning_rate": 2e-05, "loss": 5.7613, "step": 2879 }, { "epoch": 0.09658757441100026, "grad_norm": 0.4288842273857156, "learning_rate": 2e-05, "loss": 5.5152, "step": 2880 }, { "epoch": 0.0966211117632263, "grad_norm": 0.4132691406434023, "learning_rate": 2e-05, "loss": 5.5425, "step": 2881 }, { "epoch": 0.09665464911545234, "grad_norm": 0.43714095822382265, "learning_rate": 2e-05, "loss": 5.5597, "step": 2882 }, { "epoch": 0.09668818646767838, "grad_norm": 0.43789297639770614, "learning_rate": 2e-05, "loss": 5.6906, "step": 2883 }, { "epoch": 0.09672172381990442, "grad_norm": 0.4401025400729986, "learning_rate": 2e-05, "loss": 5.5937, "step": 2884 }, { "epoch": 0.09675526117213046, "grad_norm": 0.4127647682677913, "learning_rate": 2e-05, "loss": 5.4994, "step": 2885 }, { "epoch": 0.0967887985243565, "grad_norm": 0.4200338263865929, "learning_rate": 2e-05, "loss": 5.7499, "step": 2886 }, { "epoch": 0.09682233587658254, "grad_norm": 0.4582705677352533, "learning_rate": 2e-05, "loss": 5.4635, "step": 2887 }, { "epoch": 0.09685587322880859, "grad_norm": 0.4280141698797997, "learning_rate": 2e-05, "loss": 5.5202, "step": 2888 }, { "epoch": 0.09688941058103463, "grad_norm": 0.4419211096640027, "learning_rate": 2e-05, "loss": 5.403, "step": 2889 }, { "epoch": 0.09692294793326067, "grad_norm": 0.4100767753476112, "learning_rate": 2e-05, "loss": 5.6405, "step": 2890 }, { "epoch": 0.09695648528548671, "grad_norm": 0.42658795846391107, "learning_rate": 2e-05, "loss": 5.6227, "step": 2891 }, { "epoch": 0.09699002263771275, "grad_norm": 0.4419505505002049, "learning_rate": 2e-05, "loss": 5.4459, "step": 2892 }, { "epoch": 0.09702355998993879, "grad_norm": 0.4310495099676237, "learning_rate": 2e-05, "loss": 5.5879, "step": 2893 }, { "epoch": 0.09705709734216483, "grad_norm": 0.44133280208904985, "learning_rate": 2e-05, "loss": 5.3897, "step": 2894 }, { "epoch": 0.09709063469439087, "grad_norm": 0.42852332220576345, "learning_rate": 2e-05, "loss": 5.4983, "step": 2895 }, { "epoch": 0.09712417204661691, "grad_norm": 0.418598377705549, "learning_rate": 2e-05, "loss": 5.5929, "step": 2896 }, { "epoch": 0.09715770939884297, "grad_norm": 0.40197075703191043, "learning_rate": 2e-05, "loss": 5.7209, "step": 2897 }, { "epoch": 0.09719124675106901, "grad_norm": 0.4188181608794059, "learning_rate": 2e-05, "loss": 5.5783, "step": 2898 }, { "epoch": 0.09722478410329505, "grad_norm": 0.4410174040907933, "learning_rate": 2e-05, "loss": 5.6145, "step": 2899 }, { "epoch": 0.09725832145552109, "grad_norm": 0.4041452158906539, "learning_rate": 2e-05, "loss": 5.6288, "step": 2900 }, { "epoch": 0.09729185880774713, "grad_norm": 0.42079534062783697, "learning_rate": 2e-05, "loss": 5.2289, "step": 2901 }, { "epoch": 0.09732539615997317, "grad_norm": 0.4580785594163261, "learning_rate": 2e-05, "loss": 5.4403, "step": 2902 }, { "epoch": 0.09735893351219921, "grad_norm": 0.40868053335656285, "learning_rate": 2e-05, "loss": 5.454, "step": 2903 }, { "epoch": 0.09739247086442525, "grad_norm": 0.42284523827772313, "learning_rate": 2e-05, "loss": 5.6167, "step": 2904 }, { "epoch": 0.09742600821665129, "grad_norm": 0.4479286225278784, "learning_rate": 2e-05, "loss": 5.5966, "step": 2905 }, { "epoch": 0.09745954556887734, "grad_norm": 0.4217526966621397, "learning_rate": 2e-05, "loss": 5.6201, "step": 2906 }, { "epoch": 0.09749308292110338, "grad_norm": 0.4406296519481809, "learning_rate": 2e-05, "loss": 5.6282, "step": 2907 }, { "epoch": 0.09752662027332942, "grad_norm": 0.41667153442237137, "learning_rate": 2e-05, "loss": 5.5784, "step": 2908 }, { "epoch": 0.09756015762555546, "grad_norm": 0.42890889735999427, "learning_rate": 2e-05, "loss": 5.5999, "step": 2909 }, { "epoch": 0.0975936949777815, "grad_norm": 0.4232051979783287, "learning_rate": 2e-05, "loss": 5.5817, "step": 2910 }, { "epoch": 0.09762723233000754, "grad_norm": 0.4434148368909025, "learning_rate": 2e-05, "loss": 5.4554, "step": 2911 }, { "epoch": 0.09766076968223358, "grad_norm": 0.43145977865611307, "learning_rate": 2e-05, "loss": 5.5654, "step": 2912 }, { "epoch": 0.09769430703445962, "grad_norm": 0.42640917767715797, "learning_rate": 2e-05, "loss": 5.4874, "step": 2913 }, { "epoch": 0.09772784438668568, "grad_norm": 0.43248396298048786, "learning_rate": 2e-05, "loss": 5.7002, "step": 2914 }, { "epoch": 0.09776138173891172, "grad_norm": 0.4005512163587758, "learning_rate": 2e-05, "loss": 5.7496, "step": 2915 }, { "epoch": 0.09779491909113776, "grad_norm": 0.4230219361589976, "learning_rate": 2e-05, "loss": 5.6747, "step": 2916 }, { "epoch": 0.0978284564433638, "grad_norm": 0.42474829030394656, "learning_rate": 2e-05, "loss": 5.4456, "step": 2917 }, { "epoch": 0.09786199379558984, "grad_norm": 0.4215457465708461, "learning_rate": 2e-05, "loss": 5.456, "step": 2918 }, { "epoch": 0.09789553114781588, "grad_norm": 0.43690074918242666, "learning_rate": 2e-05, "loss": 5.7006, "step": 2919 }, { "epoch": 0.09792906850004192, "grad_norm": 0.449226269076718, "learning_rate": 2e-05, "loss": 5.588, "step": 2920 }, { "epoch": 0.09796260585226796, "grad_norm": 0.4289631178627522, "learning_rate": 2e-05, "loss": 5.6688, "step": 2921 }, { "epoch": 0.097996143204494, "grad_norm": 0.40846890026341814, "learning_rate": 2e-05, "loss": 5.652, "step": 2922 }, { "epoch": 0.09802968055672005, "grad_norm": 0.44208124667569243, "learning_rate": 2e-05, "loss": 5.4976, "step": 2923 }, { "epoch": 0.0980632179089461, "grad_norm": 0.4101630257920048, "learning_rate": 2e-05, "loss": 5.6396, "step": 2924 }, { "epoch": 0.09809675526117213, "grad_norm": 0.42086664029070503, "learning_rate": 2e-05, "loss": 5.6651, "step": 2925 }, { "epoch": 0.09813029261339817, "grad_norm": 0.4398879411178148, "learning_rate": 2e-05, "loss": 5.4199, "step": 2926 }, { "epoch": 0.09816382996562421, "grad_norm": 0.40949268710469766, "learning_rate": 2e-05, "loss": 5.6088, "step": 2927 }, { "epoch": 0.09819736731785025, "grad_norm": 0.417094327676539, "learning_rate": 2e-05, "loss": 5.5108, "step": 2928 }, { "epoch": 0.0982309046700763, "grad_norm": 0.4284383089185543, "learning_rate": 2e-05, "loss": 5.5212, "step": 2929 }, { "epoch": 0.09826444202230233, "grad_norm": 0.4144027848695347, "learning_rate": 2e-05, "loss": 5.3787, "step": 2930 }, { "epoch": 0.09829797937452837, "grad_norm": 0.38842819336682943, "learning_rate": 2e-05, "loss": 5.5412, "step": 2931 }, { "epoch": 0.09833151672675443, "grad_norm": 0.4079301756167161, "learning_rate": 2e-05, "loss": 5.3976, "step": 2932 }, { "epoch": 0.09836505407898047, "grad_norm": 0.402109558409095, "learning_rate": 2e-05, "loss": 5.7034, "step": 2933 }, { "epoch": 0.09839859143120651, "grad_norm": 0.4053782960053505, "learning_rate": 2e-05, "loss": 5.6032, "step": 2934 }, { "epoch": 0.09843212878343255, "grad_norm": 0.42024123523771223, "learning_rate": 2e-05, "loss": 5.5863, "step": 2935 }, { "epoch": 0.09846566613565859, "grad_norm": 0.40999486057170803, "learning_rate": 2e-05, "loss": 5.3694, "step": 2936 }, { "epoch": 0.09849920348788463, "grad_norm": 0.4114238458301363, "learning_rate": 2e-05, "loss": 5.6273, "step": 2937 }, { "epoch": 0.09853274084011067, "grad_norm": 0.4236529062633583, "learning_rate": 2e-05, "loss": 5.3483, "step": 2938 }, { "epoch": 0.09856627819233671, "grad_norm": 0.39837336147436986, "learning_rate": 2e-05, "loss": 5.6249, "step": 2939 }, { "epoch": 0.09859981554456276, "grad_norm": 0.39929639825349283, "learning_rate": 2e-05, "loss": 5.4463, "step": 2940 }, { "epoch": 0.0986333528967888, "grad_norm": 0.39555858267237837, "learning_rate": 2e-05, "loss": 5.776, "step": 2941 }, { "epoch": 0.09866689024901484, "grad_norm": 0.41745966992361333, "learning_rate": 2e-05, "loss": 5.5737, "step": 2942 }, { "epoch": 0.09870042760124088, "grad_norm": 0.41013601157380397, "learning_rate": 2e-05, "loss": 5.714, "step": 2943 }, { "epoch": 0.09873396495346692, "grad_norm": 0.432474517313724, "learning_rate": 2e-05, "loss": 5.5259, "step": 2944 }, { "epoch": 0.09876750230569296, "grad_norm": 0.40316314360399313, "learning_rate": 2e-05, "loss": 5.5551, "step": 2945 }, { "epoch": 0.098801039657919, "grad_norm": 0.4097262894918571, "learning_rate": 2e-05, "loss": 5.3936, "step": 2946 }, { "epoch": 0.09883457701014504, "grad_norm": 0.3965468756131829, "learning_rate": 2e-05, "loss": 5.5722, "step": 2947 }, { "epoch": 0.09886811436237108, "grad_norm": 0.4222721225567299, "learning_rate": 2e-05, "loss": 5.8063, "step": 2948 }, { "epoch": 0.09890165171459714, "grad_norm": 0.4131730562736932, "learning_rate": 2e-05, "loss": 5.6713, "step": 2949 }, { "epoch": 0.09893518906682318, "grad_norm": 0.4212892760754937, "learning_rate": 2e-05, "loss": 5.3682, "step": 2950 }, { "epoch": 0.09896872641904922, "grad_norm": 0.4055531605101733, "learning_rate": 2e-05, "loss": 5.5007, "step": 2951 }, { "epoch": 0.09900226377127526, "grad_norm": 0.4041135117092514, "learning_rate": 2e-05, "loss": 5.5255, "step": 2952 }, { "epoch": 0.0990358011235013, "grad_norm": 0.3859451343122753, "learning_rate": 2e-05, "loss": 5.3113, "step": 2953 }, { "epoch": 0.09906933847572734, "grad_norm": 0.3901038055501821, "learning_rate": 2e-05, "loss": 5.5031, "step": 2954 }, { "epoch": 0.09910287582795338, "grad_norm": 0.40777399902232664, "learning_rate": 2e-05, "loss": 5.7673, "step": 2955 }, { "epoch": 0.09913641318017942, "grad_norm": 0.3935293867934086, "learning_rate": 2e-05, "loss": 5.6173, "step": 2956 }, { "epoch": 0.09916995053240546, "grad_norm": 0.4286727439467258, "learning_rate": 2e-05, "loss": 5.7957, "step": 2957 }, { "epoch": 0.09920348788463151, "grad_norm": 0.4139674074563024, "learning_rate": 2e-05, "loss": 5.6961, "step": 2958 }, { "epoch": 0.09923702523685755, "grad_norm": 0.4230688030279931, "learning_rate": 2e-05, "loss": 5.7543, "step": 2959 }, { "epoch": 0.0992705625890836, "grad_norm": 0.4192380782363351, "learning_rate": 2e-05, "loss": 5.7021, "step": 2960 }, { "epoch": 0.09930409994130963, "grad_norm": 0.44177583445351987, "learning_rate": 2e-05, "loss": 5.619, "step": 2961 }, { "epoch": 0.09933763729353567, "grad_norm": 0.39523453191816454, "learning_rate": 2e-05, "loss": 5.528, "step": 2962 }, { "epoch": 0.09937117464576171, "grad_norm": 0.40984399624703755, "learning_rate": 2e-05, "loss": 5.5467, "step": 2963 }, { "epoch": 0.09940471199798775, "grad_norm": 0.4212694574315305, "learning_rate": 2e-05, "loss": 5.5973, "step": 2964 }, { "epoch": 0.0994382493502138, "grad_norm": 0.40021678609757244, "learning_rate": 2e-05, "loss": 5.4197, "step": 2965 }, { "epoch": 0.09947178670243985, "grad_norm": 0.43098173597888817, "learning_rate": 2e-05, "loss": 5.6641, "step": 2966 }, { "epoch": 0.09950532405466589, "grad_norm": 0.45033276778015396, "learning_rate": 2e-05, "loss": 5.6395, "step": 2967 }, { "epoch": 0.09953886140689193, "grad_norm": 0.4032052798206839, "learning_rate": 2e-05, "loss": 5.3872, "step": 2968 }, { "epoch": 0.09957239875911797, "grad_norm": 0.4224708240411215, "learning_rate": 2e-05, "loss": 5.5785, "step": 2969 }, { "epoch": 0.09960593611134401, "grad_norm": 0.410729137227568, "learning_rate": 2e-05, "loss": 5.5549, "step": 2970 }, { "epoch": 0.09963947346357005, "grad_norm": 0.455773644489537, "learning_rate": 2e-05, "loss": 5.546, "step": 2971 }, { "epoch": 0.09967301081579609, "grad_norm": 0.4290703455678436, "learning_rate": 2e-05, "loss": 5.6025, "step": 2972 }, { "epoch": 0.09970654816802213, "grad_norm": 0.40650769426530287, "learning_rate": 2e-05, "loss": 5.4827, "step": 2973 }, { "epoch": 0.09974008552024817, "grad_norm": 0.40292435792361514, "learning_rate": 2e-05, "loss": 5.5462, "step": 2974 }, { "epoch": 0.09977362287247422, "grad_norm": 0.39835902562475256, "learning_rate": 2e-05, "loss": 5.6178, "step": 2975 }, { "epoch": 0.09980716022470026, "grad_norm": 0.4498420525590363, "learning_rate": 2e-05, "loss": 5.5198, "step": 2976 }, { "epoch": 0.0998406975769263, "grad_norm": 0.4156253292482333, "learning_rate": 2e-05, "loss": 5.7956, "step": 2977 }, { "epoch": 0.09987423492915234, "grad_norm": 0.42150552790598306, "learning_rate": 2e-05, "loss": 5.6674, "step": 2978 }, { "epoch": 0.09990777228137838, "grad_norm": 0.4542040205204572, "learning_rate": 2e-05, "loss": 5.5563, "step": 2979 }, { "epoch": 0.09994130963360442, "grad_norm": 0.40126400943145435, "learning_rate": 2e-05, "loss": 5.501, "step": 2980 }, { "epoch": 0.09997484698583047, "grad_norm": 0.4148629971598214, "learning_rate": 2e-05, "loss": 5.3441, "step": 2981 }, { "epoch": 0.1000083843380565, "grad_norm": 0.47164866824000384, "learning_rate": 2e-05, "loss": 5.6983, "step": 2982 }, { "epoch": 0.10004192169028255, "grad_norm": 0.4501379374242939, "learning_rate": 2e-05, "loss": 5.6292, "step": 2983 }, { "epoch": 0.1000754590425086, "grad_norm": 0.428477067227862, "learning_rate": 2e-05, "loss": 5.6512, "step": 2984 }, { "epoch": 0.10010899639473464, "grad_norm": 0.4094705234637409, "learning_rate": 2e-05, "loss": 5.7194, "step": 2985 }, { "epoch": 0.10014253374696068, "grad_norm": 0.4487550821470635, "learning_rate": 2e-05, "loss": 5.5743, "step": 2986 }, { "epoch": 0.10017607109918672, "grad_norm": 0.40570533425463345, "learning_rate": 2e-05, "loss": 5.5973, "step": 2987 }, { "epoch": 0.10020960845141276, "grad_norm": 0.4420870810615123, "learning_rate": 2e-05, "loss": 5.4648, "step": 2988 }, { "epoch": 0.1002431458036388, "grad_norm": 0.41818245039451163, "learning_rate": 2e-05, "loss": 5.6818, "step": 2989 }, { "epoch": 0.10027668315586484, "grad_norm": 0.4002684849872194, "learning_rate": 2e-05, "loss": 5.604, "step": 2990 }, { "epoch": 0.10031022050809088, "grad_norm": 0.40762098058454177, "learning_rate": 2e-05, "loss": 5.5819, "step": 2991 }, { "epoch": 0.10034375786031693, "grad_norm": 0.4315908838427542, "learning_rate": 2e-05, "loss": 5.4672, "step": 2992 }, { "epoch": 0.10037729521254297, "grad_norm": 0.39809693169498145, "learning_rate": 2e-05, "loss": 5.6102, "step": 2993 }, { "epoch": 0.10041083256476901, "grad_norm": 0.4038799176925259, "learning_rate": 2e-05, "loss": 5.526, "step": 2994 }, { "epoch": 0.10044436991699505, "grad_norm": 0.4005376084971032, "learning_rate": 2e-05, "loss": 5.5336, "step": 2995 }, { "epoch": 0.1004779072692211, "grad_norm": 0.40084339811452585, "learning_rate": 2e-05, "loss": 5.4659, "step": 2996 }, { "epoch": 0.10051144462144714, "grad_norm": 0.41101066543623854, "learning_rate": 2e-05, "loss": 5.3573, "step": 2997 }, { "epoch": 0.10054498197367318, "grad_norm": 0.416409893701346, "learning_rate": 2e-05, "loss": 5.5532, "step": 2998 }, { "epoch": 0.10057851932589922, "grad_norm": 0.4278348772491418, "learning_rate": 2e-05, "loss": 5.48, "step": 2999 }, { "epoch": 0.10061205667812526, "grad_norm": 0.45600949689409703, "learning_rate": 2e-05, "loss": 5.5597, "step": 3000 }, { "epoch": 0.10064559403035131, "grad_norm": 0.4142862700448555, "learning_rate": 2e-05, "loss": 5.5871, "step": 3001 }, { "epoch": 0.10067913138257735, "grad_norm": 0.4062149307858233, "learning_rate": 2e-05, "loss": 5.6019, "step": 3002 }, { "epoch": 0.10071266873480339, "grad_norm": 0.4247787507693562, "learning_rate": 2e-05, "loss": 5.4055, "step": 3003 }, { "epoch": 0.10074620608702943, "grad_norm": 0.4404278396397471, "learning_rate": 2e-05, "loss": 5.5526, "step": 3004 }, { "epoch": 0.10077974343925547, "grad_norm": 0.4210999959214935, "learning_rate": 2e-05, "loss": 5.6856, "step": 3005 }, { "epoch": 0.10081328079148151, "grad_norm": 0.437658193125379, "learning_rate": 2e-05, "loss": 5.7212, "step": 3006 }, { "epoch": 0.10084681814370755, "grad_norm": 0.40339422479751297, "learning_rate": 2e-05, "loss": 5.5037, "step": 3007 }, { "epoch": 0.10088035549593359, "grad_norm": 0.41361207738509914, "learning_rate": 2e-05, "loss": 5.4883, "step": 3008 }, { "epoch": 0.10091389284815963, "grad_norm": 0.40807196858320266, "learning_rate": 2e-05, "loss": 5.775, "step": 3009 }, { "epoch": 0.10094743020038568, "grad_norm": 0.47050345020213385, "learning_rate": 2e-05, "loss": 5.4274, "step": 3010 }, { "epoch": 0.10098096755261173, "grad_norm": 0.42828457386310126, "learning_rate": 2e-05, "loss": 5.5429, "step": 3011 }, { "epoch": 0.10101450490483777, "grad_norm": 0.4359478320706157, "learning_rate": 2e-05, "loss": 5.6429, "step": 3012 }, { "epoch": 0.1010480422570638, "grad_norm": 0.44521731300977685, "learning_rate": 2e-05, "loss": 5.4944, "step": 3013 }, { "epoch": 0.10108157960928985, "grad_norm": 0.423151901558855, "learning_rate": 2e-05, "loss": 5.4981, "step": 3014 }, { "epoch": 0.10111511696151589, "grad_norm": 0.4031549454166784, "learning_rate": 2e-05, "loss": 5.5593, "step": 3015 }, { "epoch": 0.10114865431374193, "grad_norm": 0.43592149624350873, "learning_rate": 2e-05, "loss": 5.5773, "step": 3016 }, { "epoch": 0.10118219166596797, "grad_norm": 0.49393938618906025, "learning_rate": 2e-05, "loss": 5.4664, "step": 3017 }, { "epoch": 0.10121572901819402, "grad_norm": 0.4007975266531117, "learning_rate": 2e-05, "loss": 5.549, "step": 3018 }, { "epoch": 0.10124926637042006, "grad_norm": 0.45009487682553834, "learning_rate": 2e-05, "loss": 5.6096, "step": 3019 }, { "epoch": 0.1012828037226461, "grad_norm": 0.45439023176783255, "learning_rate": 2e-05, "loss": 5.4646, "step": 3020 }, { "epoch": 0.10131634107487214, "grad_norm": 0.41050279321238964, "learning_rate": 2e-05, "loss": 5.5778, "step": 3021 }, { "epoch": 0.10134987842709818, "grad_norm": 0.4543352190803486, "learning_rate": 2e-05, "loss": 5.5599, "step": 3022 }, { "epoch": 0.10138341577932422, "grad_norm": 0.46120899507077445, "learning_rate": 2e-05, "loss": 5.5922, "step": 3023 }, { "epoch": 0.10141695313155026, "grad_norm": 0.40538603721835587, "learning_rate": 2e-05, "loss": 5.4923, "step": 3024 }, { "epoch": 0.1014504904837763, "grad_norm": 0.44755711403803017, "learning_rate": 2e-05, "loss": 5.7452, "step": 3025 }, { "epoch": 0.10148402783600234, "grad_norm": 0.4164196770113627, "learning_rate": 2e-05, "loss": 5.3511, "step": 3026 }, { "epoch": 0.1015175651882284, "grad_norm": 0.41647360851670173, "learning_rate": 2e-05, "loss": 5.6642, "step": 3027 }, { "epoch": 0.10155110254045444, "grad_norm": 0.4208703422903254, "learning_rate": 2e-05, "loss": 5.6392, "step": 3028 }, { "epoch": 0.10158463989268048, "grad_norm": 0.41191392870666843, "learning_rate": 2e-05, "loss": 5.4614, "step": 3029 }, { "epoch": 0.10161817724490652, "grad_norm": 0.41201912387996625, "learning_rate": 2e-05, "loss": 5.4098, "step": 3030 }, { "epoch": 0.10165171459713256, "grad_norm": 0.4564419128177032, "learning_rate": 2e-05, "loss": 5.5234, "step": 3031 }, { "epoch": 0.1016852519493586, "grad_norm": 0.43381555940183614, "learning_rate": 2e-05, "loss": 5.5769, "step": 3032 }, { "epoch": 0.10171878930158464, "grad_norm": 0.4077013391397404, "learning_rate": 2e-05, "loss": 5.4456, "step": 3033 }, { "epoch": 0.10175232665381068, "grad_norm": 0.43548098523246787, "learning_rate": 2e-05, "loss": 5.7341, "step": 3034 }, { "epoch": 0.10178586400603672, "grad_norm": 0.46582584477640426, "learning_rate": 2e-05, "loss": 5.5062, "step": 3035 }, { "epoch": 0.10181940135826277, "grad_norm": 0.42652794325784793, "learning_rate": 2e-05, "loss": 5.5866, "step": 3036 }, { "epoch": 0.10185293871048881, "grad_norm": 0.42540100176590706, "learning_rate": 2e-05, "loss": 5.6457, "step": 3037 }, { "epoch": 0.10188647606271485, "grad_norm": 0.4501583189587986, "learning_rate": 2e-05, "loss": 5.4989, "step": 3038 }, { "epoch": 0.10192001341494089, "grad_norm": 0.44839343746866717, "learning_rate": 2e-05, "loss": 5.6693, "step": 3039 }, { "epoch": 0.10195355076716693, "grad_norm": 0.42900774804421316, "learning_rate": 2e-05, "loss": 5.6609, "step": 3040 }, { "epoch": 0.10198708811939297, "grad_norm": 0.4335534115550069, "learning_rate": 2e-05, "loss": 5.5189, "step": 3041 }, { "epoch": 0.10202062547161901, "grad_norm": 0.43860381043519475, "learning_rate": 2e-05, "loss": 5.4781, "step": 3042 }, { "epoch": 0.10205416282384505, "grad_norm": 0.4138642563123816, "learning_rate": 2e-05, "loss": 5.6457, "step": 3043 }, { "epoch": 0.1020877001760711, "grad_norm": 0.39932364163024364, "learning_rate": 2e-05, "loss": 5.4901, "step": 3044 }, { "epoch": 0.10212123752829715, "grad_norm": 0.43861686009826023, "learning_rate": 2e-05, "loss": 5.5215, "step": 3045 }, { "epoch": 0.10215477488052319, "grad_norm": 0.38842151315021967, "learning_rate": 2e-05, "loss": 5.4861, "step": 3046 }, { "epoch": 0.10218831223274923, "grad_norm": 0.41420839071040283, "learning_rate": 2e-05, "loss": 5.71, "step": 3047 }, { "epoch": 0.10222184958497527, "grad_norm": 0.4184440196496432, "learning_rate": 2e-05, "loss": 5.7809, "step": 3048 }, { "epoch": 0.1022553869372013, "grad_norm": 0.4316073428200331, "learning_rate": 2e-05, "loss": 5.6362, "step": 3049 }, { "epoch": 0.10228892428942735, "grad_norm": 0.40255810149761023, "learning_rate": 2e-05, "loss": 5.7096, "step": 3050 }, { "epoch": 0.10232246164165339, "grad_norm": 0.44052696861244567, "learning_rate": 2e-05, "loss": 5.6103, "step": 3051 }, { "epoch": 0.10235599899387943, "grad_norm": 0.43463141910110115, "learning_rate": 2e-05, "loss": 5.4966, "step": 3052 }, { "epoch": 0.10238953634610548, "grad_norm": 0.44448951876334825, "learning_rate": 2e-05, "loss": 5.5364, "step": 3053 }, { "epoch": 0.10242307369833152, "grad_norm": 0.409200878892378, "learning_rate": 2e-05, "loss": 5.5201, "step": 3054 }, { "epoch": 0.10245661105055756, "grad_norm": 0.4160553885367706, "learning_rate": 2e-05, "loss": 5.6965, "step": 3055 }, { "epoch": 0.1024901484027836, "grad_norm": 0.42610458787800903, "learning_rate": 2e-05, "loss": 5.5551, "step": 3056 }, { "epoch": 0.10252368575500964, "grad_norm": 0.41223234325173597, "learning_rate": 2e-05, "loss": 5.7305, "step": 3057 }, { "epoch": 0.10255722310723568, "grad_norm": 0.41960852718897884, "learning_rate": 2e-05, "loss": 5.516, "step": 3058 }, { "epoch": 0.10259076045946172, "grad_norm": 0.4418145107543364, "learning_rate": 2e-05, "loss": 5.499, "step": 3059 }, { "epoch": 0.10262429781168776, "grad_norm": 0.42782636963777654, "learning_rate": 2e-05, "loss": 5.6994, "step": 3060 }, { "epoch": 0.10265783516391382, "grad_norm": 0.4143586379330015, "learning_rate": 2e-05, "loss": 5.6379, "step": 3061 }, { "epoch": 0.10269137251613986, "grad_norm": 0.427352147227393, "learning_rate": 2e-05, "loss": 5.5782, "step": 3062 }, { "epoch": 0.1027249098683659, "grad_norm": 0.4697220092110256, "learning_rate": 2e-05, "loss": 5.511, "step": 3063 }, { "epoch": 0.10275844722059194, "grad_norm": 0.4425018031509854, "learning_rate": 2e-05, "loss": 5.4255, "step": 3064 }, { "epoch": 0.10279198457281798, "grad_norm": 0.43334111253368907, "learning_rate": 2e-05, "loss": 5.6601, "step": 3065 }, { "epoch": 0.10282552192504402, "grad_norm": 0.43237510709193117, "learning_rate": 2e-05, "loss": 5.6839, "step": 3066 }, { "epoch": 0.10285905927727006, "grad_norm": 0.4012355233950333, "learning_rate": 2e-05, "loss": 5.3886, "step": 3067 }, { "epoch": 0.1028925966294961, "grad_norm": 0.43805405359436755, "learning_rate": 2e-05, "loss": 5.3308, "step": 3068 }, { "epoch": 0.10292613398172214, "grad_norm": 0.4152907626596502, "learning_rate": 2e-05, "loss": 5.7644, "step": 3069 }, { "epoch": 0.10295967133394819, "grad_norm": 0.429406300414297, "learning_rate": 2e-05, "loss": 5.4912, "step": 3070 }, { "epoch": 0.10299320868617423, "grad_norm": 0.4161926272700727, "learning_rate": 2e-05, "loss": 5.4893, "step": 3071 }, { "epoch": 0.10302674603840027, "grad_norm": 0.4342694851496708, "learning_rate": 2e-05, "loss": 5.3461, "step": 3072 }, { "epoch": 0.10306028339062631, "grad_norm": 0.4084967432567474, "learning_rate": 2e-05, "loss": 5.213, "step": 3073 }, { "epoch": 0.10309382074285235, "grad_norm": 0.42923078800555686, "learning_rate": 2e-05, "loss": 5.5429, "step": 3074 }, { "epoch": 0.10312735809507839, "grad_norm": 0.39358705471195254, "learning_rate": 2e-05, "loss": 5.521, "step": 3075 }, { "epoch": 0.10316089544730443, "grad_norm": 0.41714266921848264, "learning_rate": 2e-05, "loss": 5.5756, "step": 3076 }, { "epoch": 0.10319443279953047, "grad_norm": 0.415330250878509, "learning_rate": 2e-05, "loss": 5.6009, "step": 3077 }, { "epoch": 0.10322797015175651, "grad_norm": 0.4191813011844096, "learning_rate": 2e-05, "loss": 5.7676, "step": 3078 }, { "epoch": 0.10326150750398257, "grad_norm": 0.42317218692264763, "learning_rate": 2e-05, "loss": 5.8103, "step": 3079 }, { "epoch": 0.1032950448562086, "grad_norm": 0.4235537754811091, "learning_rate": 2e-05, "loss": 5.4877, "step": 3080 }, { "epoch": 0.10332858220843465, "grad_norm": 0.4090281749018758, "learning_rate": 2e-05, "loss": 5.3648, "step": 3081 }, { "epoch": 0.10336211956066069, "grad_norm": 0.3974697705411952, "learning_rate": 2e-05, "loss": 5.6077, "step": 3082 }, { "epoch": 0.10339565691288673, "grad_norm": 0.46031059002455194, "learning_rate": 2e-05, "loss": 5.6655, "step": 3083 }, { "epoch": 0.10342919426511277, "grad_norm": 0.4090885189959987, "learning_rate": 2e-05, "loss": 5.7309, "step": 3084 }, { "epoch": 0.10346273161733881, "grad_norm": 0.41262671542773854, "learning_rate": 2e-05, "loss": 5.5166, "step": 3085 }, { "epoch": 0.10349626896956485, "grad_norm": 0.4243811630561162, "learning_rate": 2e-05, "loss": 5.6405, "step": 3086 }, { "epoch": 0.1035298063217909, "grad_norm": 0.41398437688316664, "learning_rate": 2e-05, "loss": 5.7016, "step": 3087 }, { "epoch": 0.10356334367401694, "grad_norm": 0.41996588591153844, "learning_rate": 2e-05, "loss": 5.5702, "step": 3088 }, { "epoch": 0.10359688102624298, "grad_norm": 0.4118182184200243, "learning_rate": 2e-05, "loss": 5.4803, "step": 3089 }, { "epoch": 0.10363041837846902, "grad_norm": 0.43570033919712087, "learning_rate": 2e-05, "loss": 5.6627, "step": 3090 }, { "epoch": 0.10366395573069506, "grad_norm": 0.4027903464159655, "learning_rate": 2e-05, "loss": 5.4522, "step": 3091 }, { "epoch": 0.1036974930829211, "grad_norm": 0.41538873112538044, "learning_rate": 2e-05, "loss": 5.504, "step": 3092 }, { "epoch": 0.10373103043514714, "grad_norm": 0.4191291311849902, "learning_rate": 2e-05, "loss": 5.5007, "step": 3093 }, { "epoch": 0.10376456778737318, "grad_norm": 0.39053055931945374, "learning_rate": 2e-05, "loss": 5.6103, "step": 3094 }, { "epoch": 0.10379810513959922, "grad_norm": 0.40803711518915037, "learning_rate": 2e-05, "loss": 5.4727, "step": 3095 }, { "epoch": 0.10383164249182528, "grad_norm": 0.39392302404777374, "learning_rate": 2e-05, "loss": 5.3992, "step": 3096 }, { "epoch": 0.10386517984405132, "grad_norm": 0.4241063878163013, "learning_rate": 2e-05, "loss": 5.5617, "step": 3097 }, { "epoch": 0.10389871719627736, "grad_norm": 0.41112167273676264, "learning_rate": 2e-05, "loss": 5.4379, "step": 3098 }, { "epoch": 0.1039322545485034, "grad_norm": 0.4018792345866349, "learning_rate": 2e-05, "loss": 5.3117, "step": 3099 }, { "epoch": 0.10396579190072944, "grad_norm": 0.3984389100823835, "learning_rate": 2e-05, "loss": 5.4836, "step": 3100 }, { "epoch": 0.10399932925295548, "grad_norm": 0.425900078307238, "learning_rate": 2e-05, "loss": 5.6285, "step": 3101 }, { "epoch": 0.10403286660518152, "grad_norm": 0.4520727611536738, "learning_rate": 2e-05, "loss": 5.6005, "step": 3102 }, { "epoch": 0.10406640395740756, "grad_norm": 0.45617189098043665, "learning_rate": 2e-05, "loss": 5.4493, "step": 3103 }, { "epoch": 0.1040999413096336, "grad_norm": 0.43778671189510693, "learning_rate": 2e-05, "loss": 5.5471, "step": 3104 }, { "epoch": 0.10413347866185965, "grad_norm": 0.4172762488708169, "learning_rate": 2e-05, "loss": 5.5501, "step": 3105 }, { "epoch": 0.10416701601408569, "grad_norm": 0.4203077336966103, "learning_rate": 2e-05, "loss": 5.5664, "step": 3106 }, { "epoch": 0.10420055336631173, "grad_norm": 0.43116328085153266, "learning_rate": 2e-05, "loss": 5.6201, "step": 3107 }, { "epoch": 0.10423409071853777, "grad_norm": 0.39970448387129276, "learning_rate": 2e-05, "loss": 5.614, "step": 3108 }, { "epoch": 0.10426762807076381, "grad_norm": 0.4142750407807605, "learning_rate": 2e-05, "loss": 5.6224, "step": 3109 }, { "epoch": 0.10430116542298985, "grad_norm": 0.3877335105487696, "learning_rate": 2e-05, "loss": 5.5826, "step": 3110 }, { "epoch": 0.10433470277521589, "grad_norm": 0.4050572917629175, "learning_rate": 2e-05, "loss": 5.7952, "step": 3111 }, { "epoch": 0.10436824012744193, "grad_norm": 0.4126235033290565, "learning_rate": 2e-05, "loss": 5.7172, "step": 3112 }, { "epoch": 0.10440177747966799, "grad_norm": 0.43382949799260306, "learning_rate": 2e-05, "loss": 5.7599, "step": 3113 }, { "epoch": 0.10443531483189403, "grad_norm": 0.41125971365962777, "learning_rate": 2e-05, "loss": 5.6886, "step": 3114 }, { "epoch": 0.10446885218412007, "grad_norm": 0.45578489741294764, "learning_rate": 2e-05, "loss": 5.6031, "step": 3115 }, { "epoch": 0.10450238953634611, "grad_norm": 0.430322840153027, "learning_rate": 2e-05, "loss": 5.5867, "step": 3116 }, { "epoch": 0.10453592688857215, "grad_norm": 0.41656244557152305, "learning_rate": 2e-05, "loss": 5.6447, "step": 3117 }, { "epoch": 0.10456946424079819, "grad_norm": 0.41127044338160845, "learning_rate": 2e-05, "loss": 5.6088, "step": 3118 }, { "epoch": 0.10460300159302423, "grad_norm": 0.4065256509762132, "learning_rate": 2e-05, "loss": 5.4575, "step": 3119 }, { "epoch": 0.10463653894525027, "grad_norm": 0.4232470548187559, "learning_rate": 2e-05, "loss": 5.6636, "step": 3120 }, { "epoch": 0.10467007629747631, "grad_norm": 0.42938874989672404, "learning_rate": 2e-05, "loss": 5.5102, "step": 3121 }, { "epoch": 0.10470361364970236, "grad_norm": 0.42485757147413233, "learning_rate": 2e-05, "loss": 5.6356, "step": 3122 }, { "epoch": 0.1047371510019284, "grad_norm": 0.4269847814443841, "learning_rate": 2e-05, "loss": 5.3646, "step": 3123 }, { "epoch": 0.10477068835415444, "grad_norm": 0.431584355993592, "learning_rate": 2e-05, "loss": 5.6061, "step": 3124 }, { "epoch": 0.10480422570638048, "grad_norm": 0.4278794765561144, "learning_rate": 2e-05, "loss": 5.3994, "step": 3125 }, { "epoch": 0.10483776305860652, "grad_norm": 0.41799564376480364, "learning_rate": 2e-05, "loss": 5.6826, "step": 3126 }, { "epoch": 0.10487130041083256, "grad_norm": 0.41690272913000886, "learning_rate": 2e-05, "loss": 5.5357, "step": 3127 }, { "epoch": 0.1049048377630586, "grad_norm": 0.40825771983067644, "learning_rate": 2e-05, "loss": 5.4633, "step": 3128 }, { "epoch": 0.10493837511528464, "grad_norm": 0.42406965014703407, "learning_rate": 2e-05, "loss": 5.4146, "step": 3129 }, { "epoch": 0.10497191246751068, "grad_norm": 0.4086722066339692, "learning_rate": 2e-05, "loss": 5.5127, "step": 3130 }, { "epoch": 0.10500544981973674, "grad_norm": 0.4716743173416863, "learning_rate": 2e-05, "loss": 5.6213, "step": 3131 }, { "epoch": 0.10503898717196278, "grad_norm": 0.4133523554372107, "learning_rate": 2e-05, "loss": 5.4826, "step": 3132 }, { "epoch": 0.10507252452418882, "grad_norm": 0.4330549877841243, "learning_rate": 2e-05, "loss": 5.5062, "step": 3133 }, { "epoch": 0.10510606187641486, "grad_norm": 0.4389602832104134, "learning_rate": 2e-05, "loss": 5.4774, "step": 3134 }, { "epoch": 0.1051395992286409, "grad_norm": 0.4791145010336078, "learning_rate": 2e-05, "loss": 5.6864, "step": 3135 }, { "epoch": 0.10517313658086694, "grad_norm": 0.42445174288585535, "learning_rate": 2e-05, "loss": 5.5036, "step": 3136 }, { "epoch": 0.10520667393309298, "grad_norm": 0.4186661874976387, "learning_rate": 2e-05, "loss": 5.3857, "step": 3137 }, { "epoch": 0.10524021128531902, "grad_norm": 0.4022897964564163, "learning_rate": 2e-05, "loss": 5.5257, "step": 3138 }, { "epoch": 0.10527374863754507, "grad_norm": 0.4433180496835971, "learning_rate": 2e-05, "loss": 5.4283, "step": 3139 }, { "epoch": 0.10530728598977111, "grad_norm": 0.4172141063191252, "learning_rate": 2e-05, "loss": 5.4468, "step": 3140 }, { "epoch": 0.10534082334199715, "grad_norm": 0.4105813107055401, "learning_rate": 2e-05, "loss": 5.7134, "step": 3141 }, { "epoch": 0.1053743606942232, "grad_norm": 0.42681877690670994, "learning_rate": 2e-05, "loss": 5.5397, "step": 3142 }, { "epoch": 0.10540789804644923, "grad_norm": 0.49492730172127164, "learning_rate": 2e-05, "loss": 5.5945, "step": 3143 }, { "epoch": 0.10544143539867527, "grad_norm": 0.4280671407751488, "learning_rate": 2e-05, "loss": 5.5158, "step": 3144 }, { "epoch": 0.10547497275090131, "grad_norm": 0.40581272845542166, "learning_rate": 2e-05, "loss": 5.6597, "step": 3145 }, { "epoch": 0.10550851010312735, "grad_norm": 0.40163709458325647, "learning_rate": 2e-05, "loss": 5.4606, "step": 3146 }, { "epoch": 0.1055420474553534, "grad_norm": 0.4030914759749655, "learning_rate": 2e-05, "loss": 5.6092, "step": 3147 }, { "epoch": 0.10557558480757945, "grad_norm": 0.4620636537355924, "learning_rate": 2e-05, "loss": 5.7085, "step": 3148 }, { "epoch": 0.10560912215980549, "grad_norm": 0.43349543733977913, "learning_rate": 2e-05, "loss": 5.6772, "step": 3149 }, { "epoch": 0.10564265951203153, "grad_norm": 0.4061328392947904, "learning_rate": 2e-05, "loss": 5.6506, "step": 3150 }, { "epoch": 0.10567619686425757, "grad_norm": 0.4413893109089666, "learning_rate": 2e-05, "loss": 5.7081, "step": 3151 }, { "epoch": 0.10570973421648361, "grad_norm": 0.4463349670000757, "learning_rate": 2e-05, "loss": 5.4997, "step": 3152 }, { "epoch": 0.10574327156870965, "grad_norm": 0.4228116978701917, "learning_rate": 2e-05, "loss": 5.5454, "step": 3153 }, { "epoch": 0.10577680892093569, "grad_norm": 0.45280532114164673, "learning_rate": 2e-05, "loss": 5.4525, "step": 3154 }, { "epoch": 0.10581034627316173, "grad_norm": 0.45380734737481615, "learning_rate": 2e-05, "loss": 5.5501, "step": 3155 }, { "epoch": 0.10584388362538777, "grad_norm": 0.4353810693309805, "learning_rate": 2e-05, "loss": 5.6179, "step": 3156 }, { "epoch": 0.10587742097761382, "grad_norm": 0.40218327828194644, "learning_rate": 2e-05, "loss": 5.6572, "step": 3157 }, { "epoch": 0.10591095832983986, "grad_norm": 0.42417529618481314, "learning_rate": 2e-05, "loss": 5.596, "step": 3158 }, { "epoch": 0.1059444956820659, "grad_norm": 0.4456492093843934, "learning_rate": 2e-05, "loss": 5.7101, "step": 3159 }, { "epoch": 0.10597803303429194, "grad_norm": 0.414125701256687, "learning_rate": 2e-05, "loss": 5.6106, "step": 3160 }, { "epoch": 0.10601157038651798, "grad_norm": 0.39923908394490587, "learning_rate": 2e-05, "loss": 5.5647, "step": 3161 }, { "epoch": 0.10604510773874402, "grad_norm": 0.40090925366692226, "learning_rate": 2e-05, "loss": 5.5227, "step": 3162 }, { "epoch": 0.10607864509097006, "grad_norm": 0.40508447617894655, "learning_rate": 2e-05, "loss": 5.4429, "step": 3163 }, { "epoch": 0.1061121824431961, "grad_norm": 0.4553242153263736, "learning_rate": 2e-05, "loss": 5.5171, "step": 3164 }, { "epoch": 0.10614571979542216, "grad_norm": 0.40587575898434486, "learning_rate": 2e-05, "loss": 5.7335, "step": 3165 }, { "epoch": 0.1061792571476482, "grad_norm": 0.41052869681374315, "learning_rate": 2e-05, "loss": 5.5624, "step": 3166 }, { "epoch": 0.10621279449987424, "grad_norm": 0.40412070337611256, "learning_rate": 2e-05, "loss": 5.3814, "step": 3167 }, { "epoch": 0.10624633185210028, "grad_norm": 0.4262324069867121, "learning_rate": 2e-05, "loss": 5.5892, "step": 3168 }, { "epoch": 0.10627986920432632, "grad_norm": 0.4100954429566604, "learning_rate": 2e-05, "loss": 5.6981, "step": 3169 }, { "epoch": 0.10631340655655236, "grad_norm": 0.4424045739614947, "learning_rate": 2e-05, "loss": 5.6365, "step": 3170 }, { "epoch": 0.1063469439087784, "grad_norm": 0.4131534559340238, "learning_rate": 2e-05, "loss": 5.4332, "step": 3171 }, { "epoch": 0.10638048126100444, "grad_norm": 0.4162629861195975, "learning_rate": 2e-05, "loss": 5.2409, "step": 3172 }, { "epoch": 0.10641401861323048, "grad_norm": 0.3928934510858688, "learning_rate": 2e-05, "loss": 5.5046, "step": 3173 }, { "epoch": 0.10644755596545653, "grad_norm": 0.46613499477008785, "learning_rate": 2e-05, "loss": 5.7975, "step": 3174 }, { "epoch": 0.10648109331768257, "grad_norm": 0.4289052525767259, "learning_rate": 2e-05, "loss": 5.7096, "step": 3175 }, { "epoch": 0.10651463066990861, "grad_norm": 0.4354995126996069, "learning_rate": 2e-05, "loss": 5.3684, "step": 3176 }, { "epoch": 0.10654816802213465, "grad_norm": 0.42743272532974613, "learning_rate": 2e-05, "loss": 5.5095, "step": 3177 }, { "epoch": 0.1065817053743607, "grad_norm": 0.4248758258097136, "learning_rate": 2e-05, "loss": 5.5496, "step": 3178 }, { "epoch": 0.10661524272658673, "grad_norm": 0.3997497624544976, "learning_rate": 2e-05, "loss": 5.381, "step": 3179 }, { "epoch": 0.10664878007881277, "grad_norm": 0.43365416726644607, "learning_rate": 2e-05, "loss": 5.5471, "step": 3180 }, { "epoch": 0.10668231743103881, "grad_norm": 0.43071654273193416, "learning_rate": 2e-05, "loss": 5.4415, "step": 3181 }, { "epoch": 0.10671585478326485, "grad_norm": 0.4142846939862543, "learning_rate": 2e-05, "loss": 5.5746, "step": 3182 }, { "epoch": 0.10674939213549091, "grad_norm": 0.42318347925881605, "learning_rate": 2e-05, "loss": 5.501, "step": 3183 }, { "epoch": 0.10678292948771695, "grad_norm": 0.4226021238269001, "learning_rate": 2e-05, "loss": 5.3627, "step": 3184 }, { "epoch": 0.10681646683994299, "grad_norm": 0.43013694597509344, "learning_rate": 2e-05, "loss": 5.6009, "step": 3185 }, { "epoch": 0.10685000419216903, "grad_norm": 0.4748417988656866, "learning_rate": 2e-05, "loss": 5.3557, "step": 3186 }, { "epoch": 0.10688354154439507, "grad_norm": 0.41559554131861004, "learning_rate": 2e-05, "loss": 5.7381, "step": 3187 }, { "epoch": 0.10691707889662111, "grad_norm": 0.4269941590205203, "learning_rate": 2e-05, "loss": 5.6577, "step": 3188 }, { "epoch": 0.10695061624884715, "grad_norm": 0.4240120309214736, "learning_rate": 2e-05, "loss": 5.7192, "step": 3189 }, { "epoch": 0.10698415360107319, "grad_norm": 0.41540685645043807, "learning_rate": 2e-05, "loss": 5.4322, "step": 3190 }, { "epoch": 0.10701769095329924, "grad_norm": 0.432029219020355, "learning_rate": 2e-05, "loss": 5.3806, "step": 3191 }, { "epoch": 0.10705122830552528, "grad_norm": 0.39234787971091756, "learning_rate": 2e-05, "loss": 5.5227, "step": 3192 }, { "epoch": 0.10708476565775132, "grad_norm": 0.41340342698281657, "learning_rate": 2e-05, "loss": 5.5617, "step": 3193 }, { "epoch": 0.10711830300997736, "grad_norm": 0.43804114144560935, "learning_rate": 2e-05, "loss": 5.5448, "step": 3194 }, { "epoch": 0.1071518403622034, "grad_norm": 0.4332745627401869, "learning_rate": 2e-05, "loss": 5.451, "step": 3195 }, { "epoch": 0.10718537771442944, "grad_norm": 0.42985686785536964, "learning_rate": 2e-05, "loss": 5.291, "step": 3196 }, { "epoch": 0.10721891506665548, "grad_norm": 0.43594262451389165, "learning_rate": 2e-05, "loss": 5.6604, "step": 3197 }, { "epoch": 0.10725245241888152, "grad_norm": 0.41325790811243646, "learning_rate": 2e-05, "loss": 5.7805, "step": 3198 }, { "epoch": 0.10728598977110756, "grad_norm": 0.41403368783103317, "learning_rate": 2e-05, "loss": 5.5984, "step": 3199 }, { "epoch": 0.10731952712333362, "grad_norm": 0.41467304186960635, "learning_rate": 2e-05, "loss": 5.5247, "step": 3200 }, { "epoch": 0.10735306447555966, "grad_norm": 0.47990236280613063, "learning_rate": 2e-05, "loss": 5.2462, "step": 3201 }, { "epoch": 0.1073866018277857, "grad_norm": 0.40226292049848306, "learning_rate": 2e-05, "loss": 5.5306, "step": 3202 }, { "epoch": 0.10742013918001174, "grad_norm": 0.47474446537209075, "learning_rate": 2e-05, "loss": 5.5727, "step": 3203 }, { "epoch": 0.10745367653223778, "grad_norm": 0.45915463980400384, "learning_rate": 2e-05, "loss": 5.4801, "step": 3204 }, { "epoch": 0.10748721388446382, "grad_norm": 0.3914721092474711, "learning_rate": 2e-05, "loss": 5.5809, "step": 3205 }, { "epoch": 0.10752075123668986, "grad_norm": 0.43979266190867916, "learning_rate": 2e-05, "loss": 5.446, "step": 3206 }, { "epoch": 0.1075542885889159, "grad_norm": 0.45266430287026266, "learning_rate": 2e-05, "loss": 5.4848, "step": 3207 }, { "epoch": 0.10758782594114194, "grad_norm": 0.44941028774190156, "learning_rate": 2e-05, "loss": 5.3037, "step": 3208 }, { "epoch": 0.107621363293368, "grad_norm": 0.42860660089732167, "learning_rate": 2e-05, "loss": 5.5158, "step": 3209 }, { "epoch": 0.10765490064559403, "grad_norm": 0.44788803098694235, "learning_rate": 2e-05, "loss": 5.6166, "step": 3210 }, { "epoch": 0.10768843799782007, "grad_norm": 0.4079066792315779, "learning_rate": 2e-05, "loss": 5.5727, "step": 3211 }, { "epoch": 0.10772197535004611, "grad_norm": 0.4510907315233631, "learning_rate": 2e-05, "loss": 5.5122, "step": 3212 }, { "epoch": 0.10775551270227215, "grad_norm": 0.4286984490258473, "learning_rate": 2e-05, "loss": 5.5237, "step": 3213 }, { "epoch": 0.1077890500544982, "grad_norm": 0.4099965260482784, "learning_rate": 2e-05, "loss": 5.4721, "step": 3214 }, { "epoch": 0.10782258740672424, "grad_norm": 0.4170814107981649, "learning_rate": 2e-05, "loss": 5.4597, "step": 3215 }, { "epoch": 0.10785612475895028, "grad_norm": 0.42932975210762747, "learning_rate": 2e-05, "loss": 5.5055, "step": 3216 }, { "epoch": 0.10788966211117633, "grad_norm": 0.43951854090712317, "learning_rate": 2e-05, "loss": 5.4558, "step": 3217 }, { "epoch": 0.10792319946340237, "grad_norm": 0.4113527436186918, "learning_rate": 2e-05, "loss": 5.5059, "step": 3218 }, { "epoch": 0.10795673681562841, "grad_norm": 0.4144263530729359, "learning_rate": 2e-05, "loss": 5.5079, "step": 3219 }, { "epoch": 0.10799027416785445, "grad_norm": 0.4041663711670862, "learning_rate": 2e-05, "loss": 5.7881, "step": 3220 }, { "epoch": 0.10802381152008049, "grad_norm": 0.39520737120688126, "learning_rate": 2e-05, "loss": 5.556, "step": 3221 }, { "epoch": 0.10805734887230653, "grad_norm": 0.42043214448922583, "learning_rate": 2e-05, "loss": 5.6853, "step": 3222 }, { "epoch": 0.10809088622453257, "grad_norm": 0.4187329179869815, "learning_rate": 2e-05, "loss": 5.6588, "step": 3223 }, { "epoch": 0.10812442357675861, "grad_norm": 0.4430453204044645, "learning_rate": 2e-05, "loss": 5.5326, "step": 3224 }, { "epoch": 0.10815796092898465, "grad_norm": 0.40986310828984246, "learning_rate": 2e-05, "loss": 5.6205, "step": 3225 }, { "epoch": 0.1081914982812107, "grad_norm": 0.4343317187347808, "learning_rate": 2e-05, "loss": 5.5122, "step": 3226 }, { "epoch": 0.10822503563343674, "grad_norm": 0.48191029122408996, "learning_rate": 2e-05, "loss": 5.499, "step": 3227 }, { "epoch": 0.10825857298566278, "grad_norm": 0.4522450097424964, "learning_rate": 2e-05, "loss": 5.5902, "step": 3228 }, { "epoch": 0.10829211033788883, "grad_norm": 0.4572535835247387, "learning_rate": 2e-05, "loss": 5.4355, "step": 3229 }, { "epoch": 0.10832564769011487, "grad_norm": 0.45678145528314024, "learning_rate": 2e-05, "loss": 5.5126, "step": 3230 }, { "epoch": 0.1083591850423409, "grad_norm": 0.41536216504513995, "learning_rate": 2e-05, "loss": 5.775, "step": 3231 }, { "epoch": 0.10839272239456695, "grad_norm": 0.4130609803576487, "learning_rate": 2e-05, "loss": 5.6603, "step": 3232 }, { "epoch": 0.10842625974679299, "grad_norm": 0.4322272788822428, "learning_rate": 2e-05, "loss": 5.5748, "step": 3233 }, { "epoch": 0.10845979709901903, "grad_norm": 0.43013097978143283, "learning_rate": 2e-05, "loss": 5.76, "step": 3234 }, { "epoch": 0.10849333445124508, "grad_norm": 0.407925031325965, "learning_rate": 2e-05, "loss": 5.5982, "step": 3235 }, { "epoch": 0.10852687180347112, "grad_norm": 0.48305081383353154, "learning_rate": 2e-05, "loss": 5.5229, "step": 3236 }, { "epoch": 0.10856040915569716, "grad_norm": 0.42877993509793316, "learning_rate": 2e-05, "loss": 5.6669, "step": 3237 }, { "epoch": 0.1085939465079232, "grad_norm": 0.4134547037001412, "learning_rate": 2e-05, "loss": 5.5545, "step": 3238 }, { "epoch": 0.10862748386014924, "grad_norm": 0.4221670362680017, "learning_rate": 2e-05, "loss": 5.5031, "step": 3239 }, { "epoch": 0.10866102121237528, "grad_norm": 0.4386040263172781, "learning_rate": 2e-05, "loss": 5.4602, "step": 3240 }, { "epoch": 0.10869455856460132, "grad_norm": 0.43525250054804676, "learning_rate": 2e-05, "loss": 5.623, "step": 3241 }, { "epoch": 0.10872809591682736, "grad_norm": 0.4040218801792485, "learning_rate": 2e-05, "loss": 5.4356, "step": 3242 }, { "epoch": 0.10876163326905341, "grad_norm": 0.4066511639779633, "learning_rate": 2e-05, "loss": 5.7213, "step": 3243 }, { "epoch": 0.10879517062127946, "grad_norm": 0.4278043138791191, "learning_rate": 2e-05, "loss": 5.6013, "step": 3244 }, { "epoch": 0.1088287079735055, "grad_norm": 0.41787334218411454, "learning_rate": 2e-05, "loss": 5.442, "step": 3245 }, { "epoch": 0.10886224532573154, "grad_norm": 0.39684524921755526, "learning_rate": 2e-05, "loss": 5.4494, "step": 3246 }, { "epoch": 0.10889578267795758, "grad_norm": 0.40872910446471095, "learning_rate": 2e-05, "loss": 5.4777, "step": 3247 }, { "epoch": 0.10892932003018362, "grad_norm": 0.45832063738579965, "learning_rate": 2e-05, "loss": 5.5509, "step": 3248 }, { "epoch": 0.10896285738240966, "grad_norm": 0.40058540035908696, "learning_rate": 2e-05, "loss": 5.7076, "step": 3249 }, { "epoch": 0.1089963947346357, "grad_norm": 0.4308082924284884, "learning_rate": 2e-05, "loss": 5.4801, "step": 3250 }, { "epoch": 0.10902993208686174, "grad_norm": 0.4587368693506499, "learning_rate": 2e-05, "loss": 5.3844, "step": 3251 }, { "epoch": 0.10906346943908779, "grad_norm": 0.43657317552678254, "learning_rate": 2e-05, "loss": 5.4284, "step": 3252 }, { "epoch": 0.10909700679131383, "grad_norm": 0.4234259170032169, "learning_rate": 2e-05, "loss": 5.7689, "step": 3253 }, { "epoch": 0.10913054414353987, "grad_norm": 0.45609351781237656, "learning_rate": 2e-05, "loss": 5.3837, "step": 3254 }, { "epoch": 0.10916408149576591, "grad_norm": 0.4656546067819773, "learning_rate": 2e-05, "loss": 5.5934, "step": 3255 }, { "epoch": 0.10919761884799195, "grad_norm": 0.41473718831268086, "learning_rate": 2e-05, "loss": 5.5443, "step": 3256 }, { "epoch": 0.10923115620021799, "grad_norm": 0.4763581232546926, "learning_rate": 2e-05, "loss": 5.544, "step": 3257 }, { "epoch": 0.10926469355244403, "grad_norm": 0.44301351313967835, "learning_rate": 2e-05, "loss": 5.4965, "step": 3258 }, { "epoch": 0.10929823090467007, "grad_norm": 0.4312022644349061, "learning_rate": 2e-05, "loss": 5.756, "step": 3259 }, { "epoch": 0.10933176825689611, "grad_norm": 0.44334112461790326, "learning_rate": 2e-05, "loss": 5.7297, "step": 3260 }, { "epoch": 0.10936530560912217, "grad_norm": 0.43426266493267873, "learning_rate": 2e-05, "loss": 5.5805, "step": 3261 }, { "epoch": 0.1093988429613482, "grad_norm": 0.4195511347318647, "learning_rate": 2e-05, "loss": 5.271, "step": 3262 }, { "epoch": 0.10943238031357425, "grad_norm": 0.41567494959972306, "learning_rate": 2e-05, "loss": 5.715, "step": 3263 }, { "epoch": 0.10946591766580029, "grad_norm": 0.46113260070372886, "learning_rate": 2e-05, "loss": 5.6216, "step": 3264 }, { "epoch": 0.10949945501802633, "grad_norm": 0.4163689387486488, "learning_rate": 2e-05, "loss": 5.6967, "step": 3265 }, { "epoch": 0.10953299237025237, "grad_norm": 0.40740932731601015, "learning_rate": 2e-05, "loss": 5.7919, "step": 3266 }, { "epoch": 0.1095665297224784, "grad_norm": 0.4213614499780932, "learning_rate": 2e-05, "loss": 5.671, "step": 3267 }, { "epoch": 0.10960006707470445, "grad_norm": 0.4065874843041595, "learning_rate": 2e-05, "loss": 5.4833, "step": 3268 }, { "epoch": 0.1096336044269305, "grad_norm": 0.43327073629881846, "learning_rate": 2e-05, "loss": 5.5194, "step": 3269 }, { "epoch": 0.10966714177915654, "grad_norm": 0.42548116316908935, "learning_rate": 2e-05, "loss": 5.3797, "step": 3270 }, { "epoch": 0.10970067913138258, "grad_norm": 0.4534425431577649, "learning_rate": 2e-05, "loss": 5.6704, "step": 3271 }, { "epoch": 0.10973421648360862, "grad_norm": 0.4191462373665532, "learning_rate": 2e-05, "loss": 5.4087, "step": 3272 }, { "epoch": 0.10976775383583466, "grad_norm": 0.43348972756839643, "learning_rate": 2e-05, "loss": 5.6712, "step": 3273 }, { "epoch": 0.1098012911880607, "grad_norm": 0.43121293895720375, "learning_rate": 2e-05, "loss": 5.4679, "step": 3274 }, { "epoch": 0.10983482854028674, "grad_norm": 0.40608045403192955, "learning_rate": 2e-05, "loss": 5.5437, "step": 3275 }, { "epoch": 0.10986836589251278, "grad_norm": 0.39227369347367874, "learning_rate": 2e-05, "loss": 5.5287, "step": 3276 }, { "epoch": 0.10990190324473882, "grad_norm": 0.4487157609584122, "learning_rate": 2e-05, "loss": 5.5259, "step": 3277 }, { "epoch": 0.10993544059696488, "grad_norm": 0.4493119798376067, "learning_rate": 2e-05, "loss": 5.4288, "step": 3278 }, { "epoch": 0.10996897794919092, "grad_norm": 0.4242528643955885, "learning_rate": 2e-05, "loss": 5.4163, "step": 3279 }, { "epoch": 0.11000251530141696, "grad_norm": 0.4472685755109637, "learning_rate": 2e-05, "loss": 5.5793, "step": 3280 }, { "epoch": 0.110036052653643, "grad_norm": 0.45093265417985967, "learning_rate": 2e-05, "loss": 5.529, "step": 3281 }, { "epoch": 0.11006959000586904, "grad_norm": 0.44450035814514693, "learning_rate": 2e-05, "loss": 5.6836, "step": 3282 }, { "epoch": 0.11010312735809508, "grad_norm": 0.4069190745800136, "learning_rate": 2e-05, "loss": 5.5947, "step": 3283 }, { "epoch": 0.11013666471032112, "grad_norm": 0.442159759891871, "learning_rate": 2e-05, "loss": 5.4627, "step": 3284 }, { "epoch": 0.11017020206254716, "grad_norm": 0.3891044893834869, "learning_rate": 2e-05, "loss": 5.4218, "step": 3285 }, { "epoch": 0.1102037394147732, "grad_norm": 0.40066438866714327, "learning_rate": 2e-05, "loss": 5.7518, "step": 3286 }, { "epoch": 0.11023727676699925, "grad_norm": 0.40908241094547554, "learning_rate": 2e-05, "loss": 5.4466, "step": 3287 }, { "epoch": 0.11027081411922529, "grad_norm": 0.3930196653295439, "learning_rate": 2e-05, "loss": 5.5545, "step": 3288 }, { "epoch": 0.11030435147145133, "grad_norm": 0.4398954737967832, "learning_rate": 2e-05, "loss": 5.6664, "step": 3289 }, { "epoch": 0.11033788882367737, "grad_norm": 0.41018171494109035, "learning_rate": 2e-05, "loss": 5.5136, "step": 3290 }, { "epoch": 0.11037142617590341, "grad_norm": 0.41376114162923533, "learning_rate": 2e-05, "loss": 5.4697, "step": 3291 }, { "epoch": 0.11040496352812945, "grad_norm": 0.40226604935099247, "learning_rate": 2e-05, "loss": 5.3169, "step": 3292 }, { "epoch": 0.11043850088035549, "grad_norm": 0.41783916203893695, "learning_rate": 2e-05, "loss": 5.721, "step": 3293 }, { "epoch": 0.11047203823258153, "grad_norm": 0.41971865095889715, "learning_rate": 2e-05, "loss": 5.5982, "step": 3294 }, { "epoch": 0.11050557558480759, "grad_norm": 0.4044580234647171, "learning_rate": 2e-05, "loss": 5.3213, "step": 3295 }, { "epoch": 0.11053911293703363, "grad_norm": 0.4222237020640758, "learning_rate": 2e-05, "loss": 5.4057, "step": 3296 }, { "epoch": 0.11057265028925967, "grad_norm": 0.4233992615817838, "learning_rate": 2e-05, "loss": 5.651, "step": 3297 }, { "epoch": 0.1106061876414857, "grad_norm": 0.4325173344273998, "learning_rate": 2e-05, "loss": 5.4458, "step": 3298 }, { "epoch": 0.11063972499371175, "grad_norm": 0.4028907735198783, "learning_rate": 2e-05, "loss": 5.6615, "step": 3299 }, { "epoch": 0.11067326234593779, "grad_norm": 0.4038920911597368, "learning_rate": 2e-05, "loss": 5.3397, "step": 3300 }, { "epoch": 0.11070679969816383, "grad_norm": 0.430696644967362, "learning_rate": 2e-05, "loss": 5.2555, "step": 3301 }, { "epoch": 0.11074033705038987, "grad_norm": 0.4180163429379921, "learning_rate": 2e-05, "loss": 5.629, "step": 3302 }, { "epoch": 0.11077387440261591, "grad_norm": 0.4077502923706047, "learning_rate": 2e-05, "loss": 5.4278, "step": 3303 }, { "epoch": 0.11080741175484196, "grad_norm": 0.38862647210084184, "learning_rate": 2e-05, "loss": 5.3407, "step": 3304 }, { "epoch": 0.110840949107068, "grad_norm": 0.4444902653268423, "learning_rate": 2e-05, "loss": 5.4247, "step": 3305 }, { "epoch": 0.11087448645929404, "grad_norm": 0.42829814840256386, "learning_rate": 2e-05, "loss": 5.6188, "step": 3306 }, { "epoch": 0.11090802381152008, "grad_norm": 0.40932159726267014, "learning_rate": 2e-05, "loss": 5.5017, "step": 3307 }, { "epoch": 0.11094156116374612, "grad_norm": 0.4268558995135862, "learning_rate": 2e-05, "loss": 5.5871, "step": 3308 }, { "epoch": 0.11097509851597216, "grad_norm": 0.42676029519366265, "learning_rate": 2e-05, "loss": 5.5624, "step": 3309 }, { "epoch": 0.1110086358681982, "grad_norm": 0.39991398125932853, "learning_rate": 2e-05, "loss": 5.7819, "step": 3310 }, { "epoch": 0.11104217322042424, "grad_norm": 0.42289742240131356, "learning_rate": 2e-05, "loss": 5.4727, "step": 3311 }, { "epoch": 0.11107571057265028, "grad_norm": 0.4078511844823741, "learning_rate": 2e-05, "loss": 5.5999, "step": 3312 }, { "epoch": 0.11110924792487634, "grad_norm": 0.40849502063560034, "learning_rate": 2e-05, "loss": 5.1557, "step": 3313 }, { "epoch": 0.11114278527710238, "grad_norm": 0.433751619096518, "learning_rate": 2e-05, "loss": 5.6849, "step": 3314 }, { "epoch": 0.11117632262932842, "grad_norm": 0.4252649692956025, "learning_rate": 2e-05, "loss": 5.418, "step": 3315 }, { "epoch": 0.11120985998155446, "grad_norm": 0.407040712994573, "learning_rate": 2e-05, "loss": 5.5926, "step": 3316 }, { "epoch": 0.1112433973337805, "grad_norm": 0.40596414631629735, "learning_rate": 2e-05, "loss": 5.5385, "step": 3317 }, { "epoch": 0.11127693468600654, "grad_norm": 0.4065700187019131, "learning_rate": 2e-05, "loss": 5.8337, "step": 3318 }, { "epoch": 0.11131047203823258, "grad_norm": 0.4040047744218961, "learning_rate": 2e-05, "loss": 5.7203, "step": 3319 }, { "epoch": 0.11134400939045862, "grad_norm": 0.41834330014191345, "learning_rate": 2e-05, "loss": 5.574, "step": 3320 }, { "epoch": 0.11137754674268467, "grad_norm": 0.39937973008164407, "learning_rate": 2e-05, "loss": 5.5802, "step": 3321 }, { "epoch": 0.11141108409491071, "grad_norm": 0.41112857434831007, "learning_rate": 2e-05, "loss": 5.53, "step": 3322 }, { "epoch": 0.11144462144713675, "grad_norm": 0.4656545936242385, "learning_rate": 2e-05, "loss": 5.5252, "step": 3323 }, { "epoch": 0.11147815879936279, "grad_norm": 0.4190101488767612, "learning_rate": 2e-05, "loss": 5.4544, "step": 3324 }, { "epoch": 0.11151169615158883, "grad_norm": 0.4261191224188317, "learning_rate": 2e-05, "loss": 5.6317, "step": 3325 }, { "epoch": 0.11154523350381487, "grad_norm": 0.4530182235220074, "learning_rate": 2e-05, "loss": 5.5035, "step": 3326 }, { "epoch": 0.11157877085604091, "grad_norm": 0.4414761530191986, "learning_rate": 2e-05, "loss": 5.559, "step": 3327 }, { "epoch": 0.11161230820826695, "grad_norm": 0.4177435266098148, "learning_rate": 2e-05, "loss": 5.5599, "step": 3328 }, { "epoch": 0.11164584556049299, "grad_norm": 0.4321218203679404, "learning_rate": 2e-05, "loss": 5.5918, "step": 3329 }, { "epoch": 0.11167938291271905, "grad_norm": 0.4304198459903621, "learning_rate": 2e-05, "loss": 5.5661, "step": 3330 }, { "epoch": 0.11171292026494509, "grad_norm": 0.4217091886701945, "learning_rate": 2e-05, "loss": 5.6487, "step": 3331 }, { "epoch": 0.11174645761717113, "grad_norm": 0.43358070265007104, "learning_rate": 2e-05, "loss": 5.5549, "step": 3332 }, { "epoch": 0.11177999496939717, "grad_norm": 0.3951075944417031, "learning_rate": 2e-05, "loss": 5.6084, "step": 3333 }, { "epoch": 0.11181353232162321, "grad_norm": 0.44109501832966974, "learning_rate": 2e-05, "loss": 5.4669, "step": 3334 }, { "epoch": 0.11184706967384925, "grad_norm": 0.4462176466746123, "learning_rate": 2e-05, "loss": 5.4713, "step": 3335 }, { "epoch": 0.11188060702607529, "grad_norm": 0.44080437045656357, "learning_rate": 2e-05, "loss": 5.6923, "step": 3336 }, { "epoch": 0.11191414437830133, "grad_norm": 0.45238504157863507, "learning_rate": 2e-05, "loss": 5.8421, "step": 3337 }, { "epoch": 0.11194768173052737, "grad_norm": 0.4855121999252082, "learning_rate": 2e-05, "loss": 5.5828, "step": 3338 }, { "epoch": 0.11198121908275342, "grad_norm": 0.4309017717385961, "learning_rate": 2e-05, "loss": 5.7084, "step": 3339 }, { "epoch": 0.11201475643497946, "grad_norm": 0.42867883894016745, "learning_rate": 2e-05, "loss": 5.4487, "step": 3340 }, { "epoch": 0.1120482937872055, "grad_norm": 0.42263114422767306, "learning_rate": 2e-05, "loss": 5.4959, "step": 3341 }, { "epoch": 0.11208183113943154, "grad_norm": 0.4448465794828251, "learning_rate": 2e-05, "loss": 5.3561, "step": 3342 }, { "epoch": 0.11211536849165758, "grad_norm": 0.43094851431154874, "learning_rate": 2e-05, "loss": 5.5047, "step": 3343 }, { "epoch": 0.11214890584388362, "grad_norm": 0.45112366962306316, "learning_rate": 2e-05, "loss": 5.582, "step": 3344 }, { "epoch": 0.11218244319610966, "grad_norm": 0.5035322510550401, "learning_rate": 2e-05, "loss": 5.4837, "step": 3345 }, { "epoch": 0.1122159805483357, "grad_norm": 0.3864928582410652, "learning_rate": 2e-05, "loss": 5.723, "step": 3346 }, { "epoch": 0.11224951790056176, "grad_norm": 0.45172221571106363, "learning_rate": 2e-05, "loss": 5.6629, "step": 3347 }, { "epoch": 0.1122830552527878, "grad_norm": 0.45374205732291323, "learning_rate": 2e-05, "loss": 5.5738, "step": 3348 }, { "epoch": 0.11231659260501384, "grad_norm": 0.4074195131602524, "learning_rate": 2e-05, "loss": 5.6558, "step": 3349 }, { "epoch": 0.11235012995723988, "grad_norm": 0.421341627915543, "learning_rate": 2e-05, "loss": 5.6526, "step": 3350 }, { "epoch": 0.11238366730946592, "grad_norm": 0.44197635524917517, "learning_rate": 2e-05, "loss": 5.7654, "step": 3351 }, { "epoch": 0.11241720466169196, "grad_norm": 0.4973813586063831, "learning_rate": 2e-05, "loss": 5.4254, "step": 3352 }, { "epoch": 0.112450742013918, "grad_norm": 0.3957056008273629, "learning_rate": 2e-05, "loss": 5.6605, "step": 3353 }, { "epoch": 0.11248427936614404, "grad_norm": 0.46040579166871715, "learning_rate": 2e-05, "loss": 5.2974, "step": 3354 }, { "epoch": 0.11251781671837008, "grad_norm": 0.45105461130801766, "learning_rate": 2e-05, "loss": 5.6025, "step": 3355 }, { "epoch": 0.11255135407059613, "grad_norm": 0.41860407323144705, "learning_rate": 2e-05, "loss": 5.4829, "step": 3356 }, { "epoch": 0.11258489142282217, "grad_norm": 0.4334269881479618, "learning_rate": 2e-05, "loss": 5.4861, "step": 3357 }, { "epoch": 0.11261842877504821, "grad_norm": 0.43118706241140864, "learning_rate": 2e-05, "loss": 5.584, "step": 3358 }, { "epoch": 0.11265196612727425, "grad_norm": 0.41363658512328494, "learning_rate": 2e-05, "loss": 5.3981, "step": 3359 }, { "epoch": 0.11268550347950029, "grad_norm": 0.4915696687437301, "learning_rate": 2e-05, "loss": 5.6153, "step": 3360 }, { "epoch": 0.11271904083172633, "grad_norm": 0.43060803844378126, "learning_rate": 2e-05, "loss": 5.6365, "step": 3361 }, { "epoch": 0.11275257818395237, "grad_norm": 0.4139316754716466, "learning_rate": 2e-05, "loss": 5.5359, "step": 3362 }, { "epoch": 0.11278611553617841, "grad_norm": 0.4713384144070504, "learning_rate": 2e-05, "loss": 5.8391, "step": 3363 }, { "epoch": 0.11281965288840445, "grad_norm": 0.44195895536694835, "learning_rate": 2e-05, "loss": 5.6213, "step": 3364 }, { "epoch": 0.11285319024063051, "grad_norm": 0.5041737275660038, "learning_rate": 2e-05, "loss": 5.6295, "step": 3365 }, { "epoch": 0.11288672759285655, "grad_norm": 0.45035902881839385, "learning_rate": 2e-05, "loss": 5.6295, "step": 3366 }, { "epoch": 0.11292026494508259, "grad_norm": 0.48283235919989453, "learning_rate": 2e-05, "loss": 5.634, "step": 3367 }, { "epoch": 0.11295380229730863, "grad_norm": 0.4493702767262501, "learning_rate": 2e-05, "loss": 5.4612, "step": 3368 }, { "epoch": 0.11298733964953467, "grad_norm": 0.41421257560439345, "learning_rate": 2e-05, "loss": 5.5414, "step": 3369 }, { "epoch": 0.11302087700176071, "grad_norm": 0.42195364335169155, "learning_rate": 2e-05, "loss": 5.5979, "step": 3370 }, { "epoch": 0.11305441435398675, "grad_norm": 0.5033862825950152, "learning_rate": 2e-05, "loss": 5.6, "step": 3371 }, { "epoch": 0.11308795170621279, "grad_norm": 0.48904799257930226, "learning_rate": 2e-05, "loss": 5.4452, "step": 3372 }, { "epoch": 0.11312148905843884, "grad_norm": 0.43677224008942456, "learning_rate": 2e-05, "loss": 5.5453, "step": 3373 }, { "epoch": 0.11315502641066488, "grad_norm": 0.4748210383907941, "learning_rate": 2e-05, "loss": 5.7035, "step": 3374 }, { "epoch": 0.11318856376289092, "grad_norm": 0.46309336999535944, "learning_rate": 2e-05, "loss": 5.2969, "step": 3375 }, { "epoch": 0.11322210111511696, "grad_norm": 0.4860166512590259, "learning_rate": 2e-05, "loss": 5.5187, "step": 3376 }, { "epoch": 0.113255638467343, "grad_norm": 0.4237558933303141, "learning_rate": 2e-05, "loss": 5.599, "step": 3377 }, { "epoch": 0.11328917581956904, "grad_norm": 0.4182143220626068, "learning_rate": 2e-05, "loss": 5.6194, "step": 3378 }, { "epoch": 0.11332271317179508, "grad_norm": 0.49368234167261227, "learning_rate": 2e-05, "loss": 5.4793, "step": 3379 }, { "epoch": 0.11335625052402112, "grad_norm": 0.42477023522160795, "learning_rate": 2e-05, "loss": 5.656, "step": 3380 }, { "epoch": 0.11338978787624716, "grad_norm": 0.3909898803353868, "learning_rate": 2e-05, "loss": 5.5723, "step": 3381 }, { "epoch": 0.11342332522847322, "grad_norm": 0.45320431551138396, "learning_rate": 2e-05, "loss": 5.4984, "step": 3382 }, { "epoch": 0.11345686258069926, "grad_norm": 0.4493231192608552, "learning_rate": 2e-05, "loss": 5.2465, "step": 3383 }, { "epoch": 0.1134903999329253, "grad_norm": 0.40308747811323187, "learning_rate": 2e-05, "loss": 5.4414, "step": 3384 }, { "epoch": 0.11352393728515134, "grad_norm": 0.39537100009765136, "learning_rate": 2e-05, "loss": 5.678, "step": 3385 }, { "epoch": 0.11355747463737738, "grad_norm": 0.4299633650661535, "learning_rate": 2e-05, "loss": 5.4945, "step": 3386 }, { "epoch": 0.11359101198960342, "grad_norm": 0.4026468485485838, "learning_rate": 2e-05, "loss": 5.4948, "step": 3387 }, { "epoch": 0.11362454934182946, "grad_norm": 0.4442503725037061, "learning_rate": 2e-05, "loss": 5.4156, "step": 3388 }, { "epoch": 0.1136580866940555, "grad_norm": 0.4103506738380732, "learning_rate": 2e-05, "loss": 5.7114, "step": 3389 }, { "epoch": 0.11369162404628154, "grad_norm": 0.41820279160618584, "learning_rate": 2e-05, "loss": 5.6475, "step": 3390 }, { "epoch": 0.1137251613985076, "grad_norm": 0.41190985124397855, "learning_rate": 2e-05, "loss": 5.4099, "step": 3391 }, { "epoch": 0.11375869875073363, "grad_norm": 0.4240378195838521, "learning_rate": 2e-05, "loss": 5.5679, "step": 3392 }, { "epoch": 0.11379223610295967, "grad_norm": 0.4023825957968809, "learning_rate": 2e-05, "loss": 5.6591, "step": 3393 }, { "epoch": 0.11382577345518571, "grad_norm": 0.4165854702052816, "learning_rate": 2e-05, "loss": 5.5736, "step": 3394 }, { "epoch": 0.11385931080741175, "grad_norm": 0.44186542477295043, "learning_rate": 2e-05, "loss": 5.4819, "step": 3395 }, { "epoch": 0.1138928481596378, "grad_norm": 0.44848794073400966, "learning_rate": 2e-05, "loss": 5.6816, "step": 3396 }, { "epoch": 0.11392638551186383, "grad_norm": 0.39183535286720633, "learning_rate": 2e-05, "loss": 5.605, "step": 3397 }, { "epoch": 0.11395992286408987, "grad_norm": 0.4266300229242535, "learning_rate": 2e-05, "loss": 5.4242, "step": 3398 }, { "epoch": 0.11399346021631593, "grad_norm": 0.4556404562510776, "learning_rate": 2e-05, "loss": 5.5829, "step": 3399 }, { "epoch": 0.11402699756854197, "grad_norm": 0.4093409521715705, "learning_rate": 2e-05, "loss": 5.5608, "step": 3400 }, { "epoch": 0.11406053492076801, "grad_norm": 0.42336983754466306, "learning_rate": 2e-05, "loss": 5.6006, "step": 3401 }, { "epoch": 0.11409407227299405, "grad_norm": 0.4391016152593212, "learning_rate": 2e-05, "loss": 5.3454, "step": 3402 }, { "epoch": 0.11412760962522009, "grad_norm": 0.4133371038718865, "learning_rate": 2e-05, "loss": 5.5302, "step": 3403 }, { "epoch": 0.11416114697744613, "grad_norm": 0.43064878667631273, "learning_rate": 2e-05, "loss": 5.44, "step": 3404 }, { "epoch": 0.11419468432967217, "grad_norm": 0.4126842833149389, "learning_rate": 2e-05, "loss": 5.7493, "step": 3405 }, { "epoch": 0.11422822168189821, "grad_norm": 0.41920067487815726, "learning_rate": 2e-05, "loss": 5.5994, "step": 3406 }, { "epoch": 0.11426175903412425, "grad_norm": 0.4084278443407312, "learning_rate": 2e-05, "loss": 5.3743, "step": 3407 }, { "epoch": 0.1142952963863503, "grad_norm": 0.44502498244748007, "learning_rate": 2e-05, "loss": 5.5795, "step": 3408 }, { "epoch": 0.11432883373857634, "grad_norm": 0.39676526998133055, "learning_rate": 2e-05, "loss": 5.5321, "step": 3409 }, { "epoch": 0.11436237109080238, "grad_norm": 0.4067761388530224, "learning_rate": 2e-05, "loss": 5.452, "step": 3410 }, { "epoch": 0.11439590844302842, "grad_norm": 0.4117761526940116, "learning_rate": 2e-05, "loss": 5.5348, "step": 3411 }, { "epoch": 0.11442944579525446, "grad_norm": 0.4080625491697146, "learning_rate": 2e-05, "loss": 5.5414, "step": 3412 }, { "epoch": 0.1144629831474805, "grad_norm": 0.43513933881512884, "learning_rate": 2e-05, "loss": 5.3536, "step": 3413 }, { "epoch": 0.11449652049970654, "grad_norm": 0.39356080616250927, "learning_rate": 2e-05, "loss": 5.4363, "step": 3414 }, { "epoch": 0.11453005785193258, "grad_norm": 0.4452509503512141, "learning_rate": 2e-05, "loss": 5.3542, "step": 3415 }, { "epoch": 0.11456359520415862, "grad_norm": 0.42284296004357524, "learning_rate": 2e-05, "loss": 5.5801, "step": 3416 }, { "epoch": 0.11459713255638468, "grad_norm": 0.4131982345896729, "learning_rate": 2e-05, "loss": 5.6648, "step": 3417 }, { "epoch": 0.11463066990861072, "grad_norm": 0.3996412390068613, "learning_rate": 2e-05, "loss": 5.4722, "step": 3418 }, { "epoch": 0.11466420726083676, "grad_norm": 0.4059865516841846, "learning_rate": 2e-05, "loss": 5.5415, "step": 3419 }, { "epoch": 0.1146977446130628, "grad_norm": 0.4013127577930692, "learning_rate": 2e-05, "loss": 5.4762, "step": 3420 }, { "epoch": 0.11473128196528884, "grad_norm": 0.40495185336498946, "learning_rate": 2e-05, "loss": 5.6155, "step": 3421 }, { "epoch": 0.11476481931751488, "grad_norm": 0.4070119834018203, "learning_rate": 2e-05, "loss": 5.6868, "step": 3422 }, { "epoch": 0.11479835666974092, "grad_norm": 0.44357583640245524, "learning_rate": 2e-05, "loss": 5.4073, "step": 3423 }, { "epoch": 0.11483189402196696, "grad_norm": 0.4462316437759419, "learning_rate": 2e-05, "loss": 5.638, "step": 3424 }, { "epoch": 0.11486543137419301, "grad_norm": 0.42348403261637413, "learning_rate": 2e-05, "loss": 5.4607, "step": 3425 }, { "epoch": 0.11489896872641905, "grad_norm": 0.4131818540008933, "learning_rate": 2e-05, "loss": 5.4869, "step": 3426 }, { "epoch": 0.1149325060786451, "grad_norm": 0.4159324778565505, "learning_rate": 2e-05, "loss": 5.6993, "step": 3427 }, { "epoch": 0.11496604343087113, "grad_norm": 0.41014865638296283, "learning_rate": 2e-05, "loss": 5.5475, "step": 3428 }, { "epoch": 0.11499958078309717, "grad_norm": 0.41641933584836294, "learning_rate": 2e-05, "loss": 5.4992, "step": 3429 }, { "epoch": 0.11503311813532321, "grad_norm": 0.41656529327478153, "learning_rate": 2e-05, "loss": 5.4957, "step": 3430 }, { "epoch": 0.11506665548754925, "grad_norm": 0.44167166817796033, "learning_rate": 2e-05, "loss": 5.5596, "step": 3431 }, { "epoch": 0.1151001928397753, "grad_norm": 0.4140839669957901, "learning_rate": 2e-05, "loss": 5.7373, "step": 3432 }, { "epoch": 0.11513373019200134, "grad_norm": 0.4288953473013376, "learning_rate": 2e-05, "loss": 5.5701, "step": 3433 }, { "epoch": 0.11516726754422739, "grad_norm": 0.4112361616591792, "learning_rate": 2e-05, "loss": 5.712, "step": 3434 }, { "epoch": 0.11520080489645343, "grad_norm": 0.4377097167180035, "learning_rate": 2e-05, "loss": 5.7017, "step": 3435 }, { "epoch": 0.11523434224867947, "grad_norm": 0.4272805358938119, "learning_rate": 2e-05, "loss": 5.7808, "step": 3436 }, { "epoch": 0.11526787960090551, "grad_norm": 0.44682609342624907, "learning_rate": 2e-05, "loss": 5.5675, "step": 3437 }, { "epoch": 0.11530141695313155, "grad_norm": 0.4141086206025129, "learning_rate": 2e-05, "loss": 5.5099, "step": 3438 }, { "epoch": 0.11533495430535759, "grad_norm": 0.3926854075993095, "learning_rate": 2e-05, "loss": 5.6239, "step": 3439 }, { "epoch": 0.11536849165758363, "grad_norm": 0.44758027731165684, "learning_rate": 2e-05, "loss": 5.5572, "step": 3440 }, { "epoch": 0.11540202900980967, "grad_norm": 0.43154194375927263, "learning_rate": 2e-05, "loss": 5.6504, "step": 3441 }, { "epoch": 0.11543556636203572, "grad_norm": 0.4036453020019176, "learning_rate": 2e-05, "loss": 5.5355, "step": 3442 }, { "epoch": 0.11546910371426176, "grad_norm": 0.42579289783581975, "learning_rate": 2e-05, "loss": 5.4971, "step": 3443 }, { "epoch": 0.1155026410664878, "grad_norm": 0.4538048835145015, "learning_rate": 2e-05, "loss": 5.7173, "step": 3444 }, { "epoch": 0.11553617841871384, "grad_norm": 0.4131954190301345, "learning_rate": 2e-05, "loss": 5.5781, "step": 3445 }, { "epoch": 0.11556971577093988, "grad_norm": 0.4508669026014491, "learning_rate": 2e-05, "loss": 5.5863, "step": 3446 }, { "epoch": 0.11560325312316592, "grad_norm": 0.41606990815666844, "learning_rate": 2e-05, "loss": 5.4232, "step": 3447 }, { "epoch": 0.11563679047539197, "grad_norm": 0.4967349772239736, "learning_rate": 2e-05, "loss": 5.3877, "step": 3448 }, { "epoch": 0.115670327827618, "grad_norm": 0.3959344528339986, "learning_rate": 2e-05, "loss": 5.4629, "step": 3449 }, { "epoch": 0.11570386517984405, "grad_norm": 0.4299418983794895, "learning_rate": 2e-05, "loss": 5.8174, "step": 3450 }, { "epoch": 0.1157374025320701, "grad_norm": 0.4543533953641247, "learning_rate": 2e-05, "loss": 5.5663, "step": 3451 }, { "epoch": 0.11577093988429614, "grad_norm": 0.42428781726644166, "learning_rate": 2e-05, "loss": 5.6064, "step": 3452 }, { "epoch": 0.11580447723652218, "grad_norm": 0.4482674881779974, "learning_rate": 2e-05, "loss": 5.4671, "step": 3453 }, { "epoch": 0.11583801458874822, "grad_norm": 0.4063370358215364, "learning_rate": 2e-05, "loss": 5.5914, "step": 3454 }, { "epoch": 0.11587155194097426, "grad_norm": 0.4268967037295867, "learning_rate": 2e-05, "loss": 5.4486, "step": 3455 }, { "epoch": 0.1159050892932003, "grad_norm": 0.42238318583576073, "learning_rate": 2e-05, "loss": 5.5588, "step": 3456 }, { "epoch": 0.11593862664542634, "grad_norm": 0.4048385884926462, "learning_rate": 2e-05, "loss": 5.553, "step": 3457 }, { "epoch": 0.11597216399765238, "grad_norm": 0.4430351987662387, "learning_rate": 2e-05, "loss": 5.5772, "step": 3458 }, { "epoch": 0.11600570134987842, "grad_norm": 0.4068077403809545, "learning_rate": 2e-05, "loss": 5.6245, "step": 3459 }, { "epoch": 0.11603923870210447, "grad_norm": 0.41819363315710145, "learning_rate": 2e-05, "loss": 5.3499, "step": 3460 }, { "epoch": 0.11607277605433051, "grad_norm": 0.4152614748733522, "learning_rate": 2e-05, "loss": 5.6402, "step": 3461 }, { "epoch": 0.11610631340655655, "grad_norm": 0.41056716580825475, "learning_rate": 2e-05, "loss": 5.504, "step": 3462 }, { "epoch": 0.1161398507587826, "grad_norm": 0.42541359431362896, "learning_rate": 2e-05, "loss": 5.5975, "step": 3463 }, { "epoch": 0.11617338811100864, "grad_norm": 0.44889089068619903, "learning_rate": 2e-05, "loss": 5.5505, "step": 3464 }, { "epoch": 0.11620692546323468, "grad_norm": 0.427917864300303, "learning_rate": 2e-05, "loss": 5.4656, "step": 3465 }, { "epoch": 0.11624046281546072, "grad_norm": 0.42361599666505234, "learning_rate": 2e-05, "loss": 5.5978, "step": 3466 }, { "epoch": 0.11627400016768676, "grad_norm": 0.4500519964886614, "learning_rate": 2e-05, "loss": 5.5715, "step": 3467 }, { "epoch": 0.11630753751991281, "grad_norm": 0.6163435663784012, "learning_rate": 2e-05, "loss": 5.5609, "step": 3468 }, { "epoch": 0.11634107487213885, "grad_norm": 0.40314331511208695, "learning_rate": 2e-05, "loss": 5.4428, "step": 3469 }, { "epoch": 0.11637461222436489, "grad_norm": 0.4171953776134223, "learning_rate": 2e-05, "loss": 5.4392, "step": 3470 }, { "epoch": 0.11640814957659093, "grad_norm": 0.41727405109622356, "learning_rate": 2e-05, "loss": 5.4623, "step": 3471 }, { "epoch": 0.11644168692881697, "grad_norm": 0.42744661446754195, "learning_rate": 2e-05, "loss": 5.5109, "step": 3472 }, { "epoch": 0.11647522428104301, "grad_norm": 0.4194257487087995, "learning_rate": 2e-05, "loss": 5.5913, "step": 3473 }, { "epoch": 0.11650876163326905, "grad_norm": 0.42326256512295485, "learning_rate": 2e-05, "loss": 5.4582, "step": 3474 }, { "epoch": 0.11654229898549509, "grad_norm": 0.4486637447330504, "learning_rate": 2e-05, "loss": 5.6206, "step": 3475 }, { "epoch": 0.11657583633772113, "grad_norm": 0.49421167949841693, "learning_rate": 2e-05, "loss": 5.5211, "step": 3476 }, { "epoch": 0.11660937368994718, "grad_norm": 0.4041046086461682, "learning_rate": 2e-05, "loss": 5.5722, "step": 3477 }, { "epoch": 0.11664291104217323, "grad_norm": 0.43227766459783984, "learning_rate": 2e-05, "loss": 5.2776, "step": 3478 }, { "epoch": 0.11667644839439927, "grad_norm": 0.4251279070282107, "learning_rate": 2e-05, "loss": 5.5836, "step": 3479 }, { "epoch": 0.1167099857466253, "grad_norm": 0.43897485841906175, "learning_rate": 2e-05, "loss": 5.6435, "step": 3480 }, { "epoch": 0.11674352309885135, "grad_norm": 0.4392334336797583, "learning_rate": 2e-05, "loss": 5.4317, "step": 3481 }, { "epoch": 0.11677706045107739, "grad_norm": 0.40733889143962837, "learning_rate": 2e-05, "loss": 5.5067, "step": 3482 }, { "epoch": 0.11681059780330343, "grad_norm": 0.43212560074579065, "learning_rate": 2e-05, "loss": 5.5548, "step": 3483 }, { "epoch": 0.11684413515552947, "grad_norm": 0.44394388314217054, "learning_rate": 2e-05, "loss": 5.4855, "step": 3484 }, { "epoch": 0.1168776725077555, "grad_norm": 0.43361163550693765, "learning_rate": 2e-05, "loss": 5.4957, "step": 3485 }, { "epoch": 0.11691120985998156, "grad_norm": 0.41921454491831533, "learning_rate": 2e-05, "loss": 5.3925, "step": 3486 }, { "epoch": 0.1169447472122076, "grad_norm": 0.47619608435401856, "learning_rate": 2e-05, "loss": 5.5948, "step": 3487 }, { "epoch": 0.11697828456443364, "grad_norm": 0.42404688121215167, "learning_rate": 2e-05, "loss": 5.5039, "step": 3488 }, { "epoch": 0.11701182191665968, "grad_norm": 0.41935924404378794, "learning_rate": 2e-05, "loss": 5.6066, "step": 3489 }, { "epoch": 0.11704535926888572, "grad_norm": 0.41663417299143063, "learning_rate": 2e-05, "loss": 5.4736, "step": 3490 }, { "epoch": 0.11707889662111176, "grad_norm": 0.40859381982669946, "learning_rate": 2e-05, "loss": 5.6164, "step": 3491 }, { "epoch": 0.1171124339733378, "grad_norm": 0.43701679831724516, "learning_rate": 2e-05, "loss": 5.5561, "step": 3492 }, { "epoch": 0.11714597132556384, "grad_norm": 0.4005218786999848, "learning_rate": 2e-05, "loss": 5.5877, "step": 3493 }, { "epoch": 0.1171795086777899, "grad_norm": 0.40738309750409113, "learning_rate": 2e-05, "loss": 5.3472, "step": 3494 }, { "epoch": 0.11721304603001594, "grad_norm": 0.4020739269733684, "learning_rate": 2e-05, "loss": 5.567, "step": 3495 }, { "epoch": 0.11724658338224198, "grad_norm": 0.48046070037465277, "learning_rate": 2e-05, "loss": 5.5031, "step": 3496 }, { "epoch": 0.11728012073446802, "grad_norm": 0.4283168983285513, "learning_rate": 2e-05, "loss": 5.5152, "step": 3497 }, { "epoch": 0.11731365808669406, "grad_norm": 0.40403602731805194, "learning_rate": 2e-05, "loss": 5.5715, "step": 3498 }, { "epoch": 0.1173471954389201, "grad_norm": 0.44503654380180674, "learning_rate": 2e-05, "loss": 5.6474, "step": 3499 }, { "epoch": 0.11738073279114614, "grad_norm": 0.44919319285248144, "learning_rate": 2e-05, "loss": 5.7805, "step": 3500 }, { "epoch": 0.11741427014337218, "grad_norm": 0.41489619005873385, "learning_rate": 2e-05, "loss": 5.5379, "step": 3501 }, { "epoch": 0.11744780749559822, "grad_norm": 0.4409414001187572, "learning_rate": 2e-05, "loss": 5.752, "step": 3502 }, { "epoch": 0.11748134484782427, "grad_norm": 0.4462263173119379, "learning_rate": 2e-05, "loss": 5.6707, "step": 3503 }, { "epoch": 0.11751488220005031, "grad_norm": 0.4132656766238949, "learning_rate": 2e-05, "loss": 5.6521, "step": 3504 }, { "epoch": 0.11754841955227635, "grad_norm": 0.46214389901766795, "learning_rate": 2e-05, "loss": 5.5059, "step": 3505 }, { "epoch": 0.11758195690450239, "grad_norm": 0.44312028244161183, "learning_rate": 2e-05, "loss": 5.5124, "step": 3506 }, { "epoch": 0.11761549425672843, "grad_norm": 0.4151178828678519, "learning_rate": 2e-05, "loss": 5.5249, "step": 3507 }, { "epoch": 0.11764903160895447, "grad_norm": 0.44800384777824914, "learning_rate": 2e-05, "loss": 5.613, "step": 3508 }, { "epoch": 0.11768256896118051, "grad_norm": 0.42990150697456747, "learning_rate": 2e-05, "loss": 5.3718, "step": 3509 }, { "epoch": 0.11771610631340655, "grad_norm": 0.45557835096047655, "learning_rate": 2e-05, "loss": 5.5376, "step": 3510 }, { "epoch": 0.11774964366563259, "grad_norm": 0.42419892978577856, "learning_rate": 2e-05, "loss": 5.6455, "step": 3511 }, { "epoch": 0.11778318101785865, "grad_norm": 0.4150577228683093, "learning_rate": 2e-05, "loss": 5.3566, "step": 3512 }, { "epoch": 0.11781671837008469, "grad_norm": 0.4177546310439513, "learning_rate": 2e-05, "loss": 5.4416, "step": 3513 }, { "epoch": 0.11785025572231073, "grad_norm": 0.43872181970669266, "learning_rate": 2e-05, "loss": 5.4442, "step": 3514 }, { "epoch": 0.11788379307453677, "grad_norm": 0.40388235297579494, "learning_rate": 2e-05, "loss": 5.7296, "step": 3515 }, { "epoch": 0.1179173304267628, "grad_norm": 0.4220148978696395, "learning_rate": 2e-05, "loss": 5.5281, "step": 3516 }, { "epoch": 0.11795086777898885, "grad_norm": 0.4494823218489509, "learning_rate": 2e-05, "loss": 5.5441, "step": 3517 }, { "epoch": 0.11798440513121489, "grad_norm": 0.4062496044949884, "learning_rate": 2e-05, "loss": 5.3662, "step": 3518 }, { "epoch": 0.11801794248344093, "grad_norm": 0.3913792694956059, "learning_rate": 2e-05, "loss": 5.7167, "step": 3519 }, { "epoch": 0.11805147983566698, "grad_norm": 0.4410989145336361, "learning_rate": 2e-05, "loss": 5.4887, "step": 3520 }, { "epoch": 0.11808501718789302, "grad_norm": 0.42437107117201667, "learning_rate": 2e-05, "loss": 5.6866, "step": 3521 }, { "epoch": 0.11811855454011906, "grad_norm": 0.4180366486329521, "learning_rate": 2e-05, "loss": 5.5658, "step": 3522 }, { "epoch": 0.1181520918923451, "grad_norm": 0.44491714779908537, "learning_rate": 2e-05, "loss": 5.6171, "step": 3523 }, { "epoch": 0.11818562924457114, "grad_norm": 0.4206018300493496, "learning_rate": 2e-05, "loss": 5.4756, "step": 3524 }, { "epoch": 0.11821916659679718, "grad_norm": 0.41466277385819167, "learning_rate": 2e-05, "loss": 5.4933, "step": 3525 }, { "epoch": 0.11825270394902322, "grad_norm": 0.42226003646145954, "learning_rate": 2e-05, "loss": 5.5402, "step": 3526 }, { "epoch": 0.11828624130124926, "grad_norm": 0.39873064541182074, "learning_rate": 2e-05, "loss": 5.6435, "step": 3527 }, { "epoch": 0.1183197786534753, "grad_norm": 0.407706547845352, "learning_rate": 2e-05, "loss": 5.5949, "step": 3528 }, { "epoch": 0.11835331600570136, "grad_norm": 0.41653866721700483, "learning_rate": 2e-05, "loss": 5.4817, "step": 3529 }, { "epoch": 0.1183868533579274, "grad_norm": 0.39402822486753736, "learning_rate": 2e-05, "loss": 5.5177, "step": 3530 }, { "epoch": 0.11842039071015344, "grad_norm": 0.41442655137174456, "learning_rate": 2e-05, "loss": 5.5432, "step": 3531 }, { "epoch": 0.11845392806237948, "grad_norm": 0.43021992524830704, "learning_rate": 2e-05, "loss": 5.4814, "step": 3532 }, { "epoch": 0.11848746541460552, "grad_norm": 0.4118830884627654, "learning_rate": 2e-05, "loss": 5.3669, "step": 3533 }, { "epoch": 0.11852100276683156, "grad_norm": 0.42655019807047273, "learning_rate": 2e-05, "loss": 5.643, "step": 3534 }, { "epoch": 0.1185545401190576, "grad_norm": 0.43049021456201464, "learning_rate": 2e-05, "loss": 5.7482, "step": 3535 }, { "epoch": 0.11858807747128364, "grad_norm": 0.43800869032377165, "learning_rate": 2e-05, "loss": 5.7018, "step": 3536 }, { "epoch": 0.11862161482350968, "grad_norm": 0.4303049603696914, "learning_rate": 2e-05, "loss": 5.5565, "step": 3537 }, { "epoch": 0.11865515217573573, "grad_norm": 0.43196661466624214, "learning_rate": 2e-05, "loss": 5.7758, "step": 3538 }, { "epoch": 0.11868868952796177, "grad_norm": 0.4741661006692084, "learning_rate": 2e-05, "loss": 5.4394, "step": 3539 }, { "epoch": 0.11872222688018781, "grad_norm": 0.39336826180168266, "learning_rate": 2e-05, "loss": 5.5613, "step": 3540 }, { "epoch": 0.11875576423241385, "grad_norm": 0.4863270510056281, "learning_rate": 2e-05, "loss": 5.6026, "step": 3541 }, { "epoch": 0.11878930158463989, "grad_norm": 0.46538167068237535, "learning_rate": 2e-05, "loss": 5.7012, "step": 3542 }, { "epoch": 0.11882283893686593, "grad_norm": 0.4103834295782994, "learning_rate": 2e-05, "loss": 5.5574, "step": 3543 }, { "epoch": 0.11885637628909197, "grad_norm": 0.4238331433916693, "learning_rate": 2e-05, "loss": 5.5257, "step": 3544 }, { "epoch": 0.11888991364131801, "grad_norm": 0.4522900547564927, "learning_rate": 2e-05, "loss": 5.6054, "step": 3545 }, { "epoch": 0.11892345099354407, "grad_norm": 0.4175058049365316, "learning_rate": 2e-05, "loss": 5.6184, "step": 3546 }, { "epoch": 0.1189569883457701, "grad_norm": 0.4175092463637574, "learning_rate": 2e-05, "loss": 5.4748, "step": 3547 }, { "epoch": 0.11899052569799615, "grad_norm": 0.43763490993737825, "learning_rate": 2e-05, "loss": 5.5779, "step": 3548 }, { "epoch": 0.11902406305022219, "grad_norm": 0.41737105802089225, "learning_rate": 2e-05, "loss": 5.715, "step": 3549 }, { "epoch": 0.11905760040244823, "grad_norm": 0.4235674373291398, "learning_rate": 2e-05, "loss": 5.5848, "step": 3550 }, { "epoch": 0.11909113775467427, "grad_norm": 0.43781532502195347, "learning_rate": 2e-05, "loss": 5.5516, "step": 3551 }, { "epoch": 0.11912467510690031, "grad_norm": 0.434735146520225, "learning_rate": 2e-05, "loss": 5.4411, "step": 3552 }, { "epoch": 0.11915821245912635, "grad_norm": 0.4284725706961441, "learning_rate": 2e-05, "loss": 5.5655, "step": 3553 }, { "epoch": 0.11919174981135239, "grad_norm": 0.4418449384811729, "learning_rate": 2e-05, "loss": 5.4099, "step": 3554 }, { "epoch": 0.11922528716357844, "grad_norm": 0.4068785166392674, "learning_rate": 2e-05, "loss": 5.7376, "step": 3555 }, { "epoch": 0.11925882451580448, "grad_norm": 0.41196882958319353, "learning_rate": 2e-05, "loss": 5.7135, "step": 3556 }, { "epoch": 0.11929236186803052, "grad_norm": 0.4340698119196326, "learning_rate": 2e-05, "loss": 5.3477, "step": 3557 }, { "epoch": 0.11932589922025656, "grad_norm": 0.41520443609962576, "learning_rate": 2e-05, "loss": 5.4938, "step": 3558 }, { "epoch": 0.1193594365724826, "grad_norm": 0.4150114338490533, "learning_rate": 2e-05, "loss": 5.5972, "step": 3559 }, { "epoch": 0.11939297392470864, "grad_norm": 0.4243113920311667, "learning_rate": 2e-05, "loss": 5.3986, "step": 3560 }, { "epoch": 0.11942651127693468, "grad_norm": 0.41995083650553455, "learning_rate": 2e-05, "loss": 5.4563, "step": 3561 }, { "epoch": 0.11946004862916072, "grad_norm": 0.405132477149775, "learning_rate": 2e-05, "loss": 5.5932, "step": 3562 }, { "epoch": 0.11949358598138676, "grad_norm": 0.4059826509112207, "learning_rate": 2e-05, "loss": 5.7638, "step": 3563 }, { "epoch": 0.11952712333361282, "grad_norm": 0.45812247081253715, "learning_rate": 2e-05, "loss": 5.3909, "step": 3564 }, { "epoch": 0.11956066068583886, "grad_norm": 0.3986462992622332, "learning_rate": 2e-05, "loss": 5.689, "step": 3565 }, { "epoch": 0.1195941980380649, "grad_norm": 0.40322584608366846, "learning_rate": 2e-05, "loss": 5.5793, "step": 3566 }, { "epoch": 0.11962773539029094, "grad_norm": 0.4107877923645172, "learning_rate": 2e-05, "loss": 5.5394, "step": 3567 }, { "epoch": 0.11966127274251698, "grad_norm": 0.4049803120422942, "learning_rate": 2e-05, "loss": 5.8575, "step": 3568 }, { "epoch": 0.11969481009474302, "grad_norm": 0.4268970033626455, "learning_rate": 2e-05, "loss": 5.6713, "step": 3569 }, { "epoch": 0.11972834744696906, "grad_norm": 0.46612096113786633, "learning_rate": 2e-05, "loss": 5.5383, "step": 3570 }, { "epoch": 0.1197618847991951, "grad_norm": 0.44538005265839115, "learning_rate": 2e-05, "loss": 5.5609, "step": 3571 }, { "epoch": 0.11979542215142115, "grad_norm": 0.4334719768550704, "learning_rate": 2e-05, "loss": 5.5541, "step": 3572 }, { "epoch": 0.11982895950364719, "grad_norm": 0.39558553500191435, "learning_rate": 2e-05, "loss": 5.4983, "step": 3573 }, { "epoch": 0.11986249685587323, "grad_norm": 0.41262102442964604, "learning_rate": 2e-05, "loss": 5.458, "step": 3574 }, { "epoch": 0.11989603420809927, "grad_norm": 0.3875647573596463, "learning_rate": 2e-05, "loss": 5.6235, "step": 3575 }, { "epoch": 0.11992957156032531, "grad_norm": 0.4401879770328311, "learning_rate": 2e-05, "loss": 5.388, "step": 3576 }, { "epoch": 0.11996310891255135, "grad_norm": 0.4194825145562847, "learning_rate": 2e-05, "loss": 5.4322, "step": 3577 }, { "epoch": 0.11999664626477739, "grad_norm": 0.40904162591964544, "learning_rate": 2e-05, "loss": 5.7001, "step": 3578 }, { "epoch": 0.12003018361700343, "grad_norm": 0.39478274232294386, "learning_rate": 2e-05, "loss": 5.558, "step": 3579 }, { "epoch": 0.12006372096922947, "grad_norm": 0.426538642317221, "learning_rate": 2e-05, "loss": 5.5069, "step": 3580 }, { "epoch": 0.12009725832145553, "grad_norm": 0.40958291393429946, "learning_rate": 2e-05, "loss": 5.5033, "step": 3581 }, { "epoch": 0.12013079567368157, "grad_norm": 0.3743718018725689, "learning_rate": 2e-05, "loss": 5.4559, "step": 3582 }, { "epoch": 0.12016433302590761, "grad_norm": 0.3913544767231392, "learning_rate": 2e-05, "loss": 5.6169, "step": 3583 }, { "epoch": 0.12019787037813365, "grad_norm": 0.4283114876754919, "learning_rate": 2e-05, "loss": 5.6877, "step": 3584 }, { "epoch": 0.12023140773035969, "grad_norm": 0.4103127962515117, "learning_rate": 2e-05, "loss": 5.582, "step": 3585 }, { "epoch": 0.12026494508258573, "grad_norm": 0.4271056825280187, "learning_rate": 2e-05, "loss": 5.6454, "step": 3586 }, { "epoch": 0.12029848243481177, "grad_norm": 0.42119052623480174, "learning_rate": 2e-05, "loss": 5.6631, "step": 3587 }, { "epoch": 0.12033201978703781, "grad_norm": 0.4285129749858038, "learning_rate": 2e-05, "loss": 5.5592, "step": 3588 }, { "epoch": 0.12036555713926385, "grad_norm": 0.47000706366354034, "learning_rate": 2e-05, "loss": 5.5747, "step": 3589 }, { "epoch": 0.1203990944914899, "grad_norm": 0.43115643417113253, "learning_rate": 2e-05, "loss": 5.7679, "step": 3590 }, { "epoch": 0.12043263184371594, "grad_norm": 0.40728760036859435, "learning_rate": 2e-05, "loss": 5.5852, "step": 3591 }, { "epoch": 0.12046616919594198, "grad_norm": 0.45042414647664314, "learning_rate": 2e-05, "loss": 5.5208, "step": 3592 }, { "epoch": 0.12049970654816802, "grad_norm": 0.42506617461872487, "learning_rate": 2e-05, "loss": 5.5245, "step": 3593 }, { "epoch": 0.12053324390039406, "grad_norm": 0.4093209850524241, "learning_rate": 2e-05, "loss": 5.734, "step": 3594 }, { "epoch": 0.1205667812526201, "grad_norm": 0.42798965816934503, "learning_rate": 2e-05, "loss": 5.7402, "step": 3595 }, { "epoch": 0.12060031860484614, "grad_norm": 0.4588436437586222, "learning_rate": 2e-05, "loss": 5.5113, "step": 3596 }, { "epoch": 0.12063385595707218, "grad_norm": 0.39546814881255465, "learning_rate": 2e-05, "loss": 5.5495, "step": 3597 }, { "epoch": 0.12066739330929824, "grad_norm": 0.4385316759009483, "learning_rate": 2e-05, "loss": 5.4877, "step": 3598 }, { "epoch": 0.12070093066152428, "grad_norm": 0.435589162474905, "learning_rate": 2e-05, "loss": 5.6717, "step": 3599 }, { "epoch": 0.12073446801375032, "grad_norm": 0.42880642848541134, "learning_rate": 2e-05, "loss": 5.4092, "step": 3600 }, { "epoch": 0.12076800536597636, "grad_norm": 0.404635114165575, "learning_rate": 2e-05, "loss": 5.3044, "step": 3601 }, { "epoch": 0.1208015427182024, "grad_norm": 0.4160563184485114, "learning_rate": 2e-05, "loss": 5.495, "step": 3602 }, { "epoch": 0.12083508007042844, "grad_norm": 0.46958232412364626, "learning_rate": 2e-05, "loss": 5.5301, "step": 3603 }, { "epoch": 0.12086861742265448, "grad_norm": 0.44719662764731116, "learning_rate": 2e-05, "loss": 5.5391, "step": 3604 }, { "epoch": 0.12090215477488052, "grad_norm": 0.42524431920142347, "learning_rate": 2e-05, "loss": 5.6153, "step": 3605 }, { "epoch": 0.12093569212710656, "grad_norm": 0.42780383846886394, "learning_rate": 2e-05, "loss": 5.3889, "step": 3606 }, { "epoch": 0.12096922947933261, "grad_norm": 0.4210906039443553, "learning_rate": 2e-05, "loss": 5.4611, "step": 3607 }, { "epoch": 0.12100276683155865, "grad_norm": 0.41054906782327655, "learning_rate": 2e-05, "loss": 5.7106, "step": 3608 }, { "epoch": 0.1210363041837847, "grad_norm": 0.43064695805982317, "learning_rate": 2e-05, "loss": 5.2918, "step": 3609 }, { "epoch": 0.12106984153601073, "grad_norm": 0.4233434495110179, "learning_rate": 2e-05, "loss": 5.7473, "step": 3610 }, { "epoch": 0.12110337888823677, "grad_norm": 0.41564800887393194, "learning_rate": 2e-05, "loss": 5.6561, "step": 3611 }, { "epoch": 0.12113691624046281, "grad_norm": 0.41379181136204807, "learning_rate": 2e-05, "loss": 5.469, "step": 3612 }, { "epoch": 0.12117045359268885, "grad_norm": 0.4246754819605693, "learning_rate": 2e-05, "loss": 5.433, "step": 3613 }, { "epoch": 0.1212039909449149, "grad_norm": 0.4593208765834357, "learning_rate": 2e-05, "loss": 5.7834, "step": 3614 }, { "epoch": 0.12123752829714093, "grad_norm": 0.413952774342974, "learning_rate": 2e-05, "loss": 5.5367, "step": 3615 }, { "epoch": 0.12127106564936699, "grad_norm": 0.4477263482624505, "learning_rate": 2e-05, "loss": 5.5522, "step": 3616 }, { "epoch": 0.12130460300159303, "grad_norm": 0.4187237734280956, "learning_rate": 2e-05, "loss": 5.4465, "step": 3617 }, { "epoch": 0.12133814035381907, "grad_norm": 0.44713889202301, "learning_rate": 2e-05, "loss": 5.3124, "step": 3618 }, { "epoch": 0.12137167770604511, "grad_norm": 0.40096876427106054, "learning_rate": 2e-05, "loss": 5.5644, "step": 3619 }, { "epoch": 0.12140521505827115, "grad_norm": 0.418391378958951, "learning_rate": 2e-05, "loss": 5.5821, "step": 3620 }, { "epoch": 0.12143875241049719, "grad_norm": 0.4406848272680478, "learning_rate": 2e-05, "loss": 5.5356, "step": 3621 }, { "epoch": 0.12147228976272323, "grad_norm": 0.40210079252054176, "learning_rate": 2e-05, "loss": 5.4244, "step": 3622 }, { "epoch": 0.12150582711494927, "grad_norm": 0.4287149923293932, "learning_rate": 2e-05, "loss": 5.4352, "step": 3623 }, { "epoch": 0.12153936446717532, "grad_norm": 0.45135255509437194, "learning_rate": 2e-05, "loss": 5.6119, "step": 3624 }, { "epoch": 0.12157290181940136, "grad_norm": 0.4161896209508731, "learning_rate": 2e-05, "loss": 5.5922, "step": 3625 }, { "epoch": 0.1216064391716274, "grad_norm": 0.45494706658992, "learning_rate": 2e-05, "loss": 5.5523, "step": 3626 }, { "epoch": 0.12163997652385344, "grad_norm": 0.4907591127942919, "learning_rate": 2e-05, "loss": 5.5041, "step": 3627 }, { "epoch": 0.12167351387607948, "grad_norm": 0.4126936519773749, "learning_rate": 2e-05, "loss": 5.6288, "step": 3628 }, { "epoch": 0.12170705122830552, "grad_norm": 0.41773318807617343, "learning_rate": 2e-05, "loss": 5.5081, "step": 3629 }, { "epoch": 0.12174058858053156, "grad_norm": 0.4696357066950832, "learning_rate": 2e-05, "loss": 5.6829, "step": 3630 }, { "epoch": 0.1217741259327576, "grad_norm": 0.40291297979584056, "learning_rate": 2e-05, "loss": 5.7029, "step": 3631 }, { "epoch": 0.12180766328498364, "grad_norm": 0.4078915734387231, "learning_rate": 2e-05, "loss": 5.4693, "step": 3632 }, { "epoch": 0.1218412006372097, "grad_norm": 0.4225800065320419, "learning_rate": 2e-05, "loss": 5.4924, "step": 3633 }, { "epoch": 0.12187473798943574, "grad_norm": 0.42831915014390554, "learning_rate": 2e-05, "loss": 5.5172, "step": 3634 }, { "epoch": 0.12190827534166178, "grad_norm": 0.404064575748281, "learning_rate": 2e-05, "loss": 5.5459, "step": 3635 }, { "epoch": 0.12194181269388782, "grad_norm": 0.412036098219965, "learning_rate": 2e-05, "loss": 5.4682, "step": 3636 }, { "epoch": 0.12197535004611386, "grad_norm": 0.4158620225510405, "learning_rate": 2e-05, "loss": 5.5361, "step": 3637 }, { "epoch": 0.1220088873983399, "grad_norm": 0.42253335852513374, "learning_rate": 2e-05, "loss": 5.4858, "step": 3638 }, { "epoch": 0.12204242475056594, "grad_norm": 0.42893247451266137, "learning_rate": 2e-05, "loss": 5.414, "step": 3639 }, { "epoch": 0.12207596210279198, "grad_norm": 0.4153888527202187, "learning_rate": 2e-05, "loss": 5.5182, "step": 3640 }, { "epoch": 0.12210949945501802, "grad_norm": 0.40867479173744986, "learning_rate": 2e-05, "loss": 5.579, "step": 3641 }, { "epoch": 0.12214303680724407, "grad_norm": 0.46534631850217073, "learning_rate": 2e-05, "loss": 5.6551, "step": 3642 }, { "epoch": 0.12217657415947011, "grad_norm": 0.4104443168827506, "learning_rate": 2e-05, "loss": 5.7258, "step": 3643 }, { "epoch": 0.12221011151169615, "grad_norm": 0.43953210816343496, "learning_rate": 2e-05, "loss": 5.5043, "step": 3644 }, { "epoch": 0.1222436488639222, "grad_norm": 0.39065156464467615, "learning_rate": 2e-05, "loss": 5.5947, "step": 3645 }, { "epoch": 0.12227718621614823, "grad_norm": 0.4321838967420827, "learning_rate": 2e-05, "loss": 5.6512, "step": 3646 }, { "epoch": 0.12231072356837427, "grad_norm": 0.40543431930342816, "learning_rate": 2e-05, "loss": 5.2201, "step": 3647 }, { "epoch": 0.12234426092060031, "grad_norm": 0.4357887086512716, "learning_rate": 2e-05, "loss": 5.395, "step": 3648 }, { "epoch": 0.12237779827282635, "grad_norm": 0.42189638187435036, "learning_rate": 2e-05, "loss": 5.5359, "step": 3649 }, { "epoch": 0.12241133562505241, "grad_norm": 0.44478344213729015, "learning_rate": 2e-05, "loss": 5.5388, "step": 3650 }, { "epoch": 0.12244487297727845, "grad_norm": 0.39273127610319136, "learning_rate": 2e-05, "loss": 5.5083, "step": 3651 }, { "epoch": 0.12247841032950449, "grad_norm": 0.40539068423848346, "learning_rate": 2e-05, "loss": 5.3608, "step": 3652 }, { "epoch": 0.12251194768173053, "grad_norm": 0.41052568893192015, "learning_rate": 2e-05, "loss": 5.4265, "step": 3653 }, { "epoch": 0.12254548503395657, "grad_norm": 0.41300147587527275, "learning_rate": 2e-05, "loss": 5.5544, "step": 3654 }, { "epoch": 0.12257902238618261, "grad_norm": 0.4270873160310297, "learning_rate": 2e-05, "loss": 5.4842, "step": 3655 }, { "epoch": 0.12261255973840865, "grad_norm": 0.4197451466259273, "learning_rate": 2e-05, "loss": 5.4959, "step": 3656 }, { "epoch": 0.12264609709063469, "grad_norm": 0.4150311391723529, "learning_rate": 2e-05, "loss": 5.5859, "step": 3657 }, { "epoch": 0.12267963444286073, "grad_norm": 0.4541817388669099, "learning_rate": 2e-05, "loss": 5.4859, "step": 3658 }, { "epoch": 0.12271317179508678, "grad_norm": 0.4311345201524716, "learning_rate": 2e-05, "loss": 5.394, "step": 3659 }, { "epoch": 0.12274670914731282, "grad_norm": 0.44377425791551955, "learning_rate": 2e-05, "loss": 5.6107, "step": 3660 }, { "epoch": 0.12278024649953886, "grad_norm": 0.43772698096578516, "learning_rate": 2e-05, "loss": 5.6584, "step": 3661 }, { "epoch": 0.1228137838517649, "grad_norm": 0.4186497536105782, "learning_rate": 2e-05, "loss": 5.4539, "step": 3662 }, { "epoch": 0.12284732120399094, "grad_norm": 0.41423823964461476, "learning_rate": 2e-05, "loss": 5.456, "step": 3663 }, { "epoch": 0.12288085855621698, "grad_norm": 0.41222497702323224, "learning_rate": 2e-05, "loss": 5.5422, "step": 3664 }, { "epoch": 0.12291439590844302, "grad_norm": 0.4793861225198148, "learning_rate": 2e-05, "loss": 5.5907, "step": 3665 }, { "epoch": 0.12294793326066907, "grad_norm": 0.41379201128658016, "learning_rate": 2e-05, "loss": 5.4708, "step": 3666 }, { "epoch": 0.1229814706128951, "grad_norm": 0.4221067348806129, "learning_rate": 2e-05, "loss": 5.4141, "step": 3667 }, { "epoch": 0.12301500796512116, "grad_norm": 0.4375354473066025, "learning_rate": 2e-05, "loss": 5.5427, "step": 3668 }, { "epoch": 0.1230485453173472, "grad_norm": 0.40764066373099506, "learning_rate": 2e-05, "loss": 5.5619, "step": 3669 }, { "epoch": 0.12308208266957324, "grad_norm": 0.4254765909916127, "learning_rate": 2e-05, "loss": 5.5636, "step": 3670 }, { "epoch": 0.12311562002179928, "grad_norm": 0.3858182564122803, "learning_rate": 2e-05, "loss": 5.5672, "step": 3671 }, { "epoch": 0.12314915737402532, "grad_norm": 0.43592154595692484, "learning_rate": 2e-05, "loss": 5.5452, "step": 3672 }, { "epoch": 0.12318269472625136, "grad_norm": 0.42146610699056997, "learning_rate": 2e-05, "loss": 5.5147, "step": 3673 }, { "epoch": 0.1232162320784774, "grad_norm": 0.4143060541009148, "learning_rate": 2e-05, "loss": 5.5972, "step": 3674 }, { "epoch": 0.12324976943070344, "grad_norm": 0.41674990984900717, "learning_rate": 2e-05, "loss": 5.3054, "step": 3675 }, { "epoch": 0.1232833067829295, "grad_norm": 0.4386246493697533, "learning_rate": 2e-05, "loss": 5.5494, "step": 3676 }, { "epoch": 0.12331684413515553, "grad_norm": 0.5260487695495254, "learning_rate": 2e-05, "loss": 5.4753, "step": 3677 }, { "epoch": 0.12335038148738157, "grad_norm": 0.4235923460763713, "learning_rate": 2e-05, "loss": 5.4822, "step": 3678 }, { "epoch": 0.12338391883960761, "grad_norm": 0.42987348399053665, "learning_rate": 2e-05, "loss": 5.578, "step": 3679 }, { "epoch": 0.12341745619183365, "grad_norm": 0.44207102980690505, "learning_rate": 2e-05, "loss": 5.6599, "step": 3680 }, { "epoch": 0.1234509935440597, "grad_norm": 0.4351589745448102, "learning_rate": 2e-05, "loss": 5.4414, "step": 3681 }, { "epoch": 0.12348453089628574, "grad_norm": 0.4224023851747247, "learning_rate": 2e-05, "loss": 5.5744, "step": 3682 }, { "epoch": 0.12351806824851178, "grad_norm": 0.43061586243401073, "learning_rate": 2e-05, "loss": 5.652, "step": 3683 }, { "epoch": 0.12355160560073782, "grad_norm": 0.424259519575912, "learning_rate": 2e-05, "loss": 5.4508, "step": 3684 }, { "epoch": 0.12358514295296387, "grad_norm": 0.4355621230986763, "learning_rate": 2e-05, "loss": 5.5609, "step": 3685 }, { "epoch": 0.12361868030518991, "grad_norm": 0.40682667098519626, "learning_rate": 2e-05, "loss": 5.6442, "step": 3686 }, { "epoch": 0.12365221765741595, "grad_norm": 0.40809898525745003, "learning_rate": 2e-05, "loss": 5.4558, "step": 3687 }, { "epoch": 0.12368575500964199, "grad_norm": 0.40046895962559936, "learning_rate": 2e-05, "loss": 5.5191, "step": 3688 }, { "epoch": 0.12371929236186803, "grad_norm": 0.42409171351865504, "learning_rate": 2e-05, "loss": 5.5377, "step": 3689 }, { "epoch": 0.12375282971409407, "grad_norm": 0.48296852800661005, "learning_rate": 2e-05, "loss": 5.4998, "step": 3690 }, { "epoch": 0.12378636706632011, "grad_norm": 0.40157051162960017, "learning_rate": 2e-05, "loss": 5.4219, "step": 3691 }, { "epoch": 0.12381990441854615, "grad_norm": 0.434710965140484, "learning_rate": 2e-05, "loss": 5.3325, "step": 3692 }, { "epoch": 0.12385344177077219, "grad_norm": 0.4285148123412735, "learning_rate": 2e-05, "loss": 5.6211, "step": 3693 }, { "epoch": 0.12388697912299824, "grad_norm": 0.43206330820030714, "learning_rate": 2e-05, "loss": 5.6786, "step": 3694 }, { "epoch": 0.12392051647522428, "grad_norm": 0.4500406137415388, "learning_rate": 2e-05, "loss": 5.7356, "step": 3695 }, { "epoch": 0.12395405382745033, "grad_norm": 0.4372494554481821, "learning_rate": 2e-05, "loss": 5.5172, "step": 3696 }, { "epoch": 0.12398759117967637, "grad_norm": 0.4339716721444012, "learning_rate": 2e-05, "loss": 5.4619, "step": 3697 }, { "epoch": 0.1240211285319024, "grad_norm": 0.4213767471516007, "learning_rate": 2e-05, "loss": 5.6346, "step": 3698 }, { "epoch": 0.12405466588412845, "grad_norm": 0.42000787559130004, "learning_rate": 2e-05, "loss": 5.4589, "step": 3699 }, { "epoch": 0.12408820323635449, "grad_norm": 0.46515333998747366, "learning_rate": 2e-05, "loss": 5.5074, "step": 3700 }, { "epoch": 0.12412174058858053, "grad_norm": 0.4104758812193437, "learning_rate": 2e-05, "loss": 5.4398, "step": 3701 }, { "epoch": 0.12415527794080658, "grad_norm": 0.4424159237097239, "learning_rate": 2e-05, "loss": 5.4377, "step": 3702 }, { "epoch": 0.12418881529303262, "grad_norm": 0.4551082717560005, "learning_rate": 2e-05, "loss": 5.749, "step": 3703 }, { "epoch": 0.12422235264525866, "grad_norm": 0.4026799566972812, "learning_rate": 2e-05, "loss": 5.5305, "step": 3704 }, { "epoch": 0.1242558899974847, "grad_norm": 0.40334903191992366, "learning_rate": 2e-05, "loss": 5.6426, "step": 3705 }, { "epoch": 0.12428942734971074, "grad_norm": 0.4082299134519563, "learning_rate": 2e-05, "loss": 5.5133, "step": 3706 }, { "epoch": 0.12432296470193678, "grad_norm": 0.42330765102253726, "learning_rate": 2e-05, "loss": 5.3341, "step": 3707 }, { "epoch": 0.12435650205416282, "grad_norm": 0.4025771802297183, "learning_rate": 2e-05, "loss": 5.468, "step": 3708 }, { "epoch": 0.12439003940638886, "grad_norm": 0.4024810881733088, "learning_rate": 2e-05, "loss": 5.5925, "step": 3709 }, { "epoch": 0.1244235767586149, "grad_norm": 0.4293373541565458, "learning_rate": 2e-05, "loss": 5.4854, "step": 3710 }, { "epoch": 0.12445711411084096, "grad_norm": 0.3991497183609512, "learning_rate": 2e-05, "loss": 5.5998, "step": 3711 }, { "epoch": 0.124490651463067, "grad_norm": 0.4349603920790629, "learning_rate": 2e-05, "loss": 5.5456, "step": 3712 }, { "epoch": 0.12452418881529304, "grad_norm": 0.40220396586892787, "learning_rate": 2e-05, "loss": 5.5127, "step": 3713 }, { "epoch": 0.12455772616751908, "grad_norm": 0.4172565106822462, "learning_rate": 2e-05, "loss": 5.554, "step": 3714 }, { "epoch": 0.12459126351974512, "grad_norm": 0.45721244092227764, "learning_rate": 2e-05, "loss": 5.7169, "step": 3715 }, { "epoch": 0.12462480087197116, "grad_norm": 0.4141642744730864, "learning_rate": 2e-05, "loss": 5.4469, "step": 3716 }, { "epoch": 0.1246583382241972, "grad_norm": 0.41754078824177937, "learning_rate": 2e-05, "loss": 5.4869, "step": 3717 }, { "epoch": 0.12469187557642324, "grad_norm": 0.4399294907043807, "learning_rate": 2e-05, "loss": 5.6872, "step": 3718 }, { "epoch": 0.12472541292864928, "grad_norm": 0.41739693456764326, "learning_rate": 2e-05, "loss": 5.4219, "step": 3719 }, { "epoch": 0.12475895028087533, "grad_norm": 0.4229403332229963, "learning_rate": 2e-05, "loss": 5.4531, "step": 3720 }, { "epoch": 0.12479248763310137, "grad_norm": 0.4276737287944439, "learning_rate": 2e-05, "loss": 5.4188, "step": 3721 }, { "epoch": 0.12482602498532741, "grad_norm": 0.44343341406391285, "learning_rate": 2e-05, "loss": 5.4475, "step": 3722 }, { "epoch": 0.12485956233755345, "grad_norm": 0.40181318074789035, "learning_rate": 2e-05, "loss": 5.6543, "step": 3723 }, { "epoch": 0.12489309968977949, "grad_norm": 0.39923035114129274, "learning_rate": 2e-05, "loss": 5.5514, "step": 3724 }, { "epoch": 0.12492663704200553, "grad_norm": 0.4067345499295169, "learning_rate": 2e-05, "loss": 5.4393, "step": 3725 }, { "epoch": 0.12496017439423157, "grad_norm": 0.41577489023570857, "learning_rate": 2e-05, "loss": 5.556, "step": 3726 }, { "epoch": 0.12499371174645761, "grad_norm": 0.4036824269092258, "learning_rate": 2e-05, "loss": 5.5385, "step": 3727 }, { "epoch": 0.12502724909868365, "grad_norm": 0.42402180293287023, "learning_rate": 2e-05, "loss": 5.4407, "step": 3728 }, { "epoch": 0.1250607864509097, "grad_norm": 0.4201825424369067, "learning_rate": 2e-05, "loss": 5.6208, "step": 3729 }, { "epoch": 0.12509432380313573, "grad_norm": 0.43801787642258305, "learning_rate": 2e-05, "loss": 5.6255, "step": 3730 }, { "epoch": 0.12512786115536179, "grad_norm": 0.397743624124909, "learning_rate": 2e-05, "loss": 5.4267, "step": 3731 }, { "epoch": 0.1251613985075878, "grad_norm": 0.41652043506283437, "learning_rate": 2e-05, "loss": 5.4748, "step": 3732 }, { "epoch": 0.12519493585981387, "grad_norm": 0.4392036999061379, "learning_rate": 2e-05, "loss": 5.7583, "step": 3733 }, { "epoch": 0.12522847321203992, "grad_norm": 0.4731808508872884, "learning_rate": 2e-05, "loss": 5.6034, "step": 3734 }, { "epoch": 0.12526201056426595, "grad_norm": 0.42748404601108525, "learning_rate": 2e-05, "loss": 5.6878, "step": 3735 }, { "epoch": 0.125295547916492, "grad_norm": 0.3964896912642594, "learning_rate": 2e-05, "loss": 5.5163, "step": 3736 }, { "epoch": 0.12532908526871803, "grad_norm": 0.4481224883390228, "learning_rate": 2e-05, "loss": 5.4819, "step": 3737 }, { "epoch": 0.12536262262094408, "grad_norm": 0.43169153429410495, "learning_rate": 2e-05, "loss": 5.5854, "step": 3738 }, { "epoch": 0.1253961599731701, "grad_norm": 0.4028840151587588, "learning_rate": 2e-05, "loss": 5.4933, "step": 3739 }, { "epoch": 0.12542969732539616, "grad_norm": 0.3956640429414147, "learning_rate": 2e-05, "loss": 5.4042, "step": 3740 }, { "epoch": 0.12546323467762222, "grad_norm": 0.41160068241311676, "learning_rate": 2e-05, "loss": 5.7488, "step": 3741 }, { "epoch": 0.12549677202984824, "grad_norm": 0.42929805497370965, "learning_rate": 2e-05, "loss": 5.7373, "step": 3742 }, { "epoch": 0.1255303093820743, "grad_norm": 0.3986824021572648, "learning_rate": 2e-05, "loss": 5.5961, "step": 3743 }, { "epoch": 0.12556384673430032, "grad_norm": 0.4266452213948079, "learning_rate": 2e-05, "loss": 5.4726, "step": 3744 }, { "epoch": 0.12559738408652638, "grad_norm": 0.44161907054150623, "learning_rate": 2e-05, "loss": 5.536, "step": 3745 }, { "epoch": 0.1256309214387524, "grad_norm": 0.4101687331685574, "learning_rate": 2e-05, "loss": 5.6233, "step": 3746 }, { "epoch": 0.12566445879097846, "grad_norm": 0.4185165958555316, "learning_rate": 2e-05, "loss": 5.5003, "step": 3747 }, { "epoch": 0.12569799614320448, "grad_norm": 0.44013611139514, "learning_rate": 2e-05, "loss": 5.7016, "step": 3748 }, { "epoch": 0.12573153349543054, "grad_norm": 0.4086763418989339, "learning_rate": 2e-05, "loss": 5.5214, "step": 3749 }, { "epoch": 0.1257650708476566, "grad_norm": 0.4389303587952748, "learning_rate": 2e-05, "loss": 5.4498, "step": 3750 }, { "epoch": 0.12579860819988262, "grad_norm": 0.4183151649897733, "learning_rate": 2e-05, "loss": 5.5783, "step": 3751 }, { "epoch": 0.12583214555210867, "grad_norm": 0.4000297101991966, "learning_rate": 2e-05, "loss": 5.7977, "step": 3752 }, { "epoch": 0.1258656829043347, "grad_norm": 0.4108079908122202, "learning_rate": 2e-05, "loss": 5.5752, "step": 3753 }, { "epoch": 0.12589922025656075, "grad_norm": 0.44665326839872244, "learning_rate": 2e-05, "loss": 5.6395, "step": 3754 }, { "epoch": 0.12593275760878678, "grad_norm": 0.4595991555248922, "learning_rate": 2e-05, "loss": 5.4658, "step": 3755 }, { "epoch": 0.12596629496101283, "grad_norm": 0.4354827733163453, "learning_rate": 2e-05, "loss": 5.6843, "step": 3756 }, { "epoch": 0.12599983231323886, "grad_norm": 0.42860324032439273, "learning_rate": 2e-05, "loss": 5.4178, "step": 3757 }, { "epoch": 0.1260333696654649, "grad_norm": 0.44662003662306504, "learning_rate": 2e-05, "loss": 5.6892, "step": 3758 }, { "epoch": 0.12606690701769097, "grad_norm": 0.42116508277634535, "learning_rate": 2e-05, "loss": 5.7105, "step": 3759 }, { "epoch": 0.126100444369917, "grad_norm": 0.421364709861846, "learning_rate": 2e-05, "loss": 5.5454, "step": 3760 }, { "epoch": 0.12613398172214305, "grad_norm": 0.4026159769907067, "learning_rate": 2e-05, "loss": 5.6416, "step": 3761 }, { "epoch": 0.12616751907436907, "grad_norm": 0.41215053019607256, "learning_rate": 2e-05, "loss": 5.6107, "step": 3762 }, { "epoch": 0.12620105642659513, "grad_norm": 0.4282680104177883, "learning_rate": 2e-05, "loss": 5.7002, "step": 3763 }, { "epoch": 0.12623459377882115, "grad_norm": 0.38841602416115145, "learning_rate": 2e-05, "loss": 5.5168, "step": 3764 }, { "epoch": 0.1262681311310472, "grad_norm": 0.4312911063895508, "learning_rate": 2e-05, "loss": 5.6198, "step": 3765 }, { "epoch": 0.12630166848327323, "grad_norm": 0.4013834768761966, "learning_rate": 2e-05, "loss": 5.531, "step": 3766 }, { "epoch": 0.1263352058354993, "grad_norm": 0.41310795373428766, "learning_rate": 2e-05, "loss": 5.9205, "step": 3767 }, { "epoch": 0.12636874318772534, "grad_norm": 0.41021208810420656, "learning_rate": 2e-05, "loss": 5.7288, "step": 3768 }, { "epoch": 0.12640228053995137, "grad_norm": 0.45798794315573943, "learning_rate": 2e-05, "loss": 5.652, "step": 3769 }, { "epoch": 0.12643581789217742, "grad_norm": 0.40757638076695835, "learning_rate": 2e-05, "loss": 5.3464, "step": 3770 }, { "epoch": 0.12646935524440345, "grad_norm": 0.40038091068402965, "learning_rate": 2e-05, "loss": 5.6028, "step": 3771 }, { "epoch": 0.1265028925966295, "grad_norm": 0.3989331147209006, "learning_rate": 2e-05, "loss": 5.4722, "step": 3772 }, { "epoch": 0.12653642994885553, "grad_norm": 0.40280664487891044, "learning_rate": 2e-05, "loss": 5.7137, "step": 3773 }, { "epoch": 0.12656996730108158, "grad_norm": 0.4167683102784541, "learning_rate": 2e-05, "loss": 5.5821, "step": 3774 }, { "epoch": 0.1266035046533076, "grad_norm": 0.39486520278598775, "learning_rate": 2e-05, "loss": 5.4372, "step": 3775 }, { "epoch": 0.12663704200553366, "grad_norm": 0.3996805299769621, "learning_rate": 2e-05, "loss": 5.7883, "step": 3776 }, { "epoch": 0.12667057935775972, "grad_norm": 0.46975830144504305, "learning_rate": 2e-05, "loss": 5.6133, "step": 3777 }, { "epoch": 0.12670411670998574, "grad_norm": 0.4209389783315571, "learning_rate": 2e-05, "loss": 5.5611, "step": 3778 }, { "epoch": 0.1267376540622118, "grad_norm": 0.40620838954266913, "learning_rate": 2e-05, "loss": 5.5953, "step": 3779 }, { "epoch": 0.12677119141443782, "grad_norm": 0.44253388456562925, "learning_rate": 2e-05, "loss": 5.614, "step": 3780 }, { "epoch": 0.12680472876666388, "grad_norm": 0.4152061126644566, "learning_rate": 2e-05, "loss": 5.6721, "step": 3781 }, { "epoch": 0.1268382661188899, "grad_norm": 0.41702709891664036, "learning_rate": 2e-05, "loss": 5.4946, "step": 3782 }, { "epoch": 0.12687180347111596, "grad_norm": 0.4228556046378138, "learning_rate": 2e-05, "loss": 5.5827, "step": 3783 }, { "epoch": 0.12690534082334198, "grad_norm": 0.4385898030810971, "learning_rate": 2e-05, "loss": 5.3332, "step": 3784 }, { "epoch": 0.12693887817556804, "grad_norm": 0.4053209042429749, "learning_rate": 2e-05, "loss": 5.7148, "step": 3785 }, { "epoch": 0.1269724155277941, "grad_norm": 0.4297393618903181, "learning_rate": 2e-05, "loss": 5.539, "step": 3786 }, { "epoch": 0.12700595288002012, "grad_norm": 0.41679542188765856, "learning_rate": 2e-05, "loss": 5.5859, "step": 3787 }, { "epoch": 0.12703949023224617, "grad_norm": 0.39615497775841235, "learning_rate": 2e-05, "loss": 5.4624, "step": 3788 }, { "epoch": 0.1270730275844722, "grad_norm": 0.4239476546642492, "learning_rate": 2e-05, "loss": 5.4451, "step": 3789 }, { "epoch": 0.12710656493669825, "grad_norm": 0.43027816873995867, "learning_rate": 2e-05, "loss": 5.5298, "step": 3790 }, { "epoch": 0.12714010228892428, "grad_norm": 0.398999503054554, "learning_rate": 2e-05, "loss": 5.5688, "step": 3791 }, { "epoch": 0.12717363964115033, "grad_norm": 0.40864271866744106, "learning_rate": 2e-05, "loss": 5.4382, "step": 3792 }, { "epoch": 0.1272071769933764, "grad_norm": 0.4629221161528001, "learning_rate": 2e-05, "loss": 5.617, "step": 3793 }, { "epoch": 0.1272407143456024, "grad_norm": 0.42506974051988033, "learning_rate": 2e-05, "loss": 5.4766, "step": 3794 }, { "epoch": 0.12727425169782847, "grad_norm": 0.4200441116044747, "learning_rate": 2e-05, "loss": 5.615, "step": 3795 }, { "epoch": 0.1273077890500545, "grad_norm": 0.4027771510329035, "learning_rate": 2e-05, "loss": 5.494, "step": 3796 }, { "epoch": 0.12734132640228055, "grad_norm": 0.4418161178369067, "learning_rate": 2e-05, "loss": 5.3471, "step": 3797 }, { "epoch": 0.12737486375450657, "grad_norm": 0.423010067620721, "learning_rate": 2e-05, "loss": 5.4145, "step": 3798 }, { "epoch": 0.12740840110673263, "grad_norm": 0.42479227975894523, "learning_rate": 2e-05, "loss": 5.4556, "step": 3799 }, { "epoch": 0.12744193845895865, "grad_norm": 0.4209594215026914, "learning_rate": 2e-05, "loss": 5.6792, "step": 3800 }, { "epoch": 0.1274754758111847, "grad_norm": 0.40554104675279595, "learning_rate": 2e-05, "loss": 5.33, "step": 3801 }, { "epoch": 0.12750901316341076, "grad_norm": 0.4311144222338694, "learning_rate": 2e-05, "loss": 5.7407, "step": 3802 }, { "epoch": 0.1275425505156368, "grad_norm": 0.4089387935451406, "learning_rate": 2e-05, "loss": 5.4802, "step": 3803 }, { "epoch": 0.12757608786786284, "grad_norm": 0.43618108267083855, "learning_rate": 2e-05, "loss": 5.5676, "step": 3804 }, { "epoch": 0.12760962522008887, "grad_norm": 0.4219878390854274, "learning_rate": 2e-05, "loss": 5.6549, "step": 3805 }, { "epoch": 0.12764316257231492, "grad_norm": 0.43687517425337163, "learning_rate": 2e-05, "loss": 5.821, "step": 3806 }, { "epoch": 0.12767669992454095, "grad_norm": 0.41252568497179587, "learning_rate": 2e-05, "loss": 5.5979, "step": 3807 }, { "epoch": 0.127710237276767, "grad_norm": 0.43634369216441427, "learning_rate": 2e-05, "loss": 5.5254, "step": 3808 }, { "epoch": 0.12774377462899303, "grad_norm": 0.42156328007270655, "learning_rate": 2e-05, "loss": 5.7642, "step": 3809 }, { "epoch": 0.12777731198121908, "grad_norm": 0.4490443510535036, "learning_rate": 2e-05, "loss": 5.6904, "step": 3810 }, { "epoch": 0.12781084933344514, "grad_norm": 0.4167857318070993, "learning_rate": 2e-05, "loss": 5.4158, "step": 3811 }, { "epoch": 0.12784438668567116, "grad_norm": 0.3971518416435317, "learning_rate": 2e-05, "loss": 5.5444, "step": 3812 }, { "epoch": 0.12787792403789722, "grad_norm": 0.43185433879060425, "learning_rate": 2e-05, "loss": 5.5438, "step": 3813 }, { "epoch": 0.12791146139012324, "grad_norm": 0.4758911058165608, "learning_rate": 2e-05, "loss": 5.7196, "step": 3814 }, { "epoch": 0.1279449987423493, "grad_norm": 0.4340685131880768, "learning_rate": 2e-05, "loss": 5.6172, "step": 3815 }, { "epoch": 0.12797853609457532, "grad_norm": 0.4112774357578009, "learning_rate": 2e-05, "loss": 5.5041, "step": 3816 }, { "epoch": 0.12801207344680138, "grad_norm": 0.40397808054099704, "learning_rate": 2e-05, "loss": 5.5698, "step": 3817 }, { "epoch": 0.1280456107990274, "grad_norm": 0.410604135445546, "learning_rate": 2e-05, "loss": 5.5724, "step": 3818 }, { "epoch": 0.12807914815125346, "grad_norm": 0.4402659884089573, "learning_rate": 2e-05, "loss": 5.6578, "step": 3819 }, { "epoch": 0.1281126855034795, "grad_norm": 0.4696227258701683, "learning_rate": 2e-05, "loss": 5.6656, "step": 3820 }, { "epoch": 0.12814622285570554, "grad_norm": 0.42024095982682413, "learning_rate": 2e-05, "loss": 5.5089, "step": 3821 }, { "epoch": 0.1281797602079316, "grad_norm": 0.4451459028042263, "learning_rate": 2e-05, "loss": 5.6933, "step": 3822 }, { "epoch": 0.12821329756015762, "grad_norm": 0.4407446094287422, "learning_rate": 2e-05, "loss": 5.4882, "step": 3823 }, { "epoch": 0.12824683491238367, "grad_norm": 0.4016799289614767, "learning_rate": 2e-05, "loss": 5.6797, "step": 3824 }, { "epoch": 0.1282803722646097, "grad_norm": 0.45771114373672306, "learning_rate": 2e-05, "loss": 5.6866, "step": 3825 }, { "epoch": 0.12831390961683575, "grad_norm": 0.46802642504629055, "learning_rate": 2e-05, "loss": 5.6128, "step": 3826 }, { "epoch": 0.12834744696906178, "grad_norm": 0.47665972081844565, "learning_rate": 2e-05, "loss": 5.578, "step": 3827 }, { "epoch": 0.12838098432128783, "grad_norm": 0.44108879759349495, "learning_rate": 2e-05, "loss": 5.6578, "step": 3828 }, { "epoch": 0.1284145216735139, "grad_norm": 0.44875174688244784, "learning_rate": 2e-05, "loss": 5.5372, "step": 3829 }, { "epoch": 0.1284480590257399, "grad_norm": 0.48687693184699493, "learning_rate": 2e-05, "loss": 5.3378, "step": 3830 }, { "epoch": 0.12848159637796597, "grad_norm": 0.4162976237309057, "learning_rate": 2e-05, "loss": 5.6868, "step": 3831 }, { "epoch": 0.128515133730192, "grad_norm": 0.4317241077120028, "learning_rate": 2e-05, "loss": 5.6602, "step": 3832 }, { "epoch": 0.12854867108241805, "grad_norm": 0.4515555069904766, "learning_rate": 2e-05, "loss": 5.5836, "step": 3833 }, { "epoch": 0.12858220843464407, "grad_norm": 0.4073880593360406, "learning_rate": 2e-05, "loss": 5.6315, "step": 3834 }, { "epoch": 0.12861574578687013, "grad_norm": 0.44045828641899076, "learning_rate": 2e-05, "loss": 5.6737, "step": 3835 }, { "epoch": 0.12864928313909618, "grad_norm": 0.42401027496951194, "learning_rate": 2e-05, "loss": 5.4596, "step": 3836 }, { "epoch": 0.1286828204913222, "grad_norm": 0.4233082536076486, "learning_rate": 2e-05, "loss": 5.7484, "step": 3837 }, { "epoch": 0.12871635784354826, "grad_norm": 0.41865063810910474, "learning_rate": 2e-05, "loss": 5.4514, "step": 3838 }, { "epoch": 0.1287498951957743, "grad_norm": 0.4714406218925624, "learning_rate": 2e-05, "loss": 5.8558, "step": 3839 }, { "epoch": 0.12878343254800034, "grad_norm": 0.45616936806658154, "learning_rate": 2e-05, "loss": 5.5807, "step": 3840 }, { "epoch": 0.12881696990022637, "grad_norm": 0.4061008038148288, "learning_rate": 2e-05, "loss": 5.5422, "step": 3841 }, { "epoch": 0.12885050725245242, "grad_norm": 0.44218150197559625, "learning_rate": 2e-05, "loss": 5.4697, "step": 3842 }, { "epoch": 0.12888404460467845, "grad_norm": 0.3983946200223743, "learning_rate": 2e-05, "loss": 5.4178, "step": 3843 }, { "epoch": 0.1289175819569045, "grad_norm": 0.41878337188312753, "learning_rate": 2e-05, "loss": 5.7221, "step": 3844 }, { "epoch": 0.12895111930913056, "grad_norm": 0.4538965896003936, "learning_rate": 2e-05, "loss": 5.42, "step": 3845 }, { "epoch": 0.12898465666135658, "grad_norm": 0.40242401722419785, "learning_rate": 2e-05, "loss": 5.6774, "step": 3846 }, { "epoch": 0.12901819401358264, "grad_norm": 0.42244381053547986, "learning_rate": 2e-05, "loss": 5.5628, "step": 3847 }, { "epoch": 0.12905173136580866, "grad_norm": 0.39329952101481114, "learning_rate": 2e-05, "loss": 5.6954, "step": 3848 }, { "epoch": 0.12908526871803472, "grad_norm": 0.43761314948204955, "learning_rate": 2e-05, "loss": 5.4052, "step": 3849 }, { "epoch": 0.12911880607026074, "grad_norm": 0.4135597330174072, "learning_rate": 2e-05, "loss": 5.54, "step": 3850 }, { "epoch": 0.1291523434224868, "grad_norm": 0.4046815494641093, "learning_rate": 2e-05, "loss": 5.5724, "step": 3851 }, { "epoch": 0.12918588077471282, "grad_norm": 0.4237213987455574, "learning_rate": 2e-05, "loss": 5.5878, "step": 3852 }, { "epoch": 0.12921941812693888, "grad_norm": 0.4070949709094079, "learning_rate": 2e-05, "loss": 5.6593, "step": 3853 }, { "epoch": 0.12925295547916493, "grad_norm": 0.44299380751037143, "learning_rate": 2e-05, "loss": 5.7758, "step": 3854 }, { "epoch": 0.12928649283139096, "grad_norm": 0.416580849896157, "learning_rate": 2e-05, "loss": 5.5492, "step": 3855 }, { "epoch": 0.129320030183617, "grad_norm": 0.42721175106048936, "learning_rate": 2e-05, "loss": 5.5214, "step": 3856 }, { "epoch": 0.12935356753584304, "grad_norm": 0.40811926243934865, "learning_rate": 2e-05, "loss": 5.5187, "step": 3857 }, { "epoch": 0.1293871048880691, "grad_norm": 0.42298672470364923, "learning_rate": 2e-05, "loss": 5.5342, "step": 3858 }, { "epoch": 0.12942064224029512, "grad_norm": 0.40990486409899113, "learning_rate": 2e-05, "loss": 5.4827, "step": 3859 }, { "epoch": 0.12945417959252117, "grad_norm": 0.4137367127747551, "learning_rate": 2e-05, "loss": 5.4059, "step": 3860 }, { "epoch": 0.1294877169447472, "grad_norm": 0.41604277315487526, "learning_rate": 2e-05, "loss": 5.3578, "step": 3861 }, { "epoch": 0.12952125429697325, "grad_norm": 0.427330289107223, "learning_rate": 2e-05, "loss": 5.5193, "step": 3862 }, { "epoch": 0.1295547916491993, "grad_norm": 0.41872188676468675, "learning_rate": 2e-05, "loss": 5.483, "step": 3863 }, { "epoch": 0.12958832900142533, "grad_norm": 0.4442397288282782, "learning_rate": 2e-05, "loss": 5.5509, "step": 3864 }, { "epoch": 0.1296218663536514, "grad_norm": 0.40550112225890383, "learning_rate": 2e-05, "loss": 5.5809, "step": 3865 }, { "epoch": 0.12965540370587741, "grad_norm": 0.41926753593553084, "learning_rate": 2e-05, "loss": 5.6484, "step": 3866 }, { "epoch": 0.12968894105810347, "grad_norm": 0.4152468399538785, "learning_rate": 2e-05, "loss": 5.383, "step": 3867 }, { "epoch": 0.1297224784103295, "grad_norm": 0.4387731007024254, "learning_rate": 2e-05, "loss": 5.7789, "step": 3868 }, { "epoch": 0.12975601576255555, "grad_norm": 0.41510500400091355, "learning_rate": 2e-05, "loss": 5.623, "step": 3869 }, { "epoch": 0.12978955311478158, "grad_norm": 0.41031250587813917, "learning_rate": 2e-05, "loss": 5.6993, "step": 3870 }, { "epoch": 0.12982309046700763, "grad_norm": 0.43164726179030083, "learning_rate": 2e-05, "loss": 5.697, "step": 3871 }, { "epoch": 0.12985662781923368, "grad_norm": 0.44928949388206074, "learning_rate": 2e-05, "loss": 5.461, "step": 3872 }, { "epoch": 0.1298901651714597, "grad_norm": 0.41043975129637456, "learning_rate": 2e-05, "loss": 5.5035, "step": 3873 }, { "epoch": 0.12992370252368576, "grad_norm": 0.413540883231648, "learning_rate": 2e-05, "loss": 5.5537, "step": 3874 }, { "epoch": 0.1299572398759118, "grad_norm": 0.4409458385040428, "learning_rate": 2e-05, "loss": 5.4717, "step": 3875 }, { "epoch": 0.12999077722813784, "grad_norm": 0.3936331071771591, "learning_rate": 2e-05, "loss": 5.5085, "step": 3876 }, { "epoch": 0.13002431458036387, "grad_norm": 0.4014094360031237, "learning_rate": 2e-05, "loss": 5.637, "step": 3877 }, { "epoch": 0.13005785193258992, "grad_norm": 0.4458258202831046, "learning_rate": 2e-05, "loss": 5.6369, "step": 3878 }, { "epoch": 0.13009138928481595, "grad_norm": 0.39513571644897544, "learning_rate": 2e-05, "loss": 5.4802, "step": 3879 }, { "epoch": 0.130124926637042, "grad_norm": 0.41936948703685, "learning_rate": 2e-05, "loss": 5.6704, "step": 3880 }, { "epoch": 0.13015846398926806, "grad_norm": 0.41080386756901627, "learning_rate": 2e-05, "loss": 5.5318, "step": 3881 }, { "epoch": 0.13019200134149408, "grad_norm": 0.4214551760595563, "learning_rate": 2e-05, "loss": 5.5255, "step": 3882 }, { "epoch": 0.13022553869372014, "grad_norm": 0.4278709289892493, "learning_rate": 2e-05, "loss": 5.6245, "step": 3883 }, { "epoch": 0.13025907604594616, "grad_norm": 0.4617113341323326, "learning_rate": 2e-05, "loss": 5.3928, "step": 3884 }, { "epoch": 0.13029261339817222, "grad_norm": 0.4329735647829344, "learning_rate": 2e-05, "loss": 5.62, "step": 3885 }, { "epoch": 0.13032615075039825, "grad_norm": 0.4252845067183316, "learning_rate": 2e-05, "loss": 5.4891, "step": 3886 }, { "epoch": 0.1303596881026243, "grad_norm": 0.45145833003995584, "learning_rate": 2e-05, "loss": 5.5628, "step": 3887 }, { "epoch": 0.13039322545485035, "grad_norm": 0.43256074861352084, "learning_rate": 2e-05, "loss": 5.6079, "step": 3888 }, { "epoch": 0.13042676280707638, "grad_norm": 0.4238011800218643, "learning_rate": 2e-05, "loss": 5.3735, "step": 3889 }, { "epoch": 0.13046030015930243, "grad_norm": 0.427209776649263, "learning_rate": 2e-05, "loss": 5.7401, "step": 3890 }, { "epoch": 0.13049383751152846, "grad_norm": 0.4640914379002373, "learning_rate": 2e-05, "loss": 5.4887, "step": 3891 }, { "epoch": 0.13052737486375451, "grad_norm": 0.43584632125726935, "learning_rate": 2e-05, "loss": 5.5404, "step": 3892 }, { "epoch": 0.13056091221598054, "grad_norm": 0.3995811375592253, "learning_rate": 2e-05, "loss": 5.5305, "step": 3893 }, { "epoch": 0.1305944495682066, "grad_norm": 0.47196454281029154, "learning_rate": 2e-05, "loss": 5.5268, "step": 3894 }, { "epoch": 0.13062798692043262, "grad_norm": 0.43905355982722194, "learning_rate": 2e-05, "loss": 5.557, "step": 3895 }, { "epoch": 0.13066152427265867, "grad_norm": 0.44457492434698503, "learning_rate": 2e-05, "loss": 5.5405, "step": 3896 }, { "epoch": 0.13069506162488473, "grad_norm": 0.4972687310378915, "learning_rate": 2e-05, "loss": 5.5045, "step": 3897 }, { "epoch": 0.13072859897711075, "grad_norm": 0.4478343011169846, "learning_rate": 2e-05, "loss": 5.4286, "step": 3898 }, { "epoch": 0.1307621363293368, "grad_norm": 0.4337331743110395, "learning_rate": 2e-05, "loss": 5.6498, "step": 3899 }, { "epoch": 0.13079567368156284, "grad_norm": 0.4672900301358029, "learning_rate": 2e-05, "loss": 5.5774, "step": 3900 }, { "epoch": 0.1308292110337889, "grad_norm": 0.43835596711141317, "learning_rate": 2e-05, "loss": 5.4445, "step": 3901 }, { "epoch": 0.13086274838601492, "grad_norm": 0.42801814106181635, "learning_rate": 2e-05, "loss": 5.551, "step": 3902 }, { "epoch": 0.13089628573824097, "grad_norm": 0.5026850548773071, "learning_rate": 2e-05, "loss": 5.5857, "step": 3903 }, { "epoch": 0.130929823090467, "grad_norm": 0.44715496299407, "learning_rate": 2e-05, "loss": 5.6244, "step": 3904 }, { "epoch": 0.13096336044269305, "grad_norm": 0.4322580169571599, "learning_rate": 2e-05, "loss": 5.5404, "step": 3905 }, { "epoch": 0.1309968977949191, "grad_norm": 0.45903523553820236, "learning_rate": 2e-05, "loss": 5.4885, "step": 3906 }, { "epoch": 0.13103043514714513, "grad_norm": 0.4420379085111403, "learning_rate": 2e-05, "loss": 5.3425, "step": 3907 }, { "epoch": 0.13106397249937118, "grad_norm": 0.4541005281601501, "learning_rate": 2e-05, "loss": 5.4239, "step": 3908 }, { "epoch": 0.1310975098515972, "grad_norm": 0.40415061755401144, "learning_rate": 2e-05, "loss": 5.6089, "step": 3909 }, { "epoch": 0.13113104720382326, "grad_norm": 0.46568455397856284, "learning_rate": 2e-05, "loss": 5.5874, "step": 3910 }, { "epoch": 0.1311645845560493, "grad_norm": 0.4410148091373086, "learning_rate": 2e-05, "loss": 5.6415, "step": 3911 }, { "epoch": 0.13119812190827534, "grad_norm": 0.4051888463761353, "learning_rate": 2e-05, "loss": 5.5653, "step": 3912 }, { "epoch": 0.13123165926050137, "grad_norm": 0.4260784830403969, "learning_rate": 2e-05, "loss": 5.7706, "step": 3913 }, { "epoch": 0.13126519661272743, "grad_norm": 0.4432051694217987, "learning_rate": 2e-05, "loss": 5.8677, "step": 3914 }, { "epoch": 0.13129873396495348, "grad_norm": 0.4151171659277242, "learning_rate": 2e-05, "loss": 5.5165, "step": 3915 }, { "epoch": 0.1313322713171795, "grad_norm": 0.4199814495889765, "learning_rate": 2e-05, "loss": 5.3929, "step": 3916 }, { "epoch": 0.13136580866940556, "grad_norm": 0.4168498564443852, "learning_rate": 2e-05, "loss": 5.6056, "step": 3917 }, { "epoch": 0.13139934602163159, "grad_norm": 0.4344513757252489, "learning_rate": 2e-05, "loss": 5.4696, "step": 3918 }, { "epoch": 0.13143288337385764, "grad_norm": 0.4207785830333021, "learning_rate": 2e-05, "loss": 5.5331, "step": 3919 }, { "epoch": 0.13146642072608367, "grad_norm": 0.42317784565157646, "learning_rate": 2e-05, "loss": 5.7156, "step": 3920 }, { "epoch": 0.13149995807830972, "grad_norm": 0.4143273329067842, "learning_rate": 2e-05, "loss": 5.6355, "step": 3921 }, { "epoch": 0.13153349543053575, "grad_norm": 0.4362223989393561, "learning_rate": 2e-05, "loss": 5.5225, "step": 3922 }, { "epoch": 0.1315670327827618, "grad_norm": 0.4406869573790508, "learning_rate": 2e-05, "loss": 5.3455, "step": 3923 }, { "epoch": 0.13160057013498785, "grad_norm": 0.44045617082187655, "learning_rate": 2e-05, "loss": 5.731, "step": 3924 }, { "epoch": 0.13163410748721388, "grad_norm": 0.44105689618716415, "learning_rate": 2e-05, "loss": 5.6431, "step": 3925 }, { "epoch": 0.13166764483943993, "grad_norm": 0.4714872678844351, "learning_rate": 2e-05, "loss": 5.2872, "step": 3926 }, { "epoch": 0.13170118219166596, "grad_norm": 0.46308875226773144, "learning_rate": 2e-05, "loss": 5.8973, "step": 3927 }, { "epoch": 0.13173471954389201, "grad_norm": 0.4481377339149095, "learning_rate": 2e-05, "loss": 5.6782, "step": 3928 }, { "epoch": 0.13176825689611804, "grad_norm": 0.45974215404836793, "learning_rate": 2e-05, "loss": 5.7374, "step": 3929 }, { "epoch": 0.1318017942483441, "grad_norm": 0.45664392542411564, "learning_rate": 2e-05, "loss": 5.4674, "step": 3930 }, { "epoch": 0.13183533160057012, "grad_norm": 0.40083594720271065, "learning_rate": 2e-05, "loss": 5.4647, "step": 3931 }, { "epoch": 0.13186886895279618, "grad_norm": 0.46247214784922086, "learning_rate": 2e-05, "loss": 5.5595, "step": 3932 }, { "epoch": 0.13190240630502223, "grad_norm": 0.41473102295747427, "learning_rate": 2e-05, "loss": 5.5314, "step": 3933 }, { "epoch": 0.13193594365724826, "grad_norm": 0.4110767905735606, "learning_rate": 2e-05, "loss": 5.4897, "step": 3934 }, { "epoch": 0.1319694810094743, "grad_norm": 0.40649729796184575, "learning_rate": 2e-05, "loss": 5.5916, "step": 3935 }, { "epoch": 0.13200301836170034, "grad_norm": 0.43753440511142705, "learning_rate": 2e-05, "loss": 5.3947, "step": 3936 }, { "epoch": 0.1320365557139264, "grad_norm": 0.4460162000662764, "learning_rate": 2e-05, "loss": 5.5303, "step": 3937 }, { "epoch": 0.13207009306615242, "grad_norm": 0.42544967863096933, "learning_rate": 2e-05, "loss": 5.6165, "step": 3938 }, { "epoch": 0.13210363041837847, "grad_norm": 0.4398808069758069, "learning_rate": 2e-05, "loss": 5.674, "step": 3939 }, { "epoch": 0.13213716777060452, "grad_norm": 0.4501325240666828, "learning_rate": 2e-05, "loss": 5.616, "step": 3940 }, { "epoch": 0.13217070512283055, "grad_norm": 0.41502114686075586, "learning_rate": 2e-05, "loss": 5.6461, "step": 3941 }, { "epoch": 0.1322042424750566, "grad_norm": 0.4362374655416234, "learning_rate": 2e-05, "loss": 5.533, "step": 3942 }, { "epoch": 0.13223777982728263, "grad_norm": 0.4224763405673967, "learning_rate": 2e-05, "loss": 5.5522, "step": 3943 }, { "epoch": 0.13227131717950869, "grad_norm": 0.43293267214405107, "learning_rate": 2e-05, "loss": 5.6325, "step": 3944 }, { "epoch": 0.1323048545317347, "grad_norm": 0.4351393572335249, "learning_rate": 2e-05, "loss": 5.2625, "step": 3945 }, { "epoch": 0.13233839188396077, "grad_norm": 0.44183539037728997, "learning_rate": 2e-05, "loss": 5.8155, "step": 3946 }, { "epoch": 0.1323719292361868, "grad_norm": 0.42605796866139345, "learning_rate": 2e-05, "loss": 5.4646, "step": 3947 }, { "epoch": 0.13240546658841285, "grad_norm": 0.47125209150516645, "learning_rate": 2e-05, "loss": 5.5433, "step": 3948 }, { "epoch": 0.1324390039406389, "grad_norm": 0.471216934575364, "learning_rate": 2e-05, "loss": 5.5987, "step": 3949 }, { "epoch": 0.13247254129286493, "grad_norm": 0.4060102999075224, "learning_rate": 2e-05, "loss": 5.4604, "step": 3950 }, { "epoch": 0.13250607864509098, "grad_norm": 0.45362203629130715, "learning_rate": 2e-05, "loss": 5.5424, "step": 3951 }, { "epoch": 0.132539615997317, "grad_norm": 0.4567766696718289, "learning_rate": 2e-05, "loss": 5.5564, "step": 3952 }, { "epoch": 0.13257315334954306, "grad_norm": 0.4332688191413068, "learning_rate": 2e-05, "loss": 5.623, "step": 3953 }, { "epoch": 0.1326066907017691, "grad_norm": 0.40422849005262895, "learning_rate": 2e-05, "loss": 5.647, "step": 3954 }, { "epoch": 0.13264022805399514, "grad_norm": 0.4380601205754097, "learning_rate": 2e-05, "loss": 5.3691, "step": 3955 }, { "epoch": 0.13267376540622117, "grad_norm": 0.41591885893848896, "learning_rate": 2e-05, "loss": 5.7222, "step": 3956 }, { "epoch": 0.13270730275844722, "grad_norm": 0.4184226074542852, "learning_rate": 2e-05, "loss": 5.5692, "step": 3957 }, { "epoch": 0.13274084011067327, "grad_norm": 0.40363337927242365, "learning_rate": 2e-05, "loss": 5.3913, "step": 3958 }, { "epoch": 0.1327743774628993, "grad_norm": 0.45790972350925324, "learning_rate": 2e-05, "loss": 5.4435, "step": 3959 }, { "epoch": 0.13280791481512536, "grad_norm": 0.4660902682400704, "learning_rate": 2e-05, "loss": 5.6971, "step": 3960 }, { "epoch": 0.13284145216735138, "grad_norm": 0.417459705547088, "learning_rate": 2e-05, "loss": 5.5242, "step": 3961 }, { "epoch": 0.13287498951957744, "grad_norm": 0.4111739987086291, "learning_rate": 2e-05, "loss": 5.5905, "step": 3962 }, { "epoch": 0.13290852687180346, "grad_norm": 0.4373155755445841, "learning_rate": 2e-05, "loss": 5.4487, "step": 3963 }, { "epoch": 0.13294206422402952, "grad_norm": 0.4375764456432511, "learning_rate": 2e-05, "loss": 5.5019, "step": 3964 }, { "epoch": 0.13297560157625554, "grad_norm": 0.4178313141655196, "learning_rate": 2e-05, "loss": 5.7654, "step": 3965 }, { "epoch": 0.1330091389284816, "grad_norm": 0.4216563761529264, "learning_rate": 2e-05, "loss": 5.4733, "step": 3966 }, { "epoch": 0.13304267628070765, "grad_norm": 0.45441050833431496, "learning_rate": 2e-05, "loss": 5.5404, "step": 3967 }, { "epoch": 0.13307621363293368, "grad_norm": 0.4128272970174033, "learning_rate": 2e-05, "loss": 5.5775, "step": 3968 }, { "epoch": 0.13310975098515973, "grad_norm": 0.4127389426457412, "learning_rate": 2e-05, "loss": 5.4441, "step": 3969 }, { "epoch": 0.13314328833738576, "grad_norm": 0.44514119073375036, "learning_rate": 2e-05, "loss": 5.526, "step": 3970 }, { "epoch": 0.1331768256896118, "grad_norm": 0.42509294792688024, "learning_rate": 2e-05, "loss": 5.5082, "step": 3971 }, { "epoch": 0.13321036304183784, "grad_norm": 0.41562886312129366, "learning_rate": 2e-05, "loss": 5.6179, "step": 3972 }, { "epoch": 0.1332439003940639, "grad_norm": 0.4425295903163335, "learning_rate": 2e-05, "loss": 5.5764, "step": 3973 }, { "epoch": 0.13327743774628992, "grad_norm": 0.41945257027476174, "learning_rate": 2e-05, "loss": 5.6296, "step": 3974 }, { "epoch": 0.13331097509851597, "grad_norm": 0.41740489542334, "learning_rate": 2e-05, "loss": 5.5532, "step": 3975 }, { "epoch": 0.13334451245074203, "grad_norm": 0.4338012945284523, "learning_rate": 2e-05, "loss": 5.3774, "step": 3976 }, { "epoch": 0.13337804980296805, "grad_norm": 0.44267227374466595, "learning_rate": 2e-05, "loss": 5.3153, "step": 3977 }, { "epoch": 0.1334115871551941, "grad_norm": 0.41617723069874724, "learning_rate": 2e-05, "loss": 5.5394, "step": 3978 }, { "epoch": 0.13344512450742013, "grad_norm": 0.4160207147136461, "learning_rate": 2e-05, "loss": 5.4819, "step": 3979 }, { "epoch": 0.13347866185964619, "grad_norm": 0.42259461016136246, "learning_rate": 2e-05, "loss": 5.5673, "step": 3980 }, { "epoch": 0.1335121992118722, "grad_norm": 0.4191749029958105, "learning_rate": 2e-05, "loss": 5.5057, "step": 3981 }, { "epoch": 0.13354573656409827, "grad_norm": 0.42550166011297286, "learning_rate": 2e-05, "loss": 5.6592, "step": 3982 }, { "epoch": 0.1335792739163243, "grad_norm": 0.3952102995828304, "learning_rate": 2e-05, "loss": 5.5854, "step": 3983 }, { "epoch": 0.13361281126855035, "grad_norm": 0.4614629753632776, "learning_rate": 2e-05, "loss": 5.5429, "step": 3984 }, { "epoch": 0.1336463486207764, "grad_norm": 0.409023143243583, "learning_rate": 2e-05, "loss": 5.5464, "step": 3985 }, { "epoch": 0.13367988597300243, "grad_norm": 0.4065092198664392, "learning_rate": 2e-05, "loss": 5.3948, "step": 3986 }, { "epoch": 0.13371342332522848, "grad_norm": 0.3931695925203852, "learning_rate": 2e-05, "loss": 5.7547, "step": 3987 }, { "epoch": 0.1337469606774545, "grad_norm": 0.4074387553693259, "learning_rate": 2e-05, "loss": 5.7527, "step": 3988 }, { "epoch": 0.13378049802968056, "grad_norm": 0.409736225821174, "learning_rate": 2e-05, "loss": 5.5676, "step": 3989 }, { "epoch": 0.1338140353819066, "grad_norm": 0.3968471139227984, "learning_rate": 2e-05, "loss": 5.6983, "step": 3990 }, { "epoch": 0.13384757273413264, "grad_norm": 0.4169293413418156, "learning_rate": 2e-05, "loss": 5.4342, "step": 3991 }, { "epoch": 0.1338811100863587, "grad_norm": 0.42416134471446215, "learning_rate": 2e-05, "loss": 5.5498, "step": 3992 }, { "epoch": 0.13391464743858472, "grad_norm": 0.4012496900172955, "learning_rate": 2e-05, "loss": 5.4816, "step": 3993 }, { "epoch": 0.13394818479081078, "grad_norm": 0.41244621630436135, "learning_rate": 2e-05, "loss": 5.6007, "step": 3994 }, { "epoch": 0.1339817221430368, "grad_norm": 0.4178577902442295, "learning_rate": 2e-05, "loss": 5.3801, "step": 3995 }, { "epoch": 0.13401525949526286, "grad_norm": 0.44455959802345935, "learning_rate": 2e-05, "loss": 5.5343, "step": 3996 }, { "epoch": 0.13404879684748888, "grad_norm": 0.3945201007268363, "learning_rate": 2e-05, "loss": 5.4674, "step": 3997 }, { "epoch": 0.13408233419971494, "grad_norm": 0.4469537714829672, "learning_rate": 2e-05, "loss": 5.7262, "step": 3998 }, { "epoch": 0.13411587155194096, "grad_norm": 0.4232015989933779, "learning_rate": 2e-05, "loss": 5.6971, "step": 3999 }, { "epoch": 0.13414940890416702, "grad_norm": 0.39441458175522176, "learning_rate": 2e-05, "loss": 5.6601, "step": 4000 }, { "epoch": 0.13418294625639307, "grad_norm": 0.4384141926698213, "learning_rate": 2e-05, "loss": 5.4635, "step": 4001 }, { "epoch": 0.1342164836086191, "grad_norm": 0.4434260829084072, "learning_rate": 2e-05, "loss": 5.6094, "step": 4002 }, { "epoch": 0.13425002096084515, "grad_norm": 0.4360933863906353, "learning_rate": 2e-05, "loss": 5.5837, "step": 4003 }, { "epoch": 0.13428355831307118, "grad_norm": 0.41692203672540695, "learning_rate": 2e-05, "loss": 5.4997, "step": 4004 }, { "epoch": 0.13431709566529723, "grad_norm": 0.4343207575859585, "learning_rate": 2e-05, "loss": 5.6437, "step": 4005 }, { "epoch": 0.13435063301752326, "grad_norm": 0.40397321120149415, "learning_rate": 2e-05, "loss": 5.5053, "step": 4006 }, { "epoch": 0.1343841703697493, "grad_norm": 0.4286070970493671, "learning_rate": 2e-05, "loss": 5.5859, "step": 4007 }, { "epoch": 0.13441770772197534, "grad_norm": 0.44784958085355764, "learning_rate": 2e-05, "loss": 5.5954, "step": 4008 }, { "epoch": 0.1344512450742014, "grad_norm": 0.43214837640017434, "learning_rate": 2e-05, "loss": 5.6902, "step": 4009 }, { "epoch": 0.13448478242642745, "grad_norm": 0.4310612550095323, "learning_rate": 2e-05, "loss": 5.4939, "step": 4010 }, { "epoch": 0.13451831977865347, "grad_norm": 0.4271413868397642, "learning_rate": 2e-05, "loss": 5.5518, "step": 4011 }, { "epoch": 0.13455185713087953, "grad_norm": 0.42006259967777965, "learning_rate": 2e-05, "loss": 5.5624, "step": 4012 }, { "epoch": 0.13458539448310555, "grad_norm": 0.4765786049000855, "learning_rate": 2e-05, "loss": 5.467, "step": 4013 }, { "epoch": 0.1346189318353316, "grad_norm": 0.3946110751895658, "learning_rate": 2e-05, "loss": 5.629, "step": 4014 }, { "epoch": 0.13465246918755763, "grad_norm": 0.43154311093319425, "learning_rate": 2e-05, "loss": 5.5828, "step": 4015 }, { "epoch": 0.1346860065397837, "grad_norm": 0.43057648489812783, "learning_rate": 2e-05, "loss": 5.6635, "step": 4016 }, { "epoch": 0.1347195438920097, "grad_norm": 0.4351632775567595, "learning_rate": 2e-05, "loss": 5.5167, "step": 4017 }, { "epoch": 0.13475308124423577, "grad_norm": 0.3876046157157404, "learning_rate": 2e-05, "loss": 5.4023, "step": 4018 }, { "epoch": 0.13478661859646182, "grad_norm": 0.4175912977962918, "learning_rate": 2e-05, "loss": 5.5435, "step": 4019 }, { "epoch": 0.13482015594868785, "grad_norm": 0.4269135732946537, "learning_rate": 2e-05, "loss": 5.6065, "step": 4020 }, { "epoch": 0.1348536933009139, "grad_norm": 0.4282331088723087, "learning_rate": 2e-05, "loss": 5.4746, "step": 4021 }, { "epoch": 0.13488723065313993, "grad_norm": 0.41224815072836063, "learning_rate": 2e-05, "loss": 5.743, "step": 4022 }, { "epoch": 0.13492076800536598, "grad_norm": 0.4296625681025886, "learning_rate": 2e-05, "loss": 5.5066, "step": 4023 }, { "epoch": 0.134954305357592, "grad_norm": 0.42914434604996127, "learning_rate": 2e-05, "loss": 5.2766, "step": 4024 }, { "epoch": 0.13498784270981806, "grad_norm": 0.43402141365677444, "learning_rate": 2e-05, "loss": 5.3566, "step": 4025 }, { "epoch": 0.1350213800620441, "grad_norm": 0.41454286413572405, "learning_rate": 2e-05, "loss": 5.5704, "step": 4026 }, { "epoch": 0.13505491741427014, "grad_norm": 0.44158452399895187, "learning_rate": 2e-05, "loss": 5.527, "step": 4027 }, { "epoch": 0.1350884547664962, "grad_norm": 0.41220204228640395, "learning_rate": 2e-05, "loss": 5.3853, "step": 4028 }, { "epoch": 0.13512199211872222, "grad_norm": 0.42505655929762975, "learning_rate": 2e-05, "loss": 5.3624, "step": 4029 }, { "epoch": 0.13515552947094828, "grad_norm": 0.40739674973128426, "learning_rate": 2e-05, "loss": 5.84, "step": 4030 }, { "epoch": 0.1351890668231743, "grad_norm": 0.4082806155672756, "learning_rate": 2e-05, "loss": 5.5467, "step": 4031 }, { "epoch": 0.13522260417540036, "grad_norm": 0.4032681438807888, "learning_rate": 2e-05, "loss": 5.432, "step": 4032 }, { "epoch": 0.13525614152762638, "grad_norm": 0.4710191766105689, "learning_rate": 2e-05, "loss": 5.5719, "step": 4033 }, { "epoch": 0.13528967887985244, "grad_norm": 0.43460790038916025, "learning_rate": 2e-05, "loss": 5.6016, "step": 4034 }, { "epoch": 0.13532321623207846, "grad_norm": 0.4002686109998604, "learning_rate": 2e-05, "loss": 5.4882, "step": 4035 }, { "epoch": 0.13535675358430452, "grad_norm": 0.41653887085488966, "learning_rate": 2e-05, "loss": 5.6942, "step": 4036 }, { "epoch": 0.13539029093653057, "grad_norm": 0.4070153261744971, "learning_rate": 2e-05, "loss": 5.5707, "step": 4037 }, { "epoch": 0.1354238282887566, "grad_norm": 0.4057741533600638, "learning_rate": 2e-05, "loss": 5.6718, "step": 4038 }, { "epoch": 0.13545736564098265, "grad_norm": 0.48429978289244047, "learning_rate": 2e-05, "loss": 5.5506, "step": 4039 }, { "epoch": 0.13549090299320868, "grad_norm": 0.4454385761344747, "learning_rate": 2e-05, "loss": 5.424, "step": 4040 }, { "epoch": 0.13552444034543473, "grad_norm": 0.39403478859192814, "learning_rate": 2e-05, "loss": 5.6188, "step": 4041 }, { "epoch": 0.13555797769766076, "grad_norm": 0.4544962427131167, "learning_rate": 2e-05, "loss": 5.6208, "step": 4042 }, { "epoch": 0.1355915150498868, "grad_norm": 0.4399910737635273, "learning_rate": 2e-05, "loss": 5.5858, "step": 4043 }, { "epoch": 0.13562505240211287, "grad_norm": 0.4285986854730558, "learning_rate": 2e-05, "loss": 5.3774, "step": 4044 }, { "epoch": 0.1356585897543389, "grad_norm": 0.43732887148132504, "learning_rate": 2e-05, "loss": 5.4967, "step": 4045 }, { "epoch": 0.13569212710656495, "grad_norm": 0.4399564417754717, "learning_rate": 2e-05, "loss": 5.4493, "step": 4046 }, { "epoch": 0.13572566445879097, "grad_norm": 0.3994639297635337, "learning_rate": 2e-05, "loss": 5.7011, "step": 4047 }, { "epoch": 0.13575920181101703, "grad_norm": 0.4567162313850098, "learning_rate": 2e-05, "loss": 5.45, "step": 4048 }, { "epoch": 0.13579273916324305, "grad_norm": 0.4157149372870475, "learning_rate": 2e-05, "loss": 5.5664, "step": 4049 }, { "epoch": 0.1358262765154691, "grad_norm": 0.40325416837643185, "learning_rate": 2e-05, "loss": 5.2997, "step": 4050 }, { "epoch": 0.13585981386769513, "grad_norm": 0.4464426062538459, "learning_rate": 2e-05, "loss": 5.5676, "step": 4051 }, { "epoch": 0.1358933512199212, "grad_norm": 0.4339049339259656, "learning_rate": 2e-05, "loss": 5.5153, "step": 4052 }, { "epoch": 0.13592688857214724, "grad_norm": 0.4002688308855332, "learning_rate": 2e-05, "loss": 5.5702, "step": 4053 }, { "epoch": 0.13596042592437327, "grad_norm": 0.4529548447456314, "learning_rate": 2e-05, "loss": 5.5379, "step": 4054 }, { "epoch": 0.13599396327659932, "grad_norm": 0.4301282082175777, "learning_rate": 2e-05, "loss": 5.5825, "step": 4055 }, { "epoch": 0.13602750062882535, "grad_norm": 0.40208640831478165, "learning_rate": 2e-05, "loss": 5.5466, "step": 4056 }, { "epoch": 0.1360610379810514, "grad_norm": 0.4675436153684469, "learning_rate": 2e-05, "loss": 5.5301, "step": 4057 }, { "epoch": 0.13609457533327743, "grad_norm": 0.47832091231616986, "learning_rate": 2e-05, "loss": 5.3619, "step": 4058 }, { "epoch": 0.13612811268550348, "grad_norm": 0.43482376927769745, "learning_rate": 2e-05, "loss": 5.5581, "step": 4059 }, { "epoch": 0.1361616500377295, "grad_norm": 0.4609751445791806, "learning_rate": 2e-05, "loss": 5.3499, "step": 4060 }, { "epoch": 0.13619518738995556, "grad_norm": 0.43590176471443925, "learning_rate": 2e-05, "loss": 5.4753, "step": 4061 }, { "epoch": 0.13622872474218162, "grad_norm": 0.45071367940587215, "learning_rate": 2e-05, "loss": 5.4968, "step": 4062 }, { "epoch": 0.13626226209440764, "grad_norm": 0.40006301689907636, "learning_rate": 2e-05, "loss": 5.4975, "step": 4063 }, { "epoch": 0.1362957994466337, "grad_norm": 0.41724701741490156, "learning_rate": 2e-05, "loss": 5.5162, "step": 4064 }, { "epoch": 0.13632933679885972, "grad_norm": 0.38696171075194585, "learning_rate": 2e-05, "loss": 5.6023, "step": 4065 }, { "epoch": 0.13636287415108578, "grad_norm": 0.4242858812617037, "learning_rate": 2e-05, "loss": 5.362, "step": 4066 }, { "epoch": 0.1363964115033118, "grad_norm": 0.4232543659126156, "learning_rate": 2e-05, "loss": 5.6254, "step": 4067 }, { "epoch": 0.13642994885553786, "grad_norm": 0.4207987758880522, "learning_rate": 2e-05, "loss": 5.5459, "step": 4068 }, { "epoch": 0.13646348620776388, "grad_norm": 0.41941662783751826, "learning_rate": 2e-05, "loss": 5.4089, "step": 4069 }, { "epoch": 0.13649702355998994, "grad_norm": 0.40252946958811975, "learning_rate": 2e-05, "loss": 5.7153, "step": 4070 }, { "epoch": 0.136530560912216, "grad_norm": 0.4461251551533085, "learning_rate": 2e-05, "loss": 5.2393, "step": 4071 }, { "epoch": 0.13656409826444202, "grad_norm": 0.4255346335569431, "learning_rate": 2e-05, "loss": 5.5181, "step": 4072 }, { "epoch": 0.13659763561666807, "grad_norm": 0.44121508124506237, "learning_rate": 2e-05, "loss": 5.7054, "step": 4073 }, { "epoch": 0.1366311729688941, "grad_norm": 0.43314790619801535, "learning_rate": 2e-05, "loss": 5.4825, "step": 4074 }, { "epoch": 0.13666471032112015, "grad_norm": 0.43586720390500777, "learning_rate": 2e-05, "loss": 5.4402, "step": 4075 }, { "epoch": 0.13669824767334618, "grad_norm": 0.42144460007426116, "learning_rate": 2e-05, "loss": 5.5515, "step": 4076 }, { "epoch": 0.13673178502557223, "grad_norm": 0.40902815772250495, "learning_rate": 2e-05, "loss": 5.5228, "step": 4077 }, { "epoch": 0.13676532237779826, "grad_norm": 0.4261726326501328, "learning_rate": 2e-05, "loss": 5.7042, "step": 4078 }, { "epoch": 0.1367988597300243, "grad_norm": 0.4028909637138117, "learning_rate": 2e-05, "loss": 5.5548, "step": 4079 }, { "epoch": 0.13683239708225037, "grad_norm": 0.40920936761977855, "learning_rate": 2e-05, "loss": 5.5935, "step": 4080 }, { "epoch": 0.1368659344344764, "grad_norm": 0.40421756066662035, "learning_rate": 2e-05, "loss": 5.4755, "step": 4081 }, { "epoch": 0.13689947178670245, "grad_norm": 0.38589752463043736, "learning_rate": 2e-05, "loss": 5.455, "step": 4082 }, { "epoch": 0.13693300913892847, "grad_norm": 0.4085718318268207, "learning_rate": 2e-05, "loss": 5.6502, "step": 4083 }, { "epoch": 0.13696654649115453, "grad_norm": 0.414828292419512, "learning_rate": 2e-05, "loss": 5.5642, "step": 4084 }, { "epoch": 0.13700008384338055, "grad_norm": 0.4831262411498296, "learning_rate": 2e-05, "loss": 5.6382, "step": 4085 }, { "epoch": 0.1370336211956066, "grad_norm": 0.4101980578817355, "learning_rate": 2e-05, "loss": 5.6105, "step": 4086 }, { "epoch": 0.13706715854783263, "grad_norm": 0.39460882798028996, "learning_rate": 2e-05, "loss": 5.6587, "step": 4087 }, { "epoch": 0.1371006959000587, "grad_norm": 0.4023359920508546, "learning_rate": 2e-05, "loss": 5.5914, "step": 4088 }, { "epoch": 0.13713423325228474, "grad_norm": 0.4436659602847982, "learning_rate": 2e-05, "loss": 5.6564, "step": 4089 }, { "epoch": 0.13716777060451077, "grad_norm": 0.4064676378582546, "learning_rate": 2e-05, "loss": 5.4412, "step": 4090 }, { "epoch": 0.13720130795673682, "grad_norm": 0.44567711915325947, "learning_rate": 2e-05, "loss": 5.5161, "step": 4091 }, { "epoch": 0.13723484530896285, "grad_norm": 0.4204952617081151, "learning_rate": 2e-05, "loss": 5.5123, "step": 4092 }, { "epoch": 0.1372683826611889, "grad_norm": 0.39980901811717834, "learning_rate": 2e-05, "loss": 5.6418, "step": 4093 }, { "epoch": 0.13730192001341493, "grad_norm": 0.43838481598719115, "learning_rate": 2e-05, "loss": 5.563, "step": 4094 }, { "epoch": 0.13733545736564098, "grad_norm": 0.40469442689162927, "learning_rate": 2e-05, "loss": 5.6283, "step": 4095 }, { "epoch": 0.13736899471786704, "grad_norm": 0.4103780347402408, "learning_rate": 2e-05, "loss": 5.7099, "step": 4096 }, { "epoch": 0.13740253207009306, "grad_norm": 0.42087339033710336, "learning_rate": 2e-05, "loss": 5.6288, "step": 4097 }, { "epoch": 0.13743606942231912, "grad_norm": 0.408254608274534, "learning_rate": 2e-05, "loss": 5.4942, "step": 4098 }, { "epoch": 0.13746960677454514, "grad_norm": 0.42209483185683705, "learning_rate": 2e-05, "loss": 5.6254, "step": 4099 }, { "epoch": 0.1375031441267712, "grad_norm": 0.40365288257036475, "learning_rate": 2e-05, "loss": 5.8686, "step": 4100 }, { "epoch": 0.13753668147899722, "grad_norm": 0.42812208436372834, "learning_rate": 2e-05, "loss": 5.5883, "step": 4101 }, { "epoch": 0.13757021883122328, "grad_norm": 0.40002749870891097, "learning_rate": 2e-05, "loss": 5.4724, "step": 4102 }, { "epoch": 0.1376037561834493, "grad_norm": 0.4130233840680496, "learning_rate": 2e-05, "loss": 5.6349, "step": 4103 }, { "epoch": 0.13763729353567536, "grad_norm": 0.3936259015365099, "learning_rate": 2e-05, "loss": 5.6508, "step": 4104 }, { "epoch": 0.1376708308879014, "grad_norm": 0.42866223606991494, "learning_rate": 2e-05, "loss": 5.6653, "step": 4105 }, { "epoch": 0.13770436824012744, "grad_norm": 0.3898561092735433, "learning_rate": 2e-05, "loss": 5.7448, "step": 4106 }, { "epoch": 0.1377379055923535, "grad_norm": 0.4221810504217265, "learning_rate": 2e-05, "loss": 5.6574, "step": 4107 }, { "epoch": 0.13777144294457952, "grad_norm": 0.39973806868564093, "learning_rate": 2e-05, "loss": 5.6277, "step": 4108 }, { "epoch": 0.13780498029680557, "grad_norm": 0.45110661733002777, "learning_rate": 2e-05, "loss": 5.5606, "step": 4109 }, { "epoch": 0.1378385176490316, "grad_norm": 0.3854952594783482, "learning_rate": 2e-05, "loss": 5.671, "step": 4110 }, { "epoch": 0.13787205500125765, "grad_norm": 0.426053414826633, "learning_rate": 2e-05, "loss": 5.562, "step": 4111 }, { "epoch": 0.13790559235348368, "grad_norm": 0.4256824386752697, "learning_rate": 2e-05, "loss": 5.6559, "step": 4112 }, { "epoch": 0.13793912970570973, "grad_norm": 0.39864740412948596, "learning_rate": 2e-05, "loss": 5.444, "step": 4113 }, { "epoch": 0.1379726670579358, "grad_norm": 0.41513688181885205, "learning_rate": 2e-05, "loss": 5.5796, "step": 4114 }, { "epoch": 0.13800620441016181, "grad_norm": 0.44550857047777587, "learning_rate": 2e-05, "loss": 5.5904, "step": 4115 }, { "epoch": 0.13803974176238787, "grad_norm": 0.4269442938358149, "learning_rate": 2e-05, "loss": 5.525, "step": 4116 }, { "epoch": 0.1380732791146139, "grad_norm": 0.4490085108837496, "learning_rate": 2e-05, "loss": 5.2991, "step": 4117 }, { "epoch": 0.13810681646683995, "grad_norm": 0.4001799925711243, "learning_rate": 2e-05, "loss": 5.76, "step": 4118 }, { "epoch": 0.13814035381906598, "grad_norm": 0.39756872681978433, "learning_rate": 2e-05, "loss": 5.5283, "step": 4119 }, { "epoch": 0.13817389117129203, "grad_norm": 0.4080790418434683, "learning_rate": 2e-05, "loss": 5.5031, "step": 4120 }, { "epoch": 0.13820742852351806, "grad_norm": 0.49810784879556125, "learning_rate": 2e-05, "loss": 5.357, "step": 4121 }, { "epoch": 0.1382409658757441, "grad_norm": 0.4350584587370229, "learning_rate": 2e-05, "loss": 5.4733, "step": 4122 }, { "epoch": 0.13827450322797016, "grad_norm": 0.45487006734096835, "learning_rate": 2e-05, "loss": 5.5783, "step": 4123 }, { "epoch": 0.1383080405801962, "grad_norm": 0.445959377843316, "learning_rate": 2e-05, "loss": 5.7544, "step": 4124 }, { "epoch": 0.13834157793242224, "grad_norm": 0.46329001941769554, "learning_rate": 2e-05, "loss": 5.5931, "step": 4125 }, { "epoch": 0.13837511528464827, "grad_norm": 0.4265293485966475, "learning_rate": 2e-05, "loss": 5.4122, "step": 4126 }, { "epoch": 0.13840865263687432, "grad_norm": 0.4204846363412698, "learning_rate": 2e-05, "loss": 5.5714, "step": 4127 }, { "epoch": 0.13844218998910035, "grad_norm": 0.44497396339572953, "learning_rate": 2e-05, "loss": 5.5497, "step": 4128 }, { "epoch": 0.1384757273413264, "grad_norm": 0.43431940991318063, "learning_rate": 2e-05, "loss": 5.5703, "step": 4129 }, { "epoch": 0.13850926469355243, "grad_norm": 0.44275491628835917, "learning_rate": 2e-05, "loss": 5.4894, "step": 4130 }, { "epoch": 0.13854280204577848, "grad_norm": 0.3886245104259593, "learning_rate": 2e-05, "loss": 5.4791, "step": 4131 }, { "epoch": 0.13857633939800454, "grad_norm": 0.4098650382482774, "learning_rate": 2e-05, "loss": 5.5688, "step": 4132 }, { "epoch": 0.13860987675023057, "grad_norm": 0.4377550198961755, "learning_rate": 2e-05, "loss": 5.7376, "step": 4133 }, { "epoch": 0.13864341410245662, "grad_norm": 0.4014832840850744, "learning_rate": 2e-05, "loss": 5.4937, "step": 4134 }, { "epoch": 0.13867695145468265, "grad_norm": 0.4082856008248664, "learning_rate": 2e-05, "loss": 5.5265, "step": 4135 }, { "epoch": 0.1387104888069087, "grad_norm": 0.41797087772479663, "learning_rate": 2e-05, "loss": 5.5738, "step": 4136 }, { "epoch": 0.13874402615913473, "grad_norm": 0.41123533657386113, "learning_rate": 2e-05, "loss": 5.539, "step": 4137 }, { "epoch": 0.13877756351136078, "grad_norm": 0.41569925734168417, "learning_rate": 2e-05, "loss": 5.5788, "step": 4138 }, { "epoch": 0.1388111008635868, "grad_norm": 0.41298629831012806, "learning_rate": 2e-05, "loss": 5.4352, "step": 4139 }, { "epoch": 0.13884463821581286, "grad_norm": 0.46012278141410917, "learning_rate": 2e-05, "loss": 5.3509, "step": 4140 }, { "epoch": 0.13887817556803891, "grad_norm": 0.4041844196552973, "learning_rate": 2e-05, "loss": 5.5479, "step": 4141 }, { "epoch": 0.13891171292026494, "grad_norm": 0.41289365455664384, "learning_rate": 2e-05, "loss": 5.4971, "step": 4142 }, { "epoch": 0.138945250272491, "grad_norm": 0.43698448488295655, "learning_rate": 2e-05, "loss": 5.3937, "step": 4143 }, { "epoch": 0.13897878762471702, "grad_norm": 0.43104339778250816, "learning_rate": 2e-05, "loss": 5.4077, "step": 4144 }, { "epoch": 0.13901232497694307, "grad_norm": 0.41167699381361283, "learning_rate": 2e-05, "loss": 5.7661, "step": 4145 }, { "epoch": 0.1390458623291691, "grad_norm": 0.41157582846462726, "learning_rate": 2e-05, "loss": 5.5894, "step": 4146 }, { "epoch": 0.13907939968139515, "grad_norm": 0.4217286916775564, "learning_rate": 2e-05, "loss": 5.5378, "step": 4147 }, { "epoch": 0.1391129370336212, "grad_norm": 0.43883667853331837, "learning_rate": 2e-05, "loss": 5.6075, "step": 4148 }, { "epoch": 0.13914647438584724, "grad_norm": 0.4147105328049943, "learning_rate": 2e-05, "loss": 5.5375, "step": 4149 }, { "epoch": 0.1391800117380733, "grad_norm": 0.422130449233148, "learning_rate": 2e-05, "loss": 5.6012, "step": 4150 }, { "epoch": 0.13921354909029932, "grad_norm": 0.42129908110855907, "learning_rate": 2e-05, "loss": 5.4222, "step": 4151 }, { "epoch": 0.13924708644252537, "grad_norm": 0.4080667128210901, "learning_rate": 2e-05, "loss": 5.4431, "step": 4152 }, { "epoch": 0.1392806237947514, "grad_norm": 0.41093075118492517, "learning_rate": 2e-05, "loss": 5.586, "step": 4153 }, { "epoch": 0.13931416114697745, "grad_norm": 0.41043364593887155, "learning_rate": 2e-05, "loss": 5.6374, "step": 4154 }, { "epoch": 0.13934769849920348, "grad_norm": 0.39794433614625196, "learning_rate": 2e-05, "loss": 5.593, "step": 4155 }, { "epoch": 0.13938123585142953, "grad_norm": 0.4475379926452052, "learning_rate": 2e-05, "loss": 5.6122, "step": 4156 }, { "epoch": 0.13941477320365558, "grad_norm": 0.3827758890630505, "learning_rate": 2e-05, "loss": 5.5967, "step": 4157 }, { "epoch": 0.1394483105558816, "grad_norm": 0.40476725860577656, "learning_rate": 2e-05, "loss": 5.2987, "step": 4158 }, { "epoch": 0.13948184790810766, "grad_norm": 0.4274257888056829, "learning_rate": 2e-05, "loss": 5.4028, "step": 4159 }, { "epoch": 0.1395153852603337, "grad_norm": 0.4495085755049117, "learning_rate": 2e-05, "loss": 5.5975, "step": 4160 }, { "epoch": 0.13954892261255974, "grad_norm": 0.38863284695929595, "learning_rate": 2e-05, "loss": 5.5073, "step": 4161 }, { "epoch": 0.13958245996478577, "grad_norm": 0.4298209962638796, "learning_rate": 2e-05, "loss": 5.5606, "step": 4162 }, { "epoch": 0.13961599731701183, "grad_norm": 0.46572738611285674, "learning_rate": 2e-05, "loss": 5.5395, "step": 4163 }, { "epoch": 0.13964953466923785, "grad_norm": 0.411087533493546, "learning_rate": 2e-05, "loss": 5.5968, "step": 4164 }, { "epoch": 0.1396830720214639, "grad_norm": 0.44024545930928916, "learning_rate": 2e-05, "loss": 5.5479, "step": 4165 }, { "epoch": 0.13971660937368996, "grad_norm": 0.42854604138719715, "learning_rate": 2e-05, "loss": 5.703, "step": 4166 }, { "epoch": 0.13975014672591599, "grad_norm": 0.44913179809184756, "learning_rate": 2e-05, "loss": 5.6187, "step": 4167 }, { "epoch": 0.13978368407814204, "grad_norm": 0.4085457046199533, "learning_rate": 2e-05, "loss": 5.6951, "step": 4168 }, { "epoch": 0.13981722143036807, "grad_norm": 0.43513347740538844, "learning_rate": 2e-05, "loss": 5.5572, "step": 4169 }, { "epoch": 0.13985075878259412, "grad_norm": 0.4041404438501928, "learning_rate": 2e-05, "loss": 5.5437, "step": 4170 }, { "epoch": 0.13988429613482015, "grad_norm": 0.42861455307851, "learning_rate": 2e-05, "loss": 5.3037, "step": 4171 }, { "epoch": 0.1399178334870462, "grad_norm": 0.42200245227027067, "learning_rate": 2e-05, "loss": 5.6172, "step": 4172 }, { "epoch": 0.13995137083927223, "grad_norm": 0.4210438828346458, "learning_rate": 2e-05, "loss": 5.4273, "step": 4173 }, { "epoch": 0.13998490819149828, "grad_norm": 0.3917805263364051, "learning_rate": 2e-05, "loss": 5.5264, "step": 4174 }, { "epoch": 0.14001844554372433, "grad_norm": 0.4071089565590174, "learning_rate": 2e-05, "loss": 5.6882, "step": 4175 }, { "epoch": 0.14005198289595036, "grad_norm": 0.4057082009680316, "learning_rate": 2e-05, "loss": 5.6388, "step": 4176 }, { "epoch": 0.14008552024817642, "grad_norm": 0.4163309851225099, "learning_rate": 2e-05, "loss": 5.7396, "step": 4177 }, { "epoch": 0.14011905760040244, "grad_norm": 0.4100318924224916, "learning_rate": 2e-05, "loss": 5.5747, "step": 4178 }, { "epoch": 0.1401525949526285, "grad_norm": 0.3985121766127663, "learning_rate": 2e-05, "loss": 5.6663, "step": 4179 }, { "epoch": 0.14018613230485452, "grad_norm": 0.39687616147927246, "learning_rate": 2e-05, "loss": 5.6976, "step": 4180 }, { "epoch": 0.14021966965708058, "grad_norm": 0.4119246172258465, "learning_rate": 2e-05, "loss": 5.6212, "step": 4181 }, { "epoch": 0.1402532070093066, "grad_norm": 0.4140466307836286, "learning_rate": 2e-05, "loss": 5.5101, "step": 4182 }, { "epoch": 0.14028674436153266, "grad_norm": 0.44108518236164207, "learning_rate": 2e-05, "loss": 5.3891, "step": 4183 }, { "epoch": 0.1403202817137587, "grad_norm": 0.413816142099678, "learning_rate": 2e-05, "loss": 5.4358, "step": 4184 }, { "epoch": 0.14035381906598474, "grad_norm": 0.4010236013281337, "learning_rate": 2e-05, "loss": 5.3959, "step": 4185 }, { "epoch": 0.1403873564182108, "grad_norm": 0.3979577231186592, "learning_rate": 2e-05, "loss": 5.6632, "step": 4186 }, { "epoch": 0.14042089377043682, "grad_norm": 0.41995933939355756, "learning_rate": 2e-05, "loss": 5.7563, "step": 4187 }, { "epoch": 0.14045443112266287, "grad_norm": 0.4050302298878143, "learning_rate": 2e-05, "loss": 5.4828, "step": 4188 }, { "epoch": 0.1404879684748889, "grad_norm": 0.4070757125782221, "learning_rate": 2e-05, "loss": 5.6521, "step": 4189 }, { "epoch": 0.14052150582711495, "grad_norm": 0.4481095039173201, "learning_rate": 2e-05, "loss": 5.4416, "step": 4190 }, { "epoch": 0.140555043179341, "grad_norm": 0.4264740331859949, "learning_rate": 2e-05, "loss": 5.6655, "step": 4191 }, { "epoch": 0.14058858053156703, "grad_norm": 0.403607308299253, "learning_rate": 2e-05, "loss": 5.2863, "step": 4192 }, { "epoch": 0.14062211788379309, "grad_norm": 0.4269924652707196, "learning_rate": 2e-05, "loss": 5.6124, "step": 4193 }, { "epoch": 0.1406556552360191, "grad_norm": 0.40956288655300127, "learning_rate": 2e-05, "loss": 5.6511, "step": 4194 }, { "epoch": 0.14068919258824517, "grad_norm": 0.4061655444516234, "learning_rate": 2e-05, "loss": 5.574, "step": 4195 }, { "epoch": 0.1407227299404712, "grad_norm": 0.3986469936324796, "learning_rate": 2e-05, "loss": 5.5662, "step": 4196 }, { "epoch": 0.14075626729269725, "grad_norm": 0.39660405064084275, "learning_rate": 2e-05, "loss": 5.4954, "step": 4197 }, { "epoch": 0.14078980464492327, "grad_norm": 0.412342785537521, "learning_rate": 2e-05, "loss": 5.677, "step": 4198 }, { "epoch": 0.14082334199714933, "grad_norm": 0.42840868204555965, "learning_rate": 2e-05, "loss": 5.4945, "step": 4199 }, { "epoch": 0.14085687934937538, "grad_norm": 0.4772691349611647, "learning_rate": 2e-05, "loss": 5.5211, "step": 4200 }, { "epoch": 0.1408904167016014, "grad_norm": 0.5073330392913695, "learning_rate": 2e-05, "loss": 5.6509, "step": 4201 }, { "epoch": 0.14092395405382746, "grad_norm": 0.4106013411139102, "learning_rate": 2e-05, "loss": 5.5711, "step": 4202 }, { "epoch": 0.1409574914060535, "grad_norm": 0.43606581277949447, "learning_rate": 2e-05, "loss": 5.5851, "step": 4203 }, { "epoch": 0.14099102875827954, "grad_norm": 0.4728861709654067, "learning_rate": 2e-05, "loss": 5.3495, "step": 4204 }, { "epoch": 0.14102456611050557, "grad_norm": 0.39847894429291403, "learning_rate": 2e-05, "loss": 5.5709, "step": 4205 }, { "epoch": 0.14105810346273162, "grad_norm": 0.39307013356290144, "learning_rate": 2e-05, "loss": 5.6071, "step": 4206 }, { "epoch": 0.14109164081495765, "grad_norm": 0.45463466857112605, "learning_rate": 2e-05, "loss": 5.403, "step": 4207 }, { "epoch": 0.1411251781671837, "grad_norm": 0.4195676766623904, "learning_rate": 2e-05, "loss": 5.4484, "step": 4208 }, { "epoch": 0.14115871551940976, "grad_norm": 0.48186211894317177, "learning_rate": 2e-05, "loss": 5.568, "step": 4209 }, { "epoch": 0.14119225287163578, "grad_norm": 0.40637862153592424, "learning_rate": 2e-05, "loss": 5.5457, "step": 4210 }, { "epoch": 0.14122579022386184, "grad_norm": 0.40762920758733334, "learning_rate": 2e-05, "loss": 5.3721, "step": 4211 }, { "epoch": 0.14125932757608786, "grad_norm": 0.4058371128258638, "learning_rate": 2e-05, "loss": 5.4407, "step": 4212 }, { "epoch": 0.14129286492831392, "grad_norm": 0.42799609427468027, "learning_rate": 2e-05, "loss": 5.57, "step": 4213 }, { "epoch": 0.14132640228053994, "grad_norm": 0.4327706254839025, "learning_rate": 2e-05, "loss": 5.6721, "step": 4214 }, { "epoch": 0.141359939632766, "grad_norm": 0.4207944648232128, "learning_rate": 2e-05, "loss": 5.5294, "step": 4215 }, { "epoch": 0.14139347698499202, "grad_norm": 0.4079689225117015, "learning_rate": 2e-05, "loss": 5.4402, "step": 4216 }, { "epoch": 0.14142701433721808, "grad_norm": 0.42171905554552475, "learning_rate": 2e-05, "loss": 5.7509, "step": 4217 }, { "epoch": 0.14146055168944413, "grad_norm": 0.428895860777745, "learning_rate": 2e-05, "loss": 5.6648, "step": 4218 }, { "epoch": 0.14149408904167016, "grad_norm": 0.41923470456515866, "learning_rate": 2e-05, "loss": 5.6252, "step": 4219 }, { "epoch": 0.1415276263938962, "grad_norm": 0.40473741565968824, "learning_rate": 2e-05, "loss": 5.4554, "step": 4220 }, { "epoch": 0.14156116374612224, "grad_norm": 0.4645039026652314, "learning_rate": 2e-05, "loss": 5.6132, "step": 4221 }, { "epoch": 0.1415947010983483, "grad_norm": 0.4531172742139392, "learning_rate": 2e-05, "loss": 5.5414, "step": 4222 }, { "epoch": 0.14162823845057432, "grad_norm": 0.4039230847979707, "learning_rate": 2e-05, "loss": 5.6667, "step": 4223 }, { "epoch": 0.14166177580280037, "grad_norm": 0.48696126674458834, "learning_rate": 2e-05, "loss": 5.5859, "step": 4224 }, { "epoch": 0.1416953131550264, "grad_norm": 0.4487748287901976, "learning_rate": 2e-05, "loss": 5.4725, "step": 4225 }, { "epoch": 0.14172885050725245, "grad_norm": 0.3824060028923259, "learning_rate": 2e-05, "loss": 5.5087, "step": 4226 }, { "epoch": 0.1417623878594785, "grad_norm": 0.4436620712780397, "learning_rate": 2e-05, "loss": 5.6025, "step": 4227 }, { "epoch": 0.14179592521170453, "grad_norm": 0.42805141410094255, "learning_rate": 2e-05, "loss": 5.5683, "step": 4228 }, { "epoch": 0.1418294625639306, "grad_norm": 0.44351649839854557, "learning_rate": 2e-05, "loss": 5.5191, "step": 4229 }, { "epoch": 0.1418629999161566, "grad_norm": 0.41416370930893065, "learning_rate": 2e-05, "loss": 5.4221, "step": 4230 }, { "epoch": 0.14189653726838267, "grad_norm": 0.4016502643681876, "learning_rate": 2e-05, "loss": 5.649, "step": 4231 }, { "epoch": 0.1419300746206087, "grad_norm": 0.41300256014223274, "learning_rate": 2e-05, "loss": 5.511, "step": 4232 }, { "epoch": 0.14196361197283475, "grad_norm": 0.3999221812523863, "learning_rate": 2e-05, "loss": 5.4305, "step": 4233 }, { "epoch": 0.14199714932506077, "grad_norm": 0.41583975870784595, "learning_rate": 2e-05, "loss": 5.5799, "step": 4234 }, { "epoch": 0.14203068667728683, "grad_norm": 0.39357576961239077, "learning_rate": 2e-05, "loss": 5.5057, "step": 4235 }, { "epoch": 0.14206422402951288, "grad_norm": 0.3846283891406285, "learning_rate": 2e-05, "loss": 5.4939, "step": 4236 }, { "epoch": 0.1420977613817389, "grad_norm": 0.40609477143347383, "learning_rate": 2e-05, "loss": 5.8379, "step": 4237 }, { "epoch": 0.14213129873396496, "grad_norm": 0.422257142611464, "learning_rate": 2e-05, "loss": 5.4036, "step": 4238 }, { "epoch": 0.142164836086191, "grad_norm": 0.42418951453961445, "learning_rate": 2e-05, "loss": 5.8101, "step": 4239 }, { "epoch": 0.14219837343841704, "grad_norm": 0.40167014350474073, "learning_rate": 2e-05, "loss": 5.6109, "step": 4240 }, { "epoch": 0.14223191079064307, "grad_norm": 0.4251537887227981, "learning_rate": 2e-05, "loss": 5.5116, "step": 4241 }, { "epoch": 0.14226544814286912, "grad_norm": 0.4074196836370734, "learning_rate": 2e-05, "loss": 5.56, "step": 4242 }, { "epoch": 0.14229898549509518, "grad_norm": 0.4153943677404104, "learning_rate": 2e-05, "loss": 5.5428, "step": 4243 }, { "epoch": 0.1423325228473212, "grad_norm": 0.3950576094396381, "learning_rate": 2e-05, "loss": 5.5702, "step": 4244 }, { "epoch": 0.14236606019954726, "grad_norm": 0.42608169846187216, "learning_rate": 2e-05, "loss": 5.5977, "step": 4245 }, { "epoch": 0.14239959755177328, "grad_norm": 0.42285649913280865, "learning_rate": 2e-05, "loss": 5.5022, "step": 4246 }, { "epoch": 0.14243313490399934, "grad_norm": 0.39452834351165067, "learning_rate": 2e-05, "loss": 5.6381, "step": 4247 }, { "epoch": 0.14246667225622536, "grad_norm": 0.4279885353084483, "learning_rate": 2e-05, "loss": 5.6194, "step": 4248 }, { "epoch": 0.14250020960845142, "grad_norm": 0.40846061321922306, "learning_rate": 2e-05, "loss": 5.5953, "step": 4249 }, { "epoch": 0.14253374696067744, "grad_norm": 0.40827075300865484, "learning_rate": 2e-05, "loss": 5.5877, "step": 4250 }, { "epoch": 0.1425672843129035, "grad_norm": 0.40762456353886495, "learning_rate": 2e-05, "loss": 5.586, "step": 4251 }, { "epoch": 0.14260082166512955, "grad_norm": 0.4437619685482429, "learning_rate": 2e-05, "loss": 5.6098, "step": 4252 }, { "epoch": 0.14263435901735558, "grad_norm": 0.42059925284945543, "learning_rate": 2e-05, "loss": 5.326, "step": 4253 }, { "epoch": 0.14266789636958163, "grad_norm": 0.39734037810017864, "learning_rate": 2e-05, "loss": 5.4744, "step": 4254 }, { "epoch": 0.14270143372180766, "grad_norm": 0.42555673162298907, "learning_rate": 2e-05, "loss": 5.6992, "step": 4255 }, { "epoch": 0.1427349710740337, "grad_norm": 0.46223471925831594, "learning_rate": 2e-05, "loss": 5.7451, "step": 4256 }, { "epoch": 0.14276850842625974, "grad_norm": 0.43782647898890076, "learning_rate": 2e-05, "loss": 5.591, "step": 4257 }, { "epoch": 0.1428020457784858, "grad_norm": 0.4241619358886322, "learning_rate": 2e-05, "loss": 5.5379, "step": 4258 }, { "epoch": 0.14283558313071182, "grad_norm": 0.39510434131781624, "learning_rate": 2e-05, "loss": 5.6583, "step": 4259 }, { "epoch": 0.14286912048293787, "grad_norm": 0.42270520314898313, "learning_rate": 2e-05, "loss": 5.7116, "step": 4260 }, { "epoch": 0.14290265783516393, "grad_norm": 0.4148481788206463, "learning_rate": 2e-05, "loss": 5.4771, "step": 4261 }, { "epoch": 0.14293619518738995, "grad_norm": 0.4242206334502911, "learning_rate": 2e-05, "loss": 5.4547, "step": 4262 }, { "epoch": 0.142969732539616, "grad_norm": 0.4038259358707083, "learning_rate": 2e-05, "loss": 5.532, "step": 4263 }, { "epoch": 0.14300326989184203, "grad_norm": 0.4347229505774075, "learning_rate": 2e-05, "loss": 5.4543, "step": 4264 }, { "epoch": 0.1430368072440681, "grad_norm": 0.4519878115160352, "learning_rate": 2e-05, "loss": 5.4192, "step": 4265 }, { "epoch": 0.1430703445962941, "grad_norm": 0.4273186923644414, "learning_rate": 2e-05, "loss": 5.6994, "step": 4266 }, { "epoch": 0.14310388194852017, "grad_norm": 0.3998281242726557, "learning_rate": 2e-05, "loss": 5.7779, "step": 4267 }, { "epoch": 0.1431374193007462, "grad_norm": 0.4070673558910264, "learning_rate": 2e-05, "loss": 5.4946, "step": 4268 }, { "epoch": 0.14317095665297225, "grad_norm": 0.406605132845045, "learning_rate": 2e-05, "loss": 5.5493, "step": 4269 }, { "epoch": 0.1432044940051983, "grad_norm": 0.41853107947157825, "learning_rate": 2e-05, "loss": 5.6123, "step": 4270 }, { "epoch": 0.14323803135742433, "grad_norm": 0.4062193600601037, "learning_rate": 2e-05, "loss": 5.501, "step": 4271 }, { "epoch": 0.14327156870965038, "grad_norm": 0.39748641660562806, "learning_rate": 2e-05, "loss": 5.6648, "step": 4272 }, { "epoch": 0.1433051060618764, "grad_norm": 0.4036922883889314, "learning_rate": 2e-05, "loss": 5.5659, "step": 4273 }, { "epoch": 0.14333864341410246, "grad_norm": 0.4331073954062899, "learning_rate": 2e-05, "loss": 5.6319, "step": 4274 }, { "epoch": 0.1433721807663285, "grad_norm": 0.4202934563207324, "learning_rate": 2e-05, "loss": 5.4227, "step": 4275 }, { "epoch": 0.14340571811855454, "grad_norm": 0.4113805947420597, "learning_rate": 2e-05, "loss": 5.2829, "step": 4276 }, { "epoch": 0.14343925547078057, "grad_norm": 0.4185351175247445, "learning_rate": 2e-05, "loss": 5.6161, "step": 4277 }, { "epoch": 0.14347279282300662, "grad_norm": 0.39875644713960195, "learning_rate": 2e-05, "loss": 5.589, "step": 4278 }, { "epoch": 0.14350633017523268, "grad_norm": 0.41256702600594775, "learning_rate": 2e-05, "loss": 5.6481, "step": 4279 }, { "epoch": 0.1435398675274587, "grad_norm": 0.39995836635216236, "learning_rate": 2e-05, "loss": 5.5254, "step": 4280 }, { "epoch": 0.14357340487968476, "grad_norm": 0.4135557364910348, "learning_rate": 2e-05, "loss": 5.6001, "step": 4281 }, { "epoch": 0.14360694223191078, "grad_norm": 0.4222584702451757, "learning_rate": 2e-05, "loss": 5.6389, "step": 4282 }, { "epoch": 0.14364047958413684, "grad_norm": 0.41804150662891926, "learning_rate": 2e-05, "loss": 5.5129, "step": 4283 }, { "epoch": 0.14367401693636286, "grad_norm": 0.41191145651614286, "learning_rate": 2e-05, "loss": 5.659, "step": 4284 }, { "epoch": 0.14370755428858892, "grad_norm": 0.42109609581198026, "learning_rate": 2e-05, "loss": 5.7806, "step": 4285 }, { "epoch": 0.14374109164081494, "grad_norm": 0.41520697010268076, "learning_rate": 2e-05, "loss": 5.7282, "step": 4286 }, { "epoch": 0.143774628993041, "grad_norm": 0.4180240633258815, "learning_rate": 2e-05, "loss": 5.5054, "step": 4287 }, { "epoch": 0.14380816634526705, "grad_norm": 0.4186628632420284, "learning_rate": 2e-05, "loss": 5.599, "step": 4288 }, { "epoch": 0.14384170369749308, "grad_norm": 0.44409791469846505, "learning_rate": 2e-05, "loss": 5.5562, "step": 4289 }, { "epoch": 0.14387524104971913, "grad_norm": 0.4626754917165467, "learning_rate": 2e-05, "loss": 5.6906, "step": 4290 }, { "epoch": 0.14390877840194516, "grad_norm": 0.4234642476089001, "learning_rate": 2e-05, "loss": 5.5354, "step": 4291 }, { "epoch": 0.1439423157541712, "grad_norm": 0.48703612377795896, "learning_rate": 2e-05, "loss": 5.5621, "step": 4292 }, { "epoch": 0.14397585310639724, "grad_norm": 0.4631770827960269, "learning_rate": 2e-05, "loss": 5.5829, "step": 4293 }, { "epoch": 0.1440093904586233, "grad_norm": 0.44489127491848374, "learning_rate": 2e-05, "loss": 5.407, "step": 4294 }, { "epoch": 0.14404292781084935, "grad_norm": 0.4294325148444839, "learning_rate": 2e-05, "loss": 5.5045, "step": 4295 }, { "epoch": 0.14407646516307537, "grad_norm": 0.4512382876751248, "learning_rate": 2e-05, "loss": 5.5584, "step": 4296 }, { "epoch": 0.14411000251530143, "grad_norm": 0.533591878366879, "learning_rate": 2e-05, "loss": 5.2163, "step": 4297 }, { "epoch": 0.14414353986752745, "grad_norm": 0.48069137104960713, "learning_rate": 2e-05, "loss": 5.6263, "step": 4298 }, { "epoch": 0.1441770772197535, "grad_norm": 0.4019301858804224, "learning_rate": 2e-05, "loss": 5.8005, "step": 4299 }, { "epoch": 0.14421061457197953, "grad_norm": 0.5036845605081651, "learning_rate": 2e-05, "loss": 5.2529, "step": 4300 }, { "epoch": 0.1442441519242056, "grad_norm": 0.46839557740303656, "learning_rate": 2e-05, "loss": 5.7427, "step": 4301 }, { "epoch": 0.14427768927643161, "grad_norm": 0.4180679768027318, "learning_rate": 2e-05, "loss": 5.5412, "step": 4302 }, { "epoch": 0.14431122662865767, "grad_norm": 0.4746807407417717, "learning_rate": 2e-05, "loss": 5.5995, "step": 4303 }, { "epoch": 0.14434476398088372, "grad_norm": 0.4474820047129931, "learning_rate": 2e-05, "loss": 5.5853, "step": 4304 }, { "epoch": 0.14437830133310975, "grad_norm": 0.44015069030187676, "learning_rate": 2e-05, "loss": 5.4536, "step": 4305 }, { "epoch": 0.1444118386853358, "grad_norm": 0.412696835386798, "learning_rate": 2e-05, "loss": 5.5698, "step": 4306 }, { "epoch": 0.14444537603756183, "grad_norm": 0.4488216198173692, "learning_rate": 2e-05, "loss": 5.6651, "step": 4307 }, { "epoch": 0.14447891338978788, "grad_norm": 0.4820147631411911, "learning_rate": 2e-05, "loss": 5.5256, "step": 4308 }, { "epoch": 0.1445124507420139, "grad_norm": 0.41276021796889617, "learning_rate": 2e-05, "loss": 5.688, "step": 4309 }, { "epoch": 0.14454598809423996, "grad_norm": 0.4275655503576425, "learning_rate": 2e-05, "loss": 5.6745, "step": 4310 }, { "epoch": 0.144579525446466, "grad_norm": 0.4702092953584889, "learning_rate": 2e-05, "loss": 5.459, "step": 4311 }, { "epoch": 0.14461306279869204, "grad_norm": 0.4323281196367373, "learning_rate": 2e-05, "loss": 5.4567, "step": 4312 }, { "epoch": 0.1446466001509181, "grad_norm": 0.46934368924575287, "learning_rate": 2e-05, "loss": 5.6388, "step": 4313 }, { "epoch": 0.14468013750314412, "grad_norm": 0.42826161388440304, "learning_rate": 2e-05, "loss": 5.5776, "step": 4314 }, { "epoch": 0.14471367485537018, "grad_norm": 0.4347142995895451, "learning_rate": 2e-05, "loss": 5.5878, "step": 4315 }, { "epoch": 0.1447472122075962, "grad_norm": 0.448795065510049, "learning_rate": 2e-05, "loss": 5.4785, "step": 4316 }, { "epoch": 0.14478074955982226, "grad_norm": 0.45012489288548363, "learning_rate": 2e-05, "loss": 5.7499, "step": 4317 }, { "epoch": 0.14481428691204828, "grad_norm": 0.4415730440434602, "learning_rate": 2e-05, "loss": 5.7433, "step": 4318 }, { "epoch": 0.14484782426427434, "grad_norm": 0.4395107347659095, "learning_rate": 2e-05, "loss": 5.6079, "step": 4319 }, { "epoch": 0.14488136161650036, "grad_norm": 0.4270120208626256, "learning_rate": 2e-05, "loss": 5.5396, "step": 4320 }, { "epoch": 0.14491489896872642, "grad_norm": 0.4311094077656292, "learning_rate": 2e-05, "loss": 5.5666, "step": 4321 }, { "epoch": 0.14494843632095247, "grad_norm": 0.44022752593020864, "learning_rate": 2e-05, "loss": 5.6768, "step": 4322 }, { "epoch": 0.1449819736731785, "grad_norm": 0.45635761808020797, "learning_rate": 2e-05, "loss": 5.4891, "step": 4323 }, { "epoch": 0.14501551102540455, "grad_norm": 0.40870137710260557, "learning_rate": 2e-05, "loss": 5.7571, "step": 4324 }, { "epoch": 0.14504904837763058, "grad_norm": 0.43919718975130295, "learning_rate": 2e-05, "loss": 5.4076, "step": 4325 }, { "epoch": 0.14508258572985663, "grad_norm": 0.41710848923599964, "learning_rate": 2e-05, "loss": 5.6029, "step": 4326 }, { "epoch": 0.14511612308208266, "grad_norm": 0.4027355990491609, "learning_rate": 2e-05, "loss": 5.5406, "step": 4327 }, { "epoch": 0.1451496604343087, "grad_norm": 0.4003462807819665, "learning_rate": 2e-05, "loss": 5.3068, "step": 4328 }, { "epoch": 0.14518319778653474, "grad_norm": 0.42843120823861264, "learning_rate": 2e-05, "loss": 5.3789, "step": 4329 }, { "epoch": 0.1452167351387608, "grad_norm": 0.41338959202038206, "learning_rate": 2e-05, "loss": 5.5873, "step": 4330 }, { "epoch": 0.14525027249098685, "grad_norm": 0.4133777654706897, "learning_rate": 2e-05, "loss": 5.7047, "step": 4331 }, { "epoch": 0.14528380984321287, "grad_norm": 0.4198486653055659, "learning_rate": 2e-05, "loss": 5.6566, "step": 4332 }, { "epoch": 0.14531734719543893, "grad_norm": 0.4332539325380306, "learning_rate": 2e-05, "loss": 5.4013, "step": 4333 }, { "epoch": 0.14535088454766495, "grad_norm": 0.4650861187488767, "learning_rate": 2e-05, "loss": 5.5118, "step": 4334 }, { "epoch": 0.145384421899891, "grad_norm": 0.4222608008915207, "learning_rate": 2e-05, "loss": 5.5432, "step": 4335 }, { "epoch": 0.14541795925211703, "grad_norm": 0.4752719450711072, "learning_rate": 2e-05, "loss": 5.5877, "step": 4336 }, { "epoch": 0.1454514966043431, "grad_norm": 0.41944424987662554, "learning_rate": 2e-05, "loss": 5.5743, "step": 4337 }, { "epoch": 0.14548503395656912, "grad_norm": 0.4141668306809013, "learning_rate": 2e-05, "loss": 5.5733, "step": 4338 }, { "epoch": 0.14551857130879517, "grad_norm": 0.4350485557984654, "learning_rate": 2e-05, "loss": 5.3981, "step": 4339 }, { "epoch": 0.14555210866102122, "grad_norm": 0.42177409982114694, "learning_rate": 2e-05, "loss": 5.4833, "step": 4340 }, { "epoch": 0.14558564601324725, "grad_norm": 0.4362807775549409, "learning_rate": 2e-05, "loss": 5.5483, "step": 4341 }, { "epoch": 0.1456191833654733, "grad_norm": 0.4202677055281724, "learning_rate": 2e-05, "loss": 5.4736, "step": 4342 }, { "epoch": 0.14565272071769933, "grad_norm": 0.42147043729396994, "learning_rate": 2e-05, "loss": 5.5514, "step": 4343 }, { "epoch": 0.14568625806992538, "grad_norm": 0.40468779565353596, "learning_rate": 2e-05, "loss": 5.4442, "step": 4344 }, { "epoch": 0.1457197954221514, "grad_norm": 0.46158253630651774, "learning_rate": 2e-05, "loss": 5.4411, "step": 4345 }, { "epoch": 0.14575333277437746, "grad_norm": 0.4188845389995952, "learning_rate": 2e-05, "loss": 5.7415, "step": 4346 }, { "epoch": 0.14578687012660352, "grad_norm": 0.4004341857084083, "learning_rate": 2e-05, "loss": 5.5653, "step": 4347 }, { "epoch": 0.14582040747882954, "grad_norm": 0.4299170801954783, "learning_rate": 2e-05, "loss": 5.6553, "step": 4348 }, { "epoch": 0.1458539448310556, "grad_norm": 0.47401164094167025, "learning_rate": 2e-05, "loss": 5.4989, "step": 4349 }, { "epoch": 0.14588748218328162, "grad_norm": 0.4323646658209332, "learning_rate": 2e-05, "loss": 5.56, "step": 4350 }, { "epoch": 0.14592101953550768, "grad_norm": 0.41998728470899777, "learning_rate": 2e-05, "loss": 5.751, "step": 4351 }, { "epoch": 0.1459545568877337, "grad_norm": 0.5090276433151448, "learning_rate": 2e-05, "loss": 5.6604, "step": 4352 }, { "epoch": 0.14598809423995976, "grad_norm": 0.4042368526632891, "learning_rate": 2e-05, "loss": 5.6659, "step": 4353 }, { "epoch": 0.14602163159218579, "grad_norm": 0.4053246782396889, "learning_rate": 2e-05, "loss": 5.5771, "step": 4354 }, { "epoch": 0.14605516894441184, "grad_norm": 0.4437406571916282, "learning_rate": 2e-05, "loss": 5.5702, "step": 4355 }, { "epoch": 0.1460887062966379, "grad_norm": 0.4327689805186037, "learning_rate": 2e-05, "loss": 5.43, "step": 4356 }, { "epoch": 0.14612224364886392, "grad_norm": 0.40287808334546726, "learning_rate": 2e-05, "loss": 5.6237, "step": 4357 }, { "epoch": 0.14615578100108997, "grad_norm": 0.41658323409388537, "learning_rate": 2e-05, "loss": 5.6721, "step": 4358 }, { "epoch": 0.146189318353316, "grad_norm": 0.39742184260376906, "learning_rate": 2e-05, "loss": 5.4777, "step": 4359 }, { "epoch": 0.14622285570554205, "grad_norm": 0.38770044997615766, "learning_rate": 2e-05, "loss": 5.6324, "step": 4360 }, { "epoch": 0.14625639305776808, "grad_norm": 0.42549082715026276, "learning_rate": 2e-05, "loss": 5.4336, "step": 4361 }, { "epoch": 0.14628993040999413, "grad_norm": 0.42820881612890127, "learning_rate": 2e-05, "loss": 5.5982, "step": 4362 }, { "epoch": 0.14632346776222016, "grad_norm": 0.38672585337842685, "learning_rate": 2e-05, "loss": 5.5262, "step": 4363 }, { "epoch": 0.14635700511444621, "grad_norm": 0.4365193266191186, "learning_rate": 2e-05, "loss": 5.7706, "step": 4364 }, { "epoch": 0.14639054246667227, "grad_norm": 0.43181121771440395, "learning_rate": 2e-05, "loss": 5.5959, "step": 4365 }, { "epoch": 0.1464240798188983, "grad_norm": 0.42404616966777586, "learning_rate": 2e-05, "loss": 5.4965, "step": 4366 }, { "epoch": 0.14645761717112435, "grad_norm": 0.4092915070219167, "learning_rate": 2e-05, "loss": 5.4886, "step": 4367 }, { "epoch": 0.14649115452335038, "grad_norm": 0.39354137327374644, "learning_rate": 2e-05, "loss": 5.4817, "step": 4368 }, { "epoch": 0.14652469187557643, "grad_norm": 0.40244895635291295, "learning_rate": 2e-05, "loss": 5.6731, "step": 4369 }, { "epoch": 0.14655822922780246, "grad_norm": 0.4151472962810739, "learning_rate": 2e-05, "loss": 5.5112, "step": 4370 }, { "epoch": 0.1465917665800285, "grad_norm": 0.4215632367569019, "learning_rate": 2e-05, "loss": 5.461, "step": 4371 }, { "epoch": 0.14662530393225454, "grad_norm": 0.47050152139456247, "learning_rate": 2e-05, "loss": 5.7646, "step": 4372 }, { "epoch": 0.1466588412844806, "grad_norm": 0.4355986103029853, "learning_rate": 2e-05, "loss": 5.5224, "step": 4373 }, { "epoch": 0.14669237863670664, "grad_norm": 0.4068243489055351, "learning_rate": 2e-05, "loss": 5.5695, "step": 4374 }, { "epoch": 0.14672591598893267, "grad_norm": 0.43298253756130795, "learning_rate": 2e-05, "loss": 5.5914, "step": 4375 }, { "epoch": 0.14675945334115872, "grad_norm": 0.40501983960026294, "learning_rate": 2e-05, "loss": 5.3632, "step": 4376 }, { "epoch": 0.14679299069338475, "grad_norm": 0.40331013624423334, "learning_rate": 2e-05, "loss": 5.5169, "step": 4377 }, { "epoch": 0.1468265280456108, "grad_norm": 0.4073383257954762, "learning_rate": 2e-05, "loss": 5.4469, "step": 4378 }, { "epoch": 0.14686006539783683, "grad_norm": 0.40726026463311427, "learning_rate": 2e-05, "loss": 5.8414, "step": 4379 }, { "epoch": 0.14689360275006288, "grad_norm": 0.430657770812748, "learning_rate": 2e-05, "loss": 5.5104, "step": 4380 }, { "epoch": 0.1469271401022889, "grad_norm": 0.4004415580429553, "learning_rate": 2e-05, "loss": 5.5787, "step": 4381 }, { "epoch": 0.14696067745451497, "grad_norm": 0.39133119853966747, "learning_rate": 2e-05, "loss": 5.4757, "step": 4382 }, { "epoch": 0.14699421480674102, "grad_norm": 0.41758006344519566, "learning_rate": 2e-05, "loss": 5.7245, "step": 4383 }, { "epoch": 0.14702775215896705, "grad_norm": 0.40345068746245666, "learning_rate": 2e-05, "loss": 5.6386, "step": 4384 }, { "epoch": 0.1470612895111931, "grad_norm": 0.41646648476524445, "learning_rate": 2e-05, "loss": 5.5914, "step": 4385 }, { "epoch": 0.14709482686341913, "grad_norm": 0.43570755036796605, "learning_rate": 2e-05, "loss": 5.6034, "step": 4386 }, { "epoch": 0.14712836421564518, "grad_norm": 0.4301705690818156, "learning_rate": 2e-05, "loss": 5.5552, "step": 4387 }, { "epoch": 0.1471619015678712, "grad_norm": 0.4028166768580226, "learning_rate": 2e-05, "loss": 5.4705, "step": 4388 }, { "epoch": 0.14719543892009726, "grad_norm": 0.4168188767384076, "learning_rate": 2e-05, "loss": 5.4289, "step": 4389 }, { "epoch": 0.1472289762723233, "grad_norm": 0.4326971385396606, "learning_rate": 2e-05, "loss": 5.5644, "step": 4390 }, { "epoch": 0.14726251362454934, "grad_norm": 0.3964291700224803, "learning_rate": 2e-05, "loss": 5.5053, "step": 4391 }, { "epoch": 0.1472960509767754, "grad_norm": 0.3959432292334479, "learning_rate": 2e-05, "loss": 5.4765, "step": 4392 }, { "epoch": 0.14732958832900142, "grad_norm": 0.40540793402985936, "learning_rate": 2e-05, "loss": 5.6478, "step": 4393 }, { "epoch": 0.14736312568122747, "grad_norm": 0.4232400830781613, "learning_rate": 2e-05, "loss": 5.6203, "step": 4394 }, { "epoch": 0.1473966630334535, "grad_norm": 0.4027457555192761, "learning_rate": 2e-05, "loss": 5.6367, "step": 4395 }, { "epoch": 0.14743020038567956, "grad_norm": 0.4107899000495797, "learning_rate": 2e-05, "loss": 5.3197, "step": 4396 }, { "epoch": 0.14746373773790558, "grad_norm": 0.410383985384261, "learning_rate": 2e-05, "loss": 5.3517, "step": 4397 }, { "epoch": 0.14749727509013164, "grad_norm": 0.39442278024581423, "learning_rate": 2e-05, "loss": 5.557, "step": 4398 }, { "epoch": 0.1475308124423577, "grad_norm": 0.41057257957175686, "learning_rate": 2e-05, "loss": 5.5382, "step": 4399 }, { "epoch": 0.14756434979458372, "grad_norm": 0.4387754719455524, "learning_rate": 2e-05, "loss": 5.6544, "step": 4400 }, { "epoch": 0.14759788714680977, "grad_norm": 0.4512334652191596, "learning_rate": 2e-05, "loss": 5.5897, "step": 4401 }, { "epoch": 0.1476314244990358, "grad_norm": 0.40447399741746465, "learning_rate": 2e-05, "loss": 5.642, "step": 4402 }, { "epoch": 0.14766496185126185, "grad_norm": 0.40660601860195505, "learning_rate": 2e-05, "loss": 5.3204, "step": 4403 }, { "epoch": 0.14769849920348788, "grad_norm": 0.44082352282777115, "learning_rate": 2e-05, "loss": 5.7586, "step": 4404 }, { "epoch": 0.14773203655571393, "grad_norm": 0.3967532887446816, "learning_rate": 2e-05, "loss": 5.6394, "step": 4405 }, { "epoch": 0.14776557390793996, "grad_norm": 0.4124752384234477, "learning_rate": 2e-05, "loss": 5.5233, "step": 4406 }, { "epoch": 0.147799111260166, "grad_norm": 0.42809343669246513, "learning_rate": 2e-05, "loss": 5.422, "step": 4407 }, { "epoch": 0.14783264861239206, "grad_norm": 0.4302689484368151, "learning_rate": 2e-05, "loss": 5.5883, "step": 4408 }, { "epoch": 0.1478661859646181, "grad_norm": 0.4019288161104342, "learning_rate": 2e-05, "loss": 5.5146, "step": 4409 }, { "epoch": 0.14789972331684414, "grad_norm": 0.4375067416921475, "learning_rate": 2e-05, "loss": 5.5358, "step": 4410 }, { "epoch": 0.14793326066907017, "grad_norm": 0.4167669283274728, "learning_rate": 2e-05, "loss": 5.5019, "step": 4411 }, { "epoch": 0.14796679802129623, "grad_norm": 0.42240236357885513, "learning_rate": 2e-05, "loss": 5.4218, "step": 4412 }, { "epoch": 0.14800033537352225, "grad_norm": 0.4248771063496172, "learning_rate": 2e-05, "loss": 5.6531, "step": 4413 }, { "epoch": 0.1480338727257483, "grad_norm": 0.41245155266884986, "learning_rate": 2e-05, "loss": 5.784, "step": 4414 }, { "epoch": 0.14806741007797433, "grad_norm": 0.4415494125834638, "learning_rate": 2e-05, "loss": 5.7389, "step": 4415 }, { "epoch": 0.14810094743020039, "grad_norm": 0.40830980103537096, "learning_rate": 2e-05, "loss": 5.3989, "step": 4416 }, { "epoch": 0.14813448478242644, "grad_norm": 0.40957326434305974, "learning_rate": 2e-05, "loss": 5.4379, "step": 4417 }, { "epoch": 0.14816802213465247, "grad_norm": 0.4395088272772379, "learning_rate": 2e-05, "loss": 5.5997, "step": 4418 }, { "epoch": 0.14820155948687852, "grad_norm": 0.44541097067550456, "learning_rate": 2e-05, "loss": 5.7078, "step": 4419 }, { "epoch": 0.14823509683910455, "grad_norm": 0.41256878820371057, "learning_rate": 2e-05, "loss": 5.4635, "step": 4420 }, { "epoch": 0.1482686341913306, "grad_norm": 0.4687156576317609, "learning_rate": 2e-05, "loss": 5.7111, "step": 4421 }, { "epoch": 0.14830217154355663, "grad_norm": 0.4479435235015751, "learning_rate": 2e-05, "loss": 5.4419, "step": 4422 }, { "epoch": 0.14833570889578268, "grad_norm": 0.43513586586907715, "learning_rate": 2e-05, "loss": 5.705, "step": 4423 }, { "epoch": 0.1483692462480087, "grad_norm": 0.39908039606133827, "learning_rate": 2e-05, "loss": 5.7018, "step": 4424 }, { "epoch": 0.14840278360023476, "grad_norm": 0.4999504291095047, "learning_rate": 2e-05, "loss": 5.5678, "step": 4425 }, { "epoch": 0.14843632095246082, "grad_norm": 0.43145557748102653, "learning_rate": 2e-05, "loss": 5.4741, "step": 4426 }, { "epoch": 0.14846985830468684, "grad_norm": 0.42809868402133233, "learning_rate": 2e-05, "loss": 5.567, "step": 4427 }, { "epoch": 0.1485033956569129, "grad_norm": 0.4509766303916069, "learning_rate": 2e-05, "loss": 5.5428, "step": 4428 }, { "epoch": 0.14853693300913892, "grad_norm": 0.43703230529477244, "learning_rate": 2e-05, "loss": 5.4373, "step": 4429 }, { "epoch": 0.14857047036136498, "grad_norm": 0.4232406056135705, "learning_rate": 2e-05, "loss": 5.561, "step": 4430 }, { "epoch": 0.148604007713591, "grad_norm": 0.4362879161837818, "learning_rate": 2e-05, "loss": 5.6946, "step": 4431 }, { "epoch": 0.14863754506581706, "grad_norm": 0.4125569283218203, "learning_rate": 2e-05, "loss": 5.3829, "step": 4432 }, { "epoch": 0.14867108241804308, "grad_norm": 0.4240283105034296, "learning_rate": 2e-05, "loss": 5.5269, "step": 4433 }, { "epoch": 0.14870461977026914, "grad_norm": 0.4085957410312839, "learning_rate": 2e-05, "loss": 5.6192, "step": 4434 }, { "epoch": 0.1487381571224952, "grad_norm": 0.40099054742224255, "learning_rate": 2e-05, "loss": 5.4098, "step": 4435 }, { "epoch": 0.14877169447472122, "grad_norm": 0.3958322453040865, "learning_rate": 2e-05, "loss": 5.5651, "step": 4436 }, { "epoch": 0.14880523182694727, "grad_norm": 0.4265377317239287, "learning_rate": 2e-05, "loss": 5.5853, "step": 4437 }, { "epoch": 0.1488387691791733, "grad_norm": 0.4073038428965935, "learning_rate": 2e-05, "loss": 5.7272, "step": 4438 }, { "epoch": 0.14887230653139935, "grad_norm": 0.41973315488646923, "learning_rate": 2e-05, "loss": 5.4685, "step": 4439 }, { "epoch": 0.14890584388362538, "grad_norm": 0.40926366486848703, "learning_rate": 2e-05, "loss": 5.6065, "step": 4440 }, { "epoch": 0.14893938123585143, "grad_norm": 0.4049160251929286, "learning_rate": 2e-05, "loss": 5.3961, "step": 4441 }, { "epoch": 0.14897291858807746, "grad_norm": 0.40535229889481095, "learning_rate": 2e-05, "loss": 5.6944, "step": 4442 }, { "epoch": 0.1490064559403035, "grad_norm": 0.40440158854581704, "learning_rate": 2e-05, "loss": 5.6383, "step": 4443 }, { "epoch": 0.14903999329252957, "grad_norm": 0.41887311027391133, "learning_rate": 2e-05, "loss": 5.2662, "step": 4444 }, { "epoch": 0.1490735306447556, "grad_norm": 0.40060374014510836, "learning_rate": 2e-05, "loss": 5.8234, "step": 4445 }, { "epoch": 0.14910706799698165, "grad_norm": 0.4098089236170902, "learning_rate": 2e-05, "loss": 5.3626, "step": 4446 }, { "epoch": 0.14914060534920767, "grad_norm": 0.3966625545981212, "learning_rate": 2e-05, "loss": 5.5796, "step": 4447 }, { "epoch": 0.14917414270143373, "grad_norm": 0.4125241438402302, "learning_rate": 2e-05, "loss": 5.6245, "step": 4448 }, { "epoch": 0.14920768005365975, "grad_norm": 0.39143663209635626, "learning_rate": 2e-05, "loss": 5.5939, "step": 4449 }, { "epoch": 0.1492412174058858, "grad_norm": 0.3977837636165643, "learning_rate": 2e-05, "loss": 5.52, "step": 4450 }, { "epoch": 0.14927475475811186, "grad_norm": 0.39771369394908407, "learning_rate": 2e-05, "loss": 5.5103, "step": 4451 }, { "epoch": 0.1493082921103379, "grad_norm": 0.4102768667286003, "learning_rate": 2e-05, "loss": 5.5327, "step": 4452 }, { "epoch": 0.14934182946256394, "grad_norm": 0.4014925977544364, "learning_rate": 2e-05, "loss": 5.6519, "step": 4453 }, { "epoch": 0.14937536681478997, "grad_norm": 0.43768643056968576, "learning_rate": 2e-05, "loss": 5.5547, "step": 4454 }, { "epoch": 0.14940890416701602, "grad_norm": 0.3989949096090789, "learning_rate": 2e-05, "loss": 5.6693, "step": 4455 }, { "epoch": 0.14944244151924205, "grad_norm": 0.43848249327230737, "learning_rate": 2e-05, "loss": 5.5607, "step": 4456 }, { "epoch": 0.1494759788714681, "grad_norm": 0.4063405444664547, "learning_rate": 2e-05, "loss": 5.6396, "step": 4457 }, { "epoch": 0.14950951622369413, "grad_norm": 0.4564006710022309, "learning_rate": 2e-05, "loss": 5.586, "step": 4458 }, { "epoch": 0.14954305357592018, "grad_norm": 0.4233307682184927, "learning_rate": 2e-05, "loss": 5.4923, "step": 4459 }, { "epoch": 0.14957659092814624, "grad_norm": 0.40268733628734216, "learning_rate": 2e-05, "loss": 5.5532, "step": 4460 }, { "epoch": 0.14961012828037226, "grad_norm": 0.4279244402245542, "learning_rate": 2e-05, "loss": 5.4343, "step": 4461 }, { "epoch": 0.14964366563259832, "grad_norm": 0.4073966869360404, "learning_rate": 2e-05, "loss": 5.6996, "step": 4462 }, { "epoch": 0.14967720298482434, "grad_norm": 0.4294560184678652, "learning_rate": 2e-05, "loss": 5.486, "step": 4463 }, { "epoch": 0.1497107403370504, "grad_norm": 0.3926552738074727, "learning_rate": 2e-05, "loss": 5.7357, "step": 4464 }, { "epoch": 0.14974427768927642, "grad_norm": 0.40464004042100393, "learning_rate": 2e-05, "loss": 5.6702, "step": 4465 }, { "epoch": 0.14977781504150248, "grad_norm": 0.41721157921887, "learning_rate": 2e-05, "loss": 5.4642, "step": 4466 }, { "epoch": 0.1498113523937285, "grad_norm": 0.419647571542733, "learning_rate": 2e-05, "loss": 5.7663, "step": 4467 }, { "epoch": 0.14984488974595456, "grad_norm": 0.4185055319330211, "learning_rate": 2e-05, "loss": 5.5847, "step": 4468 }, { "epoch": 0.1498784270981806, "grad_norm": 0.3916399492657775, "learning_rate": 2e-05, "loss": 5.3426, "step": 4469 }, { "epoch": 0.14991196445040664, "grad_norm": 0.4029095050800343, "learning_rate": 2e-05, "loss": 5.5802, "step": 4470 }, { "epoch": 0.1499455018026327, "grad_norm": 0.42496772921672127, "learning_rate": 2e-05, "loss": 5.5614, "step": 4471 }, { "epoch": 0.14997903915485872, "grad_norm": 0.43368911522594267, "learning_rate": 2e-05, "loss": 5.5792, "step": 4472 }, { "epoch": 0.15001257650708477, "grad_norm": 0.40433079770749764, "learning_rate": 2e-05, "loss": 5.5549, "step": 4473 }, { "epoch": 0.1500461138593108, "grad_norm": 0.45407779874331716, "learning_rate": 2e-05, "loss": 5.512, "step": 4474 }, { "epoch": 0.15007965121153685, "grad_norm": 0.45140096170216276, "learning_rate": 2e-05, "loss": 5.7857, "step": 4475 }, { "epoch": 0.15011318856376288, "grad_norm": 0.38440022745817004, "learning_rate": 2e-05, "loss": 5.778, "step": 4476 }, { "epoch": 0.15014672591598893, "grad_norm": 0.43626978534660776, "learning_rate": 2e-05, "loss": 5.5997, "step": 4477 }, { "epoch": 0.150180263268215, "grad_norm": 0.4159778089932395, "learning_rate": 2e-05, "loss": 5.5473, "step": 4478 }, { "epoch": 0.150213800620441, "grad_norm": 0.3961655407943028, "learning_rate": 2e-05, "loss": 5.5632, "step": 4479 }, { "epoch": 0.15024733797266707, "grad_norm": 0.4045018691109815, "learning_rate": 2e-05, "loss": 5.5852, "step": 4480 }, { "epoch": 0.1502808753248931, "grad_norm": 0.3906380390410305, "learning_rate": 2e-05, "loss": 5.525, "step": 4481 }, { "epoch": 0.15031441267711915, "grad_norm": 0.43973255575910325, "learning_rate": 2e-05, "loss": 5.4799, "step": 4482 }, { "epoch": 0.15034795002934517, "grad_norm": 0.39097691555268904, "learning_rate": 2e-05, "loss": 5.4934, "step": 4483 }, { "epoch": 0.15038148738157123, "grad_norm": 0.401863221618038, "learning_rate": 2e-05, "loss": 5.5689, "step": 4484 }, { "epoch": 0.15041502473379725, "grad_norm": 0.401019387589479, "learning_rate": 2e-05, "loss": 5.6832, "step": 4485 }, { "epoch": 0.1504485620860233, "grad_norm": 0.4251465869913045, "learning_rate": 2e-05, "loss": 5.5577, "step": 4486 }, { "epoch": 0.15048209943824936, "grad_norm": 0.4203524029297611, "learning_rate": 2e-05, "loss": 5.4795, "step": 4487 }, { "epoch": 0.1505156367904754, "grad_norm": 0.4172733118746797, "learning_rate": 2e-05, "loss": 5.771, "step": 4488 }, { "epoch": 0.15054917414270144, "grad_norm": 0.40225692654703155, "learning_rate": 2e-05, "loss": 5.5844, "step": 4489 }, { "epoch": 0.15058271149492747, "grad_norm": 0.40788610909859124, "learning_rate": 2e-05, "loss": 5.6432, "step": 4490 }, { "epoch": 0.15061624884715352, "grad_norm": 0.38424664917992624, "learning_rate": 2e-05, "loss": 5.6319, "step": 4491 }, { "epoch": 0.15064978619937955, "grad_norm": 0.44250335677456726, "learning_rate": 2e-05, "loss": 5.4001, "step": 4492 }, { "epoch": 0.1506833235516056, "grad_norm": 0.48939102475999907, "learning_rate": 2e-05, "loss": 5.8373, "step": 4493 }, { "epoch": 0.15071686090383163, "grad_norm": 0.41943338946126346, "learning_rate": 2e-05, "loss": 5.6219, "step": 4494 }, { "epoch": 0.15075039825605768, "grad_norm": 0.4144970962384105, "learning_rate": 2e-05, "loss": 5.4475, "step": 4495 }, { "epoch": 0.15078393560828374, "grad_norm": 0.4659129540405675, "learning_rate": 2e-05, "loss": 5.6345, "step": 4496 }, { "epoch": 0.15081747296050976, "grad_norm": 0.4532099995548214, "learning_rate": 2e-05, "loss": 5.6517, "step": 4497 }, { "epoch": 0.15085101031273582, "grad_norm": 0.40434219678708655, "learning_rate": 2e-05, "loss": 5.7043, "step": 4498 }, { "epoch": 0.15088454766496184, "grad_norm": 0.4316140881239093, "learning_rate": 2e-05, "loss": 5.6289, "step": 4499 }, { "epoch": 0.1509180850171879, "grad_norm": 0.42108361594928106, "learning_rate": 2e-05, "loss": 5.5012, "step": 4500 }, { "epoch": 0.15095162236941392, "grad_norm": 0.435559276851284, "learning_rate": 2e-05, "loss": 5.3493, "step": 4501 }, { "epoch": 0.15098515972163998, "grad_norm": 0.43048777911995284, "learning_rate": 2e-05, "loss": 5.4008, "step": 4502 }, { "epoch": 0.15101869707386603, "grad_norm": 0.410756019930557, "learning_rate": 2e-05, "loss": 5.5323, "step": 4503 }, { "epoch": 0.15105223442609206, "grad_norm": 0.4089061351189824, "learning_rate": 2e-05, "loss": 5.7499, "step": 4504 }, { "epoch": 0.1510857717783181, "grad_norm": 0.4097028122435643, "learning_rate": 2e-05, "loss": 5.5939, "step": 4505 }, { "epoch": 0.15111930913054414, "grad_norm": 0.4203048634458422, "learning_rate": 2e-05, "loss": 5.5738, "step": 4506 }, { "epoch": 0.1511528464827702, "grad_norm": 0.4283241257609566, "learning_rate": 2e-05, "loss": 5.5639, "step": 4507 }, { "epoch": 0.15118638383499622, "grad_norm": 0.41448698205370443, "learning_rate": 2e-05, "loss": 5.4641, "step": 4508 }, { "epoch": 0.15121992118722227, "grad_norm": 0.38822590961310754, "learning_rate": 2e-05, "loss": 5.5285, "step": 4509 }, { "epoch": 0.1512534585394483, "grad_norm": 0.3993682763961168, "learning_rate": 2e-05, "loss": 5.5199, "step": 4510 }, { "epoch": 0.15128699589167435, "grad_norm": 0.4241879821663845, "learning_rate": 2e-05, "loss": 5.5409, "step": 4511 }, { "epoch": 0.1513205332439004, "grad_norm": 0.41223599063828104, "learning_rate": 2e-05, "loss": 5.5239, "step": 4512 }, { "epoch": 0.15135407059612643, "grad_norm": 0.40763615812832604, "learning_rate": 2e-05, "loss": 5.5317, "step": 4513 }, { "epoch": 0.1513876079483525, "grad_norm": 0.4043113677016996, "learning_rate": 2e-05, "loss": 5.3598, "step": 4514 }, { "epoch": 0.1514211453005785, "grad_norm": 0.41431242437609556, "learning_rate": 2e-05, "loss": 5.3765, "step": 4515 }, { "epoch": 0.15145468265280457, "grad_norm": 0.40048278536044213, "learning_rate": 2e-05, "loss": 5.6909, "step": 4516 }, { "epoch": 0.1514882200050306, "grad_norm": 0.4054845264949121, "learning_rate": 2e-05, "loss": 5.716, "step": 4517 }, { "epoch": 0.15152175735725665, "grad_norm": 0.4141337252175648, "learning_rate": 2e-05, "loss": 5.6486, "step": 4518 }, { "epoch": 0.15155529470948267, "grad_norm": 0.4025462600930529, "learning_rate": 2e-05, "loss": 5.5507, "step": 4519 }, { "epoch": 0.15158883206170873, "grad_norm": 0.40909801756208275, "learning_rate": 2e-05, "loss": 5.6217, "step": 4520 }, { "epoch": 0.15162236941393478, "grad_norm": 0.4167583508640881, "learning_rate": 2e-05, "loss": 5.495, "step": 4521 }, { "epoch": 0.1516559067661608, "grad_norm": 0.4229345677372978, "learning_rate": 2e-05, "loss": 5.4556, "step": 4522 }, { "epoch": 0.15168944411838686, "grad_norm": 0.387288877836576, "learning_rate": 2e-05, "loss": 5.4859, "step": 4523 }, { "epoch": 0.1517229814706129, "grad_norm": 0.3861204529273634, "learning_rate": 2e-05, "loss": 5.504, "step": 4524 }, { "epoch": 0.15175651882283894, "grad_norm": 0.4263554834136595, "learning_rate": 2e-05, "loss": 5.5868, "step": 4525 }, { "epoch": 0.15179005617506497, "grad_norm": 0.4062321115106488, "learning_rate": 2e-05, "loss": 5.5846, "step": 4526 }, { "epoch": 0.15182359352729102, "grad_norm": 0.417980525642548, "learning_rate": 2e-05, "loss": 5.6071, "step": 4527 }, { "epoch": 0.15185713087951705, "grad_norm": 0.43114151287258756, "learning_rate": 2e-05, "loss": 5.5008, "step": 4528 }, { "epoch": 0.1518906682317431, "grad_norm": 0.4300384768727506, "learning_rate": 2e-05, "loss": 5.4594, "step": 4529 }, { "epoch": 0.15192420558396916, "grad_norm": 0.40269583168761597, "learning_rate": 2e-05, "loss": 5.3636, "step": 4530 }, { "epoch": 0.15195774293619518, "grad_norm": 0.42011091876994905, "learning_rate": 2e-05, "loss": 5.8158, "step": 4531 }, { "epoch": 0.15199128028842124, "grad_norm": 0.4394743716683622, "learning_rate": 2e-05, "loss": 5.5204, "step": 4532 }, { "epoch": 0.15202481764064726, "grad_norm": 0.4226370775610406, "learning_rate": 2e-05, "loss": 5.5003, "step": 4533 }, { "epoch": 0.15205835499287332, "grad_norm": 0.4185499403340215, "learning_rate": 2e-05, "loss": 5.4007, "step": 4534 }, { "epoch": 0.15209189234509934, "grad_norm": 0.4311473134453717, "learning_rate": 2e-05, "loss": 5.5641, "step": 4535 }, { "epoch": 0.1521254296973254, "grad_norm": 0.4321149968026003, "learning_rate": 2e-05, "loss": 5.6019, "step": 4536 }, { "epoch": 0.15215896704955142, "grad_norm": 0.3934806952016483, "learning_rate": 2e-05, "loss": 5.4644, "step": 4537 }, { "epoch": 0.15219250440177748, "grad_norm": 0.4216495069429148, "learning_rate": 2e-05, "loss": 5.4885, "step": 4538 }, { "epoch": 0.15222604175400353, "grad_norm": 0.4171040233981254, "learning_rate": 2e-05, "loss": 5.6423, "step": 4539 }, { "epoch": 0.15225957910622956, "grad_norm": 0.4614686750517199, "learning_rate": 2e-05, "loss": 5.4703, "step": 4540 }, { "epoch": 0.1522931164584556, "grad_norm": 0.43200647144534005, "learning_rate": 2e-05, "loss": 5.6984, "step": 4541 }, { "epoch": 0.15232665381068164, "grad_norm": 0.4106264420616072, "learning_rate": 2e-05, "loss": 5.5678, "step": 4542 }, { "epoch": 0.1523601911629077, "grad_norm": 0.4275805372660633, "learning_rate": 2e-05, "loss": 5.729, "step": 4543 }, { "epoch": 0.15239372851513372, "grad_norm": 0.4273185261306941, "learning_rate": 2e-05, "loss": 5.5394, "step": 4544 }, { "epoch": 0.15242726586735977, "grad_norm": 0.4127409421667362, "learning_rate": 2e-05, "loss": 5.6677, "step": 4545 }, { "epoch": 0.1524608032195858, "grad_norm": 0.4248174074800159, "learning_rate": 2e-05, "loss": 5.4318, "step": 4546 }, { "epoch": 0.15249434057181185, "grad_norm": 0.4057270727546869, "learning_rate": 2e-05, "loss": 5.533, "step": 4547 }, { "epoch": 0.1525278779240379, "grad_norm": 0.40227045399961514, "learning_rate": 2e-05, "loss": 5.502, "step": 4548 }, { "epoch": 0.15256141527626393, "grad_norm": 0.4048517900398863, "learning_rate": 2e-05, "loss": 5.6705, "step": 4549 }, { "epoch": 0.15259495262849, "grad_norm": 0.41254198128778957, "learning_rate": 2e-05, "loss": 5.4599, "step": 4550 }, { "epoch": 0.15262848998071601, "grad_norm": 0.4097987323672371, "learning_rate": 2e-05, "loss": 5.8245, "step": 4551 }, { "epoch": 0.15266202733294207, "grad_norm": 0.405824524585306, "learning_rate": 2e-05, "loss": 5.7028, "step": 4552 }, { "epoch": 0.1526955646851681, "grad_norm": 0.4138222518187115, "learning_rate": 2e-05, "loss": 5.5066, "step": 4553 }, { "epoch": 0.15272910203739415, "grad_norm": 0.401075250898549, "learning_rate": 2e-05, "loss": 5.6126, "step": 4554 }, { "epoch": 0.1527626393896202, "grad_norm": 0.41339218473527944, "learning_rate": 2e-05, "loss": 5.5259, "step": 4555 }, { "epoch": 0.15279617674184623, "grad_norm": 0.4564806364679697, "learning_rate": 2e-05, "loss": 5.7696, "step": 4556 }, { "epoch": 0.15282971409407228, "grad_norm": 0.40723300458229544, "learning_rate": 2e-05, "loss": 5.3616, "step": 4557 }, { "epoch": 0.1528632514462983, "grad_norm": 0.42158138620940705, "learning_rate": 2e-05, "loss": 5.3454, "step": 4558 }, { "epoch": 0.15289678879852436, "grad_norm": 0.399172322264664, "learning_rate": 2e-05, "loss": 5.6541, "step": 4559 }, { "epoch": 0.1529303261507504, "grad_norm": 0.40941517622263046, "learning_rate": 2e-05, "loss": 5.3769, "step": 4560 }, { "epoch": 0.15296386350297644, "grad_norm": 0.4093772418495802, "learning_rate": 2e-05, "loss": 5.5831, "step": 4561 }, { "epoch": 0.15299740085520247, "grad_norm": 0.3994023682173863, "learning_rate": 2e-05, "loss": 5.3804, "step": 4562 }, { "epoch": 0.15303093820742852, "grad_norm": 0.38559533915818833, "learning_rate": 2e-05, "loss": 5.4381, "step": 4563 }, { "epoch": 0.15306447555965458, "grad_norm": 0.41193981922958506, "learning_rate": 2e-05, "loss": 5.5623, "step": 4564 }, { "epoch": 0.1530980129118806, "grad_norm": 0.40552516514855297, "learning_rate": 2e-05, "loss": 5.4509, "step": 4565 }, { "epoch": 0.15313155026410666, "grad_norm": 0.4416130527277365, "learning_rate": 2e-05, "loss": 5.6251, "step": 4566 }, { "epoch": 0.15316508761633268, "grad_norm": 0.40165624069151834, "learning_rate": 2e-05, "loss": 5.5121, "step": 4567 }, { "epoch": 0.15319862496855874, "grad_norm": 0.41708756830473853, "learning_rate": 2e-05, "loss": 5.6043, "step": 4568 }, { "epoch": 0.15323216232078476, "grad_norm": 0.41496954245764794, "learning_rate": 2e-05, "loss": 5.5711, "step": 4569 }, { "epoch": 0.15326569967301082, "grad_norm": 0.41349250718623576, "learning_rate": 2e-05, "loss": 5.6194, "step": 4570 }, { "epoch": 0.15329923702523685, "grad_norm": 0.412637214430252, "learning_rate": 2e-05, "loss": 5.6578, "step": 4571 }, { "epoch": 0.1533327743774629, "grad_norm": 0.46326991909736465, "learning_rate": 2e-05, "loss": 5.6266, "step": 4572 }, { "epoch": 0.15336631172968895, "grad_norm": 0.4055166634582801, "learning_rate": 2e-05, "loss": 5.4217, "step": 4573 }, { "epoch": 0.15339984908191498, "grad_norm": 0.4182775196945332, "learning_rate": 2e-05, "loss": 5.665, "step": 4574 }, { "epoch": 0.15343338643414103, "grad_norm": 0.4260642695283502, "learning_rate": 2e-05, "loss": 5.5108, "step": 4575 }, { "epoch": 0.15346692378636706, "grad_norm": 0.423811217749867, "learning_rate": 2e-05, "loss": 5.5962, "step": 4576 }, { "epoch": 0.15350046113859311, "grad_norm": 0.4005765090811562, "learning_rate": 2e-05, "loss": 5.6342, "step": 4577 }, { "epoch": 0.15353399849081914, "grad_norm": 0.41893438093844154, "learning_rate": 2e-05, "loss": 5.6264, "step": 4578 }, { "epoch": 0.1535675358430452, "grad_norm": 0.4000707828597674, "learning_rate": 2e-05, "loss": 5.3816, "step": 4579 }, { "epoch": 0.15360107319527122, "grad_norm": 0.43193071159696983, "learning_rate": 2e-05, "loss": 5.6587, "step": 4580 }, { "epoch": 0.15363461054749727, "grad_norm": 0.42256021965880136, "learning_rate": 2e-05, "loss": 5.6478, "step": 4581 }, { "epoch": 0.15366814789972333, "grad_norm": 0.40492766139475744, "learning_rate": 2e-05, "loss": 5.6268, "step": 4582 }, { "epoch": 0.15370168525194935, "grad_norm": 0.4346053548777933, "learning_rate": 2e-05, "loss": 5.536, "step": 4583 }, { "epoch": 0.1537352226041754, "grad_norm": 0.4233836138954294, "learning_rate": 2e-05, "loss": 5.6036, "step": 4584 }, { "epoch": 0.15376875995640144, "grad_norm": 0.4247507164416079, "learning_rate": 2e-05, "loss": 5.5759, "step": 4585 }, { "epoch": 0.1538022973086275, "grad_norm": 0.4159214336172877, "learning_rate": 2e-05, "loss": 5.4684, "step": 4586 }, { "epoch": 0.15383583466085352, "grad_norm": 0.4154426552776929, "learning_rate": 2e-05, "loss": 5.259, "step": 4587 }, { "epoch": 0.15386937201307957, "grad_norm": 0.4006500122621944, "learning_rate": 2e-05, "loss": 5.5942, "step": 4588 }, { "epoch": 0.1539029093653056, "grad_norm": 0.4205457741492306, "learning_rate": 2e-05, "loss": 5.5826, "step": 4589 }, { "epoch": 0.15393644671753165, "grad_norm": 0.4729555807607863, "learning_rate": 2e-05, "loss": 5.3603, "step": 4590 }, { "epoch": 0.1539699840697577, "grad_norm": 0.4126211147592109, "learning_rate": 2e-05, "loss": 5.5916, "step": 4591 }, { "epoch": 0.15400352142198373, "grad_norm": 0.42999377027609725, "learning_rate": 2e-05, "loss": 5.609, "step": 4592 }, { "epoch": 0.15403705877420978, "grad_norm": 0.45275061279079803, "learning_rate": 2e-05, "loss": 5.771, "step": 4593 }, { "epoch": 0.1540705961264358, "grad_norm": 0.4515660321089553, "learning_rate": 2e-05, "loss": 5.4939, "step": 4594 }, { "epoch": 0.15410413347866186, "grad_norm": 0.4058310501456585, "learning_rate": 2e-05, "loss": 5.3323, "step": 4595 }, { "epoch": 0.1541376708308879, "grad_norm": 0.4822040519250901, "learning_rate": 2e-05, "loss": 5.4997, "step": 4596 }, { "epoch": 0.15417120818311394, "grad_norm": 0.45809038109205624, "learning_rate": 2e-05, "loss": 5.6152, "step": 4597 }, { "epoch": 0.15420474553534, "grad_norm": 0.4134680736208071, "learning_rate": 2e-05, "loss": 5.6791, "step": 4598 }, { "epoch": 0.15423828288756603, "grad_norm": 0.4352234085823468, "learning_rate": 2e-05, "loss": 5.5002, "step": 4599 }, { "epoch": 0.15427182023979208, "grad_norm": 0.4439587787738096, "learning_rate": 2e-05, "loss": 5.5079, "step": 4600 }, { "epoch": 0.1543053575920181, "grad_norm": 0.4011747018964245, "learning_rate": 2e-05, "loss": 5.6049, "step": 4601 }, { "epoch": 0.15433889494424416, "grad_norm": 0.45801865873124553, "learning_rate": 2e-05, "loss": 5.3541, "step": 4602 }, { "epoch": 0.15437243229647019, "grad_norm": 0.47781027567140444, "learning_rate": 2e-05, "loss": 5.6733, "step": 4603 }, { "epoch": 0.15440596964869624, "grad_norm": 0.43969647294636, "learning_rate": 2e-05, "loss": 5.5974, "step": 4604 }, { "epoch": 0.15443950700092227, "grad_norm": 0.46461419827627176, "learning_rate": 2e-05, "loss": 5.36, "step": 4605 }, { "epoch": 0.15447304435314832, "grad_norm": 0.43266102188821215, "learning_rate": 2e-05, "loss": 5.6561, "step": 4606 }, { "epoch": 0.15450658170537437, "grad_norm": 0.44256068146526417, "learning_rate": 2e-05, "loss": 5.6815, "step": 4607 }, { "epoch": 0.1545401190576004, "grad_norm": 0.4285984194302087, "learning_rate": 2e-05, "loss": 5.6019, "step": 4608 }, { "epoch": 0.15457365640982645, "grad_norm": 0.4272396720544284, "learning_rate": 2e-05, "loss": 5.7495, "step": 4609 }, { "epoch": 0.15460719376205248, "grad_norm": 0.46773316980734414, "learning_rate": 2e-05, "loss": 5.7861, "step": 4610 }, { "epoch": 0.15464073111427853, "grad_norm": 0.44092087017973197, "learning_rate": 2e-05, "loss": 5.6245, "step": 4611 }, { "epoch": 0.15467426846650456, "grad_norm": 0.3883972909390754, "learning_rate": 2e-05, "loss": 5.537, "step": 4612 }, { "epoch": 0.15470780581873061, "grad_norm": 0.43011481016844194, "learning_rate": 2e-05, "loss": 5.5329, "step": 4613 }, { "epoch": 0.15474134317095664, "grad_norm": 0.43263046357115087, "learning_rate": 2e-05, "loss": 5.4337, "step": 4614 }, { "epoch": 0.1547748805231827, "grad_norm": 0.45203060735688416, "learning_rate": 2e-05, "loss": 5.696, "step": 4615 }, { "epoch": 0.15480841787540875, "grad_norm": 0.4223435716637023, "learning_rate": 2e-05, "loss": 5.4014, "step": 4616 }, { "epoch": 0.15484195522763478, "grad_norm": 0.42959678875024865, "learning_rate": 2e-05, "loss": 5.4586, "step": 4617 }, { "epoch": 0.15487549257986083, "grad_norm": 0.4169957386331198, "learning_rate": 2e-05, "loss": 5.5272, "step": 4618 }, { "epoch": 0.15490902993208686, "grad_norm": 0.40778473373289426, "learning_rate": 2e-05, "loss": 5.6355, "step": 4619 }, { "epoch": 0.1549425672843129, "grad_norm": 0.41607403705622825, "learning_rate": 2e-05, "loss": 5.4916, "step": 4620 }, { "epoch": 0.15497610463653894, "grad_norm": 0.41684560846270535, "learning_rate": 2e-05, "loss": 5.7063, "step": 4621 }, { "epoch": 0.155009641988765, "grad_norm": 0.4098904536010923, "learning_rate": 2e-05, "loss": 5.602, "step": 4622 }, { "epoch": 0.15504317934099102, "grad_norm": 0.39926499384000697, "learning_rate": 2e-05, "loss": 5.6206, "step": 4623 }, { "epoch": 0.15507671669321707, "grad_norm": 0.39850702531405996, "learning_rate": 2e-05, "loss": 5.6532, "step": 4624 }, { "epoch": 0.15511025404544312, "grad_norm": 0.44298919552012356, "learning_rate": 2e-05, "loss": 5.4519, "step": 4625 }, { "epoch": 0.15514379139766915, "grad_norm": 0.4002349869335704, "learning_rate": 2e-05, "loss": 5.3681, "step": 4626 }, { "epoch": 0.1551773287498952, "grad_norm": 0.4620633699525077, "learning_rate": 2e-05, "loss": 5.6613, "step": 4627 }, { "epoch": 0.15521086610212123, "grad_norm": 0.4389384547321956, "learning_rate": 2e-05, "loss": 5.5961, "step": 4628 }, { "epoch": 0.15524440345434729, "grad_norm": 0.40433622283123694, "learning_rate": 2e-05, "loss": 5.5018, "step": 4629 }, { "epoch": 0.1552779408065733, "grad_norm": 0.4021024226756669, "learning_rate": 2e-05, "loss": 5.3415, "step": 4630 }, { "epoch": 0.15531147815879937, "grad_norm": 0.41009267232400254, "learning_rate": 2e-05, "loss": 5.5331, "step": 4631 }, { "epoch": 0.1553450155110254, "grad_norm": 0.4254781515282819, "learning_rate": 2e-05, "loss": 5.6153, "step": 4632 }, { "epoch": 0.15537855286325145, "grad_norm": 0.41349908887881615, "learning_rate": 2e-05, "loss": 5.4923, "step": 4633 }, { "epoch": 0.1554120902154775, "grad_norm": 0.41425869866001713, "learning_rate": 2e-05, "loss": 5.6316, "step": 4634 }, { "epoch": 0.15544562756770353, "grad_norm": 0.4168905172994235, "learning_rate": 2e-05, "loss": 5.6709, "step": 4635 }, { "epoch": 0.15547916491992958, "grad_norm": 0.4403494600931103, "learning_rate": 2e-05, "loss": 5.7222, "step": 4636 }, { "epoch": 0.1555127022721556, "grad_norm": 0.4018469813942673, "learning_rate": 2e-05, "loss": 5.5225, "step": 4637 }, { "epoch": 0.15554623962438166, "grad_norm": 0.4363245369739339, "learning_rate": 2e-05, "loss": 5.4265, "step": 4638 }, { "epoch": 0.1555797769766077, "grad_norm": 0.4110372874063468, "learning_rate": 2e-05, "loss": 5.5747, "step": 4639 }, { "epoch": 0.15561331432883374, "grad_norm": 0.4141841409706471, "learning_rate": 2e-05, "loss": 5.4652, "step": 4640 }, { "epoch": 0.15564685168105977, "grad_norm": 0.41426281880486626, "learning_rate": 2e-05, "loss": 5.5511, "step": 4641 }, { "epoch": 0.15568038903328582, "grad_norm": 0.46966774375659254, "learning_rate": 2e-05, "loss": 5.4806, "step": 4642 }, { "epoch": 0.15571392638551187, "grad_norm": 0.38739357240237776, "learning_rate": 2e-05, "loss": 5.6894, "step": 4643 }, { "epoch": 0.1557474637377379, "grad_norm": 0.44916583270471044, "learning_rate": 2e-05, "loss": 5.632, "step": 4644 }, { "epoch": 0.15578100108996396, "grad_norm": 0.4147251412505526, "learning_rate": 2e-05, "loss": 5.5382, "step": 4645 }, { "epoch": 0.15581453844218998, "grad_norm": 0.41429392441491886, "learning_rate": 2e-05, "loss": 5.5894, "step": 4646 }, { "epoch": 0.15584807579441604, "grad_norm": 0.4038326952858859, "learning_rate": 2e-05, "loss": 5.6159, "step": 4647 }, { "epoch": 0.15588161314664206, "grad_norm": 0.44109409246295134, "learning_rate": 2e-05, "loss": 5.6421, "step": 4648 }, { "epoch": 0.15591515049886812, "grad_norm": 0.4238406986181899, "learning_rate": 2e-05, "loss": 5.5217, "step": 4649 }, { "epoch": 0.15594868785109417, "grad_norm": 0.44238741491295225, "learning_rate": 2e-05, "loss": 5.476, "step": 4650 }, { "epoch": 0.1559822252033202, "grad_norm": 0.4041380841595531, "learning_rate": 2e-05, "loss": 5.5992, "step": 4651 }, { "epoch": 0.15601576255554625, "grad_norm": 0.40633570085990794, "learning_rate": 2e-05, "loss": 5.4017, "step": 4652 }, { "epoch": 0.15604929990777228, "grad_norm": 0.4061021488537671, "learning_rate": 2e-05, "loss": 5.5142, "step": 4653 }, { "epoch": 0.15608283725999833, "grad_norm": 0.41863370527601607, "learning_rate": 2e-05, "loss": 5.4799, "step": 4654 }, { "epoch": 0.15611637461222436, "grad_norm": 0.4166809212768296, "learning_rate": 2e-05, "loss": 5.5808, "step": 4655 }, { "epoch": 0.1561499119644504, "grad_norm": 0.4211434727817283, "learning_rate": 2e-05, "loss": 5.5243, "step": 4656 }, { "epoch": 0.15618344931667644, "grad_norm": 0.4416987078333819, "learning_rate": 2e-05, "loss": 5.5063, "step": 4657 }, { "epoch": 0.1562169866689025, "grad_norm": 0.4239696768552652, "learning_rate": 2e-05, "loss": 5.3716, "step": 4658 }, { "epoch": 0.15625052402112855, "grad_norm": 0.4097907012297405, "learning_rate": 2e-05, "loss": 5.6977, "step": 4659 }, { "epoch": 0.15628406137335457, "grad_norm": 0.4912135792603889, "learning_rate": 2e-05, "loss": 5.3876, "step": 4660 }, { "epoch": 0.15631759872558063, "grad_norm": 0.43379958564999743, "learning_rate": 2e-05, "loss": 5.6907, "step": 4661 }, { "epoch": 0.15635113607780665, "grad_norm": 0.4324636397257099, "learning_rate": 2e-05, "loss": 5.5371, "step": 4662 }, { "epoch": 0.1563846734300327, "grad_norm": 0.46072043258002526, "learning_rate": 2e-05, "loss": 5.6236, "step": 4663 }, { "epoch": 0.15641821078225873, "grad_norm": 0.4423060336818684, "learning_rate": 2e-05, "loss": 5.5087, "step": 4664 }, { "epoch": 0.15645174813448479, "grad_norm": 0.4317066035058893, "learning_rate": 2e-05, "loss": 5.4426, "step": 4665 }, { "epoch": 0.1564852854867108, "grad_norm": 0.4187386895040852, "learning_rate": 2e-05, "loss": 5.6343, "step": 4666 }, { "epoch": 0.15651882283893687, "grad_norm": 0.41953344696324907, "learning_rate": 2e-05, "loss": 5.2901, "step": 4667 }, { "epoch": 0.15655236019116292, "grad_norm": 0.4269335195783967, "learning_rate": 2e-05, "loss": 5.5607, "step": 4668 }, { "epoch": 0.15658589754338895, "grad_norm": 0.42199054181764584, "learning_rate": 2e-05, "loss": 5.5076, "step": 4669 }, { "epoch": 0.156619434895615, "grad_norm": 0.4686036333416755, "learning_rate": 2e-05, "loss": 5.3601, "step": 4670 }, { "epoch": 0.15665297224784103, "grad_norm": 0.38726867566404416, "learning_rate": 2e-05, "loss": 5.4922, "step": 4671 }, { "epoch": 0.15668650960006708, "grad_norm": 0.3957999525456136, "learning_rate": 2e-05, "loss": 5.5181, "step": 4672 }, { "epoch": 0.1567200469522931, "grad_norm": 0.4015184811882267, "learning_rate": 2e-05, "loss": 5.5539, "step": 4673 }, { "epoch": 0.15675358430451916, "grad_norm": 0.4208191808064236, "learning_rate": 2e-05, "loss": 5.2899, "step": 4674 }, { "epoch": 0.1567871216567452, "grad_norm": 0.4293312727932955, "learning_rate": 2e-05, "loss": 5.6125, "step": 4675 }, { "epoch": 0.15682065900897124, "grad_norm": 0.38873968491975947, "learning_rate": 2e-05, "loss": 5.5203, "step": 4676 }, { "epoch": 0.1568541963611973, "grad_norm": 0.40801492689289337, "learning_rate": 2e-05, "loss": 5.6091, "step": 4677 }, { "epoch": 0.15688773371342332, "grad_norm": 0.423779010466685, "learning_rate": 2e-05, "loss": 5.6537, "step": 4678 }, { "epoch": 0.15692127106564938, "grad_norm": 0.42195986776962285, "learning_rate": 2e-05, "loss": 5.666, "step": 4679 }, { "epoch": 0.1569548084178754, "grad_norm": 0.4295916276532908, "learning_rate": 2e-05, "loss": 5.7296, "step": 4680 }, { "epoch": 0.15698834577010146, "grad_norm": 0.39404891220685445, "learning_rate": 2e-05, "loss": 5.5496, "step": 4681 }, { "epoch": 0.15702188312232748, "grad_norm": 0.40689153757078345, "learning_rate": 2e-05, "loss": 5.5501, "step": 4682 }, { "epoch": 0.15705542047455354, "grad_norm": 0.4123694767758268, "learning_rate": 2e-05, "loss": 5.6941, "step": 4683 }, { "epoch": 0.15708895782677956, "grad_norm": 0.41936789261893337, "learning_rate": 2e-05, "loss": 5.4834, "step": 4684 }, { "epoch": 0.15712249517900562, "grad_norm": 0.40986779447969857, "learning_rate": 2e-05, "loss": 5.4711, "step": 4685 }, { "epoch": 0.15715603253123167, "grad_norm": 0.39821807399949455, "learning_rate": 2e-05, "loss": 5.6153, "step": 4686 }, { "epoch": 0.1571895698834577, "grad_norm": 0.42042434830331016, "learning_rate": 2e-05, "loss": 5.5892, "step": 4687 }, { "epoch": 0.15722310723568375, "grad_norm": 0.4191757178754908, "learning_rate": 2e-05, "loss": 5.4744, "step": 4688 }, { "epoch": 0.15725664458790978, "grad_norm": 0.41247381462297467, "learning_rate": 2e-05, "loss": 5.4305, "step": 4689 }, { "epoch": 0.15729018194013583, "grad_norm": 0.4012194491056345, "learning_rate": 2e-05, "loss": 5.6069, "step": 4690 }, { "epoch": 0.15732371929236186, "grad_norm": 0.4668800382061404, "learning_rate": 2e-05, "loss": 5.4884, "step": 4691 }, { "epoch": 0.1573572566445879, "grad_norm": 0.4296654944898927, "learning_rate": 2e-05, "loss": 5.5897, "step": 4692 }, { "epoch": 0.15739079399681394, "grad_norm": 0.4035621745575776, "learning_rate": 2e-05, "loss": 5.6109, "step": 4693 }, { "epoch": 0.15742433134904, "grad_norm": 0.4322843944547074, "learning_rate": 2e-05, "loss": 5.5746, "step": 4694 }, { "epoch": 0.15745786870126605, "grad_norm": 0.38864762698991595, "learning_rate": 2e-05, "loss": 5.5716, "step": 4695 }, { "epoch": 0.15749140605349207, "grad_norm": 0.4189847211092787, "learning_rate": 2e-05, "loss": 5.5547, "step": 4696 }, { "epoch": 0.15752494340571813, "grad_norm": 0.4423407131342285, "learning_rate": 2e-05, "loss": 5.6847, "step": 4697 }, { "epoch": 0.15755848075794415, "grad_norm": 0.4122596875783245, "learning_rate": 2e-05, "loss": 5.754, "step": 4698 }, { "epoch": 0.1575920181101702, "grad_norm": 0.4407755972825917, "learning_rate": 2e-05, "loss": 5.7201, "step": 4699 }, { "epoch": 0.15762555546239623, "grad_norm": 0.4291580610195268, "learning_rate": 2e-05, "loss": 5.6323, "step": 4700 }, { "epoch": 0.1576590928146223, "grad_norm": 0.40939769645822577, "learning_rate": 2e-05, "loss": 5.3782, "step": 4701 }, { "epoch": 0.15769263016684834, "grad_norm": 0.4499691125008684, "learning_rate": 2e-05, "loss": 5.4946, "step": 4702 }, { "epoch": 0.15772616751907437, "grad_norm": 0.43893247530352575, "learning_rate": 2e-05, "loss": 5.6048, "step": 4703 }, { "epoch": 0.15775970487130042, "grad_norm": 0.4227994656409229, "learning_rate": 2e-05, "loss": 5.4958, "step": 4704 }, { "epoch": 0.15779324222352645, "grad_norm": 0.41347630395965235, "learning_rate": 2e-05, "loss": 5.6048, "step": 4705 }, { "epoch": 0.1578267795757525, "grad_norm": 0.4536119484537994, "learning_rate": 2e-05, "loss": 5.4568, "step": 4706 }, { "epoch": 0.15786031692797853, "grad_norm": 0.4157516726585809, "learning_rate": 2e-05, "loss": 5.5109, "step": 4707 }, { "epoch": 0.15789385428020458, "grad_norm": 0.40338192475790696, "learning_rate": 2e-05, "loss": 5.3878, "step": 4708 }, { "epoch": 0.1579273916324306, "grad_norm": 0.43424957069710346, "learning_rate": 2e-05, "loss": 5.6226, "step": 4709 }, { "epoch": 0.15796092898465666, "grad_norm": 0.42350054780662105, "learning_rate": 2e-05, "loss": 5.5629, "step": 4710 }, { "epoch": 0.15799446633688272, "grad_norm": 0.4257892006015584, "learning_rate": 2e-05, "loss": 5.669, "step": 4711 }, { "epoch": 0.15802800368910874, "grad_norm": 0.4201764630531297, "learning_rate": 2e-05, "loss": 5.7114, "step": 4712 }, { "epoch": 0.1580615410413348, "grad_norm": 0.40827153727843213, "learning_rate": 2e-05, "loss": 5.3957, "step": 4713 }, { "epoch": 0.15809507839356082, "grad_norm": 0.4940819712913945, "learning_rate": 2e-05, "loss": 5.3855, "step": 4714 }, { "epoch": 0.15812861574578688, "grad_norm": 0.463640932986127, "learning_rate": 2e-05, "loss": 5.3915, "step": 4715 }, { "epoch": 0.1581621530980129, "grad_norm": 0.411983453698666, "learning_rate": 2e-05, "loss": 5.4615, "step": 4716 }, { "epoch": 0.15819569045023896, "grad_norm": 0.40262667443253514, "learning_rate": 2e-05, "loss": 5.521, "step": 4717 }, { "epoch": 0.15822922780246498, "grad_norm": 0.4511574811642342, "learning_rate": 2e-05, "loss": 5.6139, "step": 4718 }, { "epoch": 0.15826276515469104, "grad_norm": 0.43428418780649336, "learning_rate": 2e-05, "loss": 5.5799, "step": 4719 }, { "epoch": 0.1582963025069171, "grad_norm": 0.4682463454640334, "learning_rate": 2e-05, "loss": 5.2081, "step": 4720 }, { "epoch": 0.15832983985914312, "grad_norm": 0.44002058150000894, "learning_rate": 2e-05, "loss": 5.435, "step": 4721 }, { "epoch": 0.15836337721136917, "grad_norm": 0.41740259922206696, "learning_rate": 2e-05, "loss": 5.8229, "step": 4722 }, { "epoch": 0.1583969145635952, "grad_norm": 0.425473804186787, "learning_rate": 2e-05, "loss": 5.5958, "step": 4723 }, { "epoch": 0.15843045191582125, "grad_norm": 0.41681924612085164, "learning_rate": 2e-05, "loss": 5.6192, "step": 4724 }, { "epoch": 0.15846398926804728, "grad_norm": 0.4269175307884712, "learning_rate": 2e-05, "loss": 5.4109, "step": 4725 }, { "epoch": 0.15849752662027333, "grad_norm": 0.4396000510445011, "learning_rate": 2e-05, "loss": 5.6111, "step": 4726 }, { "epoch": 0.15853106397249936, "grad_norm": 0.46758160345628064, "learning_rate": 2e-05, "loss": 5.7831, "step": 4727 }, { "epoch": 0.1585646013247254, "grad_norm": 0.44829501533119676, "learning_rate": 2e-05, "loss": 5.5618, "step": 4728 }, { "epoch": 0.15859813867695147, "grad_norm": 0.4193610873302402, "learning_rate": 2e-05, "loss": 5.6957, "step": 4729 }, { "epoch": 0.1586316760291775, "grad_norm": 0.44033826721393143, "learning_rate": 2e-05, "loss": 5.6494, "step": 4730 }, { "epoch": 0.15866521338140355, "grad_norm": 0.4856250284734715, "learning_rate": 2e-05, "loss": 5.432, "step": 4731 }, { "epoch": 0.15869875073362957, "grad_norm": 0.406725129143079, "learning_rate": 2e-05, "loss": 5.4911, "step": 4732 }, { "epoch": 0.15873228808585563, "grad_norm": 0.4658373170440892, "learning_rate": 2e-05, "loss": 5.5757, "step": 4733 }, { "epoch": 0.15876582543808165, "grad_norm": 0.4237551333922905, "learning_rate": 2e-05, "loss": 5.596, "step": 4734 }, { "epoch": 0.1587993627903077, "grad_norm": 0.4509530018996467, "learning_rate": 2e-05, "loss": 5.3245, "step": 4735 }, { "epoch": 0.15883290014253373, "grad_norm": 0.4400817596906915, "learning_rate": 2e-05, "loss": 5.6803, "step": 4736 }, { "epoch": 0.1588664374947598, "grad_norm": 0.46286650782874206, "learning_rate": 2e-05, "loss": 5.4761, "step": 4737 }, { "epoch": 0.15889997484698584, "grad_norm": 0.45592670508055894, "learning_rate": 2e-05, "loss": 5.6401, "step": 4738 }, { "epoch": 0.15893351219921187, "grad_norm": 0.4377241413032919, "learning_rate": 2e-05, "loss": 5.5987, "step": 4739 }, { "epoch": 0.15896704955143792, "grad_norm": 0.4993259578382045, "learning_rate": 2e-05, "loss": 5.5568, "step": 4740 }, { "epoch": 0.15900058690366395, "grad_norm": 0.43833192908293306, "learning_rate": 2e-05, "loss": 5.6082, "step": 4741 }, { "epoch": 0.15903412425589, "grad_norm": 0.4324769231115981, "learning_rate": 2e-05, "loss": 5.3558, "step": 4742 }, { "epoch": 0.15906766160811603, "grad_norm": 0.44209714016089147, "learning_rate": 2e-05, "loss": 5.6842, "step": 4743 }, { "epoch": 0.15910119896034208, "grad_norm": 0.44985123465127624, "learning_rate": 2e-05, "loss": 5.6004, "step": 4744 }, { "epoch": 0.1591347363125681, "grad_norm": 0.4264902132668579, "learning_rate": 2e-05, "loss": 5.6166, "step": 4745 }, { "epoch": 0.15916827366479416, "grad_norm": 0.4226748138826556, "learning_rate": 2e-05, "loss": 5.5882, "step": 4746 }, { "epoch": 0.15920181101702022, "grad_norm": 0.4236328237616548, "learning_rate": 2e-05, "loss": 5.5112, "step": 4747 }, { "epoch": 0.15923534836924624, "grad_norm": 0.4438021174330342, "learning_rate": 2e-05, "loss": 5.471, "step": 4748 }, { "epoch": 0.1592688857214723, "grad_norm": 0.40366131131138033, "learning_rate": 2e-05, "loss": 5.6736, "step": 4749 }, { "epoch": 0.15930242307369832, "grad_norm": 0.4388491247358948, "learning_rate": 2e-05, "loss": 5.7069, "step": 4750 }, { "epoch": 0.15933596042592438, "grad_norm": 0.4241211134847492, "learning_rate": 2e-05, "loss": 5.5503, "step": 4751 }, { "epoch": 0.1593694977781504, "grad_norm": 0.4225046533412652, "learning_rate": 2e-05, "loss": 5.7381, "step": 4752 }, { "epoch": 0.15940303513037646, "grad_norm": 0.4300454298949631, "learning_rate": 2e-05, "loss": 5.54, "step": 4753 }, { "epoch": 0.1594365724826025, "grad_norm": 0.42383787840044884, "learning_rate": 2e-05, "loss": 5.6068, "step": 4754 }, { "epoch": 0.15947010983482854, "grad_norm": 0.42085593354520134, "learning_rate": 2e-05, "loss": 5.7195, "step": 4755 }, { "epoch": 0.1595036471870546, "grad_norm": 0.4709142395384992, "learning_rate": 2e-05, "loss": 5.549, "step": 4756 }, { "epoch": 0.15953718453928062, "grad_norm": 0.41423329210768894, "learning_rate": 2e-05, "loss": 5.5906, "step": 4757 }, { "epoch": 0.15957072189150667, "grad_norm": 0.40084794722126926, "learning_rate": 2e-05, "loss": 5.4203, "step": 4758 }, { "epoch": 0.1596042592437327, "grad_norm": 0.42084128803791687, "learning_rate": 2e-05, "loss": 5.6805, "step": 4759 }, { "epoch": 0.15963779659595875, "grad_norm": 0.48017709073856907, "learning_rate": 2e-05, "loss": 5.3206, "step": 4760 }, { "epoch": 0.15967133394818478, "grad_norm": 0.40460402148100805, "learning_rate": 2e-05, "loss": 5.4786, "step": 4761 }, { "epoch": 0.15970487130041083, "grad_norm": 0.40329922553067554, "learning_rate": 2e-05, "loss": 5.6048, "step": 4762 }, { "epoch": 0.1597384086526369, "grad_norm": 0.4171420215428636, "learning_rate": 2e-05, "loss": 5.6167, "step": 4763 }, { "epoch": 0.1597719460048629, "grad_norm": 0.4120346471631478, "learning_rate": 2e-05, "loss": 5.4262, "step": 4764 }, { "epoch": 0.15980548335708897, "grad_norm": 0.4236873313061077, "learning_rate": 2e-05, "loss": 5.6468, "step": 4765 }, { "epoch": 0.159839020709315, "grad_norm": 0.4548880899510432, "learning_rate": 2e-05, "loss": 5.7201, "step": 4766 }, { "epoch": 0.15987255806154105, "grad_norm": 0.41561597995905425, "learning_rate": 2e-05, "loss": 5.4533, "step": 4767 }, { "epoch": 0.15990609541376707, "grad_norm": 0.40191506497958385, "learning_rate": 2e-05, "loss": 5.5458, "step": 4768 }, { "epoch": 0.15993963276599313, "grad_norm": 0.4277806669754893, "learning_rate": 2e-05, "loss": 5.461, "step": 4769 }, { "epoch": 0.15997317011821915, "grad_norm": 0.41226013945180723, "learning_rate": 2e-05, "loss": 5.7365, "step": 4770 }, { "epoch": 0.1600067074704452, "grad_norm": 0.4037441220702148, "learning_rate": 2e-05, "loss": 5.3765, "step": 4771 }, { "epoch": 0.16004024482267126, "grad_norm": 0.4218957419577253, "learning_rate": 2e-05, "loss": 5.6957, "step": 4772 }, { "epoch": 0.1600737821748973, "grad_norm": 0.42188756050618076, "learning_rate": 2e-05, "loss": 5.5382, "step": 4773 }, { "epoch": 0.16010731952712334, "grad_norm": 0.4396905048744676, "learning_rate": 2e-05, "loss": 5.616, "step": 4774 }, { "epoch": 0.16014085687934937, "grad_norm": 0.42708871268152243, "learning_rate": 2e-05, "loss": 5.6138, "step": 4775 }, { "epoch": 0.16017439423157542, "grad_norm": 0.44386397979892595, "learning_rate": 2e-05, "loss": 5.5166, "step": 4776 }, { "epoch": 0.16020793158380145, "grad_norm": 0.4102176795394533, "learning_rate": 2e-05, "loss": 5.5266, "step": 4777 }, { "epoch": 0.1602414689360275, "grad_norm": 0.4458595583566238, "learning_rate": 2e-05, "loss": 5.3219, "step": 4778 }, { "epoch": 0.16027500628825353, "grad_norm": 0.43233723952926095, "learning_rate": 2e-05, "loss": 5.7287, "step": 4779 }, { "epoch": 0.16030854364047958, "grad_norm": 0.41476309812346485, "learning_rate": 2e-05, "loss": 5.5756, "step": 4780 }, { "epoch": 0.16034208099270564, "grad_norm": 0.4226310025089891, "learning_rate": 2e-05, "loss": 5.6206, "step": 4781 }, { "epoch": 0.16037561834493166, "grad_norm": 0.4205177149141529, "learning_rate": 2e-05, "loss": 5.4136, "step": 4782 }, { "epoch": 0.16040915569715772, "grad_norm": 0.40147283918804644, "learning_rate": 2e-05, "loss": 5.5981, "step": 4783 }, { "epoch": 0.16044269304938374, "grad_norm": 0.4230902295800827, "learning_rate": 2e-05, "loss": 5.8256, "step": 4784 }, { "epoch": 0.1604762304016098, "grad_norm": 0.4237163724543084, "learning_rate": 2e-05, "loss": 5.6309, "step": 4785 }, { "epoch": 0.16050976775383582, "grad_norm": 0.40016675101168175, "learning_rate": 2e-05, "loss": 5.5498, "step": 4786 }, { "epoch": 0.16054330510606188, "grad_norm": 0.42873812864944316, "learning_rate": 2e-05, "loss": 5.5801, "step": 4787 }, { "epoch": 0.1605768424582879, "grad_norm": 0.4273126025344872, "learning_rate": 2e-05, "loss": 5.5656, "step": 4788 }, { "epoch": 0.16061037981051396, "grad_norm": 0.4162867031125269, "learning_rate": 2e-05, "loss": 5.7187, "step": 4789 }, { "epoch": 0.16064391716274, "grad_norm": 0.4285895992268759, "learning_rate": 2e-05, "loss": 5.6021, "step": 4790 }, { "epoch": 0.16067745451496604, "grad_norm": 0.4168013124326062, "learning_rate": 2e-05, "loss": 5.4684, "step": 4791 }, { "epoch": 0.1607109918671921, "grad_norm": 0.4167173057005659, "learning_rate": 2e-05, "loss": 5.3968, "step": 4792 }, { "epoch": 0.16074452921941812, "grad_norm": 0.40432212357992964, "learning_rate": 2e-05, "loss": 5.763, "step": 4793 }, { "epoch": 0.16077806657164417, "grad_norm": 0.4131751881098649, "learning_rate": 2e-05, "loss": 5.3205, "step": 4794 }, { "epoch": 0.1608116039238702, "grad_norm": 0.4454447171069459, "learning_rate": 2e-05, "loss": 5.6989, "step": 4795 }, { "epoch": 0.16084514127609625, "grad_norm": 0.4465398049123125, "learning_rate": 2e-05, "loss": 5.5093, "step": 4796 }, { "epoch": 0.16087867862832228, "grad_norm": 0.4048735317791292, "learning_rate": 2e-05, "loss": 5.6952, "step": 4797 }, { "epoch": 0.16091221598054833, "grad_norm": 0.4217391576677411, "learning_rate": 2e-05, "loss": 5.4115, "step": 4798 }, { "epoch": 0.1609457533327744, "grad_norm": 0.4156265252694945, "learning_rate": 2e-05, "loss": 5.5711, "step": 4799 }, { "epoch": 0.16097929068500041, "grad_norm": 0.41376582415658425, "learning_rate": 2e-05, "loss": 5.5416, "step": 4800 }, { "epoch": 0.16101282803722647, "grad_norm": 0.38937482385856137, "learning_rate": 2e-05, "loss": 5.4359, "step": 4801 }, { "epoch": 0.1610463653894525, "grad_norm": 0.4273971323611895, "learning_rate": 2e-05, "loss": 5.507, "step": 4802 }, { "epoch": 0.16107990274167855, "grad_norm": 0.4259613104539305, "learning_rate": 2e-05, "loss": 5.6931, "step": 4803 }, { "epoch": 0.16111344009390458, "grad_norm": 0.4631594274948452, "learning_rate": 2e-05, "loss": 5.4275, "step": 4804 }, { "epoch": 0.16114697744613063, "grad_norm": 0.4117462542507343, "learning_rate": 2e-05, "loss": 5.3091, "step": 4805 }, { "epoch": 0.16118051479835668, "grad_norm": 0.41152188375203985, "learning_rate": 2e-05, "loss": 5.5963, "step": 4806 }, { "epoch": 0.1612140521505827, "grad_norm": 0.4034829772486205, "learning_rate": 2e-05, "loss": 5.6501, "step": 4807 }, { "epoch": 0.16124758950280876, "grad_norm": 0.4190079831672594, "learning_rate": 2e-05, "loss": 5.6084, "step": 4808 }, { "epoch": 0.1612811268550348, "grad_norm": 0.4021292792006288, "learning_rate": 2e-05, "loss": 5.5643, "step": 4809 }, { "epoch": 0.16131466420726084, "grad_norm": 0.4569935200874294, "learning_rate": 2e-05, "loss": 5.5361, "step": 4810 }, { "epoch": 0.16134820155948687, "grad_norm": 0.42016214335390173, "learning_rate": 2e-05, "loss": 5.6697, "step": 4811 }, { "epoch": 0.16138173891171292, "grad_norm": 0.4074483514148006, "learning_rate": 2e-05, "loss": 5.5706, "step": 4812 }, { "epoch": 0.16141527626393895, "grad_norm": 0.43393799833927105, "learning_rate": 2e-05, "loss": 5.5724, "step": 4813 }, { "epoch": 0.161448813616165, "grad_norm": 0.4057020019472389, "learning_rate": 2e-05, "loss": 5.6994, "step": 4814 }, { "epoch": 0.16148235096839106, "grad_norm": 0.41625990353564607, "learning_rate": 2e-05, "loss": 5.5007, "step": 4815 }, { "epoch": 0.16151588832061708, "grad_norm": 0.44370889116399953, "learning_rate": 2e-05, "loss": 5.4397, "step": 4816 }, { "epoch": 0.16154942567284314, "grad_norm": 0.40075579385420995, "learning_rate": 2e-05, "loss": 5.4089, "step": 4817 }, { "epoch": 0.16158296302506917, "grad_norm": 0.41363498361193596, "learning_rate": 2e-05, "loss": 5.5189, "step": 4818 }, { "epoch": 0.16161650037729522, "grad_norm": 0.4026280716377453, "learning_rate": 2e-05, "loss": 5.5236, "step": 4819 }, { "epoch": 0.16165003772952125, "grad_norm": 0.4153893776297512, "learning_rate": 2e-05, "loss": 5.5466, "step": 4820 }, { "epoch": 0.1616835750817473, "grad_norm": 0.39083667787946375, "learning_rate": 2e-05, "loss": 5.5668, "step": 4821 }, { "epoch": 0.16171711243397333, "grad_norm": 0.4112071020652012, "learning_rate": 2e-05, "loss": 5.5849, "step": 4822 }, { "epoch": 0.16175064978619938, "grad_norm": 0.4203488275233119, "learning_rate": 2e-05, "loss": 5.4845, "step": 4823 }, { "epoch": 0.16178418713842543, "grad_norm": 0.4247001154101817, "learning_rate": 2e-05, "loss": 5.6637, "step": 4824 }, { "epoch": 0.16181772449065146, "grad_norm": 0.41836813596572336, "learning_rate": 2e-05, "loss": 5.4117, "step": 4825 }, { "epoch": 0.16185126184287751, "grad_norm": 0.40571743065214233, "learning_rate": 2e-05, "loss": 5.6446, "step": 4826 }, { "epoch": 0.16188479919510354, "grad_norm": 0.4611942432363951, "learning_rate": 2e-05, "loss": 5.6226, "step": 4827 }, { "epoch": 0.1619183365473296, "grad_norm": 0.43329430375967615, "learning_rate": 2e-05, "loss": 5.3885, "step": 4828 }, { "epoch": 0.16195187389955562, "grad_norm": 0.4186054021582019, "learning_rate": 2e-05, "loss": 5.5161, "step": 4829 }, { "epoch": 0.16198541125178167, "grad_norm": 0.4364243162469416, "learning_rate": 2e-05, "loss": 5.6115, "step": 4830 }, { "epoch": 0.1620189486040077, "grad_norm": 0.4308759633035056, "learning_rate": 2e-05, "loss": 5.5005, "step": 4831 }, { "epoch": 0.16205248595623375, "grad_norm": 0.44336318372619565, "learning_rate": 2e-05, "loss": 5.5764, "step": 4832 }, { "epoch": 0.1620860233084598, "grad_norm": 0.42241423810374185, "learning_rate": 2e-05, "loss": 5.5063, "step": 4833 }, { "epoch": 0.16211956066068584, "grad_norm": 0.4646515952998414, "learning_rate": 2e-05, "loss": 5.4393, "step": 4834 }, { "epoch": 0.1621530980129119, "grad_norm": 0.44724602926432605, "learning_rate": 2e-05, "loss": 5.6351, "step": 4835 }, { "epoch": 0.16218663536513792, "grad_norm": 0.42856011123113746, "learning_rate": 2e-05, "loss": 5.4433, "step": 4836 }, { "epoch": 0.16222017271736397, "grad_norm": 0.4551376272747264, "learning_rate": 2e-05, "loss": 5.5137, "step": 4837 }, { "epoch": 0.16225371006959, "grad_norm": 0.44054171274159726, "learning_rate": 2e-05, "loss": 5.367, "step": 4838 }, { "epoch": 0.16228724742181605, "grad_norm": 0.42925566302369, "learning_rate": 2e-05, "loss": 5.5387, "step": 4839 }, { "epoch": 0.16232078477404208, "grad_norm": 0.42583425024466315, "learning_rate": 2e-05, "loss": 5.605, "step": 4840 }, { "epoch": 0.16235432212626813, "grad_norm": 0.4142102638974413, "learning_rate": 2e-05, "loss": 5.5054, "step": 4841 }, { "epoch": 0.16238785947849418, "grad_norm": 0.4281667214509998, "learning_rate": 2e-05, "loss": 5.4676, "step": 4842 }, { "epoch": 0.1624213968307202, "grad_norm": 0.41356428489457303, "learning_rate": 2e-05, "loss": 5.5605, "step": 4843 }, { "epoch": 0.16245493418294626, "grad_norm": 0.42049653278317906, "learning_rate": 2e-05, "loss": 5.5712, "step": 4844 }, { "epoch": 0.1624884715351723, "grad_norm": 0.3984692115298974, "learning_rate": 2e-05, "loss": 5.4928, "step": 4845 }, { "epoch": 0.16252200888739834, "grad_norm": 0.4425142300919517, "learning_rate": 2e-05, "loss": 5.5382, "step": 4846 }, { "epoch": 0.16255554623962437, "grad_norm": 0.40190572087624743, "learning_rate": 2e-05, "loss": 5.6762, "step": 4847 }, { "epoch": 0.16258908359185043, "grad_norm": 0.42357999664405577, "learning_rate": 2e-05, "loss": 5.6247, "step": 4848 }, { "epoch": 0.16262262094407645, "grad_norm": 0.40341650726177597, "learning_rate": 2e-05, "loss": 5.6207, "step": 4849 }, { "epoch": 0.1626561582963025, "grad_norm": 0.4214689893077302, "learning_rate": 2e-05, "loss": 5.5477, "step": 4850 }, { "epoch": 0.16268969564852856, "grad_norm": 0.39910301828785283, "learning_rate": 2e-05, "loss": 5.481, "step": 4851 }, { "epoch": 0.16272323300075459, "grad_norm": 0.4219860982061035, "learning_rate": 2e-05, "loss": 5.5158, "step": 4852 }, { "epoch": 0.16275677035298064, "grad_norm": 0.4508324597256168, "learning_rate": 2e-05, "loss": 5.3362, "step": 4853 }, { "epoch": 0.16279030770520667, "grad_norm": 0.4483098294782542, "learning_rate": 2e-05, "loss": 5.6471, "step": 4854 }, { "epoch": 0.16282384505743272, "grad_norm": 0.42168614431607654, "learning_rate": 2e-05, "loss": 5.5008, "step": 4855 }, { "epoch": 0.16285738240965875, "grad_norm": 0.43177743943828356, "learning_rate": 2e-05, "loss": 5.8237, "step": 4856 }, { "epoch": 0.1628909197618848, "grad_norm": 0.47571135384765917, "learning_rate": 2e-05, "loss": 5.4195, "step": 4857 }, { "epoch": 0.16292445711411085, "grad_norm": 0.38844849106162455, "learning_rate": 2e-05, "loss": 5.3828, "step": 4858 }, { "epoch": 0.16295799446633688, "grad_norm": 0.4256503811952549, "learning_rate": 2e-05, "loss": 5.4333, "step": 4859 }, { "epoch": 0.16299153181856293, "grad_norm": 0.4595281653491619, "learning_rate": 2e-05, "loss": 5.5333, "step": 4860 }, { "epoch": 0.16302506917078896, "grad_norm": 0.4138765376256796, "learning_rate": 2e-05, "loss": 5.5421, "step": 4861 }, { "epoch": 0.16305860652301502, "grad_norm": 0.4036136939217679, "learning_rate": 2e-05, "loss": 5.5614, "step": 4862 }, { "epoch": 0.16309214387524104, "grad_norm": 0.40355628347963307, "learning_rate": 2e-05, "loss": 5.517, "step": 4863 }, { "epoch": 0.1631256812274671, "grad_norm": 0.44505498680071687, "learning_rate": 2e-05, "loss": 5.6149, "step": 4864 }, { "epoch": 0.16315921857969312, "grad_norm": 0.42845656400252885, "learning_rate": 2e-05, "loss": 5.5575, "step": 4865 }, { "epoch": 0.16319275593191918, "grad_norm": 0.4061331546475505, "learning_rate": 2e-05, "loss": 5.4272, "step": 4866 }, { "epoch": 0.16322629328414523, "grad_norm": 0.4274109896719184, "learning_rate": 2e-05, "loss": 5.3765, "step": 4867 }, { "epoch": 0.16325983063637126, "grad_norm": 0.41530465182655996, "learning_rate": 2e-05, "loss": 5.7094, "step": 4868 }, { "epoch": 0.1632933679885973, "grad_norm": 0.408025896152641, "learning_rate": 2e-05, "loss": 5.6072, "step": 4869 }, { "epoch": 0.16332690534082334, "grad_norm": 0.4400504621295664, "learning_rate": 2e-05, "loss": 5.6958, "step": 4870 }, { "epoch": 0.1633604426930494, "grad_norm": 0.4096144342352605, "learning_rate": 2e-05, "loss": 5.6726, "step": 4871 }, { "epoch": 0.16339398004527542, "grad_norm": 0.44656474330009244, "learning_rate": 2e-05, "loss": 5.4683, "step": 4872 }, { "epoch": 0.16342751739750147, "grad_norm": 0.4079120215722124, "learning_rate": 2e-05, "loss": 5.7129, "step": 4873 }, { "epoch": 0.1634610547497275, "grad_norm": 0.4434021416730648, "learning_rate": 2e-05, "loss": 5.7438, "step": 4874 }, { "epoch": 0.16349459210195355, "grad_norm": 0.40777778209463317, "learning_rate": 2e-05, "loss": 5.7594, "step": 4875 }, { "epoch": 0.1635281294541796, "grad_norm": 0.40635184119667384, "learning_rate": 2e-05, "loss": 5.4932, "step": 4876 }, { "epoch": 0.16356166680640563, "grad_norm": 0.44232688070317433, "learning_rate": 2e-05, "loss": 5.605, "step": 4877 }, { "epoch": 0.16359520415863169, "grad_norm": 0.39528582671020696, "learning_rate": 2e-05, "loss": 5.8379, "step": 4878 }, { "epoch": 0.1636287415108577, "grad_norm": 0.3925088814989561, "learning_rate": 2e-05, "loss": 5.4653, "step": 4879 }, { "epoch": 0.16366227886308377, "grad_norm": 0.45085087772521637, "learning_rate": 2e-05, "loss": 5.4074, "step": 4880 }, { "epoch": 0.1636958162153098, "grad_norm": 0.39242615272012954, "learning_rate": 2e-05, "loss": 5.5387, "step": 4881 }, { "epoch": 0.16372935356753585, "grad_norm": 0.4115754242484157, "learning_rate": 2e-05, "loss": 5.5618, "step": 4882 }, { "epoch": 0.16376289091976187, "grad_norm": 0.4381949349215291, "learning_rate": 2e-05, "loss": 5.3719, "step": 4883 }, { "epoch": 0.16379642827198793, "grad_norm": 0.4116401830819515, "learning_rate": 2e-05, "loss": 5.6065, "step": 4884 }, { "epoch": 0.16382996562421398, "grad_norm": 0.39991449279881525, "learning_rate": 2e-05, "loss": 5.5514, "step": 4885 }, { "epoch": 0.16386350297644, "grad_norm": 0.4145773773074368, "learning_rate": 2e-05, "loss": 5.4609, "step": 4886 }, { "epoch": 0.16389704032866606, "grad_norm": 0.42149697550047976, "learning_rate": 2e-05, "loss": 5.6968, "step": 4887 }, { "epoch": 0.1639305776808921, "grad_norm": 0.4201471545563952, "learning_rate": 2e-05, "loss": 5.6435, "step": 4888 }, { "epoch": 0.16396411503311814, "grad_norm": 0.43999829033630194, "learning_rate": 2e-05, "loss": 5.4188, "step": 4889 }, { "epoch": 0.16399765238534417, "grad_norm": 0.42786027395644, "learning_rate": 2e-05, "loss": 5.5609, "step": 4890 }, { "epoch": 0.16403118973757022, "grad_norm": 0.4497902899503063, "learning_rate": 2e-05, "loss": 5.635, "step": 4891 }, { "epoch": 0.16406472708979625, "grad_norm": 0.43378825325963005, "learning_rate": 2e-05, "loss": 5.3882, "step": 4892 }, { "epoch": 0.1640982644420223, "grad_norm": 0.45557866962513305, "learning_rate": 2e-05, "loss": 5.6103, "step": 4893 }, { "epoch": 0.16413180179424836, "grad_norm": 0.456689786818748, "learning_rate": 2e-05, "loss": 5.6192, "step": 4894 }, { "epoch": 0.16416533914647438, "grad_norm": 0.4188917745168141, "learning_rate": 2e-05, "loss": 5.4876, "step": 4895 }, { "epoch": 0.16419887649870044, "grad_norm": 0.40955872888363454, "learning_rate": 2e-05, "loss": 5.5904, "step": 4896 }, { "epoch": 0.16423241385092646, "grad_norm": 0.4257617067030856, "learning_rate": 2e-05, "loss": 5.5206, "step": 4897 }, { "epoch": 0.16426595120315252, "grad_norm": 0.4367759545861786, "learning_rate": 2e-05, "loss": 5.4043, "step": 4898 }, { "epoch": 0.16429948855537854, "grad_norm": 0.38718313374481284, "learning_rate": 2e-05, "loss": 5.4434, "step": 4899 }, { "epoch": 0.1643330259076046, "grad_norm": 0.48705640536207084, "learning_rate": 2e-05, "loss": 5.5157, "step": 4900 }, { "epoch": 0.16436656325983062, "grad_norm": 0.4478282358213817, "learning_rate": 2e-05, "loss": 5.6844, "step": 4901 }, { "epoch": 0.16440010061205668, "grad_norm": 0.43423053654633637, "learning_rate": 2e-05, "loss": 5.6892, "step": 4902 }, { "epoch": 0.16443363796428273, "grad_norm": 0.4053085478866008, "learning_rate": 2e-05, "loss": 5.509, "step": 4903 }, { "epoch": 0.16446717531650876, "grad_norm": 0.43300973026709816, "learning_rate": 2e-05, "loss": 5.6117, "step": 4904 }, { "epoch": 0.1645007126687348, "grad_norm": 0.4115014985492461, "learning_rate": 2e-05, "loss": 5.5928, "step": 4905 }, { "epoch": 0.16453425002096084, "grad_norm": 0.4052896989294002, "learning_rate": 2e-05, "loss": 5.5109, "step": 4906 }, { "epoch": 0.1645677873731869, "grad_norm": 0.433731950367646, "learning_rate": 2e-05, "loss": 5.4337, "step": 4907 }, { "epoch": 0.16460132472541292, "grad_norm": 0.42712886805976696, "learning_rate": 2e-05, "loss": 5.4647, "step": 4908 }, { "epoch": 0.16463486207763897, "grad_norm": 0.4131256589163218, "learning_rate": 2e-05, "loss": 5.4588, "step": 4909 }, { "epoch": 0.16466839942986503, "grad_norm": 0.43342844848672163, "learning_rate": 2e-05, "loss": 5.5988, "step": 4910 }, { "epoch": 0.16470193678209105, "grad_norm": 0.4197988056735222, "learning_rate": 2e-05, "loss": 5.5879, "step": 4911 }, { "epoch": 0.1647354741343171, "grad_norm": 0.4216590455327215, "learning_rate": 2e-05, "loss": 5.7033, "step": 4912 }, { "epoch": 0.16476901148654313, "grad_norm": 0.4367686114308364, "learning_rate": 2e-05, "loss": 5.4908, "step": 4913 }, { "epoch": 0.1648025488387692, "grad_norm": 0.43725865448606815, "learning_rate": 2e-05, "loss": 5.7342, "step": 4914 }, { "epoch": 0.1648360861909952, "grad_norm": 0.42887438312762055, "learning_rate": 2e-05, "loss": 5.5843, "step": 4915 }, { "epoch": 0.16486962354322127, "grad_norm": 0.41680897949997175, "learning_rate": 2e-05, "loss": 5.4581, "step": 4916 }, { "epoch": 0.1649031608954473, "grad_norm": 0.47604151158221897, "learning_rate": 2e-05, "loss": 5.6823, "step": 4917 }, { "epoch": 0.16493669824767335, "grad_norm": 0.4059108450418923, "learning_rate": 2e-05, "loss": 5.6278, "step": 4918 }, { "epoch": 0.1649702355998994, "grad_norm": 0.40516701479019157, "learning_rate": 2e-05, "loss": 5.5922, "step": 4919 }, { "epoch": 0.16500377295212543, "grad_norm": 0.4379080151011142, "learning_rate": 2e-05, "loss": 5.6519, "step": 4920 }, { "epoch": 0.16503731030435148, "grad_norm": 0.39782763184035846, "learning_rate": 2e-05, "loss": 5.468, "step": 4921 }, { "epoch": 0.1650708476565775, "grad_norm": 0.42100208527391586, "learning_rate": 2e-05, "loss": 5.5326, "step": 4922 }, { "epoch": 0.16510438500880356, "grad_norm": 0.41548950583849864, "learning_rate": 2e-05, "loss": 5.5431, "step": 4923 }, { "epoch": 0.1651379223610296, "grad_norm": 0.41543147659358065, "learning_rate": 2e-05, "loss": 5.7637, "step": 4924 }, { "epoch": 0.16517145971325564, "grad_norm": 0.420030040099365, "learning_rate": 2e-05, "loss": 5.531, "step": 4925 }, { "epoch": 0.16520499706548167, "grad_norm": 0.42226336042065765, "learning_rate": 2e-05, "loss": 5.4019, "step": 4926 }, { "epoch": 0.16523853441770772, "grad_norm": 0.40654838150258105, "learning_rate": 2e-05, "loss": 5.6303, "step": 4927 }, { "epoch": 0.16527207176993378, "grad_norm": 0.4250770212788331, "learning_rate": 2e-05, "loss": 5.5892, "step": 4928 }, { "epoch": 0.1653056091221598, "grad_norm": 0.41537288712772447, "learning_rate": 2e-05, "loss": 5.6735, "step": 4929 }, { "epoch": 0.16533914647438586, "grad_norm": 0.4057249384215996, "learning_rate": 2e-05, "loss": 5.6494, "step": 4930 }, { "epoch": 0.16537268382661188, "grad_norm": 0.3907870849071589, "learning_rate": 2e-05, "loss": 5.6093, "step": 4931 }, { "epoch": 0.16540622117883794, "grad_norm": 0.4066123318220848, "learning_rate": 2e-05, "loss": 5.4786, "step": 4932 }, { "epoch": 0.16543975853106396, "grad_norm": 0.39745294029522327, "learning_rate": 2e-05, "loss": 5.4617, "step": 4933 }, { "epoch": 0.16547329588329002, "grad_norm": 0.40308145393100475, "learning_rate": 2e-05, "loss": 5.5515, "step": 4934 }, { "epoch": 0.16550683323551604, "grad_norm": 0.4204885215868635, "learning_rate": 2e-05, "loss": 5.527, "step": 4935 }, { "epoch": 0.1655403705877421, "grad_norm": 0.428715911492752, "learning_rate": 2e-05, "loss": 5.5947, "step": 4936 }, { "epoch": 0.16557390793996815, "grad_norm": 0.3953868142845103, "learning_rate": 2e-05, "loss": 5.5961, "step": 4937 }, { "epoch": 0.16560744529219418, "grad_norm": 0.3975394852176204, "learning_rate": 2e-05, "loss": 5.5288, "step": 4938 }, { "epoch": 0.16564098264442023, "grad_norm": 0.4268351467577164, "learning_rate": 2e-05, "loss": 5.5446, "step": 4939 }, { "epoch": 0.16567451999664626, "grad_norm": 0.4221060048959595, "learning_rate": 2e-05, "loss": 5.547, "step": 4940 }, { "epoch": 0.1657080573488723, "grad_norm": 0.4011830701549682, "learning_rate": 2e-05, "loss": 5.7654, "step": 4941 }, { "epoch": 0.16574159470109834, "grad_norm": 0.41278117285379157, "learning_rate": 2e-05, "loss": 5.4812, "step": 4942 }, { "epoch": 0.1657751320533244, "grad_norm": 0.3871819796952964, "learning_rate": 2e-05, "loss": 5.4634, "step": 4943 }, { "epoch": 0.16580866940555042, "grad_norm": 0.40150837497341796, "learning_rate": 2e-05, "loss": 5.6934, "step": 4944 }, { "epoch": 0.16584220675777647, "grad_norm": 0.413035277069484, "learning_rate": 2e-05, "loss": 5.6385, "step": 4945 }, { "epoch": 0.16587574411000253, "grad_norm": 0.4146452986953042, "learning_rate": 2e-05, "loss": 5.6515, "step": 4946 }, { "epoch": 0.16590928146222855, "grad_norm": 0.4022525571518757, "learning_rate": 2e-05, "loss": 5.672, "step": 4947 }, { "epoch": 0.1659428188144546, "grad_norm": 0.4095101773174439, "learning_rate": 2e-05, "loss": 5.6531, "step": 4948 }, { "epoch": 0.16597635616668063, "grad_norm": 0.4338762894320066, "learning_rate": 2e-05, "loss": 5.4444, "step": 4949 }, { "epoch": 0.1660098935189067, "grad_norm": 0.4230282954599083, "learning_rate": 2e-05, "loss": 5.6014, "step": 4950 }, { "epoch": 0.1660434308711327, "grad_norm": 0.42261074128673637, "learning_rate": 2e-05, "loss": 5.7474, "step": 4951 }, { "epoch": 0.16607696822335877, "grad_norm": 0.42318929394012644, "learning_rate": 2e-05, "loss": 5.4734, "step": 4952 }, { "epoch": 0.16611050557558482, "grad_norm": 0.4095829414393271, "learning_rate": 2e-05, "loss": 5.6283, "step": 4953 }, { "epoch": 0.16614404292781085, "grad_norm": 0.4328687350815599, "learning_rate": 2e-05, "loss": 5.6275, "step": 4954 }, { "epoch": 0.1661775802800369, "grad_norm": 0.41065038336349874, "learning_rate": 2e-05, "loss": 5.3806, "step": 4955 }, { "epoch": 0.16621111763226293, "grad_norm": 0.4059744689112646, "learning_rate": 2e-05, "loss": 5.4754, "step": 4956 }, { "epoch": 0.16624465498448898, "grad_norm": 0.4240950482810038, "learning_rate": 2e-05, "loss": 5.5923, "step": 4957 }, { "epoch": 0.166278192336715, "grad_norm": 0.40367617108827913, "learning_rate": 2e-05, "loss": 5.4717, "step": 4958 }, { "epoch": 0.16631172968894106, "grad_norm": 0.4171753772397056, "learning_rate": 2e-05, "loss": 5.5813, "step": 4959 }, { "epoch": 0.1663452670411671, "grad_norm": 0.40412845450352486, "learning_rate": 2e-05, "loss": 5.6049, "step": 4960 }, { "epoch": 0.16637880439339314, "grad_norm": 0.410209417719671, "learning_rate": 2e-05, "loss": 5.6651, "step": 4961 }, { "epoch": 0.1664123417456192, "grad_norm": 0.4029183132550984, "learning_rate": 2e-05, "loss": 5.5748, "step": 4962 }, { "epoch": 0.16644587909784522, "grad_norm": 0.4291195899940135, "learning_rate": 2e-05, "loss": 5.4911, "step": 4963 }, { "epoch": 0.16647941645007128, "grad_norm": 0.4126100878659328, "learning_rate": 2e-05, "loss": 5.3996, "step": 4964 }, { "epoch": 0.1665129538022973, "grad_norm": 0.47787103877283055, "learning_rate": 2e-05, "loss": 5.7709, "step": 4965 }, { "epoch": 0.16654649115452336, "grad_norm": 0.44044042981386317, "learning_rate": 2e-05, "loss": 5.5175, "step": 4966 }, { "epoch": 0.16658002850674938, "grad_norm": 0.41529301809236085, "learning_rate": 2e-05, "loss": 5.6184, "step": 4967 }, { "epoch": 0.16661356585897544, "grad_norm": 0.488394155501217, "learning_rate": 2e-05, "loss": 5.7177, "step": 4968 }, { "epoch": 0.16664710321120146, "grad_norm": 0.47143200995741935, "learning_rate": 2e-05, "loss": 5.6239, "step": 4969 }, { "epoch": 0.16668064056342752, "grad_norm": 0.43571296538231075, "learning_rate": 2e-05, "loss": 5.5104, "step": 4970 }, { "epoch": 0.16671417791565357, "grad_norm": 0.4422585718013682, "learning_rate": 2e-05, "loss": 5.4322, "step": 4971 }, { "epoch": 0.1667477152678796, "grad_norm": 0.37512216006838794, "learning_rate": 2e-05, "loss": 5.4452, "step": 4972 }, { "epoch": 0.16678125262010565, "grad_norm": 0.41456789189824, "learning_rate": 2e-05, "loss": 5.4675, "step": 4973 }, { "epoch": 0.16681478997233168, "grad_norm": 0.41086137189261085, "learning_rate": 2e-05, "loss": 5.7246, "step": 4974 }, { "epoch": 0.16684832732455773, "grad_norm": 0.43426311325516387, "learning_rate": 2e-05, "loss": 5.669, "step": 4975 }, { "epoch": 0.16688186467678376, "grad_norm": 0.43002993016202745, "learning_rate": 2e-05, "loss": 5.5329, "step": 4976 }, { "epoch": 0.1669154020290098, "grad_norm": 0.42024875611973866, "learning_rate": 2e-05, "loss": 5.5196, "step": 4977 }, { "epoch": 0.16694893938123584, "grad_norm": 0.4603965936280258, "learning_rate": 2e-05, "loss": 5.4715, "step": 4978 }, { "epoch": 0.1669824767334619, "grad_norm": 0.405114010724049, "learning_rate": 2e-05, "loss": 5.6061, "step": 4979 }, { "epoch": 0.16701601408568795, "grad_norm": 0.4735474718685167, "learning_rate": 2e-05, "loss": 5.4928, "step": 4980 }, { "epoch": 0.16704955143791397, "grad_norm": 0.4722764343729304, "learning_rate": 2e-05, "loss": 5.4749, "step": 4981 }, { "epoch": 0.16708308879014003, "grad_norm": 0.41422958832084567, "learning_rate": 2e-05, "loss": 5.5011, "step": 4982 }, { "epoch": 0.16711662614236605, "grad_norm": 0.5248679114266839, "learning_rate": 2e-05, "loss": 5.5496, "step": 4983 }, { "epoch": 0.1671501634945921, "grad_norm": 0.4640666831917026, "learning_rate": 2e-05, "loss": 5.6939, "step": 4984 }, { "epoch": 0.16718370084681813, "grad_norm": 0.41121796386319076, "learning_rate": 2e-05, "loss": 5.5718, "step": 4985 }, { "epoch": 0.1672172381990442, "grad_norm": 0.4247223901240817, "learning_rate": 2e-05, "loss": 5.595, "step": 4986 }, { "epoch": 0.16725077555127021, "grad_norm": 0.46729709143925685, "learning_rate": 2e-05, "loss": 5.6204, "step": 4987 }, { "epoch": 0.16728431290349627, "grad_norm": 0.4359559160896439, "learning_rate": 2e-05, "loss": 5.4799, "step": 4988 }, { "epoch": 0.16731785025572232, "grad_norm": 0.4387679351043983, "learning_rate": 2e-05, "loss": 5.3496, "step": 4989 }, { "epoch": 0.16735138760794835, "grad_norm": 0.45169304390816395, "learning_rate": 2e-05, "loss": 5.3163, "step": 4990 }, { "epoch": 0.1673849249601744, "grad_norm": 0.41586441228683774, "learning_rate": 2e-05, "loss": 5.5237, "step": 4991 }, { "epoch": 0.16741846231240043, "grad_norm": 0.4308928907565406, "learning_rate": 2e-05, "loss": 5.4626, "step": 4992 }, { "epoch": 0.16745199966462648, "grad_norm": 0.4231363120287688, "learning_rate": 2e-05, "loss": 5.7181, "step": 4993 }, { "epoch": 0.1674855370168525, "grad_norm": 0.4169726572486246, "learning_rate": 2e-05, "loss": 5.7085, "step": 4994 }, { "epoch": 0.16751907436907856, "grad_norm": 0.44132058266606294, "learning_rate": 2e-05, "loss": 5.4598, "step": 4995 }, { "epoch": 0.1675526117213046, "grad_norm": 0.4204643441035701, "learning_rate": 2e-05, "loss": 5.4804, "step": 4996 }, { "epoch": 0.16758614907353064, "grad_norm": 0.4238125481221141, "learning_rate": 2e-05, "loss": 5.5179, "step": 4997 }, { "epoch": 0.1676196864257567, "grad_norm": 0.4452358235135272, "learning_rate": 2e-05, "loss": 5.712, "step": 4998 }, { "epoch": 0.16765322377798272, "grad_norm": 0.4179309603242429, "learning_rate": 2e-05, "loss": 5.5807, "step": 4999 }, { "epoch": 0.16768676113020878, "grad_norm": 0.5209430734634769, "learning_rate": 2e-05, "loss": 5.3637, "step": 5000 }, { "epoch": 0.1677202984824348, "grad_norm": 0.4128241015847656, "learning_rate": 2e-05, "loss": 5.4513, "step": 5001 }, { "epoch": 0.16775383583466086, "grad_norm": 0.41911431247206526, "learning_rate": 2e-05, "loss": 5.6005, "step": 5002 }, { "epoch": 0.16778737318688688, "grad_norm": 0.467033594583373, "learning_rate": 2e-05, "loss": 5.6879, "step": 5003 }, { "epoch": 0.16782091053911294, "grad_norm": 0.4321722144503195, "learning_rate": 2e-05, "loss": 5.5181, "step": 5004 }, { "epoch": 0.167854447891339, "grad_norm": 0.40669165877342833, "learning_rate": 2e-05, "loss": 5.7567, "step": 5005 }, { "epoch": 0.16788798524356502, "grad_norm": 0.429335183097458, "learning_rate": 2e-05, "loss": 5.5639, "step": 5006 }, { "epoch": 0.16792152259579107, "grad_norm": 0.4465522408141212, "learning_rate": 2e-05, "loss": 5.6221, "step": 5007 }, { "epoch": 0.1679550599480171, "grad_norm": 0.45116281846169654, "learning_rate": 2e-05, "loss": 5.4162, "step": 5008 }, { "epoch": 0.16798859730024315, "grad_norm": 0.4137005832621591, "learning_rate": 2e-05, "loss": 5.4854, "step": 5009 }, { "epoch": 0.16802213465246918, "grad_norm": 0.42581949440028505, "learning_rate": 2e-05, "loss": 5.6798, "step": 5010 }, { "epoch": 0.16805567200469523, "grad_norm": 0.3904980025745375, "learning_rate": 2e-05, "loss": 5.5655, "step": 5011 }, { "epoch": 0.16808920935692126, "grad_norm": 0.4180277799308434, "learning_rate": 2e-05, "loss": 5.4579, "step": 5012 }, { "epoch": 0.1681227467091473, "grad_norm": 0.415985477697211, "learning_rate": 2e-05, "loss": 5.5412, "step": 5013 }, { "epoch": 0.16815628406137337, "grad_norm": 0.41183681837018, "learning_rate": 2e-05, "loss": 5.7431, "step": 5014 }, { "epoch": 0.1681898214135994, "grad_norm": 0.43415980691744827, "learning_rate": 2e-05, "loss": 5.3858, "step": 5015 }, { "epoch": 0.16822335876582545, "grad_norm": 0.44081333097412456, "learning_rate": 2e-05, "loss": 5.5776, "step": 5016 }, { "epoch": 0.16825689611805147, "grad_norm": 0.40842600658294725, "learning_rate": 2e-05, "loss": 5.6131, "step": 5017 }, { "epoch": 0.16829043347027753, "grad_norm": 0.4230254880668572, "learning_rate": 2e-05, "loss": 5.6771, "step": 5018 }, { "epoch": 0.16832397082250355, "grad_norm": 0.44393030274565254, "learning_rate": 2e-05, "loss": 5.6989, "step": 5019 }, { "epoch": 0.1683575081747296, "grad_norm": 0.42739895896728763, "learning_rate": 2e-05, "loss": 5.5599, "step": 5020 }, { "epoch": 0.16839104552695563, "grad_norm": 0.38604829948360503, "learning_rate": 2e-05, "loss": 5.5476, "step": 5021 }, { "epoch": 0.1684245828791817, "grad_norm": 0.40668086868785136, "learning_rate": 2e-05, "loss": 5.7336, "step": 5022 }, { "epoch": 0.16845812023140774, "grad_norm": 0.4366961444899454, "learning_rate": 2e-05, "loss": 5.4434, "step": 5023 }, { "epoch": 0.16849165758363377, "grad_norm": 0.417831488797243, "learning_rate": 2e-05, "loss": 5.5875, "step": 5024 }, { "epoch": 0.16852519493585982, "grad_norm": 0.4064783579303128, "learning_rate": 2e-05, "loss": 5.7226, "step": 5025 }, { "epoch": 0.16855873228808585, "grad_norm": 0.40480648324407104, "learning_rate": 2e-05, "loss": 5.5421, "step": 5026 }, { "epoch": 0.1685922696403119, "grad_norm": 0.4233673112261556, "learning_rate": 2e-05, "loss": 5.6861, "step": 5027 }, { "epoch": 0.16862580699253793, "grad_norm": 0.42219211667643797, "learning_rate": 2e-05, "loss": 5.4255, "step": 5028 }, { "epoch": 0.16865934434476398, "grad_norm": 0.41802348804102735, "learning_rate": 2e-05, "loss": 5.8488, "step": 5029 }, { "epoch": 0.16869288169699, "grad_norm": 0.4336945607314848, "learning_rate": 2e-05, "loss": 5.4069, "step": 5030 }, { "epoch": 0.16872641904921606, "grad_norm": 0.3896269602274565, "learning_rate": 2e-05, "loss": 5.7262, "step": 5031 }, { "epoch": 0.16875995640144212, "grad_norm": 0.3796821838242164, "learning_rate": 2e-05, "loss": 5.3801, "step": 5032 }, { "epoch": 0.16879349375366814, "grad_norm": 0.41610660547050704, "learning_rate": 2e-05, "loss": 5.5965, "step": 5033 }, { "epoch": 0.1688270311058942, "grad_norm": 0.4309568433521211, "learning_rate": 2e-05, "loss": 5.5337, "step": 5034 }, { "epoch": 0.16886056845812022, "grad_norm": 0.4097454561048129, "learning_rate": 2e-05, "loss": 5.6147, "step": 5035 }, { "epoch": 0.16889410581034628, "grad_norm": 0.4479235086543691, "learning_rate": 2e-05, "loss": 5.4673, "step": 5036 }, { "epoch": 0.1689276431625723, "grad_norm": 0.40970999745829884, "learning_rate": 2e-05, "loss": 5.5689, "step": 5037 }, { "epoch": 0.16896118051479836, "grad_norm": 0.3972283651554761, "learning_rate": 2e-05, "loss": 5.4276, "step": 5038 }, { "epoch": 0.16899471786702439, "grad_norm": 0.42300243978215757, "learning_rate": 2e-05, "loss": 5.4104, "step": 5039 }, { "epoch": 0.16902825521925044, "grad_norm": 0.42545741576002993, "learning_rate": 2e-05, "loss": 5.4435, "step": 5040 }, { "epoch": 0.1690617925714765, "grad_norm": 0.42143167331715176, "learning_rate": 2e-05, "loss": 5.6257, "step": 5041 }, { "epoch": 0.16909532992370252, "grad_norm": 0.4348745642097047, "learning_rate": 2e-05, "loss": 5.5425, "step": 5042 }, { "epoch": 0.16912886727592857, "grad_norm": 0.41923225514026186, "learning_rate": 2e-05, "loss": 5.3011, "step": 5043 }, { "epoch": 0.1691624046281546, "grad_norm": 0.41374839506692973, "learning_rate": 2e-05, "loss": 5.7949, "step": 5044 }, { "epoch": 0.16919594198038065, "grad_norm": 0.3819791595552991, "learning_rate": 2e-05, "loss": 5.634, "step": 5045 }, { "epoch": 0.16922947933260668, "grad_norm": 0.39678586092688206, "learning_rate": 2e-05, "loss": 5.4708, "step": 5046 }, { "epoch": 0.16926301668483273, "grad_norm": 0.3933491227151412, "learning_rate": 2e-05, "loss": 5.5311, "step": 5047 }, { "epoch": 0.16929655403705876, "grad_norm": 0.40089007077654343, "learning_rate": 2e-05, "loss": 5.4398, "step": 5048 }, { "epoch": 0.16933009138928481, "grad_norm": 0.433718443968701, "learning_rate": 2e-05, "loss": 5.5681, "step": 5049 }, { "epoch": 0.16936362874151087, "grad_norm": 0.408212095757656, "learning_rate": 2e-05, "loss": 5.7289, "step": 5050 }, { "epoch": 0.1693971660937369, "grad_norm": 0.39565009671496626, "learning_rate": 2e-05, "loss": 5.374, "step": 5051 }, { "epoch": 0.16943070344596295, "grad_norm": 0.39161489180079434, "learning_rate": 2e-05, "loss": 5.3785, "step": 5052 }, { "epoch": 0.16946424079818898, "grad_norm": 0.4370589355832138, "learning_rate": 2e-05, "loss": 5.6132, "step": 5053 }, { "epoch": 0.16949777815041503, "grad_norm": 0.4112125535029912, "learning_rate": 2e-05, "loss": 5.3613, "step": 5054 }, { "epoch": 0.16953131550264106, "grad_norm": 0.3973396771957828, "learning_rate": 2e-05, "loss": 5.5381, "step": 5055 }, { "epoch": 0.1695648528548671, "grad_norm": 0.4336839462248201, "learning_rate": 2e-05, "loss": 5.6345, "step": 5056 }, { "epoch": 0.16959839020709316, "grad_norm": 0.43347850253023, "learning_rate": 2e-05, "loss": 5.7233, "step": 5057 }, { "epoch": 0.1696319275593192, "grad_norm": 0.4472917654655125, "learning_rate": 2e-05, "loss": 5.4411, "step": 5058 }, { "epoch": 0.16966546491154524, "grad_norm": 0.4431936427960446, "learning_rate": 2e-05, "loss": 5.5231, "step": 5059 }, { "epoch": 0.16969900226377127, "grad_norm": 0.41380101448737083, "learning_rate": 2e-05, "loss": 5.4816, "step": 5060 }, { "epoch": 0.16973253961599732, "grad_norm": 0.39496969854335484, "learning_rate": 2e-05, "loss": 5.6231, "step": 5061 }, { "epoch": 0.16976607696822335, "grad_norm": 0.43186139340856766, "learning_rate": 2e-05, "loss": 5.3968, "step": 5062 }, { "epoch": 0.1697996143204494, "grad_norm": 0.412224081072169, "learning_rate": 2e-05, "loss": 5.7646, "step": 5063 }, { "epoch": 0.16983315167267543, "grad_norm": 0.3944748446338093, "learning_rate": 2e-05, "loss": 5.4919, "step": 5064 }, { "epoch": 0.16986668902490148, "grad_norm": 0.42364212518804345, "learning_rate": 2e-05, "loss": 5.6218, "step": 5065 }, { "epoch": 0.16990022637712754, "grad_norm": 0.4552178673524036, "learning_rate": 2e-05, "loss": 5.6033, "step": 5066 }, { "epoch": 0.16993376372935357, "grad_norm": 0.4279896453890546, "learning_rate": 2e-05, "loss": 5.6968, "step": 5067 }, { "epoch": 0.16996730108157962, "grad_norm": 0.4464305355230549, "learning_rate": 2e-05, "loss": 5.6897, "step": 5068 }, { "epoch": 0.17000083843380565, "grad_norm": 0.4006335281846292, "learning_rate": 2e-05, "loss": 5.5296, "step": 5069 }, { "epoch": 0.1700343757860317, "grad_norm": 0.40384930492602616, "learning_rate": 2e-05, "loss": 5.5896, "step": 5070 }, { "epoch": 0.17006791313825773, "grad_norm": 0.40214217251211654, "learning_rate": 2e-05, "loss": 5.6485, "step": 5071 }, { "epoch": 0.17010145049048378, "grad_norm": 0.4397718008861285, "learning_rate": 2e-05, "loss": 5.4428, "step": 5072 }, { "epoch": 0.1701349878427098, "grad_norm": 0.402053979868581, "learning_rate": 2e-05, "loss": 5.5194, "step": 5073 }, { "epoch": 0.17016852519493586, "grad_norm": 0.41962357125800015, "learning_rate": 2e-05, "loss": 5.5227, "step": 5074 }, { "epoch": 0.17020206254716191, "grad_norm": 0.38635602655735696, "learning_rate": 2e-05, "loss": 5.5469, "step": 5075 }, { "epoch": 0.17023559989938794, "grad_norm": 0.39934118903000254, "learning_rate": 2e-05, "loss": 5.6742, "step": 5076 }, { "epoch": 0.170269137251614, "grad_norm": 0.40982212569781906, "learning_rate": 2e-05, "loss": 5.6954, "step": 5077 }, { "epoch": 0.17030267460384002, "grad_norm": 0.3975459209094831, "learning_rate": 2e-05, "loss": 5.7538, "step": 5078 }, { "epoch": 0.17033621195606607, "grad_norm": 0.4163644124750583, "learning_rate": 2e-05, "loss": 5.3649, "step": 5079 }, { "epoch": 0.1703697493082921, "grad_norm": 0.4376280788371486, "learning_rate": 2e-05, "loss": 5.6909, "step": 5080 }, { "epoch": 0.17040328666051816, "grad_norm": 0.39681796854630497, "learning_rate": 2e-05, "loss": 5.405, "step": 5081 }, { "epoch": 0.17043682401274418, "grad_norm": 0.442239923939907, "learning_rate": 2e-05, "loss": 5.2889, "step": 5082 }, { "epoch": 0.17047036136497024, "grad_norm": 0.40376743590941355, "learning_rate": 2e-05, "loss": 5.6783, "step": 5083 }, { "epoch": 0.1705038987171963, "grad_norm": 0.4165515105622094, "learning_rate": 2e-05, "loss": 5.4295, "step": 5084 }, { "epoch": 0.17053743606942232, "grad_norm": 0.4430028172073258, "learning_rate": 2e-05, "loss": 5.5594, "step": 5085 }, { "epoch": 0.17057097342164837, "grad_norm": 0.41164478963162104, "learning_rate": 2e-05, "loss": 5.3988, "step": 5086 }, { "epoch": 0.1706045107738744, "grad_norm": 0.40881159292890085, "learning_rate": 2e-05, "loss": 5.5136, "step": 5087 }, { "epoch": 0.17063804812610045, "grad_norm": 0.40264912203283837, "learning_rate": 2e-05, "loss": 5.5813, "step": 5088 }, { "epoch": 0.17067158547832648, "grad_norm": 0.4116393527646752, "learning_rate": 2e-05, "loss": 5.5834, "step": 5089 }, { "epoch": 0.17070512283055253, "grad_norm": 0.3964872454912041, "learning_rate": 2e-05, "loss": 5.4289, "step": 5090 }, { "epoch": 0.17073866018277856, "grad_norm": 0.42672354830375403, "learning_rate": 2e-05, "loss": 5.5503, "step": 5091 }, { "epoch": 0.1707721975350046, "grad_norm": 0.39170523155907744, "learning_rate": 2e-05, "loss": 5.6847, "step": 5092 }, { "epoch": 0.17080573488723066, "grad_norm": 0.4225784134933193, "learning_rate": 2e-05, "loss": 5.6356, "step": 5093 }, { "epoch": 0.1708392722394567, "grad_norm": 0.4137810493125522, "learning_rate": 2e-05, "loss": 5.5835, "step": 5094 }, { "epoch": 0.17087280959168274, "grad_norm": 0.4430003326927575, "learning_rate": 2e-05, "loss": 5.5921, "step": 5095 }, { "epoch": 0.17090634694390877, "grad_norm": 0.4281545526210783, "learning_rate": 2e-05, "loss": 5.4867, "step": 5096 }, { "epoch": 0.17093988429613483, "grad_norm": 0.43175013680412905, "learning_rate": 2e-05, "loss": 5.4119, "step": 5097 }, { "epoch": 0.17097342164836085, "grad_norm": 0.39573717513945367, "learning_rate": 2e-05, "loss": 5.3675, "step": 5098 }, { "epoch": 0.1710069590005869, "grad_norm": 0.4313743982952581, "learning_rate": 2e-05, "loss": 5.5558, "step": 5099 }, { "epoch": 0.17104049635281293, "grad_norm": 0.4126170103148774, "learning_rate": 2e-05, "loss": 5.616, "step": 5100 }, { "epoch": 0.17107403370503899, "grad_norm": 0.43247360086019315, "learning_rate": 2e-05, "loss": 5.59, "step": 5101 }, { "epoch": 0.17110757105726504, "grad_norm": 0.3983353493869467, "learning_rate": 2e-05, "loss": 5.8567, "step": 5102 }, { "epoch": 0.17114110840949107, "grad_norm": 0.41710594396121, "learning_rate": 2e-05, "loss": 5.5381, "step": 5103 }, { "epoch": 0.17117464576171712, "grad_norm": 0.41255028435257346, "learning_rate": 2e-05, "loss": 5.3589, "step": 5104 }, { "epoch": 0.17120818311394315, "grad_norm": 0.43554950183986024, "learning_rate": 2e-05, "loss": 5.4887, "step": 5105 }, { "epoch": 0.1712417204661692, "grad_norm": 0.4370485570454445, "learning_rate": 2e-05, "loss": 5.5718, "step": 5106 }, { "epoch": 0.17127525781839523, "grad_norm": 0.405556278961604, "learning_rate": 2e-05, "loss": 5.4011, "step": 5107 }, { "epoch": 0.17130879517062128, "grad_norm": 0.41060346870714015, "learning_rate": 2e-05, "loss": 5.5603, "step": 5108 }, { "epoch": 0.17134233252284733, "grad_norm": 0.4089325084578068, "learning_rate": 2e-05, "loss": 5.5846, "step": 5109 }, { "epoch": 0.17137586987507336, "grad_norm": 0.49631111082006013, "learning_rate": 2e-05, "loss": 5.5609, "step": 5110 }, { "epoch": 0.17140940722729942, "grad_norm": 0.4282362466982983, "learning_rate": 2e-05, "loss": 5.5732, "step": 5111 }, { "epoch": 0.17144294457952544, "grad_norm": 0.3999614575407078, "learning_rate": 2e-05, "loss": 5.6724, "step": 5112 }, { "epoch": 0.1714764819317515, "grad_norm": 0.40372695627218363, "learning_rate": 2e-05, "loss": 5.4677, "step": 5113 }, { "epoch": 0.17151001928397752, "grad_norm": 0.4381607171123124, "learning_rate": 2e-05, "loss": 5.4173, "step": 5114 }, { "epoch": 0.17154355663620358, "grad_norm": 0.4359609348355989, "learning_rate": 2e-05, "loss": 5.649, "step": 5115 }, { "epoch": 0.1715770939884296, "grad_norm": 0.41994252486346406, "learning_rate": 2e-05, "loss": 5.7717, "step": 5116 }, { "epoch": 0.17161063134065566, "grad_norm": 0.4438671639431117, "learning_rate": 2e-05, "loss": 5.4787, "step": 5117 }, { "epoch": 0.1716441686928817, "grad_norm": 0.41930441330729673, "learning_rate": 2e-05, "loss": 5.4082, "step": 5118 }, { "epoch": 0.17167770604510774, "grad_norm": 0.40331363889381444, "learning_rate": 2e-05, "loss": 5.4785, "step": 5119 }, { "epoch": 0.1717112433973338, "grad_norm": 0.42659195331666155, "learning_rate": 2e-05, "loss": 5.5527, "step": 5120 }, { "epoch": 0.17174478074955982, "grad_norm": 0.41166077193876915, "learning_rate": 2e-05, "loss": 5.3975, "step": 5121 }, { "epoch": 0.17177831810178587, "grad_norm": 0.43157261473784037, "learning_rate": 2e-05, "loss": 5.4938, "step": 5122 }, { "epoch": 0.1718118554540119, "grad_norm": 0.39104186414869935, "learning_rate": 2e-05, "loss": 5.5432, "step": 5123 }, { "epoch": 0.17184539280623795, "grad_norm": 0.40348959403557483, "learning_rate": 2e-05, "loss": 5.7388, "step": 5124 }, { "epoch": 0.17187893015846398, "grad_norm": 0.4101328646203236, "learning_rate": 2e-05, "loss": 5.4539, "step": 5125 }, { "epoch": 0.17191246751069003, "grad_norm": 0.4158543225683001, "learning_rate": 2e-05, "loss": 5.452, "step": 5126 }, { "epoch": 0.17194600486291609, "grad_norm": 0.41250638090901476, "learning_rate": 2e-05, "loss": 5.5161, "step": 5127 }, { "epoch": 0.1719795422151421, "grad_norm": 0.4135290535728127, "learning_rate": 2e-05, "loss": 5.6334, "step": 5128 }, { "epoch": 0.17201307956736817, "grad_norm": 0.4436768352354965, "learning_rate": 2e-05, "loss": 5.4194, "step": 5129 }, { "epoch": 0.1720466169195942, "grad_norm": 0.4048622483481952, "learning_rate": 2e-05, "loss": 5.5781, "step": 5130 }, { "epoch": 0.17208015427182025, "grad_norm": 0.4753748515721351, "learning_rate": 2e-05, "loss": 5.5498, "step": 5131 }, { "epoch": 0.17211369162404627, "grad_norm": 0.47073753736529184, "learning_rate": 2e-05, "loss": 5.7241, "step": 5132 }, { "epoch": 0.17214722897627233, "grad_norm": 0.47843288384023225, "learning_rate": 2e-05, "loss": 5.3855, "step": 5133 }, { "epoch": 0.17218076632849835, "grad_norm": 0.4128677529793387, "learning_rate": 2e-05, "loss": 5.4791, "step": 5134 }, { "epoch": 0.1722143036807244, "grad_norm": 0.39650998088780176, "learning_rate": 2e-05, "loss": 5.5601, "step": 5135 }, { "epoch": 0.17224784103295046, "grad_norm": 0.4103098731347195, "learning_rate": 2e-05, "loss": 5.6377, "step": 5136 }, { "epoch": 0.1722813783851765, "grad_norm": 0.45276875890939394, "learning_rate": 2e-05, "loss": 5.566, "step": 5137 }, { "epoch": 0.17231491573740254, "grad_norm": 0.42029153895012444, "learning_rate": 2e-05, "loss": 5.3926, "step": 5138 }, { "epoch": 0.17234845308962857, "grad_norm": 0.41816865190268887, "learning_rate": 2e-05, "loss": 5.677, "step": 5139 }, { "epoch": 0.17238199044185462, "grad_norm": 0.43710704620126817, "learning_rate": 2e-05, "loss": 5.5177, "step": 5140 }, { "epoch": 0.17241552779408065, "grad_norm": 0.4176353938139435, "learning_rate": 2e-05, "loss": 5.3168, "step": 5141 }, { "epoch": 0.1724490651463067, "grad_norm": 0.41148932269448346, "learning_rate": 2e-05, "loss": 5.7534, "step": 5142 }, { "epoch": 0.17248260249853273, "grad_norm": 0.43010387921899973, "learning_rate": 2e-05, "loss": 5.5776, "step": 5143 }, { "epoch": 0.17251613985075878, "grad_norm": 0.38868780188023416, "learning_rate": 2e-05, "loss": 5.4558, "step": 5144 }, { "epoch": 0.17254967720298484, "grad_norm": 0.4085997532762979, "learning_rate": 2e-05, "loss": 5.5627, "step": 5145 }, { "epoch": 0.17258321455521086, "grad_norm": 0.4059215701130067, "learning_rate": 2e-05, "loss": 5.7655, "step": 5146 }, { "epoch": 0.17261675190743692, "grad_norm": 0.4377042009893007, "learning_rate": 2e-05, "loss": 5.735, "step": 5147 }, { "epoch": 0.17265028925966294, "grad_norm": 0.39007242644044543, "learning_rate": 2e-05, "loss": 5.6675, "step": 5148 }, { "epoch": 0.172683826611889, "grad_norm": 0.40104110696596007, "learning_rate": 2e-05, "loss": 5.5834, "step": 5149 }, { "epoch": 0.17271736396411502, "grad_norm": 0.38725848713963723, "learning_rate": 2e-05, "loss": 5.5892, "step": 5150 }, { "epoch": 0.17275090131634108, "grad_norm": 0.400869607746331, "learning_rate": 2e-05, "loss": 5.5419, "step": 5151 }, { "epoch": 0.1727844386685671, "grad_norm": 0.40040243589297125, "learning_rate": 2e-05, "loss": 5.3324, "step": 5152 }, { "epoch": 0.17281797602079316, "grad_norm": 0.3938263613571763, "learning_rate": 2e-05, "loss": 5.5455, "step": 5153 }, { "epoch": 0.1728515133730192, "grad_norm": 0.4360112260941231, "learning_rate": 2e-05, "loss": 5.6803, "step": 5154 }, { "epoch": 0.17288505072524524, "grad_norm": 0.414618058199769, "learning_rate": 2e-05, "loss": 5.4126, "step": 5155 }, { "epoch": 0.1729185880774713, "grad_norm": 0.43665918327275577, "learning_rate": 2e-05, "loss": 5.5724, "step": 5156 }, { "epoch": 0.17295212542969732, "grad_norm": 0.39622213721685473, "learning_rate": 2e-05, "loss": 5.6386, "step": 5157 }, { "epoch": 0.17298566278192337, "grad_norm": 0.44157618042436925, "learning_rate": 2e-05, "loss": 5.6768, "step": 5158 }, { "epoch": 0.1730192001341494, "grad_norm": 0.4171120987044399, "learning_rate": 2e-05, "loss": 5.6855, "step": 5159 }, { "epoch": 0.17305273748637545, "grad_norm": 0.39297755460554384, "learning_rate": 2e-05, "loss": 5.5606, "step": 5160 }, { "epoch": 0.1730862748386015, "grad_norm": 0.4255286232318651, "learning_rate": 2e-05, "loss": 5.5206, "step": 5161 }, { "epoch": 0.17311981219082753, "grad_norm": 0.43289324371646515, "learning_rate": 2e-05, "loss": 5.449, "step": 5162 }, { "epoch": 0.1731533495430536, "grad_norm": 0.39280685393714526, "learning_rate": 2e-05, "loss": 5.6513, "step": 5163 }, { "epoch": 0.1731868868952796, "grad_norm": 0.4305195067228228, "learning_rate": 2e-05, "loss": 5.6489, "step": 5164 }, { "epoch": 0.17322042424750567, "grad_norm": 0.4001578908697971, "learning_rate": 2e-05, "loss": 5.5606, "step": 5165 }, { "epoch": 0.1732539615997317, "grad_norm": 0.4240598683005249, "learning_rate": 2e-05, "loss": 5.4789, "step": 5166 }, { "epoch": 0.17328749895195775, "grad_norm": 0.39772385171245134, "learning_rate": 2e-05, "loss": 5.7081, "step": 5167 }, { "epoch": 0.17332103630418377, "grad_norm": 0.38786920323579793, "learning_rate": 2e-05, "loss": 5.5647, "step": 5168 }, { "epoch": 0.17335457365640983, "grad_norm": 0.4113402550900438, "learning_rate": 2e-05, "loss": 5.5395, "step": 5169 }, { "epoch": 0.17338811100863588, "grad_norm": 0.410820153521561, "learning_rate": 2e-05, "loss": 5.4899, "step": 5170 }, { "epoch": 0.1734216483608619, "grad_norm": 0.42639204588809443, "learning_rate": 2e-05, "loss": 5.5197, "step": 5171 }, { "epoch": 0.17345518571308796, "grad_norm": 0.438167866995599, "learning_rate": 2e-05, "loss": 5.7166, "step": 5172 }, { "epoch": 0.173488723065314, "grad_norm": 0.44110075703756757, "learning_rate": 2e-05, "loss": 5.5328, "step": 5173 }, { "epoch": 0.17352226041754004, "grad_norm": 0.436459532102266, "learning_rate": 2e-05, "loss": 5.5114, "step": 5174 }, { "epoch": 0.17355579776976607, "grad_norm": 0.4047942809481286, "learning_rate": 2e-05, "loss": 5.5282, "step": 5175 }, { "epoch": 0.17358933512199212, "grad_norm": 0.422785416666094, "learning_rate": 2e-05, "loss": 5.6566, "step": 5176 }, { "epoch": 0.17362287247421815, "grad_norm": 0.4531079015696689, "learning_rate": 2e-05, "loss": 5.6146, "step": 5177 }, { "epoch": 0.1736564098264442, "grad_norm": 0.42018118329509413, "learning_rate": 2e-05, "loss": 5.532, "step": 5178 }, { "epoch": 0.17368994717867026, "grad_norm": 0.3915320821812527, "learning_rate": 2e-05, "loss": 5.6305, "step": 5179 }, { "epoch": 0.17372348453089628, "grad_norm": 0.4077426304076191, "learning_rate": 2e-05, "loss": 5.4398, "step": 5180 }, { "epoch": 0.17375702188312234, "grad_norm": 0.4171967962307781, "learning_rate": 2e-05, "loss": 5.5003, "step": 5181 }, { "epoch": 0.17379055923534836, "grad_norm": 0.39871726939155716, "learning_rate": 2e-05, "loss": 5.4687, "step": 5182 }, { "epoch": 0.17382409658757442, "grad_norm": 0.4258008598385456, "learning_rate": 2e-05, "loss": 5.4827, "step": 5183 }, { "epoch": 0.17385763393980044, "grad_norm": 0.398911140188752, "learning_rate": 2e-05, "loss": 5.4999, "step": 5184 }, { "epoch": 0.1738911712920265, "grad_norm": 0.41571766144893213, "learning_rate": 2e-05, "loss": 5.6203, "step": 5185 }, { "epoch": 0.17392470864425252, "grad_norm": 0.4282517262379038, "learning_rate": 2e-05, "loss": 5.7046, "step": 5186 }, { "epoch": 0.17395824599647858, "grad_norm": 0.4022585148246725, "learning_rate": 2e-05, "loss": 5.6335, "step": 5187 }, { "epoch": 0.17399178334870463, "grad_norm": 0.4570315675738948, "learning_rate": 2e-05, "loss": 5.6439, "step": 5188 }, { "epoch": 0.17402532070093066, "grad_norm": 0.4349583316036763, "learning_rate": 2e-05, "loss": 5.4105, "step": 5189 }, { "epoch": 0.1740588580531567, "grad_norm": 0.45544946363358996, "learning_rate": 2e-05, "loss": 5.621, "step": 5190 }, { "epoch": 0.17409239540538274, "grad_norm": 0.4448044387502188, "learning_rate": 2e-05, "loss": 5.3802, "step": 5191 }, { "epoch": 0.1741259327576088, "grad_norm": 0.41271299371027115, "learning_rate": 2e-05, "loss": 5.7695, "step": 5192 }, { "epoch": 0.17415947010983482, "grad_norm": 0.40145250541917404, "learning_rate": 2e-05, "loss": 5.582, "step": 5193 }, { "epoch": 0.17419300746206087, "grad_norm": 0.39830277465852393, "learning_rate": 2e-05, "loss": 5.6988, "step": 5194 }, { "epoch": 0.1742265448142869, "grad_norm": 0.4408886296088561, "learning_rate": 2e-05, "loss": 5.4012, "step": 5195 }, { "epoch": 0.17426008216651295, "grad_norm": 0.4284267979251364, "learning_rate": 2e-05, "loss": 5.3471, "step": 5196 }, { "epoch": 0.174293619518739, "grad_norm": 0.37925530230875554, "learning_rate": 2e-05, "loss": 5.5215, "step": 5197 }, { "epoch": 0.17432715687096503, "grad_norm": 0.4319093884289378, "learning_rate": 2e-05, "loss": 5.6127, "step": 5198 }, { "epoch": 0.1743606942231911, "grad_norm": 0.43671718300245166, "learning_rate": 2e-05, "loss": 5.5355, "step": 5199 }, { "epoch": 0.1743942315754171, "grad_norm": 0.41894099695652076, "learning_rate": 2e-05, "loss": 5.571, "step": 5200 }, { "epoch": 0.17442776892764317, "grad_norm": 0.40309627758438166, "learning_rate": 2e-05, "loss": 5.6528, "step": 5201 }, { "epoch": 0.1744613062798692, "grad_norm": 0.4167222997740923, "learning_rate": 2e-05, "loss": 5.6581, "step": 5202 }, { "epoch": 0.17449484363209525, "grad_norm": 0.43769266881025, "learning_rate": 2e-05, "loss": 5.6175, "step": 5203 }, { "epoch": 0.17452838098432127, "grad_norm": 0.3919395236318222, "learning_rate": 2e-05, "loss": 5.5511, "step": 5204 }, { "epoch": 0.17456191833654733, "grad_norm": 0.4125856192780989, "learning_rate": 2e-05, "loss": 5.6768, "step": 5205 }, { "epoch": 0.17459545568877338, "grad_norm": 0.4402800343609268, "learning_rate": 2e-05, "loss": 5.6251, "step": 5206 }, { "epoch": 0.1746289930409994, "grad_norm": 0.4098376246846836, "learning_rate": 2e-05, "loss": 5.6244, "step": 5207 }, { "epoch": 0.17466253039322546, "grad_norm": 0.40878692921231746, "learning_rate": 2e-05, "loss": 5.688, "step": 5208 }, { "epoch": 0.1746960677454515, "grad_norm": 0.4424112960606882, "learning_rate": 2e-05, "loss": 5.4334, "step": 5209 }, { "epoch": 0.17472960509767754, "grad_norm": 0.4313301671055867, "learning_rate": 2e-05, "loss": 5.65, "step": 5210 }, { "epoch": 0.17476314244990357, "grad_norm": 0.4360523401303478, "learning_rate": 2e-05, "loss": 5.4511, "step": 5211 }, { "epoch": 0.17479667980212962, "grad_norm": 0.4237871102170712, "learning_rate": 2e-05, "loss": 5.4059, "step": 5212 }, { "epoch": 0.17483021715435568, "grad_norm": 0.4304807988651403, "learning_rate": 2e-05, "loss": 5.4339, "step": 5213 }, { "epoch": 0.1748637545065817, "grad_norm": 0.4053151413554958, "learning_rate": 2e-05, "loss": 5.4527, "step": 5214 }, { "epoch": 0.17489729185880776, "grad_norm": 0.3999291740137288, "learning_rate": 2e-05, "loss": 5.6984, "step": 5215 }, { "epoch": 0.17493082921103378, "grad_norm": 0.43136304639089446, "learning_rate": 2e-05, "loss": 5.5285, "step": 5216 }, { "epoch": 0.17496436656325984, "grad_norm": 0.4474782533809687, "learning_rate": 2e-05, "loss": 5.6684, "step": 5217 }, { "epoch": 0.17499790391548586, "grad_norm": 0.3928395415056043, "learning_rate": 2e-05, "loss": 5.5442, "step": 5218 }, { "epoch": 0.17503144126771192, "grad_norm": 0.4459834443868448, "learning_rate": 2e-05, "loss": 5.4792, "step": 5219 }, { "epoch": 0.17506497861993794, "grad_norm": 0.4400917624608096, "learning_rate": 2e-05, "loss": 5.4629, "step": 5220 }, { "epoch": 0.175098515972164, "grad_norm": 0.42799011427620104, "learning_rate": 2e-05, "loss": 5.6594, "step": 5221 }, { "epoch": 0.17513205332439005, "grad_norm": 0.44251752089155655, "learning_rate": 2e-05, "loss": 5.558, "step": 5222 }, { "epoch": 0.17516559067661608, "grad_norm": 0.4248219586728939, "learning_rate": 2e-05, "loss": 5.4947, "step": 5223 }, { "epoch": 0.17519912802884213, "grad_norm": 0.42641091764459005, "learning_rate": 2e-05, "loss": 5.4384, "step": 5224 }, { "epoch": 0.17523266538106816, "grad_norm": 0.3976373340828291, "learning_rate": 2e-05, "loss": 5.5623, "step": 5225 }, { "epoch": 0.1752662027332942, "grad_norm": 0.42612594501321893, "learning_rate": 2e-05, "loss": 5.4073, "step": 5226 }, { "epoch": 0.17529974008552024, "grad_norm": 0.38913216715120585, "learning_rate": 2e-05, "loss": 5.3161, "step": 5227 }, { "epoch": 0.1753332774377463, "grad_norm": 0.4025873194931091, "learning_rate": 2e-05, "loss": 5.5428, "step": 5228 }, { "epoch": 0.17536681478997232, "grad_norm": 0.4441084504134053, "learning_rate": 2e-05, "loss": 5.5965, "step": 5229 }, { "epoch": 0.17540035214219837, "grad_norm": 0.4044328644815498, "learning_rate": 2e-05, "loss": 5.5079, "step": 5230 }, { "epoch": 0.17543388949442443, "grad_norm": 0.3969468637261114, "learning_rate": 2e-05, "loss": 5.3151, "step": 5231 }, { "epoch": 0.17546742684665045, "grad_norm": 0.42105354431149167, "learning_rate": 2e-05, "loss": 5.5694, "step": 5232 }, { "epoch": 0.1755009641988765, "grad_norm": 0.40197605165852734, "learning_rate": 2e-05, "loss": 5.5241, "step": 5233 }, { "epoch": 0.17553450155110253, "grad_norm": 0.4259113632559202, "learning_rate": 2e-05, "loss": 5.5041, "step": 5234 }, { "epoch": 0.1755680389033286, "grad_norm": 0.40658285769527286, "learning_rate": 2e-05, "loss": 5.4389, "step": 5235 }, { "epoch": 0.17560157625555461, "grad_norm": 0.3956653524247334, "learning_rate": 2e-05, "loss": 5.5691, "step": 5236 }, { "epoch": 0.17563511360778067, "grad_norm": 0.41873219602078837, "learning_rate": 2e-05, "loss": 5.4499, "step": 5237 }, { "epoch": 0.1756686509600067, "grad_norm": 0.4088729273373238, "learning_rate": 2e-05, "loss": 5.5491, "step": 5238 }, { "epoch": 0.17570218831223275, "grad_norm": 0.41687486756673925, "learning_rate": 2e-05, "loss": 5.6065, "step": 5239 }, { "epoch": 0.1757357256644588, "grad_norm": 0.4006361168865449, "learning_rate": 2e-05, "loss": 5.4538, "step": 5240 }, { "epoch": 0.17576926301668483, "grad_norm": 0.4287113625874941, "learning_rate": 2e-05, "loss": 5.5446, "step": 5241 }, { "epoch": 0.17580280036891088, "grad_norm": 0.4325505637363663, "learning_rate": 2e-05, "loss": 5.7628, "step": 5242 }, { "epoch": 0.1758363377211369, "grad_norm": 0.3898716515204915, "learning_rate": 2e-05, "loss": 5.4198, "step": 5243 }, { "epoch": 0.17586987507336296, "grad_norm": 0.407591145861884, "learning_rate": 2e-05, "loss": 5.55, "step": 5244 }, { "epoch": 0.175903412425589, "grad_norm": 0.43244934954953157, "learning_rate": 2e-05, "loss": 5.6554, "step": 5245 }, { "epoch": 0.17593694977781504, "grad_norm": 0.4244747510682507, "learning_rate": 2e-05, "loss": 5.4825, "step": 5246 }, { "epoch": 0.17597048713004107, "grad_norm": 0.42786450948003535, "learning_rate": 2e-05, "loss": 5.4313, "step": 5247 }, { "epoch": 0.17600402448226712, "grad_norm": 0.41341664524802046, "learning_rate": 2e-05, "loss": 5.6837, "step": 5248 }, { "epoch": 0.17603756183449318, "grad_norm": 0.40839991672195874, "learning_rate": 2e-05, "loss": 5.5697, "step": 5249 }, { "epoch": 0.1760710991867192, "grad_norm": 0.45872573216513, "learning_rate": 2e-05, "loss": 5.646, "step": 5250 }, { "epoch": 0.17610463653894526, "grad_norm": 0.41010874755149773, "learning_rate": 2e-05, "loss": 5.577, "step": 5251 }, { "epoch": 0.17613817389117128, "grad_norm": 0.425602047198378, "learning_rate": 2e-05, "loss": 5.6124, "step": 5252 }, { "epoch": 0.17617171124339734, "grad_norm": 0.39945705975894064, "learning_rate": 2e-05, "loss": 5.5108, "step": 5253 }, { "epoch": 0.17620524859562336, "grad_norm": 0.38048354238962556, "learning_rate": 2e-05, "loss": 5.5197, "step": 5254 }, { "epoch": 0.17623878594784942, "grad_norm": 0.4404682424329345, "learning_rate": 2e-05, "loss": 5.5113, "step": 5255 }, { "epoch": 0.17627232330007545, "grad_norm": 0.40986032213460233, "learning_rate": 2e-05, "loss": 5.7617, "step": 5256 }, { "epoch": 0.1763058606523015, "grad_norm": 0.386176846642309, "learning_rate": 2e-05, "loss": 5.5296, "step": 5257 }, { "epoch": 0.17633939800452755, "grad_norm": 0.4058586546283516, "learning_rate": 2e-05, "loss": 5.3617, "step": 5258 }, { "epoch": 0.17637293535675358, "grad_norm": 0.4025865737316668, "learning_rate": 2e-05, "loss": 5.546, "step": 5259 }, { "epoch": 0.17640647270897963, "grad_norm": 0.39418617020992325, "learning_rate": 2e-05, "loss": 5.724, "step": 5260 }, { "epoch": 0.17644001006120566, "grad_norm": 0.4111850853290083, "learning_rate": 2e-05, "loss": 5.7072, "step": 5261 }, { "epoch": 0.17647354741343171, "grad_norm": 0.40391217483846614, "learning_rate": 2e-05, "loss": 5.6849, "step": 5262 }, { "epoch": 0.17650708476565774, "grad_norm": 0.4010671979920518, "learning_rate": 2e-05, "loss": 5.4757, "step": 5263 }, { "epoch": 0.1765406221178838, "grad_norm": 0.3988454345010779, "learning_rate": 2e-05, "loss": 5.5224, "step": 5264 }, { "epoch": 0.17657415947010985, "grad_norm": 0.41741212516728354, "learning_rate": 2e-05, "loss": 5.7105, "step": 5265 }, { "epoch": 0.17660769682233587, "grad_norm": 0.4142718550849861, "learning_rate": 2e-05, "loss": 5.5674, "step": 5266 }, { "epoch": 0.17664123417456193, "grad_norm": 0.4123473591419538, "learning_rate": 2e-05, "loss": 5.6034, "step": 5267 }, { "epoch": 0.17667477152678795, "grad_norm": 0.4161147123386814, "learning_rate": 2e-05, "loss": 5.7051, "step": 5268 }, { "epoch": 0.176708308879014, "grad_norm": 0.43625199148634486, "learning_rate": 2e-05, "loss": 5.5623, "step": 5269 }, { "epoch": 0.17674184623124004, "grad_norm": 0.40046171226800326, "learning_rate": 2e-05, "loss": 5.423, "step": 5270 }, { "epoch": 0.1767753835834661, "grad_norm": 0.39992106718361253, "learning_rate": 2e-05, "loss": 5.4562, "step": 5271 }, { "epoch": 0.17680892093569212, "grad_norm": 0.4438409182292412, "learning_rate": 2e-05, "loss": 5.5871, "step": 5272 }, { "epoch": 0.17684245828791817, "grad_norm": 0.4382811157061352, "learning_rate": 2e-05, "loss": 5.2667, "step": 5273 }, { "epoch": 0.17687599564014422, "grad_norm": 0.44407855057412104, "learning_rate": 2e-05, "loss": 5.4381, "step": 5274 }, { "epoch": 0.17690953299237025, "grad_norm": 0.4258822681969254, "learning_rate": 2e-05, "loss": 5.3524, "step": 5275 }, { "epoch": 0.1769430703445963, "grad_norm": 0.4367839619775217, "learning_rate": 2e-05, "loss": 5.5281, "step": 5276 }, { "epoch": 0.17697660769682233, "grad_norm": 0.3954384900295963, "learning_rate": 2e-05, "loss": 5.4699, "step": 5277 }, { "epoch": 0.17701014504904838, "grad_norm": 0.4145875461031989, "learning_rate": 2e-05, "loss": 5.4882, "step": 5278 }, { "epoch": 0.1770436824012744, "grad_norm": 0.4522629343070089, "learning_rate": 2e-05, "loss": 5.4952, "step": 5279 }, { "epoch": 0.17707721975350046, "grad_norm": 0.44663825876058083, "learning_rate": 2e-05, "loss": 5.5596, "step": 5280 }, { "epoch": 0.1771107571057265, "grad_norm": 0.4018408651295552, "learning_rate": 2e-05, "loss": 5.6935, "step": 5281 }, { "epoch": 0.17714429445795254, "grad_norm": 0.4309860996650601, "learning_rate": 2e-05, "loss": 5.3738, "step": 5282 }, { "epoch": 0.1771778318101786, "grad_norm": 0.41716732618554947, "learning_rate": 2e-05, "loss": 5.5909, "step": 5283 }, { "epoch": 0.17721136916240462, "grad_norm": 0.43458448832019836, "learning_rate": 2e-05, "loss": 5.5036, "step": 5284 }, { "epoch": 0.17724490651463068, "grad_norm": 0.451610287244253, "learning_rate": 2e-05, "loss": 5.6896, "step": 5285 }, { "epoch": 0.1772784438668567, "grad_norm": 0.4070036334126495, "learning_rate": 2e-05, "loss": 5.5517, "step": 5286 }, { "epoch": 0.17731198121908276, "grad_norm": 0.41260191878603364, "learning_rate": 2e-05, "loss": 5.601, "step": 5287 }, { "epoch": 0.17734551857130879, "grad_norm": 0.42242872535616144, "learning_rate": 2e-05, "loss": 5.7916, "step": 5288 }, { "epoch": 0.17737905592353484, "grad_norm": 0.392414232268558, "learning_rate": 2e-05, "loss": 5.5996, "step": 5289 }, { "epoch": 0.17741259327576087, "grad_norm": 0.40092250757923237, "learning_rate": 2e-05, "loss": 5.5641, "step": 5290 }, { "epoch": 0.17744613062798692, "grad_norm": 0.42008801957648495, "learning_rate": 2e-05, "loss": 5.7542, "step": 5291 }, { "epoch": 0.17747966798021297, "grad_norm": 0.40990590876527977, "learning_rate": 2e-05, "loss": 5.6127, "step": 5292 }, { "epoch": 0.177513205332439, "grad_norm": 0.40855139281237585, "learning_rate": 2e-05, "loss": 5.4857, "step": 5293 }, { "epoch": 0.17754674268466505, "grad_norm": 0.4079302861578214, "learning_rate": 2e-05, "loss": 5.5812, "step": 5294 }, { "epoch": 0.17758028003689108, "grad_norm": 0.40792799600643886, "learning_rate": 2e-05, "loss": 5.5115, "step": 5295 }, { "epoch": 0.17761381738911713, "grad_norm": 0.42977277053022117, "learning_rate": 2e-05, "loss": 5.5091, "step": 5296 }, { "epoch": 0.17764735474134316, "grad_norm": 0.42041507391067406, "learning_rate": 2e-05, "loss": 5.5954, "step": 5297 }, { "epoch": 0.17768089209356921, "grad_norm": 0.41036539381245335, "learning_rate": 2e-05, "loss": 5.8408, "step": 5298 }, { "epoch": 0.17771442944579524, "grad_norm": 0.40065068023027695, "learning_rate": 2e-05, "loss": 5.8544, "step": 5299 }, { "epoch": 0.1777479667980213, "grad_norm": 0.3894595237752155, "learning_rate": 2e-05, "loss": 5.5334, "step": 5300 }, { "epoch": 0.17778150415024735, "grad_norm": 0.4183560750304181, "learning_rate": 2e-05, "loss": 5.6393, "step": 5301 }, { "epoch": 0.17781504150247338, "grad_norm": 0.39536732191369717, "learning_rate": 2e-05, "loss": 5.4263, "step": 5302 }, { "epoch": 0.17784857885469943, "grad_norm": 0.4310189258077339, "learning_rate": 2e-05, "loss": 5.4041, "step": 5303 }, { "epoch": 0.17788211620692546, "grad_norm": 0.39752953445395056, "learning_rate": 2e-05, "loss": 5.5064, "step": 5304 }, { "epoch": 0.1779156535591515, "grad_norm": 0.4012478304591894, "learning_rate": 2e-05, "loss": 5.4882, "step": 5305 }, { "epoch": 0.17794919091137754, "grad_norm": 0.4155491011381988, "learning_rate": 2e-05, "loss": 5.7142, "step": 5306 }, { "epoch": 0.1779827282636036, "grad_norm": 0.43043168136413545, "learning_rate": 2e-05, "loss": 5.5598, "step": 5307 }, { "epoch": 0.17801626561582962, "grad_norm": 0.41043775104758323, "learning_rate": 2e-05, "loss": 5.6092, "step": 5308 }, { "epoch": 0.17804980296805567, "grad_norm": 0.4160247248343745, "learning_rate": 2e-05, "loss": 5.5743, "step": 5309 }, { "epoch": 0.17808334032028172, "grad_norm": 0.3948430439645983, "learning_rate": 2e-05, "loss": 5.3633, "step": 5310 }, { "epoch": 0.17811687767250775, "grad_norm": 0.42854101547259504, "learning_rate": 2e-05, "loss": 5.5113, "step": 5311 }, { "epoch": 0.1781504150247338, "grad_norm": 0.4085601950265618, "learning_rate": 2e-05, "loss": 5.4967, "step": 5312 }, { "epoch": 0.17818395237695983, "grad_norm": 0.3992695104048675, "learning_rate": 2e-05, "loss": 5.7457, "step": 5313 }, { "epoch": 0.17821748972918589, "grad_norm": 0.4180923533445975, "learning_rate": 2e-05, "loss": 5.7058, "step": 5314 }, { "epoch": 0.1782510270814119, "grad_norm": 0.40615429984843704, "learning_rate": 2e-05, "loss": 5.6345, "step": 5315 }, { "epoch": 0.17828456443363797, "grad_norm": 0.40601907437923584, "learning_rate": 2e-05, "loss": 5.4486, "step": 5316 }, { "epoch": 0.17831810178586402, "grad_norm": 0.4248025261415373, "learning_rate": 2e-05, "loss": 5.6087, "step": 5317 }, { "epoch": 0.17835163913809005, "grad_norm": 0.38910358821179175, "learning_rate": 2e-05, "loss": 5.6028, "step": 5318 }, { "epoch": 0.1783851764903161, "grad_norm": 0.4321902330517999, "learning_rate": 2e-05, "loss": 5.3893, "step": 5319 }, { "epoch": 0.17841871384254213, "grad_norm": 0.4236514293689228, "learning_rate": 2e-05, "loss": 5.7507, "step": 5320 }, { "epoch": 0.17845225119476818, "grad_norm": 0.43050855973554075, "learning_rate": 2e-05, "loss": 5.587, "step": 5321 }, { "epoch": 0.1784857885469942, "grad_norm": 0.42514481451458325, "learning_rate": 2e-05, "loss": 5.4268, "step": 5322 }, { "epoch": 0.17851932589922026, "grad_norm": 0.41119012955404943, "learning_rate": 2e-05, "loss": 5.7767, "step": 5323 }, { "epoch": 0.1785528632514463, "grad_norm": 0.4042892761728731, "learning_rate": 2e-05, "loss": 5.5661, "step": 5324 }, { "epoch": 0.17858640060367234, "grad_norm": 0.41823365302714627, "learning_rate": 2e-05, "loss": 5.4889, "step": 5325 }, { "epoch": 0.1786199379558984, "grad_norm": 0.405524712917198, "learning_rate": 2e-05, "loss": 5.7511, "step": 5326 }, { "epoch": 0.17865347530812442, "grad_norm": 0.4268621160393368, "learning_rate": 2e-05, "loss": 5.5894, "step": 5327 }, { "epoch": 0.17868701266035047, "grad_norm": 0.42110879987416167, "learning_rate": 2e-05, "loss": 5.6023, "step": 5328 }, { "epoch": 0.1787205500125765, "grad_norm": 0.3967308309810518, "learning_rate": 2e-05, "loss": 5.4853, "step": 5329 }, { "epoch": 0.17875408736480256, "grad_norm": 0.41540897847898633, "learning_rate": 2e-05, "loss": 5.741, "step": 5330 }, { "epoch": 0.17878762471702858, "grad_norm": 0.4262264022828408, "learning_rate": 2e-05, "loss": 5.6626, "step": 5331 }, { "epoch": 0.17882116206925464, "grad_norm": 0.4328272035459623, "learning_rate": 2e-05, "loss": 5.4863, "step": 5332 }, { "epoch": 0.17885469942148066, "grad_norm": 0.43002465531224326, "learning_rate": 2e-05, "loss": 5.667, "step": 5333 }, { "epoch": 0.17888823677370672, "grad_norm": 0.4205581630682232, "learning_rate": 2e-05, "loss": 5.5491, "step": 5334 }, { "epoch": 0.17892177412593277, "grad_norm": 0.4180521539437721, "learning_rate": 2e-05, "loss": 5.5157, "step": 5335 }, { "epoch": 0.1789553114781588, "grad_norm": 0.42717997647828265, "learning_rate": 2e-05, "loss": 5.6007, "step": 5336 }, { "epoch": 0.17898884883038485, "grad_norm": 0.4264593734664647, "learning_rate": 2e-05, "loss": 5.6413, "step": 5337 }, { "epoch": 0.17902238618261088, "grad_norm": 0.4534177654368162, "learning_rate": 2e-05, "loss": 5.5939, "step": 5338 }, { "epoch": 0.17905592353483693, "grad_norm": 0.41387701777956254, "learning_rate": 2e-05, "loss": 5.6499, "step": 5339 }, { "epoch": 0.17908946088706296, "grad_norm": 0.4059285970635459, "learning_rate": 2e-05, "loss": 5.3854, "step": 5340 }, { "epoch": 0.179122998239289, "grad_norm": 0.4282477386507804, "learning_rate": 2e-05, "loss": 5.3561, "step": 5341 }, { "epoch": 0.17915653559151504, "grad_norm": 0.47656359371837864, "learning_rate": 2e-05, "loss": 5.5637, "step": 5342 }, { "epoch": 0.1791900729437411, "grad_norm": 0.4297983238187769, "learning_rate": 2e-05, "loss": 5.5819, "step": 5343 }, { "epoch": 0.17922361029596715, "grad_norm": 0.41811627219105185, "learning_rate": 2e-05, "loss": 5.5125, "step": 5344 }, { "epoch": 0.17925714764819317, "grad_norm": 0.42978561000739607, "learning_rate": 2e-05, "loss": 5.5829, "step": 5345 }, { "epoch": 0.17929068500041923, "grad_norm": 0.4247922785730071, "learning_rate": 2e-05, "loss": 5.3886, "step": 5346 }, { "epoch": 0.17932422235264525, "grad_norm": 0.4251825886040901, "learning_rate": 2e-05, "loss": 5.6988, "step": 5347 }, { "epoch": 0.1793577597048713, "grad_norm": 0.4088202774671188, "learning_rate": 2e-05, "loss": 5.7661, "step": 5348 }, { "epoch": 0.17939129705709733, "grad_norm": 0.4340287168062515, "learning_rate": 2e-05, "loss": 5.5586, "step": 5349 }, { "epoch": 0.17942483440932339, "grad_norm": 0.39703047745367526, "learning_rate": 2e-05, "loss": 5.4994, "step": 5350 }, { "epoch": 0.1794583717615494, "grad_norm": 0.43034301657942603, "learning_rate": 2e-05, "loss": 5.622, "step": 5351 }, { "epoch": 0.17949190911377547, "grad_norm": 0.41269861720337003, "learning_rate": 2e-05, "loss": 5.4171, "step": 5352 }, { "epoch": 0.17952544646600152, "grad_norm": 0.4027823201244567, "learning_rate": 2e-05, "loss": 5.5644, "step": 5353 }, { "epoch": 0.17955898381822755, "grad_norm": 0.4006310602772299, "learning_rate": 2e-05, "loss": 5.6908, "step": 5354 }, { "epoch": 0.1795925211704536, "grad_norm": 0.4245427561678106, "learning_rate": 2e-05, "loss": 5.6125, "step": 5355 }, { "epoch": 0.17962605852267963, "grad_norm": 0.4047002815885512, "learning_rate": 2e-05, "loss": 5.6477, "step": 5356 }, { "epoch": 0.17965959587490568, "grad_norm": 0.3875167739458271, "learning_rate": 2e-05, "loss": 5.6516, "step": 5357 }, { "epoch": 0.1796931332271317, "grad_norm": 0.42882842430170126, "learning_rate": 2e-05, "loss": 5.6573, "step": 5358 }, { "epoch": 0.17972667057935776, "grad_norm": 0.3861049637607319, "learning_rate": 2e-05, "loss": 5.4189, "step": 5359 }, { "epoch": 0.17976020793158382, "grad_norm": 0.40097012991620395, "learning_rate": 2e-05, "loss": 5.5662, "step": 5360 }, { "epoch": 0.17979374528380984, "grad_norm": 0.40475587822015346, "learning_rate": 2e-05, "loss": 5.4686, "step": 5361 }, { "epoch": 0.1798272826360359, "grad_norm": 0.3860012810888155, "learning_rate": 2e-05, "loss": 5.4851, "step": 5362 }, { "epoch": 0.17986081998826192, "grad_norm": 0.41253048763850503, "learning_rate": 2e-05, "loss": 5.6005, "step": 5363 }, { "epoch": 0.17989435734048798, "grad_norm": 0.3952698811836869, "learning_rate": 2e-05, "loss": 5.5914, "step": 5364 }, { "epoch": 0.179927894692714, "grad_norm": 0.41207580791315357, "learning_rate": 2e-05, "loss": 5.6476, "step": 5365 }, { "epoch": 0.17996143204494006, "grad_norm": 0.39683758076036885, "learning_rate": 2e-05, "loss": 5.678, "step": 5366 }, { "epoch": 0.17999496939716608, "grad_norm": 0.3978615220440711, "learning_rate": 2e-05, "loss": 5.5761, "step": 5367 }, { "epoch": 0.18002850674939214, "grad_norm": 0.41181017465240943, "learning_rate": 2e-05, "loss": 5.4423, "step": 5368 }, { "epoch": 0.1800620441016182, "grad_norm": 0.4400074874085895, "learning_rate": 2e-05, "loss": 5.5626, "step": 5369 }, { "epoch": 0.18009558145384422, "grad_norm": 0.3996108721972904, "learning_rate": 2e-05, "loss": 5.5617, "step": 5370 }, { "epoch": 0.18012911880607027, "grad_norm": 0.42913314614570414, "learning_rate": 2e-05, "loss": 5.7218, "step": 5371 }, { "epoch": 0.1801626561582963, "grad_norm": 0.3988459088135979, "learning_rate": 2e-05, "loss": 5.5874, "step": 5372 }, { "epoch": 0.18019619351052235, "grad_norm": 0.4134305454606478, "learning_rate": 2e-05, "loss": 5.6088, "step": 5373 }, { "epoch": 0.18022973086274838, "grad_norm": 0.4182926498206982, "learning_rate": 2e-05, "loss": 5.5141, "step": 5374 }, { "epoch": 0.18026326821497443, "grad_norm": 0.386515451379899, "learning_rate": 2e-05, "loss": 5.4921, "step": 5375 }, { "epoch": 0.18029680556720046, "grad_norm": 0.3943820345924363, "learning_rate": 2e-05, "loss": 5.4591, "step": 5376 }, { "epoch": 0.1803303429194265, "grad_norm": 0.42035178180524396, "learning_rate": 2e-05, "loss": 5.4915, "step": 5377 }, { "epoch": 0.18036388027165257, "grad_norm": 0.4064779228132342, "learning_rate": 2e-05, "loss": 5.5749, "step": 5378 }, { "epoch": 0.1803974176238786, "grad_norm": 0.4135424821699251, "learning_rate": 2e-05, "loss": 5.7429, "step": 5379 }, { "epoch": 0.18043095497610465, "grad_norm": 0.4460915993248404, "learning_rate": 2e-05, "loss": 5.6153, "step": 5380 }, { "epoch": 0.18046449232833067, "grad_norm": 0.3998292550887475, "learning_rate": 2e-05, "loss": 5.5543, "step": 5381 }, { "epoch": 0.18049802968055673, "grad_norm": 0.41704854708839817, "learning_rate": 2e-05, "loss": 5.4852, "step": 5382 }, { "epoch": 0.18053156703278275, "grad_norm": 0.41956377099202363, "learning_rate": 2e-05, "loss": 5.6247, "step": 5383 }, { "epoch": 0.1805651043850088, "grad_norm": 0.45806332615928586, "learning_rate": 2e-05, "loss": 5.5517, "step": 5384 }, { "epoch": 0.18059864173723483, "grad_norm": 0.45007563509907805, "learning_rate": 2e-05, "loss": 5.5449, "step": 5385 }, { "epoch": 0.1806321790894609, "grad_norm": 0.44376502020189457, "learning_rate": 2e-05, "loss": 5.6619, "step": 5386 }, { "epoch": 0.18066571644168694, "grad_norm": 0.4330844979250415, "learning_rate": 2e-05, "loss": 5.7039, "step": 5387 }, { "epoch": 0.18069925379391297, "grad_norm": 0.43646542464842614, "learning_rate": 2e-05, "loss": 5.4554, "step": 5388 }, { "epoch": 0.18073279114613902, "grad_norm": 0.41855026846639315, "learning_rate": 2e-05, "loss": 5.8476, "step": 5389 }, { "epoch": 0.18076632849836505, "grad_norm": 0.40707049149487756, "learning_rate": 2e-05, "loss": 5.558, "step": 5390 }, { "epoch": 0.1807998658505911, "grad_norm": 0.4286458788828664, "learning_rate": 2e-05, "loss": 5.5788, "step": 5391 }, { "epoch": 0.18083340320281713, "grad_norm": 0.4540676307294657, "learning_rate": 2e-05, "loss": 5.5973, "step": 5392 }, { "epoch": 0.18086694055504318, "grad_norm": 0.42289674355691353, "learning_rate": 2e-05, "loss": 5.6339, "step": 5393 }, { "epoch": 0.1809004779072692, "grad_norm": 0.40628909073967634, "learning_rate": 2e-05, "loss": 5.6263, "step": 5394 }, { "epoch": 0.18093401525949526, "grad_norm": 0.4222889492832027, "learning_rate": 2e-05, "loss": 5.674, "step": 5395 }, { "epoch": 0.18096755261172132, "grad_norm": 0.4135421919596199, "learning_rate": 2e-05, "loss": 5.541, "step": 5396 }, { "epoch": 0.18100108996394734, "grad_norm": 0.40242790280525037, "learning_rate": 2e-05, "loss": 5.4742, "step": 5397 }, { "epoch": 0.1810346273161734, "grad_norm": 0.4567568780819571, "learning_rate": 2e-05, "loss": 5.4313, "step": 5398 }, { "epoch": 0.18106816466839942, "grad_norm": 0.39120425638083806, "learning_rate": 2e-05, "loss": 5.5144, "step": 5399 }, { "epoch": 0.18110170202062548, "grad_norm": 0.43961087350792055, "learning_rate": 2e-05, "loss": 5.5966, "step": 5400 }, { "epoch": 0.1811352393728515, "grad_norm": 0.41402145967752224, "learning_rate": 2e-05, "loss": 5.7053, "step": 5401 }, { "epoch": 0.18116877672507756, "grad_norm": 0.4065229711064384, "learning_rate": 2e-05, "loss": 5.4517, "step": 5402 }, { "epoch": 0.18120231407730358, "grad_norm": 0.4557562758554575, "learning_rate": 2e-05, "loss": 5.4194, "step": 5403 }, { "epoch": 0.18123585142952964, "grad_norm": 0.4595500257664139, "learning_rate": 2e-05, "loss": 5.7103, "step": 5404 }, { "epoch": 0.1812693887817557, "grad_norm": 0.39628224261084083, "learning_rate": 2e-05, "loss": 5.2747, "step": 5405 }, { "epoch": 0.18130292613398172, "grad_norm": 0.4021166892699999, "learning_rate": 2e-05, "loss": 5.4245, "step": 5406 }, { "epoch": 0.18133646348620777, "grad_norm": 0.44575842566246227, "learning_rate": 2e-05, "loss": 5.5395, "step": 5407 }, { "epoch": 0.1813700008384338, "grad_norm": 0.41740065394917375, "learning_rate": 2e-05, "loss": 5.5813, "step": 5408 }, { "epoch": 0.18140353819065985, "grad_norm": 0.43383709282279587, "learning_rate": 2e-05, "loss": 5.3005, "step": 5409 }, { "epoch": 0.18143707554288588, "grad_norm": 0.4282270065337663, "learning_rate": 2e-05, "loss": 5.4579, "step": 5410 }, { "epoch": 0.18147061289511193, "grad_norm": 0.4260347059986194, "learning_rate": 2e-05, "loss": 5.6813, "step": 5411 }, { "epoch": 0.181504150247338, "grad_norm": 0.4223595605872885, "learning_rate": 2e-05, "loss": 5.6379, "step": 5412 }, { "epoch": 0.181537687599564, "grad_norm": 0.4172939780290584, "learning_rate": 2e-05, "loss": 5.5001, "step": 5413 }, { "epoch": 0.18157122495179007, "grad_norm": 0.42955926652775134, "learning_rate": 2e-05, "loss": 5.6052, "step": 5414 }, { "epoch": 0.1816047623040161, "grad_norm": 0.4109587538424818, "learning_rate": 2e-05, "loss": 5.6832, "step": 5415 }, { "epoch": 0.18163829965624215, "grad_norm": 0.4510350259451175, "learning_rate": 2e-05, "loss": 5.6676, "step": 5416 }, { "epoch": 0.18167183700846817, "grad_norm": 0.40469878674057336, "learning_rate": 2e-05, "loss": 5.6394, "step": 5417 }, { "epoch": 0.18170537436069423, "grad_norm": 0.4022761444086318, "learning_rate": 2e-05, "loss": 5.5754, "step": 5418 }, { "epoch": 0.18173891171292025, "grad_norm": 0.42269891885355737, "learning_rate": 2e-05, "loss": 5.5473, "step": 5419 }, { "epoch": 0.1817724490651463, "grad_norm": 0.43950798811233355, "learning_rate": 2e-05, "loss": 5.5545, "step": 5420 }, { "epoch": 0.18180598641737236, "grad_norm": 0.4149903151626163, "learning_rate": 2e-05, "loss": 5.5894, "step": 5421 }, { "epoch": 0.1818395237695984, "grad_norm": 0.4081057568923133, "learning_rate": 2e-05, "loss": 5.5576, "step": 5422 }, { "epoch": 0.18187306112182444, "grad_norm": 0.4252403480548328, "learning_rate": 2e-05, "loss": 5.5163, "step": 5423 }, { "epoch": 0.18190659847405047, "grad_norm": 0.40789590497428746, "learning_rate": 2e-05, "loss": 5.4721, "step": 5424 }, { "epoch": 0.18194013582627652, "grad_norm": 0.40678753757491326, "learning_rate": 2e-05, "loss": 5.4593, "step": 5425 }, { "epoch": 0.18197367317850255, "grad_norm": 0.4121099315825258, "learning_rate": 2e-05, "loss": 5.6775, "step": 5426 }, { "epoch": 0.1820072105307286, "grad_norm": 0.4106368408255001, "learning_rate": 2e-05, "loss": 5.7326, "step": 5427 }, { "epoch": 0.18204074788295463, "grad_norm": 0.42642381707108, "learning_rate": 2e-05, "loss": 5.6827, "step": 5428 }, { "epoch": 0.18207428523518068, "grad_norm": 0.39161047596669785, "learning_rate": 2e-05, "loss": 5.6365, "step": 5429 }, { "epoch": 0.18210782258740674, "grad_norm": 0.41147677996466625, "learning_rate": 2e-05, "loss": 5.6303, "step": 5430 }, { "epoch": 0.18214135993963276, "grad_norm": 0.41963836014241906, "learning_rate": 2e-05, "loss": 5.6077, "step": 5431 }, { "epoch": 0.18217489729185882, "grad_norm": 0.4402537224319732, "learning_rate": 2e-05, "loss": 5.5548, "step": 5432 }, { "epoch": 0.18220843464408484, "grad_norm": 0.4250760728791656, "learning_rate": 2e-05, "loss": 5.4786, "step": 5433 }, { "epoch": 0.1822419719963109, "grad_norm": 0.3951863419072836, "learning_rate": 2e-05, "loss": 5.4884, "step": 5434 }, { "epoch": 0.18227550934853692, "grad_norm": 0.4397312874338281, "learning_rate": 2e-05, "loss": 5.5074, "step": 5435 }, { "epoch": 0.18230904670076298, "grad_norm": 0.42720950067926694, "learning_rate": 2e-05, "loss": 5.5998, "step": 5436 }, { "epoch": 0.182342584052989, "grad_norm": 0.4082754545627108, "learning_rate": 2e-05, "loss": 5.4313, "step": 5437 }, { "epoch": 0.18237612140521506, "grad_norm": 0.4231026085790486, "learning_rate": 2e-05, "loss": 5.5078, "step": 5438 }, { "epoch": 0.1824096587574411, "grad_norm": 0.4181008165903873, "learning_rate": 2e-05, "loss": 5.5942, "step": 5439 }, { "epoch": 0.18244319610966714, "grad_norm": 0.4006120650483981, "learning_rate": 2e-05, "loss": 5.5206, "step": 5440 }, { "epoch": 0.1824767334618932, "grad_norm": 0.420592690782487, "learning_rate": 2e-05, "loss": 5.5974, "step": 5441 }, { "epoch": 0.18251027081411922, "grad_norm": 0.3941941451272695, "learning_rate": 2e-05, "loss": 5.5506, "step": 5442 }, { "epoch": 0.18254380816634527, "grad_norm": 0.41697091606084963, "learning_rate": 2e-05, "loss": 5.6604, "step": 5443 }, { "epoch": 0.1825773455185713, "grad_norm": 0.45273273123302493, "learning_rate": 2e-05, "loss": 5.717, "step": 5444 }, { "epoch": 0.18261088287079735, "grad_norm": 0.4169118031134352, "learning_rate": 2e-05, "loss": 5.4666, "step": 5445 }, { "epoch": 0.18264442022302338, "grad_norm": 0.4043673171029714, "learning_rate": 2e-05, "loss": 5.5227, "step": 5446 }, { "epoch": 0.18267795757524943, "grad_norm": 0.409215784911074, "learning_rate": 2e-05, "loss": 5.6423, "step": 5447 }, { "epoch": 0.1827114949274755, "grad_norm": 0.42208979785551043, "learning_rate": 2e-05, "loss": 5.4478, "step": 5448 }, { "epoch": 0.1827450322797015, "grad_norm": 0.46645444504258143, "learning_rate": 2e-05, "loss": 5.3633, "step": 5449 }, { "epoch": 0.18277856963192757, "grad_norm": 0.4112452344619882, "learning_rate": 2e-05, "loss": 5.7061, "step": 5450 }, { "epoch": 0.1828121069841536, "grad_norm": 0.45611089824569845, "learning_rate": 2e-05, "loss": 5.5794, "step": 5451 }, { "epoch": 0.18284564433637965, "grad_norm": 0.4687728059906983, "learning_rate": 2e-05, "loss": 5.5578, "step": 5452 }, { "epoch": 0.18287918168860567, "grad_norm": 0.41697783218308077, "learning_rate": 2e-05, "loss": 5.5566, "step": 5453 }, { "epoch": 0.18291271904083173, "grad_norm": 0.4576086773595539, "learning_rate": 2e-05, "loss": 5.573, "step": 5454 }, { "epoch": 0.18294625639305775, "grad_norm": 0.4757508738219736, "learning_rate": 2e-05, "loss": 5.5075, "step": 5455 }, { "epoch": 0.1829797937452838, "grad_norm": 0.4301741969669221, "learning_rate": 2e-05, "loss": 5.7123, "step": 5456 }, { "epoch": 0.18301333109750986, "grad_norm": 0.4352410605328376, "learning_rate": 2e-05, "loss": 5.5957, "step": 5457 }, { "epoch": 0.1830468684497359, "grad_norm": 0.4152832196605836, "learning_rate": 2e-05, "loss": 5.4316, "step": 5458 }, { "epoch": 0.18308040580196194, "grad_norm": 0.42415576516908216, "learning_rate": 2e-05, "loss": 5.3681, "step": 5459 }, { "epoch": 0.18311394315418797, "grad_norm": 0.42599073440110263, "learning_rate": 2e-05, "loss": 5.4953, "step": 5460 }, { "epoch": 0.18314748050641402, "grad_norm": 0.42070294216542353, "learning_rate": 2e-05, "loss": 5.63, "step": 5461 }, { "epoch": 0.18318101785864005, "grad_norm": 0.42556754986863315, "learning_rate": 2e-05, "loss": 5.7723, "step": 5462 }, { "epoch": 0.1832145552108661, "grad_norm": 0.4485932496231849, "learning_rate": 2e-05, "loss": 5.4954, "step": 5463 }, { "epoch": 0.18324809256309216, "grad_norm": 0.42600508597089914, "learning_rate": 2e-05, "loss": 5.3778, "step": 5464 }, { "epoch": 0.18328162991531818, "grad_norm": 0.4529030346231475, "learning_rate": 2e-05, "loss": 5.6787, "step": 5465 }, { "epoch": 0.18331516726754424, "grad_norm": 0.4323743688863427, "learning_rate": 2e-05, "loss": 5.3304, "step": 5466 }, { "epoch": 0.18334870461977026, "grad_norm": 0.4365961934070324, "learning_rate": 2e-05, "loss": 5.505, "step": 5467 }, { "epoch": 0.18338224197199632, "grad_norm": 0.41313486441469965, "learning_rate": 2e-05, "loss": 5.4336, "step": 5468 }, { "epoch": 0.18341577932422234, "grad_norm": 0.4402959662948167, "learning_rate": 2e-05, "loss": 5.4544, "step": 5469 }, { "epoch": 0.1834493166764484, "grad_norm": 0.41914488118933546, "learning_rate": 2e-05, "loss": 5.5879, "step": 5470 }, { "epoch": 0.18348285402867442, "grad_norm": 0.44125966567015207, "learning_rate": 2e-05, "loss": 5.6165, "step": 5471 }, { "epoch": 0.18351639138090048, "grad_norm": 0.44329000035546895, "learning_rate": 2e-05, "loss": 5.3531, "step": 5472 }, { "epoch": 0.18354992873312653, "grad_norm": 0.4142492590015674, "learning_rate": 2e-05, "loss": 5.6159, "step": 5473 }, { "epoch": 0.18358346608535256, "grad_norm": 0.4557333991582097, "learning_rate": 2e-05, "loss": 5.5625, "step": 5474 }, { "epoch": 0.1836170034375786, "grad_norm": 0.40869583741511717, "learning_rate": 2e-05, "loss": 5.5027, "step": 5475 }, { "epoch": 0.18365054078980464, "grad_norm": 0.426035990466835, "learning_rate": 2e-05, "loss": 5.5568, "step": 5476 }, { "epoch": 0.1836840781420307, "grad_norm": 0.43246104797648854, "learning_rate": 2e-05, "loss": 5.4943, "step": 5477 }, { "epoch": 0.18371761549425672, "grad_norm": 0.44209697390178515, "learning_rate": 2e-05, "loss": 5.6101, "step": 5478 }, { "epoch": 0.18375115284648277, "grad_norm": 0.40496843722553993, "learning_rate": 2e-05, "loss": 5.6931, "step": 5479 }, { "epoch": 0.1837846901987088, "grad_norm": 0.4144924654961706, "learning_rate": 2e-05, "loss": 5.3811, "step": 5480 }, { "epoch": 0.18381822755093485, "grad_norm": 0.4254890153897503, "learning_rate": 2e-05, "loss": 5.5557, "step": 5481 }, { "epoch": 0.1838517649031609, "grad_norm": 0.38919588453994375, "learning_rate": 2e-05, "loss": 5.5233, "step": 5482 }, { "epoch": 0.18388530225538693, "grad_norm": 0.4025084281436669, "learning_rate": 2e-05, "loss": 5.6898, "step": 5483 }, { "epoch": 0.183918839607613, "grad_norm": 0.44150197227665905, "learning_rate": 2e-05, "loss": 5.4822, "step": 5484 }, { "epoch": 0.18395237695983901, "grad_norm": 0.42432536393088854, "learning_rate": 2e-05, "loss": 5.7395, "step": 5485 }, { "epoch": 0.18398591431206507, "grad_norm": 0.45056248748067634, "learning_rate": 2e-05, "loss": 5.6741, "step": 5486 }, { "epoch": 0.1840194516642911, "grad_norm": 0.43464336689915734, "learning_rate": 2e-05, "loss": 5.6862, "step": 5487 }, { "epoch": 0.18405298901651715, "grad_norm": 0.44089433380827725, "learning_rate": 2e-05, "loss": 5.4113, "step": 5488 }, { "epoch": 0.18408652636874318, "grad_norm": 0.43051159460971317, "learning_rate": 2e-05, "loss": 5.5091, "step": 5489 }, { "epoch": 0.18412006372096923, "grad_norm": 0.4016080067062424, "learning_rate": 2e-05, "loss": 5.7438, "step": 5490 }, { "epoch": 0.18415360107319528, "grad_norm": 0.4248801304892817, "learning_rate": 2e-05, "loss": 5.4383, "step": 5491 }, { "epoch": 0.1841871384254213, "grad_norm": 0.404256848012259, "learning_rate": 2e-05, "loss": 5.5438, "step": 5492 }, { "epoch": 0.18422067577764736, "grad_norm": 0.39867335276422666, "learning_rate": 2e-05, "loss": 5.679, "step": 5493 }, { "epoch": 0.1842542131298734, "grad_norm": 0.4443873158395568, "learning_rate": 2e-05, "loss": 5.2732, "step": 5494 }, { "epoch": 0.18428775048209944, "grad_norm": 0.4221064970339122, "learning_rate": 2e-05, "loss": 5.6431, "step": 5495 }, { "epoch": 0.18432128783432547, "grad_norm": 0.40147785840891453, "learning_rate": 2e-05, "loss": 5.5737, "step": 5496 }, { "epoch": 0.18435482518655152, "grad_norm": 0.41056871314070215, "learning_rate": 2e-05, "loss": 5.3883, "step": 5497 }, { "epoch": 0.18438836253877755, "grad_norm": 0.4196382205642561, "learning_rate": 2e-05, "loss": 5.6092, "step": 5498 }, { "epoch": 0.1844218998910036, "grad_norm": 0.4072127775317205, "learning_rate": 2e-05, "loss": 5.5197, "step": 5499 }, { "epoch": 0.18445543724322966, "grad_norm": 0.39806198113018937, "learning_rate": 2e-05, "loss": 5.3727, "step": 5500 }, { "epoch": 0.18448897459545568, "grad_norm": 0.42955300717348915, "learning_rate": 2e-05, "loss": 5.6413, "step": 5501 }, { "epoch": 0.18452251194768174, "grad_norm": 0.3965440913011382, "learning_rate": 2e-05, "loss": 5.5329, "step": 5502 }, { "epoch": 0.18455604929990777, "grad_norm": 0.4202310089809748, "learning_rate": 2e-05, "loss": 5.5115, "step": 5503 }, { "epoch": 0.18458958665213382, "grad_norm": 0.44986930910709644, "learning_rate": 2e-05, "loss": 5.5069, "step": 5504 }, { "epoch": 0.18462312400435985, "grad_norm": 0.4156334931486732, "learning_rate": 2e-05, "loss": 5.4395, "step": 5505 }, { "epoch": 0.1846566613565859, "grad_norm": 0.3983043942570196, "learning_rate": 2e-05, "loss": 5.4687, "step": 5506 }, { "epoch": 0.18469019870881193, "grad_norm": 0.41650483827282314, "learning_rate": 2e-05, "loss": 5.7288, "step": 5507 }, { "epoch": 0.18472373606103798, "grad_norm": 0.4377222641192553, "learning_rate": 2e-05, "loss": 5.4906, "step": 5508 }, { "epoch": 0.18475727341326403, "grad_norm": 0.4069133825534681, "learning_rate": 2e-05, "loss": 5.4859, "step": 5509 }, { "epoch": 0.18479081076549006, "grad_norm": 0.4184361662711844, "learning_rate": 2e-05, "loss": 5.493, "step": 5510 }, { "epoch": 0.18482434811771611, "grad_norm": 0.42596844395970135, "learning_rate": 2e-05, "loss": 5.6681, "step": 5511 }, { "epoch": 0.18485788546994214, "grad_norm": 0.4232113796603212, "learning_rate": 2e-05, "loss": 5.5527, "step": 5512 }, { "epoch": 0.1848914228221682, "grad_norm": 0.41530221670874873, "learning_rate": 2e-05, "loss": 5.6894, "step": 5513 }, { "epoch": 0.18492496017439422, "grad_norm": 0.4068005318262833, "learning_rate": 2e-05, "loss": 5.5373, "step": 5514 }, { "epoch": 0.18495849752662027, "grad_norm": 0.4227546319921473, "learning_rate": 2e-05, "loss": 5.6513, "step": 5515 }, { "epoch": 0.18499203487884633, "grad_norm": 0.4366188277877699, "learning_rate": 2e-05, "loss": 5.5137, "step": 5516 }, { "epoch": 0.18502557223107235, "grad_norm": 0.4662262924337916, "learning_rate": 2e-05, "loss": 5.6062, "step": 5517 }, { "epoch": 0.1850591095832984, "grad_norm": 0.3992247071390688, "learning_rate": 2e-05, "loss": 5.6089, "step": 5518 }, { "epoch": 0.18509264693552444, "grad_norm": 0.46308066113632873, "learning_rate": 2e-05, "loss": 5.5625, "step": 5519 }, { "epoch": 0.1851261842877505, "grad_norm": 0.43546441748249903, "learning_rate": 2e-05, "loss": 5.6885, "step": 5520 }, { "epoch": 0.18515972163997652, "grad_norm": 0.39658540106845314, "learning_rate": 2e-05, "loss": 5.521, "step": 5521 }, { "epoch": 0.18519325899220257, "grad_norm": 0.4280077763775537, "learning_rate": 2e-05, "loss": 5.4749, "step": 5522 }, { "epoch": 0.1852267963444286, "grad_norm": 0.41483139587724865, "learning_rate": 2e-05, "loss": 5.5031, "step": 5523 }, { "epoch": 0.18526033369665465, "grad_norm": 0.4596814643306504, "learning_rate": 2e-05, "loss": 5.5818, "step": 5524 }, { "epoch": 0.1852938710488807, "grad_norm": 0.3921344215223195, "learning_rate": 2e-05, "loss": 5.4304, "step": 5525 }, { "epoch": 0.18532740840110673, "grad_norm": 0.43519472026582084, "learning_rate": 2e-05, "loss": 5.7237, "step": 5526 }, { "epoch": 0.18536094575333278, "grad_norm": 0.4406777175853832, "learning_rate": 2e-05, "loss": 5.5888, "step": 5527 }, { "epoch": 0.1853944831055588, "grad_norm": 0.43141129926666394, "learning_rate": 2e-05, "loss": 5.584, "step": 5528 }, { "epoch": 0.18542802045778486, "grad_norm": 0.4327693706739653, "learning_rate": 2e-05, "loss": 5.3091, "step": 5529 }, { "epoch": 0.1854615578100109, "grad_norm": 0.42277412019436367, "learning_rate": 2e-05, "loss": 5.4403, "step": 5530 }, { "epoch": 0.18549509516223694, "grad_norm": 0.4180365956769529, "learning_rate": 2e-05, "loss": 5.561, "step": 5531 }, { "epoch": 0.18552863251446297, "grad_norm": 0.43245633886622603, "learning_rate": 2e-05, "loss": 5.6278, "step": 5532 }, { "epoch": 0.18556216986668903, "grad_norm": 0.49715269502247, "learning_rate": 2e-05, "loss": 5.4335, "step": 5533 }, { "epoch": 0.18559570721891508, "grad_norm": 0.4274305282413191, "learning_rate": 2e-05, "loss": 5.4562, "step": 5534 }, { "epoch": 0.1856292445711411, "grad_norm": 0.4033937693866874, "learning_rate": 2e-05, "loss": 5.6331, "step": 5535 }, { "epoch": 0.18566278192336716, "grad_norm": 0.44619298015491043, "learning_rate": 2e-05, "loss": 5.5428, "step": 5536 }, { "epoch": 0.18569631927559319, "grad_norm": 0.4615433355085579, "learning_rate": 2e-05, "loss": 5.3988, "step": 5537 }, { "epoch": 0.18572985662781924, "grad_norm": 0.4004863015143426, "learning_rate": 2e-05, "loss": 5.7157, "step": 5538 }, { "epoch": 0.18576339398004527, "grad_norm": 0.4209919809504346, "learning_rate": 2e-05, "loss": 5.5985, "step": 5539 }, { "epoch": 0.18579693133227132, "grad_norm": 0.44260311080367304, "learning_rate": 2e-05, "loss": 5.5863, "step": 5540 }, { "epoch": 0.18583046868449735, "grad_norm": 0.4329443644287201, "learning_rate": 2e-05, "loss": 5.6218, "step": 5541 }, { "epoch": 0.1858640060367234, "grad_norm": 0.43365653023993933, "learning_rate": 2e-05, "loss": 5.64, "step": 5542 }, { "epoch": 0.18589754338894945, "grad_norm": 0.4312884057829163, "learning_rate": 2e-05, "loss": 5.6894, "step": 5543 }, { "epoch": 0.18593108074117548, "grad_norm": 0.4166801158972266, "learning_rate": 2e-05, "loss": 5.5662, "step": 5544 }, { "epoch": 0.18596461809340153, "grad_norm": 0.40100322160815943, "learning_rate": 2e-05, "loss": 5.5688, "step": 5545 }, { "epoch": 0.18599815544562756, "grad_norm": 0.4025400176164295, "learning_rate": 2e-05, "loss": 5.5572, "step": 5546 }, { "epoch": 0.18603169279785362, "grad_norm": 0.3929309965147358, "learning_rate": 2e-05, "loss": 5.4287, "step": 5547 }, { "epoch": 0.18606523015007964, "grad_norm": 0.43448095318900454, "learning_rate": 2e-05, "loss": 5.4727, "step": 5548 }, { "epoch": 0.1860987675023057, "grad_norm": 0.42104259919929365, "learning_rate": 2e-05, "loss": 5.7602, "step": 5549 }, { "epoch": 0.18613230485453172, "grad_norm": 0.40175401999502464, "learning_rate": 2e-05, "loss": 5.5686, "step": 5550 }, { "epoch": 0.18616584220675778, "grad_norm": 0.40992601301458453, "learning_rate": 2e-05, "loss": 5.4615, "step": 5551 }, { "epoch": 0.18619937955898383, "grad_norm": 0.46083309632597097, "learning_rate": 2e-05, "loss": 5.4614, "step": 5552 }, { "epoch": 0.18623291691120986, "grad_norm": 0.44595897027524234, "learning_rate": 2e-05, "loss": 5.6268, "step": 5553 }, { "epoch": 0.1862664542634359, "grad_norm": 0.45290525957292077, "learning_rate": 2e-05, "loss": 5.5326, "step": 5554 }, { "epoch": 0.18629999161566194, "grad_norm": 0.42722637525537954, "learning_rate": 2e-05, "loss": 5.5962, "step": 5555 }, { "epoch": 0.186333528967888, "grad_norm": 0.42898167578310537, "learning_rate": 2e-05, "loss": 5.4856, "step": 5556 }, { "epoch": 0.18636706632011402, "grad_norm": 0.43181940114554357, "learning_rate": 2e-05, "loss": 5.4043, "step": 5557 }, { "epoch": 0.18640060367234007, "grad_norm": 0.4023876667063918, "learning_rate": 2e-05, "loss": 5.5566, "step": 5558 }, { "epoch": 0.1864341410245661, "grad_norm": 0.4127316940917068, "learning_rate": 2e-05, "loss": 5.5395, "step": 5559 }, { "epoch": 0.18646767837679215, "grad_norm": 0.4294194788368256, "learning_rate": 2e-05, "loss": 5.6388, "step": 5560 }, { "epoch": 0.1865012157290182, "grad_norm": 0.45256549574751437, "learning_rate": 2e-05, "loss": 5.8322, "step": 5561 }, { "epoch": 0.18653475308124423, "grad_norm": 0.4014899226774748, "learning_rate": 2e-05, "loss": 5.6886, "step": 5562 }, { "epoch": 0.18656829043347029, "grad_norm": 0.4308786863557511, "learning_rate": 2e-05, "loss": 5.3841, "step": 5563 }, { "epoch": 0.1866018277856963, "grad_norm": 0.4776924207782618, "learning_rate": 2e-05, "loss": 5.7045, "step": 5564 }, { "epoch": 0.18663536513792237, "grad_norm": 0.43978635525756, "learning_rate": 2e-05, "loss": 5.6253, "step": 5565 }, { "epoch": 0.1866689024901484, "grad_norm": 0.44371154249455635, "learning_rate": 2e-05, "loss": 5.3921, "step": 5566 }, { "epoch": 0.18670243984237445, "grad_norm": 0.42527395629949266, "learning_rate": 2e-05, "loss": 5.6746, "step": 5567 }, { "epoch": 0.1867359771946005, "grad_norm": 0.4481558393608694, "learning_rate": 2e-05, "loss": 5.6143, "step": 5568 }, { "epoch": 0.18676951454682653, "grad_norm": 0.4357274249917432, "learning_rate": 2e-05, "loss": 5.4922, "step": 5569 }, { "epoch": 0.18680305189905258, "grad_norm": 0.41507788760279785, "learning_rate": 2e-05, "loss": 5.5743, "step": 5570 }, { "epoch": 0.1868365892512786, "grad_norm": 0.4163745357521912, "learning_rate": 2e-05, "loss": 5.6041, "step": 5571 }, { "epoch": 0.18687012660350466, "grad_norm": 0.4550047388216052, "learning_rate": 2e-05, "loss": 5.5876, "step": 5572 }, { "epoch": 0.1869036639557307, "grad_norm": 0.4072173312080057, "learning_rate": 2e-05, "loss": 5.5062, "step": 5573 }, { "epoch": 0.18693720130795674, "grad_norm": 0.4366454937256767, "learning_rate": 2e-05, "loss": 5.6203, "step": 5574 }, { "epoch": 0.18697073866018277, "grad_norm": 0.4207347859859702, "learning_rate": 2e-05, "loss": 5.7376, "step": 5575 }, { "epoch": 0.18700427601240882, "grad_norm": 0.4303315035147812, "learning_rate": 2e-05, "loss": 5.4853, "step": 5576 }, { "epoch": 0.18703781336463488, "grad_norm": 0.4186238001890467, "learning_rate": 2e-05, "loss": 5.3084, "step": 5577 }, { "epoch": 0.1870713507168609, "grad_norm": 0.4651152325189923, "learning_rate": 2e-05, "loss": 5.5355, "step": 5578 }, { "epoch": 0.18710488806908696, "grad_norm": 0.4330797463555183, "learning_rate": 2e-05, "loss": 5.574, "step": 5579 }, { "epoch": 0.18713842542131298, "grad_norm": 0.4453132758359331, "learning_rate": 2e-05, "loss": 5.5463, "step": 5580 }, { "epoch": 0.18717196277353904, "grad_norm": 0.44923995038294123, "learning_rate": 2e-05, "loss": 5.5087, "step": 5581 }, { "epoch": 0.18720550012576506, "grad_norm": 0.4243554949369918, "learning_rate": 2e-05, "loss": 5.3237, "step": 5582 }, { "epoch": 0.18723903747799112, "grad_norm": 0.4307722661203803, "learning_rate": 2e-05, "loss": 5.3836, "step": 5583 }, { "epoch": 0.18727257483021714, "grad_norm": 0.40894651025050627, "learning_rate": 2e-05, "loss": 5.8548, "step": 5584 }, { "epoch": 0.1873061121824432, "grad_norm": 0.43978088862975917, "learning_rate": 2e-05, "loss": 5.4875, "step": 5585 }, { "epoch": 0.18733964953466925, "grad_norm": 0.4115776419856953, "learning_rate": 2e-05, "loss": 5.5881, "step": 5586 }, { "epoch": 0.18737318688689528, "grad_norm": 0.4114545613690873, "learning_rate": 2e-05, "loss": 5.6423, "step": 5587 }, { "epoch": 0.18740672423912133, "grad_norm": 0.4308940612619781, "learning_rate": 2e-05, "loss": 5.7931, "step": 5588 }, { "epoch": 0.18744026159134736, "grad_norm": 0.4709769901470773, "learning_rate": 2e-05, "loss": 5.6713, "step": 5589 }, { "epoch": 0.1874737989435734, "grad_norm": 0.41711274190471537, "learning_rate": 2e-05, "loss": 5.4084, "step": 5590 }, { "epoch": 0.18750733629579944, "grad_norm": 0.3984243273950401, "learning_rate": 2e-05, "loss": 5.4364, "step": 5591 }, { "epoch": 0.1875408736480255, "grad_norm": 0.462230751895266, "learning_rate": 2e-05, "loss": 5.6046, "step": 5592 }, { "epoch": 0.18757441100025152, "grad_norm": 0.44225420418116834, "learning_rate": 2e-05, "loss": 5.5513, "step": 5593 }, { "epoch": 0.18760794835247757, "grad_norm": 0.42220045062556566, "learning_rate": 2e-05, "loss": 5.5444, "step": 5594 }, { "epoch": 0.18764148570470363, "grad_norm": 0.41714522478135024, "learning_rate": 2e-05, "loss": 5.5137, "step": 5595 }, { "epoch": 0.18767502305692965, "grad_norm": 0.4559509478811534, "learning_rate": 2e-05, "loss": 5.6334, "step": 5596 }, { "epoch": 0.1877085604091557, "grad_norm": 0.4198350134676136, "learning_rate": 2e-05, "loss": 5.3613, "step": 5597 }, { "epoch": 0.18774209776138173, "grad_norm": 0.4263935001171772, "learning_rate": 2e-05, "loss": 5.6121, "step": 5598 }, { "epoch": 0.1877756351136078, "grad_norm": 0.4751638666948611, "learning_rate": 2e-05, "loss": 5.5332, "step": 5599 }, { "epoch": 0.1878091724658338, "grad_norm": 0.43947256983826766, "learning_rate": 2e-05, "loss": 5.4978, "step": 5600 }, { "epoch": 0.18784270981805987, "grad_norm": 0.4067446752183386, "learning_rate": 2e-05, "loss": 5.3956, "step": 5601 }, { "epoch": 0.1878762471702859, "grad_norm": 0.4244138352819654, "learning_rate": 2e-05, "loss": 5.7255, "step": 5602 }, { "epoch": 0.18790978452251195, "grad_norm": 0.43995813149484553, "learning_rate": 2e-05, "loss": 5.4788, "step": 5603 }, { "epoch": 0.187943321874738, "grad_norm": 0.42968277119238873, "learning_rate": 2e-05, "loss": 5.3268, "step": 5604 }, { "epoch": 0.18797685922696403, "grad_norm": 0.41260345937028337, "learning_rate": 2e-05, "loss": 5.6005, "step": 5605 }, { "epoch": 0.18801039657919008, "grad_norm": 0.42145410074534995, "learning_rate": 2e-05, "loss": 5.4703, "step": 5606 }, { "epoch": 0.1880439339314161, "grad_norm": 0.42400174222778486, "learning_rate": 2e-05, "loss": 5.6272, "step": 5607 }, { "epoch": 0.18807747128364216, "grad_norm": 0.3999889850834085, "learning_rate": 2e-05, "loss": 5.6634, "step": 5608 }, { "epoch": 0.1881110086358682, "grad_norm": 0.4571734809429925, "learning_rate": 2e-05, "loss": 5.4927, "step": 5609 }, { "epoch": 0.18814454598809424, "grad_norm": 0.42323319610422666, "learning_rate": 2e-05, "loss": 5.6485, "step": 5610 }, { "epoch": 0.18817808334032027, "grad_norm": 0.38990735138442384, "learning_rate": 2e-05, "loss": 5.5261, "step": 5611 }, { "epoch": 0.18821162069254632, "grad_norm": 0.3853265232349409, "learning_rate": 2e-05, "loss": 5.5013, "step": 5612 }, { "epoch": 0.18824515804477238, "grad_norm": 0.41631226162062607, "learning_rate": 2e-05, "loss": 5.5361, "step": 5613 }, { "epoch": 0.1882786953969984, "grad_norm": 0.4497829954516893, "learning_rate": 2e-05, "loss": 5.7853, "step": 5614 }, { "epoch": 0.18831223274922446, "grad_norm": 0.39184166459300696, "learning_rate": 2e-05, "loss": 5.5927, "step": 5615 }, { "epoch": 0.18834577010145048, "grad_norm": 0.4151298803265593, "learning_rate": 2e-05, "loss": 5.5942, "step": 5616 }, { "epoch": 0.18837930745367654, "grad_norm": 0.41954505824547067, "learning_rate": 2e-05, "loss": 5.6671, "step": 5617 }, { "epoch": 0.18841284480590256, "grad_norm": 0.40392342730785424, "learning_rate": 2e-05, "loss": 5.4624, "step": 5618 }, { "epoch": 0.18844638215812862, "grad_norm": 0.40044778995581337, "learning_rate": 2e-05, "loss": 5.7049, "step": 5619 }, { "epoch": 0.18847991951035467, "grad_norm": 0.4004602678804603, "learning_rate": 2e-05, "loss": 5.8856, "step": 5620 }, { "epoch": 0.1885134568625807, "grad_norm": 0.4021086605103818, "learning_rate": 2e-05, "loss": 5.5927, "step": 5621 }, { "epoch": 0.18854699421480675, "grad_norm": 0.4280977811948527, "learning_rate": 2e-05, "loss": 5.4412, "step": 5622 }, { "epoch": 0.18858053156703278, "grad_norm": 0.41198693016214394, "learning_rate": 2e-05, "loss": 5.7394, "step": 5623 }, { "epoch": 0.18861406891925883, "grad_norm": 0.4322873023809146, "learning_rate": 2e-05, "loss": 5.5981, "step": 5624 }, { "epoch": 0.18864760627148486, "grad_norm": 0.4048507032646084, "learning_rate": 2e-05, "loss": 5.7047, "step": 5625 }, { "epoch": 0.1886811436237109, "grad_norm": 0.4077590191572535, "learning_rate": 2e-05, "loss": 5.5611, "step": 5626 }, { "epoch": 0.18871468097593694, "grad_norm": 0.412261907464591, "learning_rate": 2e-05, "loss": 5.8352, "step": 5627 }, { "epoch": 0.188748218328163, "grad_norm": 0.42145473371551917, "learning_rate": 2e-05, "loss": 5.5508, "step": 5628 }, { "epoch": 0.18878175568038905, "grad_norm": 0.410341435792584, "learning_rate": 2e-05, "loss": 5.6197, "step": 5629 }, { "epoch": 0.18881529303261507, "grad_norm": 0.42093629718035447, "learning_rate": 2e-05, "loss": 5.5777, "step": 5630 }, { "epoch": 0.18884883038484113, "grad_norm": 0.40472540078396935, "learning_rate": 2e-05, "loss": 5.4088, "step": 5631 }, { "epoch": 0.18888236773706715, "grad_norm": 0.4273317090176906, "learning_rate": 2e-05, "loss": 5.3643, "step": 5632 }, { "epoch": 0.1889159050892932, "grad_norm": 0.4093875200395667, "learning_rate": 2e-05, "loss": 5.5707, "step": 5633 }, { "epoch": 0.18894944244151923, "grad_norm": 0.4207808286896164, "learning_rate": 2e-05, "loss": 5.5268, "step": 5634 }, { "epoch": 0.1889829797937453, "grad_norm": 0.4469694460868872, "learning_rate": 2e-05, "loss": 5.5054, "step": 5635 }, { "epoch": 0.1890165171459713, "grad_norm": 0.419567741312301, "learning_rate": 2e-05, "loss": 5.6465, "step": 5636 }, { "epoch": 0.18905005449819737, "grad_norm": 0.4340232645260666, "learning_rate": 2e-05, "loss": 5.4014, "step": 5637 }, { "epoch": 0.18908359185042342, "grad_norm": 0.41172905453984, "learning_rate": 2e-05, "loss": 5.6294, "step": 5638 }, { "epoch": 0.18911712920264945, "grad_norm": 0.4435033684144264, "learning_rate": 2e-05, "loss": 5.4187, "step": 5639 }, { "epoch": 0.1891506665548755, "grad_norm": 0.4332796326503945, "learning_rate": 2e-05, "loss": 5.5731, "step": 5640 }, { "epoch": 0.18918420390710153, "grad_norm": 0.4050186709981533, "learning_rate": 2e-05, "loss": 5.5705, "step": 5641 }, { "epoch": 0.18921774125932758, "grad_norm": 0.41394950331346686, "learning_rate": 2e-05, "loss": 5.6633, "step": 5642 }, { "epoch": 0.1892512786115536, "grad_norm": 0.4503570194464215, "learning_rate": 2e-05, "loss": 5.37, "step": 5643 }, { "epoch": 0.18928481596377966, "grad_norm": 0.39716609199876546, "learning_rate": 2e-05, "loss": 5.6204, "step": 5644 }, { "epoch": 0.1893183533160057, "grad_norm": 0.4101336421166183, "learning_rate": 2e-05, "loss": 5.562, "step": 5645 }, { "epoch": 0.18935189066823174, "grad_norm": 0.39233070394937813, "learning_rate": 2e-05, "loss": 5.6181, "step": 5646 }, { "epoch": 0.1893854280204578, "grad_norm": 0.37878335693642107, "learning_rate": 2e-05, "loss": 5.4812, "step": 5647 }, { "epoch": 0.18941896537268382, "grad_norm": 0.3990384499892143, "learning_rate": 2e-05, "loss": 5.7662, "step": 5648 }, { "epoch": 0.18945250272490988, "grad_norm": 0.4385982360514527, "learning_rate": 2e-05, "loss": 5.4568, "step": 5649 }, { "epoch": 0.1894860400771359, "grad_norm": 0.38344576278031267, "learning_rate": 2e-05, "loss": 5.5426, "step": 5650 }, { "epoch": 0.18951957742936196, "grad_norm": 0.449870990668382, "learning_rate": 2e-05, "loss": 5.5871, "step": 5651 }, { "epoch": 0.18955311478158798, "grad_norm": 0.4097639465209838, "learning_rate": 2e-05, "loss": 5.6745, "step": 5652 }, { "epoch": 0.18958665213381404, "grad_norm": 0.4567068506979967, "learning_rate": 2e-05, "loss": 5.3407, "step": 5653 }, { "epoch": 0.18962018948604006, "grad_norm": 0.3873663025437742, "learning_rate": 2e-05, "loss": 5.373, "step": 5654 }, { "epoch": 0.18965372683826612, "grad_norm": 0.44754802509810315, "learning_rate": 2e-05, "loss": 5.4929, "step": 5655 }, { "epoch": 0.18968726419049217, "grad_norm": 0.3940300543978261, "learning_rate": 2e-05, "loss": 5.5369, "step": 5656 }, { "epoch": 0.1897208015427182, "grad_norm": 0.39648507072793554, "learning_rate": 2e-05, "loss": 5.4014, "step": 5657 }, { "epoch": 0.18975433889494425, "grad_norm": 0.38549472263695406, "learning_rate": 2e-05, "loss": 5.7856, "step": 5658 }, { "epoch": 0.18978787624717028, "grad_norm": 0.41451152192448576, "learning_rate": 2e-05, "loss": 5.686, "step": 5659 }, { "epoch": 0.18982141359939633, "grad_norm": 0.40530572997523867, "learning_rate": 2e-05, "loss": 5.5482, "step": 5660 }, { "epoch": 0.18985495095162236, "grad_norm": 0.41443857839999865, "learning_rate": 2e-05, "loss": 5.4116, "step": 5661 }, { "epoch": 0.1898884883038484, "grad_norm": 0.39021091192252616, "learning_rate": 2e-05, "loss": 5.6203, "step": 5662 }, { "epoch": 0.18992202565607444, "grad_norm": 0.39619521998448853, "learning_rate": 2e-05, "loss": 5.6935, "step": 5663 }, { "epoch": 0.1899555630083005, "grad_norm": 0.4155144617491569, "learning_rate": 2e-05, "loss": 5.7285, "step": 5664 }, { "epoch": 0.18998910036052655, "grad_norm": 0.40436954499379113, "learning_rate": 2e-05, "loss": 5.549, "step": 5665 }, { "epoch": 0.19002263771275257, "grad_norm": 0.40380390028223473, "learning_rate": 2e-05, "loss": 5.4375, "step": 5666 }, { "epoch": 0.19005617506497863, "grad_norm": 0.4365533165703493, "learning_rate": 2e-05, "loss": 5.697, "step": 5667 }, { "epoch": 0.19008971241720465, "grad_norm": 0.4028713005579805, "learning_rate": 2e-05, "loss": 5.5433, "step": 5668 }, { "epoch": 0.1901232497694307, "grad_norm": 0.3864943168969027, "learning_rate": 2e-05, "loss": 5.5612, "step": 5669 }, { "epoch": 0.19015678712165673, "grad_norm": 0.3970210571458761, "learning_rate": 2e-05, "loss": 5.5362, "step": 5670 }, { "epoch": 0.1901903244738828, "grad_norm": 0.41452671451878154, "learning_rate": 2e-05, "loss": 5.7814, "step": 5671 }, { "epoch": 0.19022386182610884, "grad_norm": 0.4438225993252444, "learning_rate": 2e-05, "loss": 5.4422, "step": 5672 }, { "epoch": 0.19025739917833487, "grad_norm": 0.3919674861005928, "learning_rate": 2e-05, "loss": 5.507, "step": 5673 }, { "epoch": 0.19029093653056092, "grad_norm": 0.4110277987609854, "learning_rate": 2e-05, "loss": 5.398, "step": 5674 }, { "epoch": 0.19032447388278695, "grad_norm": 0.40307867289142585, "learning_rate": 2e-05, "loss": 5.5562, "step": 5675 }, { "epoch": 0.190358011235013, "grad_norm": 0.4058302339456777, "learning_rate": 2e-05, "loss": 5.5386, "step": 5676 }, { "epoch": 0.19039154858723903, "grad_norm": 0.42141088282712713, "learning_rate": 2e-05, "loss": 5.6354, "step": 5677 }, { "epoch": 0.19042508593946508, "grad_norm": 0.40164216801993685, "learning_rate": 2e-05, "loss": 5.4483, "step": 5678 }, { "epoch": 0.1904586232916911, "grad_norm": 0.4358715430322761, "learning_rate": 2e-05, "loss": 5.6731, "step": 5679 }, { "epoch": 0.19049216064391716, "grad_norm": 0.4106782672602582, "learning_rate": 2e-05, "loss": 5.4813, "step": 5680 }, { "epoch": 0.19052569799614322, "grad_norm": 0.43805873367506704, "learning_rate": 2e-05, "loss": 5.4086, "step": 5681 }, { "epoch": 0.19055923534836924, "grad_norm": 0.42129243271506395, "learning_rate": 2e-05, "loss": 5.3573, "step": 5682 }, { "epoch": 0.1905927727005953, "grad_norm": 0.41428627421604136, "learning_rate": 2e-05, "loss": 5.543, "step": 5683 }, { "epoch": 0.19062631005282132, "grad_norm": 0.38879375545334605, "learning_rate": 2e-05, "loss": 5.5126, "step": 5684 }, { "epoch": 0.19065984740504738, "grad_norm": 0.42250235122442714, "learning_rate": 2e-05, "loss": 5.5016, "step": 5685 }, { "epoch": 0.1906933847572734, "grad_norm": 0.4006224353359919, "learning_rate": 2e-05, "loss": 5.3887, "step": 5686 }, { "epoch": 0.19072692210949946, "grad_norm": 0.4045924589614406, "learning_rate": 2e-05, "loss": 5.5389, "step": 5687 }, { "epoch": 0.19076045946172548, "grad_norm": 0.3940545780009821, "learning_rate": 2e-05, "loss": 5.5157, "step": 5688 }, { "epoch": 0.19079399681395154, "grad_norm": 0.40775875942183865, "learning_rate": 2e-05, "loss": 5.5968, "step": 5689 }, { "epoch": 0.1908275341661776, "grad_norm": 0.4092997991049627, "learning_rate": 2e-05, "loss": 5.4671, "step": 5690 }, { "epoch": 0.19086107151840362, "grad_norm": 0.3823193165345545, "learning_rate": 2e-05, "loss": 5.5631, "step": 5691 }, { "epoch": 0.19089460887062967, "grad_norm": 0.4103932921051752, "learning_rate": 2e-05, "loss": 5.624, "step": 5692 }, { "epoch": 0.1909281462228557, "grad_norm": 0.4164916258218843, "learning_rate": 2e-05, "loss": 5.4824, "step": 5693 }, { "epoch": 0.19096168357508175, "grad_norm": 0.41723800208098855, "learning_rate": 2e-05, "loss": 5.3513, "step": 5694 }, { "epoch": 0.19099522092730778, "grad_norm": 0.3876705712389222, "learning_rate": 2e-05, "loss": 5.5935, "step": 5695 }, { "epoch": 0.19102875827953383, "grad_norm": 0.3937732588660474, "learning_rate": 2e-05, "loss": 5.6143, "step": 5696 }, { "epoch": 0.19106229563175986, "grad_norm": 0.4361651546575943, "learning_rate": 2e-05, "loss": 5.5859, "step": 5697 }, { "epoch": 0.1910958329839859, "grad_norm": 0.39644714100866973, "learning_rate": 2e-05, "loss": 5.6045, "step": 5698 }, { "epoch": 0.19112937033621197, "grad_norm": 0.40317862473567534, "learning_rate": 2e-05, "loss": 5.4537, "step": 5699 }, { "epoch": 0.191162907688438, "grad_norm": 0.41856791463585136, "learning_rate": 2e-05, "loss": 5.717, "step": 5700 }, { "epoch": 0.19119644504066405, "grad_norm": 0.435294642272946, "learning_rate": 2e-05, "loss": 5.5555, "step": 5701 }, { "epoch": 0.19122998239289007, "grad_norm": 0.41906286100890416, "learning_rate": 2e-05, "loss": 5.4688, "step": 5702 }, { "epoch": 0.19126351974511613, "grad_norm": 0.4181768608607276, "learning_rate": 2e-05, "loss": 5.4698, "step": 5703 }, { "epoch": 0.19129705709734215, "grad_norm": 0.4137491433319877, "learning_rate": 2e-05, "loss": 5.7231, "step": 5704 }, { "epoch": 0.1913305944495682, "grad_norm": 0.4128233223471091, "learning_rate": 2e-05, "loss": 5.5578, "step": 5705 }, { "epoch": 0.19136413180179423, "grad_norm": 0.4347812091143818, "learning_rate": 2e-05, "loss": 5.2936, "step": 5706 }, { "epoch": 0.1913976691540203, "grad_norm": 0.42572362859680646, "learning_rate": 2e-05, "loss": 5.5399, "step": 5707 }, { "epoch": 0.19143120650624634, "grad_norm": 0.42502584822348316, "learning_rate": 2e-05, "loss": 5.5672, "step": 5708 }, { "epoch": 0.19146474385847237, "grad_norm": 0.42201005569236916, "learning_rate": 2e-05, "loss": 5.4235, "step": 5709 }, { "epoch": 0.19149828121069842, "grad_norm": 0.4167508274151774, "learning_rate": 2e-05, "loss": 5.6516, "step": 5710 }, { "epoch": 0.19153181856292445, "grad_norm": 0.4696502890733836, "learning_rate": 2e-05, "loss": 5.6216, "step": 5711 }, { "epoch": 0.1915653559151505, "grad_norm": 0.4294573702897293, "learning_rate": 2e-05, "loss": 5.8296, "step": 5712 }, { "epoch": 0.19159889326737653, "grad_norm": 0.43815550397558134, "learning_rate": 2e-05, "loss": 5.4524, "step": 5713 }, { "epoch": 0.19163243061960258, "grad_norm": 0.4273351295753746, "learning_rate": 2e-05, "loss": 5.4327, "step": 5714 }, { "epoch": 0.19166596797182864, "grad_norm": 0.4224679111694345, "learning_rate": 2e-05, "loss": 5.8926, "step": 5715 }, { "epoch": 0.19169950532405466, "grad_norm": 0.40849744681039735, "learning_rate": 2e-05, "loss": 5.5961, "step": 5716 }, { "epoch": 0.19173304267628072, "grad_norm": 0.40133963383989946, "learning_rate": 2e-05, "loss": 5.3453, "step": 5717 }, { "epoch": 0.19176658002850674, "grad_norm": 0.4620637174637691, "learning_rate": 2e-05, "loss": 5.7383, "step": 5718 }, { "epoch": 0.1918001173807328, "grad_norm": 0.40086605126931374, "learning_rate": 2e-05, "loss": 5.5553, "step": 5719 }, { "epoch": 0.19183365473295882, "grad_norm": 0.4202771342039118, "learning_rate": 2e-05, "loss": 5.629, "step": 5720 }, { "epoch": 0.19186719208518488, "grad_norm": 0.4256696193250752, "learning_rate": 2e-05, "loss": 5.662, "step": 5721 }, { "epoch": 0.1919007294374109, "grad_norm": 0.40059117173607856, "learning_rate": 2e-05, "loss": 5.7346, "step": 5722 }, { "epoch": 0.19193426678963696, "grad_norm": 0.42248061932250436, "learning_rate": 2e-05, "loss": 5.7635, "step": 5723 }, { "epoch": 0.191967804141863, "grad_norm": 0.4029003477733941, "learning_rate": 2e-05, "loss": 5.6096, "step": 5724 }, { "epoch": 0.19200134149408904, "grad_norm": 0.4121573465065748, "learning_rate": 2e-05, "loss": 5.6713, "step": 5725 }, { "epoch": 0.1920348788463151, "grad_norm": 0.4079669936870024, "learning_rate": 2e-05, "loss": 5.5873, "step": 5726 }, { "epoch": 0.19206841619854112, "grad_norm": 0.4125975416094036, "learning_rate": 2e-05, "loss": 5.4681, "step": 5727 }, { "epoch": 0.19210195355076717, "grad_norm": 0.41227408226296514, "learning_rate": 2e-05, "loss": 5.7932, "step": 5728 }, { "epoch": 0.1921354909029932, "grad_norm": 0.426201212280667, "learning_rate": 2e-05, "loss": 5.6425, "step": 5729 }, { "epoch": 0.19216902825521925, "grad_norm": 0.3988367223378302, "learning_rate": 2e-05, "loss": 5.6463, "step": 5730 }, { "epoch": 0.19220256560744528, "grad_norm": 0.4550786625960665, "learning_rate": 2e-05, "loss": 5.2933, "step": 5731 }, { "epoch": 0.19223610295967133, "grad_norm": 0.40552213090668626, "learning_rate": 2e-05, "loss": 5.49, "step": 5732 }, { "epoch": 0.1922696403118974, "grad_norm": 0.41432224997705713, "learning_rate": 2e-05, "loss": 5.6336, "step": 5733 }, { "epoch": 0.19230317766412341, "grad_norm": 0.46986572713961366, "learning_rate": 2e-05, "loss": 5.6148, "step": 5734 }, { "epoch": 0.19233671501634947, "grad_norm": 0.4135429982113512, "learning_rate": 2e-05, "loss": 5.2862, "step": 5735 }, { "epoch": 0.1923702523685755, "grad_norm": 0.43346319034830216, "learning_rate": 2e-05, "loss": 5.4576, "step": 5736 }, { "epoch": 0.19240378972080155, "grad_norm": 0.4424232723978343, "learning_rate": 2e-05, "loss": 5.8026, "step": 5737 }, { "epoch": 0.19243732707302758, "grad_norm": 0.40412447685994396, "learning_rate": 2e-05, "loss": 5.5439, "step": 5738 }, { "epoch": 0.19247086442525363, "grad_norm": 0.43251943219163846, "learning_rate": 2e-05, "loss": 5.4037, "step": 5739 }, { "epoch": 0.19250440177747966, "grad_norm": 0.4291432875482591, "learning_rate": 2e-05, "loss": 5.5677, "step": 5740 }, { "epoch": 0.1925379391297057, "grad_norm": 0.41652815466280174, "learning_rate": 2e-05, "loss": 5.4427, "step": 5741 }, { "epoch": 0.19257147648193176, "grad_norm": 0.4205279120055939, "learning_rate": 2e-05, "loss": 5.6197, "step": 5742 }, { "epoch": 0.1926050138341578, "grad_norm": 0.41400410310722063, "learning_rate": 2e-05, "loss": 5.43, "step": 5743 }, { "epoch": 0.19263855118638384, "grad_norm": 0.4262527420597544, "learning_rate": 2e-05, "loss": 5.5984, "step": 5744 }, { "epoch": 0.19267208853860987, "grad_norm": 0.4122422369268711, "learning_rate": 2e-05, "loss": 5.4068, "step": 5745 }, { "epoch": 0.19270562589083592, "grad_norm": 0.43292143346107387, "learning_rate": 2e-05, "loss": 5.5584, "step": 5746 }, { "epoch": 0.19273916324306195, "grad_norm": 0.4191547054119864, "learning_rate": 2e-05, "loss": 5.4574, "step": 5747 }, { "epoch": 0.192772700595288, "grad_norm": 0.3976777079480788, "learning_rate": 2e-05, "loss": 5.6759, "step": 5748 }, { "epoch": 0.19280623794751403, "grad_norm": 0.4107381751419521, "learning_rate": 2e-05, "loss": 5.2974, "step": 5749 }, { "epoch": 0.19283977529974008, "grad_norm": 0.4067395682657349, "learning_rate": 2e-05, "loss": 5.6843, "step": 5750 }, { "epoch": 0.19287331265196614, "grad_norm": 0.4075153939656044, "learning_rate": 2e-05, "loss": 5.4131, "step": 5751 }, { "epoch": 0.19290685000419217, "grad_norm": 0.3957571392498637, "learning_rate": 2e-05, "loss": 5.6324, "step": 5752 }, { "epoch": 0.19294038735641822, "grad_norm": 0.4078070833856838, "learning_rate": 2e-05, "loss": 5.8693, "step": 5753 }, { "epoch": 0.19297392470864425, "grad_norm": 0.4072455301210602, "learning_rate": 2e-05, "loss": 5.4186, "step": 5754 }, { "epoch": 0.1930074620608703, "grad_norm": 0.3991810179118033, "learning_rate": 2e-05, "loss": 5.6408, "step": 5755 }, { "epoch": 0.19304099941309633, "grad_norm": 0.4135560312177873, "learning_rate": 2e-05, "loss": 5.562, "step": 5756 }, { "epoch": 0.19307453676532238, "grad_norm": 0.3993141367571618, "learning_rate": 2e-05, "loss": 5.4022, "step": 5757 }, { "epoch": 0.1931080741175484, "grad_norm": 0.4162695013539427, "learning_rate": 2e-05, "loss": 5.6219, "step": 5758 }, { "epoch": 0.19314161146977446, "grad_norm": 0.4375523897273732, "learning_rate": 2e-05, "loss": 5.5395, "step": 5759 }, { "epoch": 0.19317514882200051, "grad_norm": 0.387377069592129, "learning_rate": 2e-05, "loss": 5.462, "step": 5760 }, { "epoch": 0.19320868617422654, "grad_norm": 0.41091162326500025, "learning_rate": 2e-05, "loss": 5.5504, "step": 5761 }, { "epoch": 0.1932422235264526, "grad_norm": 0.45145591975495625, "learning_rate": 2e-05, "loss": 5.6351, "step": 5762 }, { "epoch": 0.19327576087867862, "grad_norm": 0.43551305024579257, "learning_rate": 2e-05, "loss": 5.5049, "step": 5763 }, { "epoch": 0.19330929823090467, "grad_norm": 0.4315945798322283, "learning_rate": 2e-05, "loss": 5.4369, "step": 5764 }, { "epoch": 0.1933428355831307, "grad_norm": 0.41693129725925376, "learning_rate": 2e-05, "loss": 5.7233, "step": 5765 }, { "epoch": 0.19337637293535676, "grad_norm": 0.4449157253825799, "learning_rate": 2e-05, "loss": 5.558, "step": 5766 }, { "epoch": 0.1934099102875828, "grad_norm": 0.39996989157660495, "learning_rate": 2e-05, "loss": 5.6768, "step": 5767 }, { "epoch": 0.19344344763980884, "grad_norm": 0.42145868465197045, "learning_rate": 2e-05, "loss": 5.6142, "step": 5768 }, { "epoch": 0.1934769849920349, "grad_norm": 0.3805747894096906, "learning_rate": 2e-05, "loss": 5.5953, "step": 5769 }, { "epoch": 0.19351052234426092, "grad_norm": 0.41736770545529334, "learning_rate": 2e-05, "loss": 5.4774, "step": 5770 }, { "epoch": 0.19354405969648697, "grad_norm": 0.40004495453451805, "learning_rate": 2e-05, "loss": 5.6625, "step": 5771 }, { "epoch": 0.193577597048713, "grad_norm": 0.41926430432151424, "learning_rate": 2e-05, "loss": 5.6967, "step": 5772 }, { "epoch": 0.19361113440093905, "grad_norm": 0.4014172931087122, "learning_rate": 2e-05, "loss": 5.7652, "step": 5773 }, { "epoch": 0.19364467175316508, "grad_norm": 0.42405573170674765, "learning_rate": 2e-05, "loss": 5.6987, "step": 5774 }, { "epoch": 0.19367820910539113, "grad_norm": 0.4262204746532626, "learning_rate": 2e-05, "loss": 5.5149, "step": 5775 }, { "epoch": 0.19371174645761718, "grad_norm": 0.45379400495274097, "learning_rate": 2e-05, "loss": 5.6764, "step": 5776 }, { "epoch": 0.1937452838098432, "grad_norm": 0.4041001477754711, "learning_rate": 2e-05, "loss": 5.512, "step": 5777 }, { "epoch": 0.19377882116206926, "grad_norm": 0.40904363946036154, "learning_rate": 2e-05, "loss": 5.4931, "step": 5778 }, { "epoch": 0.1938123585142953, "grad_norm": 0.4390021165344298, "learning_rate": 2e-05, "loss": 5.4119, "step": 5779 }, { "epoch": 0.19384589586652134, "grad_norm": 0.43617983634022195, "learning_rate": 2e-05, "loss": 5.5329, "step": 5780 }, { "epoch": 0.19387943321874737, "grad_norm": 0.3863700752815897, "learning_rate": 2e-05, "loss": 5.5981, "step": 5781 }, { "epoch": 0.19391297057097343, "grad_norm": 0.4078784255978829, "learning_rate": 2e-05, "loss": 5.7153, "step": 5782 }, { "epoch": 0.19394650792319945, "grad_norm": 0.4362454099323166, "learning_rate": 2e-05, "loss": 5.6244, "step": 5783 }, { "epoch": 0.1939800452754255, "grad_norm": 0.46561322828266816, "learning_rate": 2e-05, "loss": 5.3638, "step": 5784 }, { "epoch": 0.19401358262765156, "grad_norm": 0.4302178416799145, "learning_rate": 2e-05, "loss": 5.6806, "step": 5785 }, { "epoch": 0.19404711997987759, "grad_norm": 0.4308171023430449, "learning_rate": 2e-05, "loss": 5.4283, "step": 5786 }, { "epoch": 0.19408065733210364, "grad_norm": 0.4266052265247991, "learning_rate": 2e-05, "loss": 5.5496, "step": 5787 }, { "epoch": 0.19411419468432967, "grad_norm": 0.4457457697274399, "learning_rate": 2e-05, "loss": 5.4411, "step": 5788 }, { "epoch": 0.19414773203655572, "grad_norm": 0.4233839088065904, "learning_rate": 2e-05, "loss": 5.4996, "step": 5789 }, { "epoch": 0.19418126938878175, "grad_norm": 0.38864705003016214, "learning_rate": 2e-05, "loss": 5.4474, "step": 5790 }, { "epoch": 0.1942148067410078, "grad_norm": 0.4411702268157332, "learning_rate": 2e-05, "loss": 5.5708, "step": 5791 }, { "epoch": 0.19424834409323383, "grad_norm": 0.4355661057818437, "learning_rate": 2e-05, "loss": 5.5784, "step": 5792 }, { "epoch": 0.19428188144545988, "grad_norm": 0.3984886526361702, "learning_rate": 2e-05, "loss": 5.6341, "step": 5793 }, { "epoch": 0.19431541879768593, "grad_norm": 0.40272935332915316, "learning_rate": 2e-05, "loss": 5.4785, "step": 5794 }, { "epoch": 0.19434895614991196, "grad_norm": 0.4342511303324639, "learning_rate": 2e-05, "loss": 5.5727, "step": 5795 }, { "epoch": 0.19438249350213802, "grad_norm": 0.4089614264809416, "learning_rate": 2e-05, "loss": 5.4381, "step": 5796 }, { "epoch": 0.19441603085436404, "grad_norm": 0.4120365858523453, "learning_rate": 2e-05, "loss": 5.6969, "step": 5797 }, { "epoch": 0.1944495682065901, "grad_norm": 0.4336440955298956, "learning_rate": 2e-05, "loss": 5.5587, "step": 5798 }, { "epoch": 0.19448310555881612, "grad_norm": 0.402341541590002, "learning_rate": 2e-05, "loss": 5.4051, "step": 5799 }, { "epoch": 0.19451664291104218, "grad_norm": 0.4149580881769841, "learning_rate": 2e-05, "loss": 5.5596, "step": 5800 }, { "epoch": 0.1945501802632682, "grad_norm": 0.41589470219906716, "learning_rate": 2e-05, "loss": 5.5352, "step": 5801 }, { "epoch": 0.19458371761549426, "grad_norm": 0.43328940458912657, "learning_rate": 2e-05, "loss": 5.6011, "step": 5802 }, { "epoch": 0.1946172549677203, "grad_norm": 0.42631948904524525, "learning_rate": 2e-05, "loss": 5.77, "step": 5803 }, { "epoch": 0.19465079231994634, "grad_norm": 0.410569772333143, "learning_rate": 2e-05, "loss": 5.5296, "step": 5804 }, { "epoch": 0.1946843296721724, "grad_norm": 0.42163240878307234, "learning_rate": 2e-05, "loss": 5.4991, "step": 5805 }, { "epoch": 0.19471786702439842, "grad_norm": 0.42227059955321233, "learning_rate": 2e-05, "loss": 5.4974, "step": 5806 }, { "epoch": 0.19475140437662447, "grad_norm": 0.40566704722689373, "learning_rate": 2e-05, "loss": 5.622, "step": 5807 }, { "epoch": 0.1947849417288505, "grad_norm": 0.4077023751613962, "learning_rate": 2e-05, "loss": 5.7077, "step": 5808 }, { "epoch": 0.19481847908107655, "grad_norm": 0.4452004405907598, "learning_rate": 2e-05, "loss": 5.6542, "step": 5809 }, { "epoch": 0.19485201643330258, "grad_norm": 0.390405216224906, "learning_rate": 2e-05, "loss": 5.2473, "step": 5810 }, { "epoch": 0.19488555378552863, "grad_norm": 0.4225313698540602, "learning_rate": 2e-05, "loss": 5.6178, "step": 5811 }, { "epoch": 0.19491909113775469, "grad_norm": 0.45843833847539633, "learning_rate": 2e-05, "loss": 5.6423, "step": 5812 }, { "epoch": 0.1949526284899807, "grad_norm": 0.4332468269410954, "learning_rate": 2e-05, "loss": 5.3181, "step": 5813 }, { "epoch": 0.19498616584220677, "grad_norm": 0.4013651935903242, "learning_rate": 2e-05, "loss": 5.4971, "step": 5814 }, { "epoch": 0.1950197031944328, "grad_norm": 0.43530405156110175, "learning_rate": 2e-05, "loss": 5.4596, "step": 5815 }, { "epoch": 0.19505324054665885, "grad_norm": 0.46606572208115304, "learning_rate": 2e-05, "loss": 5.546, "step": 5816 }, { "epoch": 0.19508677789888487, "grad_norm": 0.4270364131222305, "learning_rate": 2e-05, "loss": 5.4853, "step": 5817 }, { "epoch": 0.19512031525111093, "grad_norm": 0.42784205637338835, "learning_rate": 2e-05, "loss": 5.8272, "step": 5818 }, { "epoch": 0.19515385260333698, "grad_norm": 0.4524975990562913, "learning_rate": 2e-05, "loss": 5.5759, "step": 5819 }, { "epoch": 0.195187389955563, "grad_norm": 0.45503553080281645, "learning_rate": 2e-05, "loss": 5.6109, "step": 5820 }, { "epoch": 0.19522092730778906, "grad_norm": 0.4128914399382827, "learning_rate": 2e-05, "loss": 5.7367, "step": 5821 }, { "epoch": 0.1952544646600151, "grad_norm": 0.4932985542234374, "learning_rate": 2e-05, "loss": 5.587, "step": 5822 }, { "epoch": 0.19528800201224114, "grad_norm": 0.45896689277647484, "learning_rate": 2e-05, "loss": 5.5363, "step": 5823 }, { "epoch": 0.19532153936446717, "grad_norm": 0.41831433933386064, "learning_rate": 2e-05, "loss": 5.6204, "step": 5824 }, { "epoch": 0.19535507671669322, "grad_norm": 0.4538849670575826, "learning_rate": 2e-05, "loss": 5.5715, "step": 5825 }, { "epoch": 0.19538861406891925, "grad_norm": 0.43825281798154986, "learning_rate": 2e-05, "loss": 5.694, "step": 5826 }, { "epoch": 0.1954221514211453, "grad_norm": 0.4450108710667554, "learning_rate": 2e-05, "loss": 5.3317, "step": 5827 }, { "epoch": 0.19545568877337136, "grad_norm": 0.44323126455821343, "learning_rate": 2e-05, "loss": 5.5718, "step": 5828 }, { "epoch": 0.19548922612559738, "grad_norm": 0.43790235626178936, "learning_rate": 2e-05, "loss": 5.7335, "step": 5829 }, { "epoch": 0.19552276347782344, "grad_norm": 0.406531729894163, "learning_rate": 2e-05, "loss": 5.5858, "step": 5830 }, { "epoch": 0.19555630083004946, "grad_norm": 0.41995509395206376, "learning_rate": 2e-05, "loss": 5.5673, "step": 5831 }, { "epoch": 0.19558983818227552, "grad_norm": 0.4408232405700693, "learning_rate": 2e-05, "loss": 5.6392, "step": 5832 }, { "epoch": 0.19562337553450154, "grad_norm": 0.407790956033981, "learning_rate": 2e-05, "loss": 5.5518, "step": 5833 }, { "epoch": 0.1956569128867276, "grad_norm": 0.39393595612919347, "learning_rate": 2e-05, "loss": 5.5028, "step": 5834 }, { "epoch": 0.19569045023895362, "grad_norm": 0.451682704349777, "learning_rate": 2e-05, "loss": 5.5236, "step": 5835 }, { "epoch": 0.19572398759117968, "grad_norm": 0.4238815645389513, "learning_rate": 2e-05, "loss": 5.4852, "step": 5836 }, { "epoch": 0.19575752494340573, "grad_norm": 0.4115194772878478, "learning_rate": 2e-05, "loss": 5.5502, "step": 5837 }, { "epoch": 0.19579106229563176, "grad_norm": 0.426128235570427, "learning_rate": 2e-05, "loss": 5.5641, "step": 5838 }, { "epoch": 0.1958245996478578, "grad_norm": 0.45594371548047774, "learning_rate": 2e-05, "loss": 5.5304, "step": 5839 }, { "epoch": 0.19585813700008384, "grad_norm": 0.4087112678130264, "learning_rate": 2e-05, "loss": 5.6423, "step": 5840 }, { "epoch": 0.1958916743523099, "grad_norm": 0.4005392288018263, "learning_rate": 2e-05, "loss": 5.7342, "step": 5841 }, { "epoch": 0.19592521170453592, "grad_norm": 0.41698426534412825, "learning_rate": 2e-05, "loss": 5.6939, "step": 5842 }, { "epoch": 0.19595874905676197, "grad_norm": 0.40164577158523335, "learning_rate": 2e-05, "loss": 5.5104, "step": 5843 }, { "epoch": 0.195992286408988, "grad_norm": 0.40153479978809564, "learning_rate": 2e-05, "loss": 5.3969, "step": 5844 }, { "epoch": 0.19602582376121405, "grad_norm": 0.40120746183293554, "learning_rate": 2e-05, "loss": 5.3916, "step": 5845 }, { "epoch": 0.1960593611134401, "grad_norm": 0.40688739234992816, "learning_rate": 2e-05, "loss": 5.4675, "step": 5846 }, { "epoch": 0.19609289846566613, "grad_norm": 0.38544379253053873, "learning_rate": 2e-05, "loss": 5.4753, "step": 5847 }, { "epoch": 0.1961264358178922, "grad_norm": 0.3958964084287571, "learning_rate": 2e-05, "loss": 5.5324, "step": 5848 }, { "epoch": 0.1961599731701182, "grad_norm": 0.41244172709532156, "learning_rate": 2e-05, "loss": 5.5178, "step": 5849 }, { "epoch": 0.19619351052234427, "grad_norm": 0.4064810511425114, "learning_rate": 2e-05, "loss": 5.4452, "step": 5850 }, { "epoch": 0.1962270478745703, "grad_norm": 0.4353768421337051, "learning_rate": 2e-05, "loss": 5.57, "step": 5851 }, { "epoch": 0.19626058522679635, "grad_norm": 0.40273236425737574, "learning_rate": 2e-05, "loss": 5.4484, "step": 5852 }, { "epoch": 0.19629412257902237, "grad_norm": 0.4303970532267504, "learning_rate": 2e-05, "loss": 5.5437, "step": 5853 }, { "epoch": 0.19632765993124843, "grad_norm": 0.43904913305186294, "learning_rate": 2e-05, "loss": 5.5369, "step": 5854 }, { "epoch": 0.19636119728347448, "grad_norm": 0.41166508813246167, "learning_rate": 2e-05, "loss": 5.5995, "step": 5855 }, { "epoch": 0.1963947346357005, "grad_norm": 0.42183343330128736, "learning_rate": 2e-05, "loss": 5.5264, "step": 5856 }, { "epoch": 0.19642827198792656, "grad_norm": 0.4667791035381346, "learning_rate": 2e-05, "loss": 5.5803, "step": 5857 }, { "epoch": 0.1964618093401526, "grad_norm": 0.4245097520172963, "learning_rate": 2e-05, "loss": 5.5621, "step": 5858 }, { "epoch": 0.19649534669237864, "grad_norm": 0.42369959089907333, "learning_rate": 2e-05, "loss": 5.4302, "step": 5859 }, { "epoch": 0.19652888404460467, "grad_norm": 0.41755986507063647, "learning_rate": 2e-05, "loss": 5.3737, "step": 5860 }, { "epoch": 0.19656242139683072, "grad_norm": 0.4148855014982161, "learning_rate": 2e-05, "loss": 5.5241, "step": 5861 }, { "epoch": 0.19659595874905675, "grad_norm": 0.4226734503360574, "learning_rate": 2e-05, "loss": 5.395, "step": 5862 }, { "epoch": 0.1966294961012828, "grad_norm": 0.4419829074960076, "learning_rate": 2e-05, "loss": 5.6846, "step": 5863 }, { "epoch": 0.19666303345350886, "grad_norm": 0.4292149055899841, "learning_rate": 2e-05, "loss": 5.7037, "step": 5864 }, { "epoch": 0.19669657080573488, "grad_norm": 0.4425735317548673, "learning_rate": 2e-05, "loss": 5.5405, "step": 5865 }, { "epoch": 0.19673010815796094, "grad_norm": 0.4596318382452035, "learning_rate": 2e-05, "loss": 5.464, "step": 5866 }, { "epoch": 0.19676364551018696, "grad_norm": 0.41890708624998274, "learning_rate": 2e-05, "loss": 5.5207, "step": 5867 }, { "epoch": 0.19679718286241302, "grad_norm": 0.39299814456291315, "learning_rate": 2e-05, "loss": 5.4854, "step": 5868 }, { "epoch": 0.19683072021463904, "grad_norm": 0.4618565622301882, "learning_rate": 2e-05, "loss": 5.7779, "step": 5869 }, { "epoch": 0.1968642575668651, "grad_norm": 0.45787709455055753, "learning_rate": 2e-05, "loss": 5.5085, "step": 5870 }, { "epoch": 0.19689779491909115, "grad_norm": 0.4405667775698557, "learning_rate": 2e-05, "loss": 5.4614, "step": 5871 }, { "epoch": 0.19693133227131718, "grad_norm": 0.44787443111299113, "learning_rate": 2e-05, "loss": 5.4736, "step": 5872 }, { "epoch": 0.19696486962354323, "grad_norm": 0.44257400519363177, "learning_rate": 2e-05, "loss": 5.516, "step": 5873 }, { "epoch": 0.19699840697576926, "grad_norm": 0.39448695625813984, "learning_rate": 2e-05, "loss": 5.5457, "step": 5874 }, { "epoch": 0.1970319443279953, "grad_norm": 0.40939138591438123, "learning_rate": 2e-05, "loss": 5.4469, "step": 5875 }, { "epoch": 0.19706548168022134, "grad_norm": 0.43669515788469987, "learning_rate": 2e-05, "loss": 5.3208, "step": 5876 }, { "epoch": 0.1970990190324474, "grad_norm": 0.41489166888681467, "learning_rate": 2e-05, "loss": 5.3388, "step": 5877 }, { "epoch": 0.19713255638467342, "grad_norm": 0.40149027884389105, "learning_rate": 2e-05, "loss": 5.3521, "step": 5878 }, { "epoch": 0.19716609373689947, "grad_norm": 0.43874376272641147, "learning_rate": 2e-05, "loss": 5.5207, "step": 5879 }, { "epoch": 0.19719963108912553, "grad_norm": 0.3992837653755555, "learning_rate": 2e-05, "loss": 5.4992, "step": 5880 }, { "epoch": 0.19723316844135155, "grad_norm": 0.39831865376055925, "learning_rate": 2e-05, "loss": 5.5348, "step": 5881 }, { "epoch": 0.1972667057935776, "grad_norm": 0.40973978726915805, "learning_rate": 2e-05, "loss": 5.5641, "step": 5882 }, { "epoch": 0.19730024314580363, "grad_norm": 0.4489579855662191, "learning_rate": 2e-05, "loss": 5.3448, "step": 5883 }, { "epoch": 0.1973337804980297, "grad_norm": 0.4280447824828359, "learning_rate": 2e-05, "loss": 5.5483, "step": 5884 }, { "epoch": 0.1973673178502557, "grad_norm": 0.4055990166136047, "learning_rate": 2e-05, "loss": 5.552, "step": 5885 }, { "epoch": 0.19740085520248177, "grad_norm": 0.39275356674478556, "learning_rate": 2e-05, "loss": 5.3381, "step": 5886 }, { "epoch": 0.1974343925547078, "grad_norm": 0.42797209497036576, "learning_rate": 2e-05, "loss": 5.6341, "step": 5887 }, { "epoch": 0.19746792990693385, "grad_norm": 0.4143019137496628, "learning_rate": 2e-05, "loss": 5.3775, "step": 5888 }, { "epoch": 0.1975014672591599, "grad_norm": 0.4111187097966472, "learning_rate": 2e-05, "loss": 5.6124, "step": 5889 }, { "epoch": 0.19753500461138593, "grad_norm": 0.38440900394960065, "learning_rate": 2e-05, "loss": 5.6331, "step": 5890 }, { "epoch": 0.19756854196361198, "grad_norm": 0.3949104405495735, "learning_rate": 2e-05, "loss": 5.5181, "step": 5891 }, { "epoch": 0.197602079315838, "grad_norm": 0.4099442365124608, "learning_rate": 2e-05, "loss": 5.4395, "step": 5892 }, { "epoch": 0.19763561666806406, "grad_norm": 0.3993004097574303, "learning_rate": 2e-05, "loss": 5.6996, "step": 5893 }, { "epoch": 0.1976691540202901, "grad_norm": 0.39507887685977755, "learning_rate": 2e-05, "loss": 5.6124, "step": 5894 }, { "epoch": 0.19770269137251614, "grad_norm": 0.40490419649193116, "learning_rate": 2e-05, "loss": 5.3231, "step": 5895 }, { "epoch": 0.19773622872474217, "grad_norm": 0.39760549481711654, "learning_rate": 2e-05, "loss": 5.6445, "step": 5896 }, { "epoch": 0.19776976607696822, "grad_norm": 0.41143051015320625, "learning_rate": 2e-05, "loss": 5.5087, "step": 5897 }, { "epoch": 0.19780330342919428, "grad_norm": 0.4533715554226115, "learning_rate": 2e-05, "loss": 5.4243, "step": 5898 }, { "epoch": 0.1978368407814203, "grad_norm": 0.40523767083822576, "learning_rate": 2e-05, "loss": 5.5236, "step": 5899 }, { "epoch": 0.19787037813364636, "grad_norm": 0.4067609545697054, "learning_rate": 2e-05, "loss": 5.6248, "step": 5900 }, { "epoch": 0.19790391548587238, "grad_norm": 0.42972784861381813, "learning_rate": 2e-05, "loss": 5.5153, "step": 5901 }, { "epoch": 0.19793745283809844, "grad_norm": 0.4151210162656553, "learning_rate": 2e-05, "loss": 5.6787, "step": 5902 }, { "epoch": 0.19797099019032446, "grad_norm": 0.4107329971522196, "learning_rate": 2e-05, "loss": 5.6112, "step": 5903 }, { "epoch": 0.19800452754255052, "grad_norm": 0.4172344419335286, "learning_rate": 2e-05, "loss": 5.5363, "step": 5904 }, { "epoch": 0.19803806489477654, "grad_norm": 0.4461224978508169, "learning_rate": 2e-05, "loss": 5.3766, "step": 5905 }, { "epoch": 0.1980716022470026, "grad_norm": 0.42127920334923924, "learning_rate": 2e-05, "loss": 5.687, "step": 5906 }, { "epoch": 0.19810513959922865, "grad_norm": 0.42975718077687486, "learning_rate": 2e-05, "loss": 5.5382, "step": 5907 }, { "epoch": 0.19813867695145468, "grad_norm": 0.40861463992019736, "learning_rate": 2e-05, "loss": 5.4584, "step": 5908 }, { "epoch": 0.19817221430368073, "grad_norm": 0.44169992689040927, "learning_rate": 2e-05, "loss": 5.5596, "step": 5909 }, { "epoch": 0.19820575165590676, "grad_norm": 0.4395369203035339, "learning_rate": 2e-05, "loss": 5.6224, "step": 5910 }, { "epoch": 0.1982392890081328, "grad_norm": 0.46670360313102316, "learning_rate": 2e-05, "loss": 5.6279, "step": 5911 }, { "epoch": 0.19827282636035884, "grad_norm": 0.4285923652075029, "learning_rate": 2e-05, "loss": 5.3027, "step": 5912 }, { "epoch": 0.1983063637125849, "grad_norm": 0.4144464681007521, "learning_rate": 2e-05, "loss": 5.4269, "step": 5913 }, { "epoch": 0.19833990106481092, "grad_norm": 0.5025712471746233, "learning_rate": 2e-05, "loss": 5.5649, "step": 5914 }, { "epoch": 0.19837343841703697, "grad_norm": 0.4198079691762264, "learning_rate": 2e-05, "loss": 5.622, "step": 5915 }, { "epoch": 0.19840697576926303, "grad_norm": 0.415406276033551, "learning_rate": 2e-05, "loss": 5.4786, "step": 5916 }, { "epoch": 0.19844051312148905, "grad_norm": 0.45500342716530334, "learning_rate": 2e-05, "loss": 5.448, "step": 5917 }, { "epoch": 0.1984740504737151, "grad_norm": 0.41887753144784806, "learning_rate": 2e-05, "loss": 5.5746, "step": 5918 }, { "epoch": 0.19850758782594113, "grad_norm": 0.43335604933779764, "learning_rate": 2e-05, "loss": 5.5912, "step": 5919 }, { "epoch": 0.1985411251781672, "grad_norm": 0.45435691563278063, "learning_rate": 2e-05, "loss": 5.6193, "step": 5920 }, { "epoch": 0.19857466253039321, "grad_norm": 0.45330617495237513, "learning_rate": 2e-05, "loss": 5.5369, "step": 5921 }, { "epoch": 0.19860819988261927, "grad_norm": 0.4340527450667085, "learning_rate": 2e-05, "loss": 5.6573, "step": 5922 }, { "epoch": 0.19864173723484532, "grad_norm": 0.49559260327334487, "learning_rate": 2e-05, "loss": 5.5202, "step": 5923 }, { "epoch": 0.19867527458707135, "grad_norm": 0.4092913816275358, "learning_rate": 2e-05, "loss": 5.4379, "step": 5924 }, { "epoch": 0.1987088119392974, "grad_norm": 0.4390657041616229, "learning_rate": 2e-05, "loss": 5.535, "step": 5925 }, { "epoch": 0.19874234929152343, "grad_norm": 0.39960827061117443, "learning_rate": 2e-05, "loss": 5.6639, "step": 5926 }, { "epoch": 0.19877588664374948, "grad_norm": 0.42375330229835395, "learning_rate": 2e-05, "loss": 5.5382, "step": 5927 }, { "epoch": 0.1988094239959755, "grad_norm": 0.4289154784573178, "learning_rate": 2e-05, "loss": 5.6568, "step": 5928 }, { "epoch": 0.19884296134820156, "grad_norm": 0.4291092956868972, "learning_rate": 2e-05, "loss": 5.4026, "step": 5929 }, { "epoch": 0.1988764987004276, "grad_norm": 0.3977801757358605, "learning_rate": 2e-05, "loss": 5.5471, "step": 5930 }, { "epoch": 0.19891003605265364, "grad_norm": 0.4987862439789891, "learning_rate": 2e-05, "loss": 5.4332, "step": 5931 }, { "epoch": 0.1989435734048797, "grad_norm": 0.4207560009977726, "learning_rate": 2e-05, "loss": 5.6618, "step": 5932 }, { "epoch": 0.19897711075710572, "grad_norm": 0.40405003637676934, "learning_rate": 2e-05, "loss": 5.516, "step": 5933 }, { "epoch": 0.19901064810933178, "grad_norm": 0.43255641780201426, "learning_rate": 2e-05, "loss": 5.5729, "step": 5934 }, { "epoch": 0.1990441854615578, "grad_norm": 0.40128938502854355, "learning_rate": 2e-05, "loss": 5.5545, "step": 5935 }, { "epoch": 0.19907772281378386, "grad_norm": 0.4648188205308441, "learning_rate": 2e-05, "loss": 5.284, "step": 5936 }, { "epoch": 0.19911126016600988, "grad_norm": 0.40889793555414233, "learning_rate": 2e-05, "loss": 5.6578, "step": 5937 }, { "epoch": 0.19914479751823594, "grad_norm": 0.41698606864308824, "learning_rate": 2e-05, "loss": 5.4715, "step": 5938 }, { "epoch": 0.19917833487046196, "grad_norm": 0.4304575774955576, "learning_rate": 2e-05, "loss": 5.6718, "step": 5939 }, { "epoch": 0.19921187222268802, "grad_norm": 0.4392440703700795, "learning_rate": 2e-05, "loss": 5.5469, "step": 5940 }, { "epoch": 0.19924540957491407, "grad_norm": 0.4170222412402659, "learning_rate": 2e-05, "loss": 5.6214, "step": 5941 }, { "epoch": 0.1992789469271401, "grad_norm": 0.42844307250057706, "learning_rate": 2e-05, "loss": 5.5795, "step": 5942 }, { "epoch": 0.19931248427936615, "grad_norm": 0.4132563459164744, "learning_rate": 2e-05, "loss": 5.4444, "step": 5943 }, { "epoch": 0.19934602163159218, "grad_norm": 0.4368359047088253, "learning_rate": 2e-05, "loss": 5.6577, "step": 5944 }, { "epoch": 0.19937955898381823, "grad_norm": 0.4347237693597113, "learning_rate": 2e-05, "loss": 5.5313, "step": 5945 }, { "epoch": 0.19941309633604426, "grad_norm": 0.5071248496398076, "learning_rate": 2e-05, "loss": 5.4784, "step": 5946 }, { "epoch": 0.19944663368827031, "grad_norm": 0.460388519556642, "learning_rate": 2e-05, "loss": 5.5429, "step": 5947 }, { "epoch": 0.19948017104049634, "grad_norm": 0.4281394982143212, "learning_rate": 2e-05, "loss": 5.545, "step": 5948 }, { "epoch": 0.1995137083927224, "grad_norm": 0.45867713688833256, "learning_rate": 2e-05, "loss": 5.5802, "step": 5949 }, { "epoch": 0.19954724574494845, "grad_norm": 0.42204097804080803, "learning_rate": 2e-05, "loss": 5.7375, "step": 5950 }, { "epoch": 0.19958078309717447, "grad_norm": 0.428499620599109, "learning_rate": 2e-05, "loss": 5.7538, "step": 5951 }, { "epoch": 0.19961432044940053, "grad_norm": 0.4599182771190301, "learning_rate": 2e-05, "loss": 5.4322, "step": 5952 }, { "epoch": 0.19964785780162655, "grad_norm": 0.4055600931751596, "learning_rate": 2e-05, "loss": 5.5247, "step": 5953 }, { "epoch": 0.1996813951538526, "grad_norm": 0.43230168447250134, "learning_rate": 2e-05, "loss": 5.5238, "step": 5954 }, { "epoch": 0.19971493250607864, "grad_norm": 0.4854907936249668, "learning_rate": 2e-05, "loss": 5.6393, "step": 5955 }, { "epoch": 0.1997484698583047, "grad_norm": 0.4082204441591155, "learning_rate": 2e-05, "loss": 5.7123, "step": 5956 }, { "epoch": 0.19978200721053072, "grad_norm": 0.4448188280713212, "learning_rate": 2e-05, "loss": 5.8219, "step": 5957 }, { "epoch": 0.19981554456275677, "grad_norm": 0.43762227749716315, "learning_rate": 2e-05, "loss": 5.5374, "step": 5958 }, { "epoch": 0.19984908191498282, "grad_norm": 0.4023315383171217, "learning_rate": 2e-05, "loss": 5.3159, "step": 5959 }, { "epoch": 0.19988261926720885, "grad_norm": 0.3947905002514403, "learning_rate": 2e-05, "loss": 5.5943, "step": 5960 }, { "epoch": 0.1999161566194349, "grad_norm": 0.43568024749655215, "learning_rate": 2e-05, "loss": 5.5547, "step": 5961 }, { "epoch": 0.19994969397166093, "grad_norm": 0.3999014642463519, "learning_rate": 2e-05, "loss": 5.5729, "step": 5962 }, { "epoch": 0.19998323132388698, "grad_norm": 0.39756430507959484, "learning_rate": 2e-05, "loss": 5.6455, "step": 5963 }, { "epoch": 0.200016768676113, "grad_norm": 0.4114806161538018, "learning_rate": 2e-05, "loss": 5.4719, "step": 5964 }, { "epoch": 0.20005030602833906, "grad_norm": 0.4253375220060448, "learning_rate": 2e-05, "loss": 5.8222, "step": 5965 }, { "epoch": 0.2000838433805651, "grad_norm": 0.41328116659705383, "learning_rate": 2e-05, "loss": 5.4406, "step": 5966 }, { "epoch": 0.20011738073279114, "grad_norm": 0.4025350999586575, "learning_rate": 2e-05, "loss": 5.5865, "step": 5967 }, { "epoch": 0.2001509180850172, "grad_norm": 0.4029419929562431, "learning_rate": 2e-05, "loss": 5.5171, "step": 5968 }, { "epoch": 0.20018445543724322, "grad_norm": 0.43003713497134766, "learning_rate": 2e-05, "loss": 5.8118, "step": 5969 }, { "epoch": 0.20021799278946928, "grad_norm": 0.3911510244434747, "learning_rate": 2e-05, "loss": 5.6061, "step": 5970 }, { "epoch": 0.2002515301416953, "grad_norm": 0.38999391019955065, "learning_rate": 2e-05, "loss": 5.5573, "step": 5971 }, { "epoch": 0.20028506749392136, "grad_norm": 0.4188370904944231, "learning_rate": 2e-05, "loss": 5.423, "step": 5972 }, { "epoch": 0.20031860484614739, "grad_norm": 0.41047721192605324, "learning_rate": 2e-05, "loss": 5.6163, "step": 5973 }, { "epoch": 0.20035214219837344, "grad_norm": 0.3844062539503017, "learning_rate": 2e-05, "loss": 5.577, "step": 5974 }, { "epoch": 0.2003856795505995, "grad_norm": 0.40099519766165875, "learning_rate": 2e-05, "loss": 5.56, "step": 5975 }, { "epoch": 0.20041921690282552, "grad_norm": 0.4303123985792166, "learning_rate": 2e-05, "loss": 5.4742, "step": 5976 }, { "epoch": 0.20045275425505157, "grad_norm": 0.40895292373684283, "learning_rate": 2e-05, "loss": 5.4953, "step": 5977 }, { "epoch": 0.2004862916072776, "grad_norm": 0.3950525631568, "learning_rate": 2e-05, "loss": 5.7058, "step": 5978 }, { "epoch": 0.20051982895950365, "grad_norm": 0.42267599738092243, "learning_rate": 2e-05, "loss": 5.5939, "step": 5979 }, { "epoch": 0.20055336631172968, "grad_norm": 0.5101982326789107, "learning_rate": 2e-05, "loss": 5.6719, "step": 5980 }, { "epoch": 0.20058690366395573, "grad_norm": 0.41949593085385023, "learning_rate": 2e-05, "loss": 5.3749, "step": 5981 }, { "epoch": 0.20062044101618176, "grad_norm": 0.4344460021891022, "learning_rate": 2e-05, "loss": 5.5116, "step": 5982 }, { "epoch": 0.20065397836840781, "grad_norm": 0.3953696682392684, "learning_rate": 2e-05, "loss": 5.6802, "step": 5983 }, { "epoch": 0.20068751572063387, "grad_norm": 0.44651665479495256, "learning_rate": 2e-05, "loss": 5.5486, "step": 5984 }, { "epoch": 0.2007210530728599, "grad_norm": 0.45445780579486583, "learning_rate": 2e-05, "loss": 5.4429, "step": 5985 }, { "epoch": 0.20075459042508595, "grad_norm": 0.43455700141239356, "learning_rate": 2e-05, "loss": 5.5536, "step": 5986 }, { "epoch": 0.20078812777731198, "grad_norm": 0.40384564534285233, "learning_rate": 2e-05, "loss": 5.4912, "step": 5987 }, { "epoch": 0.20082166512953803, "grad_norm": 0.44349591730828797, "learning_rate": 2e-05, "loss": 5.4397, "step": 5988 }, { "epoch": 0.20085520248176406, "grad_norm": 0.4176619542612294, "learning_rate": 2e-05, "loss": 5.614, "step": 5989 }, { "epoch": 0.2008887398339901, "grad_norm": 0.43283860180827777, "learning_rate": 2e-05, "loss": 5.3996, "step": 5990 }, { "epoch": 0.20092227718621614, "grad_norm": 0.3960664872261612, "learning_rate": 2e-05, "loss": 5.58, "step": 5991 }, { "epoch": 0.2009558145384422, "grad_norm": 0.40554331469000515, "learning_rate": 2e-05, "loss": 5.5051, "step": 5992 }, { "epoch": 0.20098935189066824, "grad_norm": 0.421309423596072, "learning_rate": 2e-05, "loss": 5.3904, "step": 5993 }, { "epoch": 0.20102288924289427, "grad_norm": 0.4340636800022589, "learning_rate": 2e-05, "loss": 5.701, "step": 5994 }, { "epoch": 0.20105642659512032, "grad_norm": 0.4091439743678592, "learning_rate": 2e-05, "loss": 5.7903, "step": 5995 }, { "epoch": 0.20108996394734635, "grad_norm": 0.45371771150440066, "learning_rate": 2e-05, "loss": 5.4311, "step": 5996 }, { "epoch": 0.2011235012995724, "grad_norm": 0.4112117311123726, "learning_rate": 2e-05, "loss": 5.5624, "step": 5997 }, { "epoch": 0.20115703865179843, "grad_norm": 0.3982662390905758, "learning_rate": 2e-05, "loss": 5.4538, "step": 5998 }, { "epoch": 0.20119057600402449, "grad_norm": 0.4353688478808637, "learning_rate": 2e-05, "loss": 5.4324, "step": 5999 }, { "epoch": 0.2012241133562505, "grad_norm": 0.40117526571943624, "learning_rate": 2e-05, "loss": 5.5032, "step": 6000 }, { "epoch": 0.20125765070847657, "grad_norm": 0.4483083841725656, "learning_rate": 2e-05, "loss": 5.3894, "step": 6001 }, { "epoch": 0.20129118806070262, "grad_norm": 0.38460387997601236, "learning_rate": 2e-05, "loss": 5.4817, "step": 6002 }, { "epoch": 0.20132472541292865, "grad_norm": 0.45665513699133664, "learning_rate": 2e-05, "loss": 5.5791, "step": 6003 }, { "epoch": 0.2013582627651547, "grad_norm": 0.4138310838334034, "learning_rate": 2e-05, "loss": 5.5218, "step": 6004 }, { "epoch": 0.20139180011738073, "grad_norm": 0.4406516812260356, "learning_rate": 2e-05, "loss": 5.5202, "step": 6005 }, { "epoch": 0.20142533746960678, "grad_norm": 0.43837544251693683, "learning_rate": 2e-05, "loss": 5.4528, "step": 6006 }, { "epoch": 0.2014588748218328, "grad_norm": 0.433441115836985, "learning_rate": 2e-05, "loss": 5.7845, "step": 6007 }, { "epoch": 0.20149241217405886, "grad_norm": 0.4394486543768353, "learning_rate": 2e-05, "loss": 5.6944, "step": 6008 }, { "epoch": 0.2015259495262849, "grad_norm": 0.409396480894388, "learning_rate": 2e-05, "loss": 5.3959, "step": 6009 }, { "epoch": 0.20155948687851094, "grad_norm": 0.4155543240515267, "learning_rate": 2e-05, "loss": 5.4409, "step": 6010 }, { "epoch": 0.201593024230737, "grad_norm": 0.4273209697547888, "learning_rate": 2e-05, "loss": 5.7615, "step": 6011 }, { "epoch": 0.20162656158296302, "grad_norm": 0.41411631573860147, "learning_rate": 2e-05, "loss": 5.4603, "step": 6012 }, { "epoch": 0.20166009893518907, "grad_norm": 0.4141550367092287, "learning_rate": 2e-05, "loss": 5.8849, "step": 6013 }, { "epoch": 0.2016936362874151, "grad_norm": 0.40060417411537147, "learning_rate": 2e-05, "loss": 5.5302, "step": 6014 }, { "epoch": 0.20172717363964116, "grad_norm": 0.40554378292144094, "learning_rate": 2e-05, "loss": 5.5116, "step": 6015 }, { "epoch": 0.20176071099186718, "grad_norm": 0.4138378338297434, "learning_rate": 2e-05, "loss": 5.3829, "step": 6016 }, { "epoch": 0.20179424834409324, "grad_norm": 0.4340783009521319, "learning_rate": 2e-05, "loss": 5.6188, "step": 6017 }, { "epoch": 0.20182778569631926, "grad_norm": 0.39570948068875267, "learning_rate": 2e-05, "loss": 5.5695, "step": 6018 }, { "epoch": 0.20186132304854532, "grad_norm": 0.40523167953614103, "learning_rate": 2e-05, "loss": 5.6696, "step": 6019 }, { "epoch": 0.20189486040077137, "grad_norm": 0.4097788520966997, "learning_rate": 2e-05, "loss": 5.6546, "step": 6020 }, { "epoch": 0.2019283977529974, "grad_norm": 0.39036359097213735, "learning_rate": 2e-05, "loss": 5.4158, "step": 6021 }, { "epoch": 0.20196193510522345, "grad_norm": 0.40276005065323983, "learning_rate": 2e-05, "loss": 5.8658, "step": 6022 }, { "epoch": 0.20199547245744948, "grad_norm": 0.39142308851433294, "learning_rate": 2e-05, "loss": 5.5518, "step": 6023 }, { "epoch": 0.20202900980967553, "grad_norm": 0.414766240621844, "learning_rate": 2e-05, "loss": 5.4783, "step": 6024 }, { "epoch": 0.20206254716190156, "grad_norm": 0.404314974383118, "learning_rate": 2e-05, "loss": 5.3221, "step": 6025 }, { "epoch": 0.2020960845141276, "grad_norm": 0.3945251665772677, "learning_rate": 2e-05, "loss": 5.5628, "step": 6026 }, { "epoch": 0.20212962186635366, "grad_norm": 0.4226951798744075, "learning_rate": 2e-05, "loss": 5.4383, "step": 6027 }, { "epoch": 0.2021631592185797, "grad_norm": 0.4117247560695282, "learning_rate": 2e-05, "loss": 5.5937, "step": 6028 }, { "epoch": 0.20219669657080575, "grad_norm": 0.45164844787523234, "learning_rate": 2e-05, "loss": 5.6386, "step": 6029 }, { "epoch": 0.20223023392303177, "grad_norm": 0.38775375234833553, "learning_rate": 2e-05, "loss": 5.5011, "step": 6030 }, { "epoch": 0.20226377127525783, "grad_norm": 0.3892722927169267, "learning_rate": 2e-05, "loss": 5.6209, "step": 6031 }, { "epoch": 0.20229730862748385, "grad_norm": 0.44037844773939516, "learning_rate": 2e-05, "loss": 5.6192, "step": 6032 }, { "epoch": 0.2023308459797099, "grad_norm": 0.43548292375239883, "learning_rate": 2e-05, "loss": 5.5177, "step": 6033 }, { "epoch": 0.20236438333193593, "grad_norm": 0.3985585733134575, "learning_rate": 2e-05, "loss": 5.2792, "step": 6034 }, { "epoch": 0.20239792068416199, "grad_norm": 0.40450394072840024, "learning_rate": 2e-05, "loss": 5.5128, "step": 6035 }, { "epoch": 0.20243145803638804, "grad_norm": 0.4362468159101731, "learning_rate": 2e-05, "loss": 5.5112, "step": 6036 }, { "epoch": 0.20246499538861407, "grad_norm": 0.4190630446889907, "learning_rate": 2e-05, "loss": 5.5756, "step": 6037 }, { "epoch": 0.20249853274084012, "grad_norm": 0.39581416948408815, "learning_rate": 2e-05, "loss": 5.427, "step": 6038 }, { "epoch": 0.20253207009306615, "grad_norm": 0.40372304946280113, "learning_rate": 2e-05, "loss": 5.6574, "step": 6039 }, { "epoch": 0.2025656074452922, "grad_norm": 0.43826095032688894, "learning_rate": 2e-05, "loss": 5.4468, "step": 6040 }, { "epoch": 0.20259914479751823, "grad_norm": 0.4096705309527531, "learning_rate": 2e-05, "loss": 5.4902, "step": 6041 }, { "epoch": 0.20263268214974428, "grad_norm": 0.42147879498201113, "learning_rate": 2e-05, "loss": 5.6584, "step": 6042 }, { "epoch": 0.2026662195019703, "grad_norm": 0.4448286827698877, "learning_rate": 2e-05, "loss": 5.4858, "step": 6043 }, { "epoch": 0.20269975685419636, "grad_norm": 0.4127321094809041, "learning_rate": 2e-05, "loss": 5.5253, "step": 6044 }, { "epoch": 0.20273329420642242, "grad_norm": 0.43814196233083746, "learning_rate": 2e-05, "loss": 5.5857, "step": 6045 }, { "epoch": 0.20276683155864844, "grad_norm": 0.4087249794386802, "learning_rate": 2e-05, "loss": 5.8252, "step": 6046 }, { "epoch": 0.2028003689108745, "grad_norm": 0.4087797541419222, "learning_rate": 2e-05, "loss": 5.4521, "step": 6047 }, { "epoch": 0.20283390626310052, "grad_norm": 0.41639406560023795, "learning_rate": 2e-05, "loss": 5.5827, "step": 6048 }, { "epoch": 0.20286744361532658, "grad_norm": 0.4047910155171215, "learning_rate": 2e-05, "loss": 5.7568, "step": 6049 }, { "epoch": 0.2029009809675526, "grad_norm": 0.38844633322050287, "learning_rate": 2e-05, "loss": 5.6549, "step": 6050 }, { "epoch": 0.20293451831977866, "grad_norm": 0.4306513251613028, "learning_rate": 2e-05, "loss": 5.5993, "step": 6051 }, { "epoch": 0.20296805567200468, "grad_norm": 0.41164499087716433, "learning_rate": 2e-05, "loss": 5.5845, "step": 6052 }, { "epoch": 0.20300159302423074, "grad_norm": 0.4057836054219851, "learning_rate": 2e-05, "loss": 5.6609, "step": 6053 }, { "epoch": 0.2030351303764568, "grad_norm": 0.39365451904352533, "learning_rate": 2e-05, "loss": 5.413, "step": 6054 }, { "epoch": 0.20306866772868282, "grad_norm": 0.41922071808210526, "learning_rate": 2e-05, "loss": 5.5393, "step": 6055 }, { "epoch": 0.20310220508090887, "grad_norm": 0.4219029225273832, "learning_rate": 2e-05, "loss": 5.4267, "step": 6056 }, { "epoch": 0.2031357424331349, "grad_norm": 0.43110197538653705, "learning_rate": 2e-05, "loss": 5.7085, "step": 6057 }, { "epoch": 0.20316927978536095, "grad_norm": 0.4216248996057697, "learning_rate": 2e-05, "loss": 5.5902, "step": 6058 }, { "epoch": 0.20320281713758698, "grad_norm": 0.4200250751045814, "learning_rate": 2e-05, "loss": 5.5156, "step": 6059 }, { "epoch": 0.20323635448981303, "grad_norm": 0.4765239853850767, "learning_rate": 2e-05, "loss": 5.5199, "step": 6060 }, { "epoch": 0.20326989184203906, "grad_norm": 0.4459462913347461, "learning_rate": 2e-05, "loss": 5.6275, "step": 6061 }, { "epoch": 0.2033034291942651, "grad_norm": 0.46095064273307657, "learning_rate": 2e-05, "loss": 5.4158, "step": 6062 }, { "epoch": 0.20333696654649117, "grad_norm": 0.41230854146949014, "learning_rate": 2e-05, "loss": 5.5842, "step": 6063 }, { "epoch": 0.2033705038987172, "grad_norm": 0.47458549086592766, "learning_rate": 2e-05, "loss": 5.6626, "step": 6064 }, { "epoch": 0.20340404125094325, "grad_norm": 0.46853120308323887, "learning_rate": 2e-05, "loss": 5.5484, "step": 6065 }, { "epoch": 0.20343757860316927, "grad_norm": 0.4165784186542839, "learning_rate": 2e-05, "loss": 5.5869, "step": 6066 }, { "epoch": 0.20347111595539533, "grad_norm": 0.43098441170609975, "learning_rate": 2e-05, "loss": 5.5774, "step": 6067 }, { "epoch": 0.20350465330762135, "grad_norm": 0.4406222797247527, "learning_rate": 2e-05, "loss": 5.4026, "step": 6068 }, { "epoch": 0.2035381906598474, "grad_norm": 0.4386053475346928, "learning_rate": 2e-05, "loss": 5.4857, "step": 6069 }, { "epoch": 0.20357172801207343, "grad_norm": 0.41038515345600995, "learning_rate": 2e-05, "loss": 5.6714, "step": 6070 }, { "epoch": 0.2036052653642995, "grad_norm": 0.44752990043500285, "learning_rate": 2e-05, "loss": 5.317, "step": 6071 }, { "epoch": 0.20363880271652554, "grad_norm": 0.4612341712091405, "learning_rate": 2e-05, "loss": 5.5333, "step": 6072 }, { "epoch": 0.20367234006875157, "grad_norm": 0.4198366846405781, "learning_rate": 2e-05, "loss": 5.5058, "step": 6073 }, { "epoch": 0.20370587742097762, "grad_norm": 0.4204708707656554, "learning_rate": 2e-05, "loss": 5.4015, "step": 6074 }, { "epoch": 0.20373941477320365, "grad_norm": 0.41143560121485295, "learning_rate": 2e-05, "loss": 5.6378, "step": 6075 }, { "epoch": 0.2037729521254297, "grad_norm": 0.4207084954913916, "learning_rate": 2e-05, "loss": 5.6225, "step": 6076 }, { "epoch": 0.20380648947765573, "grad_norm": 0.4059105720451941, "learning_rate": 2e-05, "loss": 5.5204, "step": 6077 }, { "epoch": 0.20384002682988178, "grad_norm": 0.44682170078422556, "learning_rate": 2e-05, "loss": 5.5298, "step": 6078 }, { "epoch": 0.20387356418210784, "grad_norm": 0.41587435441306153, "learning_rate": 2e-05, "loss": 5.6236, "step": 6079 }, { "epoch": 0.20390710153433386, "grad_norm": 0.454908101262474, "learning_rate": 2e-05, "loss": 5.4911, "step": 6080 }, { "epoch": 0.20394063888655992, "grad_norm": 0.38873907713257705, "learning_rate": 2e-05, "loss": 5.6568, "step": 6081 }, { "epoch": 0.20397417623878594, "grad_norm": 0.4236619189972051, "learning_rate": 2e-05, "loss": 5.5106, "step": 6082 }, { "epoch": 0.204007713591012, "grad_norm": 0.4460015978458238, "learning_rate": 2e-05, "loss": 5.4901, "step": 6083 }, { "epoch": 0.20404125094323802, "grad_norm": 0.4038544377431419, "learning_rate": 2e-05, "loss": 5.482, "step": 6084 }, { "epoch": 0.20407478829546408, "grad_norm": 0.4090154002622958, "learning_rate": 2e-05, "loss": 5.7085, "step": 6085 }, { "epoch": 0.2041083256476901, "grad_norm": 0.4118046853665766, "learning_rate": 2e-05, "loss": 5.4229, "step": 6086 }, { "epoch": 0.20414186299991616, "grad_norm": 0.41323528412021593, "learning_rate": 2e-05, "loss": 5.5509, "step": 6087 }, { "epoch": 0.2041754003521422, "grad_norm": 0.3919887590218332, "learning_rate": 2e-05, "loss": 5.4121, "step": 6088 }, { "epoch": 0.20420893770436824, "grad_norm": 0.46613841851422244, "learning_rate": 2e-05, "loss": 5.4579, "step": 6089 }, { "epoch": 0.2042424750565943, "grad_norm": 0.4097161099809648, "learning_rate": 2e-05, "loss": 5.5783, "step": 6090 }, { "epoch": 0.20427601240882032, "grad_norm": 0.42637691597263394, "learning_rate": 2e-05, "loss": 5.7407, "step": 6091 }, { "epoch": 0.20430954976104637, "grad_norm": 0.4136178776100978, "learning_rate": 2e-05, "loss": 5.5177, "step": 6092 }, { "epoch": 0.2043430871132724, "grad_norm": 0.42881311860351307, "learning_rate": 2e-05, "loss": 5.6187, "step": 6093 }, { "epoch": 0.20437662446549845, "grad_norm": 0.4385434188236436, "learning_rate": 2e-05, "loss": 5.6684, "step": 6094 }, { "epoch": 0.20441016181772448, "grad_norm": 0.42084067809494935, "learning_rate": 2e-05, "loss": 5.7291, "step": 6095 }, { "epoch": 0.20444369916995053, "grad_norm": 0.40744207274418254, "learning_rate": 2e-05, "loss": 5.5178, "step": 6096 }, { "epoch": 0.2044772365221766, "grad_norm": 0.45340170838212956, "learning_rate": 2e-05, "loss": 5.5009, "step": 6097 }, { "epoch": 0.2045107738744026, "grad_norm": 0.4207197557315151, "learning_rate": 2e-05, "loss": 5.4899, "step": 6098 }, { "epoch": 0.20454431122662867, "grad_norm": 0.3966990457875647, "learning_rate": 2e-05, "loss": 5.5761, "step": 6099 }, { "epoch": 0.2045778485788547, "grad_norm": 0.4198310408372227, "learning_rate": 2e-05, "loss": 5.5935, "step": 6100 }, { "epoch": 0.20461138593108075, "grad_norm": 0.4427722389511519, "learning_rate": 2e-05, "loss": 5.5182, "step": 6101 }, { "epoch": 0.20464492328330677, "grad_norm": 0.43861693562366294, "learning_rate": 2e-05, "loss": 5.3176, "step": 6102 }, { "epoch": 0.20467846063553283, "grad_norm": 0.40992266878799166, "learning_rate": 2e-05, "loss": 5.6784, "step": 6103 }, { "epoch": 0.20471199798775885, "grad_norm": 0.44999625858415687, "learning_rate": 2e-05, "loss": 5.5443, "step": 6104 }, { "epoch": 0.2047455353399849, "grad_norm": 0.40991614853445485, "learning_rate": 2e-05, "loss": 5.6635, "step": 6105 }, { "epoch": 0.20477907269221096, "grad_norm": 0.4432891502161572, "learning_rate": 2e-05, "loss": 5.7805, "step": 6106 }, { "epoch": 0.204812610044437, "grad_norm": 0.41636601972732357, "learning_rate": 2e-05, "loss": 5.3991, "step": 6107 }, { "epoch": 0.20484614739666304, "grad_norm": 0.40355779787452317, "learning_rate": 2e-05, "loss": 5.6187, "step": 6108 }, { "epoch": 0.20487968474888907, "grad_norm": 0.46703217598590924, "learning_rate": 2e-05, "loss": 5.4967, "step": 6109 }, { "epoch": 0.20491322210111512, "grad_norm": 0.4325099410114738, "learning_rate": 2e-05, "loss": 5.7124, "step": 6110 }, { "epoch": 0.20494675945334115, "grad_norm": 0.3942823972425758, "learning_rate": 2e-05, "loss": 5.4474, "step": 6111 }, { "epoch": 0.2049802968055672, "grad_norm": 0.45242690197515417, "learning_rate": 2e-05, "loss": 5.4874, "step": 6112 }, { "epoch": 0.20501383415779323, "grad_norm": 0.4166001425500802, "learning_rate": 2e-05, "loss": 5.6016, "step": 6113 }, { "epoch": 0.20504737151001928, "grad_norm": 0.40193107573569614, "learning_rate": 2e-05, "loss": 5.4423, "step": 6114 }, { "epoch": 0.20508090886224534, "grad_norm": 0.407321774137127, "learning_rate": 2e-05, "loss": 5.5929, "step": 6115 }, { "epoch": 0.20511444621447136, "grad_norm": 0.4114708317250594, "learning_rate": 2e-05, "loss": 5.6455, "step": 6116 }, { "epoch": 0.20514798356669742, "grad_norm": 0.3958283989079531, "learning_rate": 2e-05, "loss": 5.5884, "step": 6117 }, { "epoch": 0.20518152091892344, "grad_norm": 0.45485148557147176, "learning_rate": 2e-05, "loss": 5.54, "step": 6118 }, { "epoch": 0.2052150582711495, "grad_norm": 0.4086371957471192, "learning_rate": 2e-05, "loss": 5.5082, "step": 6119 }, { "epoch": 0.20524859562337552, "grad_norm": 0.4254706356465719, "learning_rate": 2e-05, "loss": 5.6857, "step": 6120 }, { "epoch": 0.20528213297560158, "grad_norm": 0.4230612874189742, "learning_rate": 2e-05, "loss": 5.5124, "step": 6121 }, { "epoch": 0.20531567032782763, "grad_norm": 0.4235051897855058, "learning_rate": 2e-05, "loss": 5.5798, "step": 6122 }, { "epoch": 0.20534920768005366, "grad_norm": 0.4166696338796559, "learning_rate": 2e-05, "loss": 5.4469, "step": 6123 }, { "epoch": 0.2053827450322797, "grad_norm": 0.41647245927100734, "learning_rate": 2e-05, "loss": 5.5597, "step": 6124 }, { "epoch": 0.20541628238450574, "grad_norm": 0.40106786343667133, "learning_rate": 2e-05, "loss": 5.7755, "step": 6125 }, { "epoch": 0.2054498197367318, "grad_norm": 0.40125883041145405, "learning_rate": 2e-05, "loss": 5.6864, "step": 6126 }, { "epoch": 0.20548335708895782, "grad_norm": 0.4060167551273288, "learning_rate": 2e-05, "loss": 5.5175, "step": 6127 }, { "epoch": 0.20551689444118387, "grad_norm": 0.3943089126282064, "learning_rate": 2e-05, "loss": 5.7279, "step": 6128 }, { "epoch": 0.2055504317934099, "grad_norm": 0.40988151650319593, "learning_rate": 2e-05, "loss": 5.798, "step": 6129 }, { "epoch": 0.20558396914563595, "grad_norm": 0.4224596173717434, "learning_rate": 2e-05, "loss": 5.7344, "step": 6130 }, { "epoch": 0.205617506497862, "grad_norm": 0.41650715342235584, "learning_rate": 2e-05, "loss": 5.4205, "step": 6131 }, { "epoch": 0.20565104385008803, "grad_norm": 0.4107699426099042, "learning_rate": 2e-05, "loss": 5.6536, "step": 6132 }, { "epoch": 0.2056845812023141, "grad_norm": 0.44117519738794714, "learning_rate": 2e-05, "loss": 5.51, "step": 6133 }, { "epoch": 0.2057181185545401, "grad_norm": 0.4050372571955054, "learning_rate": 2e-05, "loss": 5.4242, "step": 6134 }, { "epoch": 0.20575165590676617, "grad_norm": 0.44291575596698923, "learning_rate": 2e-05, "loss": 5.8074, "step": 6135 }, { "epoch": 0.2057851932589922, "grad_norm": 0.4427268859012417, "learning_rate": 2e-05, "loss": 5.5907, "step": 6136 }, { "epoch": 0.20581873061121825, "grad_norm": 0.4058769648786659, "learning_rate": 2e-05, "loss": 5.6348, "step": 6137 }, { "epoch": 0.20585226796344427, "grad_norm": 0.4401479348419004, "learning_rate": 2e-05, "loss": 5.6553, "step": 6138 }, { "epoch": 0.20588580531567033, "grad_norm": 0.43849977631413717, "learning_rate": 2e-05, "loss": 5.4346, "step": 6139 }, { "epoch": 0.20591934266789638, "grad_norm": 0.3901201009898754, "learning_rate": 2e-05, "loss": 5.6339, "step": 6140 }, { "epoch": 0.2059528800201224, "grad_norm": 0.4224794373281922, "learning_rate": 2e-05, "loss": 5.5689, "step": 6141 }, { "epoch": 0.20598641737234846, "grad_norm": 0.43816019522526234, "learning_rate": 2e-05, "loss": 5.5301, "step": 6142 }, { "epoch": 0.2060199547245745, "grad_norm": 0.4259283915324506, "learning_rate": 2e-05, "loss": 5.7405, "step": 6143 }, { "epoch": 0.20605349207680054, "grad_norm": 0.3952701473895364, "learning_rate": 2e-05, "loss": 5.4971, "step": 6144 }, { "epoch": 0.20608702942902657, "grad_norm": 0.41216177109205077, "learning_rate": 2e-05, "loss": 5.6063, "step": 6145 }, { "epoch": 0.20612056678125262, "grad_norm": 0.4151340383462027, "learning_rate": 2e-05, "loss": 5.5027, "step": 6146 }, { "epoch": 0.20615410413347865, "grad_norm": 0.39313423169163536, "learning_rate": 2e-05, "loss": 5.6576, "step": 6147 }, { "epoch": 0.2061876414857047, "grad_norm": 0.40185653326001447, "learning_rate": 2e-05, "loss": 5.4035, "step": 6148 }, { "epoch": 0.20622117883793076, "grad_norm": 0.4219315226603934, "learning_rate": 2e-05, "loss": 5.5183, "step": 6149 }, { "epoch": 0.20625471619015678, "grad_norm": 0.41663413694279755, "learning_rate": 2e-05, "loss": 5.3965, "step": 6150 }, { "epoch": 0.20628825354238284, "grad_norm": 0.412795249012436, "learning_rate": 2e-05, "loss": 5.7855, "step": 6151 }, { "epoch": 0.20632179089460886, "grad_norm": 0.40908867414255695, "learning_rate": 2e-05, "loss": 5.5011, "step": 6152 }, { "epoch": 0.20635532824683492, "grad_norm": 0.41121082718670354, "learning_rate": 2e-05, "loss": 5.5592, "step": 6153 }, { "epoch": 0.20638886559906094, "grad_norm": 0.39279479648684373, "learning_rate": 2e-05, "loss": 5.5673, "step": 6154 }, { "epoch": 0.206422402951287, "grad_norm": 0.4240744628461942, "learning_rate": 2e-05, "loss": 5.4322, "step": 6155 }, { "epoch": 0.20645594030351302, "grad_norm": 0.4008202408349246, "learning_rate": 2e-05, "loss": 5.6281, "step": 6156 }, { "epoch": 0.20648947765573908, "grad_norm": 0.42508949740763, "learning_rate": 2e-05, "loss": 5.4649, "step": 6157 }, { "epoch": 0.20652301500796513, "grad_norm": 0.407019489724784, "learning_rate": 2e-05, "loss": 5.5736, "step": 6158 }, { "epoch": 0.20655655236019116, "grad_norm": 0.41214429022554583, "learning_rate": 2e-05, "loss": 5.5556, "step": 6159 }, { "epoch": 0.2065900897124172, "grad_norm": 0.4460383766209075, "learning_rate": 2e-05, "loss": 5.4271, "step": 6160 }, { "epoch": 0.20662362706464324, "grad_norm": 0.4269084864607592, "learning_rate": 2e-05, "loss": 5.4506, "step": 6161 }, { "epoch": 0.2066571644168693, "grad_norm": 0.3929334049197619, "learning_rate": 2e-05, "loss": 5.5316, "step": 6162 }, { "epoch": 0.20669070176909532, "grad_norm": 0.4459859729080603, "learning_rate": 2e-05, "loss": 5.4854, "step": 6163 }, { "epoch": 0.20672423912132137, "grad_norm": 0.4074899595874173, "learning_rate": 2e-05, "loss": 5.3873, "step": 6164 }, { "epoch": 0.2067577764735474, "grad_norm": 0.417644569771157, "learning_rate": 2e-05, "loss": 5.4925, "step": 6165 }, { "epoch": 0.20679131382577345, "grad_norm": 0.4456816082661978, "learning_rate": 2e-05, "loss": 5.7182, "step": 6166 }, { "epoch": 0.2068248511779995, "grad_norm": 0.42025140600192035, "learning_rate": 2e-05, "loss": 5.8088, "step": 6167 }, { "epoch": 0.20685838853022553, "grad_norm": 0.42999019139369654, "learning_rate": 2e-05, "loss": 5.5505, "step": 6168 }, { "epoch": 0.2068919258824516, "grad_norm": 0.4544498080652552, "learning_rate": 2e-05, "loss": 5.5087, "step": 6169 }, { "epoch": 0.20692546323467761, "grad_norm": 0.4774379713755226, "learning_rate": 2e-05, "loss": 5.542, "step": 6170 }, { "epoch": 0.20695900058690367, "grad_norm": 0.3975456035653554, "learning_rate": 2e-05, "loss": 5.5406, "step": 6171 }, { "epoch": 0.2069925379391297, "grad_norm": 0.42370012796497025, "learning_rate": 2e-05, "loss": 5.733, "step": 6172 }, { "epoch": 0.20702607529135575, "grad_norm": 0.43070946696362394, "learning_rate": 2e-05, "loss": 5.7129, "step": 6173 }, { "epoch": 0.2070596126435818, "grad_norm": 0.42056172905433065, "learning_rate": 2e-05, "loss": 5.3967, "step": 6174 }, { "epoch": 0.20709314999580783, "grad_norm": 0.4041837068441284, "learning_rate": 2e-05, "loss": 5.567, "step": 6175 }, { "epoch": 0.20712668734803388, "grad_norm": 0.39506855943538455, "learning_rate": 2e-05, "loss": 5.4663, "step": 6176 }, { "epoch": 0.2071602247002599, "grad_norm": 0.4012581691143563, "learning_rate": 2e-05, "loss": 5.4715, "step": 6177 }, { "epoch": 0.20719376205248596, "grad_norm": 0.4708808144095038, "learning_rate": 2e-05, "loss": 5.6496, "step": 6178 }, { "epoch": 0.207227299404712, "grad_norm": 0.41056799883795375, "learning_rate": 2e-05, "loss": 5.5017, "step": 6179 }, { "epoch": 0.20726083675693804, "grad_norm": 0.42195778872009243, "learning_rate": 2e-05, "loss": 5.5895, "step": 6180 }, { "epoch": 0.20729437410916407, "grad_norm": 0.4354411748598886, "learning_rate": 2e-05, "loss": 5.5056, "step": 6181 }, { "epoch": 0.20732791146139012, "grad_norm": 0.40784981971096584, "learning_rate": 2e-05, "loss": 5.566, "step": 6182 }, { "epoch": 0.20736144881361618, "grad_norm": 0.4244694196734598, "learning_rate": 2e-05, "loss": 5.5242, "step": 6183 }, { "epoch": 0.2073949861658422, "grad_norm": 0.4053700167077527, "learning_rate": 2e-05, "loss": 5.3276, "step": 6184 }, { "epoch": 0.20742852351806826, "grad_norm": 0.44058759819145443, "learning_rate": 2e-05, "loss": 5.6765, "step": 6185 }, { "epoch": 0.20746206087029428, "grad_norm": 0.41619968351479414, "learning_rate": 2e-05, "loss": 5.4834, "step": 6186 }, { "epoch": 0.20749559822252034, "grad_norm": 0.4140577832742815, "learning_rate": 2e-05, "loss": 5.6245, "step": 6187 }, { "epoch": 0.20752913557474637, "grad_norm": 0.4263328594868355, "learning_rate": 2e-05, "loss": 5.7085, "step": 6188 }, { "epoch": 0.20756267292697242, "grad_norm": 0.45208379589877806, "learning_rate": 2e-05, "loss": 5.5963, "step": 6189 }, { "epoch": 0.20759621027919845, "grad_norm": 0.438320763176137, "learning_rate": 2e-05, "loss": 5.5228, "step": 6190 }, { "epoch": 0.2076297476314245, "grad_norm": 0.43475224184938444, "learning_rate": 2e-05, "loss": 5.5857, "step": 6191 }, { "epoch": 0.20766328498365055, "grad_norm": 0.4573857473172549, "learning_rate": 2e-05, "loss": 5.5156, "step": 6192 }, { "epoch": 0.20769682233587658, "grad_norm": 0.4225556505945732, "learning_rate": 2e-05, "loss": 5.6021, "step": 6193 }, { "epoch": 0.20773035968810263, "grad_norm": 0.43620388937525395, "learning_rate": 2e-05, "loss": 5.4749, "step": 6194 }, { "epoch": 0.20776389704032866, "grad_norm": 0.45325260916369353, "learning_rate": 2e-05, "loss": 5.5469, "step": 6195 }, { "epoch": 0.20779743439255471, "grad_norm": 0.4382558966722888, "learning_rate": 2e-05, "loss": 5.5497, "step": 6196 }, { "epoch": 0.20783097174478074, "grad_norm": 0.40978437075963303, "learning_rate": 2e-05, "loss": 5.6948, "step": 6197 }, { "epoch": 0.2078645090970068, "grad_norm": 0.4242609139133173, "learning_rate": 2e-05, "loss": 5.5346, "step": 6198 }, { "epoch": 0.20789804644923282, "grad_norm": 0.44695339113757193, "learning_rate": 2e-05, "loss": 5.5593, "step": 6199 }, { "epoch": 0.20793158380145887, "grad_norm": 0.4019955887998427, "learning_rate": 2e-05, "loss": 5.643, "step": 6200 }, { "epoch": 0.20796512115368493, "grad_norm": 0.4133431062578411, "learning_rate": 2e-05, "loss": 5.5788, "step": 6201 }, { "epoch": 0.20799865850591095, "grad_norm": 0.43581955810310297, "learning_rate": 2e-05, "loss": 5.5479, "step": 6202 }, { "epoch": 0.208032195858137, "grad_norm": 0.390879505769007, "learning_rate": 2e-05, "loss": 5.5509, "step": 6203 }, { "epoch": 0.20806573321036304, "grad_norm": 0.4228128944895384, "learning_rate": 2e-05, "loss": 5.597, "step": 6204 }, { "epoch": 0.2080992705625891, "grad_norm": 0.45492500456474144, "learning_rate": 2e-05, "loss": 5.6972, "step": 6205 }, { "epoch": 0.20813280791481512, "grad_norm": 0.3929425237642684, "learning_rate": 2e-05, "loss": 5.6327, "step": 6206 }, { "epoch": 0.20816634526704117, "grad_norm": 0.40818293720440724, "learning_rate": 2e-05, "loss": 5.7103, "step": 6207 }, { "epoch": 0.2081998826192672, "grad_norm": 0.42549237187095157, "learning_rate": 2e-05, "loss": 5.555, "step": 6208 }, { "epoch": 0.20823341997149325, "grad_norm": 0.44419836617563485, "learning_rate": 2e-05, "loss": 5.4979, "step": 6209 }, { "epoch": 0.2082669573237193, "grad_norm": 0.4055883433698909, "learning_rate": 2e-05, "loss": 5.6334, "step": 6210 }, { "epoch": 0.20830049467594533, "grad_norm": 0.4158728934562631, "learning_rate": 2e-05, "loss": 5.653, "step": 6211 }, { "epoch": 0.20833403202817138, "grad_norm": 0.4244954001815614, "learning_rate": 2e-05, "loss": 5.2786, "step": 6212 }, { "epoch": 0.2083675693803974, "grad_norm": 0.4267787979225505, "learning_rate": 2e-05, "loss": 5.5141, "step": 6213 }, { "epoch": 0.20840110673262346, "grad_norm": 0.40178440690086326, "learning_rate": 2e-05, "loss": 5.6901, "step": 6214 }, { "epoch": 0.2084346440848495, "grad_norm": 0.46812443320947356, "learning_rate": 2e-05, "loss": 5.6437, "step": 6215 }, { "epoch": 0.20846818143707554, "grad_norm": 0.43391698598185313, "learning_rate": 2e-05, "loss": 5.6209, "step": 6216 }, { "epoch": 0.20850171878930157, "grad_norm": 0.39000127483845626, "learning_rate": 2e-05, "loss": 5.8063, "step": 6217 }, { "epoch": 0.20853525614152763, "grad_norm": 0.42712916276727303, "learning_rate": 2e-05, "loss": 5.6749, "step": 6218 }, { "epoch": 0.20856879349375368, "grad_norm": 0.45330424702379346, "learning_rate": 2e-05, "loss": 5.5308, "step": 6219 }, { "epoch": 0.2086023308459797, "grad_norm": 0.3945214736095787, "learning_rate": 2e-05, "loss": 5.6373, "step": 6220 }, { "epoch": 0.20863586819820576, "grad_norm": 0.4676313835896531, "learning_rate": 2e-05, "loss": 5.587, "step": 6221 }, { "epoch": 0.20866940555043179, "grad_norm": 0.42448649925288, "learning_rate": 2e-05, "loss": 5.5021, "step": 6222 }, { "epoch": 0.20870294290265784, "grad_norm": 0.40594963518930477, "learning_rate": 2e-05, "loss": 5.4439, "step": 6223 }, { "epoch": 0.20873648025488387, "grad_norm": 0.41196896772626773, "learning_rate": 2e-05, "loss": 5.6558, "step": 6224 }, { "epoch": 0.20877001760710992, "grad_norm": 0.42096400698335007, "learning_rate": 2e-05, "loss": 5.5045, "step": 6225 }, { "epoch": 0.20880355495933597, "grad_norm": 0.4220649338229826, "learning_rate": 2e-05, "loss": 5.5675, "step": 6226 }, { "epoch": 0.208837092311562, "grad_norm": 0.4171583778669517, "learning_rate": 2e-05, "loss": 5.4814, "step": 6227 }, { "epoch": 0.20887062966378805, "grad_norm": 0.4193953104808689, "learning_rate": 2e-05, "loss": 5.7445, "step": 6228 }, { "epoch": 0.20890416701601408, "grad_norm": 0.42992322510593894, "learning_rate": 2e-05, "loss": 5.4196, "step": 6229 }, { "epoch": 0.20893770436824013, "grad_norm": 0.39709718163540936, "learning_rate": 2e-05, "loss": 5.4535, "step": 6230 }, { "epoch": 0.20897124172046616, "grad_norm": 0.4399835503660976, "learning_rate": 2e-05, "loss": 5.5101, "step": 6231 }, { "epoch": 0.20900477907269222, "grad_norm": 0.44637588088522884, "learning_rate": 2e-05, "loss": 5.5365, "step": 6232 }, { "epoch": 0.20903831642491824, "grad_norm": 0.4370032271276628, "learning_rate": 2e-05, "loss": 5.5202, "step": 6233 }, { "epoch": 0.2090718537771443, "grad_norm": 0.4276363467613641, "learning_rate": 2e-05, "loss": 5.3813, "step": 6234 }, { "epoch": 0.20910539112937035, "grad_norm": 0.41455325304691965, "learning_rate": 2e-05, "loss": 5.6769, "step": 6235 }, { "epoch": 0.20913892848159638, "grad_norm": 0.4061284681851343, "learning_rate": 2e-05, "loss": 5.4873, "step": 6236 }, { "epoch": 0.20917246583382243, "grad_norm": 0.38808456332856656, "learning_rate": 2e-05, "loss": 5.5019, "step": 6237 }, { "epoch": 0.20920600318604846, "grad_norm": 0.3851253179910079, "learning_rate": 2e-05, "loss": 5.51, "step": 6238 }, { "epoch": 0.2092395405382745, "grad_norm": 0.3969860347033866, "learning_rate": 2e-05, "loss": 5.5373, "step": 6239 }, { "epoch": 0.20927307789050054, "grad_norm": 0.4307757539620144, "learning_rate": 2e-05, "loss": 5.594, "step": 6240 }, { "epoch": 0.2093066152427266, "grad_norm": 0.4027555164701322, "learning_rate": 2e-05, "loss": 5.4708, "step": 6241 }, { "epoch": 0.20934015259495262, "grad_norm": 0.38603095109615226, "learning_rate": 2e-05, "loss": 5.4517, "step": 6242 }, { "epoch": 0.20937368994717867, "grad_norm": 0.45408882906202674, "learning_rate": 2e-05, "loss": 5.7683, "step": 6243 }, { "epoch": 0.20940722729940472, "grad_norm": 0.39553836222690736, "learning_rate": 2e-05, "loss": 5.755, "step": 6244 }, { "epoch": 0.20944076465163075, "grad_norm": 0.3991477597570361, "learning_rate": 2e-05, "loss": 5.7223, "step": 6245 }, { "epoch": 0.2094743020038568, "grad_norm": 0.45203880995102297, "learning_rate": 2e-05, "loss": 5.7913, "step": 6246 }, { "epoch": 0.20950783935608283, "grad_norm": 0.41670862994296726, "learning_rate": 2e-05, "loss": 5.5426, "step": 6247 }, { "epoch": 0.20954137670830889, "grad_norm": 0.40590756733992234, "learning_rate": 2e-05, "loss": 5.4084, "step": 6248 }, { "epoch": 0.2095749140605349, "grad_norm": 0.44961769955922015, "learning_rate": 2e-05, "loss": 5.5372, "step": 6249 }, { "epoch": 0.20960845141276097, "grad_norm": 0.4090010309611362, "learning_rate": 2e-05, "loss": 5.6443, "step": 6250 }, { "epoch": 0.209641988764987, "grad_norm": 0.442500088471053, "learning_rate": 2e-05, "loss": 5.4131, "step": 6251 }, { "epoch": 0.20967552611721305, "grad_norm": 0.4077092405225861, "learning_rate": 2e-05, "loss": 5.5747, "step": 6252 }, { "epoch": 0.2097090634694391, "grad_norm": 0.4691435556842563, "learning_rate": 2e-05, "loss": 5.5844, "step": 6253 }, { "epoch": 0.20974260082166513, "grad_norm": 0.45547393549341625, "learning_rate": 2e-05, "loss": 5.622, "step": 6254 }, { "epoch": 0.20977613817389118, "grad_norm": 0.4330295264416765, "learning_rate": 2e-05, "loss": 5.6485, "step": 6255 }, { "epoch": 0.2098096755261172, "grad_norm": 0.41655648592991457, "learning_rate": 2e-05, "loss": 5.6446, "step": 6256 }, { "epoch": 0.20984321287834326, "grad_norm": 0.4301006295474627, "learning_rate": 2e-05, "loss": 5.5124, "step": 6257 }, { "epoch": 0.2098767502305693, "grad_norm": 0.4380005280215203, "learning_rate": 2e-05, "loss": 5.5809, "step": 6258 }, { "epoch": 0.20991028758279534, "grad_norm": 0.41233032897524596, "learning_rate": 2e-05, "loss": 5.3245, "step": 6259 }, { "epoch": 0.20994382493502137, "grad_norm": 0.4126828929495264, "learning_rate": 2e-05, "loss": 5.6816, "step": 6260 }, { "epoch": 0.20997736228724742, "grad_norm": 0.4137523573882179, "learning_rate": 2e-05, "loss": 5.5486, "step": 6261 }, { "epoch": 0.21001089963947348, "grad_norm": 0.4090669571623561, "learning_rate": 2e-05, "loss": 5.353, "step": 6262 }, { "epoch": 0.2100444369916995, "grad_norm": 0.4395811632773946, "learning_rate": 2e-05, "loss": 5.593, "step": 6263 }, { "epoch": 0.21007797434392556, "grad_norm": 0.42882854738327914, "learning_rate": 2e-05, "loss": 5.4841, "step": 6264 }, { "epoch": 0.21011151169615158, "grad_norm": 0.40699892731296144, "learning_rate": 2e-05, "loss": 5.4944, "step": 6265 }, { "epoch": 0.21014504904837764, "grad_norm": 0.43856994206769945, "learning_rate": 2e-05, "loss": 5.6955, "step": 6266 }, { "epoch": 0.21017858640060366, "grad_norm": 0.4253846882760815, "learning_rate": 2e-05, "loss": 5.4011, "step": 6267 }, { "epoch": 0.21021212375282972, "grad_norm": 0.3982079633259018, "learning_rate": 2e-05, "loss": 5.504, "step": 6268 }, { "epoch": 0.21024566110505574, "grad_norm": 0.4132096029273046, "learning_rate": 2e-05, "loss": 5.5521, "step": 6269 }, { "epoch": 0.2102791984572818, "grad_norm": 0.4238583395241385, "learning_rate": 2e-05, "loss": 5.5719, "step": 6270 }, { "epoch": 0.21031273580950785, "grad_norm": 0.3770575069433992, "learning_rate": 2e-05, "loss": 5.6282, "step": 6271 }, { "epoch": 0.21034627316173388, "grad_norm": 0.40889438564620295, "learning_rate": 2e-05, "loss": 5.4677, "step": 6272 }, { "epoch": 0.21037981051395993, "grad_norm": 0.4099618192324312, "learning_rate": 2e-05, "loss": 5.8827, "step": 6273 }, { "epoch": 0.21041334786618596, "grad_norm": 0.42212052564410907, "learning_rate": 2e-05, "loss": 5.5383, "step": 6274 }, { "epoch": 0.210446885218412, "grad_norm": 0.40046215849743877, "learning_rate": 2e-05, "loss": 5.7379, "step": 6275 }, { "epoch": 0.21048042257063804, "grad_norm": 0.41603523680316673, "learning_rate": 2e-05, "loss": 5.8484, "step": 6276 }, { "epoch": 0.2105139599228641, "grad_norm": 0.4228307254676788, "learning_rate": 2e-05, "loss": 5.449, "step": 6277 }, { "epoch": 0.21054749727509015, "grad_norm": 0.41537831037899814, "learning_rate": 2e-05, "loss": 5.537, "step": 6278 }, { "epoch": 0.21058103462731617, "grad_norm": 0.4159914054283428, "learning_rate": 2e-05, "loss": 5.4418, "step": 6279 }, { "epoch": 0.21061457197954223, "grad_norm": 0.40575984561434236, "learning_rate": 2e-05, "loss": 5.6063, "step": 6280 }, { "epoch": 0.21064810933176825, "grad_norm": 0.41094277558217146, "learning_rate": 2e-05, "loss": 5.559, "step": 6281 }, { "epoch": 0.2106816466839943, "grad_norm": 0.42753554107954594, "learning_rate": 2e-05, "loss": 5.5132, "step": 6282 }, { "epoch": 0.21071518403622033, "grad_norm": 0.41955029947771644, "learning_rate": 2e-05, "loss": 5.4337, "step": 6283 }, { "epoch": 0.2107487213884464, "grad_norm": 0.3969973707377389, "learning_rate": 2e-05, "loss": 5.4563, "step": 6284 }, { "epoch": 0.2107822587406724, "grad_norm": 0.4125405161513492, "learning_rate": 2e-05, "loss": 5.3805, "step": 6285 }, { "epoch": 0.21081579609289847, "grad_norm": 0.4113751359164881, "learning_rate": 2e-05, "loss": 5.5188, "step": 6286 }, { "epoch": 0.21084933344512452, "grad_norm": 0.41544241645461616, "learning_rate": 2e-05, "loss": 5.7363, "step": 6287 }, { "epoch": 0.21088287079735055, "grad_norm": 0.413049354610662, "learning_rate": 2e-05, "loss": 5.5157, "step": 6288 }, { "epoch": 0.2109164081495766, "grad_norm": 0.4412376855651565, "learning_rate": 2e-05, "loss": 5.6033, "step": 6289 }, { "epoch": 0.21094994550180263, "grad_norm": 0.4071687777049574, "learning_rate": 2e-05, "loss": 5.569, "step": 6290 }, { "epoch": 0.21098348285402868, "grad_norm": 0.4252560527855679, "learning_rate": 2e-05, "loss": 5.5524, "step": 6291 }, { "epoch": 0.2110170202062547, "grad_norm": 0.4528427489677014, "learning_rate": 2e-05, "loss": 5.6692, "step": 6292 }, { "epoch": 0.21105055755848076, "grad_norm": 0.443433948260257, "learning_rate": 2e-05, "loss": 5.5671, "step": 6293 }, { "epoch": 0.2110840949107068, "grad_norm": 0.38869146619267764, "learning_rate": 2e-05, "loss": 5.8117, "step": 6294 }, { "epoch": 0.21111763226293284, "grad_norm": 0.4266414548643382, "learning_rate": 2e-05, "loss": 5.564, "step": 6295 }, { "epoch": 0.2111511696151589, "grad_norm": 0.40588785084142637, "learning_rate": 2e-05, "loss": 5.4752, "step": 6296 }, { "epoch": 0.21118470696738492, "grad_norm": 0.40138411757730863, "learning_rate": 2e-05, "loss": 5.5025, "step": 6297 }, { "epoch": 0.21121824431961098, "grad_norm": 0.43533776861119, "learning_rate": 2e-05, "loss": 5.5668, "step": 6298 }, { "epoch": 0.211251781671837, "grad_norm": 0.4121922715970318, "learning_rate": 2e-05, "loss": 5.5119, "step": 6299 }, { "epoch": 0.21128531902406306, "grad_norm": 0.41168428016393105, "learning_rate": 2e-05, "loss": 5.5312, "step": 6300 }, { "epoch": 0.21131885637628908, "grad_norm": 0.41453768142498565, "learning_rate": 2e-05, "loss": 5.4722, "step": 6301 }, { "epoch": 0.21135239372851514, "grad_norm": 0.3742057240235668, "learning_rate": 2e-05, "loss": 5.6584, "step": 6302 }, { "epoch": 0.21138593108074116, "grad_norm": 0.4226346946423929, "learning_rate": 2e-05, "loss": 5.5439, "step": 6303 }, { "epoch": 0.21141946843296722, "grad_norm": 0.41092366646082795, "learning_rate": 2e-05, "loss": 5.638, "step": 6304 }, { "epoch": 0.21145300578519327, "grad_norm": 0.3984599150400232, "learning_rate": 2e-05, "loss": 5.5383, "step": 6305 }, { "epoch": 0.2114865431374193, "grad_norm": 0.3960508735320022, "learning_rate": 2e-05, "loss": 5.8181, "step": 6306 }, { "epoch": 0.21152008048964535, "grad_norm": 0.441291084242581, "learning_rate": 2e-05, "loss": 5.5785, "step": 6307 }, { "epoch": 0.21155361784187138, "grad_norm": 0.4025657001423715, "learning_rate": 2e-05, "loss": 5.5494, "step": 6308 }, { "epoch": 0.21158715519409743, "grad_norm": 0.4098698676887577, "learning_rate": 2e-05, "loss": 5.4897, "step": 6309 }, { "epoch": 0.21162069254632346, "grad_norm": 0.40040274488027516, "learning_rate": 2e-05, "loss": 5.6414, "step": 6310 }, { "epoch": 0.2116542298985495, "grad_norm": 0.3847435532100315, "learning_rate": 2e-05, "loss": 5.5637, "step": 6311 }, { "epoch": 0.21168776725077554, "grad_norm": 0.40543028021897526, "learning_rate": 2e-05, "loss": 5.4951, "step": 6312 }, { "epoch": 0.2117213046030016, "grad_norm": 0.4040664117417693, "learning_rate": 2e-05, "loss": 5.6382, "step": 6313 }, { "epoch": 0.21175484195522765, "grad_norm": 0.39345535399242515, "learning_rate": 2e-05, "loss": 5.5981, "step": 6314 }, { "epoch": 0.21178837930745367, "grad_norm": 0.4018324243452872, "learning_rate": 2e-05, "loss": 5.8006, "step": 6315 }, { "epoch": 0.21182191665967973, "grad_norm": 0.4488527305772551, "learning_rate": 2e-05, "loss": 5.5745, "step": 6316 }, { "epoch": 0.21185545401190575, "grad_norm": 0.3999408903495995, "learning_rate": 2e-05, "loss": 5.5524, "step": 6317 }, { "epoch": 0.2118889913641318, "grad_norm": 0.3992973277815785, "learning_rate": 2e-05, "loss": 5.657, "step": 6318 }, { "epoch": 0.21192252871635783, "grad_norm": 0.41971297616603953, "learning_rate": 2e-05, "loss": 5.3751, "step": 6319 }, { "epoch": 0.2119560660685839, "grad_norm": 0.4049362226327207, "learning_rate": 2e-05, "loss": 5.5616, "step": 6320 }, { "epoch": 0.2119896034208099, "grad_norm": 0.4563868821437798, "learning_rate": 2e-05, "loss": 5.4682, "step": 6321 }, { "epoch": 0.21202314077303597, "grad_norm": 0.41616155830897744, "learning_rate": 2e-05, "loss": 5.4089, "step": 6322 }, { "epoch": 0.21205667812526202, "grad_norm": 0.4321841541433287, "learning_rate": 2e-05, "loss": 5.4546, "step": 6323 }, { "epoch": 0.21209021547748805, "grad_norm": 0.40607498554314303, "learning_rate": 2e-05, "loss": 5.3869, "step": 6324 }, { "epoch": 0.2121237528297141, "grad_norm": 0.40743254724530104, "learning_rate": 2e-05, "loss": 5.557, "step": 6325 }, { "epoch": 0.21215729018194013, "grad_norm": 0.44440662233892464, "learning_rate": 2e-05, "loss": 5.3542, "step": 6326 }, { "epoch": 0.21219082753416618, "grad_norm": 0.45848005371527, "learning_rate": 2e-05, "loss": 5.6129, "step": 6327 }, { "epoch": 0.2122243648863922, "grad_norm": 0.41549655452908, "learning_rate": 2e-05, "loss": 5.7048, "step": 6328 }, { "epoch": 0.21225790223861826, "grad_norm": 0.45434542242714754, "learning_rate": 2e-05, "loss": 5.4746, "step": 6329 }, { "epoch": 0.21229143959084432, "grad_norm": 0.42266359630220135, "learning_rate": 2e-05, "loss": 5.5199, "step": 6330 }, { "epoch": 0.21232497694307034, "grad_norm": 0.40280077654975605, "learning_rate": 2e-05, "loss": 5.4214, "step": 6331 }, { "epoch": 0.2123585142952964, "grad_norm": 0.40470706292234454, "learning_rate": 2e-05, "loss": 5.3501, "step": 6332 }, { "epoch": 0.21239205164752242, "grad_norm": 0.422734031613085, "learning_rate": 2e-05, "loss": 5.6623, "step": 6333 }, { "epoch": 0.21242558899974848, "grad_norm": 0.4117304777358191, "learning_rate": 2e-05, "loss": 5.6908, "step": 6334 }, { "epoch": 0.2124591263519745, "grad_norm": 0.42644532998398876, "learning_rate": 2e-05, "loss": 5.67, "step": 6335 }, { "epoch": 0.21249266370420056, "grad_norm": 0.4111209508614782, "learning_rate": 2e-05, "loss": 5.6662, "step": 6336 }, { "epoch": 0.21252620105642658, "grad_norm": 0.38754990530362554, "learning_rate": 2e-05, "loss": 5.5161, "step": 6337 }, { "epoch": 0.21255973840865264, "grad_norm": 0.42002402178316, "learning_rate": 2e-05, "loss": 5.4721, "step": 6338 }, { "epoch": 0.2125932757608787, "grad_norm": 0.4102308444346176, "learning_rate": 2e-05, "loss": 5.6958, "step": 6339 }, { "epoch": 0.21262681311310472, "grad_norm": 0.4357943808417423, "learning_rate": 2e-05, "loss": 5.5225, "step": 6340 }, { "epoch": 0.21266035046533077, "grad_norm": 0.43294110505768163, "learning_rate": 2e-05, "loss": 5.3613, "step": 6341 }, { "epoch": 0.2126938878175568, "grad_norm": 0.4109843376494709, "learning_rate": 2e-05, "loss": 5.4196, "step": 6342 }, { "epoch": 0.21272742516978285, "grad_norm": 0.4057866190222064, "learning_rate": 2e-05, "loss": 5.3707, "step": 6343 }, { "epoch": 0.21276096252200888, "grad_norm": 0.5127105728044541, "learning_rate": 2e-05, "loss": 5.4626, "step": 6344 }, { "epoch": 0.21279449987423493, "grad_norm": 0.4079591449709264, "learning_rate": 2e-05, "loss": 5.637, "step": 6345 }, { "epoch": 0.21282803722646096, "grad_norm": 0.42909813675633884, "learning_rate": 2e-05, "loss": 5.4181, "step": 6346 }, { "epoch": 0.212861574578687, "grad_norm": 0.40878277707611266, "learning_rate": 2e-05, "loss": 5.6379, "step": 6347 }, { "epoch": 0.21289511193091307, "grad_norm": 0.4158871840668602, "learning_rate": 2e-05, "loss": 5.6403, "step": 6348 }, { "epoch": 0.2129286492831391, "grad_norm": 0.41558613994736254, "learning_rate": 2e-05, "loss": 5.5111, "step": 6349 }, { "epoch": 0.21296218663536515, "grad_norm": 0.40879287564522493, "learning_rate": 2e-05, "loss": 5.5239, "step": 6350 }, { "epoch": 0.21299572398759117, "grad_norm": 0.41753408037336964, "learning_rate": 2e-05, "loss": 5.5353, "step": 6351 }, { "epoch": 0.21302926133981723, "grad_norm": 0.4224810092993957, "learning_rate": 2e-05, "loss": 5.7681, "step": 6352 }, { "epoch": 0.21306279869204325, "grad_norm": 0.4447134565566554, "learning_rate": 2e-05, "loss": 5.5365, "step": 6353 }, { "epoch": 0.2130963360442693, "grad_norm": 0.39614017881665803, "learning_rate": 2e-05, "loss": 5.5274, "step": 6354 }, { "epoch": 0.21312987339649533, "grad_norm": 0.39756850708696573, "learning_rate": 2e-05, "loss": 5.6954, "step": 6355 }, { "epoch": 0.2131634107487214, "grad_norm": 0.4121703898328387, "learning_rate": 2e-05, "loss": 5.4364, "step": 6356 }, { "epoch": 0.21319694810094744, "grad_norm": 0.3942387981863863, "learning_rate": 2e-05, "loss": 5.4792, "step": 6357 }, { "epoch": 0.21323048545317347, "grad_norm": 0.4079317552870825, "learning_rate": 2e-05, "loss": 5.5344, "step": 6358 }, { "epoch": 0.21326402280539952, "grad_norm": 0.4469584954403291, "learning_rate": 2e-05, "loss": 5.3933, "step": 6359 }, { "epoch": 0.21329756015762555, "grad_norm": 0.43569901648778253, "learning_rate": 2e-05, "loss": 5.3802, "step": 6360 }, { "epoch": 0.2133310975098516, "grad_norm": 0.41080468227767336, "learning_rate": 2e-05, "loss": 5.6432, "step": 6361 }, { "epoch": 0.21336463486207763, "grad_norm": 0.40785996233447863, "learning_rate": 2e-05, "loss": 5.5986, "step": 6362 }, { "epoch": 0.21339817221430368, "grad_norm": 0.46633186890191247, "learning_rate": 2e-05, "loss": 5.6413, "step": 6363 }, { "epoch": 0.2134317095665297, "grad_norm": 0.4061332059129573, "learning_rate": 2e-05, "loss": 5.4955, "step": 6364 }, { "epoch": 0.21346524691875576, "grad_norm": 0.4023591811548727, "learning_rate": 2e-05, "loss": 5.6606, "step": 6365 }, { "epoch": 0.21349878427098182, "grad_norm": 0.43871014806107933, "learning_rate": 2e-05, "loss": 5.7277, "step": 6366 }, { "epoch": 0.21353232162320784, "grad_norm": 0.4168094556359876, "learning_rate": 2e-05, "loss": 5.5785, "step": 6367 }, { "epoch": 0.2135658589754339, "grad_norm": 0.3969189541195239, "learning_rate": 2e-05, "loss": 5.555, "step": 6368 }, { "epoch": 0.21359939632765992, "grad_norm": 0.4285215666541663, "learning_rate": 2e-05, "loss": 5.6117, "step": 6369 }, { "epoch": 0.21363293367988598, "grad_norm": 0.4267887752479216, "learning_rate": 2e-05, "loss": 5.5193, "step": 6370 }, { "epoch": 0.213666471032112, "grad_norm": 0.4451509637107159, "learning_rate": 2e-05, "loss": 5.5854, "step": 6371 }, { "epoch": 0.21370000838433806, "grad_norm": 0.4574548897520437, "learning_rate": 2e-05, "loss": 5.5104, "step": 6372 }, { "epoch": 0.21373354573656408, "grad_norm": 0.4155745400645231, "learning_rate": 2e-05, "loss": 5.4727, "step": 6373 }, { "epoch": 0.21376708308879014, "grad_norm": 0.4601830417464352, "learning_rate": 2e-05, "loss": 5.5634, "step": 6374 }, { "epoch": 0.2138006204410162, "grad_norm": 0.40730300188539303, "learning_rate": 2e-05, "loss": 5.6672, "step": 6375 }, { "epoch": 0.21383415779324222, "grad_norm": 0.45352858910040017, "learning_rate": 2e-05, "loss": 5.649, "step": 6376 }, { "epoch": 0.21386769514546827, "grad_norm": 0.41725996435421703, "learning_rate": 2e-05, "loss": 5.5261, "step": 6377 }, { "epoch": 0.2139012324976943, "grad_norm": 0.3952162202672676, "learning_rate": 2e-05, "loss": 5.6138, "step": 6378 }, { "epoch": 0.21393476984992035, "grad_norm": 0.41473762606884934, "learning_rate": 2e-05, "loss": 5.5819, "step": 6379 }, { "epoch": 0.21396830720214638, "grad_norm": 0.48360076288022513, "learning_rate": 2e-05, "loss": 5.3345, "step": 6380 }, { "epoch": 0.21400184455437243, "grad_norm": 0.42818903730076097, "learning_rate": 2e-05, "loss": 5.6463, "step": 6381 }, { "epoch": 0.2140353819065985, "grad_norm": 0.45694233941525403, "learning_rate": 2e-05, "loss": 5.6069, "step": 6382 }, { "epoch": 0.2140689192588245, "grad_norm": 0.4056276738055149, "learning_rate": 2e-05, "loss": 5.5571, "step": 6383 }, { "epoch": 0.21410245661105057, "grad_norm": 0.42374259417347493, "learning_rate": 2e-05, "loss": 5.5557, "step": 6384 }, { "epoch": 0.2141359939632766, "grad_norm": 0.4847113058119507, "learning_rate": 2e-05, "loss": 5.5988, "step": 6385 }, { "epoch": 0.21416953131550265, "grad_norm": 0.44572616385666103, "learning_rate": 2e-05, "loss": 5.6623, "step": 6386 }, { "epoch": 0.21420306866772867, "grad_norm": 0.42927781833444356, "learning_rate": 2e-05, "loss": 5.5066, "step": 6387 }, { "epoch": 0.21423660601995473, "grad_norm": 0.4367495546684468, "learning_rate": 2e-05, "loss": 5.449, "step": 6388 }, { "epoch": 0.21427014337218075, "grad_norm": 0.43683306557762275, "learning_rate": 2e-05, "loss": 5.8164, "step": 6389 }, { "epoch": 0.2143036807244068, "grad_norm": 0.43447011910680916, "learning_rate": 2e-05, "loss": 5.4141, "step": 6390 }, { "epoch": 0.21433721807663286, "grad_norm": 0.42878922879443976, "learning_rate": 2e-05, "loss": 5.4662, "step": 6391 }, { "epoch": 0.2143707554288589, "grad_norm": 0.41899185229359076, "learning_rate": 2e-05, "loss": 5.6609, "step": 6392 }, { "epoch": 0.21440429278108494, "grad_norm": 0.4531126214736824, "learning_rate": 2e-05, "loss": 5.5579, "step": 6393 }, { "epoch": 0.21443783013331097, "grad_norm": 0.42491759969023124, "learning_rate": 2e-05, "loss": 5.8183, "step": 6394 }, { "epoch": 0.21447136748553702, "grad_norm": 0.4178864433857018, "learning_rate": 2e-05, "loss": 5.686, "step": 6395 }, { "epoch": 0.21450490483776305, "grad_norm": 0.39865787868748986, "learning_rate": 2e-05, "loss": 5.4509, "step": 6396 }, { "epoch": 0.2145384421899891, "grad_norm": 0.43596476166336556, "learning_rate": 2e-05, "loss": 5.4743, "step": 6397 }, { "epoch": 0.21457197954221513, "grad_norm": 0.432532396782672, "learning_rate": 2e-05, "loss": 5.7651, "step": 6398 }, { "epoch": 0.21460551689444118, "grad_norm": 0.4212434641800118, "learning_rate": 2e-05, "loss": 5.4489, "step": 6399 }, { "epoch": 0.21463905424666724, "grad_norm": 0.428181168113188, "learning_rate": 2e-05, "loss": 5.5371, "step": 6400 }, { "epoch": 0.21467259159889326, "grad_norm": 0.41647829019250715, "learning_rate": 2e-05, "loss": 5.744, "step": 6401 }, { "epoch": 0.21470612895111932, "grad_norm": 0.44745600976125555, "learning_rate": 2e-05, "loss": 5.4748, "step": 6402 }, { "epoch": 0.21473966630334534, "grad_norm": 0.4225927202460737, "learning_rate": 2e-05, "loss": 5.5156, "step": 6403 }, { "epoch": 0.2147732036555714, "grad_norm": 0.44726847144984844, "learning_rate": 2e-05, "loss": 5.5027, "step": 6404 }, { "epoch": 0.21480674100779742, "grad_norm": 0.39938445586218685, "learning_rate": 2e-05, "loss": 5.5044, "step": 6405 }, { "epoch": 0.21484027836002348, "grad_norm": 0.41970801143911635, "learning_rate": 2e-05, "loss": 5.6857, "step": 6406 }, { "epoch": 0.2148738157122495, "grad_norm": 0.4182338570191476, "learning_rate": 2e-05, "loss": 5.4336, "step": 6407 }, { "epoch": 0.21490735306447556, "grad_norm": 0.4178303172256174, "learning_rate": 2e-05, "loss": 5.4607, "step": 6408 }, { "epoch": 0.2149408904167016, "grad_norm": 0.41789601717208136, "learning_rate": 2e-05, "loss": 5.4032, "step": 6409 }, { "epoch": 0.21497442776892764, "grad_norm": 0.43993032564461604, "learning_rate": 2e-05, "loss": 5.3036, "step": 6410 }, { "epoch": 0.2150079651211537, "grad_norm": 0.4080356616232663, "learning_rate": 2e-05, "loss": 5.6436, "step": 6411 }, { "epoch": 0.21504150247337972, "grad_norm": 0.4201139708257042, "learning_rate": 2e-05, "loss": 5.576, "step": 6412 }, { "epoch": 0.21507503982560577, "grad_norm": 0.47229246078593123, "learning_rate": 2e-05, "loss": 5.5131, "step": 6413 }, { "epoch": 0.2151085771778318, "grad_norm": 0.4184357405770339, "learning_rate": 2e-05, "loss": 5.7594, "step": 6414 }, { "epoch": 0.21514211453005785, "grad_norm": 0.4381279989641913, "learning_rate": 2e-05, "loss": 5.3257, "step": 6415 }, { "epoch": 0.21517565188228388, "grad_norm": 0.3992409790526948, "learning_rate": 2e-05, "loss": 5.5838, "step": 6416 }, { "epoch": 0.21520918923450993, "grad_norm": 0.4276767510645074, "learning_rate": 2e-05, "loss": 5.4926, "step": 6417 }, { "epoch": 0.215242726586736, "grad_norm": 0.4002001660616871, "learning_rate": 2e-05, "loss": 5.6413, "step": 6418 }, { "epoch": 0.21527626393896201, "grad_norm": 0.398380589257899, "learning_rate": 2e-05, "loss": 5.6258, "step": 6419 }, { "epoch": 0.21530980129118807, "grad_norm": 0.4008506677913928, "learning_rate": 2e-05, "loss": 5.425, "step": 6420 }, { "epoch": 0.2153433386434141, "grad_norm": 0.437844657854862, "learning_rate": 2e-05, "loss": 5.6536, "step": 6421 }, { "epoch": 0.21537687599564015, "grad_norm": 0.42993379563445205, "learning_rate": 2e-05, "loss": 5.7342, "step": 6422 }, { "epoch": 0.21541041334786618, "grad_norm": 0.42197341897185053, "learning_rate": 2e-05, "loss": 5.7588, "step": 6423 }, { "epoch": 0.21544395070009223, "grad_norm": 0.4002381965727696, "learning_rate": 2e-05, "loss": 5.7205, "step": 6424 }, { "epoch": 0.21547748805231826, "grad_norm": 0.412120509296118, "learning_rate": 2e-05, "loss": 5.528, "step": 6425 }, { "epoch": 0.2155110254045443, "grad_norm": 0.42757199851434907, "learning_rate": 2e-05, "loss": 5.6109, "step": 6426 }, { "epoch": 0.21554456275677036, "grad_norm": 0.41275682566857064, "learning_rate": 2e-05, "loss": 5.4346, "step": 6427 }, { "epoch": 0.2155781001089964, "grad_norm": 0.4238450574927966, "learning_rate": 2e-05, "loss": 5.3522, "step": 6428 }, { "epoch": 0.21561163746122244, "grad_norm": 0.40294721993233656, "learning_rate": 2e-05, "loss": 5.7465, "step": 6429 }, { "epoch": 0.21564517481344847, "grad_norm": 0.4021774905069276, "learning_rate": 2e-05, "loss": 5.5493, "step": 6430 }, { "epoch": 0.21567871216567452, "grad_norm": 0.400015453202559, "learning_rate": 2e-05, "loss": 5.6465, "step": 6431 }, { "epoch": 0.21571224951790055, "grad_norm": 0.4165089104558145, "learning_rate": 2e-05, "loss": 5.4981, "step": 6432 }, { "epoch": 0.2157457868701266, "grad_norm": 0.4228174141709622, "learning_rate": 2e-05, "loss": 5.6427, "step": 6433 }, { "epoch": 0.21577932422235266, "grad_norm": 0.3982079124250789, "learning_rate": 2e-05, "loss": 5.587, "step": 6434 }, { "epoch": 0.21581286157457868, "grad_norm": 0.44649876425349455, "learning_rate": 2e-05, "loss": 5.5229, "step": 6435 }, { "epoch": 0.21584639892680474, "grad_norm": 0.4386640027283794, "learning_rate": 2e-05, "loss": 5.5665, "step": 6436 }, { "epoch": 0.21587993627903077, "grad_norm": 0.4038534346762183, "learning_rate": 2e-05, "loss": 5.5075, "step": 6437 }, { "epoch": 0.21591347363125682, "grad_norm": 0.39925647795456765, "learning_rate": 2e-05, "loss": 5.6595, "step": 6438 }, { "epoch": 0.21594701098348285, "grad_norm": 0.3964450923091322, "learning_rate": 2e-05, "loss": 5.2327, "step": 6439 }, { "epoch": 0.2159805483357089, "grad_norm": 0.4099364770792512, "learning_rate": 2e-05, "loss": 5.643, "step": 6440 }, { "epoch": 0.21601408568793493, "grad_norm": 0.409082976316479, "learning_rate": 2e-05, "loss": 5.6304, "step": 6441 }, { "epoch": 0.21604762304016098, "grad_norm": 0.4207459845408582, "learning_rate": 2e-05, "loss": 5.8362, "step": 6442 }, { "epoch": 0.21608116039238703, "grad_norm": 0.3944055437281195, "learning_rate": 2e-05, "loss": 5.6994, "step": 6443 }, { "epoch": 0.21611469774461306, "grad_norm": 0.4255410184644629, "learning_rate": 2e-05, "loss": 5.6115, "step": 6444 }, { "epoch": 0.21614823509683911, "grad_norm": 0.41095614925031015, "learning_rate": 2e-05, "loss": 5.5431, "step": 6445 }, { "epoch": 0.21618177244906514, "grad_norm": 0.41679314643631743, "learning_rate": 2e-05, "loss": 5.3985, "step": 6446 }, { "epoch": 0.2162153098012912, "grad_norm": 0.4021042318314482, "learning_rate": 2e-05, "loss": 5.601, "step": 6447 }, { "epoch": 0.21624884715351722, "grad_norm": 0.41293139009786456, "learning_rate": 2e-05, "loss": 5.4036, "step": 6448 }, { "epoch": 0.21628238450574327, "grad_norm": 0.4405383376733375, "learning_rate": 2e-05, "loss": 5.5771, "step": 6449 }, { "epoch": 0.2163159218579693, "grad_norm": 0.4003962639020606, "learning_rate": 2e-05, "loss": 5.5096, "step": 6450 }, { "epoch": 0.21634945921019536, "grad_norm": 0.4075569616589043, "learning_rate": 2e-05, "loss": 5.5021, "step": 6451 }, { "epoch": 0.2163829965624214, "grad_norm": 0.39963186592119493, "learning_rate": 2e-05, "loss": 5.5442, "step": 6452 }, { "epoch": 0.21641653391464744, "grad_norm": 0.42632106447645235, "learning_rate": 2e-05, "loss": 5.7428, "step": 6453 }, { "epoch": 0.2164500712668735, "grad_norm": 0.407698120690841, "learning_rate": 2e-05, "loss": 5.5242, "step": 6454 }, { "epoch": 0.21648360861909952, "grad_norm": 0.3991442605278535, "learning_rate": 2e-05, "loss": 5.3097, "step": 6455 }, { "epoch": 0.21651714597132557, "grad_norm": 0.4023274516244714, "learning_rate": 2e-05, "loss": 5.5823, "step": 6456 }, { "epoch": 0.2165506833235516, "grad_norm": 0.3970339342065568, "learning_rate": 2e-05, "loss": 5.4404, "step": 6457 }, { "epoch": 0.21658422067577765, "grad_norm": 0.43865751421283866, "learning_rate": 2e-05, "loss": 5.708, "step": 6458 }, { "epoch": 0.21661775802800368, "grad_norm": 0.4154634959254879, "learning_rate": 2e-05, "loss": 5.5974, "step": 6459 }, { "epoch": 0.21665129538022973, "grad_norm": 0.39708329102535184, "learning_rate": 2e-05, "loss": 5.5692, "step": 6460 }, { "epoch": 0.21668483273245578, "grad_norm": 0.41616596598066014, "learning_rate": 2e-05, "loss": 5.4519, "step": 6461 }, { "epoch": 0.2167183700846818, "grad_norm": 0.4388865349938775, "learning_rate": 2e-05, "loss": 5.5576, "step": 6462 }, { "epoch": 0.21675190743690786, "grad_norm": 0.399198605513511, "learning_rate": 2e-05, "loss": 5.3989, "step": 6463 }, { "epoch": 0.2167854447891339, "grad_norm": 0.4745888700575552, "learning_rate": 2e-05, "loss": 5.5322, "step": 6464 }, { "epoch": 0.21681898214135994, "grad_norm": 0.4373762079259817, "learning_rate": 2e-05, "loss": 5.5257, "step": 6465 }, { "epoch": 0.21685251949358597, "grad_norm": 0.404405739040116, "learning_rate": 2e-05, "loss": 5.5153, "step": 6466 }, { "epoch": 0.21688605684581203, "grad_norm": 0.40374418987322086, "learning_rate": 2e-05, "loss": 5.5913, "step": 6467 }, { "epoch": 0.21691959419803805, "grad_norm": 0.4130973014029232, "learning_rate": 2e-05, "loss": 5.5494, "step": 6468 }, { "epoch": 0.2169531315502641, "grad_norm": 0.40341362656405844, "learning_rate": 2e-05, "loss": 5.6169, "step": 6469 }, { "epoch": 0.21698666890249016, "grad_norm": 0.4342117240728808, "learning_rate": 2e-05, "loss": 5.6067, "step": 6470 }, { "epoch": 0.21702020625471619, "grad_norm": 0.4026460142184357, "learning_rate": 2e-05, "loss": 5.5279, "step": 6471 }, { "epoch": 0.21705374360694224, "grad_norm": 0.4246889929284196, "learning_rate": 2e-05, "loss": 5.3949, "step": 6472 }, { "epoch": 0.21708728095916827, "grad_norm": 0.41400765436741505, "learning_rate": 2e-05, "loss": 5.4961, "step": 6473 }, { "epoch": 0.21712081831139432, "grad_norm": 0.3990656330193259, "learning_rate": 2e-05, "loss": 5.5894, "step": 6474 }, { "epoch": 0.21715435566362035, "grad_norm": 0.4084403752451926, "learning_rate": 2e-05, "loss": 5.6906, "step": 6475 }, { "epoch": 0.2171878930158464, "grad_norm": 0.4003681021419443, "learning_rate": 2e-05, "loss": 5.8138, "step": 6476 }, { "epoch": 0.21722143036807245, "grad_norm": 0.4271284928929962, "learning_rate": 2e-05, "loss": 5.6778, "step": 6477 }, { "epoch": 0.21725496772029848, "grad_norm": 0.40707909074055115, "learning_rate": 2e-05, "loss": 5.6928, "step": 6478 }, { "epoch": 0.21728850507252453, "grad_norm": 0.4147683070959465, "learning_rate": 2e-05, "loss": 5.7225, "step": 6479 }, { "epoch": 0.21732204242475056, "grad_norm": 0.3991065573446879, "learning_rate": 2e-05, "loss": 5.5114, "step": 6480 }, { "epoch": 0.21735557977697662, "grad_norm": 0.43442346835020457, "learning_rate": 2e-05, "loss": 5.3737, "step": 6481 }, { "epoch": 0.21738911712920264, "grad_norm": 0.4288484829247577, "learning_rate": 2e-05, "loss": 5.6337, "step": 6482 }, { "epoch": 0.2174226544814287, "grad_norm": 0.4236258051682129, "learning_rate": 2e-05, "loss": 5.7337, "step": 6483 }, { "epoch": 0.21745619183365472, "grad_norm": 0.4205230888266296, "learning_rate": 2e-05, "loss": 5.6505, "step": 6484 }, { "epoch": 0.21748972918588078, "grad_norm": 0.38973667646840593, "learning_rate": 2e-05, "loss": 5.6935, "step": 6485 }, { "epoch": 0.21752326653810683, "grad_norm": 0.4474703720320268, "learning_rate": 2e-05, "loss": 5.5976, "step": 6486 }, { "epoch": 0.21755680389033286, "grad_norm": 0.43450499020428157, "learning_rate": 2e-05, "loss": 5.7038, "step": 6487 }, { "epoch": 0.2175903412425589, "grad_norm": 0.4085247359168235, "learning_rate": 2e-05, "loss": 5.5483, "step": 6488 }, { "epoch": 0.21762387859478494, "grad_norm": 0.4067188231755875, "learning_rate": 2e-05, "loss": 5.548, "step": 6489 }, { "epoch": 0.217657415947011, "grad_norm": 0.4017058995808491, "learning_rate": 2e-05, "loss": 5.4612, "step": 6490 }, { "epoch": 0.21769095329923702, "grad_norm": 0.38797385839683285, "learning_rate": 2e-05, "loss": 5.4694, "step": 6491 }, { "epoch": 0.21772449065146307, "grad_norm": 0.48442521317016873, "learning_rate": 2e-05, "loss": 5.5397, "step": 6492 }, { "epoch": 0.2177580280036891, "grad_norm": 0.39050188217709075, "learning_rate": 2e-05, "loss": 5.5365, "step": 6493 }, { "epoch": 0.21779156535591515, "grad_norm": 0.44708234538706765, "learning_rate": 2e-05, "loss": 5.7995, "step": 6494 }, { "epoch": 0.2178251027081412, "grad_norm": 0.40966030220831207, "learning_rate": 2e-05, "loss": 5.5546, "step": 6495 }, { "epoch": 0.21785864006036723, "grad_norm": 0.4290828115304669, "learning_rate": 2e-05, "loss": 5.5085, "step": 6496 }, { "epoch": 0.21789217741259329, "grad_norm": 0.4006473847159605, "learning_rate": 2e-05, "loss": 5.617, "step": 6497 }, { "epoch": 0.2179257147648193, "grad_norm": 0.4080413726456874, "learning_rate": 2e-05, "loss": 5.6541, "step": 6498 }, { "epoch": 0.21795925211704537, "grad_norm": 0.4179020068786683, "learning_rate": 2e-05, "loss": 5.6225, "step": 6499 }, { "epoch": 0.2179927894692714, "grad_norm": 0.4200706539286377, "learning_rate": 2e-05, "loss": 5.6419, "step": 6500 }, { "epoch": 0.21802632682149745, "grad_norm": 0.39563972252337004, "learning_rate": 2e-05, "loss": 5.4635, "step": 6501 }, { "epoch": 0.21805986417372347, "grad_norm": 0.41548551972700715, "learning_rate": 2e-05, "loss": 5.5304, "step": 6502 }, { "epoch": 0.21809340152594953, "grad_norm": 0.42526412788223505, "learning_rate": 2e-05, "loss": 5.5088, "step": 6503 }, { "epoch": 0.21812693887817558, "grad_norm": 0.42031224443963366, "learning_rate": 2e-05, "loss": 5.5844, "step": 6504 }, { "epoch": 0.2181604762304016, "grad_norm": 0.4435166408863867, "learning_rate": 2e-05, "loss": 5.559, "step": 6505 }, { "epoch": 0.21819401358262766, "grad_norm": 0.4133959645347217, "learning_rate": 2e-05, "loss": 5.5301, "step": 6506 }, { "epoch": 0.2182275509348537, "grad_norm": 0.39256709428280384, "learning_rate": 2e-05, "loss": 5.5927, "step": 6507 }, { "epoch": 0.21826108828707974, "grad_norm": 0.40776166232403716, "learning_rate": 2e-05, "loss": 5.5207, "step": 6508 }, { "epoch": 0.21829462563930577, "grad_norm": 0.3822658295276917, "learning_rate": 2e-05, "loss": 5.6235, "step": 6509 }, { "epoch": 0.21832816299153182, "grad_norm": 0.4152670708108576, "learning_rate": 2e-05, "loss": 5.4468, "step": 6510 }, { "epoch": 0.21836170034375785, "grad_norm": 0.4186192279492332, "learning_rate": 2e-05, "loss": 5.6438, "step": 6511 }, { "epoch": 0.2183952376959839, "grad_norm": 0.4059615884691238, "learning_rate": 2e-05, "loss": 5.6273, "step": 6512 }, { "epoch": 0.21842877504820996, "grad_norm": 0.43494571271737825, "learning_rate": 2e-05, "loss": 5.4809, "step": 6513 }, { "epoch": 0.21846231240043598, "grad_norm": 0.4506004692830499, "learning_rate": 2e-05, "loss": 5.6632, "step": 6514 }, { "epoch": 0.21849584975266204, "grad_norm": 0.41779358322178384, "learning_rate": 2e-05, "loss": 5.6665, "step": 6515 }, { "epoch": 0.21852938710488806, "grad_norm": 0.4228630588067648, "learning_rate": 2e-05, "loss": 5.3688, "step": 6516 }, { "epoch": 0.21856292445711412, "grad_norm": 0.4069755021463526, "learning_rate": 2e-05, "loss": 5.5721, "step": 6517 }, { "epoch": 0.21859646180934014, "grad_norm": 0.40502966356241993, "learning_rate": 2e-05, "loss": 5.468, "step": 6518 }, { "epoch": 0.2186299991615662, "grad_norm": 0.4361098354763115, "learning_rate": 2e-05, "loss": 5.5276, "step": 6519 }, { "epoch": 0.21866353651379222, "grad_norm": 0.3800262624226887, "learning_rate": 2e-05, "loss": 5.4435, "step": 6520 }, { "epoch": 0.21869707386601828, "grad_norm": 0.4417295612563221, "learning_rate": 2e-05, "loss": 5.6513, "step": 6521 }, { "epoch": 0.21873061121824433, "grad_norm": 0.3926264323977715, "learning_rate": 2e-05, "loss": 5.6015, "step": 6522 }, { "epoch": 0.21876414857047036, "grad_norm": 0.3984992153031971, "learning_rate": 2e-05, "loss": 5.5022, "step": 6523 }, { "epoch": 0.2187976859226964, "grad_norm": 0.4212721587989004, "learning_rate": 2e-05, "loss": 5.7236, "step": 6524 }, { "epoch": 0.21883122327492244, "grad_norm": 0.41067807593427735, "learning_rate": 2e-05, "loss": 5.302, "step": 6525 }, { "epoch": 0.2188647606271485, "grad_norm": 0.4063110141975833, "learning_rate": 2e-05, "loss": 5.5013, "step": 6526 }, { "epoch": 0.21889829797937452, "grad_norm": 0.43998853484542005, "learning_rate": 2e-05, "loss": 5.4942, "step": 6527 }, { "epoch": 0.21893183533160057, "grad_norm": 0.39224696564137707, "learning_rate": 2e-05, "loss": 5.5039, "step": 6528 }, { "epoch": 0.21896537268382663, "grad_norm": 0.4185921098141212, "learning_rate": 2e-05, "loss": 5.6477, "step": 6529 }, { "epoch": 0.21899891003605265, "grad_norm": 0.4124839225415556, "learning_rate": 2e-05, "loss": 5.5872, "step": 6530 }, { "epoch": 0.2190324473882787, "grad_norm": 0.3893237466518972, "learning_rate": 2e-05, "loss": 5.466, "step": 6531 }, { "epoch": 0.21906598474050473, "grad_norm": 0.38246392473738944, "learning_rate": 2e-05, "loss": 5.7872, "step": 6532 }, { "epoch": 0.2190995220927308, "grad_norm": 0.4085832246328909, "learning_rate": 2e-05, "loss": 5.8103, "step": 6533 }, { "epoch": 0.2191330594449568, "grad_norm": 0.4103364360437967, "learning_rate": 2e-05, "loss": 5.5295, "step": 6534 }, { "epoch": 0.21916659679718287, "grad_norm": 0.40283928182592904, "learning_rate": 2e-05, "loss": 5.5566, "step": 6535 }, { "epoch": 0.2192001341494089, "grad_norm": 0.4403037259716487, "learning_rate": 2e-05, "loss": 5.4781, "step": 6536 }, { "epoch": 0.21923367150163495, "grad_norm": 0.39901091137474914, "learning_rate": 2e-05, "loss": 5.7, "step": 6537 }, { "epoch": 0.219267208853861, "grad_norm": 0.4263521480598137, "learning_rate": 2e-05, "loss": 5.6963, "step": 6538 }, { "epoch": 0.21930074620608703, "grad_norm": 0.4381124183862021, "learning_rate": 2e-05, "loss": 5.5313, "step": 6539 }, { "epoch": 0.21933428355831308, "grad_norm": 0.43734815031524027, "learning_rate": 2e-05, "loss": 5.4653, "step": 6540 }, { "epoch": 0.2193678209105391, "grad_norm": 0.456310986685186, "learning_rate": 2e-05, "loss": 5.6947, "step": 6541 }, { "epoch": 0.21940135826276516, "grad_norm": 0.4209133439898435, "learning_rate": 2e-05, "loss": 5.4567, "step": 6542 }, { "epoch": 0.2194348956149912, "grad_norm": 0.4100712785835162, "learning_rate": 2e-05, "loss": 5.4726, "step": 6543 }, { "epoch": 0.21946843296721724, "grad_norm": 0.43078739320322224, "learning_rate": 2e-05, "loss": 5.8143, "step": 6544 }, { "epoch": 0.21950197031944327, "grad_norm": 0.39328067781899256, "learning_rate": 2e-05, "loss": 5.7636, "step": 6545 }, { "epoch": 0.21953550767166932, "grad_norm": 0.412509260233115, "learning_rate": 2e-05, "loss": 5.5547, "step": 6546 }, { "epoch": 0.21956904502389538, "grad_norm": 0.41020854939929974, "learning_rate": 2e-05, "loss": 5.3277, "step": 6547 }, { "epoch": 0.2196025823761214, "grad_norm": 0.41097352834473105, "learning_rate": 2e-05, "loss": 5.5332, "step": 6548 }, { "epoch": 0.21963611972834746, "grad_norm": 0.41341108002034527, "learning_rate": 2e-05, "loss": 5.5873, "step": 6549 }, { "epoch": 0.21966965708057348, "grad_norm": 0.4116566520496846, "learning_rate": 2e-05, "loss": 5.7199, "step": 6550 }, { "epoch": 0.21970319443279954, "grad_norm": 0.42189395936407387, "learning_rate": 2e-05, "loss": 5.6781, "step": 6551 }, { "epoch": 0.21973673178502556, "grad_norm": 0.41829780314085563, "learning_rate": 2e-05, "loss": 5.5929, "step": 6552 }, { "epoch": 0.21977026913725162, "grad_norm": 0.4223245190432512, "learning_rate": 2e-05, "loss": 5.6473, "step": 6553 }, { "epoch": 0.21980380648947764, "grad_norm": 0.4157356372873521, "learning_rate": 2e-05, "loss": 5.6174, "step": 6554 }, { "epoch": 0.2198373438417037, "grad_norm": 0.4266607832436779, "learning_rate": 2e-05, "loss": 5.5519, "step": 6555 }, { "epoch": 0.21987088119392975, "grad_norm": 0.4063167474312311, "learning_rate": 2e-05, "loss": 5.4172, "step": 6556 }, { "epoch": 0.21990441854615578, "grad_norm": 0.42191672665382524, "learning_rate": 2e-05, "loss": 5.5677, "step": 6557 }, { "epoch": 0.21993795589838183, "grad_norm": 0.3981957689594188, "learning_rate": 2e-05, "loss": 5.404, "step": 6558 }, { "epoch": 0.21997149325060786, "grad_norm": 0.40384922404906753, "learning_rate": 2e-05, "loss": 5.5732, "step": 6559 }, { "epoch": 0.2200050306028339, "grad_norm": 0.42814691102323266, "learning_rate": 2e-05, "loss": 5.7556, "step": 6560 }, { "epoch": 0.22003856795505994, "grad_norm": 0.43537814424374205, "learning_rate": 2e-05, "loss": 5.4334, "step": 6561 }, { "epoch": 0.220072105307286, "grad_norm": 0.3940999387232034, "learning_rate": 2e-05, "loss": 5.556, "step": 6562 }, { "epoch": 0.22010564265951202, "grad_norm": 0.4155255156834039, "learning_rate": 2e-05, "loss": 5.7063, "step": 6563 }, { "epoch": 0.22013918001173807, "grad_norm": 0.44321519079366245, "learning_rate": 2e-05, "loss": 5.6253, "step": 6564 }, { "epoch": 0.22017271736396413, "grad_norm": 0.39498943494388, "learning_rate": 2e-05, "loss": 5.5308, "step": 6565 }, { "epoch": 0.22020625471619015, "grad_norm": 0.41565928671958363, "learning_rate": 2e-05, "loss": 5.54, "step": 6566 }, { "epoch": 0.2202397920684162, "grad_norm": 0.4126697377563337, "learning_rate": 2e-05, "loss": 5.5436, "step": 6567 }, { "epoch": 0.22027332942064223, "grad_norm": 0.43114302533353915, "learning_rate": 2e-05, "loss": 5.5879, "step": 6568 }, { "epoch": 0.2203068667728683, "grad_norm": 0.4295889899608642, "learning_rate": 2e-05, "loss": 5.3782, "step": 6569 }, { "epoch": 0.2203404041250943, "grad_norm": 0.4113432773956469, "learning_rate": 2e-05, "loss": 5.7666, "step": 6570 }, { "epoch": 0.22037394147732037, "grad_norm": 0.44848758991062787, "learning_rate": 2e-05, "loss": 5.3232, "step": 6571 }, { "epoch": 0.2204074788295464, "grad_norm": 0.4132638850808921, "learning_rate": 2e-05, "loss": 5.4162, "step": 6572 }, { "epoch": 0.22044101618177245, "grad_norm": 0.40742143263466973, "learning_rate": 2e-05, "loss": 5.5326, "step": 6573 }, { "epoch": 0.2204745535339985, "grad_norm": 0.44271651557778136, "learning_rate": 2e-05, "loss": 5.3976, "step": 6574 }, { "epoch": 0.22050809088622453, "grad_norm": 0.443904923883335, "learning_rate": 2e-05, "loss": 5.2964, "step": 6575 }, { "epoch": 0.22054162823845058, "grad_norm": 0.4155587515908196, "learning_rate": 2e-05, "loss": 5.6686, "step": 6576 }, { "epoch": 0.2205751655906766, "grad_norm": 0.42981632394954866, "learning_rate": 2e-05, "loss": 5.5602, "step": 6577 }, { "epoch": 0.22060870294290266, "grad_norm": 0.4465189535137663, "learning_rate": 2e-05, "loss": 5.4775, "step": 6578 }, { "epoch": 0.2206422402951287, "grad_norm": 0.41220096662383615, "learning_rate": 2e-05, "loss": 5.4035, "step": 6579 }, { "epoch": 0.22067577764735474, "grad_norm": 0.4124097131631435, "learning_rate": 2e-05, "loss": 5.5553, "step": 6580 }, { "epoch": 0.2207093149995808, "grad_norm": 0.42490393467725524, "learning_rate": 2e-05, "loss": 5.5752, "step": 6581 }, { "epoch": 0.22074285235180682, "grad_norm": 0.4371833797227912, "learning_rate": 2e-05, "loss": 5.5539, "step": 6582 }, { "epoch": 0.22077638970403288, "grad_norm": 0.4292855602709827, "learning_rate": 2e-05, "loss": 5.6398, "step": 6583 }, { "epoch": 0.2208099270562589, "grad_norm": 0.4062082796406313, "learning_rate": 2e-05, "loss": 5.4386, "step": 6584 }, { "epoch": 0.22084346440848496, "grad_norm": 0.44032163444173295, "learning_rate": 2e-05, "loss": 5.6253, "step": 6585 }, { "epoch": 0.22087700176071098, "grad_norm": 0.4231907165341094, "learning_rate": 2e-05, "loss": 5.555, "step": 6586 }, { "epoch": 0.22091053911293704, "grad_norm": 0.45699541979125163, "learning_rate": 2e-05, "loss": 5.3631, "step": 6587 }, { "epoch": 0.22094407646516306, "grad_norm": 0.42830133352094724, "learning_rate": 2e-05, "loss": 5.4951, "step": 6588 }, { "epoch": 0.22097761381738912, "grad_norm": 0.5328152022473822, "learning_rate": 2e-05, "loss": 5.4763, "step": 6589 }, { "epoch": 0.22101115116961517, "grad_norm": 0.41219707610227024, "learning_rate": 2e-05, "loss": 5.5367, "step": 6590 }, { "epoch": 0.2210446885218412, "grad_norm": 0.4616250234156509, "learning_rate": 2e-05, "loss": 5.4428, "step": 6591 }, { "epoch": 0.22107822587406725, "grad_norm": 0.4310892659030591, "learning_rate": 2e-05, "loss": 5.5637, "step": 6592 }, { "epoch": 0.22111176322629328, "grad_norm": 0.4143834183364127, "learning_rate": 2e-05, "loss": 5.4168, "step": 6593 }, { "epoch": 0.22114530057851933, "grad_norm": 0.403235709207464, "learning_rate": 2e-05, "loss": 5.5974, "step": 6594 }, { "epoch": 0.22117883793074536, "grad_norm": 0.4584725351636609, "learning_rate": 2e-05, "loss": 5.712, "step": 6595 }, { "epoch": 0.2212123752829714, "grad_norm": 0.4279233088993764, "learning_rate": 2e-05, "loss": 5.5032, "step": 6596 }, { "epoch": 0.22124591263519744, "grad_norm": 0.4031840509640872, "learning_rate": 2e-05, "loss": 5.427, "step": 6597 }, { "epoch": 0.2212794499874235, "grad_norm": 0.42031280314373276, "learning_rate": 2e-05, "loss": 5.4211, "step": 6598 }, { "epoch": 0.22131298733964955, "grad_norm": 0.43121340596392876, "learning_rate": 2e-05, "loss": 5.4586, "step": 6599 }, { "epoch": 0.22134652469187557, "grad_norm": 0.39950636749667595, "learning_rate": 2e-05, "loss": 5.5397, "step": 6600 }, { "epoch": 0.22138006204410163, "grad_norm": 0.41775427910369684, "learning_rate": 2e-05, "loss": 5.5764, "step": 6601 }, { "epoch": 0.22141359939632765, "grad_norm": 0.4022429702627908, "learning_rate": 2e-05, "loss": 5.7643, "step": 6602 }, { "epoch": 0.2214471367485537, "grad_norm": 0.4020757909011183, "learning_rate": 2e-05, "loss": 5.6752, "step": 6603 }, { "epoch": 0.22148067410077973, "grad_norm": 0.43410529860079283, "learning_rate": 2e-05, "loss": 5.5513, "step": 6604 }, { "epoch": 0.2215142114530058, "grad_norm": 0.4067613599293316, "learning_rate": 2e-05, "loss": 5.5903, "step": 6605 }, { "epoch": 0.22154774880523181, "grad_norm": 0.429774951134017, "learning_rate": 2e-05, "loss": 5.6436, "step": 6606 }, { "epoch": 0.22158128615745787, "grad_norm": 0.3943405571504995, "learning_rate": 2e-05, "loss": 5.5006, "step": 6607 }, { "epoch": 0.22161482350968392, "grad_norm": 0.424080714316995, "learning_rate": 2e-05, "loss": 5.5579, "step": 6608 }, { "epoch": 0.22164836086190995, "grad_norm": 0.41191733829596394, "learning_rate": 2e-05, "loss": 5.5459, "step": 6609 }, { "epoch": 0.221681898214136, "grad_norm": 0.4001924330814263, "learning_rate": 2e-05, "loss": 5.5923, "step": 6610 }, { "epoch": 0.22171543556636203, "grad_norm": 0.4173950211036085, "learning_rate": 2e-05, "loss": 5.4944, "step": 6611 }, { "epoch": 0.22174897291858808, "grad_norm": 0.4659058440251125, "learning_rate": 2e-05, "loss": 5.5731, "step": 6612 }, { "epoch": 0.2217825102708141, "grad_norm": 0.4069037955352893, "learning_rate": 2e-05, "loss": 5.6656, "step": 6613 }, { "epoch": 0.22181604762304016, "grad_norm": 0.4212308022329704, "learning_rate": 2e-05, "loss": 5.4292, "step": 6614 }, { "epoch": 0.2218495849752662, "grad_norm": 0.40594368693987615, "learning_rate": 2e-05, "loss": 5.5348, "step": 6615 }, { "epoch": 0.22188312232749224, "grad_norm": 0.4371160850394256, "learning_rate": 2e-05, "loss": 5.464, "step": 6616 }, { "epoch": 0.2219166596797183, "grad_norm": 0.3958811464112705, "learning_rate": 2e-05, "loss": 5.6322, "step": 6617 }, { "epoch": 0.22195019703194432, "grad_norm": 0.39818855345822096, "learning_rate": 2e-05, "loss": 5.607, "step": 6618 }, { "epoch": 0.22198373438417038, "grad_norm": 0.38953472455292365, "learning_rate": 2e-05, "loss": 5.4362, "step": 6619 }, { "epoch": 0.2220172717363964, "grad_norm": 0.37797331714165955, "learning_rate": 2e-05, "loss": 5.7929, "step": 6620 }, { "epoch": 0.22205080908862246, "grad_norm": 0.4294583245503137, "learning_rate": 2e-05, "loss": 5.5596, "step": 6621 }, { "epoch": 0.22208434644084848, "grad_norm": 0.39451044333912744, "learning_rate": 2e-05, "loss": 5.4987, "step": 6622 }, { "epoch": 0.22211788379307454, "grad_norm": 0.38582652892461394, "learning_rate": 2e-05, "loss": 5.4285, "step": 6623 }, { "epoch": 0.22215142114530056, "grad_norm": 0.4285089051142177, "learning_rate": 2e-05, "loss": 5.5012, "step": 6624 }, { "epoch": 0.22218495849752662, "grad_norm": 0.3861575150384722, "learning_rate": 2e-05, "loss": 5.5362, "step": 6625 }, { "epoch": 0.22221849584975267, "grad_norm": 0.3952892899671321, "learning_rate": 2e-05, "loss": 5.3854, "step": 6626 }, { "epoch": 0.2222520332019787, "grad_norm": 0.46063063663779924, "learning_rate": 2e-05, "loss": 5.4921, "step": 6627 }, { "epoch": 0.22228557055420475, "grad_norm": 0.4403007403902562, "learning_rate": 2e-05, "loss": 5.4078, "step": 6628 }, { "epoch": 0.22231910790643078, "grad_norm": 0.3999270768207834, "learning_rate": 2e-05, "loss": 5.6871, "step": 6629 }, { "epoch": 0.22235264525865683, "grad_norm": 0.4344602701436295, "learning_rate": 2e-05, "loss": 5.6746, "step": 6630 }, { "epoch": 0.22238618261088286, "grad_norm": 0.41241571544987876, "learning_rate": 2e-05, "loss": 5.4772, "step": 6631 }, { "epoch": 0.22241971996310891, "grad_norm": 0.4051129602161037, "learning_rate": 2e-05, "loss": 5.5898, "step": 6632 }, { "epoch": 0.22245325731533497, "grad_norm": 0.43420816538065543, "learning_rate": 2e-05, "loss": 5.5982, "step": 6633 }, { "epoch": 0.222486794667561, "grad_norm": 0.4486275282027727, "learning_rate": 2e-05, "loss": 5.6117, "step": 6634 }, { "epoch": 0.22252033201978705, "grad_norm": 0.40112204269089236, "learning_rate": 2e-05, "loss": 5.7377, "step": 6635 }, { "epoch": 0.22255386937201307, "grad_norm": 0.4290359351948171, "learning_rate": 2e-05, "loss": 5.607, "step": 6636 }, { "epoch": 0.22258740672423913, "grad_norm": 0.4230097784950328, "learning_rate": 2e-05, "loss": 5.5383, "step": 6637 }, { "epoch": 0.22262094407646515, "grad_norm": 0.41137274943804997, "learning_rate": 2e-05, "loss": 5.694, "step": 6638 }, { "epoch": 0.2226544814286912, "grad_norm": 0.42216185286649394, "learning_rate": 2e-05, "loss": 5.5961, "step": 6639 }, { "epoch": 0.22268801878091724, "grad_norm": 0.3956752887991969, "learning_rate": 2e-05, "loss": 5.4672, "step": 6640 }, { "epoch": 0.2227215561331433, "grad_norm": 0.44751653586847895, "learning_rate": 2e-05, "loss": 5.7539, "step": 6641 }, { "epoch": 0.22275509348536934, "grad_norm": 0.4216998134853455, "learning_rate": 2e-05, "loss": 5.651, "step": 6642 }, { "epoch": 0.22278863083759537, "grad_norm": 0.4341983838924697, "learning_rate": 2e-05, "loss": 5.5474, "step": 6643 }, { "epoch": 0.22282216818982142, "grad_norm": 0.4353717546883984, "learning_rate": 2e-05, "loss": 5.4899, "step": 6644 }, { "epoch": 0.22285570554204745, "grad_norm": 0.3905617073331392, "learning_rate": 2e-05, "loss": 5.4917, "step": 6645 }, { "epoch": 0.2228892428942735, "grad_norm": 0.4146959065772637, "learning_rate": 2e-05, "loss": 5.4028, "step": 6646 }, { "epoch": 0.22292278024649953, "grad_norm": 0.40468597044852733, "learning_rate": 2e-05, "loss": 5.2713, "step": 6647 }, { "epoch": 0.22295631759872558, "grad_norm": 0.41059248718396596, "learning_rate": 2e-05, "loss": 5.6115, "step": 6648 }, { "epoch": 0.2229898549509516, "grad_norm": 0.42498425055661476, "learning_rate": 2e-05, "loss": 5.5453, "step": 6649 }, { "epoch": 0.22302339230317766, "grad_norm": 0.4039769945247319, "learning_rate": 2e-05, "loss": 5.5041, "step": 6650 }, { "epoch": 0.22305692965540372, "grad_norm": 0.41579683799398953, "learning_rate": 2e-05, "loss": 5.4298, "step": 6651 }, { "epoch": 0.22309046700762974, "grad_norm": 0.41084458847458893, "learning_rate": 2e-05, "loss": 5.5682, "step": 6652 }, { "epoch": 0.2231240043598558, "grad_norm": 0.4452071120374486, "learning_rate": 2e-05, "loss": 5.5073, "step": 6653 }, { "epoch": 0.22315754171208182, "grad_norm": 0.4328820400277072, "learning_rate": 2e-05, "loss": 5.4895, "step": 6654 }, { "epoch": 0.22319107906430788, "grad_norm": 0.40828884792728476, "learning_rate": 2e-05, "loss": 5.5662, "step": 6655 }, { "epoch": 0.2232246164165339, "grad_norm": 0.4272506148807678, "learning_rate": 2e-05, "loss": 5.6711, "step": 6656 }, { "epoch": 0.22325815376875996, "grad_norm": 0.39368359630758604, "learning_rate": 2e-05, "loss": 5.5208, "step": 6657 }, { "epoch": 0.22329169112098599, "grad_norm": 0.42489600500812386, "learning_rate": 2e-05, "loss": 5.4424, "step": 6658 }, { "epoch": 0.22332522847321204, "grad_norm": 0.42745698358217576, "learning_rate": 2e-05, "loss": 5.4515, "step": 6659 }, { "epoch": 0.2233587658254381, "grad_norm": 0.4256520369903802, "learning_rate": 2e-05, "loss": 5.47, "step": 6660 }, { "epoch": 0.22339230317766412, "grad_norm": 0.42132458065661205, "learning_rate": 2e-05, "loss": 5.6083, "step": 6661 }, { "epoch": 0.22342584052989017, "grad_norm": 0.4413742527323294, "learning_rate": 2e-05, "loss": 5.644, "step": 6662 }, { "epoch": 0.2234593778821162, "grad_norm": 0.44610316527141514, "learning_rate": 2e-05, "loss": 5.4922, "step": 6663 }, { "epoch": 0.22349291523434225, "grad_norm": 0.48816987038654436, "learning_rate": 2e-05, "loss": 5.405, "step": 6664 }, { "epoch": 0.22352645258656828, "grad_norm": 0.444518528015145, "learning_rate": 2e-05, "loss": 5.4354, "step": 6665 }, { "epoch": 0.22355998993879433, "grad_norm": 0.40324245347419513, "learning_rate": 2e-05, "loss": 5.6555, "step": 6666 }, { "epoch": 0.22359352729102036, "grad_norm": 0.4605310889286342, "learning_rate": 2e-05, "loss": 5.5519, "step": 6667 }, { "epoch": 0.22362706464324641, "grad_norm": 0.42717376478975416, "learning_rate": 2e-05, "loss": 5.6084, "step": 6668 }, { "epoch": 0.22366060199547247, "grad_norm": 0.41333070739086725, "learning_rate": 2e-05, "loss": 5.7262, "step": 6669 }, { "epoch": 0.2236941393476985, "grad_norm": 0.4136485339283993, "learning_rate": 2e-05, "loss": 5.7315, "step": 6670 }, { "epoch": 0.22372767669992455, "grad_norm": 0.47602646847543584, "learning_rate": 2e-05, "loss": 5.3905, "step": 6671 }, { "epoch": 0.22376121405215058, "grad_norm": 0.3776757277666493, "learning_rate": 2e-05, "loss": 5.5244, "step": 6672 }, { "epoch": 0.22379475140437663, "grad_norm": 0.4287832477393356, "learning_rate": 2e-05, "loss": 5.2969, "step": 6673 }, { "epoch": 0.22382828875660266, "grad_norm": 0.43681814287717224, "learning_rate": 2e-05, "loss": 5.5403, "step": 6674 }, { "epoch": 0.2238618261088287, "grad_norm": 0.410387885632589, "learning_rate": 2e-05, "loss": 5.4986, "step": 6675 }, { "epoch": 0.22389536346105474, "grad_norm": 0.4052950130304827, "learning_rate": 2e-05, "loss": 5.5435, "step": 6676 }, { "epoch": 0.2239289008132808, "grad_norm": 0.40087509071286775, "learning_rate": 2e-05, "loss": 5.3514, "step": 6677 }, { "epoch": 0.22396243816550684, "grad_norm": 0.4461472746045566, "learning_rate": 2e-05, "loss": 5.5914, "step": 6678 }, { "epoch": 0.22399597551773287, "grad_norm": 0.4228964971727862, "learning_rate": 2e-05, "loss": 5.501, "step": 6679 }, { "epoch": 0.22402951286995892, "grad_norm": 0.4784895038889825, "learning_rate": 2e-05, "loss": 5.6552, "step": 6680 }, { "epoch": 0.22406305022218495, "grad_norm": 0.44559961197444486, "learning_rate": 2e-05, "loss": 5.8855, "step": 6681 }, { "epoch": 0.224096587574411, "grad_norm": 0.438468028867736, "learning_rate": 2e-05, "loss": 5.3242, "step": 6682 }, { "epoch": 0.22413012492663703, "grad_norm": 0.3877198611433735, "learning_rate": 2e-05, "loss": 5.6892, "step": 6683 }, { "epoch": 0.22416366227886309, "grad_norm": 0.45230243805716575, "learning_rate": 2e-05, "loss": 5.4058, "step": 6684 }, { "epoch": 0.22419719963108914, "grad_norm": 0.43603728953865356, "learning_rate": 2e-05, "loss": 5.5727, "step": 6685 }, { "epoch": 0.22423073698331517, "grad_norm": 0.40982813725551487, "learning_rate": 2e-05, "loss": 5.498, "step": 6686 }, { "epoch": 0.22426427433554122, "grad_norm": 0.45718877460412666, "learning_rate": 2e-05, "loss": 5.5157, "step": 6687 }, { "epoch": 0.22429781168776725, "grad_norm": 0.43550529155166656, "learning_rate": 2e-05, "loss": 5.4208, "step": 6688 }, { "epoch": 0.2243313490399933, "grad_norm": 0.39723526824949096, "learning_rate": 2e-05, "loss": 5.5787, "step": 6689 }, { "epoch": 0.22436488639221933, "grad_norm": 0.48037129886437113, "learning_rate": 2e-05, "loss": 5.4775, "step": 6690 }, { "epoch": 0.22439842374444538, "grad_norm": 0.4518150180127794, "learning_rate": 2e-05, "loss": 5.4356, "step": 6691 }, { "epoch": 0.2244319610966714, "grad_norm": 0.42918548469819723, "learning_rate": 2e-05, "loss": 5.4825, "step": 6692 }, { "epoch": 0.22446549844889746, "grad_norm": 0.42538257356082615, "learning_rate": 2e-05, "loss": 5.4521, "step": 6693 }, { "epoch": 0.22449903580112351, "grad_norm": 0.4570096485230319, "learning_rate": 2e-05, "loss": 5.3719, "step": 6694 }, { "epoch": 0.22453257315334954, "grad_norm": 0.4068495271800081, "learning_rate": 2e-05, "loss": 5.5354, "step": 6695 }, { "epoch": 0.2245661105055756, "grad_norm": 0.42511574484495224, "learning_rate": 2e-05, "loss": 5.4752, "step": 6696 }, { "epoch": 0.22459964785780162, "grad_norm": 0.4765132947004604, "learning_rate": 2e-05, "loss": 5.6008, "step": 6697 }, { "epoch": 0.22463318521002767, "grad_norm": 0.4487888089119024, "learning_rate": 2e-05, "loss": 5.6799, "step": 6698 }, { "epoch": 0.2246667225622537, "grad_norm": 0.40990173660785023, "learning_rate": 2e-05, "loss": 5.5296, "step": 6699 }, { "epoch": 0.22470025991447976, "grad_norm": 0.4522076660591053, "learning_rate": 2e-05, "loss": 5.5248, "step": 6700 }, { "epoch": 0.22473379726670578, "grad_norm": 0.4451508861079596, "learning_rate": 2e-05, "loss": 5.5744, "step": 6701 }, { "epoch": 0.22476733461893184, "grad_norm": 0.4497090219485326, "learning_rate": 2e-05, "loss": 5.5621, "step": 6702 }, { "epoch": 0.2248008719711579, "grad_norm": 0.4256099087684673, "learning_rate": 2e-05, "loss": 5.3783, "step": 6703 }, { "epoch": 0.22483440932338392, "grad_norm": 0.3932044122934347, "learning_rate": 2e-05, "loss": 5.696, "step": 6704 }, { "epoch": 0.22486794667560997, "grad_norm": 0.4437328095779458, "learning_rate": 2e-05, "loss": 5.6864, "step": 6705 }, { "epoch": 0.224901484027836, "grad_norm": 0.4140105753775785, "learning_rate": 2e-05, "loss": 5.6989, "step": 6706 }, { "epoch": 0.22493502138006205, "grad_norm": 0.3930710587699897, "learning_rate": 2e-05, "loss": 5.4821, "step": 6707 }, { "epoch": 0.22496855873228808, "grad_norm": 0.40361160455927003, "learning_rate": 2e-05, "loss": 5.6335, "step": 6708 }, { "epoch": 0.22500209608451413, "grad_norm": 0.4240059359499137, "learning_rate": 2e-05, "loss": 5.4597, "step": 6709 }, { "epoch": 0.22503563343674016, "grad_norm": 0.4100014283651909, "learning_rate": 2e-05, "loss": 5.6951, "step": 6710 }, { "epoch": 0.2250691707889662, "grad_norm": 0.4184503324681981, "learning_rate": 2e-05, "loss": 5.5544, "step": 6711 }, { "epoch": 0.22510270814119226, "grad_norm": 0.4553084001483807, "learning_rate": 2e-05, "loss": 5.5584, "step": 6712 }, { "epoch": 0.2251362454934183, "grad_norm": 0.41219364536251063, "learning_rate": 2e-05, "loss": 5.592, "step": 6713 }, { "epoch": 0.22516978284564435, "grad_norm": 0.41269342626151534, "learning_rate": 2e-05, "loss": 5.5371, "step": 6714 }, { "epoch": 0.22520332019787037, "grad_norm": 0.39641244151811666, "learning_rate": 2e-05, "loss": 5.5326, "step": 6715 }, { "epoch": 0.22523685755009643, "grad_norm": 0.40719877486133776, "learning_rate": 2e-05, "loss": 5.4187, "step": 6716 }, { "epoch": 0.22527039490232245, "grad_norm": 0.4074348661129201, "learning_rate": 2e-05, "loss": 5.4111, "step": 6717 }, { "epoch": 0.2253039322545485, "grad_norm": 0.3934212043469895, "learning_rate": 2e-05, "loss": 5.4777, "step": 6718 }, { "epoch": 0.22533746960677453, "grad_norm": 0.43938514568337744, "learning_rate": 2e-05, "loss": 5.5708, "step": 6719 }, { "epoch": 0.22537100695900059, "grad_norm": 0.40117175493848917, "learning_rate": 2e-05, "loss": 5.5648, "step": 6720 }, { "epoch": 0.22540454431122664, "grad_norm": 0.4018103172143217, "learning_rate": 2e-05, "loss": 5.4892, "step": 6721 }, { "epoch": 0.22543808166345267, "grad_norm": 0.4306836654864745, "learning_rate": 2e-05, "loss": 5.4891, "step": 6722 }, { "epoch": 0.22547161901567872, "grad_norm": 0.43912392671027567, "learning_rate": 2e-05, "loss": 5.56, "step": 6723 }, { "epoch": 0.22550515636790475, "grad_norm": 0.42393690349746366, "learning_rate": 2e-05, "loss": 5.5099, "step": 6724 }, { "epoch": 0.2255386937201308, "grad_norm": 0.461878948965918, "learning_rate": 2e-05, "loss": 5.5312, "step": 6725 }, { "epoch": 0.22557223107235683, "grad_norm": 0.4042259408486707, "learning_rate": 2e-05, "loss": 5.6086, "step": 6726 }, { "epoch": 0.22560576842458288, "grad_norm": 0.42822411295939455, "learning_rate": 2e-05, "loss": 5.4369, "step": 6727 }, { "epoch": 0.2256393057768089, "grad_norm": 0.4914795043287681, "learning_rate": 2e-05, "loss": 5.4997, "step": 6728 }, { "epoch": 0.22567284312903496, "grad_norm": 0.4610853410220082, "learning_rate": 2e-05, "loss": 5.4501, "step": 6729 }, { "epoch": 0.22570638048126102, "grad_norm": 0.4100001280481875, "learning_rate": 2e-05, "loss": 5.5747, "step": 6730 }, { "epoch": 0.22573991783348704, "grad_norm": 0.4542805798131405, "learning_rate": 2e-05, "loss": 5.6522, "step": 6731 }, { "epoch": 0.2257734551857131, "grad_norm": 0.4102431336005242, "learning_rate": 2e-05, "loss": 5.6281, "step": 6732 }, { "epoch": 0.22580699253793912, "grad_norm": 0.42338841534522464, "learning_rate": 2e-05, "loss": 5.6188, "step": 6733 }, { "epoch": 0.22584052989016518, "grad_norm": 0.452445861578857, "learning_rate": 2e-05, "loss": 5.556, "step": 6734 }, { "epoch": 0.2258740672423912, "grad_norm": 0.3978659289504882, "learning_rate": 2e-05, "loss": 5.3999, "step": 6735 }, { "epoch": 0.22590760459461726, "grad_norm": 0.4333447556035997, "learning_rate": 2e-05, "loss": 5.5215, "step": 6736 }, { "epoch": 0.2259411419468433, "grad_norm": 0.4371134551672353, "learning_rate": 2e-05, "loss": 5.6187, "step": 6737 }, { "epoch": 0.22597467929906934, "grad_norm": 0.4047846613631021, "learning_rate": 2e-05, "loss": 5.696, "step": 6738 }, { "epoch": 0.2260082166512954, "grad_norm": 0.4081070878234143, "learning_rate": 2e-05, "loss": 5.6932, "step": 6739 }, { "epoch": 0.22604175400352142, "grad_norm": 0.4813176033704923, "learning_rate": 2e-05, "loss": 5.5528, "step": 6740 }, { "epoch": 0.22607529135574747, "grad_norm": 0.4190284716255468, "learning_rate": 2e-05, "loss": 5.5282, "step": 6741 }, { "epoch": 0.2261088287079735, "grad_norm": 0.40888343072867067, "learning_rate": 2e-05, "loss": 5.5655, "step": 6742 }, { "epoch": 0.22614236606019955, "grad_norm": 0.4068506298531447, "learning_rate": 2e-05, "loss": 5.7468, "step": 6743 }, { "epoch": 0.22617590341242558, "grad_norm": 0.4526731600919284, "learning_rate": 2e-05, "loss": 5.6685, "step": 6744 }, { "epoch": 0.22620944076465163, "grad_norm": 0.4212034746049672, "learning_rate": 2e-05, "loss": 5.5198, "step": 6745 }, { "epoch": 0.22624297811687769, "grad_norm": 0.44574097100183924, "learning_rate": 2e-05, "loss": 5.7098, "step": 6746 }, { "epoch": 0.2262765154691037, "grad_norm": 0.4025859830697341, "learning_rate": 2e-05, "loss": 5.5932, "step": 6747 }, { "epoch": 0.22631005282132977, "grad_norm": 0.4329074360135622, "learning_rate": 2e-05, "loss": 5.6201, "step": 6748 }, { "epoch": 0.2263435901735558, "grad_norm": 0.4433717741774771, "learning_rate": 2e-05, "loss": 5.5378, "step": 6749 }, { "epoch": 0.22637712752578185, "grad_norm": 0.4330560699969568, "learning_rate": 2e-05, "loss": 5.4171, "step": 6750 }, { "epoch": 0.22641066487800787, "grad_norm": 0.4086915646719466, "learning_rate": 2e-05, "loss": 5.4087, "step": 6751 }, { "epoch": 0.22644420223023393, "grad_norm": 0.4589093778404784, "learning_rate": 2e-05, "loss": 5.6764, "step": 6752 }, { "epoch": 0.22647773958245995, "grad_norm": 0.40659189230704124, "learning_rate": 2e-05, "loss": 5.7155, "step": 6753 }, { "epoch": 0.226511276934686, "grad_norm": 0.4016206220628673, "learning_rate": 2e-05, "loss": 5.3675, "step": 6754 }, { "epoch": 0.22654481428691206, "grad_norm": 0.39988545398979913, "learning_rate": 2e-05, "loss": 5.4607, "step": 6755 }, { "epoch": 0.2265783516391381, "grad_norm": 0.4179177509435044, "learning_rate": 2e-05, "loss": 5.4478, "step": 6756 }, { "epoch": 0.22661188899136414, "grad_norm": 0.4395643774798915, "learning_rate": 2e-05, "loss": 5.4451, "step": 6757 }, { "epoch": 0.22664542634359017, "grad_norm": 0.4300895230811274, "learning_rate": 2e-05, "loss": 5.6193, "step": 6758 }, { "epoch": 0.22667896369581622, "grad_norm": 0.39396735375731107, "learning_rate": 2e-05, "loss": 5.61, "step": 6759 }, { "epoch": 0.22671250104804225, "grad_norm": 0.4039930056211381, "learning_rate": 2e-05, "loss": 5.5165, "step": 6760 }, { "epoch": 0.2267460384002683, "grad_norm": 0.3889381373948161, "learning_rate": 2e-05, "loss": 5.6845, "step": 6761 }, { "epoch": 0.22677957575249433, "grad_norm": 0.395668116997339, "learning_rate": 2e-05, "loss": 5.3418, "step": 6762 }, { "epoch": 0.22681311310472038, "grad_norm": 0.4295568444861363, "learning_rate": 2e-05, "loss": 5.3131, "step": 6763 }, { "epoch": 0.22684665045694644, "grad_norm": 0.38893891121425034, "learning_rate": 2e-05, "loss": 5.536, "step": 6764 }, { "epoch": 0.22688018780917246, "grad_norm": 0.39471417143646537, "learning_rate": 2e-05, "loss": 5.462, "step": 6765 }, { "epoch": 0.22691372516139852, "grad_norm": 0.41128341508389915, "learning_rate": 2e-05, "loss": 5.4113, "step": 6766 }, { "epoch": 0.22694726251362454, "grad_norm": 0.40590586847588483, "learning_rate": 2e-05, "loss": 5.686, "step": 6767 }, { "epoch": 0.2269807998658506, "grad_norm": 0.4090746311439489, "learning_rate": 2e-05, "loss": 5.4645, "step": 6768 }, { "epoch": 0.22701433721807662, "grad_norm": 0.4104304770226716, "learning_rate": 2e-05, "loss": 5.6148, "step": 6769 }, { "epoch": 0.22704787457030268, "grad_norm": 0.42740411608491785, "learning_rate": 2e-05, "loss": 5.3118, "step": 6770 }, { "epoch": 0.2270814119225287, "grad_norm": 0.39432559246532733, "learning_rate": 2e-05, "loss": 5.5549, "step": 6771 }, { "epoch": 0.22711494927475476, "grad_norm": 0.41335644167405755, "learning_rate": 2e-05, "loss": 5.6519, "step": 6772 }, { "epoch": 0.2271484866269808, "grad_norm": 0.42191245673500055, "learning_rate": 2e-05, "loss": 5.6463, "step": 6773 }, { "epoch": 0.22718202397920684, "grad_norm": 0.4331214599472149, "learning_rate": 2e-05, "loss": 5.6152, "step": 6774 }, { "epoch": 0.2272155613314329, "grad_norm": 0.4080121730971167, "learning_rate": 2e-05, "loss": 5.6464, "step": 6775 }, { "epoch": 0.22724909868365892, "grad_norm": 0.398136385163572, "learning_rate": 2e-05, "loss": 5.5119, "step": 6776 }, { "epoch": 0.22728263603588497, "grad_norm": 0.42021023425563986, "learning_rate": 2e-05, "loss": 5.3845, "step": 6777 }, { "epoch": 0.227316173388111, "grad_norm": 0.39559117988249176, "learning_rate": 2e-05, "loss": 5.4301, "step": 6778 }, { "epoch": 0.22734971074033705, "grad_norm": 0.4541069320884796, "learning_rate": 2e-05, "loss": 5.5544, "step": 6779 }, { "epoch": 0.22738324809256308, "grad_norm": 0.40875077650236735, "learning_rate": 2e-05, "loss": 5.5376, "step": 6780 }, { "epoch": 0.22741678544478913, "grad_norm": 0.46203600537979067, "learning_rate": 2e-05, "loss": 5.8291, "step": 6781 }, { "epoch": 0.2274503227970152, "grad_norm": 0.45382842972196713, "learning_rate": 2e-05, "loss": 5.5939, "step": 6782 }, { "epoch": 0.2274838601492412, "grad_norm": 0.4643199222604237, "learning_rate": 2e-05, "loss": 5.5395, "step": 6783 }, { "epoch": 0.22751739750146727, "grad_norm": 0.5162116827625535, "learning_rate": 2e-05, "loss": 5.4825, "step": 6784 }, { "epoch": 0.2275509348536933, "grad_norm": 0.44666745506039984, "learning_rate": 2e-05, "loss": 5.3355, "step": 6785 }, { "epoch": 0.22758447220591935, "grad_norm": 0.4187536078250517, "learning_rate": 2e-05, "loss": 5.4703, "step": 6786 }, { "epoch": 0.22761800955814537, "grad_norm": 0.4179653263005616, "learning_rate": 2e-05, "loss": 5.3818, "step": 6787 }, { "epoch": 0.22765154691037143, "grad_norm": 0.4778349989821761, "learning_rate": 2e-05, "loss": 5.4299, "step": 6788 }, { "epoch": 0.22768508426259748, "grad_norm": 0.40498536716416844, "learning_rate": 2e-05, "loss": 5.5296, "step": 6789 }, { "epoch": 0.2277186216148235, "grad_norm": 0.43191363188418097, "learning_rate": 2e-05, "loss": 5.3329, "step": 6790 }, { "epoch": 0.22775215896704956, "grad_norm": 0.4190691364494727, "learning_rate": 2e-05, "loss": 5.6497, "step": 6791 }, { "epoch": 0.2277856963192756, "grad_norm": 0.4231986252378018, "learning_rate": 2e-05, "loss": 5.7366, "step": 6792 }, { "epoch": 0.22781923367150164, "grad_norm": 0.3962153951113457, "learning_rate": 2e-05, "loss": 5.5454, "step": 6793 }, { "epoch": 0.22785277102372767, "grad_norm": 0.43919665548940107, "learning_rate": 2e-05, "loss": 5.6334, "step": 6794 }, { "epoch": 0.22788630837595372, "grad_norm": 0.4450402940372412, "learning_rate": 2e-05, "loss": 5.4881, "step": 6795 }, { "epoch": 0.22791984572817975, "grad_norm": 0.4237037090688943, "learning_rate": 2e-05, "loss": 5.5233, "step": 6796 }, { "epoch": 0.2279533830804058, "grad_norm": 0.42520049963138445, "learning_rate": 2e-05, "loss": 5.534, "step": 6797 }, { "epoch": 0.22798692043263186, "grad_norm": 0.44632617781239875, "learning_rate": 2e-05, "loss": 5.5485, "step": 6798 }, { "epoch": 0.22802045778485788, "grad_norm": 0.44580169632940553, "learning_rate": 2e-05, "loss": 5.5775, "step": 6799 }, { "epoch": 0.22805399513708394, "grad_norm": 0.4079594919252126, "learning_rate": 2e-05, "loss": 5.5042, "step": 6800 }, { "epoch": 0.22808753248930996, "grad_norm": 0.45064959055825077, "learning_rate": 2e-05, "loss": 5.6321, "step": 6801 }, { "epoch": 0.22812106984153602, "grad_norm": 0.4554171709125283, "learning_rate": 2e-05, "loss": 5.5785, "step": 6802 }, { "epoch": 0.22815460719376204, "grad_norm": 0.38201169080498926, "learning_rate": 2e-05, "loss": 5.5974, "step": 6803 }, { "epoch": 0.2281881445459881, "grad_norm": 0.4347368255103503, "learning_rate": 2e-05, "loss": 5.4459, "step": 6804 }, { "epoch": 0.22822168189821412, "grad_norm": 0.4413898581977395, "learning_rate": 2e-05, "loss": 5.5521, "step": 6805 }, { "epoch": 0.22825521925044018, "grad_norm": 0.420266910510359, "learning_rate": 2e-05, "loss": 5.5295, "step": 6806 }, { "epoch": 0.22828875660266623, "grad_norm": 0.4051279543062526, "learning_rate": 2e-05, "loss": 5.4384, "step": 6807 }, { "epoch": 0.22832229395489226, "grad_norm": 0.4244543721638881, "learning_rate": 2e-05, "loss": 5.534, "step": 6808 }, { "epoch": 0.2283558313071183, "grad_norm": 0.4730874116624465, "learning_rate": 2e-05, "loss": 5.6204, "step": 6809 }, { "epoch": 0.22838936865934434, "grad_norm": 0.438980325542667, "learning_rate": 2e-05, "loss": 5.4466, "step": 6810 }, { "epoch": 0.2284229060115704, "grad_norm": 0.45957874505978935, "learning_rate": 2e-05, "loss": 5.5857, "step": 6811 }, { "epoch": 0.22845644336379642, "grad_norm": 0.4323641920744367, "learning_rate": 2e-05, "loss": 5.4194, "step": 6812 }, { "epoch": 0.22848998071602247, "grad_norm": 0.45958285315350705, "learning_rate": 2e-05, "loss": 5.5343, "step": 6813 }, { "epoch": 0.2285235180682485, "grad_norm": 0.44802976934546224, "learning_rate": 2e-05, "loss": 5.4163, "step": 6814 }, { "epoch": 0.22855705542047455, "grad_norm": 0.45728271800290016, "learning_rate": 2e-05, "loss": 5.5248, "step": 6815 }, { "epoch": 0.2285905927727006, "grad_norm": 0.4625921511749474, "learning_rate": 2e-05, "loss": 5.4743, "step": 6816 }, { "epoch": 0.22862413012492663, "grad_norm": 0.4227637891153538, "learning_rate": 2e-05, "loss": 5.3249, "step": 6817 }, { "epoch": 0.2286576674771527, "grad_norm": 0.5000487317696993, "learning_rate": 2e-05, "loss": 5.7045, "step": 6818 }, { "epoch": 0.2286912048293787, "grad_norm": 0.41972577627919483, "learning_rate": 2e-05, "loss": 5.5783, "step": 6819 }, { "epoch": 0.22872474218160477, "grad_norm": 0.42633037317690287, "learning_rate": 2e-05, "loss": 5.5428, "step": 6820 }, { "epoch": 0.2287582795338308, "grad_norm": 0.45903650535282914, "learning_rate": 2e-05, "loss": 5.7425, "step": 6821 }, { "epoch": 0.22879181688605685, "grad_norm": 0.42452932845870384, "learning_rate": 2e-05, "loss": 5.5352, "step": 6822 }, { "epoch": 0.22882535423828287, "grad_norm": 0.4035019025275022, "learning_rate": 2e-05, "loss": 5.6856, "step": 6823 }, { "epoch": 0.22885889159050893, "grad_norm": 0.4406425245574789, "learning_rate": 2e-05, "loss": 5.4653, "step": 6824 }, { "epoch": 0.22889242894273498, "grad_norm": 0.44830005165672343, "learning_rate": 2e-05, "loss": 5.4062, "step": 6825 }, { "epoch": 0.228925966294961, "grad_norm": 0.39946193505573496, "learning_rate": 2e-05, "loss": 5.4145, "step": 6826 }, { "epoch": 0.22895950364718706, "grad_norm": 0.43690476061678324, "learning_rate": 2e-05, "loss": 5.5044, "step": 6827 }, { "epoch": 0.2289930409994131, "grad_norm": 0.41460083130602243, "learning_rate": 2e-05, "loss": 5.4956, "step": 6828 }, { "epoch": 0.22902657835163914, "grad_norm": 0.3889238491638938, "learning_rate": 2e-05, "loss": 5.7571, "step": 6829 }, { "epoch": 0.22906011570386517, "grad_norm": 0.4027294025170611, "learning_rate": 2e-05, "loss": 5.5621, "step": 6830 }, { "epoch": 0.22909365305609122, "grad_norm": 0.4125935390906627, "learning_rate": 2e-05, "loss": 5.493, "step": 6831 }, { "epoch": 0.22912719040831725, "grad_norm": 0.4503288486811216, "learning_rate": 2e-05, "loss": 5.4724, "step": 6832 }, { "epoch": 0.2291607277605433, "grad_norm": 0.3884697848830615, "learning_rate": 2e-05, "loss": 5.6344, "step": 6833 }, { "epoch": 0.22919426511276936, "grad_norm": 0.4162213327453295, "learning_rate": 2e-05, "loss": 5.5164, "step": 6834 }, { "epoch": 0.22922780246499538, "grad_norm": 0.4356057170136128, "learning_rate": 2e-05, "loss": 5.4379, "step": 6835 }, { "epoch": 0.22926133981722144, "grad_norm": 0.40341508273686877, "learning_rate": 2e-05, "loss": 5.6129, "step": 6836 }, { "epoch": 0.22929487716944746, "grad_norm": 0.44124257770536224, "learning_rate": 2e-05, "loss": 5.6497, "step": 6837 }, { "epoch": 0.22932841452167352, "grad_norm": 0.44400600316695243, "learning_rate": 2e-05, "loss": 5.5715, "step": 6838 }, { "epoch": 0.22936195187389954, "grad_norm": 0.3953873897890544, "learning_rate": 2e-05, "loss": 5.6338, "step": 6839 }, { "epoch": 0.2293954892261256, "grad_norm": 0.460191710595598, "learning_rate": 2e-05, "loss": 5.7737, "step": 6840 }, { "epoch": 0.22942902657835165, "grad_norm": 0.4241180341342458, "learning_rate": 2e-05, "loss": 5.5137, "step": 6841 }, { "epoch": 0.22946256393057768, "grad_norm": 0.4088974607387766, "learning_rate": 2e-05, "loss": 5.6518, "step": 6842 }, { "epoch": 0.22949610128280373, "grad_norm": 0.4143740459517002, "learning_rate": 2e-05, "loss": 5.4961, "step": 6843 }, { "epoch": 0.22952963863502976, "grad_norm": 0.39920954031754585, "learning_rate": 2e-05, "loss": 5.6983, "step": 6844 }, { "epoch": 0.2295631759872558, "grad_norm": 0.44384674281256153, "learning_rate": 2e-05, "loss": 5.434, "step": 6845 }, { "epoch": 0.22959671333948184, "grad_norm": 0.44213715745415283, "learning_rate": 2e-05, "loss": 5.7452, "step": 6846 }, { "epoch": 0.2296302506917079, "grad_norm": 0.39537271990679934, "learning_rate": 2e-05, "loss": 5.4705, "step": 6847 }, { "epoch": 0.22966378804393392, "grad_norm": 0.39843111998661157, "learning_rate": 2e-05, "loss": 5.6335, "step": 6848 }, { "epoch": 0.22969732539615997, "grad_norm": 0.44803787750725266, "learning_rate": 2e-05, "loss": 5.6243, "step": 6849 }, { "epoch": 0.22973086274838603, "grad_norm": 0.3883008783872166, "learning_rate": 2e-05, "loss": 5.5559, "step": 6850 }, { "epoch": 0.22976440010061205, "grad_norm": 0.4072038132813442, "learning_rate": 2e-05, "loss": 5.4473, "step": 6851 }, { "epoch": 0.2297979374528381, "grad_norm": 0.39120433901108265, "learning_rate": 2e-05, "loss": 5.569, "step": 6852 }, { "epoch": 0.22983147480506413, "grad_norm": 0.41847966590124774, "learning_rate": 2e-05, "loss": 5.5374, "step": 6853 }, { "epoch": 0.2298650121572902, "grad_norm": 0.44256819043576073, "learning_rate": 2e-05, "loss": 5.5904, "step": 6854 }, { "epoch": 0.22989854950951621, "grad_norm": 0.4113540745821266, "learning_rate": 2e-05, "loss": 5.5917, "step": 6855 }, { "epoch": 0.22993208686174227, "grad_norm": 0.41449535576014873, "learning_rate": 2e-05, "loss": 5.6943, "step": 6856 }, { "epoch": 0.2299656242139683, "grad_norm": 0.41866286103052686, "learning_rate": 2e-05, "loss": 5.637, "step": 6857 }, { "epoch": 0.22999916156619435, "grad_norm": 0.414388383065438, "learning_rate": 2e-05, "loss": 5.4592, "step": 6858 }, { "epoch": 0.2300326989184204, "grad_norm": 0.4394922404888926, "learning_rate": 2e-05, "loss": 5.5437, "step": 6859 }, { "epoch": 0.23006623627064643, "grad_norm": 0.40355624761705494, "learning_rate": 2e-05, "loss": 5.5375, "step": 6860 }, { "epoch": 0.23009977362287248, "grad_norm": 0.4014792159357148, "learning_rate": 2e-05, "loss": 5.513, "step": 6861 }, { "epoch": 0.2301333109750985, "grad_norm": 0.41970619422337285, "learning_rate": 2e-05, "loss": 5.3745, "step": 6862 }, { "epoch": 0.23016684832732456, "grad_norm": 0.40571520576163583, "learning_rate": 2e-05, "loss": 5.641, "step": 6863 }, { "epoch": 0.2302003856795506, "grad_norm": 0.41400636934125323, "learning_rate": 2e-05, "loss": 5.6098, "step": 6864 }, { "epoch": 0.23023392303177664, "grad_norm": 0.3912000421627172, "learning_rate": 2e-05, "loss": 5.6012, "step": 6865 }, { "epoch": 0.23026746038400267, "grad_norm": 0.4192668766522363, "learning_rate": 2e-05, "loss": 5.4954, "step": 6866 }, { "epoch": 0.23030099773622872, "grad_norm": 0.4229836954140482, "learning_rate": 2e-05, "loss": 5.3384, "step": 6867 }, { "epoch": 0.23033453508845478, "grad_norm": 0.4378953905513217, "learning_rate": 2e-05, "loss": 5.4501, "step": 6868 }, { "epoch": 0.2303680724406808, "grad_norm": 0.42318655358320995, "learning_rate": 2e-05, "loss": 5.5477, "step": 6869 }, { "epoch": 0.23040160979290686, "grad_norm": 0.4076440903629149, "learning_rate": 2e-05, "loss": 5.5161, "step": 6870 }, { "epoch": 0.23043514714513288, "grad_norm": 0.4381901172284208, "learning_rate": 2e-05, "loss": 5.534, "step": 6871 }, { "epoch": 0.23046868449735894, "grad_norm": 0.43571364983862015, "learning_rate": 2e-05, "loss": 5.557, "step": 6872 }, { "epoch": 0.23050222184958497, "grad_norm": 0.38635163974392395, "learning_rate": 2e-05, "loss": 5.5236, "step": 6873 }, { "epoch": 0.23053575920181102, "grad_norm": 0.3991202853155787, "learning_rate": 2e-05, "loss": 5.2924, "step": 6874 }, { "epoch": 0.23056929655403705, "grad_norm": 0.4785478412100158, "learning_rate": 2e-05, "loss": 5.5716, "step": 6875 }, { "epoch": 0.2306028339062631, "grad_norm": 0.43477605609506564, "learning_rate": 2e-05, "loss": 5.4376, "step": 6876 }, { "epoch": 0.23063637125848915, "grad_norm": 0.4251203964121384, "learning_rate": 2e-05, "loss": 5.6332, "step": 6877 }, { "epoch": 0.23066990861071518, "grad_norm": 0.40700832971745, "learning_rate": 2e-05, "loss": 5.4663, "step": 6878 }, { "epoch": 0.23070344596294123, "grad_norm": 0.4694987139540581, "learning_rate": 2e-05, "loss": 5.6237, "step": 6879 }, { "epoch": 0.23073698331516726, "grad_norm": 0.4076279143157602, "learning_rate": 2e-05, "loss": 5.5881, "step": 6880 }, { "epoch": 0.23077052066739331, "grad_norm": 0.41418807157734566, "learning_rate": 2e-05, "loss": 5.5807, "step": 6881 }, { "epoch": 0.23080405801961934, "grad_norm": 0.40580467161622197, "learning_rate": 2e-05, "loss": 5.8054, "step": 6882 }, { "epoch": 0.2308375953718454, "grad_norm": 0.4561672468367136, "learning_rate": 2e-05, "loss": 5.396, "step": 6883 }, { "epoch": 0.23087113272407145, "grad_norm": 0.4368045666431574, "learning_rate": 2e-05, "loss": 5.5496, "step": 6884 }, { "epoch": 0.23090467007629747, "grad_norm": 0.39718007241005254, "learning_rate": 2e-05, "loss": 5.5427, "step": 6885 }, { "epoch": 0.23093820742852353, "grad_norm": 0.42854319045991296, "learning_rate": 2e-05, "loss": 5.584, "step": 6886 }, { "epoch": 0.23097174478074955, "grad_norm": 0.4310638000718454, "learning_rate": 2e-05, "loss": 5.5145, "step": 6887 }, { "epoch": 0.2310052821329756, "grad_norm": 0.41379146838002767, "learning_rate": 2e-05, "loss": 5.6281, "step": 6888 }, { "epoch": 0.23103881948520164, "grad_norm": 0.4013419984899383, "learning_rate": 2e-05, "loss": 5.4931, "step": 6889 }, { "epoch": 0.2310723568374277, "grad_norm": 0.41400769462434595, "learning_rate": 2e-05, "loss": 5.7054, "step": 6890 }, { "epoch": 0.23110589418965372, "grad_norm": 0.43014162091092034, "learning_rate": 2e-05, "loss": 5.5231, "step": 6891 }, { "epoch": 0.23113943154187977, "grad_norm": 0.43736424143914165, "learning_rate": 2e-05, "loss": 5.4187, "step": 6892 }, { "epoch": 0.23117296889410582, "grad_norm": 0.4057883286225602, "learning_rate": 2e-05, "loss": 5.5783, "step": 6893 }, { "epoch": 0.23120650624633185, "grad_norm": 0.4503902104735009, "learning_rate": 2e-05, "loss": 5.5026, "step": 6894 }, { "epoch": 0.2312400435985579, "grad_norm": 0.3992440766287794, "learning_rate": 2e-05, "loss": 5.6926, "step": 6895 }, { "epoch": 0.23127358095078393, "grad_norm": 0.4585260136587066, "learning_rate": 2e-05, "loss": 5.3384, "step": 6896 }, { "epoch": 0.23130711830300998, "grad_norm": 0.40465506858225125, "learning_rate": 2e-05, "loss": 5.6044, "step": 6897 }, { "epoch": 0.231340655655236, "grad_norm": 0.4041147381578955, "learning_rate": 2e-05, "loss": 5.6728, "step": 6898 }, { "epoch": 0.23137419300746206, "grad_norm": 0.43845722616107446, "learning_rate": 2e-05, "loss": 5.6593, "step": 6899 }, { "epoch": 0.2314077303596881, "grad_norm": 0.45779133609028355, "learning_rate": 2e-05, "loss": 5.4289, "step": 6900 }, { "epoch": 0.23144126771191414, "grad_norm": 0.4381500897291909, "learning_rate": 2e-05, "loss": 5.4488, "step": 6901 }, { "epoch": 0.2314748050641402, "grad_norm": 0.39426065652391223, "learning_rate": 2e-05, "loss": 5.3751, "step": 6902 }, { "epoch": 0.23150834241636623, "grad_norm": 0.44751664208534925, "learning_rate": 2e-05, "loss": 5.4902, "step": 6903 }, { "epoch": 0.23154187976859228, "grad_norm": 0.39483813296454306, "learning_rate": 2e-05, "loss": 5.5029, "step": 6904 }, { "epoch": 0.2315754171208183, "grad_norm": 0.3958067445948934, "learning_rate": 2e-05, "loss": 5.7984, "step": 6905 }, { "epoch": 0.23160895447304436, "grad_norm": 0.4091534215872691, "learning_rate": 2e-05, "loss": 5.521, "step": 6906 }, { "epoch": 0.23164249182527039, "grad_norm": 0.43118242844325916, "learning_rate": 2e-05, "loss": 5.4737, "step": 6907 }, { "epoch": 0.23167602917749644, "grad_norm": 0.45838456090395063, "learning_rate": 2e-05, "loss": 5.6838, "step": 6908 }, { "epoch": 0.23170956652972247, "grad_norm": 0.42770310910443904, "learning_rate": 2e-05, "loss": 5.4195, "step": 6909 }, { "epoch": 0.23174310388194852, "grad_norm": 0.42039856841694534, "learning_rate": 2e-05, "loss": 5.4984, "step": 6910 }, { "epoch": 0.23177664123417457, "grad_norm": 0.40102485427735435, "learning_rate": 2e-05, "loss": 5.4532, "step": 6911 }, { "epoch": 0.2318101785864006, "grad_norm": 0.447606682699034, "learning_rate": 2e-05, "loss": 5.6464, "step": 6912 }, { "epoch": 0.23184371593862665, "grad_norm": 0.4529441075469899, "learning_rate": 2e-05, "loss": 5.5795, "step": 6913 }, { "epoch": 0.23187725329085268, "grad_norm": 0.4234470141842503, "learning_rate": 2e-05, "loss": 5.5411, "step": 6914 }, { "epoch": 0.23191079064307873, "grad_norm": 0.429399135096955, "learning_rate": 2e-05, "loss": 5.6074, "step": 6915 }, { "epoch": 0.23194432799530476, "grad_norm": 0.41554009757618854, "learning_rate": 2e-05, "loss": 5.5933, "step": 6916 }, { "epoch": 0.23197786534753081, "grad_norm": 0.4181000967779368, "learning_rate": 2e-05, "loss": 5.3947, "step": 6917 }, { "epoch": 0.23201140269975684, "grad_norm": 0.4560785590768731, "learning_rate": 2e-05, "loss": 5.5627, "step": 6918 }, { "epoch": 0.2320449400519829, "grad_norm": 0.4084318231776419, "learning_rate": 2e-05, "loss": 5.669, "step": 6919 }, { "epoch": 0.23207847740420895, "grad_norm": 0.43214307891216214, "learning_rate": 2e-05, "loss": 5.6462, "step": 6920 }, { "epoch": 0.23211201475643498, "grad_norm": 0.45611488667282923, "learning_rate": 2e-05, "loss": 5.4962, "step": 6921 }, { "epoch": 0.23214555210866103, "grad_norm": 0.3914398494554131, "learning_rate": 2e-05, "loss": 5.7422, "step": 6922 }, { "epoch": 0.23217908946088706, "grad_norm": 0.3838308878909644, "learning_rate": 2e-05, "loss": 5.3644, "step": 6923 }, { "epoch": 0.2322126268131131, "grad_norm": 0.42200826223540433, "learning_rate": 2e-05, "loss": 5.5412, "step": 6924 }, { "epoch": 0.23224616416533914, "grad_norm": 0.4389313875922686, "learning_rate": 2e-05, "loss": 5.6129, "step": 6925 }, { "epoch": 0.2322797015175652, "grad_norm": 0.4037689581192651, "learning_rate": 2e-05, "loss": 5.591, "step": 6926 }, { "epoch": 0.23231323886979122, "grad_norm": 0.42323110187699575, "learning_rate": 2e-05, "loss": 5.4378, "step": 6927 }, { "epoch": 0.23234677622201727, "grad_norm": 0.39371140263297894, "learning_rate": 2e-05, "loss": 5.4107, "step": 6928 }, { "epoch": 0.23238031357424332, "grad_norm": 0.39059991271896094, "learning_rate": 2e-05, "loss": 5.3844, "step": 6929 }, { "epoch": 0.23241385092646935, "grad_norm": 0.44051317633839215, "learning_rate": 2e-05, "loss": 5.5635, "step": 6930 }, { "epoch": 0.2324473882786954, "grad_norm": 0.48825469240651465, "learning_rate": 2e-05, "loss": 5.3949, "step": 6931 }, { "epoch": 0.23248092563092143, "grad_norm": 0.3968021458059623, "learning_rate": 2e-05, "loss": 5.3272, "step": 6932 }, { "epoch": 0.23251446298314749, "grad_norm": 0.4069339457664754, "learning_rate": 2e-05, "loss": 5.5034, "step": 6933 }, { "epoch": 0.2325480003353735, "grad_norm": 0.4526057890572705, "learning_rate": 2e-05, "loss": 5.5264, "step": 6934 }, { "epoch": 0.23258153768759957, "grad_norm": 0.39415350918315634, "learning_rate": 2e-05, "loss": 5.4484, "step": 6935 }, { "epoch": 0.23261507503982562, "grad_norm": 0.38639775566134615, "learning_rate": 2e-05, "loss": 5.6682, "step": 6936 }, { "epoch": 0.23264861239205165, "grad_norm": 0.4049079705527422, "learning_rate": 2e-05, "loss": 5.5736, "step": 6937 }, { "epoch": 0.2326821497442777, "grad_norm": 0.41658376091083565, "learning_rate": 2e-05, "loss": 5.6254, "step": 6938 }, { "epoch": 0.23271568709650373, "grad_norm": 0.379879453023097, "learning_rate": 2e-05, "loss": 5.4026, "step": 6939 }, { "epoch": 0.23274922444872978, "grad_norm": 0.40059100107355905, "learning_rate": 2e-05, "loss": 5.6156, "step": 6940 }, { "epoch": 0.2327827618009558, "grad_norm": 0.41934891642392746, "learning_rate": 2e-05, "loss": 5.7231, "step": 6941 }, { "epoch": 0.23281629915318186, "grad_norm": 0.4252509624335183, "learning_rate": 2e-05, "loss": 5.4746, "step": 6942 }, { "epoch": 0.2328498365054079, "grad_norm": 0.4226984184219273, "learning_rate": 2e-05, "loss": 5.5605, "step": 6943 }, { "epoch": 0.23288337385763394, "grad_norm": 0.427404500607368, "learning_rate": 2e-05, "loss": 5.5152, "step": 6944 }, { "epoch": 0.23291691120986, "grad_norm": 0.41561848953319824, "learning_rate": 2e-05, "loss": 5.4876, "step": 6945 }, { "epoch": 0.23295044856208602, "grad_norm": 0.41556797241887283, "learning_rate": 2e-05, "loss": 5.6592, "step": 6946 }, { "epoch": 0.23298398591431208, "grad_norm": 0.46832680355331796, "learning_rate": 2e-05, "loss": 5.7133, "step": 6947 }, { "epoch": 0.2330175232665381, "grad_norm": 0.43797595739754946, "learning_rate": 2e-05, "loss": 5.5882, "step": 6948 }, { "epoch": 0.23305106061876416, "grad_norm": 0.43612632630329, "learning_rate": 2e-05, "loss": 5.3597, "step": 6949 }, { "epoch": 0.23308459797099018, "grad_norm": 0.41767966352445507, "learning_rate": 2e-05, "loss": 5.5406, "step": 6950 }, { "epoch": 0.23311813532321624, "grad_norm": 0.4121837449711073, "learning_rate": 2e-05, "loss": 5.4861, "step": 6951 }, { "epoch": 0.23315167267544226, "grad_norm": 0.4105407232426818, "learning_rate": 2e-05, "loss": 5.58, "step": 6952 }, { "epoch": 0.23318521002766832, "grad_norm": 0.41348201677944413, "learning_rate": 2e-05, "loss": 5.5727, "step": 6953 }, { "epoch": 0.23321874737989437, "grad_norm": 0.3866031617543446, "learning_rate": 2e-05, "loss": 5.6684, "step": 6954 }, { "epoch": 0.2332522847321204, "grad_norm": 0.3959668210388458, "learning_rate": 2e-05, "loss": 5.6234, "step": 6955 }, { "epoch": 0.23328582208434645, "grad_norm": 0.4311232766219035, "learning_rate": 2e-05, "loss": 5.4447, "step": 6956 }, { "epoch": 0.23331935943657248, "grad_norm": 0.38578490054955683, "learning_rate": 2e-05, "loss": 5.3768, "step": 6957 }, { "epoch": 0.23335289678879853, "grad_norm": 0.4075606769814694, "learning_rate": 2e-05, "loss": 5.7153, "step": 6958 }, { "epoch": 0.23338643414102456, "grad_norm": 0.3979409934890077, "learning_rate": 2e-05, "loss": 5.6288, "step": 6959 }, { "epoch": 0.2334199714932506, "grad_norm": 0.38485637703512465, "learning_rate": 2e-05, "loss": 5.4118, "step": 6960 }, { "epoch": 0.23345350884547664, "grad_norm": 0.4142120175458311, "learning_rate": 2e-05, "loss": 5.4624, "step": 6961 }, { "epoch": 0.2334870461977027, "grad_norm": 0.39643348219794566, "learning_rate": 2e-05, "loss": 5.4927, "step": 6962 }, { "epoch": 0.23352058354992875, "grad_norm": 0.40185184485379305, "learning_rate": 2e-05, "loss": 5.3159, "step": 6963 }, { "epoch": 0.23355412090215477, "grad_norm": 0.4225875016901845, "learning_rate": 2e-05, "loss": 5.5421, "step": 6964 }, { "epoch": 0.23358765825438083, "grad_norm": 0.3933150181515341, "learning_rate": 2e-05, "loss": 5.5077, "step": 6965 }, { "epoch": 0.23362119560660685, "grad_norm": 0.4083445675239589, "learning_rate": 2e-05, "loss": 5.4453, "step": 6966 }, { "epoch": 0.2336547329588329, "grad_norm": 0.4149972981126468, "learning_rate": 2e-05, "loss": 5.6718, "step": 6967 }, { "epoch": 0.23368827031105893, "grad_norm": 0.409882661947719, "learning_rate": 2e-05, "loss": 5.5785, "step": 6968 }, { "epoch": 0.233721807663285, "grad_norm": 0.40621911170969277, "learning_rate": 2e-05, "loss": 5.4531, "step": 6969 }, { "epoch": 0.233755345015511, "grad_norm": 0.4141824197097097, "learning_rate": 2e-05, "loss": 5.6303, "step": 6970 }, { "epoch": 0.23378888236773707, "grad_norm": 0.4219458532316901, "learning_rate": 2e-05, "loss": 5.5424, "step": 6971 }, { "epoch": 0.23382241971996312, "grad_norm": 0.3993370738185198, "learning_rate": 2e-05, "loss": 5.4265, "step": 6972 }, { "epoch": 0.23385595707218915, "grad_norm": 0.38475190537587, "learning_rate": 2e-05, "loss": 5.6814, "step": 6973 }, { "epoch": 0.2338894944244152, "grad_norm": 0.4084135379086657, "learning_rate": 2e-05, "loss": 5.3334, "step": 6974 }, { "epoch": 0.23392303177664123, "grad_norm": 0.4072247378398912, "learning_rate": 2e-05, "loss": 5.3693, "step": 6975 }, { "epoch": 0.23395656912886728, "grad_norm": 0.44504522736458996, "learning_rate": 2e-05, "loss": 5.5271, "step": 6976 }, { "epoch": 0.2339901064810933, "grad_norm": 0.409296027002698, "learning_rate": 2e-05, "loss": 5.4143, "step": 6977 }, { "epoch": 0.23402364383331936, "grad_norm": 0.408550005918098, "learning_rate": 2e-05, "loss": 5.4481, "step": 6978 }, { "epoch": 0.2340571811855454, "grad_norm": 0.4282675043204197, "learning_rate": 2e-05, "loss": 5.6435, "step": 6979 }, { "epoch": 0.23409071853777144, "grad_norm": 0.387858718133038, "learning_rate": 2e-05, "loss": 5.4509, "step": 6980 }, { "epoch": 0.2341242558899975, "grad_norm": 0.4126925970885519, "learning_rate": 2e-05, "loss": 5.5191, "step": 6981 }, { "epoch": 0.23415779324222352, "grad_norm": 0.4244402977749779, "learning_rate": 2e-05, "loss": 5.7881, "step": 6982 }, { "epoch": 0.23419133059444958, "grad_norm": 0.4033670301885627, "learning_rate": 2e-05, "loss": 5.4889, "step": 6983 }, { "epoch": 0.2342248679466756, "grad_norm": 0.42510673705396707, "learning_rate": 2e-05, "loss": 5.4017, "step": 6984 }, { "epoch": 0.23425840529890166, "grad_norm": 0.42830290611653343, "learning_rate": 2e-05, "loss": 5.5138, "step": 6985 }, { "epoch": 0.23429194265112768, "grad_norm": 0.42747139228475584, "learning_rate": 2e-05, "loss": 5.4176, "step": 6986 }, { "epoch": 0.23432548000335374, "grad_norm": 0.40825222609469675, "learning_rate": 2e-05, "loss": 5.5775, "step": 6987 }, { "epoch": 0.2343590173555798, "grad_norm": 0.4310049576255509, "learning_rate": 2e-05, "loss": 5.3999, "step": 6988 }, { "epoch": 0.23439255470780582, "grad_norm": 0.45197076801205094, "learning_rate": 2e-05, "loss": 5.91, "step": 6989 }, { "epoch": 0.23442609206003187, "grad_norm": 0.38967897740632174, "learning_rate": 2e-05, "loss": 5.3557, "step": 6990 }, { "epoch": 0.2344596294122579, "grad_norm": 0.43213017936865106, "learning_rate": 2e-05, "loss": 5.6121, "step": 6991 }, { "epoch": 0.23449316676448395, "grad_norm": 0.4902220028828586, "learning_rate": 2e-05, "loss": 5.3922, "step": 6992 }, { "epoch": 0.23452670411670998, "grad_norm": 0.42624920780791226, "learning_rate": 2e-05, "loss": 5.6205, "step": 6993 }, { "epoch": 0.23456024146893603, "grad_norm": 0.4281652764011821, "learning_rate": 2e-05, "loss": 5.7445, "step": 6994 }, { "epoch": 0.23459377882116206, "grad_norm": 0.46461187799535747, "learning_rate": 2e-05, "loss": 5.4659, "step": 6995 }, { "epoch": 0.2346273161733881, "grad_norm": 0.46988520443959103, "learning_rate": 2e-05, "loss": 5.5673, "step": 6996 }, { "epoch": 0.23466085352561417, "grad_norm": 0.4037370480088678, "learning_rate": 2e-05, "loss": 5.4709, "step": 6997 }, { "epoch": 0.2346943908778402, "grad_norm": 0.41913355952967357, "learning_rate": 2e-05, "loss": 5.5835, "step": 6998 }, { "epoch": 0.23472792823006625, "grad_norm": 0.4271883770029438, "learning_rate": 2e-05, "loss": 5.4561, "step": 6999 }, { "epoch": 0.23476146558229227, "grad_norm": 0.3992883496411983, "learning_rate": 2e-05, "loss": 5.5122, "step": 7000 }, { "epoch": 0.23479500293451833, "grad_norm": 0.43740258435034335, "learning_rate": 2e-05, "loss": 5.3936, "step": 7001 }, { "epoch": 0.23482854028674435, "grad_norm": 0.4438287599399057, "learning_rate": 2e-05, "loss": 5.5829, "step": 7002 }, { "epoch": 0.2348620776389704, "grad_norm": 0.42744369451802555, "learning_rate": 2e-05, "loss": 5.7291, "step": 7003 }, { "epoch": 0.23489561499119643, "grad_norm": 0.443185656198512, "learning_rate": 2e-05, "loss": 5.6426, "step": 7004 }, { "epoch": 0.2349291523434225, "grad_norm": 0.437657240932892, "learning_rate": 2e-05, "loss": 5.4534, "step": 7005 }, { "epoch": 0.23496268969564854, "grad_norm": 0.45874722708380977, "learning_rate": 2e-05, "loss": 5.518, "step": 7006 }, { "epoch": 0.23499622704787457, "grad_norm": 0.43453586013853607, "learning_rate": 2e-05, "loss": 5.6889, "step": 7007 }, { "epoch": 0.23502976440010062, "grad_norm": 0.41555334283125034, "learning_rate": 2e-05, "loss": 5.5653, "step": 7008 }, { "epoch": 0.23506330175232665, "grad_norm": 0.4078561841799919, "learning_rate": 2e-05, "loss": 5.6222, "step": 7009 }, { "epoch": 0.2350968391045527, "grad_norm": 0.42208671226645766, "learning_rate": 2e-05, "loss": 5.585, "step": 7010 }, { "epoch": 0.23513037645677873, "grad_norm": 0.4488706249333662, "learning_rate": 2e-05, "loss": 5.4471, "step": 7011 }, { "epoch": 0.23516391380900478, "grad_norm": 0.44016356492539493, "learning_rate": 2e-05, "loss": 5.7481, "step": 7012 }, { "epoch": 0.2351974511612308, "grad_norm": 0.45205796899668554, "learning_rate": 2e-05, "loss": 5.5065, "step": 7013 }, { "epoch": 0.23523098851345686, "grad_norm": 0.44526970475535593, "learning_rate": 2e-05, "loss": 5.8182, "step": 7014 }, { "epoch": 0.23526452586568292, "grad_norm": 0.4227419160755182, "learning_rate": 2e-05, "loss": 5.543, "step": 7015 }, { "epoch": 0.23529806321790894, "grad_norm": 0.4517007152640535, "learning_rate": 2e-05, "loss": 5.7338, "step": 7016 }, { "epoch": 0.235331600570135, "grad_norm": 0.442236940731969, "learning_rate": 2e-05, "loss": 5.5879, "step": 7017 }, { "epoch": 0.23536513792236102, "grad_norm": 0.4071887707903729, "learning_rate": 2e-05, "loss": 5.4549, "step": 7018 }, { "epoch": 0.23539867527458708, "grad_norm": 0.40433585627459895, "learning_rate": 2e-05, "loss": 5.5801, "step": 7019 }, { "epoch": 0.2354322126268131, "grad_norm": 0.43356125299461384, "learning_rate": 2e-05, "loss": 5.6254, "step": 7020 }, { "epoch": 0.23546574997903916, "grad_norm": 0.3887105818134577, "learning_rate": 2e-05, "loss": 5.3858, "step": 7021 }, { "epoch": 0.23549928733126518, "grad_norm": 0.39772787756945027, "learning_rate": 2e-05, "loss": 5.5725, "step": 7022 }, { "epoch": 0.23553282468349124, "grad_norm": 0.40636179116079313, "learning_rate": 2e-05, "loss": 5.5164, "step": 7023 }, { "epoch": 0.2355663620357173, "grad_norm": 0.388924401587917, "learning_rate": 2e-05, "loss": 5.4239, "step": 7024 }, { "epoch": 0.23559989938794332, "grad_norm": 0.40624908036495827, "learning_rate": 2e-05, "loss": 5.4119, "step": 7025 }, { "epoch": 0.23563343674016937, "grad_norm": 0.4090937133664446, "learning_rate": 2e-05, "loss": 5.5381, "step": 7026 }, { "epoch": 0.2356669740923954, "grad_norm": 0.41817918927687475, "learning_rate": 2e-05, "loss": 5.6637, "step": 7027 }, { "epoch": 0.23570051144462145, "grad_norm": 0.40668658363956617, "learning_rate": 2e-05, "loss": 5.5501, "step": 7028 }, { "epoch": 0.23573404879684748, "grad_norm": 0.40574002775855295, "learning_rate": 2e-05, "loss": 5.6213, "step": 7029 }, { "epoch": 0.23576758614907353, "grad_norm": 0.39222363270041843, "learning_rate": 2e-05, "loss": 5.5234, "step": 7030 }, { "epoch": 0.23580112350129956, "grad_norm": 0.416535292297758, "learning_rate": 2e-05, "loss": 5.6988, "step": 7031 }, { "epoch": 0.2358346608535256, "grad_norm": 0.4016931417628294, "learning_rate": 2e-05, "loss": 5.5692, "step": 7032 }, { "epoch": 0.23586819820575167, "grad_norm": 0.43368240047255985, "learning_rate": 2e-05, "loss": 5.4432, "step": 7033 }, { "epoch": 0.2359017355579777, "grad_norm": 0.4400269168248658, "learning_rate": 2e-05, "loss": 5.2898, "step": 7034 }, { "epoch": 0.23593527291020375, "grad_norm": 0.41009886740036583, "learning_rate": 2e-05, "loss": 5.7907, "step": 7035 }, { "epoch": 0.23596881026242977, "grad_norm": 0.42411289388391493, "learning_rate": 2e-05, "loss": 5.6883, "step": 7036 }, { "epoch": 0.23600234761465583, "grad_norm": 0.4605656910463158, "learning_rate": 2e-05, "loss": 5.3659, "step": 7037 }, { "epoch": 0.23603588496688185, "grad_norm": 0.4014699562173609, "learning_rate": 2e-05, "loss": 5.4803, "step": 7038 }, { "epoch": 0.2360694223191079, "grad_norm": 0.4093186766074338, "learning_rate": 2e-05, "loss": 5.6411, "step": 7039 }, { "epoch": 0.23610295967133396, "grad_norm": 0.4233067904411261, "learning_rate": 2e-05, "loss": 5.5564, "step": 7040 }, { "epoch": 0.23613649702356, "grad_norm": 0.4293537150695059, "learning_rate": 2e-05, "loss": 5.3273, "step": 7041 }, { "epoch": 0.23617003437578604, "grad_norm": 0.44287778044290266, "learning_rate": 2e-05, "loss": 5.6102, "step": 7042 }, { "epoch": 0.23620357172801207, "grad_norm": 0.3997089619609716, "learning_rate": 2e-05, "loss": 5.3648, "step": 7043 }, { "epoch": 0.23623710908023812, "grad_norm": 0.41410017313892644, "learning_rate": 2e-05, "loss": 5.3989, "step": 7044 }, { "epoch": 0.23627064643246415, "grad_norm": 0.4084889361398123, "learning_rate": 2e-05, "loss": 5.6842, "step": 7045 }, { "epoch": 0.2363041837846902, "grad_norm": 0.44688546578541893, "learning_rate": 2e-05, "loss": 5.5511, "step": 7046 }, { "epoch": 0.23633772113691623, "grad_norm": 0.41349689897668246, "learning_rate": 2e-05, "loss": 5.6465, "step": 7047 }, { "epoch": 0.23637125848914228, "grad_norm": 0.4283961096844762, "learning_rate": 2e-05, "loss": 5.3199, "step": 7048 }, { "epoch": 0.23640479584136834, "grad_norm": 0.4458658828605276, "learning_rate": 2e-05, "loss": 5.3654, "step": 7049 }, { "epoch": 0.23643833319359436, "grad_norm": 0.44265180224942713, "learning_rate": 2e-05, "loss": 5.5143, "step": 7050 }, { "epoch": 0.23647187054582042, "grad_norm": 0.4846147462843089, "learning_rate": 2e-05, "loss": 5.4157, "step": 7051 }, { "epoch": 0.23650540789804644, "grad_norm": 0.4093682540941013, "learning_rate": 2e-05, "loss": 5.5719, "step": 7052 }, { "epoch": 0.2365389452502725, "grad_norm": 0.4124177395914357, "learning_rate": 2e-05, "loss": 5.6288, "step": 7053 }, { "epoch": 0.23657248260249852, "grad_norm": 0.4274916616586883, "learning_rate": 2e-05, "loss": 5.6798, "step": 7054 }, { "epoch": 0.23660601995472458, "grad_norm": 0.3912892287090729, "learning_rate": 2e-05, "loss": 5.5746, "step": 7055 }, { "epoch": 0.2366395573069506, "grad_norm": 0.41925043051947947, "learning_rate": 2e-05, "loss": 5.42, "step": 7056 }, { "epoch": 0.23667309465917666, "grad_norm": 0.3968186339872425, "learning_rate": 2e-05, "loss": 5.8116, "step": 7057 }, { "epoch": 0.2367066320114027, "grad_norm": 0.3978760376551435, "learning_rate": 2e-05, "loss": 5.3968, "step": 7058 }, { "epoch": 0.23674016936362874, "grad_norm": 0.4130056250495598, "learning_rate": 2e-05, "loss": 5.713, "step": 7059 }, { "epoch": 0.2367737067158548, "grad_norm": 0.42006904786721855, "learning_rate": 2e-05, "loss": 5.3992, "step": 7060 }, { "epoch": 0.23680724406808082, "grad_norm": 0.39972158354835857, "learning_rate": 2e-05, "loss": 5.4244, "step": 7061 }, { "epoch": 0.23684078142030687, "grad_norm": 0.4098303824121002, "learning_rate": 2e-05, "loss": 5.5774, "step": 7062 }, { "epoch": 0.2368743187725329, "grad_norm": 0.41937168417995435, "learning_rate": 2e-05, "loss": 5.4374, "step": 7063 }, { "epoch": 0.23690785612475895, "grad_norm": 0.39824089631521403, "learning_rate": 2e-05, "loss": 5.6505, "step": 7064 }, { "epoch": 0.23694139347698498, "grad_norm": 0.44724002178050365, "learning_rate": 2e-05, "loss": 5.2824, "step": 7065 }, { "epoch": 0.23697493082921103, "grad_norm": 0.41839089782779393, "learning_rate": 2e-05, "loss": 5.423, "step": 7066 }, { "epoch": 0.2370084681814371, "grad_norm": 0.41066170825500176, "learning_rate": 2e-05, "loss": 5.3674, "step": 7067 }, { "epoch": 0.2370420055336631, "grad_norm": 0.40215659105380425, "learning_rate": 2e-05, "loss": 5.5609, "step": 7068 }, { "epoch": 0.23707554288588917, "grad_norm": 0.41468579376420733, "learning_rate": 2e-05, "loss": 5.4875, "step": 7069 }, { "epoch": 0.2371090802381152, "grad_norm": 0.40975775569686573, "learning_rate": 2e-05, "loss": 5.6115, "step": 7070 }, { "epoch": 0.23714261759034125, "grad_norm": 0.4103519497954735, "learning_rate": 2e-05, "loss": 5.4909, "step": 7071 }, { "epoch": 0.23717615494256727, "grad_norm": 0.430601292532349, "learning_rate": 2e-05, "loss": 5.523, "step": 7072 }, { "epoch": 0.23720969229479333, "grad_norm": 0.40549949191741813, "learning_rate": 2e-05, "loss": 5.5808, "step": 7073 }, { "epoch": 0.23724322964701935, "grad_norm": 0.4012034390669736, "learning_rate": 2e-05, "loss": 5.6749, "step": 7074 }, { "epoch": 0.2372767669992454, "grad_norm": 0.42150777004753937, "learning_rate": 2e-05, "loss": 5.5598, "step": 7075 }, { "epoch": 0.23731030435147146, "grad_norm": 0.40456288698444604, "learning_rate": 2e-05, "loss": 5.4771, "step": 7076 }, { "epoch": 0.2373438417036975, "grad_norm": 0.41181879854139763, "learning_rate": 2e-05, "loss": 5.5409, "step": 7077 }, { "epoch": 0.23737737905592354, "grad_norm": 0.3968882885894124, "learning_rate": 2e-05, "loss": 5.5597, "step": 7078 }, { "epoch": 0.23741091640814957, "grad_norm": 0.4095917939055766, "learning_rate": 2e-05, "loss": 5.6197, "step": 7079 }, { "epoch": 0.23744445376037562, "grad_norm": 0.4017292865615012, "learning_rate": 2e-05, "loss": 5.4777, "step": 7080 }, { "epoch": 0.23747799111260165, "grad_norm": 0.39990638350849556, "learning_rate": 2e-05, "loss": 5.5515, "step": 7081 }, { "epoch": 0.2375115284648277, "grad_norm": 0.4021392122766915, "learning_rate": 2e-05, "loss": 5.5453, "step": 7082 }, { "epoch": 0.23754506581705373, "grad_norm": 0.38460036344328086, "learning_rate": 2e-05, "loss": 5.5684, "step": 7083 }, { "epoch": 0.23757860316927978, "grad_norm": 0.4866604813520934, "learning_rate": 2e-05, "loss": 5.4361, "step": 7084 }, { "epoch": 0.23761214052150584, "grad_norm": 0.4131184773684932, "learning_rate": 2e-05, "loss": 5.5923, "step": 7085 }, { "epoch": 0.23764567787373186, "grad_norm": 0.41158743145652865, "learning_rate": 2e-05, "loss": 5.4301, "step": 7086 }, { "epoch": 0.23767921522595792, "grad_norm": 0.41909122103543106, "learning_rate": 2e-05, "loss": 5.4162, "step": 7087 }, { "epoch": 0.23771275257818394, "grad_norm": 0.39087062912558623, "learning_rate": 2e-05, "loss": 5.589, "step": 7088 }, { "epoch": 0.23774628993041, "grad_norm": 0.42044809862969595, "learning_rate": 2e-05, "loss": 5.7911, "step": 7089 }, { "epoch": 0.23777982728263602, "grad_norm": 0.41166351490792824, "learning_rate": 2e-05, "loss": 5.5972, "step": 7090 }, { "epoch": 0.23781336463486208, "grad_norm": 0.44860695658766586, "learning_rate": 2e-05, "loss": 5.5764, "step": 7091 }, { "epoch": 0.23784690198708813, "grad_norm": 0.4156009547496754, "learning_rate": 2e-05, "loss": 5.6028, "step": 7092 }, { "epoch": 0.23788043933931416, "grad_norm": 0.4227217817957098, "learning_rate": 2e-05, "loss": 5.3636, "step": 7093 }, { "epoch": 0.2379139766915402, "grad_norm": 0.4435238485441568, "learning_rate": 2e-05, "loss": 5.2606, "step": 7094 }, { "epoch": 0.23794751404376624, "grad_norm": 0.4207833939208977, "learning_rate": 2e-05, "loss": 5.5937, "step": 7095 }, { "epoch": 0.2379810513959923, "grad_norm": 0.4192661588427735, "learning_rate": 2e-05, "loss": 5.4882, "step": 7096 }, { "epoch": 0.23801458874821832, "grad_norm": 0.4333999587869167, "learning_rate": 2e-05, "loss": 5.2753, "step": 7097 }, { "epoch": 0.23804812610044437, "grad_norm": 0.47534100794154177, "learning_rate": 2e-05, "loss": 5.6229, "step": 7098 }, { "epoch": 0.2380816634526704, "grad_norm": 0.4345358007028644, "learning_rate": 2e-05, "loss": 5.4113, "step": 7099 }, { "epoch": 0.23811520080489645, "grad_norm": 0.4171328780702143, "learning_rate": 2e-05, "loss": 5.4905, "step": 7100 }, { "epoch": 0.2381487381571225, "grad_norm": 0.5303532296915515, "learning_rate": 2e-05, "loss": 5.7061, "step": 7101 }, { "epoch": 0.23818227550934853, "grad_norm": 0.4399495980742932, "learning_rate": 2e-05, "loss": 5.3928, "step": 7102 }, { "epoch": 0.2382158128615746, "grad_norm": 0.405399594618067, "learning_rate": 2e-05, "loss": 5.4495, "step": 7103 }, { "epoch": 0.23824935021380061, "grad_norm": 0.40945801989877495, "learning_rate": 2e-05, "loss": 5.4166, "step": 7104 }, { "epoch": 0.23828288756602667, "grad_norm": 0.4378280447548125, "learning_rate": 2e-05, "loss": 5.609, "step": 7105 }, { "epoch": 0.2383164249182527, "grad_norm": 0.41450896038897095, "learning_rate": 2e-05, "loss": 5.5007, "step": 7106 }, { "epoch": 0.23834996227047875, "grad_norm": 0.4249120554908628, "learning_rate": 2e-05, "loss": 5.52, "step": 7107 }, { "epoch": 0.23838349962270478, "grad_norm": 0.39960706255722533, "learning_rate": 2e-05, "loss": 5.4704, "step": 7108 }, { "epoch": 0.23841703697493083, "grad_norm": 0.4288542444981679, "learning_rate": 2e-05, "loss": 5.4854, "step": 7109 }, { "epoch": 0.23845057432715688, "grad_norm": 0.3967727078352327, "learning_rate": 2e-05, "loss": 5.5271, "step": 7110 }, { "epoch": 0.2384841116793829, "grad_norm": 0.41947185526460734, "learning_rate": 2e-05, "loss": 5.6421, "step": 7111 }, { "epoch": 0.23851764903160896, "grad_norm": 0.4041887868613722, "learning_rate": 2e-05, "loss": 5.5497, "step": 7112 }, { "epoch": 0.238551186383835, "grad_norm": 0.41339943893284903, "learning_rate": 2e-05, "loss": 5.7081, "step": 7113 }, { "epoch": 0.23858472373606104, "grad_norm": 0.412766761587711, "learning_rate": 2e-05, "loss": 5.6542, "step": 7114 }, { "epoch": 0.23861826108828707, "grad_norm": 0.4136698191551376, "learning_rate": 2e-05, "loss": 5.6459, "step": 7115 }, { "epoch": 0.23865179844051312, "grad_norm": 0.4186899920940395, "learning_rate": 2e-05, "loss": 5.3309, "step": 7116 }, { "epoch": 0.23868533579273915, "grad_norm": 0.4027919840471012, "learning_rate": 2e-05, "loss": 5.5997, "step": 7117 }, { "epoch": 0.2387188731449652, "grad_norm": 0.42173417324985285, "learning_rate": 2e-05, "loss": 5.6293, "step": 7118 }, { "epoch": 0.23875241049719126, "grad_norm": 0.41785250717418665, "learning_rate": 2e-05, "loss": 5.7751, "step": 7119 }, { "epoch": 0.23878594784941728, "grad_norm": 0.4128943145632721, "learning_rate": 2e-05, "loss": 5.6581, "step": 7120 }, { "epoch": 0.23881948520164334, "grad_norm": 0.422562864634911, "learning_rate": 2e-05, "loss": 5.6659, "step": 7121 }, { "epoch": 0.23885302255386937, "grad_norm": 0.4280887362411111, "learning_rate": 2e-05, "loss": 5.6287, "step": 7122 }, { "epoch": 0.23888655990609542, "grad_norm": 0.4067861440778785, "learning_rate": 2e-05, "loss": 5.5532, "step": 7123 }, { "epoch": 0.23892009725832145, "grad_norm": 0.3902082817824287, "learning_rate": 2e-05, "loss": 5.6785, "step": 7124 }, { "epoch": 0.2389536346105475, "grad_norm": 0.43708207351530576, "learning_rate": 2e-05, "loss": 5.5364, "step": 7125 }, { "epoch": 0.23898717196277353, "grad_norm": 0.4258275368265794, "learning_rate": 2e-05, "loss": 5.7843, "step": 7126 }, { "epoch": 0.23902070931499958, "grad_norm": 0.4416204704718981, "learning_rate": 2e-05, "loss": 5.5856, "step": 7127 }, { "epoch": 0.23905424666722563, "grad_norm": 0.4681384664144364, "learning_rate": 2e-05, "loss": 5.4461, "step": 7128 }, { "epoch": 0.23908778401945166, "grad_norm": 0.38678394582927994, "learning_rate": 2e-05, "loss": 5.5981, "step": 7129 }, { "epoch": 0.23912132137167771, "grad_norm": 0.42739097246825786, "learning_rate": 2e-05, "loss": 5.5131, "step": 7130 }, { "epoch": 0.23915485872390374, "grad_norm": 0.4327841689811742, "learning_rate": 2e-05, "loss": 5.4721, "step": 7131 }, { "epoch": 0.2391883960761298, "grad_norm": 0.4397663378129953, "learning_rate": 2e-05, "loss": 5.6434, "step": 7132 }, { "epoch": 0.23922193342835582, "grad_norm": 0.40517497196161667, "learning_rate": 2e-05, "loss": 5.5116, "step": 7133 }, { "epoch": 0.23925547078058187, "grad_norm": 0.39446124646582414, "learning_rate": 2e-05, "loss": 5.507, "step": 7134 }, { "epoch": 0.2392890081328079, "grad_norm": 0.40345628379874215, "learning_rate": 2e-05, "loss": 5.8142, "step": 7135 }, { "epoch": 0.23932254548503396, "grad_norm": 0.4009166155138019, "learning_rate": 2e-05, "loss": 5.316, "step": 7136 }, { "epoch": 0.23935608283726, "grad_norm": 0.4405945063935094, "learning_rate": 2e-05, "loss": 5.7139, "step": 7137 }, { "epoch": 0.23938962018948604, "grad_norm": 0.4393750561225412, "learning_rate": 2e-05, "loss": 5.4351, "step": 7138 }, { "epoch": 0.2394231575417121, "grad_norm": 0.39287387926708606, "learning_rate": 2e-05, "loss": 5.349, "step": 7139 }, { "epoch": 0.23945669489393812, "grad_norm": 0.41725292542624354, "learning_rate": 2e-05, "loss": 5.5912, "step": 7140 }, { "epoch": 0.23949023224616417, "grad_norm": 0.42064513002133325, "learning_rate": 2e-05, "loss": 5.4767, "step": 7141 }, { "epoch": 0.2395237695983902, "grad_norm": 0.39038132360147276, "learning_rate": 2e-05, "loss": 5.4715, "step": 7142 }, { "epoch": 0.23955730695061625, "grad_norm": 0.4063546012712161, "learning_rate": 2e-05, "loss": 5.4318, "step": 7143 }, { "epoch": 0.2395908443028423, "grad_norm": 0.4438508357505531, "learning_rate": 2e-05, "loss": 5.4907, "step": 7144 }, { "epoch": 0.23962438165506833, "grad_norm": 0.40636545610737423, "learning_rate": 2e-05, "loss": 5.4665, "step": 7145 }, { "epoch": 0.23965791900729438, "grad_norm": 0.42908786476317456, "learning_rate": 2e-05, "loss": 5.6959, "step": 7146 }, { "epoch": 0.2396914563595204, "grad_norm": 0.4053029187423674, "learning_rate": 2e-05, "loss": 5.4027, "step": 7147 }, { "epoch": 0.23972499371174646, "grad_norm": 0.40902220625312125, "learning_rate": 2e-05, "loss": 5.4156, "step": 7148 }, { "epoch": 0.2397585310639725, "grad_norm": 0.47191143499211935, "learning_rate": 2e-05, "loss": 5.4613, "step": 7149 }, { "epoch": 0.23979206841619854, "grad_norm": 0.4324554331344453, "learning_rate": 2e-05, "loss": 5.8484, "step": 7150 }, { "epoch": 0.23982560576842457, "grad_norm": 0.4477626075919717, "learning_rate": 2e-05, "loss": 5.3777, "step": 7151 }, { "epoch": 0.23985914312065063, "grad_norm": 0.42769218725867236, "learning_rate": 2e-05, "loss": 5.6333, "step": 7152 }, { "epoch": 0.23989268047287668, "grad_norm": 0.4507262931685071, "learning_rate": 2e-05, "loss": 5.5978, "step": 7153 }, { "epoch": 0.2399262178251027, "grad_norm": 0.436950993179638, "learning_rate": 2e-05, "loss": 5.5646, "step": 7154 }, { "epoch": 0.23995975517732876, "grad_norm": 0.4108897383266148, "learning_rate": 2e-05, "loss": 5.7799, "step": 7155 }, { "epoch": 0.23999329252955479, "grad_norm": 0.4267021916127006, "learning_rate": 2e-05, "loss": 5.4617, "step": 7156 }, { "epoch": 0.24002682988178084, "grad_norm": 0.4576624834340325, "learning_rate": 2e-05, "loss": 5.4091, "step": 7157 }, { "epoch": 0.24006036723400687, "grad_norm": 0.4102682673408626, "learning_rate": 2e-05, "loss": 5.6631, "step": 7158 }, { "epoch": 0.24009390458623292, "grad_norm": 0.41229233703958784, "learning_rate": 2e-05, "loss": 5.5403, "step": 7159 }, { "epoch": 0.24012744193845895, "grad_norm": 0.40824221511092407, "learning_rate": 2e-05, "loss": 5.4915, "step": 7160 }, { "epoch": 0.240160979290685, "grad_norm": 0.41991200660642897, "learning_rate": 2e-05, "loss": 5.4198, "step": 7161 }, { "epoch": 0.24019451664291105, "grad_norm": 0.41873711831380284, "learning_rate": 2e-05, "loss": 5.6164, "step": 7162 }, { "epoch": 0.24022805399513708, "grad_norm": 0.4121928938514092, "learning_rate": 2e-05, "loss": 5.5791, "step": 7163 }, { "epoch": 0.24026159134736313, "grad_norm": 0.4026179611281598, "learning_rate": 2e-05, "loss": 5.5388, "step": 7164 }, { "epoch": 0.24029512869958916, "grad_norm": 0.39006193189088967, "learning_rate": 2e-05, "loss": 5.7366, "step": 7165 }, { "epoch": 0.24032866605181522, "grad_norm": 0.3981871158223616, "learning_rate": 2e-05, "loss": 5.781, "step": 7166 }, { "epoch": 0.24036220340404124, "grad_norm": 0.42123567206918655, "learning_rate": 2e-05, "loss": 5.4897, "step": 7167 }, { "epoch": 0.2403957407562673, "grad_norm": 0.4133069038267945, "learning_rate": 2e-05, "loss": 5.7384, "step": 7168 }, { "epoch": 0.24042927810849332, "grad_norm": 0.3999985913835946, "learning_rate": 2e-05, "loss": 5.6291, "step": 7169 }, { "epoch": 0.24046281546071938, "grad_norm": 0.41472137279394583, "learning_rate": 2e-05, "loss": 5.7131, "step": 7170 }, { "epoch": 0.24049635281294543, "grad_norm": 0.4553907602739238, "learning_rate": 2e-05, "loss": 5.4774, "step": 7171 }, { "epoch": 0.24052989016517146, "grad_norm": 0.41109766004980447, "learning_rate": 2e-05, "loss": 5.4316, "step": 7172 }, { "epoch": 0.2405634275173975, "grad_norm": 0.4048970869688025, "learning_rate": 2e-05, "loss": 5.7715, "step": 7173 }, { "epoch": 0.24059696486962354, "grad_norm": 0.4093800170747989, "learning_rate": 2e-05, "loss": 5.4671, "step": 7174 }, { "epoch": 0.2406305022218496, "grad_norm": 0.3856203905240292, "learning_rate": 2e-05, "loss": 5.4416, "step": 7175 }, { "epoch": 0.24066403957407562, "grad_norm": 0.3913065352430262, "learning_rate": 2e-05, "loss": 5.4797, "step": 7176 }, { "epoch": 0.24069757692630167, "grad_norm": 0.46755271770651635, "learning_rate": 2e-05, "loss": 5.5522, "step": 7177 }, { "epoch": 0.2407311142785277, "grad_norm": 0.4249300983441669, "learning_rate": 2e-05, "loss": 5.7059, "step": 7178 }, { "epoch": 0.24076465163075375, "grad_norm": 0.4001022769647417, "learning_rate": 2e-05, "loss": 5.4727, "step": 7179 }, { "epoch": 0.2407981889829798, "grad_norm": 0.397395318343053, "learning_rate": 2e-05, "loss": 5.4242, "step": 7180 }, { "epoch": 0.24083172633520583, "grad_norm": 0.43956648877083987, "learning_rate": 2e-05, "loss": 5.5351, "step": 7181 }, { "epoch": 0.24086526368743189, "grad_norm": 0.443918163480878, "learning_rate": 2e-05, "loss": 5.6584, "step": 7182 }, { "epoch": 0.2408988010396579, "grad_norm": 0.41236911289403416, "learning_rate": 2e-05, "loss": 5.4677, "step": 7183 }, { "epoch": 0.24093233839188397, "grad_norm": 0.3977658069812571, "learning_rate": 2e-05, "loss": 5.5291, "step": 7184 }, { "epoch": 0.24096587574411, "grad_norm": 0.4224332501777208, "learning_rate": 2e-05, "loss": 5.6157, "step": 7185 }, { "epoch": 0.24099941309633605, "grad_norm": 0.40705654580660555, "learning_rate": 2e-05, "loss": 5.5726, "step": 7186 }, { "epoch": 0.24103295044856207, "grad_norm": 0.40605965583405274, "learning_rate": 2e-05, "loss": 5.6337, "step": 7187 }, { "epoch": 0.24106648780078813, "grad_norm": 0.43494098369803036, "learning_rate": 2e-05, "loss": 5.5198, "step": 7188 }, { "epoch": 0.24110002515301418, "grad_norm": 0.504850120492993, "learning_rate": 2e-05, "loss": 5.5394, "step": 7189 }, { "epoch": 0.2411335625052402, "grad_norm": 0.41276083379308515, "learning_rate": 2e-05, "loss": 5.5733, "step": 7190 }, { "epoch": 0.24116709985746626, "grad_norm": 0.4137325689456248, "learning_rate": 2e-05, "loss": 5.5279, "step": 7191 }, { "epoch": 0.2412006372096923, "grad_norm": 0.4160784489665338, "learning_rate": 2e-05, "loss": 5.5282, "step": 7192 }, { "epoch": 0.24123417456191834, "grad_norm": 0.44266273530631484, "learning_rate": 2e-05, "loss": 5.8338, "step": 7193 }, { "epoch": 0.24126771191414437, "grad_norm": 0.43491118704600706, "learning_rate": 2e-05, "loss": 5.6957, "step": 7194 }, { "epoch": 0.24130124926637042, "grad_norm": 0.46114718388333625, "learning_rate": 2e-05, "loss": 5.4887, "step": 7195 }, { "epoch": 0.24133478661859648, "grad_norm": 0.42061547631447227, "learning_rate": 2e-05, "loss": 5.4259, "step": 7196 }, { "epoch": 0.2413683239708225, "grad_norm": 0.40541673338816714, "learning_rate": 2e-05, "loss": 5.6832, "step": 7197 }, { "epoch": 0.24140186132304856, "grad_norm": 0.42965047903466785, "learning_rate": 2e-05, "loss": 5.5746, "step": 7198 }, { "epoch": 0.24143539867527458, "grad_norm": 0.452105780122535, "learning_rate": 2e-05, "loss": 5.5054, "step": 7199 }, { "epoch": 0.24146893602750064, "grad_norm": 0.4429233509107776, "learning_rate": 2e-05, "loss": 5.6207, "step": 7200 }, { "epoch": 0.24150247337972666, "grad_norm": 0.4231134215934593, "learning_rate": 2e-05, "loss": 5.3675, "step": 7201 }, { "epoch": 0.24153601073195272, "grad_norm": 0.4060301815528162, "learning_rate": 2e-05, "loss": 5.2236, "step": 7202 }, { "epoch": 0.24156954808417874, "grad_norm": 0.42912046339230986, "learning_rate": 2e-05, "loss": 5.7139, "step": 7203 }, { "epoch": 0.2416030854364048, "grad_norm": 0.40677848877650397, "learning_rate": 2e-05, "loss": 5.7878, "step": 7204 }, { "epoch": 0.24163662278863085, "grad_norm": 0.4619349112926033, "learning_rate": 2e-05, "loss": 5.4273, "step": 7205 }, { "epoch": 0.24167016014085688, "grad_norm": 0.4002081104799632, "learning_rate": 2e-05, "loss": 5.5877, "step": 7206 }, { "epoch": 0.24170369749308293, "grad_norm": 0.4415006402091745, "learning_rate": 2e-05, "loss": 5.567, "step": 7207 }, { "epoch": 0.24173723484530896, "grad_norm": 0.44796105875418574, "learning_rate": 2e-05, "loss": 5.4224, "step": 7208 }, { "epoch": 0.241770772197535, "grad_norm": 0.4180421083200138, "learning_rate": 2e-05, "loss": 5.5629, "step": 7209 }, { "epoch": 0.24180430954976104, "grad_norm": 0.45522885757655396, "learning_rate": 2e-05, "loss": 5.4297, "step": 7210 }, { "epoch": 0.2418378469019871, "grad_norm": 0.446903938488984, "learning_rate": 2e-05, "loss": 5.6397, "step": 7211 }, { "epoch": 0.24187138425421312, "grad_norm": 0.41130920384284037, "learning_rate": 2e-05, "loss": 5.5711, "step": 7212 }, { "epoch": 0.24190492160643917, "grad_norm": 0.44189427508776485, "learning_rate": 2e-05, "loss": 5.6269, "step": 7213 }, { "epoch": 0.24193845895866523, "grad_norm": 0.4015284574872279, "learning_rate": 2e-05, "loss": 5.7291, "step": 7214 }, { "epoch": 0.24197199631089125, "grad_norm": 0.39581864243436543, "learning_rate": 2e-05, "loss": 5.5603, "step": 7215 }, { "epoch": 0.2420055336631173, "grad_norm": 0.40014920908552976, "learning_rate": 2e-05, "loss": 5.4397, "step": 7216 }, { "epoch": 0.24203907101534333, "grad_norm": 0.4154221625942033, "learning_rate": 2e-05, "loss": 5.549, "step": 7217 }, { "epoch": 0.2420726083675694, "grad_norm": 0.43614314059723297, "learning_rate": 2e-05, "loss": 5.5759, "step": 7218 }, { "epoch": 0.2421061457197954, "grad_norm": 0.41501686697356205, "learning_rate": 2e-05, "loss": 5.5267, "step": 7219 }, { "epoch": 0.24213968307202147, "grad_norm": 0.41006779082515926, "learning_rate": 2e-05, "loss": 5.474, "step": 7220 }, { "epoch": 0.2421732204242475, "grad_norm": 0.41836720911086434, "learning_rate": 2e-05, "loss": 5.7279, "step": 7221 }, { "epoch": 0.24220675777647355, "grad_norm": 0.44793766878277413, "learning_rate": 2e-05, "loss": 5.583, "step": 7222 }, { "epoch": 0.2422402951286996, "grad_norm": 0.3917632809884335, "learning_rate": 2e-05, "loss": 5.5811, "step": 7223 }, { "epoch": 0.24227383248092563, "grad_norm": 0.4094936006942791, "learning_rate": 2e-05, "loss": 5.6585, "step": 7224 }, { "epoch": 0.24230736983315168, "grad_norm": 0.4154756042201101, "learning_rate": 2e-05, "loss": 5.6588, "step": 7225 }, { "epoch": 0.2423409071853777, "grad_norm": 0.42304990399312253, "learning_rate": 2e-05, "loss": 5.5528, "step": 7226 }, { "epoch": 0.24237444453760376, "grad_norm": 0.3967462715421193, "learning_rate": 2e-05, "loss": 5.6495, "step": 7227 }, { "epoch": 0.2424079818898298, "grad_norm": 0.43806844489970115, "learning_rate": 2e-05, "loss": 5.4683, "step": 7228 }, { "epoch": 0.24244151924205584, "grad_norm": 0.4224785870368922, "learning_rate": 2e-05, "loss": 5.4902, "step": 7229 }, { "epoch": 0.24247505659428187, "grad_norm": 0.3989561870978253, "learning_rate": 2e-05, "loss": 5.5157, "step": 7230 }, { "epoch": 0.24250859394650792, "grad_norm": 0.41211366825969725, "learning_rate": 2e-05, "loss": 5.5745, "step": 7231 }, { "epoch": 0.24254213129873398, "grad_norm": 0.40332621614819, "learning_rate": 2e-05, "loss": 5.6462, "step": 7232 }, { "epoch": 0.24257566865096, "grad_norm": 0.40970372390410376, "learning_rate": 2e-05, "loss": 5.2914, "step": 7233 }, { "epoch": 0.24260920600318606, "grad_norm": 0.4163554184195372, "learning_rate": 2e-05, "loss": 5.5763, "step": 7234 }, { "epoch": 0.24264274335541208, "grad_norm": 0.44435759850263534, "learning_rate": 2e-05, "loss": 5.5277, "step": 7235 }, { "epoch": 0.24267628070763814, "grad_norm": 0.4143861443139332, "learning_rate": 2e-05, "loss": 5.5527, "step": 7236 }, { "epoch": 0.24270981805986416, "grad_norm": 0.3810475314821738, "learning_rate": 2e-05, "loss": 5.5121, "step": 7237 }, { "epoch": 0.24274335541209022, "grad_norm": 0.4137968707620515, "learning_rate": 2e-05, "loss": 5.3594, "step": 7238 }, { "epoch": 0.24277689276431627, "grad_norm": 0.42487029650169533, "learning_rate": 2e-05, "loss": 5.4686, "step": 7239 }, { "epoch": 0.2428104301165423, "grad_norm": 0.42767207424561987, "learning_rate": 2e-05, "loss": 5.612, "step": 7240 }, { "epoch": 0.24284396746876835, "grad_norm": 0.4216960750276479, "learning_rate": 2e-05, "loss": 5.4659, "step": 7241 }, { "epoch": 0.24287750482099438, "grad_norm": 0.40863644474770777, "learning_rate": 2e-05, "loss": 5.5561, "step": 7242 }, { "epoch": 0.24291104217322043, "grad_norm": 0.44520962349527493, "learning_rate": 2e-05, "loss": 5.5288, "step": 7243 }, { "epoch": 0.24294457952544646, "grad_norm": 0.41288167112055346, "learning_rate": 2e-05, "loss": 5.6474, "step": 7244 }, { "epoch": 0.2429781168776725, "grad_norm": 0.44047242743700304, "learning_rate": 2e-05, "loss": 5.5148, "step": 7245 }, { "epoch": 0.24301165422989854, "grad_norm": 0.40915211847273136, "learning_rate": 2e-05, "loss": 5.3841, "step": 7246 }, { "epoch": 0.2430451915821246, "grad_norm": 0.43513098902033787, "learning_rate": 2e-05, "loss": 5.3861, "step": 7247 }, { "epoch": 0.24307872893435065, "grad_norm": 0.4112904547613977, "learning_rate": 2e-05, "loss": 5.2939, "step": 7248 }, { "epoch": 0.24311226628657667, "grad_norm": 0.4210923821009864, "learning_rate": 2e-05, "loss": 5.4426, "step": 7249 }, { "epoch": 0.24314580363880273, "grad_norm": 0.39382703971420396, "learning_rate": 2e-05, "loss": 5.5253, "step": 7250 }, { "epoch": 0.24317934099102875, "grad_norm": 0.4389840033534009, "learning_rate": 2e-05, "loss": 5.56, "step": 7251 }, { "epoch": 0.2432128783432548, "grad_norm": 0.41490379691135254, "learning_rate": 2e-05, "loss": 5.7279, "step": 7252 }, { "epoch": 0.24324641569548083, "grad_norm": 0.40638504876445697, "learning_rate": 2e-05, "loss": 5.6798, "step": 7253 }, { "epoch": 0.2432799530477069, "grad_norm": 0.42062268219098475, "learning_rate": 2e-05, "loss": 5.4251, "step": 7254 }, { "epoch": 0.2433134903999329, "grad_norm": 0.43178586148702475, "learning_rate": 2e-05, "loss": 5.6563, "step": 7255 }, { "epoch": 0.24334702775215897, "grad_norm": 0.4180784512943841, "learning_rate": 2e-05, "loss": 5.62, "step": 7256 }, { "epoch": 0.24338056510438502, "grad_norm": 0.40864966397730557, "learning_rate": 2e-05, "loss": 5.7509, "step": 7257 }, { "epoch": 0.24341410245661105, "grad_norm": 0.4665049161855023, "learning_rate": 2e-05, "loss": 5.6035, "step": 7258 }, { "epoch": 0.2434476398088371, "grad_norm": 0.476521551803704, "learning_rate": 2e-05, "loss": 5.6313, "step": 7259 }, { "epoch": 0.24348117716106313, "grad_norm": 0.4193585968323944, "learning_rate": 2e-05, "loss": 5.5454, "step": 7260 }, { "epoch": 0.24351471451328918, "grad_norm": 0.4060488693137392, "learning_rate": 2e-05, "loss": 5.5574, "step": 7261 }, { "epoch": 0.2435482518655152, "grad_norm": 0.48595417853300493, "learning_rate": 2e-05, "loss": 5.5413, "step": 7262 }, { "epoch": 0.24358178921774126, "grad_norm": 0.4546488426868079, "learning_rate": 2e-05, "loss": 5.6079, "step": 7263 }, { "epoch": 0.2436153265699673, "grad_norm": 0.4267831554588344, "learning_rate": 2e-05, "loss": 5.414, "step": 7264 }, { "epoch": 0.24364886392219334, "grad_norm": 0.43084080806033825, "learning_rate": 2e-05, "loss": 5.6642, "step": 7265 }, { "epoch": 0.2436824012744194, "grad_norm": 0.41627391881640874, "learning_rate": 2e-05, "loss": 5.5698, "step": 7266 }, { "epoch": 0.24371593862664542, "grad_norm": 0.43334220478891483, "learning_rate": 2e-05, "loss": 5.5483, "step": 7267 }, { "epoch": 0.24374947597887148, "grad_norm": 0.40909752692334855, "learning_rate": 2e-05, "loss": 5.5658, "step": 7268 }, { "epoch": 0.2437830133310975, "grad_norm": 0.4241522017669702, "learning_rate": 2e-05, "loss": 5.7154, "step": 7269 }, { "epoch": 0.24381655068332356, "grad_norm": 0.41191493397921414, "learning_rate": 2e-05, "loss": 5.5297, "step": 7270 }, { "epoch": 0.24385008803554958, "grad_norm": 0.42848794315025096, "learning_rate": 2e-05, "loss": 5.373, "step": 7271 }, { "epoch": 0.24388362538777564, "grad_norm": 0.3923114039649763, "learning_rate": 2e-05, "loss": 5.4324, "step": 7272 }, { "epoch": 0.24391716274000166, "grad_norm": 0.40383175649806324, "learning_rate": 2e-05, "loss": 5.5755, "step": 7273 }, { "epoch": 0.24395070009222772, "grad_norm": 0.42898910732106016, "learning_rate": 2e-05, "loss": 5.6042, "step": 7274 }, { "epoch": 0.24398423744445377, "grad_norm": 0.3932706741255838, "learning_rate": 2e-05, "loss": 5.4617, "step": 7275 }, { "epoch": 0.2440177747966798, "grad_norm": 0.41018284124353593, "learning_rate": 2e-05, "loss": 5.5975, "step": 7276 }, { "epoch": 0.24405131214890585, "grad_norm": 0.41263287656036324, "learning_rate": 2e-05, "loss": 5.6897, "step": 7277 }, { "epoch": 0.24408484950113188, "grad_norm": 0.39129997262286664, "learning_rate": 2e-05, "loss": 5.6613, "step": 7278 }, { "epoch": 0.24411838685335793, "grad_norm": 0.40211606155669904, "learning_rate": 2e-05, "loss": 5.6993, "step": 7279 }, { "epoch": 0.24415192420558396, "grad_norm": 0.4370907397150923, "learning_rate": 2e-05, "loss": 5.4987, "step": 7280 }, { "epoch": 0.24418546155781, "grad_norm": 0.4600098418410755, "learning_rate": 2e-05, "loss": 5.7601, "step": 7281 }, { "epoch": 0.24421899891003604, "grad_norm": 0.4155549987992205, "learning_rate": 2e-05, "loss": 5.6377, "step": 7282 }, { "epoch": 0.2442525362622621, "grad_norm": 0.418150341998943, "learning_rate": 2e-05, "loss": 5.5404, "step": 7283 }, { "epoch": 0.24428607361448815, "grad_norm": 0.4203112519956657, "learning_rate": 2e-05, "loss": 5.6898, "step": 7284 }, { "epoch": 0.24431961096671417, "grad_norm": 0.39908740799283593, "learning_rate": 2e-05, "loss": 5.45, "step": 7285 }, { "epoch": 0.24435314831894023, "grad_norm": 0.44410099709305606, "learning_rate": 2e-05, "loss": 5.5287, "step": 7286 }, { "epoch": 0.24438668567116625, "grad_norm": 0.39544212103707965, "learning_rate": 2e-05, "loss": 5.4523, "step": 7287 }, { "epoch": 0.2444202230233923, "grad_norm": 0.427951139983195, "learning_rate": 2e-05, "loss": 5.7181, "step": 7288 }, { "epoch": 0.24445376037561833, "grad_norm": 0.43447233475212255, "learning_rate": 2e-05, "loss": 5.5877, "step": 7289 }, { "epoch": 0.2444872977278444, "grad_norm": 0.4020172142417674, "learning_rate": 2e-05, "loss": 5.7194, "step": 7290 }, { "epoch": 0.24452083508007044, "grad_norm": 0.38993358725922517, "learning_rate": 2e-05, "loss": 5.5104, "step": 7291 }, { "epoch": 0.24455437243229647, "grad_norm": 0.4082166731230306, "learning_rate": 2e-05, "loss": 5.7205, "step": 7292 }, { "epoch": 0.24458790978452252, "grad_norm": 0.4377795819997396, "learning_rate": 2e-05, "loss": 5.6729, "step": 7293 }, { "epoch": 0.24462144713674855, "grad_norm": 0.4166593777552625, "learning_rate": 2e-05, "loss": 5.4229, "step": 7294 }, { "epoch": 0.2446549844889746, "grad_norm": 0.3865894949485458, "learning_rate": 2e-05, "loss": 5.5643, "step": 7295 }, { "epoch": 0.24468852184120063, "grad_norm": 0.4242313218366644, "learning_rate": 2e-05, "loss": 5.2558, "step": 7296 }, { "epoch": 0.24472205919342668, "grad_norm": 0.40154057379900426, "learning_rate": 2e-05, "loss": 5.4636, "step": 7297 }, { "epoch": 0.2447555965456527, "grad_norm": 0.41829219312860655, "learning_rate": 2e-05, "loss": 5.5951, "step": 7298 }, { "epoch": 0.24478913389787876, "grad_norm": 0.41431869423806533, "learning_rate": 2e-05, "loss": 5.543, "step": 7299 }, { "epoch": 0.24482267125010482, "grad_norm": 0.4172309354887602, "learning_rate": 2e-05, "loss": 5.5275, "step": 7300 }, { "epoch": 0.24485620860233084, "grad_norm": 0.42905191982497654, "learning_rate": 2e-05, "loss": 5.6396, "step": 7301 }, { "epoch": 0.2448897459545569, "grad_norm": 0.43446710734278515, "learning_rate": 2e-05, "loss": 5.5269, "step": 7302 }, { "epoch": 0.24492328330678292, "grad_norm": 0.3948180118291375, "learning_rate": 2e-05, "loss": 5.6561, "step": 7303 }, { "epoch": 0.24495682065900898, "grad_norm": 0.44644752303398644, "learning_rate": 2e-05, "loss": 5.7401, "step": 7304 }, { "epoch": 0.244990358011235, "grad_norm": 0.41457384908227923, "learning_rate": 2e-05, "loss": 5.5917, "step": 7305 }, { "epoch": 0.24502389536346106, "grad_norm": 0.4119068217234372, "learning_rate": 2e-05, "loss": 5.6657, "step": 7306 }, { "epoch": 0.24505743271568708, "grad_norm": 0.4237300604148494, "learning_rate": 2e-05, "loss": 5.5388, "step": 7307 }, { "epoch": 0.24509097006791314, "grad_norm": 0.4606383525944634, "learning_rate": 2e-05, "loss": 5.7582, "step": 7308 }, { "epoch": 0.2451245074201392, "grad_norm": 0.42653806488673107, "learning_rate": 2e-05, "loss": 5.7447, "step": 7309 }, { "epoch": 0.24515804477236522, "grad_norm": 0.4288711971948673, "learning_rate": 2e-05, "loss": 5.357, "step": 7310 }, { "epoch": 0.24519158212459127, "grad_norm": 0.4393475744534242, "learning_rate": 2e-05, "loss": 5.4707, "step": 7311 }, { "epoch": 0.2452251194768173, "grad_norm": 0.43478509652853353, "learning_rate": 2e-05, "loss": 5.4489, "step": 7312 }, { "epoch": 0.24525865682904335, "grad_norm": 0.4083571644392455, "learning_rate": 2e-05, "loss": 5.5115, "step": 7313 }, { "epoch": 0.24529219418126938, "grad_norm": 0.40144213903380727, "learning_rate": 2e-05, "loss": 5.59, "step": 7314 }, { "epoch": 0.24532573153349543, "grad_norm": 0.4533302630876408, "learning_rate": 2e-05, "loss": 5.3694, "step": 7315 }, { "epoch": 0.24535926888572146, "grad_norm": 0.42778986153976006, "learning_rate": 2e-05, "loss": 5.5463, "step": 7316 }, { "epoch": 0.24539280623794751, "grad_norm": 0.4140594023730771, "learning_rate": 2e-05, "loss": 5.441, "step": 7317 }, { "epoch": 0.24542634359017357, "grad_norm": 0.4255959015556585, "learning_rate": 2e-05, "loss": 5.5788, "step": 7318 }, { "epoch": 0.2454598809423996, "grad_norm": 0.46171988002519876, "learning_rate": 2e-05, "loss": 5.5143, "step": 7319 }, { "epoch": 0.24549341829462565, "grad_norm": 0.45489955957962175, "learning_rate": 2e-05, "loss": 5.334, "step": 7320 }, { "epoch": 0.24552695564685167, "grad_norm": 0.4019042003025307, "learning_rate": 2e-05, "loss": 5.8097, "step": 7321 }, { "epoch": 0.24556049299907773, "grad_norm": 0.40369240624333286, "learning_rate": 2e-05, "loss": 5.692, "step": 7322 }, { "epoch": 0.24559403035130375, "grad_norm": 0.42014513545775445, "learning_rate": 2e-05, "loss": 5.5969, "step": 7323 }, { "epoch": 0.2456275677035298, "grad_norm": 0.4454844043096177, "learning_rate": 2e-05, "loss": 5.7967, "step": 7324 }, { "epoch": 0.24566110505575584, "grad_norm": 0.4003547559225892, "learning_rate": 2e-05, "loss": 5.6325, "step": 7325 }, { "epoch": 0.2456946424079819, "grad_norm": 0.4083250436158727, "learning_rate": 2e-05, "loss": 5.6142, "step": 7326 }, { "epoch": 0.24572817976020794, "grad_norm": 0.417021928410807, "learning_rate": 2e-05, "loss": 5.5287, "step": 7327 }, { "epoch": 0.24576171711243397, "grad_norm": 0.43431696860907726, "learning_rate": 2e-05, "loss": 5.6215, "step": 7328 }, { "epoch": 0.24579525446466002, "grad_norm": 0.4144919617147779, "learning_rate": 2e-05, "loss": 5.6717, "step": 7329 }, { "epoch": 0.24582879181688605, "grad_norm": 0.41673060590740574, "learning_rate": 2e-05, "loss": 5.7056, "step": 7330 }, { "epoch": 0.2458623291691121, "grad_norm": 0.4158018874876663, "learning_rate": 2e-05, "loss": 5.62, "step": 7331 }, { "epoch": 0.24589586652133813, "grad_norm": 0.3877770134090114, "learning_rate": 2e-05, "loss": 5.7946, "step": 7332 }, { "epoch": 0.24592940387356418, "grad_norm": 0.4495339746027789, "learning_rate": 2e-05, "loss": 5.4343, "step": 7333 }, { "epoch": 0.2459629412257902, "grad_norm": 0.4218678542041085, "learning_rate": 2e-05, "loss": 5.6925, "step": 7334 }, { "epoch": 0.24599647857801626, "grad_norm": 0.4159433440512623, "learning_rate": 2e-05, "loss": 5.5476, "step": 7335 }, { "epoch": 0.24603001593024232, "grad_norm": 0.4290136565109328, "learning_rate": 2e-05, "loss": 5.3715, "step": 7336 }, { "epoch": 0.24606355328246834, "grad_norm": 0.3991572249524634, "learning_rate": 2e-05, "loss": 5.5142, "step": 7337 }, { "epoch": 0.2460970906346944, "grad_norm": 0.4000940542976735, "learning_rate": 2e-05, "loss": 5.7159, "step": 7338 }, { "epoch": 0.24613062798692042, "grad_norm": 0.4316044174100264, "learning_rate": 2e-05, "loss": 5.4354, "step": 7339 }, { "epoch": 0.24616416533914648, "grad_norm": 0.4302272647286517, "learning_rate": 2e-05, "loss": 5.5806, "step": 7340 }, { "epoch": 0.2461977026913725, "grad_norm": 0.3912463876208573, "learning_rate": 2e-05, "loss": 5.6101, "step": 7341 }, { "epoch": 0.24623124004359856, "grad_norm": 0.429066268973672, "learning_rate": 2e-05, "loss": 5.425, "step": 7342 }, { "epoch": 0.2462647773958246, "grad_norm": 0.4010187222720923, "learning_rate": 2e-05, "loss": 5.4736, "step": 7343 }, { "epoch": 0.24629831474805064, "grad_norm": 0.3866155147188603, "learning_rate": 2e-05, "loss": 5.3522, "step": 7344 }, { "epoch": 0.2463318521002767, "grad_norm": 0.4206523666237102, "learning_rate": 2e-05, "loss": 5.5926, "step": 7345 }, { "epoch": 0.24636538945250272, "grad_norm": 0.39796959659270753, "learning_rate": 2e-05, "loss": 5.4325, "step": 7346 }, { "epoch": 0.24639892680472877, "grad_norm": 0.43281018214089306, "learning_rate": 2e-05, "loss": 5.6939, "step": 7347 }, { "epoch": 0.2464324641569548, "grad_norm": 0.40895744035889375, "learning_rate": 2e-05, "loss": 5.5663, "step": 7348 }, { "epoch": 0.24646600150918085, "grad_norm": 0.39947045994325636, "learning_rate": 2e-05, "loss": 5.5035, "step": 7349 }, { "epoch": 0.24649953886140688, "grad_norm": 0.4291066986803248, "learning_rate": 2e-05, "loss": 5.5986, "step": 7350 }, { "epoch": 0.24653307621363293, "grad_norm": 0.38413102451670056, "learning_rate": 2e-05, "loss": 5.4432, "step": 7351 }, { "epoch": 0.246566613565859, "grad_norm": 0.38592114225573426, "learning_rate": 2e-05, "loss": 5.4719, "step": 7352 }, { "epoch": 0.24660015091808501, "grad_norm": 0.3986088640847563, "learning_rate": 2e-05, "loss": 5.7443, "step": 7353 }, { "epoch": 0.24663368827031107, "grad_norm": 0.3821049423935318, "learning_rate": 2e-05, "loss": 5.6164, "step": 7354 }, { "epoch": 0.2466672256225371, "grad_norm": 0.38976993231260193, "learning_rate": 2e-05, "loss": 5.5727, "step": 7355 }, { "epoch": 0.24670076297476315, "grad_norm": 0.419989524296931, "learning_rate": 2e-05, "loss": 5.6617, "step": 7356 }, { "epoch": 0.24673430032698918, "grad_norm": 0.4313932239502972, "learning_rate": 2e-05, "loss": 5.3842, "step": 7357 }, { "epoch": 0.24676783767921523, "grad_norm": 0.3997136715071625, "learning_rate": 2e-05, "loss": 5.5065, "step": 7358 }, { "epoch": 0.24680137503144126, "grad_norm": 0.3943090943700709, "learning_rate": 2e-05, "loss": 5.6115, "step": 7359 }, { "epoch": 0.2468349123836673, "grad_norm": 0.43921643428242324, "learning_rate": 2e-05, "loss": 5.6737, "step": 7360 }, { "epoch": 0.24686844973589336, "grad_norm": 0.46480165508016547, "learning_rate": 2e-05, "loss": 5.5633, "step": 7361 }, { "epoch": 0.2469019870881194, "grad_norm": 0.39891044268236425, "learning_rate": 2e-05, "loss": 5.5782, "step": 7362 }, { "epoch": 0.24693552444034544, "grad_norm": 0.4564307682626677, "learning_rate": 2e-05, "loss": 5.596, "step": 7363 }, { "epoch": 0.24696906179257147, "grad_norm": 0.41609366477675125, "learning_rate": 2e-05, "loss": 5.4906, "step": 7364 }, { "epoch": 0.24700259914479752, "grad_norm": 0.40325385518296974, "learning_rate": 2e-05, "loss": 5.4124, "step": 7365 }, { "epoch": 0.24703613649702355, "grad_norm": 0.44263805209342083, "learning_rate": 2e-05, "loss": 5.6341, "step": 7366 }, { "epoch": 0.2470696738492496, "grad_norm": 0.4515154633534592, "learning_rate": 2e-05, "loss": 5.4069, "step": 7367 }, { "epoch": 0.24710321120147563, "grad_norm": 0.41448664937427826, "learning_rate": 2e-05, "loss": 5.5413, "step": 7368 }, { "epoch": 0.24713674855370169, "grad_norm": 0.4345485108337104, "learning_rate": 2e-05, "loss": 5.458, "step": 7369 }, { "epoch": 0.24717028590592774, "grad_norm": 0.45587610442861926, "learning_rate": 2e-05, "loss": 5.5204, "step": 7370 }, { "epoch": 0.24720382325815377, "grad_norm": 0.38426769554971685, "learning_rate": 2e-05, "loss": 5.7053, "step": 7371 }, { "epoch": 0.24723736061037982, "grad_norm": 0.4650482470061347, "learning_rate": 2e-05, "loss": 5.5059, "step": 7372 }, { "epoch": 0.24727089796260585, "grad_norm": 0.41844290479040747, "learning_rate": 2e-05, "loss": 5.4142, "step": 7373 }, { "epoch": 0.2473044353148319, "grad_norm": 0.4013279557596427, "learning_rate": 2e-05, "loss": 5.4309, "step": 7374 }, { "epoch": 0.24733797266705793, "grad_norm": 0.4390241755713293, "learning_rate": 2e-05, "loss": 5.4343, "step": 7375 }, { "epoch": 0.24737151001928398, "grad_norm": 0.45303647818230974, "learning_rate": 2e-05, "loss": 5.5879, "step": 7376 }, { "epoch": 0.24740504737151, "grad_norm": 0.41817530463875385, "learning_rate": 2e-05, "loss": 5.2782, "step": 7377 }, { "epoch": 0.24743858472373606, "grad_norm": 0.42470261051361835, "learning_rate": 2e-05, "loss": 5.5749, "step": 7378 }, { "epoch": 0.24747212207596211, "grad_norm": 0.4287544554723061, "learning_rate": 2e-05, "loss": 5.5204, "step": 7379 }, { "epoch": 0.24750565942818814, "grad_norm": 0.4718334992279759, "learning_rate": 2e-05, "loss": 5.5605, "step": 7380 }, { "epoch": 0.2475391967804142, "grad_norm": 0.40744639559265344, "learning_rate": 2e-05, "loss": 5.7079, "step": 7381 }, { "epoch": 0.24757273413264022, "grad_norm": 0.4485411941869882, "learning_rate": 2e-05, "loss": 5.4032, "step": 7382 }, { "epoch": 0.24760627148486627, "grad_norm": 0.4327235924184248, "learning_rate": 2e-05, "loss": 5.3631, "step": 7383 }, { "epoch": 0.2476398088370923, "grad_norm": 0.39923033617347486, "learning_rate": 2e-05, "loss": 5.6746, "step": 7384 }, { "epoch": 0.24767334618931836, "grad_norm": 0.4180701476356559, "learning_rate": 2e-05, "loss": 5.5361, "step": 7385 }, { "epoch": 0.24770688354154438, "grad_norm": 0.4090942116505808, "learning_rate": 2e-05, "loss": 5.6267, "step": 7386 }, { "epoch": 0.24774042089377044, "grad_norm": 0.4009396067795556, "learning_rate": 2e-05, "loss": 5.3512, "step": 7387 }, { "epoch": 0.2477739582459965, "grad_norm": 0.38993538449832865, "learning_rate": 2e-05, "loss": 5.6654, "step": 7388 }, { "epoch": 0.24780749559822252, "grad_norm": 0.3919265863236874, "learning_rate": 2e-05, "loss": 5.4023, "step": 7389 }, { "epoch": 0.24784103295044857, "grad_norm": 0.38006221199210155, "learning_rate": 2e-05, "loss": 5.5967, "step": 7390 }, { "epoch": 0.2478745703026746, "grad_norm": 0.422936850055099, "learning_rate": 2e-05, "loss": 5.4071, "step": 7391 }, { "epoch": 0.24790810765490065, "grad_norm": 0.3944297712559516, "learning_rate": 2e-05, "loss": 5.5621, "step": 7392 }, { "epoch": 0.24794164500712668, "grad_norm": 0.43613379182680473, "learning_rate": 2e-05, "loss": 5.42, "step": 7393 }, { "epoch": 0.24797518235935273, "grad_norm": 0.44720482837531345, "learning_rate": 2e-05, "loss": 5.6082, "step": 7394 }, { "epoch": 0.24800871971157878, "grad_norm": 0.412162957621388, "learning_rate": 2e-05, "loss": 5.5971, "step": 7395 }, { "epoch": 0.2480422570638048, "grad_norm": 0.4079910362392394, "learning_rate": 2e-05, "loss": 5.6808, "step": 7396 }, { "epoch": 0.24807579441603086, "grad_norm": 0.4498385985364974, "learning_rate": 2e-05, "loss": 5.3993, "step": 7397 }, { "epoch": 0.2481093317682569, "grad_norm": 0.4250190248333128, "learning_rate": 2e-05, "loss": 5.5326, "step": 7398 }, { "epoch": 0.24814286912048295, "grad_norm": 0.41571427325547744, "learning_rate": 2e-05, "loss": 5.5433, "step": 7399 }, { "epoch": 0.24817640647270897, "grad_norm": 0.3858078261740412, "learning_rate": 2e-05, "loss": 5.8128, "step": 7400 }, { "epoch": 0.24820994382493503, "grad_norm": 0.43302738838113003, "learning_rate": 2e-05, "loss": 5.4701, "step": 7401 }, { "epoch": 0.24824348117716105, "grad_norm": 0.42665096679012354, "learning_rate": 2e-05, "loss": 5.6521, "step": 7402 }, { "epoch": 0.2482770185293871, "grad_norm": 0.4028428599832054, "learning_rate": 2e-05, "loss": 5.4898, "step": 7403 }, { "epoch": 0.24831055588161316, "grad_norm": 0.4088400268382619, "learning_rate": 2e-05, "loss": 5.389, "step": 7404 }, { "epoch": 0.24834409323383919, "grad_norm": 0.431493820488837, "learning_rate": 2e-05, "loss": 5.5102, "step": 7405 }, { "epoch": 0.24837763058606524, "grad_norm": 0.42828928959330276, "learning_rate": 2e-05, "loss": 5.4905, "step": 7406 }, { "epoch": 0.24841116793829127, "grad_norm": 0.40881883290659077, "learning_rate": 2e-05, "loss": 5.3121, "step": 7407 }, { "epoch": 0.24844470529051732, "grad_norm": 0.43453757613094474, "learning_rate": 2e-05, "loss": 5.3373, "step": 7408 }, { "epoch": 0.24847824264274335, "grad_norm": 0.4233515010690232, "learning_rate": 2e-05, "loss": 5.4518, "step": 7409 }, { "epoch": 0.2485117799949694, "grad_norm": 0.3906028516879295, "learning_rate": 2e-05, "loss": 5.5017, "step": 7410 }, { "epoch": 0.24854531734719543, "grad_norm": 0.40860651158434436, "learning_rate": 2e-05, "loss": 5.5063, "step": 7411 }, { "epoch": 0.24857885469942148, "grad_norm": 0.40186417284883125, "learning_rate": 2e-05, "loss": 5.782, "step": 7412 }, { "epoch": 0.24861239205164753, "grad_norm": 0.4389669494766069, "learning_rate": 2e-05, "loss": 5.4, "step": 7413 }, { "epoch": 0.24864592940387356, "grad_norm": 0.4071679396945033, "learning_rate": 2e-05, "loss": 5.496, "step": 7414 }, { "epoch": 0.24867946675609962, "grad_norm": 0.42178691149871966, "learning_rate": 2e-05, "loss": 5.7265, "step": 7415 }, { "epoch": 0.24871300410832564, "grad_norm": 0.42494548058546383, "learning_rate": 2e-05, "loss": 5.345, "step": 7416 }, { "epoch": 0.2487465414605517, "grad_norm": 0.40289021068897213, "learning_rate": 2e-05, "loss": 5.6229, "step": 7417 }, { "epoch": 0.24878007881277772, "grad_norm": 0.43264546763733464, "learning_rate": 2e-05, "loss": 5.5574, "step": 7418 }, { "epoch": 0.24881361616500378, "grad_norm": 0.3984908090046585, "learning_rate": 2e-05, "loss": 5.5106, "step": 7419 }, { "epoch": 0.2488471535172298, "grad_norm": 0.4214391918601935, "learning_rate": 2e-05, "loss": 5.6035, "step": 7420 }, { "epoch": 0.24888069086945586, "grad_norm": 0.4121022198472262, "learning_rate": 2e-05, "loss": 5.5183, "step": 7421 }, { "epoch": 0.2489142282216819, "grad_norm": 0.404171358749156, "learning_rate": 2e-05, "loss": 5.6108, "step": 7422 }, { "epoch": 0.24894776557390794, "grad_norm": 0.40945197131363503, "learning_rate": 2e-05, "loss": 5.4777, "step": 7423 }, { "epoch": 0.248981302926134, "grad_norm": 0.40167744397588134, "learning_rate": 2e-05, "loss": 5.5064, "step": 7424 }, { "epoch": 0.24901484027836002, "grad_norm": 0.4162678020946538, "learning_rate": 2e-05, "loss": 5.7672, "step": 7425 }, { "epoch": 0.24904837763058607, "grad_norm": 0.422411067506891, "learning_rate": 2e-05, "loss": 5.5795, "step": 7426 }, { "epoch": 0.2490819149828121, "grad_norm": 0.4127883087748523, "learning_rate": 2e-05, "loss": 5.5162, "step": 7427 }, { "epoch": 0.24911545233503815, "grad_norm": 0.4387264651730765, "learning_rate": 2e-05, "loss": 5.556, "step": 7428 }, { "epoch": 0.24914898968726418, "grad_norm": 0.4306499037718356, "learning_rate": 2e-05, "loss": 5.5118, "step": 7429 }, { "epoch": 0.24918252703949023, "grad_norm": 0.40770271435397193, "learning_rate": 2e-05, "loss": 5.5136, "step": 7430 }, { "epoch": 0.24921606439171629, "grad_norm": 0.409983157881464, "learning_rate": 2e-05, "loss": 5.5184, "step": 7431 }, { "epoch": 0.2492496017439423, "grad_norm": 0.41791682156978077, "learning_rate": 2e-05, "loss": 5.7114, "step": 7432 }, { "epoch": 0.24928313909616837, "grad_norm": 0.3922657226387623, "learning_rate": 2e-05, "loss": 5.5513, "step": 7433 }, { "epoch": 0.2493166764483944, "grad_norm": 0.4073086871377411, "learning_rate": 2e-05, "loss": 5.4004, "step": 7434 }, { "epoch": 0.24935021380062045, "grad_norm": 0.41016504503487067, "learning_rate": 2e-05, "loss": 5.6964, "step": 7435 }, { "epoch": 0.24938375115284647, "grad_norm": 0.41130547481845825, "learning_rate": 2e-05, "loss": 5.4386, "step": 7436 }, { "epoch": 0.24941728850507253, "grad_norm": 0.39879219026215856, "learning_rate": 2e-05, "loss": 5.5625, "step": 7437 }, { "epoch": 0.24945082585729855, "grad_norm": 0.3959489788802639, "learning_rate": 2e-05, "loss": 5.5381, "step": 7438 }, { "epoch": 0.2494843632095246, "grad_norm": 0.4195147435379613, "learning_rate": 2e-05, "loss": 5.7316, "step": 7439 }, { "epoch": 0.24951790056175066, "grad_norm": 0.41364181028000835, "learning_rate": 2e-05, "loss": 5.3613, "step": 7440 }, { "epoch": 0.2495514379139767, "grad_norm": 0.4362077981058673, "learning_rate": 2e-05, "loss": 5.6892, "step": 7441 }, { "epoch": 0.24958497526620274, "grad_norm": 0.41353388092437027, "learning_rate": 2e-05, "loss": 5.5404, "step": 7442 }, { "epoch": 0.24961851261842877, "grad_norm": 0.5307434452061184, "learning_rate": 2e-05, "loss": 5.5629, "step": 7443 }, { "epoch": 0.24965204997065482, "grad_norm": 0.45114227852149386, "learning_rate": 2e-05, "loss": 5.6139, "step": 7444 }, { "epoch": 0.24968558732288085, "grad_norm": 0.3818052275335721, "learning_rate": 2e-05, "loss": 5.2965, "step": 7445 }, { "epoch": 0.2497191246751069, "grad_norm": 0.4534417499949561, "learning_rate": 2e-05, "loss": 5.5193, "step": 7446 }, { "epoch": 0.24975266202733296, "grad_norm": 0.46529631215097983, "learning_rate": 2e-05, "loss": 5.4834, "step": 7447 }, { "epoch": 0.24978619937955898, "grad_norm": 0.4263648106494731, "learning_rate": 2e-05, "loss": 5.6811, "step": 7448 }, { "epoch": 0.24981973673178504, "grad_norm": 0.48524536071804136, "learning_rate": 2e-05, "loss": 5.6157, "step": 7449 }, { "epoch": 0.24985327408401106, "grad_norm": 0.42459358874605735, "learning_rate": 2e-05, "loss": 5.4457, "step": 7450 }, { "epoch": 0.24988681143623712, "grad_norm": 0.39929348133263465, "learning_rate": 2e-05, "loss": 5.4615, "step": 7451 }, { "epoch": 0.24992034878846314, "grad_norm": 0.426416887943843, "learning_rate": 2e-05, "loss": 5.6174, "step": 7452 }, { "epoch": 0.2499538861406892, "grad_norm": 0.47099385738699917, "learning_rate": 2e-05, "loss": 5.5937, "step": 7453 }, { "epoch": 0.24998742349291522, "grad_norm": 0.3989905382759475, "learning_rate": 2e-05, "loss": 5.3183, "step": 7454 }, { "epoch": 0.25002096084514125, "grad_norm": 0.44736651481177697, "learning_rate": 2e-05, "loss": 5.7802, "step": 7455 }, { "epoch": 0.2500544981973673, "grad_norm": 0.47452923381907874, "learning_rate": 2e-05, "loss": 5.6427, "step": 7456 }, { "epoch": 0.25008803554959336, "grad_norm": 0.45437234720504815, "learning_rate": 2e-05, "loss": 5.6499, "step": 7457 }, { "epoch": 0.2501215729018194, "grad_norm": 0.3990071498050306, "learning_rate": 2e-05, "loss": 5.7842, "step": 7458 }, { "epoch": 0.25015511025404547, "grad_norm": 0.4545257268531933, "learning_rate": 2e-05, "loss": 5.573, "step": 7459 }, { "epoch": 0.25018864760627146, "grad_norm": 0.45024777580382336, "learning_rate": 2e-05, "loss": 5.7402, "step": 7460 }, { "epoch": 0.2502221849584975, "grad_norm": 0.4028328819604428, "learning_rate": 2e-05, "loss": 5.7116, "step": 7461 }, { "epoch": 0.25025572231072357, "grad_norm": 0.44458865826303284, "learning_rate": 2e-05, "loss": 5.3989, "step": 7462 }, { "epoch": 0.2502892596629496, "grad_norm": 0.4680678790441565, "learning_rate": 2e-05, "loss": 5.6937, "step": 7463 }, { "epoch": 0.2503227970151756, "grad_norm": 0.4443694091619902, "learning_rate": 2e-05, "loss": 5.5117, "step": 7464 }, { "epoch": 0.2503563343674017, "grad_norm": 0.4591956241982985, "learning_rate": 2e-05, "loss": 5.4964, "step": 7465 }, { "epoch": 0.25038987171962773, "grad_norm": 0.4387744709814849, "learning_rate": 2e-05, "loss": 5.3868, "step": 7466 }, { "epoch": 0.2504234090718538, "grad_norm": 0.4141897384879715, "learning_rate": 2e-05, "loss": 5.6837, "step": 7467 }, { "epoch": 0.25045694642407984, "grad_norm": 0.40476901130462883, "learning_rate": 2e-05, "loss": 5.4584, "step": 7468 }, { "epoch": 0.25049048377630584, "grad_norm": 0.4311176681179517, "learning_rate": 2e-05, "loss": 5.6794, "step": 7469 }, { "epoch": 0.2505240211285319, "grad_norm": 0.42882758790163755, "learning_rate": 2e-05, "loss": 5.6475, "step": 7470 }, { "epoch": 0.25055755848075795, "grad_norm": 0.3933745519804256, "learning_rate": 2e-05, "loss": 5.678, "step": 7471 }, { "epoch": 0.250591095832984, "grad_norm": 0.39226829549205483, "learning_rate": 2e-05, "loss": 5.5875, "step": 7472 }, { "epoch": 0.25062463318521006, "grad_norm": 0.43387303380806036, "learning_rate": 2e-05, "loss": 5.5193, "step": 7473 }, { "epoch": 0.25065817053743605, "grad_norm": 0.4411422992509637, "learning_rate": 2e-05, "loss": 5.7293, "step": 7474 }, { "epoch": 0.2506917078896621, "grad_norm": 0.40173702362157343, "learning_rate": 2e-05, "loss": 5.4648, "step": 7475 }, { "epoch": 0.25072524524188816, "grad_norm": 0.4377802744560139, "learning_rate": 2e-05, "loss": 5.5399, "step": 7476 }, { "epoch": 0.2507587825941142, "grad_norm": 0.435068340459688, "learning_rate": 2e-05, "loss": 5.7643, "step": 7477 }, { "epoch": 0.2507923199463402, "grad_norm": 0.4423763228602525, "learning_rate": 2e-05, "loss": 5.6382, "step": 7478 }, { "epoch": 0.25082585729856627, "grad_norm": 0.4639326335447429, "learning_rate": 2e-05, "loss": 5.5389, "step": 7479 }, { "epoch": 0.2508593946507923, "grad_norm": 0.4525744493564813, "learning_rate": 2e-05, "loss": 5.3779, "step": 7480 }, { "epoch": 0.2508929320030184, "grad_norm": 0.4172519188927454, "learning_rate": 2e-05, "loss": 5.7078, "step": 7481 }, { "epoch": 0.25092646935524443, "grad_norm": 0.4320789509008364, "learning_rate": 2e-05, "loss": 5.6404, "step": 7482 }, { "epoch": 0.25096000670747043, "grad_norm": 0.4912360573133338, "learning_rate": 2e-05, "loss": 5.3933, "step": 7483 }, { "epoch": 0.2509935440596965, "grad_norm": 0.39136012474822574, "learning_rate": 2e-05, "loss": 5.663, "step": 7484 }, { "epoch": 0.25102708141192254, "grad_norm": 0.4442069826624463, "learning_rate": 2e-05, "loss": 5.4325, "step": 7485 }, { "epoch": 0.2510606187641486, "grad_norm": 0.4818476410549538, "learning_rate": 2e-05, "loss": 5.5216, "step": 7486 }, { "epoch": 0.2510941561163746, "grad_norm": 0.44747893472329525, "learning_rate": 2e-05, "loss": 5.4318, "step": 7487 }, { "epoch": 0.25112769346860064, "grad_norm": 0.3957259395173843, "learning_rate": 2e-05, "loss": 5.3971, "step": 7488 }, { "epoch": 0.2511612308208267, "grad_norm": 0.49629012259400335, "learning_rate": 2e-05, "loss": 5.466, "step": 7489 }, { "epoch": 0.25119476817305275, "grad_norm": 0.4169695394178449, "learning_rate": 2e-05, "loss": 5.3784, "step": 7490 }, { "epoch": 0.2512283055252788, "grad_norm": 0.3958216651436187, "learning_rate": 2e-05, "loss": 5.5501, "step": 7491 }, { "epoch": 0.2512618428775048, "grad_norm": 0.4236286758890101, "learning_rate": 2e-05, "loss": 5.5705, "step": 7492 }, { "epoch": 0.25129538022973086, "grad_norm": 0.4625673696380057, "learning_rate": 2e-05, "loss": 5.4809, "step": 7493 }, { "epoch": 0.2513289175819569, "grad_norm": 0.4059823430267562, "learning_rate": 2e-05, "loss": 5.6295, "step": 7494 }, { "epoch": 0.25136245493418297, "grad_norm": 0.4052504963305987, "learning_rate": 2e-05, "loss": 5.4449, "step": 7495 }, { "epoch": 0.25139599228640896, "grad_norm": 0.4650811716619383, "learning_rate": 2e-05, "loss": 5.5341, "step": 7496 }, { "epoch": 0.251429529638635, "grad_norm": 0.43714449265691363, "learning_rate": 2e-05, "loss": 5.5288, "step": 7497 }, { "epoch": 0.2514630669908611, "grad_norm": 0.4132234059223191, "learning_rate": 2e-05, "loss": 5.3856, "step": 7498 }, { "epoch": 0.2514966043430871, "grad_norm": 0.4469746091182627, "learning_rate": 2e-05, "loss": 5.5377, "step": 7499 }, { "epoch": 0.2515301416953132, "grad_norm": 0.42867154657188056, "learning_rate": 2e-05, "loss": 5.6107, "step": 7500 }, { "epoch": 0.2515636790475392, "grad_norm": 0.44595385297273904, "learning_rate": 2e-05, "loss": 5.2222, "step": 7501 }, { "epoch": 0.25159721639976523, "grad_norm": 0.4841320519163009, "learning_rate": 2e-05, "loss": 5.4818, "step": 7502 }, { "epoch": 0.2516307537519913, "grad_norm": 0.49882316145856104, "learning_rate": 2e-05, "loss": 5.569, "step": 7503 }, { "epoch": 0.25166429110421734, "grad_norm": 0.46379389521386455, "learning_rate": 2e-05, "loss": 5.7488, "step": 7504 }, { "epoch": 0.25169782845644334, "grad_norm": 0.4623809729181762, "learning_rate": 2e-05, "loss": 5.6879, "step": 7505 }, { "epoch": 0.2517313658086694, "grad_norm": 0.4211955143245347, "learning_rate": 2e-05, "loss": 5.4158, "step": 7506 }, { "epoch": 0.25176490316089545, "grad_norm": 0.456150215349589, "learning_rate": 2e-05, "loss": 5.6314, "step": 7507 }, { "epoch": 0.2517984405131215, "grad_norm": 0.4777026215106775, "learning_rate": 2e-05, "loss": 5.7219, "step": 7508 }, { "epoch": 0.25183197786534756, "grad_norm": 0.4210743309501562, "learning_rate": 2e-05, "loss": 5.7046, "step": 7509 }, { "epoch": 0.25186551521757355, "grad_norm": 0.42370591648921513, "learning_rate": 2e-05, "loss": 5.4162, "step": 7510 }, { "epoch": 0.2518990525697996, "grad_norm": 0.4742812070637246, "learning_rate": 2e-05, "loss": 5.6481, "step": 7511 }, { "epoch": 0.25193258992202566, "grad_norm": 0.5038303372084749, "learning_rate": 2e-05, "loss": 5.6449, "step": 7512 }, { "epoch": 0.2519661272742517, "grad_norm": 0.4540366560411621, "learning_rate": 2e-05, "loss": 5.5213, "step": 7513 }, { "epoch": 0.2519996646264777, "grad_norm": 0.40941885077282153, "learning_rate": 2e-05, "loss": 5.5354, "step": 7514 }, { "epoch": 0.25203320197870377, "grad_norm": 0.45906272481380306, "learning_rate": 2e-05, "loss": 5.6203, "step": 7515 }, { "epoch": 0.2520667393309298, "grad_norm": 0.467420613225269, "learning_rate": 2e-05, "loss": 5.3834, "step": 7516 }, { "epoch": 0.2521002766831559, "grad_norm": 0.4244847919850093, "learning_rate": 2e-05, "loss": 5.4479, "step": 7517 }, { "epoch": 0.25213381403538193, "grad_norm": 0.4313320518710457, "learning_rate": 2e-05, "loss": 5.4503, "step": 7518 }, { "epoch": 0.25216735138760793, "grad_norm": 0.4074967357940553, "learning_rate": 2e-05, "loss": 5.5496, "step": 7519 }, { "epoch": 0.252200888739834, "grad_norm": 0.4406116629655568, "learning_rate": 2e-05, "loss": 5.5709, "step": 7520 }, { "epoch": 0.25223442609206004, "grad_norm": 0.4214235214225516, "learning_rate": 2e-05, "loss": 5.6803, "step": 7521 }, { "epoch": 0.2522679634442861, "grad_norm": 0.4309879432452185, "learning_rate": 2e-05, "loss": 5.2454, "step": 7522 }, { "epoch": 0.2523015007965121, "grad_norm": 0.4042845242644598, "learning_rate": 2e-05, "loss": 5.6805, "step": 7523 }, { "epoch": 0.25233503814873814, "grad_norm": 0.41919593062711863, "learning_rate": 2e-05, "loss": 5.4612, "step": 7524 }, { "epoch": 0.2523685755009642, "grad_norm": 0.41833655986930557, "learning_rate": 2e-05, "loss": 5.53, "step": 7525 }, { "epoch": 0.25240211285319025, "grad_norm": 0.41784302206580837, "learning_rate": 2e-05, "loss": 5.485, "step": 7526 }, { "epoch": 0.2524356502054163, "grad_norm": 0.40712370921846963, "learning_rate": 2e-05, "loss": 5.7091, "step": 7527 }, { "epoch": 0.2524691875576423, "grad_norm": 0.4156580808209029, "learning_rate": 2e-05, "loss": 5.4822, "step": 7528 }, { "epoch": 0.25250272490986836, "grad_norm": 0.3921999247913614, "learning_rate": 2e-05, "loss": 5.5443, "step": 7529 }, { "epoch": 0.2525362622620944, "grad_norm": 0.4190926669832706, "learning_rate": 2e-05, "loss": 5.458, "step": 7530 }, { "epoch": 0.25256979961432047, "grad_norm": 0.41178829628806285, "learning_rate": 2e-05, "loss": 5.7568, "step": 7531 }, { "epoch": 0.25260333696654647, "grad_norm": 0.4008902930929028, "learning_rate": 2e-05, "loss": 5.6989, "step": 7532 }, { "epoch": 0.2526368743187725, "grad_norm": 0.4241935885648209, "learning_rate": 2e-05, "loss": 5.499, "step": 7533 }, { "epoch": 0.2526704116709986, "grad_norm": 0.39438142871274773, "learning_rate": 2e-05, "loss": 5.6307, "step": 7534 }, { "epoch": 0.2527039490232246, "grad_norm": 0.41895728744805, "learning_rate": 2e-05, "loss": 5.2854, "step": 7535 }, { "epoch": 0.2527374863754507, "grad_norm": 0.4413806446016244, "learning_rate": 2e-05, "loss": 5.5948, "step": 7536 }, { "epoch": 0.2527710237276767, "grad_norm": 0.41776312924103176, "learning_rate": 2e-05, "loss": 5.6147, "step": 7537 }, { "epoch": 0.25280456107990273, "grad_norm": 0.389480336235615, "learning_rate": 2e-05, "loss": 5.5665, "step": 7538 }, { "epoch": 0.2528380984321288, "grad_norm": 0.456630212160333, "learning_rate": 2e-05, "loss": 5.5074, "step": 7539 }, { "epoch": 0.25287163578435484, "grad_norm": 0.4241241718058411, "learning_rate": 2e-05, "loss": 5.5256, "step": 7540 }, { "epoch": 0.25290517313658084, "grad_norm": 0.438275947497942, "learning_rate": 2e-05, "loss": 5.6142, "step": 7541 }, { "epoch": 0.2529387104888069, "grad_norm": 0.4031023190615965, "learning_rate": 2e-05, "loss": 5.4002, "step": 7542 }, { "epoch": 0.25297224784103295, "grad_norm": 0.4114993875466142, "learning_rate": 2e-05, "loss": 5.5488, "step": 7543 }, { "epoch": 0.253005785193259, "grad_norm": 0.4079970196617503, "learning_rate": 2e-05, "loss": 5.5498, "step": 7544 }, { "epoch": 0.25303932254548506, "grad_norm": 0.4127801523707418, "learning_rate": 2e-05, "loss": 5.7228, "step": 7545 }, { "epoch": 0.25307285989771106, "grad_norm": 0.39855074738403534, "learning_rate": 2e-05, "loss": 5.5795, "step": 7546 }, { "epoch": 0.2531063972499371, "grad_norm": 0.4106545405358854, "learning_rate": 2e-05, "loss": 5.6669, "step": 7547 }, { "epoch": 0.25313993460216316, "grad_norm": 0.42461147877427535, "learning_rate": 2e-05, "loss": 5.5305, "step": 7548 }, { "epoch": 0.2531734719543892, "grad_norm": 0.42605016858602296, "learning_rate": 2e-05, "loss": 5.4667, "step": 7549 }, { "epoch": 0.2532070093066152, "grad_norm": 0.4113575233779811, "learning_rate": 2e-05, "loss": 5.584, "step": 7550 }, { "epoch": 0.25324054665884127, "grad_norm": 0.42078001772748835, "learning_rate": 2e-05, "loss": 5.4228, "step": 7551 }, { "epoch": 0.2532740840110673, "grad_norm": 0.46080978968502984, "learning_rate": 2e-05, "loss": 5.5147, "step": 7552 }, { "epoch": 0.2533076213632934, "grad_norm": 0.42915907383799495, "learning_rate": 2e-05, "loss": 5.4422, "step": 7553 }, { "epoch": 0.25334115871551943, "grad_norm": 0.46888664622987564, "learning_rate": 2e-05, "loss": 5.5443, "step": 7554 }, { "epoch": 0.25337469606774543, "grad_norm": 0.42681387254937797, "learning_rate": 2e-05, "loss": 5.4655, "step": 7555 }, { "epoch": 0.2534082334199715, "grad_norm": 0.4157973451374697, "learning_rate": 2e-05, "loss": 5.7772, "step": 7556 }, { "epoch": 0.25344177077219754, "grad_norm": 0.41267058762225317, "learning_rate": 2e-05, "loss": 5.6722, "step": 7557 }, { "epoch": 0.2534753081244236, "grad_norm": 0.41557959020998125, "learning_rate": 2e-05, "loss": 5.3901, "step": 7558 }, { "epoch": 0.2535088454766496, "grad_norm": 0.426992969768203, "learning_rate": 2e-05, "loss": 5.725, "step": 7559 }, { "epoch": 0.25354238282887565, "grad_norm": 0.42026582634521853, "learning_rate": 2e-05, "loss": 5.6827, "step": 7560 }, { "epoch": 0.2535759201811017, "grad_norm": 0.41372465079508924, "learning_rate": 2e-05, "loss": 5.4397, "step": 7561 }, { "epoch": 0.25360945753332775, "grad_norm": 0.4671631297069025, "learning_rate": 2e-05, "loss": 5.5294, "step": 7562 }, { "epoch": 0.2536429948855538, "grad_norm": 0.44406357546941155, "learning_rate": 2e-05, "loss": 5.585, "step": 7563 }, { "epoch": 0.2536765322377798, "grad_norm": 0.44891199758998634, "learning_rate": 2e-05, "loss": 5.4658, "step": 7564 }, { "epoch": 0.25371006959000586, "grad_norm": 0.41047942841803337, "learning_rate": 2e-05, "loss": 5.5753, "step": 7565 }, { "epoch": 0.2537436069422319, "grad_norm": 0.46174131983855105, "learning_rate": 2e-05, "loss": 5.4203, "step": 7566 }, { "epoch": 0.25377714429445797, "grad_norm": 0.4566960675797235, "learning_rate": 2e-05, "loss": 5.6162, "step": 7567 }, { "epoch": 0.25381068164668397, "grad_norm": 0.429498127279442, "learning_rate": 2e-05, "loss": 5.3185, "step": 7568 }, { "epoch": 0.25384421899891, "grad_norm": 0.4323922001464632, "learning_rate": 2e-05, "loss": 5.6964, "step": 7569 }, { "epoch": 0.2538777563511361, "grad_norm": 0.43169091519198566, "learning_rate": 2e-05, "loss": 5.3868, "step": 7570 }, { "epoch": 0.25391129370336213, "grad_norm": 0.42660153247708216, "learning_rate": 2e-05, "loss": 5.5747, "step": 7571 }, { "epoch": 0.2539448310555882, "grad_norm": 0.3994660628826817, "learning_rate": 2e-05, "loss": 5.5219, "step": 7572 }, { "epoch": 0.2539783684078142, "grad_norm": 0.43948468927280826, "learning_rate": 2e-05, "loss": 5.5727, "step": 7573 }, { "epoch": 0.25401190576004024, "grad_norm": 0.4285914279818128, "learning_rate": 2e-05, "loss": 5.6778, "step": 7574 }, { "epoch": 0.2540454431122663, "grad_norm": 0.41740319332762915, "learning_rate": 2e-05, "loss": 5.6114, "step": 7575 }, { "epoch": 0.25407898046449234, "grad_norm": 0.4318402454157769, "learning_rate": 2e-05, "loss": 5.6839, "step": 7576 }, { "epoch": 0.2541125178167184, "grad_norm": 0.40049973635150277, "learning_rate": 2e-05, "loss": 5.2961, "step": 7577 }, { "epoch": 0.2541460551689444, "grad_norm": 0.41280631794939177, "learning_rate": 2e-05, "loss": 5.6711, "step": 7578 }, { "epoch": 0.25417959252117045, "grad_norm": 0.4344994238994677, "learning_rate": 2e-05, "loss": 5.5195, "step": 7579 }, { "epoch": 0.2542131298733965, "grad_norm": 0.37390425869068145, "learning_rate": 2e-05, "loss": 5.2953, "step": 7580 }, { "epoch": 0.25424666722562256, "grad_norm": 0.4152224815341749, "learning_rate": 2e-05, "loss": 5.5914, "step": 7581 }, { "epoch": 0.25428020457784856, "grad_norm": 0.4173478727324958, "learning_rate": 2e-05, "loss": 5.531, "step": 7582 }, { "epoch": 0.2543137419300746, "grad_norm": 0.42963787652925683, "learning_rate": 2e-05, "loss": 5.3616, "step": 7583 }, { "epoch": 0.25434727928230066, "grad_norm": 0.37043198094921875, "learning_rate": 2e-05, "loss": 5.4503, "step": 7584 }, { "epoch": 0.2543808166345267, "grad_norm": 0.4219880662475447, "learning_rate": 2e-05, "loss": 5.5394, "step": 7585 }, { "epoch": 0.2544143539867528, "grad_norm": 0.4736325607115495, "learning_rate": 2e-05, "loss": 5.5135, "step": 7586 }, { "epoch": 0.25444789133897877, "grad_norm": 0.4139779149294624, "learning_rate": 2e-05, "loss": 5.536, "step": 7587 }, { "epoch": 0.2544814286912048, "grad_norm": 0.42456359035808616, "learning_rate": 2e-05, "loss": 5.6228, "step": 7588 }, { "epoch": 0.2545149660434309, "grad_norm": 0.42003912525298837, "learning_rate": 2e-05, "loss": 5.5194, "step": 7589 }, { "epoch": 0.25454850339565693, "grad_norm": 0.38734162737917976, "learning_rate": 2e-05, "loss": 5.5258, "step": 7590 }, { "epoch": 0.25458204074788293, "grad_norm": 0.4013825474758251, "learning_rate": 2e-05, "loss": 5.5023, "step": 7591 }, { "epoch": 0.254615578100109, "grad_norm": 0.38847677446813567, "learning_rate": 2e-05, "loss": 5.6156, "step": 7592 }, { "epoch": 0.25464911545233504, "grad_norm": 0.4045920990732631, "learning_rate": 2e-05, "loss": 5.5402, "step": 7593 }, { "epoch": 0.2546826528045611, "grad_norm": 0.39870771102727587, "learning_rate": 2e-05, "loss": 5.5309, "step": 7594 }, { "epoch": 0.25471619015678715, "grad_norm": 0.38727419843034605, "learning_rate": 2e-05, "loss": 5.4332, "step": 7595 }, { "epoch": 0.25474972750901315, "grad_norm": 0.4266406820024452, "learning_rate": 2e-05, "loss": 5.4638, "step": 7596 }, { "epoch": 0.2547832648612392, "grad_norm": 0.40743309024510477, "learning_rate": 2e-05, "loss": 5.7812, "step": 7597 }, { "epoch": 0.25481680221346525, "grad_norm": 0.4171857477103427, "learning_rate": 2e-05, "loss": 5.6931, "step": 7598 }, { "epoch": 0.2548503395656913, "grad_norm": 0.43976749070813465, "learning_rate": 2e-05, "loss": 5.5955, "step": 7599 }, { "epoch": 0.2548838769179173, "grad_norm": 0.38691454062076425, "learning_rate": 2e-05, "loss": 5.505, "step": 7600 }, { "epoch": 0.25491741427014336, "grad_norm": 0.41925179625766484, "learning_rate": 2e-05, "loss": 5.5273, "step": 7601 }, { "epoch": 0.2549509516223694, "grad_norm": 0.39956404660880673, "learning_rate": 2e-05, "loss": 5.548, "step": 7602 }, { "epoch": 0.25498448897459547, "grad_norm": 0.40655161818662267, "learning_rate": 2e-05, "loss": 5.3524, "step": 7603 }, { "epoch": 0.2550180263268215, "grad_norm": 0.38055016551102594, "learning_rate": 2e-05, "loss": 5.4117, "step": 7604 }, { "epoch": 0.2550515636790475, "grad_norm": 0.38942510788793194, "learning_rate": 2e-05, "loss": 5.5341, "step": 7605 }, { "epoch": 0.2550851010312736, "grad_norm": 0.44566867893031764, "learning_rate": 2e-05, "loss": 5.4068, "step": 7606 }, { "epoch": 0.25511863838349963, "grad_norm": 0.44152879781480453, "learning_rate": 2e-05, "loss": 5.7598, "step": 7607 }, { "epoch": 0.2551521757357257, "grad_norm": 0.3902236515047825, "learning_rate": 2e-05, "loss": 5.5925, "step": 7608 }, { "epoch": 0.2551857130879517, "grad_norm": 0.4133045323310657, "learning_rate": 2e-05, "loss": 5.3676, "step": 7609 }, { "epoch": 0.25521925044017774, "grad_norm": 0.40909293320614293, "learning_rate": 2e-05, "loss": 5.6088, "step": 7610 }, { "epoch": 0.2552527877924038, "grad_norm": 0.3974969819191066, "learning_rate": 2e-05, "loss": 5.4495, "step": 7611 }, { "epoch": 0.25528632514462984, "grad_norm": 0.39768956261139016, "learning_rate": 2e-05, "loss": 5.712, "step": 7612 }, { "epoch": 0.2553198624968559, "grad_norm": 0.4409404555547227, "learning_rate": 2e-05, "loss": 5.4439, "step": 7613 }, { "epoch": 0.2553533998490819, "grad_norm": 0.40684896717213836, "learning_rate": 2e-05, "loss": 5.58, "step": 7614 }, { "epoch": 0.25538693720130795, "grad_norm": 0.4080931316380655, "learning_rate": 2e-05, "loss": 5.4123, "step": 7615 }, { "epoch": 0.255420474553534, "grad_norm": 0.416601990623262, "learning_rate": 2e-05, "loss": 5.4319, "step": 7616 }, { "epoch": 0.25545401190576006, "grad_norm": 0.41893403919147953, "learning_rate": 2e-05, "loss": 5.7329, "step": 7617 }, { "epoch": 0.25548754925798606, "grad_norm": 0.421315741145484, "learning_rate": 2e-05, "loss": 5.4381, "step": 7618 }, { "epoch": 0.2555210866102121, "grad_norm": 0.39844082004872994, "learning_rate": 2e-05, "loss": 5.7777, "step": 7619 }, { "epoch": 0.25555462396243817, "grad_norm": 0.38767385767604207, "learning_rate": 2e-05, "loss": 5.7065, "step": 7620 }, { "epoch": 0.2555881613146642, "grad_norm": 0.43728593057649345, "learning_rate": 2e-05, "loss": 5.6423, "step": 7621 }, { "epoch": 0.2556216986668903, "grad_norm": 0.4166065536153484, "learning_rate": 2e-05, "loss": 5.6249, "step": 7622 }, { "epoch": 0.25565523601911627, "grad_norm": 0.39514584286818777, "learning_rate": 2e-05, "loss": 5.5499, "step": 7623 }, { "epoch": 0.2556887733713423, "grad_norm": 0.4359037765167219, "learning_rate": 2e-05, "loss": 5.3939, "step": 7624 }, { "epoch": 0.2557223107235684, "grad_norm": 0.41060872430918255, "learning_rate": 2e-05, "loss": 5.7567, "step": 7625 }, { "epoch": 0.25575584807579443, "grad_norm": 0.43410980874293964, "learning_rate": 2e-05, "loss": 5.5628, "step": 7626 }, { "epoch": 0.25578938542802043, "grad_norm": 0.40898008040763073, "learning_rate": 2e-05, "loss": 5.5112, "step": 7627 }, { "epoch": 0.2558229227802465, "grad_norm": 0.4129836030542937, "learning_rate": 2e-05, "loss": 5.4718, "step": 7628 }, { "epoch": 0.25585646013247254, "grad_norm": 0.41778321084956227, "learning_rate": 2e-05, "loss": 5.5704, "step": 7629 }, { "epoch": 0.2558899974846986, "grad_norm": 0.3952201087282049, "learning_rate": 2e-05, "loss": 5.6797, "step": 7630 }, { "epoch": 0.25592353483692465, "grad_norm": 0.39641787210654245, "learning_rate": 2e-05, "loss": 5.5827, "step": 7631 }, { "epoch": 0.25595707218915065, "grad_norm": 0.42584168220572555, "learning_rate": 2e-05, "loss": 5.5741, "step": 7632 }, { "epoch": 0.2559906095413767, "grad_norm": 0.41749815887959063, "learning_rate": 2e-05, "loss": 5.6375, "step": 7633 }, { "epoch": 0.25602414689360276, "grad_norm": 0.4252181096131336, "learning_rate": 2e-05, "loss": 5.7719, "step": 7634 }, { "epoch": 0.2560576842458288, "grad_norm": 0.39594534407927795, "learning_rate": 2e-05, "loss": 5.4971, "step": 7635 }, { "epoch": 0.2560912215980548, "grad_norm": 0.3914428484731217, "learning_rate": 2e-05, "loss": 5.5251, "step": 7636 }, { "epoch": 0.25612475895028086, "grad_norm": 0.4068571288715792, "learning_rate": 2e-05, "loss": 5.5634, "step": 7637 }, { "epoch": 0.2561582963025069, "grad_norm": 0.3960229844963865, "learning_rate": 2e-05, "loss": 5.7107, "step": 7638 }, { "epoch": 0.25619183365473297, "grad_norm": 0.38822974981964403, "learning_rate": 2e-05, "loss": 5.7285, "step": 7639 }, { "epoch": 0.256225371006959, "grad_norm": 0.4402480947143523, "learning_rate": 2e-05, "loss": 5.7496, "step": 7640 }, { "epoch": 0.256258908359185, "grad_norm": 0.39707010082627203, "learning_rate": 2e-05, "loss": 5.8387, "step": 7641 }, { "epoch": 0.2562924457114111, "grad_norm": 0.3970712641548753, "learning_rate": 2e-05, "loss": 5.7157, "step": 7642 }, { "epoch": 0.25632598306363713, "grad_norm": 0.42627122265821127, "learning_rate": 2e-05, "loss": 5.3009, "step": 7643 }, { "epoch": 0.2563595204158632, "grad_norm": 0.3870627898066188, "learning_rate": 2e-05, "loss": 5.6592, "step": 7644 }, { "epoch": 0.2563930577680892, "grad_norm": 0.40091467180439067, "learning_rate": 2e-05, "loss": 5.5122, "step": 7645 }, { "epoch": 0.25642659512031524, "grad_norm": 0.39483498633686404, "learning_rate": 2e-05, "loss": 5.7851, "step": 7646 }, { "epoch": 0.2564601324725413, "grad_norm": 0.40159849731090236, "learning_rate": 2e-05, "loss": 5.7609, "step": 7647 }, { "epoch": 0.25649366982476735, "grad_norm": 0.3767725147135203, "learning_rate": 2e-05, "loss": 5.583, "step": 7648 }, { "epoch": 0.2565272071769934, "grad_norm": 0.4135912925314849, "learning_rate": 2e-05, "loss": 5.522, "step": 7649 }, { "epoch": 0.2565607445292194, "grad_norm": 0.3854870518577914, "learning_rate": 2e-05, "loss": 5.6058, "step": 7650 }, { "epoch": 0.25659428188144545, "grad_norm": 0.4210098346776732, "learning_rate": 2e-05, "loss": 5.6035, "step": 7651 }, { "epoch": 0.2566278192336715, "grad_norm": 0.41590007368199544, "learning_rate": 2e-05, "loss": 5.5714, "step": 7652 }, { "epoch": 0.25666135658589756, "grad_norm": 0.37801694161417504, "learning_rate": 2e-05, "loss": 5.6701, "step": 7653 }, { "epoch": 0.25669489393812356, "grad_norm": 0.393667880727308, "learning_rate": 2e-05, "loss": 5.5581, "step": 7654 }, { "epoch": 0.2567284312903496, "grad_norm": 0.41398968109444334, "learning_rate": 2e-05, "loss": 5.554, "step": 7655 }, { "epoch": 0.25676196864257567, "grad_norm": 0.4025740494006266, "learning_rate": 2e-05, "loss": 5.4495, "step": 7656 }, { "epoch": 0.2567955059948017, "grad_norm": 0.45387042395592425, "learning_rate": 2e-05, "loss": 5.6263, "step": 7657 }, { "epoch": 0.2568290433470278, "grad_norm": 0.43263001777819343, "learning_rate": 2e-05, "loss": 5.4963, "step": 7658 }, { "epoch": 0.2568625806992538, "grad_norm": 0.471688225480153, "learning_rate": 2e-05, "loss": 5.45, "step": 7659 }, { "epoch": 0.2568961180514798, "grad_norm": 0.4480540923415364, "learning_rate": 2e-05, "loss": 5.5914, "step": 7660 }, { "epoch": 0.2569296554037059, "grad_norm": 0.4101234338977576, "learning_rate": 2e-05, "loss": 5.4751, "step": 7661 }, { "epoch": 0.25696319275593194, "grad_norm": 0.39722160933294764, "learning_rate": 2e-05, "loss": 5.4163, "step": 7662 }, { "epoch": 0.25699673010815793, "grad_norm": 0.41859079053952103, "learning_rate": 2e-05, "loss": 5.7865, "step": 7663 }, { "epoch": 0.257030267460384, "grad_norm": 0.4304620291594224, "learning_rate": 2e-05, "loss": 5.4507, "step": 7664 }, { "epoch": 0.25706380481261004, "grad_norm": 0.4386818191580843, "learning_rate": 2e-05, "loss": 5.7755, "step": 7665 }, { "epoch": 0.2570973421648361, "grad_norm": 0.42085957235629134, "learning_rate": 2e-05, "loss": 5.3406, "step": 7666 }, { "epoch": 0.25713087951706215, "grad_norm": 0.4265288486787988, "learning_rate": 2e-05, "loss": 5.5042, "step": 7667 }, { "epoch": 0.25716441686928815, "grad_norm": 0.41401788530757605, "learning_rate": 2e-05, "loss": 5.583, "step": 7668 }, { "epoch": 0.2571979542215142, "grad_norm": 0.40252931365676925, "learning_rate": 2e-05, "loss": 5.6022, "step": 7669 }, { "epoch": 0.25723149157374026, "grad_norm": 0.4188223412764425, "learning_rate": 2e-05, "loss": 5.4902, "step": 7670 }, { "epoch": 0.2572650289259663, "grad_norm": 0.41787494198700437, "learning_rate": 2e-05, "loss": 5.5372, "step": 7671 }, { "epoch": 0.25729856627819236, "grad_norm": 0.4362489632002235, "learning_rate": 2e-05, "loss": 5.541, "step": 7672 }, { "epoch": 0.25733210363041836, "grad_norm": 0.39672924048041985, "learning_rate": 2e-05, "loss": 5.4629, "step": 7673 }, { "epoch": 0.2573656409826444, "grad_norm": 0.39436414178701307, "learning_rate": 2e-05, "loss": 5.726, "step": 7674 }, { "epoch": 0.25739917833487047, "grad_norm": 0.4087524931174222, "learning_rate": 2e-05, "loss": 5.6669, "step": 7675 }, { "epoch": 0.2574327156870965, "grad_norm": 0.43072224500746453, "learning_rate": 2e-05, "loss": 5.3969, "step": 7676 }, { "epoch": 0.2574662530393225, "grad_norm": 0.4252935410896513, "learning_rate": 2e-05, "loss": 5.6627, "step": 7677 }, { "epoch": 0.2574997903915486, "grad_norm": 0.41560929226096427, "learning_rate": 2e-05, "loss": 5.4303, "step": 7678 }, { "epoch": 0.25753332774377463, "grad_norm": 0.47631738384876476, "learning_rate": 2e-05, "loss": 5.3397, "step": 7679 }, { "epoch": 0.2575668650960007, "grad_norm": 0.43515544320354416, "learning_rate": 2e-05, "loss": 5.7788, "step": 7680 }, { "epoch": 0.25760040244822674, "grad_norm": 0.40643296551964736, "learning_rate": 2e-05, "loss": 5.5244, "step": 7681 }, { "epoch": 0.25763393980045274, "grad_norm": 0.4407031967283672, "learning_rate": 2e-05, "loss": 5.5541, "step": 7682 }, { "epoch": 0.2576674771526788, "grad_norm": 0.4309084383167675, "learning_rate": 2e-05, "loss": 5.5922, "step": 7683 }, { "epoch": 0.25770101450490485, "grad_norm": 0.41659884451676327, "learning_rate": 2e-05, "loss": 5.5663, "step": 7684 }, { "epoch": 0.2577345518571309, "grad_norm": 0.4130776556026916, "learning_rate": 2e-05, "loss": 5.5091, "step": 7685 }, { "epoch": 0.2577680892093569, "grad_norm": 0.4276381139200555, "learning_rate": 2e-05, "loss": 5.5499, "step": 7686 }, { "epoch": 0.25780162656158295, "grad_norm": 0.42617247084073284, "learning_rate": 2e-05, "loss": 5.6794, "step": 7687 }, { "epoch": 0.257835163913809, "grad_norm": 0.47324578439864473, "learning_rate": 2e-05, "loss": 5.5979, "step": 7688 }, { "epoch": 0.25786870126603506, "grad_norm": 0.44842697037420204, "learning_rate": 2e-05, "loss": 5.8008, "step": 7689 }, { "epoch": 0.2579022386182611, "grad_norm": 0.4312595885262779, "learning_rate": 2e-05, "loss": 5.6316, "step": 7690 }, { "epoch": 0.2579357759704871, "grad_norm": 0.4426616313832017, "learning_rate": 2e-05, "loss": 5.3704, "step": 7691 }, { "epoch": 0.25796931332271317, "grad_norm": 0.4304028300693386, "learning_rate": 2e-05, "loss": 5.6903, "step": 7692 }, { "epoch": 0.2580028506749392, "grad_norm": 0.41199965849011766, "learning_rate": 2e-05, "loss": 5.6462, "step": 7693 }, { "epoch": 0.2580363880271653, "grad_norm": 0.4327588042079067, "learning_rate": 2e-05, "loss": 5.5102, "step": 7694 }, { "epoch": 0.2580699253793913, "grad_norm": 0.45403665159390855, "learning_rate": 2e-05, "loss": 5.5416, "step": 7695 }, { "epoch": 0.25810346273161733, "grad_norm": 0.3944628140120538, "learning_rate": 2e-05, "loss": 5.4982, "step": 7696 }, { "epoch": 0.2581370000838434, "grad_norm": 0.4536129207205487, "learning_rate": 2e-05, "loss": 5.5006, "step": 7697 }, { "epoch": 0.25817053743606944, "grad_norm": 0.43421705353634216, "learning_rate": 2e-05, "loss": 5.6995, "step": 7698 }, { "epoch": 0.2582040747882955, "grad_norm": 0.43149619983191523, "learning_rate": 2e-05, "loss": 5.5197, "step": 7699 }, { "epoch": 0.2582376121405215, "grad_norm": 0.4019733954167661, "learning_rate": 2e-05, "loss": 5.6258, "step": 7700 }, { "epoch": 0.25827114949274754, "grad_norm": 0.43119760337589813, "learning_rate": 2e-05, "loss": 5.5381, "step": 7701 }, { "epoch": 0.2583046868449736, "grad_norm": 0.40296016124973816, "learning_rate": 2e-05, "loss": 5.5389, "step": 7702 }, { "epoch": 0.25833822419719965, "grad_norm": 0.3955866746131784, "learning_rate": 2e-05, "loss": 5.7607, "step": 7703 }, { "epoch": 0.25837176154942565, "grad_norm": 0.3982753231078751, "learning_rate": 2e-05, "loss": 5.6108, "step": 7704 }, { "epoch": 0.2584052989016517, "grad_norm": 0.4924247312738456, "learning_rate": 2e-05, "loss": 5.708, "step": 7705 }, { "epoch": 0.25843883625387776, "grad_norm": 0.38168464758361714, "learning_rate": 2e-05, "loss": 5.6536, "step": 7706 }, { "epoch": 0.2584723736061038, "grad_norm": 0.4069824919848173, "learning_rate": 2e-05, "loss": 5.5045, "step": 7707 }, { "epoch": 0.25850591095832987, "grad_norm": 0.399866422489953, "learning_rate": 2e-05, "loss": 5.731, "step": 7708 }, { "epoch": 0.25853944831055586, "grad_norm": 0.4244015150496547, "learning_rate": 2e-05, "loss": 5.6128, "step": 7709 }, { "epoch": 0.2585729856627819, "grad_norm": 0.4066906546622518, "learning_rate": 2e-05, "loss": 5.6181, "step": 7710 }, { "epoch": 0.25860652301500797, "grad_norm": 0.3689874327268957, "learning_rate": 2e-05, "loss": 5.2516, "step": 7711 }, { "epoch": 0.258640060367234, "grad_norm": 0.41536881256422337, "learning_rate": 2e-05, "loss": 5.5229, "step": 7712 }, { "epoch": 0.25867359771946, "grad_norm": 0.4278391958952722, "learning_rate": 2e-05, "loss": 5.6426, "step": 7713 }, { "epoch": 0.2587071350716861, "grad_norm": 0.41732556126097264, "learning_rate": 2e-05, "loss": 5.5355, "step": 7714 }, { "epoch": 0.25874067242391213, "grad_norm": 0.41477590262298214, "learning_rate": 2e-05, "loss": 5.7758, "step": 7715 }, { "epoch": 0.2587742097761382, "grad_norm": 0.3971619827336448, "learning_rate": 2e-05, "loss": 5.4125, "step": 7716 }, { "epoch": 0.25880774712836424, "grad_norm": 0.39291962298412314, "learning_rate": 2e-05, "loss": 5.3744, "step": 7717 }, { "epoch": 0.25884128448059024, "grad_norm": 0.419305042219319, "learning_rate": 2e-05, "loss": 5.5783, "step": 7718 }, { "epoch": 0.2588748218328163, "grad_norm": 0.41378806859542844, "learning_rate": 2e-05, "loss": 5.7578, "step": 7719 }, { "epoch": 0.25890835918504235, "grad_norm": 0.3879536347314396, "learning_rate": 2e-05, "loss": 5.5139, "step": 7720 }, { "epoch": 0.2589418965372684, "grad_norm": 0.39488470908454015, "learning_rate": 2e-05, "loss": 5.5889, "step": 7721 }, { "epoch": 0.2589754338894944, "grad_norm": 0.40656699444148653, "learning_rate": 2e-05, "loss": 5.5308, "step": 7722 }, { "epoch": 0.25900897124172045, "grad_norm": 0.40420015587250685, "learning_rate": 2e-05, "loss": 5.4565, "step": 7723 }, { "epoch": 0.2590425085939465, "grad_norm": 0.4084018341119093, "learning_rate": 2e-05, "loss": 5.6346, "step": 7724 }, { "epoch": 0.25907604594617256, "grad_norm": 0.38579849968864544, "learning_rate": 2e-05, "loss": 5.5922, "step": 7725 }, { "epoch": 0.2591095832983986, "grad_norm": 0.4050333209080895, "learning_rate": 2e-05, "loss": 5.4941, "step": 7726 }, { "epoch": 0.2591431206506246, "grad_norm": 0.4458027801927214, "learning_rate": 2e-05, "loss": 5.3966, "step": 7727 }, { "epoch": 0.25917665800285067, "grad_norm": 0.40804127237330384, "learning_rate": 2e-05, "loss": 5.5693, "step": 7728 }, { "epoch": 0.2592101953550767, "grad_norm": 0.4104732384492037, "learning_rate": 2e-05, "loss": 5.5391, "step": 7729 }, { "epoch": 0.2592437327073028, "grad_norm": 0.43098080628714985, "learning_rate": 2e-05, "loss": 5.6814, "step": 7730 }, { "epoch": 0.2592772700595288, "grad_norm": 0.40758895031042897, "learning_rate": 2e-05, "loss": 5.4312, "step": 7731 }, { "epoch": 0.25931080741175483, "grad_norm": 0.40457187482330365, "learning_rate": 2e-05, "loss": 5.7282, "step": 7732 }, { "epoch": 0.2593443447639809, "grad_norm": 0.43693131460872364, "learning_rate": 2e-05, "loss": 5.6585, "step": 7733 }, { "epoch": 0.25937788211620694, "grad_norm": 0.4027771185748179, "learning_rate": 2e-05, "loss": 5.5548, "step": 7734 }, { "epoch": 0.259411419468433, "grad_norm": 0.4345585875435593, "learning_rate": 2e-05, "loss": 5.3928, "step": 7735 }, { "epoch": 0.259444956820659, "grad_norm": 0.4207896826382502, "learning_rate": 2e-05, "loss": 5.4057, "step": 7736 }, { "epoch": 0.25947849417288504, "grad_norm": 0.42938481286025626, "learning_rate": 2e-05, "loss": 5.608, "step": 7737 }, { "epoch": 0.2595120315251111, "grad_norm": 0.40256704336747084, "learning_rate": 2e-05, "loss": 5.602, "step": 7738 }, { "epoch": 0.25954556887733715, "grad_norm": 0.3944566708802331, "learning_rate": 2e-05, "loss": 5.7309, "step": 7739 }, { "epoch": 0.25957910622956315, "grad_norm": 0.4170882718586834, "learning_rate": 2e-05, "loss": 5.5121, "step": 7740 }, { "epoch": 0.2596126435817892, "grad_norm": 0.4530662606852841, "learning_rate": 2e-05, "loss": 5.459, "step": 7741 }, { "epoch": 0.25964618093401526, "grad_norm": 0.41195596306030174, "learning_rate": 2e-05, "loss": 5.5104, "step": 7742 }, { "epoch": 0.2596797182862413, "grad_norm": 0.40725867052321907, "learning_rate": 2e-05, "loss": 5.5497, "step": 7743 }, { "epoch": 0.25971325563846737, "grad_norm": 0.4204596856315337, "learning_rate": 2e-05, "loss": 5.7299, "step": 7744 }, { "epoch": 0.25974679299069336, "grad_norm": 0.4273594032545115, "learning_rate": 2e-05, "loss": 5.4222, "step": 7745 }, { "epoch": 0.2597803303429194, "grad_norm": 0.42293803670397784, "learning_rate": 2e-05, "loss": 5.4052, "step": 7746 }, { "epoch": 0.2598138676951455, "grad_norm": 0.4034834342385803, "learning_rate": 2e-05, "loss": 5.6425, "step": 7747 }, { "epoch": 0.2598474050473715, "grad_norm": 0.40139473572927103, "learning_rate": 2e-05, "loss": 5.3276, "step": 7748 }, { "epoch": 0.2598809423995975, "grad_norm": 0.42178896425236073, "learning_rate": 2e-05, "loss": 5.4419, "step": 7749 }, { "epoch": 0.2599144797518236, "grad_norm": 0.41059997432087947, "learning_rate": 2e-05, "loss": 5.6903, "step": 7750 }, { "epoch": 0.25994801710404963, "grad_norm": 0.4231992850777064, "learning_rate": 2e-05, "loss": 5.4777, "step": 7751 }, { "epoch": 0.2599815544562757, "grad_norm": 0.39564393462249364, "learning_rate": 2e-05, "loss": 5.5865, "step": 7752 }, { "epoch": 0.26001509180850174, "grad_norm": 0.43757129959301183, "learning_rate": 2e-05, "loss": 5.5175, "step": 7753 }, { "epoch": 0.26004862916072774, "grad_norm": 0.42024304639666493, "learning_rate": 2e-05, "loss": 5.6356, "step": 7754 }, { "epoch": 0.2600821665129538, "grad_norm": 0.3929180885350585, "learning_rate": 2e-05, "loss": 5.637, "step": 7755 }, { "epoch": 0.26011570386517985, "grad_norm": 0.43528780873744116, "learning_rate": 2e-05, "loss": 5.5788, "step": 7756 }, { "epoch": 0.2601492412174059, "grad_norm": 0.39419047925387707, "learning_rate": 2e-05, "loss": 5.3851, "step": 7757 }, { "epoch": 0.2601827785696319, "grad_norm": 0.42165592989515077, "learning_rate": 2e-05, "loss": 5.6474, "step": 7758 }, { "epoch": 0.26021631592185795, "grad_norm": 0.4031125365699952, "learning_rate": 2e-05, "loss": 5.5724, "step": 7759 }, { "epoch": 0.260249853274084, "grad_norm": 0.40658763075819926, "learning_rate": 2e-05, "loss": 5.5218, "step": 7760 }, { "epoch": 0.26028339062631006, "grad_norm": 0.4163676691528259, "learning_rate": 2e-05, "loss": 5.5662, "step": 7761 }, { "epoch": 0.2603169279785361, "grad_norm": 0.39730220806442956, "learning_rate": 2e-05, "loss": 5.6268, "step": 7762 }, { "epoch": 0.2603504653307621, "grad_norm": 0.3863652067651593, "learning_rate": 2e-05, "loss": 5.5431, "step": 7763 }, { "epoch": 0.26038400268298817, "grad_norm": 0.4010789513613316, "learning_rate": 2e-05, "loss": 5.6438, "step": 7764 }, { "epoch": 0.2604175400352142, "grad_norm": 0.4260163478317359, "learning_rate": 2e-05, "loss": 5.6094, "step": 7765 }, { "epoch": 0.2604510773874403, "grad_norm": 0.43349379649255587, "learning_rate": 2e-05, "loss": 5.5713, "step": 7766 }, { "epoch": 0.2604846147396663, "grad_norm": 0.41446304345325113, "learning_rate": 2e-05, "loss": 5.7548, "step": 7767 }, { "epoch": 0.26051815209189233, "grad_norm": 0.4424631607129033, "learning_rate": 2e-05, "loss": 5.5269, "step": 7768 }, { "epoch": 0.2605516894441184, "grad_norm": 0.4190316270117311, "learning_rate": 2e-05, "loss": 5.4694, "step": 7769 }, { "epoch": 0.26058522679634444, "grad_norm": 0.42633808168484433, "learning_rate": 2e-05, "loss": 5.6446, "step": 7770 }, { "epoch": 0.2606187641485705, "grad_norm": 0.40383044248263306, "learning_rate": 2e-05, "loss": 5.4549, "step": 7771 }, { "epoch": 0.2606523015007965, "grad_norm": 0.40564708747478473, "learning_rate": 2e-05, "loss": 5.6587, "step": 7772 }, { "epoch": 0.26068583885302254, "grad_norm": 0.41918687886400147, "learning_rate": 2e-05, "loss": 5.6054, "step": 7773 }, { "epoch": 0.2607193762052486, "grad_norm": 0.40501588675457123, "learning_rate": 2e-05, "loss": 5.6195, "step": 7774 }, { "epoch": 0.26075291355747465, "grad_norm": 0.38182570089972373, "learning_rate": 2e-05, "loss": 5.6511, "step": 7775 }, { "epoch": 0.2607864509097007, "grad_norm": 0.409053641903271, "learning_rate": 2e-05, "loss": 5.4308, "step": 7776 }, { "epoch": 0.2608199882619267, "grad_norm": 0.40237984180763237, "learning_rate": 2e-05, "loss": 5.5966, "step": 7777 }, { "epoch": 0.26085352561415276, "grad_norm": 0.397227968699785, "learning_rate": 2e-05, "loss": 5.7387, "step": 7778 }, { "epoch": 0.2608870629663788, "grad_norm": 0.386651834207112, "learning_rate": 2e-05, "loss": 5.6186, "step": 7779 }, { "epoch": 0.26092060031860487, "grad_norm": 0.3980255008352379, "learning_rate": 2e-05, "loss": 5.4759, "step": 7780 }, { "epoch": 0.26095413767083087, "grad_norm": 0.4060173594774028, "learning_rate": 2e-05, "loss": 5.6021, "step": 7781 }, { "epoch": 0.2609876750230569, "grad_norm": 0.4068642256715479, "learning_rate": 2e-05, "loss": 5.6756, "step": 7782 }, { "epoch": 0.261021212375283, "grad_norm": 0.39543434174934, "learning_rate": 2e-05, "loss": 5.6891, "step": 7783 }, { "epoch": 0.26105474972750903, "grad_norm": 0.40401310984157557, "learning_rate": 2e-05, "loss": 5.5448, "step": 7784 }, { "epoch": 0.2610882870797351, "grad_norm": 0.38574666208502395, "learning_rate": 2e-05, "loss": 5.6609, "step": 7785 }, { "epoch": 0.2611218244319611, "grad_norm": 0.4389642390474139, "learning_rate": 2e-05, "loss": 5.531, "step": 7786 }, { "epoch": 0.26115536178418713, "grad_norm": 0.44169950820470893, "learning_rate": 2e-05, "loss": 5.316, "step": 7787 }, { "epoch": 0.2611888991364132, "grad_norm": 0.43199389167801283, "learning_rate": 2e-05, "loss": 5.633, "step": 7788 }, { "epoch": 0.26122243648863924, "grad_norm": 0.41929722915816914, "learning_rate": 2e-05, "loss": 5.6184, "step": 7789 }, { "epoch": 0.26125597384086524, "grad_norm": 0.3974618218608074, "learning_rate": 2e-05, "loss": 5.2647, "step": 7790 }, { "epoch": 0.2612895111930913, "grad_norm": 0.3909419273017183, "learning_rate": 2e-05, "loss": 5.4973, "step": 7791 }, { "epoch": 0.26132304854531735, "grad_norm": 0.3967584175997162, "learning_rate": 2e-05, "loss": 5.674, "step": 7792 }, { "epoch": 0.2613565858975434, "grad_norm": 0.39861902664961435, "learning_rate": 2e-05, "loss": 5.5235, "step": 7793 }, { "epoch": 0.26139012324976946, "grad_norm": 0.39698968768367915, "learning_rate": 2e-05, "loss": 5.6981, "step": 7794 }, { "epoch": 0.26142366060199546, "grad_norm": 0.43099527425827944, "learning_rate": 2e-05, "loss": 5.4903, "step": 7795 }, { "epoch": 0.2614571979542215, "grad_norm": 0.4140351028412172, "learning_rate": 2e-05, "loss": 5.5994, "step": 7796 }, { "epoch": 0.26149073530644756, "grad_norm": 0.3747298096470196, "learning_rate": 2e-05, "loss": 5.3295, "step": 7797 }, { "epoch": 0.2615242726586736, "grad_norm": 0.3952365531189489, "learning_rate": 2e-05, "loss": 5.5581, "step": 7798 }, { "epoch": 0.2615578100108996, "grad_norm": 0.4286676195778689, "learning_rate": 2e-05, "loss": 5.6933, "step": 7799 }, { "epoch": 0.26159134736312567, "grad_norm": 0.4277275737251345, "learning_rate": 2e-05, "loss": 5.4432, "step": 7800 }, { "epoch": 0.2616248847153517, "grad_norm": 0.39081512010734293, "learning_rate": 2e-05, "loss": 5.3282, "step": 7801 }, { "epoch": 0.2616584220675778, "grad_norm": 0.4042616535303643, "learning_rate": 2e-05, "loss": 5.5714, "step": 7802 }, { "epoch": 0.26169195941980383, "grad_norm": 0.44838422641011877, "learning_rate": 2e-05, "loss": 5.7434, "step": 7803 }, { "epoch": 0.26172549677202983, "grad_norm": 0.3939048621067917, "learning_rate": 2e-05, "loss": 5.6313, "step": 7804 }, { "epoch": 0.2617590341242559, "grad_norm": 0.4307825111839262, "learning_rate": 2e-05, "loss": 5.5937, "step": 7805 }, { "epoch": 0.26179257147648194, "grad_norm": 0.4239415714256835, "learning_rate": 2e-05, "loss": 5.5745, "step": 7806 }, { "epoch": 0.261826108828708, "grad_norm": 0.41387030330015306, "learning_rate": 2e-05, "loss": 5.437, "step": 7807 }, { "epoch": 0.261859646180934, "grad_norm": 0.42152006347728554, "learning_rate": 2e-05, "loss": 5.7115, "step": 7808 }, { "epoch": 0.26189318353316005, "grad_norm": 0.43091150822181473, "learning_rate": 2e-05, "loss": 5.4513, "step": 7809 }, { "epoch": 0.2619267208853861, "grad_norm": 0.4149235610472827, "learning_rate": 2e-05, "loss": 5.4782, "step": 7810 }, { "epoch": 0.26196025823761215, "grad_norm": 0.40202220492075247, "learning_rate": 2e-05, "loss": 5.5289, "step": 7811 }, { "epoch": 0.2619937955898382, "grad_norm": 0.4315344651094597, "learning_rate": 2e-05, "loss": 5.4168, "step": 7812 }, { "epoch": 0.2620273329420642, "grad_norm": 0.4349840215274383, "learning_rate": 2e-05, "loss": 5.524, "step": 7813 }, { "epoch": 0.26206087029429026, "grad_norm": 0.3883561217531676, "learning_rate": 2e-05, "loss": 5.6107, "step": 7814 }, { "epoch": 0.2620944076465163, "grad_norm": 0.4292475278467018, "learning_rate": 2e-05, "loss": 5.6587, "step": 7815 }, { "epoch": 0.26212794499874237, "grad_norm": 0.4365453550506471, "learning_rate": 2e-05, "loss": 5.5209, "step": 7816 }, { "epoch": 0.26216148235096837, "grad_norm": 0.38893878445633123, "learning_rate": 2e-05, "loss": 5.5215, "step": 7817 }, { "epoch": 0.2621950197031944, "grad_norm": 0.39246823607173525, "learning_rate": 2e-05, "loss": 5.7056, "step": 7818 }, { "epoch": 0.2622285570554205, "grad_norm": 0.4232611427231658, "learning_rate": 2e-05, "loss": 5.6274, "step": 7819 }, { "epoch": 0.26226209440764653, "grad_norm": 0.4146436060453461, "learning_rate": 2e-05, "loss": 5.4018, "step": 7820 }, { "epoch": 0.2622956317598726, "grad_norm": 0.3866423864843106, "learning_rate": 2e-05, "loss": 5.3674, "step": 7821 }, { "epoch": 0.2623291691120986, "grad_norm": 0.41141935334537383, "learning_rate": 2e-05, "loss": 5.5329, "step": 7822 }, { "epoch": 0.26236270646432464, "grad_norm": 0.39459424060604303, "learning_rate": 2e-05, "loss": 5.6272, "step": 7823 }, { "epoch": 0.2623962438165507, "grad_norm": 0.4235399361177632, "learning_rate": 2e-05, "loss": 5.5943, "step": 7824 }, { "epoch": 0.26242978116877674, "grad_norm": 0.44124972148163566, "learning_rate": 2e-05, "loss": 5.2394, "step": 7825 }, { "epoch": 0.26246331852100274, "grad_norm": 0.40318497813907395, "learning_rate": 2e-05, "loss": 5.3436, "step": 7826 }, { "epoch": 0.2624968558732288, "grad_norm": 0.4375562319368683, "learning_rate": 2e-05, "loss": 5.6024, "step": 7827 }, { "epoch": 0.26253039322545485, "grad_norm": 0.4034425739609815, "learning_rate": 2e-05, "loss": 5.5262, "step": 7828 }, { "epoch": 0.2625639305776809, "grad_norm": 0.43830115183081975, "learning_rate": 2e-05, "loss": 5.5838, "step": 7829 }, { "epoch": 0.26259746792990696, "grad_norm": 0.39077498344617595, "learning_rate": 2e-05, "loss": 5.5962, "step": 7830 }, { "epoch": 0.26263100528213296, "grad_norm": 0.4078475438573897, "learning_rate": 2e-05, "loss": 5.511, "step": 7831 }, { "epoch": 0.262664542634359, "grad_norm": 0.40153262620833746, "learning_rate": 2e-05, "loss": 5.9119, "step": 7832 }, { "epoch": 0.26269807998658506, "grad_norm": 0.395681318799079, "learning_rate": 2e-05, "loss": 5.603, "step": 7833 }, { "epoch": 0.2627316173388111, "grad_norm": 0.41059969127805107, "learning_rate": 2e-05, "loss": 5.7927, "step": 7834 }, { "epoch": 0.2627651546910371, "grad_norm": 0.41601965156446347, "learning_rate": 2e-05, "loss": 5.4592, "step": 7835 }, { "epoch": 0.26279869204326317, "grad_norm": 0.40455181328258916, "learning_rate": 2e-05, "loss": 5.4953, "step": 7836 }, { "epoch": 0.2628322293954892, "grad_norm": 0.41213811286594965, "learning_rate": 2e-05, "loss": 5.6514, "step": 7837 }, { "epoch": 0.2628657667477153, "grad_norm": 0.3980938789034521, "learning_rate": 2e-05, "loss": 5.4326, "step": 7838 }, { "epoch": 0.26289930409994133, "grad_norm": 0.3911467503552144, "learning_rate": 2e-05, "loss": 5.5073, "step": 7839 }, { "epoch": 0.26293284145216733, "grad_norm": 0.40843326099520555, "learning_rate": 2e-05, "loss": 5.5302, "step": 7840 }, { "epoch": 0.2629663788043934, "grad_norm": 0.4118815998823778, "learning_rate": 2e-05, "loss": 5.2874, "step": 7841 }, { "epoch": 0.26299991615661944, "grad_norm": 0.3791098546273506, "learning_rate": 2e-05, "loss": 5.6413, "step": 7842 }, { "epoch": 0.2630334535088455, "grad_norm": 0.40666805046117055, "learning_rate": 2e-05, "loss": 5.3929, "step": 7843 }, { "epoch": 0.2630669908610715, "grad_norm": 0.4428439875855965, "learning_rate": 2e-05, "loss": 5.6869, "step": 7844 }, { "epoch": 0.26310052821329755, "grad_norm": 0.3788853588007283, "learning_rate": 2e-05, "loss": 5.4638, "step": 7845 }, { "epoch": 0.2631340655655236, "grad_norm": 0.4053776194521038, "learning_rate": 2e-05, "loss": 5.4439, "step": 7846 }, { "epoch": 0.26316760291774965, "grad_norm": 0.39968678009754066, "learning_rate": 2e-05, "loss": 5.4231, "step": 7847 }, { "epoch": 0.2632011402699757, "grad_norm": 0.40085176884704216, "learning_rate": 2e-05, "loss": 5.382, "step": 7848 }, { "epoch": 0.2632346776222017, "grad_norm": 0.4325283997093157, "learning_rate": 2e-05, "loss": 5.4271, "step": 7849 }, { "epoch": 0.26326821497442776, "grad_norm": 0.4302994673327398, "learning_rate": 2e-05, "loss": 5.5146, "step": 7850 }, { "epoch": 0.2633017523266538, "grad_norm": 0.41186483670822666, "learning_rate": 2e-05, "loss": 5.3473, "step": 7851 }, { "epoch": 0.26333528967887987, "grad_norm": 0.41516255461169227, "learning_rate": 2e-05, "loss": 5.5578, "step": 7852 }, { "epoch": 0.26336882703110587, "grad_norm": 0.4133050509717465, "learning_rate": 2e-05, "loss": 5.4435, "step": 7853 }, { "epoch": 0.2634023643833319, "grad_norm": 0.4097005381214751, "learning_rate": 2e-05, "loss": 5.5189, "step": 7854 }, { "epoch": 0.263435901735558, "grad_norm": 0.42567899393732206, "learning_rate": 2e-05, "loss": 5.8005, "step": 7855 }, { "epoch": 0.26346943908778403, "grad_norm": 0.459149610512999, "learning_rate": 2e-05, "loss": 5.4102, "step": 7856 }, { "epoch": 0.2635029764400101, "grad_norm": 0.4277611137265714, "learning_rate": 2e-05, "loss": 5.4862, "step": 7857 }, { "epoch": 0.2635365137922361, "grad_norm": 0.4247960428567147, "learning_rate": 2e-05, "loss": 5.3167, "step": 7858 }, { "epoch": 0.26357005114446214, "grad_norm": 0.4763242911236707, "learning_rate": 2e-05, "loss": 5.4428, "step": 7859 }, { "epoch": 0.2636035884966882, "grad_norm": 0.42233413985045615, "learning_rate": 2e-05, "loss": 5.6146, "step": 7860 }, { "epoch": 0.26363712584891424, "grad_norm": 0.41692221853264416, "learning_rate": 2e-05, "loss": 5.4627, "step": 7861 }, { "epoch": 0.26367066320114024, "grad_norm": 0.416324044418896, "learning_rate": 2e-05, "loss": 5.4345, "step": 7862 }, { "epoch": 0.2637042005533663, "grad_norm": 0.4267283536807432, "learning_rate": 2e-05, "loss": 5.6225, "step": 7863 }, { "epoch": 0.26373773790559235, "grad_norm": 0.466514967550382, "learning_rate": 2e-05, "loss": 5.3801, "step": 7864 }, { "epoch": 0.2637712752578184, "grad_norm": 0.4115413722977344, "learning_rate": 2e-05, "loss": 5.5075, "step": 7865 }, { "epoch": 0.26380481261004446, "grad_norm": 0.47475201533919476, "learning_rate": 2e-05, "loss": 5.3779, "step": 7866 }, { "epoch": 0.26383834996227046, "grad_norm": 0.4276139399516249, "learning_rate": 2e-05, "loss": 5.4213, "step": 7867 }, { "epoch": 0.2638718873144965, "grad_norm": 0.41703607722411096, "learning_rate": 2e-05, "loss": 5.5467, "step": 7868 }, { "epoch": 0.26390542466672257, "grad_norm": 0.4248716990467693, "learning_rate": 2e-05, "loss": 5.6424, "step": 7869 }, { "epoch": 0.2639389620189486, "grad_norm": 0.44756934623745864, "learning_rate": 2e-05, "loss": 5.6621, "step": 7870 }, { "epoch": 0.2639724993711746, "grad_norm": 0.41919675620213537, "learning_rate": 2e-05, "loss": 5.5574, "step": 7871 }, { "epoch": 0.26400603672340067, "grad_norm": 0.4134052099648643, "learning_rate": 2e-05, "loss": 5.3241, "step": 7872 }, { "epoch": 0.2640395740756267, "grad_norm": 0.4192810098233826, "learning_rate": 2e-05, "loss": 5.6498, "step": 7873 }, { "epoch": 0.2640731114278528, "grad_norm": 0.4637874418257186, "learning_rate": 2e-05, "loss": 5.4177, "step": 7874 }, { "epoch": 0.26410664878007883, "grad_norm": 0.4032693857902287, "learning_rate": 2e-05, "loss": 5.6436, "step": 7875 }, { "epoch": 0.26414018613230483, "grad_norm": 0.4618491847345454, "learning_rate": 2e-05, "loss": 5.7825, "step": 7876 }, { "epoch": 0.2641737234845309, "grad_norm": 0.40262635593706125, "learning_rate": 2e-05, "loss": 5.4974, "step": 7877 }, { "epoch": 0.26420726083675694, "grad_norm": 0.38829736193283754, "learning_rate": 2e-05, "loss": 5.6565, "step": 7878 }, { "epoch": 0.264240798188983, "grad_norm": 0.4123048338290554, "learning_rate": 2e-05, "loss": 5.4775, "step": 7879 }, { "epoch": 0.26427433554120905, "grad_norm": 0.4197980415069367, "learning_rate": 2e-05, "loss": 5.7564, "step": 7880 }, { "epoch": 0.26430787289343505, "grad_norm": 0.39813473255623827, "learning_rate": 2e-05, "loss": 5.6143, "step": 7881 }, { "epoch": 0.2643414102456611, "grad_norm": 0.4358526836306498, "learning_rate": 2e-05, "loss": 5.4784, "step": 7882 }, { "epoch": 0.26437494759788716, "grad_norm": 0.4142065346368484, "learning_rate": 2e-05, "loss": 5.4714, "step": 7883 }, { "epoch": 0.2644084849501132, "grad_norm": 0.3958858712531611, "learning_rate": 2e-05, "loss": 5.428, "step": 7884 }, { "epoch": 0.2644420223023392, "grad_norm": 0.390707799251104, "learning_rate": 2e-05, "loss": 5.6313, "step": 7885 }, { "epoch": 0.26447555965456526, "grad_norm": 0.4119997790797007, "learning_rate": 2e-05, "loss": 5.6621, "step": 7886 }, { "epoch": 0.2645090970067913, "grad_norm": 0.4084775880134557, "learning_rate": 2e-05, "loss": 5.5718, "step": 7887 }, { "epoch": 0.26454263435901737, "grad_norm": 0.39548167156542613, "learning_rate": 2e-05, "loss": 5.6117, "step": 7888 }, { "epoch": 0.2645761717112434, "grad_norm": 0.39156560525532097, "learning_rate": 2e-05, "loss": 5.6459, "step": 7889 }, { "epoch": 0.2646097090634694, "grad_norm": 0.43375705696921235, "learning_rate": 2e-05, "loss": 5.4859, "step": 7890 }, { "epoch": 0.2646432464156955, "grad_norm": 0.4202152199171607, "learning_rate": 2e-05, "loss": 5.4339, "step": 7891 }, { "epoch": 0.26467678376792153, "grad_norm": 0.40348995973371177, "learning_rate": 2e-05, "loss": 5.731, "step": 7892 }, { "epoch": 0.2647103211201476, "grad_norm": 0.45717816600576633, "learning_rate": 2e-05, "loss": 5.5847, "step": 7893 }, { "epoch": 0.2647438584723736, "grad_norm": 0.44734621228877924, "learning_rate": 2e-05, "loss": 5.5363, "step": 7894 }, { "epoch": 0.26477739582459964, "grad_norm": 0.3909940883436801, "learning_rate": 2e-05, "loss": 5.551, "step": 7895 }, { "epoch": 0.2648109331768257, "grad_norm": 0.44209672284997953, "learning_rate": 2e-05, "loss": 5.6264, "step": 7896 }, { "epoch": 0.26484447052905175, "grad_norm": 0.4333552803756077, "learning_rate": 2e-05, "loss": 5.5498, "step": 7897 }, { "epoch": 0.2648780078812778, "grad_norm": 0.43864490922491767, "learning_rate": 2e-05, "loss": 5.4986, "step": 7898 }, { "epoch": 0.2649115452335038, "grad_norm": 0.38393912326733765, "learning_rate": 2e-05, "loss": 5.5657, "step": 7899 }, { "epoch": 0.26494508258572985, "grad_norm": 0.48257859488528715, "learning_rate": 2e-05, "loss": 5.3374, "step": 7900 }, { "epoch": 0.2649786199379559, "grad_norm": 0.4381277753474094, "learning_rate": 2e-05, "loss": 5.5995, "step": 7901 }, { "epoch": 0.26501215729018196, "grad_norm": 0.4463445012034656, "learning_rate": 2e-05, "loss": 5.5011, "step": 7902 }, { "epoch": 0.26504569464240796, "grad_norm": 0.4183153980847296, "learning_rate": 2e-05, "loss": 5.4074, "step": 7903 }, { "epoch": 0.265079231994634, "grad_norm": 0.4702260866392888, "learning_rate": 2e-05, "loss": 5.5268, "step": 7904 }, { "epoch": 0.26511276934686007, "grad_norm": 0.40620818539448145, "learning_rate": 2e-05, "loss": 5.4605, "step": 7905 }, { "epoch": 0.2651463066990861, "grad_norm": 0.40316509188542243, "learning_rate": 2e-05, "loss": 5.6243, "step": 7906 }, { "epoch": 0.2651798440513122, "grad_norm": 0.41530916303500814, "learning_rate": 2e-05, "loss": 5.5741, "step": 7907 }, { "epoch": 0.2652133814035382, "grad_norm": 0.44740287438649273, "learning_rate": 2e-05, "loss": 5.6628, "step": 7908 }, { "epoch": 0.2652469187557642, "grad_norm": 0.4329649384829868, "learning_rate": 2e-05, "loss": 5.6832, "step": 7909 }, { "epoch": 0.2652804561079903, "grad_norm": 0.38416091037812666, "learning_rate": 2e-05, "loss": 5.4722, "step": 7910 }, { "epoch": 0.26531399346021634, "grad_norm": 0.41604613228840115, "learning_rate": 2e-05, "loss": 5.6004, "step": 7911 }, { "epoch": 0.26534753081244233, "grad_norm": 0.41624404893260136, "learning_rate": 2e-05, "loss": 5.3913, "step": 7912 }, { "epoch": 0.2653810681646684, "grad_norm": 0.4086998093139068, "learning_rate": 2e-05, "loss": 5.7429, "step": 7913 }, { "epoch": 0.26541460551689444, "grad_norm": 0.4355997282789697, "learning_rate": 2e-05, "loss": 5.6336, "step": 7914 }, { "epoch": 0.2654481428691205, "grad_norm": 0.4114388557592619, "learning_rate": 2e-05, "loss": 5.7219, "step": 7915 }, { "epoch": 0.26548168022134655, "grad_norm": 0.41185316463477817, "learning_rate": 2e-05, "loss": 5.7085, "step": 7916 }, { "epoch": 0.26551521757357255, "grad_norm": 0.3975239398211049, "learning_rate": 2e-05, "loss": 5.4894, "step": 7917 }, { "epoch": 0.2655487549257986, "grad_norm": 0.44233801140448525, "learning_rate": 2e-05, "loss": 5.4762, "step": 7918 }, { "epoch": 0.26558229227802466, "grad_norm": 0.44394538393375177, "learning_rate": 2e-05, "loss": 5.6648, "step": 7919 }, { "epoch": 0.2656158296302507, "grad_norm": 0.3951324697600354, "learning_rate": 2e-05, "loss": 5.4918, "step": 7920 }, { "epoch": 0.2656493669824767, "grad_norm": 0.42861348242663855, "learning_rate": 2e-05, "loss": 5.5397, "step": 7921 }, { "epoch": 0.26568290433470276, "grad_norm": 0.45312851056346315, "learning_rate": 2e-05, "loss": 5.4801, "step": 7922 }, { "epoch": 0.2657164416869288, "grad_norm": 0.41072287642643474, "learning_rate": 2e-05, "loss": 5.5035, "step": 7923 }, { "epoch": 0.26574997903915487, "grad_norm": 0.4029918464318737, "learning_rate": 2e-05, "loss": 5.6629, "step": 7924 }, { "epoch": 0.2657835163913809, "grad_norm": 0.42915697950312026, "learning_rate": 2e-05, "loss": 5.6484, "step": 7925 }, { "epoch": 0.2658170537436069, "grad_norm": 0.41329586612798724, "learning_rate": 2e-05, "loss": 5.3462, "step": 7926 }, { "epoch": 0.265850591095833, "grad_norm": 0.4323668638590562, "learning_rate": 2e-05, "loss": 5.6425, "step": 7927 }, { "epoch": 0.26588412844805903, "grad_norm": 0.37066358807652233, "learning_rate": 2e-05, "loss": 5.6947, "step": 7928 }, { "epoch": 0.2659176658002851, "grad_norm": 0.4160623283350826, "learning_rate": 2e-05, "loss": 5.3644, "step": 7929 }, { "epoch": 0.2659512031525111, "grad_norm": 0.43598205074170404, "learning_rate": 2e-05, "loss": 5.52, "step": 7930 }, { "epoch": 0.26598474050473714, "grad_norm": 0.3994922138270626, "learning_rate": 2e-05, "loss": 5.63, "step": 7931 }, { "epoch": 0.2660182778569632, "grad_norm": 0.4206009005303026, "learning_rate": 2e-05, "loss": 5.5978, "step": 7932 }, { "epoch": 0.26605181520918925, "grad_norm": 0.4345986332804542, "learning_rate": 2e-05, "loss": 5.3961, "step": 7933 }, { "epoch": 0.2660853525614153, "grad_norm": 0.4451175844747589, "learning_rate": 2e-05, "loss": 5.3705, "step": 7934 }, { "epoch": 0.2661188899136413, "grad_norm": 0.40307958852579384, "learning_rate": 2e-05, "loss": 5.3722, "step": 7935 }, { "epoch": 0.26615242726586735, "grad_norm": 0.4210983183161427, "learning_rate": 2e-05, "loss": 5.516, "step": 7936 }, { "epoch": 0.2661859646180934, "grad_norm": 0.4088643884520903, "learning_rate": 2e-05, "loss": 5.6318, "step": 7937 }, { "epoch": 0.26621950197031946, "grad_norm": 0.41880564229172035, "learning_rate": 2e-05, "loss": 5.586, "step": 7938 }, { "epoch": 0.26625303932254546, "grad_norm": 0.416555737127255, "learning_rate": 2e-05, "loss": 5.4041, "step": 7939 }, { "epoch": 0.2662865766747715, "grad_norm": 0.40429821479140504, "learning_rate": 2e-05, "loss": 5.292, "step": 7940 }, { "epoch": 0.26632011402699757, "grad_norm": 0.39293312469603603, "learning_rate": 2e-05, "loss": 5.5902, "step": 7941 }, { "epoch": 0.2663536513792236, "grad_norm": 0.4269527289971546, "learning_rate": 2e-05, "loss": 5.5512, "step": 7942 }, { "epoch": 0.2663871887314497, "grad_norm": 0.4193548038283657, "learning_rate": 2e-05, "loss": 5.6291, "step": 7943 }, { "epoch": 0.2664207260836757, "grad_norm": 0.3702686980304952, "learning_rate": 2e-05, "loss": 5.4199, "step": 7944 }, { "epoch": 0.26645426343590173, "grad_norm": 0.40821227016358674, "learning_rate": 2e-05, "loss": 5.2303, "step": 7945 }, { "epoch": 0.2664878007881278, "grad_norm": 0.4147593065644731, "learning_rate": 2e-05, "loss": 5.3651, "step": 7946 }, { "epoch": 0.26652133814035384, "grad_norm": 0.40975932777059937, "learning_rate": 2e-05, "loss": 5.5612, "step": 7947 }, { "epoch": 0.26655487549257983, "grad_norm": 0.39522371160708986, "learning_rate": 2e-05, "loss": 5.7081, "step": 7948 }, { "epoch": 0.2665884128448059, "grad_norm": 0.44487114349209533, "learning_rate": 2e-05, "loss": 5.4568, "step": 7949 }, { "epoch": 0.26662195019703194, "grad_norm": 0.38967623710060423, "learning_rate": 2e-05, "loss": 5.5009, "step": 7950 }, { "epoch": 0.266655487549258, "grad_norm": 0.39748797015798937, "learning_rate": 2e-05, "loss": 5.3832, "step": 7951 }, { "epoch": 0.26668902490148405, "grad_norm": 0.4050258289935741, "learning_rate": 2e-05, "loss": 5.4331, "step": 7952 }, { "epoch": 0.26672256225371005, "grad_norm": 0.4114943179343298, "learning_rate": 2e-05, "loss": 5.5431, "step": 7953 }, { "epoch": 0.2667560996059361, "grad_norm": 0.4353878525247504, "learning_rate": 2e-05, "loss": 5.5191, "step": 7954 }, { "epoch": 0.26678963695816216, "grad_norm": 0.41193064949966024, "learning_rate": 2e-05, "loss": 5.592, "step": 7955 }, { "epoch": 0.2668231743103882, "grad_norm": 0.4312406724535568, "learning_rate": 2e-05, "loss": 5.6491, "step": 7956 }, { "epoch": 0.2668567116626142, "grad_norm": 0.4010797370221922, "learning_rate": 2e-05, "loss": 5.5987, "step": 7957 }, { "epoch": 0.26689024901484026, "grad_norm": 0.44890399658003277, "learning_rate": 2e-05, "loss": 5.6707, "step": 7958 }, { "epoch": 0.2669237863670663, "grad_norm": 0.3985044411767856, "learning_rate": 2e-05, "loss": 5.5072, "step": 7959 }, { "epoch": 0.26695732371929237, "grad_norm": 0.4605803884166703, "learning_rate": 2e-05, "loss": 5.5964, "step": 7960 }, { "epoch": 0.2669908610715184, "grad_norm": 0.4512904744979873, "learning_rate": 2e-05, "loss": 5.6807, "step": 7961 }, { "epoch": 0.2670243984237444, "grad_norm": 0.42032858832459147, "learning_rate": 2e-05, "loss": 5.5797, "step": 7962 }, { "epoch": 0.2670579357759705, "grad_norm": 0.43406683231843624, "learning_rate": 2e-05, "loss": 5.4543, "step": 7963 }, { "epoch": 0.26709147312819653, "grad_norm": 0.4010784238946041, "learning_rate": 2e-05, "loss": 5.5216, "step": 7964 }, { "epoch": 0.2671250104804226, "grad_norm": 0.4218960843078834, "learning_rate": 2e-05, "loss": 5.632, "step": 7965 }, { "epoch": 0.2671585478326486, "grad_norm": 0.40574143316268846, "learning_rate": 2e-05, "loss": 5.4176, "step": 7966 }, { "epoch": 0.26719208518487464, "grad_norm": 0.4082646444825693, "learning_rate": 2e-05, "loss": 5.6293, "step": 7967 }, { "epoch": 0.2672256225371007, "grad_norm": 0.43005032568050344, "learning_rate": 2e-05, "loss": 5.5548, "step": 7968 }, { "epoch": 0.26725915988932675, "grad_norm": 0.4239815978090803, "learning_rate": 2e-05, "loss": 5.6678, "step": 7969 }, { "epoch": 0.2672926972415528, "grad_norm": 0.4335478913147225, "learning_rate": 2e-05, "loss": 5.4912, "step": 7970 }, { "epoch": 0.2673262345937788, "grad_norm": 0.40321334403997056, "learning_rate": 2e-05, "loss": 5.6269, "step": 7971 }, { "epoch": 0.26735977194600485, "grad_norm": 0.41411483706379265, "learning_rate": 2e-05, "loss": 5.3817, "step": 7972 }, { "epoch": 0.2673933092982309, "grad_norm": 0.4261373729616435, "learning_rate": 2e-05, "loss": 5.4934, "step": 7973 }, { "epoch": 0.26742684665045696, "grad_norm": 0.45318469933514677, "learning_rate": 2e-05, "loss": 5.5352, "step": 7974 }, { "epoch": 0.26746038400268296, "grad_norm": 0.4197186778000799, "learning_rate": 2e-05, "loss": 5.3008, "step": 7975 }, { "epoch": 0.267493921354909, "grad_norm": 0.4606028786227758, "learning_rate": 2e-05, "loss": 5.6834, "step": 7976 }, { "epoch": 0.26752745870713507, "grad_norm": 0.40076302967203353, "learning_rate": 2e-05, "loss": 5.5573, "step": 7977 }, { "epoch": 0.2675609960593611, "grad_norm": 0.4192230505086319, "learning_rate": 2e-05, "loss": 5.6109, "step": 7978 }, { "epoch": 0.2675945334115872, "grad_norm": 0.3905529275646823, "learning_rate": 2e-05, "loss": 5.4869, "step": 7979 }, { "epoch": 0.2676280707638132, "grad_norm": 0.4027585519778622, "learning_rate": 2e-05, "loss": 5.5791, "step": 7980 }, { "epoch": 0.26766160811603923, "grad_norm": 0.4451543467080938, "learning_rate": 2e-05, "loss": 5.5381, "step": 7981 }, { "epoch": 0.2676951454682653, "grad_norm": 0.40294173399932726, "learning_rate": 2e-05, "loss": 5.7671, "step": 7982 }, { "epoch": 0.26772868282049134, "grad_norm": 0.3904327877141427, "learning_rate": 2e-05, "loss": 5.6082, "step": 7983 }, { "epoch": 0.2677622201727174, "grad_norm": 0.4264284596646759, "learning_rate": 2e-05, "loss": 5.6461, "step": 7984 }, { "epoch": 0.2677957575249434, "grad_norm": 0.43049211112332125, "learning_rate": 2e-05, "loss": 5.6345, "step": 7985 }, { "epoch": 0.26782929487716944, "grad_norm": 0.4301969071310566, "learning_rate": 2e-05, "loss": 5.6447, "step": 7986 }, { "epoch": 0.2678628322293955, "grad_norm": 0.43883464919969334, "learning_rate": 2e-05, "loss": 5.6738, "step": 7987 }, { "epoch": 0.26789636958162155, "grad_norm": 0.4289532148479637, "learning_rate": 2e-05, "loss": 5.6103, "step": 7988 }, { "epoch": 0.26792990693384755, "grad_norm": 0.40488724328377307, "learning_rate": 2e-05, "loss": 5.6056, "step": 7989 }, { "epoch": 0.2679634442860736, "grad_norm": 0.4162333466886629, "learning_rate": 2e-05, "loss": 5.5856, "step": 7990 }, { "epoch": 0.26799698163829966, "grad_norm": 0.41221101918198116, "learning_rate": 2e-05, "loss": 5.4471, "step": 7991 }, { "epoch": 0.2680305189905257, "grad_norm": 0.41854104273890497, "learning_rate": 2e-05, "loss": 5.6361, "step": 7992 }, { "epoch": 0.26806405634275177, "grad_norm": 0.4163942504486926, "learning_rate": 2e-05, "loss": 5.5323, "step": 7993 }, { "epoch": 0.26809759369497776, "grad_norm": 0.40877559166906335, "learning_rate": 2e-05, "loss": 5.603, "step": 7994 }, { "epoch": 0.2681311310472038, "grad_norm": 0.410749046757467, "learning_rate": 2e-05, "loss": 5.5773, "step": 7995 }, { "epoch": 0.2681646683994299, "grad_norm": 0.41500443319044517, "learning_rate": 2e-05, "loss": 5.4796, "step": 7996 }, { "epoch": 0.2681982057516559, "grad_norm": 0.4004170657087304, "learning_rate": 2e-05, "loss": 5.4183, "step": 7997 }, { "epoch": 0.2682317431038819, "grad_norm": 0.4061400908035317, "learning_rate": 2e-05, "loss": 5.6658, "step": 7998 }, { "epoch": 0.268265280456108, "grad_norm": 0.4008662749185075, "learning_rate": 2e-05, "loss": 5.5795, "step": 7999 }, { "epoch": 0.26829881780833403, "grad_norm": 0.40161355946021365, "learning_rate": 2e-05, "loss": 5.5677, "step": 8000 }, { "epoch": 0.2683323551605601, "grad_norm": 0.4026708645062183, "learning_rate": 2e-05, "loss": 5.4835, "step": 8001 }, { "epoch": 0.26836589251278614, "grad_norm": 0.39703093179577414, "learning_rate": 2e-05, "loss": 5.3209, "step": 8002 }, { "epoch": 0.26839942986501214, "grad_norm": 0.3952814180747127, "learning_rate": 2e-05, "loss": 5.3506, "step": 8003 }, { "epoch": 0.2684329672172382, "grad_norm": 0.39526527438204784, "learning_rate": 2e-05, "loss": 5.4584, "step": 8004 }, { "epoch": 0.26846650456946425, "grad_norm": 0.42233622101052276, "learning_rate": 2e-05, "loss": 5.6375, "step": 8005 }, { "epoch": 0.2685000419216903, "grad_norm": 0.39989761147920005, "learning_rate": 2e-05, "loss": 5.3378, "step": 8006 }, { "epoch": 0.2685335792739163, "grad_norm": 0.3822838671006359, "learning_rate": 2e-05, "loss": 5.4342, "step": 8007 }, { "epoch": 0.26856711662614235, "grad_norm": 0.41094772125419493, "learning_rate": 2e-05, "loss": 5.5881, "step": 8008 }, { "epoch": 0.2686006539783684, "grad_norm": 0.39117689495360697, "learning_rate": 2e-05, "loss": 5.5869, "step": 8009 }, { "epoch": 0.26863419133059446, "grad_norm": 0.41864064134693235, "learning_rate": 2e-05, "loss": 5.348, "step": 8010 }, { "epoch": 0.2686677286828205, "grad_norm": 0.4341462901312165, "learning_rate": 2e-05, "loss": 5.3839, "step": 8011 }, { "epoch": 0.2687012660350465, "grad_norm": 0.40431342842561674, "learning_rate": 2e-05, "loss": 5.5444, "step": 8012 }, { "epoch": 0.26873480338727257, "grad_norm": 0.43346364107415, "learning_rate": 2e-05, "loss": 5.739, "step": 8013 }, { "epoch": 0.2687683407394986, "grad_norm": 0.4051083318184174, "learning_rate": 2e-05, "loss": 5.6544, "step": 8014 }, { "epoch": 0.2688018780917247, "grad_norm": 0.4079908918587191, "learning_rate": 2e-05, "loss": 5.6116, "step": 8015 }, { "epoch": 0.2688354154439507, "grad_norm": 0.4373550026422337, "learning_rate": 2e-05, "loss": 5.4992, "step": 8016 }, { "epoch": 0.26886895279617673, "grad_norm": 0.4033014212118601, "learning_rate": 2e-05, "loss": 5.3689, "step": 8017 }, { "epoch": 0.2689024901484028, "grad_norm": 0.3926166908805169, "learning_rate": 2e-05, "loss": 5.578, "step": 8018 }, { "epoch": 0.26893602750062884, "grad_norm": 0.3956467793666182, "learning_rate": 2e-05, "loss": 5.6929, "step": 8019 }, { "epoch": 0.2689695648528549, "grad_norm": 0.3853504400800167, "learning_rate": 2e-05, "loss": 5.3753, "step": 8020 }, { "epoch": 0.2690031022050809, "grad_norm": 0.40359761730234434, "learning_rate": 2e-05, "loss": 5.5582, "step": 8021 }, { "epoch": 0.26903663955730694, "grad_norm": 0.40636292366672816, "learning_rate": 2e-05, "loss": 5.6212, "step": 8022 }, { "epoch": 0.269070176909533, "grad_norm": 0.40666271960756284, "learning_rate": 2e-05, "loss": 5.5991, "step": 8023 }, { "epoch": 0.26910371426175905, "grad_norm": 0.39619382682981125, "learning_rate": 2e-05, "loss": 5.5783, "step": 8024 }, { "epoch": 0.26913725161398505, "grad_norm": 0.38818364700509483, "learning_rate": 2e-05, "loss": 5.7162, "step": 8025 }, { "epoch": 0.2691707889662111, "grad_norm": 0.40629960656964886, "learning_rate": 2e-05, "loss": 5.6653, "step": 8026 }, { "epoch": 0.26920432631843716, "grad_norm": 0.4024033322595997, "learning_rate": 2e-05, "loss": 5.6759, "step": 8027 }, { "epoch": 0.2692378636706632, "grad_norm": 0.40519816434337264, "learning_rate": 2e-05, "loss": 5.4287, "step": 8028 }, { "epoch": 0.26927140102288927, "grad_norm": 0.4009228484664486, "learning_rate": 2e-05, "loss": 5.7752, "step": 8029 }, { "epoch": 0.26930493837511527, "grad_norm": 0.39744191107030735, "learning_rate": 2e-05, "loss": 5.5187, "step": 8030 }, { "epoch": 0.2693384757273413, "grad_norm": 0.37759463701579704, "learning_rate": 2e-05, "loss": 5.3721, "step": 8031 }, { "epoch": 0.2693720130795674, "grad_norm": 0.39553007226621806, "learning_rate": 2e-05, "loss": 5.4449, "step": 8032 }, { "epoch": 0.26940555043179343, "grad_norm": 0.39198615493210354, "learning_rate": 2e-05, "loss": 5.4961, "step": 8033 }, { "epoch": 0.2694390877840194, "grad_norm": 0.3976983309824223, "learning_rate": 2e-05, "loss": 5.3877, "step": 8034 }, { "epoch": 0.2694726251362455, "grad_norm": 0.40272026022066715, "learning_rate": 2e-05, "loss": 5.6043, "step": 8035 }, { "epoch": 0.26950616248847153, "grad_norm": 0.40416711288272855, "learning_rate": 2e-05, "loss": 5.4701, "step": 8036 }, { "epoch": 0.2695396998406976, "grad_norm": 0.39799875814993013, "learning_rate": 2e-05, "loss": 5.5156, "step": 8037 }, { "epoch": 0.26957323719292364, "grad_norm": 0.4298484406874756, "learning_rate": 2e-05, "loss": 5.4554, "step": 8038 }, { "epoch": 0.26960677454514964, "grad_norm": 0.4229849403111361, "learning_rate": 2e-05, "loss": 5.5981, "step": 8039 }, { "epoch": 0.2696403118973757, "grad_norm": 0.3937143967501352, "learning_rate": 2e-05, "loss": 5.6758, "step": 8040 }, { "epoch": 0.26967384924960175, "grad_norm": 0.4129965880127931, "learning_rate": 2e-05, "loss": 5.6512, "step": 8041 }, { "epoch": 0.2697073866018278, "grad_norm": 0.4312354546949759, "learning_rate": 2e-05, "loss": 5.3451, "step": 8042 }, { "epoch": 0.2697409239540538, "grad_norm": 0.4255220376611984, "learning_rate": 2e-05, "loss": 5.6651, "step": 8043 }, { "epoch": 0.26977446130627986, "grad_norm": 0.4165608401246821, "learning_rate": 2e-05, "loss": 5.514, "step": 8044 }, { "epoch": 0.2698079986585059, "grad_norm": 0.4354328369387444, "learning_rate": 2e-05, "loss": 5.6976, "step": 8045 }, { "epoch": 0.26984153601073196, "grad_norm": 0.4401063347361363, "learning_rate": 2e-05, "loss": 5.61, "step": 8046 }, { "epoch": 0.269875073362958, "grad_norm": 0.4032972153134518, "learning_rate": 2e-05, "loss": 5.6008, "step": 8047 }, { "epoch": 0.269908610715184, "grad_norm": 0.39476069615040404, "learning_rate": 2e-05, "loss": 5.5108, "step": 8048 }, { "epoch": 0.26994214806741007, "grad_norm": 0.42497107070285955, "learning_rate": 2e-05, "loss": 5.594, "step": 8049 }, { "epoch": 0.2699756854196361, "grad_norm": 0.4067647265988586, "learning_rate": 2e-05, "loss": 5.5353, "step": 8050 }, { "epoch": 0.2700092227718622, "grad_norm": 0.3942885648809105, "learning_rate": 2e-05, "loss": 5.4114, "step": 8051 }, { "epoch": 0.2700427601240882, "grad_norm": 0.43303919253377804, "learning_rate": 2e-05, "loss": 5.3804, "step": 8052 }, { "epoch": 0.27007629747631423, "grad_norm": 0.44574247450159515, "learning_rate": 2e-05, "loss": 5.4863, "step": 8053 }, { "epoch": 0.2701098348285403, "grad_norm": 0.40979970348658457, "learning_rate": 2e-05, "loss": 5.5675, "step": 8054 }, { "epoch": 0.27014337218076634, "grad_norm": 0.40160886878511487, "learning_rate": 2e-05, "loss": 5.3412, "step": 8055 }, { "epoch": 0.2701769095329924, "grad_norm": 0.43182405844475097, "learning_rate": 2e-05, "loss": 5.6273, "step": 8056 }, { "epoch": 0.2702104468852184, "grad_norm": 0.420115021971731, "learning_rate": 2e-05, "loss": 5.5664, "step": 8057 }, { "epoch": 0.27024398423744445, "grad_norm": 0.3872156322663994, "learning_rate": 2e-05, "loss": 5.3949, "step": 8058 }, { "epoch": 0.2702775215896705, "grad_norm": 0.44753639128458717, "learning_rate": 2e-05, "loss": 5.7357, "step": 8059 }, { "epoch": 0.27031105894189655, "grad_norm": 0.4390242166854914, "learning_rate": 2e-05, "loss": 5.7064, "step": 8060 }, { "epoch": 0.27034459629412255, "grad_norm": 0.40383586871350285, "learning_rate": 2e-05, "loss": 5.4979, "step": 8061 }, { "epoch": 0.2703781336463486, "grad_norm": 0.4259200920597373, "learning_rate": 2e-05, "loss": 5.4987, "step": 8062 }, { "epoch": 0.27041167099857466, "grad_norm": 0.44350909486101997, "learning_rate": 2e-05, "loss": 5.527, "step": 8063 }, { "epoch": 0.2704452083508007, "grad_norm": 0.41604382226962755, "learning_rate": 2e-05, "loss": 5.5995, "step": 8064 }, { "epoch": 0.27047874570302677, "grad_norm": 0.4035398064214201, "learning_rate": 2e-05, "loss": 5.5231, "step": 8065 }, { "epoch": 0.27051228305525277, "grad_norm": 0.39485590784652214, "learning_rate": 2e-05, "loss": 5.8214, "step": 8066 }, { "epoch": 0.2705458204074788, "grad_norm": 0.42600901546441045, "learning_rate": 2e-05, "loss": 5.5503, "step": 8067 }, { "epoch": 0.2705793577597049, "grad_norm": 0.4057764427136664, "learning_rate": 2e-05, "loss": 5.6395, "step": 8068 }, { "epoch": 0.27061289511193093, "grad_norm": 0.42828491405289715, "learning_rate": 2e-05, "loss": 5.5591, "step": 8069 }, { "epoch": 0.2706464324641569, "grad_norm": 0.40667546482912525, "learning_rate": 2e-05, "loss": 5.591, "step": 8070 }, { "epoch": 0.270679969816383, "grad_norm": 0.41899950691094245, "learning_rate": 2e-05, "loss": 5.4153, "step": 8071 }, { "epoch": 0.27071350716860904, "grad_norm": 0.40213144743333046, "learning_rate": 2e-05, "loss": 5.6469, "step": 8072 }, { "epoch": 0.2707470445208351, "grad_norm": 0.3940041463799073, "learning_rate": 2e-05, "loss": 5.5051, "step": 8073 }, { "epoch": 0.27078058187306114, "grad_norm": 0.42452104521514106, "learning_rate": 2e-05, "loss": 5.5551, "step": 8074 }, { "epoch": 0.27081411922528714, "grad_norm": 0.44954753674839737, "learning_rate": 2e-05, "loss": 5.4857, "step": 8075 }, { "epoch": 0.2708476565775132, "grad_norm": 0.4005717158585168, "learning_rate": 2e-05, "loss": 5.3528, "step": 8076 }, { "epoch": 0.27088119392973925, "grad_norm": 0.43989748741242707, "learning_rate": 2e-05, "loss": 5.6562, "step": 8077 }, { "epoch": 0.2709147312819653, "grad_norm": 0.4075759332919316, "learning_rate": 2e-05, "loss": 5.5657, "step": 8078 }, { "epoch": 0.27094826863419136, "grad_norm": 0.39309753145818, "learning_rate": 2e-05, "loss": 5.6135, "step": 8079 }, { "epoch": 0.27098180598641736, "grad_norm": 0.396822320600008, "learning_rate": 2e-05, "loss": 5.6605, "step": 8080 }, { "epoch": 0.2710153433386434, "grad_norm": 0.39468511754688723, "learning_rate": 2e-05, "loss": 5.4466, "step": 8081 }, { "epoch": 0.27104888069086946, "grad_norm": 0.394732094026696, "learning_rate": 2e-05, "loss": 5.527, "step": 8082 }, { "epoch": 0.2710824180430955, "grad_norm": 0.42155598941102396, "learning_rate": 2e-05, "loss": 5.607, "step": 8083 }, { "epoch": 0.2711159553953215, "grad_norm": 0.421225719987517, "learning_rate": 2e-05, "loss": 5.5322, "step": 8084 }, { "epoch": 0.27114949274754757, "grad_norm": 0.406921521894606, "learning_rate": 2e-05, "loss": 5.6762, "step": 8085 }, { "epoch": 0.2711830300997736, "grad_norm": 0.4179914768082538, "learning_rate": 2e-05, "loss": 5.4546, "step": 8086 }, { "epoch": 0.2712165674519997, "grad_norm": 0.39135531202103474, "learning_rate": 2e-05, "loss": 5.6686, "step": 8087 }, { "epoch": 0.27125010480422573, "grad_norm": 0.41132939604266977, "learning_rate": 2e-05, "loss": 5.6272, "step": 8088 }, { "epoch": 0.27128364215645173, "grad_norm": 0.45741284316540515, "learning_rate": 2e-05, "loss": 5.6178, "step": 8089 }, { "epoch": 0.2713171795086778, "grad_norm": 0.3986412394826346, "learning_rate": 2e-05, "loss": 5.4977, "step": 8090 }, { "epoch": 0.27135071686090384, "grad_norm": 0.4034516529513605, "learning_rate": 2e-05, "loss": 5.212, "step": 8091 }, { "epoch": 0.2713842542131299, "grad_norm": 0.41313983171295365, "learning_rate": 2e-05, "loss": 5.6448, "step": 8092 }, { "epoch": 0.2714177915653559, "grad_norm": 0.4065215254316753, "learning_rate": 2e-05, "loss": 5.4523, "step": 8093 }, { "epoch": 0.27145132891758195, "grad_norm": 0.4094337601134091, "learning_rate": 2e-05, "loss": 5.4829, "step": 8094 }, { "epoch": 0.271484866269808, "grad_norm": 0.4178419973513982, "learning_rate": 2e-05, "loss": 5.7019, "step": 8095 }, { "epoch": 0.27151840362203405, "grad_norm": 0.4179070371418559, "learning_rate": 2e-05, "loss": 5.3836, "step": 8096 }, { "epoch": 0.2715519409742601, "grad_norm": 0.43427251521173005, "learning_rate": 2e-05, "loss": 5.661, "step": 8097 }, { "epoch": 0.2715854783264861, "grad_norm": 0.41255375528223304, "learning_rate": 2e-05, "loss": 5.5311, "step": 8098 }, { "epoch": 0.27161901567871216, "grad_norm": 0.42266323559967417, "learning_rate": 2e-05, "loss": 5.5412, "step": 8099 }, { "epoch": 0.2716525530309382, "grad_norm": 0.39358914780627374, "learning_rate": 2e-05, "loss": 5.6416, "step": 8100 }, { "epoch": 0.27168609038316427, "grad_norm": 0.44222474533941286, "learning_rate": 2e-05, "loss": 5.3983, "step": 8101 }, { "epoch": 0.27171962773539027, "grad_norm": 0.4177166803163321, "learning_rate": 2e-05, "loss": 5.5371, "step": 8102 }, { "epoch": 0.2717531650876163, "grad_norm": 0.42175799361297445, "learning_rate": 2e-05, "loss": 5.5771, "step": 8103 }, { "epoch": 0.2717867024398424, "grad_norm": 0.41335811635021436, "learning_rate": 2e-05, "loss": 5.6159, "step": 8104 }, { "epoch": 0.27182023979206843, "grad_norm": 0.41513743209660403, "learning_rate": 2e-05, "loss": 5.5478, "step": 8105 }, { "epoch": 0.2718537771442945, "grad_norm": 0.4084278403078434, "learning_rate": 2e-05, "loss": 5.4463, "step": 8106 }, { "epoch": 0.2718873144965205, "grad_norm": 0.38920309310169227, "learning_rate": 2e-05, "loss": 5.5759, "step": 8107 }, { "epoch": 0.27192085184874654, "grad_norm": 0.42498697871485275, "learning_rate": 2e-05, "loss": 5.6733, "step": 8108 }, { "epoch": 0.2719543892009726, "grad_norm": 0.42095344903523974, "learning_rate": 2e-05, "loss": 5.5737, "step": 8109 }, { "epoch": 0.27198792655319864, "grad_norm": 0.4065242524280315, "learning_rate": 2e-05, "loss": 5.579, "step": 8110 }, { "epoch": 0.27202146390542464, "grad_norm": 0.4128171578923302, "learning_rate": 2e-05, "loss": 5.5229, "step": 8111 }, { "epoch": 0.2720550012576507, "grad_norm": 0.40021581980028126, "learning_rate": 2e-05, "loss": 5.7801, "step": 8112 }, { "epoch": 0.27208853860987675, "grad_norm": 0.40167426589884253, "learning_rate": 2e-05, "loss": 5.4947, "step": 8113 }, { "epoch": 0.2721220759621028, "grad_norm": 0.40990725932132227, "learning_rate": 2e-05, "loss": 5.5913, "step": 8114 }, { "epoch": 0.27215561331432886, "grad_norm": 0.39595712456608845, "learning_rate": 2e-05, "loss": 5.4043, "step": 8115 }, { "epoch": 0.27218915066655486, "grad_norm": 0.40046803292467004, "learning_rate": 2e-05, "loss": 5.6, "step": 8116 }, { "epoch": 0.2722226880187809, "grad_norm": 0.40762968120920917, "learning_rate": 2e-05, "loss": 5.5416, "step": 8117 }, { "epoch": 0.27225622537100697, "grad_norm": 0.42593481657729043, "learning_rate": 2e-05, "loss": 5.6127, "step": 8118 }, { "epoch": 0.272289762723233, "grad_norm": 0.4025788893195396, "learning_rate": 2e-05, "loss": 5.4286, "step": 8119 }, { "epoch": 0.272323300075459, "grad_norm": 0.3946537663066472, "learning_rate": 2e-05, "loss": 5.5281, "step": 8120 }, { "epoch": 0.2723568374276851, "grad_norm": 0.4440274751623818, "learning_rate": 2e-05, "loss": 5.6563, "step": 8121 }, { "epoch": 0.2723903747799111, "grad_norm": 0.432194481059124, "learning_rate": 2e-05, "loss": 5.322, "step": 8122 }, { "epoch": 0.2724239121321372, "grad_norm": 0.42249080021643365, "learning_rate": 2e-05, "loss": 5.5669, "step": 8123 }, { "epoch": 0.27245744948436323, "grad_norm": 0.4119232058611118, "learning_rate": 2e-05, "loss": 5.7846, "step": 8124 }, { "epoch": 0.27249098683658923, "grad_norm": 0.43538513795243483, "learning_rate": 2e-05, "loss": 5.6242, "step": 8125 }, { "epoch": 0.2725245241888153, "grad_norm": 0.4359985554318899, "learning_rate": 2e-05, "loss": 5.5828, "step": 8126 }, { "epoch": 0.27255806154104134, "grad_norm": 0.45373568622766564, "learning_rate": 2e-05, "loss": 5.4978, "step": 8127 }, { "epoch": 0.2725915988932674, "grad_norm": 0.3970593606246072, "learning_rate": 2e-05, "loss": 5.5801, "step": 8128 }, { "epoch": 0.2726251362454934, "grad_norm": 0.4235075595098674, "learning_rate": 2e-05, "loss": 5.4806, "step": 8129 }, { "epoch": 0.27265867359771945, "grad_norm": 0.461706190563639, "learning_rate": 2e-05, "loss": 5.5235, "step": 8130 }, { "epoch": 0.2726922109499455, "grad_norm": 0.4203499590890807, "learning_rate": 2e-05, "loss": 5.5442, "step": 8131 }, { "epoch": 0.27272574830217156, "grad_norm": 0.4151132491782214, "learning_rate": 2e-05, "loss": 5.599, "step": 8132 }, { "epoch": 0.2727592856543976, "grad_norm": 0.4038349099466041, "learning_rate": 2e-05, "loss": 5.6152, "step": 8133 }, { "epoch": 0.2727928230066236, "grad_norm": 0.41195789329936605, "learning_rate": 2e-05, "loss": 5.4742, "step": 8134 }, { "epoch": 0.27282636035884966, "grad_norm": 0.42300416348767456, "learning_rate": 2e-05, "loss": 5.533, "step": 8135 }, { "epoch": 0.2728598977110757, "grad_norm": 0.41406692117332244, "learning_rate": 2e-05, "loss": 5.4614, "step": 8136 }, { "epoch": 0.27289343506330177, "grad_norm": 0.42331148129448937, "learning_rate": 2e-05, "loss": 5.4066, "step": 8137 }, { "epoch": 0.27292697241552777, "grad_norm": 0.4230700491989063, "learning_rate": 2e-05, "loss": 5.426, "step": 8138 }, { "epoch": 0.2729605097677538, "grad_norm": 0.4093425048870177, "learning_rate": 2e-05, "loss": 5.5352, "step": 8139 }, { "epoch": 0.2729940471199799, "grad_norm": 0.4329727254601788, "learning_rate": 2e-05, "loss": 5.492, "step": 8140 }, { "epoch": 0.27302758447220593, "grad_norm": 0.4286778965117736, "learning_rate": 2e-05, "loss": 5.5033, "step": 8141 }, { "epoch": 0.273061121824432, "grad_norm": 0.42399263796221387, "learning_rate": 2e-05, "loss": 5.4982, "step": 8142 }, { "epoch": 0.273094659176658, "grad_norm": 0.41615904479775817, "learning_rate": 2e-05, "loss": 5.4592, "step": 8143 }, { "epoch": 0.27312819652888404, "grad_norm": 0.4127631814867339, "learning_rate": 2e-05, "loss": 5.6209, "step": 8144 }, { "epoch": 0.2731617338811101, "grad_norm": 0.414751826936659, "learning_rate": 2e-05, "loss": 5.6722, "step": 8145 }, { "epoch": 0.27319527123333615, "grad_norm": 0.40445964119946143, "learning_rate": 2e-05, "loss": 5.4419, "step": 8146 }, { "epoch": 0.27322880858556214, "grad_norm": 0.4239291022703543, "learning_rate": 2e-05, "loss": 5.7013, "step": 8147 }, { "epoch": 0.2732623459377882, "grad_norm": 0.45130885137308757, "learning_rate": 2e-05, "loss": 5.7794, "step": 8148 }, { "epoch": 0.27329588329001425, "grad_norm": 0.43453990219789723, "learning_rate": 2e-05, "loss": 5.6195, "step": 8149 }, { "epoch": 0.2733294206422403, "grad_norm": 0.3913315121662435, "learning_rate": 2e-05, "loss": 5.4837, "step": 8150 }, { "epoch": 0.27336295799446636, "grad_norm": 0.49437021169220646, "learning_rate": 2e-05, "loss": 5.6703, "step": 8151 }, { "epoch": 0.27339649534669236, "grad_norm": 0.44441606024803476, "learning_rate": 2e-05, "loss": 5.5208, "step": 8152 }, { "epoch": 0.2734300326989184, "grad_norm": 0.4330675575380129, "learning_rate": 2e-05, "loss": 5.5755, "step": 8153 }, { "epoch": 0.27346357005114447, "grad_norm": 0.40505618328713383, "learning_rate": 2e-05, "loss": 5.5691, "step": 8154 }, { "epoch": 0.2734971074033705, "grad_norm": 0.37466591887591155, "learning_rate": 2e-05, "loss": 5.7109, "step": 8155 }, { "epoch": 0.2735306447555965, "grad_norm": 0.4115339169888913, "learning_rate": 2e-05, "loss": 5.6589, "step": 8156 }, { "epoch": 0.2735641821078226, "grad_norm": 0.3901011964150718, "learning_rate": 2e-05, "loss": 5.5609, "step": 8157 }, { "epoch": 0.2735977194600486, "grad_norm": 0.41769022567167374, "learning_rate": 2e-05, "loss": 5.5149, "step": 8158 }, { "epoch": 0.2736312568122747, "grad_norm": 0.39607475349395743, "learning_rate": 2e-05, "loss": 5.6635, "step": 8159 }, { "epoch": 0.27366479416450074, "grad_norm": 0.39020149837799556, "learning_rate": 2e-05, "loss": 5.3654, "step": 8160 }, { "epoch": 0.27369833151672673, "grad_norm": 0.39805205836817487, "learning_rate": 2e-05, "loss": 5.7445, "step": 8161 }, { "epoch": 0.2737318688689528, "grad_norm": 0.3757261989005731, "learning_rate": 2e-05, "loss": 5.6611, "step": 8162 }, { "epoch": 0.27376540622117884, "grad_norm": 0.3865840179064943, "learning_rate": 2e-05, "loss": 5.473, "step": 8163 }, { "epoch": 0.2737989435734049, "grad_norm": 0.40589751277109803, "learning_rate": 2e-05, "loss": 5.4903, "step": 8164 }, { "epoch": 0.2738324809256309, "grad_norm": 0.38520114020395296, "learning_rate": 2e-05, "loss": 5.2132, "step": 8165 }, { "epoch": 0.27386601827785695, "grad_norm": 0.40350062638819567, "learning_rate": 2e-05, "loss": 5.6696, "step": 8166 }, { "epoch": 0.273899555630083, "grad_norm": 0.4163256764122913, "learning_rate": 2e-05, "loss": 5.8773, "step": 8167 }, { "epoch": 0.27393309298230906, "grad_norm": 0.40110016142137894, "learning_rate": 2e-05, "loss": 5.5815, "step": 8168 }, { "epoch": 0.2739666303345351, "grad_norm": 0.4237212488276908, "learning_rate": 2e-05, "loss": 5.5642, "step": 8169 }, { "epoch": 0.2740001676867611, "grad_norm": 0.44241460132845223, "learning_rate": 2e-05, "loss": 5.5908, "step": 8170 }, { "epoch": 0.27403370503898716, "grad_norm": 0.3886583655221728, "learning_rate": 2e-05, "loss": 5.6025, "step": 8171 }, { "epoch": 0.2740672423912132, "grad_norm": 0.44223184596841886, "learning_rate": 2e-05, "loss": 5.596, "step": 8172 }, { "epoch": 0.27410077974343927, "grad_norm": 0.43196792687895214, "learning_rate": 2e-05, "loss": 5.5579, "step": 8173 }, { "epoch": 0.27413431709566527, "grad_norm": 0.3777605147404908, "learning_rate": 2e-05, "loss": 5.6982, "step": 8174 }, { "epoch": 0.2741678544478913, "grad_norm": 0.4173931140722959, "learning_rate": 2e-05, "loss": 5.5828, "step": 8175 }, { "epoch": 0.2742013918001174, "grad_norm": 0.4432216427560934, "learning_rate": 2e-05, "loss": 5.2465, "step": 8176 }, { "epoch": 0.27423492915234343, "grad_norm": 0.4280817599703126, "learning_rate": 2e-05, "loss": 5.6959, "step": 8177 }, { "epoch": 0.2742684665045695, "grad_norm": 0.3949537497318912, "learning_rate": 2e-05, "loss": 5.549, "step": 8178 }, { "epoch": 0.2743020038567955, "grad_norm": 0.4252294768470657, "learning_rate": 2e-05, "loss": 5.5822, "step": 8179 }, { "epoch": 0.27433554120902154, "grad_norm": 0.42863756630537536, "learning_rate": 2e-05, "loss": 5.3848, "step": 8180 }, { "epoch": 0.2743690785612476, "grad_norm": 0.4145401275664284, "learning_rate": 2e-05, "loss": 5.6609, "step": 8181 }, { "epoch": 0.27440261591347365, "grad_norm": 0.3892281303506181, "learning_rate": 2e-05, "loss": 5.4347, "step": 8182 }, { "epoch": 0.2744361532656997, "grad_norm": 0.43457677518831167, "learning_rate": 2e-05, "loss": 5.4793, "step": 8183 }, { "epoch": 0.2744696906179257, "grad_norm": 0.3845027969475578, "learning_rate": 2e-05, "loss": 5.6622, "step": 8184 }, { "epoch": 0.27450322797015175, "grad_norm": 0.4150330117890706, "learning_rate": 2e-05, "loss": 5.6774, "step": 8185 }, { "epoch": 0.2745367653223778, "grad_norm": 0.40177431266024605, "learning_rate": 2e-05, "loss": 5.677, "step": 8186 }, { "epoch": 0.27457030267460386, "grad_norm": 0.3940309746058934, "learning_rate": 2e-05, "loss": 5.7553, "step": 8187 }, { "epoch": 0.27460384002682986, "grad_norm": 0.41360326235072786, "learning_rate": 2e-05, "loss": 5.5398, "step": 8188 }, { "epoch": 0.2746373773790559, "grad_norm": 0.40321477747642265, "learning_rate": 2e-05, "loss": 5.4955, "step": 8189 }, { "epoch": 0.27467091473128197, "grad_norm": 0.41268810715066107, "learning_rate": 2e-05, "loss": 5.5108, "step": 8190 }, { "epoch": 0.274704452083508, "grad_norm": 0.403341381656728, "learning_rate": 2e-05, "loss": 5.5201, "step": 8191 }, { "epoch": 0.2747379894357341, "grad_norm": 0.40031318232392044, "learning_rate": 2e-05, "loss": 5.5958, "step": 8192 }, { "epoch": 0.2747715267879601, "grad_norm": 0.41651190137099176, "learning_rate": 2e-05, "loss": 5.5388, "step": 8193 }, { "epoch": 0.27480506414018613, "grad_norm": 0.39986659909680816, "learning_rate": 2e-05, "loss": 5.6569, "step": 8194 }, { "epoch": 0.2748386014924122, "grad_norm": 0.3939474419474777, "learning_rate": 2e-05, "loss": 5.3921, "step": 8195 }, { "epoch": 0.27487213884463824, "grad_norm": 0.42439286745705573, "learning_rate": 2e-05, "loss": 5.4422, "step": 8196 }, { "epoch": 0.27490567619686423, "grad_norm": 0.49345268926319213, "learning_rate": 2e-05, "loss": 5.5583, "step": 8197 }, { "epoch": 0.2749392135490903, "grad_norm": 0.39852473449964115, "learning_rate": 2e-05, "loss": 5.31, "step": 8198 }, { "epoch": 0.27497275090131634, "grad_norm": 0.4142066044913427, "learning_rate": 2e-05, "loss": 5.5149, "step": 8199 }, { "epoch": 0.2750062882535424, "grad_norm": 0.38432694019409264, "learning_rate": 2e-05, "loss": 5.5962, "step": 8200 }, { "epoch": 0.27503982560576845, "grad_norm": 0.39500248978881053, "learning_rate": 2e-05, "loss": 5.7271, "step": 8201 }, { "epoch": 0.27507336295799445, "grad_norm": 0.41588237964724845, "learning_rate": 2e-05, "loss": 5.6793, "step": 8202 }, { "epoch": 0.2751069003102205, "grad_norm": 0.42251757777728377, "learning_rate": 2e-05, "loss": 5.2071, "step": 8203 }, { "epoch": 0.27514043766244656, "grad_norm": 0.41278767550604656, "learning_rate": 2e-05, "loss": 5.4519, "step": 8204 }, { "epoch": 0.2751739750146726, "grad_norm": 0.3920445206624354, "learning_rate": 2e-05, "loss": 5.7112, "step": 8205 }, { "epoch": 0.2752075123668986, "grad_norm": 0.4373930330339168, "learning_rate": 2e-05, "loss": 5.5695, "step": 8206 }, { "epoch": 0.27524104971912466, "grad_norm": 0.41585888384371433, "learning_rate": 2e-05, "loss": 5.535, "step": 8207 }, { "epoch": 0.2752745870713507, "grad_norm": 0.47536410763162146, "learning_rate": 2e-05, "loss": 5.545, "step": 8208 }, { "epoch": 0.27530812442357677, "grad_norm": 0.4206816081958966, "learning_rate": 2e-05, "loss": 5.7024, "step": 8209 }, { "epoch": 0.2753416617758028, "grad_norm": 0.3995674245005109, "learning_rate": 2e-05, "loss": 5.4141, "step": 8210 }, { "epoch": 0.2753751991280288, "grad_norm": 0.40376878129308263, "learning_rate": 2e-05, "loss": 5.3132, "step": 8211 }, { "epoch": 0.2754087364802549, "grad_norm": 0.3969770628945122, "learning_rate": 2e-05, "loss": 5.6285, "step": 8212 }, { "epoch": 0.27544227383248093, "grad_norm": 0.41334567257361166, "learning_rate": 2e-05, "loss": 5.6257, "step": 8213 }, { "epoch": 0.275475811184707, "grad_norm": 0.42924429761682653, "learning_rate": 2e-05, "loss": 5.4876, "step": 8214 }, { "epoch": 0.275509348536933, "grad_norm": 0.37998311413606584, "learning_rate": 2e-05, "loss": 5.75, "step": 8215 }, { "epoch": 0.27554288588915904, "grad_norm": 0.40592666073821165, "learning_rate": 2e-05, "loss": 5.5673, "step": 8216 }, { "epoch": 0.2755764232413851, "grad_norm": 0.41287325872251873, "learning_rate": 2e-05, "loss": 5.5049, "step": 8217 }, { "epoch": 0.27560996059361115, "grad_norm": 0.3965721254745297, "learning_rate": 2e-05, "loss": 5.4727, "step": 8218 }, { "epoch": 0.2756434979458372, "grad_norm": 0.41798358037565114, "learning_rate": 2e-05, "loss": 5.6734, "step": 8219 }, { "epoch": 0.2756770352980632, "grad_norm": 0.4397328188182528, "learning_rate": 2e-05, "loss": 5.6265, "step": 8220 }, { "epoch": 0.27571057265028925, "grad_norm": 0.38280798918772957, "learning_rate": 2e-05, "loss": 5.6611, "step": 8221 }, { "epoch": 0.2757441100025153, "grad_norm": 0.40739541841759835, "learning_rate": 2e-05, "loss": 5.7267, "step": 8222 }, { "epoch": 0.27577764735474136, "grad_norm": 0.4380283724326374, "learning_rate": 2e-05, "loss": 5.4532, "step": 8223 }, { "epoch": 0.27581118470696736, "grad_norm": 0.4233482809388684, "learning_rate": 2e-05, "loss": 5.5014, "step": 8224 }, { "epoch": 0.2758447220591934, "grad_norm": 0.4245427602066574, "learning_rate": 2e-05, "loss": 5.6335, "step": 8225 }, { "epoch": 0.27587825941141947, "grad_norm": 0.40352462833449576, "learning_rate": 2e-05, "loss": 5.5693, "step": 8226 }, { "epoch": 0.2759117967636455, "grad_norm": 0.4098943550134626, "learning_rate": 2e-05, "loss": 5.7263, "step": 8227 }, { "epoch": 0.2759453341158716, "grad_norm": 0.42875416694112556, "learning_rate": 2e-05, "loss": 5.5263, "step": 8228 }, { "epoch": 0.2759788714680976, "grad_norm": 0.4040648137878701, "learning_rate": 2e-05, "loss": 5.5105, "step": 8229 }, { "epoch": 0.27601240882032363, "grad_norm": 0.43031644999400015, "learning_rate": 2e-05, "loss": 5.5752, "step": 8230 }, { "epoch": 0.2760459461725497, "grad_norm": 0.4225526960028716, "learning_rate": 2e-05, "loss": 5.7763, "step": 8231 }, { "epoch": 0.27607948352477574, "grad_norm": 0.41803852880647074, "learning_rate": 2e-05, "loss": 5.6757, "step": 8232 }, { "epoch": 0.27611302087700174, "grad_norm": 0.40756389192919085, "learning_rate": 2e-05, "loss": 5.495, "step": 8233 }, { "epoch": 0.2761465582292278, "grad_norm": 0.41002740602908666, "learning_rate": 2e-05, "loss": 5.5748, "step": 8234 }, { "epoch": 0.27618009558145384, "grad_norm": 0.4147286421242118, "learning_rate": 2e-05, "loss": 5.5948, "step": 8235 }, { "epoch": 0.2762136329336799, "grad_norm": 0.3949965157399722, "learning_rate": 2e-05, "loss": 5.4628, "step": 8236 }, { "epoch": 0.27624717028590595, "grad_norm": 0.4379657862637084, "learning_rate": 2e-05, "loss": 5.2167, "step": 8237 }, { "epoch": 0.27628070763813195, "grad_norm": 0.4292742405342916, "learning_rate": 2e-05, "loss": 5.6938, "step": 8238 }, { "epoch": 0.276314244990358, "grad_norm": 0.45202109669737833, "learning_rate": 2e-05, "loss": 5.479, "step": 8239 }, { "epoch": 0.27634778234258406, "grad_norm": 0.4640932273966934, "learning_rate": 2e-05, "loss": 5.33, "step": 8240 }, { "epoch": 0.2763813196948101, "grad_norm": 0.4091906501016684, "learning_rate": 2e-05, "loss": 5.5892, "step": 8241 }, { "epoch": 0.2764148570470361, "grad_norm": 0.4581314608878707, "learning_rate": 2e-05, "loss": 5.4637, "step": 8242 }, { "epoch": 0.27644839439926217, "grad_norm": 0.4246777374951765, "learning_rate": 2e-05, "loss": 5.5063, "step": 8243 }, { "epoch": 0.2764819317514882, "grad_norm": 0.39012140287791586, "learning_rate": 2e-05, "loss": 5.5087, "step": 8244 }, { "epoch": 0.2765154691037143, "grad_norm": 0.4449821460294694, "learning_rate": 2e-05, "loss": 5.5531, "step": 8245 }, { "epoch": 0.2765490064559403, "grad_norm": 0.439126068958703, "learning_rate": 2e-05, "loss": 5.6398, "step": 8246 }, { "epoch": 0.2765825438081663, "grad_norm": 0.39500294161377025, "learning_rate": 2e-05, "loss": 5.6742, "step": 8247 }, { "epoch": 0.2766160811603924, "grad_norm": 0.4069384990762394, "learning_rate": 2e-05, "loss": 5.7043, "step": 8248 }, { "epoch": 0.27664961851261843, "grad_norm": 0.45627488986511583, "learning_rate": 2e-05, "loss": 5.7656, "step": 8249 }, { "epoch": 0.2766831558648445, "grad_norm": 0.42756516188270444, "learning_rate": 2e-05, "loss": 5.4811, "step": 8250 }, { "epoch": 0.2767166932170705, "grad_norm": 0.4274298307496364, "learning_rate": 2e-05, "loss": 5.484, "step": 8251 }, { "epoch": 0.27675023056929654, "grad_norm": 0.5053473227437993, "learning_rate": 2e-05, "loss": 5.5641, "step": 8252 }, { "epoch": 0.2767837679215226, "grad_norm": 0.4432117894320038, "learning_rate": 2e-05, "loss": 5.6611, "step": 8253 }, { "epoch": 0.27681730527374865, "grad_norm": 0.3855389306012186, "learning_rate": 2e-05, "loss": 5.4564, "step": 8254 }, { "epoch": 0.2768508426259747, "grad_norm": 0.4535193461885291, "learning_rate": 2e-05, "loss": 5.3401, "step": 8255 }, { "epoch": 0.2768843799782007, "grad_norm": 0.4675534442373698, "learning_rate": 2e-05, "loss": 5.6444, "step": 8256 }, { "epoch": 0.27691791733042675, "grad_norm": 0.418068378371807, "learning_rate": 2e-05, "loss": 5.5946, "step": 8257 }, { "epoch": 0.2769514546826528, "grad_norm": 0.4073169000430906, "learning_rate": 2e-05, "loss": 5.3184, "step": 8258 }, { "epoch": 0.27698499203487886, "grad_norm": 0.41554320774015535, "learning_rate": 2e-05, "loss": 5.4307, "step": 8259 }, { "epoch": 0.27701852938710486, "grad_norm": 0.40512889855253864, "learning_rate": 2e-05, "loss": 5.6464, "step": 8260 }, { "epoch": 0.2770520667393309, "grad_norm": 0.3991543972604124, "learning_rate": 2e-05, "loss": 5.646, "step": 8261 }, { "epoch": 0.27708560409155697, "grad_norm": 0.438009918030896, "learning_rate": 2e-05, "loss": 5.7025, "step": 8262 }, { "epoch": 0.277119141443783, "grad_norm": 0.42267780162400614, "learning_rate": 2e-05, "loss": 5.4226, "step": 8263 }, { "epoch": 0.2771526787960091, "grad_norm": 0.4002122292345354, "learning_rate": 2e-05, "loss": 5.6513, "step": 8264 }, { "epoch": 0.2771862161482351, "grad_norm": 0.4476429359537658, "learning_rate": 2e-05, "loss": 5.4892, "step": 8265 }, { "epoch": 0.27721975350046113, "grad_norm": 0.4287236447182565, "learning_rate": 2e-05, "loss": 5.4586, "step": 8266 }, { "epoch": 0.2772532908526872, "grad_norm": 0.39199088348440914, "learning_rate": 2e-05, "loss": 5.5245, "step": 8267 }, { "epoch": 0.27728682820491324, "grad_norm": 0.39396365006662964, "learning_rate": 2e-05, "loss": 5.53, "step": 8268 }, { "epoch": 0.27732036555713924, "grad_norm": 0.4151625182991488, "learning_rate": 2e-05, "loss": 5.3745, "step": 8269 }, { "epoch": 0.2773539029093653, "grad_norm": 0.41413459886294984, "learning_rate": 2e-05, "loss": 5.592, "step": 8270 }, { "epoch": 0.27738744026159134, "grad_norm": 0.4451451087590791, "learning_rate": 2e-05, "loss": 5.5592, "step": 8271 }, { "epoch": 0.2774209776138174, "grad_norm": 0.4068142495890796, "learning_rate": 2e-05, "loss": 5.358, "step": 8272 }, { "epoch": 0.27745451496604345, "grad_norm": 0.4115258367797362, "learning_rate": 2e-05, "loss": 5.5564, "step": 8273 }, { "epoch": 0.27748805231826945, "grad_norm": 0.3875803299674118, "learning_rate": 2e-05, "loss": 5.5539, "step": 8274 }, { "epoch": 0.2775215896704955, "grad_norm": 0.40663539285459455, "learning_rate": 2e-05, "loss": 5.3767, "step": 8275 }, { "epoch": 0.27755512702272156, "grad_norm": 0.40005891603721666, "learning_rate": 2e-05, "loss": 5.5234, "step": 8276 }, { "epoch": 0.2775886643749476, "grad_norm": 0.4017551963340739, "learning_rate": 2e-05, "loss": 5.6616, "step": 8277 }, { "epoch": 0.2776222017271736, "grad_norm": 0.4416836925198995, "learning_rate": 2e-05, "loss": 5.6197, "step": 8278 }, { "epoch": 0.27765573907939967, "grad_norm": 0.4103615487124511, "learning_rate": 2e-05, "loss": 5.7211, "step": 8279 }, { "epoch": 0.2776892764316257, "grad_norm": 0.39161015608263994, "learning_rate": 2e-05, "loss": 5.782, "step": 8280 }, { "epoch": 0.2777228137838518, "grad_norm": 0.418652080825483, "learning_rate": 2e-05, "loss": 5.7559, "step": 8281 }, { "epoch": 0.27775635113607783, "grad_norm": 0.36839907558519525, "learning_rate": 2e-05, "loss": 5.6219, "step": 8282 }, { "epoch": 0.2777898884883038, "grad_norm": 0.39031819974383386, "learning_rate": 2e-05, "loss": 5.6913, "step": 8283 }, { "epoch": 0.2778234258405299, "grad_norm": 0.4639307318580001, "learning_rate": 2e-05, "loss": 5.4724, "step": 8284 }, { "epoch": 0.27785696319275593, "grad_norm": 0.41160503184087677, "learning_rate": 2e-05, "loss": 5.6778, "step": 8285 }, { "epoch": 0.277890500544982, "grad_norm": 0.4054487387098727, "learning_rate": 2e-05, "loss": 5.6464, "step": 8286 }, { "epoch": 0.27792403789720804, "grad_norm": 0.43694255679371646, "learning_rate": 2e-05, "loss": 5.6503, "step": 8287 }, { "epoch": 0.27795757524943404, "grad_norm": 0.4108600367136194, "learning_rate": 2e-05, "loss": 5.4972, "step": 8288 }, { "epoch": 0.2779911126016601, "grad_norm": 0.4014017825412155, "learning_rate": 2e-05, "loss": 5.3981, "step": 8289 }, { "epoch": 0.27802464995388615, "grad_norm": 0.40606252740824206, "learning_rate": 2e-05, "loss": 5.5591, "step": 8290 }, { "epoch": 0.2780581873061122, "grad_norm": 0.388826237576901, "learning_rate": 2e-05, "loss": 5.5659, "step": 8291 }, { "epoch": 0.2780917246583382, "grad_norm": 0.3784506538536084, "learning_rate": 2e-05, "loss": 5.445, "step": 8292 }, { "epoch": 0.27812526201056426, "grad_norm": 0.4024158724300405, "learning_rate": 2e-05, "loss": 5.6685, "step": 8293 }, { "epoch": 0.2781587993627903, "grad_norm": 0.40860667443741455, "learning_rate": 2e-05, "loss": 5.4635, "step": 8294 }, { "epoch": 0.27819233671501636, "grad_norm": 0.4078920212159316, "learning_rate": 2e-05, "loss": 5.6426, "step": 8295 }, { "epoch": 0.2782258740672424, "grad_norm": 0.4228096157610471, "learning_rate": 2e-05, "loss": 5.5512, "step": 8296 }, { "epoch": 0.2782594114194684, "grad_norm": 0.3812661423503472, "learning_rate": 2e-05, "loss": 5.5267, "step": 8297 }, { "epoch": 0.27829294877169447, "grad_norm": 0.402369554515184, "learning_rate": 2e-05, "loss": 5.5195, "step": 8298 }, { "epoch": 0.2783264861239205, "grad_norm": 0.3956875534987787, "learning_rate": 2e-05, "loss": 5.584, "step": 8299 }, { "epoch": 0.2783600234761466, "grad_norm": 0.4232109982841505, "learning_rate": 2e-05, "loss": 5.7768, "step": 8300 }, { "epoch": 0.2783935608283726, "grad_norm": 0.4190358784681584, "learning_rate": 2e-05, "loss": 5.7135, "step": 8301 }, { "epoch": 0.27842709818059863, "grad_norm": 0.40260713694309047, "learning_rate": 2e-05, "loss": 5.6578, "step": 8302 }, { "epoch": 0.2784606355328247, "grad_norm": 0.40288806683313233, "learning_rate": 2e-05, "loss": 5.5266, "step": 8303 }, { "epoch": 0.27849417288505074, "grad_norm": 0.4358478478275407, "learning_rate": 2e-05, "loss": 5.512, "step": 8304 }, { "epoch": 0.2785277102372768, "grad_norm": 0.39869043691758044, "learning_rate": 2e-05, "loss": 5.3824, "step": 8305 }, { "epoch": 0.2785612475895028, "grad_norm": 0.46345384759772207, "learning_rate": 2e-05, "loss": 5.5504, "step": 8306 }, { "epoch": 0.27859478494172885, "grad_norm": 0.4312101469942713, "learning_rate": 2e-05, "loss": 5.5527, "step": 8307 }, { "epoch": 0.2786283222939549, "grad_norm": 0.3957181541618579, "learning_rate": 2e-05, "loss": 5.4386, "step": 8308 }, { "epoch": 0.27866185964618095, "grad_norm": 0.420421404047623, "learning_rate": 2e-05, "loss": 5.5549, "step": 8309 }, { "epoch": 0.27869539699840695, "grad_norm": 0.42157493414193264, "learning_rate": 2e-05, "loss": 5.5336, "step": 8310 }, { "epoch": 0.278728934350633, "grad_norm": 0.4007276729183844, "learning_rate": 2e-05, "loss": 5.6858, "step": 8311 }, { "epoch": 0.27876247170285906, "grad_norm": 0.4161964451974469, "learning_rate": 2e-05, "loss": 5.4866, "step": 8312 }, { "epoch": 0.2787960090550851, "grad_norm": 0.46241103560912045, "learning_rate": 2e-05, "loss": 5.6805, "step": 8313 }, { "epoch": 0.27882954640731117, "grad_norm": 0.4034635958054457, "learning_rate": 2e-05, "loss": 5.59, "step": 8314 }, { "epoch": 0.27886308375953717, "grad_norm": 0.4040410319519543, "learning_rate": 2e-05, "loss": 5.674, "step": 8315 }, { "epoch": 0.2788966211117632, "grad_norm": 0.4662219135615048, "learning_rate": 2e-05, "loss": 5.3912, "step": 8316 }, { "epoch": 0.2789301584639893, "grad_norm": 0.42381763331502853, "learning_rate": 2e-05, "loss": 5.61, "step": 8317 }, { "epoch": 0.27896369581621533, "grad_norm": 0.43262018139375846, "learning_rate": 2e-05, "loss": 5.4629, "step": 8318 }, { "epoch": 0.2789972331684413, "grad_norm": 0.4058466745518152, "learning_rate": 2e-05, "loss": 5.5855, "step": 8319 }, { "epoch": 0.2790307705206674, "grad_norm": 0.4550432151305498, "learning_rate": 2e-05, "loss": 5.5414, "step": 8320 }, { "epoch": 0.27906430787289344, "grad_norm": 0.4014924611415838, "learning_rate": 2e-05, "loss": 5.5503, "step": 8321 }, { "epoch": 0.2790978452251195, "grad_norm": 0.3970490555297612, "learning_rate": 2e-05, "loss": 5.6378, "step": 8322 }, { "epoch": 0.27913138257734554, "grad_norm": 0.3833624644334978, "learning_rate": 2e-05, "loss": 5.5113, "step": 8323 }, { "epoch": 0.27916491992957154, "grad_norm": 0.4004783594501469, "learning_rate": 2e-05, "loss": 5.6776, "step": 8324 }, { "epoch": 0.2791984572817976, "grad_norm": 0.42612094764983965, "learning_rate": 2e-05, "loss": 5.7229, "step": 8325 }, { "epoch": 0.27923199463402365, "grad_norm": 0.44288503881819485, "learning_rate": 2e-05, "loss": 5.6257, "step": 8326 }, { "epoch": 0.2792655319862497, "grad_norm": 0.40219214712004164, "learning_rate": 2e-05, "loss": 5.5055, "step": 8327 }, { "epoch": 0.2792990693384757, "grad_norm": 0.4717153553512655, "learning_rate": 2e-05, "loss": 5.4632, "step": 8328 }, { "epoch": 0.27933260669070176, "grad_norm": 0.41471706637247363, "learning_rate": 2e-05, "loss": 5.5807, "step": 8329 }, { "epoch": 0.2793661440429278, "grad_norm": 0.43210082826834006, "learning_rate": 2e-05, "loss": 5.5136, "step": 8330 }, { "epoch": 0.27939968139515386, "grad_norm": 0.41702651307467753, "learning_rate": 2e-05, "loss": 5.6867, "step": 8331 }, { "epoch": 0.2794332187473799, "grad_norm": 0.3879205602877275, "learning_rate": 2e-05, "loss": 5.5386, "step": 8332 }, { "epoch": 0.2794667560996059, "grad_norm": 0.39898166056803536, "learning_rate": 2e-05, "loss": 5.2333, "step": 8333 }, { "epoch": 0.27950029345183197, "grad_norm": 0.4119123292603056, "learning_rate": 2e-05, "loss": 5.6551, "step": 8334 }, { "epoch": 0.279533830804058, "grad_norm": 0.4114663530347612, "learning_rate": 2e-05, "loss": 5.5915, "step": 8335 }, { "epoch": 0.2795673681562841, "grad_norm": 0.44087714976592135, "learning_rate": 2e-05, "loss": 5.5254, "step": 8336 }, { "epoch": 0.2796009055085101, "grad_norm": 0.41839159189375036, "learning_rate": 2e-05, "loss": 5.4589, "step": 8337 }, { "epoch": 0.27963444286073613, "grad_norm": 0.40296538503541635, "learning_rate": 2e-05, "loss": 5.3507, "step": 8338 }, { "epoch": 0.2796679802129622, "grad_norm": 0.43111956154418646, "learning_rate": 2e-05, "loss": 5.5889, "step": 8339 }, { "epoch": 0.27970151756518824, "grad_norm": 0.4000430347159812, "learning_rate": 2e-05, "loss": 5.5041, "step": 8340 }, { "epoch": 0.2797350549174143, "grad_norm": 0.3993681752874965, "learning_rate": 2e-05, "loss": 5.3922, "step": 8341 }, { "epoch": 0.2797685922696403, "grad_norm": 0.4269726124490717, "learning_rate": 2e-05, "loss": 5.4685, "step": 8342 }, { "epoch": 0.27980212962186635, "grad_norm": 0.4231140357626588, "learning_rate": 2e-05, "loss": 5.6676, "step": 8343 }, { "epoch": 0.2798356669740924, "grad_norm": 0.39593165322320045, "learning_rate": 2e-05, "loss": 5.5349, "step": 8344 }, { "epoch": 0.27986920432631845, "grad_norm": 0.41342569389228795, "learning_rate": 2e-05, "loss": 5.5188, "step": 8345 }, { "epoch": 0.27990274167854445, "grad_norm": 0.3983124445896529, "learning_rate": 2e-05, "loss": 5.4082, "step": 8346 }, { "epoch": 0.2799362790307705, "grad_norm": 0.3936426420042424, "learning_rate": 2e-05, "loss": 5.4611, "step": 8347 }, { "epoch": 0.27996981638299656, "grad_norm": 0.3960653293181765, "learning_rate": 2e-05, "loss": 5.4379, "step": 8348 }, { "epoch": 0.2800033537352226, "grad_norm": 0.4171922234985496, "learning_rate": 2e-05, "loss": 5.2926, "step": 8349 }, { "epoch": 0.28003689108744867, "grad_norm": 0.39133291262218417, "learning_rate": 2e-05, "loss": 5.6419, "step": 8350 }, { "epoch": 0.28007042843967467, "grad_norm": 0.38549371613755923, "learning_rate": 2e-05, "loss": 5.6278, "step": 8351 }, { "epoch": 0.2801039657919007, "grad_norm": 0.39747890820420856, "learning_rate": 2e-05, "loss": 5.7894, "step": 8352 }, { "epoch": 0.2801375031441268, "grad_norm": 0.37981426945734653, "learning_rate": 2e-05, "loss": 5.4379, "step": 8353 }, { "epoch": 0.28017104049635283, "grad_norm": 0.4141329446899798, "learning_rate": 2e-05, "loss": 5.498, "step": 8354 }, { "epoch": 0.28020457784857883, "grad_norm": 0.3952359922074294, "learning_rate": 2e-05, "loss": 5.4438, "step": 8355 }, { "epoch": 0.2802381152008049, "grad_norm": 0.4016467022963427, "learning_rate": 2e-05, "loss": 5.4685, "step": 8356 }, { "epoch": 0.28027165255303094, "grad_norm": 0.4221489678304664, "learning_rate": 2e-05, "loss": 5.4448, "step": 8357 }, { "epoch": 0.280305189905257, "grad_norm": 0.4343469726841569, "learning_rate": 2e-05, "loss": 5.5642, "step": 8358 }, { "epoch": 0.28033872725748304, "grad_norm": 0.40889603001933067, "learning_rate": 2e-05, "loss": 5.4409, "step": 8359 }, { "epoch": 0.28037226460970904, "grad_norm": 0.46204848659338876, "learning_rate": 2e-05, "loss": 5.4592, "step": 8360 }, { "epoch": 0.2804058019619351, "grad_norm": 0.39538533591124053, "learning_rate": 2e-05, "loss": 5.5847, "step": 8361 }, { "epoch": 0.28043933931416115, "grad_norm": 0.3932652755392057, "learning_rate": 2e-05, "loss": 5.5705, "step": 8362 }, { "epoch": 0.2804728766663872, "grad_norm": 0.42550228075514296, "learning_rate": 2e-05, "loss": 5.5375, "step": 8363 }, { "epoch": 0.2805064140186132, "grad_norm": 0.44437547475309797, "learning_rate": 2e-05, "loss": 5.4239, "step": 8364 }, { "epoch": 0.28053995137083926, "grad_norm": 0.40672263462212593, "learning_rate": 2e-05, "loss": 5.4105, "step": 8365 }, { "epoch": 0.2805734887230653, "grad_norm": 0.4277532732094582, "learning_rate": 2e-05, "loss": 5.5853, "step": 8366 }, { "epoch": 0.28060702607529137, "grad_norm": 0.42617485325466664, "learning_rate": 2e-05, "loss": 5.5476, "step": 8367 }, { "epoch": 0.2806405634275174, "grad_norm": 0.4164374144983158, "learning_rate": 2e-05, "loss": 5.5836, "step": 8368 }, { "epoch": 0.2806741007797434, "grad_norm": 0.39804697870634853, "learning_rate": 2e-05, "loss": 5.5051, "step": 8369 }, { "epoch": 0.2807076381319695, "grad_norm": 0.42957212519908744, "learning_rate": 2e-05, "loss": 5.5274, "step": 8370 }, { "epoch": 0.2807411754841955, "grad_norm": 0.4319262963461062, "learning_rate": 2e-05, "loss": 5.505, "step": 8371 }, { "epoch": 0.2807747128364216, "grad_norm": 0.41140407292824877, "learning_rate": 2e-05, "loss": 5.4814, "step": 8372 }, { "epoch": 0.2808082501886476, "grad_norm": 0.4443125052240632, "learning_rate": 2e-05, "loss": 5.4709, "step": 8373 }, { "epoch": 0.28084178754087363, "grad_norm": 0.4208627076695131, "learning_rate": 2e-05, "loss": 5.5824, "step": 8374 }, { "epoch": 0.2808753248930997, "grad_norm": 0.4159116035779284, "learning_rate": 2e-05, "loss": 5.5152, "step": 8375 }, { "epoch": 0.28090886224532574, "grad_norm": 0.39742827606160536, "learning_rate": 2e-05, "loss": 5.6476, "step": 8376 }, { "epoch": 0.2809423995975518, "grad_norm": 0.4261038587333859, "learning_rate": 2e-05, "loss": 5.6034, "step": 8377 }, { "epoch": 0.2809759369497778, "grad_norm": 0.43313282368331507, "learning_rate": 2e-05, "loss": 5.4441, "step": 8378 }, { "epoch": 0.28100947430200385, "grad_norm": 0.4185166418793655, "learning_rate": 2e-05, "loss": 5.61, "step": 8379 }, { "epoch": 0.2810430116542299, "grad_norm": 0.46473197765505647, "learning_rate": 2e-05, "loss": 5.5163, "step": 8380 }, { "epoch": 0.28107654900645596, "grad_norm": 0.40775655105443714, "learning_rate": 2e-05, "loss": 5.5882, "step": 8381 }, { "epoch": 0.281110086358682, "grad_norm": 0.4272646070540617, "learning_rate": 2e-05, "loss": 5.683, "step": 8382 }, { "epoch": 0.281143623710908, "grad_norm": 0.41763672725108886, "learning_rate": 2e-05, "loss": 5.2492, "step": 8383 }, { "epoch": 0.28117716106313406, "grad_norm": 0.3962978209246481, "learning_rate": 2e-05, "loss": 5.5684, "step": 8384 }, { "epoch": 0.2812106984153601, "grad_norm": 0.4412204944389236, "learning_rate": 2e-05, "loss": 5.4894, "step": 8385 }, { "epoch": 0.28124423576758617, "grad_norm": 0.41092209722894774, "learning_rate": 2e-05, "loss": 5.5117, "step": 8386 }, { "epoch": 0.28127777311981217, "grad_norm": 0.39217805750082646, "learning_rate": 2e-05, "loss": 5.6794, "step": 8387 }, { "epoch": 0.2813113104720382, "grad_norm": 0.4999281238090629, "learning_rate": 2e-05, "loss": 5.5324, "step": 8388 }, { "epoch": 0.2813448478242643, "grad_norm": 0.43413803950484336, "learning_rate": 2e-05, "loss": 5.3056, "step": 8389 }, { "epoch": 0.28137838517649033, "grad_norm": 0.403900109561623, "learning_rate": 2e-05, "loss": 5.6747, "step": 8390 }, { "epoch": 0.2814119225287164, "grad_norm": 0.3911802307459027, "learning_rate": 2e-05, "loss": 5.4916, "step": 8391 }, { "epoch": 0.2814454598809424, "grad_norm": 0.4006970039119402, "learning_rate": 2e-05, "loss": 5.5071, "step": 8392 }, { "epoch": 0.28147899723316844, "grad_norm": 0.4376029893277018, "learning_rate": 2e-05, "loss": 5.5444, "step": 8393 }, { "epoch": 0.2815125345853945, "grad_norm": 0.4014654154706057, "learning_rate": 2e-05, "loss": 5.7488, "step": 8394 }, { "epoch": 0.28154607193762055, "grad_norm": 0.3973410747690381, "learning_rate": 2e-05, "loss": 5.5451, "step": 8395 }, { "epoch": 0.28157960928984654, "grad_norm": 0.40608625811761034, "learning_rate": 2e-05, "loss": 5.5652, "step": 8396 }, { "epoch": 0.2816131466420726, "grad_norm": 0.39099963193346077, "learning_rate": 2e-05, "loss": 5.5568, "step": 8397 }, { "epoch": 0.28164668399429865, "grad_norm": 0.41841234327795046, "learning_rate": 2e-05, "loss": 5.6667, "step": 8398 }, { "epoch": 0.2816802213465247, "grad_norm": 0.405745372357954, "learning_rate": 2e-05, "loss": 5.488, "step": 8399 }, { "epoch": 0.28171375869875076, "grad_norm": 0.4271525691093331, "learning_rate": 2e-05, "loss": 5.449, "step": 8400 }, { "epoch": 0.28174729605097676, "grad_norm": 0.38353977007279916, "learning_rate": 2e-05, "loss": 5.3337, "step": 8401 }, { "epoch": 0.2817808334032028, "grad_norm": 0.4521131379207844, "learning_rate": 2e-05, "loss": 5.5338, "step": 8402 }, { "epoch": 0.28181437075542887, "grad_norm": 0.39152745531022143, "learning_rate": 2e-05, "loss": 5.7784, "step": 8403 }, { "epoch": 0.2818479081076549, "grad_norm": 0.4041294949808349, "learning_rate": 2e-05, "loss": 5.3716, "step": 8404 }, { "epoch": 0.2818814454598809, "grad_norm": 0.390438511645858, "learning_rate": 2e-05, "loss": 5.5715, "step": 8405 }, { "epoch": 0.281914982812107, "grad_norm": 0.44374659627856305, "learning_rate": 2e-05, "loss": 5.4158, "step": 8406 }, { "epoch": 0.281948520164333, "grad_norm": 0.42743473703845153, "learning_rate": 2e-05, "loss": 5.5238, "step": 8407 }, { "epoch": 0.2819820575165591, "grad_norm": 0.44930793113378076, "learning_rate": 2e-05, "loss": 5.6152, "step": 8408 }, { "epoch": 0.28201559486878514, "grad_norm": 0.39743317628595226, "learning_rate": 2e-05, "loss": 5.5115, "step": 8409 }, { "epoch": 0.28204913222101113, "grad_norm": 0.4093378138973006, "learning_rate": 2e-05, "loss": 5.3472, "step": 8410 }, { "epoch": 0.2820826695732372, "grad_norm": 0.4333654875780024, "learning_rate": 2e-05, "loss": 5.3155, "step": 8411 }, { "epoch": 0.28211620692546324, "grad_norm": 0.4239045353338963, "learning_rate": 2e-05, "loss": 5.4705, "step": 8412 }, { "epoch": 0.2821497442776893, "grad_norm": 0.45221124882520536, "learning_rate": 2e-05, "loss": 5.585, "step": 8413 }, { "epoch": 0.2821832816299153, "grad_norm": 0.42221292904940383, "learning_rate": 2e-05, "loss": 5.5276, "step": 8414 }, { "epoch": 0.28221681898214135, "grad_norm": 0.38783958984433237, "learning_rate": 2e-05, "loss": 5.5114, "step": 8415 }, { "epoch": 0.2822503563343674, "grad_norm": 0.4300591688338691, "learning_rate": 2e-05, "loss": 5.9331, "step": 8416 }, { "epoch": 0.28228389368659346, "grad_norm": 0.42389069711887134, "learning_rate": 2e-05, "loss": 5.5042, "step": 8417 }, { "epoch": 0.2823174310388195, "grad_norm": 0.39642560626189627, "learning_rate": 2e-05, "loss": 5.4233, "step": 8418 }, { "epoch": 0.2823509683910455, "grad_norm": 0.4301222598357932, "learning_rate": 2e-05, "loss": 5.5845, "step": 8419 }, { "epoch": 0.28238450574327156, "grad_norm": 0.39930695797191335, "learning_rate": 2e-05, "loss": 5.5608, "step": 8420 }, { "epoch": 0.2824180430954976, "grad_norm": 0.3768008083063502, "learning_rate": 2e-05, "loss": 5.6427, "step": 8421 }, { "epoch": 0.28245158044772367, "grad_norm": 0.40431622922100546, "learning_rate": 2e-05, "loss": 5.3171, "step": 8422 }, { "epoch": 0.28248511779994967, "grad_norm": 0.3949337689552143, "learning_rate": 2e-05, "loss": 5.7344, "step": 8423 }, { "epoch": 0.2825186551521757, "grad_norm": 0.41103582462348653, "learning_rate": 2e-05, "loss": 5.4397, "step": 8424 }, { "epoch": 0.2825521925044018, "grad_norm": 0.4010333909217831, "learning_rate": 2e-05, "loss": 5.3539, "step": 8425 }, { "epoch": 0.28258572985662783, "grad_norm": 0.36100294092741975, "learning_rate": 2e-05, "loss": 5.4601, "step": 8426 }, { "epoch": 0.2826192672088539, "grad_norm": 0.4423519870253353, "learning_rate": 2e-05, "loss": 5.4333, "step": 8427 }, { "epoch": 0.2826528045610799, "grad_norm": 0.3867159309300517, "learning_rate": 2e-05, "loss": 5.6488, "step": 8428 }, { "epoch": 0.28268634191330594, "grad_norm": 0.43063853250831324, "learning_rate": 2e-05, "loss": 5.5442, "step": 8429 }, { "epoch": 0.282719879265532, "grad_norm": 0.40002744445259203, "learning_rate": 2e-05, "loss": 5.7188, "step": 8430 }, { "epoch": 0.28275341661775805, "grad_norm": 0.41684440634868014, "learning_rate": 2e-05, "loss": 5.4339, "step": 8431 }, { "epoch": 0.28278695396998405, "grad_norm": 0.3833431169460291, "learning_rate": 2e-05, "loss": 5.6591, "step": 8432 }, { "epoch": 0.2828204913222101, "grad_norm": 0.4016826942106243, "learning_rate": 2e-05, "loss": 5.5672, "step": 8433 }, { "epoch": 0.28285402867443615, "grad_norm": 0.4479900395722954, "learning_rate": 2e-05, "loss": 5.5592, "step": 8434 }, { "epoch": 0.2828875660266622, "grad_norm": 0.4048826173470439, "learning_rate": 2e-05, "loss": 5.3146, "step": 8435 }, { "epoch": 0.28292110337888826, "grad_norm": 0.39542866024977075, "learning_rate": 2e-05, "loss": 5.497, "step": 8436 }, { "epoch": 0.28295464073111426, "grad_norm": 0.40800302210624184, "learning_rate": 2e-05, "loss": 5.7216, "step": 8437 }, { "epoch": 0.2829881780833403, "grad_norm": 0.40762288674055247, "learning_rate": 2e-05, "loss": 5.7216, "step": 8438 }, { "epoch": 0.28302171543556637, "grad_norm": 0.3777377030748909, "learning_rate": 2e-05, "loss": 5.5366, "step": 8439 }, { "epoch": 0.2830552527877924, "grad_norm": 0.42333140198364333, "learning_rate": 2e-05, "loss": 5.4341, "step": 8440 }, { "epoch": 0.2830887901400184, "grad_norm": 0.40681038065526526, "learning_rate": 2e-05, "loss": 5.3863, "step": 8441 }, { "epoch": 0.2831223274922445, "grad_norm": 0.3995225604766671, "learning_rate": 2e-05, "loss": 5.3829, "step": 8442 }, { "epoch": 0.28315586484447053, "grad_norm": 0.43901404392587656, "learning_rate": 2e-05, "loss": 5.6635, "step": 8443 }, { "epoch": 0.2831894021966966, "grad_norm": 0.45441319164437183, "learning_rate": 2e-05, "loss": 5.4508, "step": 8444 }, { "epoch": 0.28322293954892264, "grad_norm": 0.3987260805951781, "learning_rate": 2e-05, "loss": 5.6069, "step": 8445 }, { "epoch": 0.28325647690114863, "grad_norm": 0.41303218748213005, "learning_rate": 2e-05, "loss": 5.5261, "step": 8446 }, { "epoch": 0.2832900142533747, "grad_norm": 0.43193856345572396, "learning_rate": 2e-05, "loss": 5.5273, "step": 8447 }, { "epoch": 0.28332355160560074, "grad_norm": 0.4213278349185551, "learning_rate": 2e-05, "loss": 5.3458, "step": 8448 }, { "epoch": 0.2833570889578268, "grad_norm": 0.4489244469276429, "learning_rate": 2e-05, "loss": 5.5319, "step": 8449 }, { "epoch": 0.2833906263100528, "grad_norm": 0.42561257043708195, "learning_rate": 2e-05, "loss": 5.506, "step": 8450 }, { "epoch": 0.28342416366227885, "grad_norm": 0.4793220082703101, "learning_rate": 2e-05, "loss": 5.484, "step": 8451 }, { "epoch": 0.2834577010145049, "grad_norm": 0.40997069880393117, "learning_rate": 2e-05, "loss": 5.5753, "step": 8452 }, { "epoch": 0.28349123836673096, "grad_norm": 0.39921584184324255, "learning_rate": 2e-05, "loss": 5.4818, "step": 8453 }, { "epoch": 0.283524775718957, "grad_norm": 0.4008606128459378, "learning_rate": 2e-05, "loss": 5.6942, "step": 8454 }, { "epoch": 0.283558313071183, "grad_norm": 0.5251187651665927, "learning_rate": 2e-05, "loss": 5.5756, "step": 8455 }, { "epoch": 0.28359185042340906, "grad_norm": 0.38228748636529475, "learning_rate": 2e-05, "loss": 5.6375, "step": 8456 }, { "epoch": 0.2836253877756351, "grad_norm": 0.4210933354862932, "learning_rate": 2e-05, "loss": 5.5216, "step": 8457 }, { "epoch": 0.2836589251278612, "grad_norm": 0.44519041935695375, "learning_rate": 2e-05, "loss": 5.7154, "step": 8458 }, { "epoch": 0.28369246248008717, "grad_norm": 0.4247397719799459, "learning_rate": 2e-05, "loss": 5.5674, "step": 8459 }, { "epoch": 0.2837259998323132, "grad_norm": 0.4218813338455458, "learning_rate": 2e-05, "loss": 5.7042, "step": 8460 }, { "epoch": 0.2837595371845393, "grad_norm": 0.4032460904209926, "learning_rate": 2e-05, "loss": 5.6412, "step": 8461 }, { "epoch": 0.28379307453676533, "grad_norm": 0.4127639355374321, "learning_rate": 2e-05, "loss": 5.4623, "step": 8462 }, { "epoch": 0.2838266118889914, "grad_norm": 0.3928372744067844, "learning_rate": 2e-05, "loss": 5.5639, "step": 8463 }, { "epoch": 0.2838601492412174, "grad_norm": 0.40643363741096294, "learning_rate": 2e-05, "loss": 5.6006, "step": 8464 }, { "epoch": 0.28389368659344344, "grad_norm": 0.41232769766509253, "learning_rate": 2e-05, "loss": 5.3202, "step": 8465 }, { "epoch": 0.2839272239456695, "grad_norm": 0.40534495927896574, "learning_rate": 2e-05, "loss": 5.5848, "step": 8466 }, { "epoch": 0.28396076129789555, "grad_norm": 0.4637055433685565, "learning_rate": 2e-05, "loss": 5.4653, "step": 8467 }, { "epoch": 0.28399429865012155, "grad_norm": 0.42689434143147853, "learning_rate": 2e-05, "loss": 5.6845, "step": 8468 }, { "epoch": 0.2840278360023476, "grad_norm": 0.38851316968025007, "learning_rate": 2e-05, "loss": 5.5476, "step": 8469 }, { "epoch": 0.28406137335457365, "grad_norm": 0.40913783499900874, "learning_rate": 2e-05, "loss": 5.5476, "step": 8470 }, { "epoch": 0.2840949107067997, "grad_norm": 0.40705567572367524, "learning_rate": 2e-05, "loss": 5.6925, "step": 8471 }, { "epoch": 0.28412844805902576, "grad_norm": 0.44875052030136964, "learning_rate": 2e-05, "loss": 5.3552, "step": 8472 }, { "epoch": 0.28416198541125176, "grad_norm": 0.4204455442586745, "learning_rate": 2e-05, "loss": 5.3863, "step": 8473 }, { "epoch": 0.2841955227634778, "grad_norm": 0.4112896128819815, "learning_rate": 2e-05, "loss": 5.6084, "step": 8474 }, { "epoch": 0.28422906011570387, "grad_norm": 0.4306889512120822, "learning_rate": 2e-05, "loss": 5.3629, "step": 8475 }, { "epoch": 0.2842625974679299, "grad_norm": 0.3929051943525914, "learning_rate": 2e-05, "loss": 5.6221, "step": 8476 }, { "epoch": 0.2842961348201559, "grad_norm": 0.40726864395832324, "learning_rate": 2e-05, "loss": 5.5622, "step": 8477 }, { "epoch": 0.284329672172382, "grad_norm": 0.4156743999914452, "learning_rate": 2e-05, "loss": 5.5114, "step": 8478 }, { "epoch": 0.28436320952460803, "grad_norm": 0.4033175583058278, "learning_rate": 2e-05, "loss": 5.5351, "step": 8479 }, { "epoch": 0.2843967468768341, "grad_norm": 0.4292364530167875, "learning_rate": 2e-05, "loss": 5.5945, "step": 8480 }, { "epoch": 0.28443028422906014, "grad_norm": 0.4174500497253605, "learning_rate": 2e-05, "loss": 5.4301, "step": 8481 }, { "epoch": 0.28446382158128614, "grad_norm": 0.4192296322798615, "learning_rate": 2e-05, "loss": 5.486, "step": 8482 }, { "epoch": 0.2844973589335122, "grad_norm": 0.40150847264421513, "learning_rate": 2e-05, "loss": 5.4806, "step": 8483 }, { "epoch": 0.28453089628573824, "grad_norm": 0.40750591691833254, "learning_rate": 2e-05, "loss": 5.5734, "step": 8484 }, { "epoch": 0.2845644336379643, "grad_norm": 0.41755756112783243, "learning_rate": 2e-05, "loss": 5.47, "step": 8485 }, { "epoch": 0.28459797099019035, "grad_norm": 0.4410772063230114, "learning_rate": 2e-05, "loss": 5.5756, "step": 8486 }, { "epoch": 0.28463150834241635, "grad_norm": 0.40713604141993825, "learning_rate": 2e-05, "loss": 5.6541, "step": 8487 }, { "epoch": 0.2846650456946424, "grad_norm": 0.4355033186532311, "learning_rate": 2e-05, "loss": 5.5818, "step": 8488 }, { "epoch": 0.28469858304686846, "grad_norm": 0.4483455802557622, "learning_rate": 2e-05, "loss": 5.6792, "step": 8489 }, { "epoch": 0.2847321203990945, "grad_norm": 0.4144503830923972, "learning_rate": 2e-05, "loss": 5.5016, "step": 8490 }, { "epoch": 0.2847656577513205, "grad_norm": 0.41008687807392896, "learning_rate": 2e-05, "loss": 5.3751, "step": 8491 }, { "epoch": 0.28479919510354657, "grad_norm": 0.42910251022027124, "learning_rate": 2e-05, "loss": 5.5209, "step": 8492 }, { "epoch": 0.2848327324557726, "grad_norm": 0.4000138353354615, "learning_rate": 2e-05, "loss": 5.7027, "step": 8493 }, { "epoch": 0.2848662698079987, "grad_norm": 0.40393143825763966, "learning_rate": 2e-05, "loss": 5.555, "step": 8494 }, { "epoch": 0.2848998071602247, "grad_norm": 0.435660331962526, "learning_rate": 2e-05, "loss": 5.4337, "step": 8495 }, { "epoch": 0.2849333445124507, "grad_norm": 0.37952850505829533, "learning_rate": 2e-05, "loss": 5.4938, "step": 8496 }, { "epoch": 0.2849668818646768, "grad_norm": 0.4502634056619968, "learning_rate": 2e-05, "loss": 5.7292, "step": 8497 }, { "epoch": 0.28500041921690283, "grad_norm": 0.38383011682731577, "learning_rate": 2e-05, "loss": 5.6573, "step": 8498 }, { "epoch": 0.2850339565691289, "grad_norm": 0.3884350721941835, "learning_rate": 2e-05, "loss": 5.4546, "step": 8499 }, { "epoch": 0.2850674939213549, "grad_norm": 0.4093736438176435, "learning_rate": 2e-05, "loss": 5.4774, "step": 8500 }, { "epoch": 0.28510103127358094, "grad_norm": 0.3936696213578012, "learning_rate": 2e-05, "loss": 5.5804, "step": 8501 }, { "epoch": 0.285134568625807, "grad_norm": 0.4037163777400249, "learning_rate": 2e-05, "loss": 5.7191, "step": 8502 }, { "epoch": 0.28516810597803305, "grad_norm": 0.39886978865620887, "learning_rate": 2e-05, "loss": 5.4243, "step": 8503 }, { "epoch": 0.2852016433302591, "grad_norm": 0.3833676860428376, "learning_rate": 2e-05, "loss": 5.5256, "step": 8504 }, { "epoch": 0.2852351806824851, "grad_norm": 0.4207849574709601, "learning_rate": 2e-05, "loss": 5.4211, "step": 8505 }, { "epoch": 0.28526871803471116, "grad_norm": 0.41535949581782466, "learning_rate": 2e-05, "loss": 5.3496, "step": 8506 }, { "epoch": 0.2853022553869372, "grad_norm": 0.38108883243815417, "learning_rate": 2e-05, "loss": 5.57, "step": 8507 }, { "epoch": 0.28533579273916326, "grad_norm": 0.3903583746396262, "learning_rate": 2e-05, "loss": 5.4527, "step": 8508 }, { "epoch": 0.28536933009138926, "grad_norm": 0.41056567273775174, "learning_rate": 2e-05, "loss": 5.5417, "step": 8509 }, { "epoch": 0.2854028674436153, "grad_norm": 0.38639501885377103, "learning_rate": 2e-05, "loss": 5.6836, "step": 8510 }, { "epoch": 0.28543640479584137, "grad_norm": 0.38601726641221096, "learning_rate": 2e-05, "loss": 5.6977, "step": 8511 }, { "epoch": 0.2854699421480674, "grad_norm": 0.42356114080391555, "learning_rate": 2e-05, "loss": 5.4896, "step": 8512 }, { "epoch": 0.2855034795002935, "grad_norm": 0.3861725960241148, "learning_rate": 2e-05, "loss": 5.5167, "step": 8513 }, { "epoch": 0.2855370168525195, "grad_norm": 0.4063428476250139, "learning_rate": 2e-05, "loss": 5.5284, "step": 8514 }, { "epoch": 0.28557055420474553, "grad_norm": 0.3819961395770903, "learning_rate": 2e-05, "loss": 5.5576, "step": 8515 }, { "epoch": 0.2856040915569716, "grad_norm": 0.40584722903861203, "learning_rate": 2e-05, "loss": 5.6166, "step": 8516 }, { "epoch": 0.28563762890919764, "grad_norm": 0.3757313371864102, "learning_rate": 2e-05, "loss": 5.5596, "step": 8517 }, { "epoch": 0.28567116626142364, "grad_norm": 0.3953331379220096, "learning_rate": 2e-05, "loss": 5.6351, "step": 8518 }, { "epoch": 0.2857047036136497, "grad_norm": 0.37938890768919065, "learning_rate": 2e-05, "loss": 5.4841, "step": 8519 }, { "epoch": 0.28573824096587574, "grad_norm": 0.41405700941014406, "learning_rate": 2e-05, "loss": 5.521, "step": 8520 }, { "epoch": 0.2857717783181018, "grad_norm": 0.3986188702218795, "learning_rate": 2e-05, "loss": 5.6755, "step": 8521 }, { "epoch": 0.28580531567032785, "grad_norm": 0.3994915301534736, "learning_rate": 2e-05, "loss": 5.552, "step": 8522 }, { "epoch": 0.28583885302255385, "grad_norm": 0.4186648472163571, "learning_rate": 2e-05, "loss": 5.4567, "step": 8523 }, { "epoch": 0.2858723903747799, "grad_norm": 0.3954734985169546, "learning_rate": 2e-05, "loss": 5.7606, "step": 8524 }, { "epoch": 0.28590592772700596, "grad_norm": 0.40197293187456656, "learning_rate": 2e-05, "loss": 5.3793, "step": 8525 }, { "epoch": 0.285939465079232, "grad_norm": 0.41505563509888355, "learning_rate": 2e-05, "loss": 5.6315, "step": 8526 }, { "epoch": 0.285973002431458, "grad_norm": 0.4216178804400487, "learning_rate": 2e-05, "loss": 5.592, "step": 8527 }, { "epoch": 0.28600653978368407, "grad_norm": 0.3985161826098693, "learning_rate": 2e-05, "loss": 5.5577, "step": 8528 }, { "epoch": 0.2860400771359101, "grad_norm": 0.4458361313622213, "learning_rate": 2e-05, "loss": 5.3916, "step": 8529 }, { "epoch": 0.2860736144881362, "grad_norm": 0.38980870322824757, "learning_rate": 2e-05, "loss": 5.5221, "step": 8530 }, { "epoch": 0.28610715184036223, "grad_norm": 0.3875874539452723, "learning_rate": 2e-05, "loss": 5.4584, "step": 8531 }, { "epoch": 0.2861406891925882, "grad_norm": 0.4708709496681643, "learning_rate": 2e-05, "loss": 5.4146, "step": 8532 }, { "epoch": 0.2861742265448143, "grad_norm": 0.42269791378163607, "learning_rate": 2e-05, "loss": 5.5517, "step": 8533 }, { "epoch": 0.28620776389704033, "grad_norm": 0.38755166317677553, "learning_rate": 2e-05, "loss": 5.6283, "step": 8534 }, { "epoch": 0.2862413012492664, "grad_norm": 0.41633639925456045, "learning_rate": 2e-05, "loss": 5.6394, "step": 8535 }, { "epoch": 0.2862748386014924, "grad_norm": 0.42047163652790215, "learning_rate": 2e-05, "loss": 5.5201, "step": 8536 }, { "epoch": 0.28630837595371844, "grad_norm": 0.39624102799924327, "learning_rate": 2e-05, "loss": 5.8285, "step": 8537 }, { "epoch": 0.2863419133059445, "grad_norm": 0.40155062992648094, "learning_rate": 2e-05, "loss": 5.5371, "step": 8538 }, { "epoch": 0.28637545065817055, "grad_norm": 0.4373682748704607, "learning_rate": 2e-05, "loss": 5.2793, "step": 8539 }, { "epoch": 0.2864089880103966, "grad_norm": 0.41017016945652457, "learning_rate": 2e-05, "loss": 5.3579, "step": 8540 }, { "epoch": 0.2864425253626226, "grad_norm": 0.4083593831088005, "learning_rate": 2e-05, "loss": 5.4596, "step": 8541 }, { "epoch": 0.28647606271484866, "grad_norm": 0.4103624165911699, "learning_rate": 2e-05, "loss": 5.5012, "step": 8542 }, { "epoch": 0.2865096000670747, "grad_norm": 0.4147516895251568, "learning_rate": 2e-05, "loss": 5.4972, "step": 8543 }, { "epoch": 0.28654313741930076, "grad_norm": 0.3850518070452749, "learning_rate": 2e-05, "loss": 5.6361, "step": 8544 }, { "epoch": 0.28657667477152676, "grad_norm": 0.413206216042355, "learning_rate": 2e-05, "loss": 5.7164, "step": 8545 }, { "epoch": 0.2866102121237528, "grad_norm": 0.41883788001540917, "learning_rate": 2e-05, "loss": 5.5113, "step": 8546 }, { "epoch": 0.28664374947597887, "grad_norm": 0.4094830732604075, "learning_rate": 2e-05, "loss": 5.6548, "step": 8547 }, { "epoch": 0.2866772868282049, "grad_norm": 0.4177745093142573, "learning_rate": 2e-05, "loss": 5.7922, "step": 8548 }, { "epoch": 0.286710824180431, "grad_norm": 0.41929531443496926, "learning_rate": 2e-05, "loss": 5.6003, "step": 8549 }, { "epoch": 0.286744361532657, "grad_norm": 0.40172075116546463, "learning_rate": 2e-05, "loss": 5.7284, "step": 8550 }, { "epoch": 0.28677789888488303, "grad_norm": 0.4303572788676747, "learning_rate": 2e-05, "loss": 5.5937, "step": 8551 }, { "epoch": 0.2868114362371091, "grad_norm": 0.40989990885278027, "learning_rate": 2e-05, "loss": 5.6157, "step": 8552 }, { "epoch": 0.28684497358933514, "grad_norm": 0.4244187253078759, "learning_rate": 2e-05, "loss": 5.3552, "step": 8553 }, { "epoch": 0.28687851094156114, "grad_norm": 0.37587987481181284, "learning_rate": 2e-05, "loss": 5.5629, "step": 8554 }, { "epoch": 0.2869120482937872, "grad_norm": 0.40027569318648004, "learning_rate": 2e-05, "loss": 5.4725, "step": 8555 }, { "epoch": 0.28694558564601325, "grad_norm": 0.411813855809605, "learning_rate": 2e-05, "loss": 5.4854, "step": 8556 }, { "epoch": 0.2869791229982393, "grad_norm": 0.40915421218561626, "learning_rate": 2e-05, "loss": 5.532, "step": 8557 }, { "epoch": 0.28701266035046535, "grad_norm": 0.4364282324369869, "learning_rate": 2e-05, "loss": 5.5901, "step": 8558 }, { "epoch": 0.28704619770269135, "grad_norm": 0.4189333652311453, "learning_rate": 2e-05, "loss": 5.6639, "step": 8559 }, { "epoch": 0.2870797350549174, "grad_norm": 0.3764880058462476, "learning_rate": 2e-05, "loss": 5.674, "step": 8560 }, { "epoch": 0.28711327240714346, "grad_norm": 0.427347350158335, "learning_rate": 2e-05, "loss": 5.676, "step": 8561 }, { "epoch": 0.2871468097593695, "grad_norm": 0.39495138641477096, "learning_rate": 2e-05, "loss": 5.4445, "step": 8562 }, { "epoch": 0.2871803471115955, "grad_norm": 0.3938950061964623, "learning_rate": 2e-05, "loss": 5.5021, "step": 8563 }, { "epoch": 0.28721388446382157, "grad_norm": 0.40322774933374456, "learning_rate": 2e-05, "loss": 5.2214, "step": 8564 }, { "epoch": 0.2872474218160476, "grad_norm": 0.40924771772470614, "learning_rate": 2e-05, "loss": 5.5355, "step": 8565 }, { "epoch": 0.2872809591682737, "grad_norm": 0.3933593672814951, "learning_rate": 2e-05, "loss": 5.5571, "step": 8566 }, { "epoch": 0.28731449652049973, "grad_norm": 0.4176013833577063, "learning_rate": 2e-05, "loss": 5.4602, "step": 8567 }, { "epoch": 0.2873480338727257, "grad_norm": 0.3886540103159703, "learning_rate": 2e-05, "loss": 5.5084, "step": 8568 }, { "epoch": 0.2873815712249518, "grad_norm": 0.3920361190768572, "learning_rate": 2e-05, "loss": 5.5876, "step": 8569 }, { "epoch": 0.28741510857717784, "grad_norm": 0.4765206189795333, "learning_rate": 2e-05, "loss": 5.5067, "step": 8570 }, { "epoch": 0.2874486459294039, "grad_norm": 0.4177051997412381, "learning_rate": 2e-05, "loss": 5.6645, "step": 8571 }, { "epoch": 0.2874821832816299, "grad_norm": 0.3976478303548399, "learning_rate": 2e-05, "loss": 5.3653, "step": 8572 }, { "epoch": 0.28751572063385594, "grad_norm": 0.38306630162710953, "learning_rate": 2e-05, "loss": 5.3493, "step": 8573 }, { "epoch": 0.287549257986082, "grad_norm": 0.4356671832944413, "learning_rate": 2e-05, "loss": 5.5448, "step": 8574 }, { "epoch": 0.28758279533830805, "grad_norm": 0.4170171795782599, "learning_rate": 2e-05, "loss": 5.6284, "step": 8575 }, { "epoch": 0.2876163326905341, "grad_norm": 0.41323423174094165, "learning_rate": 2e-05, "loss": 5.5564, "step": 8576 }, { "epoch": 0.2876498700427601, "grad_norm": 0.41768607626672943, "learning_rate": 2e-05, "loss": 5.4195, "step": 8577 }, { "epoch": 0.28768340739498616, "grad_norm": 0.4182938844670882, "learning_rate": 2e-05, "loss": 5.555, "step": 8578 }, { "epoch": 0.2877169447472122, "grad_norm": 0.4043935041142821, "learning_rate": 2e-05, "loss": 5.535, "step": 8579 }, { "epoch": 0.28775048209943827, "grad_norm": 0.41757121092960603, "learning_rate": 2e-05, "loss": 5.5604, "step": 8580 }, { "epoch": 0.28778401945166426, "grad_norm": 0.4217770177703419, "learning_rate": 2e-05, "loss": 5.592, "step": 8581 }, { "epoch": 0.2878175568038903, "grad_norm": 0.40737761239766335, "learning_rate": 2e-05, "loss": 5.5618, "step": 8582 }, { "epoch": 0.28785109415611637, "grad_norm": 0.4181720185243025, "learning_rate": 2e-05, "loss": 5.4159, "step": 8583 }, { "epoch": 0.2878846315083424, "grad_norm": 0.45809206165639915, "learning_rate": 2e-05, "loss": 5.639, "step": 8584 }, { "epoch": 0.2879181688605685, "grad_norm": 0.4174608036278582, "learning_rate": 2e-05, "loss": 5.6469, "step": 8585 }, { "epoch": 0.2879517062127945, "grad_norm": 0.4106680350873037, "learning_rate": 2e-05, "loss": 5.4489, "step": 8586 }, { "epoch": 0.28798524356502053, "grad_norm": 0.42698561969611104, "learning_rate": 2e-05, "loss": 5.888, "step": 8587 }, { "epoch": 0.2880187809172466, "grad_norm": 0.48247901598162274, "learning_rate": 2e-05, "loss": 5.4183, "step": 8588 }, { "epoch": 0.28805231826947264, "grad_norm": 0.448886455709332, "learning_rate": 2e-05, "loss": 5.5896, "step": 8589 }, { "epoch": 0.2880858556216987, "grad_norm": 0.40382701643027846, "learning_rate": 2e-05, "loss": 5.6013, "step": 8590 }, { "epoch": 0.2881193929739247, "grad_norm": 0.3960452948363658, "learning_rate": 2e-05, "loss": 5.703, "step": 8591 }, { "epoch": 0.28815293032615075, "grad_norm": 0.4299938605908345, "learning_rate": 2e-05, "loss": 5.7009, "step": 8592 }, { "epoch": 0.2881864676783768, "grad_norm": 0.4098429273633442, "learning_rate": 2e-05, "loss": 5.4623, "step": 8593 }, { "epoch": 0.28822000503060285, "grad_norm": 0.4577466809237201, "learning_rate": 2e-05, "loss": 5.4404, "step": 8594 }, { "epoch": 0.28825354238282885, "grad_norm": 0.40922864317226687, "learning_rate": 2e-05, "loss": 5.6572, "step": 8595 }, { "epoch": 0.2882870797350549, "grad_norm": 0.41283072878642435, "learning_rate": 2e-05, "loss": 5.5006, "step": 8596 }, { "epoch": 0.28832061708728096, "grad_norm": 0.40815344723618685, "learning_rate": 2e-05, "loss": 5.6544, "step": 8597 }, { "epoch": 0.288354154439507, "grad_norm": 0.4331832399083643, "learning_rate": 2e-05, "loss": 5.6204, "step": 8598 }, { "epoch": 0.28838769179173307, "grad_norm": 0.42440236047044166, "learning_rate": 2e-05, "loss": 5.6595, "step": 8599 }, { "epoch": 0.28842122914395907, "grad_norm": 0.417475281460208, "learning_rate": 2e-05, "loss": 5.4685, "step": 8600 }, { "epoch": 0.2884547664961851, "grad_norm": 0.423359236252886, "learning_rate": 2e-05, "loss": 5.5284, "step": 8601 }, { "epoch": 0.2884883038484112, "grad_norm": 0.4299233685884682, "learning_rate": 2e-05, "loss": 5.6648, "step": 8602 }, { "epoch": 0.28852184120063723, "grad_norm": 0.4238345594538399, "learning_rate": 2e-05, "loss": 5.2537, "step": 8603 }, { "epoch": 0.28855537855286323, "grad_norm": 0.4661271725611311, "learning_rate": 2e-05, "loss": 5.6437, "step": 8604 }, { "epoch": 0.2885889159050893, "grad_norm": 0.43683321696410826, "learning_rate": 2e-05, "loss": 5.3788, "step": 8605 }, { "epoch": 0.28862245325731534, "grad_norm": 0.42558093627055554, "learning_rate": 2e-05, "loss": 5.6278, "step": 8606 }, { "epoch": 0.2886559906095414, "grad_norm": 0.4219176756649824, "learning_rate": 2e-05, "loss": 5.7116, "step": 8607 }, { "epoch": 0.28868952796176744, "grad_norm": 0.4145119749500734, "learning_rate": 2e-05, "loss": 5.3165, "step": 8608 }, { "epoch": 0.28872306531399344, "grad_norm": 0.4030401725346359, "learning_rate": 2e-05, "loss": 5.5175, "step": 8609 }, { "epoch": 0.2887566026662195, "grad_norm": 0.44967043323308076, "learning_rate": 2e-05, "loss": 5.5527, "step": 8610 }, { "epoch": 0.28879014001844555, "grad_norm": 0.3986782117482476, "learning_rate": 2e-05, "loss": 5.5199, "step": 8611 }, { "epoch": 0.2888236773706716, "grad_norm": 0.41616194092523084, "learning_rate": 2e-05, "loss": 5.4107, "step": 8612 }, { "epoch": 0.2888572147228976, "grad_norm": 0.42783938336714206, "learning_rate": 2e-05, "loss": 5.4772, "step": 8613 }, { "epoch": 0.28889075207512366, "grad_norm": 0.3893984229946606, "learning_rate": 2e-05, "loss": 5.6607, "step": 8614 }, { "epoch": 0.2889242894273497, "grad_norm": 0.40353256011421457, "learning_rate": 2e-05, "loss": 5.4805, "step": 8615 }, { "epoch": 0.28895782677957577, "grad_norm": 0.39522185747490735, "learning_rate": 2e-05, "loss": 5.3213, "step": 8616 }, { "epoch": 0.2889913641318018, "grad_norm": 0.412832369722458, "learning_rate": 2e-05, "loss": 5.5389, "step": 8617 }, { "epoch": 0.2890249014840278, "grad_norm": 0.40285629146427715, "learning_rate": 2e-05, "loss": 5.5928, "step": 8618 }, { "epoch": 0.2890584388362539, "grad_norm": 0.42614399553299503, "learning_rate": 2e-05, "loss": 5.4828, "step": 8619 }, { "epoch": 0.2890919761884799, "grad_norm": 0.4059824819741172, "learning_rate": 2e-05, "loss": 5.6643, "step": 8620 }, { "epoch": 0.289125513540706, "grad_norm": 0.434881169006611, "learning_rate": 2e-05, "loss": 5.5896, "step": 8621 }, { "epoch": 0.289159050892932, "grad_norm": 0.39475254403101795, "learning_rate": 2e-05, "loss": 5.6725, "step": 8622 }, { "epoch": 0.28919258824515803, "grad_norm": 0.4148736035373595, "learning_rate": 2e-05, "loss": 5.6076, "step": 8623 }, { "epoch": 0.2892261255973841, "grad_norm": 0.4152249060966152, "learning_rate": 2e-05, "loss": 5.5133, "step": 8624 }, { "epoch": 0.28925966294961014, "grad_norm": 0.3880298764688737, "learning_rate": 2e-05, "loss": 5.6472, "step": 8625 }, { "epoch": 0.2892932003018362, "grad_norm": 0.41919067939026555, "learning_rate": 2e-05, "loss": 5.4809, "step": 8626 }, { "epoch": 0.2893267376540622, "grad_norm": 0.4235763647647719, "learning_rate": 2e-05, "loss": 5.626, "step": 8627 }, { "epoch": 0.28936027500628825, "grad_norm": 0.39597512070222424, "learning_rate": 2e-05, "loss": 5.5657, "step": 8628 }, { "epoch": 0.2893938123585143, "grad_norm": 0.40722216050297044, "learning_rate": 2e-05, "loss": 5.4308, "step": 8629 }, { "epoch": 0.28942734971074036, "grad_norm": 0.41500058423917074, "learning_rate": 2e-05, "loss": 5.6645, "step": 8630 }, { "epoch": 0.28946088706296635, "grad_norm": 0.45661757171473843, "learning_rate": 2e-05, "loss": 5.4005, "step": 8631 }, { "epoch": 0.2894944244151924, "grad_norm": 0.40660159842082794, "learning_rate": 2e-05, "loss": 5.4141, "step": 8632 }, { "epoch": 0.28952796176741846, "grad_norm": 0.4183768276022267, "learning_rate": 2e-05, "loss": 5.5338, "step": 8633 }, { "epoch": 0.2895614991196445, "grad_norm": 0.45754409957406256, "learning_rate": 2e-05, "loss": 5.5302, "step": 8634 }, { "epoch": 0.28959503647187057, "grad_norm": 0.40847724838806176, "learning_rate": 2e-05, "loss": 5.5909, "step": 8635 }, { "epoch": 0.28962857382409657, "grad_norm": 0.4043987073569773, "learning_rate": 2e-05, "loss": 5.5369, "step": 8636 }, { "epoch": 0.2896621111763226, "grad_norm": 0.4792987070723066, "learning_rate": 2e-05, "loss": 5.3437, "step": 8637 }, { "epoch": 0.2896956485285487, "grad_norm": 0.4161377774602973, "learning_rate": 2e-05, "loss": 5.3252, "step": 8638 }, { "epoch": 0.28972918588077473, "grad_norm": 0.41394615745973773, "learning_rate": 2e-05, "loss": 5.6796, "step": 8639 }, { "epoch": 0.28976272323300073, "grad_norm": 0.41193772598288453, "learning_rate": 2e-05, "loss": 5.8155, "step": 8640 }, { "epoch": 0.2897962605852268, "grad_norm": 0.4657816946501277, "learning_rate": 2e-05, "loss": 5.5436, "step": 8641 }, { "epoch": 0.28982979793745284, "grad_norm": 0.42930041309676364, "learning_rate": 2e-05, "loss": 5.5154, "step": 8642 }, { "epoch": 0.2898633352896789, "grad_norm": 0.4153660179395073, "learning_rate": 2e-05, "loss": 5.5923, "step": 8643 }, { "epoch": 0.28989687264190495, "grad_norm": 0.4157514769554059, "learning_rate": 2e-05, "loss": 5.4955, "step": 8644 }, { "epoch": 0.28993040999413094, "grad_norm": 0.4058249459143245, "learning_rate": 2e-05, "loss": 5.4163, "step": 8645 }, { "epoch": 0.289963947346357, "grad_norm": 0.44203137126761183, "learning_rate": 2e-05, "loss": 5.5017, "step": 8646 }, { "epoch": 0.28999748469858305, "grad_norm": 0.42867334484079866, "learning_rate": 2e-05, "loss": 5.6252, "step": 8647 }, { "epoch": 0.2900310220508091, "grad_norm": 0.4132222009206334, "learning_rate": 2e-05, "loss": 5.5147, "step": 8648 }, { "epoch": 0.2900645594030351, "grad_norm": 0.41558507439308356, "learning_rate": 2e-05, "loss": 5.658, "step": 8649 }, { "epoch": 0.29009809675526116, "grad_norm": 0.3996012009649312, "learning_rate": 2e-05, "loss": 5.5507, "step": 8650 }, { "epoch": 0.2901316341074872, "grad_norm": 0.38762434303345056, "learning_rate": 2e-05, "loss": 5.6932, "step": 8651 }, { "epoch": 0.29016517145971327, "grad_norm": 0.4152999434935515, "learning_rate": 2e-05, "loss": 5.5662, "step": 8652 }, { "epoch": 0.2901987088119393, "grad_norm": 0.4012101516836905, "learning_rate": 2e-05, "loss": 5.5265, "step": 8653 }, { "epoch": 0.2902322461641653, "grad_norm": 0.40401743142847485, "learning_rate": 2e-05, "loss": 5.5746, "step": 8654 }, { "epoch": 0.2902657835163914, "grad_norm": 0.43770794018748255, "learning_rate": 2e-05, "loss": 5.5398, "step": 8655 }, { "epoch": 0.2902993208686174, "grad_norm": 0.39156498308870735, "learning_rate": 2e-05, "loss": 5.7617, "step": 8656 }, { "epoch": 0.2903328582208435, "grad_norm": 0.4223334310637247, "learning_rate": 2e-05, "loss": 5.5475, "step": 8657 }, { "epoch": 0.2903663955730695, "grad_norm": 0.43662557315551004, "learning_rate": 2e-05, "loss": 5.6236, "step": 8658 }, { "epoch": 0.29039993292529553, "grad_norm": 0.3967532118336658, "learning_rate": 2e-05, "loss": 5.6633, "step": 8659 }, { "epoch": 0.2904334702775216, "grad_norm": 0.40972894586807607, "learning_rate": 2e-05, "loss": 5.5836, "step": 8660 }, { "epoch": 0.29046700762974764, "grad_norm": 0.44540983094304215, "learning_rate": 2e-05, "loss": 5.7262, "step": 8661 }, { "epoch": 0.2905005449819737, "grad_norm": 0.38882610507533555, "learning_rate": 2e-05, "loss": 5.6424, "step": 8662 }, { "epoch": 0.2905340823341997, "grad_norm": 0.40273380091072586, "learning_rate": 2e-05, "loss": 5.6659, "step": 8663 }, { "epoch": 0.29056761968642575, "grad_norm": 0.44119175479743955, "learning_rate": 2e-05, "loss": 5.4914, "step": 8664 }, { "epoch": 0.2906011570386518, "grad_norm": 0.40582503276773396, "learning_rate": 2e-05, "loss": 5.7432, "step": 8665 }, { "epoch": 0.29063469439087786, "grad_norm": 0.434525644802975, "learning_rate": 2e-05, "loss": 5.3257, "step": 8666 }, { "epoch": 0.29066823174310386, "grad_norm": 0.4023226275214207, "learning_rate": 2e-05, "loss": 5.5431, "step": 8667 }, { "epoch": 0.2907017690953299, "grad_norm": 0.4105310888252143, "learning_rate": 2e-05, "loss": 5.4262, "step": 8668 }, { "epoch": 0.29073530644755596, "grad_norm": 0.41816326218629146, "learning_rate": 2e-05, "loss": 5.4163, "step": 8669 }, { "epoch": 0.290768843799782, "grad_norm": 0.4069053784151737, "learning_rate": 2e-05, "loss": 5.3942, "step": 8670 }, { "epoch": 0.29080238115200807, "grad_norm": 0.37376213648622936, "learning_rate": 2e-05, "loss": 5.5153, "step": 8671 }, { "epoch": 0.29083591850423407, "grad_norm": 0.40733641284975375, "learning_rate": 2e-05, "loss": 5.4069, "step": 8672 }, { "epoch": 0.2908694558564601, "grad_norm": 0.42545588044013544, "learning_rate": 2e-05, "loss": 5.5641, "step": 8673 }, { "epoch": 0.2909029932086862, "grad_norm": 0.4322649681563883, "learning_rate": 2e-05, "loss": 5.7555, "step": 8674 }, { "epoch": 0.29093653056091223, "grad_norm": 0.38627662704587484, "learning_rate": 2e-05, "loss": 5.5891, "step": 8675 }, { "epoch": 0.29097006791313823, "grad_norm": 0.45477249272864617, "learning_rate": 2e-05, "loss": 5.4958, "step": 8676 }, { "epoch": 0.2910036052653643, "grad_norm": 0.4363389188724634, "learning_rate": 2e-05, "loss": 5.692, "step": 8677 }, { "epoch": 0.29103714261759034, "grad_norm": 0.3784657391751084, "learning_rate": 2e-05, "loss": 5.6046, "step": 8678 }, { "epoch": 0.2910706799698164, "grad_norm": 0.4249373735080681, "learning_rate": 2e-05, "loss": 5.621, "step": 8679 }, { "epoch": 0.29110421732204245, "grad_norm": 0.435258926871558, "learning_rate": 2e-05, "loss": 5.5811, "step": 8680 }, { "epoch": 0.29113775467426845, "grad_norm": 0.4366031080545618, "learning_rate": 2e-05, "loss": 5.4526, "step": 8681 }, { "epoch": 0.2911712920264945, "grad_norm": 0.4008801090738924, "learning_rate": 2e-05, "loss": 5.6366, "step": 8682 }, { "epoch": 0.29120482937872055, "grad_norm": 0.42377819288335405, "learning_rate": 2e-05, "loss": 5.6543, "step": 8683 }, { "epoch": 0.2912383667309466, "grad_norm": 0.3927290409765962, "learning_rate": 2e-05, "loss": 5.5767, "step": 8684 }, { "epoch": 0.2912719040831726, "grad_norm": 0.41453416154924455, "learning_rate": 2e-05, "loss": 5.5291, "step": 8685 }, { "epoch": 0.29130544143539866, "grad_norm": 0.4354661519606929, "learning_rate": 2e-05, "loss": 5.4532, "step": 8686 }, { "epoch": 0.2913389787876247, "grad_norm": 0.44085958519964863, "learning_rate": 2e-05, "loss": 5.6729, "step": 8687 }, { "epoch": 0.29137251613985077, "grad_norm": 0.4298761907087721, "learning_rate": 2e-05, "loss": 5.6073, "step": 8688 }, { "epoch": 0.2914060534920768, "grad_norm": 0.4239008489020184, "learning_rate": 2e-05, "loss": 5.6107, "step": 8689 }, { "epoch": 0.2914395908443028, "grad_norm": 0.39550758978379513, "learning_rate": 2e-05, "loss": 5.4751, "step": 8690 }, { "epoch": 0.2914731281965289, "grad_norm": 0.40767677091521054, "learning_rate": 2e-05, "loss": 5.4252, "step": 8691 }, { "epoch": 0.29150666554875493, "grad_norm": 0.39046570927484375, "learning_rate": 2e-05, "loss": 5.5972, "step": 8692 }, { "epoch": 0.291540202900981, "grad_norm": 0.4178846396476744, "learning_rate": 2e-05, "loss": 5.4775, "step": 8693 }, { "epoch": 0.29157374025320704, "grad_norm": 0.3968023258176083, "learning_rate": 2e-05, "loss": 5.4233, "step": 8694 }, { "epoch": 0.29160727760543304, "grad_norm": 0.444320315689303, "learning_rate": 2e-05, "loss": 5.58, "step": 8695 }, { "epoch": 0.2916408149576591, "grad_norm": 0.421185189778483, "learning_rate": 2e-05, "loss": 5.4592, "step": 8696 }, { "epoch": 0.29167435230988514, "grad_norm": 0.40393944694557443, "learning_rate": 2e-05, "loss": 5.4159, "step": 8697 }, { "epoch": 0.2917078896621112, "grad_norm": 0.37541222024143833, "learning_rate": 2e-05, "loss": 5.4919, "step": 8698 }, { "epoch": 0.2917414270143372, "grad_norm": 0.4127082883476606, "learning_rate": 2e-05, "loss": 5.5766, "step": 8699 }, { "epoch": 0.29177496436656325, "grad_norm": 0.39583866675134183, "learning_rate": 2e-05, "loss": 5.6385, "step": 8700 }, { "epoch": 0.2918085017187893, "grad_norm": 0.43397152564915564, "learning_rate": 2e-05, "loss": 5.5918, "step": 8701 }, { "epoch": 0.29184203907101536, "grad_norm": 0.3909690007183733, "learning_rate": 2e-05, "loss": 5.5537, "step": 8702 }, { "epoch": 0.2918755764232414, "grad_norm": 0.4022106807621302, "learning_rate": 2e-05, "loss": 5.5425, "step": 8703 }, { "epoch": 0.2919091137754674, "grad_norm": 0.41686097999337157, "learning_rate": 2e-05, "loss": 5.4988, "step": 8704 }, { "epoch": 0.29194265112769346, "grad_norm": 0.39362703852616765, "learning_rate": 2e-05, "loss": 5.5293, "step": 8705 }, { "epoch": 0.2919761884799195, "grad_norm": 0.411807810261369, "learning_rate": 2e-05, "loss": 5.6904, "step": 8706 }, { "epoch": 0.2920097258321456, "grad_norm": 0.429017835179802, "learning_rate": 2e-05, "loss": 5.5107, "step": 8707 }, { "epoch": 0.29204326318437157, "grad_norm": 0.43162718950489315, "learning_rate": 2e-05, "loss": 5.5861, "step": 8708 }, { "epoch": 0.2920768005365976, "grad_norm": 0.45534301276165107, "learning_rate": 2e-05, "loss": 5.5368, "step": 8709 }, { "epoch": 0.2921103378888237, "grad_norm": 0.41452038689338155, "learning_rate": 2e-05, "loss": 5.5897, "step": 8710 }, { "epoch": 0.29214387524104973, "grad_norm": 0.41868743403033926, "learning_rate": 2e-05, "loss": 5.3935, "step": 8711 }, { "epoch": 0.2921774125932758, "grad_norm": 0.43125741402675283, "learning_rate": 2e-05, "loss": 5.6107, "step": 8712 }, { "epoch": 0.2922109499455018, "grad_norm": 0.4592407054255629, "learning_rate": 2e-05, "loss": 5.5085, "step": 8713 }, { "epoch": 0.29224448729772784, "grad_norm": 0.3905612590051517, "learning_rate": 2e-05, "loss": 5.6462, "step": 8714 }, { "epoch": 0.2922780246499539, "grad_norm": 0.3969964695389376, "learning_rate": 2e-05, "loss": 5.8637, "step": 8715 }, { "epoch": 0.29231156200217995, "grad_norm": 0.4346174113589727, "learning_rate": 2e-05, "loss": 5.5346, "step": 8716 }, { "epoch": 0.29234509935440595, "grad_norm": 0.43350295237513864, "learning_rate": 2e-05, "loss": 5.3996, "step": 8717 }, { "epoch": 0.292378636706632, "grad_norm": 0.38603686143409954, "learning_rate": 2e-05, "loss": 5.7657, "step": 8718 }, { "epoch": 0.29241217405885805, "grad_norm": 0.45660334458248053, "learning_rate": 2e-05, "loss": 5.5838, "step": 8719 }, { "epoch": 0.2924457114110841, "grad_norm": 0.3972247037478579, "learning_rate": 2e-05, "loss": 5.4611, "step": 8720 }, { "epoch": 0.29247924876331016, "grad_norm": 0.4387836762830778, "learning_rate": 2e-05, "loss": 5.469, "step": 8721 }, { "epoch": 0.29251278611553616, "grad_norm": 0.39158987729691347, "learning_rate": 2e-05, "loss": 5.6489, "step": 8722 }, { "epoch": 0.2925463234677622, "grad_norm": 0.3821972315238393, "learning_rate": 2e-05, "loss": 5.5826, "step": 8723 }, { "epoch": 0.29257986081998827, "grad_norm": 0.404338287357167, "learning_rate": 2e-05, "loss": 5.5211, "step": 8724 }, { "epoch": 0.2926133981722143, "grad_norm": 0.4193924554075645, "learning_rate": 2e-05, "loss": 5.476, "step": 8725 }, { "epoch": 0.2926469355244403, "grad_norm": 0.3992298139250697, "learning_rate": 2e-05, "loss": 5.5616, "step": 8726 }, { "epoch": 0.2926804728766664, "grad_norm": 0.45447904115272153, "learning_rate": 2e-05, "loss": 5.5511, "step": 8727 }, { "epoch": 0.29271401022889243, "grad_norm": 0.420210172014443, "learning_rate": 2e-05, "loss": 5.4609, "step": 8728 }, { "epoch": 0.2927475475811185, "grad_norm": 0.39921235349125345, "learning_rate": 2e-05, "loss": 5.3477, "step": 8729 }, { "epoch": 0.29278108493334454, "grad_norm": 0.40853656426010326, "learning_rate": 2e-05, "loss": 5.6026, "step": 8730 }, { "epoch": 0.29281462228557054, "grad_norm": 0.44431858707552757, "learning_rate": 2e-05, "loss": 5.4536, "step": 8731 }, { "epoch": 0.2928481596377966, "grad_norm": 0.4249243652606768, "learning_rate": 2e-05, "loss": 5.5847, "step": 8732 }, { "epoch": 0.29288169699002264, "grad_norm": 0.3987707401297685, "learning_rate": 2e-05, "loss": 5.5161, "step": 8733 }, { "epoch": 0.2929152343422487, "grad_norm": 0.40848965530352843, "learning_rate": 2e-05, "loss": 5.5744, "step": 8734 }, { "epoch": 0.2929487716944747, "grad_norm": 0.43381023372571953, "learning_rate": 2e-05, "loss": 5.509, "step": 8735 }, { "epoch": 0.29298230904670075, "grad_norm": 0.41000858683447544, "learning_rate": 2e-05, "loss": 5.4491, "step": 8736 }, { "epoch": 0.2930158463989268, "grad_norm": 0.4215311434108397, "learning_rate": 2e-05, "loss": 5.3824, "step": 8737 }, { "epoch": 0.29304938375115286, "grad_norm": 0.4179435993562411, "learning_rate": 2e-05, "loss": 5.3378, "step": 8738 }, { "epoch": 0.2930829211033789, "grad_norm": 0.4153482979068075, "learning_rate": 2e-05, "loss": 5.5448, "step": 8739 }, { "epoch": 0.2931164584556049, "grad_norm": 0.4040382358550522, "learning_rate": 2e-05, "loss": 5.627, "step": 8740 }, { "epoch": 0.29314999580783097, "grad_norm": 0.4058420730810456, "learning_rate": 2e-05, "loss": 5.5827, "step": 8741 }, { "epoch": 0.293183533160057, "grad_norm": 0.41334781718689223, "learning_rate": 2e-05, "loss": 5.4817, "step": 8742 }, { "epoch": 0.2932170705122831, "grad_norm": 0.4180064498655711, "learning_rate": 2e-05, "loss": 5.6061, "step": 8743 }, { "epoch": 0.29325060786450907, "grad_norm": 0.4240605022633985, "learning_rate": 2e-05, "loss": 5.5796, "step": 8744 }, { "epoch": 0.2932841452167351, "grad_norm": 0.39501952930006673, "learning_rate": 2e-05, "loss": 5.487, "step": 8745 }, { "epoch": 0.2933176825689612, "grad_norm": 0.39905926458966806, "learning_rate": 2e-05, "loss": 5.5587, "step": 8746 }, { "epoch": 0.29335121992118723, "grad_norm": 0.40975466775145397, "learning_rate": 2e-05, "loss": 5.6282, "step": 8747 }, { "epoch": 0.2933847572734133, "grad_norm": 0.4248608845106638, "learning_rate": 2e-05, "loss": 5.3495, "step": 8748 }, { "epoch": 0.2934182946256393, "grad_norm": 0.39175245536354303, "learning_rate": 2e-05, "loss": 5.5745, "step": 8749 }, { "epoch": 0.29345183197786534, "grad_norm": 0.39581108292835365, "learning_rate": 2e-05, "loss": 5.5666, "step": 8750 }, { "epoch": 0.2934853693300914, "grad_norm": 0.393147548149008, "learning_rate": 2e-05, "loss": 5.534, "step": 8751 }, { "epoch": 0.29351890668231745, "grad_norm": 0.4305734597590743, "learning_rate": 2e-05, "loss": 5.4233, "step": 8752 }, { "epoch": 0.29355244403454345, "grad_norm": 0.4082597163414406, "learning_rate": 2e-05, "loss": 5.5021, "step": 8753 }, { "epoch": 0.2935859813867695, "grad_norm": 0.40007114942348887, "learning_rate": 2e-05, "loss": 5.5522, "step": 8754 }, { "epoch": 0.29361951873899556, "grad_norm": 0.39287699620434297, "learning_rate": 2e-05, "loss": 5.5631, "step": 8755 }, { "epoch": 0.2936530560912216, "grad_norm": 0.4061303224916916, "learning_rate": 2e-05, "loss": 5.4391, "step": 8756 }, { "epoch": 0.29368659344344766, "grad_norm": 0.42849146301234675, "learning_rate": 2e-05, "loss": 5.4833, "step": 8757 }, { "epoch": 0.29372013079567366, "grad_norm": 0.43231089705458314, "learning_rate": 2e-05, "loss": 5.6515, "step": 8758 }, { "epoch": 0.2937536681478997, "grad_norm": 0.39394924981233154, "learning_rate": 2e-05, "loss": 5.292, "step": 8759 }, { "epoch": 0.29378720550012577, "grad_norm": 0.4401356747037602, "learning_rate": 2e-05, "loss": 5.4422, "step": 8760 }, { "epoch": 0.2938207428523518, "grad_norm": 0.4265889604662508, "learning_rate": 2e-05, "loss": 5.3288, "step": 8761 }, { "epoch": 0.2938542802045778, "grad_norm": 0.4567463727558903, "learning_rate": 2e-05, "loss": 5.6179, "step": 8762 }, { "epoch": 0.2938878175568039, "grad_norm": 0.4443584938949942, "learning_rate": 2e-05, "loss": 5.5437, "step": 8763 }, { "epoch": 0.29392135490902993, "grad_norm": 0.44518378595329133, "learning_rate": 2e-05, "loss": 5.4859, "step": 8764 }, { "epoch": 0.293954892261256, "grad_norm": 0.4119907813683039, "learning_rate": 2e-05, "loss": 5.4717, "step": 8765 }, { "epoch": 0.29398842961348204, "grad_norm": 0.4551242347239755, "learning_rate": 2e-05, "loss": 5.5997, "step": 8766 }, { "epoch": 0.29402196696570804, "grad_norm": 0.4134544680858004, "learning_rate": 2e-05, "loss": 5.5993, "step": 8767 }, { "epoch": 0.2940555043179341, "grad_norm": 0.43632864818020556, "learning_rate": 2e-05, "loss": 5.6188, "step": 8768 }, { "epoch": 0.29408904167016015, "grad_norm": 0.4066223295864041, "learning_rate": 2e-05, "loss": 5.5504, "step": 8769 }, { "epoch": 0.2941225790223862, "grad_norm": 0.4426007562780232, "learning_rate": 2e-05, "loss": 5.444, "step": 8770 }, { "epoch": 0.2941561163746122, "grad_norm": 0.378100419309188, "learning_rate": 2e-05, "loss": 5.5504, "step": 8771 }, { "epoch": 0.29418965372683825, "grad_norm": 0.38842821123727345, "learning_rate": 2e-05, "loss": 5.5022, "step": 8772 }, { "epoch": 0.2942231910790643, "grad_norm": 0.4402325082424859, "learning_rate": 2e-05, "loss": 5.8719, "step": 8773 }, { "epoch": 0.29425672843129036, "grad_norm": 0.41341901909322876, "learning_rate": 2e-05, "loss": 5.5714, "step": 8774 }, { "epoch": 0.2942902657835164, "grad_norm": 0.3989000116239137, "learning_rate": 2e-05, "loss": 5.547, "step": 8775 }, { "epoch": 0.2943238031357424, "grad_norm": 0.37979113135126663, "learning_rate": 2e-05, "loss": 5.3172, "step": 8776 }, { "epoch": 0.29435734048796847, "grad_norm": 0.41142289428719886, "learning_rate": 2e-05, "loss": 5.4426, "step": 8777 }, { "epoch": 0.2943908778401945, "grad_norm": 0.40260126813743546, "learning_rate": 2e-05, "loss": 5.5586, "step": 8778 }, { "epoch": 0.2944244151924206, "grad_norm": 0.39287619933272355, "learning_rate": 2e-05, "loss": 5.5182, "step": 8779 }, { "epoch": 0.2944579525446466, "grad_norm": 0.3931032616779051, "learning_rate": 2e-05, "loss": 5.5251, "step": 8780 }, { "epoch": 0.2944914898968726, "grad_norm": 0.39149301047900953, "learning_rate": 2e-05, "loss": 5.4129, "step": 8781 }, { "epoch": 0.2945250272490987, "grad_norm": 0.3987499073143215, "learning_rate": 2e-05, "loss": 5.5311, "step": 8782 }, { "epoch": 0.29455856460132473, "grad_norm": 0.41039460136891054, "learning_rate": 2e-05, "loss": 5.6631, "step": 8783 }, { "epoch": 0.2945921019535508, "grad_norm": 0.4158029440499682, "learning_rate": 2e-05, "loss": 5.3709, "step": 8784 }, { "epoch": 0.2946256393057768, "grad_norm": 0.38628352543484096, "learning_rate": 2e-05, "loss": 5.6375, "step": 8785 }, { "epoch": 0.29465917665800284, "grad_norm": 0.4378279022952301, "learning_rate": 2e-05, "loss": 5.5688, "step": 8786 }, { "epoch": 0.2946927140102289, "grad_norm": 0.42264683575916484, "learning_rate": 2e-05, "loss": 5.7334, "step": 8787 }, { "epoch": 0.29472625136245495, "grad_norm": 0.4064425338051392, "learning_rate": 2e-05, "loss": 5.6249, "step": 8788 }, { "epoch": 0.294759788714681, "grad_norm": 0.43104676740160236, "learning_rate": 2e-05, "loss": 5.5504, "step": 8789 }, { "epoch": 0.294793326066907, "grad_norm": 0.4154309816449611, "learning_rate": 2e-05, "loss": 5.4947, "step": 8790 }, { "epoch": 0.29482686341913306, "grad_norm": 0.39017943885773226, "learning_rate": 2e-05, "loss": 5.7528, "step": 8791 }, { "epoch": 0.2948604007713591, "grad_norm": 0.4095079247174387, "learning_rate": 2e-05, "loss": 5.4784, "step": 8792 }, { "epoch": 0.29489393812358516, "grad_norm": 0.4381710066697961, "learning_rate": 2e-05, "loss": 5.2911, "step": 8793 }, { "epoch": 0.29492747547581116, "grad_norm": 0.3876412026300458, "learning_rate": 2e-05, "loss": 5.649, "step": 8794 }, { "epoch": 0.2949610128280372, "grad_norm": 0.4010887747558399, "learning_rate": 2e-05, "loss": 5.3187, "step": 8795 }, { "epoch": 0.29499455018026327, "grad_norm": 0.40408961839610513, "learning_rate": 2e-05, "loss": 5.637, "step": 8796 }, { "epoch": 0.2950280875324893, "grad_norm": 0.3948948258858771, "learning_rate": 2e-05, "loss": 5.5546, "step": 8797 }, { "epoch": 0.2950616248847154, "grad_norm": 0.39546080937323974, "learning_rate": 2e-05, "loss": 5.8076, "step": 8798 }, { "epoch": 0.2950951622369414, "grad_norm": 0.41251424010453913, "learning_rate": 2e-05, "loss": 5.5018, "step": 8799 }, { "epoch": 0.29512869958916743, "grad_norm": 0.40124893040491644, "learning_rate": 2e-05, "loss": 5.6073, "step": 8800 }, { "epoch": 0.2951622369413935, "grad_norm": 0.38815696176625547, "learning_rate": 2e-05, "loss": 5.6141, "step": 8801 }, { "epoch": 0.29519577429361954, "grad_norm": 0.3997825731074156, "learning_rate": 2e-05, "loss": 5.5728, "step": 8802 }, { "epoch": 0.29522931164584554, "grad_norm": 0.4117592804048583, "learning_rate": 2e-05, "loss": 5.4813, "step": 8803 }, { "epoch": 0.2952628489980716, "grad_norm": 0.3876538774966946, "learning_rate": 2e-05, "loss": 5.3424, "step": 8804 }, { "epoch": 0.29529638635029765, "grad_norm": 0.4230022544682944, "learning_rate": 2e-05, "loss": 5.4911, "step": 8805 }, { "epoch": 0.2953299237025237, "grad_norm": 0.40274113510629456, "learning_rate": 2e-05, "loss": 5.6249, "step": 8806 }, { "epoch": 0.29536346105474975, "grad_norm": 0.4254554385519244, "learning_rate": 2e-05, "loss": 5.4491, "step": 8807 }, { "epoch": 0.29539699840697575, "grad_norm": 0.41069860046058687, "learning_rate": 2e-05, "loss": 5.5621, "step": 8808 }, { "epoch": 0.2954305357592018, "grad_norm": 0.45975962007817955, "learning_rate": 2e-05, "loss": 5.6458, "step": 8809 }, { "epoch": 0.29546407311142786, "grad_norm": 0.4311333555705549, "learning_rate": 2e-05, "loss": 5.5857, "step": 8810 }, { "epoch": 0.2954976104636539, "grad_norm": 0.4380776760620289, "learning_rate": 2e-05, "loss": 5.5752, "step": 8811 }, { "epoch": 0.2955311478158799, "grad_norm": 0.41144121815361545, "learning_rate": 2e-05, "loss": 5.4344, "step": 8812 }, { "epoch": 0.29556468516810597, "grad_norm": 0.4148816016318975, "learning_rate": 2e-05, "loss": 5.4881, "step": 8813 }, { "epoch": 0.295598222520332, "grad_norm": 0.44265166499221476, "learning_rate": 2e-05, "loss": 5.6958, "step": 8814 }, { "epoch": 0.2956317598725581, "grad_norm": 0.3922189163567841, "learning_rate": 2e-05, "loss": 5.5684, "step": 8815 }, { "epoch": 0.29566529722478413, "grad_norm": 0.44416326667269607, "learning_rate": 2e-05, "loss": 5.4543, "step": 8816 }, { "epoch": 0.29569883457701013, "grad_norm": 0.4318599398542228, "learning_rate": 2e-05, "loss": 5.493, "step": 8817 }, { "epoch": 0.2957323719292362, "grad_norm": 0.3797463922875973, "learning_rate": 2e-05, "loss": 5.4541, "step": 8818 }, { "epoch": 0.29576590928146224, "grad_norm": 0.42353889450515675, "learning_rate": 2e-05, "loss": 5.5087, "step": 8819 }, { "epoch": 0.2957994466336883, "grad_norm": 0.4069451764622385, "learning_rate": 2e-05, "loss": 5.6697, "step": 8820 }, { "epoch": 0.2958329839859143, "grad_norm": 0.41436510931860426, "learning_rate": 2e-05, "loss": 5.5314, "step": 8821 }, { "epoch": 0.29586652133814034, "grad_norm": 0.4215214424159029, "learning_rate": 2e-05, "loss": 5.3665, "step": 8822 }, { "epoch": 0.2959000586903664, "grad_norm": 0.4176283221836119, "learning_rate": 2e-05, "loss": 5.6001, "step": 8823 }, { "epoch": 0.29593359604259245, "grad_norm": 0.41964623577448074, "learning_rate": 2e-05, "loss": 5.7072, "step": 8824 }, { "epoch": 0.2959671333948185, "grad_norm": 0.4456736028695237, "learning_rate": 2e-05, "loss": 5.4422, "step": 8825 }, { "epoch": 0.2960006707470445, "grad_norm": 0.4065150305526772, "learning_rate": 2e-05, "loss": 5.5311, "step": 8826 }, { "epoch": 0.29603420809927056, "grad_norm": 0.404781324504034, "learning_rate": 2e-05, "loss": 5.384, "step": 8827 }, { "epoch": 0.2960677454514966, "grad_norm": 0.4271566712278436, "learning_rate": 2e-05, "loss": 5.5163, "step": 8828 }, { "epoch": 0.29610128280372267, "grad_norm": 0.44418908355861775, "learning_rate": 2e-05, "loss": 5.4848, "step": 8829 }, { "epoch": 0.29613482015594866, "grad_norm": 0.43559363633842796, "learning_rate": 2e-05, "loss": 5.6678, "step": 8830 }, { "epoch": 0.2961683575081747, "grad_norm": 0.4271356751846176, "learning_rate": 2e-05, "loss": 5.6333, "step": 8831 }, { "epoch": 0.29620189486040077, "grad_norm": 0.4259321417920018, "learning_rate": 2e-05, "loss": 5.424, "step": 8832 }, { "epoch": 0.2962354322126268, "grad_norm": 0.40443217977863544, "learning_rate": 2e-05, "loss": 5.4846, "step": 8833 }, { "epoch": 0.2962689695648529, "grad_norm": 0.41876628611101374, "learning_rate": 2e-05, "loss": 5.583, "step": 8834 }, { "epoch": 0.2963025069170789, "grad_norm": 0.42627933375201943, "learning_rate": 2e-05, "loss": 5.7666, "step": 8835 }, { "epoch": 0.29633604426930493, "grad_norm": 0.421226100418312, "learning_rate": 2e-05, "loss": 5.6521, "step": 8836 }, { "epoch": 0.296369581621531, "grad_norm": 0.45471843247541194, "learning_rate": 2e-05, "loss": 5.6557, "step": 8837 }, { "epoch": 0.29640311897375704, "grad_norm": 0.4436096845194614, "learning_rate": 2e-05, "loss": 5.3776, "step": 8838 }, { "epoch": 0.29643665632598304, "grad_norm": 0.41838427664013433, "learning_rate": 2e-05, "loss": 5.4844, "step": 8839 }, { "epoch": 0.2964701936782091, "grad_norm": 0.4057499366872864, "learning_rate": 2e-05, "loss": 5.4563, "step": 8840 }, { "epoch": 0.29650373103043515, "grad_norm": 0.4079340365129137, "learning_rate": 2e-05, "loss": 5.436, "step": 8841 }, { "epoch": 0.2965372683826612, "grad_norm": 0.46249562734761523, "learning_rate": 2e-05, "loss": 5.4795, "step": 8842 }, { "epoch": 0.29657080573488726, "grad_norm": 0.37172354884738157, "learning_rate": 2e-05, "loss": 5.4439, "step": 8843 }, { "epoch": 0.29660434308711325, "grad_norm": 0.42456796938662533, "learning_rate": 2e-05, "loss": 5.6235, "step": 8844 }, { "epoch": 0.2966378804393393, "grad_norm": 0.4281519488521596, "learning_rate": 2e-05, "loss": 5.3205, "step": 8845 }, { "epoch": 0.29667141779156536, "grad_norm": 0.4167682286664728, "learning_rate": 2e-05, "loss": 5.4719, "step": 8846 }, { "epoch": 0.2967049551437914, "grad_norm": 0.4497318164387546, "learning_rate": 2e-05, "loss": 5.5912, "step": 8847 }, { "epoch": 0.2967384924960174, "grad_norm": 0.4715699540458433, "learning_rate": 2e-05, "loss": 5.4549, "step": 8848 }, { "epoch": 0.29677202984824347, "grad_norm": 0.4040625716650248, "learning_rate": 2e-05, "loss": 5.6076, "step": 8849 }, { "epoch": 0.2968055672004695, "grad_norm": 0.442235268963118, "learning_rate": 2e-05, "loss": 5.3119, "step": 8850 }, { "epoch": 0.2968391045526956, "grad_norm": 0.4548754607291342, "learning_rate": 2e-05, "loss": 5.6439, "step": 8851 }, { "epoch": 0.29687264190492163, "grad_norm": 0.4291122916047976, "learning_rate": 2e-05, "loss": 5.7934, "step": 8852 }, { "epoch": 0.29690617925714763, "grad_norm": 0.452796933640974, "learning_rate": 2e-05, "loss": 5.7291, "step": 8853 }, { "epoch": 0.2969397166093737, "grad_norm": 0.4406699800875588, "learning_rate": 2e-05, "loss": 5.4512, "step": 8854 }, { "epoch": 0.29697325396159974, "grad_norm": 0.44490229212947835, "learning_rate": 2e-05, "loss": 5.495, "step": 8855 }, { "epoch": 0.2970067913138258, "grad_norm": 0.4103907872014384, "learning_rate": 2e-05, "loss": 5.7333, "step": 8856 }, { "epoch": 0.2970403286660518, "grad_norm": 0.388338753394292, "learning_rate": 2e-05, "loss": 5.4784, "step": 8857 }, { "epoch": 0.29707386601827784, "grad_norm": 0.43277548099737495, "learning_rate": 2e-05, "loss": 5.6787, "step": 8858 }, { "epoch": 0.2971074033705039, "grad_norm": 0.4741967306994331, "learning_rate": 2e-05, "loss": 5.3433, "step": 8859 }, { "epoch": 0.29714094072272995, "grad_norm": 0.40860846188331307, "learning_rate": 2e-05, "loss": 5.6177, "step": 8860 }, { "epoch": 0.297174478074956, "grad_norm": 0.44185771428296944, "learning_rate": 2e-05, "loss": 5.5421, "step": 8861 }, { "epoch": 0.297208015427182, "grad_norm": 0.5108913666804438, "learning_rate": 2e-05, "loss": 5.4761, "step": 8862 }, { "epoch": 0.29724155277940806, "grad_norm": 0.41989588220652296, "learning_rate": 2e-05, "loss": 5.7117, "step": 8863 }, { "epoch": 0.2972750901316341, "grad_norm": 0.41395178693919465, "learning_rate": 2e-05, "loss": 5.6335, "step": 8864 }, { "epoch": 0.29730862748386017, "grad_norm": 0.44765506801086036, "learning_rate": 2e-05, "loss": 5.5741, "step": 8865 }, { "epoch": 0.29734216483608616, "grad_norm": 0.4245196532901145, "learning_rate": 2e-05, "loss": 5.4751, "step": 8866 }, { "epoch": 0.2973757021883122, "grad_norm": 0.4225874419962668, "learning_rate": 2e-05, "loss": 5.6249, "step": 8867 }, { "epoch": 0.2974092395405383, "grad_norm": 0.4098165438408717, "learning_rate": 2e-05, "loss": 5.4995, "step": 8868 }, { "epoch": 0.2974427768927643, "grad_norm": 0.41465571249152067, "learning_rate": 2e-05, "loss": 5.3856, "step": 8869 }, { "epoch": 0.2974763142449904, "grad_norm": 0.39074749085563365, "learning_rate": 2e-05, "loss": 5.7623, "step": 8870 }, { "epoch": 0.2975098515972164, "grad_norm": 0.4186981003874423, "learning_rate": 2e-05, "loss": 5.6842, "step": 8871 }, { "epoch": 0.29754338894944243, "grad_norm": 0.4096457940293087, "learning_rate": 2e-05, "loss": 5.4829, "step": 8872 }, { "epoch": 0.2975769263016685, "grad_norm": 0.3928937932300637, "learning_rate": 2e-05, "loss": 5.5467, "step": 8873 }, { "epoch": 0.29761046365389454, "grad_norm": 0.46698571841591824, "learning_rate": 2e-05, "loss": 5.7594, "step": 8874 }, { "epoch": 0.29764400100612054, "grad_norm": 0.4234904053327707, "learning_rate": 2e-05, "loss": 5.645, "step": 8875 }, { "epoch": 0.2976775383583466, "grad_norm": 0.40589603063257773, "learning_rate": 2e-05, "loss": 5.383, "step": 8876 }, { "epoch": 0.29771107571057265, "grad_norm": 0.444066659001347, "learning_rate": 2e-05, "loss": 5.5141, "step": 8877 }, { "epoch": 0.2977446130627987, "grad_norm": 0.4007530727168522, "learning_rate": 2e-05, "loss": 5.4497, "step": 8878 }, { "epoch": 0.29777815041502476, "grad_norm": 0.4303424997528199, "learning_rate": 2e-05, "loss": 5.3187, "step": 8879 }, { "epoch": 0.29781168776725075, "grad_norm": 0.3711083831492513, "learning_rate": 2e-05, "loss": 5.6732, "step": 8880 }, { "epoch": 0.2978452251194768, "grad_norm": 0.4033691380980878, "learning_rate": 2e-05, "loss": 5.4147, "step": 8881 }, { "epoch": 0.29787876247170286, "grad_norm": 0.42607674502136456, "learning_rate": 2e-05, "loss": 5.6268, "step": 8882 }, { "epoch": 0.2979122998239289, "grad_norm": 0.43959281749923906, "learning_rate": 2e-05, "loss": 5.7929, "step": 8883 }, { "epoch": 0.2979458371761549, "grad_norm": 0.42976618188820076, "learning_rate": 2e-05, "loss": 5.6567, "step": 8884 }, { "epoch": 0.29797937452838097, "grad_norm": 0.42276199465558795, "learning_rate": 2e-05, "loss": 5.4322, "step": 8885 }, { "epoch": 0.298012911880607, "grad_norm": 0.4067228694539188, "learning_rate": 2e-05, "loss": 5.3536, "step": 8886 }, { "epoch": 0.2980464492328331, "grad_norm": 0.465924016175292, "learning_rate": 2e-05, "loss": 5.3607, "step": 8887 }, { "epoch": 0.29807998658505913, "grad_norm": 0.4507842698346395, "learning_rate": 2e-05, "loss": 5.6565, "step": 8888 }, { "epoch": 0.29811352393728513, "grad_norm": 0.4087860583932723, "learning_rate": 2e-05, "loss": 5.5717, "step": 8889 }, { "epoch": 0.2981470612895112, "grad_norm": 0.42969734816943184, "learning_rate": 2e-05, "loss": 5.5606, "step": 8890 }, { "epoch": 0.29818059864173724, "grad_norm": 0.4572875194219204, "learning_rate": 2e-05, "loss": 5.5675, "step": 8891 }, { "epoch": 0.2982141359939633, "grad_norm": 0.46274801242462615, "learning_rate": 2e-05, "loss": 5.4342, "step": 8892 }, { "epoch": 0.29824767334618935, "grad_norm": 0.4048285596310589, "learning_rate": 2e-05, "loss": 5.5083, "step": 8893 }, { "epoch": 0.29828121069841534, "grad_norm": 0.43951355855624125, "learning_rate": 2e-05, "loss": 5.655, "step": 8894 }, { "epoch": 0.2983147480506414, "grad_norm": 0.4219272328801159, "learning_rate": 2e-05, "loss": 5.4656, "step": 8895 }, { "epoch": 0.29834828540286745, "grad_norm": 0.4144704337066946, "learning_rate": 2e-05, "loss": 5.4364, "step": 8896 }, { "epoch": 0.2983818227550935, "grad_norm": 0.40581167048162164, "learning_rate": 2e-05, "loss": 5.4746, "step": 8897 }, { "epoch": 0.2984153601073195, "grad_norm": 0.4310758303622431, "learning_rate": 2e-05, "loss": 5.5094, "step": 8898 }, { "epoch": 0.29844889745954556, "grad_norm": 0.43358607035032154, "learning_rate": 2e-05, "loss": 5.5787, "step": 8899 }, { "epoch": 0.2984824348117716, "grad_norm": 0.4091306478789689, "learning_rate": 2e-05, "loss": 5.6883, "step": 8900 }, { "epoch": 0.29851597216399767, "grad_norm": 0.4170262385725075, "learning_rate": 2e-05, "loss": 5.6765, "step": 8901 }, { "epoch": 0.2985495095162237, "grad_norm": 0.42521954292672787, "learning_rate": 2e-05, "loss": 5.7394, "step": 8902 }, { "epoch": 0.2985830468684497, "grad_norm": 0.4266526697567547, "learning_rate": 2e-05, "loss": 5.5031, "step": 8903 }, { "epoch": 0.2986165842206758, "grad_norm": 0.4600381915657836, "learning_rate": 2e-05, "loss": 5.5433, "step": 8904 }, { "epoch": 0.2986501215729018, "grad_norm": 0.42412348350666174, "learning_rate": 2e-05, "loss": 5.493, "step": 8905 }, { "epoch": 0.2986836589251279, "grad_norm": 0.4464987782189415, "learning_rate": 2e-05, "loss": 5.4637, "step": 8906 }, { "epoch": 0.2987171962773539, "grad_norm": 0.420502542727182, "learning_rate": 2e-05, "loss": 5.5245, "step": 8907 }, { "epoch": 0.29875073362957993, "grad_norm": 0.4081872188840884, "learning_rate": 2e-05, "loss": 5.5489, "step": 8908 }, { "epoch": 0.298784270981806, "grad_norm": 0.42754430110327707, "learning_rate": 2e-05, "loss": 5.6483, "step": 8909 }, { "epoch": 0.29881780833403204, "grad_norm": 0.4288434901708007, "learning_rate": 2e-05, "loss": 5.605, "step": 8910 }, { "epoch": 0.2988513456862581, "grad_norm": 0.4067592294096256, "learning_rate": 2e-05, "loss": 5.5341, "step": 8911 }, { "epoch": 0.2988848830384841, "grad_norm": 0.4556103408187748, "learning_rate": 2e-05, "loss": 5.4363, "step": 8912 }, { "epoch": 0.29891842039071015, "grad_norm": 0.4111782410786103, "learning_rate": 2e-05, "loss": 5.5102, "step": 8913 }, { "epoch": 0.2989519577429362, "grad_norm": 0.4621628098101654, "learning_rate": 2e-05, "loss": 5.3963, "step": 8914 }, { "epoch": 0.29898549509516226, "grad_norm": 0.4501929213625887, "learning_rate": 2e-05, "loss": 5.448, "step": 8915 }, { "epoch": 0.29901903244738826, "grad_norm": 0.4230197114056676, "learning_rate": 2e-05, "loss": 5.4861, "step": 8916 }, { "epoch": 0.2990525697996143, "grad_norm": 0.39383625627766167, "learning_rate": 2e-05, "loss": 5.637, "step": 8917 }, { "epoch": 0.29908610715184036, "grad_norm": 0.42517134396028616, "learning_rate": 2e-05, "loss": 5.7173, "step": 8918 }, { "epoch": 0.2991196445040664, "grad_norm": 0.4161341924089774, "learning_rate": 2e-05, "loss": 5.4598, "step": 8919 }, { "epoch": 0.29915318185629247, "grad_norm": 0.3862176850886168, "learning_rate": 2e-05, "loss": 5.5498, "step": 8920 }, { "epoch": 0.29918671920851847, "grad_norm": 0.4648732114452244, "learning_rate": 2e-05, "loss": 5.6337, "step": 8921 }, { "epoch": 0.2992202565607445, "grad_norm": 0.40592458353604804, "learning_rate": 2e-05, "loss": 5.5487, "step": 8922 }, { "epoch": 0.2992537939129706, "grad_norm": 0.4233195902905454, "learning_rate": 2e-05, "loss": 5.5679, "step": 8923 }, { "epoch": 0.29928733126519663, "grad_norm": 0.4057794034537227, "learning_rate": 2e-05, "loss": 5.6706, "step": 8924 }, { "epoch": 0.29932086861742263, "grad_norm": 0.44006620114524886, "learning_rate": 2e-05, "loss": 5.4143, "step": 8925 }, { "epoch": 0.2993544059696487, "grad_norm": 0.44056339098173813, "learning_rate": 2e-05, "loss": 5.4387, "step": 8926 }, { "epoch": 0.29938794332187474, "grad_norm": 0.4056065315756434, "learning_rate": 2e-05, "loss": 5.6373, "step": 8927 }, { "epoch": 0.2994214806741008, "grad_norm": 0.44510516673714723, "learning_rate": 2e-05, "loss": 5.5075, "step": 8928 }, { "epoch": 0.29945501802632685, "grad_norm": 0.45161041636648913, "learning_rate": 2e-05, "loss": 5.5836, "step": 8929 }, { "epoch": 0.29948855537855285, "grad_norm": 0.4831166400045596, "learning_rate": 2e-05, "loss": 5.4565, "step": 8930 }, { "epoch": 0.2995220927307789, "grad_norm": 0.4503789767362137, "learning_rate": 2e-05, "loss": 5.7552, "step": 8931 }, { "epoch": 0.29955563008300495, "grad_norm": 0.4202060134288332, "learning_rate": 2e-05, "loss": 5.517, "step": 8932 }, { "epoch": 0.299589167435231, "grad_norm": 0.41663712536218844, "learning_rate": 2e-05, "loss": 5.491, "step": 8933 }, { "epoch": 0.299622704787457, "grad_norm": 0.48478434386745384, "learning_rate": 2e-05, "loss": 5.5655, "step": 8934 }, { "epoch": 0.29965624213968306, "grad_norm": 0.390099288122925, "learning_rate": 2e-05, "loss": 5.5368, "step": 8935 }, { "epoch": 0.2996897794919091, "grad_norm": 0.4057518245197129, "learning_rate": 2e-05, "loss": 5.7009, "step": 8936 }, { "epoch": 0.29972331684413517, "grad_norm": 0.4410808532674565, "learning_rate": 2e-05, "loss": 5.6088, "step": 8937 }, { "epoch": 0.2997568541963612, "grad_norm": 0.4025389723810942, "learning_rate": 2e-05, "loss": 5.6388, "step": 8938 }, { "epoch": 0.2997903915485872, "grad_norm": 0.3940616490879894, "learning_rate": 2e-05, "loss": 5.608, "step": 8939 }, { "epoch": 0.2998239289008133, "grad_norm": 0.40713062095408037, "learning_rate": 2e-05, "loss": 5.7433, "step": 8940 }, { "epoch": 0.29985746625303933, "grad_norm": 0.4120179045350677, "learning_rate": 2e-05, "loss": 5.6572, "step": 8941 }, { "epoch": 0.2998910036052654, "grad_norm": 0.419485217893987, "learning_rate": 2e-05, "loss": 5.7097, "step": 8942 }, { "epoch": 0.2999245409574914, "grad_norm": 0.43150742142626436, "learning_rate": 2e-05, "loss": 5.4554, "step": 8943 }, { "epoch": 0.29995807830971744, "grad_norm": 0.4139763776205298, "learning_rate": 2e-05, "loss": 5.5331, "step": 8944 }, { "epoch": 0.2999916156619435, "grad_norm": 0.41929453742194267, "learning_rate": 2e-05, "loss": 5.6199, "step": 8945 }, { "epoch": 0.30002515301416954, "grad_norm": 0.463168588483093, "learning_rate": 2e-05, "loss": 5.4954, "step": 8946 }, { "epoch": 0.3000586903663956, "grad_norm": 0.4127853068167453, "learning_rate": 2e-05, "loss": 5.4815, "step": 8947 }, { "epoch": 0.3000922277186216, "grad_norm": 0.4056860385238715, "learning_rate": 2e-05, "loss": 5.6231, "step": 8948 }, { "epoch": 0.30012576507084765, "grad_norm": 0.417579515089709, "learning_rate": 2e-05, "loss": 5.55, "step": 8949 }, { "epoch": 0.3001593024230737, "grad_norm": 0.3963086464025198, "learning_rate": 2e-05, "loss": 5.7256, "step": 8950 }, { "epoch": 0.30019283977529976, "grad_norm": 0.41225424723961124, "learning_rate": 2e-05, "loss": 5.4888, "step": 8951 }, { "epoch": 0.30022637712752576, "grad_norm": 0.40104962678037703, "learning_rate": 2e-05, "loss": 5.725, "step": 8952 }, { "epoch": 0.3002599144797518, "grad_norm": 0.4459010196620511, "learning_rate": 2e-05, "loss": 5.4447, "step": 8953 }, { "epoch": 0.30029345183197786, "grad_norm": 0.4306303115067692, "learning_rate": 2e-05, "loss": 5.4713, "step": 8954 }, { "epoch": 0.3003269891842039, "grad_norm": 0.3939507331315059, "learning_rate": 2e-05, "loss": 5.6153, "step": 8955 }, { "epoch": 0.30036052653643, "grad_norm": 0.38952838083639957, "learning_rate": 2e-05, "loss": 5.5811, "step": 8956 }, { "epoch": 0.30039406388865597, "grad_norm": 0.3952135641030281, "learning_rate": 2e-05, "loss": 5.7422, "step": 8957 }, { "epoch": 0.300427601240882, "grad_norm": 0.42316354347770235, "learning_rate": 2e-05, "loss": 5.483, "step": 8958 }, { "epoch": 0.3004611385931081, "grad_norm": 0.4176295478978904, "learning_rate": 2e-05, "loss": 5.7128, "step": 8959 }, { "epoch": 0.30049467594533413, "grad_norm": 0.41195069769893194, "learning_rate": 2e-05, "loss": 5.4637, "step": 8960 }, { "epoch": 0.30052821329756013, "grad_norm": 0.4166990934103077, "learning_rate": 2e-05, "loss": 5.583, "step": 8961 }, { "epoch": 0.3005617506497862, "grad_norm": 0.42663709653151066, "learning_rate": 2e-05, "loss": 5.6896, "step": 8962 }, { "epoch": 0.30059528800201224, "grad_norm": 0.39970364867596014, "learning_rate": 2e-05, "loss": 5.5342, "step": 8963 }, { "epoch": 0.3006288253542383, "grad_norm": 0.41506908998012415, "learning_rate": 2e-05, "loss": 5.4769, "step": 8964 }, { "epoch": 0.30066236270646435, "grad_norm": 0.3962946258346293, "learning_rate": 2e-05, "loss": 5.5713, "step": 8965 }, { "epoch": 0.30069590005869035, "grad_norm": 0.38644828004025045, "learning_rate": 2e-05, "loss": 5.6392, "step": 8966 }, { "epoch": 0.3007294374109164, "grad_norm": 0.4039212938981173, "learning_rate": 2e-05, "loss": 5.5868, "step": 8967 }, { "epoch": 0.30076297476314245, "grad_norm": 0.37505929201622185, "learning_rate": 2e-05, "loss": 5.5144, "step": 8968 }, { "epoch": 0.3007965121153685, "grad_norm": 0.39355780324939516, "learning_rate": 2e-05, "loss": 5.5839, "step": 8969 }, { "epoch": 0.3008300494675945, "grad_norm": 0.39805231446977957, "learning_rate": 2e-05, "loss": 5.4627, "step": 8970 }, { "epoch": 0.30086358681982056, "grad_norm": 0.3915788092398458, "learning_rate": 2e-05, "loss": 5.5874, "step": 8971 }, { "epoch": 0.3008971241720466, "grad_norm": 0.39345531739034295, "learning_rate": 2e-05, "loss": 5.5794, "step": 8972 }, { "epoch": 0.30093066152427267, "grad_norm": 0.44856583059553273, "learning_rate": 2e-05, "loss": 5.6465, "step": 8973 }, { "epoch": 0.3009641988764987, "grad_norm": 0.39689457055506006, "learning_rate": 2e-05, "loss": 5.7147, "step": 8974 }, { "epoch": 0.3009977362287247, "grad_norm": 0.38863903890458934, "learning_rate": 2e-05, "loss": 5.4939, "step": 8975 }, { "epoch": 0.3010312735809508, "grad_norm": 0.39277850669596903, "learning_rate": 2e-05, "loss": 5.6978, "step": 8976 }, { "epoch": 0.30106481093317683, "grad_norm": 0.45080326881281324, "learning_rate": 2e-05, "loss": 5.7082, "step": 8977 }, { "epoch": 0.3010983482854029, "grad_norm": 0.3854704297792505, "learning_rate": 2e-05, "loss": 5.4835, "step": 8978 }, { "epoch": 0.3011318856376289, "grad_norm": 0.4324048700418973, "learning_rate": 2e-05, "loss": 5.5362, "step": 8979 }, { "epoch": 0.30116542298985494, "grad_norm": 0.4269648701056489, "learning_rate": 2e-05, "loss": 5.6799, "step": 8980 }, { "epoch": 0.301198960342081, "grad_norm": 0.4114159694515823, "learning_rate": 2e-05, "loss": 5.6096, "step": 8981 }, { "epoch": 0.30123249769430704, "grad_norm": 0.3823372135544357, "learning_rate": 2e-05, "loss": 5.5129, "step": 8982 }, { "epoch": 0.3012660350465331, "grad_norm": 0.41397807191929026, "learning_rate": 2e-05, "loss": 5.4709, "step": 8983 }, { "epoch": 0.3012995723987591, "grad_norm": 0.41024815647124596, "learning_rate": 2e-05, "loss": 5.6285, "step": 8984 }, { "epoch": 0.30133310975098515, "grad_norm": 0.3845130155406384, "learning_rate": 2e-05, "loss": 5.5239, "step": 8985 }, { "epoch": 0.3013666471032112, "grad_norm": 0.3923852672045005, "learning_rate": 2e-05, "loss": 5.7196, "step": 8986 }, { "epoch": 0.30140018445543726, "grad_norm": 0.42209229040321444, "learning_rate": 2e-05, "loss": 5.4443, "step": 8987 }, { "epoch": 0.30143372180766326, "grad_norm": 0.4043203781703325, "learning_rate": 2e-05, "loss": 5.4195, "step": 8988 }, { "epoch": 0.3014672591598893, "grad_norm": 0.39998871597939706, "learning_rate": 2e-05, "loss": 5.7083, "step": 8989 }, { "epoch": 0.30150079651211537, "grad_norm": 0.4246403452977764, "learning_rate": 2e-05, "loss": 5.4957, "step": 8990 }, { "epoch": 0.3015343338643414, "grad_norm": 0.4425531585851143, "learning_rate": 2e-05, "loss": 5.5863, "step": 8991 }, { "epoch": 0.3015678712165675, "grad_norm": 0.38727502293708116, "learning_rate": 2e-05, "loss": 5.5857, "step": 8992 }, { "epoch": 0.30160140856879347, "grad_norm": 0.4085631154476505, "learning_rate": 2e-05, "loss": 5.5879, "step": 8993 }, { "epoch": 0.3016349459210195, "grad_norm": 0.43857633559070824, "learning_rate": 2e-05, "loss": 5.5091, "step": 8994 }, { "epoch": 0.3016684832732456, "grad_norm": 0.40829816842536104, "learning_rate": 2e-05, "loss": 5.7051, "step": 8995 }, { "epoch": 0.30170202062547163, "grad_norm": 0.4068443251990074, "learning_rate": 2e-05, "loss": 5.2781, "step": 8996 }, { "epoch": 0.3017355579776977, "grad_norm": 0.44525819344582107, "learning_rate": 2e-05, "loss": 5.5966, "step": 8997 }, { "epoch": 0.3017690953299237, "grad_norm": 0.4136777481144135, "learning_rate": 2e-05, "loss": 5.5092, "step": 8998 }, { "epoch": 0.30180263268214974, "grad_norm": 0.4117811018622107, "learning_rate": 2e-05, "loss": 5.4448, "step": 8999 }, { "epoch": 0.3018361700343758, "grad_norm": 0.4260338331240906, "learning_rate": 2e-05, "loss": 5.3557, "step": 9000 }, { "epoch": 0.30186970738660185, "grad_norm": 0.41352442132002154, "learning_rate": 2e-05, "loss": 5.605, "step": 9001 }, { "epoch": 0.30190324473882785, "grad_norm": 0.39224714378788333, "learning_rate": 2e-05, "loss": 5.5963, "step": 9002 }, { "epoch": 0.3019367820910539, "grad_norm": 0.4547207686174021, "learning_rate": 2e-05, "loss": 5.6619, "step": 9003 }, { "epoch": 0.30197031944327996, "grad_norm": 0.4164129347445948, "learning_rate": 2e-05, "loss": 5.6674, "step": 9004 }, { "epoch": 0.302003856795506, "grad_norm": 0.39542395092388577, "learning_rate": 2e-05, "loss": 5.6117, "step": 9005 }, { "epoch": 0.30203739414773206, "grad_norm": 0.5225400328278823, "learning_rate": 2e-05, "loss": 5.5287, "step": 9006 }, { "epoch": 0.30207093149995806, "grad_norm": 0.4069893004352399, "learning_rate": 2e-05, "loss": 5.545, "step": 9007 }, { "epoch": 0.3021044688521841, "grad_norm": 0.4129688474265654, "learning_rate": 2e-05, "loss": 5.545, "step": 9008 }, { "epoch": 0.30213800620441017, "grad_norm": 0.445947913761937, "learning_rate": 2e-05, "loss": 5.3836, "step": 9009 }, { "epoch": 0.3021715435566362, "grad_norm": 0.44328422381066196, "learning_rate": 2e-05, "loss": 5.5077, "step": 9010 }, { "epoch": 0.3022050809088622, "grad_norm": 0.3992224178397819, "learning_rate": 2e-05, "loss": 5.5915, "step": 9011 }, { "epoch": 0.3022386182610883, "grad_norm": 0.47488631530770187, "learning_rate": 2e-05, "loss": 5.525, "step": 9012 }, { "epoch": 0.30227215561331433, "grad_norm": 0.3956884252671863, "learning_rate": 2e-05, "loss": 5.4927, "step": 9013 }, { "epoch": 0.3023056929655404, "grad_norm": 0.39619560140168114, "learning_rate": 2e-05, "loss": 5.3721, "step": 9014 }, { "epoch": 0.30233923031776644, "grad_norm": 0.4242173822516326, "learning_rate": 2e-05, "loss": 5.5245, "step": 9015 }, { "epoch": 0.30237276766999244, "grad_norm": 0.42923170282246303, "learning_rate": 2e-05, "loss": 5.3908, "step": 9016 }, { "epoch": 0.3024063050222185, "grad_norm": 0.373888081523987, "learning_rate": 2e-05, "loss": 5.623, "step": 9017 }, { "epoch": 0.30243984237444455, "grad_norm": 0.42389314410636797, "learning_rate": 2e-05, "loss": 5.5136, "step": 9018 }, { "epoch": 0.3024733797266706, "grad_norm": 0.39425593495931793, "learning_rate": 2e-05, "loss": 5.4348, "step": 9019 }, { "epoch": 0.3025069170788966, "grad_norm": 0.39599371305422654, "learning_rate": 2e-05, "loss": 5.6415, "step": 9020 }, { "epoch": 0.30254045443112265, "grad_norm": 0.4703829455023994, "learning_rate": 2e-05, "loss": 5.606, "step": 9021 }, { "epoch": 0.3025739917833487, "grad_norm": 0.4001730151882824, "learning_rate": 2e-05, "loss": 5.4017, "step": 9022 }, { "epoch": 0.30260752913557476, "grad_norm": 0.4022908051076858, "learning_rate": 2e-05, "loss": 5.7428, "step": 9023 }, { "epoch": 0.3026410664878008, "grad_norm": 0.40579952389537916, "learning_rate": 2e-05, "loss": 5.5393, "step": 9024 }, { "epoch": 0.3026746038400268, "grad_norm": 0.43165867097183086, "learning_rate": 2e-05, "loss": 5.5394, "step": 9025 }, { "epoch": 0.30270814119225287, "grad_norm": 0.383353516509711, "learning_rate": 2e-05, "loss": 5.5204, "step": 9026 }, { "epoch": 0.3027416785444789, "grad_norm": 0.4101497425761473, "learning_rate": 2e-05, "loss": 5.5184, "step": 9027 }, { "epoch": 0.302775215896705, "grad_norm": 0.42087096671803725, "learning_rate": 2e-05, "loss": 5.6413, "step": 9028 }, { "epoch": 0.302808753248931, "grad_norm": 0.38427388575108734, "learning_rate": 2e-05, "loss": 5.5934, "step": 9029 }, { "epoch": 0.302842290601157, "grad_norm": 0.4179920660112417, "learning_rate": 2e-05, "loss": 5.7758, "step": 9030 }, { "epoch": 0.3028758279533831, "grad_norm": 0.39787859966895067, "learning_rate": 2e-05, "loss": 5.6823, "step": 9031 }, { "epoch": 0.30290936530560914, "grad_norm": 0.41342145955241455, "learning_rate": 2e-05, "loss": 5.3609, "step": 9032 }, { "epoch": 0.3029429026578352, "grad_norm": 0.40842673773565114, "learning_rate": 2e-05, "loss": 5.4472, "step": 9033 }, { "epoch": 0.3029764400100612, "grad_norm": 0.42325550113538524, "learning_rate": 2e-05, "loss": 5.5198, "step": 9034 }, { "epoch": 0.30300997736228724, "grad_norm": 0.41275333401971664, "learning_rate": 2e-05, "loss": 5.6783, "step": 9035 }, { "epoch": 0.3030435147145133, "grad_norm": 0.38212875404208113, "learning_rate": 2e-05, "loss": 5.5482, "step": 9036 }, { "epoch": 0.30307705206673935, "grad_norm": 0.44393878505113743, "learning_rate": 2e-05, "loss": 5.5229, "step": 9037 }, { "epoch": 0.30311058941896535, "grad_norm": 0.41451533649275385, "learning_rate": 2e-05, "loss": 5.5225, "step": 9038 }, { "epoch": 0.3031441267711914, "grad_norm": 0.4267221965045668, "learning_rate": 2e-05, "loss": 5.5752, "step": 9039 }, { "epoch": 0.30317766412341746, "grad_norm": 0.4059354998391792, "learning_rate": 2e-05, "loss": 5.3471, "step": 9040 }, { "epoch": 0.3032112014756435, "grad_norm": 0.4589792635283009, "learning_rate": 2e-05, "loss": 5.5443, "step": 9041 }, { "epoch": 0.30324473882786956, "grad_norm": 0.4342412131480997, "learning_rate": 2e-05, "loss": 5.5828, "step": 9042 }, { "epoch": 0.30327827618009556, "grad_norm": 0.42355910612924635, "learning_rate": 2e-05, "loss": 5.4745, "step": 9043 }, { "epoch": 0.3033118135323216, "grad_norm": 0.4207606768369512, "learning_rate": 2e-05, "loss": 5.5838, "step": 9044 }, { "epoch": 0.30334535088454767, "grad_norm": 0.4199082455644336, "learning_rate": 2e-05, "loss": 5.5206, "step": 9045 }, { "epoch": 0.3033788882367737, "grad_norm": 0.41004051254476065, "learning_rate": 2e-05, "loss": 5.557, "step": 9046 }, { "epoch": 0.3034124255889997, "grad_norm": 0.41691292057005297, "learning_rate": 2e-05, "loss": 5.589, "step": 9047 }, { "epoch": 0.3034459629412258, "grad_norm": 0.42725362186697546, "learning_rate": 2e-05, "loss": 5.6405, "step": 9048 }, { "epoch": 0.30347950029345183, "grad_norm": 0.4872789312678803, "learning_rate": 2e-05, "loss": 5.5798, "step": 9049 }, { "epoch": 0.3035130376456779, "grad_norm": 0.37586028433119784, "learning_rate": 2e-05, "loss": 5.5555, "step": 9050 }, { "epoch": 0.30354657499790394, "grad_norm": 0.3863752825479619, "learning_rate": 2e-05, "loss": 5.4088, "step": 9051 }, { "epoch": 0.30358011235012994, "grad_norm": 0.42929621917277944, "learning_rate": 2e-05, "loss": 5.787, "step": 9052 }, { "epoch": 0.303613649702356, "grad_norm": 0.43390976861444436, "learning_rate": 2e-05, "loss": 5.5095, "step": 9053 }, { "epoch": 0.30364718705458205, "grad_norm": 0.43767522890879224, "learning_rate": 2e-05, "loss": 5.442, "step": 9054 }, { "epoch": 0.3036807244068081, "grad_norm": 0.40654666847304827, "learning_rate": 2e-05, "loss": 5.4946, "step": 9055 }, { "epoch": 0.3037142617590341, "grad_norm": 0.3940308552556775, "learning_rate": 2e-05, "loss": 5.7614, "step": 9056 }, { "epoch": 0.30374779911126015, "grad_norm": 0.4323086587353705, "learning_rate": 2e-05, "loss": 5.5936, "step": 9057 }, { "epoch": 0.3037813364634862, "grad_norm": 0.40892726128667967, "learning_rate": 2e-05, "loss": 5.6287, "step": 9058 }, { "epoch": 0.30381487381571226, "grad_norm": 0.39978225800879297, "learning_rate": 2e-05, "loss": 5.4923, "step": 9059 }, { "epoch": 0.3038484111679383, "grad_norm": 0.40495586520519805, "learning_rate": 2e-05, "loss": 5.61, "step": 9060 }, { "epoch": 0.3038819485201643, "grad_norm": 0.4187626433180712, "learning_rate": 2e-05, "loss": 5.663, "step": 9061 }, { "epoch": 0.30391548587239037, "grad_norm": 0.40271009545705955, "learning_rate": 2e-05, "loss": 5.6098, "step": 9062 }, { "epoch": 0.3039490232246164, "grad_norm": 0.4002301267483244, "learning_rate": 2e-05, "loss": 5.4264, "step": 9063 }, { "epoch": 0.3039825605768425, "grad_norm": 0.3913533628374865, "learning_rate": 2e-05, "loss": 5.5093, "step": 9064 }, { "epoch": 0.3040160979290685, "grad_norm": 0.4061994828417966, "learning_rate": 2e-05, "loss": 5.5646, "step": 9065 }, { "epoch": 0.30404963528129453, "grad_norm": 0.38971166468824175, "learning_rate": 2e-05, "loss": 5.4765, "step": 9066 }, { "epoch": 0.3040831726335206, "grad_norm": 0.4595241922618335, "learning_rate": 2e-05, "loss": 5.6288, "step": 9067 }, { "epoch": 0.30411670998574664, "grad_norm": 0.4191772604256244, "learning_rate": 2e-05, "loss": 5.3996, "step": 9068 }, { "epoch": 0.3041502473379727, "grad_norm": 0.3867334290566686, "learning_rate": 2e-05, "loss": 5.6933, "step": 9069 }, { "epoch": 0.3041837846901987, "grad_norm": 0.41588121869810896, "learning_rate": 2e-05, "loss": 5.4855, "step": 9070 }, { "epoch": 0.30421732204242474, "grad_norm": 0.4682315490680906, "learning_rate": 2e-05, "loss": 5.4404, "step": 9071 }, { "epoch": 0.3042508593946508, "grad_norm": 0.3751462537609221, "learning_rate": 2e-05, "loss": 5.5895, "step": 9072 }, { "epoch": 0.30428439674687685, "grad_norm": 0.4258605882408976, "learning_rate": 2e-05, "loss": 5.1794, "step": 9073 }, { "epoch": 0.30431793409910285, "grad_norm": 0.42019457667849663, "learning_rate": 2e-05, "loss": 5.703, "step": 9074 }, { "epoch": 0.3043514714513289, "grad_norm": 0.4280091490519372, "learning_rate": 2e-05, "loss": 5.5855, "step": 9075 }, { "epoch": 0.30438500880355496, "grad_norm": 0.40147693890603875, "learning_rate": 2e-05, "loss": 5.4913, "step": 9076 }, { "epoch": 0.304418546155781, "grad_norm": 0.4488217930860071, "learning_rate": 2e-05, "loss": 5.3364, "step": 9077 }, { "epoch": 0.30445208350800707, "grad_norm": 0.43515282663382643, "learning_rate": 2e-05, "loss": 5.4856, "step": 9078 }, { "epoch": 0.30448562086023306, "grad_norm": 0.4065424003539664, "learning_rate": 2e-05, "loss": 5.4284, "step": 9079 }, { "epoch": 0.3045191582124591, "grad_norm": 0.4099045046652272, "learning_rate": 2e-05, "loss": 5.517, "step": 9080 }, { "epoch": 0.30455269556468517, "grad_norm": 0.4918110521547937, "learning_rate": 2e-05, "loss": 5.4991, "step": 9081 }, { "epoch": 0.3045862329169112, "grad_norm": 0.40302619682335267, "learning_rate": 2e-05, "loss": 5.4184, "step": 9082 }, { "epoch": 0.3046197702691372, "grad_norm": 0.393154814274811, "learning_rate": 2e-05, "loss": 5.6289, "step": 9083 }, { "epoch": 0.3046533076213633, "grad_norm": 0.4271223282923499, "learning_rate": 2e-05, "loss": 5.6005, "step": 9084 }, { "epoch": 0.30468684497358933, "grad_norm": 0.4344437985523215, "learning_rate": 2e-05, "loss": 5.5673, "step": 9085 }, { "epoch": 0.3047203823258154, "grad_norm": 0.42388630317233195, "learning_rate": 2e-05, "loss": 5.603, "step": 9086 }, { "epoch": 0.30475391967804144, "grad_norm": 0.41288147553810484, "learning_rate": 2e-05, "loss": 5.5055, "step": 9087 }, { "epoch": 0.30478745703026744, "grad_norm": 0.4059636530848351, "learning_rate": 2e-05, "loss": 5.4613, "step": 9088 }, { "epoch": 0.3048209943824935, "grad_norm": 0.39084835186441336, "learning_rate": 2e-05, "loss": 5.4978, "step": 9089 }, { "epoch": 0.30485453173471955, "grad_norm": 0.38627011570754644, "learning_rate": 2e-05, "loss": 5.5475, "step": 9090 }, { "epoch": 0.3048880690869456, "grad_norm": 0.41931715607552134, "learning_rate": 2e-05, "loss": 5.3806, "step": 9091 }, { "epoch": 0.3049216064391716, "grad_norm": 0.4171567172968393, "learning_rate": 2e-05, "loss": 5.4531, "step": 9092 }, { "epoch": 0.30495514379139765, "grad_norm": 0.41070031045676464, "learning_rate": 2e-05, "loss": 5.6984, "step": 9093 }, { "epoch": 0.3049886811436237, "grad_norm": 0.418747605536371, "learning_rate": 2e-05, "loss": 5.887, "step": 9094 }, { "epoch": 0.30502221849584976, "grad_norm": 0.445642914610642, "learning_rate": 2e-05, "loss": 5.5039, "step": 9095 }, { "epoch": 0.3050557558480758, "grad_norm": 0.40677590652501455, "learning_rate": 2e-05, "loss": 5.606, "step": 9096 }, { "epoch": 0.3050892932003018, "grad_norm": 0.4369531425506109, "learning_rate": 2e-05, "loss": 5.5363, "step": 9097 }, { "epoch": 0.30512283055252787, "grad_norm": 0.4022257093509486, "learning_rate": 2e-05, "loss": 5.5937, "step": 9098 }, { "epoch": 0.3051563679047539, "grad_norm": 0.4172680307822653, "learning_rate": 2e-05, "loss": 5.4787, "step": 9099 }, { "epoch": 0.30518990525698, "grad_norm": 0.4131877522497547, "learning_rate": 2e-05, "loss": 5.5067, "step": 9100 }, { "epoch": 0.30522344260920603, "grad_norm": 0.41310851176632446, "learning_rate": 2e-05, "loss": 5.633, "step": 9101 }, { "epoch": 0.30525697996143203, "grad_norm": 0.42736091605251575, "learning_rate": 2e-05, "loss": 5.5941, "step": 9102 }, { "epoch": 0.3052905173136581, "grad_norm": 0.44366195588122137, "learning_rate": 2e-05, "loss": 5.4548, "step": 9103 }, { "epoch": 0.30532405466588414, "grad_norm": 0.4303723161405025, "learning_rate": 2e-05, "loss": 5.5159, "step": 9104 }, { "epoch": 0.3053575920181102, "grad_norm": 0.4151242107154663, "learning_rate": 2e-05, "loss": 5.6888, "step": 9105 }, { "epoch": 0.3053911293703362, "grad_norm": 0.40593123476696785, "learning_rate": 2e-05, "loss": 5.5479, "step": 9106 }, { "epoch": 0.30542466672256224, "grad_norm": 0.42314106085159564, "learning_rate": 2e-05, "loss": 5.5333, "step": 9107 }, { "epoch": 0.3054582040747883, "grad_norm": 0.41153747534333973, "learning_rate": 2e-05, "loss": 5.4184, "step": 9108 }, { "epoch": 0.30549174142701435, "grad_norm": 0.4254280011616888, "learning_rate": 2e-05, "loss": 5.6093, "step": 9109 }, { "epoch": 0.3055252787792404, "grad_norm": 0.46533670456649595, "learning_rate": 2e-05, "loss": 5.6222, "step": 9110 }, { "epoch": 0.3055588161314664, "grad_norm": 0.40452811419721424, "learning_rate": 2e-05, "loss": 5.5037, "step": 9111 }, { "epoch": 0.30559235348369246, "grad_norm": 0.4124156012952919, "learning_rate": 2e-05, "loss": 5.6469, "step": 9112 }, { "epoch": 0.3056258908359185, "grad_norm": 0.4021562454569473, "learning_rate": 2e-05, "loss": 5.4752, "step": 9113 }, { "epoch": 0.30565942818814457, "grad_norm": 0.4061779753898541, "learning_rate": 2e-05, "loss": 5.5278, "step": 9114 }, { "epoch": 0.30569296554037056, "grad_norm": 0.4593342118917841, "learning_rate": 2e-05, "loss": 5.4606, "step": 9115 }, { "epoch": 0.3057265028925966, "grad_norm": 0.42812595260444464, "learning_rate": 2e-05, "loss": 5.6869, "step": 9116 }, { "epoch": 0.3057600402448227, "grad_norm": 0.417160422504052, "learning_rate": 2e-05, "loss": 5.7479, "step": 9117 }, { "epoch": 0.3057935775970487, "grad_norm": 0.41529945234022414, "learning_rate": 2e-05, "loss": 5.5822, "step": 9118 }, { "epoch": 0.3058271149492748, "grad_norm": 0.4274126079211814, "learning_rate": 2e-05, "loss": 5.5179, "step": 9119 }, { "epoch": 0.3058606523015008, "grad_norm": 0.4138043091010832, "learning_rate": 2e-05, "loss": 5.5491, "step": 9120 }, { "epoch": 0.30589418965372683, "grad_norm": 0.4118330560847116, "learning_rate": 2e-05, "loss": 5.5715, "step": 9121 }, { "epoch": 0.3059277270059529, "grad_norm": 0.41157398504674286, "learning_rate": 2e-05, "loss": 5.5477, "step": 9122 }, { "epoch": 0.30596126435817894, "grad_norm": 0.4101977802271984, "learning_rate": 2e-05, "loss": 5.5208, "step": 9123 }, { "epoch": 0.30599480171040494, "grad_norm": 0.42551738419346635, "learning_rate": 2e-05, "loss": 5.4401, "step": 9124 }, { "epoch": 0.306028339062631, "grad_norm": 0.45071910516042424, "learning_rate": 2e-05, "loss": 5.6034, "step": 9125 }, { "epoch": 0.30606187641485705, "grad_norm": 0.4443073176443632, "learning_rate": 2e-05, "loss": 5.6224, "step": 9126 }, { "epoch": 0.3060954137670831, "grad_norm": 0.4274689471214548, "learning_rate": 2e-05, "loss": 5.6499, "step": 9127 }, { "epoch": 0.30612895111930916, "grad_norm": 0.4749756318996016, "learning_rate": 2e-05, "loss": 5.4585, "step": 9128 }, { "epoch": 0.30616248847153515, "grad_norm": 0.43263713771470175, "learning_rate": 2e-05, "loss": 5.5944, "step": 9129 }, { "epoch": 0.3061960258237612, "grad_norm": 0.38608435792879353, "learning_rate": 2e-05, "loss": 5.446, "step": 9130 }, { "epoch": 0.30622956317598726, "grad_norm": 0.43556491601265535, "learning_rate": 2e-05, "loss": 5.374, "step": 9131 }, { "epoch": 0.3062631005282133, "grad_norm": 0.4448898863705919, "learning_rate": 2e-05, "loss": 5.555, "step": 9132 }, { "epoch": 0.3062966378804393, "grad_norm": 0.42878182148122346, "learning_rate": 2e-05, "loss": 5.56, "step": 9133 }, { "epoch": 0.30633017523266537, "grad_norm": 0.4704871263933644, "learning_rate": 2e-05, "loss": 5.483, "step": 9134 }, { "epoch": 0.3063637125848914, "grad_norm": 0.42452940642607206, "learning_rate": 2e-05, "loss": 5.516, "step": 9135 }, { "epoch": 0.3063972499371175, "grad_norm": 0.46134222728035, "learning_rate": 2e-05, "loss": 5.3926, "step": 9136 }, { "epoch": 0.30643078728934353, "grad_norm": 0.395769858610061, "learning_rate": 2e-05, "loss": 5.4589, "step": 9137 }, { "epoch": 0.30646432464156953, "grad_norm": 0.4337264975009272, "learning_rate": 2e-05, "loss": 5.3684, "step": 9138 }, { "epoch": 0.3064978619937956, "grad_norm": 0.42596597733884217, "learning_rate": 2e-05, "loss": 5.7025, "step": 9139 }, { "epoch": 0.30653139934602164, "grad_norm": 0.38834953549316403, "learning_rate": 2e-05, "loss": 5.5233, "step": 9140 }, { "epoch": 0.3065649366982477, "grad_norm": 0.3986660708765398, "learning_rate": 2e-05, "loss": 5.437, "step": 9141 }, { "epoch": 0.3065984740504737, "grad_norm": 0.4150098911448841, "learning_rate": 2e-05, "loss": 5.5767, "step": 9142 }, { "epoch": 0.30663201140269974, "grad_norm": 0.4191633139032729, "learning_rate": 2e-05, "loss": 5.4213, "step": 9143 }, { "epoch": 0.3066655487549258, "grad_norm": 0.38063405395455485, "learning_rate": 2e-05, "loss": 5.534, "step": 9144 }, { "epoch": 0.30669908610715185, "grad_norm": 0.37527643119374404, "learning_rate": 2e-05, "loss": 5.5192, "step": 9145 }, { "epoch": 0.3067326234593779, "grad_norm": 0.43052840815348364, "learning_rate": 2e-05, "loss": 5.4632, "step": 9146 }, { "epoch": 0.3067661608116039, "grad_norm": 0.39536085352011213, "learning_rate": 2e-05, "loss": 5.5762, "step": 9147 }, { "epoch": 0.30679969816382996, "grad_norm": 0.3878241699674232, "learning_rate": 2e-05, "loss": 5.5544, "step": 9148 }, { "epoch": 0.306833235516056, "grad_norm": 0.39204543395838615, "learning_rate": 2e-05, "loss": 5.5469, "step": 9149 }, { "epoch": 0.30686677286828207, "grad_norm": 0.436572048695488, "learning_rate": 2e-05, "loss": 5.3479, "step": 9150 }, { "epoch": 0.30690031022050807, "grad_norm": 0.41887842669116476, "learning_rate": 2e-05, "loss": 5.4997, "step": 9151 }, { "epoch": 0.3069338475727341, "grad_norm": 0.39975020568654374, "learning_rate": 2e-05, "loss": 5.6162, "step": 9152 }, { "epoch": 0.3069673849249602, "grad_norm": 0.38838491208843945, "learning_rate": 2e-05, "loss": 5.3046, "step": 9153 }, { "epoch": 0.30700092227718623, "grad_norm": 0.4254298601629104, "learning_rate": 2e-05, "loss": 5.664, "step": 9154 }, { "epoch": 0.3070344596294123, "grad_norm": 0.3861231637974706, "learning_rate": 2e-05, "loss": 5.4447, "step": 9155 }, { "epoch": 0.3070679969816383, "grad_norm": 0.4253157378001889, "learning_rate": 2e-05, "loss": 5.7276, "step": 9156 }, { "epoch": 0.30710153433386433, "grad_norm": 0.4128471757421679, "learning_rate": 2e-05, "loss": 5.4883, "step": 9157 }, { "epoch": 0.3071350716860904, "grad_norm": 0.5015661692622201, "learning_rate": 2e-05, "loss": 5.4699, "step": 9158 }, { "epoch": 0.30716860903831644, "grad_norm": 0.421095877284298, "learning_rate": 2e-05, "loss": 5.6438, "step": 9159 }, { "epoch": 0.30720214639054244, "grad_norm": 0.4219241934533887, "learning_rate": 2e-05, "loss": 5.488, "step": 9160 }, { "epoch": 0.3072356837427685, "grad_norm": 0.39906673903092404, "learning_rate": 2e-05, "loss": 5.6267, "step": 9161 }, { "epoch": 0.30726922109499455, "grad_norm": 0.3934716280630118, "learning_rate": 2e-05, "loss": 5.378, "step": 9162 }, { "epoch": 0.3073027584472206, "grad_norm": 0.4164943913129981, "learning_rate": 2e-05, "loss": 5.4794, "step": 9163 }, { "epoch": 0.30733629579944666, "grad_norm": 0.45452783288839227, "learning_rate": 2e-05, "loss": 5.6519, "step": 9164 }, { "epoch": 0.30736983315167266, "grad_norm": 0.4102042416963979, "learning_rate": 2e-05, "loss": 5.432, "step": 9165 }, { "epoch": 0.3074033705038987, "grad_norm": 0.4215273731101714, "learning_rate": 2e-05, "loss": 5.6306, "step": 9166 }, { "epoch": 0.30743690785612476, "grad_norm": 0.4116708419134264, "learning_rate": 2e-05, "loss": 5.5831, "step": 9167 }, { "epoch": 0.3074704452083508, "grad_norm": 0.45095719160374714, "learning_rate": 2e-05, "loss": 5.6662, "step": 9168 }, { "epoch": 0.3075039825605768, "grad_norm": 0.40886580991540666, "learning_rate": 2e-05, "loss": 5.5565, "step": 9169 }, { "epoch": 0.30753751991280287, "grad_norm": 0.42195462806761297, "learning_rate": 2e-05, "loss": 5.436, "step": 9170 }, { "epoch": 0.3075710572650289, "grad_norm": 0.42546581376993303, "learning_rate": 2e-05, "loss": 5.4704, "step": 9171 }, { "epoch": 0.307604594617255, "grad_norm": 0.4011390933979419, "learning_rate": 2e-05, "loss": 5.3975, "step": 9172 }, { "epoch": 0.30763813196948103, "grad_norm": 0.4028706730456391, "learning_rate": 2e-05, "loss": 5.5199, "step": 9173 }, { "epoch": 0.30767166932170703, "grad_norm": 0.396887433282081, "learning_rate": 2e-05, "loss": 5.5919, "step": 9174 }, { "epoch": 0.3077052066739331, "grad_norm": 0.40905741542737173, "learning_rate": 2e-05, "loss": 5.5658, "step": 9175 }, { "epoch": 0.30773874402615914, "grad_norm": 0.41870315994745894, "learning_rate": 2e-05, "loss": 5.5861, "step": 9176 }, { "epoch": 0.3077722813783852, "grad_norm": 0.40272153616207573, "learning_rate": 2e-05, "loss": 5.6784, "step": 9177 }, { "epoch": 0.3078058187306112, "grad_norm": 0.39022121824309186, "learning_rate": 2e-05, "loss": 5.3256, "step": 9178 }, { "epoch": 0.30783935608283725, "grad_norm": 0.3817985461257073, "learning_rate": 2e-05, "loss": 5.4108, "step": 9179 }, { "epoch": 0.3078728934350633, "grad_norm": 0.3977587951710843, "learning_rate": 2e-05, "loss": 5.4955, "step": 9180 }, { "epoch": 0.30790643078728935, "grad_norm": 0.3894747979711148, "learning_rate": 2e-05, "loss": 5.817, "step": 9181 }, { "epoch": 0.3079399681395154, "grad_norm": 0.3931905747528451, "learning_rate": 2e-05, "loss": 5.6452, "step": 9182 }, { "epoch": 0.3079735054917414, "grad_norm": 0.425137634629871, "learning_rate": 2e-05, "loss": 5.5529, "step": 9183 }, { "epoch": 0.30800704284396746, "grad_norm": 0.3752267138086095, "learning_rate": 2e-05, "loss": 5.532, "step": 9184 }, { "epoch": 0.3080405801961935, "grad_norm": 0.395614121549777, "learning_rate": 2e-05, "loss": 5.474, "step": 9185 }, { "epoch": 0.30807411754841957, "grad_norm": 0.4344316260788266, "learning_rate": 2e-05, "loss": 5.5346, "step": 9186 }, { "epoch": 0.30810765490064557, "grad_norm": 0.4135863173115789, "learning_rate": 2e-05, "loss": 5.5102, "step": 9187 }, { "epoch": 0.3081411922528716, "grad_norm": 0.41021656604002166, "learning_rate": 2e-05, "loss": 5.7658, "step": 9188 }, { "epoch": 0.3081747296050977, "grad_norm": 0.4332704116784367, "learning_rate": 2e-05, "loss": 5.7603, "step": 9189 }, { "epoch": 0.30820826695732373, "grad_norm": 0.3880771462867289, "learning_rate": 2e-05, "loss": 5.4639, "step": 9190 }, { "epoch": 0.3082418043095498, "grad_norm": 0.3831142301719486, "learning_rate": 2e-05, "loss": 5.5023, "step": 9191 }, { "epoch": 0.3082753416617758, "grad_norm": 0.4566091990070566, "learning_rate": 2e-05, "loss": 5.4076, "step": 9192 }, { "epoch": 0.30830887901400184, "grad_norm": 0.40702906130298916, "learning_rate": 2e-05, "loss": 5.5142, "step": 9193 }, { "epoch": 0.3083424163662279, "grad_norm": 0.4028608119962689, "learning_rate": 2e-05, "loss": 5.5294, "step": 9194 }, { "epoch": 0.30837595371845394, "grad_norm": 0.3968791261848066, "learning_rate": 2e-05, "loss": 5.4457, "step": 9195 }, { "epoch": 0.30840949107068, "grad_norm": 0.4109651506019768, "learning_rate": 2e-05, "loss": 5.5788, "step": 9196 }, { "epoch": 0.308443028422906, "grad_norm": 0.4085352767835987, "learning_rate": 2e-05, "loss": 5.6293, "step": 9197 }, { "epoch": 0.30847656577513205, "grad_norm": 0.39897088697939026, "learning_rate": 2e-05, "loss": 5.5492, "step": 9198 }, { "epoch": 0.3085101031273581, "grad_norm": 0.4117631704093835, "learning_rate": 2e-05, "loss": 5.5352, "step": 9199 }, { "epoch": 0.30854364047958416, "grad_norm": 0.4190663994028657, "learning_rate": 2e-05, "loss": 5.4351, "step": 9200 }, { "epoch": 0.30857717783181016, "grad_norm": 0.40499862087830574, "learning_rate": 2e-05, "loss": 5.9062, "step": 9201 }, { "epoch": 0.3086107151840362, "grad_norm": 0.45339946872498016, "learning_rate": 2e-05, "loss": 5.3546, "step": 9202 }, { "epoch": 0.30864425253626226, "grad_norm": 0.4226588373136475, "learning_rate": 2e-05, "loss": 5.4944, "step": 9203 }, { "epoch": 0.3086777898884883, "grad_norm": 0.4035334213668358, "learning_rate": 2e-05, "loss": 5.3756, "step": 9204 }, { "epoch": 0.3087113272407144, "grad_norm": 0.43417582588627474, "learning_rate": 2e-05, "loss": 5.5597, "step": 9205 }, { "epoch": 0.30874486459294037, "grad_norm": 0.429597586166785, "learning_rate": 2e-05, "loss": 5.5113, "step": 9206 }, { "epoch": 0.3087784019451664, "grad_norm": 0.39891378928029, "learning_rate": 2e-05, "loss": 5.5382, "step": 9207 }, { "epoch": 0.3088119392973925, "grad_norm": 0.48585560121134713, "learning_rate": 2e-05, "loss": 5.4888, "step": 9208 }, { "epoch": 0.30884547664961853, "grad_norm": 0.4219883326809402, "learning_rate": 2e-05, "loss": 5.515, "step": 9209 }, { "epoch": 0.30887901400184453, "grad_norm": 0.4151857240882028, "learning_rate": 2e-05, "loss": 5.5512, "step": 9210 }, { "epoch": 0.3089125513540706, "grad_norm": 0.46864421591712474, "learning_rate": 2e-05, "loss": 5.6129, "step": 9211 }, { "epoch": 0.30894608870629664, "grad_norm": 0.4418570417925739, "learning_rate": 2e-05, "loss": 5.6708, "step": 9212 }, { "epoch": 0.3089796260585227, "grad_norm": 0.43151776985290785, "learning_rate": 2e-05, "loss": 5.6377, "step": 9213 }, { "epoch": 0.30901316341074875, "grad_norm": 0.4008231620902026, "learning_rate": 2e-05, "loss": 5.6099, "step": 9214 }, { "epoch": 0.30904670076297475, "grad_norm": 0.41966365448359966, "learning_rate": 2e-05, "loss": 5.4579, "step": 9215 }, { "epoch": 0.3090802381152008, "grad_norm": 0.45385975563056336, "learning_rate": 2e-05, "loss": 5.6367, "step": 9216 }, { "epoch": 0.30911377546742685, "grad_norm": 0.4146081160083247, "learning_rate": 2e-05, "loss": 5.5106, "step": 9217 }, { "epoch": 0.3091473128196529, "grad_norm": 0.4497411144980937, "learning_rate": 2e-05, "loss": 5.509, "step": 9218 }, { "epoch": 0.3091808501718789, "grad_norm": 0.5100151085907018, "learning_rate": 2e-05, "loss": 5.4723, "step": 9219 }, { "epoch": 0.30921438752410496, "grad_norm": 0.37870334143908063, "learning_rate": 2e-05, "loss": 5.359, "step": 9220 }, { "epoch": 0.309247924876331, "grad_norm": 0.4574444184955396, "learning_rate": 2e-05, "loss": 5.5373, "step": 9221 }, { "epoch": 0.30928146222855707, "grad_norm": 0.4244512523951922, "learning_rate": 2e-05, "loss": 5.5593, "step": 9222 }, { "epoch": 0.3093149995807831, "grad_norm": 0.3900422642688676, "learning_rate": 2e-05, "loss": 5.6125, "step": 9223 }, { "epoch": 0.3093485369330091, "grad_norm": 0.4621824778144373, "learning_rate": 2e-05, "loss": 5.5271, "step": 9224 }, { "epoch": 0.3093820742852352, "grad_norm": 0.4213402451247459, "learning_rate": 2e-05, "loss": 5.4964, "step": 9225 }, { "epoch": 0.30941561163746123, "grad_norm": 0.41874060318727224, "learning_rate": 2e-05, "loss": 5.6205, "step": 9226 }, { "epoch": 0.3094491489896873, "grad_norm": 0.43668661287386673, "learning_rate": 2e-05, "loss": 5.4812, "step": 9227 }, { "epoch": 0.3094826863419133, "grad_norm": 0.42791567452686907, "learning_rate": 2e-05, "loss": 5.5043, "step": 9228 }, { "epoch": 0.30951622369413934, "grad_norm": 0.41240189565873764, "learning_rate": 2e-05, "loss": 5.6667, "step": 9229 }, { "epoch": 0.3095497610463654, "grad_norm": 0.40726427943765314, "learning_rate": 2e-05, "loss": 5.3582, "step": 9230 }, { "epoch": 0.30958329839859144, "grad_norm": 0.4029203486569034, "learning_rate": 2e-05, "loss": 5.5079, "step": 9231 }, { "epoch": 0.3096168357508175, "grad_norm": 0.4425497099488413, "learning_rate": 2e-05, "loss": 5.742, "step": 9232 }, { "epoch": 0.3096503731030435, "grad_norm": 0.4191653442635105, "learning_rate": 2e-05, "loss": 5.415, "step": 9233 }, { "epoch": 0.30968391045526955, "grad_norm": 0.45659031491357777, "learning_rate": 2e-05, "loss": 5.5028, "step": 9234 }, { "epoch": 0.3097174478074956, "grad_norm": 0.399094751626352, "learning_rate": 2e-05, "loss": 5.7025, "step": 9235 }, { "epoch": 0.30975098515972166, "grad_norm": 0.4141620521335062, "learning_rate": 2e-05, "loss": 5.5152, "step": 9236 }, { "epoch": 0.30978452251194766, "grad_norm": 0.40361229909499363, "learning_rate": 2e-05, "loss": 5.6038, "step": 9237 }, { "epoch": 0.3098180598641737, "grad_norm": 0.39338392853994497, "learning_rate": 2e-05, "loss": 5.6523, "step": 9238 }, { "epoch": 0.30985159721639977, "grad_norm": 0.39380048456533484, "learning_rate": 2e-05, "loss": 5.5583, "step": 9239 }, { "epoch": 0.3098851345686258, "grad_norm": 0.4276551991507644, "learning_rate": 2e-05, "loss": 5.4641, "step": 9240 }, { "epoch": 0.3099186719208519, "grad_norm": 0.4044316703015719, "learning_rate": 2e-05, "loss": 5.577, "step": 9241 }, { "epoch": 0.30995220927307787, "grad_norm": 0.4203819106290734, "learning_rate": 2e-05, "loss": 5.4105, "step": 9242 }, { "epoch": 0.3099857466253039, "grad_norm": 0.43314318682381403, "learning_rate": 2e-05, "loss": 5.6757, "step": 9243 }, { "epoch": 0.31001928397753, "grad_norm": 0.40376850475022447, "learning_rate": 2e-05, "loss": 5.4278, "step": 9244 }, { "epoch": 0.31005282132975603, "grad_norm": 0.3967938295882895, "learning_rate": 2e-05, "loss": 5.6377, "step": 9245 }, { "epoch": 0.31008635868198203, "grad_norm": 0.41008723036555866, "learning_rate": 2e-05, "loss": 5.8048, "step": 9246 }, { "epoch": 0.3101198960342081, "grad_norm": 0.40119488609341514, "learning_rate": 2e-05, "loss": 5.5922, "step": 9247 }, { "epoch": 0.31015343338643414, "grad_norm": 0.402321153077001, "learning_rate": 2e-05, "loss": 5.5231, "step": 9248 }, { "epoch": 0.3101869707386602, "grad_norm": 0.39780028484872193, "learning_rate": 2e-05, "loss": 5.1886, "step": 9249 }, { "epoch": 0.31022050809088625, "grad_norm": 0.39662812911991113, "learning_rate": 2e-05, "loss": 5.4941, "step": 9250 }, { "epoch": 0.31025404544311225, "grad_norm": 0.407304642482583, "learning_rate": 2e-05, "loss": 5.662, "step": 9251 }, { "epoch": 0.3102875827953383, "grad_norm": 0.4434745000172737, "learning_rate": 2e-05, "loss": 5.6043, "step": 9252 }, { "epoch": 0.31032112014756436, "grad_norm": 0.41564389587218553, "learning_rate": 2e-05, "loss": 5.441, "step": 9253 }, { "epoch": 0.3103546574997904, "grad_norm": 0.4066048821272186, "learning_rate": 2e-05, "loss": 5.6164, "step": 9254 }, { "epoch": 0.3103881948520164, "grad_norm": 0.391529933242678, "learning_rate": 2e-05, "loss": 5.567, "step": 9255 }, { "epoch": 0.31042173220424246, "grad_norm": 0.41339340535747643, "learning_rate": 2e-05, "loss": 5.4421, "step": 9256 }, { "epoch": 0.3104552695564685, "grad_norm": 0.3902467785948317, "learning_rate": 2e-05, "loss": 5.4595, "step": 9257 }, { "epoch": 0.31048880690869457, "grad_norm": 0.39510843451355937, "learning_rate": 2e-05, "loss": 5.526, "step": 9258 }, { "epoch": 0.3105223442609206, "grad_norm": 0.41514020044815997, "learning_rate": 2e-05, "loss": 5.5419, "step": 9259 }, { "epoch": 0.3105558816131466, "grad_norm": 0.40648720617681255, "learning_rate": 2e-05, "loss": 5.5783, "step": 9260 }, { "epoch": 0.3105894189653727, "grad_norm": 0.3804634640870772, "learning_rate": 2e-05, "loss": 5.5586, "step": 9261 }, { "epoch": 0.31062295631759873, "grad_norm": 0.4211925931709068, "learning_rate": 2e-05, "loss": 5.6811, "step": 9262 }, { "epoch": 0.3106564936698248, "grad_norm": 0.42396489986444263, "learning_rate": 2e-05, "loss": 5.6696, "step": 9263 }, { "epoch": 0.3106900310220508, "grad_norm": 0.42359775635570446, "learning_rate": 2e-05, "loss": 5.7189, "step": 9264 }, { "epoch": 0.31072356837427684, "grad_norm": 0.4429350929582602, "learning_rate": 2e-05, "loss": 5.5981, "step": 9265 }, { "epoch": 0.3107571057265029, "grad_norm": 0.4248790614557681, "learning_rate": 2e-05, "loss": 5.5025, "step": 9266 }, { "epoch": 0.31079064307872895, "grad_norm": 0.43836782976659944, "learning_rate": 2e-05, "loss": 5.5077, "step": 9267 }, { "epoch": 0.310824180430955, "grad_norm": 0.41697181887104323, "learning_rate": 2e-05, "loss": 5.6264, "step": 9268 }, { "epoch": 0.310857717783181, "grad_norm": 0.425583206804602, "learning_rate": 2e-05, "loss": 5.6319, "step": 9269 }, { "epoch": 0.31089125513540705, "grad_norm": 0.5062552569693521, "learning_rate": 2e-05, "loss": 5.4876, "step": 9270 }, { "epoch": 0.3109247924876331, "grad_norm": 0.4412403336062824, "learning_rate": 2e-05, "loss": 5.6769, "step": 9271 }, { "epoch": 0.31095832983985916, "grad_norm": 0.41455704848157987, "learning_rate": 2e-05, "loss": 5.5216, "step": 9272 }, { "epoch": 0.31099186719208516, "grad_norm": 0.3898743446478664, "learning_rate": 2e-05, "loss": 5.5526, "step": 9273 }, { "epoch": 0.3110254045443112, "grad_norm": 0.455353772400488, "learning_rate": 2e-05, "loss": 5.4391, "step": 9274 }, { "epoch": 0.31105894189653727, "grad_norm": 0.4065648497710358, "learning_rate": 2e-05, "loss": 5.4873, "step": 9275 }, { "epoch": 0.3110924792487633, "grad_norm": 0.44422154620711785, "learning_rate": 2e-05, "loss": 5.4879, "step": 9276 }, { "epoch": 0.3111260166009894, "grad_norm": 0.41470947110183654, "learning_rate": 2e-05, "loss": 5.6934, "step": 9277 }, { "epoch": 0.3111595539532154, "grad_norm": 0.4179445528814471, "learning_rate": 2e-05, "loss": 5.5497, "step": 9278 }, { "epoch": 0.3111930913054414, "grad_norm": 0.39957359334048603, "learning_rate": 2e-05, "loss": 5.5998, "step": 9279 }, { "epoch": 0.3112266286576675, "grad_norm": 0.4462823744391283, "learning_rate": 2e-05, "loss": 5.4388, "step": 9280 }, { "epoch": 0.31126016600989354, "grad_norm": 0.4267140984154471, "learning_rate": 2e-05, "loss": 5.6183, "step": 9281 }, { "epoch": 0.31129370336211953, "grad_norm": 0.4165871521221859, "learning_rate": 2e-05, "loss": 5.3927, "step": 9282 }, { "epoch": 0.3113272407143456, "grad_norm": 0.3825618565151189, "learning_rate": 2e-05, "loss": 5.7321, "step": 9283 }, { "epoch": 0.31136077806657164, "grad_norm": 0.3877785233331228, "learning_rate": 2e-05, "loss": 5.5004, "step": 9284 }, { "epoch": 0.3113943154187977, "grad_norm": 0.3956579341752307, "learning_rate": 2e-05, "loss": 5.5558, "step": 9285 }, { "epoch": 0.31142785277102375, "grad_norm": 0.3791934065956968, "learning_rate": 2e-05, "loss": 5.5754, "step": 9286 }, { "epoch": 0.31146139012324975, "grad_norm": 0.3999520506737601, "learning_rate": 2e-05, "loss": 5.5975, "step": 9287 }, { "epoch": 0.3114949274754758, "grad_norm": 0.40206975170907894, "learning_rate": 2e-05, "loss": 5.7292, "step": 9288 }, { "epoch": 0.31152846482770186, "grad_norm": 0.4398271930673597, "learning_rate": 2e-05, "loss": 5.74, "step": 9289 }, { "epoch": 0.3115620021799279, "grad_norm": 0.3793790289709166, "learning_rate": 2e-05, "loss": 5.5596, "step": 9290 }, { "epoch": 0.3115955395321539, "grad_norm": 0.446695877111042, "learning_rate": 2e-05, "loss": 5.5492, "step": 9291 }, { "epoch": 0.31162907688437996, "grad_norm": 0.4186603315605003, "learning_rate": 2e-05, "loss": 5.6792, "step": 9292 }, { "epoch": 0.311662614236606, "grad_norm": 0.4327337744082157, "learning_rate": 2e-05, "loss": 5.5572, "step": 9293 }, { "epoch": 0.31169615158883207, "grad_norm": 0.42630127845998417, "learning_rate": 2e-05, "loss": 5.6138, "step": 9294 }, { "epoch": 0.3117296889410581, "grad_norm": 0.40383821947402704, "learning_rate": 2e-05, "loss": 5.5154, "step": 9295 }, { "epoch": 0.3117632262932841, "grad_norm": 0.39925868579366935, "learning_rate": 2e-05, "loss": 5.581, "step": 9296 }, { "epoch": 0.3117967636455102, "grad_norm": 0.4997751247544776, "learning_rate": 2e-05, "loss": 5.585, "step": 9297 }, { "epoch": 0.31183030099773623, "grad_norm": 0.42321891487776836, "learning_rate": 2e-05, "loss": 5.2673, "step": 9298 }, { "epoch": 0.3118638383499623, "grad_norm": 0.40621597284284733, "learning_rate": 2e-05, "loss": 5.4884, "step": 9299 }, { "epoch": 0.31189737570218834, "grad_norm": 0.4123267823257959, "learning_rate": 2e-05, "loss": 5.5324, "step": 9300 }, { "epoch": 0.31193091305441434, "grad_norm": 0.45002634451737056, "learning_rate": 2e-05, "loss": 5.6569, "step": 9301 }, { "epoch": 0.3119644504066404, "grad_norm": 0.41207909922377517, "learning_rate": 2e-05, "loss": 5.3607, "step": 9302 }, { "epoch": 0.31199798775886645, "grad_norm": 0.40687325897399873, "learning_rate": 2e-05, "loss": 5.5833, "step": 9303 }, { "epoch": 0.3120315251110925, "grad_norm": 0.4366201475346321, "learning_rate": 2e-05, "loss": 5.4377, "step": 9304 }, { "epoch": 0.3120650624633185, "grad_norm": 0.39146438289724306, "learning_rate": 2e-05, "loss": 5.3739, "step": 9305 }, { "epoch": 0.31209859981554455, "grad_norm": 0.3882572381409826, "learning_rate": 2e-05, "loss": 5.5275, "step": 9306 }, { "epoch": 0.3121321371677706, "grad_norm": 0.382850292786803, "learning_rate": 2e-05, "loss": 5.5923, "step": 9307 }, { "epoch": 0.31216567451999666, "grad_norm": 0.40584849281880264, "learning_rate": 2e-05, "loss": 5.5037, "step": 9308 }, { "epoch": 0.3121992118722227, "grad_norm": 0.42850846502563184, "learning_rate": 2e-05, "loss": 5.4378, "step": 9309 }, { "epoch": 0.3122327492244487, "grad_norm": 0.3984506426821538, "learning_rate": 2e-05, "loss": 5.319, "step": 9310 }, { "epoch": 0.31226628657667477, "grad_norm": 0.3864446227976368, "learning_rate": 2e-05, "loss": 5.571, "step": 9311 }, { "epoch": 0.3122998239289008, "grad_norm": 0.3840564212084651, "learning_rate": 2e-05, "loss": 5.5735, "step": 9312 }, { "epoch": 0.3123333612811269, "grad_norm": 0.3999119248317481, "learning_rate": 2e-05, "loss": 5.5278, "step": 9313 }, { "epoch": 0.3123668986333529, "grad_norm": 0.4007476064146955, "learning_rate": 2e-05, "loss": 5.5923, "step": 9314 }, { "epoch": 0.31240043598557893, "grad_norm": 0.4437087870264575, "learning_rate": 2e-05, "loss": 5.4217, "step": 9315 }, { "epoch": 0.312433973337805, "grad_norm": 0.40020692693951004, "learning_rate": 2e-05, "loss": 5.7939, "step": 9316 }, { "epoch": 0.31246751069003104, "grad_norm": 0.40070879831171746, "learning_rate": 2e-05, "loss": 5.6939, "step": 9317 }, { "epoch": 0.3125010480422571, "grad_norm": 0.41670842966789046, "learning_rate": 2e-05, "loss": 5.4812, "step": 9318 }, { "epoch": 0.3125345853944831, "grad_norm": 0.4073622940731657, "learning_rate": 2e-05, "loss": 5.7954, "step": 9319 }, { "epoch": 0.31256812274670914, "grad_norm": 0.3969395910076735, "learning_rate": 2e-05, "loss": 5.5439, "step": 9320 }, { "epoch": 0.3126016600989352, "grad_norm": 0.41665409764706657, "learning_rate": 2e-05, "loss": 5.5653, "step": 9321 }, { "epoch": 0.31263519745116125, "grad_norm": 0.4102472194405555, "learning_rate": 2e-05, "loss": 5.5518, "step": 9322 }, { "epoch": 0.31266873480338725, "grad_norm": 0.3996354763481077, "learning_rate": 2e-05, "loss": 5.4142, "step": 9323 }, { "epoch": 0.3127022721556133, "grad_norm": 0.44609795841725275, "learning_rate": 2e-05, "loss": 5.4848, "step": 9324 }, { "epoch": 0.31273580950783936, "grad_norm": 0.4448870143584696, "learning_rate": 2e-05, "loss": 5.5888, "step": 9325 }, { "epoch": 0.3127693468600654, "grad_norm": 0.39081576397561596, "learning_rate": 2e-05, "loss": 5.5002, "step": 9326 }, { "epoch": 0.31280288421229147, "grad_norm": 0.41611306168723017, "learning_rate": 2e-05, "loss": 5.4868, "step": 9327 }, { "epoch": 0.31283642156451746, "grad_norm": 0.39692915953313485, "learning_rate": 2e-05, "loss": 5.5551, "step": 9328 }, { "epoch": 0.3128699589167435, "grad_norm": 0.4338964175291676, "learning_rate": 2e-05, "loss": 5.3346, "step": 9329 }, { "epoch": 0.31290349626896957, "grad_norm": 0.40841732952516663, "learning_rate": 2e-05, "loss": 5.4782, "step": 9330 }, { "epoch": 0.3129370336211956, "grad_norm": 0.40913192637432905, "learning_rate": 2e-05, "loss": 5.6217, "step": 9331 }, { "epoch": 0.3129705709734216, "grad_norm": 0.42061444026973055, "learning_rate": 2e-05, "loss": 5.784, "step": 9332 }, { "epoch": 0.3130041083256477, "grad_norm": 0.40900688032667915, "learning_rate": 2e-05, "loss": 5.5042, "step": 9333 }, { "epoch": 0.31303764567787373, "grad_norm": 0.4204034211037899, "learning_rate": 2e-05, "loss": 5.7681, "step": 9334 }, { "epoch": 0.3130711830300998, "grad_norm": 0.4504818022377473, "learning_rate": 2e-05, "loss": 5.4934, "step": 9335 }, { "epoch": 0.31310472038232584, "grad_norm": 0.421000186496759, "learning_rate": 2e-05, "loss": 5.6628, "step": 9336 }, { "epoch": 0.31313825773455184, "grad_norm": 0.4073137207523434, "learning_rate": 2e-05, "loss": 5.3837, "step": 9337 }, { "epoch": 0.3131717950867779, "grad_norm": 0.4268794721734825, "learning_rate": 2e-05, "loss": 5.4848, "step": 9338 }, { "epoch": 0.31320533243900395, "grad_norm": 0.427351304234192, "learning_rate": 2e-05, "loss": 5.4219, "step": 9339 }, { "epoch": 0.31323886979123, "grad_norm": 0.425247650760435, "learning_rate": 2e-05, "loss": 5.6008, "step": 9340 }, { "epoch": 0.313272407143456, "grad_norm": 0.42471728659156655, "learning_rate": 2e-05, "loss": 5.5652, "step": 9341 }, { "epoch": 0.31330594449568205, "grad_norm": 0.4072657489345007, "learning_rate": 2e-05, "loss": 5.7397, "step": 9342 }, { "epoch": 0.3133394818479081, "grad_norm": 0.3882064405110786, "learning_rate": 2e-05, "loss": 5.7408, "step": 9343 }, { "epoch": 0.31337301920013416, "grad_norm": 0.44036650144639206, "learning_rate": 2e-05, "loss": 5.6507, "step": 9344 }, { "epoch": 0.3134065565523602, "grad_norm": 0.395056805712951, "learning_rate": 2e-05, "loss": 5.5522, "step": 9345 }, { "epoch": 0.3134400939045862, "grad_norm": 0.4109299577547163, "learning_rate": 2e-05, "loss": 5.5707, "step": 9346 }, { "epoch": 0.31347363125681227, "grad_norm": 0.417150456181431, "learning_rate": 2e-05, "loss": 5.4431, "step": 9347 }, { "epoch": 0.3135071686090383, "grad_norm": 0.3903958722233507, "learning_rate": 2e-05, "loss": 5.7889, "step": 9348 }, { "epoch": 0.3135407059612644, "grad_norm": 0.420608900061243, "learning_rate": 2e-05, "loss": 5.7954, "step": 9349 }, { "epoch": 0.3135742433134904, "grad_norm": 0.40681646647976105, "learning_rate": 2e-05, "loss": 5.7281, "step": 9350 }, { "epoch": 0.31360778066571643, "grad_norm": 0.4276818677692382, "learning_rate": 2e-05, "loss": 5.5776, "step": 9351 }, { "epoch": 0.3136413180179425, "grad_norm": 0.4291474368420305, "learning_rate": 2e-05, "loss": 5.6402, "step": 9352 }, { "epoch": 0.31367485537016854, "grad_norm": 0.39618305974712487, "learning_rate": 2e-05, "loss": 5.6916, "step": 9353 }, { "epoch": 0.3137083927223946, "grad_norm": 0.41172868356174736, "learning_rate": 2e-05, "loss": 5.4844, "step": 9354 }, { "epoch": 0.3137419300746206, "grad_norm": 0.39175300952227265, "learning_rate": 2e-05, "loss": 5.6559, "step": 9355 }, { "epoch": 0.31377546742684664, "grad_norm": 0.39237604267705234, "learning_rate": 2e-05, "loss": 5.532, "step": 9356 }, { "epoch": 0.3138090047790727, "grad_norm": 0.3894927604531899, "learning_rate": 2e-05, "loss": 5.7388, "step": 9357 }, { "epoch": 0.31384254213129875, "grad_norm": 0.4243775102698843, "learning_rate": 2e-05, "loss": 5.5837, "step": 9358 }, { "epoch": 0.31387607948352475, "grad_norm": 0.40254115070259616, "learning_rate": 2e-05, "loss": 5.4961, "step": 9359 }, { "epoch": 0.3139096168357508, "grad_norm": 0.43390520955729917, "learning_rate": 2e-05, "loss": 5.5045, "step": 9360 }, { "epoch": 0.31394315418797686, "grad_norm": 0.42174491270610365, "learning_rate": 2e-05, "loss": 5.4179, "step": 9361 }, { "epoch": 0.3139766915402029, "grad_norm": 0.39283915268253594, "learning_rate": 2e-05, "loss": 5.5788, "step": 9362 }, { "epoch": 0.31401022889242897, "grad_norm": 0.40080825465797587, "learning_rate": 2e-05, "loss": 5.5813, "step": 9363 }, { "epoch": 0.31404376624465496, "grad_norm": 0.40081980666061245, "learning_rate": 2e-05, "loss": 5.5483, "step": 9364 }, { "epoch": 0.314077303596881, "grad_norm": 0.3935816366808478, "learning_rate": 2e-05, "loss": 5.6935, "step": 9365 }, { "epoch": 0.3141108409491071, "grad_norm": 0.4023104788225877, "learning_rate": 2e-05, "loss": 5.6402, "step": 9366 }, { "epoch": 0.3141443783013331, "grad_norm": 0.38430988590110843, "learning_rate": 2e-05, "loss": 5.3805, "step": 9367 }, { "epoch": 0.3141779156535591, "grad_norm": 0.3992841437939976, "learning_rate": 2e-05, "loss": 5.5567, "step": 9368 }, { "epoch": 0.3142114530057852, "grad_norm": 0.4003975302167479, "learning_rate": 2e-05, "loss": 5.6895, "step": 9369 }, { "epoch": 0.31424499035801123, "grad_norm": 0.4180502680693425, "learning_rate": 2e-05, "loss": 5.5734, "step": 9370 }, { "epoch": 0.3142785277102373, "grad_norm": 0.43143965636539805, "learning_rate": 2e-05, "loss": 5.4176, "step": 9371 }, { "epoch": 0.31431206506246334, "grad_norm": 0.4031970847022838, "learning_rate": 2e-05, "loss": 5.4405, "step": 9372 }, { "epoch": 0.31434560241468934, "grad_norm": 0.38065029609510564, "learning_rate": 2e-05, "loss": 5.553, "step": 9373 }, { "epoch": 0.3143791397669154, "grad_norm": 0.3919759900430265, "learning_rate": 2e-05, "loss": 5.7587, "step": 9374 }, { "epoch": 0.31441267711914145, "grad_norm": 0.3942647154653057, "learning_rate": 2e-05, "loss": 5.5458, "step": 9375 }, { "epoch": 0.3144462144713675, "grad_norm": 0.412030078463623, "learning_rate": 2e-05, "loss": 5.2911, "step": 9376 }, { "epoch": 0.3144797518235935, "grad_norm": 0.39917917520333246, "learning_rate": 2e-05, "loss": 5.5926, "step": 9377 }, { "epoch": 0.31451328917581955, "grad_norm": 0.39328861719000874, "learning_rate": 2e-05, "loss": 5.4824, "step": 9378 }, { "epoch": 0.3145468265280456, "grad_norm": 0.3828106072315135, "learning_rate": 2e-05, "loss": 5.5972, "step": 9379 }, { "epoch": 0.31458036388027166, "grad_norm": 0.3787898453157349, "learning_rate": 2e-05, "loss": 5.4926, "step": 9380 }, { "epoch": 0.3146139012324977, "grad_norm": 0.40119422788513265, "learning_rate": 2e-05, "loss": 5.4846, "step": 9381 }, { "epoch": 0.3146474385847237, "grad_norm": 0.39811204426412555, "learning_rate": 2e-05, "loss": 5.6786, "step": 9382 }, { "epoch": 0.31468097593694977, "grad_norm": 0.36903173143163615, "learning_rate": 2e-05, "loss": 5.5286, "step": 9383 }, { "epoch": 0.3147145132891758, "grad_norm": 0.4084278727015385, "learning_rate": 2e-05, "loss": 5.4287, "step": 9384 }, { "epoch": 0.3147480506414019, "grad_norm": 0.39881872695793824, "learning_rate": 2e-05, "loss": 5.518, "step": 9385 }, { "epoch": 0.3147815879936279, "grad_norm": 0.43853755104136577, "learning_rate": 2e-05, "loss": 5.7489, "step": 9386 }, { "epoch": 0.31481512534585393, "grad_norm": 0.4113811283972634, "learning_rate": 2e-05, "loss": 5.5767, "step": 9387 }, { "epoch": 0.31484866269808, "grad_norm": 0.3841755976216962, "learning_rate": 2e-05, "loss": 5.462, "step": 9388 }, { "epoch": 0.31488220005030604, "grad_norm": 0.41891434275212064, "learning_rate": 2e-05, "loss": 5.46, "step": 9389 }, { "epoch": 0.3149157374025321, "grad_norm": 0.45504156162882176, "learning_rate": 2e-05, "loss": 5.5115, "step": 9390 }, { "epoch": 0.3149492747547581, "grad_norm": 0.3951398916439876, "learning_rate": 2e-05, "loss": 5.5553, "step": 9391 }, { "epoch": 0.31498281210698414, "grad_norm": 0.4053895966597531, "learning_rate": 2e-05, "loss": 5.4625, "step": 9392 }, { "epoch": 0.3150163494592102, "grad_norm": 0.428218792663051, "learning_rate": 2e-05, "loss": 5.5002, "step": 9393 }, { "epoch": 0.31504988681143625, "grad_norm": 0.3951689960985019, "learning_rate": 2e-05, "loss": 5.6551, "step": 9394 }, { "epoch": 0.31508342416366225, "grad_norm": 0.39523523358747586, "learning_rate": 2e-05, "loss": 5.5956, "step": 9395 }, { "epoch": 0.3151169615158883, "grad_norm": 0.4406348829135891, "learning_rate": 2e-05, "loss": 5.4531, "step": 9396 }, { "epoch": 0.31515049886811436, "grad_norm": 0.38995740872022283, "learning_rate": 2e-05, "loss": 5.7848, "step": 9397 }, { "epoch": 0.3151840362203404, "grad_norm": 0.4445639337938921, "learning_rate": 2e-05, "loss": 5.3473, "step": 9398 }, { "epoch": 0.31521757357256647, "grad_norm": 0.41528321043304384, "learning_rate": 2e-05, "loss": 5.6849, "step": 9399 }, { "epoch": 0.31525111092479247, "grad_norm": 0.4486135874761416, "learning_rate": 2e-05, "loss": 5.5346, "step": 9400 }, { "epoch": 0.3152846482770185, "grad_norm": 0.38890191006186253, "learning_rate": 2e-05, "loss": 5.5221, "step": 9401 }, { "epoch": 0.3153181856292446, "grad_norm": 0.47249305110267636, "learning_rate": 2e-05, "loss": 5.4899, "step": 9402 }, { "epoch": 0.31535172298147063, "grad_norm": 0.4795291984572842, "learning_rate": 2e-05, "loss": 5.6349, "step": 9403 }, { "epoch": 0.3153852603336967, "grad_norm": 0.4092617233738659, "learning_rate": 2e-05, "loss": 5.4116, "step": 9404 }, { "epoch": 0.3154187976859227, "grad_norm": 0.4117489171073055, "learning_rate": 2e-05, "loss": 5.5211, "step": 9405 }, { "epoch": 0.31545233503814873, "grad_norm": 0.4197930659567572, "learning_rate": 2e-05, "loss": 5.5919, "step": 9406 }, { "epoch": 0.3154858723903748, "grad_norm": 0.4228517962576205, "learning_rate": 2e-05, "loss": 5.5585, "step": 9407 }, { "epoch": 0.31551940974260084, "grad_norm": 0.4173968078256289, "learning_rate": 2e-05, "loss": 5.3951, "step": 9408 }, { "epoch": 0.31555294709482684, "grad_norm": 0.4106838577588428, "learning_rate": 2e-05, "loss": 5.7772, "step": 9409 }, { "epoch": 0.3155864844470529, "grad_norm": 0.41241813027912794, "learning_rate": 2e-05, "loss": 5.4835, "step": 9410 }, { "epoch": 0.31562002179927895, "grad_norm": 0.40501016570089937, "learning_rate": 2e-05, "loss": 5.6112, "step": 9411 }, { "epoch": 0.315653559151505, "grad_norm": 0.38066782095925883, "learning_rate": 2e-05, "loss": 5.5009, "step": 9412 }, { "epoch": 0.31568709650373106, "grad_norm": 0.40219795774194417, "learning_rate": 2e-05, "loss": 5.3452, "step": 9413 }, { "epoch": 0.31572063385595706, "grad_norm": 0.45514697886426336, "learning_rate": 2e-05, "loss": 5.5049, "step": 9414 }, { "epoch": 0.3157541712081831, "grad_norm": 0.4085324648306774, "learning_rate": 2e-05, "loss": 5.6518, "step": 9415 }, { "epoch": 0.31578770856040916, "grad_norm": 0.4204239399812212, "learning_rate": 2e-05, "loss": 5.4683, "step": 9416 }, { "epoch": 0.3158212459126352, "grad_norm": 0.3937510512285471, "learning_rate": 2e-05, "loss": 5.6459, "step": 9417 }, { "epoch": 0.3158547832648612, "grad_norm": 0.408619230408321, "learning_rate": 2e-05, "loss": 5.4015, "step": 9418 }, { "epoch": 0.31588832061708727, "grad_norm": 0.4140348408460743, "learning_rate": 2e-05, "loss": 5.4222, "step": 9419 }, { "epoch": 0.3159218579693133, "grad_norm": 0.37885418880761546, "learning_rate": 2e-05, "loss": 5.5286, "step": 9420 }, { "epoch": 0.3159553953215394, "grad_norm": 0.43830142781666614, "learning_rate": 2e-05, "loss": 5.5964, "step": 9421 }, { "epoch": 0.31598893267376543, "grad_norm": 0.3882448662966704, "learning_rate": 2e-05, "loss": 5.5657, "step": 9422 }, { "epoch": 0.31602247002599143, "grad_norm": 0.38686908501296047, "learning_rate": 2e-05, "loss": 5.4444, "step": 9423 }, { "epoch": 0.3160560073782175, "grad_norm": 0.3790422531148773, "learning_rate": 2e-05, "loss": 5.4187, "step": 9424 }, { "epoch": 0.31608954473044354, "grad_norm": 0.39643268171975415, "learning_rate": 2e-05, "loss": 5.547, "step": 9425 }, { "epoch": 0.3161230820826696, "grad_norm": 0.4122257566455125, "learning_rate": 2e-05, "loss": 5.3303, "step": 9426 }, { "epoch": 0.3161566194348956, "grad_norm": 0.400173967146561, "learning_rate": 2e-05, "loss": 5.6526, "step": 9427 }, { "epoch": 0.31619015678712165, "grad_norm": 0.3948341469228234, "learning_rate": 2e-05, "loss": 5.4597, "step": 9428 }, { "epoch": 0.3162236941393477, "grad_norm": 0.4082330715329297, "learning_rate": 2e-05, "loss": 5.6527, "step": 9429 }, { "epoch": 0.31625723149157375, "grad_norm": 0.39573917280562154, "learning_rate": 2e-05, "loss": 5.4616, "step": 9430 }, { "epoch": 0.3162907688437998, "grad_norm": 0.37975550894430155, "learning_rate": 2e-05, "loss": 5.4892, "step": 9431 }, { "epoch": 0.3163243061960258, "grad_norm": 0.42422895841305436, "learning_rate": 2e-05, "loss": 5.4344, "step": 9432 }, { "epoch": 0.31635784354825186, "grad_norm": 0.41448496129535445, "learning_rate": 2e-05, "loss": 5.5799, "step": 9433 }, { "epoch": 0.3163913809004779, "grad_norm": 0.392899863463747, "learning_rate": 2e-05, "loss": 5.5944, "step": 9434 }, { "epoch": 0.31642491825270397, "grad_norm": 0.4001490947400554, "learning_rate": 2e-05, "loss": 5.4953, "step": 9435 }, { "epoch": 0.31645845560492997, "grad_norm": 0.4008848357840441, "learning_rate": 2e-05, "loss": 5.4066, "step": 9436 }, { "epoch": 0.316491992957156, "grad_norm": 0.4157113213822178, "learning_rate": 2e-05, "loss": 5.5795, "step": 9437 }, { "epoch": 0.3165255303093821, "grad_norm": 0.3949723131896945, "learning_rate": 2e-05, "loss": 5.6282, "step": 9438 }, { "epoch": 0.31655906766160813, "grad_norm": 0.3852908482393858, "learning_rate": 2e-05, "loss": 5.8365, "step": 9439 }, { "epoch": 0.3165926050138342, "grad_norm": 0.38675912108086163, "learning_rate": 2e-05, "loss": 5.6296, "step": 9440 }, { "epoch": 0.3166261423660602, "grad_norm": 0.401562650175933, "learning_rate": 2e-05, "loss": 5.5889, "step": 9441 }, { "epoch": 0.31665967971828624, "grad_norm": 0.4310368664578149, "learning_rate": 2e-05, "loss": 5.5881, "step": 9442 }, { "epoch": 0.3166932170705123, "grad_norm": 0.3749123531673481, "learning_rate": 2e-05, "loss": 5.5514, "step": 9443 }, { "epoch": 0.31672675442273834, "grad_norm": 0.3780659419627106, "learning_rate": 2e-05, "loss": 5.3058, "step": 9444 }, { "epoch": 0.31676029177496434, "grad_norm": 0.389182506044128, "learning_rate": 2e-05, "loss": 5.4364, "step": 9445 }, { "epoch": 0.3167938291271904, "grad_norm": 0.37197286637792, "learning_rate": 2e-05, "loss": 5.6122, "step": 9446 }, { "epoch": 0.31682736647941645, "grad_norm": 0.38790406588145504, "learning_rate": 2e-05, "loss": 5.7271, "step": 9447 }, { "epoch": 0.3168609038316425, "grad_norm": 0.42436043713517646, "learning_rate": 2e-05, "loss": 5.5826, "step": 9448 }, { "epoch": 0.31689444118386856, "grad_norm": 0.4194227095134646, "learning_rate": 2e-05, "loss": 5.6591, "step": 9449 }, { "epoch": 0.31692797853609456, "grad_norm": 0.4149644201642546, "learning_rate": 2e-05, "loss": 5.4846, "step": 9450 }, { "epoch": 0.3169615158883206, "grad_norm": 0.3989537161098853, "learning_rate": 2e-05, "loss": 5.6874, "step": 9451 }, { "epoch": 0.31699505324054666, "grad_norm": 0.4077696208490073, "learning_rate": 2e-05, "loss": 5.4297, "step": 9452 }, { "epoch": 0.3170285905927727, "grad_norm": 0.3798736518155071, "learning_rate": 2e-05, "loss": 5.7639, "step": 9453 }, { "epoch": 0.3170621279449987, "grad_norm": 0.39575085067489446, "learning_rate": 2e-05, "loss": 5.349, "step": 9454 }, { "epoch": 0.31709566529722477, "grad_norm": 0.4171091989913468, "learning_rate": 2e-05, "loss": 5.5901, "step": 9455 }, { "epoch": 0.3171292026494508, "grad_norm": 0.4110720996428187, "learning_rate": 2e-05, "loss": 5.5461, "step": 9456 }, { "epoch": 0.3171627400016769, "grad_norm": 0.4011728621055987, "learning_rate": 2e-05, "loss": 5.56, "step": 9457 }, { "epoch": 0.31719627735390293, "grad_norm": 0.39579515803253096, "learning_rate": 2e-05, "loss": 5.542, "step": 9458 }, { "epoch": 0.31722981470612893, "grad_norm": 0.3927448537676442, "learning_rate": 2e-05, "loss": 5.4461, "step": 9459 }, { "epoch": 0.317263352058355, "grad_norm": 0.4164905481321128, "learning_rate": 2e-05, "loss": 5.6006, "step": 9460 }, { "epoch": 0.31729688941058104, "grad_norm": 0.3898557443304704, "learning_rate": 2e-05, "loss": 5.8051, "step": 9461 }, { "epoch": 0.3173304267628071, "grad_norm": 0.4053178583270287, "learning_rate": 2e-05, "loss": 5.575, "step": 9462 }, { "epoch": 0.3173639641150331, "grad_norm": 0.40710025289106094, "learning_rate": 2e-05, "loss": 5.7506, "step": 9463 }, { "epoch": 0.31739750146725915, "grad_norm": 0.3991222666701993, "learning_rate": 2e-05, "loss": 5.6813, "step": 9464 }, { "epoch": 0.3174310388194852, "grad_norm": 0.4085435740381886, "learning_rate": 2e-05, "loss": 5.2757, "step": 9465 }, { "epoch": 0.31746457617171125, "grad_norm": 0.38106051493900284, "learning_rate": 2e-05, "loss": 5.5877, "step": 9466 }, { "epoch": 0.3174981135239373, "grad_norm": 0.4061808961075653, "learning_rate": 2e-05, "loss": 5.7194, "step": 9467 }, { "epoch": 0.3175316508761633, "grad_norm": 0.3981785038163821, "learning_rate": 2e-05, "loss": 5.4112, "step": 9468 }, { "epoch": 0.31756518822838936, "grad_norm": 0.3963137929444252, "learning_rate": 2e-05, "loss": 5.5334, "step": 9469 }, { "epoch": 0.3175987255806154, "grad_norm": 0.3876776554309992, "learning_rate": 2e-05, "loss": 5.3566, "step": 9470 }, { "epoch": 0.31763226293284147, "grad_norm": 0.378272788475613, "learning_rate": 2e-05, "loss": 5.7286, "step": 9471 }, { "epoch": 0.31766580028506747, "grad_norm": 0.40474742251493806, "learning_rate": 2e-05, "loss": 5.7889, "step": 9472 }, { "epoch": 0.3176993376372935, "grad_norm": 0.4142547044320111, "learning_rate": 2e-05, "loss": 5.7682, "step": 9473 }, { "epoch": 0.3177328749895196, "grad_norm": 0.3888007517648564, "learning_rate": 2e-05, "loss": 5.3776, "step": 9474 }, { "epoch": 0.31776641234174563, "grad_norm": 0.38099848403078196, "learning_rate": 2e-05, "loss": 5.4826, "step": 9475 }, { "epoch": 0.3177999496939717, "grad_norm": 0.39560419705768235, "learning_rate": 2e-05, "loss": 5.4613, "step": 9476 }, { "epoch": 0.3178334870461977, "grad_norm": 0.41304387562042616, "learning_rate": 2e-05, "loss": 5.578, "step": 9477 }, { "epoch": 0.31786702439842374, "grad_norm": 0.3877515370786295, "learning_rate": 2e-05, "loss": 5.5881, "step": 9478 }, { "epoch": 0.3179005617506498, "grad_norm": 0.38975599031177083, "learning_rate": 2e-05, "loss": 5.6645, "step": 9479 }, { "epoch": 0.31793409910287584, "grad_norm": 0.37900827206639326, "learning_rate": 2e-05, "loss": 5.8168, "step": 9480 }, { "epoch": 0.31796763645510184, "grad_norm": 0.38718169251445295, "learning_rate": 2e-05, "loss": 5.479, "step": 9481 }, { "epoch": 0.3180011738073279, "grad_norm": 0.40601830699936003, "learning_rate": 2e-05, "loss": 5.5124, "step": 9482 }, { "epoch": 0.31803471115955395, "grad_norm": 0.3762652934349904, "learning_rate": 2e-05, "loss": 5.6412, "step": 9483 }, { "epoch": 0.31806824851178, "grad_norm": 0.3812400050992693, "learning_rate": 2e-05, "loss": 5.5558, "step": 9484 }, { "epoch": 0.31810178586400606, "grad_norm": 0.39480750789163616, "learning_rate": 2e-05, "loss": 5.6193, "step": 9485 }, { "epoch": 0.31813532321623206, "grad_norm": 0.4179878717698014, "learning_rate": 2e-05, "loss": 5.5854, "step": 9486 }, { "epoch": 0.3181688605684581, "grad_norm": 0.39777320891573803, "learning_rate": 2e-05, "loss": 5.5857, "step": 9487 }, { "epoch": 0.31820239792068417, "grad_norm": 0.4255174730059033, "learning_rate": 2e-05, "loss": 5.5189, "step": 9488 }, { "epoch": 0.3182359352729102, "grad_norm": 0.38891447064844475, "learning_rate": 2e-05, "loss": 5.6844, "step": 9489 }, { "epoch": 0.3182694726251362, "grad_norm": 0.37241445863265077, "learning_rate": 2e-05, "loss": 5.6521, "step": 9490 }, { "epoch": 0.3183030099773623, "grad_norm": 0.4080484334970389, "learning_rate": 2e-05, "loss": 5.6955, "step": 9491 }, { "epoch": 0.3183365473295883, "grad_norm": 0.39494302277910837, "learning_rate": 2e-05, "loss": 5.6931, "step": 9492 }, { "epoch": 0.3183700846818144, "grad_norm": 0.4157673484863632, "learning_rate": 2e-05, "loss": 5.4423, "step": 9493 }, { "epoch": 0.31840362203404043, "grad_norm": 0.3995032619974458, "learning_rate": 2e-05, "loss": 5.6451, "step": 9494 }, { "epoch": 0.31843715938626643, "grad_norm": 0.42605032291061323, "learning_rate": 2e-05, "loss": 5.5055, "step": 9495 }, { "epoch": 0.3184706967384925, "grad_norm": 0.4102161869614296, "learning_rate": 2e-05, "loss": 5.728, "step": 9496 }, { "epoch": 0.31850423409071854, "grad_norm": 0.46013280149154895, "learning_rate": 2e-05, "loss": 5.4279, "step": 9497 }, { "epoch": 0.3185377714429446, "grad_norm": 0.3986420234510726, "learning_rate": 2e-05, "loss": 5.6105, "step": 9498 }, { "epoch": 0.31857130879517065, "grad_norm": 0.4461021920873525, "learning_rate": 2e-05, "loss": 5.6254, "step": 9499 }, { "epoch": 0.31860484614739665, "grad_norm": 0.4083112451419342, "learning_rate": 2e-05, "loss": 5.6759, "step": 9500 }, { "epoch": 0.3186383834996227, "grad_norm": 0.42345916220349883, "learning_rate": 2e-05, "loss": 5.3568, "step": 9501 }, { "epoch": 0.31867192085184876, "grad_norm": 0.3874901762589321, "learning_rate": 2e-05, "loss": 5.6179, "step": 9502 }, { "epoch": 0.3187054582040748, "grad_norm": 0.4357048469698415, "learning_rate": 2e-05, "loss": 5.74, "step": 9503 }, { "epoch": 0.3187389955563008, "grad_norm": 0.4039910153186201, "learning_rate": 2e-05, "loss": 5.3945, "step": 9504 }, { "epoch": 0.31877253290852686, "grad_norm": 0.41036206323014446, "learning_rate": 2e-05, "loss": 5.6859, "step": 9505 }, { "epoch": 0.3188060702607529, "grad_norm": 0.43353479177857424, "learning_rate": 2e-05, "loss": 5.6622, "step": 9506 }, { "epoch": 0.31883960761297897, "grad_norm": 0.39842177864270106, "learning_rate": 2e-05, "loss": 5.7346, "step": 9507 }, { "epoch": 0.318873144965205, "grad_norm": 0.44588185398481317, "learning_rate": 2e-05, "loss": 5.4152, "step": 9508 }, { "epoch": 0.318906682317431, "grad_norm": 0.39480406856963574, "learning_rate": 2e-05, "loss": 5.363, "step": 9509 }, { "epoch": 0.3189402196696571, "grad_norm": 0.3897866538080771, "learning_rate": 2e-05, "loss": 5.5157, "step": 9510 }, { "epoch": 0.31897375702188313, "grad_norm": 0.4135963455188956, "learning_rate": 2e-05, "loss": 5.5356, "step": 9511 }, { "epoch": 0.3190072943741092, "grad_norm": 0.4034728025786498, "learning_rate": 2e-05, "loss": 5.5764, "step": 9512 }, { "epoch": 0.3190408317263352, "grad_norm": 0.39175319943080844, "learning_rate": 2e-05, "loss": 5.4624, "step": 9513 }, { "epoch": 0.31907436907856124, "grad_norm": 0.4494861846246086, "learning_rate": 2e-05, "loss": 5.5917, "step": 9514 }, { "epoch": 0.3191079064307873, "grad_norm": 0.4164710119915116, "learning_rate": 2e-05, "loss": 5.7198, "step": 9515 }, { "epoch": 0.31914144378301335, "grad_norm": 0.37391759896393717, "learning_rate": 2e-05, "loss": 5.5843, "step": 9516 }, { "epoch": 0.3191749811352394, "grad_norm": 0.4117793520466327, "learning_rate": 2e-05, "loss": 5.4319, "step": 9517 }, { "epoch": 0.3192085184874654, "grad_norm": 0.4133057517905579, "learning_rate": 2e-05, "loss": 5.574, "step": 9518 }, { "epoch": 0.31924205583969145, "grad_norm": 0.42608244755083563, "learning_rate": 2e-05, "loss": 5.5269, "step": 9519 }, { "epoch": 0.3192755931919175, "grad_norm": 0.440732608574647, "learning_rate": 2e-05, "loss": 5.4911, "step": 9520 }, { "epoch": 0.31930913054414356, "grad_norm": 0.38421801720202725, "learning_rate": 2e-05, "loss": 5.8614, "step": 9521 }, { "epoch": 0.31934266789636956, "grad_norm": 0.39873609278931377, "learning_rate": 2e-05, "loss": 5.4989, "step": 9522 }, { "epoch": 0.3193762052485956, "grad_norm": 0.3983642103796405, "learning_rate": 2e-05, "loss": 5.6636, "step": 9523 }, { "epoch": 0.31940974260082167, "grad_norm": 0.37947760980832423, "learning_rate": 2e-05, "loss": 5.4505, "step": 9524 }, { "epoch": 0.3194432799530477, "grad_norm": 0.39737086713920194, "learning_rate": 2e-05, "loss": 5.5993, "step": 9525 }, { "epoch": 0.3194768173052738, "grad_norm": 0.4276147793204861, "learning_rate": 2e-05, "loss": 5.557, "step": 9526 }, { "epoch": 0.3195103546574998, "grad_norm": 0.3964541441541294, "learning_rate": 2e-05, "loss": 5.7145, "step": 9527 }, { "epoch": 0.3195438920097258, "grad_norm": 0.3921753282988752, "learning_rate": 2e-05, "loss": 5.5229, "step": 9528 }, { "epoch": 0.3195774293619519, "grad_norm": 0.394583884756774, "learning_rate": 2e-05, "loss": 5.3365, "step": 9529 }, { "epoch": 0.31961096671417794, "grad_norm": 0.4303115401972538, "learning_rate": 2e-05, "loss": 5.5637, "step": 9530 }, { "epoch": 0.31964450406640393, "grad_norm": 0.37781188888704903, "learning_rate": 2e-05, "loss": 5.497, "step": 9531 }, { "epoch": 0.31967804141863, "grad_norm": 0.38834985886718393, "learning_rate": 2e-05, "loss": 5.6053, "step": 9532 }, { "epoch": 0.31971157877085604, "grad_norm": 0.39093475080149315, "learning_rate": 2e-05, "loss": 5.4615, "step": 9533 }, { "epoch": 0.3197451161230821, "grad_norm": 0.41780338651042637, "learning_rate": 2e-05, "loss": 5.5416, "step": 9534 }, { "epoch": 0.31977865347530815, "grad_norm": 0.4983172664789574, "learning_rate": 2e-05, "loss": 5.5734, "step": 9535 }, { "epoch": 0.31981219082753415, "grad_norm": 0.4147446544859658, "learning_rate": 2e-05, "loss": 5.4926, "step": 9536 }, { "epoch": 0.3198457281797602, "grad_norm": 0.4489414932332244, "learning_rate": 2e-05, "loss": 5.6876, "step": 9537 }, { "epoch": 0.31987926553198626, "grad_norm": 0.4000549486445227, "learning_rate": 2e-05, "loss": 5.5242, "step": 9538 }, { "epoch": 0.3199128028842123, "grad_norm": 0.4930649166786032, "learning_rate": 2e-05, "loss": 5.5922, "step": 9539 }, { "epoch": 0.3199463402364383, "grad_norm": 0.3847598398115225, "learning_rate": 2e-05, "loss": 5.5872, "step": 9540 }, { "epoch": 0.31997987758866436, "grad_norm": 0.4186050067064802, "learning_rate": 2e-05, "loss": 5.5491, "step": 9541 }, { "epoch": 0.3200134149408904, "grad_norm": 0.406805053183547, "learning_rate": 2e-05, "loss": 5.2417, "step": 9542 }, { "epoch": 0.32004695229311647, "grad_norm": 0.395565410969709, "learning_rate": 2e-05, "loss": 5.6054, "step": 9543 }, { "epoch": 0.3200804896453425, "grad_norm": 0.4026256607641214, "learning_rate": 2e-05, "loss": 5.7173, "step": 9544 }, { "epoch": 0.3201140269975685, "grad_norm": 0.427804164921661, "learning_rate": 2e-05, "loss": 5.6216, "step": 9545 }, { "epoch": 0.3201475643497946, "grad_norm": 0.3975769374199228, "learning_rate": 2e-05, "loss": 5.501, "step": 9546 }, { "epoch": 0.32018110170202063, "grad_norm": 0.4223703589469262, "learning_rate": 2e-05, "loss": 5.4637, "step": 9547 }, { "epoch": 0.3202146390542467, "grad_norm": 0.4360609722081582, "learning_rate": 2e-05, "loss": 5.5336, "step": 9548 }, { "epoch": 0.3202481764064727, "grad_norm": 0.40052782131443715, "learning_rate": 2e-05, "loss": 5.6933, "step": 9549 }, { "epoch": 0.32028171375869874, "grad_norm": 0.41949434550668074, "learning_rate": 2e-05, "loss": 5.5818, "step": 9550 }, { "epoch": 0.3203152511109248, "grad_norm": 0.3945902780782183, "learning_rate": 2e-05, "loss": 5.586, "step": 9551 }, { "epoch": 0.32034878846315085, "grad_norm": 0.44036439395637106, "learning_rate": 2e-05, "loss": 5.5946, "step": 9552 }, { "epoch": 0.3203823258153769, "grad_norm": 0.39981141797893865, "learning_rate": 2e-05, "loss": 5.6282, "step": 9553 }, { "epoch": 0.3204158631676029, "grad_norm": 0.4420081486973907, "learning_rate": 2e-05, "loss": 5.6817, "step": 9554 }, { "epoch": 0.32044940051982895, "grad_norm": 0.46066243392215067, "learning_rate": 2e-05, "loss": 5.4968, "step": 9555 }, { "epoch": 0.320482937872055, "grad_norm": 0.42134189058659227, "learning_rate": 2e-05, "loss": 5.3961, "step": 9556 }, { "epoch": 0.32051647522428106, "grad_norm": 0.4170468196463698, "learning_rate": 2e-05, "loss": 5.6704, "step": 9557 }, { "epoch": 0.32055001257650706, "grad_norm": 0.4392375938898337, "learning_rate": 2e-05, "loss": 5.5473, "step": 9558 }, { "epoch": 0.3205835499287331, "grad_norm": 0.4642181455925235, "learning_rate": 2e-05, "loss": 5.4289, "step": 9559 }, { "epoch": 0.32061708728095917, "grad_norm": 0.40741296947025724, "learning_rate": 2e-05, "loss": 5.5544, "step": 9560 }, { "epoch": 0.3206506246331852, "grad_norm": 0.408827509668084, "learning_rate": 2e-05, "loss": 5.4137, "step": 9561 }, { "epoch": 0.3206841619854113, "grad_norm": 0.41352828226560956, "learning_rate": 2e-05, "loss": 5.61, "step": 9562 }, { "epoch": 0.3207176993376373, "grad_norm": 0.4281331157090204, "learning_rate": 2e-05, "loss": 5.6945, "step": 9563 }, { "epoch": 0.32075123668986333, "grad_norm": 0.4204084139568944, "learning_rate": 2e-05, "loss": 5.6722, "step": 9564 }, { "epoch": 0.3207847740420894, "grad_norm": 0.394018558611576, "learning_rate": 2e-05, "loss": 5.4591, "step": 9565 }, { "epoch": 0.32081831139431544, "grad_norm": 0.4345994084826636, "learning_rate": 2e-05, "loss": 5.5127, "step": 9566 }, { "epoch": 0.32085184874654143, "grad_norm": 0.40436147452265553, "learning_rate": 2e-05, "loss": 5.6085, "step": 9567 }, { "epoch": 0.3208853860987675, "grad_norm": 0.4406592678649824, "learning_rate": 2e-05, "loss": 5.6407, "step": 9568 }, { "epoch": 0.32091892345099354, "grad_norm": 0.3825614766482925, "learning_rate": 2e-05, "loss": 5.4967, "step": 9569 }, { "epoch": 0.3209524608032196, "grad_norm": 0.3907004845045296, "learning_rate": 2e-05, "loss": 5.6282, "step": 9570 }, { "epoch": 0.32098599815544565, "grad_norm": 0.42566778692045487, "learning_rate": 2e-05, "loss": 5.4219, "step": 9571 }, { "epoch": 0.32101953550767165, "grad_norm": 0.40783786289392665, "learning_rate": 2e-05, "loss": 5.6583, "step": 9572 }, { "epoch": 0.3210530728598977, "grad_norm": 0.4143622481714828, "learning_rate": 2e-05, "loss": 5.5236, "step": 9573 }, { "epoch": 0.32108661021212376, "grad_norm": 0.41599053201694935, "learning_rate": 2e-05, "loss": 5.8068, "step": 9574 }, { "epoch": 0.3211201475643498, "grad_norm": 0.40391462601257866, "learning_rate": 2e-05, "loss": 5.4913, "step": 9575 }, { "epoch": 0.3211536849165758, "grad_norm": 0.4337764702659426, "learning_rate": 2e-05, "loss": 5.5807, "step": 9576 }, { "epoch": 0.32118722226880186, "grad_norm": 0.43397391174935085, "learning_rate": 2e-05, "loss": 5.6474, "step": 9577 }, { "epoch": 0.3212207596210279, "grad_norm": 0.4331652594523937, "learning_rate": 2e-05, "loss": 5.4711, "step": 9578 }, { "epoch": 0.32125429697325397, "grad_norm": 0.397883094923889, "learning_rate": 2e-05, "loss": 5.4377, "step": 9579 }, { "epoch": 0.32128783432548, "grad_norm": 0.40867700608064744, "learning_rate": 2e-05, "loss": 5.4508, "step": 9580 }, { "epoch": 0.321321371677706, "grad_norm": 0.42266755398033196, "learning_rate": 2e-05, "loss": 5.5181, "step": 9581 }, { "epoch": 0.3213549090299321, "grad_norm": 0.40397595809599013, "learning_rate": 2e-05, "loss": 5.518, "step": 9582 }, { "epoch": 0.32138844638215813, "grad_norm": 0.4253160643241712, "learning_rate": 2e-05, "loss": 5.651, "step": 9583 }, { "epoch": 0.3214219837343842, "grad_norm": 0.4265584097609216, "learning_rate": 2e-05, "loss": 5.5558, "step": 9584 }, { "epoch": 0.3214555210866102, "grad_norm": 0.4158887785431418, "learning_rate": 2e-05, "loss": 5.7901, "step": 9585 }, { "epoch": 0.32148905843883624, "grad_norm": 0.44566455960024065, "learning_rate": 2e-05, "loss": 5.6753, "step": 9586 }, { "epoch": 0.3215225957910623, "grad_norm": 0.4648968776015103, "learning_rate": 2e-05, "loss": 5.3798, "step": 9587 }, { "epoch": 0.32155613314328835, "grad_norm": 0.3952205107902172, "learning_rate": 2e-05, "loss": 5.6676, "step": 9588 }, { "epoch": 0.3215896704955144, "grad_norm": 0.40446512618784264, "learning_rate": 2e-05, "loss": 5.5036, "step": 9589 }, { "epoch": 0.3216232078477404, "grad_norm": 0.41235510237802026, "learning_rate": 2e-05, "loss": 5.6728, "step": 9590 }, { "epoch": 0.32165674519996645, "grad_norm": 0.4179899491576124, "learning_rate": 2e-05, "loss": 5.6374, "step": 9591 }, { "epoch": 0.3216902825521925, "grad_norm": 0.3976694633462004, "learning_rate": 2e-05, "loss": 5.6042, "step": 9592 }, { "epoch": 0.32172381990441856, "grad_norm": 0.4143902318925949, "learning_rate": 2e-05, "loss": 5.6989, "step": 9593 }, { "epoch": 0.32175735725664456, "grad_norm": 0.4173822811045971, "learning_rate": 2e-05, "loss": 5.5886, "step": 9594 }, { "epoch": 0.3217908946088706, "grad_norm": 0.41871470410413186, "learning_rate": 2e-05, "loss": 5.391, "step": 9595 }, { "epoch": 0.32182443196109667, "grad_norm": 0.3986945133672902, "learning_rate": 2e-05, "loss": 5.6914, "step": 9596 }, { "epoch": 0.3218579693133227, "grad_norm": 0.43132732103758237, "learning_rate": 2e-05, "loss": 5.5746, "step": 9597 }, { "epoch": 0.3218915066655488, "grad_norm": 0.44527578482027086, "learning_rate": 2e-05, "loss": 5.5337, "step": 9598 }, { "epoch": 0.3219250440177748, "grad_norm": 0.37465200844153146, "learning_rate": 2e-05, "loss": 5.3849, "step": 9599 }, { "epoch": 0.32195858137000083, "grad_norm": 0.4136770374635313, "learning_rate": 2e-05, "loss": 5.6267, "step": 9600 }, { "epoch": 0.3219921187222269, "grad_norm": 0.4437817329642909, "learning_rate": 2e-05, "loss": 5.5196, "step": 9601 }, { "epoch": 0.32202565607445294, "grad_norm": 0.38809354867374374, "learning_rate": 2e-05, "loss": 5.5721, "step": 9602 }, { "epoch": 0.322059193426679, "grad_norm": 0.40489748275639476, "learning_rate": 2e-05, "loss": 5.6481, "step": 9603 }, { "epoch": 0.322092730778905, "grad_norm": 0.45819905156460905, "learning_rate": 2e-05, "loss": 5.4764, "step": 9604 }, { "epoch": 0.32212626813113104, "grad_norm": 0.4026188109636763, "learning_rate": 2e-05, "loss": 5.373, "step": 9605 }, { "epoch": 0.3221598054833571, "grad_norm": 0.4212310953454412, "learning_rate": 2e-05, "loss": 5.578, "step": 9606 }, { "epoch": 0.32219334283558315, "grad_norm": 0.3990058027448269, "learning_rate": 2e-05, "loss": 5.7745, "step": 9607 }, { "epoch": 0.32222688018780915, "grad_norm": 0.40541968438868237, "learning_rate": 2e-05, "loss": 5.6095, "step": 9608 }, { "epoch": 0.3222604175400352, "grad_norm": 0.3961450308267874, "learning_rate": 2e-05, "loss": 5.5244, "step": 9609 }, { "epoch": 0.32229395489226126, "grad_norm": 0.3946381206820747, "learning_rate": 2e-05, "loss": 5.4408, "step": 9610 }, { "epoch": 0.3223274922444873, "grad_norm": 0.40324568951782525, "learning_rate": 2e-05, "loss": 5.5058, "step": 9611 }, { "epoch": 0.32236102959671337, "grad_norm": 0.43393765051710215, "learning_rate": 2e-05, "loss": 5.5156, "step": 9612 }, { "epoch": 0.32239456694893937, "grad_norm": 0.38434867660000244, "learning_rate": 2e-05, "loss": 5.3726, "step": 9613 }, { "epoch": 0.3224281043011654, "grad_norm": 0.43271043246778135, "learning_rate": 2e-05, "loss": 5.4607, "step": 9614 }, { "epoch": 0.3224616416533915, "grad_norm": 0.4044826961183898, "learning_rate": 2e-05, "loss": 5.5062, "step": 9615 }, { "epoch": 0.3224951790056175, "grad_norm": 0.40737632496061754, "learning_rate": 2e-05, "loss": 5.5622, "step": 9616 }, { "epoch": 0.3225287163578435, "grad_norm": 0.4311549542374209, "learning_rate": 2e-05, "loss": 5.5606, "step": 9617 }, { "epoch": 0.3225622537100696, "grad_norm": 0.4290537947062558, "learning_rate": 2e-05, "loss": 5.5035, "step": 9618 }, { "epoch": 0.32259579106229563, "grad_norm": 0.43033652459651356, "learning_rate": 2e-05, "loss": 5.5457, "step": 9619 }, { "epoch": 0.3226293284145217, "grad_norm": 0.4190547515875924, "learning_rate": 2e-05, "loss": 5.5437, "step": 9620 }, { "epoch": 0.32266286576674774, "grad_norm": 0.42206279700438176, "learning_rate": 2e-05, "loss": 5.5891, "step": 9621 }, { "epoch": 0.32269640311897374, "grad_norm": 0.46024819584159704, "learning_rate": 2e-05, "loss": 5.5418, "step": 9622 }, { "epoch": 0.3227299404711998, "grad_norm": 0.3966993837693675, "learning_rate": 2e-05, "loss": 5.5482, "step": 9623 }, { "epoch": 0.32276347782342585, "grad_norm": 0.40054231147052966, "learning_rate": 2e-05, "loss": 5.5765, "step": 9624 }, { "epoch": 0.3227970151756519, "grad_norm": 0.4190026908881721, "learning_rate": 2e-05, "loss": 5.5654, "step": 9625 }, { "epoch": 0.3228305525278779, "grad_norm": 0.44322474426113684, "learning_rate": 2e-05, "loss": 5.7411, "step": 9626 }, { "epoch": 0.32286408988010395, "grad_norm": 0.37896036479049067, "learning_rate": 2e-05, "loss": 5.6037, "step": 9627 }, { "epoch": 0.32289762723233, "grad_norm": 0.38276813111549346, "learning_rate": 2e-05, "loss": 5.5372, "step": 9628 }, { "epoch": 0.32293116458455606, "grad_norm": 0.3928981374173729, "learning_rate": 2e-05, "loss": 5.4194, "step": 9629 }, { "epoch": 0.3229647019367821, "grad_norm": 0.40461868615760077, "learning_rate": 2e-05, "loss": 5.5505, "step": 9630 }, { "epoch": 0.3229982392890081, "grad_norm": 0.37314125998026515, "learning_rate": 2e-05, "loss": 5.5678, "step": 9631 }, { "epoch": 0.32303177664123417, "grad_norm": 0.5618171099059791, "learning_rate": 2e-05, "loss": 5.5831, "step": 9632 }, { "epoch": 0.3230653139934602, "grad_norm": 0.3890829806513151, "learning_rate": 2e-05, "loss": 5.8474, "step": 9633 }, { "epoch": 0.3230988513456863, "grad_norm": 0.42840570810855255, "learning_rate": 2e-05, "loss": 5.7037, "step": 9634 }, { "epoch": 0.3231323886979123, "grad_norm": 0.40430419414640156, "learning_rate": 2e-05, "loss": 5.5917, "step": 9635 }, { "epoch": 0.32316592605013833, "grad_norm": 0.38986079199698304, "learning_rate": 2e-05, "loss": 5.6558, "step": 9636 }, { "epoch": 0.3231994634023644, "grad_norm": 0.37990623625481973, "learning_rate": 2e-05, "loss": 5.7438, "step": 9637 }, { "epoch": 0.32323300075459044, "grad_norm": 0.4544913104224134, "learning_rate": 2e-05, "loss": 5.5984, "step": 9638 }, { "epoch": 0.3232665381068165, "grad_norm": 0.3749834997419641, "learning_rate": 2e-05, "loss": 5.6914, "step": 9639 }, { "epoch": 0.3233000754590425, "grad_norm": 0.6373321972953658, "learning_rate": 2e-05, "loss": 5.5412, "step": 9640 }, { "epoch": 0.32333361281126854, "grad_norm": 0.3956260708988764, "learning_rate": 2e-05, "loss": 5.5202, "step": 9641 }, { "epoch": 0.3233671501634946, "grad_norm": 0.4153164336850243, "learning_rate": 2e-05, "loss": 5.3145, "step": 9642 }, { "epoch": 0.32340068751572065, "grad_norm": 0.41475569846195925, "learning_rate": 2e-05, "loss": 5.5161, "step": 9643 }, { "epoch": 0.32343422486794665, "grad_norm": 0.4045328696131455, "learning_rate": 2e-05, "loss": 5.5125, "step": 9644 }, { "epoch": 0.3234677622201727, "grad_norm": 0.39307605501422127, "learning_rate": 2e-05, "loss": 5.4784, "step": 9645 }, { "epoch": 0.32350129957239876, "grad_norm": 0.4247148177611166, "learning_rate": 2e-05, "loss": 5.4725, "step": 9646 }, { "epoch": 0.3235348369246248, "grad_norm": 0.3958104499843153, "learning_rate": 2e-05, "loss": 5.4338, "step": 9647 }, { "epoch": 0.32356837427685087, "grad_norm": 0.47021159300450766, "learning_rate": 2e-05, "loss": 5.681, "step": 9648 }, { "epoch": 0.32360191162907687, "grad_norm": 0.3975331256586216, "learning_rate": 2e-05, "loss": 5.5521, "step": 9649 }, { "epoch": 0.3236354489813029, "grad_norm": 0.44220736488393986, "learning_rate": 2e-05, "loss": 5.5413, "step": 9650 }, { "epoch": 0.323668986333529, "grad_norm": 0.4201601149133484, "learning_rate": 2e-05, "loss": 5.6063, "step": 9651 }, { "epoch": 0.32370252368575503, "grad_norm": 0.4105796235717404, "learning_rate": 2e-05, "loss": 5.5722, "step": 9652 }, { "epoch": 0.323736061037981, "grad_norm": 0.4161982994895233, "learning_rate": 2e-05, "loss": 5.4051, "step": 9653 }, { "epoch": 0.3237695983902071, "grad_norm": 0.37182017802996653, "learning_rate": 2e-05, "loss": 5.5067, "step": 9654 }, { "epoch": 0.32380313574243313, "grad_norm": 0.4329723931001101, "learning_rate": 2e-05, "loss": 5.3991, "step": 9655 }, { "epoch": 0.3238366730946592, "grad_norm": 0.4404014303843905, "learning_rate": 2e-05, "loss": 5.4973, "step": 9656 }, { "epoch": 0.32387021044688524, "grad_norm": 0.39951648631474523, "learning_rate": 2e-05, "loss": 5.6771, "step": 9657 }, { "epoch": 0.32390374779911124, "grad_norm": 0.4222118803976458, "learning_rate": 2e-05, "loss": 5.4301, "step": 9658 }, { "epoch": 0.3239372851513373, "grad_norm": 0.4216667418580415, "learning_rate": 2e-05, "loss": 5.6109, "step": 9659 }, { "epoch": 0.32397082250356335, "grad_norm": 0.43024217204588794, "learning_rate": 2e-05, "loss": 5.7311, "step": 9660 }, { "epoch": 0.3240043598557894, "grad_norm": 0.40238247878753197, "learning_rate": 2e-05, "loss": 5.6176, "step": 9661 }, { "epoch": 0.3240378972080154, "grad_norm": 0.39283182493051766, "learning_rate": 2e-05, "loss": 5.5889, "step": 9662 }, { "epoch": 0.32407143456024146, "grad_norm": 0.43457871894543226, "learning_rate": 2e-05, "loss": 5.4648, "step": 9663 }, { "epoch": 0.3241049719124675, "grad_norm": 0.39652618361325964, "learning_rate": 2e-05, "loss": 5.5743, "step": 9664 }, { "epoch": 0.32413850926469356, "grad_norm": 0.3776059787229576, "learning_rate": 2e-05, "loss": 5.5047, "step": 9665 }, { "epoch": 0.3241720466169196, "grad_norm": 0.41742736715496054, "learning_rate": 2e-05, "loss": 5.449, "step": 9666 }, { "epoch": 0.3242055839691456, "grad_norm": 0.40905258693963037, "learning_rate": 2e-05, "loss": 5.2977, "step": 9667 }, { "epoch": 0.32423912132137167, "grad_norm": 0.40452793176518614, "learning_rate": 2e-05, "loss": 5.5182, "step": 9668 }, { "epoch": 0.3242726586735977, "grad_norm": 0.3931334884385533, "learning_rate": 2e-05, "loss": 5.3472, "step": 9669 }, { "epoch": 0.3243061960258238, "grad_norm": 0.4696132523726339, "learning_rate": 2e-05, "loss": 5.6592, "step": 9670 }, { "epoch": 0.3243397333780498, "grad_norm": 0.3999067502986846, "learning_rate": 2e-05, "loss": 5.4107, "step": 9671 }, { "epoch": 0.32437327073027583, "grad_norm": 0.3980878419875126, "learning_rate": 2e-05, "loss": 5.7592, "step": 9672 }, { "epoch": 0.3244068080825019, "grad_norm": 0.40233642873744596, "learning_rate": 2e-05, "loss": 5.5115, "step": 9673 }, { "epoch": 0.32444034543472794, "grad_norm": 0.3831031137324087, "learning_rate": 2e-05, "loss": 5.3843, "step": 9674 }, { "epoch": 0.324473882786954, "grad_norm": 0.41098802613131197, "learning_rate": 2e-05, "loss": 5.7354, "step": 9675 }, { "epoch": 0.32450742013918, "grad_norm": 0.3982516806503682, "learning_rate": 2e-05, "loss": 5.703, "step": 9676 }, { "epoch": 0.32454095749140605, "grad_norm": 0.3903291045547207, "learning_rate": 2e-05, "loss": 5.6939, "step": 9677 }, { "epoch": 0.3245744948436321, "grad_norm": 0.42054098364979436, "learning_rate": 2e-05, "loss": 5.5629, "step": 9678 }, { "epoch": 0.32460803219585815, "grad_norm": 0.4017959352040851, "learning_rate": 2e-05, "loss": 5.4578, "step": 9679 }, { "epoch": 0.32464156954808415, "grad_norm": 0.40373801470952614, "learning_rate": 2e-05, "loss": 5.557, "step": 9680 }, { "epoch": 0.3246751069003102, "grad_norm": 0.4225891402785222, "learning_rate": 2e-05, "loss": 5.6549, "step": 9681 }, { "epoch": 0.32470864425253626, "grad_norm": 0.4205627868919013, "learning_rate": 2e-05, "loss": 5.5196, "step": 9682 }, { "epoch": 0.3247421816047623, "grad_norm": 0.4119898847442794, "learning_rate": 2e-05, "loss": 5.4761, "step": 9683 }, { "epoch": 0.32477571895698837, "grad_norm": 0.4061682610147377, "learning_rate": 2e-05, "loss": 5.6356, "step": 9684 }, { "epoch": 0.32480925630921437, "grad_norm": 0.4491096605317274, "learning_rate": 2e-05, "loss": 5.6665, "step": 9685 }, { "epoch": 0.3248427936614404, "grad_norm": 0.41463910970685036, "learning_rate": 2e-05, "loss": 5.7251, "step": 9686 }, { "epoch": 0.3248763310136665, "grad_norm": 0.4102204277897675, "learning_rate": 2e-05, "loss": 5.4011, "step": 9687 }, { "epoch": 0.32490986836589253, "grad_norm": 0.3914999479819591, "learning_rate": 2e-05, "loss": 5.4005, "step": 9688 }, { "epoch": 0.3249434057181185, "grad_norm": 0.4088613567368756, "learning_rate": 2e-05, "loss": 5.6148, "step": 9689 }, { "epoch": 0.3249769430703446, "grad_norm": 0.42915268970041254, "learning_rate": 2e-05, "loss": 5.6695, "step": 9690 }, { "epoch": 0.32501048042257064, "grad_norm": 0.3979920180924184, "learning_rate": 2e-05, "loss": 5.6419, "step": 9691 }, { "epoch": 0.3250440177747967, "grad_norm": 0.42960854161824674, "learning_rate": 2e-05, "loss": 5.5394, "step": 9692 }, { "epoch": 0.32507755512702274, "grad_norm": 0.42838864962799206, "learning_rate": 2e-05, "loss": 5.5537, "step": 9693 }, { "epoch": 0.32511109247924874, "grad_norm": 0.41014510950821914, "learning_rate": 2e-05, "loss": 5.5169, "step": 9694 }, { "epoch": 0.3251446298314748, "grad_norm": 0.39959033172591957, "learning_rate": 2e-05, "loss": 5.5769, "step": 9695 }, { "epoch": 0.32517816718370085, "grad_norm": 0.43744436312970286, "learning_rate": 2e-05, "loss": 5.659, "step": 9696 }, { "epoch": 0.3252117045359269, "grad_norm": 0.4082579408556268, "learning_rate": 2e-05, "loss": 5.6601, "step": 9697 }, { "epoch": 0.3252452418881529, "grad_norm": 0.3862725790120564, "learning_rate": 2e-05, "loss": 5.4392, "step": 9698 }, { "epoch": 0.32527877924037896, "grad_norm": 0.40096457422442255, "learning_rate": 2e-05, "loss": 5.3413, "step": 9699 }, { "epoch": 0.325312316592605, "grad_norm": 0.46357372677942266, "learning_rate": 2e-05, "loss": 5.548, "step": 9700 }, { "epoch": 0.32534585394483106, "grad_norm": 0.40477886571506505, "learning_rate": 2e-05, "loss": 5.4844, "step": 9701 }, { "epoch": 0.3253793912970571, "grad_norm": 0.44308049249317805, "learning_rate": 2e-05, "loss": 5.4653, "step": 9702 }, { "epoch": 0.3254129286492831, "grad_norm": 0.4127308833636351, "learning_rate": 2e-05, "loss": 5.8755, "step": 9703 }, { "epoch": 0.32544646600150917, "grad_norm": 0.4276328504866786, "learning_rate": 2e-05, "loss": 5.6804, "step": 9704 }, { "epoch": 0.3254800033537352, "grad_norm": 0.406880072203713, "learning_rate": 2e-05, "loss": 5.5765, "step": 9705 }, { "epoch": 0.3255135407059613, "grad_norm": 0.4778264996991065, "learning_rate": 2e-05, "loss": 5.3954, "step": 9706 }, { "epoch": 0.32554707805818733, "grad_norm": 0.43024469414561745, "learning_rate": 2e-05, "loss": 5.4678, "step": 9707 }, { "epoch": 0.32558061541041333, "grad_norm": 0.4246717985546628, "learning_rate": 2e-05, "loss": 5.4838, "step": 9708 }, { "epoch": 0.3256141527626394, "grad_norm": 0.4471247412399386, "learning_rate": 2e-05, "loss": 5.5496, "step": 9709 }, { "epoch": 0.32564769011486544, "grad_norm": 0.4028635584744948, "learning_rate": 2e-05, "loss": 5.4346, "step": 9710 }, { "epoch": 0.3256812274670915, "grad_norm": 0.39315064594811266, "learning_rate": 2e-05, "loss": 5.5982, "step": 9711 }, { "epoch": 0.3257147648193175, "grad_norm": 0.42828623234752583, "learning_rate": 2e-05, "loss": 5.7532, "step": 9712 }, { "epoch": 0.32574830217154355, "grad_norm": 0.40117412964257615, "learning_rate": 2e-05, "loss": 5.4604, "step": 9713 }, { "epoch": 0.3257818395237696, "grad_norm": 0.37660274422253615, "learning_rate": 2e-05, "loss": 5.3472, "step": 9714 }, { "epoch": 0.32581537687599565, "grad_norm": 0.41577793233411, "learning_rate": 2e-05, "loss": 5.5875, "step": 9715 }, { "epoch": 0.3258489142282217, "grad_norm": 0.4493947459081777, "learning_rate": 2e-05, "loss": 5.556, "step": 9716 }, { "epoch": 0.3258824515804477, "grad_norm": 0.39530406925456824, "learning_rate": 2e-05, "loss": 5.5167, "step": 9717 }, { "epoch": 0.32591598893267376, "grad_norm": 0.42557608951787773, "learning_rate": 2e-05, "loss": 5.3932, "step": 9718 }, { "epoch": 0.3259495262848998, "grad_norm": 0.4310312919628719, "learning_rate": 2e-05, "loss": 5.3909, "step": 9719 }, { "epoch": 0.32598306363712587, "grad_norm": 0.4141673553981002, "learning_rate": 2e-05, "loss": 5.5949, "step": 9720 }, { "epoch": 0.32601660098935187, "grad_norm": 0.3839291002596034, "learning_rate": 2e-05, "loss": 5.5699, "step": 9721 }, { "epoch": 0.3260501383415779, "grad_norm": 0.4339100807705814, "learning_rate": 2e-05, "loss": 5.4765, "step": 9722 }, { "epoch": 0.326083675693804, "grad_norm": 0.4233524146891143, "learning_rate": 2e-05, "loss": 5.5682, "step": 9723 }, { "epoch": 0.32611721304603003, "grad_norm": 0.39752187034601466, "learning_rate": 2e-05, "loss": 5.7364, "step": 9724 }, { "epoch": 0.3261507503982561, "grad_norm": 0.3994056162891233, "learning_rate": 2e-05, "loss": 5.5849, "step": 9725 }, { "epoch": 0.3261842877504821, "grad_norm": 0.39608052173157715, "learning_rate": 2e-05, "loss": 5.5909, "step": 9726 }, { "epoch": 0.32621782510270814, "grad_norm": 0.43955750886834916, "learning_rate": 2e-05, "loss": 5.6405, "step": 9727 }, { "epoch": 0.3262513624549342, "grad_norm": 0.42955098961898724, "learning_rate": 2e-05, "loss": 5.3668, "step": 9728 }, { "epoch": 0.32628489980716024, "grad_norm": 0.38617071675354586, "learning_rate": 2e-05, "loss": 5.4569, "step": 9729 }, { "epoch": 0.32631843715938624, "grad_norm": 0.4423969842532429, "learning_rate": 2e-05, "loss": 5.5647, "step": 9730 }, { "epoch": 0.3263519745116123, "grad_norm": 0.40240173040906035, "learning_rate": 2e-05, "loss": 5.4714, "step": 9731 }, { "epoch": 0.32638551186383835, "grad_norm": 0.39702631463370236, "learning_rate": 2e-05, "loss": 5.3405, "step": 9732 }, { "epoch": 0.3264190492160644, "grad_norm": 0.3986112052640135, "learning_rate": 2e-05, "loss": 5.6666, "step": 9733 }, { "epoch": 0.32645258656829046, "grad_norm": 0.43218902052136804, "learning_rate": 2e-05, "loss": 5.5444, "step": 9734 }, { "epoch": 0.32648612392051646, "grad_norm": 0.42381591783353406, "learning_rate": 2e-05, "loss": 5.4074, "step": 9735 }, { "epoch": 0.3265196612727425, "grad_norm": 0.39501845623313375, "learning_rate": 2e-05, "loss": 5.7933, "step": 9736 }, { "epoch": 0.32655319862496857, "grad_norm": 0.4180082175529356, "learning_rate": 2e-05, "loss": 5.7165, "step": 9737 }, { "epoch": 0.3265867359771946, "grad_norm": 0.41289870032122883, "learning_rate": 2e-05, "loss": 5.4939, "step": 9738 }, { "epoch": 0.3266202733294206, "grad_norm": 0.4140984485064364, "learning_rate": 2e-05, "loss": 5.5477, "step": 9739 }, { "epoch": 0.3266538106816467, "grad_norm": 0.3980072676930558, "learning_rate": 2e-05, "loss": 5.3846, "step": 9740 }, { "epoch": 0.3266873480338727, "grad_norm": 0.4149994672702992, "learning_rate": 2e-05, "loss": 5.6286, "step": 9741 }, { "epoch": 0.3267208853860988, "grad_norm": 0.417432471583039, "learning_rate": 2e-05, "loss": 5.5754, "step": 9742 }, { "epoch": 0.32675442273832483, "grad_norm": 0.4190813370043795, "learning_rate": 2e-05, "loss": 5.5558, "step": 9743 }, { "epoch": 0.32678796009055083, "grad_norm": 0.44583908350256546, "learning_rate": 2e-05, "loss": 5.5452, "step": 9744 }, { "epoch": 0.3268214974427769, "grad_norm": 0.38428908390274574, "learning_rate": 2e-05, "loss": 5.4827, "step": 9745 }, { "epoch": 0.32685503479500294, "grad_norm": 0.41258542251494024, "learning_rate": 2e-05, "loss": 5.4826, "step": 9746 }, { "epoch": 0.326888572147229, "grad_norm": 0.44960749372356806, "learning_rate": 2e-05, "loss": 5.4529, "step": 9747 }, { "epoch": 0.326922109499455, "grad_norm": 0.39958985827109095, "learning_rate": 2e-05, "loss": 5.5072, "step": 9748 }, { "epoch": 0.32695564685168105, "grad_norm": 0.4160584569026633, "learning_rate": 2e-05, "loss": 5.5349, "step": 9749 }, { "epoch": 0.3269891842039071, "grad_norm": 0.4203639367977156, "learning_rate": 2e-05, "loss": 5.4739, "step": 9750 }, { "epoch": 0.32702272155613316, "grad_norm": 0.395968688817954, "learning_rate": 2e-05, "loss": 5.4711, "step": 9751 }, { "epoch": 0.3270562589083592, "grad_norm": 0.3944417125992109, "learning_rate": 2e-05, "loss": 5.4956, "step": 9752 }, { "epoch": 0.3270897962605852, "grad_norm": 0.3812848114262163, "learning_rate": 2e-05, "loss": 5.6862, "step": 9753 }, { "epoch": 0.32712333361281126, "grad_norm": 0.43288493716685267, "learning_rate": 2e-05, "loss": 5.5917, "step": 9754 }, { "epoch": 0.3271568709650373, "grad_norm": 0.43492446739658, "learning_rate": 2e-05, "loss": 5.5348, "step": 9755 }, { "epoch": 0.32719040831726337, "grad_norm": 0.42281306911294103, "learning_rate": 2e-05, "loss": 5.3691, "step": 9756 }, { "epoch": 0.32722394566948937, "grad_norm": 0.45793760687802954, "learning_rate": 2e-05, "loss": 5.4335, "step": 9757 }, { "epoch": 0.3272574830217154, "grad_norm": 0.493406154491605, "learning_rate": 2e-05, "loss": 5.4818, "step": 9758 }, { "epoch": 0.3272910203739415, "grad_norm": 0.4262818197790047, "learning_rate": 2e-05, "loss": 5.4886, "step": 9759 }, { "epoch": 0.32732455772616753, "grad_norm": 0.45539251583053353, "learning_rate": 2e-05, "loss": 5.6346, "step": 9760 }, { "epoch": 0.3273580950783936, "grad_norm": 0.40381331044532165, "learning_rate": 2e-05, "loss": 5.5802, "step": 9761 }, { "epoch": 0.3273916324306196, "grad_norm": 0.4304704111495583, "learning_rate": 2e-05, "loss": 5.3514, "step": 9762 }, { "epoch": 0.32742516978284564, "grad_norm": 0.39602292613103224, "learning_rate": 2e-05, "loss": 5.6692, "step": 9763 }, { "epoch": 0.3274587071350717, "grad_norm": 0.47614387236854105, "learning_rate": 2e-05, "loss": 5.5403, "step": 9764 }, { "epoch": 0.32749224448729775, "grad_norm": 0.41388775109751325, "learning_rate": 2e-05, "loss": 5.5674, "step": 9765 }, { "epoch": 0.32752578183952374, "grad_norm": 0.4242064916593157, "learning_rate": 2e-05, "loss": 5.7465, "step": 9766 }, { "epoch": 0.3275593191917498, "grad_norm": 0.4190888824064034, "learning_rate": 2e-05, "loss": 5.5997, "step": 9767 }, { "epoch": 0.32759285654397585, "grad_norm": 0.4356805198361111, "learning_rate": 2e-05, "loss": 5.4671, "step": 9768 }, { "epoch": 0.3276263938962019, "grad_norm": 0.4134916764140117, "learning_rate": 2e-05, "loss": 5.4677, "step": 9769 }, { "epoch": 0.32765993124842796, "grad_norm": 0.4035233034649566, "learning_rate": 2e-05, "loss": 5.684, "step": 9770 }, { "epoch": 0.32769346860065396, "grad_norm": 0.5347937075910526, "learning_rate": 2e-05, "loss": 5.4715, "step": 9771 }, { "epoch": 0.32772700595288, "grad_norm": 0.40677316497130206, "learning_rate": 2e-05, "loss": 5.6151, "step": 9772 }, { "epoch": 0.32776054330510607, "grad_norm": 0.39575169165662033, "learning_rate": 2e-05, "loss": 5.6203, "step": 9773 }, { "epoch": 0.3277940806573321, "grad_norm": 0.4365136249497754, "learning_rate": 2e-05, "loss": 5.4585, "step": 9774 }, { "epoch": 0.3278276180095581, "grad_norm": 0.41682229727574527, "learning_rate": 2e-05, "loss": 5.5539, "step": 9775 }, { "epoch": 0.3278611553617842, "grad_norm": 0.4153685187309011, "learning_rate": 2e-05, "loss": 5.5335, "step": 9776 }, { "epoch": 0.3278946927140102, "grad_norm": 0.42260294799787085, "learning_rate": 2e-05, "loss": 5.706, "step": 9777 }, { "epoch": 0.3279282300662363, "grad_norm": 0.4374628478019085, "learning_rate": 2e-05, "loss": 5.1785, "step": 9778 }, { "epoch": 0.32796176741846234, "grad_norm": 0.3983895575189102, "learning_rate": 2e-05, "loss": 5.6661, "step": 9779 }, { "epoch": 0.32799530477068833, "grad_norm": 0.4175381576473703, "learning_rate": 2e-05, "loss": 5.6031, "step": 9780 }, { "epoch": 0.3280288421229144, "grad_norm": 0.4322336287529385, "learning_rate": 2e-05, "loss": 5.3601, "step": 9781 }, { "epoch": 0.32806237947514044, "grad_norm": 0.43078365048159145, "learning_rate": 2e-05, "loss": 5.5807, "step": 9782 }, { "epoch": 0.3280959168273665, "grad_norm": 0.42776950546280235, "learning_rate": 2e-05, "loss": 5.6302, "step": 9783 }, { "epoch": 0.3281294541795925, "grad_norm": 0.42260269147522284, "learning_rate": 2e-05, "loss": 5.6752, "step": 9784 }, { "epoch": 0.32816299153181855, "grad_norm": 0.41967194553120224, "learning_rate": 2e-05, "loss": 5.3995, "step": 9785 }, { "epoch": 0.3281965288840446, "grad_norm": 0.43663545864888426, "learning_rate": 2e-05, "loss": 5.4356, "step": 9786 }, { "epoch": 0.32823006623627066, "grad_norm": 0.3972836969184074, "learning_rate": 2e-05, "loss": 5.5594, "step": 9787 }, { "epoch": 0.3282636035884967, "grad_norm": 0.41659774449647, "learning_rate": 2e-05, "loss": 5.557, "step": 9788 }, { "epoch": 0.3282971409407227, "grad_norm": 0.4204509077225363, "learning_rate": 2e-05, "loss": 5.4276, "step": 9789 }, { "epoch": 0.32833067829294876, "grad_norm": 0.41113134636853854, "learning_rate": 2e-05, "loss": 5.5654, "step": 9790 }, { "epoch": 0.3283642156451748, "grad_norm": 0.39053406643906796, "learning_rate": 2e-05, "loss": 5.5047, "step": 9791 }, { "epoch": 0.32839775299740087, "grad_norm": 0.4303000806312437, "learning_rate": 2e-05, "loss": 5.4207, "step": 9792 }, { "epoch": 0.32843129034962687, "grad_norm": 0.40553630903765536, "learning_rate": 2e-05, "loss": 5.5351, "step": 9793 }, { "epoch": 0.3284648277018529, "grad_norm": 0.3929134933895049, "learning_rate": 2e-05, "loss": 5.4974, "step": 9794 }, { "epoch": 0.328498365054079, "grad_norm": 0.4135188684599535, "learning_rate": 2e-05, "loss": 5.4134, "step": 9795 }, { "epoch": 0.32853190240630503, "grad_norm": 0.4228345985426996, "learning_rate": 2e-05, "loss": 5.7126, "step": 9796 }, { "epoch": 0.3285654397585311, "grad_norm": 0.3785929726907466, "learning_rate": 2e-05, "loss": 5.6037, "step": 9797 }, { "epoch": 0.3285989771107571, "grad_norm": 0.4039828417489621, "learning_rate": 2e-05, "loss": 5.5253, "step": 9798 }, { "epoch": 0.32863251446298314, "grad_norm": 0.4003818272700346, "learning_rate": 2e-05, "loss": 5.5151, "step": 9799 }, { "epoch": 0.3286660518152092, "grad_norm": 0.42424213194177585, "learning_rate": 2e-05, "loss": 5.4044, "step": 9800 }, { "epoch": 0.32869958916743525, "grad_norm": 0.38821317244165143, "learning_rate": 2e-05, "loss": 5.3443, "step": 9801 }, { "epoch": 0.32873312651966125, "grad_norm": 0.384921437140911, "learning_rate": 2e-05, "loss": 5.457, "step": 9802 }, { "epoch": 0.3287666638718873, "grad_norm": 0.4170651894312752, "learning_rate": 2e-05, "loss": 5.5374, "step": 9803 }, { "epoch": 0.32880020122411335, "grad_norm": 0.3864863097242285, "learning_rate": 2e-05, "loss": 5.5603, "step": 9804 }, { "epoch": 0.3288337385763394, "grad_norm": 0.40061770527062757, "learning_rate": 2e-05, "loss": 5.399, "step": 9805 }, { "epoch": 0.32886727592856546, "grad_norm": 0.43674421497628474, "learning_rate": 2e-05, "loss": 5.6786, "step": 9806 }, { "epoch": 0.32890081328079146, "grad_norm": 0.40479449262498013, "learning_rate": 2e-05, "loss": 5.5747, "step": 9807 }, { "epoch": 0.3289343506330175, "grad_norm": 0.39692006578007316, "learning_rate": 2e-05, "loss": 5.4023, "step": 9808 }, { "epoch": 0.32896788798524357, "grad_norm": 0.4415141773872498, "learning_rate": 2e-05, "loss": 5.5076, "step": 9809 }, { "epoch": 0.3290014253374696, "grad_norm": 0.41354768963177363, "learning_rate": 2e-05, "loss": 5.6463, "step": 9810 }, { "epoch": 0.3290349626896957, "grad_norm": 0.4052327648744211, "learning_rate": 2e-05, "loss": 5.6734, "step": 9811 }, { "epoch": 0.3290685000419217, "grad_norm": 0.39847579909533243, "learning_rate": 2e-05, "loss": 5.5492, "step": 9812 }, { "epoch": 0.32910203739414773, "grad_norm": 0.40834188593373755, "learning_rate": 2e-05, "loss": 5.4497, "step": 9813 }, { "epoch": 0.3291355747463738, "grad_norm": 0.4080649845498385, "learning_rate": 2e-05, "loss": 5.5301, "step": 9814 }, { "epoch": 0.32916911209859984, "grad_norm": 0.3840699020304892, "learning_rate": 2e-05, "loss": 5.6157, "step": 9815 }, { "epoch": 0.32920264945082583, "grad_norm": 0.4134623351842412, "learning_rate": 2e-05, "loss": 5.5659, "step": 9816 }, { "epoch": 0.3292361868030519, "grad_norm": 0.4192332336493829, "learning_rate": 2e-05, "loss": 5.542, "step": 9817 }, { "epoch": 0.32926972415527794, "grad_norm": 0.39039446736737, "learning_rate": 2e-05, "loss": 5.755, "step": 9818 }, { "epoch": 0.329303261507504, "grad_norm": 0.4186463772673425, "learning_rate": 2e-05, "loss": 5.4534, "step": 9819 }, { "epoch": 0.32933679885973005, "grad_norm": 0.3890445085731774, "learning_rate": 2e-05, "loss": 5.5069, "step": 9820 }, { "epoch": 0.32937033621195605, "grad_norm": 0.4091759930602918, "learning_rate": 2e-05, "loss": 5.6532, "step": 9821 }, { "epoch": 0.3294038735641821, "grad_norm": 0.4165467515806994, "learning_rate": 2e-05, "loss": 5.5015, "step": 9822 }, { "epoch": 0.32943741091640816, "grad_norm": 0.4298384674235669, "learning_rate": 2e-05, "loss": 5.5328, "step": 9823 }, { "epoch": 0.3294709482686342, "grad_norm": 0.39632320938191995, "learning_rate": 2e-05, "loss": 5.444, "step": 9824 }, { "epoch": 0.3295044856208602, "grad_norm": 0.3807024478872307, "learning_rate": 2e-05, "loss": 5.4939, "step": 9825 }, { "epoch": 0.32953802297308626, "grad_norm": 0.42244280018939134, "learning_rate": 2e-05, "loss": 5.75, "step": 9826 }, { "epoch": 0.3295715603253123, "grad_norm": 0.393860091407598, "learning_rate": 2e-05, "loss": 5.3385, "step": 9827 }, { "epoch": 0.3296050976775384, "grad_norm": 0.38263566200909693, "learning_rate": 2e-05, "loss": 5.3289, "step": 9828 }, { "epoch": 0.3296386350297644, "grad_norm": 0.3885180837987283, "learning_rate": 2e-05, "loss": 5.6721, "step": 9829 }, { "epoch": 0.3296721723819904, "grad_norm": 0.4256133717908715, "learning_rate": 2e-05, "loss": 5.589, "step": 9830 }, { "epoch": 0.3297057097342165, "grad_norm": 0.4064600789452487, "learning_rate": 2e-05, "loss": 5.3595, "step": 9831 }, { "epoch": 0.32973924708644253, "grad_norm": 0.39340719442132, "learning_rate": 2e-05, "loss": 5.4583, "step": 9832 }, { "epoch": 0.3297727844386686, "grad_norm": 0.383845193061771, "learning_rate": 2e-05, "loss": 5.6586, "step": 9833 }, { "epoch": 0.3298063217908946, "grad_norm": 0.3789339864300046, "learning_rate": 2e-05, "loss": 5.7977, "step": 9834 }, { "epoch": 0.32983985914312064, "grad_norm": 0.4020858039939126, "learning_rate": 2e-05, "loss": 5.4028, "step": 9835 }, { "epoch": 0.3298733964953467, "grad_norm": 0.4136829850242352, "learning_rate": 2e-05, "loss": 5.504, "step": 9836 }, { "epoch": 0.32990693384757275, "grad_norm": 0.4035021854196444, "learning_rate": 2e-05, "loss": 5.7224, "step": 9837 }, { "epoch": 0.3299404711997988, "grad_norm": 0.38474185725235743, "learning_rate": 2e-05, "loss": 5.9584, "step": 9838 }, { "epoch": 0.3299740085520248, "grad_norm": 0.3887028180953249, "learning_rate": 2e-05, "loss": 5.4064, "step": 9839 }, { "epoch": 0.33000754590425085, "grad_norm": 0.4025844954607163, "learning_rate": 2e-05, "loss": 5.4593, "step": 9840 }, { "epoch": 0.3300410832564769, "grad_norm": 0.3859522937969577, "learning_rate": 2e-05, "loss": 5.4761, "step": 9841 }, { "epoch": 0.33007462060870296, "grad_norm": 0.4068715961575043, "learning_rate": 2e-05, "loss": 5.6513, "step": 9842 }, { "epoch": 0.33010815796092896, "grad_norm": 0.4025109919473299, "learning_rate": 2e-05, "loss": 5.6451, "step": 9843 }, { "epoch": 0.330141695313155, "grad_norm": 0.4118655885690185, "learning_rate": 2e-05, "loss": 5.6087, "step": 9844 }, { "epoch": 0.33017523266538107, "grad_norm": 0.39308737351148443, "learning_rate": 2e-05, "loss": 5.4989, "step": 9845 }, { "epoch": 0.3302087700176071, "grad_norm": 0.4324493585200765, "learning_rate": 2e-05, "loss": 5.5894, "step": 9846 }, { "epoch": 0.3302423073698332, "grad_norm": 0.4140477710308329, "learning_rate": 2e-05, "loss": 5.3732, "step": 9847 }, { "epoch": 0.3302758447220592, "grad_norm": 0.434140340740447, "learning_rate": 2e-05, "loss": 5.5702, "step": 9848 }, { "epoch": 0.33030938207428523, "grad_norm": 0.43329260857308793, "learning_rate": 2e-05, "loss": 5.5311, "step": 9849 }, { "epoch": 0.3303429194265113, "grad_norm": 0.39326545375714217, "learning_rate": 2e-05, "loss": 5.422, "step": 9850 }, { "epoch": 0.33037645677873734, "grad_norm": 0.41571138832443866, "learning_rate": 2e-05, "loss": 5.6615, "step": 9851 }, { "epoch": 0.33040999413096334, "grad_norm": 0.47753624480486595, "learning_rate": 2e-05, "loss": 5.5777, "step": 9852 }, { "epoch": 0.3304435314831894, "grad_norm": 0.45093108452085173, "learning_rate": 2e-05, "loss": 5.5705, "step": 9853 }, { "epoch": 0.33047706883541544, "grad_norm": 0.4091618258745831, "learning_rate": 2e-05, "loss": 5.2269, "step": 9854 }, { "epoch": 0.3305106061876415, "grad_norm": 0.46758486461675886, "learning_rate": 2e-05, "loss": 5.5428, "step": 9855 }, { "epoch": 0.33054414353986755, "grad_norm": 0.4248734182543071, "learning_rate": 2e-05, "loss": 5.4904, "step": 9856 }, { "epoch": 0.33057768089209355, "grad_norm": 0.39682677033771985, "learning_rate": 2e-05, "loss": 5.4822, "step": 9857 }, { "epoch": 0.3306112182443196, "grad_norm": 0.44121150130652925, "learning_rate": 2e-05, "loss": 5.5161, "step": 9858 }, { "epoch": 0.33064475559654566, "grad_norm": 0.4761685077673373, "learning_rate": 2e-05, "loss": 5.3807, "step": 9859 }, { "epoch": 0.3306782929487717, "grad_norm": 0.4251247094763717, "learning_rate": 2e-05, "loss": 5.5116, "step": 9860 }, { "epoch": 0.3307118303009977, "grad_norm": 0.4195872228194027, "learning_rate": 2e-05, "loss": 5.7529, "step": 9861 }, { "epoch": 0.33074536765322377, "grad_norm": 0.4414221918540686, "learning_rate": 2e-05, "loss": 5.5619, "step": 9862 }, { "epoch": 0.3307789050054498, "grad_norm": 0.3975684868831208, "learning_rate": 2e-05, "loss": 5.5352, "step": 9863 }, { "epoch": 0.3308124423576759, "grad_norm": 0.42833777569767323, "learning_rate": 2e-05, "loss": 5.3775, "step": 9864 }, { "epoch": 0.3308459797099019, "grad_norm": 0.41396262918678933, "learning_rate": 2e-05, "loss": 5.5528, "step": 9865 }, { "epoch": 0.3308795170621279, "grad_norm": 0.39670843910271086, "learning_rate": 2e-05, "loss": 5.4548, "step": 9866 }, { "epoch": 0.330913054414354, "grad_norm": 0.4182721680968891, "learning_rate": 2e-05, "loss": 5.3109, "step": 9867 }, { "epoch": 0.33094659176658003, "grad_norm": 0.46829102203649764, "learning_rate": 2e-05, "loss": 5.4512, "step": 9868 }, { "epoch": 0.3309801291188061, "grad_norm": 0.396043022089877, "learning_rate": 2e-05, "loss": 5.7124, "step": 9869 }, { "epoch": 0.3310136664710321, "grad_norm": 0.41323050523930965, "learning_rate": 2e-05, "loss": 5.6476, "step": 9870 }, { "epoch": 0.33104720382325814, "grad_norm": 0.42409426651510185, "learning_rate": 2e-05, "loss": 5.4585, "step": 9871 }, { "epoch": 0.3310807411754842, "grad_norm": 0.4183440465458492, "learning_rate": 2e-05, "loss": 5.4629, "step": 9872 }, { "epoch": 0.33111427852771025, "grad_norm": 0.40419200940941213, "learning_rate": 2e-05, "loss": 5.6015, "step": 9873 }, { "epoch": 0.3311478158799363, "grad_norm": 0.4182655767834654, "learning_rate": 2e-05, "loss": 5.7215, "step": 9874 }, { "epoch": 0.3311813532321623, "grad_norm": 0.3885639620388715, "learning_rate": 2e-05, "loss": 5.3891, "step": 9875 }, { "epoch": 0.33121489058438836, "grad_norm": 0.4163801441865172, "learning_rate": 2e-05, "loss": 5.5156, "step": 9876 }, { "epoch": 0.3312484279366144, "grad_norm": 0.41581476499191766, "learning_rate": 2e-05, "loss": 5.589, "step": 9877 }, { "epoch": 0.33128196528884046, "grad_norm": 0.4270744391716285, "learning_rate": 2e-05, "loss": 5.5346, "step": 9878 }, { "epoch": 0.33131550264106646, "grad_norm": 0.40152383290459503, "learning_rate": 2e-05, "loss": 5.4316, "step": 9879 }, { "epoch": 0.3313490399932925, "grad_norm": 0.44034302649563334, "learning_rate": 2e-05, "loss": 5.5482, "step": 9880 }, { "epoch": 0.33138257734551857, "grad_norm": 0.41357647942110515, "learning_rate": 2e-05, "loss": 5.351, "step": 9881 }, { "epoch": 0.3314161146977446, "grad_norm": 0.39962236366611026, "learning_rate": 2e-05, "loss": 5.4909, "step": 9882 }, { "epoch": 0.3314496520499707, "grad_norm": 0.4333589329488678, "learning_rate": 2e-05, "loss": 5.7257, "step": 9883 }, { "epoch": 0.3314831894021967, "grad_norm": 0.4507550436060644, "learning_rate": 2e-05, "loss": 5.5991, "step": 9884 }, { "epoch": 0.33151672675442273, "grad_norm": 0.4002813615644623, "learning_rate": 2e-05, "loss": 5.5806, "step": 9885 }, { "epoch": 0.3315502641066488, "grad_norm": 0.4028815170602126, "learning_rate": 2e-05, "loss": 5.4452, "step": 9886 }, { "epoch": 0.33158380145887484, "grad_norm": 0.4551890291384541, "learning_rate": 2e-05, "loss": 5.6024, "step": 9887 }, { "epoch": 0.33161733881110084, "grad_norm": 0.433032771935693, "learning_rate": 2e-05, "loss": 5.4923, "step": 9888 }, { "epoch": 0.3316508761633269, "grad_norm": 0.38021032700928475, "learning_rate": 2e-05, "loss": 5.5511, "step": 9889 }, { "epoch": 0.33168441351555294, "grad_norm": 0.4318196787832438, "learning_rate": 2e-05, "loss": 5.4268, "step": 9890 }, { "epoch": 0.331717950867779, "grad_norm": 0.4598927573568117, "learning_rate": 2e-05, "loss": 5.7111, "step": 9891 }, { "epoch": 0.33175148822000505, "grad_norm": 0.3861192733783482, "learning_rate": 2e-05, "loss": 5.4652, "step": 9892 }, { "epoch": 0.33178502557223105, "grad_norm": 0.40361463454049884, "learning_rate": 2e-05, "loss": 5.4108, "step": 9893 }, { "epoch": 0.3318185629244571, "grad_norm": 0.4279988913307767, "learning_rate": 2e-05, "loss": 5.4641, "step": 9894 }, { "epoch": 0.33185210027668316, "grad_norm": 0.41319197493197696, "learning_rate": 2e-05, "loss": 5.5542, "step": 9895 }, { "epoch": 0.3318856376289092, "grad_norm": 0.4302306362277591, "learning_rate": 2e-05, "loss": 5.4449, "step": 9896 }, { "epoch": 0.3319191749811352, "grad_norm": 0.440268944554374, "learning_rate": 2e-05, "loss": 5.2913, "step": 9897 }, { "epoch": 0.33195271233336127, "grad_norm": 0.4797325704792437, "learning_rate": 2e-05, "loss": 5.376, "step": 9898 }, { "epoch": 0.3319862496855873, "grad_norm": 0.4474262881648919, "learning_rate": 2e-05, "loss": 5.5467, "step": 9899 }, { "epoch": 0.3320197870378134, "grad_norm": 0.3991339028232438, "learning_rate": 2e-05, "loss": 5.5685, "step": 9900 }, { "epoch": 0.33205332439003943, "grad_norm": 0.3842007003161774, "learning_rate": 2e-05, "loss": 5.5329, "step": 9901 }, { "epoch": 0.3320868617422654, "grad_norm": 0.43101146638270826, "learning_rate": 2e-05, "loss": 5.4694, "step": 9902 }, { "epoch": 0.3321203990944915, "grad_norm": 0.39167588338509207, "learning_rate": 2e-05, "loss": 5.4114, "step": 9903 }, { "epoch": 0.33215393644671753, "grad_norm": 0.38517704959123794, "learning_rate": 2e-05, "loss": 5.5334, "step": 9904 }, { "epoch": 0.3321874737989436, "grad_norm": 0.38914578300215424, "learning_rate": 2e-05, "loss": 5.5548, "step": 9905 }, { "epoch": 0.33222101115116964, "grad_norm": 0.39499637305066776, "learning_rate": 2e-05, "loss": 5.6625, "step": 9906 }, { "epoch": 0.33225454850339564, "grad_norm": 0.3646320219042849, "learning_rate": 2e-05, "loss": 5.2932, "step": 9907 }, { "epoch": 0.3322880858556217, "grad_norm": 0.38414326840208296, "learning_rate": 2e-05, "loss": 5.5623, "step": 9908 }, { "epoch": 0.33232162320784775, "grad_norm": 0.4181286207959991, "learning_rate": 2e-05, "loss": 5.4491, "step": 9909 }, { "epoch": 0.3323551605600738, "grad_norm": 0.37564499865131923, "learning_rate": 2e-05, "loss": 5.3938, "step": 9910 }, { "epoch": 0.3323886979122998, "grad_norm": 0.40647722431645567, "learning_rate": 2e-05, "loss": 5.4804, "step": 9911 }, { "epoch": 0.33242223526452586, "grad_norm": 0.4058009593999785, "learning_rate": 2e-05, "loss": 5.4315, "step": 9912 }, { "epoch": 0.3324557726167519, "grad_norm": 0.40006653969954725, "learning_rate": 2e-05, "loss": 5.6703, "step": 9913 }, { "epoch": 0.33248930996897796, "grad_norm": 0.38095124227978905, "learning_rate": 2e-05, "loss": 5.5441, "step": 9914 }, { "epoch": 0.332522847321204, "grad_norm": 0.3764278625891248, "learning_rate": 2e-05, "loss": 5.4289, "step": 9915 }, { "epoch": 0.33255638467343, "grad_norm": 0.39253631241114734, "learning_rate": 2e-05, "loss": 5.6271, "step": 9916 }, { "epoch": 0.33258992202565607, "grad_norm": 0.39321876950344986, "learning_rate": 2e-05, "loss": 5.579, "step": 9917 }, { "epoch": 0.3326234593778821, "grad_norm": 0.4027682420849033, "learning_rate": 2e-05, "loss": 5.4749, "step": 9918 }, { "epoch": 0.3326569967301082, "grad_norm": 0.41551725757148533, "learning_rate": 2e-05, "loss": 5.5007, "step": 9919 }, { "epoch": 0.3326905340823342, "grad_norm": 0.38297978568286906, "learning_rate": 2e-05, "loss": 5.5754, "step": 9920 }, { "epoch": 0.33272407143456023, "grad_norm": 0.4100601243094927, "learning_rate": 2e-05, "loss": 5.5677, "step": 9921 }, { "epoch": 0.3327576087867863, "grad_norm": 0.4318455529113431, "learning_rate": 2e-05, "loss": 5.5563, "step": 9922 }, { "epoch": 0.33279114613901234, "grad_norm": 0.3974595935321655, "learning_rate": 2e-05, "loss": 5.4787, "step": 9923 }, { "epoch": 0.3328246834912384, "grad_norm": 0.4218065264887689, "learning_rate": 2e-05, "loss": 5.3694, "step": 9924 }, { "epoch": 0.3328582208434644, "grad_norm": 0.39063205197673717, "learning_rate": 2e-05, "loss": 5.5914, "step": 9925 }, { "epoch": 0.33289175819569045, "grad_norm": 0.43360010245056313, "learning_rate": 2e-05, "loss": 5.5617, "step": 9926 }, { "epoch": 0.3329252955479165, "grad_norm": 0.44562304832042476, "learning_rate": 2e-05, "loss": 5.4858, "step": 9927 }, { "epoch": 0.33295883290014255, "grad_norm": 0.42037803551196234, "learning_rate": 2e-05, "loss": 5.5961, "step": 9928 }, { "epoch": 0.33299237025236855, "grad_norm": 0.41385023255997705, "learning_rate": 2e-05, "loss": 5.5492, "step": 9929 }, { "epoch": 0.3330259076045946, "grad_norm": 0.3918447383775576, "learning_rate": 2e-05, "loss": 5.4556, "step": 9930 }, { "epoch": 0.33305944495682066, "grad_norm": 0.397134235945195, "learning_rate": 2e-05, "loss": 5.6068, "step": 9931 }, { "epoch": 0.3330929823090467, "grad_norm": 0.3766899816109856, "learning_rate": 2e-05, "loss": 5.684, "step": 9932 }, { "epoch": 0.33312651966127277, "grad_norm": 0.39590742128803924, "learning_rate": 2e-05, "loss": 5.4491, "step": 9933 }, { "epoch": 0.33316005701349877, "grad_norm": 0.40700739296358013, "learning_rate": 2e-05, "loss": 5.6254, "step": 9934 }, { "epoch": 0.3331935943657248, "grad_norm": 0.4081923323129668, "learning_rate": 2e-05, "loss": 5.5607, "step": 9935 }, { "epoch": 0.3332271317179509, "grad_norm": 0.4013192509407809, "learning_rate": 2e-05, "loss": 5.6099, "step": 9936 }, { "epoch": 0.33326066907017693, "grad_norm": 0.4000964887650887, "learning_rate": 2e-05, "loss": 5.5114, "step": 9937 }, { "epoch": 0.3332942064224029, "grad_norm": 0.40218439884982843, "learning_rate": 2e-05, "loss": 5.6593, "step": 9938 }, { "epoch": 0.333327743774629, "grad_norm": 0.3778587691552241, "learning_rate": 2e-05, "loss": 5.3901, "step": 9939 }, { "epoch": 0.33336128112685504, "grad_norm": 0.4048643526091735, "learning_rate": 2e-05, "loss": 5.5882, "step": 9940 }, { "epoch": 0.3333948184790811, "grad_norm": 0.41129087417634863, "learning_rate": 2e-05, "loss": 5.6415, "step": 9941 }, { "epoch": 0.33342835583130714, "grad_norm": 0.41061154442109693, "learning_rate": 2e-05, "loss": 5.6376, "step": 9942 }, { "epoch": 0.33346189318353314, "grad_norm": 0.44937569723537973, "learning_rate": 2e-05, "loss": 5.3694, "step": 9943 }, { "epoch": 0.3334954305357592, "grad_norm": 0.39633405028661006, "learning_rate": 2e-05, "loss": 5.5104, "step": 9944 }, { "epoch": 0.33352896788798525, "grad_norm": 0.40647336006788265, "learning_rate": 2e-05, "loss": 5.5516, "step": 9945 }, { "epoch": 0.3335625052402113, "grad_norm": 0.39212147653817775, "learning_rate": 2e-05, "loss": 5.6307, "step": 9946 }, { "epoch": 0.3335960425924373, "grad_norm": 0.41343113435721235, "learning_rate": 2e-05, "loss": 5.3434, "step": 9947 }, { "epoch": 0.33362957994466336, "grad_norm": 0.3831487433392385, "learning_rate": 2e-05, "loss": 5.3384, "step": 9948 }, { "epoch": 0.3336631172968894, "grad_norm": 0.42305650421903257, "learning_rate": 2e-05, "loss": 5.5922, "step": 9949 }, { "epoch": 0.33369665464911547, "grad_norm": 0.42179408626001397, "learning_rate": 2e-05, "loss": 5.4731, "step": 9950 }, { "epoch": 0.3337301920013415, "grad_norm": 0.386285413388975, "learning_rate": 2e-05, "loss": 5.5201, "step": 9951 }, { "epoch": 0.3337637293535675, "grad_norm": 0.4467694238679314, "learning_rate": 2e-05, "loss": 5.4772, "step": 9952 }, { "epoch": 0.33379726670579357, "grad_norm": 0.44995976399354665, "learning_rate": 2e-05, "loss": 5.4463, "step": 9953 }, { "epoch": 0.3338308040580196, "grad_norm": 0.39005735889340243, "learning_rate": 2e-05, "loss": 5.5927, "step": 9954 }, { "epoch": 0.3338643414102457, "grad_norm": 0.4248495946733899, "learning_rate": 2e-05, "loss": 5.5363, "step": 9955 }, { "epoch": 0.3338978787624717, "grad_norm": 0.4490552512413156, "learning_rate": 2e-05, "loss": 5.2989, "step": 9956 }, { "epoch": 0.33393141611469773, "grad_norm": 0.4129433238949361, "learning_rate": 2e-05, "loss": 5.7106, "step": 9957 }, { "epoch": 0.3339649534669238, "grad_norm": 0.4403578979118761, "learning_rate": 2e-05, "loss": 5.2356, "step": 9958 }, { "epoch": 0.33399849081914984, "grad_norm": 0.4203792688971747, "learning_rate": 2e-05, "loss": 5.3795, "step": 9959 }, { "epoch": 0.3340320281713759, "grad_norm": 0.45396707168945133, "learning_rate": 2e-05, "loss": 5.6007, "step": 9960 }, { "epoch": 0.3340655655236019, "grad_norm": 0.4151750486280281, "learning_rate": 2e-05, "loss": 5.5515, "step": 9961 }, { "epoch": 0.33409910287582795, "grad_norm": 0.3961197755475331, "learning_rate": 2e-05, "loss": 5.5954, "step": 9962 }, { "epoch": 0.334132640228054, "grad_norm": 0.3968061716453658, "learning_rate": 2e-05, "loss": 5.7473, "step": 9963 }, { "epoch": 0.33416617758028005, "grad_norm": 0.3871355784395955, "learning_rate": 2e-05, "loss": 5.4889, "step": 9964 }, { "epoch": 0.33419971493250605, "grad_norm": 0.40693438807850785, "learning_rate": 2e-05, "loss": 5.6081, "step": 9965 }, { "epoch": 0.3342332522847321, "grad_norm": 0.39218625750792263, "learning_rate": 2e-05, "loss": 5.6942, "step": 9966 }, { "epoch": 0.33426678963695816, "grad_norm": 0.40949595226638036, "learning_rate": 2e-05, "loss": 5.5899, "step": 9967 }, { "epoch": 0.3343003269891842, "grad_norm": 0.38359303985391063, "learning_rate": 2e-05, "loss": 5.6286, "step": 9968 }, { "epoch": 0.33433386434141027, "grad_norm": 0.4112315668347235, "learning_rate": 2e-05, "loss": 5.4651, "step": 9969 }, { "epoch": 0.33436740169363627, "grad_norm": 0.37490655905417053, "learning_rate": 2e-05, "loss": 5.5037, "step": 9970 }, { "epoch": 0.3344009390458623, "grad_norm": 0.4324645142502245, "learning_rate": 2e-05, "loss": 5.5687, "step": 9971 }, { "epoch": 0.3344344763980884, "grad_norm": 0.4002274826744477, "learning_rate": 2e-05, "loss": 5.6742, "step": 9972 }, { "epoch": 0.33446801375031443, "grad_norm": 0.389137782694464, "learning_rate": 2e-05, "loss": 5.5468, "step": 9973 }, { "epoch": 0.33450155110254043, "grad_norm": 0.41922568846565006, "learning_rate": 2e-05, "loss": 5.5024, "step": 9974 }, { "epoch": 0.3345350884547665, "grad_norm": 0.39138214978298586, "learning_rate": 2e-05, "loss": 5.8466, "step": 9975 }, { "epoch": 0.33456862580699254, "grad_norm": 0.38974815319265044, "learning_rate": 2e-05, "loss": 5.6695, "step": 9976 }, { "epoch": 0.3346021631592186, "grad_norm": 0.4169728707315341, "learning_rate": 2e-05, "loss": 5.5594, "step": 9977 }, { "epoch": 0.33463570051144464, "grad_norm": 0.410735609749305, "learning_rate": 2e-05, "loss": 5.3211, "step": 9978 }, { "epoch": 0.33466923786367064, "grad_norm": 0.4319882211474714, "learning_rate": 2e-05, "loss": 5.6038, "step": 9979 }, { "epoch": 0.3347027752158967, "grad_norm": 0.4190206812482582, "learning_rate": 2e-05, "loss": 5.5126, "step": 9980 }, { "epoch": 0.33473631256812275, "grad_norm": 0.4609600774416755, "learning_rate": 2e-05, "loss": 5.4562, "step": 9981 }, { "epoch": 0.3347698499203488, "grad_norm": 0.429368923728505, "learning_rate": 2e-05, "loss": 5.471, "step": 9982 }, { "epoch": 0.3348033872725748, "grad_norm": 0.3909710714289288, "learning_rate": 2e-05, "loss": 5.6962, "step": 9983 }, { "epoch": 0.33483692462480086, "grad_norm": 0.4188006906043117, "learning_rate": 2e-05, "loss": 5.5586, "step": 9984 }, { "epoch": 0.3348704619770269, "grad_norm": 0.44536267389904977, "learning_rate": 2e-05, "loss": 5.6861, "step": 9985 }, { "epoch": 0.33490399932925297, "grad_norm": 0.3782800084675579, "learning_rate": 2e-05, "loss": 5.6332, "step": 9986 }, { "epoch": 0.334937536681479, "grad_norm": 0.42356959863572563, "learning_rate": 2e-05, "loss": 5.5518, "step": 9987 }, { "epoch": 0.334971074033705, "grad_norm": 0.41744143673571205, "learning_rate": 2e-05, "loss": 5.5347, "step": 9988 }, { "epoch": 0.3350046113859311, "grad_norm": 0.41355957174594404, "learning_rate": 2e-05, "loss": 5.6746, "step": 9989 }, { "epoch": 0.3350381487381571, "grad_norm": 0.40966008191797476, "learning_rate": 2e-05, "loss": 5.5762, "step": 9990 }, { "epoch": 0.3350716860903832, "grad_norm": 0.40840754880784647, "learning_rate": 2e-05, "loss": 5.6602, "step": 9991 }, { "epoch": 0.3351052234426092, "grad_norm": 0.39518822621917016, "learning_rate": 2e-05, "loss": 5.6399, "step": 9992 }, { "epoch": 0.33513876079483523, "grad_norm": 0.4303730890334408, "learning_rate": 2e-05, "loss": 5.5313, "step": 9993 }, { "epoch": 0.3351722981470613, "grad_norm": 0.4256834838837952, "learning_rate": 2e-05, "loss": 5.4695, "step": 9994 }, { "epoch": 0.33520583549928734, "grad_norm": 0.4072992059350928, "learning_rate": 2e-05, "loss": 5.4056, "step": 9995 }, { "epoch": 0.3352393728515134, "grad_norm": 0.3926362848024084, "learning_rate": 2e-05, "loss": 5.6107, "step": 9996 }, { "epoch": 0.3352729102037394, "grad_norm": 0.413013310667938, "learning_rate": 2e-05, "loss": 5.6393, "step": 9997 }, { "epoch": 0.33530644755596545, "grad_norm": 0.43150921992755226, "learning_rate": 2e-05, "loss": 5.7758, "step": 9998 }, { "epoch": 0.3353399849081915, "grad_norm": 0.4163770477771978, "learning_rate": 2e-05, "loss": 5.5727, "step": 9999 }, { "epoch": 0.33537352226041756, "grad_norm": 0.47546354014125436, "learning_rate": 2e-05, "loss": 5.4929, "step": 10000 } ], "logging_steps": 1.0, "max_steps": 1490850, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1129717786492928.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }