{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 9258, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003240440699935191, "grad_norm": 9.704249382019043, "learning_rate": 0.0, "loss": 0.6798, "step": 1 }, { "epoch": 0.0006480881399870382, "grad_norm": 8.982847213745117, "learning_rate": 1.798561151079137e-08, "loss": 0.6615, "step": 2 }, { "epoch": 0.0009721322099805574, "grad_norm": 9.398996353149414, "learning_rate": 3.597122302158274e-08, "loss": 0.6612, "step": 3 }, { "epoch": 0.0012961762799740765, "grad_norm": 9.116962432861328, "learning_rate": 5.395683453237411e-08, "loss": 0.6734, "step": 4 }, { "epoch": 0.0016202203499675956, "grad_norm": 9.689047813415527, "learning_rate": 7.194244604316547e-08, "loss": 0.6595, "step": 5 }, { "epoch": 0.0019442644199611147, "grad_norm": 9.494297981262207, "learning_rate": 8.992805755395684e-08, "loss": 0.6694, "step": 6 }, { "epoch": 0.002268308489954634, "grad_norm": 8.994853019714355, "learning_rate": 1.0791366906474822e-07, "loss": 0.6465, "step": 7 }, { "epoch": 0.002592352559948153, "grad_norm": 9.176651954650879, "learning_rate": 1.2589928057553958e-07, "loss": 0.6729, "step": 8 }, { "epoch": 0.002916396629941672, "grad_norm": 9.519684791564941, "learning_rate": 1.4388489208633095e-07, "loss": 0.6765, "step": 9 }, { "epoch": 0.0032404406999351912, "grad_norm": 8.757326126098633, "learning_rate": 1.618705035971223e-07, "loss": 0.654, "step": 10 }, { "epoch": 0.0035644847699287103, "grad_norm": 9.101982116699219, "learning_rate": 1.7985611510791368e-07, "loss": 0.6451, "step": 11 }, { "epoch": 0.0038885288399222295, "grad_norm": 8.611491203308105, "learning_rate": 1.9784172661870504e-07, "loss": 0.6484, "step": 12 }, { "epoch": 0.004212572909915748, "grad_norm": 9.016146659851074, "learning_rate": 2.1582733812949643e-07, "loss": 0.6544, "step": 13 }, { "epoch": 0.004536616979909268, "grad_norm": 9.414257049560547, "learning_rate": 2.338129496402878e-07, "loss": 0.6798, "step": 14 }, { "epoch": 0.004860661049902786, "grad_norm": 8.944530487060547, "learning_rate": 2.5179856115107916e-07, "loss": 0.6736, "step": 15 }, { "epoch": 0.005184705119896306, "grad_norm": 7.937344074249268, "learning_rate": 2.697841726618705e-07, "loss": 0.6335, "step": 16 }, { "epoch": 0.005508749189889825, "grad_norm": 8.436976432800293, "learning_rate": 2.877697841726619e-07, "loss": 0.6361, "step": 17 }, { "epoch": 0.005832793259883344, "grad_norm": 8.096851348876953, "learning_rate": 3.057553956834533e-07, "loss": 0.6176, "step": 18 }, { "epoch": 0.006156837329876863, "grad_norm": 8.40873908996582, "learning_rate": 3.237410071942446e-07, "loss": 0.6179, "step": 19 }, { "epoch": 0.0064808813998703824, "grad_norm": 7.971982479095459, "learning_rate": 3.41726618705036e-07, "loss": 0.6322, "step": 20 }, { "epoch": 0.006804925469863901, "grad_norm": 8.637293815612793, "learning_rate": 3.5971223021582736e-07, "loss": 0.634, "step": 21 }, { "epoch": 0.007128969539857421, "grad_norm": 8.830659866333008, "learning_rate": 3.7769784172661875e-07, "loss": 0.5987, "step": 22 }, { "epoch": 0.007453013609850939, "grad_norm": 6.82891321182251, "learning_rate": 3.956834532374101e-07, "loss": 0.5337, "step": 23 }, { "epoch": 0.007777057679844459, "grad_norm": 6.408069133758545, "learning_rate": 4.136690647482015e-07, "loss": 0.5231, "step": 24 }, { "epoch": 0.008101101749837978, "grad_norm": 6.321592807769775, "learning_rate": 4.3165467625899287e-07, "loss": 0.5346, "step": 25 }, { "epoch": 0.008425145819831496, "grad_norm": 5.974789619445801, "learning_rate": 4.496402877697842e-07, "loss": 0.5239, "step": 26 }, { "epoch": 0.008749189889825017, "grad_norm": 6.678211688995361, "learning_rate": 4.676258992805756e-07, "loss": 0.5051, "step": 27 }, { "epoch": 0.009073233959818535, "grad_norm": 6.219130992889404, "learning_rate": 4.85611510791367e-07, "loss": 0.5005, "step": 28 }, { "epoch": 0.009397278029812054, "grad_norm": 6.158231735229492, "learning_rate": 5.035971223021583e-07, "loss": 0.492, "step": 29 }, { "epoch": 0.009721322099805573, "grad_norm": 5.8069071769714355, "learning_rate": 5.215827338129497e-07, "loss": 0.4757, "step": 30 }, { "epoch": 0.010045366169799093, "grad_norm": 6.29368257522583, "learning_rate": 5.39568345323741e-07, "loss": 0.3917, "step": 31 }, { "epoch": 0.010369410239792612, "grad_norm": 4.626503944396973, "learning_rate": 5.575539568345325e-07, "loss": 0.3609, "step": 32 }, { "epoch": 0.01069345430978613, "grad_norm": 4.250292778015137, "learning_rate": 5.755395683453238e-07, "loss": 0.3723, "step": 33 }, { "epoch": 0.01101749837977965, "grad_norm": 4.216755390167236, "learning_rate": 5.935251798561151e-07, "loss": 0.3696, "step": 34 }, { "epoch": 0.01134154244977317, "grad_norm": 3.7793445587158203, "learning_rate": 6.115107913669066e-07, "loss": 0.3648, "step": 35 }, { "epoch": 0.011665586519766688, "grad_norm": 3.3505563735961914, "learning_rate": 6.294964028776979e-07, "loss": 0.343, "step": 36 }, { "epoch": 0.011989630589760207, "grad_norm": 2.7636797428131104, "learning_rate": 6.474820143884893e-07, "loss": 0.3059, "step": 37 }, { "epoch": 0.012313674659753726, "grad_norm": 2.806562662124634, "learning_rate": 6.654676258992807e-07, "loss": 0.3271, "step": 38 }, { "epoch": 0.012637718729747246, "grad_norm": 2.9375364780426025, "learning_rate": 6.83453237410072e-07, "loss": 0.3347, "step": 39 }, { "epoch": 0.012961762799740765, "grad_norm": 2.538407325744629, "learning_rate": 7.014388489208633e-07, "loss": 0.3039, "step": 40 }, { "epoch": 0.013285806869734284, "grad_norm": 2.7263214588165283, "learning_rate": 7.194244604316547e-07, "loss": 0.3153, "step": 41 }, { "epoch": 0.013609850939727802, "grad_norm": 2.802217721939087, "learning_rate": 7.37410071942446e-07, "loss": 0.3141, "step": 42 }, { "epoch": 0.013933895009721323, "grad_norm": 2.8862125873565674, "learning_rate": 7.553956834532375e-07, "loss": 0.2925, "step": 43 }, { "epoch": 0.014257939079714841, "grad_norm": 3.05582594871521, "learning_rate": 7.733812949640289e-07, "loss": 0.3012, "step": 44 }, { "epoch": 0.01458198314970836, "grad_norm": 3.127528667449951, "learning_rate": 7.913669064748202e-07, "loss": 0.3073, "step": 45 }, { "epoch": 0.014906027219701879, "grad_norm": 2.8370361328125, "learning_rate": 8.093525179856115e-07, "loss": 0.29, "step": 46 }, { "epoch": 0.0152300712896954, "grad_norm": 2.813434362411499, "learning_rate": 8.27338129496403e-07, "loss": 0.2866, "step": 47 }, { "epoch": 0.015554115359688918, "grad_norm": 2.4069221019744873, "learning_rate": 8.453237410071943e-07, "loss": 0.2856, "step": 48 }, { "epoch": 0.015878159429682438, "grad_norm": 2.02687406539917, "learning_rate": 8.633093525179857e-07, "loss": 0.2856, "step": 49 }, { "epoch": 0.016202203499675955, "grad_norm": 2.191495418548584, "learning_rate": 8.81294964028777e-07, "loss": 0.2905, "step": 50 }, { "epoch": 0.016526247569669476, "grad_norm": 2.063936233520508, "learning_rate": 8.992805755395684e-07, "loss": 0.2556, "step": 51 }, { "epoch": 0.016850291639662993, "grad_norm": 2.148212194442749, "learning_rate": 9.172661870503598e-07, "loss": 0.2684, "step": 52 }, { "epoch": 0.017174335709656513, "grad_norm": 2.0932915210723877, "learning_rate": 9.352517985611512e-07, "loss": 0.2865, "step": 53 }, { "epoch": 0.017498379779650033, "grad_norm": 2.168978452682495, "learning_rate": 9.532374100719425e-07, "loss": 0.2919, "step": 54 }, { "epoch": 0.01782242384964355, "grad_norm": 1.9223214387893677, "learning_rate": 9.71223021582734e-07, "loss": 0.2609, "step": 55 }, { "epoch": 0.01814646791963707, "grad_norm": 1.9711604118347168, "learning_rate": 9.892086330935252e-07, "loss": 0.2645, "step": 56 }, { "epoch": 0.01847051198963059, "grad_norm": 2.067918062210083, "learning_rate": 1.0071942446043167e-06, "loss": 0.2945, "step": 57 }, { "epoch": 0.018794556059624108, "grad_norm": 1.9994969367980957, "learning_rate": 1.025179856115108e-06, "loss": 0.2597, "step": 58 }, { "epoch": 0.01911860012961763, "grad_norm": 1.9406379461288452, "learning_rate": 1.0431654676258993e-06, "loss": 0.2505, "step": 59 }, { "epoch": 0.019442644199611146, "grad_norm": 1.9860442876815796, "learning_rate": 1.0611510791366908e-06, "loss": 0.2872, "step": 60 }, { "epoch": 0.019766688269604666, "grad_norm": 2.0322153568267822, "learning_rate": 1.079136690647482e-06, "loss": 0.2602, "step": 61 }, { "epoch": 0.020090732339598186, "grad_norm": 1.8435291051864624, "learning_rate": 1.0971223021582735e-06, "loss": 0.248, "step": 62 }, { "epoch": 0.020414776409591703, "grad_norm": 1.874155879020691, "learning_rate": 1.115107913669065e-06, "loss": 0.2502, "step": 63 }, { "epoch": 0.020738820479585224, "grad_norm": 1.779914140701294, "learning_rate": 1.1330935251798561e-06, "loss": 0.2317, "step": 64 }, { "epoch": 0.021062864549578744, "grad_norm": 2.0697224140167236, "learning_rate": 1.1510791366906476e-06, "loss": 0.2508, "step": 65 }, { "epoch": 0.02138690861957226, "grad_norm": 1.8721024990081787, "learning_rate": 1.1690647482014388e-06, "loss": 0.2524, "step": 66 }, { "epoch": 0.02171095268956578, "grad_norm": 1.8915222883224487, "learning_rate": 1.1870503597122303e-06, "loss": 0.2572, "step": 67 }, { "epoch": 0.0220349967595593, "grad_norm": 1.9579988718032837, "learning_rate": 1.2050359712230217e-06, "loss": 0.233, "step": 68 }, { "epoch": 0.02235904082955282, "grad_norm": 1.8511942625045776, "learning_rate": 1.2230215827338131e-06, "loss": 0.2311, "step": 69 }, { "epoch": 0.02268308489954634, "grad_norm": 1.8020142316818237, "learning_rate": 1.2410071942446044e-06, "loss": 0.2366, "step": 70 }, { "epoch": 0.023007128969539856, "grad_norm": 1.8556476831436157, "learning_rate": 1.2589928057553958e-06, "loss": 0.2255, "step": 71 }, { "epoch": 0.023331173039533377, "grad_norm": 1.8471894264221191, "learning_rate": 1.2769784172661873e-06, "loss": 0.2414, "step": 72 }, { "epoch": 0.023655217109526897, "grad_norm": 1.9478641748428345, "learning_rate": 1.2949640287769785e-06, "loss": 0.2471, "step": 73 }, { "epoch": 0.023979261179520414, "grad_norm": 1.799368143081665, "learning_rate": 1.3129496402877697e-06, "loss": 0.2506, "step": 74 }, { "epoch": 0.024303305249513935, "grad_norm": 1.9307256937026978, "learning_rate": 1.3309352517985614e-06, "loss": 0.2624, "step": 75 }, { "epoch": 0.02462734931950745, "grad_norm": 1.8890374898910522, "learning_rate": 1.3489208633093526e-06, "loss": 0.2626, "step": 76 }, { "epoch": 0.024951393389500972, "grad_norm": 1.7502024173736572, "learning_rate": 1.366906474820144e-06, "loss": 0.2349, "step": 77 }, { "epoch": 0.025275437459494492, "grad_norm": 1.7886251211166382, "learning_rate": 1.3848920863309353e-06, "loss": 0.249, "step": 78 }, { "epoch": 0.02559948152948801, "grad_norm": 1.7713961601257324, "learning_rate": 1.4028776978417265e-06, "loss": 0.2432, "step": 79 }, { "epoch": 0.02592352559948153, "grad_norm": 1.7449482679367065, "learning_rate": 1.4208633093525182e-06, "loss": 0.2425, "step": 80 }, { "epoch": 0.02624756966947505, "grad_norm": 1.9202888011932373, "learning_rate": 1.4388489208633094e-06, "loss": 0.2579, "step": 81 }, { "epoch": 0.026571613739468567, "grad_norm": 1.7376736402511597, "learning_rate": 1.4568345323741009e-06, "loss": 0.2077, "step": 82 }, { "epoch": 0.026895657809462088, "grad_norm": 1.8440021276474, "learning_rate": 1.474820143884892e-06, "loss": 0.2394, "step": 83 }, { "epoch": 0.027219701879455604, "grad_norm": 1.7778856754302979, "learning_rate": 1.4928057553956835e-06, "loss": 0.2037, "step": 84 }, { "epoch": 0.027543745949449125, "grad_norm": 1.9841382503509521, "learning_rate": 1.510791366906475e-06, "loss": 0.2461, "step": 85 }, { "epoch": 0.027867790019442645, "grad_norm": 1.8014554977416992, "learning_rate": 1.5287769784172662e-06, "loss": 0.2258, "step": 86 }, { "epoch": 0.028191834089436162, "grad_norm": 1.8475525379180908, "learning_rate": 1.5467625899280579e-06, "loss": 0.2142, "step": 87 }, { "epoch": 0.028515878159429683, "grad_norm": 1.8251590728759766, "learning_rate": 1.5647482014388491e-06, "loss": 0.2222, "step": 88 }, { "epoch": 0.028839922229423203, "grad_norm": 1.7561265230178833, "learning_rate": 1.5827338129496403e-06, "loss": 0.2283, "step": 89 }, { "epoch": 0.02916396629941672, "grad_norm": 1.9121075868606567, "learning_rate": 1.6007194244604318e-06, "loss": 0.2415, "step": 90 }, { "epoch": 0.02948801036941024, "grad_norm": 1.8700357675552368, "learning_rate": 1.618705035971223e-06, "loss": 0.2329, "step": 91 }, { "epoch": 0.029812054439403757, "grad_norm": 1.8043524026870728, "learning_rate": 1.6366906474820147e-06, "loss": 0.2327, "step": 92 }, { "epoch": 0.030136098509397278, "grad_norm": 1.8841935396194458, "learning_rate": 1.654676258992806e-06, "loss": 0.2269, "step": 93 }, { "epoch": 0.0304601425793908, "grad_norm": 1.8864487409591675, "learning_rate": 1.6726618705035971e-06, "loss": 0.2223, "step": 94 }, { "epoch": 0.030784186649384315, "grad_norm": 1.8025115728378296, "learning_rate": 1.6906474820143886e-06, "loss": 0.2257, "step": 95 }, { "epoch": 0.031108230719377836, "grad_norm": 1.8227530717849731, "learning_rate": 1.7086330935251798e-06, "loss": 0.2419, "step": 96 }, { "epoch": 0.031432274789371356, "grad_norm": 1.745831847190857, "learning_rate": 1.7266187050359715e-06, "loss": 0.2227, "step": 97 }, { "epoch": 0.031756318859364877, "grad_norm": 1.9700807332992554, "learning_rate": 1.7446043165467627e-06, "loss": 0.2159, "step": 98 }, { "epoch": 0.03208036292935839, "grad_norm": 1.744411826133728, "learning_rate": 1.762589928057554e-06, "loss": 0.2259, "step": 99 }, { "epoch": 0.03240440699935191, "grad_norm": 1.7854716777801514, "learning_rate": 1.7805755395683456e-06, "loss": 0.2127, "step": 100 }, { "epoch": 0.03272845106934543, "grad_norm": 1.7840704917907715, "learning_rate": 1.7985611510791368e-06, "loss": 0.2399, "step": 101 }, { "epoch": 0.03305249513933895, "grad_norm": 1.8360185623168945, "learning_rate": 1.8165467625899283e-06, "loss": 0.2069, "step": 102 }, { "epoch": 0.03337653920933247, "grad_norm": 1.716846227645874, "learning_rate": 1.8345323741007195e-06, "loss": 0.2235, "step": 103 }, { "epoch": 0.033700583279325985, "grad_norm": 1.7586473226547241, "learning_rate": 1.8525179856115107e-06, "loss": 0.1966, "step": 104 }, { "epoch": 0.034024627349319506, "grad_norm": 1.776848316192627, "learning_rate": 1.8705035971223024e-06, "loss": 0.2095, "step": 105 }, { "epoch": 0.034348671419313026, "grad_norm": 2.3353116512298584, "learning_rate": 1.8884892086330936e-06, "loss": 0.2112, "step": 106 }, { "epoch": 0.034672715489306546, "grad_norm": 1.820418357849121, "learning_rate": 1.906474820143885e-06, "loss": 0.2251, "step": 107 }, { "epoch": 0.03499675955930007, "grad_norm": 1.8014744520187378, "learning_rate": 1.9244604316546765e-06, "loss": 0.2182, "step": 108 }, { "epoch": 0.03532080362929359, "grad_norm": 1.767818570137024, "learning_rate": 1.942446043165468e-06, "loss": 0.207, "step": 109 }, { "epoch": 0.0356448476992871, "grad_norm": 1.7806322574615479, "learning_rate": 1.960431654676259e-06, "loss": 0.2138, "step": 110 }, { "epoch": 0.03596889176928062, "grad_norm": 1.7334893941879272, "learning_rate": 1.9784172661870504e-06, "loss": 0.2135, "step": 111 }, { "epoch": 0.03629293583927414, "grad_norm": 1.7799729108810425, "learning_rate": 1.996402877697842e-06, "loss": 0.2122, "step": 112 }, { "epoch": 0.03661697990926766, "grad_norm": 1.7285608053207397, "learning_rate": 2.0143884892086333e-06, "loss": 0.2204, "step": 113 }, { "epoch": 0.03694102397926118, "grad_norm": 1.7246173620224, "learning_rate": 2.0323741007194248e-06, "loss": 0.2261, "step": 114 }, { "epoch": 0.037265068049254696, "grad_norm": 1.7286440134048462, "learning_rate": 2.050359712230216e-06, "loss": 0.2343, "step": 115 }, { "epoch": 0.037589112119248216, "grad_norm": 1.736197829246521, "learning_rate": 2.0683453237410072e-06, "loss": 0.2161, "step": 116 }, { "epoch": 0.03791315618924174, "grad_norm": 1.6097439527511597, "learning_rate": 2.0863309352517987e-06, "loss": 0.2004, "step": 117 }, { "epoch": 0.03823720025923526, "grad_norm": 1.6844645738601685, "learning_rate": 2.10431654676259e-06, "loss": 0.1974, "step": 118 }, { "epoch": 0.03856124432922878, "grad_norm": 1.8262434005737305, "learning_rate": 2.1223021582733816e-06, "loss": 0.2268, "step": 119 }, { "epoch": 0.03888528839922229, "grad_norm": 2.0028789043426514, "learning_rate": 2.140287769784173e-06, "loss": 0.2118, "step": 120 }, { "epoch": 0.03920933246921581, "grad_norm": 1.803137183189392, "learning_rate": 2.158273381294964e-06, "loss": 0.212, "step": 121 }, { "epoch": 0.03953337653920933, "grad_norm": 1.7705177068710327, "learning_rate": 2.1762589928057555e-06, "loss": 0.2231, "step": 122 }, { "epoch": 0.03985742060920285, "grad_norm": 1.6794323921203613, "learning_rate": 2.194244604316547e-06, "loss": 0.2138, "step": 123 }, { "epoch": 0.04018146467919637, "grad_norm": 1.6739569902420044, "learning_rate": 2.2122302158273384e-06, "loss": 0.2289, "step": 124 }, { "epoch": 0.04050550874918989, "grad_norm": 1.684012532234192, "learning_rate": 2.23021582733813e-06, "loss": 0.2059, "step": 125 }, { "epoch": 0.04082955281918341, "grad_norm": 1.6604381799697876, "learning_rate": 2.248201438848921e-06, "loss": 0.211, "step": 126 }, { "epoch": 0.04115359688917693, "grad_norm": 1.837553858757019, "learning_rate": 2.2661870503597123e-06, "loss": 0.2355, "step": 127 }, { "epoch": 0.04147764095917045, "grad_norm": 1.6966252326965332, "learning_rate": 2.2841726618705037e-06, "loss": 0.1959, "step": 128 }, { "epoch": 0.04180168502916397, "grad_norm": 2.023315906524658, "learning_rate": 2.302158273381295e-06, "loss": 0.2232, "step": 129 }, { "epoch": 0.04212572909915749, "grad_norm": 1.8194133043289185, "learning_rate": 2.3201438848920866e-06, "loss": 0.2147, "step": 130 }, { "epoch": 0.042449773169151, "grad_norm": 1.71575927734375, "learning_rate": 2.3381294964028776e-06, "loss": 0.2104, "step": 131 }, { "epoch": 0.04277381723914452, "grad_norm": 1.6668249368667603, "learning_rate": 2.3561151079136695e-06, "loss": 0.2085, "step": 132 }, { "epoch": 0.04309786130913804, "grad_norm": 1.722672700881958, "learning_rate": 2.3741007194244605e-06, "loss": 0.1993, "step": 133 }, { "epoch": 0.04342190537913156, "grad_norm": 1.736364722251892, "learning_rate": 2.392086330935252e-06, "loss": 0.2156, "step": 134 }, { "epoch": 0.043745949449125084, "grad_norm": 1.9815757274627686, "learning_rate": 2.4100719424460434e-06, "loss": 0.2081, "step": 135 }, { "epoch": 0.0440699935191186, "grad_norm": 1.8326865434646606, "learning_rate": 2.4280575539568344e-06, "loss": 0.2107, "step": 136 }, { "epoch": 0.04439403758911212, "grad_norm": 2.0552074909210205, "learning_rate": 2.4460431654676263e-06, "loss": 0.2275, "step": 137 }, { "epoch": 0.04471808165910564, "grad_norm": 1.8263514041900635, "learning_rate": 2.4640287769784173e-06, "loss": 0.1993, "step": 138 }, { "epoch": 0.04504212572909916, "grad_norm": 1.835706353187561, "learning_rate": 2.4820143884892088e-06, "loss": 0.2067, "step": 139 }, { "epoch": 0.04536616979909268, "grad_norm": 1.6910218000411987, "learning_rate": 2.5e-06, "loss": 0.2078, "step": 140 }, { "epoch": 0.04569021386908619, "grad_norm": 1.7438052892684937, "learning_rate": 2.5179856115107916e-06, "loss": 0.2288, "step": 141 }, { "epoch": 0.04601425793907971, "grad_norm": 1.739857792854309, "learning_rate": 2.5359712230215827e-06, "loss": 0.237, "step": 142 }, { "epoch": 0.04633830200907323, "grad_norm": 1.7011078596115112, "learning_rate": 2.5539568345323745e-06, "loss": 0.2159, "step": 143 }, { "epoch": 0.046662346079066754, "grad_norm": 1.6127219200134277, "learning_rate": 2.571942446043166e-06, "loss": 0.2079, "step": 144 }, { "epoch": 0.046986390149060274, "grad_norm": 1.6532703638076782, "learning_rate": 2.589928057553957e-06, "loss": 0.2019, "step": 145 }, { "epoch": 0.047310434219053794, "grad_norm": 1.6992716789245605, "learning_rate": 2.6079136690647484e-06, "loss": 0.1957, "step": 146 }, { "epoch": 0.04763447828904731, "grad_norm": 2.0122122764587402, "learning_rate": 2.6258992805755395e-06, "loss": 0.2147, "step": 147 }, { "epoch": 0.04795852235904083, "grad_norm": 1.7450064420700073, "learning_rate": 2.6438848920863313e-06, "loss": 0.2197, "step": 148 }, { "epoch": 0.04828256642903435, "grad_norm": 1.612120270729065, "learning_rate": 2.6618705035971228e-06, "loss": 0.1964, "step": 149 }, { "epoch": 0.04860661049902787, "grad_norm": 1.8553955554962158, "learning_rate": 2.679856115107914e-06, "loss": 0.1978, "step": 150 }, { "epoch": 0.04893065456902139, "grad_norm": 1.8830714225769043, "learning_rate": 2.6978417266187052e-06, "loss": 0.2102, "step": 151 }, { "epoch": 0.0492546986390149, "grad_norm": 1.759135127067566, "learning_rate": 2.7158273381294963e-06, "loss": 0.2221, "step": 152 }, { "epoch": 0.04957874270900842, "grad_norm": 1.7659202814102173, "learning_rate": 2.733812949640288e-06, "loss": 0.2033, "step": 153 }, { "epoch": 0.049902786779001944, "grad_norm": 1.6272228956222534, "learning_rate": 2.7517985611510796e-06, "loss": 0.2066, "step": 154 }, { "epoch": 0.050226830848995464, "grad_norm": 1.6313395500183105, "learning_rate": 2.7697841726618706e-06, "loss": 0.2106, "step": 155 }, { "epoch": 0.050550874918988985, "grad_norm": 2.1572105884552, "learning_rate": 2.787769784172662e-06, "loss": 0.2298, "step": 156 }, { "epoch": 0.0508749189889825, "grad_norm": 1.8465955257415771, "learning_rate": 2.805755395683453e-06, "loss": 0.2172, "step": 157 }, { "epoch": 0.05119896305897602, "grad_norm": 1.7251900434494019, "learning_rate": 2.823741007194245e-06, "loss": 0.2014, "step": 158 }, { "epoch": 0.05152300712896954, "grad_norm": 1.6110676527023315, "learning_rate": 2.8417266187050364e-06, "loss": 0.2063, "step": 159 }, { "epoch": 0.05184705119896306, "grad_norm": 1.714495301246643, "learning_rate": 2.8597122302158274e-06, "loss": 0.2074, "step": 160 }, { "epoch": 0.05217109526895658, "grad_norm": 1.6673401594161987, "learning_rate": 2.877697841726619e-06, "loss": 0.201, "step": 161 }, { "epoch": 0.0524951393389501, "grad_norm": 1.5677164793014526, "learning_rate": 2.89568345323741e-06, "loss": 0.2016, "step": 162 }, { "epoch": 0.052819183408943614, "grad_norm": 1.8159863948822021, "learning_rate": 2.9136690647482017e-06, "loss": 0.2317, "step": 163 }, { "epoch": 0.053143227478937134, "grad_norm": 1.7845139503479004, "learning_rate": 2.931654676258993e-06, "loss": 0.219, "step": 164 }, { "epoch": 0.053467271548930655, "grad_norm": 1.5887399911880493, "learning_rate": 2.949640287769784e-06, "loss": 0.2021, "step": 165 }, { "epoch": 0.053791315618924175, "grad_norm": 1.6343255043029785, "learning_rate": 2.9676258992805756e-06, "loss": 0.201, "step": 166 }, { "epoch": 0.054115359688917695, "grad_norm": 1.9027572870254517, "learning_rate": 2.985611510791367e-06, "loss": 0.2172, "step": 167 }, { "epoch": 0.05443940375891121, "grad_norm": 1.7987514734268188, "learning_rate": 3.0035971223021585e-06, "loss": 0.2225, "step": 168 }, { "epoch": 0.05476344782890473, "grad_norm": 1.7745798826217651, "learning_rate": 3.02158273381295e-06, "loss": 0.2089, "step": 169 }, { "epoch": 0.05508749189889825, "grad_norm": 1.7217110395431519, "learning_rate": 3.0395683453237414e-06, "loss": 0.2015, "step": 170 }, { "epoch": 0.05541153596889177, "grad_norm": 1.7840914726257324, "learning_rate": 3.0575539568345324e-06, "loss": 0.2309, "step": 171 }, { "epoch": 0.05573558003888529, "grad_norm": 1.6944200992584229, "learning_rate": 3.075539568345324e-06, "loss": 0.2101, "step": 172 }, { "epoch": 0.056059624108878804, "grad_norm": 1.6712582111358643, "learning_rate": 3.0935251798561158e-06, "loss": 0.2138, "step": 173 }, { "epoch": 0.056383668178872325, "grad_norm": 1.7150169610977173, "learning_rate": 3.1115107913669068e-06, "loss": 0.2295, "step": 174 }, { "epoch": 0.056707712248865845, "grad_norm": 1.8611018657684326, "learning_rate": 3.1294964028776982e-06, "loss": 0.2064, "step": 175 }, { "epoch": 0.057031756318859365, "grad_norm": 1.6113686561584473, "learning_rate": 3.1474820143884892e-06, "loss": 0.2012, "step": 176 }, { "epoch": 0.057355800388852886, "grad_norm": 2.0248565673828125, "learning_rate": 3.1654676258992807e-06, "loss": 0.2188, "step": 177 }, { "epoch": 0.057679844458846406, "grad_norm": 1.7024465799331665, "learning_rate": 3.1834532374100726e-06, "loss": 0.1992, "step": 178 }, { "epoch": 0.05800388852883992, "grad_norm": 1.689802885055542, "learning_rate": 3.2014388489208636e-06, "loss": 0.2075, "step": 179 }, { "epoch": 0.05832793259883344, "grad_norm": 1.6592146158218384, "learning_rate": 3.219424460431655e-06, "loss": 0.2079, "step": 180 }, { "epoch": 0.05865197666882696, "grad_norm": 1.5495208501815796, "learning_rate": 3.237410071942446e-06, "loss": 0.178, "step": 181 }, { "epoch": 0.05897602073882048, "grad_norm": 1.6852517127990723, "learning_rate": 3.2553956834532375e-06, "loss": 0.213, "step": 182 }, { "epoch": 0.059300064808814, "grad_norm": 1.772308349609375, "learning_rate": 3.2733812949640294e-06, "loss": 0.2156, "step": 183 }, { "epoch": 0.059624108878807515, "grad_norm": 1.5808868408203125, "learning_rate": 3.2913669064748204e-06, "loss": 0.2204, "step": 184 }, { "epoch": 0.059948152948801035, "grad_norm": 1.6497232913970947, "learning_rate": 3.309352517985612e-06, "loss": 0.1995, "step": 185 }, { "epoch": 0.060272197018794556, "grad_norm": 1.7812473773956299, "learning_rate": 3.327338129496403e-06, "loss": 0.2339, "step": 186 }, { "epoch": 0.060596241088788076, "grad_norm": 1.710668921470642, "learning_rate": 3.3453237410071943e-06, "loss": 0.2096, "step": 187 }, { "epoch": 0.0609202851587816, "grad_norm": 1.5381975173950195, "learning_rate": 3.363309352517986e-06, "loss": 0.1935, "step": 188 }, { "epoch": 0.06124432922877511, "grad_norm": 1.5938481092453003, "learning_rate": 3.381294964028777e-06, "loss": 0.1973, "step": 189 }, { "epoch": 0.06156837329876863, "grad_norm": 1.6954132318496704, "learning_rate": 3.3992805755395686e-06, "loss": 0.21, "step": 190 }, { "epoch": 0.06189241736876215, "grad_norm": 1.6907073259353638, "learning_rate": 3.4172661870503596e-06, "loss": 0.1942, "step": 191 }, { "epoch": 0.06221646143875567, "grad_norm": 1.7327340841293335, "learning_rate": 3.435251798561151e-06, "loss": 0.2102, "step": 192 }, { "epoch": 0.06254050550874919, "grad_norm": 1.7439619302749634, "learning_rate": 3.453237410071943e-06, "loss": 0.2111, "step": 193 }, { "epoch": 0.06286454957874271, "grad_norm": 1.8764787912368774, "learning_rate": 3.471223021582734e-06, "loss": 0.2021, "step": 194 }, { "epoch": 0.06318859364873623, "grad_norm": 1.610394835472107, "learning_rate": 3.4892086330935254e-06, "loss": 0.1965, "step": 195 }, { "epoch": 0.06351263771872975, "grad_norm": 1.7442972660064697, "learning_rate": 3.507194244604317e-06, "loss": 0.214, "step": 196 }, { "epoch": 0.06383668178872326, "grad_norm": 1.5908910036087036, "learning_rate": 3.525179856115108e-06, "loss": 0.2101, "step": 197 }, { "epoch": 0.06416072585871678, "grad_norm": 1.7243609428405762, "learning_rate": 3.5431654676258998e-06, "loss": 0.229, "step": 198 }, { "epoch": 0.0644847699287103, "grad_norm": 1.6044604778289795, "learning_rate": 3.561151079136691e-06, "loss": 0.2052, "step": 199 }, { "epoch": 0.06480881399870382, "grad_norm": 1.5355489253997803, "learning_rate": 3.5791366906474822e-06, "loss": 0.2168, "step": 200 }, { "epoch": 0.06513285806869734, "grad_norm": 1.5240367650985718, "learning_rate": 3.5971223021582737e-06, "loss": 0.1988, "step": 201 }, { "epoch": 0.06545690213869086, "grad_norm": 1.5550951957702637, "learning_rate": 3.6151079136690647e-06, "loss": 0.1931, "step": 202 }, { "epoch": 0.06578094620868438, "grad_norm": 1.6683835983276367, "learning_rate": 3.6330935251798566e-06, "loss": 0.2151, "step": 203 }, { "epoch": 0.0661049902786779, "grad_norm": 1.51242995262146, "learning_rate": 3.651079136690648e-06, "loss": 0.1894, "step": 204 }, { "epoch": 0.06642903434867142, "grad_norm": 1.6069519519805908, "learning_rate": 3.669064748201439e-06, "loss": 0.2019, "step": 205 }, { "epoch": 0.06675307841866494, "grad_norm": 1.5711833238601685, "learning_rate": 3.6870503597122305e-06, "loss": 0.1927, "step": 206 }, { "epoch": 0.06707712248865846, "grad_norm": 1.57998526096344, "learning_rate": 3.7050359712230215e-06, "loss": 0.2146, "step": 207 }, { "epoch": 0.06740116655865197, "grad_norm": 2.213890552520752, "learning_rate": 3.7230215827338134e-06, "loss": 0.2017, "step": 208 }, { "epoch": 0.06772521062864549, "grad_norm": 2.3263840675354004, "learning_rate": 3.741007194244605e-06, "loss": 0.1953, "step": 209 }, { "epoch": 0.06804925469863901, "grad_norm": 1.5306557416915894, "learning_rate": 3.758992805755396e-06, "loss": 0.2037, "step": 210 }, { "epoch": 0.06837329876863253, "grad_norm": 1.7475553750991821, "learning_rate": 3.7769784172661873e-06, "loss": 0.2144, "step": 211 }, { "epoch": 0.06869734283862605, "grad_norm": 1.5679391622543335, "learning_rate": 3.794964028776979e-06, "loss": 0.1995, "step": 212 }, { "epoch": 0.06902138690861957, "grad_norm": 1.5701528787612915, "learning_rate": 3.81294964028777e-06, "loss": 0.1902, "step": 213 }, { "epoch": 0.06934543097861309, "grad_norm": 1.6837743520736694, "learning_rate": 3.830935251798562e-06, "loss": 0.1998, "step": 214 }, { "epoch": 0.06966947504860661, "grad_norm": 1.5841065645217896, "learning_rate": 3.848920863309353e-06, "loss": 0.2024, "step": 215 }, { "epoch": 0.06999351911860013, "grad_norm": 1.5467228889465332, "learning_rate": 3.866906474820144e-06, "loss": 0.1954, "step": 216 }, { "epoch": 0.07031756318859365, "grad_norm": 1.5360314846038818, "learning_rate": 3.884892086330936e-06, "loss": 0.1948, "step": 217 }, { "epoch": 0.07064160725858717, "grad_norm": 1.6404602527618408, "learning_rate": 3.902877697841727e-06, "loss": 0.2148, "step": 218 }, { "epoch": 0.07096565132858068, "grad_norm": 1.636122465133667, "learning_rate": 3.920863309352518e-06, "loss": 0.2039, "step": 219 }, { "epoch": 0.0712896953985742, "grad_norm": 1.574469804763794, "learning_rate": 3.938848920863309e-06, "loss": 0.2205, "step": 220 }, { "epoch": 0.07161373946856772, "grad_norm": 1.6298938989639282, "learning_rate": 3.956834532374101e-06, "loss": 0.2021, "step": 221 }, { "epoch": 0.07193778353856124, "grad_norm": 1.631216287612915, "learning_rate": 3.974820143884892e-06, "loss": 0.2062, "step": 222 }, { "epoch": 0.07226182760855476, "grad_norm": 1.546505331993103, "learning_rate": 3.992805755395684e-06, "loss": 0.2061, "step": 223 }, { "epoch": 0.07258587167854828, "grad_norm": 1.629062294960022, "learning_rate": 4.010791366906475e-06, "loss": 0.2172, "step": 224 }, { "epoch": 0.0729099157485418, "grad_norm": 1.4732308387756348, "learning_rate": 4.028776978417267e-06, "loss": 0.212, "step": 225 }, { "epoch": 0.07323395981853532, "grad_norm": 1.3658744096755981, "learning_rate": 4.046762589928058e-06, "loss": 0.1766, "step": 226 }, { "epoch": 0.07355800388852884, "grad_norm": 1.543238639831543, "learning_rate": 4.0647482014388495e-06, "loss": 0.2065, "step": 227 }, { "epoch": 0.07388204795852236, "grad_norm": 1.507412075996399, "learning_rate": 4.082733812949641e-06, "loss": 0.1977, "step": 228 }, { "epoch": 0.07420609202851587, "grad_norm": 1.5816974639892578, "learning_rate": 4.100719424460432e-06, "loss": 0.2148, "step": 229 }, { "epoch": 0.07453013609850939, "grad_norm": 1.497773289680481, "learning_rate": 4.118705035971223e-06, "loss": 0.2019, "step": 230 }, { "epoch": 0.07485418016850291, "grad_norm": 1.5865081548690796, "learning_rate": 4.1366906474820145e-06, "loss": 0.2074, "step": 231 }, { "epoch": 0.07517822423849643, "grad_norm": 1.6510618925094604, "learning_rate": 4.154676258992807e-06, "loss": 0.2251, "step": 232 }, { "epoch": 0.07550226830848995, "grad_norm": 1.5500881671905518, "learning_rate": 4.172661870503597e-06, "loss": 0.2134, "step": 233 }, { "epoch": 0.07582631237848347, "grad_norm": 1.786974549293518, "learning_rate": 4.190647482014389e-06, "loss": 0.2086, "step": 234 }, { "epoch": 0.076150356448477, "grad_norm": 1.4506052732467651, "learning_rate": 4.20863309352518e-06, "loss": 0.2073, "step": 235 }, { "epoch": 0.07647440051847051, "grad_norm": 1.7376664876937866, "learning_rate": 4.226618705035972e-06, "loss": 0.235, "step": 236 }, { "epoch": 0.07679844458846403, "grad_norm": 1.600990891456604, "learning_rate": 4.244604316546763e-06, "loss": 0.2214, "step": 237 }, { "epoch": 0.07712248865845756, "grad_norm": 1.6190606355667114, "learning_rate": 4.2625899280575546e-06, "loss": 0.2103, "step": 238 }, { "epoch": 0.07744653272845108, "grad_norm": 1.4629621505737305, "learning_rate": 4.280575539568346e-06, "loss": 0.2059, "step": 239 }, { "epoch": 0.07777057679844458, "grad_norm": 1.7007418870925903, "learning_rate": 4.298561151079137e-06, "loss": 0.2195, "step": 240 }, { "epoch": 0.0780946208684381, "grad_norm": 1.6682326793670654, "learning_rate": 4.316546762589928e-06, "loss": 0.2291, "step": 241 }, { "epoch": 0.07841866493843162, "grad_norm": 1.6797760725021362, "learning_rate": 4.33453237410072e-06, "loss": 0.2099, "step": 242 }, { "epoch": 0.07874270900842514, "grad_norm": 1.5488982200622559, "learning_rate": 4.352517985611511e-06, "loss": 0.1945, "step": 243 }, { "epoch": 0.07906675307841866, "grad_norm": 1.6398717164993286, "learning_rate": 4.370503597122302e-06, "loss": 0.2127, "step": 244 }, { "epoch": 0.07939079714841218, "grad_norm": 1.6234924793243408, "learning_rate": 4.388489208633094e-06, "loss": 0.22, "step": 245 }, { "epoch": 0.0797148412184057, "grad_norm": 1.6633092164993286, "learning_rate": 4.406474820143885e-06, "loss": 0.1997, "step": 246 }, { "epoch": 0.08003888528839923, "grad_norm": 1.5225783586502075, "learning_rate": 4.424460431654677e-06, "loss": 0.2063, "step": 247 }, { "epoch": 0.08036292935839275, "grad_norm": 1.5646318197250366, "learning_rate": 4.442446043165468e-06, "loss": 0.2013, "step": 248 }, { "epoch": 0.08068697342838627, "grad_norm": 1.5946208238601685, "learning_rate": 4.46043165467626e-06, "loss": 0.1889, "step": 249 }, { "epoch": 0.08101101749837979, "grad_norm": 1.4035966396331787, "learning_rate": 4.478417266187051e-06, "loss": 0.1801, "step": 250 }, { "epoch": 0.08133506156837329, "grad_norm": 1.5441200733184814, "learning_rate": 4.496402877697842e-06, "loss": 0.2025, "step": 251 }, { "epoch": 0.08165910563836681, "grad_norm": 1.6632438898086548, "learning_rate": 4.514388489208634e-06, "loss": 0.1919, "step": 252 }, { "epoch": 0.08198314970836033, "grad_norm": 1.5009981393814087, "learning_rate": 4.5323741007194245e-06, "loss": 0.2122, "step": 253 }, { "epoch": 0.08230719377835385, "grad_norm": 1.409143090248108, "learning_rate": 4.550359712230216e-06, "loss": 0.1963, "step": 254 }, { "epoch": 0.08263123784834737, "grad_norm": 1.8114739656448364, "learning_rate": 4.5683453237410074e-06, "loss": 0.2144, "step": 255 }, { "epoch": 0.0829552819183409, "grad_norm": 1.529091238975525, "learning_rate": 4.586330935251799e-06, "loss": 0.2014, "step": 256 }, { "epoch": 0.08327932598833442, "grad_norm": 1.4776802062988281, "learning_rate": 4.60431654676259e-06, "loss": 0.1995, "step": 257 }, { "epoch": 0.08360337005832794, "grad_norm": 1.6742963790893555, "learning_rate": 4.622302158273382e-06, "loss": 0.1949, "step": 258 }, { "epoch": 0.08392741412832146, "grad_norm": 1.5035185813903809, "learning_rate": 4.640287769784173e-06, "loss": 0.2033, "step": 259 }, { "epoch": 0.08425145819831498, "grad_norm": 1.47641122341156, "learning_rate": 4.658273381294965e-06, "loss": 0.2082, "step": 260 }, { "epoch": 0.08457550226830848, "grad_norm": 1.4812216758728027, "learning_rate": 4.676258992805755e-06, "loss": 0.2137, "step": 261 }, { "epoch": 0.084899546338302, "grad_norm": 1.5529314279556274, "learning_rate": 4.6942446043165475e-06, "loss": 0.2112, "step": 262 }, { "epoch": 0.08522359040829552, "grad_norm": 1.3942975997924805, "learning_rate": 4.712230215827339e-06, "loss": 0.1954, "step": 263 }, { "epoch": 0.08554763447828904, "grad_norm": 1.439681053161621, "learning_rate": 4.73021582733813e-06, "loss": 0.1926, "step": 264 }, { "epoch": 0.08587167854828257, "grad_norm": 1.56781005859375, "learning_rate": 4.748201438848921e-06, "loss": 0.2144, "step": 265 }, { "epoch": 0.08619572261827609, "grad_norm": 1.6256016492843628, "learning_rate": 4.7661870503597125e-06, "loss": 0.2052, "step": 266 }, { "epoch": 0.0865197666882696, "grad_norm": 1.5104886293411255, "learning_rate": 4.784172661870504e-06, "loss": 0.2063, "step": 267 }, { "epoch": 0.08684381075826313, "grad_norm": 1.588659405708313, "learning_rate": 4.802158273381295e-06, "loss": 0.1996, "step": 268 }, { "epoch": 0.08716785482825665, "grad_norm": 1.7575246095657349, "learning_rate": 4.820143884892087e-06, "loss": 0.2145, "step": 269 }, { "epoch": 0.08749189889825017, "grad_norm": 1.5401091575622559, "learning_rate": 4.838129496402878e-06, "loss": 0.2057, "step": 270 }, { "epoch": 0.08781594296824369, "grad_norm": 1.5030951499938965, "learning_rate": 4.856115107913669e-06, "loss": 0.1922, "step": 271 }, { "epoch": 0.0881399870382372, "grad_norm": 1.4104645252227783, "learning_rate": 4.874100719424461e-06, "loss": 0.201, "step": 272 }, { "epoch": 0.08846403110823071, "grad_norm": 1.5870946645736694, "learning_rate": 4.892086330935253e-06, "loss": 0.209, "step": 273 }, { "epoch": 0.08878807517822424, "grad_norm": 1.4110219478607178, "learning_rate": 4.910071942446043e-06, "loss": 0.2037, "step": 274 }, { "epoch": 0.08911211924821776, "grad_norm": 1.5320945978164673, "learning_rate": 4.928057553956835e-06, "loss": 0.2145, "step": 275 }, { "epoch": 0.08943616331821128, "grad_norm": 1.565433382987976, "learning_rate": 4.946043165467626e-06, "loss": 0.2311, "step": 276 }, { "epoch": 0.0897602073882048, "grad_norm": 1.4688873291015625, "learning_rate": 4.9640287769784175e-06, "loss": 0.2169, "step": 277 }, { "epoch": 0.09008425145819832, "grad_norm": 1.3488482236862183, "learning_rate": 4.982014388489209e-06, "loss": 0.1774, "step": 278 }, { "epoch": 0.09040829552819184, "grad_norm": 1.523825764656067, "learning_rate": 5e-06, "loss": 0.1916, "step": 279 }, { "epoch": 0.09073233959818536, "grad_norm": 1.4836606979370117, "learning_rate": 4.999999847012101e-06, "loss": 0.2061, "step": 280 }, { "epoch": 0.09105638366817888, "grad_norm": 1.5318763256072998, "learning_rate": 4.9999993880484235e-06, "loss": 0.1889, "step": 281 }, { "epoch": 0.09138042773817238, "grad_norm": 1.487181544303894, "learning_rate": 4.999998623109022e-06, "loss": 0.2027, "step": 282 }, { "epoch": 0.0917044718081659, "grad_norm": 1.6822458505630493, "learning_rate": 4.99999755219399e-06, "loss": 0.2108, "step": 283 }, { "epoch": 0.09202851587815943, "grad_norm": 1.552655816078186, "learning_rate": 4.9999961753034595e-06, "loss": 0.2097, "step": 284 }, { "epoch": 0.09235255994815295, "grad_norm": 1.4975074529647827, "learning_rate": 4.9999944924376e-06, "loss": 0.1946, "step": 285 }, { "epoch": 0.09267660401814647, "grad_norm": 1.4986190795898438, "learning_rate": 4.999992503596616e-06, "loss": 0.2013, "step": 286 }, { "epoch": 0.09300064808813999, "grad_norm": 1.628769040107727, "learning_rate": 4.999990208780751e-06, "loss": 0.2232, "step": 287 }, { "epoch": 0.09332469215813351, "grad_norm": 1.578641414642334, "learning_rate": 4.999987607990287e-06, "loss": 0.2099, "step": 288 }, { "epoch": 0.09364873622812703, "grad_norm": 1.4884730577468872, "learning_rate": 4.999984701225542e-06, "loss": 0.2151, "step": 289 }, { "epoch": 0.09397278029812055, "grad_norm": 1.5223939418792725, "learning_rate": 4.9999814884868705e-06, "loss": 0.2058, "step": 290 }, { "epoch": 0.09429682436811407, "grad_norm": 1.3765919208526611, "learning_rate": 4.999977969774666e-06, "loss": 0.1892, "step": 291 }, { "epoch": 0.09462086843810759, "grad_norm": 1.4410372972488403, "learning_rate": 4.99997414508936e-06, "loss": 0.2049, "step": 292 }, { "epoch": 0.0949449125081011, "grad_norm": 1.507271409034729, "learning_rate": 4.999970014431421e-06, "loss": 0.199, "step": 293 }, { "epoch": 0.09526895657809462, "grad_norm": 1.416298747062683, "learning_rate": 4.999965577801354e-06, "loss": 0.1996, "step": 294 }, { "epoch": 0.09559300064808814, "grad_norm": 1.3537237644195557, "learning_rate": 4.999960835199701e-06, "loss": 0.1947, "step": 295 }, { "epoch": 0.09591704471808166, "grad_norm": 1.4421045780181885, "learning_rate": 4.999955786627042e-06, "loss": 0.1857, "step": 296 }, { "epoch": 0.09624108878807518, "grad_norm": 1.487221598625183, "learning_rate": 4.999950432083998e-06, "loss": 0.212, "step": 297 }, { "epoch": 0.0965651328580687, "grad_norm": 1.4410208463668823, "learning_rate": 4.999944771571222e-06, "loss": 0.1949, "step": 298 }, { "epoch": 0.09688917692806222, "grad_norm": 1.4714524745941162, "learning_rate": 4.999938805089407e-06, "loss": 0.2045, "step": 299 }, { "epoch": 0.09721322099805574, "grad_norm": 1.4626051187515259, "learning_rate": 4.999932532639285e-06, "loss": 0.2116, "step": 300 }, { "epoch": 0.09753726506804926, "grad_norm": 1.4867863655090332, "learning_rate": 4.99992595422162e-06, "loss": 0.2194, "step": 301 }, { "epoch": 0.09786130913804278, "grad_norm": 1.3757691383361816, "learning_rate": 4.9999190698372216e-06, "loss": 0.1946, "step": 302 }, { "epoch": 0.0981853532080363, "grad_norm": 1.4051896333694458, "learning_rate": 4.9999118794869285e-06, "loss": 0.2055, "step": 303 }, { "epoch": 0.0985093972780298, "grad_norm": 1.3222601413726807, "learning_rate": 4.999904383171623e-06, "loss": 0.1899, "step": 304 }, { "epoch": 0.09883344134802333, "grad_norm": 1.4336018562316895, "learning_rate": 4.999896580892221e-06, "loss": 0.2018, "step": 305 }, { "epoch": 0.09915748541801685, "grad_norm": 1.3262324333190918, "learning_rate": 4.99988847264968e-06, "loss": 0.1848, "step": 306 }, { "epoch": 0.09948152948801037, "grad_norm": 1.374320387840271, "learning_rate": 4.99988005844499e-06, "loss": 0.1948, "step": 307 }, { "epoch": 0.09980557355800389, "grad_norm": 1.347687005996704, "learning_rate": 4.999871338279181e-06, "loss": 0.189, "step": 308 }, { "epoch": 0.10012961762799741, "grad_norm": 1.3867069482803345, "learning_rate": 4.999862312153322e-06, "loss": 0.182, "step": 309 }, { "epoch": 0.10045366169799093, "grad_norm": 1.3333046436309814, "learning_rate": 4.999852980068516e-06, "loss": 0.1862, "step": 310 }, { "epoch": 0.10077770576798445, "grad_norm": 1.2988485097885132, "learning_rate": 4.9998433420259055e-06, "loss": 0.19, "step": 311 }, { "epoch": 0.10110174983797797, "grad_norm": 1.490419864654541, "learning_rate": 4.99983339802667e-06, "loss": 0.2118, "step": 312 }, { "epoch": 0.10142579390797149, "grad_norm": 1.5834200382232666, "learning_rate": 4.999823148072027e-06, "loss": 0.1984, "step": 313 }, { "epoch": 0.101749837977965, "grad_norm": 1.3932247161865234, "learning_rate": 4.999812592163232e-06, "loss": 0.1853, "step": 314 }, { "epoch": 0.10207388204795852, "grad_norm": 1.5058470964431763, "learning_rate": 4.9998017303015735e-06, "loss": 0.2086, "step": 315 }, { "epoch": 0.10239792611795204, "grad_norm": 1.5837807655334473, "learning_rate": 4.999790562488385e-06, "loss": 0.1863, "step": 316 }, { "epoch": 0.10272197018794556, "grad_norm": 1.9013391733169556, "learning_rate": 4.999779088725031e-06, "loss": 0.1974, "step": 317 }, { "epoch": 0.10304601425793908, "grad_norm": 1.4886730909347534, "learning_rate": 4.999767309012916e-06, "loss": 0.2042, "step": 318 }, { "epoch": 0.1033700583279326, "grad_norm": 1.4408074617385864, "learning_rate": 4.999755223353483e-06, "loss": 0.1949, "step": 319 }, { "epoch": 0.10369410239792612, "grad_norm": 1.3255444765090942, "learning_rate": 4.9997428317482086e-06, "loss": 0.189, "step": 320 }, { "epoch": 0.10401814646791964, "grad_norm": 1.4605129957199097, "learning_rate": 4.999730134198612e-06, "loss": 0.2046, "step": 321 }, { "epoch": 0.10434219053791316, "grad_norm": 1.4560701847076416, "learning_rate": 4.999717130706247e-06, "loss": 0.1994, "step": 322 }, { "epoch": 0.10466623460790668, "grad_norm": 1.5182634592056274, "learning_rate": 4.999703821272702e-06, "loss": 0.2241, "step": 323 }, { "epoch": 0.1049902786779002, "grad_norm": 1.4704344272613525, "learning_rate": 4.99969020589961e-06, "loss": 0.2173, "step": 324 }, { "epoch": 0.10531432274789371, "grad_norm": 1.4940931797027588, "learning_rate": 4.999676284588635e-06, "loss": 0.2165, "step": 325 }, { "epoch": 0.10563836681788723, "grad_norm": 1.3513116836547852, "learning_rate": 4.999662057341482e-06, "loss": 0.1954, "step": 326 }, { "epoch": 0.10596241088788075, "grad_norm": 1.4675105810165405, "learning_rate": 4.999647524159892e-06, "loss": 0.1961, "step": 327 }, { "epoch": 0.10628645495787427, "grad_norm": 1.3306858539581299, "learning_rate": 4.9996326850456435e-06, "loss": 0.1886, "step": 328 }, { "epoch": 0.10661049902786779, "grad_norm": 1.3358745574951172, "learning_rate": 4.999617540000552e-06, "loss": 0.1833, "step": 329 }, { "epoch": 0.10693454309786131, "grad_norm": 1.5109752416610718, "learning_rate": 4.999602089026472e-06, "loss": 0.2237, "step": 330 }, { "epoch": 0.10725858716785483, "grad_norm": 1.499991536140442, "learning_rate": 4.999586332125294e-06, "loss": 0.2212, "step": 331 }, { "epoch": 0.10758263123784835, "grad_norm": 1.4174326658248901, "learning_rate": 4.9995702692989476e-06, "loss": 0.1931, "step": 332 }, { "epoch": 0.10790667530784187, "grad_norm": 1.3674904108047485, "learning_rate": 4.999553900549398e-06, "loss": 0.1928, "step": 333 }, { "epoch": 0.10823071937783539, "grad_norm": 1.3582217693328857, "learning_rate": 4.999537225878648e-06, "loss": 0.1884, "step": 334 }, { "epoch": 0.10855476344782891, "grad_norm": 1.3601632118225098, "learning_rate": 4.999520245288739e-06, "loss": 0.1849, "step": 335 }, { "epoch": 0.10887880751782242, "grad_norm": 1.4074798822402954, "learning_rate": 4.999502958781749e-06, "loss": 0.2129, "step": 336 }, { "epoch": 0.10920285158781594, "grad_norm": 1.4298856258392334, "learning_rate": 4.999485366359794e-06, "loss": 0.2149, "step": 337 }, { "epoch": 0.10952689565780946, "grad_norm": 1.4826171398162842, "learning_rate": 4.999467468025028e-06, "loss": 0.216, "step": 338 }, { "epoch": 0.10985093972780298, "grad_norm": 1.3365073204040527, "learning_rate": 4.99944926377964e-06, "loss": 0.1888, "step": 339 }, { "epoch": 0.1101749837977965, "grad_norm": 1.359991192817688, "learning_rate": 4.999430753625858e-06, "loss": 0.1882, "step": 340 }, { "epoch": 0.11049902786779002, "grad_norm": 1.4163484573364258, "learning_rate": 4.999411937565949e-06, "loss": 0.2003, "step": 341 }, { "epoch": 0.11082307193778354, "grad_norm": 1.3787403106689453, "learning_rate": 4.999392815602214e-06, "loss": 0.207, "step": 342 }, { "epoch": 0.11114711600777706, "grad_norm": 1.3745408058166504, "learning_rate": 4.999373387736996e-06, "loss": 0.1864, "step": 343 }, { "epoch": 0.11147116007777058, "grad_norm": 1.473127007484436, "learning_rate": 4.999353653972669e-06, "loss": 0.188, "step": 344 }, { "epoch": 0.1117952041477641, "grad_norm": 1.4026800394058228, "learning_rate": 4.999333614311652e-06, "loss": 0.1809, "step": 345 }, { "epoch": 0.11211924821775761, "grad_norm": 1.5205342769622803, "learning_rate": 4.999313268756396e-06, "loss": 0.1965, "step": 346 }, { "epoch": 0.11244329228775113, "grad_norm": 1.5150617361068726, "learning_rate": 4.99929261730939e-06, "loss": 0.214, "step": 347 }, { "epoch": 0.11276733635774465, "grad_norm": 1.642708659172058, "learning_rate": 4.999271659973164e-06, "loss": 0.2094, "step": 348 }, { "epoch": 0.11309138042773817, "grad_norm": 1.6056296825408936, "learning_rate": 4.999250396750281e-06, "loss": 0.2051, "step": 349 }, { "epoch": 0.11341542449773169, "grad_norm": 1.3636996746063232, "learning_rate": 4.999228827643344e-06, "loss": 0.2132, "step": 350 }, { "epoch": 0.11373946856772521, "grad_norm": 1.3864322900772095, "learning_rate": 4.999206952654993e-06, "loss": 0.1984, "step": 351 }, { "epoch": 0.11406351263771873, "grad_norm": 1.4255362749099731, "learning_rate": 4.999184771787905e-06, "loss": 0.1827, "step": 352 }, { "epoch": 0.11438755670771225, "grad_norm": 1.4326763153076172, "learning_rate": 4.999162285044795e-06, "loss": 0.19, "step": 353 }, { "epoch": 0.11471160077770577, "grad_norm": 1.3926764726638794, "learning_rate": 4.9991394924284155e-06, "loss": 0.1942, "step": 354 }, { "epoch": 0.11503564484769929, "grad_norm": 1.4934500455856323, "learning_rate": 4.999116393941556e-06, "loss": 0.1966, "step": 355 }, { "epoch": 0.11535968891769281, "grad_norm": 1.5054737329483032, "learning_rate": 4.999092989587042e-06, "loss": 0.1813, "step": 356 }, { "epoch": 0.11568373298768632, "grad_norm": 1.4996047019958496, "learning_rate": 4.9990692793677395e-06, "loss": 0.1848, "step": 357 }, { "epoch": 0.11600777705767984, "grad_norm": 1.534684181213379, "learning_rate": 4.999045263286551e-06, "loss": 0.1881, "step": 358 }, { "epoch": 0.11633182112767336, "grad_norm": 1.467447280883789, "learning_rate": 4.999020941346414e-06, "loss": 0.18, "step": 359 }, { "epoch": 0.11665586519766688, "grad_norm": 1.4896132946014404, "learning_rate": 4.998996313550306e-06, "loss": 0.1949, "step": 360 }, { "epoch": 0.1169799092676604, "grad_norm": 1.3926950693130493, "learning_rate": 4.998971379901242e-06, "loss": 0.2029, "step": 361 }, { "epoch": 0.11730395333765392, "grad_norm": 1.5150505304336548, "learning_rate": 4.998946140402273e-06, "loss": 0.2077, "step": 362 }, { "epoch": 0.11762799740764744, "grad_norm": 1.2856528759002686, "learning_rate": 4.998920595056488e-06, "loss": 0.1987, "step": 363 }, { "epoch": 0.11795204147764096, "grad_norm": 1.3384044170379639, "learning_rate": 4.998894743867013e-06, "loss": 0.192, "step": 364 }, { "epoch": 0.11827608554763448, "grad_norm": 1.3708319664001465, "learning_rate": 4.998868586837013e-06, "loss": 0.1769, "step": 365 }, { "epoch": 0.118600129617628, "grad_norm": 1.3513811826705933, "learning_rate": 4.998842123969689e-06, "loss": 0.1963, "step": 366 }, { "epoch": 0.11892417368762152, "grad_norm": 1.3095284700393677, "learning_rate": 4.998815355268279e-06, "loss": 0.194, "step": 367 }, { "epoch": 0.11924821775761503, "grad_norm": 1.3722801208496094, "learning_rate": 4.998788280736061e-06, "loss": 0.2084, "step": 368 }, { "epoch": 0.11957226182760855, "grad_norm": 1.3854695558547974, "learning_rate": 4.998760900376347e-06, "loss": 0.186, "step": 369 }, { "epoch": 0.11989630589760207, "grad_norm": 1.3411400318145752, "learning_rate": 4.99873321419249e-06, "loss": 0.1975, "step": 370 }, { "epoch": 0.12022034996759559, "grad_norm": 1.2395044565200806, "learning_rate": 4.998705222187875e-06, "loss": 0.1871, "step": 371 }, { "epoch": 0.12054439403758911, "grad_norm": 1.3219211101531982, "learning_rate": 4.998676924365931e-06, "loss": 0.1842, "step": 372 }, { "epoch": 0.12086843810758263, "grad_norm": 1.3255659341812134, "learning_rate": 4.998648320730121e-06, "loss": 0.1973, "step": 373 }, { "epoch": 0.12119248217757615, "grad_norm": 1.2957849502563477, "learning_rate": 4.998619411283945e-06, "loss": 0.1992, "step": 374 }, { "epoch": 0.12151652624756967, "grad_norm": 1.3687437772750854, "learning_rate": 4.998590196030942e-06, "loss": 0.2024, "step": 375 }, { "epoch": 0.1218405703175632, "grad_norm": 1.472497582435608, "learning_rate": 4.998560674974686e-06, "loss": 0.2079, "step": 376 }, { "epoch": 0.12216461438755671, "grad_norm": 1.4358025789260864, "learning_rate": 4.998530848118792e-06, "loss": 0.2117, "step": 377 }, { "epoch": 0.12248865845755022, "grad_norm": 1.23166823387146, "learning_rate": 4.99850071546691e-06, "loss": 0.1909, "step": 378 }, { "epoch": 0.12281270252754374, "grad_norm": 1.4100823402404785, "learning_rate": 4.998470277022728e-06, "loss": 0.2054, "step": 379 }, { "epoch": 0.12313674659753726, "grad_norm": 1.3292949199676514, "learning_rate": 4.99843953278997e-06, "loss": 0.2005, "step": 380 }, { "epoch": 0.12346079066753078, "grad_norm": 1.3157209157943726, "learning_rate": 4.998408482772401e-06, "loss": 0.2004, "step": 381 }, { "epoch": 0.1237848347375243, "grad_norm": 1.3142346143722534, "learning_rate": 4.99837712697382e-06, "loss": 0.1836, "step": 382 }, { "epoch": 0.12410887880751782, "grad_norm": 1.3334403038024902, "learning_rate": 4.998345465398066e-06, "loss": 0.2, "step": 383 }, { "epoch": 0.12443292287751134, "grad_norm": 1.2839090824127197, "learning_rate": 4.998313498049011e-06, "loss": 0.1899, "step": 384 }, { "epoch": 0.12475696694750486, "grad_norm": 1.3406338691711426, "learning_rate": 4.9982812249305704e-06, "loss": 0.1834, "step": 385 }, { "epoch": 0.12508101101749838, "grad_norm": 1.3714925050735474, "learning_rate": 4.998248646046693e-06, "loss": 0.2022, "step": 386 }, { "epoch": 0.1254050550874919, "grad_norm": 1.3312101364135742, "learning_rate": 4.998215761401366e-06, "loss": 0.1975, "step": 387 }, { "epoch": 0.12572909915748542, "grad_norm": 1.2964175939559937, "learning_rate": 4.9981825709986145e-06, "loss": 0.1953, "step": 388 }, { "epoch": 0.12605314322747893, "grad_norm": 1.2531015872955322, "learning_rate": 4.9981490748425e-06, "loss": 0.1928, "step": 389 }, { "epoch": 0.12637718729747247, "grad_norm": 1.4965262413024902, "learning_rate": 4.998115272937123e-06, "loss": 0.1909, "step": 390 }, { "epoch": 0.12670123136746597, "grad_norm": 1.3003981113433838, "learning_rate": 4.998081165286621e-06, "loss": 0.1953, "step": 391 }, { "epoch": 0.1270252754374595, "grad_norm": 1.2948582172393799, "learning_rate": 4.9980467518951666e-06, "loss": 0.1945, "step": 392 }, { "epoch": 0.127349319507453, "grad_norm": 1.4373679161071777, "learning_rate": 4.998012032766974e-06, "loss": 0.1925, "step": 393 }, { "epoch": 0.12767336357744652, "grad_norm": 1.4847952127456665, "learning_rate": 4.997977007906291e-06, "loss": 0.2031, "step": 394 }, { "epoch": 0.12799740764744005, "grad_norm": 1.2792102098464966, "learning_rate": 4.997941677317403e-06, "loss": 0.184, "step": 395 }, { "epoch": 0.12832145171743356, "grad_norm": 1.2261104583740234, "learning_rate": 4.997906041004637e-06, "loss": 0.1957, "step": 396 }, { "epoch": 0.1286454957874271, "grad_norm": 1.330273985862732, "learning_rate": 4.997870098972353e-06, "loss": 0.206, "step": 397 }, { "epoch": 0.1289695398574206, "grad_norm": 1.3119021654129028, "learning_rate": 4.99783385122495e-06, "loss": 0.176, "step": 398 }, { "epoch": 0.12929358392741414, "grad_norm": 1.3380613327026367, "learning_rate": 4.997797297766864e-06, "loss": 0.1909, "step": 399 }, { "epoch": 0.12961762799740764, "grad_norm": 1.312547206878662, "learning_rate": 4.9977604386025704e-06, "loss": 0.1905, "step": 400 }, { "epoch": 0.12994167206740118, "grad_norm": 1.2903507947921753, "learning_rate": 4.997723273736579e-06, "loss": 0.1803, "step": 401 }, { "epoch": 0.13026571613739468, "grad_norm": 1.2392823696136475, "learning_rate": 4.9976858031734375e-06, "loss": 0.1833, "step": 402 }, { "epoch": 0.13058976020738822, "grad_norm": 1.2063300609588623, "learning_rate": 4.9976480269177345e-06, "loss": 0.1778, "step": 403 }, { "epoch": 0.13091380427738172, "grad_norm": 1.3096263408660889, "learning_rate": 4.997609944974092e-06, "loss": 0.1878, "step": 404 }, { "epoch": 0.13123784834737523, "grad_norm": 1.3855416774749756, "learning_rate": 4.99757155734717e-06, "loss": 0.1939, "step": 405 }, { "epoch": 0.13156189241736876, "grad_norm": 1.3690385818481445, "learning_rate": 4.997532864041669e-06, "loss": 0.1958, "step": 406 }, { "epoch": 0.13188593648736227, "grad_norm": 1.4282350540161133, "learning_rate": 4.997493865062323e-06, "loss": 0.1979, "step": 407 }, { "epoch": 0.1322099805573558, "grad_norm": 1.3318852186203003, "learning_rate": 4.9974545604139055e-06, "loss": 0.1975, "step": 408 }, { "epoch": 0.1325340246273493, "grad_norm": 1.3509126901626587, "learning_rate": 4.997414950101227e-06, "loss": 0.1985, "step": 409 }, { "epoch": 0.13285806869734285, "grad_norm": 1.3912403583526611, "learning_rate": 4.997375034129135e-06, "loss": 0.2103, "step": 410 }, { "epoch": 0.13318211276733635, "grad_norm": 1.2692376375198364, "learning_rate": 4.997334812502516e-06, "loss": 0.1849, "step": 411 }, { "epoch": 0.1335061568373299, "grad_norm": 1.3442357778549194, "learning_rate": 4.9972942852262915e-06, "loss": 0.1898, "step": 412 }, { "epoch": 0.1338302009073234, "grad_norm": 1.2724248170852661, "learning_rate": 4.997253452305423e-06, "loss": 0.1917, "step": 413 }, { "epoch": 0.13415424497731693, "grad_norm": 1.4180892705917358, "learning_rate": 4.9972123137449065e-06, "loss": 0.2009, "step": 414 }, { "epoch": 0.13447828904731043, "grad_norm": 1.3496129512786865, "learning_rate": 4.997170869549778e-06, "loss": 0.1932, "step": 415 }, { "epoch": 0.13480233311730394, "grad_norm": 1.1647557020187378, "learning_rate": 4.99712911972511e-06, "loss": 0.1763, "step": 416 }, { "epoch": 0.13512637718729748, "grad_norm": 1.286051630973816, "learning_rate": 4.99708706427601e-06, "loss": 0.185, "step": 417 }, { "epoch": 0.13545042125729098, "grad_norm": 1.241931438446045, "learning_rate": 4.997044703207629e-06, "loss": 0.2015, "step": 418 }, { "epoch": 0.13577446532728452, "grad_norm": 1.4348373413085938, "learning_rate": 4.9970020365251485e-06, "loss": 0.1929, "step": 419 }, { "epoch": 0.13609850939727802, "grad_norm": 1.2149462699890137, "learning_rate": 4.996959064233792e-06, "loss": 0.183, "step": 420 }, { "epoch": 0.13642255346727156, "grad_norm": 1.2824324369430542, "learning_rate": 4.996915786338818e-06, "loss": 0.2014, "step": 421 }, { "epoch": 0.13674659753726506, "grad_norm": 1.2929573059082031, "learning_rate": 4.9968722028455245e-06, "loss": 0.1894, "step": 422 }, { "epoch": 0.1370706416072586, "grad_norm": 1.458095669746399, "learning_rate": 4.996828313759245e-06, "loss": 0.1986, "step": 423 }, { "epoch": 0.1373946856772521, "grad_norm": 1.3376439809799194, "learning_rate": 4.99678411908535e-06, "loss": 0.1878, "step": 424 }, { "epoch": 0.13771872974724564, "grad_norm": 1.4877251386642456, "learning_rate": 4.996739618829251e-06, "loss": 0.2029, "step": 425 }, { "epoch": 0.13804277381723914, "grad_norm": 1.4747685194015503, "learning_rate": 4.996694812996391e-06, "loss": 0.1788, "step": 426 }, { "epoch": 0.13836681788723265, "grad_norm": 1.5499141216278076, "learning_rate": 4.996649701592258e-06, "loss": 0.2201, "step": 427 }, { "epoch": 0.13869086195722619, "grad_norm": 1.3866907358169556, "learning_rate": 4.99660428462237e-06, "loss": 0.1858, "step": 428 }, { "epoch": 0.1390149060272197, "grad_norm": 1.3738040924072266, "learning_rate": 4.996558562092286e-06, "loss": 0.1759, "step": 429 }, { "epoch": 0.13933895009721323, "grad_norm": 1.41068696975708, "learning_rate": 4.996512534007602e-06, "loss": 0.2034, "step": 430 }, { "epoch": 0.13966299416720673, "grad_norm": 1.2787730693817139, "learning_rate": 4.996466200373954e-06, "loss": 0.1811, "step": 431 }, { "epoch": 0.13998703823720027, "grad_norm": 1.2944291830062866, "learning_rate": 4.99641956119701e-06, "loss": 0.1965, "step": 432 }, { "epoch": 0.14031108230719377, "grad_norm": 1.319860577583313, "learning_rate": 4.996372616482478e-06, "loss": 0.1961, "step": 433 }, { "epoch": 0.1406351263771873, "grad_norm": 1.2821969985961914, "learning_rate": 4.996325366236105e-06, "loss": 0.2068, "step": 434 }, { "epoch": 0.14095917044718081, "grad_norm": 1.3399871587753296, "learning_rate": 4.996277810463675e-06, "loss": 0.2055, "step": 435 }, { "epoch": 0.14128321451717435, "grad_norm": 1.2458884716033936, "learning_rate": 4.996229949171004e-06, "loss": 0.1983, "step": 436 }, { "epoch": 0.14160725858716786, "grad_norm": 1.2212367057800293, "learning_rate": 4.996181782363955e-06, "loss": 0.1952, "step": 437 }, { "epoch": 0.14193130265716136, "grad_norm": 1.197187066078186, "learning_rate": 4.99613331004842e-06, "loss": 0.1869, "step": 438 }, { "epoch": 0.1422553467271549, "grad_norm": 1.1908378601074219, "learning_rate": 4.996084532230332e-06, "loss": 0.1987, "step": 439 }, { "epoch": 0.1425793907971484, "grad_norm": 1.388664960861206, "learning_rate": 4.996035448915661e-06, "loss": 0.2039, "step": 440 }, { "epoch": 0.14290343486714194, "grad_norm": 1.3541980981826782, "learning_rate": 4.995986060110415e-06, "loss": 0.192, "step": 441 }, { "epoch": 0.14322747893713544, "grad_norm": 1.3946161270141602, "learning_rate": 4.995936365820638e-06, "loss": 0.1903, "step": 442 }, { "epoch": 0.14355152300712898, "grad_norm": 1.1276389360427856, "learning_rate": 4.9958863660524125e-06, "loss": 0.1585, "step": 443 }, { "epoch": 0.14387556707712248, "grad_norm": 1.1817028522491455, "learning_rate": 4.995836060811859e-06, "loss": 0.1906, "step": 444 }, { "epoch": 0.14419961114711602, "grad_norm": 1.2277320623397827, "learning_rate": 4.995785450105131e-06, "loss": 0.1823, "step": 445 }, { "epoch": 0.14452365521710953, "grad_norm": 1.2285058498382568, "learning_rate": 4.995734533938427e-06, "loss": 0.1835, "step": 446 }, { "epoch": 0.14484769928710303, "grad_norm": 1.2986756563186646, "learning_rate": 4.995683312317975e-06, "loss": 0.1847, "step": 447 }, { "epoch": 0.14517174335709657, "grad_norm": 1.2609082460403442, "learning_rate": 4.995631785250046e-06, "loss": 0.182, "step": 448 }, { "epoch": 0.14549578742709007, "grad_norm": 1.2614150047302246, "learning_rate": 4.9955799527409465e-06, "loss": 0.1937, "step": 449 }, { "epoch": 0.1458198314970836, "grad_norm": 1.3610444068908691, "learning_rate": 4.99552781479702e-06, "loss": 0.1908, "step": 450 }, { "epoch": 0.14614387556707711, "grad_norm": 1.259647250175476, "learning_rate": 4.995475371424648e-06, "loss": 0.1909, "step": 451 }, { "epoch": 0.14646791963707065, "grad_norm": 1.150729775428772, "learning_rate": 4.995422622630247e-06, "loss": 0.1734, "step": 452 }, { "epoch": 0.14679196370706415, "grad_norm": 1.2474488019943237, "learning_rate": 4.995369568420276e-06, "loss": 0.1765, "step": 453 }, { "epoch": 0.1471160077770577, "grad_norm": 1.356665015220642, "learning_rate": 4.995316208801226e-06, "loss": 0.1949, "step": 454 }, { "epoch": 0.1474400518470512, "grad_norm": 1.299708366394043, "learning_rate": 4.99526254377963e-06, "loss": 0.1886, "step": 455 }, { "epoch": 0.14776409591704473, "grad_norm": 1.213100552558899, "learning_rate": 4.995208573362053e-06, "loss": 0.1885, "step": 456 }, { "epoch": 0.14808813998703824, "grad_norm": 1.3678295612335205, "learning_rate": 4.995154297555103e-06, "loss": 0.192, "step": 457 }, { "epoch": 0.14841218405703174, "grad_norm": 1.2744077444076538, "learning_rate": 4.995099716365421e-06, "loss": 0.1979, "step": 458 }, { "epoch": 0.14873622812702528, "grad_norm": 1.3041316270828247, "learning_rate": 4.995044829799689e-06, "loss": 0.1951, "step": 459 }, { "epoch": 0.14906027219701878, "grad_norm": 1.2977657318115234, "learning_rate": 4.994989637864624e-06, "loss": 0.1824, "step": 460 }, { "epoch": 0.14938431626701232, "grad_norm": 1.3233217000961304, "learning_rate": 4.99493414056698e-06, "loss": 0.1786, "step": 461 }, { "epoch": 0.14970836033700582, "grad_norm": 1.3560962677001953, "learning_rate": 4.99487833791355e-06, "loss": 0.1838, "step": 462 }, { "epoch": 0.15003240440699936, "grad_norm": 1.3699971437454224, "learning_rate": 4.9948222299111644e-06, "loss": 0.1924, "step": 463 }, { "epoch": 0.15035644847699287, "grad_norm": 1.2370259761810303, "learning_rate": 4.994765816566689e-06, "loss": 0.189, "step": 464 }, { "epoch": 0.1506804925469864, "grad_norm": 1.428999423980713, "learning_rate": 4.994709097887029e-06, "loss": 0.1995, "step": 465 }, { "epoch": 0.1510045366169799, "grad_norm": 1.3747769594192505, "learning_rate": 4.994652073879127e-06, "loss": 0.1891, "step": 466 }, { "epoch": 0.15132858068697344, "grad_norm": 1.290266752243042, "learning_rate": 4.994594744549961e-06, "loss": 0.1868, "step": 467 }, { "epoch": 0.15165262475696695, "grad_norm": 1.3300879001617432, "learning_rate": 4.994537109906546e-06, "loss": 0.1826, "step": 468 }, { "epoch": 0.15197666882696045, "grad_norm": 1.2477858066558838, "learning_rate": 4.99447916995594e-06, "loss": 0.1817, "step": 469 }, { "epoch": 0.152300712896954, "grad_norm": 1.1481152772903442, "learning_rate": 4.99442092470523e-06, "loss": 0.1688, "step": 470 }, { "epoch": 0.1526247569669475, "grad_norm": 1.2472997903823853, "learning_rate": 4.994362374161548e-06, "loss": 0.1946, "step": 471 }, { "epoch": 0.15294880103694103, "grad_norm": 1.2494356632232666, "learning_rate": 4.994303518332059e-06, "loss": 0.1998, "step": 472 }, { "epoch": 0.15327284510693454, "grad_norm": 1.3602643013000488, "learning_rate": 4.994244357223965e-06, "loss": 0.1881, "step": 473 }, { "epoch": 0.15359688917692807, "grad_norm": 1.2709087133407593, "learning_rate": 4.994184890844509e-06, "loss": 0.1818, "step": 474 }, { "epoch": 0.15392093324692158, "grad_norm": 1.2899342775344849, "learning_rate": 4.9941251192009665e-06, "loss": 0.2027, "step": 475 }, { "epoch": 0.1542449773169151, "grad_norm": 1.3323594331741333, "learning_rate": 4.994065042300655e-06, "loss": 0.2063, "step": 476 }, { "epoch": 0.15456902138690862, "grad_norm": 1.3085150718688965, "learning_rate": 4.994004660150927e-06, "loss": 0.1992, "step": 477 }, { "epoch": 0.15489306545690215, "grad_norm": 1.2538460493087769, "learning_rate": 4.993943972759173e-06, "loss": 0.1807, "step": 478 }, { "epoch": 0.15521710952689566, "grad_norm": 1.2298994064331055, "learning_rate": 4.993882980132819e-06, "loss": 0.177, "step": 479 }, { "epoch": 0.15554115359688916, "grad_norm": 1.2511969804763794, "learning_rate": 4.993821682279332e-06, "loss": 0.1908, "step": 480 }, { "epoch": 0.1558651976668827, "grad_norm": 1.2835829257965088, "learning_rate": 4.993760079206212e-06, "loss": 0.189, "step": 481 }, { "epoch": 0.1561892417368762, "grad_norm": 1.252970814704895, "learning_rate": 4.993698170920999e-06, "loss": 0.1988, "step": 482 }, { "epoch": 0.15651328580686974, "grad_norm": 1.2566627264022827, "learning_rate": 4.993635957431273e-06, "loss": 0.1787, "step": 483 }, { "epoch": 0.15683732987686325, "grad_norm": 1.2273926734924316, "learning_rate": 4.993573438744645e-06, "loss": 0.196, "step": 484 }, { "epoch": 0.15716137394685678, "grad_norm": 1.194635033607483, "learning_rate": 4.993510614868767e-06, "loss": 0.1762, "step": 485 }, { "epoch": 0.1574854180168503, "grad_norm": 1.2113969326019287, "learning_rate": 4.99344748581133e-06, "loss": 0.1882, "step": 486 }, { "epoch": 0.15780946208684382, "grad_norm": 1.3154219388961792, "learning_rate": 4.993384051580059e-06, "loss": 0.2097, "step": 487 }, { "epoch": 0.15813350615683733, "grad_norm": 1.3013001680374146, "learning_rate": 4.993320312182718e-06, "loss": 0.1935, "step": 488 }, { "epoch": 0.15845755022683086, "grad_norm": 1.2724648714065552, "learning_rate": 4.993256267627108e-06, "loss": 0.1931, "step": 489 }, { "epoch": 0.15878159429682437, "grad_norm": 1.1998355388641357, "learning_rate": 4.993191917921066e-06, "loss": 0.1756, "step": 490 }, { "epoch": 0.15910563836681788, "grad_norm": 1.2753697633743286, "learning_rate": 4.9931272630724704e-06, "loss": 0.1835, "step": 491 }, { "epoch": 0.1594296824368114, "grad_norm": 1.2968438863754272, "learning_rate": 4.993062303089233e-06, "loss": 0.1967, "step": 492 }, { "epoch": 0.15975372650680492, "grad_norm": 1.2662702798843384, "learning_rate": 4.992997037979304e-06, "loss": 0.185, "step": 493 }, { "epoch": 0.16007777057679845, "grad_norm": 1.1670453548431396, "learning_rate": 4.992931467750673e-06, "loss": 0.1666, "step": 494 }, { "epoch": 0.16040181464679196, "grad_norm": 1.1736934185028076, "learning_rate": 4.992865592411362e-06, "loss": 0.1824, "step": 495 }, { "epoch": 0.1607258587167855, "grad_norm": 1.1742326021194458, "learning_rate": 4.992799411969436e-06, "loss": 0.1718, "step": 496 }, { "epoch": 0.161049902786779, "grad_norm": 1.3553086519241333, "learning_rate": 4.992732926432995e-06, "loss": 0.2112, "step": 497 }, { "epoch": 0.16137394685677253, "grad_norm": 1.3199315071105957, "learning_rate": 4.9926661358101745e-06, "loss": 0.1954, "step": 498 }, { "epoch": 0.16169799092676604, "grad_norm": 1.2116035223007202, "learning_rate": 4.9925990401091505e-06, "loss": 0.1846, "step": 499 }, { "epoch": 0.16202203499675957, "grad_norm": 1.3013466596603394, "learning_rate": 4.992531639338133e-06, "loss": 0.1938, "step": 500 }, { "epoch": 0.16234607906675308, "grad_norm": 1.2134751081466675, "learning_rate": 4.992463933505374e-06, "loss": 0.1813, "step": 501 }, { "epoch": 0.16267012313674659, "grad_norm": 1.275068759918213, "learning_rate": 4.9923959226191574e-06, "loss": 0.1942, "step": 502 }, { "epoch": 0.16299416720674012, "grad_norm": 1.2700871229171753, "learning_rate": 4.992327606687808e-06, "loss": 0.1936, "step": 503 }, { "epoch": 0.16331821127673363, "grad_norm": 1.2754777669906616, "learning_rate": 4.992258985719688e-06, "loss": 0.2123, "step": 504 }, { "epoch": 0.16364225534672716, "grad_norm": 1.177857518196106, "learning_rate": 4.992190059723194e-06, "loss": 0.1782, "step": 505 }, { "epoch": 0.16396629941672067, "grad_norm": 1.1868280172348022, "learning_rate": 4.992120828706763e-06, "loss": 0.1863, "step": 506 }, { "epoch": 0.1642903434867142, "grad_norm": 1.2085050344467163, "learning_rate": 4.99205129267887e-06, "loss": 0.1805, "step": 507 }, { "epoch": 0.1646143875567077, "grad_norm": 1.1440081596374512, "learning_rate": 4.991981451648022e-06, "loss": 0.1719, "step": 508 }, { "epoch": 0.16493843162670124, "grad_norm": 1.2123503684997559, "learning_rate": 4.9919113056227685e-06, "loss": 0.1929, "step": 509 }, { "epoch": 0.16526247569669475, "grad_norm": 1.1267462968826294, "learning_rate": 4.991840854611696e-06, "loss": 0.1631, "step": 510 }, { "epoch": 0.16558651976668826, "grad_norm": 1.198703646659851, "learning_rate": 4.991770098623425e-06, "loss": 0.1747, "step": 511 }, { "epoch": 0.1659105638366818, "grad_norm": 1.1974960565567017, "learning_rate": 4.9916990376666156e-06, "loss": 0.1703, "step": 512 }, { "epoch": 0.1662346079066753, "grad_norm": 1.3251335620880127, "learning_rate": 4.991627671749966e-06, "loss": 0.1917, "step": 513 }, { "epoch": 0.16655865197666883, "grad_norm": 1.2980823516845703, "learning_rate": 4.9915560008822105e-06, "loss": 0.1798, "step": 514 }, { "epoch": 0.16688269604666234, "grad_norm": 1.24545419216156, "learning_rate": 4.99148402507212e-06, "loss": 0.183, "step": 515 }, { "epoch": 0.16720674011665587, "grad_norm": 1.3871142864227295, "learning_rate": 4.991411744328505e-06, "loss": 0.196, "step": 516 }, { "epoch": 0.16753078418664938, "grad_norm": 1.186227560043335, "learning_rate": 4.991339158660211e-06, "loss": 0.1788, "step": 517 }, { "epoch": 0.1678548282566429, "grad_norm": 1.1392605304718018, "learning_rate": 4.991266268076121e-06, "loss": 0.1847, "step": 518 }, { "epoch": 0.16817887232663642, "grad_norm": 1.200305461883545, "learning_rate": 4.991193072585158e-06, "loss": 0.1906, "step": 519 }, { "epoch": 0.16850291639662995, "grad_norm": 1.3353350162506104, "learning_rate": 4.99111957219628e-06, "loss": 0.1819, "step": 520 }, { "epoch": 0.16882696046662346, "grad_norm": 1.2189663648605347, "learning_rate": 4.991045766918482e-06, "loss": 0.1764, "step": 521 }, { "epoch": 0.16915100453661697, "grad_norm": 1.2358533143997192, "learning_rate": 4.990971656760797e-06, "loss": 0.1869, "step": 522 }, { "epoch": 0.1694750486066105, "grad_norm": 1.2056154012680054, "learning_rate": 4.990897241732296e-06, "loss": 0.2036, "step": 523 }, { "epoch": 0.169799092676604, "grad_norm": 1.3262749910354614, "learning_rate": 4.990822521842086e-06, "loss": 0.2051, "step": 524 }, { "epoch": 0.17012313674659754, "grad_norm": 1.2622315883636475, "learning_rate": 4.990747497099312e-06, "loss": 0.1693, "step": 525 }, { "epoch": 0.17044718081659105, "grad_norm": 1.2696737051010132, "learning_rate": 4.990672167513158e-06, "loss": 0.2031, "step": 526 }, { "epoch": 0.17077122488658458, "grad_norm": 1.2847379446029663, "learning_rate": 4.990596533092841e-06, "loss": 0.1903, "step": 527 }, { "epoch": 0.1710952689565781, "grad_norm": 1.1462384462356567, "learning_rate": 4.9905205938476195e-06, "loss": 0.1963, "step": 528 }, { "epoch": 0.17141931302657162, "grad_norm": 1.2983579635620117, "learning_rate": 4.990444349786788e-06, "loss": 0.1967, "step": 529 }, { "epoch": 0.17174335709656513, "grad_norm": 1.3803335428237915, "learning_rate": 4.990367800919677e-06, "loss": 0.1682, "step": 530 }, { "epoch": 0.17206740116655866, "grad_norm": 1.2707942724227905, "learning_rate": 4.990290947255656e-06, "loss": 0.1938, "step": 531 }, { "epoch": 0.17239144523655217, "grad_norm": 1.0509445667266846, "learning_rate": 4.9902137888041304e-06, "loss": 0.1651, "step": 532 }, { "epoch": 0.17271548930654568, "grad_norm": 1.1488968133926392, "learning_rate": 4.990136325574545e-06, "loss": 0.168, "step": 533 }, { "epoch": 0.1730395333765392, "grad_norm": 1.2975245714187622, "learning_rate": 4.990058557576379e-06, "loss": 0.1844, "step": 534 }, { "epoch": 0.17336357744653272, "grad_norm": 1.2090622186660767, "learning_rate": 4.989980484819152e-06, "loss": 0.1867, "step": 535 }, { "epoch": 0.17368762151652625, "grad_norm": 1.2510167360305786, "learning_rate": 4.9899021073124175e-06, "loss": 0.1929, "step": 536 }, { "epoch": 0.17401166558651976, "grad_norm": 1.250388503074646, "learning_rate": 4.989823425065769e-06, "loss": 0.1863, "step": 537 }, { "epoch": 0.1743357096565133, "grad_norm": 1.1425954103469849, "learning_rate": 4.989744438088838e-06, "loss": 0.1763, "step": 538 }, { "epoch": 0.1746597537265068, "grad_norm": 1.3070205450057983, "learning_rate": 4.98966514639129e-06, "loss": 0.1941, "step": 539 }, { "epoch": 0.17498379779650033, "grad_norm": 1.1972076892852783, "learning_rate": 4.98958554998283e-06, "loss": 0.1877, "step": 540 }, { "epoch": 0.17530784186649384, "grad_norm": 1.2954251766204834, "learning_rate": 4.989505648873198e-06, "loss": 0.2039, "step": 541 }, { "epoch": 0.17563188593648738, "grad_norm": 1.1820374727249146, "learning_rate": 4.989425443072177e-06, "loss": 0.1837, "step": 542 }, { "epoch": 0.17595593000648088, "grad_norm": 1.264702558517456, "learning_rate": 4.9893449325895804e-06, "loss": 0.2013, "step": 543 }, { "epoch": 0.1762799740764744, "grad_norm": 1.1946943998336792, "learning_rate": 4.989264117435263e-06, "loss": 0.1873, "step": 544 }, { "epoch": 0.17660401814646792, "grad_norm": 1.220134973526001, "learning_rate": 4.9891829976191155e-06, "loss": 0.2003, "step": 545 }, { "epoch": 0.17692806221646143, "grad_norm": 1.138335943222046, "learning_rate": 4.9891015731510665e-06, "loss": 0.1678, "step": 546 }, { "epoch": 0.17725210628645496, "grad_norm": 1.111392855644226, "learning_rate": 4.989019844041081e-06, "loss": 0.163, "step": 547 }, { "epoch": 0.17757615035644847, "grad_norm": 1.2142761945724487, "learning_rate": 4.988937810299161e-06, "loss": 0.183, "step": 548 }, { "epoch": 0.177900194426442, "grad_norm": 1.1644195318222046, "learning_rate": 4.98885547193535e-06, "loss": 0.1832, "step": 549 }, { "epoch": 0.1782242384964355, "grad_norm": 1.2614682912826538, "learning_rate": 4.988772828959722e-06, "loss": 0.1738, "step": 550 }, { "epoch": 0.17854828256642905, "grad_norm": 1.1590503454208374, "learning_rate": 4.988689881382392e-06, "loss": 0.1929, "step": 551 }, { "epoch": 0.17887232663642255, "grad_norm": 1.0835431814193726, "learning_rate": 4.988606629213515e-06, "loss": 0.1567, "step": 552 }, { "epoch": 0.17919637070641609, "grad_norm": 1.1390941143035889, "learning_rate": 4.9885230724632775e-06, "loss": 0.1757, "step": 553 }, { "epoch": 0.1795204147764096, "grad_norm": 1.2216302156448364, "learning_rate": 4.9884392111419056e-06, "loss": 0.1826, "step": 554 }, { "epoch": 0.1798444588464031, "grad_norm": 1.1715116500854492, "learning_rate": 4.988355045259665e-06, "loss": 0.1802, "step": 555 }, { "epoch": 0.18016850291639663, "grad_norm": 1.1871827840805054, "learning_rate": 4.988270574826857e-06, "loss": 0.179, "step": 556 }, { "epoch": 0.18049254698639014, "grad_norm": 1.1360725164413452, "learning_rate": 4.9881857998538175e-06, "loss": 0.1763, "step": 557 }, { "epoch": 0.18081659105638367, "grad_norm": 1.1245923042297363, "learning_rate": 4.988100720350924e-06, "loss": 0.1901, "step": 558 }, { "epoch": 0.18114063512637718, "grad_norm": 1.2312275171279907, "learning_rate": 4.988015336328589e-06, "loss": 0.1939, "step": 559 }, { "epoch": 0.18146467919637072, "grad_norm": 1.1649733781814575, "learning_rate": 4.987929647797263e-06, "loss": 0.1601, "step": 560 }, { "epoch": 0.18178872326636422, "grad_norm": 1.1686375141143799, "learning_rate": 4.987843654767432e-06, "loss": 0.1818, "step": 561 }, { "epoch": 0.18211276733635776, "grad_norm": 1.3112282752990723, "learning_rate": 4.987757357249623e-06, "loss": 0.2011, "step": 562 }, { "epoch": 0.18243681140635126, "grad_norm": 1.2649235725402832, "learning_rate": 4.987670755254397e-06, "loss": 0.2082, "step": 563 }, { "epoch": 0.18276085547634477, "grad_norm": 1.2089769840240479, "learning_rate": 4.987583848792353e-06, "loss": 0.1771, "step": 564 }, { "epoch": 0.1830848995463383, "grad_norm": 1.185096263885498, "learning_rate": 4.987496637874127e-06, "loss": 0.1883, "step": 565 }, { "epoch": 0.1834089436163318, "grad_norm": 1.1989091634750366, "learning_rate": 4.987409122510394e-06, "loss": 0.1969, "step": 566 }, { "epoch": 0.18373298768632534, "grad_norm": 1.0628764629364014, "learning_rate": 4.9873213027118635e-06, "loss": 0.1644, "step": 567 }, { "epoch": 0.18405703175631885, "grad_norm": 1.2635180950164795, "learning_rate": 4.987233178489285e-06, "loss": 0.1812, "step": 568 }, { "epoch": 0.18438107582631239, "grad_norm": 1.1539015769958496, "learning_rate": 4.987144749853444e-06, "loss": 0.1954, "step": 569 }, { "epoch": 0.1847051198963059, "grad_norm": 1.1005996465682983, "learning_rate": 4.987056016815163e-06, "loss": 0.1718, "step": 570 }, { "epoch": 0.18502916396629943, "grad_norm": 1.2113686800003052, "learning_rate": 4.986966979385302e-06, "loss": 0.1957, "step": 571 }, { "epoch": 0.18535320803629293, "grad_norm": 1.204167366027832, "learning_rate": 4.986877637574758e-06, "loss": 0.1787, "step": 572 }, { "epoch": 0.18567725210628647, "grad_norm": 1.0967299938201904, "learning_rate": 4.986787991394467e-06, "loss": 0.1563, "step": 573 }, { "epoch": 0.18600129617627997, "grad_norm": 1.188849925994873, "learning_rate": 4.9866980408554e-06, "loss": 0.1832, "step": 574 }, { "epoch": 0.18632534024627348, "grad_norm": 1.205378770828247, "learning_rate": 4.986607785968565e-06, "loss": 0.1817, "step": 575 }, { "epoch": 0.18664938431626701, "grad_norm": 1.2052730321884155, "learning_rate": 4.986517226745009e-06, "loss": 0.1909, "step": 576 }, { "epoch": 0.18697342838626052, "grad_norm": 1.2092132568359375, "learning_rate": 4.9864263631958165e-06, "loss": 0.1742, "step": 577 }, { "epoch": 0.18729747245625405, "grad_norm": 1.1333421468734741, "learning_rate": 4.986335195332107e-06, "loss": 0.1745, "step": 578 }, { "epoch": 0.18762151652624756, "grad_norm": 1.1709595918655396, "learning_rate": 4.986243723165039e-06, "loss": 0.1843, "step": 579 }, { "epoch": 0.1879455605962411, "grad_norm": 1.1630189418792725, "learning_rate": 4.9861519467058094e-06, "loss": 0.1949, "step": 580 }, { "epoch": 0.1882696046662346, "grad_norm": 1.1940983533859253, "learning_rate": 4.986059865965649e-06, "loss": 0.1836, "step": 581 }, { "epoch": 0.18859364873622814, "grad_norm": 1.1740580797195435, "learning_rate": 4.985967480955827e-06, "loss": 0.1966, "step": 582 }, { "epoch": 0.18891769280622164, "grad_norm": 1.1827421188354492, "learning_rate": 4.9858747916876515e-06, "loss": 0.1855, "step": 583 }, { "epoch": 0.18924173687621518, "grad_norm": 1.240294337272644, "learning_rate": 4.985781798172467e-06, "loss": 0.1844, "step": 584 }, { "epoch": 0.18956578094620868, "grad_norm": 1.2834677696228027, "learning_rate": 4.9856885004216545e-06, "loss": 0.1923, "step": 585 }, { "epoch": 0.1898898250162022, "grad_norm": 1.1834359169006348, "learning_rate": 4.985594898446633e-06, "loss": 0.1759, "step": 586 }, { "epoch": 0.19021386908619572, "grad_norm": 1.2249093055725098, "learning_rate": 4.9855009922588585e-06, "loss": 0.1893, "step": 587 }, { "epoch": 0.19053791315618923, "grad_norm": 1.1315933465957642, "learning_rate": 4.985406781869824e-06, "loss": 0.1697, "step": 588 }, { "epoch": 0.19086195722618277, "grad_norm": 1.2335212230682373, "learning_rate": 4.98531226729106e-06, "loss": 0.2065, "step": 589 }, { "epoch": 0.19118600129617627, "grad_norm": 1.1094428300857544, "learning_rate": 4.985217448534134e-06, "loss": 0.1651, "step": 590 }, { "epoch": 0.1915100453661698, "grad_norm": 1.2721391916275024, "learning_rate": 4.985122325610651e-06, "loss": 0.1967, "step": 591 }, { "epoch": 0.1918340894361633, "grad_norm": 1.1110994815826416, "learning_rate": 4.985026898532253e-06, "loss": 0.1709, "step": 592 }, { "epoch": 0.19215813350615685, "grad_norm": 1.2032331228256226, "learning_rate": 4.98493116731062e-06, "loss": 0.174, "step": 593 }, { "epoch": 0.19248217757615035, "grad_norm": 1.286197543144226, "learning_rate": 4.984835131957468e-06, "loss": 0.1896, "step": 594 }, { "epoch": 0.1928062216461439, "grad_norm": 1.1432693004608154, "learning_rate": 4.98473879248455e-06, "loss": 0.1721, "step": 595 }, { "epoch": 0.1931302657161374, "grad_norm": 1.290723443031311, "learning_rate": 4.984642148903659e-06, "loss": 0.1959, "step": 596 }, { "epoch": 0.1934543097861309, "grad_norm": 1.2293765544891357, "learning_rate": 4.984545201226623e-06, "loss": 0.1815, "step": 597 }, { "epoch": 0.19377835385612444, "grad_norm": 1.2856359481811523, "learning_rate": 4.984447949465305e-06, "loss": 0.1954, "step": 598 }, { "epoch": 0.19410239792611794, "grad_norm": 1.1901377439498901, "learning_rate": 4.98435039363161e-06, "loss": 0.1781, "step": 599 }, { "epoch": 0.19442644199611148, "grad_norm": 1.2254037857055664, "learning_rate": 4.984252533737477e-06, "loss": 0.1898, "step": 600 }, { "epoch": 0.19475048606610498, "grad_norm": 1.1281192302703857, "learning_rate": 4.984154369794883e-06, "loss": 0.1809, "step": 601 }, { "epoch": 0.19507453013609852, "grad_norm": 1.2336041927337646, "learning_rate": 4.984055901815844e-06, "loss": 0.1738, "step": 602 }, { "epoch": 0.19539857420609202, "grad_norm": 1.119174838066101, "learning_rate": 4.983957129812409e-06, "loss": 0.1735, "step": 603 }, { "epoch": 0.19572261827608556, "grad_norm": 1.2780518531799316, "learning_rate": 4.9838580537966676e-06, "loss": 0.1965, "step": 604 }, { "epoch": 0.19604666234607906, "grad_norm": 1.16103196144104, "learning_rate": 4.983758673780747e-06, "loss": 0.1845, "step": 605 }, { "epoch": 0.1963707064160726, "grad_norm": 1.1239101886749268, "learning_rate": 4.9836589897768084e-06, "loss": 0.1696, "step": 606 }, { "epoch": 0.1966947504860661, "grad_norm": 1.3181592226028442, "learning_rate": 4.983559001797054e-06, "loss": 0.2048, "step": 607 }, { "epoch": 0.1970187945560596, "grad_norm": 1.1426509618759155, "learning_rate": 4.983458709853719e-06, "loss": 0.177, "step": 608 }, { "epoch": 0.19734283862605315, "grad_norm": 1.1979957818984985, "learning_rate": 4.9833581139590814e-06, "loss": 0.1853, "step": 609 }, { "epoch": 0.19766688269604665, "grad_norm": 1.1111805438995361, "learning_rate": 4.983257214125451e-06, "loss": 0.1932, "step": 610 }, { "epoch": 0.1979909267660402, "grad_norm": 1.2329010963439941, "learning_rate": 4.9831560103651765e-06, "loss": 0.204, "step": 611 }, { "epoch": 0.1983149708360337, "grad_norm": 1.1231783628463745, "learning_rate": 4.983054502690646e-06, "loss": 0.1691, "step": 612 }, { "epoch": 0.19863901490602723, "grad_norm": 1.1197031736373901, "learning_rate": 4.9829526911142825e-06, "loss": 0.1593, "step": 613 }, { "epoch": 0.19896305897602073, "grad_norm": 1.194003939628601, "learning_rate": 4.982850575648545e-06, "loss": 0.183, "step": 614 }, { "epoch": 0.19928710304601427, "grad_norm": 1.1786737442016602, "learning_rate": 4.982748156305934e-06, "loss": 0.1864, "step": 615 }, { "epoch": 0.19961114711600778, "grad_norm": 1.1826181411743164, "learning_rate": 4.982645433098984e-06, "loss": 0.1753, "step": 616 }, { "epoch": 0.1999351911860013, "grad_norm": 1.1305351257324219, "learning_rate": 4.982542406040266e-06, "loss": 0.1964, "step": 617 }, { "epoch": 0.20025923525599482, "grad_norm": 1.084776759147644, "learning_rate": 4.98243907514239e-06, "loss": 0.1831, "step": 618 }, { "epoch": 0.20058327932598832, "grad_norm": 1.1831001043319702, "learning_rate": 4.982335440418004e-06, "loss": 0.1833, "step": 619 }, { "epoch": 0.20090732339598186, "grad_norm": 1.1076908111572266, "learning_rate": 4.98223150187979e-06, "loss": 0.1732, "step": 620 }, { "epoch": 0.20123136746597536, "grad_norm": 1.15507173538208, "learning_rate": 4.982127259540471e-06, "loss": 0.1737, "step": 621 }, { "epoch": 0.2015554115359689, "grad_norm": 1.2487183809280396, "learning_rate": 4.9820227134128045e-06, "loss": 0.1937, "step": 622 }, { "epoch": 0.2018794556059624, "grad_norm": 1.2484679222106934, "learning_rate": 4.981917863509585e-06, "loss": 0.1968, "step": 623 }, { "epoch": 0.20220349967595594, "grad_norm": 1.2015026807785034, "learning_rate": 4.981812709843646e-06, "loss": 0.1881, "step": 624 }, { "epoch": 0.20252754374594945, "grad_norm": 1.1933735609054565, "learning_rate": 4.981707252427857e-06, "loss": 0.187, "step": 625 }, { "epoch": 0.20285158781594298, "grad_norm": 1.1229411363601685, "learning_rate": 4.981601491275125e-06, "loss": 0.183, "step": 626 }, { "epoch": 0.2031756318859365, "grad_norm": 1.1262580156326294, "learning_rate": 4.981495426398395e-06, "loss": 0.1798, "step": 627 }, { "epoch": 0.20349967595593, "grad_norm": 1.1283111572265625, "learning_rate": 4.981389057810647e-06, "loss": 0.1835, "step": 628 }, { "epoch": 0.20382372002592353, "grad_norm": 1.294592261314392, "learning_rate": 4.9812823855248996e-06, "loss": 0.1819, "step": 629 }, { "epoch": 0.20414776409591703, "grad_norm": 1.1340450048446655, "learning_rate": 4.98117540955421e-06, "loss": 0.1735, "step": 630 }, { "epoch": 0.20447180816591057, "grad_norm": 1.2071174383163452, "learning_rate": 4.981068129911669e-06, "loss": 0.1872, "step": 631 }, { "epoch": 0.20479585223590407, "grad_norm": 1.153228998184204, "learning_rate": 4.980960546610408e-06, "loss": 0.19, "step": 632 }, { "epoch": 0.2051198963058976, "grad_norm": 1.1905288696289062, "learning_rate": 4.980852659663593e-06, "loss": 0.1668, "step": 633 }, { "epoch": 0.20544394037589112, "grad_norm": 1.1860299110412598, "learning_rate": 4.9807444690844296e-06, "loss": 0.1827, "step": 634 }, { "epoch": 0.20576798444588465, "grad_norm": 1.1758476495742798, "learning_rate": 4.980635974886158e-06, "loss": 0.199, "step": 635 }, { "epoch": 0.20609202851587816, "grad_norm": 1.2125898599624634, "learning_rate": 4.980527177082058e-06, "loss": 0.2062, "step": 636 }, { "epoch": 0.2064160725858717, "grad_norm": 1.203070878982544, "learning_rate": 4.980418075685445e-06, "loss": 0.1927, "step": 637 }, { "epoch": 0.2067401166558652, "grad_norm": 1.188751459121704, "learning_rate": 4.980308670709671e-06, "loss": 0.1865, "step": 638 }, { "epoch": 0.2070641607258587, "grad_norm": 1.1638119220733643, "learning_rate": 4.980198962168128e-06, "loss": 0.1928, "step": 639 }, { "epoch": 0.20738820479585224, "grad_norm": 1.1287533044815063, "learning_rate": 4.9800889500742415e-06, "loss": 0.1873, "step": 640 }, { "epoch": 0.20771224886584574, "grad_norm": 1.1128509044647217, "learning_rate": 4.979978634441477e-06, "loss": 0.1735, "step": 641 }, { "epoch": 0.20803629293583928, "grad_norm": 1.259926676750183, "learning_rate": 4.979868015283336e-06, "loss": 0.2012, "step": 642 }, { "epoch": 0.20836033700583279, "grad_norm": 1.155649185180664, "learning_rate": 4.979757092613357e-06, "loss": 0.1834, "step": 643 }, { "epoch": 0.20868438107582632, "grad_norm": 1.2149689197540283, "learning_rate": 4.979645866445114e-06, "loss": 0.2143, "step": 644 }, { "epoch": 0.20900842514581983, "grad_norm": 1.041944146156311, "learning_rate": 4.9795343367922235e-06, "loss": 0.1683, "step": 645 }, { "epoch": 0.20933246921581336, "grad_norm": 1.1962753534317017, "learning_rate": 4.979422503668334e-06, "loss": 0.172, "step": 646 }, { "epoch": 0.20965651328580687, "grad_norm": 1.273494005203247, "learning_rate": 4.979310367087132e-06, "loss": 0.1867, "step": 647 }, { "epoch": 0.2099805573558004, "grad_norm": 1.0932351350784302, "learning_rate": 4.979197927062343e-06, "loss": 0.187, "step": 648 }, { "epoch": 0.2103046014257939, "grad_norm": 1.18181312084198, "learning_rate": 4.979085183607728e-06, "loss": 0.1884, "step": 649 }, { "epoch": 0.21062864549578741, "grad_norm": 1.156614065170288, "learning_rate": 4.978972136737086e-06, "loss": 0.1961, "step": 650 }, { "epoch": 0.21095268956578095, "grad_norm": 1.1621402502059937, "learning_rate": 4.978858786464252e-06, "loss": 0.1901, "step": 651 }, { "epoch": 0.21127673363577446, "grad_norm": 1.298018455505371, "learning_rate": 4.978745132803101e-06, "loss": 0.1982, "step": 652 }, { "epoch": 0.211600777705768, "grad_norm": 1.1199668645858765, "learning_rate": 4.9786311757675425e-06, "loss": 0.1726, "step": 653 }, { "epoch": 0.2119248217757615, "grad_norm": 1.1561723947525024, "learning_rate": 4.978516915371522e-06, "loss": 0.1814, "step": 654 }, { "epoch": 0.21224886584575503, "grad_norm": 1.095726728439331, "learning_rate": 4.978402351629024e-06, "loss": 0.1768, "step": 655 }, { "epoch": 0.21257290991574854, "grad_norm": 1.2473907470703125, "learning_rate": 4.9782874845540715e-06, "loss": 0.2086, "step": 656 }, { "epoch": 0.21289695398574207, "grad_norm": 1.1992051601409912, "learning_rate": 4.978172314160724e-06, "loss": 0.1901, "step": 657 }, { "epoch": 0.21322099805573558, "grad_norm": 1.1524806022644043, "learning_rate": 4.9780568404630746e-06, "loss": 0.1879, "step": 658 }, { "epoch": 0.2135450421257291, "grad_norm": 1.2208423614501953, "learning_rate": 4.977941063475258e-06, "loss": 0.1853, "step": 659 }, { "epoch": 0.21386908619572262, "grad_norm": 1.1771621704101562, "learning_rate": 4.977824983211443e-06, "loss": 0.2026, "step": 660 }, { "epoch": 0.21419313026571613, "grad_norm": 1.1515275239944458, "learning_rate": 4.977708599685837e-06, "loss": 0.1769, "step": 661 }, { "epoch": 0.21451717433570966, "grad_norm": 1.1625581979751587, "learning_rate": 4.977591912912685e-06, "loss": 0.1933, "step": 662 }, { "epoch": 0.21484121840570317, "grad_norm": 1.1556047201156616, "learning_rate": 4.977474922906268e-06, "loss": 0.1885, "step": 663 }, { "epoch": 0.2151652624756967, "grad_norm": 1.1594843864440918, "learning_rate": 4.977357629680903e-06, "loss": 0.1899, "step": 664 }, { "epoch": 0.2154893065456902, "grad_norm": 1.0708738565444946, "learning_rate": 4.977240033250948e-06, "loss": 0.1737, "step": 665 }, { "epoch": 0.21581335061568374, "grad_norm": 1.0543522834777832, "learning_rate": 4.977122133630795e-06, "loss": 0.1781, "step": 666 }, { "epoch": 0.21613739468567725, "grad_norm": 1.1143603324890137, "learning_rate": 4.9770039308348725e-06, "loss": 0.1808, "step": 667 }, { "epoch": 0.21646143875567078, "grad_norm": 1.2150770425796509, "learning_rate": 4.9768854248776475e-06, "loss": 0.1799, "step": 668 }, { "epoch": 0.2167854828256643, "grad_norm": 1.2431052923202515, "learning_rate": 4.976766615773626e-06, "loss": 0.1911, "step": 669 }, { "epoch": 0.21710952689565782, "grad_norm": 1.1677272319793701, "learning_rate": 4.976647503537347e-06, "loss": 0.1765, "step": 670 }, { "epoch": 0.21743357096565133, "grad_norm": 1.0950998067855835, "learning_rate": 4.9765280881833885e-06, "loss": 0.162, "step": 671 }, { "epoch": 0.21775761503564484, "grad_norm": 1.2298495769500732, "learning_rate": 4.976408369726368e-06, "loss": 0.1857, "step": 672 }, { "epoch": 0.21808165910563837, "grad_norm": 1.0828574895858765, "learning_rate": 4.976288348180935e-06, "loss": 0.1803, "step": 673 }, { "epoch": 0.21840570317563188, "grad_norm": 1.1887153387069702, "learning_rate": 4.976168023561782e-06, "loss": 0.1801, "step": 674 }, { "epoch": 0.2187297472456254, "grad_norm": 1.1923915147781372, "learning_rate": 4.976047395883634e-06, "loss": 0.1932, "step": 675 }, { "epoch": 0.21905379131561892, "grad_norm": 1.193912386894226, "learning_rate": 4.975926465161254e-06, "loss": 0.1954, "step": 676 }, { "epoch": 0.21937783538561245, "grad_norm": 1.124260425567627, "learning_rate": 4.975805231409444e-06, "loss": 0.1728, "step": 677 }, { "epoch": 0.21970187945560596, "grad_norm": 1.070874571800232, "learning_rate": 4.975683694643041e-06, "loss": 0.1753, "step": 678 }, { "epoch": 0.2200259235255995, "grad_norm": 1.173336386680603, "learning_rate": 4.97556185487692e-06, "loss": 0.1731, "step": 679 }, { "epoch": 0.220349967595593, "grad_norm": 1.197383999824524, "learning_rate": 4.9754397121259935e-06, "loss": 0.1874, "step": 680 }, { "epoch": 0.22067401166558653, "grad_norm": 1.1929588317871094, "learning_rate": 4.975317266405211e-06, "loss": 0.1887, "step": 681 }, { "epoch": 0.22099805573558004, "grad_norm": 1.1419634819030762, "learning_rate": 4.975194517729557e-06, "loss": 0.1744, "step": 682 }, { "epoch": 0.22132209980557355, "grad_norm": 1.1331706047058105, "learning_rate": 4.975071466114057e-06, "loss": 0.1772, "step": 683 }, { "epoch": 0.22164614387556708, "grad_norm": 1.1220134496688843, "learning_rate": 4.974948111573768e-06, "loss": 0.191, "step": 684 }, { "epoch": 0.2219701879455606, "grad_norm": 1.1688811779022217, "learning_rate": 4.9748244541237915e-06, "loss": 0.167, "step": 685 }, { "epoch": 0.22229423201555412, "grad_norm": 1.1049573421478271, "learning_rate": 4.97470049377926e-06, "loss": 0.175, "step": 686 }, { "epoch": 0.22261827608554763, "grad_norm": 1.1855946779251099, "learning_rate": 4.974576230555344e-06, "loss": 0.1788, "step": 687 }, { "epoch": 0.22294232015554116, "grad_norm": 1.2324986457824707, "learning_rate": 4.974451664467253e-06, "loss": 0.1661, "step": 688 }, { "epoch": 0.22326636422553467, "grad_norm": 1.261545181274414, "learning_rate": 4.974326795530234e-06, "loss": 0.1904, "step": 689 }, { "epoch": 0.2235904082955282, "grad_norm": 1.0972044467926025, "learning_rate": 4.974201623759568e-06, "loss": 0.1738, "step": 690 }, { "epoch": 0.2239144523655217, "grad_norm": 1.1361660957336426, "learning_rate": 4.974076149170575e-06, "loss": 0.1679, "step": 691 }, { "epoch": 0.22423849643551522, "grad_norm": 1.222582221031189, "learning_rate": 4.973950371778612e-06, "loss": 0.1853, "step": 692 }, { "epoch": 0.22456254050550875, "grad_norm": 1.1474609375, "learning_rate": 4.973824291599074e-06, "loss": 0.1789, "step": 693 }, { "epoch": 0.22488658457550226, "grad_norm": 1.0796842575073242, "learning_rate": 4.973697908647391e-06, "loss": 0.17, "step": 694 }, { "epoch": 0.2252106286454958, "grad_norm": 1.19746732711792, "learning_rate": 4.973571222939031e-06, "loss": 0.1819, "step": 695 }, { "epoch": 0.2255346727154893, "grad_norm": 1.0817978382110596, "learning_rate": 4.973444234489499e-06, "loss": 0.191, "step": 696 }, { "epoch": 0.22585871678548283, "grad_norm": 1.1662395000457764, "learning_rate": 4.973316943314338e-06, "loss": 0.189, "step": 697 }, { "epoch": 0.22618276085547634, "grad_norm": 1.2320014238357544, "learning_rate": 4.9731893494291275e-06, "loss": 0.1777, "step": 698 }, { "epoch": 0.22650680492546987, "grad_norm": 1.099103331565857, "learning_rate": 4.973061452849481e-06, "loss": 0.1662, "step": 699 }, { "epoch": 0.22683084899546338, "grad_norm": 1.195725917816162, "learning_rate": 4.972933253591056e-06, "loss": 0.1798, "step": 700 }, { "epoch": 0.22715489306545691, "grad_norm": 1.120126724243164, "learning_rate": 4.972804751669539e-06, "loss": 0.1895, "step": 701 }, { "epoch": 0.22747893713545042, "grad_norm": 1.2362133264541626, "learning_rate": 4.972675947100659e-06, "loss": 0.1854, "step": 702 }, { "epoch": 0.22780298120544393, "grad_norm": 1.2055583000183105, "learning_rate": 4.972546839900181e-06, "loss": 0.1935, "step": 703 }, { "epoch": 0.22812702527543746, "grad_norm": 1.1982338428497314, "learning_rate": 4.972417430083906e-06, "loss": 0.1887, "step": 704 }, { "epoch": 0.22845106934543097, "grad_norm": 1.174058198928833, "learning_rate": 4.972287717667672e-06, "loss": 0.1722, "step": 705 }, { "epoch": 0.2287751134154245, "grad_norm": 1.1342368125915527, "learning_rate": 4.972157702667356e-06, "loss": 0.1741, "step": 706 }, { "epoch": 0.229099157485418, "grad_norm": 1.2124993801116943, "learning_rate": 4.972027385098868e-06, "loss": 0.1814, "step": 707 }, { "epoch": 0.22942320155541154, "grad_norm": 1.1575313806533813, "learning_rate": 4.97189676497816e-06, "loss": 0.1804, "step": 708 }, { "epoch": 0.22974724562540505, "grad_norm": 1.0987682342529297, "learning_rate": 4.971765842321218e-06, "loss": 0.1745, "step": 709 }, { "epoch": 0.23007128969539858, "grad_norm": 1.1387704610824585, "learning_rate": 4.971634617144065e-06, "loss": 0.1791, "step": 710 }, { "epoch": 0.2303953337653921, "grad_norm": 1.0869576930999756, "learning_rate": 4.971503089462762e-06, "loss": 0.168, "step": 711 }, { "epoch": 0.23071937783538563, "grad_norm": 1.177750587463379, "learning_rate": 4.9713712592934075e-06, "loss": 0.1879, "step": 712 }, { "epoch": 0.23104342190537913, "grad_norm": 1.0906344652175903, "learning_rate": 4.971239126652135e-06, "loss": 0.1755, "step": 713 }, { "epoch": 0.23136746597537264, "grad_norm": 1.2198588848114014, "learning_rate": 4.971106691555116e-06, "loss": 0.198, "step": 714 }, { "epoch": 0.23169151004536617, "grad_norm": 1.144925594329834, "learning_rate": 4.9709739540185616e-06, "loss": 0.1679, "step": 715 }, { "epoch": 0.23201555411535968, "grad_norm": 1.0273765325546265, "learning_rate": 4.970840914058716e-06, "loss": 0.1588, "step": 716 }, { "epoch": 0.2323395981853532, "grad_norm": 1.1109790802001953, "learning_rate": 4.970707571691862e-06, "loss": 0.1926, "step": 717 }, { "epoch": 0.23266364225534672, "grad_norm": 1.127719759941101, "learning_rate": 4.970573926934319e-06, "loss": 0.1947, "step": 718 }, { "epoch": 0.23298768632534025, "grad_norm": 1.1377744674682617, "learning_rate": 4.970439979802445e-06, "loss": 0.1811, "step": 719 }, { "epoch": 0.23331173039533376, "grad_norm": 1.1827045679092407, "learning_rate": 4.970305730312632e-06, "loss": 0.1919, "step": 720 }, { "epoch": 0.2336357744653273, "grad_norm": 1.0300381183624268, "learning_rate": 4.9701711784813135e-06, "loss": 0.1602, "step": 721 }, { "epoch": 0.2339598185353208, "grad_norm": 1.158111810684204, "learning_rate": 4.970036324324955e-06, "loss": 0.1767, "step": 722 }, { "epoch": 0.23428386260531434, "grad_norm": 1.1824687719345093, "learning_rate": 4.969901167860063e-06, "loss": 0.1798, "step": 723 }, { "epoch": 0.23460790667530784, "grad_norm": 1.1232125759124756, "learning_rate": 4.969765709103177e-06, "loss": 0.1692, "step": 724 }, { "epoch": 0.23493195074530135, "grad_norm": 1.1801738739013672, "learning_rate": 4.9696299480708785e-06, "loss": 0.1813, "step": 725 }, { "epoch": 0.23525599481529488, "grad_norm": 1.1177990436553955, "learning_rate": 4.969493884779783e-06, "loss": 0.1867, "step": 726 }, { "epoch": 0.2355800388852884, "grad_norm": 1.1591545343399048, "learning_rate": 4.969357519246542e-06, "loss": 0.1828, "step": 727 }, { "epoch": 0.23590408295528192, "grad_norm": 1.1052945852279663, "learning_rate": 4.9692208514878445e-06, "loss": 0.1872, "step": 728 }, { "epoch": 0.23622812702527543, "grad_norm": 1.1025199890136719, "learning_rate": 4.96908388152042e-06, "loss": 0.1709, "step": 729 }, { "epoch": 0.23655217109526896, "grad_norm": 1.0618928670883179, "learning_rate": 4.968946609361031e-06, "loss": 0.1665, "step": 730 }, { "epoch": 0.23687621516526247, "grad_norm": 1.199231743812561, "learning_rate": 4.968809035026477e-06, "loss": 0.1885, "step": 731 }, { "epoch": 0.237200259235256, "grad_norm": 1.0359363555908203, "learning_rate": 4.968671158533599e-06, "loss": 0.1678, "step": 732 }, { "epoch": 0.2375243033052495, "grad_norm": 1.1739076375961304, "learning_rate": 4.968532979899269e-06, "loss": 0.1826, "step": 733 }, { "epoch": 0.23784834737524305, "grad_norm": 1.2278923988342285, "learning_rate": 4.9683944991403985e-06, "loss": 0.1802, "step": 734 }, { "epoch": 0.23817239144523655, "grad_norm": 1.165088415145874, "learning_rate": 4.968255716273938e-06, "loss": 0.1694, "step": 735 }, { "epoch": 0.23849643551523006, "grad_norm": 1.1245152950286865, "learning_rate": 4.968116631316873e-06, "loss": 0.1769, "step": 736 }, { "epoch": 0.2388204795852236, "grad_norm": 1.0869536399841309, "learning_rate": 4.967977244286225e-06, "loss": 0.1569, "step": 737 }, { "epoch": 0.2391445236552171, "grad_norm": 1.4542876482009888, "learning_rate": 4.967837555199054e-06, "loss": 0.1847, "step": 738 }, { "epoch": 0.23946856772521063, "grad_norm": 1.2349416017532349, "learning_rate": 4.967697564072457e-06, "loss": 0.1916, "step": 739 }, { "epoch": 0.23979261179520414, "grad_norm": 1.1966452598571777, "learning_rate": 4.9675572709235665e-06, "loss": 0.1919, "step": 740 }, { "epoch": 0.24011665586519768, "grad_norm": 1.1190122365951538, "learning_rate": 4.967416675769555e-06, "loss": 0.182, "step": 741 }, { "epoch": 0.24044069993519118, "grad_norm": 1.224090814590454, "learning_rate": 4.967275778627628e-06, "loss": 0.1923, "step": 742 }, { "epoch": 0.24076474400518472, "grad_norm": 1.151762843132019, "learning_rate": 4.967134579515032e-06, "loss": 0.1895, "step": 743 }, { "epoch": 0.24108878807517822, "grad_norm": 1.1457250118255615, "learning_rate": 4.966993078449046e-06, "loss": 0.1753, "step": 744 }, { "epoch": 0.24141283214517173, "grad_norm": 1.1521213054656982, "learning_rate": 4.96685127544699e-06, "loss": 0.1725, "step": 745 }, { "epoch": 0.24173687621516526, "grad_norm": 1.084601640701294, "learning_rate": 4.966709170526219e-06, "loss": 0.1835, "step": 746 }, { "epoch": 0.24206092028515877, "grad_norm": 1.0784162282943726, "learning_rate": 4.966566763704124e-06, "loss": 0.1975, "step": 747 }, { "epoch": 0.2423849643551523, "grad_norm": 1.2336294651031494, "learning_rate": 4.966424054998137e-06, "loss": 0.1887, "step": 748 }, { "epoch": 0.2427090084251458, "grad_norm": 1.1423250436782837, "learning_rate": 4.966281044425722e-06, "loss": 0.182, "step": 749 }, { "epoch": 0.24303305249513935, "grad_norm": 1.0632977485656738, "learning_rate": 4.9661377320043815e-06, "loss": 0.1719, "step": 750 }, { "epoch": 0.24335709656513285, "grad_norm": 1.178665280342102, "learning_rate": 4.965994117751658e-06, "loss": 0.1949, "step": 751 }, { "epoch": 0.2436811406351264, "grad_norm": 1.1657874584197998, "learning_rate": 4.965850201685126e-06, "loss": 0.1817, "step": 752 }, { "epoch": 0.2440051847051199, "grad_norm": 1.0570935010910034, "learning_rate": 4.965705983822401e-06, "loss": 0.1731, "step": 753 }, { "epoch": 0.24432922877511343, "grad_norm": 1.027815818786621, "learning_rate": 4.965561464181134e-06, "loss": 0.164, "step": 754 }, { "epoch": 0.24465327284510693, "grad_norm": 1.262858271598816, "learning_rate": 4.965416642779012e-06, "loss": 0.1866, "step": 755 }, { "epoch": 0.24497731691510044, "grad_norm": 1.0563353300094604, "learning_rate": 4.96527151963376e-06, "loss": 0.1751, "step": 756 }, { "epoch": 0.24530136098509397, "grad_norm": 0.9677377939224243, "learning_rate": 4.9651260947631395e-06, "loss": 0.1582, "step": 757 }, { "epoch": 0.24562540505508748, "grad_norm": 1.0785053968429565, "learning_rate": 4.9649803681849495e-06, "loss": 0.1863, "step": 758 }, { "epoch": 0.24594944912508102, "grad_norm": 1.1994812488555908, "learning_rate": 4.9648343399170254e-06, "loss": 0.1812, "step": 759 }, { "epoch": 0.24627349319507452, "grad_norm": 1.0674124956130981, "learning_rate": 4.964688009977239e-06, "loss": 0.1596, "step": 760 }, { "epoch": 0.24659753726506806, "grad_norm": 1.0390790700912476, "learning_rate": 4.9645413783835006e-06, "loss": 0.1614, "step": 761 }, { "epoch": 0.24692158133506156, "grad_norm": 1.1015980243682861, "learning_rate": 4.964394445153756e-06, "loss": 0.1716, "step": 762 }, { "epoch": 0.2472456254050551, "grad_norm": 1.1416356563568115, "learning_rate": 4.964247210305989e-06, "loss": 0.1701, "step": 763 }, { "epoch": 0.2475696694750486, "grad_norm": 1.1309244632720947, "learning_rate": 4.964099673858219e-06, "loss": 0.1721, "step": 764 }, { "epoch": 0.24789371354504214, "grad_norm": 1.2402899265289307, "learning_rate": 4.963951835828503e-06, "loss": 0.1881, "step": 765 }, { "epoch": 0.24821775761503564, "grad_norm": 1.167535424232483, "learning_rate": 4.963803696234935e-06, "loss": 0.1874, "step": 766 }, { "epoch": 0.24854180168502915, "grad_norm": 1.0330973863601685, "learning_rate": 4.9636552550956465e-06, "loss": 0.1794, "step": 767 }, { "epoch": 0.24886584575502269, "grad_norm": 1.0965633392333984, "learning_rate": 4.963506512428804e-06, "loss": 0.1952, "step": 768 }, { "epoch": 0.2491898898250162, "grad_norm": 1.0930819511413574, "learning_rate": 4.963357468252614e-06, "loss": 0.1896, "step": 769 }, { "epoch": 0.24951393389500973, "grad_norm": 1.1125822067260742, "learning_rate": 4.9632081225853165e-06, "loss": 0.1961, "step": 770 }, { "epoch": 0.24983797796500323, "grad_norm": 1.1762914657592773, "learning_rate": 4.9630584754451906e-06, "loss": 0.1833, "step": 771 }, { "epoch": 0.25016202203499677, "grad_norm": 1.0865849256515503, "learning_rate": 4.962908526850552e-06, "loss": 0.1759, "step": 772 }, { "epoch": 0.2504860661049903, "grad_norm": 1.1505688428878784, "learning_rate": 4.962758276819752e-06, "loss": 0.1835, "step": 773 }, { "epoch": 0.2508101101749838, "grad_norm": 1.1822468042373657, "learning_rate": 4.9626077253711805e-06, "loss": 0.1771, "step": 774 }, { "epoch": 0.2511341542449773, "grad_norm": 1.0646265745162964, "learning_rate": 4.962456872523263e-06, "loss": 0.1753, "step": 775 }, { "epoch": 0.25145819831497085, "grad_norm": 1.2307766675949097, "learning_rate": 4.962305718294462e-06, "loss": 0.1814, "step": 776 }, { "epoch": 0.2517822423849643, "grad_norm": 1.13232421875, "learning_rate": 4.96215426270328e-06, "loss": 0.1847, "step": 777 }, { "epoch": 0.25210628645495786, "grad_norm": 1.1121189594268799, "learning_rate": 4.962002505768251e-06, "loss": 0.1823, "step": 778 }, { "epoch": 0.2524303305249514, "grad_norm": 1.018474817276001, "learning_rate": 4.961850447507948e-06, "loss": 0.1638, "step": 779 }, { "epoch": 0.25275437459494493, "grad_norm": 1.1667485237121582, "learning_rate": 4.961698087940984e-06, "loss": 0.187, "step": 780 }, { "epoch": 0.2530784186649384, "grad_norm": 1.1762861013412476, "learning_rate": 4.961545427086006e-06, "loss": 0.1791, "step": 781 }, { "epoch": 0.25340246273493194, "grad_norm": 1.076377034187317, "learning_rate": 4.961392464961695e-06, "loss": 0.1729, "step": 782 }, { "epoch": 0.2537265068049255, "grad_norm": 1.1636652946472168, "learning_rate": 4.961239201586776e-06, "loss": 0.2067, "step": 783 }, { "epoch": 0.254050550874919, "grad_norm": 1.1299415826797485, "learning_rate": 4.961085636980005e-06, "loss": 0.1841, "step": 784 }, { "epoch": 0.2543745949449125, "grad_norm": 1.199076771736145, "learning_rate": 4.960931771160177e-06, "loss": 0.193, "step": 785 }, { "epoch": 0.254698639014906, "grad_norm": 1.0781837701797485, "learning_rate": 4.960777604146124e-06, "loss": 0.1701, "step": 786 }, { "epoch": 0.25502268308489956, "grad_norm": 1.0722193717956543, "learning_rate": 4.9606231359567146e-06, "loss": 0.1797, "step": 787 }, { "epoch": 0.25534672715489304, "grad_norm": 1.0604737997055054, "learning_rate": 4.960468366610854e-06, "loss": 0.1689, "step": 788 }, { "epoch": 0.2556707712248866, "grad_norm": 1.0014824867248535, "learning_rate": 4.960313296127485e-06, "loss": 0.1559, "step": 789 }, { "epoch": 0.2559948152948801, "grad_norm": 1.0583043098449707, "learning_rate": 4.960157924525585e-06, "loss": 0.1781, "step": 790 }, { "epoch": 0.25631885936487364, "grad_norm": 1.066277265548706, "learning_rate": 4.960002251824172e-06, "loss": 0.1762, "step": 791 }, { "epoch": 0.2566429034348671, "grad_norm": 1.135353922843933, "learning_rate": 4.959846278042298e-06, "loss": 0.1952, "step": 792 }, { "epoch": 0.25696694750486065, "grad_norm": 1.230164647102356, "learning_rate": 4.959690003199052e-06, "loss": 0.1929, "step": 793 }, { "epoch": 0.2572909915748542, "grad_norm": 1.0832064151763916, "learning_rate": 4.959533427313562e-06, "loss": 0.1643, "step": 794 }, { "epoch": 0.2576150356448477, "grad_norm": 1.0479978322982788, "learning_rate": 4.95937655040499e-06, "loss": 0.1645, "step": 795 }, { "epoch": 0.2579390797148412, "grad_norm": 1.0821533203125, "learning_rate": 4.959219372492539e-06, "loss": 0.1767, "step": 796 }, { "epoch": 0.25826312378483474, "grad_norm": 1.1445401906967163, "learning_rate": 4.9590618935954415e-06, "loss": 0.1909, "step": 797 }, { "epoch": 0.25858716785482827, "grad_norm": 1.0938869714736938, "learning_rate": 4.958904113732975e-06, "loss": 0.1642, "step": 798 }, { "epoch": 0.25891121192482175, "grad_norm": 1.0983704328536987, "learning_rate": 4.958746032924449e-06, "loss": 0.1801, "step": 799 }, { "epoch": 0.2592352559948153, "grad_norm": 1.0518333911895752, "learning_rate": 4.95858765118921e-06, "loss": 0.1786, "step": 800 }, { "epoch": 0.2595593000648088, "grad_norm": 1.1234681606292725, "learning_rate": 4.9584289685466444e-06, "loss": 0.1826, "step": 801 }, { "epoch": 0.25988334413480235, "grad_norm": 1.0565016269683838, "learning_rate": 4.958269985016172e-06, "loss": 0.165, "step": 802 }, { "epoch": 0.26020738820479583, "grad_norm": 1.0922951698303223, "learning_rate": 4.958110700617251e-06, "loss": 0.1702, "step": 803 }, { "epoch": 0.26053143227478937, "grad_norm": 1.077223539352417, "learning_rate": 4.957951115369378e-06, "loss": 0.1686, "step": 804 }, { "epoch": 0.2608554763447829, "grad_norm": 1.0731221437454224, "learning_rate": 4.957791229292082e-06, "loss": 0.1793, "step": 805 }, { "epoch": 0.26117952041477643, "grad_norm": 1.3346308469772339, "learning_rate": 4.957631042404934e-06, "loss": 0.1741, "step": 806 }, { "epoch": 0.2615035644847699, "grad_norm": 1.1206022500991821, "learning_rate": 4.957470554727536e-06, "loss": 0.1904, "step": 807 }, { "epoch": 0.26182760855476345, "grad_norm": 1.0771183967590332, "learning_rate": 4.9573097662795344e-06, "loss": 0.1658, "step": 808 }, { "epoch": 0.262151652624757, "grad_norm": 1.1210774183273315, "learning_rate": 4.957148677080605e-06, "loss": 0.1886, "step": 809 }, { "epoch": 0.26247569669475046, "grad_norm": 1.1223516464233398, "learning_rate": 4.956987287150465e-06, "loss": 0.1809, "step": 810 }, { "epoch": 0.262799740764744, "grad_norm": 1.106168508529663, "learning_rate": 4.956825596508867e-06, "loss": 0.17, "step": 811 }, { "epoch": 0.26312378483473753, "grad_norm": 1.1239475011825562, "learning_rate": 4.956663605175599e-06, "loss": 0.192, "step": 812 }, { "epoch": 0.26344782890473106, "grad_norm": 1.1581978797912598, "learning_rate": 4.956501313170487e-06, "loss": 0.1939, "step": 813 }, { "epoch": 0.26377187297472454, "grad_norm": 1.0884262323379517, "learning_rate": 4.956338720513397e-06, "loss": 0.1865, "step": 814 }, { "epoch": 0.2640959170447181, "grad_norm": 1.0481289625167847, "learning_rate": 4.956175827224226e-06, "loss": 0.1759, "step": 815 }, { "epoch": 0.2644199611147116, "grad_norm": 1.0824658870697021, "learning_rate": 4.956012633322912e-06, "loss": 0.1657, "step": 816 }, { "epoch": 0.26474400518470514, "grad_norm": 1.0250821113586426, "learning_rate": 4.955849138829428e-06, "loss": 0.1563, "step": 817 }, { "epoch": 0.2650680492546986, "grad_norm": 1.0275894403457642, "learning_rate": 4.955685343763782e-06, "loss": 0.172, "step": 818 }, { "epoch": 0.26539209332469216, "grad_norm": 1.0740388631820679, "learning_rate": 4.9555212481460245e-06, "loss": 0.1767, "step": 819 }, { "epoch": 0.2657161373946857, "grad_norm": 1.1377601623535156, "learning_rate": 4.955356851996236e-06, "loss": 0.1786, "step": 820 }, { "epoch": 0.26604018146467917, "grad_norm": 1.0854454040527344, "learning_rate": 4.955192155334539e-06, "loss": 0.182, "step": 821 }, { "epoch": 0.2663642255346727, "grad_norm": 1.1744272708892822, "learning_rate": 4.955027158181092e-06, "loss": 0.1882, "step": 822 }, { "epoch": 0.26668826960466624, "grad_norm": 1.0906330347061157, "learning_rate": 4.9548618605560855e-06, "loss": 0.1832, "step": 823 }, { "epoch": 0.2670123136746598, "grad_norm": 1.0744832754135132, "learning_rate": 4.954696262479753e-06, "loss": 0.1705, "step": 824 }, { "epoch": 0.26733635774465325, "grad_norm": 1.241999864578247, "learning_rate": 4.954530363972361e-06, "loss": 0.1893, "step": 825 }, { "epoch": 0.2676604018146468, "grad_norm": 1.0803509950637817, "learning_rate": 4.954364165054214e-06, "loss": 0.1735, "step": 826 }, { "epoch": 0.2679844458846403, "grad_norm": 1.1593369245529175, "learning_rate": 4.9541976657456535e-06, "loss": 0.166, "step": 827 }, { "epoch": 0.26830848995463386, "grad_norm": 1.1206544637680054, "learning_rate": 4.954030866067057e-06, "loss": 0.1675, "step": 828 }, { "epoch": 0.26863253402462733, "grad_norm": 1.1313645839691162, "learning_rate": 4.95386376603884e-06, "loss": 0.1818, "step": 829 }, { "epoch": 0.26895657809462087, "grad_norm": 1.0753988027572632, "learning_rate": 4.953696365681452e-06, "loss": 0.1762, "step": 830 }, { "epoch": 0.2692806221646144, "grad_norm": 1.1497502326965332, "learning_rate": 4.953528665015383e-06, "loss": 0.173, "step": 831 }, { "epoch": 0.2696046662346079, "grad_norm": 1.0890166759490967, "learning_rate": 4.953360664061159e-06, "loss": 0.1823, "step": 832 }, { "epoch": 0.2699287103046014, "grad_norm": 1.1478276252746582, "learning_rate": 4.953192362839338e-06, "loss": 0.1752, "step": 833 }, { "epoch": 0.27025275437459495, "grad_norm": 1.0240693092346191, "learning_rate": 4.953023761370521e-06, "loss": 0.1663, "step": 834 }, { "epoch": 0.2705767984445885, "grad_norm": 1.083274006843567, "learning_rate": 4.952854859675343e-06, "loss": 0.1947, "step": 835 }, { "epoch": 0.27090084251458196, "grad_norm": 1.0814635753631592, "learning_rate": 4.952685657774476e-06, "loss": 0.1757, "step": 836 }, { "epoch": 0.2712248865845755, "grad_norm": 1.0839900970458984, "learning_rate": 4.952516155688628e-06, "loss": 0.1824, "step": 837 }, { "epoch": 0.27154893065456903, "grad_norm": 1.1437290906906128, "learning_rate": 4.9523463534385444e-06, "loss": 0.1772, "step": 838 }, { "epoch": 0.27187297472456257, "grad_norm": 1.006161093711853, "learning_rate": 4.952176251045008e-06, "loss": 0.1686, "step": 839 }, { "epoch": 0.27219701879455604, "grad_norm": 1.0428589582443237, "learning_rate": 4.952005848528838e-06, "loss": 0.1675, "step": 840 }, { "epoch": 0.2725210628645496, "grad_norm": 1.1923826932907104, "learning_rate": 4.951835145910888e-06, "loss": 0.1846, "step": 841 }, { "epoch": 0.2728451069345431, "grad_norm": 1.1122922897338867, "learning_rate": 4.951664143212053e-06, "loss": 0.1819, "step": 842 }, { "epoch": 0.2731691510045366, "grad_norm": 1.1033662557601929, "learning_rate": 4.95149284045326e-06, "loss": 0.161, "step": 843 }, { "epoch": 0.2734931950745301, "grad_norm": 1.053223967552185, "learning_rate": 4.951321237655477e-06, "loss": 0.1905, "step": 844 }, { "epoch": 0.27381723914452366, "grad_norm": 1.1286718845367432, "learning_rate": 4.951149334839703e-06, "loss": 0.1857, "step": 845 }, { "epoch": 0.2741412832145172, "grad_norm": 1.0990140438079834, "learning_rate": 4.950977132026981e-06, "loss": 0.1918, "step": 846 }, { "epoch": 0.2744653272845107, "grad_norm": 1.0953593254089355, "learning_rate": 4.9508046292383846e-06, "loss": 0.1701, "step": 847 }, { "epoch": 0.2747893713545042, "grad_norm": 1.0887120962142944, "learning_rate": 4.950631826495027e-06, "loss": 0.1678, "step": 848 }, { "epoch": 0.27511341542449774, "grad_norm": 1.1221007108688354, "learning_rate": 4.950458723818058e-06, "loss": 0.1741, "step": 849 }, { "epoch": 0.2754374594944913, "grad_norm": 1.0366301536560059, "learning_rate": 4.950285321228664e-06, "loss": 0.1573, "step": 850 }, { "epoch": 0.27576150356448476, "grad_norm": 1.098291277885437, "learning_rate": 4.950111618748067e-06, "loss": 0.1849, "step": 851 }, { "epoch": 0.2760855476344783, "grad_norm": 1.1351509094238281, "learning_rate": 4.949937616397527e-06, "loss": 0.1765, "step": 852 }, { "epoch": 0.2764095917044718, "grad_norm": 1.0571086406707764, "learning_rate": 4.949763314198339e-06, "loss": 0.1761, "step": 853 }, { "epoch": 0.2767336357744653, "grad_norm": 1.0983611345291138, "learning_rate": 4.949588712171838e-06, "loss": 0.1784, "step": 854 }, { "epoch": 0.27705767984445884, "grad_norm": 1.1928962469100952, "learning_rate": 4.949413810339392e-06, "loss": 0.1795, "step": 855 }, { "epoch": 0.27738172391445237, "grad_norm": 1.1288187503814697, "learning_rate": 4.949238608722408e-06, "loss": 0.1989, "step": 856 }, { "epoch": 0.2777057679844459, "grad_norm": 1.1986991167068481, "learning_rate": 4.949063107342329e-06, "loss": 0.196, "step": 857 }, { "epoch": 0.2780298120544394, "grad_norm": 1.0543721914291382, "learning_rate": 4.948887306220634e-06, "loss": 0.1713, "step": 858 }, { "epoch": 0.2783538561244329, "grad_norm": 1.0088876485824585, "learning_rate": 4.94871120537884e-06, "loss": 0.1732, "step": 859 }, { "epoch": 0.27867790019442645, "grad_norm": 1.0996417999267578, "learning_rate": 4.9485348048385e-06, "loss": 0.1724, "step": 860 }, { "epoch": 0.27900194426442, "grad_norm": 1.0093612670898438, "learning_rate": 4.9483581046212025e-06, "loss": 0.1711, "step": 861 }, { "epoch": 0.27932598833441347, "grad_norm": 1.0933109521865845, "learning_rate": 4.948181104748576e-06, "loss": 0.1835, "step": 862 }, { "epoch": 0.279650032404407, "grad_norm": 1.0886743068695068, "learning_rate": 4.948003805242282e-06, "loss": 0.1663, "step": 863 }, { "epoch": 0.27997407647440054, "grad_norm": 1.0580806732177734, "learning_rate": 4.9478262061240216e-06, "loss": 0.179, "step": 864 }, { "epoch": 0.280298120544394, "grad_norm": 1.0770388841629028, "learning_rate": 4.947648307415529e-06, "loss": 0.1539, "step": 865 }, { "epoch": 0.28062216461438755, "grad_norm": 1.1204828023910522, "learning_rate": 4.947470109138579e-06, "loss": 0.1519, "step": 866 }, { "epoch": 0.2809462086843811, "grad_norm": 1.175567388534546, "learning_rate": 4.947291611314981e-06, "loss": 0.1882, "step": 867 }, { "epoch": 0.2812702527543746, "grad_norm": 1.0687494277954102, "learning_rate": 4.9471128139665826e-06, "loss": 0.1696, "step": 868 }, { "epoch": 0.2815942968243681, "grad_norm": 1.1533077955245972, "learning_rate": 4.9469337171152645e-06, "loss": 0.185, "step": 869 }, { "epoch": 0.28191834089436163, "grad_norm": 1.1578819751739502, "learning_rate": 4.946754320782948e-06, "loss": 0.186, "step": 870 }, { "epoch": 0.28224238496435516, "grad_norm": 1.2411943674087524, "learning_rate": 4.946574624991589e-06, "loss": 0.1921, "step": 871 }, { "epoch": 0.2825664290343487, "grad_norm": 0.9703081250190735, "learning_rate": 4.946394629763181e-06, "loss": 0.1565, "step": 872 }, { "epoch": 0.2828904731043422, "grad_norm": 1.061703085899353, "learning_rate": 4.946214335119752e-06, "loss": 0.1664, "step": 873 }, { "epoch": 0.2832145171743357, "grad_norm": 1.0972583293914795, "learning_rate": 4.94603374108337e-06, "loss": 0.1915, "step": 874 }, { "epoch": 0.28353856124432925, "grad_norm": 1.1283047199249268, "learning_rate": 4.945852847676138e-06, "loss": 0.1836, "step": 875 }, { "epoch": 0.2838626053143227, "grad_norm": 1.1044758558273315, "learning_rate": 4.945671654920195e-06, "loss": 0.17, "step": 876 }, { "epoch": 0.28418664938431626, "grad_norm": 1.046217679977417, "learning_rate": 4.945490162837718e-06, "loss": 0.1671, "step": 877 }, { "epoch": 0.2845106934543098, "grad_norm": 1.0820338726043701, "learning_rate": 4.945308371450919e-06, "loss": 0.173, "step": 878 }, { "epoch": 0.2848347375243033, "grad_norm": 1.1840169429779053, "learning_rate": 4.945126280782047e-06, "loss": 0.1952, "step": 879 }, { "epoch": 0.2851587815942968, "grad_norm": 1.0911513566970825, "learning_rate": 4.944943890853389e-06, "loss": 0.1781, "step": 880 }, { "epoch": 0.28548282566429034, "grad_norm": 1.0861856937408447, "learning_rate": 4.944761201687268e-06, "loss": 0.1661, "step": 881 }, { "epoch": 0.2858068697342839, "grad_norm": 1.0911214351654053, "learning_rate": 4.944578213306043e-06, "loss": 0.189, "step": 882 }, { "epoch": 0.2861309138042774, "grad_norm": 1.0918217897415161, "learning_rate": 4.94439492573211e-06, "loss": 0.1879, "step": 883 }, { "epoch": 0.2864549578742709, "grad_norm": 1.1766140460968018, "learning_rate": 4.944211338987901e-06, "loss": 0.1774, "step": 884 }, { "epoch": 0.2867790019442644, "grad_norm": 1.0490213632583618, "learning_rate": 4.944027453095887e-06, "loss": 0.1666, "step": 885 }, { "epoch": 0.28710304601425796, "grad_norm": 1.2016412019729614, "learning_rate": 4.943843268078572e-06, "loss": 0.1794, "step": 886 }, { "epoch": 0.28742709008425144, "grad_norm": 1.0137540102005005, "learning_rate": 4.9436587839585e-06, "loss": 0.1671, "step": 887 }, { "epoch": 0.28775113415424497, "grad_norm": 1.1008466482162476, "learning_rate": 4.9434740007582485e-06, "loss": 0.1864, "step": 888 }, { "epoch": 0.2880751782242385, "grad_norm": 1.0474399328231812, "learning_rate": 4.943288918500434e-06, "loss": 0.1699, "step": 889 }, { "epoch": 0.28839922229423204, "grad_norm": 0.9579008221626282, "learning_rate": 4.94310353720771e-06, "loss": 0.1554, "step": 890 }, { "epoch": 0.2887232663642255, "grad_norm": 1.095854640007019, "learning_rate": 4.942917856902763e-06, "loss": 0.187, "step": 891 }, { "epoch": 0.28904731043421905, "grad_norm": 1.0819822549819946, "learning_rate": 4.942731877608319e-06, "loss": 0.1728, "step": 892 }, { "epoch": 0.2893713545042126, "grad_norm": 1.2087551355361938, "learning_rate": 4.942545599347142e-06, "loss": 0.1852, "step": 893 }, { "epoch": 0.28969539857420606, "grad_norm": 1.020182490348816, "learning_rate": 4.942359022142028e-06, "loss": 0.1591, "step": 894 }, { "epoch": 0.2900194426441996, "grad_norm": 1.0779310464859009, "learning_rate": 4.942172146015814e-06, "loss": 0.1695, "step": 895 }, { "epoch": 0.29034348671419313, "grad_norm": 1.0906720161437988, "learning_rate": 4.941984970991372e-06, "loss": 0.1781, "step": 896 }, { "epoch": 0.29066753078418667, "grad_norm": 0.9882294535636902, "learning_rate": 4.9417974970916096e-06, "loss": 0.164, "step": 897 }, { "epoch": 0.29099157485418015, "grad_norm": 1.0383557081222534, "learning_rate": 4.9416097243394725e-06, "loss": 0.183, "step": 898 }, { "epoch": 0.2913156189241737, "grad_norm": 1.0150779485702515, "learning_rate": 4.94142165275794e-06, "loss": 0.1712, "step": 899 }, { "epoch": 0.2916396629941672, "grad_norm": 1.071306824684143, "learning_rate": 4.941233282370034e-06, "loss": 0.1839, "step": 900 }, { "epoch": 0.29196370706416075, "grad_norm": 0.9918048977851868, "learning_rate": 4.941044613198807e-06, "loss": 0.172, "step": 901 }, { "epoch": 0.29228775113415423, "grad_norm": 0.9918202757835388, "learning_rate": 4.940855645267349e-06, "loss": 0.1625, "step": 902 }, { "epoch": 0.29261179520414776, "grad_norm": 1.1438778638839722, "learning_rate": 4.94066637859879e-06, "loss": 0.1832, "step": 903 }, { "epoch": 0.2929358392741413, "grad_norm": 1.005126953125, "learning_rate": 4.940476813216294e-06, "loss": 0.1769, "step": 904 }, { "epoch": 0.2932598833441348, "grad_norm": 1.1077789068222046, "learning_rate": 4.940286949143061e-06, "loss": 0.179, "step": 905 }, { "epoch": 0.2935839274141283, "grad_norm": 1.115727186203003, "learning_rate": 4.940096786402331e-06, "loss": 0.1653, "step": 906 }, { "epoch": 0.29390797148412184, "grad_norm": 1.1475930213928223, "learning_rate": 4.939906325017374e-06, "loss": 0.183, "step": 907 }, { "epoch": 0.2942320155541154, "grad_norm": 1.0840696096420288, "learning_rate": 4.939715565011504e-06, "loss": 0.1747, "step": 908 }, { "epoch": 0.29455605962410886, "grad_norm": 1.0846959352493286, "learning_rate": 4.939524506408068e-06, "loss": 0.1763, "step": 909 }, { "epoch": 0.2948801036941024, "grad_norm": 1.0708680152893066, "learning_rate": 4.939333149230447e-06, "loss": 0.1832, "step": 910 }, { "epoch": 0.2952041477640959, "grad_norm": 1.0281907320022583, "learning_rate": 4.9391414935020656e-06, "loss": 0.1721, "step": 911 }, { "epoch": 0.29552819183408946, "grad_norm": 0.8992030620574951, "learning_rate": 4.938949539246376e-06, "loss": 0.1537, "step": 912 }, { "epoch": 0.29585223590408294, "grad_norm": 1.1132668256759644, "learning_rate": 4.938757286486874e-06, "loss": 0.1725, "step": 913 }, { "epoch": 0.2961762799740765, "grad_norm": 1.146109700202942, "learning_rate": 4.93856473524709e-06, "loss": 0.179, "step": 914 }, { "epoch": 0.29650032404407, "grad_norm": 0.9988548755645752, "learning_rate": 4.938371885550589e-06, "loss": 0.1581, "step": 915 }, { "epoch": 0.2968243681140635, "grad_norm": 1.0798379182815552, "learning_rate": 4.938178737420974e-06, "loss": 0.1761, "step": 916 }, { "epoch": 0.297148412184057, "grad_norm": 1.0951101779937744, "learning_rate": 4.937985290881886e-06, "loss": 0.1619, "step": 917 }, { "epoch": 0.29747245625405055, "grad_norm": 0.9965536594390869, "learning_rate": 4.9377915459569995e-06, "loss": 0.1564, "step": 918 }, { "epoch": 0.2977965003240441, "grad_norm": 1.1427702903747559, "learning_rate": 4.937597502670027e-06, "loss": 0.1853, "step": 919 }, { "epoch": 0.29812054439403757, "grad_norm": 1.0869373083114624, "learning_rate": 4.9374031610447185e-06, "loss": 0.1657, "step": 920 }, { "epoch": 0.2984445884640311, "grad_norm": 1.1138787269592285, "learning_rate": 4.937208521104858e-06, "loss": 0.1665, "step": 921 }, { "epoch": 0.29876863253402464, "grad_norm": 1.1613776683807373, "learning_rate": 4.937013582874269e-06, "loss": 0.1818, "step": 922 }, { "epoch": 0.29909267660401817, "grad_norm": 1.084401249885559, "learning_rate": 4.93681834637681e-06, "loss": 0.1811, "step": 923 }, { "epoch": 0.29941672067401165, "grad_norm": 1.1521601676940918, "learning_rate": 4.936622811636376e-06, "loss": 0.1704, "step": 924 }, { "epoch": 0.2997407647440052, "grad_norm": 1.006294846534729, "learning_rate": 4.936426978676897e-06, "loss": 0.1649, "step": 925 }, { "epoch": 0.3000648088139987, "grad_norm": 1.1440045833587646, "learning_rate": 4.936230847522343e-06, "loss": 0.179, "step": 926 }, { "epoch": 0.3003888528839922, "grad_norm": 1.0526880025863647, "learning_rate": 4.936034418196718e-06, "loss": 0.1828, "step": 927 }, { "epoch": 0.30071289695398573, "grad_norm": 1.721049427986145, "learning_rate": 4.935837690724063e-06, "loss": 0.176, "step": 928 }, { "epoch": 0.30103694102397927, "grad_norm": 1.0776753425598145, "learning_rate": 4.935640665128454e-06, "loss": 0.1637, "step": 929 }, { "epoch": 0.3013609850939728, "grad_norm": 1.0923445224761963, "learning_rate": 4.935443341434008e-06, "loss": 0.1707, "step": 930 }, { "epoch": 0.3016850291639663, "grad_norm": 1.0009198188781738, "learning_rate": 4.935245719664873e-06, "loss": 0.1715, "step": 931 }, { "epoch": 0.3020090732339598, "grad_norm": 1.0673625469207764, "learning_rate": 4.935047799845238e-06, "loss": 0.1711, "step": 932 }, { "epoch": 0.30233311730395335, "grad_norm": 1.2885209321975708, "learning_rate": 4.9348495819993235e-06, "loss": 0.1728, "step": 933 }, { "epoch": 0.3026571613739469, "grad_norm": 1.1276648044586182, "learning_rate": 4.9346510661513924e-06, "loss": 0.1873, "step": 934 }, { "epoch": 0.30298120544394036, "grad_norm": 1.1598179340362549, "learning_rate": 4.93445225232574e-06, "loss": 0.1845, "step": 935 }, { "epoch": 0.3033052495139339, "grad_norm": 1.1591609716415405, "learning_rate": 4.9342531405467e-06, "loss": 0.1778, "step": 936 }, { "epoch": 0.30362929358392743, "grad_norm": 1.0711930990219116, "learning_rate": 4.934053730838639e-06, "loss": 0.1662, "step": 937 }, { "epoch": 0.3039533376539209, "grad_norm": 1.1723405122756958, "learning_rate": 4.9338540232259664e-06, "loss": 0.177, "step": 938 }, { "epoch": 0.30427738172391444, "grad_norm": 1.033252239227295, "learning_rate": 4.9336540177331225e-06, "loss": 0.1706, "step": 939 }, { "epoch": 0.304601425793908, "grad_norm": 1.1548298597335815, "learning_rate": 4.9334537143845876e-06, "loss": 0.1886, "step": 940 }, { "epoch": 0.3049254698639015, "grad_norm": 1.0255663394927979, "learning_rate": 4.933253113204874e-06, "loss": 0.18, "step": 941 }, { "epoch": 0.305249513933895, "grad_norm": 1.3654695749282837, "learning_rate": 4.933052214218535e-06, "loss": 0.1961, "step": 942 }, { "epoch": 0.3055735580038885, "grad_norm": 1.3150925636291504, "learning_rate": 4.93285101745016e-06, "loss": 0.178, "step": 943 }, { "epoch": 0.30589760207388206, "grad_norm": 1.116111397743225, "learning_rate": 4.932649522924372e-06, "loss": 0.1777, "step": 944 }, { "epoch": 0.3062216461438756, "grad_norm": 1.0210191011428833, "learning_rate": 4.932447730665832e-06, "loss": 0.1585, "step": 945 }, { "epoch": 0.30654569021386907, "grad_norm": 0.9345679879188538, "learning_rate": 4.932245640699238e-06, "loss": 0.1651, "step": 946 }, { "epoch": 0.3068697342838626, "grad_norm": 1.1611742973327637, "learning_rate": 4.932043253049323e-06, "loss": 0.1863, "step": 947 }, { "epoch": 0.30719377835385614, "grad_norm": 1.1058744192123413, "learning_rate": 4.931840567740858e-06, "loss": 0.1713, "step": 948 }, { "epoch": 0.3075178224238496, "grad_norm": 1.1998214721679688, "learning_rate": 4.93163758479865e-06, "loss": 0.2018, "step": 949 }, { "epoch": 0.30784186649384315, "grad_norm": 1.105220079421997, "learning_rate": 4.931434304247541e-06, "loss": 0.1756, "step": 950 }, { "epoch": 0.3081659105638367, "grad_norm": 1.0230122804641724, "learning_rate": 4.931230726112412e-06, "loss": 0.1769, "step": 951 }, { "epoch": 0.3084899546338302, "grad_norm": 1.0697253942489624, "learning_rate": 4.9310268504181764e-06, "loss": 0.1809, "step": 952 }, { "epoch": 0.3088139987038237, "grad_norm": 1.0562597513198853, "learning_rate": 4.930822677189791e-06, "loss": 0.161, "step": 953 }, { "epoch": 0.30913804277381723, "grad_norm": 0.9493795037269592, "learning_rate": 4.93061820645224e-06, "loss": 0.1547, "step": 954 }, { "epoch": 0.30946208684381077, "grad_norm": 0.9875866770744324, "learning_rate": 4.930413438230552e-06, "loss": 0.17, "step": 955 }, { "epoch": 0.3097861309138043, "grad_norm": 0.9486485123634338, "learning_rate": 4.930208372549787e-06, "loss": 0.1583, "step": 956 }, { "epoch": 0.3101101749837978, "grad_norm": 1.0512800216674805, "learning_rate": 4.930003009435043e-06, "loss": 0.1717, "step": 957 }, { "epoch": 0.3104342190537913, "grad_norm": 0.9418579936027527, "learning_rate": 4.9297973489114565e-06, "loss": 0.1573, "step": 958 }, { "epoch": 0.31075826312378485, "grad_norm": 1.1517508029937744, "learning_rate": 4.929591391004196e-06, "loss": 0.1868, "step": 959 }, { "epoch": 0.31108230719377833, "grad_norm": 1.0254417657852173, "learning_rate": 4.929385135738469e-06, "loss": 0.1613, "step": 960 }, { "epoch": 0.31140635126377186, "grad_norm": 1.1666157245635986, "learning_rate": 4.92917858313952e-06, "loss": 0.1892, "step": 961 }, { "epoch": 0.3117303953337654, "grad_norm": 1.1078205108642578, "learning_rate": 4.928971733232628e-06, "loss": 0.1685, "step": 962 }, { "epoch": 0.31205443940375893, "grad_norm": 1.0600954294204712, "learning_rate": 4.928764586043111e-06, "loss": 0.1754, "step": 963 }, { "epoch": 0.3123784834737524, "grad_norm": 1.190826177597046, "learning_rate": 4.9285571415963205e-06, "loss": 0.1771, "step": 964 }, { "epoch": 0.31270252754374595, "grad_norm": 1.0361576080322266, "learning_rate": 4.928349399917646e-06, "loss": 0.1838, "step": 965 }, { "epoch": 0.3130265716137395, "grad_norm": 0.980185329914093, "learning_rate": 4.928141361032513e-06, "loss": 0.1643, "step": 966 }, { "epoch": 0.313350615683733, "grad_norm": 1.0426617860794067, "learning_rate": 4.927933024966385e-06, "loss": 0.1656, "step": 967 }, { "epoch": 0.3136746597537265, "grad_norm": 1.0412538051605225, "learning_rate": 4.927724391744758e-06, "loss": 0.1661, "step": 968 }, { "epoch": 0.31399870382372, "grad_norm": 1.0205472707748413, "learning_rate": 4.927515461393167e-06, "loss": 0.1811, "step": 969 }, { "epoch": 0.31432274789371356, "grad_norm": 0.9813237190246582, "learning_rate": 4.927306233937185e-06, "loss": 0.1561, "step": 970 }, { "epoch": 0.31464679196370704, "grad_norm": 1.0640448331832886, "learning_rate": 4.927096709402417e-06, "loss": 0.1701, "step": 971 }, { "epoch": 0.3149708360337006, "grad_norm": 1.0201969146728516, "learning_rate": 4.926886887814509e-06, "loss": 0.1727, "step": 972 }, { "epoch": 0.3152948801036941, "grad_norm": 1.0877408981323242, "learning_rate": 4.926676769199139e-06, "loss": 0.171, "step": 973 }, { "epoch": 0.31561892417368764, "grad_norm": 1.0435376167297363, "learning_rate": 4.9264663535820256e-06, "loss": 0.1806, "step": 974 }, { "epoch": 0.3159429682436811, "grad_norm": 1.0373156070709229, "learning_rate": 4.926255640988919e-06, "loss": 0.178, "step": 975 }, { "epoch": 0.31626701231367466, "grad_norm": 1.0691494941711426, "learning_rate": 4.926044631445611e-06, "loss": 0.175, "step": 976 }, { "epoch": 0.3165910563836682, "grad_norm": 0.9887287616729736, "learning_rate": 4.925833324977926e-06, "loss": 0.1791, "step": 977 }, { "epoch": 0.3169151004536617, "grad_norm": 1.038283109664917, "learning_rate": 4.925621721611726e-06, "loss": 0.1734, "step": 978 }, { "epoch": 0.3172391445236552, "grad_norm": 1.0908006429672241, "learning_rate": 4.925409821372908e-06, "loss": 0.1838, "step": 979 }, { "epoch": 0.31756318859364874, "grad_norm": 1.0472310781478882, "learning_rate": 4.925197624287409e-06, "loss": 0.191, "step": 980 }, { "epoch": 0.31788723266364227, "grad_norm": 1.0285770893096924, "learning_rate": 4.924985130381198e-06, "loss": 0.1593, "step": 981 }, { "epoch": 0.31821127673363575, "grad_norm": 1.0358308553695679, "learning_rate": 4.924772339680283e-06, "loss": 0.1876, "step": 982 }, { "epoch": 0.3185353208036293, "grad_norm": 1.0257309675216675, "learning_rate": 4.9245592522107065e-06, "loss": 0.1766, "step": 983 }, { "epoch": 0.3188593648736228, "grad_norm": 0.9954764246940613, "learning_rate": 4.92434586799855e-06, "loss": 0.1617, "step": 984 }, { "epoch": 0.31918340894361635, "grad_norm": 1.1356722116470337, "learning_rate": 4.924132187069928e-06, "loss": 0.1799, "step": 985 }, { "epoch": 0.31950745301360983, "grad_norm": 0.9893943071365356, "learning_rate": 4.923918209450994e-06, "loss": 0.1634, "step": 986 }, { "epoch": 0.31983149708360337, "grad_norm": 1.1390126943588257, "learning_rate": 4.9237039351679365e-06, "loss": 0.1855, "step": 987 }, { "epoch": 0.3201555411535969, "grad_norm": 1.0541259050369263, "learning_rate": 4.923489364246981e-06, "loss": 0.1807, "step": 988 }, { "epoch": 0.32047958522359044, "grad_norm": 1.0718092918395996, "learning_rate": 4.923274496714387e-06, "loss": 0.1763, "step": 989 }, { "epoch": 0.3208036292935839, "grad_norm": 1.102406620979309, "learning_rate": 4.923059332596456e-06, "loss": 0.173, "step": 990 }, { "epoch": 0.32112767336357745, "grad_norm": 1.1300830841064453, "learning_rate": 4.922843871919518e-06, "loss": 0.1818, "step": 991 }, { "epoch": 0.321451717433571, "grad_norm": 1.021759033203125, "learning_rate": 4.922628114709945e-06, "loss": 0.1656, "step": 992 }, { "epoch": 0.32177576150356446, "grad_norm": 1.0214760303497314, "learning_rate": 4.922412060994145e-06, "loss": 0.1733, "step": 993 }, { "epoch": 0.322099805573558, "grad_norm": 1.0010415315628052, "learning_rate": 4.922195710798559e-06, "loss": 0.1529, "step": 994 }, { "epoch": 0.32242384964355153, "grad_norm": 1.140674114227295, "learning_rate": 4.9219790641496656e-06, "loss": 0.177, "step": 995 }, { "epoch": 0.32274789371354506, "grad_norm": 0.9754520654678345, "learning_rate": 4.9217621210739826e-06, "loss": 0.1698, "step": 996 }, { "epoch": 0.32307193778353854, "grad_norm": 1.0422990322113037, "learning_rate": 4.921544881598059e-06, "loss": 0.1808, "step": 997 }, { "epoch": 0.3233959818535321, "grad_norm": 1.1023666858673096, "learning_rate": 4.921327345748486e-06, "loss": 0.1743, "step": 998 }, { "epoch": 0.3237200259235256, "grad_norm": 1.0531352758407593, "learning_rate": 4.921109513551885e-06, "loss": 0.1463, "step": 999 }, { "epoch": 0.32404406999351915, "grad_norm": 0.9822701811790466, "learning_rate": 4.920891385034918e-06, "loss": 0.156, "step": 1000 }, { "epoch": 0.3243681140635126, "grad_norm": 1.0433939695358276, "learning_rate": 4.920672960224282e-06, "loss": 0.1799, "step": 1001 }, { "epoch": 0.32469215813350616, "grad_norm": 1.1296837329864502, "learning_rate": 4.920454239146709e-06, "loss": 0.1774, "step": 1002 }, { "epoch": 0.3250162022034997, "grad_norm": 1.0009548664093018, "learning_rate": 4.92023522182897e-06, "loss": 0.1501, "step": 1003 }, { "epoch": 0.32534024627349317, "grad_norm": 1.0985316038131714, "learning_rate": 4.9200159082978685e-06, "loss": 0.1746, "step": 1004 }, { "epoch": 0.3256642903434867, "grad_norm": 1.0474634170532227, "learning_rate": 4.919796298580247e-06, "loss": 0.1661, "step": 1005 }, { "epoch": 0.32598833441348024, "grad_norm": 1.036765694618225, "learning_rate": 4.919576392702984e-06, "loss": 0.1812, "step": 1006 }, { "epoch": 0.3263123784834738, "grad_norm": 1.0654313564300537, "learning_rate": 4.9193561906929945e-06, "loss": 0.1777, "step": 1007 }, { "epoch": 0.32663642255346725, "grad_norm": 1.0503085851669312, "learning_rate": 4.919135692577229e-06, "loss": 0.1525, "step": 1008 }, { "epoch": 0.3269604666234608, "grad_norm": 1.0202323198318481, "learning_rate": 4.918914898382673e-06, "loss": 0.1728, "step": 1009 }, { "epoch": 0.3272845106934543, "grad_norm": 1.02118718624115, "learning_rate": 4.91869380813635e-06, "loss": 0.1763, "step": 1010 }, { "epoch": 0.3276085547634478, "grad_norm": 0.99228435754776, "learning_rate": 4.91847242186532e-06, "loss": 0.1563, "step": 1011 }, { "epoch": 0.32793259883344134, "grad_norm": 1.0361772775650024, "learning_rate": 4.918250739596678e-06, "loss": 0.1779, "step": 1012 }, { "epoch": 0.32825664290343487, "grad_norm": 1.1137558221817017, "learning_rate": 4.918028761357557e-06, "loss": 0.1614, "step": 1013 }, { "epoch": 0.3285806869734284, "grad_norm": 0.9790223240852356, "learning_rate": 4.917806487175123e-06, "loss": 0.1627, "step": 1014 }, { "epoch": 0.3289047310434219, "grad_norm": 1.0379266738891602, "learning_rate": 4.917583917076581e-06, "loss": 0.1651, "step": 1015 }, { "epoch": 0.3292287751134154, "grad_norm": 1.0073673725128174, "learning_rate": 4.917361051089172e-06, "loss": 0.1838, "step": 1016 }, { "epoch": 0.32955281918340895, "grad_norm": 1.0644619464874268, "learning_rate": 4.917137889240172e-06, "loss": 0.1724, "step": 1017 }, { "epoch": 0.3298768632534025, "grad_norm": 1.0988903045654297, "learning_rate": 4.916914431556895e-06, "loss": 0.1787, "step": 1018 }, { "epoch": 0.33020090732339596, "grad_norm": 1.063129186630249, "learning_rate": 4.916690678066688e-06, "loss": 0.181, "step": 1019 }, { "epoch": 0.3305249513933895, "grad_norm": 1.057511806488037, "learning_rate": 4.916466628796938e-06, "loss": 0.174, "step": 1020 }, { "epoch": 0.33084899546338303, "grad_norm": 0.9521802663803101, "learning_rate": 4.9162422837750654e-06, "loss": 0.1579, "step": 1021 }, { "epoch": 0.3311730395333765, "grad_norm": 1.0636956691741943, "learning_rate": 4.916017643028529e-06, "loss": 0.1627, "step": 1022 }, { "epoch": 0.33149708360337005, "grad_norm": 1.0339527130126953, "learning_rate": 4.915792706584821e-06, "loss": 0.1567, "step": 1023 }, { "epoch": 0.3318211276733636, "grad_norm": 0.9493985772132874, "learning_rate": 4.9155674744714725e-06, "loss": 0.1567, "step": 1024 }, { "epoch": 0.3321451717433571, "grad_norm": 1.0365159511566162, "learning_rate": 4.91534194671605e-06, "loss": 0.1746, "step": 1025 }, { "epoch": 0.3324692158133506, "grad_norm": 1.083169937133789, "learning_rate": 4.915116123346155e-06, "loss": 0.1913, "step": 1026 }, { "epoch": 0.33279325988334413, "grad_norm": 1.019592046737671, "learning_rate": 4.9148900043894275e-06, "loss": 0.1843, "step": 1027 }, { "epoch": 0.33311730395333766, "grad_norm": 1.0441523790359497, "learning_rate": 4.914663589873541e-06, "loss": 0.1679, "step": 1028 }, { "epoch": 0.3334413480233312, "grad_norm": 1.0396127700805664, "learning_rate": 4.914436879826207e-06, "loss": 0.1785, "step": 1029 }, { "epoch": 0.3337653920933247, "grad_norm": 1.0079026222229004, "learning_rate": 4.9142098742751726e-06, "loss": 0.17, "step": 1030 }, { "epoch": 0.3340894361633182, "grad_norm": 1.001125454902649, "learning_rate": 4.9139825732482205e-06, "loss": 0.1643, "step": 1031 }, { "epoch": 0.33441348023331174, "grad_norm": 1.0608137845993042, "learning_rate": 4.91375497677317e-06, "loss": 0.1898, "step": 1032 }, { "epoch": 0.3347375243033052, "grad_norm": 0.992143452167511, "learning_rate": 4.913527084877879e-06, "loss": 0.166, "step": 1033 }, { "epoch": 0.33506156837329876, "grad_norm": 1.0682899951934814, "learning_rate": 4.913298897590237e-06, "loss": 0.1838, "step": 1034 }, { "epoch": 0.3353856124432923, "grad_norm": 1.0318293571472168, "learning_rate": 4.913070414938172e-06, "loss": 0.1728, "step": 1035 }, { "epoch": 0.3357096565132858, "grad_norm": 1.0967031717300415, "learning_rate": 4.912841636949649e-06, "loss": 0.1753, "step": 1036 }, { "epoch": 0.3360337005832793, "grad_norm": 1.0476478338241577, "learning_rate": 4.912612563652667e-06, "loss": 0.1706, "step": 1037 }, { "epoch": 0.33635774465327284, "grad_norm": 1.0481637716293335, "learning_rate": 4.912383195075264e-06, "loss": 0.1742, "step": 1038 }, { "epoch": 0.3366817887232664, "grad_norm": 1.0776970386505127, "learning_rate": 4.912153531245511e-06, "loss": 0.1827, "step": 1039 }, { "epoch": 0.3370058327932599, "grad_norm": 1.0306576490402222, "learning_rate": 4.9119235721915174e-06, "loss": 0.1608, "step": 1040 }, { "epoch": 0.3373298768632534, "grad_norm": 1.0382986068725586, "learning_rate": 4.911693317941428e-06, "loss": 0.1674, "step": 1041 }, { "epoch": 0.3376539209332469, "grad_norm": 1.069576621055603, "learning_rate": 4.911462768523423e-06, "loss": 0.1754, "step": 1042 }, { "epoch": 0.33797796500324045, "grad_norm": 0.9400745630264282, "learning_rate": 4.9112319239657204e-06, "loss": 0.1719, "step": 1043 }, { "epoch": 0.33830200907323393, "grad_norm": 1.0903481245040894, "learning_rate": 4.911000784296572e-06, "loss": 0.1731, "step": 1044 }, { "epoch": 0.33862605314322747, "grad_norm": 1.0023761987686157, "learning_rate": 4.910769349544269e-06, "loss": 0.1606, "step": 1045 }, { "epoch": 0.338950097213221, "grad_norm": 1.0253771543502808, "learning_rate": 4.9105376197371355e-06, "loss": 0.1739, "step": 1046 }, { "epoch": 0.33927414128321454, "grad_norm": 1.0307811498641968, "learning_rate": 4.9103055949035326e-06, "loss": 0.1889, "step": 1047 }, { "epoch": 0.339598185353208, "grad_norm": 1.1487541198730469, "learning_rate": 4.910073275071858e-06, "loss": 0.1858, "step": 1048 }, { "epoch": 0.33992222942320155, "grad_norm": 1.0212219953536987, "learning_rate": 4.909840660270547e-06, "loss": 0.163, "step": 1049 }, { "epoch": 0.3402462734931951, "grad_norm": 0.9798551201820374, "learning_rate": 4.909607750528068e-06, "loss": 0.1625, "step": 1050 }, { "epoch": 0.3405703175631886, "grad_norm": 0.9840319156646729, "learning_rate": 4.909374545872927e-06, "loss": 0.1456, "step": 1051 }, { "epoch": 0.3408943616331821, "grad_norm": 1.056037187576294, "learning_rate": 4.909141046333666e-06, "loss": 0.1666, "step": 1052 }, { "epoch": 0.34121840570317563, "grad_norm": 1.102063775062561, "learning_rate": 4.908907251938864e-06, "loss": 0.1763, "step": 1053 }, { "epoch": 0.34154244977316917, "grad_norm": 0.9911783337593079, "learning_rate": 4.908673162717133e-06, "loss": 0.1614, "step": 1054 }, { "epoch": 0.34186649384316264, "grad_norm": 1.1577961444854736, "learning_rate": 4.908438778697125e-06, "loss": 0.1811, "step": 1055 }, { "epoch": 0.3421905379131562, "grad_norm": 1.0934644937515259, "learning_rate": 4.908204099907527e-06, "loss": 0.1796, "step": 1056 }, { "epoch": 0.3425145819831497, "grad_norm": 1.0998642444610596, "learning_rate": 4.907969126377059e-06, "loss": 0.1804, "step": 1057 }, { "epoch": 0.34283862605314325, "grad_norm": 0.9529743790626526, "learning_rate": 4.907733858134482e-06, "loss": 0.1647, "step": 1058 }, { "epoch": 0.3431626701231367, "grad_norm": 1.0420947074890137, "learning_rate": 4.907498295208589e-06, "loss": 0.1743, "step": 1059 }, { "epoch": 0.34348671419313026, "grad_norm": 0.9765776991844177, "learning_rate": 4.907262437628211e-06, "loss": 0.1688, "step": 1060 }, { "epoch": 0.3438107582631238, "grad_norm": 1.0862605571746826, "learning_rate": 4.907026285422215e-06, "loss": 0.1809, "step": 1061 }, { "epoch": 0.34413480233311733, "grad_norm": 0.9697964191436768, "learning_rate": 4.906789838619504e-06, "loss": 0.1777, "step": 1062 }, { "epoch": 0.3444588464031108, "grad_norm": 0.9970915913581848, "learning_rate": 4.906553097249015e-06, "loss": 0.1543, "step": 1063 }, { "epoch": 0.34478289047310434, "grad_norm": 1.0512598752975464, "learning_rate": 4.906316061339724e-06, "loss": 0.1766, "step": 1064 }, { "epoch": 0.3451069345430979, "grad_norm": 1.0634959936141968, "learning_rate": 4.9060787309206436e-06, "loss": 0.1799, "step": 1065 }, { "epoch": 0.34543097861309136, "grad_norm": 1.0844204425811768, "learning_rate": 4.905841106020818e-06, "loss": 0.1902, "step": 1066 }, { "epoch": 0.3457550226830849, "grad_norm": 1.1198972463607788, "learning_rate": 4.905603186669332e-06, "loss": 0.1662, "step": 1067 }, { "epoch": 0.3460790667530784, "grad_norm": 1.0448524951934814, "learning_rate": 4.905364972895304e-06, "loss": 0.1798, "step": 1068 }, { "epoch": 0.34640311082307196, "grad_norm": 1.0317894220352173, "learning_rate": 4.9051264647278886e-06, "loss": 0.172, "step": 1069 }, { "epoch": 0.34672715489306544, "grad_norm": 1.1010342836380005, "learning_rate": 4.904887662196277e-06, "loss": 0.1537, "step": 1070 }, { "epoch": 0.34705119896305897, "grad_norm": 1.052812099456787, "learning_rate": 4.904648565329697e-06, "loss": 0.1891, "step": 1071 }, { "epoch": 0.3473752430330525, "grad_norm": 1.0273829698562622, "learning_rate": 4.904409174157412e-06, "loss": 0.1735, "step": 1072 }, { "epoch": 0.34769928710304604, "grad_norm": 0.980993926525116, "learning_rate": 4.90416948870872e-06, "loss": 0.1677, "step": 1073 }, { "epoch": 0.3480233311730395, "grad_norm": 0.9855329394340515, "learning_rate": 4.903929509012957e-06, "loss": 0.1524, "step": 1074 }, { "epoch": 0.34834737524303305, "grad_norm": 1.0246031284332275, "learning_rate": 4.9036892350994935e-06, "loss": 0.166, "step": 1075 }, { "epoch": 0.3486714193130266, "grad_norm": 0.99644935131073, "learning_rate": 4.9034486669977375e-06, "loss": 0.162, "step": 1076 }, { "epoch": 0.34899546338302007, "grad_norm": 1.0822203159332275, "learning_rate": 4.903207804737132e-06, "loss": 0.1737, "step": 1077 }, { "epoch": 0.3493195074530136, "grad_norm": 1.0989410877227783, "learning_rate": 4.902966648347156e-06, "loss": 0.169, "step": 1078 }, { "epoch": 0.34964355152300713, "grad_norm": 1.1245644092559814, "learning_rate": 4.902725197857325e-06, "loss": 0.1771, "step": 1079 }, { "epoch": 0.34996759559300067, "grad_norm": 1.121975302696228, "learning_rate": 4.902483453297189e-06, "loss": 0.1796, "step": 1080 }, { "epoch": 0.35029163966299415, "grad_norm": 1.0160001516342163, "learning_rate": 4.902241414696337e-06, "loss": 0.1639, "step": 1081 }, { "epoch": 0.3506156837329877, "grad_norm": 0.9480010271072388, "learning_rate": 4.901999082084391e-06, "loss": 0.1544, "step": 1082 }, { "epoch": 0.3509397278029812, "grad_norm": 1.0363365411758423, "learning_rate": 4.901756455491011e-06, "loss": 0.186, "step": 1083 }, { "epoch": 0.35126377187297475, "grad_norm": 1.0564618110656738, "learning_rate": 4.901513534945891e-06, "loss": 0.1787, "step": 1084 }, { "epoch": 0.35158781594296823, "grad_norm": 1.0492441654205322, "learning_rate": 4.901270320478763e-06, "loss": 0.1695, "step": 1085 }, { "epoch": 0.35191186001296176, "grad_norm": 1.025549054145813, "learning_rate": 4.901026812119394e-06, "loss": 0.171, "step": 1086 }, { "epoch": 0.3522359040829553, "grad_norm": 1.0320312976837158, "learning_rate": 4.9007830098975875e-06, "loss": 0.1716, "step": 1087 }, { "epoch": 0.3525599481529488, "grad_norm": 1.0225930213928223, "learning_rate": 4.900538913843181e-06, "loss": 0.1717, "step": 1088 }, { "epoch": 0.3528839922229423, "grad_norm": 1.103419542312622, "learning_rate": 4.900294523986051e-06, "loss": 0.1929, "step": 1089 }, { "epoch": 0.35320803629293585, "grad_norm": 1.0638277530670166, "learning_rate": 4.900049840356107e-06, "loss": 0.183, "step": 1090 }, { "epoch": 0.3535320803629294, "grad_norm": 0.9254642724990845, "learning_rate": 4.899804862983298e-06, "loss": 0.1541, "step": 1091 }, { "epoch": 0.35385612443292286, "grad_norm": 0.9626860618591309, "learning_rate": 4.899559591897604e-06, "loss": 0.169, "step": 1092 }, { "epoch": 0.3541801685029164, "grad_norm": 1.0504587888717651, "learning_rate": 4.899314027129047e-06, "loss": 0.1759, "step": 1093 }, { "epoch": 0.3545042125729099, "grad_norm": 1.0354413986206055, "learning_rate": 4.89906816870768e-06, "loss": 0.173, "step": 1094 }, { "epoch": 0.35482825664290346, "grad_norm": 0.9573935270309448, "learning_rate": 4.898822016663595e-06, "loss": 0.1683, "step": 1095 }, { "epoch": 0.35515230071289694, "grad_norm": 1.1483545303344727, "learning_rate": 4.898575571026916e-06, "loss": 0.1916, "step": 1096 }, { "epoch": 0.3554763447828905, "grad_norm": 0.990126371383667, "learning_rate": 4.898328831827808e-06, "loss": 0.164, "step": 1097 }, { "epoch": 0.355800388852884, "grad_norm": 1.0703517198562622, "learning_rate": 4.898081799096467e-06, "loss": 0.1678, "step": 1098 }, { "epoch": 0.3561244329228775, "grad_norm": 1.0092120170593262, "learning_rate": 4.897834472863131e-06, "loss": 0.1688, "step": 1099 }, { "epoch": 0.356448476992871, "grad_norm": 1.0366566181182861, "learning_rate": 4.897586853158067e-06, "loss": 0.1699, "step": 1100 }, { "epoch": 0.35677252106286456, "grad_norm": 1.0042214393615723, "learning_rate": 4.897338940011583e-06, "loss": 0.1557, "step": 1101 }, { "epoch": 0.3570965651328581, "grad_norm": 0.9984369277954102, "learning_rate": 4.897090733454021e-06, "loss": 0.1863, "step": 1102 }, { "epoch": 0.35742060920285157, "grad_norm": 1.1319072246551514, "learning_rate": 4.896842233515759e-06, "loss": 0.1892, "step": 1103 }, { "epoch": 0.3577446532728451, "grad_norm": 1.0137627124786377, "learning_rate": 4.89659344022721e-06, "loss": 0.1744, "step": 1104 }, { "epoch": 0.35806869734283864, "grad_norm": 1.036480188369751, "learning_rate": 4.896344353618826e-06, "loss": 0.1684, "step": 1105 }, { "epoch": 0.35839274141283217, "grad_norm": 1.005391001701355, "learning_rate": 4.896094973721091e-06, "loss": 0.1568, "step": 1106 }, { "epoch": 0.35871678548282565, "grad_norm": 0.9591735601425171, "learning_rate": 4.8958453005645265e-06, "loss": 0.1481, "step": 1107 }, { "epoch": 0.3590408295528192, "grad_norm": 1.1032018661499023, "learning_rate": 4.895595334179692e-06, "loss": 0.1889, "step": 1108 }, { "epoch": 0.3593648736228127, "grad_norm": 1.04655122756958, "learning_rate": 4.89534507459718e-06, "loss": 0.1749, "step": 1109 }, { "epoch": 0.3596889176928062, "grad_norm": 1.006601095199585, "learning_rate": 4.895094521847617e-06, "loss": 0.1757, "step": 1110 }, { "epoch": 0.36001296176279973, "grad_norm": 0.9353299736976624, "learning_rate": 4.894843675961673e-06, "loss": 0.1597, "step": 1111 }, { "epoch": 0.36033700583279327, "grad_norm": 0.9540934562683105, "learning_rate": 4.894592536970047e-06, "loss": 0.1551, "step": 1112 }, { "epoch": 0.3606610499027868, "grad_norm": 1.0541253089904785, "learning_rate": 4.894341104903476e-06, "loss": 0.1736, "step": 1113 }, { "epoch": 0.3609850939727803, "grad_norm": 0.9918221235275269, "learning_rate": 4.894089379792731e-06, "loss": 0.1707, "step": 1114 }, { "epoch": 0.3613091380427738, "grad_norm": 1.0815917253494263, "learning_rate": 4.893837361668624e-06, "loss": 0.1773, "step": 1115 }, { "epoch": 0.36163318211276735, "grad_norm": 1.0560083389282227, "learning_rate": 4.8935850505619985e-06, "loss": 0.1782, "step": 1116 }, { "epoch": 0.3619572261827609, "grad_norm": 1.0156229734420776, "learning_rate": 4.8933324465037334e-06, "loss": 0.1783, "step": 1117 }, { "epoch": 0.36228127025275436, "grad_norm": 1.0813552141189575, "learning_rate": 4.893079549524747e-06, "loss": 0.1656, "step": 1118 }, { "epoch": 0.3626053143227479, "grad_norm": 1.0563653707504272, "learning_rate": 4.89282635965599e-06, "loss": 0.1606, "step": 1119 }, { "epoch": 0.36292935839274143, "grad_norm": 0.9194615483283997, "learning_rate": 4.8925728769284504e-06, "loss": 0.1498, "step": 1120 }, { "epoch": 0.3632534024627349, "grad_norm": 1.0284903049468994, "learning_rate": 4.892319101373154e-06, "loss": 0.1706, "step": 1121 }, { "epoch": 0.36357744653272844, "grad_norm": 1.003746747970581, "learning_rate": 4.892065033021158e-06, "loss": 0.1718, "step": 1122 }, { "epoch": 0.363901490602722, "grad_norm": 1.0391348600387573, "learning_rate": 4.8918106719035594e-06, "loss": 0.1683, "step": 1123 }, { "epoch": 0.3642255346727155, "grad_norm": 1.1661279201507568, "learning_rate": 4.891556018051489e-06, "loss": 0.1863, "step": 1124 }, { "epoch": 0.364549578742709, "grad_norm": 1.0064976215362549, "learning_rate": 4.891301071496113e-06, "loss": 0.1622, "step": 1125 }, { "epoch": 0.3648736228127025, "grad_norm": 1.1605420112609863, "learning_rate": 4.891045832268637e-06, "loss": 0.1911, "step": 1126 }, { "epoch": 0.36519766688269606, "grad_norm": 1.0069886445999146, "learning_rate": 4.890790300400297e-06, "loss": 0.1785, "step": 1127 }, { "epoch": 0.36552171095268954, "grad_norm": 1.0143624544143677, "learning_rate": 4.8905344759223696e-06, "loss": 0.1756, "step": 1128 }, { "epoch": 0.3658457550226831, "grad_norm": 1.0237759351730347, "learning_rate": 4.890278358866165e-06, "loss": 0.1686, "step": 1129 }, { "epoch": 0.3661697990926766, "grad_norm": 1.0224316120147705, "learning_rate": 4.890021949263027e-06, "loss": 0.176, "step": 1130 }, { "epoch": 0.36649384316267014, "grad_norm": 1.0045219659805298, "learning_rate": 4.889765247144341e-06, "loss": 0.1667, "step": 1131 }, { "epoch": 0.3668178872326636, "grad_norm": 1.0561575889587402, "learning_rate": 4.889508252541524e-06, "loss": 0.1809, "step": 1132 }, { "epoch": 0.36714193130265715, "grad_norm": 0.9486965537071228, "learning_rate": 4.889250965486029e-06, "loss": 0.1544, "step": 1133 }, { "epoch": 0.3674659753726507, "grad_norm": 0.9930503964424133, "learning_rate": 4.888993386009345e-06, "loss": 0.1568, "step": 1134 }, { "epoch": 0.3677900194426442, "grad_norm": 1.032828450202942, "learning_rate": 4.888735514142998e-06, "loss": 0.1623, "step": 1135 }, { "epoch": 0.3681140635126377, "grad_norm": 0.9454177021980286, "learning_rate": 4.8884773499185485e-06, "loss": 0.1597, "step": 1136 }, { "epoch": 0.36843810758263124, "grad_norm": 1.0029901266098022, "learning_rate": 4.8882188933675935e-06, "loss": 0.1599, "step": 1137 }, { "epoch": 0.36876215165262477, "grad_norm": 1.074245572090149, "learning_rate": 4.887960144521766e-06, "loss": 0.1737, "step": 1138 }, { "epoch": 0.36908619572261825, "grad_norm": 1.103922724723816, "learning_rate": 4.887701103412734e-06, "loss": 0.1617, "step": 1139 }, { "epoch": 0.3694102397926118, "grad_norm": 1.051020860671997, "learning_rate": 4.8874417700722025e-06, "loss": 0.1682, "step": 1140 }, { "epoch": 0.3697342838626053, "grad_norm": 0.9718478322029114, "learning_rate": 4.887182144531909e-06, "loss": 0.1616, "step": 1141 }, { "epoch": 0.37005832793259885, "grad_norm": 1.0041842460632324, "learning_rate": 4.886922226823632e-06, "loss": 0.1524, "step": 1142 }, { "epoch": 0.37038237200259233, "grad_norm": 1.0239226818084717, "learning_rate": 4.8866620169791815e-06, "loss": 0.1788, "step": 1143 }, { "epoch": 0.37070641607258586, "grad_norm": 0.9961230754852295, "learning_rate": 4.886401515030404e-06, "loss": 0.1594, "step": 1144 }, { "epoch": 0.3710304601425794, "grad_norm": 1.0124776363372803, "learning_rate": 4.886140721009184e-06, "loss": 0.1747, "step": 1145 }, { "epoch": 0.37135450421257293, "grad_norm": 0.9768791794776917, "learning_rate": 4.885879634947439e-06, "loss": 0.1564, "step": 1146 }, { "epoch": 0.3716785482825664, "grad_norm": 1.1680079698562622, "learning_rate": 4.885618256877123e-06, "loss": 0.1892, "step": 1147 }, { "epoch": 0.37200259235255995, "grad_norm": 0.9732511043548584, "learning_rate": 4.885356586830229e-06, "loss": 0.1536, "step": 1148 }, { "epoch": 0.3723266364225535, "grad_norm": 1.0803674459457397, "learning_rate": 4.8850946248387795e-06, "loss": 0.1577, "step": 1149 }, { "epoch": 0.37265068049254696, "grad_norm": 1.022645115852356, "learning_rate": 4.884832370934838e-06, "loss": 0.1623, "step": 1150 }, { "epoch": 0.3729747245625405, "grad_norm": 0.979411244392395, "learning_rate": 4.8845698251505e-06, "loss": 0.1505, "step": 1151 }, { "epoch": 0.37329876863253403, "grad_norm": 1.043480396270752, "learning_rate": 4.8843069875179005e-06, "loss": 0.1792, "step": 1152 }, { "epoch": 0.37362281270252756, "grad_norm": 1.0261013507843018, "learning_rate": 4.884043858069208e-06, "loss": 0.1688, "step": 1153 }, { "epoch": 0.37394685677252104, "grad_norm": 1.000368356704712, "learning_rate": 4.883780436836627e-06, "loss": 0.168, "step": 1154 }, { "epoch": 0.3742709008425146, "grad_norm": 0.9413356781005859, "learning_rate": 4.883516723852396e-06, "loss": 0.1667, "step": 1155 }, { "epoch": 0.3745949449125081, "grad_norm": 1.012265920639038, "learning_rate": 4.883252719148794e-06, "loss": 0.1638, "step": 1156 }, { "epoch": 0.37491898898250164, "grad_norm": 0.9922842979431152, "learning_rate": 4.8829884227581294e-06, "loss": 0.1693, "step": 1157 }, { "epoch": 0.3752430330524951, "grad_norm": 0.9883171319961548, "learning_rate": 4.88272383471275e-06, "loss": 0.1535, "step": 1158 }, { "epoch": 0.37556707712248866, "grad_norm": 0.9470762610435486, "learning_rate": 4.8824589550450415e-06, "loss": 0.1475, "step": 1159 }, { "epoch": 0.3758911211924822, "grad_norm": 0.9897817373275757, "learning_rate": 4.882193783787421e-06, "loss": 0.1802, "step": 1160 }, { "epoch": 0.37621516526247567, "grad_norm": 1.0278658866882324, "learning_rate": 4.881928320972342e-06, "loss": 0.1799, "step": 1161 }, { "epoch": 0.3765392093324692, "grad_norm": 1.0314370393753052, "learning_rate": 4.881662566632296e-06, "loss": 0.1789, "step": 1162 }, { "epoch": 0.37686325340246274, "grad_norm": 0.9717196226119995, "learning_rate": 4.881396520799808e-06, "loss": 0.1569, "step": 1163 }, { "epoch": 0.3771872974724563, "grad_norm": 1.0661332607269287, "learning_rate": 4.8811301835074384e-06, "loss": 0.1821, "step": 1164 }, { "epoch": 0.37751134154244975, "grad_norm": 0.9644685983657837, "learning_rate": 4.880863554787787e-06, "loss": 0.1586, "step": 1165 }, { "epoch": 0.3778353856124433, "grad_norm": 1.0331933498382568, "learning_rate": 4.880596634673484e-06, "loss": 0.1865, "step": 1166 }, { "epoch": 0.3781594296824368, "grad_norm": 1.0733047723770142, "learning_rate": 4.8803294231972e-06, "loss": 0.1742, "step": 1167 }, { "epoch": 0.37848347375243035, "grad_norm": 1.0722155570983887, "learning_rate": 4.8800619203916376e-06, "loss": 0.1963, "step": 1168 }, { "epoch": 0.37880751782242383, "grad_norm": 1.0205607414245605, "learning_rate": 4.8797941262895365e-06, "loss": 0.1777, "step": 1169 }, { "epoch": 0.37913156189241737, "grad_norm": 0.9205136299133301, "learning_rate": 4.8795260409236725e-06, "loss": 0.1503, "step": 1170 }, { "epoch": 0.3794556059624109, "grad_norm": 0.9154426455497742, "learning_rate": 4.879257664326856e-06, "loss": 0.1568, "step": 1171 }, { "epoch": 0.3797796500324044, "grad_norm": 1.0617220401763916, "learning_rate": 4.8789889965319355e-06, "loss": 0.1827, "step": 1172 }, { "epoch": 0.3801036941023979, "grad_norm": 0.9944412112236023, "learning_rate": 4.878720037571792e-06, "loss": 0.1659, "step": 1173 }, { "epoch": 0.38042773817239145, "grad_norm": 0.992470383644104, "learning_rate": 4.878450787479344e-06, "loss": 0.17, "step": 1174 }, { "epoch": 0.380751782242385, "grad_norm": 1.038560152053833, "learning_rate": 4.878181246287544e-06, "loss": 0.169, "step": 1175 }, { "epoch": 0.38107582631237846, "grad_norm": 0.9579697847366333, "learning_rate": 4.877911414029382e-06, "loss": 0.1578, "step": 1176 }, { "epoch": 0.381399870382372, "grad_norm": 0.9731485247612, "learning_rate": 4.8776412907378845e-06, "loss": 0.1648, "step": 1177 }, { "epoch": 0.38172391445236553, "grad_norm": 0.9061117172241211, "learning_rate": 4.877370876446109e-06, "loss": 0.1609, "step": 1178 }, { "epoch": 0.38204795852235907, "grad_norm": 0.983727753162384, "learning_rate": 4.877100171187154e-06, "loss": 0.1774, "step": 1179 }, { "epoch": 0.38237200259235254, "grad_norm": 1.0166122913360596, "learning_rate": 4.876829174994149e-06, "loss": 0.1586, "step": 1180 }, { "epoch": 0.3826960466623461, "grad_norm": 0.9968889355659485, "learning_rate": 4.8765578879002625e-06, "loss": 0.1544, "step": 1181 }, { "epoch": 0.3830200907323396, "grad_norm": 0.990476667881012, "learning_rate": 4.8762863099386984e-06, "loss": 0.1724, "step": 1182 }, { "epoch": 0.3833441348023331, "grad_norm": 0.9911254644393921, "learning_rate": 4.876014441142693e-06, "loss": 0.173, "step": 1183 }, { "epoch": 0.3836681788723266, "grad_norm": 0.9807798266410828, "learning_rate": 4.8757422815455215e-06, "loss": 0.171, "step": 1184 }, { "epoch": 0.38399222294232016, "grad_norm": 0.9989405870437622, "learning_rate": 4.875469831180495e-06, "loss": 0.1648, "step": 1185 }, { "epoch": 0.3843162670123137, "grad_norm": 1.0662082433700562, "learning_rate": 4.875197090080957e-06, "loss": 0.1713, "step": 1186 }, { "epoch": 0.3846403110823072, "grad_norm": 1.0581663846969604, "learning_rate": 4.874924058280288e-06, "loss": 0.1865, "step": 1187 }, { "epoch": 0.3849643551523007, "grad_norm": 0.9903859496116638, "learning_rate": 4.874650735811906e-06, "loss": 0.1794, "step": 1188 }, { "epoch": 0.38528839922229424, "grad_norm": 0.9108482003211975, "learning_rate": 4.874377122709263e-06, "loss": 0.1642, "step": 1189 }, { "epoch": 0.3856124432922878, "grad_norm": 1.0256187915802002, "learning_rate": 4.874103219005845e-06, "loss": 0.1617, "step": 1190 }, { "epoch": 0.38593648736228126, "grad_norm": 0.9225676655769348, "learning_rate": 4.873829024735176e-06, "loss": 0.1685, "step": 1191 }, { "epoch": 0.3862605314322748, "grad_norm": 0.9761823415756226, "learning_rate": 4.873554539930815e-06, "loss": 0.1672, "step": 1192 }, { "epoch": 0.3865845755022683, "grad_norm": 1.0413146018981934, "learning_rate": 4.873279764626357e-06, "loss": 0.1751, "step": 1193 }, { "epoch": 0.3869086195722618, "grad_norm": 1.0643138885498047, "learning_rate": 4.87300469885543e-06, "loss": 0.1738, "step": 1194 }, { "epoch": 0.38723266364225534, "grad_norm": 1.077222228050232, "learning_rate": 4.872729342651701e-06, "loss": 0.1918, "step": 1195 }, { "epoch": 0.38755670771224887, "grad_norm": 1.0266451835632324, "learning_rate": 4.87245369604887e-06, "loss": 0.1765, "step": 1196 }, { "epoch": 0.3878807517822424, "grad_norm": 1.0106679201126099, "learning_rate": 4.872177759080673e-06, "loss": 0.1745, "step": 1197 }, { "epoch": 0.3882047958522359, "grad_norm": 0.9687255024909973, "learning_rate": 4.8719015317808835e-06, "loss": 0.1704, "step": 1198 }, { "epoch": 0.3885288399222294, "grad_norm": 0.9420855045318604, "learning_rate": 4.871625014183308e-06, "loss": 0.1563, "step": 1199 }, { "epoch": 0.38885288399222295, "grad_norm": 0.9460660815238953, "learning_rate": 4.8713482063217895e-06, "loss": 0.1617, "step": 1200 }, { "epoch": 0.3891769280622165, "grad_norm": 1.044751524925232, "learning_rate": 4.871071108230208e-06, "loss": 0.1814, "step": 1201 }, { "epoch": 0.38950097213220997, "grad_norm": 0.9933145046234131, "learning_rate": 4.8707937199424756e-06, "loss": 0.1715, "step": 1202 }, { "epoch": 0.3898250162022035, "grad_norm": 1.1108256578445435, "learning_rate": 4.870516041492543e-06, "loss": 0.186, "step": 1203 }, { "epoch": 0.39014906027219703, "grad_norm": 0.9695666432380676, "learning_rate": 4.870238072914396e-06, "loss": 0.1576, "step": 1204 }, { "epoch": 0.3904731043421905, "grad_norm": 0.9143527150154114, "learning_rate": 4.869959814242054e-06, "loss": 0.1625, "step": 1205 }, { "epoch": 0.39079714841218405, "grad_norm": 0.9999296069145203, "learning_rate": 4.8696812655095744e-06, "loss": 0.1693, "step": 1206 }, { "epoch": 0.3911211924821776, "grad_norm": 0.982399582862854, "learning_rate": 4.869402426751048e-06, "loss": 0.1725, "step": 1207 }, { "epoch": 0.3914452365521711, "grad_norm": 1.0316600799560547, "learning_rate": 4.8691232980006015e-06, "loss": 0.1759, "step": 1208 }, { "epoch": 0.3917692806221646, "grad_norm": 0.9703241586685181, "learning_rate": 4.868843879292399e-06, "loss": 0.1744, "step": 1209 }, { "epoch": 0.39209332469215813, "grad_norm": 0.9049566388130188, "learning_rate": 4.868564170660637e-06, "loss": 0.1503, "step": 1210 }, { "epoch": 0.39241736876215166, "grad_norm": 1.0091451406478882, "learning_rate": 4.868284172139551e-06, "loss": 0.1625, "step": 1211 }, { "epoch": 0.3927414128321452, "grad_norm": 0.9870232343673706, "learning_rate": 4.868003883763408e-06, "loss": 0.1686, "step": 1212 }, { "epoch": 0.3930654569021387, "grad_norm": 1.030659556388855, "learning_rate": 4.867723305566514e-06, "loss": 0.1903, "step": 1213 }, { "epoch": 0.3933895009721322, "grad_norm": 1.165837287902832, "learning_rate": 4.86744243758321e-06, "loss": 0.1819, "step": 1214 }, { "epoch": 0.39371354504212575, "grad_norm": 0.9671489596366882, "learning_rate": 4.8671612798478685e-06, "loss": 0.1603, "step": 1215 }, { "epoch": 0.3940375891121192, "grad_norm": 1.0202622413635254, "learning_rate": 4.866879832394903e-06, "loss": 0.1697, "step": 1216 }, { "epoch": 0.39436163318211276, "grad_norm": 1.043723702430725, "learning_rate": 4.86659809525876e-06, "loss": 0.181, "step": 1217 }, { "epoch": 0.3946856772521063, "grad_norm": 0.9605787396430969, "learning_rate": 4.866316068473919e-06, "loss": 0.1566, "step": 1218 }, { "epoch": 0.3950097213220998, "grad_norm": 1.0066555738449097, "learning_rate": 4.8660337520749e-06, "loss": 0.1648, "step": 1219 }, { "epoch": 0.3953337653920933, "grad_norm": 1.0389704704284668, "learning_rate": 4.865751146096255e-06, "loss": 0.1872, "step": 1220 }, { "epoch": 0.39565780946208684, "grad_norm": 1.052191138267517, "learning_rate": 4.865468250572571e-06, "loss": 0.1532, "step": 1221 }, { "epoch": 0.3959818535320804, "grad_norm": 0.9602256417274475, "learning_rate": 4.865185065538472e-06, "loss": 0.1608, "step": 1222 }, { "epoch": 0.3963058976020739, "grad_norm": 1.0805506706237793, "learning_rate": 4.86490159102862e-06, "loss": 0.1763, "step": 1223 }, { "epoch": 0.3966299416720674, "grad_norm": 1.0248594284057617, "learning_rate": 4.8646178270777055e-06, "loss": 0.1774, "step": 1224 }, { "epoch": 0.3969539857420609, "grad_norm": 1.0524420738220215, "learning_rate": 4.864333773720461e-06, "loss": 0.1564, "step": 1225 }, { "epoch": 0.39727802981205446, "grad_norm": 1.0076290369033813, "learning_rate": 4.8640494309916506e-06, "loss": 0.165, "step": 1226 }, { "epoch": 0.39760207388204793, "grad_norm": 1.1074403524398804, "learning_rate": 4.863764798926076e-06, "loss": 0.1941, "step": 1227 }, { "epoch": 0.39792611795204147, "grad_norm": 1.0143996477127075, "learning_rate": 4.863479877558573e-06, "loss": 0.1663, "step": 1228 }, { "epoch": 0.398250162022035, "grad_norm": 0.888302206993103, "learning_rate": 4.863194666924013e-06, "loss": 0.1523, "step": 1229 }, { "epoch": 0.39857420609202854, "grad_norm": 1.0188536643981934, "learning_rate": 4.862909167057304e-06, "loss": 0.18, "step": 1230 }, { "epoch": 0.398898250162022, "grad_norm": 1.0219933986663818, "learning_rate": 4.862623377993387e-06, "loss": 0.1524, "step": 1231 }, { "epoch": 0.39922229423201555, "grad_norm": 1.0216704607009888, "learning_rate": 4.862337299767241e-06, "loss": 0.1629, "step": 1232 }, { "epoch": 0.3995463383020091, "grad_norm": 0.9859946370124817, "learning_rate": 4.862050932413878e-06, "loss": 0.1643, "step": 1233 }, { "epoch": 0.3998703823720026, "grad_norm": 1.0218664407730103, "learning_rate": 4.8617642759683474e-06, "loss": 0.1714, "step": 1234 }, { "epoch": 0.4001944264419961, "grad_norm": 0.9597703218460083, "learning_rate": 4.861477330465734e-06, "loss": 0.1628, "step": 1235 }, { "epoch": 0.40051847051198963, "grad_norm": 1.0279196500778198, "learning_rate": 4.861190095941155e-06, "loss": 0.1744, "step": 1236 }, { "epoch": 0.40084251458198317, "grad_norm": 0.953804075717926, "learning_rate": 4.860902572429767e-06, "loss": 0.1734, "step": 1237 }, { "epoch": 0.40116655865197665, "grad_norm": 1.052389144897461, "learning_rate": 4.86061475996676e-06, "loss": 0.1778, "step": 1238 }, { "epoch": 0.4014906027219702, "grad_norm": 0.9892643094062805, "learning_rate": 4.860326658587358e-06, "loss": 0.179, "step": 1239 }, { "epoch": 0.4018146467919637, "grad_norm": 1.012811303138733, "learning_rate": 4.860038268326823e-06, "loss": 0.1713, "step": 1240 }, { "epoch": 0.40213869086195725, "grad_norm": 0.994625449180603, "learning_rate": 4.85974958922045e-06, "loss": 0.1785, "step": 1241 }, { "epoch": 0.4024627349319507, "grad_norm": 0.9277936220169067, "learning_rate": 4.859460621303572e-06, "loss": 0.1622, "step": 1242 }, { "epoch": 0.40278677900194426, "grad_norm": 0.9426575303077698, "learning_rate": 4.859171364611556e-06, "loss": 0.1676, "step": 1243 }, { "epoch": 0.4031108230719378, "grad_norm": 0.9697012901306152, "learning_rate": 4.8588818191798035e-06, "loss": 0.1644, "step": 1244 }, { "epoch": 0.40343486714193133, "grad_norm": 0.9683858752250671, "learning_rate": 4.858591985043751e-06, "loss": 0.1693, "step": 1245 }, { "epoch": 0.4037589112119248, "grad_norm": 0.9764914512634277, "learning_rate": 4.858301862238874e-06, "loss": 0.1727, "step": 1246 }, { "epoch": 0.40408295528191834, "grad_norm": 1.0774421691894531, "learning_rate": 4.858011450800678e-06, "loss": 0.1741, "step": 1247 }, { "epoch": 0.4044069993519119, "grad_norm": 1.0993943214416504, "learning_rate": 4.857720750764708e-06, "loss": 0.1666, "step": 1248 }, { "epoch": 0.40473104342190536, "grad_norm": 1.1373517513275146, "learning_rate": 4.857429762166543e-06, "loss": 0.1678, "step": 1249 }, { "epoch": 0.4050550874918989, "grad_norm": 1.045294165611267, "learning_rate": 4.857138485041797e-06, "loss": 0.1626, "step": 1250 }, { "epoch": 0.4053791315618924, "grad_norm": 1.0252169370651245, "learning_rate": 4.856846919426118e-06, "loss": 0.1762, "step": 1251 }, { "epoch": 0.40570317563188596, "grad_norm": 0.9912242293357849, "learning_rate": 4.856555065355193e-06, "loss": 0.1658, "step": 1252 }, { "epoch": 0.40602721970187944, "grad_norm": 1.0291924476623535, "learning_rate": 4.856262922864741e-06, "loss": 0.1616, "step": 1253 }, { "epoch": 0.406351263771873, "grad_norm": 1.0011651515960693, "learning_rate": 4.855970491990518e-06, "loss": 0.161, "step": 1254 }, { "epoch": 0.4066753078418665, "grad_norm": 0.9770960211753845, "learning_rate": 4.855677772768315e-06, "loss": 0.1673, "step": 1255 }, { "epoch": 0.40699935191186, "grad_norm": 0.967759370803833, "learning_rate": 4.855384765233956e-06, "loss": 0.1582, "step": 1256 }, { "epoch": 0.4073233959818535, "grad_norm": 1.005577564239502, "learning_rate": 4.8550914694233045e-06, "loss": 0.1626, "step": 1257 }, { "epoch": 0.40764744005184705, "grad_norm": 1.0803974866867065, "learning_rate": 4.854797885372255e-06, "loss": 0.1827, "step": 1258 }, { "epoch": 0.4079714841218406, "grad_norm": 1.0000061988830566, "learning_rate": 4.854504013116741e-06, "loss": 0.1501, "step": 1259 }, { "epoch": 0.40829552819183407, "grad_norm": 0.997345507144928, "learning_rate": 4.8542098526927304e-06, "loss": 0.1597, "step": 1260 }, { "epoch": 0.4086195722618276, "grad_norm": 0.9689996242523193, "learning_rate": 4.853915404136223e-06, "loss": 0.1626, "step": 1261 }, { "epoch": 0.40894361633182114, "grad_norm": 0.95046067237854, "learning_rate": 4.853620667483259e-06, "loss": 0.1603, "step": 1262 }, { "epoch": 0.40926766040181467, "grad_norm": 1.1230173110961914, "learning_rate": 4.853325642769908e-06, "loss": 0.1964, "step": 1263 }, { "epoch": 0.40959170447180815, "grad_norm": 0.9287686347961426, "learning_rate": 4.853030330032283e-06, "loss": 0.1575, "step": 1264 }, { "epoch": 0.4099157485418017, "grad_norm": 1.087897777557373, "learning_rate": 4.852734729306523e-06, "loss": 0.1707, "step": 1265 }, { "epoch": 0.4102397926117952, "grad_norm": 0.95196533203125, "learning_rate": 4.852438840628808e-06, "loss": 0.1537, "step": 1266 }, { "epoch": 0.4105638366817887, "grad_norm": 0.9249547123908997, "learning_rate": 4.852142664035353e-06, "loss": 0.1565, "step": 1267 }, { "epoch": 0.41088788075178223, "grad_norm": 1.0064045190811157, "learning_rate": 4.8518461995624064e-06, "loss": 0.1623, "step": 1268 }, { "epoch": 0.41121192482177576, "grad_norm": 1.031893014907837, "learning_rate": 4.851549447246253e-06, "loss": 0.1737, "step": 1269 }, { "epoch": 0.4115359688917693, "grad_norm": 0.9710052609443665, "learning_rate": 4.851252407123211e-06, "loss": 0.1637, "step": 1270 }, { "epoch": 0.4118600129617628, "grad_norm": 1.0140632390975952, "learning_rate": 4.850955079229637e-06, "loss": 0.1629, "step": 1271 }, { "epoch": 0.4121840570317563, "grad_norm": 0.9252100586891174, "learning_rate": 4.850657463601921e-06, "loss": 0.1543, "step": 1272 }, { "epoch": 0.41250810110174985, "grad_norm": 0.9155288338661194, "learning_rate": 4.850359560276486e-06, "loss": 0.1565, "step": 1273 }, { "epoch": 0.4128321451717434, "grad_norm": 1.0258336067199707, "learning_rate": 4.850061369289795e-06, "loss": 0.179, "step": 1274 }, { "epoch": 0.41315618924173686, "grad_norm": 1.0771592855453491, "learning_rate": 4.8497628906783425e-06, "loss": 0.1706, "step": 1275 }, { "epoch": 0.4134802333117304, "grad_norm": 1.0786904096603394, "learning_rate": 4.84946412447866e-06, "loss": 0.1765, "step": 1276 }, { "epoch": 0.41380427738172393, "grad_norm": 0.9597084522247314, "learning_rate": 4.849165070727313e-06, "loss": 0.1464, "step": 1277 }, { "epoch": 0.4141283214517174, "grad_norm": 0.9457292556762695, "learning_rate": 4.848865729460903e-06, "loss": 0.1499, "step": 1278 }, { "epoch": 0.41445236552171094, "grad_norm": 0.9749992489814758, "learning_rate": 4.848566100716066e-06, "loss": 0.1672, "step": 1279 }, { "epoch": 0.4147764095917045, "grad_norm": 1.054645299911499, "learning_rate": 4.848266184529475e-06, "loss": 0.1816, "step": 1280 }, { "epoch": 0.415100453661698, "grad_norm": 0.9836113452911377, "learning_rate": 4.847965980937836e-06, "loss": 0.1528, "step": 1281 }, { "epoch": 0.4154244977316915, "grad_norm": 0.9920374155044556, "learning_rate": 4.847665489977891e-06, "loss": 0.1672, "step": 1282 }, { "epoch": 0.415748541801685, "grad_norm": 0.9647442102432251, "learning_rate": 4.847364711686417e-06, "loss": 0.1694, "step": 1283 }, { "epoch": 0.41607258587167856, "grad_norm": 0.9995275139808655, "learning_rate": 4.847063646100226e-06, "loss": 0.1708, "step": 1284 }, { "epoch": 0.4163966299416721, "grad_norm": 1.002206563949585, "learning_rate": 4.846762293256167e-06, "loss": 0.1611, "step": 1285 }, { "epoch": 0.41672067401166557, "grad_norm": 0.9814795851707458, "learning_rate": 4.846460653191121e-06, "loss": 0.1717, "step": 1286 }, { "epoch": 0.4170447180816591, "grad_norm": 0.9551466703414917, "learning_rate": 4.846158725942006e-06, "loss": 0.1716, "step": 1287 }, { "epoch": 0.41736876215165264, "grad_norm": 0.985569953918457, "learning_rate": 4.845856511545777e-06, "loss": 0.1756, "step": 1288 }, { "epoch": 0.4176928062216461, "grad_norm": 0.9430775046348572, "learning_rate": 4.84555401003942e-06, "loss": 0.1591, "step": 1289 }, { "epoch": 0.41801685029163965, "grad_norm": 1.0009950399398804, "learning_rate": 4.845251221459958e-06, "loss": 0.1838, "step": 1290 }, { "epoch": 0.4183408943616332, "grad_norm": 0.9523888230323792, "learning_rate": 4.844948145844452e-06, "loss": 0.1475, "step": 1291 }, { "epoch": 0.4186649384316267, "grad_norm": 1.0713485479354858, "learning_rate": 4.844644783229993e-06, "loss": 0.169, "step": 1292 }, { "epoch": 0.4189889825016202, "grad_norm": 1.074339509010315, "learning_rate": 4.844341133653709e-06, "loss": 0.1923, "step": 1293 }, { "epoch": 0.41931302657161373, "grad_norm": 0.9841317534446716, "learning_rate": 4.844037197152767e-06, "loss": 0.1743, "step": 1294 }, { "epoch": 0.41963707064160727, "grad_norm": 1.0041999816894531, "learning_rate": 4.843732973764363e-06, "loss": 0.174, "step": 1295 }, { "epoch": 0.4199611147116008, "grad_norm": 0.9640102982521057, "learning_rate": 4.8434284635257335e-06, "loss": 0.1655, "step": 1296 }, { "epoch": 0.4202851587815943, "grad_norm": 0.9690929651260376, "learning_rate": 4.843123666474146e-06, "loss": 0.1666, "step": 1297 }, { "epoch": 0.4206092028515878, "grad_norm": 1.0001012086868286, "learning_rate": 4.842818582646904e-06, "loss": 0.1749, "step": 1298 }, { "epoch": 0.42093324692158135, "grad_norm": 1.098995566368103, "learning_rate": 4.842513212081348e-06, "loss": 0.1761, "step": 1299 }, { "epoch": 0.42125729099157483, "grad_norm": 0.9785960912704468, "learning_rate": 4.8422075548148525e-06, "loss": 0.1538, "step": 1300 }, { "epoch": 0.42158133506156836, "grad_norm": 0.9744332432746887, "learning_rate": 4.841901610884826e-06, "loss": 0.1822, "step": 1301 }, { "epoch": 0.4219053791315619, "grad_norm": 0.9197339415550232, "learning_rate": 4.841595380328714e-06, "loss": 0.164, "step": 1302 }, { "epoch": 0.42222942320155543, "grad_norm": 0.9323499202728271, "learning_rate": 4.841288863183996e-06, "loss": 0.1605, "step": 1303 }, { "epoch": 0.4225534672715489, "grad_norm": 1.0368022918701172, "learning_rate": 4.840982059488186e-06, "loss": 0.1796, "step": 1304 }, { "epoch": 0.42287751134154244, "grad_norm": 0.9731484651565552, "learning_rate": 4.840674969278836e-06, "loss": 0.163, "step": 1305 }, { "epoch": 0.423201555411536, "grad_norm": 0.9851950407028198, "learning_rate": 4.8403675925935275e-06, "loss": 0.1697, "step": 1306 }, { "epoch": 0.4235255994815295, "grad_norm": 0.8935414552688599, "learning_rate": 4.8400599294698825e-06, "loss": 0.1496, "step": 1307 }, { "epoch": 0.423849643551523, "grad_norm": 0.9661771059036255, "learning_rate": 4.839751979945556e-06, "loss": 0.1557, "step": 1308 }, { "epoch": 0.4241736876215165, "grad_norm": 1.0194766521453857, "learning_rate": 4.839443744058238e-06, "loss": 0.1668, "step": 1309 }, { "epoch": 0.42449773169151006, "grad_norm": 0.9830193519592285, "learning_rate": 4.839135221845654e-06, "loss": 0.1632, "step": 1310 }, { "epoch": 0.42482177576150354, "grad_norm": 1.0080420970916748, "learning_rate": 4.838826413345561e-06, "loss": 0.1706, "step": 1311 }, { "epoch": 0.4251458198314971, "grad_norm": 1.0843628644943237, "learning_rate": 4.838517318595758e-06, "loss": 0.177, "step": 1312 }, { "epoch": 0.4254698639014906, "grad_norm": 0.9669689536094666, "learning_rate": 4.838207937634074e-06, "loss": 0.1624, "step": 1313 }, { "epoch": 0.42579390797148414, "grad_norm": 0.9528621435165405, "learning_rate": 4.837898270498374e-06, "loss": 0.1645, "step": 1314 }, { "epoch": 0.4261179520414776, "grad_norm": 1.02474844455719, "learning_rate": 4.837588317226558e-06, "loss": 0.1746, "step": 1315 }, { "epoch": 0.42644199611147116, "grad_norm": 1.0107295513153076, "learning_rate": 4.837278077856562e-06, "loss": 0.1806, "step": 1316 }, { "epoch": 0.4267660401814647, "grad_norm": 1.029313564300537, "learning_rate": 4.836967552426355e-06, "loss": 0.1587, "step": 1317 }, { "epoch": 0.4270900842514582, "grad_norm": 0.9927104115486145, "learning_rate": 4.836656740973944e-06, "loss": 0.1679, "step": 1318 }, { "epoch": 0.4274141283214517, "grad_norm": 0.93535977602005, "learning_rate": 4.836345643537368e-06, "loss": 0.1695, "step": 1319 }, { "epoch": 0.42773817239144524, "grad_norm": 0.9303317070007324, "learning_rate": 4.836034260154704e-06, "loss": 0.1575, "step": 1320 }, { "epoch": 0.42806221646143877, "grad_norm": 0.9873595237731934, "learning_rate": 4.83572259086406e-06, "loss": 0.1645, "step": 1321 }, { "epoch": 0.42838626053143225, "grad_norm": 1.003503441810608, "learning_rate": 4.835410635703582e-06, "loss": 0.1627, "step": 1322 }, { "epoch": 0.4287103046014258, "grad_norm": 0.9385678768157959, "learning_rate": 4.835098394711451e-06, "loss": 0.1565, "step": 1323 }, { "epoch": 0.4290343486714193, "grad_norm": 0.9281715154647827, "learning_rate": 4.834785867925883e-06, "loss": 0.1657, "step": 1324 }, { "epoch": 0.42935839274141285, "grad_norm": 0.9996689558029175, "learning_rate": 4.8344730553851275e-06, "loss": 0.1681, "step": 1325 }, { "epoch": 0.42968243681140633, "grad_norm": 1.0321195125579834, "learning_rate": 4.834159957127468e-06, "loss": 0.1747, "step": 1326 }, { "epoch": 0.43000648088139987, "grad_norm": 1.0136144161224365, "learning_rate": 4.833846573191227e-06, "loss": 0.1617, "step": 1327 }, { "epoch": 0.4303305249513934, "grad_norm": 0.9769629836082458, "learning_rate": 4.833532903614758e-06, "loss": 0.1769, "step": 1328 }, { "epoch": 0.43065456902138693, "grad_norm": 1.0463759899139404, "learning_rate": 4.833218948436453e-06, "loss": 0.169, "step": 1329 }, { "epoch": 0.4309786130913804, "grad_norm": 0.9294410347938538, "learning_rate": 4.832904707694736e-06, "loss": 0.1636, "step": 1330 }, { "epoch": 0.43130265716137395, "grad_norm": 0.8959236741065979, "learning_rate": 4.832590181428066e-06, "loss": 0.1675, "step": 1331 }, { "epoch": 0.4316267012313675, "grad_norm": 1.002616047859192, "learning_rate": 4.832275369674939e-06, "loss": 0.1457, "step": 1332 }, { "epoch": 0.43195074530136096, "grad_norm": 1.065735101699829, "learning_rate": 4.831960272473886e-06, "loss": 0.1665, "step": 1333 }, { "epoch": 0.4322747893713545, "grad_norm": 1.0663241147994995, "learning_rate": 4.831644889863471e-06, "loss": 0.1823, "step": 1334 }, { "epoch": 0.43259883344134803, "grad_norm": 0.9952039122581482, "learning_rate": 4.831329221882291e-06, "loss": 0.1589, "step": 1335 }, { "epoch": 0.43292287751134156, "grad_norm": 0.9810091853141785, "learning_rate": 4.831013268568986e-06, "loss": 0.1681, "step": 1336 }, { "epoch": 0.43324692158133504, "grad_norm": 1.0748008489608765, "learning_rate": 4.830697029962222e-06, "loss": 0.1676, "step": 1337 }, { "epoch": 0.4335709656513286, "grad_norm": 0.9215734601020813, "learning_rate": 4.830380506100704e-06, "loss": 0.1638, "step": 1338 }, { "epoch": 0.4338950097213221, "grad_norm": 1.0052947998046875, "learning_rate": 4.830063697023173e-06, "loss": 0.1735, "step": 1339 }, { "epoch": 0.43421905379131565, "grad_norm": 0.974852979183197, "learning_rate": 4.829746602768401e-06, "loss": 0.1587, "step": 1340 }, { "epoch": 0.4345430978613091, "grad_norm": 1.002942681312561, "learning_rate": 4.8294292233752e-06, "loss": 0.1504, "step": 1341 }, { "epoch": 0.43486714193130266, "grad_norm": 0.9386513233184814, "learning_rate": 4.829111558882411e-06, "loss": 0.1623, "step": 1342 }, { "epoch": 0.4351911860012962, "grad_norm": 0.9189992547035217, "learning_rate": 4.828793609328916e-06, "loss": 0.1595, "step": 1343 }, { "epoch": 0.43551523007128967, "grad_norm": 1.1138416528701782, "learning_rate": 4.828475374753627e-06, "loss": 0.1723, "step": 1344 }, { "epoch": 0.4358392741412832, "grad_norm": 1.056380033493042, "learning_rate": 4.828156855195493e-06, "loss": 0.1802, "step": 1345 }, { "epoch": 0.43616331821127674, "grad_norm": 0.9843893051147461, "learning_rate": 4.827838050693499e-06, "loss": 0.1635, "step": 1346 }, { "epoch": 0.4364873622812703, "grad_norm": 0.991206169128418, "learning_rate": 4.827518961286663e-06, "loss": 0.1829, "step": 1347 }, { "epoch": 0.43681140635126375, "grad_norm": 1.008387804031372, "learning_rate": 4.827199587014038e-06, "loss": 0.1648, "step": 1348 }, { "epoch": 0.4371354504212573, "grad_norm": 1.040220856666565, "learning_rate": 4.826879927914713e-06, "loss": 0.1898, "step": 1349 }, { "epoch": 0.4374594944912508, "grad_norm": 0.9465731978416443, "learning_rate": 4.82655998402781e-06, "loss": 0.156, "step": 1350 }, { "epoch": 0.43778353856124436, "grad_norm": 1.0382673740386963, "learning_rate": 4.826239755392488e-06, "loss": 0.1802, "step": 1351 }, { "epoch": 0.43810758263123784, "grad_norm": 1.010624647140503, "learning_rate": 4.8259192420479395e-06, "loss": 0.179, "step": 1352 }, { "epoch": 0.43843162670123137, "grad_norm": 1.0043545961380005, "learning_rate": 4.825598444033393e-06, "loss": 0.1821, "step": 1353 }, { "epoch": 0.4387556707712249, "grad_norm": 0.9797499179840088, "learning_rate": 4.82527736138811e-06, "loss": 0.1774, "step": 1354 }, { "epoch": 0.4390797148412184, "grad_norm": 1.036889910697937, "learning_rate": 4.824955994151389e-06, "loss": 0.1719, "step": 1355 }, { "epoch": 0.4394037589112119, "grad_norm": 1.041792869567871, "learning_rate": 4.824634342362561e-06, "loss": 0.1888, "step": 1356 }, { "epoch": 0.43972780298120545, "grad_norm": 0.9451294541358948, "learning_rate": 4.824312406060995e-06, "loss": 0.1678, "step": 1357 }, { "epoch": 0.440051847051199, "grad_norm": 1.011522889137268, "learning_rate": 4.82399018528609e-06, "loss": 0.1658, "step": 1358 }, { "epoch": 0.44037589112119246, "grad_norm": 0.9879494905471802, "learning_rate": 4.823667680077285e-06, "loss": 0.1664, "step": 1359 }, { "epoch": 0.440699935191186, "grad_norm": 1.0607211589813232, "learning_rate": 4.8233448904740505e-06, "loss": 0.184, "step": 1360 }, { "epoch": 0.44102397926117953, "grad_norm": 1.0475084781646729, "learning_rate": 4.823021816515893e-06, "loss": 0.17, "step": 1361 }, { "epoch": 0.44134802333117307, "grad_norm": 0.9797631502151489, "learning_rate": 4.8226984582423545e-06, "loss": 0.1547, "step": 1362 }, { "epoch": 0.44167206740116655, "grad_norm": 0.9320208430290222, "learning_rate": 4.82237481569301e-06, "loss": 0.1627, "step": 1363 }, { "epoch": 0.4419961114711601, "grad_norm": 1.0192317962646484, "learning_rate": 4.822050888907469e-06, "loss": 0.1582, "step": 1364 }, { "epoch": 0.4423201555411536, "grad_norm": 0.9490489363670349, "learning_rate": 4.82172667792538e-06, "loss": 0.1567, "step": 1365 }, { "epoch": 0.4426441996111471, "grad_norm": 0.9601473212242126, "learning_rate": 4.821402182786421e-06, "loss": 0.1719, "step": 1366 }, { "epoch": 0.4429682436811406, "grad_norm": 0.9360396265983582, "learning_rate": 4.8210774035303085e-06, "loss": 0.165, "step": 1367 }, { "epoch": 0.44329228775113416, "grad_norm": 1.0148745775222778, "learning_rate": 4.82075234019679e-06, "loss": 0.1722, "step": 1368 }, { "epoch": 0.4436163318211277, "grad_norm": 1.180428147315979, "learning_rate": 4.820426992825653e-06, "loss": 0.1852, "step": 1369 }, { "epoch": 0.4439403758911212, "grad_norm": 0.9918110966682434, "learning_rate": 4.820101361456715e-06, "loss": 0.1789, "step": 1370 }, { "epoch": 0.4442644199611147, "grad_norm": 0.9531265497207642, "learning_rate": 4.819775446129832e-06, "loss": 0.1599, "step": 1371 }, { "epoch": 0.44458846403110824, "grad_norm": 0.9279592633247375, "learning_rate": 4.8194492468848895e-06, "loss": 0.1569, "step": 1372 }, { "epoch": 0.4449125081011017, "grad_norm": 0.9266502857208252, "learning_rate": 4.8191227637618145e-06, "loss": 0.1635, "step": 1373 }, { "epoch": 0.44523655217109526, "grad_norm": 0.9631732106208801, "learning_rate": 4.818795996800564e-06, "loss": 0.1661, "step": 1374 }, { "epoch": 0.4455605962410888, "grad_norm": 0.9835265874862671, "learning_rate": 4.8184689460411306e-06, "loss": 0.1763, "step": 1375 }, { "epoch": 0.4458846403110823, "grad_norm": 0.9746513366699219, "learning_rate": 4.818141611523543e-06, "loss": 0.1729, "step": 1376 }, { "epoch": 0.4462086843810758, "grad_norm": 0.9658451080322266, "learning_rate": 4.817813993287863e-06, "loss": 0.1622, "step": 1377 }, { "epoch": 0.44653272845106934, "grad_norm": 0.898226261138916, "learning_rate": 4.817486091374189e-06, "loss": 0.1486, "step": 1378 }, { "epoch": 0.4468567725210629, "grad_norm": 1.0004597902297974, "learning_rate": 4.817157905822652e-06, "loss": 0.1764, "step": 1379 }, { "epoch": 0.4471808165910564, "grad_norm": 1.0172264575958252, "learning_rate": 4.816829436673421e-06, "loss": 0.1729, "step": 1380 }, { "epoch": 0.4475048606610499, "grad_norm": 0.9500515460968018, "learning_rate": 4.816500683966694e-06, "loss": 0.1579, "step": 1381 }, { "epoch": 0.4478289047310434, "grad_norm": 1.0111055374145508, "learning_rate": 4.816171647742708e-06, "loss": 0.1705, "step": 1382 }, { "epoch": 0.44815294880103695, "grad_norm": 1.0100187063217163, "learning_rate": 4.815842328041736e-06, "loss": 0.1729, "step": 1383 }, { "epoch": 0.44847699287103043, "grad_norm": 0.8995622992515564, "learning_rate": 4.815512724904081e-06, "loss": 0.1504, "step": 1384 }, { "epoch": 0.44880103694102397, "grad_norm": 1.2931605577468872, "learning_rate": 4.815182838370085e-06, "loss": 0.1682, "step": 1385 }, { "epoch": 0.4491250810110175, "grad_norm": 1.0237258672714233, "learning_rate": 4.814852668480122e-06, "loss": 0.161, "step": 1386 }, { "epoch": 0.44944912508101104, "grad_norm": 0.9830361008644104, "learning_rate": 4.814522215274603e-06, "loss": 0.1693, "step": 1387 }, { "epoch": 0.4497731691510045, "grad_norm": 1.0150436162948608, "learning_rate": 4.81419147879397e-06, "loss": 0.1773, "step": 1388 }, { "epoch": 0.45009721322099805, "grad_norm": 0.9565200209617615, "learning_rate": 4.813860459078703e-06, "loss": 0.1611, "step": 1389 }, { "epoch": 0.4504212572909916, "grad_norm": 1.0266962051391602, "learning_rate": 4.813529156169317e-06, "loss": 0.1715, "step": 1390 }, { "epoch": 0.4507453013609851, "grad_norm": 0.9856990575790405, "learning_rate": 4.813197570106357e-06, "loss": 0.1586, "step": 1391 }, { "epoch": 0.4510693454309786, "grad_norm": 0.9939296245574951, "learning_rate": 4.8128657009304096e-06, "loss": 0.155, "step": 1392 }, { "epoch": 0.45139338950097213, "grad_norm": 0.9616846442222595, "learning_rate": 4.8125335486820905e-06, "loss": 0.1571, "step": 1393 }, { "epoch": 0.45171743357096567, "grad_norm": 0.8807049989700317, "learning_rate": 4.8122011134020505e-06, "loss": 0.1526, "step": 1394 }, { "epoch": 0.45204147764095914, "grad_norm": 1.0040156841278076, "learning_rate": 4.8118683951309795e-06, "loss": 0.1561, "step": 1395 }, { "epoch": 0.4523655217109527, "grad_norm": 1.0367448329925537, "learning_rate": 4.811535393909598e-06, "loss": 0.1871, "step": 1396 }, { "epoch": 0.4526895657809462, "grad_norm": 1.0239020586013794, "learning_rate": 4.811202109778661e-06, "loss": 0.1636, "step": 1397 }, { "epoch": 0.45301360985093975, "grad_norm": 0.949949324131012, "learning_rate": 4.810868542778959e-06, "loss": 0.1662, "step": 1398 }, { "epoch": 0.4533376539209332, "grad_norm": 1.001170039176941, "learning_rate": 4.81053469295132e-06, "loss": 0.1654, "step": 1399 }, { "epoch": 0.45366169799092676, "grad_norm": 0.9990749955177307, "learning_rate": 4.810200560336601e-06, "loss": 0.1718, "step": 1400 }, { "epoch": 0.4539857420609203, "grad_norm": 0.9623527526855469, "learning_rate": 4.809866144975699e-06, "loss": 0.1649, "step": 1401 }, { "epoch": 0.45430978613091383, "grad_norm": 0.9943154454231262, "learning_rate": 4.809531446909541e-06, "loss": 0.1808, "step": 1402 }, { "epoch": 0.4546338302009073, "grad_norm": 0.9126044511795044, "learning_rate": 4.8091964661790926e-06, "loss": 0.1587, "step": 1403 }, { "epoch": 0.45495787427090084, "grad_norm": 0.9064966440200806, "learning_rate": 4.808861202825351e-06, "loss": 0.1654, "step": 1404 }, { "epoch": 0.4552819183408944, "grad_norm": 1.045502781867981, "learning_rate": 4.80852565688935e-06, "loss": 0.1482, "step": 1405 }, { "epoch": 0.45560596241088785, "grad_norm": 0.9608927965164185, "learning_rate": 4.808189828412157e-06, "loss": 0.1692, "step": 1406 }, { "epoch": 0.4559300064808814, "grad_norm": 1.019291639328003, "learning_rate": 4.807853717434874e-06, "loss": 0.1792, "step": 1407 }, { "epoch": 0.4562540505508749, "grad_norm": 1.4600938558578491, "learning_rate": 4.807517323998637e-06, "loss": 0.1613, "step": 1408 }, { "epoch": 0.45657809462086846, "grad_norm": 0.9859667420387268, "learning_rate": 4.8071806481446194e-06, "loss": 0.162, "step": 1409 }, { "epoch": 0.45690213869086194, "grad_norm": 0.9860358238220215, "learning_rate": 4.806843689914025e-06, "loss": 0.1731, "step": 1410 }, { "epoch": 0.45722618276085547, "grad_norm": 0.9931458830833435, "learning_rate": 4.806506449348094e-06, "loss": 0.1724, "step": 1411 }, { "epoch": 0.457550226830849, "grad_norm": 0.9938206672668457, "learning_rate": 4.8061689264881036e-06, "loss": 0.1695, "step": 1412 }, { "epoch": 0.45787427090084254, "grad_norm": 0.9255531430244446, "learning_rate": 4.805831121375361e-06, "loss": 0.1694, "step": 1413 }, { "epoch": 0.458198314970836, "grad_norm": 0.9822893738746643, "learning_rate": 4.805493034051212e-06, "loss": 0.1648, "step": 1414 }, { "epoch": 0.45852235904082955, "grad_norm": 0.9348422288894653, "learning_rate": 4.805154664557034e-06, "loss": 0.1477, "step": 1415 }, { "epoch": 0.4588464031108231, "grad_norm": 0.9459620118141174, "learning_rate": 4.804816012934242e-06, "loss": 0.1728, "step": 1416 }, { "epoch": 0.45917044718081657, "grad_norm": 0.8711515069007874, "learning_rate": 4.8044770792242815e-06, "loss": 0.1472, "step": 1417 }, { "epoch": 0.4594944912508101, "grad_norm": 1.0231870412826538, "learning_rate": 4.8041378634686355e-06, "loss": 0.16, "step": 1418 }, { "epoch": 0.45981853532080363, "grad_norm": 0.9538334608078003, "learning_rate": 4.803798365708821e-06, "loss": 0.1615, "step": 1419 }, { "epoch": 0.46014257939079717, "grad_norm": 1.0552150011062622, "learning_rate": 4.803458585986389e-06, "loss": 0.186, "step": 1420 }, { "epoch": 0.46046662346079065, "grad_norm": 1.0478894710540771, "learning_rate": 4.803118524342925e-06, "loss": 0.1639, "step": 1421 }, { "epoch": 0.4607906675307842, "grad_norm": 0.9332813024520874, "learning_rate": 4.80277818082005e-06, "loss": 0.1566, "step": 1422 }, { "epoch": 0.4611147116007777, "grad_norm": 1.0140115022659302, "learning_rate": 4.802437555459418e-06, "loss": 0.1678, "step": 1423 }, { "epoch": 0.46143875567077125, "grad_norm": 1.0074522495269775, "learning_rate": 4.802096648302718e-06, "loss": 0.206, "step": 1424 }, { "epoch": 0.46176279974076473, "grad_norm": 1.0931317806243896, "learning_rate": 4.801755459391675e-06, "loss": 0.1796, "step": 1425 }, { "epoch": 0.46208684381075826, "grad_norm": 0.940055787563324, "learning_rate": 4.801413988768047e-06, "loss": 0.1609, "step": 1426 }, { "epoch": 0.4624108878807518, "grad_norm": 0.938195526599884, "learning_rate": 4.801072236473625e-06, "loss": 0.1637, "step": 1427 }, { "epoch": 0.4627349319507453, "grad_norm": 0.9516801834106445, "learning_rate": 4.800730202550237e-06, "loss": 0.1676, "step": 1428 }, { "epoch": 0.4630589760207388, "grad_norm": 0.93316251039505, "learning_rate": 4.800387887039747e-06, "loss": 0.158, "step": 1429 }, { "epoch": 0.46338302009073234, "grad_norm": 0.9559198021888733, "learning_rate": 4.800045289984047e-06, "loss": 0.1562, "step": 1430 }, { "epoch": 0.4637070641607259, "grad_norm": 0.9279831051826477, "learning_rate": 4.799702411425071e-06, "loss": 0.1626, "step": 1431 }, { "epoch": 0.46403110823071936, "grad_norm": 0.9327981472015381, "learning_rate": 4.7993592514047825e-06, "loss": 0.1579, "step": 1432 }, { "epoch": 0.4643551523007129, "grad_norm": 1.0297558307647705, "learning_rate": 4.7990158099651815e-06, "loss": 0.1646, "step": 1433 }, { "epoch": 0.4646791963707064, "grad_norm": 0.8880211114883423, "learning_rate": 4.798672087148301e-06, "loss": 0.1631, "step": 1434 }, { "epoch": 0.46500324044069996, "grad_norm": 0.8490897417068481, "learning_rate": 4.79832808299621e-06, "loss": 0.1439, "step": 1435 }, { "epoch": 0.46532728451069344, "grad_norm": 0.9423319101333618, "learning_rate": 4.797983797551011e-06, "loss": 0.1675, "step": 1436 }, { "epoch": 0.465651328580687, "grad_norm": 1.0397905111312866, "learning_rate": 4.7976392308548416e-06, "loss": 0.1734, "step": 1437 }, { "epoch": 0.4659753726506805, "grad_norm": 0.9998886585235596, "learning_rate": 4.797294382949873e-06, "loss": 0.1602, "step": 1438 }, { "epoch": 0.466299416720674, "grad_norm": 0.8817910552024841, "learning_rate": 4.796949253878311e-06, "loss": 0.1516, "step": 1439 }, { "epoch": 0.4666234607906675, "grad_norm": 0.9884592890739441, "learning_rate": 4.796603843682397e-06, "loss": 0.1666, "step": 1440 }, { "epoch": 0.46694750486066106, "grad_norm": 0.9682788848876953, "learning_rate": 4.796258152404406e-06, "loss": 0.1719, "step": 1441 }, { "epoch": 0.4672715489306546, "grad_norm": 0.9318127036094666, "learning_rate": 4.795912180086646e-06, "loss": 0.1572, "step": 1442 }, { "epoch": 0.46759559300064807, "grad_norm": 0.9371210336685181, "learning_rate": 4.795565926771461e-06, "loss": 0.1684, "step": 1443 }, { "epoch": 0.4679196370706416, "grad_norm": 0.968223512172699, "learning_rate": 4.79521939250123e-06, "loss": 0.1627, "step": 1444 }, { "epoch": 0.46824368114063514, "grad_norm": 0.9787799715995789, "learning_rate": 4.7948725773183645e-06, "loss": 0.1781, "step": 1445 }, { "epoch": 0.46856772521062867, "grad_norm": 1.0054574012756348, "learning_rate": 4.794525481265312e-06, "loss": 0.1825, "step": 1446 }, { "epoch": 0.46889176928062215, "grad_norm": 0.9471763372421265, "learning_rate": 4.794178104384554e-06, "loss": 0.1568, "step": 1447 }, { "epoch": 0.4692158133506157, "grad_norm": 1.0143144130706787, "learning_rate": 4.7938304467186036e-06, "loss": 0.1659, "step": 1448 }, { "epoch": 0.4695398574206092, "grad_norm": 0.891257107257843, "learning_rate": 4.793482508310014e-06, "loss": 0.15, "step": 1449 }, { "epoch": 0.4698639014906027, "grad_norm": 0.9457550048828125, "learning_rate": 4.793134289201367e-06, "loss": 0.1551, "step": 1450 }, { "epoch": 0.47018794556059623, "grad_norm": 0.9223624467849731, "learning_rate": 4.792785789435283e-06, "loss": 0.149, "step": 1451 }, { "epoch": 0.47051198963058977, "grad_norm": 0.9519339799880981, "learning_rate": 4.792437009054413e-06, "loss": 0.1596, "step": 1452 }, { "epoch": 0.4708360337005833, "grad_norm": 0.9895346760749817, "learning_rate": 4.792087948101447e-06, "loss": 0.1659, "step": 1453 }, { "epoch": 0.4711600777705768, "grad_norm": 0.9164407849311829, "learning_rate": 4.791738606619105e-06, "loss": 0.1615, "step": 1454 }, { "epoch": 0.4714841218405703, "grad_norm": 0.9475098252296448, "learning_rate": 4.791388984650143e-06, "loss": 0.148, "step": 1455 }, { "epoch": 0.47180816591056385, "grad_norm": 0.9886651635169983, "learning_rate": 4.791039082237352e-06, "loss": 0.1867, "step": 1456 }, { "epoch": 0.4721322099805574, "grad_norm": 0.8633338809013367, "learning_rate": 4.790688899423556e-06, "loss": 0.1466, "step": 1457 }, { "epoch": 0.47245625405055086, "grad_norm": 1.012751817703247, "learning_rate": 4.7903384362516135e-06, "loss": 0.1747, "step": 1458 }, { "epoch": 0.4727802981205444, "grad_norm": 1.0237362384796143, "learning_rate": 4.78998769276442e-06, "loss": 0.1719, "step": 1459 }, { "epoch": 0.47310434219053793, "grad_norm": 0.9652113318443298, "learning_rate": 4.7896366690049016e-06, "loss": 0.1583, "step": 1460 }, { "epoch": 0.4734283862605314, "grad_norm": 0.9437934756278992, "learning_rate": 4.789285365016019e-06, "loss": 0.1584, "step": 1461 }, { "epoch": 0.47375243033052494, "grad_norm": 0.9734069108963013, "learning_rate": 4.788933780840771e-06, "loss": 0.1593, "step": 1462 }, { "epoch": 0.4740764744005185, "grad_norm": 0.9889543652534485, "learning_rate": 4.788581916522186e-06, "loss": 0.1707, "step": 1463 }, { "epoch": 0.474400518470512, "grad_norm": 1.0652320384979248, "learning_rate": 4.78822977210333e-06, "loss": 0.1699, "step": 1464 }, { "epoch": 0.4747245625405055, "grad_norm": 0.91072016954422, "learning_rate": 4.787877347627302e-06, "loss": 0.1548, "step": 1465 }, { "epoch": 0.475048606610499, "grad_norm": 0.9999649524688721, "learning_rate": 4.787524643137235e-06, "loss": 0.1639, "step": 1466 }, { "epoch": 0.47537265068049256, "grad_norm": 1.094711422920227, "learning_rate": 4.7871716586762965e-06, "loss": 0.182, "step": 1467 }, { "epoch": 0.4756966947504861, "grad_norm": 0.98603755235672, "learning_rate": 4.786818394287688e-06, "loss": 0.1679, "step": 1468 }, { "epoch": 0.47602073882047957, "grad_norm": 0.9653013348579407, "learning_rate": 4.786464850014646e-06, "loss": 0.1611, "step": 1469 }, { "epoch": 0.4763447828904731, "grad_norm": 0.971269965171814, "learning_rate": 4.786111025900442e-06, "loss": 0.1724, "step": 1470 }, { "epoch": 0.47666882696046664, "grad_norm": 0.9750480055809021, "learning_rate": 4.785756921988379e-06, "loss": 0.1637, "step": 1471 }, { "epoch": 0.4769928710304601, "grad_norm": 0.9388756155967712, "learning_rate": 4.785402538321798e-06, "loss": 0.1637, "step": 1472 }, { "epoch": 0.47731691510045365, "grad_norm": 1.0086320638656616, "learning_rate": 4.785047874944069e-06, "loss": 0.1616, "step": 1473 }, { "epoch": 0.4776409591704472, "grad_norm": 1.0045452117919922, "learning_rate": 4.784692931898601e-06, "loss": 0.1541, "step": 1474 }, { "epoch": 0.4779650032404407, "grad_norm": 0.9847915768623352, "learning_rate": 4.7843377092288365e-06, "loss": 0.164, "step": 1475 }, { "epoch": 0.4782890473104342, "grad_norm": 0.996334969997406, "learning_rate": 4.7839822069782505e-06, "loss": 0.1659, "step": 1476 }, { "epoch": 0.47861309138042774, "grad_norm": 0.9724141359329224, "learning_rate": 4.783626425190353e-06, "loss": 0.1675, "step": 1477 }, { "epoch": 0.47893713545042127, "grad_norm": 0.9559745192527771, "learning_rate": 4.783270363908687e-06, "loss": 0.1688, "step": 1478 }, { "epoch": 0.4792611795204148, "grad_norm": 1.0229589939117432, "learning_rate": 4.782914023176834e-06, "loss": 0.1793, "step": 1479 }, { "epoch": 0.4795852235904083, "grad_norm": 0.9505227208137512, "learning_rate": 4.782557403038404e-06, "loss": 0.1658, "step": 1480 }, { "epoch": 0.4799092676604018, "grad_norm": 0.9692978262901306, "learning_rate": 4.7822005035370455e-06, "loss": 0.1596, "step": 1481 }, { "epoch": 0.48023331173039535, "grad_norm": 1.0005214214324951, "learning_rate": 4.781843324716437e-06, "loss": 0.1836, "step": 1482 }, { "epoch": 0.48055735580038883, "grad_norm": 0.8998204469680786, "learning_rate": 4.7814858666202975e-06, "loss": 0.1557, "step": 1483 }, { "epoch": 0.48088139987038236, "grad_norm": 0.9466003179550171, "learning_rate": 4.781128129292374e-06, "loss": 0.1658, "step": 1484 }, { "epoch": 0.4812054439403759, "grad_norm": 0.9802458882331848, "learning_rate": 4.7807701127764506e-06, "loss": 0.1604, "step": 1485 }, { "epoch": 0.48152948801036943, "grad_norm": 0.9373664855957031, "learning_rate": 4.780411817116344e-06, "loss": 0.1781, "step": 1486 }, { "epoch": 0.4818535320803629, "grad_norm": 0.9663504958152771, "learning_rate": 4.780053242355908e-06, "loss": 0.1761, "step": 1487 }, { "epoch": 0.48217757615035645, "grad_norm": 0.8889632821083069, "learning_rate": 4.779694388539027e-06, "loss": 0.1677, "step": 1488 }, { "epoch": 0.48250162022035, "grad_norm": 0.9445527791976929, "learning_rate": 4.779335255709623e-06, "loss": 0.1644, "step": 1489 }, { "epoch": 0.48282566429034346, "grad_norm": 0.9753222465515137, "learning_rate": 4.778975843911649e-06, "loss": 0.165, "step": 1490 }, { "epoch": 0.483149708360337, "grad_norm": 0.9640643000602722, "learning_rate": 4.778616153189093e-06, "loss": 0.1621, "step": 1491 }, { "epoch": 0.48347375243033053, "grad_norm": 0.9978510737419128, "learning_rate": 4.7782561835859795e-06, "loss": 0.1775, "step": 1492 }, { "epoch": 0.48379779650032406, "grad_norm": 0.9495340585708618, "learning_rate": 4.777895935146364e-06, "loss": 0.1687, "step": 1493 }, { "epoch": 0.48412184057031754, "grad_norm": 0.9323464035987854, "learning_rate": 4.777535407914338e-06, "loss": 0.1569, "step": 1494 }, { "epoch": 0.4844458846403111, "grad_norm": 0.881629228591919, "learning_rate": 4.777174601934026e-06, "loss": 0.1582, "step": 1495 }, { "epoch": 0.4847699287103046, "grad_norm": 0.9408942461013794, "learning_rate": 4.776813517249588e-06, "loss": 0.1677, "step": 1496 }, { "epoch": 0.48509397278029814, "grad_norm": 1.020785927772522, "learning_rate": 4.776452153905216e-06, "loss": 0.186, "step": 1497 }, { "epoch": 0.4854180168502916, "grad_norm": 0.990865170955658, "learning_rate": 4.776090511945139e-06, "loss": 0.1713, "step": 1498 }, { "epoch": 0.48574206092028516, "grad_norm": 0.942308783531189, "learning_rate": 4.775728591413616e-06, "loss": 0.1633, "step": 1499 }, { "epoch": 0.4860661049902787, "grad_norm": 0.9622128009796143, "learning_rate": 4.775366392354946e-06, "loss": 0.1612, "step": 1500 }, { "epoch": 0.48639014906027217, "grad_norm": 0.9915785193443298, "learning_rate": 4.775003914813456e-06, "loss": 0.1655, "step": 1501 }, { "epoch": 0.4867141931302657, "grad_norm": 1.0087889432907104, "learning_rate": 4.7746411588335105e-06, "loss": 0.1766, "step": 1502 }, { "epoch": 0.48703823720025924, "grad_norm": 0.9488150477409363, "learning_rate": 4.774278124459509e-06, "loss": 0.1667, "step": 1503 }, { "epoch": 0.4873622812702528, "grad_norm": 0.8961800336837769, "learning_rate": 4.773914811735879e-06, "loss": 0.1537, "step": 1504 }, { "epoch": 0.48768632534024625, "grad_norm": 0.9038843512535095, "learning_rate": 4.773551220707091e-06, "loss": 0.1574, "step": 1505 }, { "epoch": 0.4880103694102398, "grad_norm": 0.9799198508262634, "learning_rate": 4.773187351417643e-06, "loss": 0.1741, "step": 1506 }, { "epoch": 0.4883344134802333, "grad_norm": 1.0378434658050537, "learning_rate": 4.772823203912069e-06, "loss": 0.169, "step": 1507 }, { "epoch": 0.48865845755022685, "grad_norm": 0.9953622817993164, "learning_rate": 4.772458778234938e-06, "loss": 0.181, "step": 1508 }, { "epoch": 0.48898250162022033, "grad_norm": 1.0660994052886963, "learning_rate": 4.772094074430852e-06, "loss": 0.1734, "step": 1509 }, { "epoch": 0.48930654569021387, "grad_norm": 0.8679867386817932, "learning_rate": 4.771729092544446e-06, "loss": 0.1565, "step": 1510 }, { "epoch": 0.4896305897602074, "grad_norm": 0.9982333183288574, "learning_rate": 4.771363832620391e-06, "loss": 0.1826, "step": 1511 }, { "epoch": 0.4899546338302009, "grad_norm": 1.1239075660705566, "learning_rate": 4.770998294703392e-06, "loss": 0.1739, "step": 1512 }, { "epoch": 0.4902786779001944, "grad_norm": 0.9775390028953552, "learning_rate": 4.7706324788381865e-06, "loss": 0.1711, "step": 1513 }, { "epoch": 0.49060272197018795, "grad_norm": 1.0100210905075073, "learning_rate": 4.770266385069547e-06, "loss": 0.1774, "step": 1514 }, { "epoch": 0.4909267660401815, "grad_norm": 0.9602349996566772, "learning_rate": 4.769900013442279e-06, "loss": 0.1631, "step": 1515 }, { "epoch": 0.49125081011017496, "grad_norm": 1.0033910274505615, "learning_rate": 4.769533364001225e-06, "loss": 0.1764, "step": 1516 }, { "epoch": 0.4915748541801685, "grad_norm": 0.9373995661735535, "learning_rate": 4.769166436791257e-06, "loss": 0.1545, "step": 1517 }, { "epoch": 0.49189889825016203, "grad_norm": 0.8880525231361389, "learning_rate": 4.768799231857285e-06, "loss": 0.1442, "step": 1518 }, { "epoch": 0.49222294232015557, "grad_norm": 0.9575677514076233, "learning_rate": 4.768431749244251e-06, "loss": 0.1735, "step": 1519 }, { "epoch": 0.49254698639014904, "grad_norm": 0.9560174942016602, "learning_rate": 4.76806398899713e-06, "loss": 0.166, "step": 1520 }, { "epoch": 0.4928710304601426, "grad_norm": 0.9476724863052368, "learning_rate": 4.767695951160934e-06, "loss": 0.1689, "step": 1521 }, { "epoch": 0.4931950745301361, "grad_norm": 0.9879964590072632, "learning_rate": 4.767327635780707e-06, "loss": 0.1708, "step": 1522 }, { "epoch": 0.4935191186001296, "grad_norm": 0.9266806840896606, "learning_rate": 4.7669590429015265e-06, "loss": 0.1576, "step": 1523 }, { "epoch": 0.4938431626701231, "grad_norm": 1.020934820175171, "learning_rate": 4.7665901725685045e-06, "loss": 0.169, "step": 1524 }, { "epoch": 0.49416720674011666, "grad_norm": 0.8984940052032471, "learning_rate": 4.766221024826788e-06, "loss": 0.1624, "step": 1525 }, { "epoch": 0.4944912508101102, "grad_norm": 0.9921505451202393, "learning_rate": 4.765851599721557e-06, "loss": 0.1668, "step": 1526 }, { "epoch": 0.4948152948801037, "grad_norm": 0.965148389339447, "learning_rate": 4.765481897298025e-06, "loss": 0.1684, "step": 1527 }, { "epoch": 0.4951393389500972, "grad_norm": 0.9692872166633606, "learning_rate": 4.76511191760144e-06, "loss": 0.154, "step": 1528 }, { "epoch": 0.49546338302009074, "grad_norm": 1.029179573059082, "learning_rate": 4.764741660677085e-06, "loss": 0.1872, "step": 1529 }, { "epoch": 0.4957874270900843, "grad_norm": 0.9152058959007263, "learning_rate": 4.764371126570275e-06, "loss": 0.1618, "step": 1530 }, { "epoch": 0.49611147116007775, "grad_norm": 0.9520514607429504, "learning_rate": 4.76400031532636e-06, "loss": 0.165, "step": 1531 }, { "epoch": 0.4964355152300713, "grad_norm": 0.9545121192932129, "learning_rate": 4.763629226990724e-06, "loss": 0.1744, "step": 1532 }, { "epoch": 0.4967595593000648, "grad_norm": 0.9610336422920227, "learning_rate": 4.763257861608783e-06, "loss": 0.1603, "step": 1533 }, { "epoch": 0.4970836033700583, "grad_norm": 0.9071905016899109, "learning_rate": 4.762886219225991e-06, "loss": 0.152, "step": 1534 }, { "epoch": 0.49740764744005184, "grad_norm": 0.9342719912528992, "learning_rate": 4.762514299887831e-06, "loss": 0.1625, "step": 1535 }, { "epoch": 0.49773169151004537, "grad_norm": 1.0375604629516602, "learning_rate": 4.762142103639824e-06, "loss": 0.18, "step": 1536 }, { "epoch": 0.4980557355800389, "grad_norm": 0.9291142225265503, "learning_rate": 4.761769630527523e-06, "loss": 0.1569, "step": 1537 }, { "epoch": 0.4983797796500324, "grad_norm": 0.9139031767845154, "learning_rate": 4.761396880596515e-06, "loss": 0.1517, "step": 1538 }, { "epoch": 0.4987038237200259, "grad_norm": 0.8803319334983826, "learning_rate": 4.76102385389242e-06, "loss": 0.1512, "step": 1539 }, { "epoch": 0.49902786779001945, "grad_norm": 0.8804055452346802, "learning_rate": 4.760650550460895e-06, "loss": 0.1537, "step": 1540 }, { "epoch": 0.499351911860013, "grad_norm": 0.8991281986236572, "learning_rate": 4.760276970347627e-06, "loss": 0.1597, "step": 1541 }, { "epoch": 0.49967595593000647, "grad_norm": 0.9166383743286133, "learning_rate": 4.759903113598338e-06, "loss": 0.1686, "step": 1542 }, { "epoch": 0.5, "grad_norm": 0.854396641254425, "learning_rate": 4.759528980258786e-06, "loss": 0.1364, "step": 1543 }, { "epoch": 0.5003240440699935, "grad_norm": 1.0105361938476562, "learning_rate": 4.759154570374761e-06, "loss": 0.1794, "step": 1544 }, { "epoch": 0.5006480881399871, "grad_norm": 0.9273484349250793, "learning_rate": 4.758779883992087e-06, "loss": 0.1607, "step": 1545 }, { "epoch": 0.5009721322099806, "grad_norm": 1.0614210367202759, "learning_rate": 4.758404921156622e-06, "loss": 0.1927, "step": 1546 }, { "epoch": 0.501296176279974, "grad_norm": 0.9139055609703064, "learning_rate": 4.7580296819142565e-06, "loss": 0.1525, "step": 1547 }, { "epoch": 0.5016202203499676, "grad_norm": 0.9235586524009705, "learning_rate": 4.757654166310919e-06, "loss": 0.1526, "step": 1548 }, { "epoch": 0.5019442644199611, "grad_norm": 0.9597269296646118, "learning_rate": 4.757278374392567e-06, "loss": 0.1707, "step": 1549 }, { "epoch": 0.5022683084899546, "grad_norm": 0.9178736805915833, "learning_rate": 4.7569023062051936e-06, "loss": 0.159, "step": 1550 }, { "epoch": 0.5025923525599482, "grad_norm": 0.9147335886955261, "learning_rate": 4.756525961794826e-06, "loss": 0.1573, "step": 1551 }, { "epoch": 0.5029163966299417, "grad_norm": 0.9195179343223572, "learning_rate": 4.756149341207526e-06, "loss": 0.165, "step": 1552 }, { "epoch": 0.5032404406999352, "grad_norm": 0.9070149660110474, "learning_rate": 4.755772444489388e-06, "loss": 0.1631, "step": 1553 }, { "epoch": 0.5035644847699287, "grad_norm": 0.9737628102302551, "learning_rate": 4.75539527168654e-06, "loss": 0.173, "step": 1554 }, { "epoch": 0.5038885288399222, "grad_norm": 1.0073634386062622, "learning_rate": 4.755017822845145e-06, "loss": 0.1799, "step": 1555 }, { "epoch": 0.5042125729099157, "grad_norm": 0.8783349394798279, "learning_rate": 4.754640098011399e-06, "loss": 0.1545, "step": 1556 }, { "epoch": 0.5045366169799093, "grad_norm": 0.896638035774231, "learning_rate": 4.754262097231531e-06, "loss": 0.1603, "step": 1557 }, { "epoch": 0.5048606610499028, "grad_norm": 0.9870060086250305, "learning_rate": 4.753883820551806e-06, "loss": 0.1601, "step": 1558 }, { "epoch": 0.5051847051198963, "grad_norm": 0.9582774043083191, "learning_rate": 4.75350526801852e-06, "loss": 0.1682, "step": 1559 }, { "epoch": 0.5055087491898899, "grad_norm": 0.9573325514793396, "learning_rate": 4.753126439678005e-06, "loss": 0.163, "step": 1560 }, { "epoch": 0.5058327932598834, "grad_norm": 0.9994848370552063, "learning_rate": 4.752747335576626e-06, "loss": 0.1694, "step": 1561 }, { "epoch": 0.5061568373298768, "grad_norm": 0.9292284250259399, "learning_rate": 4.752367955760781e-06, "loss": 0.1619, "step": 1562 }, { "epoch": 0.5064808813998704, "grad_norm": 0.9492481350898743, "learning_rate": 4.751988300276903e-06, "loss": 0.1673, "step": 1563 }, { "epoch": 0.5068049254698639, "grad_norm": 0.9279813170433044, "learning_rate": 4.751608369171458e-06, "loss": 0.1544, "step": 1564 }, { "epoch": 0.5071289695398574, "grad_norm": 0.9612503051757812, "learning_rate": 4.751228162490946e-06, "loss": 0.1599, "step": 1565 }, { "epoch": 0.507453013609851, "grad_norm": 1.0000669956207275, "learning_rate": 4.750847680281901e-06, "loss": 0.1655, "step": 1566 }, { "epoch": 0.5077770576798445, "grad_norm": 0.9342029690742493, "learning_rate": 4.750466922590888e-06, "loss": 0.1537, "step": 1567 }, { "epoch": 0.508101101749838, "grad_norm": 0.8906809091567993, "learning_rate": 4.750085889464512e-06, "loss": 0.162, "step": 1568 }, { "epoch": 0.5084251458198314, "grad_norm": 1.016288161277771, "learning_rate": 4.749704580949404e-06, "loss": 0.167, "step": 1569 }, { "epoch": 0.508749189889825, "grad_norm": 1.0042933225631714, "learning_rate": 4.749322997092235e-06, "loss": 0.1605, "step": 1570 }, { "epoch": 0.5090732339598185, "grad_norm": 1.0259679555892944, "learning_rate": 4.748941137939706e-06, "loss": 0.1699, "step": 1571 }, { "epoch": 0.509397278029812, "grad_norm": 0.8490434885025024, "learning_rate": 4.748559003538553e-06, "loss": 0.1486, "step": 1572 }, { "epoch": 0.5097213220998056, "grad_norm": 0.9260954260826111, "learning_rate": 4.748176593935546e-06, "loss": 0.1751, "step": 1573 }, { "epoch": 0.5100453661697991, "grad_norm": 0.8708962798118591, "learning_rate": 4.7477939091774885e-06, "loss": 0.154, "step": 1574 }, { "epoch": 0.5103694102397927, "grad_norm": 1.035266399383545, "learning_rate": 4.7474109493112154e-06, "loss": 0.1629, "step": 1575 }, { "epoch": 0.5106934543097861, "grad_norm": 1.014716625213623, "learning_rate": 4.7470277143836e-06, "loss": 0.1741, "step": 1576 }, { "epoch": 0.5110174983797796, "grad_norm": 0.9917327761650085, "learning_rate": 4.746644204441545e-06, "loss": 0.1772, "step": 1577 }, { "epoch": 0.5113415424497731, "grad_norm": 1.0170183181762695, "learning_rate": 4.746260419531989e-06, "loss": 0.1718, "step": 1578 }, { "epoch": 0.5116655865197667, "grad_norm": 0.9468106031417847, "learning_rate": 4.745876359701902e-06, "loss": 0.1652, "step": 1579 }, { "epoch": 0.5119896305897602, "grad_norm": 0.9813267588615417, "learning_rate": 4.745492024998291e-06, "loss": 0.17, "step": 1580 }, { "epoch": 0.5123136746597537, "grad_norm": 1.0143340826034546, "learning_rate": 4.745107415468194e-06, "loss": 0.1665, "step": 1581 }, { "epoch": 0.5126377187297473, "grad_norm": 0.9156917929649353, "learning_rate": 4.744722531158683e-06, "loss": 0.1556, "step": 1582 }, { "epoch": 0.5129617627997408, "grad_norm": 0.8644934892654419, "learning_rate": 4.744337372116866e-06, "loss": 0.1604, "step": 1583 }, { "epoch": 0.5132858068697342, "grad_norm": 0.9044967293739319, "learning_rate": 4.743951938389881e-06, "loss": 0.1516, "step": 1584 }, { "epoch": 0.5136098509397278, "grad_norm": 1.0245859622955322, "learning_rate": 4.743566230024902e-06, "loss": 0.1574, "step": 1585 }, { "epoch": 0.5139338950097213, "grad_norm": 1.0180058479309082, "learning_rate": 4.7431802470691355e-06, "loss": 0.1725, "step": 1586 }, { "epoch": 0.5142579390797148, "grad_norm": 0.9526636600494385, "learning_rate": 4.7427939895698235e-06, "loss": 0.1586, "step": 1587 }, { "epoch": 0.5145819831497084, "grad_norm": 0.9528282284736633, "learning_rate": 4.742407457574238e-06, "loss": 0.1678, "step": 1588 }, { "epoch": 0.5149060272197019, "grad_norm": 0.8863286375999451, "learning_rate": 4.7420206511296885e-06, "loss": 0.149, "step": 1589 }, { "epoch": 0.5152300712896954, "grad_norm": 0.8912634253501892, "learning_rate": 4.7416335702835155e-06, "loss": 0.1614, "step": 1590 }, { "epoch": 0.5155541153596889, "grad_norm": 1.000412940979004, "learning_rate": 4.741246215083094e-06, "loss": 0.1785, "step": 1591 }, { "epoch": 0.5158781594296824, "grad_norm": 1.007895827293396, "learning_rate": 4.740858585575832e-06, "loss": 0.1659, "step": 1592 }, { "epoch": 0.5162022034996759, "grad_norm": 0.9396275281906128, "learning_rate": 4.7404706818091736e-06, "loss": 0.1656, "step": 1593 }, { "epoch": 0.5165262475696695, "grad_norm": 1.000503420829773, "learning_rate": 4.740082503830593e-06, "loss": 0.1299, "step": 1594 }, { "epoch": 0.516850291639663, "grad_norm": 0.9946775436401367, "learning_rate": 4.7396940516875996e-06, "loss": 0.1719, "step": 1595 }, { "epoch": 0.5171743357096565, "grad_norm": 0.8963751792907715, "learning_rate": 4.739305325427736e-06, "loss": 0.1536, "step": 1596 }, { "epoch": 0.5174983797796501, "grad_norm": 1.0010433197021484, "learning_rate": 4.738916325098579e-06, "loss": 0.1629, "step": 1597 }, { "epoch": 0.5178224238496435, "grad_norm": 0.9205980896949768, "learning_rate": 4.738527050747738e-06, "loss": 0.1569, "step": 1598 }, { "epoch": 0.518146467919637, "grad_norm": 0.9161039590835571, "learning_rate": 4.738137502422856e-06, "loss": 0.1559, "step": 1599 }, { "epoch": 0.5184705119896306, "grad_norm": 0.9791836738586426, "learning_rate": 4.737747680171611e-06, "loss": 0.1663, "step": 1600 }, { "epoch": 0.5187945560596241, "grad_norm": 0.9533772468566895, "learning_rate": 4.737357584041713e-06, "loss": 0.1763, "step": 1601 }, { "epoch": 0.5191186001296176, "grad_norm": 0.8820740580558777, "learning_rate": 4.7369672140809065e-06, "loss": 0.1528, "step": 1602 }, { "epoch": 0.5194426441996112, "grad_norm": 0.9435256123542786, "learning_rate": 4.736576570336968e-06, "loss": 0.1746, "step": 1603 }, { "epoch": 0.5197666882696047, "grad_norm": 0.9638010263442993, "learning_rate": 4.736185652857709e-06, "loss": 0.1581, "step": 1604 }, { "epoch": 0.5200907323395982, "grad_norm": 0.9515169262886047, "learning_rate": 4.7357944616909745e-06, "loss": 0.159, "step": 1605 }, { "epoch": 0.5204147764095917, "grad_norm": 0.9088611602783203, "learning_rate": 4.735402996884642e-06, "loss": 0.1648, "step": 1606 }, { "epoch": 0.5207388204795852, "grad_norm": 0.8556360006332397, "learning_rate": 4.7350112584866225e-06, "loss": 0.1603, "step": 1607 }, { "epoch": 0.5210628645495787, "grad_norm": 0.9666315913200378, "learning_rate": 4.734619246544862e-06, "loss": 0.1751, "step": 1608 }, { "epoch": 0.5213869086195723, "grad_norm": 0.9698085188865662, "learning_rate": 4.734226961107338e-06, "loss": 0.1634, "step": 1609 }, { "epoch": 0.5217109526895658, "grad_norm": 0.9516555666923523, "learning_rate": 4.733834402222064e-06, "loss": 0.1646, "step": 1610 }, { "epoch": 0.5220349967595593, "grad_norm": 1.0506342649459839, "learning_rate": 4.7334415699370825e-06, "loss": 0.1772, "step": 1611 }, { "epoch": 0.5223590408295529, "grad_norm": 0.9331313967704773, "learning_rate": 4.733048464300476e-06, "loss": 0.1487, "step": 1612 }, { "epoch": 0.5226830848995463, "grad_norm": 1.0250540971755981, "learning_rate": 4.732655085360355e-06, "loss": 0.179, "step": 1613 }, { "epoch": 0.5230071289695398, "grad_norm": 0.9178912043571472, "learning_rate": 4.7322614331648645e-06, "loss": 0.1594, "step": 1614 }, { "epoch": 0.5233311730395334, "grad_norm": 0.9130881428718567, "learning_rate": 4.731867507762184e-06, "loss": 0.1538, "step": 1615 }, { "epoch": 0.5236552171095269, "grad_norm": 0.8410662412643433, "learning_rate": 4.731473309200528e-06, "loss": 0.1489, "step": 1616 }, { "epoch": 0.5239792611795204, "grad_norm": 0.925051748752594, "learning_rate": 4.731078837528141e-06, "loss": 0.1653, "step": 1617 }, { "epoch": 0.524303305249514, "grad_norm": 0.8911797404289246, "learning_rate": 4.730684092793302e-06, "loss": 0.1587, "step": 1618 }, { "epoch": 0.5246273493195075, "grad_norm": 0.9711928367614746, "learning_rate": 4.730289075044326e-06, "loss": 0.1706, "step": 1619 }, { "epoch": 0.5249513933895009, "grad_norm": 0.9292394518852234, "learning_rate": 4.729893784329557e-06, "loss": 0.1465, "step": 1620 }, { "epoch": 0.5252754374594945, "grad_norm": 0.9164044857025146, "learning_rate": 4.729498220697377e-06, "loss": 0.1635, "step": 1621 }, { "epoch": 0.525599481529488, "grad_norm": 0.8762049674987793, "learning_rate": 4.729102384196197e-06, "loss": 0.1554, "step": 1622 }, { "epoch": 0.5259235255994815, "grad_norm": 0.8459444046020508, "learning_rate": 4.728706274874465e-06, "loss": 0.1619, "step": 1623 }, { "epoch": 0.5262475696694751, "grad_norm": 0.9501531720161438, "learning_rate": 4.72830989278066e-06, "loss": 0.1756, "step": 1624 }, { "epoch": 0.5265716137394686, "grad_norm": 0.9610527157783508, "learning_rate": 4.727913237963296e-06, "loss": 0.1707, "step": 1625 }, { "epoch": 0.5268956578094621, "grad_norm": 0.9790906310081482, "learning_rate": 4.72751631047092e-06, "loss": 0.1887, "step": 1626 }, { "epoch": 0.5272197018794557, "grad_norm": 1.0699243545532227, "learning_rate": 4.727119110352112e-06, "loss": 0.1571, "step": 1627 }, { "epoch": 0.5275437459494491, "grad_norm": 0.8611209988594055, "learning_rate": 4.726721637655484e-06, "loss": 0.1649, "step": 1628 }, { "epoch": 0.5278677900194426, "grad_norm": 0.9416497349739075, "learning_rate": 4.7263238924296835e-06, "loss": 0.162, "step": 1629 }, { "epoch": 0.5281918340894362, "grad_norm": 0.8431905508041382, "learning_rate": 4.725925874723393e-06, "loss": 0.1535, "step": 1630 }, { "epoch": 0.5285158781594297, "grad_norm": 1.0187968015670776, "learning_rate": 4.725527584585322e-06, "loss": 0.1622, "step": 1631 }, { "epoch": 0.5288399222294232, "grad_norm": 0.9045505523681641, "learning_rate": 4.725129022064221e-06, "loss": 0.1442, "step": 1632 }, { "epoch": 0.5291639662994168, "grad_norm": 0.9165852665901184, "learning_rate": 4.724730187208868e-06, "loss": 0.1612, "step": 1633 }, { "epoch": 0.5294880103694103, "grad_norm": 1.0090407133102417, "learning_rate": 4.724331080068077e-06, "loss": 0.181, "step": 1634 }, { "epoch": 0.5298120544394037, "grad_norm": 0.9618867635726929, "learning_rate": 4.723931700690695e-06, "loss": 0.1576, "step": 1635 }, { "epoch": 0.5301360985093972, "grad_norm": 0.8928550481796265, "learning_rate": 4.7235320491256026e-06, "loss": 0.1512, "step": 1636 }, { "epoch": 0.5304601425793908, "grad_norm": 0.9328296184539795, "learning_rate": 4.723132125421712e-06, "loss": 0.1681, "step": 1637 }, { "epoch": 0.5307841866493843, "grad_norm": 0.9727100133895874, "learning_rate": 4.722731929627971e-06, "loss": 0.1634, "step": 1638 }, { "epoch": 0.5311082307193778, "grad_norm": 0.88592129945755, "learning_rate": 4.722331461793361e-06, "loss": 0.1529, "step": 1639 }, { "epoch": 0.5314322747893714, "grad_norm": 0.94303959608078, "learning_rate": 4.721930721966893e-06, "loss": 0.148, "step": 1640 }, { "epoch": 0.5317563188593649, "grad_norm": 0.8974708914756775, "learning_rate": 4.7215297101976145e-06, "loss": 0.1583, "step": 1641 }, { "epoch": 0.5320803629293583, "grad_norm": 0.9344980120658875, "learning_rate": 4.721128426534605e-06, "loss": 0.1624, "step": 1642 }, { "epoch": 0.5324044069993519, "grad_norm": 1.0182918310165405, "learning_rate": 4.720726871026978e-06, "loss": 0.1764, "step": 1643 }, { "epoch": 0.5327284510693454, "grad_norm": 0.9536001682281494, "learning_rate": 4.720325043723881e-06, "loss": 0.1679, "step": 1644 }, { "epoch": 0.5330524951393389, "grad_norm": 0.8858970403671265, "learning_rate": 4.719922944674494e-06, "loss": 0.1511, "step": 1645 }, { "epoch": 0.5333765392093325, "grad_norm": 0.9926290512084961, "learning_rate": 4.719520573928028e-06, "loss": 0.1723, "step": 1646 }, { "epoch": 0.533700583279326, "grad_norm": 0.9548216462135315, "learning_rate": 4.71911793153373e-06, "loss": 0.1684, "step": 1647 }, { "epoch": 0.5340246273493195, "grad_norm": 1.0088659524917603, "learning_rate": 4.7187150175408805e-06, "loss": 0.1768, "step": 1648 }, { "epoch": 0.5343486714193131, "grad_norm": 0.9483785033226013, "learning_rate": 4.718311831998792e-06, "loss": 0.1788, "step": 1649 }, { "epoch": 0.5346727154893065, "grad_norm": 0.8539042472839355, "learning_rate": 4.71790837495681e-06, "loss": 0.1612, "step": 1650 }, { "epoch": 0.5349967595593, "grad_norm": 0.8781387209892273, "learning_rate": 4.717504646464314e-06, "loss": 0.1654, "step": 1651 }, { "epoch": 0.5353208036292936, "grad_norm": 0.888820469379425, "learning_rate": 4.717100646570716e-06, "loss": 0.1558, "step": 1652 }, { "epoch": 0.5356448476992871, "grad_norm": 0.8900648951530457, "learning_rate": 4.7166963753254616e-06, "loss": 0.1484, "step": 1653 }, { "epoch": 0.5359688917692806, "grad_norm": 0.8877013325691223, "learning_rate": 4.716291832778031e-06, "loss": 0.1614, "step": 1654 }, { "epoch": 0.5362929358392742, "grad_norm": 0.9067593216896057, "learning_rate": 4.715887018977935e-06, "loss": 0.1511, "step": 1655 }, { "epoch": 0.5366169799092677, "grad_norm": 0.9863800406455994, "learning_rate": 4.715481933974719e-06, "loss": 0.1576, "step": 1656 }, { "epoch": 0.5369410239792611, "grad_norm": 0.9320042729377747, "learning_rate": 4.715076577817963e-06, "loss": 0.164, "step": 1657 }, { "epoch": 0.5372650680492547, "grad_norm": 1.0090923309326172, "learning_rate": 4.714670950557276e-06, "loss": 0.1868, "step": 1658 }, { "epoch": 0.5375891121192482, "grad_norm": 0.9123954772949219, "learning_rate": 4.714265052242306e-06, "loss": 0.1631, "step": 1659 }, { "epoch": 0.5379131561892417, "grad_norm": 0.9024103879928589, "learning_rate": 4.7138588829227285e-06, "loss": 0.1576, "step": 1660 }, { "epoch": 0.5382372002592353, "grad_norm": 0.9303012490272522, "learning_rate": 4.713452442648255e-06, "loss": 0.1581, "step": 1661 }, { "epoch": 0.5385612443292288, "grad_norm": 0.9538670778274536, "learning_rate": 4.7130457314686316e-06, "loss": 0.1716, "step": 1662 }, { "epoch": 0.5388852883992223, "grad_norm": 0.8686206936836243, "learning_rate": 4.712638749433634e-06, "loss": 0.1473, "step": 1663 }, { "epoch": 0.5392093324692158, "grad_norm": 0.9704967141151428, "learning_rate": 4.7122314965930724e-06, "loss": 0.1639, "step": 1664 }, { "epoch": 0.5395333765392093, "grad_norm": 0.953475832939148, "learning_rate": 4.711823972996793e-06, "loss": 0.1729, "step": 1665 }, { "epoch": 0.5398574206092028, "grad_norm": 0.9771159291267395, "learning_rate": 4.711416178694671e-06, "loss": 0.1882, "step": 1666 }, { "epoch": 0.5401814646791964, "grad_norm": 0.9252657890319824, "learning_rate": 4.711008113736617e-06, "loss": 0.16, "step": 1667 }, { "epoch": 0.5405055087491899, "grad_norm": 0.9023372530937195, "learning_rate": 4.710599778172575e-06, "loss": 0.161, "step": 1668 }, { "epoch": 0.5408295528191834, "grad_norm": 0.9718496799468994, "learning_rate": 4.7101911720525186e-06, "loss": 0.1513, "step": 1669 }, { "epoch": 0.541153596889177, "grad_norm": 0.952275276184082, "learning_rate": 4.70978229542646e-06, "loss": 0.1693, "step": 1670 }, { "epoch": 0.5414776409591704, "grad_norm": 0.8786360025405884, "learning_rate": 4.709373148344441e-06, "loss": 0.1574, "step": 1671 }, { "epoch": 0.5418016850291639, "grad_norm": 0.9894282221794128, "learning_rate": 4.708963730856536e-06, "loss": 0.1632, "step": 1672 }, { "epoch": 0.5421257290991575, "grad_norm": 0.9938233494758606, "learning_rate": 4.708554043012857e-06, "loss": 0.1622, "step": 1673 }, { "epoch": 0.542449773169151, "grad_norm": 1.0080715417861938, "learning_rate": 4.708144084863541e-06, "loss": 0.1603, "step": 1674 }, { "epoch": 0.5427738172391445, "grad_norm": 1.285111665725708, "learning_rate": 4.707733856458767e-06, "loss": 0.1904, "step": 1675 }, { "epoch": 0.5430978613091381, "grad_norm": 0.9874075055122375, "learning_rate": 4.707323357848741e-06, "loss": 0.1682, "step": 1676 }, { "epoch": 0.5434219053791316, "grad_norm": 0.9716241955757141, "learning_rate": 4.706912589083704e-06, "loss": 0.1737, "step": 1677 }, { "epoch": 0.5437459494491251, "grad_norm": 0.8894767761230469, "learning_rate": 4.706501550213932e-06, "loss": 0.1582, "step": 1678 }, { "epoch": 0.5440699935191186, "grad_norm": 1.0196465253829956, "learning_rate": 4.70609024128973e-06, "loss": 0.1846, "step": 1679 }, { "epoch": 0.5443940375891121, "grad_norm": 0.9901070594787598, "learning_rate": 4.7056786623614395e-06, "loss": 0.1629, "step": 1680 }, { "epoch": 0.5447180816591056, "grad_norm": 0.9512284994125366, "learning_rate": 4.705266813479434e-06, "loss": 0.1527, "step": 1681 }, { "epoch": 0.5450421257290992, "grad_norm": 0.9086491465568542, "learning_rate": 4.704854694694117e-06, "loss": 0.1581, "step": 1682 }, { "epoch": 0.5453661697990927, "grad_norm": 0.9667937755584717, "learning_rate": 4.704442306055932e-06, "loss": 0.1622, "step": 1683 }, { "epoch": 0.5456902138690862, "grad_norm": 0.9949867129325867, "learning_rate": 4.704029647615348e-06, "loss": 0.1639, "step": 1684 }, { "epoch": 0.5460142579390798, "grad_norm": 0.9589911103248596, "learning_rate": 4.703616719422873e-06, "loss": 0.1692, "step": 1685 }, { "epoch": 0.5463383020090732, "grad_norm": 0.9363300204277039, "learning_rate": 4.703203521529044e-06, "loss": 0.1641, "step": 1686 }, { "epoch": 0.5466623460790667, "grad_norm": 0.9348986148834229, "learning_rate": 4.702790053984432e-06, "loss": 0.1566, "step": 1687 }, { "epoch": 0.5469863901490603, "grad_norm": 1.3050991296768188, "learning_rate": 4.702376316839642e-06, "loss": 0.1701, "step": 1688 }, { "epoch": 0.5473104342190538, "grad_norm": 0.8916712403297424, "learning_rate": 4.701962310145312e-06, "loss": 0.1613, "step": 1689 }, { "epoch": 0.5476344782890473, "grad_norm": 0.8960726857185364, "learning_rate": 4.7015480339521115e-06, "loss": 0.1574, "step": 1690 }, { "epoch": 0.5479585223590409, "grad_norm": 0.9802014231681824, "learning_rate": 4.701133488310744e-06, "loss": 0.1691, "step": 1691 }, { "epoch": 0.5482825664290344, "grad_norm": 0.8750230669975281, "learning_rate": 4.700718673271947e-06, "loss": 0.1495, "step": 1692 }, { "epoch": 0.5486066104990278, "grad_norm": 0.9495435953140259, "learning_rate": 4.700303588886489e-06, "loss": 0.1607, "step": 1693 }, { "epoch": 0.5489306545690213, "grad_norm": 0.9570188522338867, "learning_rate": 4.699888235205172e-06, "loss": 0.1641, "step": 1694 }, { "epoch": 0.5492546986390149, "grad_norm": 0.9574050903320312, "learning_rate": 4.699472612278831e-06, "loss": 0.1766, "step": 1695 }, { "epoch": 0.5495787427090084, "grad_norm": 0.9535946846008301, "learning_rate": 4.699056720158336e-06, "loss": 0.1609, "step": 1696 }, { "epoch": 0.549902786779002, "grad_norm": 0.9584629535675049, "learning_rate": 4.698640558894586e-06, "loss": 0.165, "step": 1697 }, { "epoch": 0.5502268308489955, "grad_norm": 0.932038426399231, "learning_rate": 4.698224128538517e-06, "loss": 0.162, "step": 1698 }, { "epoch": 0.550550874918989, "grad_norm": 0.9042884111404419, "learning_rate": 4.6978074291410936e-06, "loss": 0.1565, "step": 1699 }, { "epoch": 0.5508749189889826, "grad_norm": 0.9677349925041199, "learning_rate": 4.697390460753318e-06, "loss": 0.1815, "step": 1700 }, { "epoch": 0.551198963058976, "grad_norm": 0.867965817451477, "learning_rate": 4.696973223426224e-06, "loss": 0.1427, "step": 1701 }, { "epoch": 0.5515230071289695, "grad_norm": 0.8912596106529236, "learning_rate": 4.696555717210873e-06, "loss": 0.1511, "step": 1702 }, { "epoch": 0.551847051198963, "grad_norm": 0.9456349015235901, "learning_rate": 4.6961379421583685e-06, "loss": 0.1618, "step": 1703 }, { "epoch": 0.5521710952689566, "grad_norm": 0.9106193780899048, "learning_rate": 4.695719898319839e-06, "loss": 0.1529, "step": 1704 }, { "epoch": 0.5524951393389501, "grad_norm": 0.9977267980575562, "learning_rate": 4.695301585746451e-06, "loss": 0.1705, "step": 1705 }, { "epoch": 0.5528191834089436, "grad_norm": 1.021049976348877, "learning_rate": 4.6948830044894016e-06, "loss": 0.1711, "step": 1706 }, { "epoch": 0.5531432274789372, "grad_norm": 0.953581690788269, "learning_rate": 4.6944641545999194e-06, "loss": 0.1588, "step": 1707 }, { "epoch": 0.5534672715489306, "grad_norm": 0.9142120480537415, "learning_rate": 4.694045036129269e-06, "loss": 0.1667, "step": 1708 }, { "epoch": 0.5537913156189241, "grad_norm": 0.9687793850898743, "learning_rate": 4.693625649128746e-06, "loss": 0.1669, "step": 1709 }, { "epoch": 0.5541153596889177, "grad_norm": 0.9391654133796692, "learning_rate": 4.69320599364968e-06, "loss": 0.1739, "step": 1710 }, { "epoch": 0.5544394037589112, "grad_norm": 0.8609366416931152, "learning_rate": 4.692786069743432e-06, "loss": 0.1523, "step": 1711 }, { "epoch": 0.5547634478289047, "grad_norm": 0.9310189485549927, "learning_rate": 4.692365877461397e-06, "loss": 0.1693, "step": 1712 }, { "epoch": 0.5550874918988983, "grad_norm": 0.925957202911377, "learning_rate": 4.691945416855002e-06, "loss": 0.164, "step": 1713 }, { "epoch": 0.5554115359688918, "grad_norm": 0.9316194653511047, "learning_rate": 4.6915246879757084e-06, "loss": 0.1649, "step": 1714 }, { "epoch": 0.5557355800388852, "grad_norm": 0.9110631346702576, "learning_rate": 4.691103690875007e-06, "loss": 0.1579, "step": 1715 }, { "epoch": 0.5560596241088788, "grad_norm": 0.9508496522903442, "learning_rate": 4.690682425604427e-06, "loss": 0.1705, "step": 1716 }, { "epoch": 0.5563836681788723, "grad_norm": 0.9079408645629883, "learning_rate": 4.690260892215525e-06, "loss": 0.1683, "step": 1717 }, { "epoch": 0.5567077122488658, "grad_norm": 0.9047849178314209, "learning_rate": 4.689839090759893e-06, "loss": 0.1633, "step": 1718 }, { "epoch": 0.5570317563188594, "grad_norm": 0.9550807476043701, "learning_rate": 4.689417021289157e-06, "loss": 0.1776, "step": 1719 }, { "epoch": 0.5573558003888529, "grad_norm": 0.9622770547866821, "learning_rate": 4.68899468385497e-06, "loss": 0.1701, "step": 1720 }, { "epoch": 0.5576798444588464, "grad_norm": 0.8848745226860046, "learning_rate": 4.688572078509027e-06, "loss": 0.1558, "step": 1721 }, { "epoch": 0.55800388852884, "grad_norm": 0.9164038896560669, "learning_rate": 4.688149205303048e-06, "loss": 0.1597, "step": 1722 }, { "epoch": 0.5583279325988334, "grad_norm": 0.9261012673377991, "learning_rate": 4.687726064288789e-06, "loss": 0.1656, "step": 1723 }, { "epoch": 0.5586519766688269, "grad_norm": 0.8843210339546204, "learning_rate": 4.6873026555180386e-06, "loss": 0.1481, "step": 1724 }, { "epoch": 0.5589760207388205, "grad_norm": 0.8937526345252991, "learning_rate": 4.6868789790426185e-06, "loss": 0.1594, "step": 1725 }, { "epoch": 0.559300064808814, "grad_norm": 0.8904445767402649, "learning_rate": 4.6864550349143815e-06, "loss": 0.1554, "step": 1726 }, { "epoch": 0.5596241088788075, "grad_norm": 0.9143627882003784, "learning_rate": 4.686030823185215e-06, "loss": 0.1535, "step": 1727 }, { "epoch": 0.5599481529488011, "grad_norm": 0.9604513645172119, "learning_rate": 4.685606343907038e-06, "loss": 0.1797, "step": 1728 }, { "epoch": 0.5602721970187946, "grad_norm": 0.9768103957176208, "learning_rate": 4.685181597131802e-06, "loss": 0.1694, "step": 1729 }, { "epoch": 0.560596241088788, "grad_norm": 0.9122162461280823, "learning_rate": 4.684756582911494e-06, "loss": 0.1681, "step": 1730 }, { "epoch": 0.5609202851587816, "grad_norm": 0.9020214676856995, "learning_rate": 4.6843313012981295e-06, "loss": 0.1618, "step": 1731 }, { "epoch": 0.5612443292287751, "grad_norm": 0.9545856714248657, "learning_rate": 4.6839057523437606e-06, "loss": 0.1671, "step": 1732 }, { "epoch": 0.5615683732987686, "grad_norm": 0.8946810364723206, "learning_rate": 4.683479936100468e-06, "loss": 0.1694, "step": 1733 }, { "epoch": 0.5618924173687622, "grad_norm": 0.9462122321128845, "learning_rate": 4.68305385262037e-06, "loss": 0.162, "step": 1734 }, { "epoch": 0.5622164614387557, "grad_norm": 0.8827371001243591, "learning_rate": 4.682627501955614e-06, "loss": 0.1497, "step": 1735 }, { "epoch": 0.5625405055087492, "grad_norm": 0.9082047939300537, "learning_rate": 4.682200884158381e-06, "loss": 0.1565, "step": 1736 }, { "epoch": 0.5628645495787427, "grad_norm": 0.9250028729438782, "learning_rate": 4.6817739992808855e-06, "loss": 0.1735, "step": 1737 }, { "epoch": 0.5631885936487362, "grad_norm": 0.9381402730941772, "learning_rate": 4.681346847375373e-06, "loss": 0.1711, "step": 1738 }, { "epoch": 0.5635126377187297, "grad_norm": 0.9299399852752686, "learning_rate": 4.6809194284941236e-06, "loss": 0.1763, "step": 1739 }, { "epoch": 0.5638366817887233, "grad_norm": 0.8783348202705383, "learning_rate": 4.6804917426894495e-06, "loss": 0.1583, "step": 1740 }, { "epoch": 0.5641607258587168, "grad_norm": 0.8714194297790527, "learning_rate": 4.6800637900136944e-06, "loss": 0.143, "step": 1741 }, { "epoch": 0.5644847699287103, "grad_norm": 1.0816848278045654, "learning_rate": 4.679635570519236e-06, "loss": 0.1597, "step": 1742 }, { "epoch": 0.5648088139987039, "grad_norm": 0.9509788155555725, "learning_rate": 4.6792070842584855e-06, "loss": 0.1745, "step": 1743 }, { "epoch": 0.5651328580686974, "grad_norm": 0.9559815526008606, "learning_rate": 4.678778331283883e-06, "loss": 0.1757, "step": 1744 }, { "epoch": 0.5654569021386908, "grad_norm": 0.8636212944984436, "learning_rate": 4.678349311647905e-06, "loss": 0.1552, "step": 1745 }, { "epoch": 0.5657809462086844, "grad_norm": 0.9815044403076172, "learning_rate": 4.67792002540306e-06, "loss": 0.161, "step": 1746 }, { "epoch": 0.5661049902786779, "grad_norm": 0.9376473426818848, "learning_rate": 4.677490472601888e-06, "loss": 0.1514, "step": 1747 }, { "epoch": 0.5664290343486714, "grad_norm": 1.0068804025650024, "learning_rate": 4.677060653296961e-06, "loss": 0.1676, "step": 1748 }, { "epoch": 0.566753078418665, "grad_norm": 0.8940487504005432, "learning_rate": 4.676630567540886e-06, "loss": 0.1468, "step": 1749 }, { "epoch": 0.5670771224886585, "grad_norm": 0.8850117921829224, "learning_rate": 4.6762002153863e-06, "loss": 0.1557, "step": 1750 }, { "epoch": 0.567401166558652, "grad_norm": 0.9821330904960632, "learning_rate": 4.675769596885877e-06, "loss": 0.1742, "step": 1751 }, { "epoch": 0.5677252106286454, "grad_norm": 0.9198369979858398, "learning_rate": 4.675338712092316e-06, "loss": 0.1653, "step": 1752 }, { "epoch": 0.568049254698639, "grad_norm": 0.8890524506568909, "learning_rate": 4.674907561058358e-06, "loss": 0.1525, "step": 1753 }, { "epoch": 0.5683732987686325, "grad_norm": 0.9032002091407776, "learning_rate": 4.674476143836768e-06, "loss": 0.1542, "step": 1754 }, { "epoch": 0.568697342838626, "grad_norm": 0.9556316137313843, "learning_rate": 4.674044460480348e-06, "loss": 0.1637, "step": 1755 }, { "epoch": 0.5690213869086196, "grad_norm": 0.9863228797912598, "learning_rate": 4.673612511041933e-06, "loss": 0.1714, "step": 1756 }, { "epoch": 0.5693454309786131, "grad_norm": 0.9153149127960205, "learning_rate": 4.673180295574389e-06, "loss": 0.1523, "step": 1757 }, { "epoch": 0.5696694750486067, "grad_norm": 1.0959794521331787, "learning_rate": 4.672747814130615e-06, "loss": 0.1797, "step": 1758 }, { "epoch": 0.5699935191186001, "grad_norm": 0.9302141070365906, "learning_rate": 4.672315066763542e-06, "loss": 0.1592, "step": 1759 }, { "epoch": 0.5703175631885936, "grad_norm": 0.8578373193740845, "learning_rate": 4.671882053526135e-06, "loss": 0.1507, "step": 1760 }, { "epoch": 0.5706416072585871, "grad_norm": 0.8829067945480347, "learning_rate": 4.671448774471389e-06, "loss": 0.1525, "step": 1761 }, { "epoch": 0.5709656513285807, "grad_norm": 0.9160066843032837, "learning_rate": 4.671015229652335e-06, "loss": 0.1695, "step": 1762 }, { "epoch": 0.5712896953985742, "grad_norm": 0.9778125286102295, "learning_rate": 4.670581419122034e-06, "loss": 0.1595, "step": 1763 }, { "epoch": 0.5716137394685677, "grad_norm": 0.9264950156211853, "learning_rate": 4.67014734293358e-06, "loss": 0.1729, "step": 1764 }, { "epoch": 0.5719377835385613, "grad_norm": 0.8530830144882202, "learning_rate": 4.6697130011401e-06, "loss": 0.1556, "step": 1765 }, { "epoch": 0.5722618276085548, "grad_norm": 0.9190473556518555, "learning_rate": 4.669278393794753e-06, "loss": 0.1623, "step": 1766 }, { "epoch": 0.5725858716785482, "grad_norm": 0.9246593117713928, "learning_rate": 4.6688435209507305e-06, "loss": 0.1595, "step": 1767 }, { "epoch": 0.5729099157485418, "grad_norm": 0.9285086989402771, "learning_rate": 4.668408382661257e-06, "loss": 0.1518, "step": 1768 }, { "epoch": 0.5732339598185353, "grad_norm": 0.8804833889007568, "learning_rate": 4.66797297897959e-06, "loss": 0.1574, "step": 1769 }, { "epoch": 0.5735580038885288, "grad_norm": 0.9340384602546692, "learning_rate": 4.667537309959018e-06, "loss": 0.1584, "step": 1770 }, { "epoch": 0.5738820479585224, "grad_norm": 0.9027433395385742, "learning_rate": 4.667101375652862e-06, "loss": 0.1642, "step": 1771 }, { "epoch": 0.5742060920285159, "grad_norm": 0.9212940335273743, "learning_rate": 4.666665176114477e-06, "loss": 0.1612, "step": 1772 }, { "epoch": 0.5745301360985094, "grad_norm": 0.9400113224983215, "learning_rate": 4.666228711397249e-06, "loss": 0.1651, "step": 1773 }, { "epoch": 0.5748541801685029, "grad_norm": 0.9218817353248596, "learning_rate": 4.665791981554598e-06, "loss": 0.1614, "step": 1774 }, { "epoch": 0.5751782242384964, "grad_norm": 0.9443331360816956, "learning_rate": 4.665354986639975e-06, "loss": 0.1615, "step": 1775 }, { "epoch": 0.5755022683084899, "grad_norm": 0.8934869170188904, "learning_rate": 4.664917726706864e-06, "loss": 0.1417, "step": 1776 }, { "epoch": 0.5758263123784835, "grad_norm": 0.9580870270729065, "learning_rate": 4.6644802018087806e-06, "loss": 0.1595, "step": 1777 }, { "epoch": 0.576150356448477, "grad_norm": 0.9523159861564636, "learning_rate": 4.664042411999276e-06, "loss": 0.1667, "step": 1778 }, { "epoch": 0.5764744005184705, "grad_norm": 0.9081785082817078, "learning_rate": 4.663604357331928e-06, "loss": 0.1654, "step": 1779 }, { "epoch": 0.5767984445884641, "grad_norm": 0.9075709581375122, "learning_rate": 4.6631660378603526e-06, "loss": 0.1674, "step": 1780 }, { "epoch": 0.5771224886584575, "grad_norm": 0.8578202724456787, "learning_rate": 4.662727453638195e-06, "loss": 0.1501, "step": 1781 }, { "epoch": 0.577446532728451, "grad_norm": 0.859342098236084, "learning_rate": 4.662288604719134e-06, "loss": 0.1467, "step": 1782 }, { "epoch": 0.5777705767984446, "grad_norm": 0.9672659635543823, "learning_rate": 4.66184949115688e-06, "loss": 0.1663, "step": 1783 }, { "epoch": 0.5780946208684381, "grad_norm": 0.9163477420806885, "learning_rate": 4.661410113005177e-06, "loss": 0.1571, "step": 1784 }, { "epoch": 0.5784186649384316, "grad_norm": 0.9425603151321411, "learning_rate": 4.6609704703178e-06, "loss": 0.1613, "step": 1785 }, { "epoch": 0.5787427090084252, "grad_norm": 0.8870068192481995, "learning_rate": 4.660530563148557e-06, "loss": 0.1512, "step": 1786 }, { "epoch": 0.5790667530784187, "grad_norm": 0.8830798268318176, "learning_rate": 4.66009039155129e-06, "loss": 0.1444, "step": 1787 }, { "epoch": 0.5793907971484121, "grad_norm": 0.8862148523330688, "learning_rate": 4.659649955579869e-06, "loss": 0.1597, "step": 1788 }, { "epoch": 0.5797148412184057, "grad_norm": 0.978209912776947, "learning_rate": 4.659209255288201e-06, "loss": 0.1543, "step": 1789 }, { "epoch": 0.5800388852883992, "grad_norm": 1.030468225479126, "learning_rate": 4.658768290730222e-06, "loss": 0.1787, "step": 1790 }, { "epoch": 0.5803629293583927, "grad_norm": 1.020377278327942, "learning_rate": 4.658327061959904e-06, "loss": 0.1736, "step": 1791 }, { "epoch": 0.5806869734283863, "grad_norm": 0.8843832015991211, "learning_rate": 4.6578855690312474e-06, "loss": 0.1502, "step": 1792 }, { "epoch": 0.5810110174983798, "grad_norm": 0.9210875034332275, "learning_rate": 4.657443811998287e-06, "loss": 0.1556, "step": 1793 }, { "epoch": 0.5813350615683733, "grad_norm": 0.9051605463027954, "learning_rate": 4.65700179091509e-06, "loss": 0.1414, "step": 1794 }, { "epoch": 0.5816591056383669, "grad_norm": 0.9195812940597534, "learning_rate": 4.656559505835755e-06, "loss": 0.1558, "step": 1795 }, { "epoch": 0.5819831497083603, "grad_norm": 0.8479184508323669, "learning_rate": 4.656116956814414e-06, "loss": 0.1533, "step": 1796 }, { "epoch": 0.5823071937783538, "grad_norm": 0.9624475240707397, "learning_rate": 4.655674143905229e-06, "loss": 0.1715, "step": 1797 }, { "epoch": 0.5826312378483474, "grad_norm": 0.8872055411338806, "learning_rate": 4.655231067162398e-06, "loss": 0.1449, "step": 1798 }, { "epoch": 0.5829552819183409, "grad_norm": 0.7795613408088684, "learning_rate": 4.65478772664015e-06, "loss": 0.1366, "step": 1799 }, { "epoch": 0.5832793259883344, "grad_norm": 1.0152753591537476, "learning_rate": 4.654344122392742e-06, "loss": 0.1559, "step": 1800 }, { "epoch": 0.583603370058328, "grad_norm": 0.9903998374938965, "learning_rate": 4.6539002544744705e-06, "loss": 0.1492, "step": 1801 }, { "epoch": 0.5839274141283215, "grad_norm": 0.9595767259597778, "learning_rate": 4.653456122939659e-06, "loss": 0.1616, "step": 1802 }, { "epoch": 0.5842514581983149, "grad_norm": 0.8353689312934875, "learning_rate": 4.653011727842665e-06, "loss": 0.1418, "step": 1803 }, { "epoch": 0.5845755022683085, "grad_norm": 0.9445038437843323, "learning_rate": 4.652567069237877e-06, "loss": 0.1664, "step": 1804 }, { "epoch": 0.584899546338302, "grad_norm": 0.9006994962692261, "learning_rate": 4.652122147179721e-06, "loss": 0.1495, "step": 1805 }, { "epoch": 0.5852235904082955, "grad_norm": 0.9477143883705139, "learning_rate": 4.651676961722647e-06, "loss": 0.1673, "step": 1806 }, { "epoch": 0.5855476344782891, "grad_norm": 0.9442145824432373, "learning_rate": 4.651231512921142e-06, "loss": 0.1603, "step": 1807 }, { "epoch": 0.5858716785482826, "grad_norm": 0.9480714797973633, "learning_rate": 4.650785800829726e-06, "loss": 0.1666, "step": 1808 }, { "epoch": 0.5861957226182761, "grad_norm": 0.8913865089416504, "learning_rate": 4.650339825502949e-06, "loss": 0.1503, "step": 1809 }, { "epoch": 0.5865197666882696, "grad_norm": 0.8414224982261658, "learning_rate": 4.6498935869953945e-06, "loss": 0.1514, "step": 1810 }, { "epoch": 0.5868438107582631, "grad_norm": 0.9178133606910706, "learning_rate": 4.649447085361677e-06, "loss": 0.1674, "step": 1811 }, { "epoch": 0.5871678548282566, "grad_norm": 0.8867731690406799, "learning_rate": 4.649000320656445e-06, "loss": 0.1516, "step": 1812 }, { "epoch": 0.5874918988982502, "grad_norm": 0.9027390480041504, "learning_rate": 4.648553292934377e-06, "loss": 0.1693, "step": 1813 }, { "epoch": 0.5878159429682437, "grad_norm": 0.9258837699890137, "learning_rate": 4.648106002250186e-06, "loss": 0.1694, "step": 1814 }, { "epoch": 0.5881399870382372, "grad_norm": 0.8763899207115173, "learning_rate": 4.647658448658616e-06, "loss": 0.1591, "step": 1815 }, { "epoch": 0.5884640311082308, "grad_norm": 0.8758101463317871, "learning_rate": 4.647210632214443e-06, "loss": 0.1601, "step": 1816 }, { "epoch": 0.5887880751782243, "grad_norm": 0.9090110659599304, "learning_rate": 4.646762552972475e-06, "loss": 0.1536, "step": 1817 }, { "epoch": 0.5891121192482177, "grad_norm": 0.9815348982810974, "learning_rate": 4.646314210987552e-06, "loss": 0.1717, "step": 1818 }, { "epoch": 0.5894361633182112, "grad_norm": 0.9743311405181885, "learning_rate": 4.645865606314548e-06, "loss": 0.1639, "step": 1819 }, { "epoch": 0.5897602073882048, "grad_norm": 0.9204444289207458, "learning_rate": 4.645416739008367e-06, "loss": 0.1537, "step": 1820 }, { "epoch": 0.5900842514581983, "grad_norm": 0.9639553427696228, "learning_rate": 4.644967609123947e-06, "loss": 0.1704, "step": 1821 }, { "epoch": 0.5904082955281919, "grad_norm": 1.0289804935455322, "learning_rate": 4.644518216716256e-06, "loss": 0.1718, "step": 1822 }, { "epoch": 0.5907323395981854, "grad_norm": 0.8550962805747986, "learning_rate": 4.644068561840297e-06, "loss": 0.1629, "step": 1823 }, { "epoch": 0.5910563836681789, "grad_norm": 0.8934510350227356, "learning_rate": 4.643618644551101e-06, "loss": 0.164, "step": 1824 }, { "epoch": 0.5913804277381723, "grad_norm": 1.0084798336029053, "learning_rate": 4.643168464903736e-06, "loss": 0.1552, "step": 1825 }, { "epoch": 0.5917044718081659, "grad_norm": 0.8919445276260376, "learning_rate": 4.642718022953297e-06, "loss": 0.1692, "step": 1826 }, { "epoch": 0.5920285158781594, "grad_norm": 0.883470892906189, "learning_rate": 4.642267318754915e-06, "loss": 0.1646, "step": 1827 }, { "epoch": 0.592352559948153, "grad_norm": 0.8612281680107117, "learning_rate": 4.641816352363753e-06, "loss": 0.1591, "step": 1828 }, { "epoch": 0.5926766040181465, "grad_norm": 0.9234902858734131, "learning_rate": 4.641365123835004e-06, "loss": 0.1766, "step": 1829 }, { "epoch": 0.59300064808814, "grad_norm": 0.9338532090187073, "learning_rate": 4.640913633223893e-06, "loss": 0.1626, "step": 1830 }, { "epoch": 0.5933246921581335, "grad_norm": 0.8946052193641663, "learning_rate": 4.64046188058568e-06, "loss": 0.1668, "step": 1831 }, { "epoch": 0.593648736228127, "grad_norm": 0.8641635179519653, "learning_rate": 4.6400098659756525e-06, "loss": 0.1568, "step": 1832 }, { "epoch": 0.5939727802981205, "grad_norm": 0.8863862156867981, "learning_rate": 4.639557589449135e-06, "loss": 0.1596, "step": 1833 }, { "epoch": 0.594296824368114, "grad_norm": 0.895317018032074, "learning_rate": 4.639105051061481e-06, "loss": 0.1609, "step": 1834 }, { "epoch": 0.5946208684381076, "grad_norm": 0.9277777671813965, "learning_rate": 4.638652250868078e-06, "loss": 0.1574, "step": 1835 }, { "epoch": 0.5949449125081011, "grad_norm": 0.9136897921562195, "learning_rate": 4.6381991889243416e-06, "loss": 0.162, "step": 1836 }, { "epoch": 0.5952689565780946, "grad_norm": 1.0817289352416992, "learning_rate": 4.637745865285725e-06, "loss": 0.1736, "step": 1837 }, { "epoch": 0.5955930006480882, "grad_norm": 0.915195107460022, "learning_rate": 4.637292280007709e-06, "loss": 0.1501, "step": 1838 }, { "epoch": 0.5959170447180817, "grad_norm": 0.9343981146812439, "learning_rate": 4.6368384331458085e-06, "loss": 0.1637, "step": 1839 }, { "epoch": 0.5962410887880751, "grad_norm": 0.9210292100906372, "learning_rate": 4.63638432475557e-06, "loss": 0.1636, "step": 1840 }, { "epoch": 0.5965651328580687, "grad_norm": 0.8031634092330933, "learning_rate": 4.635929954892572e-06, "loss": 0.1447, "step": 1841 }, { "epoch": 0.5968891769280622, "grad_norm": 0.9125548601150513, "learning_rate": 4.6354753236124254e-06, "loss": 0.157, "step": 1842 }, { "epoch": 0.5972132209980557, "grad_norm": 0.9125232100486755, "learning_rate": 4.635020430970771e-06, "loss": 0.1625, "step": 1843 }, { "epoch": 0.5975372650680493, "grad_norm": 1.0112113952636719, "learning_rate": 4.6345652770232856e-06, "loss": 0.1616, "step": 1844 }, { "epoch": 0.5978613091380428, "grad_norm": 0.8509640693664551, "learning_rate": 4.6341098618256745e-06, "loss": 0.1402, "step": 1845 }, { "epoch": 0.5981853532080363, "grad_norm": 0.8826951384544373, "learning_rate": 4.633654185433676e-06, "loss": 0.1684, "step": 1846 }, { "epoch": 0.5985093972780298, "grad_norm": 0.9403404593467712, "learning_rate": 4.63319824790306e-06, "loss": 0.1612, "step": 1847 }, { "epoch": 0.5988334413480233, "grad_norm": 0.8916503190994263, "learning_rate": 4.6327420492896295e-06, "loss": 0.1527, "step": 1848 }, { "epoch": 0.5991574854180168, "grad_norm": 0.9854899644851685, "learning_rate": 4.632285589649219e-06, "loss": 0.168, "step": 1849 }, { "epoch": 0.5994815294880104, "grad_norm": 0.9854278564453125, "learning_rate": 4.631828869037694e-06, "loss": 0.1587, "step": 1850 }, { "epoch": 0.5998055735580039, "grad_norm": 0.8996358513832092, "learning_rate": 4.631371887510954e-06, "loss": 0.1694, "step": 1851 }, { "epoch": 0.6001296176279974, "grad_norm": 0.8837627172470093, "learning_rate": 4.630914645124928e-06, "loss": 0.1648, "step": 1852 }, { "epoch": 0.600453661697991, "grad_norm": 0.9942870736122131, "learning_rate": 4.630457141935577e-06, "loss": 0.1545, "step": 1853 }, { "epoch": 0.6007777057679844, "grad_norm": 0.9469466209411621, "learning_rate": 4.629999377998898e-06, "loss": 0.1825, "step": 1854 }, { "epoch": 0.6011017498379779, "grad_norm": 0.858815610408783, "learning_rate": 4.629541353370914e-06, "loss": 0.1495, "step": 1855 }, { "epoch": 0.6014257939079715, "grad_norm": 0.8882916569709778, "learning_rate": 4.629083068107684e-06, "loss": 0.1618, "step": 1856 }, { "epoch": 0.601749837977965, "grad_norm": 0.9027432203292847, "learning_rate": 4.628624522265298e-06, "loss": 0.1667, "step": 1857 }, { "epoch": 0.6020738820479585, "grad_norm": 0.9251678586006165, "learning_rate": 4.628165715899877e-06, "loss": 0.1621, "step": 1858 }, { "epoch": 0.6023979261179521, "grad_norm": 1.0207189321517944, "learning_rate": 4.627706649067575e-06, "loss": 0.1701, "step": 1859 }, { "epoch": 0.6027219701879456, "grad_norm": 0.8564452528953552, "learning_rate": 4.627247321824576e-06, "loss": 0.1558, "step": 1860 }, { "epoch": 0.6030460142579391, "grad_norm": 0.8749232888221741, "learning_rate": 4.6267877342271e-06, "loss": 0.1466, "step": 1861 }, { "epoch": 0.6033700583279326, "grad_norm": 0.9126169085502625, "learning_rate": 4.626327886331392e-06, "loss": 0.1704, "step": 1862 }, { "epoch": 0.6036941023979261, "grad_norm": 0.863114595413208, "learning_rate": 4.625867778193737e-06, "loss": 0.1567, "step": 1863 }, { "epoch": 0.6040181464679196, "grad_norm": 0.8384044170379639, "learning_rate": 4.625407409870444e-06, "loss": 0.1432, "step": 1864 }, { "epoch": 0.6043421905379132, "grad_norm": 0.8923614621162415, "learning_rate": 4.624946781417861e-06, "loss": 0.1513, "step": 1865 }, { "epoch": 0.6046662346079067, "grad_norm": 0.9929710626602173, "learning_rate": 4.624485892892363e-06, "loss": 0.1813, "step": 1866 }, { "epoch": 0.6049902786779002, "grad_norm": 0.8921619057655334, "learning_rate": 4.624024744350358e-06, "loss": 0.157, "step": 1867 }, { "epoch": 0.6053143227478938, "grad_norm": 0.9352006316184998, "learning_rate": 4.623563335848286e-06, "loss": 0.1704, "step": 1868 }, { "epoch": 0.6056383668178872, "grad_norm": 0.9039468765258789, "learning_rate": 4.62310166744262e-06, "loss": 0.1439, "step": 1869 }, { "epoch": 0.6059624108878807, "grad_norm": 0.8728505969047546, "learning_rate": 4.622639739189863e-06, "loss": 0.1464, "step": 1870 }, { "epoch": 0.6062864549578743, "grad_norm": 0.9441261887550354, "learning_rate": 4.62217755114655e-06, "loss": 0.1716, "step": 1871 }, { "epoch": 0.6066104990278678, "grad_norm": 0.8899536728858948, "learning_rate": 4.62171510336925e-06, "loss": 0.1627, "step": 1872 }, { "epoch": 0.6069345430978613, "grad_norm": 0.9109644889831543, "learning_rate": 4.621252395914561e-06, "loss": 0.1657, "step": 1873 }, { "epoch": 0.6072585871678549, "grad_norm": 0.8352120518684387, "learning_rate": 4.620789428839114e-06, "loss": 0.1511, "step": 1874 }, { "epoch": 0.6075826312378484, "grad_norm": 0.8798180222511292, "learning_rate": 4.620326202199572e-06, "loss": 0.148, "step": 1875 }, { "epoch": 0.6079066753078418, "grad_norm": 0.8908928036689758, "learning_rate": 4.619862716052629e-06, "loss": 0.167, "step": 1876 }, { "epoch": 0.6082307193778353, "grad_norm": 0.9110801219940186, "learning_rate": 4.6193989704550105e-06, "loss": 0.1693, "step": 1877 }, { "epoch": 0.6085547634478289, "grad_norm": 0.9067312479019165, "learning_rate": 4.6189349654634766e-06, "loss": 0.1601, "step": 1878 }, { "epoch": 0.6088788075178224, "grad_norm": 0.9549569487571716, "learning_rate": 4.618470701134815e-06, "loss": 0.1732, "step": 1879 }, { "epoch": 0.609202851587816, "grad_norm": 0.8671174645423889, "learning_rate": 4.618006177525849e-06, "loss": 0.1459, "step": 1880 }, { "epoch": 0.6095268956578095, "grad_norm": 0.8848288059234619, "learning_rate": 4.61754139469343e-06, "loss": 0.1562, "step": 1881 }, { "epoch": 0.609850939727803, "grad_norm": 0.9089393019676208, "learning_rate": 4.6170763526944425e-06, "loss": 0.1545, "step": 1882 }, { "epoch": 0.6101749837977966, "grad_norm": 0.8325822949409485, "learning_rate": 4.616611051585806e-06, "loss": 0.1411, "step": 1883 }, { "epoch": 0.61049902786779, "grad_norm": 0.8850380778312683, "learning_rate": 4.6161454914244665e-06, "loss": 0.1566, "step": 1884 }, { "epoch": 0.6108230719377835, "grad_norm": 0.889552891254425, "learning_rate": 4.615679672267405e-06, "loss": 0.152, "step": 1885 }, { "epoch": 0.611147116007777, "grad_norm": 0.8596039414405823, "learning_rate": 4.615213594171633e-06, "loss": 0.1443, "step": 1886 }, { "epoch": 0.6114711600777706, "grad_norm": 0.9552878737449646, "learning_rate": 4.614747257194194e-06, "loss": 0.1743, "step": 1887 }, { "epoch": 0.6117952041477641, "grad_norm": 0.8836682438850403, "learning_rate": 4.614280661392163e-06, "loss": 0.1666, "step": 1888 }, { "epoch": 0.6121192482177576, "grad_norm": 0.8738223910331726, "learning_rate": 4.613813806822647e-06, "loss": 0.1571, "step": 1889 }, { "epoch": 0.6124432922877512, "grad_norm": 0.9057800769805908, "learning_rate": 4.613346693542784e-06, "loss": 0.1436, "step": 1890 }, { "epoch": 0.6127673363577446, "grad_norm": 0.8955968022346497, "learning_rate": 4.6128793216097445e-06, "loss": 0.1544, "step": 1891 }, { "epoch": 0.6130913804277381, "grad_norm": 0.9024104475975037, "learning_rate": 4.61241169108073e-06, "loss": 0.1527, "step": 1892 }, { "epoch": 0.6134154244977317, "grad_norm": 0.807380735874176, "learning_rate": 4.611943802012975e-06, "loss": 0.148, "step": 1893 }, { "epoch": 0.6137394685677252, "grad_norm": 0.9021716117858887, "learning_rate": 4.611475654463743e-06, "loss": 0.1506, "step": 1894 }, { "epoch": 0.6140635126377187, "grad_norm": 0.9311186671257019, "learning_rate": 4.6110072484903326e-06, "loss": 0.1724, "step": 1895 }, { "epoch": 0.6143875567077123, "grad_norm": 0.8692731261253357, "learning_rate": 4.610538584150071e-06, "loss": 0.155, "step": 1896 }, { "epoch": 0.6147116007777058, "grad_norm": 0.9471341967582703, "learning_rate": 4.610069661500317e-06, "loss": 0.1704, "step": 1897 }, { "epoch": 0.6150356448476992, "grad_norm": 0.8956695199012756, "learning_rate": 4.609600480598464e-06, "loss": 0.1688, "step": 1898 }, { "epoch": 0.6153596889176928, "grad_norm": 0.9151939749717712, "learning_rate": 4.6091310415019355e-06, "loss": 0.1757, "step": 1899 }, { "epoch": 0.6156837329876863, "grad_norm": 0.8341889381408691, "learning_rate": 4.608661344268185e-06, "loss": 0.1417, "step": 1900 }, { "epoch": 0.6160077770576798, "grad_norm": 0.9647911787033081, "learning_rate": 4.608191388954699e-06, "loss": 0.1558, "step": 1901 }, { "epoch": 0.6163318211276734, "grad_norm": 0.9372779726982117, "learning_rate": 4.607721175618997e-06, "loss": 0.1571, "step": 1902 }, { "epoch": 0.6166558651976669, "grad_norm": 0.9415113925933838, "learning_rate": 4.6072507043186265e-06, "loss": 0.1635, "step": 1903 }, { "epoch": 0.6169799092676604, "grad_norm": 0.9065474271774292, "learning_rate": 4.60677997511117e-06, "loss": 0.1594, "step": 1904 }, { "epoch": 0.6173039533376539, "grad_norm": 0.9385161995887756, "learning_rate": 4.606308988054239e-06, "loss": 0.1619, "step": 1905 }, { "epoch": 0.6176279974076474, "grad_norm": 0.9063534140586853, "learning_rate": 4.605837743205479e-06, "loss": 0.1594, "step": 1906 }, { "epoch": 0.6179520414776409, "grad_norm": 0.908365786075592, "learning_rate": 4.605366240622565e-06, "loss": 0.1643, "step": 1907 }, { "epoch": 0.6182760855476345, "grad_norm": 0.924689769744873, "learning_rate": 4.604894480363205e-06, "loss": 0.1593, "step": 1908 }, { "epoch": 0.618600129617628, "grad_norm": 0.8969070911407471, "learning_rate": 4.604422462485138e-06, "loss": 0.1576, "step": 1909 }, { "epoch": 0.6189241736876215, "grad_norm": 0.9201893210411072, "learning_rate": 4.603950187046134e-06, "loss": 0.1711, "step": 1910 }, { "epoch": 0.6192482177576151, "grad_norm": 0.9483135342597961, "learning_rate": 4.603477654103994e-06, "loss": 0.178, "step": 1911 }, { "epoch": 0.6195722618276086, "grad_norm": 0.8227450847625732, "learning_rate": 4.603004863716553e-06, "loss": 0.1516, "step": 1912 }, { "epoch": 0.619896305897602, "grad_norm": 0.8497554659843445, "learning_rate": 4.602531815941676e-06, "loss": 0.1537, "step": 1913 }, { "epoch": 0.6202203499675956, "grad_norm": 0.8721224069595337, "learning_rate": 4.602058510837257e-06, "loss": 0.1622, "step": 1914 }, { "epoch": 0.6205443940375891, "grad_norm": 0.9595826864242554, "learning_rate": 4.6015849484612265e-06, "loss": 0.1696, "step": 1915 }, { "epoch": 0.6208684381075826, "grad_norm": 0.8733614087104797, "learning_rate": 4.601111128871544e-06, "loss": 0.1496, "step": 1916 }, { "epoch": 0.6211924821775762, "grad_norm": 0.8793935775756836, "learning_rate": 4.600637052126199e-06, "loss": 0.1503, "step": 1917 }, { "epoch": 0.6215165262475697, "grad_norm": 0.9011240005493164, "learning_rate": 4.600162718283215e-06, "loss": 0.1603, "step": 1918 }, { "epoch": 0.6218405703175632, "grad_norm": 1.0156891345977783, "learning_rate": 4.599688127400645e-06, "loss": 0.1696, "step": 1919 }, { "epoch": 0.6221646143875567, "grad_norm": 0.9367722868919373, "learning_rate": 4.599213279536575e-06, "loss": 0.1665, "step": 1920 }, { "epoch": 0.6224886584575502, "grad_norm": 0.9469424486160278, "learning_rate": 4.598738174749121e-06, "loss": 0.1747, "step": 1921 }, { "epoch": 0.6228127025275437, "grad_norm": 0.9037867784500122, "learning_rate": 4.598262813096432e-06, "loss": 0.1555, "step": 1922 }, { "epoch": 0.6231367465975373, "grad_norm": 0.954498291015625, "learning_rate": 4.597787194636688e-06, "loss": 0.1571, "step": 1923 }, { "epoch": 0.6234607906675308, "grad_norm": 0.9314236044883728, "learning_rate": 4.597311319428099e-06, "loss": 0.156, "step": 1924 }, { "epoch": 0.6237848347375243, "grad_norm": 0.9310925006866455, "learning_rate": 4.596835187528908e-06, "loss": 0.1643, "step": 1925 }, { "epoch": 0.6241088788075179, "grad_norm": 0.7893508076667786, "learning_rate": 4.59635879899739e-06, "loss": 0.1468, "step": 1926 }, { "epoch": 0.6244329228775113, "grad_norm": 0.8389633893966675, "learning_rate": 4.595882153891849e-06, "loss": 0.1462, "step": 1927 }, { "epoch": 0.6247569669475048, "grad_norm": 0.9003433585166931, "learning_rate": 4.595405252270622e-06, "loss": 0.1646, "step": 1928 }, { "epoch": 0.6250810110174984, "grad_norm": 0.9383062720298767, "learning_rate": 4.594928094192076e-06, "loss": 0.1631, "step": 1929 }, { "epoch": 0.6254050550874919, "grad_norm": 0.8632388710975647, "learning_rate": 4.594450679714613e-06, "loss": 0.1489, "step": 1930 }, { "epoch": 0.6257290991574854, "grad_norm": 0.8126484751701355, "learning_rate": 4.593973008896662e-06, "loss": 0.1439, "step": 1931 }, { "epoch": 0.626053143227479, "grad_norm": 0.9024901986122131, "learning_rate": 4.593495081796686e-06, "loss": 0.1499, "step": 1932 }, { "epoch": 0.6263771872974725, "grad_norm": 0.8754759430885315, "learning_rate": 4.59301689847318e-06, "loss": 0.1414, "step": 1933 }, { "epoch": 0.626701231367466, "grad_norm": 0.93331378698349, "learning_rate": 4.592538458984666e-06, "loss": 0.1634, "step": 1934 }, { "epoch": 0.6270252754374595, "grad_norm": 0.9175758957862854, "learning_rate": 4.5920597633897015e-06, "loss": 0.1518, "step": 1935 }, { "epoch": 0.627349319507453, "grad_norm": 1.0189933776855469, "learning_rate": 4.5915808117468766e-06, "loss": 0.1678, "step": 1936 }, { "epoch": 0.6276733635774465, "grad_norm": 0.9787598252296448, "learning_rate": 4.591101604114807e-06, "loss": 0.1695, "step": 1937 }, { "epoch": 0.62799740764744, "grad_norm": 0.8894520998001099, "learning_rate": 4.590622140552144e-06, "loss": 0.1647, "step": 1938 }, { "epoch": 0.6283214517174336, "grad_norm": 0.9015518426895142, "learning_rate": 4.5901424211175715e-06, "loss": 0.1558, "step": 1939 }, { "epoch": 0.6286454957874271, "grad_norm": 0.9240670800209045, "learning_rate": 4.5896624458698e-06, "loss": 0.1564, "step": 1940 }, { "epoch": 0.6289695398574207, "grad_norm": 0.9072241187095642, "learning_rate": 4.5891822148675745e-06, "loss": 0.1511, "step": 1941 }, { "epoch": 0.6292935839274141, "grad_norm": 0.8959493637084961, "learning_rate": 4.588701728169671e-06, "loss": 0.1642, "step": 1942 }, { "epoch": 0.6296176279974076, "grad_norm": 0.924720823764801, "learning_rate": 4.5882209858348956e-06, "loss": 0.1605, "step": 1943 }, { "epoch": 0.6299416720674011, "grad_norm": 0.9390182495117188, "learning_rate": 4.587739987922087e-06, "loss": 0.1647, "step": 1944 }, { "epoch": 0.6302657161373947, "grad_norm": 0.8489257097244263, "learning_rate": 4.587258734490115e-06, "loss": 0.1432, "step": 1945 }, { "epoch": 0.6305897602073882, "grad_norm": 0.9559697508811951, "learning_rate": 4.586777225597881e-06, "loss": 0.1582, "step": 1946 }, { "epoch": 0.6309138042773818, "grad_norm": 0.9959377646446228, "learning_rate": 4.586295461304315e-06, "loss": 0.1829, "step": 1947 }, { "epoch": 0.6312378483473753, "grad_norm": 0.8491382002830505, "learning_rate": 4.585813441668383e-06, "loss": 0.1528, "step": 1948 }, { "epoch": 0.6315618924173687, "grad_norm": 0.8617604374885559, "learning_rate": 4.585331166749077e-06, "loss": 0.1583, "step": 1949 }, { "epoch": 0.6318859364873622, "grad_norm": 0.8658198118209839, "learning_rate": 4.584848636605423e-06, "loss": 0.1505, "step": 1950 }, { "epoch": 0.6322099805573558, "grad_norm": 0.894616961479187, "learning_rate": 4.58436585129648e-06, "loss": 0.1539, "step": 1951 }, { "epoch": 0.6325340246273493, "grad_norm": 0.9101769328117371, "learning_rate": 4.583882810881334e-06, "loss": 0.1623, "step": 1952 }, { "epoch": 0.6328580686973428, "grad_norm": 0.8776459097862244, "learning_rate": 4.583399515419106e-06, "loss": 0.1501, "step": 1953 }, { "epoch": 0.6331821127673364, "grad_norm": 0.9368626475334167, "learning_rate": 4.582915964968946e-06, "loss": 0.1716, "step": 1954 }, { "epoch": 0.6335061568373299, "grad_norm": 0.931447446346283, "learning_rate": 4.582432159590037e-06, "loss": 0.1792, "step": 1955 }, { "epoch": 0.6338302009073234, "grad_norm": 0.8453537821769714, "learning_rate": 4.58194809934159e-06, "loss": 0.1514, "step": 1956 }, { "epoch": 0.6341542449773169, "grad_norm": 0.8716861605644226, "learning_rate": 4.5814637842828506e-06, "loss": 0.1556, "step": 1957 }, { "epoch": 0.6344782890473104, "grad_norm": 0.8787641525268555, "learning_rate": 4.580979214473095e-06, "loss": 0.1608, "step": 1958 }, { "epoch": 0.6348023331173039, "grad_norm": 0.9362958073616028, "learning_rate": 4.580494389971628e-06, "loss": 0.1673, "step": 1959 }, { "epoch": 0.6351263771872975, "grad_norm": 0.9452981352806091, "learning_rate": 4.580009310837789e-06, "loss": 0.1627, "step": 1960 }, { "epoch": 0.635450421257291, "grad_norm": 0.885234534740448, "learning_rate": 4.579523977130946e-06, "loss": 0.1511, "step": 1961 }, { "epoch": 0.6357744653272845, "grad_norm": 0.8860384225845337, "learning_rate": 4.579038388910499e-06, "loss": 0.1612, "step": 1962 }, { "epoch": 0.6360985093972781, "grad_norm": 0.8743710517883301, "learning_rate": 4.578552546235882e-06, "loss": 0.1653, "step": 1963 }, { "epoch": 0.6364225534672715, "grad_norm": 0.8612011075019836, "learning_rate": 4.578066449166554e-06, "loss": 0.1495, "step": 1964 }, { "epoch": 0.636746597537265, "grad_norm": 0.8718269467353821, "learning_rate": 4.57758009776201e-06, "loss": 0.1518, "step": 1965 }, { "epoch": 0.6370706416072586, "grad_norm": 0.8312247395515442, "learning_rate": 4.577093492081774e-06, "loss": 0.1475, "step": 1966 }, { "epoch": 0.6373946856772521, "grad_norm": 0.8253561854362488, "learning_rate": 4.576606632185403e-06, "loss": 0.1415, "step": 1967 }, { "epoch": 0.6377187297472456, "grad_norm": 0.8615391850471497, "learning_rate": 4.576119518132483e-06, "loss": 0.1562, "step": 1968 }, { "epoch": 0.6380427738172392, "grad_norm": 0.8764085173606873, "learning_rate": 4.575632149982631e-06, "loss": 0.1444, "step": 1969 }, { "epoch": 0.6383668178872327, "grad_norm": 0.8978087306022644, "learning_rate": 4.5751445277955e-06, "loss": 0.1504, "step": 1970 }, { "epoch": 0.6386908619572261, "grad_norm": 0.9768132567405701, "learning_rate": 4.574656651630767e-06, "loss": 0.1644, "step": 1971 }, { "epoch": 0.6390149060272197, "grad_norm": 0.9122047424316406, "learning_rate": 4.574168521548144e-06, "loss": 0.1627, "step": 1972 }, { "epoch": 0.6393389500972132, "grad_norm": 0.8582742810249329, "learning_rate": 4.573680137607373e-06, "loss": 0.1547, "step": 1973 }, { "epoch": 0.6396629941672067, "grad_norm": 0.9364274144172668, "learning_rate": 4.573191499868228e-06, "loss": 0.1497, "step": 1974 }, { "epoch": 0.6399870382372003, "grad_norm": 1.0029749870300293, "learning_rate": 4.572702608390513e-06, "loss": 0.1576, "step": 1975 }, { "epoch": 0.6403110823071938, "grad_norm": 0.9168883562088013, "learning_rate": 4.572213463234065e-06, "loss": 0.165, "step": 1976 }, { "epoch": 0.6406351263771873, "grad_norm": 0.9613797664642334, "learning_rate": 4.5717240644587495e-06, "loss": 0.1644, "step": 1977 }, { "epoch": 0.6409591704471809, "grad_norm": 0.8673607110977173, "learning_rate": 4.571234412124464e-06, "loss": 0.1622, "step": 1978 }, { "epoch": 0.6412832145171743, "grad_norm": 0.8543764352798462, "learning_rate": 4.570744506291138e-06, "loss": 0.1593, "step": 1979 }, { "epoch": 0.6416072585871678, "grad_norm": 0.9339621663093567, "learning_rate": 4.570254347018731e-06, "loss": 0.1754, "step": 1980 }, { "epoch": 0.6419313026571614, "grad_norm": 0.8893762826919556, "learning_rate": 4.5697639343672325e-06, "loss": 0.1639, "step": 1981 }, { "epoch": 0.6422553467271549, "grad_norm": 0.8264583945274353, "learning_rate": 4.569273268396667e-06, "loss": 0.1303, "step": 1982 }, { "epoch": 0.6425793907971484, "grad_norm": 0.9855983257293701, "learning_rate": 4.568782349167084e-06, "loss": 0.1679, "step": 1983 }, { "epoch": 0.642903434867142, "grad_norm": 0.8214596509933472, "learning_rate": 4.56829117673857e-06, "loss": 0.1484, "step": 1984 }, { "epoch": 0.6432274789371355, "grad_norm": 0.8593002557754517, "learning_rate": 4.567799751171237e-06, "loss": 0.1418, "step": 1985 }, { "epoch": 0.6435515230071289, "grad_norm": 0.959836483001709, "learning_rate": 4.567308072525233e-06, "loss": 0.1593, "step": 1986 }, { "epoch": 0.6438755670771225, "grad_norm": 0.9470639228820801, "learning_rate": 4.566816140860735e-06, "loss": 0.1392, "step": 1987 }, { "epoch": 0.644199611147116, "grad_norm": 0.9426745176315308, "learning_rate": 4.566323956237948e-06, "loss": 0.1654, "step": 1988 }, { "epoch": 0.6445236552171095, "grad_norm": 0.9331299066543579, "learning_rate": 4.565831518717114e-06, "loss": 0.1481, "step": 1989 }, { "epoch": 0.6448476992871031, "grad_norm": 0.9420998692512512, "learning_rate": 4.5653388283585e-06, "loss": 0.1599, "step": 1990 }, { "epoch": 0.6451717433570966, "grad_norm": 0.8443018198013306, "learning_rate": 4.564845885222407e-06, "loss": 0.145, "step": 1991 }, { "epoch": 0.6454957874270901, "grad_norm": 0.8921066522598267, "learning_rate": 4.564352689369168e-06, "loss": 0.1553, "step": 1992 }, { "epoch": 0.6458198314970836, "grad_norm": 1.0385701656341553, "learning_rate": 4.563859240859144e-06, "loss": 0.1769, "step": 1993 }, { "epoch": 0.6461438755670771, "grad_norm": 0.9667747616767883, "learning_rate": 4.563365539752728e-06, "loss": 0.1691, "step": 1994 }, { "epoch": 0.6464679196370706, "grad_norm": 0.9109490513801575, "learning_rate": 4.5628715861103455e-06, "loss": 0.1491, "step": 1995 }, { "epoch": 0.6467919637070642, "grad_norm": 0.8816606998443604, "learning_rate": 4.562377379992451e-06, "loss": 0.151, "step": 1996 }, { "epoch": 0.6471160077770577, "grad_norm": 0.8787545561790466, "learning_rate": 4.56188292145953e-06, "loss": 0.1628, "step": 1997 }, { "epoch": 0.6474400518470512, "grad_norm": 0.9294182062149048, "learning_rate": 4.561388210572101e-06, "loss": 0.1683, "step": 1998 }, { "epoch": 0.6477640959170448, "grad_norm": 0.9135559797286987, "learning_rate": 4.56089324739071e-06, "loss": 0.1687, "step": 1999 }, { "epoch": 0.6480881399870383, "grad_norm": 0.9418561458587646, "learning_rate": 4.560398031975937e-06, "loss": 0.1566, "step": 2000 }, { "epoch": 0.6484121840570317, "grad_norm": 0.8324270844459534, "learning_rate": 4.55990256438839e-06, "loss": 0.1436, "step": 2001 }, { "epoch": 0.6487362281270252, "grad_norm": 0.9083436131477356, "learning_rate": 4.559406844688711e-06, "loss": 0.1526, "step": 2002 }, { "epoch": 0.6490602721970188, "grad_norm": 0.8812525272369385, "learning_rate": 4.558910872937572e-06, "loss": 0.1622, "step": 2003 }, { "epoch": 0.6493843162670123, "grad_norm": 0.8936917185783386, "learning_rate": 4.558414649195673e-06, "loss": 0.1517, "step": 2004 }, { "epoch": 0.6497083603370059, "grad_norm": 0.8962358832359314, "learning_rate": 4.557918173523747e-06, "loss": 0.155, "step": 2005 }, { "epoch": 0.6500324044069994, "grad_norm": 0.8386431336402893, "learning_rate": 4.55742144598256e-06, "loss": 0.153, "step": 2006 }, { "epoch": 0.6503564484769929, "grad_norm": 0.8774044513702393, "learning_rate": 4.5569244666329055e-06, "loss": 0.154, "step": 2007 }, { "epoch": 0.6506804925469863, "grad_norm": 0.8934404253959656, "learning_rate": 4.5564272355356085e-06, "loss": 0.1502, "step": 2008 }, { "epoch": 0.6510045366169799, "grad_norm": 0.8520406484603882, "learning_rate": 4.555929752751526e-06, "loss": 0.1455, "step": 2009 }, { "epoch": 0.6513285806869734, "grad_norm": 0.827907383441925, "learning_rate": 4.5554320183415435e-06, "loss": 0.1503, "step": 2010 }, { "epoch": 0.651652624756967, "grad_norm": 0.9456222057342529, "learning_rate": 4.5549340323665815e-06, "loss": 0.1573, "step": 2011 }, { "epoch": 0.6519766688269605, "grad_norm": 0.8829612731933594, "learning_rate": 4.554435794887586e-06, "loss": 0.163, "step": 2012 }, { "epoch": 0.652300712896954, "grad_norm": 0.8887278437614441, "learning_rate": 4.553937305965539e-06, "loss": 0.1396, "step": 2013 }, { "epoch": 0.6526247569669476, "grad_norm": 0.9172626733779907, "learning_rate": 4.553438565661448e-06, "loss": 0.1656, "step": 2014 }, { "epoch": 0.652948801036941, "grad_norm": 0.8400613069534302, "learning_rate": 4.552939574036356e-06, "loss": 0.165, "step": 2015 }, { "epoch": 0.6532728451069345, "grad_norm": 0.879304826259613, "learning_rate": 4.552440331151334e-06, "loss": 0.1559, "step": 2016 }, { "epoch": 0.653596889176928, "grad_norm": 0.8935216069221497, "learning_rate": 4.551940837067486e-06, "loss": 0.164, "step": 2017 }, { "epoch": 0.6539209332469216, "grad_norm": 0.8760375380516052, "learning_rate": 4.551441091845942e-06, "loss": 0.1501, "step": 2018 }, { "epoch": 0.6542449773169151, "grad_norm": 0.8720047473907471, "learning_rate": 4.550941095547869e-06, "loss": 0.1545, "step": 2019 }, { "epoch": 0.6545690213869086, "grad_norm": 0.8929414749145508, "learning_rate": 4.55044084823446e-06, "loss": 0.1543, "step": 2020 }, { "epoch": 0.6548930654569022, "grad_norm": 0.9258971214294434, "learning_rate": 4.5499403499669415e-06, "loss": 0.164, "step": 2021 }, { "epoch": 0.6552171095268956, "grad_norm": 0.8609963655471802, "learning_rate": 4.549439600806568e-06, "loss": 0.1483, "step": 2022 }, { "epoch": 0.6555411535968891, "grad_norm": 0.9027585387229919, "learning_rate": 4.548938600814629e-06, "loss": 0.1656, "step": 2023 }, { "epoch": 0.6558651976668827, "grad_norm": 0.8795916438102722, "learning_rate": 4.5484373500524395e-06, "loss": 0.1619, "step": 2024 }, { "epoch": 0.6561892417368762, "grad_norm": 0.8832913041114807, "learning_rate": 4.547935848581349e-06, "loss": 0.1583, "step": 2025 }, { "epoch": 0.6565132858068697, "grad_norm": 0.8522735834121704, "learning_rate": 4.5474340964627365e-06, "loss": 0.1629, "step": 2026 }, { "epoch": 0.6568373298768633, "grad_norm": 0.8502964973449707, "learning_rate": 4.5469320937580105e-06, "loss": 0.1511, "step": 2027 }, { "epoch": 0.6571613739468568, "grad_norm": 0.8551455736160278, "learning_rate": 4.546429840528612e-06, "loss": 0.147, "step": 2028 }, { "epoch": 0.6574854180168503, "grad_norm": 0.9344658851623535, "learning_rate": 4.545927336836013e-06, "loss": 0.1699, "step": 2029 }, { "epoch": 0.6578094620868438, "grad_norm": 0.8650590777397156, "learning_rate": 4.545424582741714e-06, "loss": 0.1465, "step": 2030 }, { "epoch": 0.6581335061568373, "grad_norm": 0.9409444332122803, "learning_rate": 4.544921578307246e-06, "loss": 0.1746, "step": 2031 }, { "epoch": 0.6584575502268308, "grad_norm": 0.8694846630096436, "learning_rate": 4.544418323594175e-06, "loss": 0.1583, "step": 2032 }, { "epoch": 0.6587815942968244, "grad_norm": 0.9473041892051697, "learning_rate": 4.543914818664092e-06, "loss": 0.1662, "step": 2033 }, { "epoch": 0.6591056383668179, "grad_norm": 0.792140007019043, "learning_rate": 4.543411063578621e-06, "loss": 0.1422, "step": 2034 }, { "epoch": 0.6594296824368114, "grad_norm": 0.8943572640419006, "learning_rate": 4.5429070583994185e-06, "loss": 0.145, "step": 2035 }, { "epoch": 0.659753726506805, "grad_norm": 0.8899739384651184, "learning_rate": 4.542402803188168e-06, "loss": 0.1653, "step": 2036 }, { "epoch": 0.6600777705767984, "grad_norm": 0.8766211271286011, "learning_rate": 4.5418982980065874e-06, "loss": 0.1566, "step": 2037 }, { "epoch": 0.6604018146467919, "grad_norm": 0.8960506319999695, "learning_rate": 4.541393542916423e-06, "loss": 0.1631, "step": 2038 }, { "epoch": 0.6607258587167855, "grad_norm": 0.8128004670143127, "learning_rate": 4.540888537979449e-06, "loss": 0.1522, "step": 2039 }, { "epoch": 0.661049902786779, "grad_norm": 0.9594781398773193, "learning_rate": 4.540383283257477e-06, "loss": 0.1645, "step": 2040 }, { "epoch": 0.6613739468567725, "grad_norm": 0.9011057615280151, "learning_rate": 4.539877778812342e-06, "loss": 0.1463, "step": 2041 }, { "epoch": 0.6616979909267661, "grad_norm": 0.8882570862770081, "learning_rate": 4.539372024705916e-06, "loss": 0.1532, "step": 2042 }, { "epoch": 0.6620220349967596, "grad_norm": 0.8770557045936584, "learning_rate": 4.538866021000096e-06, "loss": 0.1573, "step": 2043 }, { "epoch": 0.662346079066753, "grad_norm": 0.9828415513038635, "learning_rate": 4.538359767756813e-06, "loss": 0.1709, "step": 2044 }, { "epoch": 0.6626701231367466, "grad_norm": 0.9431034326553345, "learning_rate": 4.537853265038027e-06, "loss": 0.1647, "step": 2045 }, { "epoch": 0.6629941672067401, "grad_norm": 0.9096002578735352, "learning_rate": 4.537346512905729e-06, "loss": 0.1665, "step": 2046 }, { "epoch": 0.6633182112767336, "grad_norm": 0.8507137298583984, "learning_rate": 4.536839511421941e-06, "loss": 0.1435, "step": 2047 }, { "epoch": 0.6636422553467272, "grad_norm": 0.9147508144378662, "learning_rate": 4.536332260648716e-06, "loss": 0.1585, "step": 2048 }, { "epoch": 0.6639662994167207, "grad_norm": 0.9033685326576233, "learning_rate": 4.535824760648135e-06, "loss": 0.161, "step": 2049 }, { "epoch": 0.6642903434867142, "grad_norm": 0.8964657783508301, "learning_rate": 4.535317011482311e-06, "loss": 0.1644, "step": 2050 }, { "epoch": 0.6646143875567078, "grad_norm": 0.8217459321022034, "learning_rate": 4.534809013213389e-06, "loss": 0.1467, "step": 2051 }, { "epoch": 0.6649384316267012, "grad_norm": 0.871848464012146, "learning_rate": 4.534300765903542e-06, "loss": 0.1422, "step": 2052 }, { "epoch": 0.6652624756966947, "grad_norm": 0.9146044254302979, "learning_rate": 4.533792269614974e-06, "loss": 0.1733, "step": 2053 }, { "epoch": 0.6655865197666883, "grad_norm": 0.8984795808792114, "learning_rate": 4.533283524409922e-06, "loss": 0.1619, "step": 2054 }, { "epoch": 0.6659105638366818, "grad_norm": 0.8733825087547302, "learning_rate": 4.53277453035065e-06, "loss": 0.1575, "step": 2055 }, { "epoch": 0.6662346079066753, "grad_norm": 0.8426579833030701, "learning_rate": 4.532265287499454e-06, "loss": 0.1419, "step": 2056 }, { "epoch": 0.6665586519766689, "grad_norm": 0.8925921320915222, "learning_rate": 4.531755795918661e-06, "loss": 0.1527, "step": 2057 }, { "epoch": 0.6668826960466624, "grad_norm": 0.9321977496147156, "learning_rate": 4.531246055670627e-06, "loss": 0.1731, "step": 2058 }, { "epoch": 0.6672067401166558, "grad_norm": 0.8613082766532898, "learning_rate": 4.53073606681774e-06, "loss": 0.1434, "step": 2059 }, { "epoch": 0.6675307841866494, "grad_norm": 0.906181275844574, "learning_rate": 4.530225829422418e-06, "loss": 0.1543, "step": 2060 }, { "epoch": 0.6678548282566429, "grad_norm": 0.9868515729904175, "learning_rate": 4.529715343547107e-06, "loss": 0.1832, "step": 2061 }, { "epoch": 0.6681788723266364, "grad_norm": 0.8776630163192749, "learning_rate": 4.5292046092542885e-06, "loss": 0.1487, "step": 2062 }, { "epoch": 0.66850291639663, "grad_norm": 0.8811656832695007, "learning_rate": 4.52869362660647e-06, "loss": 0.1584, "step": 2063 }, { "epoch": 0.6688269604666235, "grad_norm": 0.8803176879882812, "learning_rate": 4.5281823956661905e-06, "loss": 0.1545, "step": 2064 }, { "epoch": 0.669151004536617, "grad_norm": 0.8835181593894958, "learning_rate": 4.527670916496021e-06, "loss": 0.1579, "step": 2065 }, { "epoch": 0.6694750486066104, "grad_norm": 0.9012269973754883, "learning_rate": 4.52715918915856e-06, "loss": 0.16, "step": 2066 }, { "epoch": 0.669799092676604, "grad_norm": 0.8266863226890564, "learning_rate": 4.526647213716438e-06, "loss": 0.1454, "step": 2067 }, { "epoch": 0.6701231367465975, "grad_norm": 0.9318245649337769, "learning_rate": 4.526134990232317e-06, "loss": 0.1644, "step": 2068 }, { "epoch": 0.670447180816591, "grad_norm": 0.8861281275749207, "learning_rate": 4.525622518768888e-06, "loss": 0.1601, "step": 2069 }, { "epoch": 0.6707712248865846, "grad_norm": 0.8727645874023438, "learning_rate": 4.5251097993888726e-06, "loss": 0.1538, "step": 2070 }, { "epoch": 0.6710952689565781, "grad_norm": 0.8234463930130005, "learning_rate": 4.524596832155022e-06, "loss": 0.1543, "step": 2071 }, { "epoch": 0.6714193130265717, "grad_norm": 0.8433378338813782, "learning_rate": 4.524083617130118e-06, "loss": 0.1475, "step": 2072 }, { "epoch": 0.6717433570965652, "grad_norm": 0.9181938171386719, "learning_rate": 4.523570154376975e-06, "loss": 0.1601, "step": 2073 }, { "epoch": 0.6720674011665586, "grad_norm": 0.8135218620300293, "learning_rate": 4.5230564439584335e-06, "loss": 0.1459, "step": 2074 }, { "epoch": 0.6723914452365521, "grad_norm": 0.8569528460502625, "learning_rate": 4.522542485937369e-06, "loss": 0.152, "step": 2075 }, { "epoch": 0.6727154893065457, "grad_norm": 0.875869870185852, "learning_rate": 4.522028280376683e-06, "loss": 0.1576, "step": 2076 }, { "epoch": 0.6730395333765392, "grad_norm": 0.9111397862434387, "learning_rate": 4.521513827339311e-06, "loss": 0.1613, "step": 2077 }, { "epoch": 0.6733635774465327, "grad_norm": 0.9533864855766296, "learning_rate": 4.5209991268882165e-06, "loss": 0.1723, "step": 2078 }, { "epoch": 0.6736876215165263, "grad_norm": 0.9166319966316223, "learning_rate": 4.520484179086394e-06, "loss": 0.1597, "step": 2079 }, { "epoch": 0.6740116655865198, "grad_norm": 0.8753485083580017, "learning_rate": 4.519968983996867e-06, "loss": 0.1572, "step": 2080 }, { "epoch": 0.6743357096565132, "grad_norm": 0.9850584864616394, "learning_rate": 4.519453541682691e-06, "loss": 0.1632, "step": 2081 }, { "epoch": 0.6746597537265068, "grad_norm": 0.8764849901199341, "learning_rate": 4.518937852206952e-06, "loss": 0.1594, "step": 2082 }, { "epoch": 0.6749837977965003, "grad_norm": 0.8675936460494995, "learning_rate": 4.518421915632764e-06, "loss": 0.1493, "step": 2083 }, { "epoch": 0.6753078418664938, "grad_norm": 0.8667171001434326, "learning_rate": 4.5179057320232735e-06, "loss": 0.134, "step": 2084 }, { "epoch": 0.6756318859364874, "grad_norm": 0.8813750147819519, "learning_rate": 4.517389301441657e-06, "loss": 0.1566, "step": 2085 }, { "epoch": 0.6759559300064809, "grad_norm": 0.8764129281044006, "learning_rate": 4.51687262395112e-06, "loss": 0.15, "step": 2086 }, { "epoch": 0.6762799740764744, "grad_norm": 0.8690442442893982, "learning_rate": 4.516355699614897e-06, "loss": 0.1537, "step": 2087 }, { "epoch": 0.6766040181464679, "grad_norm": 0.9069793224334717, "learning_rate": 4.515838528496257e-06, "loss": 0.1676, "step": 2088 }, { "epoch": 0.6769280622164614, "grad_norm": 0.8847238421440125, "learning_rate": 4.5153211106584965e-06, "loss": 0.1581, "step": 2089 }, { "epoch": 0.6772521062864549, "grad_norm": 0.8980826139450073, "learning_rate": 4.514803446164941e-06, "loss": 0.1544, "step": 2090 }, { "epoch": 0.6775761503564485, "grad_norm": 0.9628041386604309, "learning_rate": 4.514285535078949e-06, "loss": 0.1632, "step": 2091 }, { "epoch": 0.677900194426442, "grad_norm": 0.8706278204917908, "learning_rate": 4.513767377463908e-06, "loss": 0.1526, "step": 2092 }, { "epoch": 0.6782242384964355, "grad_norm": 0.8483056426048279, "learning_rate": 4.513248973383234e-06, "loss": 0.1558, "step": 2093 }, { "epoch": 0.6785482825664291, "grad_norm": 0.9565305709838867, "learning_rate": 4.512730322900375e-06, "loss": 0.1809, "step": 2094 }, { "epoch": 0.6788723266364226, "grad_norm": 0.9398934841156006, "learning_rate": 4.51221142607881e-06, "loss": 0.1675, "step": 2095 }, { "epoch": 0.679196370706416, "grad_norm": 0.821510910987854, "learning_rate": 4.511692282982047e-06, "loss": 0.1437, "step": 2096 }, { "epoch": 0.6795204147764096, "grad_norm": 0.8104325532913208, "learning_rate": 4.511172893673621e-06, "loss": 0.1493, "step": 2097 }, { "epoch": 0.6798444588464031, "grad_norm": 0.8659233450889587, "learning_rate": 4.510653258217103e-06, "loss": 0.162, "step": 2098 }, { "epoch": 0.6801685029163966, "grad_norm": 0.8639686703681946, "learning_rate": 4.5101333766760926e-06, "loss": 0.1578, "step": 2099 }, { "epoch": 0.6804925469863902, "grad_norm": 0.8878895044326782, "learning_rate": 4.509613249114215e-06, "loss": 0.1631, "step": 2100 }, { "epoch": 0.6808165910563837, "grad_norm": 0.8527207970619202, "learning_rate": 4.509092875595131e-06, "loss": 0.1542, "step": 2101 }, { "epoch": 0.6811406351263772, "grad_norm": 0.8527224659919739, "learning_rate": 4.508572256182528e-06, "loss": 0.1533, "step": 2102 }, { "epoch": 0.6814646791963707, "grad_norm": 0.840473473072052, "learning_rate": 4.508051390940125e-06, "loss": 0.1435, "step": 2103 }, { "epoch": 0.6817887232663642, "grad_norm": 0.8408612012863159, "learning_rate": 4.507530279931673e-06, "loss": 0.1476, "step": 2104 }, { "epoch": 0.6821127673363577, "grad_norm": 1.008131980895996, "learning_rate": 4.5070089232209465e-06, "loss": 0.1717, "step": 2105 }, { "epoch": 0.6824368114063513, "grad_norm": 0.8935782313346863, "learning_rate": 4.506487320871758e-06, "loss": 0.1552, "step": 2106 }, { "epoch": 0.6827608554763448, "grad_norm": 0.8052367568016052, "learning_rate": 4.5059654729479474e-06, "loss": 0.1458, "step": 2107 }, { "epoch": 0.6830848995463383, "grad_norm": 0.9380152225494385, "learning_rate": 4.505443379513381e-06, "loss": 0.16, "step": 2108 }, { "epoch": 0.6834089436163319, "grad_norm": 0.9281724095344543, "learning_rate": 4.5049210406319585e-06, "loss": 0.1549, "step": 2109 }, { "epoch": 0.6837329876863253, "grad_norm": 0.8564958572387695, "learning_rate": 4.5043984563676105e-06, "loss": 0.1492, "step": 2110 }, { "epoch": 0.6840570317563188, "grad_norm": 0.9268948435783386, "learning_rate": 4.503875626784295e-06, "loss": 0.1694, "step": 2111 }, { "epoch": 0.6843810758263124, "grad_norm": 0.8347535133361816, "learning_rate": 4.503352551946003e-06, "loss": 0.1386, "step": 2112 }, { "epoch": 0.6847051198963059, "grad_norm": 1.0070157051086426, "learning_rate": 4.5028292319167515e-06, "loss": 0.1743, "step": 2113 }, { "epoch": 0.6850291639662994, "grad_norm": 0.9377530813217163, "learning_rate": 4.502305666760592e-06, "loss": 0.159, "step": 2114 }, { "epoch": 0.685353208036293, "grad_norm": 0.869188666343689, "learning_rate": 4.501781856541601e-06, "loss": 0.1466, "step": 2115 }, { "epoch": 0.6856772521062865, "grad_norm": 0.8491179943084717, "learning_rate": 4.50125780132389e-06, "loss": 0.1486, "step": 2116 }, { "epoch": 0.68600129617628, "grad_norm": 0.9667462110519409, "learning_rate": 4.500733501171599e-06, "loss": 0.1893, "step": 2117 }, { "epoch": 0.6863253402462735, "grad_norm": 0.8738124370574951, "learning_rate": 4.500208956148895e-06, "loss": 0.1741, "step": 2118 }, { "epoch": 0.686649384316267, "grad_norm": 0.8235524892807007, "learning_rate": 4.499684166319978e-06, "loss": 0.153, "step": 2119 }, { "epoch": 0.6869734283862605, "grad_norm": 0.8553236722946167, "learning_rate": 4.499159131749079e-06, "loss": 0.149, "step": 2120 }, { "epoch": 0.687297472456254, "grad_norm": 0.9009736180305481, "learning_rate": 4.498633852500455e-06, "loss": 0.1642, "step": 2121 }, { "epoch": 0.6876215165262476, "grad_norm": 0.9023570418357849, "learning_rate": 4.498108328638395e-06, "loss": 0.1455, "step": 2122 }, { "epoch": 0.6879455605962411, "grad_norm": 0.9002334475517273, "learning_rate": 4.4975825602272185e-06, "loss": 0.1607, "step": 2123 }, { "epoch": 0.6882696046662347, "grad_norm": 0.9422554969787598, "learning_rate": 4.497056547331276e-06, "loss": 0.1688, "step": 2124 }, { "epoch": 0.6885936487362281, "grad_norm": 0.8739017248153687, "learning_rate": 4.496530290014945e-06, "loss": 0.1644, "step": 2125 }, { "epoch": 0.6889176928062216, "grad_norm": 0.9366029500961304, "learning_rate": 4.496003788342633e-06, "loss": 0.1601, "step": 2126 }, { "epoch": 0.6892417368762151, "grad_norm": 0.899392306804657, "learning_rate": 4.495477042378781e-06, "loss": 0.1522, "step": 2127 }, { "epoch": 0.6895657809462087, "grad_norm": 0.8177936673164368, "learning_rate": 4.494950052187857e-06, "loss": 0.1438, "step": 2128 }, { "epoch": 0.6898898250162022, "grad_norm": 0.857815682888031, "learning_rate": 4.494422817834359e-06, "loss": 0.1548, "step": 2129 }, { "epoch": 0.6902138690861958, "grad_norm": 0.8262993097305298, "learning_rate": 4.493895339382815e-06, "loss": 0.1498, "step": 2130 }, { "epoch": 0.6905379131561893, "grad_norm": 0.8081302046775818, "learning_rate": 4.493367616897785e-06, "loss": 0.148, "step": 2131 }, { "epoch": 0.6908619572261827, "grad_norm": 0.8847200870513916, "learning_rate": 4.4928396504438555e-06, "loss": 0.1638, "step": 2132 }, { "epoch": 0.6911860012961762, "grad_norm": 0.8429677486419678, "learning_rate": 4.4923114400856445e-06, "loss": 0.1586, "step": 2133 }, { "epoch": 0.6915100453661698, "grad_norm": 0.9239067435264587, "learning_rate": 4.491782985887802e-06, "loss": 0.1628, "step": 2134 }, { "epoch": 0.6918340894361633, "grad_norm": 0.8306019902229309, "learning_rate": 4.491254287915003e-06, "loss": 0.1402, "step": 2135 }, { "epoch": 0.6921581335061568, "grad_norm": 0.9087281823158264, "learning_rate": 4.490725346231954e-06, "loss": 0.168, "step": 2136 }, { "epoch": 0.6924821775761504, "grad_norm": 0.8837271928787231, "learning_rate": 4.4901961609033965e-06, "loss": 0.1625, "step": 2137 }, { "epoch": 0.6928062216461439, "grad_norm": 0.8362112045288086, "learning_rate": 4.489666731994095e-06, "loss": 0.1521, "step": 2138 }, { "epoch": 0.6931302657161373, "grad_norm": 0.8731306195259094, "learning_rate": 4.489137059568847e-06, "loss": 0.1504, "step": 2139 }, { "epoch": 0.6934543097861309, "grad_norm": 0.9544700980186462, "learning_rate": 4.48860714369248e-06, "loss": 0.1722, "step": 2140 }, { "epoch": 0.6937783538561244, "grad_norm": 0.8797779083251953, "learning_rate": 4.488076984429849e-06, "loss": 0.1574, "step": 2141 }, { "epoch": 0.6941023979261179, "grad_norm": 0.8555868864059448, "learning_rate": 4.4875465818458404e-06, "loss": 0.1483, "step": 2142 }, { "epoch": 0.6944264419961115, "grad_norm": 0.9178819060325623, "learning_rate": 4.4870159360053725e-06, "loss": 0.1646, "step": 2143 }, { "epoch": 0.694750486066105, "grad_norm": 0.8088469505310059, "learning_rate": 4.4864850469733886e-06, "loss": 0.1365, "step": 2144 }, { "epoch": 0.6950745301360985, "grad_norm": 0.9104022979736328, "learning_rate": 4.485953914814867e-06, "loss": 0.1477, "step": 2145 }, { "epoch": 0.6953985742060921, "grad_norm": 0.9033350348472595, "learning_rate": 4.485422539594811e-06, "loss": 0.1532, "step": 2146 }, { "epoch": 0.6957226182760855, "grad_norm": 0.9154496192932129, "learning_rate": 4.4848909213782566e-06, "loss": 0.1596, "step": 2147 }, { "epoch": 0.696046662346079, "grad_norm": 0.8063889741897583, "learning_rate": 4.484359060230269e-06, "loss": 0.1341, "step": 2148 }, { "epoch": 0.6963707064160726, "grad_norm": 0.8335157632827759, "learning_rate": 4.483826956215942e-06, "loss": 0.1493, "step": 2149 }, { "epoch": 0.6966947504860661, "grad_norm": 0.8637626767158508, "learning_rate": 4.4832946094004e-06, "loss": 0.1513, "step": 2150 }, { "epoch": 0.6970187945560596, "grad_norm": 0.9334248900413513, "learning_rate": 4.482762019848799e-06, "loss": 0.1634, "step": 2151 }, { "epoch": 0.6973428386260532, "grad_norm": 0.8626821041107178, "learning_rate": 4.48222918762632e-06, "loss": 0.1487, "step": 2152 }, { "epoch": 0.6976668826960467, "grad_norm": 0.9040731191635132, "learning_rate": 4.481696112798179e-06, "loss": 0.158, "step": 2153 }, { "epoch": 0.6979909267660401, "grad_norm": 0.8181210160255432, "learning_rate": 4.481162795429618e-06, "loss": 0.147, "step": 2154 }, { "epoch": 0.6983149708360337, "grad_norm": 0.9151508808135986, "learning_rate": 4.480629235585909e-06, "loss": 0.1527, "step": 2155 }, { "epoch": 0.6986390149060272, "grad_norm": 0.909498929977417, "learning_rate": 4.480095433332357e-06, "loss": 0.1604, "step": 2156 }, { "epoch": 0.6989630589760207, "grad_norm": 0.9044917821884155, "learning_rate": 4.4795613887342916e-06, "loss": 0.1577, "step": 2157 }, { "epoch": 0.6992871030460143, "grad_norm": 0.8315892219543457, "learning_rate": 4.479027101857076e-06, "loss": 0.1459, "step": 2158 }, { "epoch": 0.6996111471160078, "grad_norm": 0.9254205822944641, "learning_rate": 4.4784925727661025e-06, "loss": 0.1633, "step": 2159 }, { "epoch": 0.6999351911860013, "grad_norm": 0.9383320808410645, "learning_rate": 4.47795780152679e-06, "loss": 0.1617, "step": 2160 }, { "epoch": 0.7002592352559948, "grad_norm": 0.8765949606895447, "learning_rate": 4.477422788204592e-06, "loss": 0.1571, "step": 2161 }, { "epoch": 0.7005832793259883, "grad_norm": 0.8948960900306702, "learning_rate": 4.476887532864986e-06, "loss": 0.1367, "step": 2162 }, { "epoch": 0.7009073233959818, "grad_norm": 0.9318114519119263, "learning_rate": 4.476352035573486e-06, "loss": 0.1636, "step": 2163 }, { "epoch": 0.7012313674659754, "grad_norm": 0.8219728469848633, "learning_rate": 4.475816296395627e-06, "loss": 0.1455, "step": 2164 }, { "epoch": 0.7015554115359689, "grad_norm": 0.952003538608551, "learning_rate": 4.475280315396982e-06, "loss": 0.1677, "step": 2165 }, { "epoch": 0.7018794556059624, "grad_norm": 0.9319960474967957, "learning_rate": 4.474744092643149e-06, "loss": 0.157, "step": 2166 }, { "epoch": 0.702203499675956, "grad_norm": 0.8745998740196228, "learning_rate": 4.474207628199756e-06, "loss": 0.1497, "step": 2167 }, { "epoch": 0.7025275437459495, "grad_norm": 0.8776024580001831, "learning_rate": 4.47367092213246e-06, "loss": 0.152, "step": 2168 }, { "epoch": 0.7028515878159429, "grad_norm": 0.845687985420227, "learning_rate": 4.473133974506951e-06, "loss": 0.1473, "step": 2169 }, { "epoch": 0.7031756318859365, "grad_norm": 0.9918162226676941, "learning_rate": 4.472596785388944e-06, "loss": 0.1871, "step": 2170 }, { "epoch": 0.70349967595593, "grad_norm": 0.9364629983901978, "learning_rate": 4.472059354844187e-06, "loss": 0.16, "step": 2171 }, { "epoch": 0.7038237200259235, "grad_norm": 0.8628015518188477, "learning_rate": 4.4715216829384566e-06, "loss": 0.1676, "step": 2172 }, { "epoch": 0.7041477640959171, "grad_norm": 0.9118626713752747, "learning_rate": 4.470983769737557e-06, "loss": 0.1758, "step": 2173 }, { "epoch": 0.7044718081659106, "grad_norm": 0.9084286689758301, "learning_rate": 4.470445615307325e-06, "loss": 0.1684, "step": 2174 }, { "epoch": 0.7047958522359041, "grad_norm": 0.8909494280815125, "learning_rate": 4.4699072197136255e-06, "loss": 0.1707, "step": 2175 }, { "epoch": 0.7051198963058976, "grad_norm": 0.9201353192329407, "learning_rate": 4.469368583022352e-06, "loss": 0.167, "step": 2176 }, { "epoch": 0.7054439403758911, "grad_norm": 0.9684481024742126, "learning_rate": 4.468829705299429e-06, "loss": 0.1635, "step": 2177 }, { "epoch": 0.7057679844458846, "grad_norm": 0.9111968278884888, "learning_rate": 4.4682905866108094e-06, "loss": 0.1672, "step": 2178 }, { "epoch": 0.7060920285158782, "grad_norm": 0.9797288775444031, "learning_rate": 4.467751227022478e-06, "loss": 0.1512, "step": 2179 }, { "epoch": 0.7064160725858717, "grad_norm": 0.8910669088363647, "learning_rate": 4.467211626600444e-06, "loss": 0.1626, "step": 2180 }, { "epoch": 0.7067401166558652, "grad_norm": 0.8507121205329895, "learning_rate": 4.466671785410752e-06, "loss": 0.1566, "step": 2181 }, { "epoch": 0.7070641607258588, "grad_norm": 0.8694670796394348, "learning_rate": 4.4661317035194716e-06, "loss": 0.155, "step": 2182 }, { "epoch": 0.7073882047958522, "grad_norm": 0.9297208189964294, "learning_rate": 4.4655913809927045e-06, "loss": 0.1554, "step": 2183 }, { "epoch": 0.7077122488658457, "grad_norm": 1.0536330938339233, "learning_rate": 4.4650508178965814e-06, "loss": 0.1626, "step": 2184 }, { "epoch": 0.7080362929358393, "grad_norm": 0.979632556438446, "learning_rate": 4.464510014297261e-06, "loss": 0.1503, "step": 2185 }, { "epoch": 0.7083603370058328, "grad_norm": 0.869288980960846, "learning_rate": 4.4639689702609326e-06, "loss": 0.1545, "step": 2186 }, { "epoch": 0.7086843810758263, "grad_norm": 0.9722819328308105, "learning_rate": 4.463427685853815e-06, "loss": 0.1575, "step": 2187 }, { "epoch": 0.7090084251458199, "grad_norm": 0.958224892616272, "learning_rate": 4.462886161142157e-06, "loss": 0.1641, "step": 2188 }, { "epoch": 0.7093324692158134, "grad_norm": 0.9742621183395386, "learning_rate": 4.4623443961922334e-06, "loss": 0.1651, "step": 2189 }, { "epoch": 0.7096565132858069, "grad_norm": 0.92420494556427, "learning_rate": 4.461802391070354e-06, "loss": 0.1536, "step": 2190 }, { "epoch": 0.7099805573558003, "grad_norm": 0.9209905862808228, "learning_rate": 4.4612601458428525e-06, "loss": 0.153, "step": 2191 }, { "epoch": 0.7103046014257939, "grad_norm": 1.007217526435852, "learning_rate": 4.460717660576097e-06, "loss": 0.1683, "step": 2192 }, { "epoch": 0.7106286454957874, "grad_norm": 0.8647215366363525, "learning_rate": 4.46017493533648e-06, "loss": 0.153, "step": 2193 }, { "epoch": 0.710952689565781, "grad_norm": 0.9210792183876038, "learning_rate": 4.459631970190428e-06, "loss": 0.1606, "step": 2194 }, { "epoch": 0.7112767336357745, "grad_norm": 0.8727303743362427, "learning_rate": 4.4590887652043925e-06, "loss": 0.1507, "step": 2195 }, { "epoch": 0.711600777705768, "grad_norm": 1.004441261291504, "learning_rate": 4.458545320444857e-06, "loss": 0.1669, "step": 2196 }, { "epoch": 0.7119248217757616, "grad_norm": 0.9051446914672852, "learning_rate": 4.458001635978335e-06, "loss": 0.1516, "step": 2197 }, { "epoch": 0.712248865845755, "grad_norm": 0.879884660243988, "learning_rate": 4.457457711871369e-06, "loss": 0.1618, "step": 2198 }, { "epoch": 0.7125729099157485, "grad_norm": 0.9076919555664062, "learning_rate": 4.4569135481905274e-06, "loss": 0.1543, "step": 2199 }, { "epoch": 0.712896953985742, "grad_norm": 0.8741816878318787, "learning_rate": 4.456369145002412e-06, "loss": 0.1498, "step": 2200 }, { "epoch": 0.7132209980557356, "grad_norm": 0.9390980005264282, "learning_rate": 4.455824502373653e-06, "loss": 0.1487, "step": 2201 }, { "epoch": 0.7135450421257291, "grad_norm": 0.8313080072402954, "learning_rate": 4.455279620370908e-06, "loss": 0.1516, "step": 2202 }, { "epoch": 0.7138690861957226, "grad_norm": 0.8874267339706421, "learning_rate": 4.454734499060867e-06, "loss": 0.161, "step": 2203 }, { "epoch": 0.7141931302657162, "grad_norm": 0.8838033676147461, "learning_rate": 4.454189138510246e-06, "loss": 0.1545, "step": 2204 }, { "epoch": 0.7145171743357096, "grad_norm": 1.1029689311981201, "learning_rate": 4.453643538785793e-06, "loss": 0.1689, "step": 2205 }, { "epoch": 0.7148412184057031, "grad_norm": 0.9252147674560547, "learning_rate": 4.453097699954282e-06, "loss": 0.1704, "step": 2206 }, { "epoch": 0.7151652624756967, "grad_norm": 0.8694695234298706, "learning_rate": 4.452551622082522e-06, "loss": 0.1456, "step": 2207 }, { "epoch": 0.7154893065456902, "grad_norm": 1.0107353925704956, "learning_rate": 4.452005305237344e-06, "loss": 0.1657, "step": 2208 }, { "epoch": 0.7158133506156837, "grad_norm": 0.841675877571106, "learning_rate": 4.451458749485614e-06, "loss": 0.1541, "step": 2209 }, { "epoch": 0.7161373946856773, "grad_norm": 0.9358766674995422, "learning_rate": 4.4509119548942245e-06, "loss": 0.1616, "step": 2210 }, { "epoch": 0.7164614387556708, "grad_norm": 0.8442674875259399, "learning_rate": 4.450364921530099e-06, "loss": 0.155, "step": 2211 }, { "epoch": 0.7167854828256643, "grad_norm": 0.9010113477706909, "learning_rate": 4.449817649460187e-06, "loss": 0.1539, "step": 2212 }, { "epoch": 0.7171095268956578, "grad_norm": 0.8915073275566101, "learning_rate": 4.449270138751471e-06, "loss": 0.1473, "step": 2213 }, { "epoch": 0.7174335709656513, "grad_norm": 0.9104732275009155, "learning_rate": 4.4487223894709606e-06, "loss": 0.1461, "step": 2214 }, { "epoch": 0.7177576150356448, "grad_norm": 0.8660483956336975, "learning_rate": 4.448174401685694e-06, "loss": 0.1528, "step": 2215 }, { "epoch": 0.7180816591056384, "grad_norm": 0.897094190120697, "learning_rate": 4.447626175462741e-06, "loss": 0.1566, "step": 2216 }, { "epoch": 0.7184057031756319, "grad_norm": 0.9597735404968262, "learning_rate": 4.447077710869199e-06, "loss": 0.1585, "step": 2217 }, { "epoch": 0.7187297472456254, "grad_norm": 0.880084753036499, "learning_rate": 4.4465290079721935e-06, "loss": 0.1567, "step": 2218 }, { "epoch": 0.719053791315619, "grad_norm": 0.8820602297782898, "learning_rate": 4.445980066838882e-06, "loss": 0.1564, "step": 2219 }, { "epoch": 0.7193778353856124, "grad_norm": 0.9745096564292908, "learning_rate": 4.4454308875364486e-06, "loss": 0.1735, "step": 2220 }, { "epoch": 0.7197018794556059, "grad_norm": 0.8503068089485168, "learning_rate": 4.444881470132108e-06, "loss": 0.1488, "step": 2221 }, { "epoch": 0.7200259235255995, "grad_norm": 0.8765119314193726, "learning_rate": 4.444331814693103e-06, "loss": 0.1539, "step": 2222 }, { "epoch": 0.720349967595593, "grad_norm": 0.8700873255729675, "learning_rate": 4.443781921286706e-06, "loss": 0.1485, "step": 2223 }, { "epoch": 0.7206740116655865, "grad_norm": 0.8490604162216187, "learning_rate": 4.4432317899802205e-06, "loss": 0.157, "step": 2224 }, { "epoch": 0.7209980557355801, "grad_norm": 0.8531603813171387, "learning_rate": 4.442681420840974e-06, "loss": 0.1606, "step": 2225 }, { "epoch": 0.7213220998055736, "grad_norm": 0.8722713589668274, "learning_rate": 4.44213081393633e-06, "loss": 0.1389, "step": 2226 }, { "epoch": 0.721646143875567, "grad_norm": 0.9597254991531372, "learning_rate": 4.441579969333675e-06, "loss": 0.1544, "step": 2227 }, { "epoch": 0.7219701879455606, "grad_norm": 0.896874725818634, "learning_rate": 4.441028887100427e-06, "loss": 0.1721, "step": 2228 }, { "epoch": 0.7222942320155541, "grad_norm": 0.8581936955451965, "learning_rate": 4.4404775673040346e-06, "loss": 0.159, "step": 2229 }, { "epoch": 0.7226182760855476, "grad_norm": 0.8402219414710999, "learning_rate": 4.4399260100119726e-06, "loss": 0.1543, "step": 2230 }, { "epoch": 0.7229423201555412, "grad_norm": 0.9325534105300903, "learning_rate": 4.439374215291748e-06, "loss": 0.1589, "step": 2231 }, { "epoch": 0.7232663642255347, "grad_norm": 0.805816650390625, "learning_rate": 4.438822183210894e-06, "loss": 0.1422, "step": 2232 }, { "epoch": 0.7235904082955282, "grad_norm": 0.8733918070793152, "learning_rate": 4.438269913836972e-06, "loss": 0.1603, "step": 2233 }, { "epoch": 0.7239144523655218, "grad_norm": 0.9027920961380005, "learning_rate": 4.437717407237578e-06, "loss": 0.1584, "step": 2234 }, { "epoch": 0.7242384964355152, "grad_norm": 0.8571335077285767, "learning_rate": 4.437164663480332e-06, "loss": 0.1588, "step": 2235 }, { "epoch": 0.7245625405055087, "grad_norm": 0.8303107619285583, "learning_rate": 4.436611682632884e-06, "loss": 0.153, "step": 2236 }, { "epoch": 0.7248865845755023, "grad_norm": 0.8509790897369385, "learning_rate": 4.436058464762915e-06, "loss": 0.1602, "step": 2237 }, { "epoch": 0.7252106286454958, "grad_norm": 0.8622452616691589, "learning_rate": 4.435505009938131e-06, "loss": 0.1532, "step": 2238 }, { "epoch": 0.7255346727154893, "grad_norm": 0.8336579203605652, "learning_rate": 4.434951318226272e-06, "loss": 0.1349, "step": 2239 }, { "epoch": 0.7258587167854829, "grad_norm": 0.9228907227516174, "learning_rate": 4.434397389695102e-06, "loss": 0.1675, "step": 2240 }, { "epoch": 0.7261827608554764, "grad_norm": 0.8288503289222717, "learning_rate": 4.433843224412419e-06, "loss": 0.1423, "step": 2241 }, { "epoch": 0.7265068049254698, "grad_norm": 0.9091185927391052, "learning_rate": 4.4332888224460466e-06, "loss": 0.1498, "step": 2242 }, { "epoch": 0.7268308489954634, "grad_norm": 0.8939113616943359, "learning_rate": 4.432734183863837e-06, "loss": 0.152, "step": 2243 }, { "epoch": 0.7271548930654569, "grad_norm": 0.8943285942077637, "learning_rate": 4.432179308733674e-06, "loss": 0.1574, "step": 2244 }, { "epoch": 0.7274789371354504, "grad_norm": 0.8595091104507446, "learning_rate": 4.43162419712347e-06, "loss": 0.1569, "step": 2245 }, { "epoch": 0.727802981205444, "grad_norm": 0.874617874622345, "learning_rate": 4.431068849101162e-06, "loss": 0.1532, "step": 2246 }, { "epoch": 0.7281270252754375, "grad_norm": 0.9029631018638611, "learning_rate": 4.4305132647347215e-06, "loss": 0.1656, "step": 2247 }, { "epoch": 0.728451069345431, "grad_norm": 0.9923983812332153, "learning_rate": 4.429957444092146e-06, "loss": 0.1887, "step": 2248 }, { "epoch": 0.7287751134154244, "grad_norm": 0.8730635046958923, "learning_rate": 4.429401387241464e-06, "loss": 0.1687, "step": 2249 }, { "epoch": 0.729099157485418, "grad_norm": 0.9126926064491272, "learning_rate": 4.428845094250729e-06, "loss": 0.1624, "step": 2250 }, { "epoch": 0.7294232015554115, "grad_norm": 0.8735573887825012, "learning_rate": 4.428288565188028e-06, "loss": 0.1564, "step": 2251 }, { "epoch": 0.729747245625405, "grad_norm": 0.8695078492164612, "learning_rate": 4.427731800121473e-06, "loss": 0.1528, "step": 2252 }, { "epoch": 0.7300712896953986, "grad_norm": 0.8787115812301636, "learning_rate": 4.427174799119208e-06, "loss": 0.156, "step": 2253 }, { "epoch": 0.7303953337653921, "grad_norm": 0.8693944215774536, "learning_rate": 4.426617562249405e-06, "loss": 0.1605, "step": 2254 }, { "epoch": 0.7307193778353857, "grad_norm": 0.9276489019393921, "learning_rate": 4.426060089580262e-06, "loss": 0.1643, "step": 2255 }, { "epoch": 0.7310434219053791, "grad_norm": 0.8228742480278015, "learning_rate": 4.42550238118001e-06, "loss": 0.1482, "step": 2256 }, { "epoch": 0.7313674659753726, "grad_norm": 0.9286083579063416, "learning_rate": 4.424944437116907e-06, "loss": 0.1643, "step": 2257 }, { "epoch": 0.7316915100453661, "grad_norm": 0.8886469602584839, "learning_rate": 4.424386257459241e-06, "loss": 0.169, "step": 2258 }, { "epoch": 0.7320155541153597, "grad_norm": 0.9932120442390442, "learning_rate": 4.423827842275325e-06, "loss": 0.1628, "step": 2259 }, { "epoch": 0.7323395981853532, "grad_norm": 0.8697348237037659, "learning_rate": 4.4232691916335055e-06, "loss": 0.1387, "step": 2260 }, { "epoch": 0.7326636422553467, "grad_norm": 0.9891642928123474, "learning_rate": 4.422710305602156e-06, "loss": 0.1627, "step": 2261 }, { "epoch": 0.7329876863253403, "grad_norm": 0.8648738861083984, "learning_rate": 4.422151184249679e-06, "loss": 0.1644, "step": 2262 }, { "epoch": 0.7333117303953338, "grad_norm": 0.8476166129112244, "learning_rate": 4.421591827644503e-06, "loss": 0.1588, "step": 2263 }, { "epoch": 0.7336357744653272, "grad_norm": 0.9727128744125366, "learning_rate": 4.4210322358550915e-06, "loss": 0.167, "step": 2264 }, { "epoch": 0.7339598185353208, "grad_norm": 0.8248828649520874, "learning_rate": 4.420472408949931e-06, "loss": 0.141, "step": 2265 }, { "epoch": 0.7342838626053143, "grad_norm": 0.8558880090713501, "learning_rate": 4.419912346997539e-06, "loss": 0.1369, "step": 2266 }, { "epoch": 0.7346079066753078, "grad_norm": 0.9087838530540466, "learning_rate": 4.419352050066462e-06, "loss": 0.1654, "step": 2267 }, { "epoch": 0.7349319507453014, "grad_norm": 0.8426334857940674, "learning_rate": 4.418791518225275e-06, "loss": 0.1451, "step": 2268 }, { "epoch": 0.7352559948152949, "grad_norm": 0.8864561915397644, "learning_rate": 4.418230751542581e-06, "loss": 0.1666, "step": 2269 }, { "epoch": 0.7355800388852884, "grad_norm": 0.8837295174598694, "learning_rate": 4.417669750087014e-06, "loss": 0.1488, "step": 2270 }, { "epoch": 0.7359040829552819, "grad_norm": 0.8623096942901611, "learning_rate": 4.417108513927233e-06, "loss": 0.1403, "step": 2271 }, { "epoch": 0.7362281270252754, "grad_norm": 0.841560423374176, "learning_rate": 4.416547043131929e-06, "loss": 0.1416, "step": 2272 }, { "epoch": 0.7365521710952689, "grad_norm": 0.935539960861206, "learning_rate": 4.41598533776982e-06, "loss": 0.1668, "step": 2273 }, { "epoch": 0.7368762151652625, "grad_norm": 0.8458617329597473, "learning_rate": 4.415423397909655e-06, "loss": 0.1599, "step": 2274 }, { "epoch": 0.737200259235256, "grad_norm": 0.7807754278182983, "learning_rate": 4.414861223620209e-06, "loss": 0.1348, "step": 2275 }, { "epoch": 0.7375243033052495, "grad_norm": 0.8110257983207703, "learning_rate": 4.414298814970286e-06, "loss": 0.1532, "step": 2276 }, { "epoch": 0.7378483473752431, "grad_norm": 0.8469443917274475, "learning_rate": 4.41373617202872e-06, "loss": 0.15, "step": 2277 }, { "epoch": 0.7381723914452365, "grad_norm": 0.8829174041748047, "learning_rate": 4.413173294864373e-06, "loss": 0.1585, "step": 2278 }, { "epoch": 0.73849643551523, "grad_norm": 0.7983418703079224, "learning_rate": 4.412610183546135e-06, "loss": 0.1429, "step": 2279 }, { "epoch": 0.7388204795852236, "grad_norm": 0.8902999758720398, "learning_rate": 4.412046838142927e-06, "loss": 0.1519, "step": 2280 }, { "epoch": 0.7391445236552171, "grad_norm": 0.8876487612724304, "learning_rate": 4.411483258723695e-06, "loss": 0.155, "step": 2281 }, { "epoch": 0.7394685677252106, "grad_norm": 0.8737650513648987, "learning_rate": 4.410919445357418e-06, "loss": 0.1547, "step": 2282 }, { "epoch": 0.7397926117952042, "grad_norm": 0.8292451500892639, "learning_rate": 4.410355398113099e-06, "loss": 0.1442, "step": 2283 }, { "epoch": 0.7401166558651977, "grad_norm": 0.8985834121704102, "learning_rate": 4.409791117059773e-06, "loss": 0.1526, "step": 2284 }, { "epoch": 0.7404406999351912, "grad_norm": 0.841582715511322, "learning_rate": 4.409226602266503e-06, "loss": 0.1523, "step": 2285 }, { "epoch": 0.7407647440051847, "grad_norm": 0.8735675811767578, "learning_rate": 4.408661853802379e-06, "loss": 0.1638, "step": 2286 }, { "epoch": 0.7410887880751782, "grad_norm": 0.9003371596336365, "learning_rate": 4.408096871736522e-06, "loss": 0.1668, "step": 2287 }, { "epoch": 0.7414128321451717, "grad_norm": 0.8409478068351746, "learning_rate": 4.407531656138079e-06, "loss": 0.1567, "step": 2288 }, { "epoch": 0.7417368762151653, "grad_norm": 0.8526241183280945, "learning_rate": 4.406966207076229e-06, "loss": 0.1491, "step": 2289 }, { "epoch": 0.7420609202851588, "grad_norm": 0.8444311618804932, "learning_rate": 4.406400524620174e-06, "loss": 0.1456, "step": 2290 }, { "epoch": 0.7423849643551523, "grad_norm": 0.8632143139839172, "learning_rate": 4.405834608839152e-06, "loss": 0.1535, "step": 2291 }, { "epoch": 0.7427090084251459, "grad_norm": 0.8817870020866394, "learning_rate": 4.405268459802423e-06, "loss": 0.1597, "step": 2292 }, { "epoch": 0.7430330524951393, "grad_norm": 0.8612931370735168, "learning_rate": 4.404702077579279e-06, "loss": 0.1703, "step": 2293 }, { "epoch": 0.7433570965651328, "grad_norm": 0.8779714703559875, "learning_rate": 4.4041354622390395e-06, "loss": 0.146, "step": 2294 }, { "epoch": 0.7436811406351264, "grad_norm": 0.9611407518386841, "learning_rate": 4.403568613851054e-06, "loss": 0.1677, "step": 2295 }, { "epoch": 0.7440051847051199, "grad_norm": 0.8865842223167419, "learning_rate": 4.403001532484697e-06, "loss": 0.159, "step": 2296 }, { "epoch": 0.7443292287751134, "grad_norm": 0.8867451548576355, "learning_rate": 4.4024342182093745e-06, "loss": 0.1642, "step": 2297 }, { "epoch": 0.744653272845107, "grad_norm": 0.828064501285553, "learning_rate": 4.401866671094522e-06, "loss": 0.15, "step": 2298 }, { "epoch": 0.7449773169151005, "grad_norm": 0.8317776918411255, "learning_rate": 4.4012988912096e-06, "loss": 0.1406, "step": 2299 }, { "epoch": 0.7453013609850939, "grad_norm": 0.8799874186515808, "learning_rate": 4.4007308786241e-06, "loss": 0.1477, "step": 2300 }, { "epoch": 0.7456254050550875, "grad_norm": 0.8931517004966736, "learning_rate": 4.40016263340754e-06, "loss": 0.1444, "step": 2301 }, { "epoch": 0.745949449125081, "grad_norm": 0.8301799893379211, "learning_rate": 4.399594155629469e-06, "loss": 0.1455, "step": 2302 }, { "epoch": 0.7462734931950745, "grad_norm": 0.8843263387680054, "learning_rate": 4.3990254453594634e-06, "loss": 0.146, "step": 2303 }, { "epoch": 0.7465975372650681, "grad_norm": 0.8951470255851746, "learning_rate": 4.398456502667127e-06, "loss": 0.1623, "step": 2304 }, { "epoch": 0.7469215813350616, "grad_norm": 0.9557040333747864, "learning_rate": 4.397887327622093e-06, "loss": 0.1693, "step": 2305 }, { "epoch": 0.7472456254050551, "grad_norm": 0.8738280534744263, "learning_rate": 4.397317920294023e-06, "loss": 0.1553, "step": 2306 }, { "epoch": 0.7475696694750487, "grad_norm": 0.9042795896530151, "learning_rate": 4.396748280752608e-06, "loss": 0.1601, "step": 2307 }, { "epoch": 0.7478937135450421, "grad_norm": 0.8857934474945068, "learning_rate": 4.396178409067564e-06, "loss": 0.1633, "step": 2308 }, { "epoch": 0.7482177576150356, "grad_norm": 0.9887219071388245, "learning_rate": 4.395608305308639e-06, "loss": 0.1663, "step": 2309 }, { "epoch": 0.7485418016850292, "grad_norm": 0.9533881545066833, "learning_rate": 4.395037969545609e-06, "loss": 0.1609, "step": 2310 }, { "epoch": 0.7488658457550227, "grad_norm": 0.8651255965232849, "learning_rate": 4.394467401848277e-06, "loss": 0.1505, "step": 2311 }, { "epoch": 0.7491898898250162, "grad_norm": 0.8285210132598877, "learning_rate": 4.393896602286475e-06, "loss": 0.1569, "step": 2312 }, { "epoch": 0.7495139338950098, "grad_norm": 0.8599982857704163, "learning_rate": 4.3933255709300635e-06, "loss": 0.166, "step": 2313 }, { "epoch": 0.7498379779650033, "grad_norm": 0.8919265270233154, "learning_rate": 4.3927543078489295e-06, "loss": 0.1662, "step": 2314 }, { "epoch": 0.7501620220349967, "grad_norm": 0.8909933567047119, "learning_rate": 4.392182813112993e-06, "loss": 0.1619, "step": 2315 }, { "epoch": 0.7504860661049902, "grad_norm": 0.9427803158760071, "learning_rate": 4.391611086792198e-06, "loss": 0.1523, "step": 2316 }, { "epoch": 0.7508101101749838, "grad_norm": 0.8598654270172119, "learning_rate": 4.391039128956517e-06, "loss": 0.1614, "step": 2317 }, { "epoch": 0.7511341542449773, "grad_norm": 0.9585804343223572, "learning_rate": 4.390466939675954e-06, "loss": 0.1602, "step": 2318 }, { "epoch": 0.7514581983149708, "grad_norm": 0.7937201857566833, "learning_rate": 4.389894519020539e-06, "loss": 0.1382, "step": 2319 }, { "epoch": 0.7517822423849644, "grad_norm": 0.8627025485038757, "learning_rate": 4.38932186706033e-06, "loss": 0.1467, "step": 2320 }, { "epoch": 0.7521062864549579, "grad_norm": 0.9446385502815247, "learning_rate": 4.388748983865414e-06, "loss": 0.1666, "step": 2321 }, { "epoch": 0.7524303305249513, "grad_norm": 0.869536280632019, "learning_rate": 4.388175869505908e-06, "loss": 0.148, "step": 2322 }, { "epoch": 0.7527543745949449, "grad_norm": 0.8737410306930542, "learning_rate": 4.387602524051954e-06, "loss": 0.1463, "step": 2323 }, { "epoch": 0.7530784186649384, "grad_norm": 0.8531762957572937, "learning_rate": 4.387028947573724e-06, "loss": 0.136, "step": 2324 }, { "epoch": 0.7534024627349319, "grad_norm": 0.9215272068977356, "learning_rate": 4.3864551401414195e-06, "loss": 0.166, "step": 2325 }, { "epoch": 0.7537265068049255, "grad_norm": 0.8515351414680481, "learning_rate": 4.385881101825268e-06, "loss": 0.16, "step": 2326 }, { "epoch": 0.754050550874919, "grad_norm": 0.8563395738601685, "learning_rate": 4.385306832695526e-06, "loss": 0.1488, "step": 2327 }, { "epoch": 0.7543745949449125, "grad_norm": 0.8613479733467102, "learning_rate": 4.384732332822479e-06, "loss": 0.146, "step": 2328 }, { "epoch": 0.7546986390149061, "grad_norm": 0.8307859897613525, "learning_rate": 4.38415760227644e-06, "loss": 0.1554, "step": 2329 }, { "epoch": 0.7550226830848995, "grad_norm": 0.8669750690460205, "learning_rate": 4.38358264112775e-06, "loss": 0.1494, "step": 2330 }, { "epoch": 0.755346727154893, "grad_norm": 0.986054003238678, "learning_rate": 4.3830074494467815e-06, "loss": 0.1739, "step": 2331 }, { "epoch": 0.7556707712248866, "grad_norm": 0.8789486885070801, "learning_rate": 4.382432027303928e-06, "loss": 0.1444, "step": 2332 }, { "epoch": 0.7559948152948801, "grad_norm": 0.8285205960273743, "learning_rate": 4.381856374769617e-06, "loss": 0.1608, "step": 2333 }, { "epoch": 0.7563188593648736, "grad_norm": 0.9019232392311096, "learning_rate": 4.3812804919143055e-06, "loss": 0.1461, "step": 2334 }, { "epoch": 0.7566429034348672, "grad_norm": 0.9552561640739441, "learning_rate": 4.380704378808473e-06, "loss": 0.1699, "step": 2335 }, { "epoch": 0.7569669475048607, "grad_norm": 0.9247875213623047, "learning_rate": 4.380128035522632e-06, "loss": 0.1642, "step": 2336 }, { "epoch": 0.7572909915748541, "grad_norm": 0.9469590783119202, "learning_rate": 4.379551462127319e-06, "loss": 0.1793, "step": 2337 }, { "epoch": 0.7576150356448477, "grad_norm": 0.9114015698432922, "learning_rate": 4.3789746586931034e-06, "loss": 0.1636, "step": 2338 }, { "epoch": 0.7579390797148412, "grad_norm": 0.8937296271324158, "learning_rate": 4.37839762529058e-06, "loss": 0.1426, "step": 2339 }, { "epoch": 0.7582631237848347, "grad_norm": 0.9419205188751221, "learning_rate": 4.3778203619903716e-06, "loss": 0.1729, "step": 2340 }, { "epoch": 0.7585871678548283, "grad_norm": 0.8397945165634155, "learning_rate": 4.3772428688631285e-06, "loss": 0.1488, "step": 2341 }, { "epoch": 0.7589112119248218, "grad_norm": 0.8474755883216858, "learning_rate": 4.376665145979532e-06, "loss": 0.1493, "step": 2342 }, { "epoch": 0.7592352559948153, "grad_norm": 0.9139355421066284, "learning_rate": 4.376087193410289e-06, "loss": 0.1618, "step": 2343 }, { "epoch": 0.7595593000648088, "grad_norm": 0.9013419151306152, "learning_rate": 4.375509011226135e-06, "loss": 0.1483, "step": 2344 }, { "epoch": 0.7598833441348023, "grad_norm": 0.881345808506012, "learning_rate": 4.374930599497835e-06, "loss": 0.1484, "step": 2345 }, { "epoch": 0.7602073882047958, "grad_norm": 0.882448673248291, "learning_rate": 4.37435195829618e-06, "loss": 0.1491, "step": 2346 }, { "epoch": 0.7605314322747894, "grad_norm": 0.9138561487197876, "learning_rate": 4.373773087691992e-06, "loss": 0.1575, "step": 2347 }, { "epoch": 0.7608554763447829, "grad_norm": 0.7963294982910156, "learning_rate": 4.373193987756116e-06, "loss": 0.1482, "step": 2348 }, { "epoch": 0.7611795204147764, "grad_norm": 0.8454092144966125, "learning_rate": 4.3726146585594296e-06, "loss": 0.1556, "step": 2349 }, { "epoch": 0.76150356448477, "grad_norm": 0.9024544358253479, "learning_rate": 4.372035100172838e-06, "loss": 0.1663, "step": 2350 }, { "epoch": 0.7618276085547635, "grad_norm": 0.8852647542953491, "learning_rate": 4.371455312667272e-06, "loss": 0.1622, "step": 2351 }, { "epoch": 0.7621516526247569, "grad_norm": 0.9493899345397949, "learning_rate": 4.370875296113694e-06, "loss": 0.1781, "step": 2352 }, { "epoch": 0.7624756966947505, "grad_norm": 0.847676694393158, "learning_rate": 4.370295050583091e-06, "loss": 0.1501, "step": 2353 }, { "epoch": 0.762799740764744, "grad_norm": 0.9140247702598572, "learning_rate": 4.3697145761464785e-06, "loss": 0.1599, "step": 2354 }, { "epoch": 0.7631237848347375, "grad_norm": 0.8939958214759827, "learning_rate": 4.369133872874903e-06, "loss": 0.1621, "step": 2355 }, { "epoch": 0.7634478289047311, "grad_norm": 0.8538257479667664, "learning_rate": 4.368552940839436e-06, "loss": 0.1685, "step": 2356 }, { "epoch": 0.7637718729747246, "grad_norm": 0.9288912415504456, "learning_rate": 4.367971780111179e-06, "loss": 0.1538, "step": 2357 }, { "epoch": 0.7640959170447181, "grad_norm": 0.830361545085907, "learning_rate": 4.367390390761258e-06, "loss": 0.1318, "step": 2358 }, { "epoch": 0.7644199611147116, "grad_norm": 0.8867520093917847, "learning_rate": 4.3668087728608314e-06, "loss": 0.1494, "step": 2359 }, { "epoch": 0.7647440051847051, "grad_norm": 0.887715220451355, "learning_rate": 4.366226926481083e-06, "loss": 0.1604, "step": 2360 }, { "epoch": 0.7650680492546986, "grad_norm": 0.9055389761924744, "learning_rate": 4.365644851693226e-06, "loss": 0.1605, "step": 2361 }, { "epoch": 0.7653920933246922, "grad_norm": 0.8388006687164307, "learning_rate": 4.3650625485685e-06, "loss": 0.1508, "step": 2362 }, { "epoch": 0.7657161373946857, "grad_norm": 0.8231101036071777, "learning_rate": 4.364480017178172e-06, "loss": 0.147, "step": 2363 }, { "epoch": 0.7660401814646792, "grad_norm": 0.9128398895263672, "learning_rate": 4.36389725759354e-06, "loss": 0.169, "step": 2364 }, { "epoch": 0.7663642255346728, "grad_norm": 0.8485229015350342, "learning_rate": 4.363314269885928e-06, "loss": 0.1539, "step": 2365 }, { "epoch": 0.7666882696046662, "grad_norm": 0.9628329277038574, "learning_rate": 4.362731054126687e-06, "loss": 0.1779, "step": 2366 }, { "epoch": 0.7670123136746597, "grad_norm": 0.855089008808136, "learning_rate": 4.362147610387198e-06, "loss": 0.1575, "step": 2367 }, { "epoch": 0.7673363577446533, "grad_norm": 0.8463426828384399, "learning_rate": 4.361563938738869e-06, "loss": 0.1525, "step": 2368 }, { "epoch": 0.7676604018146468, "grad_norm": 0.8714507818222046, "learning_rate": 4.3609800392531345e-06, "loss": 0.163, "step": 2369 }, { "epoch": 0.7679844458846403, "grad_norm": 0.9294506907463074, "learning_rate": 4.36039591200146e-06, "loss": 0.176, "step": 2370 }, { "epoch": 0.7683084899546339, "grad_norm": 0.8144478797912598, "learning_rate": 4.359811557055335e-06, "loss": 0.1452, "step": 2371 }, { "epoch": 0.7686325340246274, "grad_norm": 0.9185246825218201, "learning_rate": 4.3592269744862794e-06, "loss": 0.1645, "step": 2372 }, { "epoch": 0.7689565780946209, "grad_norm": 0.8482814431190491, "learning_rate": 4.3586421643658404e-06, "loss": 0.1533, "step": 2373 }, { "epoch": 0.7692806221646143, "grad_norm": 0.8046854138374329, "learning_rate": 4.3580571267655945e-06, "loss": 0.1535, "step": 2374 }, { "epoch": 0.7696046662346079, "grad_norm": 0.858644425868988, "learning_rate": 4.357471861757144e-06, "loss": 0.1616, "step": 2375 }, { "epoch": 0.7699287103046014, "grad_norm": 0.8516557812690735, "learning_rate": 4.3568863694121185e-06, "loss": 0.1598, "step": 2376 }, { "epoch": 0.770252754374595, "grad_norm": 0.868341863155365, "learning_rate": 4.356300649802178e-06, "loss": 0.1553, "step": 2377 }, { "epoch": 0.7705767984445885, "grad_norm": 0.8729805946350098, "learning_rate": 4.355714702999008e-06, "loss": 0.1544, "step": 2378 }, { "epoch": 0.770900842514582, "grad_norm": 0.847970187664032, "learning_rate": 4.355128529074323e-06, "loss": 0.1426, "step": 2379 }, { "epoch": 0.7712248865845756, "grad_norm": 0.916247546672821, "learning_rate": 4.354542128099866e-06, "loss": 0.1604, "step": 2380 }, { "epoch": 0.771548930654569, "grad_norm": 0.8671932816505432, "learning_rate": 4.353955500147405e-06, "loss": 0.1557, "step": 2381 }, { "epoch": 0.7718729747245625, "grad_norm": 0.9008615016937256, "learning_rate": 4.353368645288738e-06, "loss": 0.1552, "step": 2382 }, { "epoch": 0.772197018794556, "grad_norm": 0.8007634878158569, "learning_rate": 4.352781563595691e-06, "loss": 0.1498, "step": 2383 }, { "epoch": 0.7725210628645496, "grad_norm": 0.9064083099365234, "learning_rate": 4.352194255140118e-06, "loss": 0.1662, "step": 2384 }, { "epoch": 0.7728451069345431, "grad_norm": 0.8306066989898682, "learning_rate": 4.351606719993899e-06, "loss": 0.147, "step": 2385 }, { "epoch": 0.7731691510045366, "grad_norm": 0.8699860572814941, "learning_rate": 4.351018958228941e-06, "loss": 0.1704, "step": 2386 }, { "epoch": 0.7734931950745302, "grad_norm": 0.9070653319358826, "learning_rate": 4.350430969917182e-06, "loss": 0.1594, "step": 2387 }, { "epoch": 0.7738172391445236, "grad_norm": 0.9005059599876404, "learning_rate": 4.349842755130587e-06, "loss": 0.1675, "step": 2388 }, { "epoch": 0.7741412832145171, "grad_norm": 0.8573256731033325, "learning_rate": 4.349254313941146e-06, "loss": 0.1646, "step": 2389 }, { "epoch": 0.7744653272845107, "grad_norm": 0.8498200178146362, "learning_rate": 4.3486656464208785e-06, "loss": 0.1666, "step": 2390 }, { "epoch": 0.7747893713545042, "grad_norm": 0.7561559081077576, "learning_rate": 4.348076752641834e-06, "loss": 0.13, "step": 2391 }, { "epoch": 0.7751134154244977, "grad_norm": 0.8790591359138489, "learning_rate": 4.347487632676084e-06, "loss": 0.1481, "step": 2392 }, { "epoch": 0.7754374594944913, "grad_norm": 0.8516284823417664, "learning_rate": 4.346898286595733e-06, "loss": 0.1547, "step": 2393 }, { "epoch": 0.7757615035644848, "grad_norm": 0.803137481212616, "learning_rate": 4.3463087144729115e-06, "loss": 0.1431, "step": 2394 }, { "epoch": 0.7760855476344782, "grad_norm": 0.796620786190033, "learning_rate": 4.3457189163797776e-06, "loss": 0.1441, "step": 2395 }, { "epoch": 0.7764095917044718, "grad_norm": 0.9133745431900024, "learning_rate": 4.345128892388515e-06, "loss": 0.1605, "step": 2396 }, { "epoch": 0.7767336357744653, "grad_norm": 0.8595781326293945, "learning_rate": 4.344538642571339e-06, "loss": 0.1483, "step": 2397 }, { "epoch": 0.7770576798444588, "grad_norm": 0.8615599274635315, "learning_rate": 4.3439481670004895e-06, "loss": 0.1482, "step": 2398 }, { "epoch": 0.7773817239144524, "grad_norm": 0.852824866771698, "learning_rate": 4.343357465748235e-06, "loss": 0.1526, "step": 2399 }, { "epoch": 0.7777057679844459, "grad_norm": 0.7731544375419617, "learning_rate": 4.342766538886872e-06, "loss": 0.1371, "step": 2400 }, { "epoch": 0.7780298120544394, "grad_norm": 0.920595645904541, "learning_rate": 4.342175386488724e-06, "loss": 0.1728, "step": 2401 }, { "epoch": 0.778353856124433, "grad_norm": 0.8988751769065857, "learning_rate": 4.341584008626143e-06, "loss": 0.1701, "step": 2402 }, { "epoch": 0.7786779001944264, "grad_norm": 0.8690599799156189, "learning_rate": 4.340992405371506e-06, "loss": 0.1398, "step": 2403 }, { "epoch": 0.7790019442644199, "grad_norm": 0.8576830625534058, "learning_rate": 4.340400576797221e-06, "loss": 0.1348, "step": 2404 }, { "epoch": 0.7793259883344135, "grad_norm": 0.8800456523895264, "learning_rate": 4.339808522975722e-06, "loss": 0.1526, "step": 2405 }, { "epoch": 0.779650032404407, "grad_norm": 0.8863540291786194, "learning_rate": 4.339216243979471e-06, "loss": 0.1441, "step": 2406 }, { "epoch": 0.7799740764744005, "grad_norm": 0.92470383644104, "learning_rate": 4.3386237398809576e-06, "loss": 0.1441, "step": 2407 }, { "epoch": 0.7802981205443941, "grad_norm": 0.9302868247032166, "learning_rate": 4.338031010752696e-06, "loss": 0.1704, "step": 2408 }, { "epoch": 0.7806221646143876, "grad_norm": 0.9741601347923279, "learning_rate": 4.337438056667233e-06, "loss": 0.1597, "step": 2409 }, { "epoch": 0.780946208684381, "grad_norm": 0.8258799314498901, "learning_rate": 4.336844877697139e-06, "loss": 0.1507, "step": 2410 }, { "epoch": 0.7812702527543746, "grad_norm": 0.8625882863998413, "learning_rate": 4.336251473915015e-06, "loss": 0.154, "step": 2411 }, { "epoch": 0.7815942968243681, "grad_norm": 0.929656445980072, "learning_rate": 4.335657845393486e-06, "loss": 0.1698, "step": 2412 }, { "epoch": 0.7819183408943616, "grad_norm": 0.8270826935768127, "learning_rate": 4.335063992205207e-06, "loss": 0.1537, "step": 2413 }, { "epoch": 0.7822423849643552, "grad_norm": 0.8252003788948059, "learning_rate": 4.3344699144228605e-06, "loss": 0.1365, "step": 2414 }, { "epoch": 0.7825664290343487, "grad_norm": 0.8627497553825378, "learning_rate": 4.333875612119156e-06, "loss": 0.1558, "step": 2415 }, { "epoch": 0.7828904731043422, "grad_norm": 0.905093252658844, "learning_rate": 4.333281085366829e-06, "loss": 0.1434, "step": 2416 }, { "epoch": 0.7832145171743357, "grad_norm": 0.8470600247383118, "learning_rate": 4.332686334238646e-06, "loss": 0.1496, "step": 2417 }, { "epoch": 0.7835385612443292, "grad_norm": 0.9364388585090637, "learning_rate": 4.332091358807397e-06, "loss": 0.1678, "step": 2418 }, { "epoch": 0.7838626053143227, "grad_norm": 0.7869712710380554, "learning_rate": 4.3314961591459015e-06, "loss": 0.1384, "step": 2419 }, { "epoch": 0.7841866493843163, "grad_norm": 0.824519157409668, "learning_rate": 4.330900735327006e-06, "loss": 0.1475, "step": 2420 }, { "epoch": 0.7845106934543098, "grad_norm": 0.8825136423110962, "learning_rate": 4.330305087423585e-06, "loss": 0.1589, "step": 2421 }, { "epoch": 0.7848347375243033, "grad_norm": 0.8884319067001343, "learning_rate": 4.329709215508541e-06, "loss": 0.1592, "step": 2422 }, { "epoch": 0.7851587815942969, "grad_norm": 0.8827762007713318, "learning_rate": 4.329113119654801e-06, "loss": 0.1526, "step": 2423 }, { "epoch": 0.7854828256642904, "grad_norm": 0.8353465795516968, "learning_rate": 4.328516799935323e-06, "loss": 0.1563, "step": 2424 }, { "epoch": 0.7858068697342838, "grad_norm": 0.8486441373825073, "learning_rate": 4.327920256423089e-06, "loss": 0.1582, "step": 2425 }, { "epoch": 0.7861309138042774, "grad_norm": 0.9012899398803711, "learning_rate": 4.3273234891911135e-06, "loss": 0.1608, "step": 2426 }, { "epoch": 0.7864549578742709, "grad_norm": 0.8898869752883911, "learning_rate": 4.3267264983124304e-06, "loss": 0.1568, "step": 2427 }, { "epoch": 0.7867790019442644, "grad_norm": 0.9775742888450623, "learning_rate": 4.326129283860109e-06, "loss": 0.1559, "step": 2428 }, { "epoch": 0.787103046014258, "grad_norm": 0.8252977132797241, "learning_rate": 4.3255318459072415e-06, "loss": 0.1414, "step": 2429 }, { "epoch": 0.7874270900842515, "grad_norm": 0.962891697883606, "learning_rate": 4.324934184526949e-06, "loss": 0.1728, "step": 2430 }, { "epoch": 0.787751134154245, "grad_norm": 0.8263359069824219, "learning_rate": 4.324336299792378e-06, "loss": 0.1541, "step": 2431 }, { "epoch": 0.7880751782242384, "grad_norm": 0.8529121279716492, "learning_rate": 4.3237381917767054e-06, "loss": 0.1476, "step": 2432 }, { "epoch": 0.788399222294232, "grad_norm": 0.8236402869224548, "learning_rate": 4.323139860553133e-06, "loss": 0.1552, "step": 2433 }, { "epoch": 0.7887232663642255, "grad_norm": 0.8603233098983765, "learning_rate": 4.3225413061948915e-06, "loss": 0.1409, "step": 2434 }, { "epoch": 0.789047310434219, "grad_norm": 0.8792175650596619, "learning_rate": 4.321942528775238e-06, "loss": 0.1503, "step": 2435 }, { "epoch": 0.7893713545042126, "grad_norm": 0.8012834787368774, "learning_rate": 4.3213435283674556e-06, "loss": 0.1428, "step": 2436 }, { "epoch": 0.7896953985742061, "grad_norm": 0.8402407169342041, "learning_rate": 4.320744305044858e-06, "loss": 0.1547, "step": 2437 }, { "epoch": 0.7900194426441997, "grad_norm": 0.8767852187156677, "learning_rate": 4.320144858880784e-06, "loss": 0.156, "step": 2438 }, { "epoch": 0.7903434867141931, "grad_norm": 0.7511712312698364, "learning_rate": 4.319545189948599e-06, "loss": 0.1397, "step": 2439 }, { "epoch": 0.7906675307841866, "grad_norm": 0.7794116735458374, "learning_rate": 4.318945298321698e-06, "loss": 0.1305, "step": 2440 }, { "epoch": 0.7909915748541801, "grad_norm": 0.8676954507827759, "learning_rate": 4.3183451840735e-06, "loss": 0.1573, "step": 2441 }, { "epoch": 0.7913156189241737, "grad_norm": 0.8750085234642029, "learning_rate": 4.3177448472774566e-06, "loss": 0.1627, "step": 2442 }, { "epoch": 0.7916396629941672, "grad_norm": 0.9190241694450378, "learning_rate": 4.317144288007039e-06, "loss": 0.144, "step": 2443 }, { "epoch": 0.7919637070641607, "grad_norm": 0.8547205924987793, "learning_rate": 4.316543506335752e-06, "loss": 0.144, "step": 2444 }, { "epoch": 0.7922877511341543, "grad_norm": 0.8410033583641052, "learning_rate": 4.315942502337126e-06, "loss": 0.1499, "step": 2445 }, { "epoch": 0.7926117952041478, "grad_norm": 0.8728411197662354, "learning_rate": 4.315341276084717e-06, "loss": 0.1593, "step": 2446 }, { "epoch": 0.7929358392741412, "grad_norm": 0.8590639233589172, "learning_rate": 4.3147398276521105e-06, "loss": 0.1559, "step": 2447 }, { "epoch": 0.7932598833441348, "grad_norm": 0.788457989692688, "learning_rate": 4.314138157112916e-06, "loss": 0.1345, "step": 2448 }, { "epoch": 0.7935839274141283, "grad_norm": 0.9031588435173035, "learning_rate": 4.313536264540774e-06, "loss": 0.1688, "step": 2449 }, { "epoch": 0.7939079714841218, "grad_norm": 0.8987663388252258, "learning_rate": 4.312934150009351e-06, "loss": 0.1625, "step": 2450 }, { "epoch": 0.7942320155541154, "grad_norm": 0.8170994520187378, "learning_rate": 4.3123318135923355e-06, "loss": 0.1302, "step": 2451 }, { "epoch": 0.7945560596241089, "grad_norm": 0.8526625633239746, "learning_rate": 4.311729255363453e-06, "loss": 0.1456, "step": 2452 }, { "epoch": 0.7948801036941024, "grad_norm": 0.7795225381851196, "learning_rate": 4.3111264753964475e-06, "loss": 0.1478, "step": 2453 }, { "epoch": 0.7952041477640959, "grad_norm": 0.8411941528320312, "learning_rate": 4.310523473765095e-06, "loss": 0.1452, "step": 2454 }, { "epoch": 0.7955281918340894, "grad_norm": 0.9050775766372681, "learning_rate": 4.309920250543196e-06, "loss": 0.1628, "step": 2455 }, { "epoch": 0.7958522359040829, "grad_norm": 0.8332045674324036, "learning_rate": 4.30931680580458e-06, "loss": 0.1504, "step": 2456 }, { "epoch": 0.7961762799740765, "grad_norm": 0.8788651823997498, "learning_rate": 4.308713139623103e-06, "loss": 0.1613, "step": 2457 }, { "epoch": 0.79650032404407, "grad_norm": 0.8998255729675293, "learning_rate": 4.308109252072647e-06, "loss": 0.1557, "step": 2458 }, { "epoch": 0.7968243681140635, "grad_norm": 0.8916454315185547, "learning_rate": 4.307505143227122e-06, "loss": 0.1641, "step": 2459 }, { "epoch": 0.7971484121840571, "grad_norm": 0.8175183534622192, "learning_rate": 4.306900813160466e-06, "loss": 0.1479, "step": 2460 }, { "epoch": 0.7974724562540505, "grad_norm": 0.8839316964149475, "learning_rate": 4.306296261946643e-06, "loss": 0.1449, "step": 2461 }, { "epoch": 0.797796500324044, "grad_norm": 0.8067646026611328, "learning_rate": 4.305691489659643e-06, "loss": 0.1492, "step": 2462 }, { "epoch": 0.7981205443940376, "grad_norm": 0.9073911905288696, "learning_rate": 4.3050864963734854e-06, "loss": 0.1603, "step": 2463 }, { "epoch": 0.7984445884640311, "grad_norm": 0.893830418586731, "learning_rate": 4.304481282162215e-06, "loss": 0.16, "step": 2464 }, { "epoch": 0.7987686325340246, "grad_norm": 0.9255582690238953, "learning_rate": 4.3038758470999056e-06, "loss": 0.1771, "step": 2465 }, { "epoch": 0.7990926766040182, "grad_norm": 0.9087891578674316, "learning_rate": 4.303270191260654e-06, "loss": 0.138, "step": 2466 }, { "epoch": 0.7994167206740117, "grad_norm": 0.8127539753913879, "learning_rate": 4.302664314718588e-06, "loss": 0.1409, "step": 2467 }, { "epoch": 0.7997407647440052, "grad_norm": 0.8776745796203613, "learning_rate": 4.302058217547862e-06, "loss": 0.1619, "step": 2468 }, { "epoch": 0.8000648088139987, "grad_norm": 0.8019484281539917, "learning_rate": 4.301451899822655e-06, "loss": 0.1527, "step": 2469 }, { "epoch": 0.8003888528839922, "grad_norm": 1.013856291770935, "learning_rate": 4.3008453616171746e-06, "loss": 0.1623, "step": 2470 }, { "epoch": 0.8007128969539857, "grad_norm": 0.9002841711044312, "learning_rate": 4.300238603005656e-06, "loss": 0.1626, "step": 2471 }, { "epoch": 0.8010369410239793, "grad_norm": 0.8387237787246704, "learning_rate": 4.299631624062359e-06, "loss": 0.1596, "step": 2472 }, { "epoch": 0.8013609850939728, "grad_norm": 0.8484622240066528, "learning_rate": 4.299024424861574e-06, "loss": 0.1568, "step": 2473 }, { "epoch": 0.8016850291639663, "grad_norm": 0.7935303449630737, "learning_rate": 4.298417005477616e-06, "loss": 0.1405, "step": 2474 }, { "epoch": 0.8020090732339599, "grad_norm": 0.8886042237281799, "learning_rate": 4.2978093659848255e-06, "loss": 0.1485, "step": 2475 }, { "epoch": 0.8023331173039533, "grad_norm": 0.8659217953681946, "learning_rate": 4.2972015064575726e-06, "loss": 0.1561, "step": 2476 }, { "epoch": 0.8026571613739468, "grad_norm": 0.8989936709403992, "learning_rate": 4.2965934269702535e-06, "loss": 0.1637, "step": 2477 }, { "epoch": 0.8029812054439404, "grad_norm": 0.9043972492218018, "learning_rate": 4.295985127597291e-06, "loss": 0.1596, "step": 2478 }, { "epoch": 0.8033052495139339, "grad_norm": 0.9152305126190186, "learning_rate": 4.295376608413136e-06, "loss": 0.169, "step": 2479 }, { "epoch": 0.8036292935839274, "grad_norm": 0.8600202798843384, "learning_rate": 4.294767869492265e-06, "loss": 0.1575, "step": 2480 }, { "epoch": 0.803953337653921, "grad_norm": 0.8142397999763489, "learning_rate": 4.294158910909181e-06, "loss": 0.1401, "step": 2481 }, { "epoch": 0.8042773817239145, "grad_norm": 0.8893610835075378, "learning_rate": 4.293549732738415e-06, "loss": 0.1619, "step": 2482 }, { "epoch": 0.8046014257939079, "grad_norm": 0.872360110282898, "learning_rate": 4.2929403350545255e-06, "loss": 0.1542, "step": 2483 }, { "epoch": 0.8049254698639015, "grad_norm": 0.8567931652069092, "learning_rate": 4.292330717932095e-06, "loss": 0.1521, "step": 2484 }, { "epoch": 0.805249513933895, "grad_norm": 0.8640723824501038, "learning_rate": 4.2917208814457364e-06, "loss": 0.1532, "step": 2485 }, { "epoch": 0.8055735580038885, "grad_norm": 0.8216172456741333, "learning_rate": 4.291110825670087e-06, "loss": 0.1353, "step": 2486 }, { "epoch": 0.8058976020738821, "grad_norm": 0.8190692067146301, "learning_rate": 4.290500550679811e-06, "loss": 0.1593, "step": 2487 }, { "epoch": 0.8062216461438756, "grad_norm": 0.9348410367965698, "learning_rate": 4.289890056549603e-06, "loss": 0.1559, "step": 2488 }, { "epoch": 0.8065456902138691, "grad_norm": 0.7952721118927002, "learning_rate": 4.289279343354178e-06, "loss": 0.1409, "step": 2489 }, { "epoch": 0.8068697342838627, "grad_norm": 0.9495409727096558, "learning_rate": 4.288668411168283e-06, "loss": 0.1705, "step": 2490 }, { "epoch": 0.8071937783538561, "grad_norm": 0.9091714024543762, "learning_rate": 4.28805726006669e-06, "loss": 0.1559, "step": 2491 }, { "epoch": 0.8075178224238496, "grad_norm": 0.8902946710586548, "learning_rate": 4.287445890124198e-06, "loss": 0.1521, "step": 2492 }, { "epoch": 0.8078418664938432, "grad_norm": 0.889609158039093, "learning_rate": 4.286834301415634e-06, "loss": 0.1493, "step": 2493 }, { "epoch": 0.8081659105638367, "grad_norm": 0.9025107026100159, "learning_rate": 4.286222494015848e-06, "loss": 0.1489, "step": 2494 }, { "epoch": 0.8084899546338302, "grad_norm": 0.8463943600654602, "learning_rate": 4.285610467999722e-06, "loss": 0.1407, "step": 2495 }, { "epoch": 0.8088139987038238, "grad_norm": 0.9571568965911865, "learning_rate": 4.28499822344216e-06, "loss": 0.1551, "step": 2496 }, { "epoch": 0.8091380427738173, "grad_norm": 0.8634231090545654, "learning_rate": 4.2843857604180955e-06, "loss": 0.1561, "step": 2497 }, { "epoch": 0.8094620868438107, "grad_norm": 0.9505967497825623, "learning_rate": 4.283773079002488e-06, "loss": 0.1591, "step": 2498 }, { "epoch": 0.8097861309138042, "grad_norm": 0.8230482339859009, "learning_rate": 4.283160179270325e-06, "loss": 0.1439, "step": 2499 }, { "epoch": 0.8101101749837978, "grad_norm": 0.8318188786506653, "learning_rate": 4.282547061296618e-06, "loss": 0.1527, "step": 2500 }, { "epoch": 0.8104342190537913, "grad_norm": 0.7912052869796753, "learning_rate": 4.281933725156406e-06, "loss": 0.1449, "step": 2501 }, { "epoch": 0.8107582631237849, "grad_norm": 0.9153440594673157, "learning_rate": 4.281320170924758e-06, "loss": 0.1436, "step": 2502 }, { "epoch": 0.8110823071937784, "grad_norm": 0.8174635171890259, "learning_rate": 4.280706398676764e-06, "loss": 0.1495, "step": 2503 }, { "epoch": 0.8114063512637719, "grad_norm": 0.8115018010139465, "learning_rate": 4.2800924084875465e-06, "loss": 0.1494, "step": 2504 }, { "epoch": 0.8117303953337653, "grad_norm": 0.82187819480896, "learning_rate": 4.27947820043225e-06, "loss": 0.1493, "step": 2505 }, { "epoch": 0.8120544394037589, "grad_norm": 0.8348486423492432, "learning_rate": 4.278863774586049e-06, "loss": 0.1596, "step": 2506 }, { "epoch": 0.8123784834737524, "grad_norm": 0.8637591600418091, "learning_rate": 4.2782491310241426e-06, "loss": 0.1504, "step": 2507 }, { "epoch": 0.812702527543746, "grad_norm": 0.8602386713027954, "learning_rate": 4.2776342698217575e-06, "loss": 0.1504, "step": 2508 }, { "epoch": 0.8130265716137395, "grad_norm": 0.9002025723457336, "learning_rate": 4.277019191054146e-06, "loss": 0.1616, "step": 2509 }, { "epoch": 0.813350615683733, "grad_norm": 0.8553583025932312, "learning_rate": 4.276403894796589e-06, "loss": 0.1496, "step": 2510 }, { "epoch": 0.8136746597537265, "grad_norm": 0.8963319659233093, "learning_rate": 4.275788381124393e-06, "loss": 0.1569, "step": 2511 }, { "epoch": 0.81399870382372, "grad_norm": 0.8563810586929321, "learning_rate": 4.275172650112889e-06, "loss": 0.1492, "step": 2512 }, { "epoch": 0.8143227478937135, "grad_norm": 0.8267882466316223, "learning_rate": 4.274556701837438e-06, "loss": 0.1425, "step": 2513 }, { "epoch": 0.814646791963707, "grad_norm": 0.8709789514541626, "learning_rate": 4.273940536373426e-06, "loss": 0.1552, "step": 2514 }, { "epoch": 0.8149708360337006, "grad_norm": 0.8784293532371521, "learning_rate": 4.273324153796264e-06, "loss": 0.1548, "step": 2515 }, { "epoch": 0.8152948801036941, "grad_norm": 0.8772052526473999, "learning_rate": 4.2727075541813945e-06, "loss": 0.1597, "step": 2516 }, { "epoch": 0.8156189241736876, "grad_norm": 0.7947052717208862, "learning_rate": 4.27209073760428e-06, "loss": 0.1505, "step": 2517 }, { "epoch": 0.8159429682436812, "grad_norm": 0.8064588904380798, "learning_rate": 4.271473704140415e-06, "loss": 0.1517, "step": 2518 }, { "epoch": 0.8162670123136747, "grad_norm": 0.8790615797042847, "learning_rate": 4.270856453865318e-06, "loss": 0.156, "step": 2519 }, { "epoch": 0.8165910563836681, "grad_norm": 0.790266215801239, "learning_rate": 4.270238986854534e-06, "loss": 0.1371, "step": 2520 }, { "epoch": 0.8169151004536617, "grad_norm": 0.9105173945426941, "learning_rate": 4.2696213031836355e-06, "loss": 0.1632, "step": 2521 }, { "epoch": 0.8172391445236552, "grad_norm": 0.8295355439186096, "learning_rate": 4.2690034029282214e-06, "loss": 0.1537, "step": 2522 }, { "epoch": 0.8175631885936487, "grad_norm": 0.8722296357154846, "learning_rate": 4.268385286163915e-06, "loss": 0.1675, "step": 2523 }, { "epoch": 0.8178872326636423, "grad_norm": 0.8620377779006958, "learning_rate": 4.267766952966369e-06, "loss": 0.1493, "step": 2524 }, { "epoch": 0.8182112767336358, "grad_norm": 0.8079911470413208, "learning_rate": 4.267148403411261e-06, "loss": 0.1395, "step": 2525 }, { "epoch": 0.8185353208036293, "grad_norm": 0.8399182558059692, "learning_rate": 4.266529637574297e-06, "loss": 0.1499, "step": 2526 }, { "epoch": 0.8188593648736228, "grad_norm": 0.8858416676521301, "learning_rate": 4.265910655531206e-06, "loss": 0.1418, "step": 2527 }, { "epoch": 0.8191834089436163, "grad_norm": 0.820108950138092, "learning_rate": 4.265291457357746e-06, "loss": 0.1467, "step": 2528 }, { "epoch": 0.8195074530136098, "grad_norm": 0.8316980004310608, "learning_rate": 4.2646720431297006e-06, "loss": 0.1446, "step": 2529 }, { "epoch": 0.8198314970836034, "grad_norm": 0.8442394137382507, "learning_rate": 4.2640524129228815e-06, "loss": 0.1541, "step": 2530 }, { "epoch": 0.8201555411535969, "grad_norm": 0.8731728196144104, "learning_rate": 4.263432566813123e-06, "loss": 0.1597, "step": 2531 }, { "epoch": 0.8204795852235904, "grad_norm": 0.8163385987281799, "learning_rate": 4.262812504876291e-06, "loss": 0.1365, "step": 2532 }, { "epoch": 0.820803629293584, "grad_norm": 0.9060762524604797, "learning_rate": 4.262192227188273e-06, "loss": 0.1584, "step": 2533 }, { "epoch": 0.8211276733635774, "grad_norm": 0.8710756301879883, "learning_rate": 4.261571733824986e-06, "loss": 0.1541, "step": 2534 }, { "epoch": 0.8214517174335709, "grad_norm": 0.8472952842712402, "learning_rate": 4.260951024862372e-06, "loss": 0.15, "step": 2535 }, { "epoch": 0.8217757615035645, "grad_norm": 0.8542965650558472, "learning_rate": 4.2603301003763994e-06, "loss": 0.1476, "step": 2536 }, { "epoch": 0.822099805573558, "grad_norm": 0.8944793343544006, "learning_rate": 4.259708960443065e-06, "loss": 0.156, "step": 2537 }, { "epoch": 0.8224238496435515, "grad_norm": 0.8457514643669128, "learning_rate": 4.259087605138388e-06, "loss": 0.1559, "step": 2538 }, { "epoch": 0.8227478937135451, "grad_norm": 0.8715274333953857, "learning_rate": 4.2584660345384176e-06, "loss": 0.1461, "step": 2539 }, { "epoch": 0.8230719377835386, "grad_norm": 0.8897663354873657, "learning_rate": 4.257844248719229e-06, "loss": 0.1685, "step": 2540 }, { "epoch": 0.8233959818535321, "grad_norm": 0.8603126406669617, "learning_rate": 4.25722224775692e-06, "loss": 0.1481, "step": 2541 }, { "epoch": 0.8237200259235256, "grad_norm": 0.8388012647628784, "learning_rate": 4.25660003172762e-06, "loss": 0.1575, "step": 2542 }, { "epoch": 0.8240440699935191, "grad_norm": 0.8356682062149048, "learning_rate": 4.255977600707481e-06, "loss": 0.1575, "step": 2543 }, { "epoch": 0.8243681140635126, "grad_norm": 0.8801903128623962, "learning_rate": 4.255354954772684e-06, "loss": 0.1464, "step": 2544 }, { "epoch": 0.8246921581335062, "grad_norm": 0.8981055021286011, "learning_rate": 4.2547320939994315e-06, "loss": 0.1595, "step": 2545 }, { "epoch": 0.8250162022034997, "grad_norm": 0.815737783908844, "learning_rate": 4.25410901846396e-06, "loss": 0.1444, "step": 2546 }, { "epoch": 0.8253402462734932, "grad_norm": 0.9538549780845642, "learning_rate": 4.253485728242525e-06, "loss": 0.1531, "step": 2547 }, { "epoch": 0.8256642903434868, "grad_norm": 0.8105667233467102, "learning_rate": 4.252862223411412e-06, "loss": 0.1458, "step": 2548 }, { "epoch": 0.8259883344134802, "grad_norm": 0.7784798741340637, "learning_rate": 4.252238504046931e-06, "loss": 0.1307, "step": 2549 }, { "epoch": 0.8263123784834737, "grad_norm": 0.9695181846618652, "learning_rate": 4.251614570225421e-06, "loss": 0.1674, "step": 2550 }, { "epoch": 0.8266364225534673, "grad_norm": 0.8391134738922119, "learning_rate": 4.250990422023243e-06, "loss": 0.153, "step": 2551 }, { "epoch": 0.8269604666234608, "grad_norm": 0.882595419883728, "learning_rate": 4.250366059516791e-06, "loss": 0.154, "step": 2552 }, { "epoch": 0.8272845106934543, "grad_norm": 0.8489596247673035, "learning_rate": 4.249741482782476e-06, "loss": 0.1565, "step": 2553 }, { "epoch": 0.8276085547634479, "grad_norm": 0.878032922744751, "learning_rate": 4.249116691896743e-06, "loss": 0.1506, "step": 2554 }, { "epoch": 0.8279325988334414, "grad_norm": 0.8520094752311707, "learning_rate": 4.248491686936059e-06, "loss": 0.1559, "step": 2555 }, { "epoch": 0.8282566429034348, "grad_norm": 0.8968580961227417, "learning_rate": 4.2478664679769196e-06, "loss": 0.1574, "step": 2556 }, { "epoch": 0.8285806869734283, "grad_norm": 0.8315073251724243, "learning_rate": 4.247241035095846e-06, "loss": 0.1415, "step": 2557 }, { "epoch": 0.8289047310434219, "grad_norm": 0.919426441192627, "learning_rate": 4.246615388369384e-06, "loss": 0.162, "step": 2558 }, { "epoch": 0.8292287751134154, "grad_norm": 0.8385602235794067, "learning_rate": 4.245989527874107e-06, "loss": 0.1462, "step": 2559 }, { "epoch": 0.829552819183409, "grad_norm": 0.892264723777771, "learning_rate": 4.245363453686614e-06, "loss": 0.1554, "step": 2560 }, { "epoch": 0.8298768632534025, "grad_norm": 0.9176088571548462, "learning_rate": 4.24473716588353e-06, "loss": 0.1717, "step": 2561 }, { "epoch": 0.830200907323396, "grad_norm": 0.8820486068725586, "learning_rate": 4.2441106645415085e-06, "loss": 0.1563, "step": 2562 }, { "epoch": 0.8305249513933896, "grad_norm": 0.8998873233795166, "learning_rate": 4.243483949737225e-06, "loss": 0.175, "step": 2563 }, { "epoch": 0.830848995463383, "grad_norm": 0.824447512626648, "learning_rate": 4.242857021547385e-06, "loss": 0.1551, "step": 2564 }, { "epoch": 0.8311730395333765, "grad_norm": 0.9089164733886719, "learning_rate": 4.242229880048718e-06, "loss": 0.1599, "step": 2565 }, { "epoch": 0.83149708360337, "grad_norm": 0.9251378178596497, "learning_rate": 4.241602525317979e-06, "loss": 0.1716, "step": 2566 }, { "epoch": 0.8318211276733636, "grad_norm": 0.8123130202293396, "learning_rate": 4.240974957431951e-06, "loss": 0.1416, "step": 2567 }, { "epoch": 0.8321451717433571, "grad_norm": 0.8112072348594666, "learning_rate": 4.240347176467442e-06, "loss": 0.1446, "step": 2568 }, { "epoch": 0.8324692158133506, "grad_norm": 0.8598057627677917, "learning_rate": 4.2397191825012865e-06, "loss": 0.1575, "step": 2569 }, { "epoch": 0.8327932598833442, "grad_norm": 0.8484219312667847, "learning_rate": 4.239090975610346e-06, "loss": 0.1522, "step": 2570 }, { "epoch": 0.8331173039533376, "grad_norm": 0.8238083124160767, "learning_rate": 4.2384625558715045e-06, "loss": 0.1481, "step": 2571 }, { "epoch": 0.8334413480233311, "grad_norm": 0.8477686643600464, "learning_rate": 4.237833923361676e-06, "loss": 0.1528, "step": 2572 }, { "epoch": 0.8337653920933247, "grad_norm": 1.2084839344024658, "learning_rate": 4.237205078157799e-06, "loss": 0.1555, "step": 2573 }, { "epoch": 0.8340894361633182, "grad_norm": 0.822715163230896, "learning_rate": 4.236576020336838e-06, "loss": 0.1515, "step": 2574 }, { "epoch": 0.8344134802333117, "grad_norm": 0.8149126172065735, "learning_rate": 4.235946749975783e-06, "loss": 0.1515, "step": 2575 }, { "epoch": 0.8347375243033053, "grad_norm": 0.810035228729248, "learning_rate": 4.235317267151652e-06, "loss": 0.1642, "step": 2576 }, { "epoch": 0.8350615683732988, "grad_norm": 0.8240023255348206, "learning_rate": 4.234687571941486e-06, "loss": 0.1445, "step": 2577 }, { "epoch": 0.8353856124432922, "grad_norm": 0.8536616563796997, "learning_rate": 4.234057664422354e-06, "loss": 0.1533, "step": 2578 }, { "epoch": 0.8357096565132858, "grad_norm": 0.8576487302780151, "learning_rate": 4.2334275446713515e-06, "loss": 0.1539, "step": 2579 }, { "epoch": 0.8360337005832793, "grad_norm": 0.8609020709991455, "learning_rate": 4.232797212765598e-06, "loss": 0.1555, "step": 2580 }, { "epoch": 0.8363577446532728, "grad_norm": 0.8141218423843384, "learning_rate": 4.2321666687822405e-06, "loss": 0.1578, "step": 2581 }, { "epoch": 0.8366817887232664, "grad_norm": 0.9742339849472046, "learning_rate": 4.231535912798452e-06, "loss": 0.1601, "step": 2582 }, { "epoch": 0.8370058327932599, "grad_norm": 0.8200106024742126, "learning_rate": 4.23090494489143e-06, "loss": 0.1406, "step": 2583 }, { "epoch": 0.8373298768632534, "grad_norm": 0.7919231057167053, "learning_rate": 4.230273765138399e-06, "loss": 0.1371, "step": 2584 }, { "epoch": 0.837653920933247, "grad_norm": 0.8794668316841125, "learning_rate": 4.229642373616609e-06, "loss": 0.1628, "step": 2585 }, { "epoch": 0.8379779650032404, "grad_norm": 0.8123350143432617, "learning_rate": 4.229010770403337e-06, "loss": 0.137, "step": 2586 }, { "epoch": 0.8383020090732339, "grad_norm": 0.7920833230018616, "learning_rate": 4.228378955575885e-06, "loss": 0.1309, "step": 2587 }, { "epoch": 0.8386260531432275, "grad_norm": 0.8339619636535645, "learning_rate": 4.227746929211582e-06, "loss": 0.1561, "step": 2588 }, { "epoch": 0.838950097213221, "grad_norm": 0.8547282218933105, "learning_rate": 4.227114691387779e-06, "loss": 0.1472, "step": 2589 }, { "epoch": 0.8392741412832145, "grad_norm": 0.9381993412971497, "learning_rate": 4.226482242181859e-06, "loss": 0.1607, "step": 2590 }, { "epoch": 0.8395981853532081, "grad_norm": 0.8304243683815002, "learning_rate": 4.225849581671225e-06, "loss": 0.1461, "step": 2591 }, { "epoch": 0.8399222294232016, "grad_norm": 0.8055948615074158, "learning_rate": 4.225216709933309e-06, "loss": 0.1383, "step": 2592 }, { "epoch": 0.840246273493195, "grad_norm": 0.8732760548591614, "learning_rate": 4.2245836270455706e-06, "loss": 0.1596, "step": 2593 }, { "epoch": 0.8405703175631886, "grad_norm": 0.8491640090942383, "learning_rate": 4.223950333085492e-06, "loss": 0.1584, "step": 2594 }, { "epoch": 0.8408943616331821, "grad_norm": 0.7880257964134216, "learning_rate": 4.223316828130581e-06, "loss": 0.1416, "step": 2595 }, { "epoch": 0.8412184057031756, "grad_norm": 0.8852708339691162, "learning_rate": 4.222683112258372e-06, "loss": 0.1435, "step": 2596 }, { "epoch": 0.8415424497731692, "grad_norm": 0.8148348927497864, "learning_rate": 4.222049185546428e-06, "loss": 0.139, "step": 2597 }, { "epoch": 0.8418664938431627, "grad_norm": 0.9881069660186768, "learning_rate": 4.221415048072335e-06, "loss": 0.1942, "step": 2598 }, { "epoch": 0.8421905379131562, "grad_norm": 0.9064881801605225, "learning_rate": 4.220780699913704e-06, "loss": 0.1671, "step": 2599 }, { "epoch": 0.8425145819831497, "grad_norm": 0.8283922672271729, "learning_rate": 4.220146141148174e-06, "loss": 0.1446, "step": 2600 }, { "epoch": 0.8428386260531432, "grad_norm": 0.8480839729309082, "learning_rate": 4.219511371853408e-06, "loss": 0.1572, "step": 2601 }, { "epoch": 0.8431626701231367, "grad_norm": 0.8674578666687012, "learning_rate": 4.2188763921070974e-06, "loss": 0.1604, "step": 2602 }, { "epoch": 0.8434867141931303, "grad_norm": 0.8412938714027405, "learning_rate": 4.2182412019869556e-06, "loss": 0.1419, "step": 2603 }, { "epoch": 0.8438107582631238, "grad_norm": 0.9011821150779724, "learning_rate": 4.217605801570725e-06, "loss": 0.1778, "step": 2604 }, { "epoch": 0.8441348023331173, "grad_norm": 0.8756242990493774, "learning_rate": 4.216970190936171e-06, "loss": 0.1635, "step": 2605 }, { "epoch": 0.8444588464031109, "grad_norm": 0.8517950177192688, "learning_rate": 4.2163343701610884e-06, "loss": 0.1594, "step": 2606 }, { "epoch": 0.8447828904731044, "grad_norm": 0.9082374572753906, "learning_rate": 4.215698339323294e-06, "loss": 0.1674, "step": 2607 }, { "epoch": 0.8451069345430978, "grad_norm": 0.8264471888542175, "learning_rate": 4.215062098500632e-06, "loss": 0.1598, "step": 2608 }, { "epoch": 0.8454309786130914, "grad_norm": 0.8358623385429382, "learning_rate": 4.214425647770972e-06, "loss": 0.1533, "step": 2609 }, { "epoch": 0.8457550226830849, "grad_norm": 0.8975405097007751, "learning_rate": 4.213788987212211e-06, "loss": 0.1782, "step": 2610 }, { "epoch": 0.8460790667530784, "grad_norm": 0.8398000001907349, "learning_rate": 4.213152116902267e-06, "loss": 0.1499, "step": 2611 }, { "epoch": 0.846403110823072, "grad_norm": 0.7483087182044983, "learning_rate": 4.212515036919089e-06, "loss": 0.1413, "step": 2612 }, { "epoch": 0.8467271548930655, "grad_norm": 0.8305369019508362, "learning_rate": 4.211877747340649e-06, "loss": 0.1461, "step": 2613 }, { "epoch": 0.847051198963059, "grad_norm": 0.8465678691864014, "learning_rate": 4.211240248244945e-06, "loss": 0.1555, "step": 2614 }, { "epoch": 0.8473752430330524, "grad_norm": 0.8199893236160278, "learning_rate": 4.21060253971e-06, "loss": 0.1233, "step": 2615 }, { "epoch": 0.847699287103046, "grad_norm": 0.898139476776123, "learning_rate": 4.2099646218138655e-06, "loss": 0.162, "step": 2616 }, { "epoch": 0.8480233311730395, "grad_norm": 0.8000852465629578, "learning_rate": 4.209326494634614e-06, "loss": 0.1361, "step": 2617 }, { "epoch": 0.848347375243033, "grad_norm": 0.8270335793495178, "learning_rate": 4.208688158250348e-06, "loss": 0.136, "step": 2618 }, { "epoch": 0.8486714193130266, "grad_norm": 0.8229568600654602, "learning_rate": 4.2080496127391914e-06, "loss": 0.1506, "step": 2619 }, { "epoch": 0.8489954633830201, "grad_norm": 0.9595044255256653, "learning_rate": 4.207410858179298e-06, "loss": 0.16, "step": 2620 }, { "epoch": 0.8493195074530137, "grad_norm": 0.8614192008972168, "learning_rate": 4.206771894648846e-06, "loss": 0.1548, "step": 2621 }, { "epoch": 0.8496435515230071, "grad_norm": 0.9117885231971741, "learning_rate": 4.206132722226035e-06, "loss": 0.1691, "step": 2622 }, { "epoch": 0.8499675955930006, "grad_norm": 0.8764152526855469, "learning_rate": 4.205493340989096e-06, "loss": 0.1532, "step": 2623 }, { "epoch": 0.8502916396629941, "grad_norm": 0.8756179213523865, "learning_rate": 4.204853751016282e-06, "loss": 0.1601, "step": 2624 }, { "epoch": 0.8506156837329877, "grad_norm": 0.8578720092773438, "learning_rate": 4.204213952385875e-06, "loss": 0.1421, "step": 2625 }, { "epoch": 0.8509397278029812, "grad_norm": 0.8636815547943115, "learning_rate": 4.203573945176177e-06, "loss": 0.1495, "step": 2626 }, { "epoch": 0.8512637718729748, "grad_norm": 0.8231790065765381, "learning_rate": 4.202933729465519e-06, "loss": 0.1492, "step": 2627 }, { "epoch": 0.8515878159429683, "grad_norm": 0.9206662774085999, "learning_rate": 4.20229330533226e-06, "loss": 0.1589, "step": 2628 }, { "epoch": 0.8519118600129617, "grad_norm": 0.8775075078010559, "learning_rate": 4.201652672854779e-06, "loss": 0.155, "step": 2629 }, { "epoch": 0.8522359040829552, "grad_norm": 0.8573769927024841, "learning_rate": 4.201011832111485e-06, "loss": 0.1506, "step": 2630 }, { "epoch": 0.8525599481529488, "grad_norm": 0.816596269607544, "learning_rate": 4.2003707831808086e-06, "loss": 0.1404, "step": 2631 }, { "epoch": 0.8528839922229423, "grad_norm": 0.8326970934867859, "learning_rate": 4.199729526141209e-06, "loss": 0.1511, "step": 2632 }, { "epoch": 0.8532080362929358, "grad_norm": 0.8921494483947754, "learning_rate": 4.199088061071172e-06, "loss": 0.1717, "step": 2633 }, { "epoch": 0.8535320803629294, "grad_norm": 0.8992860913276672, "learning_rate": 4.198446388049203e-06, "loss": 0.1559, "step": 2634 }, { "epoch": 0.8538561244329229, "grad_norm": 0.7511637806892395, "learning_rate": 4.197804507153838e-06, "loss": 0.14, "step": 2635 }, { "epoch": 0.8541801685029164, "grad_norm": 0.8324440121650696, "learning_rate": 4.197162418463639e-06, "loss": 0.1485, "step": 2636 }, { "epoch": 0.8545042125729099, "grad_norm": 0.7635222673416138, "learning_rate": 4.1965201220571895e-06, "loss": 0.1551, "step": 2637 }, { "epoch": 0.8548282566429034, "grad_norm": 0.8577344417572021, "learning_rate": 4.1958776180131e-06, "loss": 0.1605, "step": 2638 }, { "epoch": 0.8551523007128969, "grad_norm": 0.8160384893417358, "learning_rate": 4.1952349064100074e-06, "loss": 0.1504, "step": 2639 }, { "epoch": 0.8554763447828905, "grad_norm": 0.870599091053009, "learning_rate": 4.194591987326574e-06, "loss": 0.149, "step": 2640 }, { "epoch": 0.855800388852884, "grad_norm": 0.8268269300460815, "learning_rate": 4.193948860841485e-06, "loss": 0.1513, "step": 2641 }, { "epoch": 0.8561244329228775, "grad_norm": 0.8784127235412598, "learning_rate": 4.193305527033456e-06, "loss": 0.1622, "step": 2642 }, { "epoch": 0.8564484769928711, "grad_norm": 0.8954482674598694, "learning_rate": 4.192661985981221e-06, "loss": 0.1544, "step": 2643 }, { "epoch": 0.8567725210628645, "grad_norm": 0.8588570952415466, "learning_rate": 4.192018237763547e-06, "loss": 0.1479, "step": 2644 }, { "epoch": 0.857096565132858, "grad_norm": 0.8091132640838623, "learning_rate": 4.19137428245922e-06, "loss": 0.1484, "step": 2645 }, { "epoch": 0.8574206092028516, "grad_norm": 0.8757805824279785, "learning_rate": 4.190730120147054e-06, "loss": 0.1573, "step": 2646 }, { "epoch": 0.8577446532728451, "grad_norm": 0.8502841591835022, "learning_rate": 4.190085750905889e-06, "loss": 0.1473, "step": 2647 }, { "epoch": 0.8580686973428386, "grad_norm": 0.7808975577354431, "learning_rate": 4.189441174814589e-06, "loss": 0.1461, "step": 2648 }, { "epoch": 0.8583927414128322, "grad_norm": 0.7952367067337036, "learning_rate": 4.188796391952046e-06, "loss": 0.1446, "step": 2649 }, { "epoch": 0.8587167854828257, "grad_norm": 0.8280566334724426, "learning_rate": 4.188151402397172e-06, "loss": 0.1459, "step": 2650 }, { "epoch": 0.8590408295528191, "grad_norm": 0.8707353472709656, "learning_rate": 4.187506206228909e-06, "loss": 0.1494, "step": 2651 }, { "epoch": 0.8593648736228127, "grad_norm": 0.7923481464385986, "learning_rate": 4.1868608035262225e-06, "loss": 0.1584, "step": 2652 }, { "epoch": 0.8596889176928062, "grad_norm": 0.7720541954040527, "learning_rate": 4.186215194368105e-06, "loss": 0.1404, "step": 2653 }, { "epoch": 0.8600129617627997, "grad_norm": 0.7756115794181824, "learning_rate": 4.18556937883357e-06, "loss": 0.1396, "step": 2654 }, { "epoch": 0.8603370058327933, "grad_norm": 0.8358388543128967, "learning_rate": 4.184923357001661e-06, "loss": 0.1575, "step": 2655 }, { "epoch": 0.8606610499027868, "grad_norm": 0.8395894765853882, "learning_rate": 4.184277128951445e-06, "loss": 0.15, "step": 2656 }, { "epoch": 0.8609850939727803, "grad_norm": 0.8533385396003723, "learning_rate": 4.1836306947620135e-06, "loss": 0.1574, "step": 2657 }, { "epoch": 0.8613091380427739, "grad_norm": 0.9090582132339478, "learning_rate": 4.182984054512483e-06, "loss": 0.1658, "step": 2658 }, { "epoch": 0.8616331821127673, "grad_norm": 0.8781484961509705, "learning_rate": 4.182337208281998e-06, "loss": 0.1631, "step": 2659 }, { "epoch": 0.8619572261827608, "grad_norm": 0.825088381767273, "learning_rate": 4.181690156149724e-06, "loss": 0.1494, "step": 2660 }, { "epoch": 0.8622812702527544, "grad_norm": 0.8371817469596863, "learning_rate": 4.1810428981948555e-06, "loss": 0.1548, "step": 2661 }, { "epoch": 0.8626053143227479, "grad_norm": 0.872991681098938, "learning_rate": 4.1803954344966095e-06, "loss": 0.1492, "step": 2662 }, { "epoch": 0.8629293583927414, "grad_norm": 0.8750609755516052, "learning_rate": 4.17974776513423e-06, "loss": 0.1539, "step": 2663 }, { "epoch": 0.863253402462735, "grad_norm": 0.8449800610542297, "learning_rate": 4.179099890186985e-06, "loss": 0.1604, "step": 2664 }, { "epoch": 0.8635774465327285, "grad_norm": 0.9044042229652405, "learning_rate": 4.178451809734168e-06, "loss": 0.1558, "step": 2665 }, { "epoch": 0.8639014906027219, "grad_norm": 0.8914145827293396, "learning_rate": 4.1778035238550995e-06, "loss": 0.1664, "step": 2666 }, { "epoch": 0.8642255346727155, "grad_norm": 0.8576059937477112, "learning_rate": 4.177155032629122e-06, "loss": 0.1622, "step": 2667 }, { "epoch": 0.864549578742709, "grad_norm": 0.8929911255836487, "learning_rate": 4.176506336135603e-06, "loss": 0.1594, "step": 2668 }, { "epoch": 0.8648736228127025, "grad_norm": 0.9083852171897888, "learning_rate": 4.175857434453939e-06, "loss": 0.156, "step": 2669 }, { "epoch": 0.8651976668826961, "grad_norm": 0.8650022745132446, "learning_rate": 4.175208327663549e-06, "loss": 0.1646, "step": 2670 }, { "epoch": 0.8655217109526896, "grad_norm": 0.8090750575065613, "learning_rate": 4.174559015843878e-06, "loss": 0.1537, "step": 2671 }, { "epoch": 0.8658457550226831, "grad_norm": 0.7599000930786133, "learning_rate": 4.173909499074392e-06, "loss": 0.1317, "step": 2672 }, { "epoch": 0.8661697990926766, "grad_norm": 0.8080236911773682, "learning_rate": 4.173259777434589e-06, "loss": 0.1463, "step": 2673 }, { "epoch": 0.8664938431626701, "grad_norm": 0.8762059807777405, "learning_rate": 4.1726098510039894e-06, "loss": 0.1599, "step": 2674 }, { "epoch": 0.8668178872326636, "grad_norm": 0.8226339817047119, "learning_rate": 4.171959719862134e-06, "loss": 0.1489, "step": 2675 }, { "epoch": 0.8671419313026572, "grad_norm": 0.8414006233215332, "learning_rate": 4.171309384088596e-06, "loss": 0.1611, "step": 2676 }, { "epoch": 0.8674659753726507, "grad_norm": 0.862315833568573, "learning_rate": 4.170658843762968e-06, "loss": 0.1656, "step": 2677 }, { "epoch": 0.8677900194426442, "grad_norm": 0.827226459980011, "learning_rate": 4.170008098964871e-06, "loss": 0.154, "step": 2678 }, { "epoch": 0.8681140635126378, "grad_norm": 0.8278335928916931, "learning_rate": 4.169357149773949e-06, "loss": 0.1657, "step": 2679 }, { "epoch": 0.8684381075826313, "grad_norm": 0.8246443867683411, "learning_rate": 4.168705996269874e-06, "loss": 0.1479, "step": 2680 }, { "epoch": 0.8687621516526247, "grad_norm": 0.8692947626113892, "learning_rate": 4.168054638532338e-06, "loss": 0.159, "step": 2681 }, { "epoch": 0.8690861957226182, "grad_norm": 0.7972445487976074, "learning_rate": 4.167403076641063e-06, "loss": 0.1534, "step": 2682 }, { "epoch": 0.8694102397926118, "grad_norm": 0.8467311263084412, "learning_rate": 4.166751310675793e-06, "loss": 0.131, "step": 2683 }, { "epoch": 0.8697342838626053, "grad_norm": 0.8024827837944031, "learning_rate": 4.166099340716298e-06, "loss": 0.1496, "step": 2684 }, { "epoch": 0.8700583279325989, "grad_norm": 0.7870508432388306, "learning_rate": 4.165447166842373e-06, "loss": 0.1382, "step": 2685 }, { "epoch": 0.8703823720025924, "grad_norm": 0.8258494138717651, "learning_rate": 4.164794789133837e-06, "loss": 0.1617, "step": 2686 }, { "epoch": 0.8707064160725859, "grad_norm": 0.8265944719314575, "learning_rate": 4.164142207670536e-06, "loss": 0.1364, "step": 2687 }, { "epoch": 0.8710304601425793, "grad_norm": 0.8695728182792664, "learning_rate": 4.163489422532338e-06, "loss": 0.1628, "step": 2688 }, { "epoch": 0.8713545042125729, "grad_norm": 0.7880622744560242, "learning_rate": 4.162836433799139e-06, "loss": 0.1428, "step": 2689 }, { "epoch": 0.8716785482825664, "grad_norm": 0.7860510945320129, "learning_rate": 4.162183241550858e-06, "loss": 0.1382, "step": 2690 }, { "epoch": 0.87200259235256, "grad_norm": 0.8720436692237854, "learning_rate": 4.161529845867439e-06, "loss": 0.1501, "step": 2691 }, { "epoch": 0.8723266364225535, "grad_norm": 0.8479650020599365, "learning_rate": 4.160876246828853e-06, "loss": 0.1517, "step": 2692 }, { "epoch": 0.872650680492547, "grad_norm": 0.801552951335907, "learning_rate": 4.160222444515092e-06, "loss": 0.1478, "step": 2693 }, { "epoch": 0.8729747245625405, "grad_norm": 0.900095522403717, "learning_rate": 4.159568439006176e-06, "loss": 0.1571, "step": 2694 }, { "epoch": 0.873298768632534, "grad_norm": 0.9002645015716553, "learning_rate": 4.1589142303821485e-06, "loss": 0.1473, "step": 2695 }, { "epoch": 0.8736228127025275, "grad_norm": 0.9044235944747925, "learning_rate": 4.158259818723079e-06, "loss": 0.1545, "step": 2696 }, { "epoch": 0.873946856772521, "grad_norm": 0.8760396838188171, "learning_rate": 4.157605204109062e-06, "loss": 0.1445, "step": 2697 }, { "epoch": 0.8742709008425146, "grad_norm": 0.8378698825836182, "learning_rate": 4.156950386620214e-06, "loss": 0.1615, "step": 2698 }, { "epoch": 0.8745949449125081, "grad_norm": 0.7960793972015381, "learning_rate": 4.156295366336679e-06, "loss": 0.1423, "step": 2699 }, { "epoch": 0.8749189889825016, "grad_norm": 0.8463109135627747, "learning_rate": 4.155640143338625e-06, "loss": 0.1558, "step": 2700 }, { "epoch": 0.8752430330524952, "grad_norm": 0.7898882031440735, "learning_rate": 4.154984717706246e-06, "loss": 0.146, "step": 2701 }, { "epoch": 0.8755670771224887, "grad_norm": 0.8624311685562134, "learning_rate": 4.15432908951976e-06, "loss": 0.1512, "step": 2702 }, { "epoch": 0.8758911211924821, "grad_norm": 0.8539666533470154, "learning_rate": 4.153673258859406e-06, "loss": 0.1485, "step": 2703 }, { "epoch": 0.8762151652624757, "grad_norm": 0.8110834956169128, "learning_rate": 4.153017225805456e-06, "loss": 0.1387, "step": 2704 }, { "epoch": 0.8765392093324692, "grad_norm": 0.8246792554855347, "learning_rate": 4.1523609904382e-06, "loss": 0.1467, "step": 2705 }, { "epoch": 0.8768632534024627, "grad_norm": 0.8625628352165222, "learning_rate": 4.1517045528379544e-06, "loss": 0.1599, "step": 2706 }, { "epoch": 0.8771872974724563, "grad_norm": 0.8822122812271118, "learning_rate": 4.151047913085061e-06, "loss": 0.1621, "step": 2707 }, { "epoch": 0.8775113415424498, "grad_norm": 0.8297098278999329, "learning_rate": 4.150391071259886e-06, "loss": 0.1463, "step": 2708 }, { "epoch": 0.8778353856124433, "grad_norm": 0.8685880303382874, "learning_rate": 4.149734027442821e-06, "loss": 0.1599, "step": 2709 }, { "epoch": 0.8781594296824368, "grad_norm": 0.9043188095092773, "learning_rate": 4.149076781714283e-06, "loss": 0.1672, "step": 2710 }, { "epoch": 0.8784834737524303, "grad_norm": 0.7538388967514038, "learning_rate": 4.1484193341547106e-06, "loss": 0.1413, "step": 2711 }, { "epoch": 0.8788075178224238, "grad_norm": 0.8316788673400879, "learning_rate": 4.147761684844569e-06, "loss": 0.1477, "step": 2712 }, { "epoch": 0.8791315618924174, "grad_norm": 0.8419067859649658, "learning_rate": 4.147103833864349e-06, "loss": 0.155, "step": 2713 }, { "epoch": 0.8794556059624109, "grad_norm": 0.8238720297813416, "learning_rate": 4.146445781294566e-06, "loss": 0.1492, "step": 2714 }, { "epoch": 0.8797796500324044, "grad_norm": 0.7852333188056946, "learning_rate": 4.145787527215757e-06, "loss": 0.1459, "step": 2715 }, { "epoch": 0.880103694102398, "grad_norm": 0.8900299668312073, "learning_rate": 4.145129071708487e-06, "loss": 0.1781, "step": 2716 }, { "epoch": 0.8804277381723914, "grad_norm": 0.8969780802726746, "learning_rate": 4.144470414853345e-06, "loss": 0.1668, "step": 2717 }, { "epoch": 0.8807517822423849, "grad_norm": 0.880719006061554, "learning_rate": 4.143811556730944e-06, "loss": 0.1623, "step": 2718 }, { "epoch": 0.8810758263123785, "grad_norm": 0.8145759105682373, "learning_rate": 4.143152497421922e-06, "loss": 0.1431, "step": 2719 }, { "epoch": 0.881399870382372, "grad_norm": 0.8346846103668213, "learning_rate": 4.142493237006941e-06, "loss": 0.1461, "step": 2720 }, { "epoch": 0.8817239144523655, "grad_norm": 0.8116910457611084, "learning_rate": 4.141833775566688e-06, "loss": 0.1587, "step": 2721 }, { "epoch": 0.8820479585223591, "grad_norm": 0.843694806098938, "learning_rate": 4.1411741131818765e-06, "loss": 0.1443, "step": 2722 }, { "epoch": 0.8823720025923526, "grad_norm": 0.9246916174888611, "learning_rate": 4.14051424993324e-06, "loss": 0.1565, "step": 2723 }, { "epoch": 0.8826960466623461, "grad_norm": 0.7806346416473389, "learning_rate": 4.1398541859015405e-06, "loss": 0.1419, "step": 2724 }, { "epoch": 0.8830200907323396, "grad_norm": 0.9195976853370667, "learning_rate": 4.139193921167565e-06, "loss": 0.1572, "step": 2725 }, { "epoch": 0.8833441348023331, "grad_norm": 0.9154192209243774, "learning_rate": 4.138533455812121e-06, "loss": 0.1791, "step": 2726 }, { "epoch": 0.8836681788723266, "grad_norm": 0.7888971567153931, "learning_rate": 4.137872789916044e-06, "loss": 0.1437, "step": 2727 }, { "epoch": 0.8839922229423202, "grad_norm": 0.8127666711807251, "learning_rate": 4.137211923560195e-06, "loss": 0.1536, "step": 2728 }, { "epoch": 0.8843162670123137, "grad_norm": 0.8747718930244446, "learning_rate": 4.136550856825455e-06, "loss": 0.1627, "step": 2729 }, { "epoch": 0.8846403110823072, "grad_norm": 0.8618316650390625, "learning_rate": 4.135889589792733e-06, "loss": 0.1604, "step": 2730 }, { "epoch": 0.8849643551523008, "grad_norm": 0.8301213383674622, "learning_rate": 4.135228122542962e-06, "loss": 0.1573, "step": 2731 }, { "epoch": 0.8852883992222942, "grad_norm": 0.8168521523475647, "learning_rate": 4.1345664551570985e-06, "loss": 0.1595, "step": 2732 }, { "epoch": 0.8856124432922877, "grad_norm": 0.8900003433227539, "learning_rate": 4.133904587716126e-06, "loss": 0.1471, "step": 2733 }, { "epoch": 0.8859364873622813, "grad_norm": 0.7271791696548462, "learning_rate": 4.133242520301049e-06, "loss": 0.1235, "step": 2734 }, { "epoch": 0.8862605314322748, "grad_norm": 0.8689054250717163, "learning_rate": 4.132580252992898e-06, "loss": 0.1585, "step": 2735 }, { "epoch": 0.8865845755022683, "grad_norm": 0.8039261102676392, "learning_rate": 4.131917785872728e-06, "loss": 0.1519, "step": 2736 }, { "epoch": 0.8869086195722619, "grad_norm": 0.819491446018219, "learning_rate": 4.13125511902162e-06, "loss": 0.1567, "step": 2737 }, { "epoch": 0.8872326636422554, "grad_norm": 0.9310662746429443, "learning_rate": 4.130592252520677e-06, "loss": 0.1694, "step": 2738 }, { "epoch": 0.8875567077122488, "grad_norm": 0.79558265209198, "learning_rate": 4.129929186451028e-06, "loss": 0.155, "step": 2739 }, { "epoch": 0.8878807517822424, "grad_norm": 0.7396335601806641, "learning_rate": 4.129265920893826e-06, "loss": 0.139, "step": 2740 }, { "epoch": 0.8882047958522359, "grad_norm": 0.803795576095581, "learning_rate": 4.128602455930247e-06, "loss": 0.1545, "step": 2741 }, { "epoch": 0.8885288399222294, "grad_norm": 0.8279784321784973, "learning_rate": 4.127938791641493e-06, "loss": 0.1427, "step": 2742 }, { "epoch": 0.888852883992223, "grad_norm": 0.8597333431243896, "learning_rate": 4.127274928108792e-06, "loss": 0.1665, "step": 2743 }, { "epoch": 0.8891769280622165, "grad_norm": 0.9069391489028931, "learning_rate": 4.126610865413392e-06, "loss": 0.1663, "step": 2744 }, { "epoch": 0.88950097213221, "grad_norm": 0.9247065186500549, "learning_rate": 4.125946603636569e-06, "loss": 0.162, "step": 2745 }, { "epoch": 0.8898250162022034, "grad_norm": 0.85749751329422, "learning_rate": 4.125282142859622e-06, "loss": 0.1452, "step": 2746 }, { "epoch": 0.890149060272197, "grad_norm": 0.8194043636322021, "learning_rate": 4.124617483163876e-06, "loss": 0.1525, "step": 2747 }, { "epoch": 0.8904731043421905, "grad_norm": 0.8424063324928284, "learning_rate": 4.123952624630676e-06, "loss": 0.1455, "step": 2748 }, { "epoch": 0.890797148412184, "grad_norm": 0.8458553552627563, "learning_rate": 4.123287567341396e-06, "loss": 0.1526, "step": 2749 }, { "epoch": 0.8911211924821776, "grad_norm": 0.822384774684906, "learning_rate": 4.122622311377433e-06, "loss": 0.1426, "step": 2750 }, { "epoch": 0.8914452365521711, "grad_norm": 0.903175413608551, "learning_rate": 4.121956856820207e-06, "loss": 0.1571, "step": 2751 }, { "epoch": 0.8917692806221647, "grad_norm": 0.8061724305152893, "learning_rate": 4.1212912037511634e-06, "loss": 0.1398, "step": 2752 }, { "epoch": 0.8920933246921582, "grad_norm": 0.8506471514701843, "learning_rate": 4.1206253522517725e-06, "loss": 0.1521, "step": 2753 }, { "epoch": 0.8924173687621516, "grad_norm": 0.8087679743766785, "learning_rate": 4.119959302403527e-06, "loss": 0.1345, "step": 2754 }, { "epoch": 0.8927414128321451, "grad_norm": 0.8700354695320129, "learning_rate": 4.119293054287945e-06, "loss": 0.1518, "step": 2755 }, { "epoch": 0.8930654569021387, "grad_norm": 0.824869692325592, "learning_rate": 4.118626607986569e-06, "loss": 0.1392, "step": 2756 }, { "epoch": 0.8933895009721322, "grad_norm": 0.8283060193061829, "learning_rate": 4.1179599635809654e-06, "loss": 0.1466, "step": 2757 }, { "epoch": 0.8937135450421257, "grad_norm": 0.9671257138252258, "learning_rate": 4.1172931211527254e-06, "loss": 0.1638, "step": 2758 }, { "epoch": 0.8940375891121193, "grad_norm": 0.9112168550491333, "learning_rate": 4.116626080783464e-06, "loss": 0.1666, "step": 2759 }, { "epoch": 0.8943616331821128, "grad_norm": 0.9166633486747742, "learning_rate": 4.1159588425548215e-06, "loss": 0.1697, "step": 2760 }, { "epoch": 0.8946856772521062, "grad_norm": 0.836449384689331, "learning_rate": 4.11529140654846e-06, "loss": 0.1504, "step": 2761 }, { "epoch": 0.8950097213220998, "grad_norm": 0.7775622606277466, "learning_rate": 4.114623772846067e-06, "loss": 0.1301, "step": 2762 }, { "epoch": 0.8953337653920933, "grad_norm": 0.8575242757797241, "learning_rate": 4.113955941529355e-06, "loss": 0.1571, "step": 2763 }, { "epoch": 0.8956578094620868, "grad_norm": 0.968856930732727, "learning_rate": 4.113287912680061e-06, "loss": 0.1669, "step": 2764 }, { "epoch": 0.8959818535320804, "grad_norm": 0.8376212120056152, "learning_rate": 4.112619686379944e-06, "loss": 0.1504, "step": 2765 }, { "epoch": 0.8963058976020739, "grad_norm": 0.8225062489509583, "learning_rate": 4.111951262710788e-06, "loss": 0.1574, "step": 2766 }, { "epoch": 0.8966299416720674, "grad_norm": 0.7696053981781006, "learning_rate": 4.111282641754403e-06, "loss": 0.1356, "step": 2767 }, { "epoch": 0.8969539857420609, "grad_norm": 0.8558640480041504, "learning_rate": 4.110613823592621e-06, "loss": 0.1712, "step": 2768 }, { "epoch": 0.8972780298120544, "grad_norm": 0.8523713946342468, "learning_rate": 4.109944808307298e-06, "loss": 0.1461, "step": 2769 }, { "epoch": 0.8976020738820479, "grad_norm": 0.7888774275779724, "learning_rate": 4.109275595980316e-06, "loss": 0.1427, "step": 2770 }, { "epoch": 0.8979261179520415, "grad_norm": 0.8624205589294434, "learning_rate": 4.108606186693582e-06, "loss": 0.1652, "step": 2771 }, { "epoch": 0.898250162022035, "grad_norm": 0.8590965867042542, "learning_rate": 4.1079365805290214e-06, "loss": 0.1639, "step": 2772 }, { "epoch": 0.8985742060920285, "grad_norm": 0.8436129093170166, "learning_rate": 4.10726677756859e-06, "loss": 0.1465, "step": 2773 }, { "epoch": 0.8988982501620221, "grad_norm": 0.791901171207428, "learning_rate": 4.106596777894265e-06, "loss": 0.1377, "step": 2774 }, { "epoch": 0.8992222942320156, "grad_norm": 0.8803940415382385, "learning_rate": 4.105926581588046e-06, "loss": 0.1592, "step": 2775 }, { "epoch": 0.899546338302009, "grad_norm": 0.8036686778068542, "learning_rate": 4.105256188731962e-06, "loss": 0.1501, "step": 2776 }, { "epoch": 0.8998703823720026, "grad_norm": 0.8757483959197998, "learning_rate": 4.104585599408059e-06, "loss": 0.1658, "step": 2777 }, { "epoch": 0.9001944264419961, "grad_norm": 0.8514494299888611, "learning_rate": 4.1039148136984134e-06, "loss": 0.1547, "step": 2778 }, { "epoch": 0.9005184705119896, "grad_norm": 0.9344333410263062, "learning_rate": 4.103243831685121e-06, "loss": 0.175, "step": 2779 }, { "epoch": 0.9008425145819832, "grad_norm": 0.8821116089820862, "learning_rate": 4.102572653450304e-06, "loss": 0.1616, "step": 2780 }, { "epoch": 0.9011665586519767, "grad_norm": 0.7999144196510315, "learning_rate": 4.101901279076108e-06, "loss": 0.1549, "step": 2781 }, { "epoch": 0.9014906027219702, "grad_norm": 0.8356010317802429, "learning_rate": 4.101229708644704e-06, "loss": 0.1421, "step": 2782 }, { "epoch": 0.9018146467919637, "grad_norm": 0.7832415699958801, "learning_rate": 4.100557942238284e-06, "loss": 0.1453, "step": 2783 }, { "epoch": 0.9021386908619572, "grad_norm": 0.7718949913978577, "learning_rate": 4.099885979939068e-06, "loss": 0.1266, "step": 2784 }, { "epoch": 0.9024627349319507, "grad_norm": 0.8561499118804932, "learning_rate": 4.099213821829295e-06, "loss": 0.1477, "step": 2785 }, { "epoch": 0.9027867790019443, "grad_norm": 0.9258077144622803, "learning_rate": 4.098541467991231e-06, "loss": 0.1668, "step": 2786 }, { "epoch": 0.9031108230719378, "grad_norm": 0.9254985451698303, "learning_rate": 4.097868918507168e-06, "loss": 0.152, "step": 2787 }, { "epoch": 0.9034348671419313, "grad_norm": 0.8128352165222168, "learning_rate": 4.097196173459417e-06, "loss": 0.1525, "step": 2788 }, { "epoch": 0.9037589112119249, "grad_norm": 0.885798454284668, "learning_rate": 4.0965232329303175e-06, "loss": 0.1561, "step": 2789 }, { "epoch": 0.9040829552819183, "grad_norm": 0.8290806412696838, "learning_rate": 4.095850097002228e-06, "loss": 0.1504, "step": 2790 }, { "epoch": 0.9044069993519118, "grad_norm": 0.8356497883796692, "learning_rate": 4.095176765757537e-06, "loss": 0.157, "step": 2791 }, { "epoch": 0.9047310434219054, "grad_norm": 0.8767359256744385, "learning_rate": 4.094503239278652e-06, "loss": 0.1465, "step": 2792 }, { "epoch": 0.9050550874918989, "grad_norm": 0.8219475746154785, "learning_rate": 4.0938295176480055e-06, "loss": 0.1561, "step": 2793 }, { "epoch": 0.9053791315618924, "grad_norm": 0.8670582175254822, "learning_rate": 4.093155600948057e-06, "loss": 0.1659, "step": 2794 }, { "epoch": 0.905703175631886, "grad_norm": 0.7694851756095886, "learning_rate": 4.092481489261285e-06, "loss": 0.135, "step": 2795 }, { "epoch": 0.9060272197018795, "grad_norm": 0.819607138633728, "learning_rate": 4.0918071826701966e-06, "loss": 0.1543, "step": 2796 }, { "epoch": 0.906351263771873, "grad_norm": 0.8917694091796875, "learning_rate": 4.091132681257317e-06, "loss": 0.1595, "step": 2797 }, { "epoch": 0.9066753078418665, "grad_norm": 0.8101462721824646, "learning_rate": 4.090457985105202e-06, "loss": 0.1539, "step": 2798 }, { "epoch": 0.90699935191186, "grad_norm": 0.8657563328742981, "learning_rate": 4.089783094296425e-06, "loss": 0.1734, "step": 2799 }, { "epoch": 0.9073233959818535, "grad_norm": 0.8259778022766113, "learning_rate": 4.089108008913589e-06, "loss": 0.1483, "step": 2800 }, { "epoch": 0.907647440051847, "grad_norm": 0.7770955562591553, "learning_rate": 4.088432729039316e-06, "loss": 0.1395, "step": 2801 }, { "epoch": 0.9079714841218406, "grad_norm": 0.7907054424285889, "learning_rate": 4.087757254756254e-06, "loss": 0.1453, "step": 2802 }, { "epoch": 0.9082955281918341, "grad_norm": 0.7664549350738525, "learning_rate": 4.087081586147075e-06, "loss": 0.1341, "step": 2803 }, { "epoch": 0.9086195722618277, "grad_norm": 0.7957342267036438, "learning_rate": 4.086405723294474e-06, "loss": 0.1443, "step": 2804 }, { "epoch": 0.9089436163318211, "grad_norm": 0.8862059712409973, "learning_rate": 4.0857296662811696e-06, "loss": 0.155, "step": 2805 }, { "epoch": 0.9092676604018146, "grad_norm": 0.7839810252189636, "learning_rate": 4.085053415189905e-06, "loss": 0.1337, "step": 2806 }, { "epoch": 0.9095917044718081, "grad_norm": 0.8341511487960815, "learning_rate": 4.084376970103448e-06, "loss": 0.143, "step": 2807 }, { "epoch": 0.9099157485418017, "grad_norm": 0.8733060956001282, "learning_rate": 4.0837003311045865e-06, "loss": 0.1542, "step": 2808 }, { "epoch": 0.9102397926117952, "grad_norm": 0.821233332157135, "learning_rate": 4.083023498276136e-06, "loss": 0.1525, "step": 2809 }, { "epoch": 0.9105638366817888, "grad_norm": 0.8411147594451904, "learning_rate": 4.082346471700935e-06, "loss": 0.1471, "step": 2810 }, { "epoch": 0.9108878807517823, "grad_norm": 0.8866571187973022, "learning_rate": 4.081669251461844e-06, "loss": 0.1625, "step": 2811 }, { "epoch": 0.9112119248217757, "grad_norm": 0.8973330855369568, "learning_rate": 4.080991837641748e-06, "loss": 0.1481, "step": 2812 }, { "epoch": 0.9115359688917692, "grad_norm": 0.794649600982666, "learning_rate": 4.080314230323556e-06, "loss": 0.136, "step": 2813 }, { "epoch": 0.9118600129617628, "grad_norm": 0.8813613057136536, "learning_rate": 4.079636429590201e-06, "loss": 0.1581, "step": 2814 }, { "epoch": 0.9121840570317563, "grad_norm": 0.8289700150489807, "learning_rate": 4.07895843552464e-06, "loss": 0.1558, "step": 2815 }, { "epoch": 0.9125081011017498, "grad_norm": 0.8710315227508545, "learning_rate": 4.078280248209851e-06, "loss": 0.1582, "step": 2816 }, { "epoch": 0.9128321451717434, "grad_norm": 0.7751961350440979, "learning_rate": 4.077601867728839e-06, "loss": 0.1398, "step": 2817 }, { "epoch": 0.9131561892417369, "grad_norm": 0.7780879139900208, "learning_rate": 4.07692329416463e-06, "loss": 0.1455, "step": 2818 }, { "epoch": 0.9134802333117304, "grad_norm": 0.8030531406402588, "learning_rate": 4.0762445276002765e-06, "loss": 0.1471, "step": 2819 }, { "epoch": 0.9138042773817239, "grad_norm": 0.8089961409568787, "learning_rate": 4.075565568118852e-06, "loss": 0.1606, "step": 2820 }, { "epoch": 0.9141283214517174, "grad_norm": 0.7848464846611023, "learning_rate": 4.074886415803454e-06, "loss": 0.1508, "step": 2821 }, { "epoch": 0.9144523655217109, "grad_norm": 0.9256161451339722, "learning_rate": 4.074207070737205e-06, "loss": 0.1608, "step": 2822 }, { "epoch": 0.9147764095917045, "grad_norm": 0.8239982724189758, "learning_rate": 4.07352753300325e-06, "loss": 0.1536, "step": 2823 }, { "epoch": 0.915100453661698, "grad_norm": 0.823776364326477, "learning_rate": 4.072847802684758e-06, "loss": 0.1555, "step": 2824 }, { "epoch": 0.9154244977316915, "grad_norm": 0.8191870450973511, "learning_rate": 4.072167879864922e-06, "loss": 0.1474, "step": 2825 }, { "epoch": 0.9157485418016851, "grad_norm": 0.8104268908500671, "learning_rate": 4.071487764626957e-06, "loss": 0.1426, "step": 2826 }, { "epoch": 0.9160725858716785, "grad_norm": 0.8624962568283081, "learning_rate": 4.070807457054102e-06, "loss": 0.1455, "step": 2827 }, { "epoch": 0.916396629941672, "grad_norm": 0.8465151786804199, "learning_rate": 4.070126957229622e-06, "loss": 0.1671, "step": 2828 }, { "epoch": 0.9167206740116656, "grad_norm": 0.845751166343689, "learning_rate": 4.069446265236801e-06, "loss": 0.1483, "step": 2829 }, { "epoch": 0.9170447180816591, "grad_norm": 0.8201125264167786, "learning_rate": 4.068765381158951e-06, "loss": 0.1479, "step": 2830 }, { "epoch": 0.9173687621516526, "grad_norm": 0.9279409646987915, "learning_rate": 4.068084305079406e-06, "loss": 0.1677, "step": 2831 }, { "epoch": 0.9176928062216462, "grad_norm": 0.8532978892326355, "learning_rate": 4.067403037081522e-06, "loss": 0.1493, "step": 2832 }, { "epoch": 0.9180168502916397, "grad_norm": 0.8532582521438599, "learning_rate": 4.06672157724868e-06, "loss": 0.139, "step": 2833 }, { "epoch": 0.9183408943616331, "grad_norm": 0.822012722492218, "learning_rate": 4.066039925664283e-06, "loss": 0.1565, "step": 2834 }, { "epoch": 0.9186649384316267, "grad_norm": 0.8295703530311584, "learning_rate": 4.06535808241176e-06, "loss": 0.1484, "step": 2835 }, { "epoch": 0.9189889825016202, "grad_norm": 0.9345645904541016, "learning_rate": 4.064676047574561e-06, "loss": 0.1516, "step": 2836 }, { "epoch": 0.9193130265716137, "grad_norm": 0.8906318545341492, "learning_rate": 4.063993821236162e-06, "loss": 0.1548, "step": 2837 }, { "epoch": 0.9196370706416073, "grad_norm": 0.853904664516449, "learning_rate": 4.063311403480061e-06, "loss": 0.1597, "step": 2838 }, { "epoch": 0.9199611147116008, "grad_norm": 0.8370136618614197, "learning_rate": 4.0626287943897765e-06, "loss": 0.1402, "step": 2839 }, { "epoch": 0.9202851587815943, "grad_norm": 0.8235974907875061, "learning_rate": 4.061945994048855e-06, "loss": 0.1506, "step": 2840 }, { "epoch": 0.9206092028515879, "grad_norm": 0.8228686451911926, "learning_rate": 4.061263002540865e-06, "loss": 0.1543, "step": 2841 }, { "epoch": 0.9209332469215813, "grad_norm": 0.8150495886802673, "learning_rate": 4.060579819949398e-06, "loss": 0.1528, "step": 2842 }, { "epoch": 0.9212572909915748, "grad_norm": 0.7938353419303894, "learning_rate": 4.059896446358068e-06, "loss": 0.1377, "step": 2843 }, { "epoch": 0.9215813350615684, "grad_norm": 0.8098077774047852, "learning_rate": 4.059212881850515e-06, "loss": 0.1437, "step": 2844 }, { "epoch": 0.9219053791315619, "grad_norm": 0.9222689270973206, "learning_rate": 4.0585291265103985e-06, "loss": 0.1548, "step": 2845 }, { "epoch": 0.9222294232015554, "grad_norm": 0.8263746500015259, "learning_rate": 4.057845180421405e-06, "loss": 0.1441, "step": 2846 }, { "epoch": 0.922553467271549, "grad_norm": 0.8179762363433838, "learning_rate": 4.057161043667243e-06, "loss": 0.1468, "step": 2847 }, { "epoch": 0.9228775113415425, "grad_norm": 0.8503578901290894, "learning_rate": 4.056476716331643e-06, "loss": 0.1513, "step": 2848 }, { "epoch": 0.9232015554115359, "grad_norm": 0.8281416893005371, "learning_rate": 4.05579219849836e-06, "loss": 0.1535, "step": 2849 }, { "epoch": 0.9235255994815295, "grad_norm": 0.8391620516777039, "learning_rate": 4.055107490251175e-06, "loss": 0.1475, "step": 2850 }, { "epoch": 0.923849643551523, "grad_norm": 0.805622935295105, "learning_rate": 4.054422591673887e-06, "loss": 0.1424, "step": 2851 }, { "epoch": 0.9241736876215165, "grad_norm": 0.8314092755317688, "learning_rate": 4.0537375028503225e-06, "loss": 0.1421, "step": 2852 }, { "epoch": 0.9244977316915101, "grad_norm": 0.9176031947135925, "learning_rate": 4.053052223864328e-06, "loss": 0.152, "step": 2853 }, { "epoch": 0.9248217757615036, "grad_norm": 0.8071728944778442, "learning_rate": 4.052366754799776e-06, "loss": 0.1525, "step": 2854 }, { "epoch": 0.9251458198314971, "grad_norm": 0.8542532920837402, "learning_rate": 4.051681095740561e-06, "loss": 0.1671, "step": 2855 }, { "epoch": 0.9254698639014906, "grad_norm": 0.8364072442054749, "learning_rate": 4.050995246770602e-06, "loss": 0.1663, "step": 2856 }, { "epoch": 0.9257939079714841, "grad_norm": 0.8671528100967407, "learning_rate": 4.05030920797384e-06, "loss": 0.1529, "step": 2857 }, { "epoch": 0.9261179520414776, "grad_norm": 0.8020675778388977, "learning_rate": 4.049622979434239e-06, "loss": 0.1581, "step": 2858 }, { "epoch": 0.9264419961114712, "grad_norm": 0.8543144464492798, "learning_rate": 4.0489365612357854e-06, "loss": 0.1452, "step": 2859 }, { "epoch": 0.9267660401814647, "grad_norm": 0.8329032063484192, "learning_rate": 4.0482499534624934e-06, "loss": 0.1609, "step": 2860 }, { "epoch": 0.9270900842514582, "grad_norm": 0.8056915998458862, "learning_rate": 4.047563156198394e-06, "loss": 0.1452, "step": 2861 }, { "epoch": 0.9274141283214518, "grad_norm": 0.8419206738471985, "learning_rate": 4.046876169527547e-06, "loss": 0.1529, "step": 2862 }, { "epoch": 0.9277381723914452, "grad_norm": 0.8076557517051697, "learning_rate": 4.04618899353403e-06, "loss": 0.151, "step": 2863 }, { "epoch": 0.9280622164614387, "grad_norm": 0.8330927491188049, "learning_rate": 4.04550162830195e-06, "loss": 0.1523, "step": 2864 }, { "epoch": 0.9283862605314323, "grad_norm": 0.8939826488494873, "learning_rate": 4.044814073915432e-06, "loss": 0.1744, "step": 2865 }, { "epoch": 0.9287103046014258, "grad_norm": 0.8498444557189941, "learning_rate": 4.044126330458626e-06, "loss": 0.1471, "step": 2866 }, { "epoch": 0.9290343486714193, "grad_norm": 0.7920177578926086, "learning_rate": 4.0434383980157055e-06, "loss": 0.1439, "step": 2867 }, { "epoch": 0.9293583927414129, "grad_norm": 0.8161009550094604, "learning_rate": 4.042750276670867e-06, "loss": 0.1435, "step": 2868 }, { "epoch": 0.9296824368114064, "grad_norm": 0.7840574383735657, "learning_rate": 4.04206196650833e-06, "loss": 0.1332, "step": 2869 }, { "epoch": 0.9300064808813999, "grad_norm": 0.808705747127533, "learning_rate": 4.041373467612337e-06, "loss": 0.149, "step": 2870 }, { "epoch": 0.9303305249513933, "grad_norm": 0.7589499354362488, "learning_rate": 4.0406847800671515e-06, "loss": 0.1438, "step": 2871 }, { "epoch": 0.9306545690213869, "grad_norm": 0.9110842943191528, "learning_rate": 4.0399959039570646e-06, "loss": 0.172, "step": 2872 }, { "epoch": 0.9309786130913804, "grad_norm": 0.802574634552002, "learning_rate": 4.039306839366387e-06, "loss": 0.145, "step": 2873 }, { "epoch": 0.931302657161374, "grad_norm": 0.8501918315887451, "learning_rate": 4.038617586379455e-06, "loss": 0.1559, "step": 2874 }, { "epoch": 0.9316267012313675, "grad_norm": 0.8628923296928406, "learning_rate": 4.0379281450806255e-06, "loss": 0.1472, "step": 2875 }, { "epoch": 0.931950745301361, "grad_norm": 0.8358166813850403, "learning_rate": 4.037238515554278e-06, "loss": 0.1493, "step": 2876 }, { "epoch": 0.9322747893713546, "grad_norm": 0.7423512935638428, "learning_rate": 4.0365486978848176e-06, "loss": 0.1399, "step": 2877 }, { "epoch": 0.932598833441348, "grad_norm": 0.7642088532447815, "learning_rate": 4.035858692156673e-06, "loss": 0.1341, "step": 2878 }, { "epoch": 0.9329228775113415, "grad_norm": 0.8165916800498962, "learning_rate": 4.035168498454292e-06, "loss": 0.142, "step": 2879 }, { "epoch": 0.933246921581335, "grad_norm": 0.8518221974372864, "learning_rate": 4.034478116862149e-06, "loss": 0.1422, "step": 2880 }, { "epoch": 0.9335709656513286, "grad_norm": 0.7966205477714539, "learning_rate": 4.033787547464738e-06, "loss": 0.1502, "step": 2881 }, { "epoch": 0.9338950097213221, "grad_norm": 0.7498399019241333, "learning_rate": 4.033096790346581e-06, "loss": 0.1329, "step": 2882 }, { "epoch": 0.9342190537913156, "grad_norm": 0.8181966543197632, "learning_rate": 4.032405845592218e-06, "loss": 0.1494, "step": 2883 }, { "epoch": 0.9345430978613092, "grad_norm": 0.8142485618591309, "learning_rate": 4.0317147132862135e-06, "loss": 0.1386, "step": 2884 }, { "epoch": 0.9348671419313026, "grad_norm": 0.8145033717155457, "learning_rate": 4.031023393513157e-06, "loss": 0.139, "step": 2885 }, { "epoch": 0.9351911860012961, "grad_norm": 0.8427969217300415, "learning_rate": 4.030331886357659e-06, "loss": 0.1607, "step": 2886 }, { "epoch": 0.9355152300712897, "grad_norm": 0.7301238179206848, "learning_rate": 4.029640191904352e-06, "loss": 0.1297, "step": 2887 }, { "epoch": 0.9358392741412832, "grad_norm": 0.8938672542572021, "learning_rate": 4.028948310237893e-06, "loss": 0.1588, "step": 2888 }, { "epoch": 0.9361633182112767, "grad_norm": 0.8654724955558777, "learning_rate": 4.0282562414429635e-06, "loss": 0.1537, "step": 2889 }, { "epoch": 0.9364873622812703, "grad_norm": 0.8152555823326111, "learning_rate": 4.027563985604264e-06, "loss": 0.1551, "step": 2890 }, { "epoch": 0.9368114063512638, "grad_norm": 0.8133431077003479, "learning_rate": 4.026871542806521e-06, "loss": 0.1523, "step": 2891 }, { "epoch": 0.9371354504212573, "grad_norm": 0.9307568073272705, "learning_rate": 4.026178913134482e-06, "loss": 0.1737, "step": 2892 }, { "epoch": 0.9374594944912508, "grad_norm": 0.8865219950675964, "learning_rate": 4.02548609667292e-06, "loss": 0.1419, "step": 2893 }, { "epoch": 0.9377835385612443, "grad_norm": 0.8078862428665161, "learning_rate": 4.024793093506626e-06, "loss": 0.1528, "step": 2894 }, { "epoch": 0.9381075826312378, "grad_norm": 0.7754785418510437, "learning_rate": 4.024099903720419e-06, "loss": 0.1438, "step": 2895 }, { "epoch": 0.9384316267012314, "grad_norm": 0.7733836770057678, "learning_rate": 4.023406527399137e-06, "loss": 0.1337, "step": 2896 }, { "epoch": 0.9387556707712249, "grad_norm": 0.795275092124939, "learning_rate": 4.022712964627645e-06, "loss": 0.1536, "step": 2897 }, { "epoch": 0.9390797148412184, "grad_norm": 0.8315867781639099, "learning_rate": 4.022019215490827e-06, "loss": 0.1476, "step": 2898 }, { "epoch": 0.939403758911212, "grad_norm": 0.8151440024375916, "learning_rate": 4.021325280073592e-06, "loss": 0.1532, "step": 2899 }, { "epoch": 0.9397278029812054, "grad_norm": 0.8618231415748596, "learning_rate": 4.0206311584608705e-06, "loss": 0.1506, "step": 2900 }, { "epoch": 0.9400518470511989, "grad_norm": 0.84187912940979, "learning_rate": 4.019936850737615e-06, "loss": 0.1488, "step": 2901 }, { "epoch": 0.9403758911211925, "grad_norm": 0.8414024710655212, "learning_rate": 4.019242356988803e-06, "loss": 0.1523, "step": 2902 }, { "epoch": 0.940699935191186, "grad_norm": 0.8652313947677612, "learning_rate": 4.018547677299434e-06, "loss": 0.1571, "step": 2903 }, { "epoch": 0.9410239792611795, "grad_norm": 0.9478034973144531, "learning_rate": 4.01785281175453e-06, "loss": 0.1612, "step": 2904 }, { "epoch": 0.9413480233311731, "grad_norm": 0.8561654686927795, "learning_rate": 4.017157760439136e-06, "loss": 0.1425, "step": 2905 }, { "epoch": 0.9416720674011666, "grad_norm": 0.840130627155304, "learning_rate": 4.01646252343832e-06, "loss": 0.1546, "step": 2906 }, { "epoch": 0.94199611147116, "grad_norm": 0.7839550971984863, "learning_rate": 4.015767100837171e-06, "loss": 0.1372, "step": 2907 }, { "epoch": 0.9423201555411536, "grad_norm": 0.8164642453193665, "learning_rate": 4.015071492720802e-06, "loss": 0.1482, "step": 2908 }, { "epoch": 0.9426441996111471, "grad_norm": 0.900124728679657, "learning_rate": 4.014375699174351e-06, "loss": 0.1543, "step": 2909 }, { "epoch": 0.9429682436811406, "grad_norm": 0.8421492576599121, "learning_rate": 4.013679720282973e-06, "loss": 0.1596, "step": 2910 }, { "epoch": 0.9432922877511342, "grad_norm": 0.8553417921066284, "learning_rate": 4.012983556131852e-06, "loss": 0.1473, "step": 2911 }, { "epoch": 0.9436163318211277, "grad_norm": 0.9092195630073547, "learning_rate": 4.01228720680619e-06, "loss": 0.149, "step": 2912 }, { "epoch": 0.9439403758911212, "grad_norm": 0.8383063673973083, "learning_rate": 4.011590672391213e-06, "loss": 0.1466, "step": 2913 }, { "epoch": 0.9442644199611148, "grad_norm": 0.8686926364898682, "learning_rate": 4.010893952972173e-06, "loss": 0.1468, "step": 2914 }, { "epoch": 0.9445884640311082, "grad_norm": 0.844262957572937, "learning_rate": 4.010197048634338e-06, "loss": 0.1513, "step": 2915 }, { "epoch": 0.9449125081011017, "grad_norm": 0.7631595730781555, "learning_rate": 4.009499959463005e-06, "loss": 0.1396, "step": 2916 }, { "epoch": 0.9452365521710953, "grad_norm": 0.8633599877357483, "learning_rate": 4.00880268554349e-06, "loss": 0.1629, "step": 2917 }, { "epoch": 0.9455605962410888, "grad_norm": 0.8235397338867188, "learning_rate": 4.008105226961132e-06, "loss": 0.1392, "step": 2918 }, { "epoch": 0.9458846403110823, "grad_norm": 0.747604489326477, "learning_rate": 4.007407583801295e-06, "loss": 0.1274, "step": 2919 }, { "epoch": 0.9462086843810759, "grad_norm": 0.8771180510520935, "learning_rate": 4.006709756149362e-06, "loss": 0.1656, "step": 2920 }, { "epoch": 0.9465327284510694, "grad_norm": 0.8580746650695801, "learning_rate": 4.006011744090741e-06, "loss": 0.1534, "step": 2921 }, { "epoch": 0.9468567725210628, "grad_norm": 0.8321346044540405, "learning_rate": 4.005313547710861e-06, "loss": 0.1457, "step": 2922 }, { "epoch": 0.9471808165910564, "grad_norm": 0.7928733825683594, "learning_rate": 4.004615167095176e-06, "loss": 0.1487, "step": 2923 }, { "epoch": 0.9475048606610499, "grad_norm": 0.914631724357605, "learning_rate": 4.003916602329161e-06, "loss": 0.1602, "step": 2924 }, { "epoch": 0.9478289047310434, "grad_norm": 0.8683075904846191, "learning_rate": 4.0032178534983115e-06, "loss": 0.1569, "step": 2925 }, { "epoch": 0.948152948801037, "grad_norm": 0.817596435546875, "learning_rate": 4.00251892068815e-06, "loss": 0.1536, "step": 2926 }, { "epoch": 0.9484769928710305, "grad_norm": 0.8051871657371521, "learning_rate": 4.001819803984218e-06, "loss": 0.1396, "step": 2927 }, { "epoch": 0.948801036941024, "grad_norm": 0.8614630103111267, "learning_rate": 4.00112050347208e-06, "loss": 0.1549, "step": 2928 }, { "epoch": 0.9491250810110174, "grad_norm": 0.9327095150947571, "learning_rate": 4.000421019237326e-06, "loss": 0.1754, "step": 2929 }, { "epoch": 0.949449125081011, "grad_norm": 0.8932909369468689, "learning_rate": 3.999721351365563e-06, "loss": 0.1588, "step": 2930 }, { "epoch": 0.9497731691510045, "grad_norm": 0.7977306246757507, "learning_rate": 3.999021499942425e-06, "loss": 0.1565, "step": 2931 }, { "epoch": 0.950097213220998, "grad_norm": 0.800845205783844, "learning_rate": 3.998321465053568e-06, "loss": 0.1423, "step": 2932 }, { "epoch": 0.9504212572909916, "grad_norm": 0.7385462522506714, "learning_rate": 3.9976212467846674e-06, "loss": 0.1341, "step": 2933 }, { "epoch": 0.9507453013609851, "grad_norm": 0.8102892637252808, "learning_rate": 3.996920845221425e-06, "loss": 0.1369, "step": 2934 }, { "epoch": 0.9510693454309787, "grad_norm": 0.8875524401664734, "learning_rate": 3.996220260449563e-06, "loss": 0.1652, "step": 2935 }, { "epoch": 0.9513933895009722, "grad_norm": 0.7795872092247009, "learning_rate": 3.9955194925548245e-06, "loss": 0.1426, "step": 2936 }, { "epoch": 0.9517174335709656, "grad_norm": 0.9427581429481506, "learning_rate": 3.994818541622979e-06, "loss": 0.1694, "step": 2937 }, { "epoch": 0.9520414776409591, "grad_norm": 0.8169898986816406, "learning_rate": 3.994117407739814e-06, "loss": 0.1452, "step": 2938 }, { "epoch": 0.9523655217109527, "grad_norm": 0.8433765769004822, "learning_rate": 3.993416090991143e-06, "loss": 0.1441, "step": 2939 }, { "epoch": 0.9526895657809462, "grad_norm": 0.9561168551445007, "learning_rate": 3.992714591462799e-06, "loss": 0.1592, "step": 2940 }, { "epoch": 0.9530136098509397, "grad_norm": 0.7818630933761597, "learning_rate": 3.992012909240641e-06, "loss": 0.1417, "step": 2941 }, { "epoch": 0.9533376539209333, "grad_norm": 0.8289167284965515, "learning_rate": 3.991311044410546e-06, "loss": 0.15, "step": 2942 }, { "epoch": 0.9536616979909268, "grad_norm": 0.8170865774154663, "learning_rate": 3.990608997058416e-06, "loss": 0.1371, "step": 2943 }, { "epoch": 0.9539857420609202, "grad_norm": 0.8745966553688049, "learning_rate": 3.989906767270175e-06, "loss": 0.1588, "step": 2944 }, { "epoch": 0.9543097861309138, "grad_norm": 0.8419452905654907, "learning_rate": 3.989204355131769e-06, "loss": 0.1459, "step": 2945 }, { "epoch": 0.9546338302009073, "grad_norm": 0.8660749793052673, "learning_rate": 3.988501760729168e-06, "loss": 0.1471, "step": 2946 }, { "epoch": 0.9549578742709008, "grad_norm": 0.9283533096313477, "learning_rate": 3.98779898414836e-06, "loss": 0.1659, "step": 2947 }, { "epoch": 0.9552819183408944, "grad_norm": 0.8726081252098083, "learning_rate": 3.98709602547536e-06, "loss": 0.1589, "step": 2948 }, { "epoch": 0.9556059624108879, "grad_norm": 0.7964595556259155, "learning_rate": 3.986392884796202e-06, "loss": 0.1459, "step": 2949 }, { "epoch": 0.9559300064808814, "grad_norm": 0.8376349210739136, "learning_rate": 3.9856895621969435e-06, "loss": 0.1475, "step": 2950 }, { "epoch": 0.9562540505508749, "grad_norm": 0.8806761503219604, "learning_rate": 3.984986057763667e-06, "loss": 0.1524, "step": 2951 }, { "epoch": 0.9565780946208684, "grad_norm": 0.9261873960494995, "learning_rate": 3.984282371582472e-06, "loss": 0.133, "step": 2952 }, { "epoch": 0.9569021386908619, "grad_norm": 0.8796440958976746, "learning_rate": 3.983578503739483e-06, "loss": 0.151, "step": 2953 }, { "epoch": 0.9572261827608555, "grad_norm": 0.8551722168922424, "learning_rate": 3.982874454320849e-06, "loss": 0.1509, "step": 2954 }, { "epoch": 0.957550226830849, "grad_norm": 0.932380199432373, "learning_rate": 3.982170223412735e-06, "loss": 0.1656, "step": 2955 }, { "epoch": 0.9578742709008425, "grad_norm": 0.784862756729126, "learning_rate": 3.981465811101335e-06, "loss": 0.1379, "step": 2956 }, { "epoch": 0.9581983149708361, "grad_norm": 0.9122695922851562, "learning_rate": 3.9807612174728615e-06, "loss": 0.1561, "step": 2957 }, { "epoch": 0.9585223590408296, "grad_norm": 0.8077194094657898, "learning_rate": 3.98005644261355e-06, "loss": 0.15, "step": 2958 }, { "epoch": 0.958846403110823, "grad_norm": 0.8609473705291748, "learning_rate": 3.979351486609659e-06, "loss": 0.1593, "step": 2959 }, { "epoch": 0.9591704471808166, "grad_norm": 0.7915871739387512, "learning_rate": 3.978646349547466e-06, "loss": 0.1534, "step": 2960 }, { "epoch": 0.9594944912508101, "grad_norm": 0.8479771614074707, "learning_rate": 3.977941031513275e-06, "loss": 0.1421, "step": 2961 }, { "epoch": 0.9598185353208036, "grad_norm": 0.8406703472137451, "learning_rate": 3.977235532593408e-06, "loss": 0.1376, "step": 2962 }, { "epoch": 0.9601425793907972, "grad_norm": 0.9173598289489746, "learning_rate": 3.976529852874214e-06, "loss": 0.1566, "step": 2963 }, { "epoch": 0.9604666234607907, "grad_norm": 0.8462458252906799, "learning_rate": 3.975823992442058e-06, "loss": 0.1556, "step": 2964 }, { "epoch": 0.9607906675307842, "grad_norm": 0.8729339241981506, "learning_rate": 3.975117951383334e-06, "loss": 0.1509, "step": 2965 }, { "epoch": 0.9611147116007777, "grad_norm": 0.9013268351554871, "learning_rate": 3.974411729784453e-06, "loss": 0.1626, "step": 2966 }, { "epoch": 0.9614387556707712, "grad_norm": 0.7783372402191162, "learning_rate": 3.973705327731849e-06, "loss": 0.144, "step": 2967 }, { "epoch": 0.9617627997407647, "grad_norm": 0.7804936766624451, "learning_rate": 3.97299874531198e-06, "loss": 0.1509, "step": 2968 }, { "epoch": 0.9620868438107583, "grad_norm": 1.2105624675750732, "learning_rate": 3.972291982611325e-06, "loss": 0.2081, "step": 2969 }, { "epoch": 0.9624108878807518, "grad_norm": 0.8226312398910522, "learning_rate": 3.971585039716382e-06, "loss": 0.1561, "step": 2970 }, { "epoch": 0.9627349319507453, "grad_norm": 0.8245861530303955, "learning_rate": 3.970877916713678e-06, "loss": 0.1532, "step": 2971 }, { "epoch": 0.9630589760207389, "grad_norm": 0.8438794016838074, "learning_rate": 3.9701706136897564e-06, "loss": 0.1597, "step": 2972 }, { "epoch": 0.9633830200907323, "grad_norm": 0.8172274231910706, "learning_rate": 3.969463130731183e-06, "loss": 0.1548, "step": 2973 }, { "epoch": 0.9637070641607258, "grad_norm": 0.8268179893493652, "learning_rate": 3.968755467924549e-06, "loss": 0.1507, "step": 2974 }, { "epoch": 0.9640311082307194, "grad_norm": 0.7694240212440491, "learning_rate": 3.968047625356463e-06, "loss": 0.136, "step": 2975 }, { "epoch": 0.9643551523007129, "grad_norm": 0.8401308059692383, "learning_rate": 3.96733960311356e-06, "loss": 0.1548, "step": 2976 }, { "epoch": 0.9646791963707064, "grad_norm": 0.8167865872383118, "learning_rate": 3.966631401282495e-06, "loss": 0.1379, "step": 2977 }, { "epoch": 0.9650032404407, "grad_norm": 0.8723135590553284, "learning_rate": 3.965923019949944e-06, "loss": 0.1586, "step": 2978 }, { "epoch": 0.9653272845106935, "grad_norm": 0.8042678833007812, "learning_rate": 3.965214459202607e-06, "loss": 0.146, "step": 2979 }, { "epoch": 0.9656513285806869, "grad_norm": 0.8387596607208252, "learning_rate": 3.964505719127205e-06, "loss": 0.1541, "step": 2980 }, { "epoch": 0.9659753726506805, "grad_norm": 0.8149929642677307, "learning_rate": 3.963796799810479e-06, "loss": 0.1546, "step": 2981 }, { "epoch": 0.966299416720674, "grad_norm": 0.8523327708244324, "learning_rate": 3.9630877013391964e-06, "loss": 0.164, "step": 2982 }, { "epoch": 0.9666234607906675, "grad_norm": 0.7993361353874207, "learning_rate": 3.962378423800143e-06, "loss": 0.1415, "step": 2983 }, { "epoch": 0.9669475048606611, "grad_norm": 0.7707695364952087, "learning_rate": 3.961668967280128e-06, "loss": 0.1429, "step": 2984 }, { "epoch": 0.9672715489306546, "grad_norm": 0.7808859348297119, "learning_rate": 3.96095933186598e-06, "loss": 0.1367, "step": 2985 }, { "epoch": 0.9675955930006481, "grad_norm": 0.8761918544769287, "learning_rate": 3.960249517644553e-06, "loss": 0.1578, "step": 2986 }, { "epoch": 0.9679196370706417, "grad_norm": 0.8220571279525757, "learning_rate": 3.959539524702722e-06, "loss": 0.1414, "step": 2987 }, { "epoch": 0.9682436811406351, "grad_norm": 0.8590094447135925, "learning_rate": 3.958829353127383e-06, "loss": 0.1556, "step": 2988 }, { "epoch": 0.9685677252106286, "grad_norm": 0.8472662568092346, "learning_rate": 3.958119003005453e-06, "loss": 0.1653, "step": 2989 }, { "epoch": 0.9688917692806222, "grad_norm": 0.745740532875061, "learning_rate": 3.9574084744238735e-06, "loss": 0.1381, "step": 2990 }, { "epoch": 0.9692158133506157, "grad_norm": 0.8592861890792847, "learning_rate": 3.956697767469606e-06, "loss": 0.1637, "step": 2991 }, { "epoch": 0.9695398574206092, "grad_norm": 0.8486888408660889, "learning_rate": 3.955986882229632e-06, "loss": 0.1527, "step": 2992 }, { "epoch": 0.9698639014906028, "grad_norm": 0.9307520985603333, "learning_rate": 3.95527581879096e-06, "loss": 0.1672, "step": 2993 }, { "epoch": 0.9701879455605963, "grad_norm": 0.8392777442932129, "learning_rate": 3.954564577240615e-06, "loss": 0.139, "step": 2994 }, { "epoch": 0.9705119896305897, "grad_norm": 0.8226277828216553, "learning_rate": 3.9538531576656465e-06, "loss": 0.1563, "step": 2995 }, { "epoch": 0.9708360337005832, "grad_norm": 0.8464024662971497, "learning_rate": 3.953141560153128e-06, "loss": 0.16, "step": 2996 }, { "epoch": 0.9711600777705768, "grad_norm": 0.9020957350730896, "learning_rate": 3.952429784790148e-06, "loss": 0.1497, "step": 2997 }, { "epoch": 0.9714841218405703, "grad_norm": 0.8413184881210327, "learning_rate": 3.951717831663825e-06, "loss": 0.1578, "step": 2998 }, { "epoch": 0.9718081659105638, "grad_norm": 0.784361720085144, "learning_rate": 3.951005700861291e-06, "loss": 0.1408, "step": 2999 }, { "epoch": 0.9721322099805574, "grad_norm": 0.7845505475997925, "learning_rate": 3.9502933924697076e-06, "loss": 0.1428, "step": 3000 }, { "epoch": 0.9724562540505509, "grad_norm": 0.8205187320709229, "learning_rate": 3.949580906576252e-06, "loss": 0.1572, "step": 3001 }, { "epoch": 0.9727802981205443, "grad_norm": 0.8463999032974243, "learning_rate": 3.948868243268127e-06, "loss": 0.1402, "step": 3002 }, { "epoch": 0.9731043421905379, "grad_norm": 0.8993105292320251, "learning_rate": 3.948155402632554e-06, "loss": 0.1724, "step": 3003 }, { "epoch": 0.9734283862605314, "grad_norm": 0.7958623170852661, "learning_rate": 3.94744238475678e-06, "loss": 0.1367, "step": 3004 }, { "epoch": 0.9737524303305249, "grad_norm": 0.8215851783752441, "learning_rate": 3.94672918972807e-06, "loss": 0.1452, "step": 3005 }, { "epoch": 0.9740764744005185, "grad_norm": 0.75538569688797, "learning_rate": 3.946015817633714e-06, "loss": 0.1349, "step": 3006 }, { "epoch": 0.974400518470512, "grad_norm": 0.749919593334198, "learning_rate": 3.945302268561019e-06, "loss": 0.1394, "step": 3007 }, { "epoch": 0.9747245625405055, "grad_norm": 0.8285024166107178, "learning_rate": 3.944588542597319e-06, "loss": 0.1553, "step": 3008 }, { "epoch": 0.9750486066104991, "grad_norm": 0.8027199506759644, "learning_rate": 3.943874639829964e-06, "loss": 0.1453, "step": 3009 }, { "epoch": 0.9753726506804925, "grad_norm": 0.7956819534301758, "learning_rate": 3.943160560346332e-06, "loss": 0.1393, "step": 3010 }, { "epoch": 0.975696694750486, "grad_norm": 0.8369397521018982, "learning_rate": 3.942446304233819e-06, "loss": 0.1461, "step": 3011 }, { "epoch": 0.9760207388204796, "grad_norm": 0.8727890253067017, "learning_rate": 3.941731871579842e-06, "loss": 0.1707, "step": 3012 }, { "epoch": 0.9763447828904731, "grad_norm": 0.846352756023407, "learning_rate": 3.94101726247184e-06, "loss": 0.1551, "step": 3013 }, { "epoch": 0.9766688269604666, "grad_norm": 0.7778693437576294, "learning_rate": 3.9403024769972766e-06, "loss": 0.1372, "step": 3014 }, { "epoch": 0.9769928710304602, "grad_norm": 0.8392274975776672, "learning_rate": 3.939587515243632e-06, "loss": 0.1581, "step": 3015 }, { "epoch": 0.9773169151004537, "grad_norm": 0.8242346048355103, "learning_rate": 3.938872377298413e-06, "loss": 0.1449, "step": 3016 }, { "epoch": 0.9776409591704471, "grad_norm": 0.8143014907836914, "learning_rate": 3.938157063249144e-06, "loss": 0.1478, "step": 3017 }, { "epoch": 0.9779650032404407, "grad_norm": 0.8591554760932922, "learning_rate": 3.937441573183373e-06, "loss": 0.1429, "step": 3018 }, { "epoch": 0.9782890473104342, "grad_norm": 0.8570237159729004, "learning_rate": 3.936725907188668e-06, "loss": 0.1607, "step": 3019 }, { "epoch": 0.9786130913804277, "grad_norm": 0.8121863007545471, "learning_rate": 3.936010065352622e-06, "loss": 0.1423, "step": 3020 }, { "epoch": 0.9789371354504213, "grad_norm": 0.8878906965255737, "learning_rate": 3.935294047762844e-06, "loss": 0.1491, "step": 3021 }, { "epoch": 0.9792611795204148, "grad_norm": 0.8019052147865295, "learning_rate": 3.93457785450697e-06, "loss": 0.1525, "step": 3022 }, { "epoch": 0.9795852235904083, "grad_norm": 0.8467800617218018, "learning_rate": 3.933861485672656e-06, "loss": 0.1611, "step": 3023 }, { "epoch": 0.9799092676604018, "grad_norm": 0.816117525100708, "learning_rate": 3.933144941347574e-06, "loss": 0.1548, "step": 3024 }, { "epoch": 0.9802333117303953, "grad_norm": 0.8132639527320862, "learning_rate": 3.932428221619427e-06, "loss": 0.1482, "step": 3025 }, { "epoch": 0.9805573558003888, "grad_norm": 0.845690906047821, "learning_rate": 3.931711326575933e-06, "loss": 0.1483, "step": 3026 }, { "epoch": 0.9808813998703824, "grad_norm": 0.8532651662826538, "learning_rate": 3.9309942563048315e-06, "loss": 0.1628, "step": 3027 }, { "epoch": 0.9812054439403759, "grad_norm": 0.8248721957206726, "learning_rate": 3.930277010893887e-06, "loss": 0.1588, "step": 3028 }, { "epoch": 0.9815294880103694, "grad_norm": 0.8405593037605286, "learning_rate": 3.929559590430881e-06, "loss": 0.1566, "step": 3029 }, { "epoch": 0.981853532080363, "grad_norm": 0.6945734620094299, "learning_rate": 3.928841995003622e-06, "loss": 0.1208, "step": 3030 }, { "epoch": 0.9821775761503565, "grad_norm": 0.8376073241233826, "learning_rate": 3.928124224699935e-06, "loss": 0.1516, "step": 3031 }, { "epoch": 0.9825016202203499, "grad_norm": 0.855110228061676, "learning_rate": 3.927406279607668e-06, "loss": 0.1549, "step": 3032 }, { "epoch": 0.9828256642903435, "grad_norm": 0.900316596031189, "learning_rate": 3.92668815981469e-06, "loss": 0.164, "step": 3033 }, { "epoch": 0.983149708360337, "grad_norm": 0.8319350481033325, "learning_rate": 3.925969865408893e-06, "loss": 0.1537, "step": 3034 }, { "epoch": 0.9834737524303305, "grad_norm": 0.8483204245567322, "learning_rate": 3.925251396478189e-06, "loss": 0.1537, "step": 3035 }, { "epoch": 0.9837977965003241, "grad_norm": 0.8711523413658142, "learning_rate": 3.9245327531105115e-06, "loss": 0.1462, "step": 3036 }, { "epoch": 0.9841218405703176, "grad_norm": 0.7892616987228394, "learning_rate": 3.923813935393816e-06, "loss": 0.1363, "step": 3037 }, { "epoch": 0.9844458846403111, "grad_norm": 0.8650854229927063, "learning_rate": 3.923094943416078e-06, "loss": 0.1653, "step": 3038 }, { "epoch": 0.9847699287103046, "grad_norm": 0.8720736503601074, "learning_rate": 3.922375777265296e-06, "loss": 0.159, "step": 3039 }, { "epoch": 0.9850939727802981, "grad_norm": 0.8047921061515808, "learning_rate": 3.921656437029488e-06, "loss": 0.1389, "step": 3040 }, { "epoch": 0.9854180168502916, "grad_norm": 0.8041824102401733, "learning_rate": 3.9209369227966945e-06, "loss": 0.1431, "step": 3041 }, { "epoch": 0.9857420609202852, "grad_norm": 0.8418930768966675, "learning_rate": 3.920217234654978e-06, "loss": 0.1605, "step": 3042 }, { "epoch": 0.9860661049902787, "grad_norm": 0.7825391888618469, "learning_rate": 3.919497372692421e-06, "loss": 0.1443, "step": 3043 }, { "epoch": 0.9863901490602722, "grad_norm": 0.704852819442749, "learning_rate": 3.918777336997127e-06, "loss": 0.1255, "step": 3044 }, { "epoch": 0.9867141931302658, "grad_norm": 0.8802457451820374, "learning_rate": 3.918057127657222e-06, "loss": 0.1746, "step": 3045 }, { "epoch": 0.9870382372002592, "grad_norm": 0.9226857423782349, "learning_rate": 3.9173367447608525e-06, "loss": 0.1442, "step": 3046 }, { "epoch": 0.9873622812702527, "grad_norm": 0.8810186386108398, "learning_rate": 3.916616188396185e-06, "loss": 0.1583, "step": 3047 }, { "epoch": 0.9876863253402463, "grad_norm": 0.8760223388671875, "learning_rate": 3.915895458651411e-06, "loss": 0.1478, "step": 3048 }, { "epoch": 0.9880103694102398, "grad_norm": 0.8516616225242615, "learning_rate": 3.9151745556147404e-06, "loss": 0.151, "step": 3049 }, { "epoch": 0.9883344134802333, "grad_norm": 0.8152266144752502, "learning_rate": 3.914453479374403e-06, "loss": 0.1543, "step": 3050 }, { "epoch": 0.9886584575502269, "grad_norm": 0.7873851656913757, "learning_rate": 3.913732230018654e-06, "loss": 0.1565, "step": 3051 }, { "epoch": 0.9889825016202204, "grad_norm": 0.7410478591918945, "learning_rate": 3.913010807635765e-06, "loss": 0.1332, "step": 3052 }, { "epoch": 0.9893065456902139, "grad_norm": 0.8501771092414856, "learning_rate": 3.9122892123140324e-06, "loss": 0.1471, "step": 3053 }, { "epoch": 0.9896305897602073, "grad_norm": 0.8704087734222412, "learning_rate": 3.911567444141771e-06, "loss": 0.1569, "step": 3054 }, { "epoch": 0.9899546338302009, "grad_norm": 0.826313316822052, "learning_rate": 3.910845503207322e-06, "loss": 0.1441, "step": 3055 }, { "epoch": 0.9902786779001944, "grad_norm": 0.7761698961257935, "learning_rate": 3.9101233895990396e-06, "loss": 0.1338, "step": 3056 }, { "epoch": 0.990602721970188, "grad_norm": 0.7883891463279724, "learning_rate": 3.909401103405307e-06, "loss": 0.1445, "step": 3057 }, { "epoch": 0.9909267660401815, "grad_norm": 0.8291919231414795, "learning_rate": 3.908678644714522e-06, "loss": 0.1392, "step": 3058 }, { "epoch": 0.991250810110175, "grad_norm": 0.8155975937843323, "learning_rate": 3.907956013615108e-06, "loss": 0.1501, "step": 3059 }, { "epoch": 0.9915748541801686, "grad_norm": 0.7870499491691589, "learning_rate": 3.907233210195508e-06, "loss": 0.1436, "step": 3060 }, { "epoch": 0.991898898250162, "grad_norm": 0.8479496836662292, "learning_rate": 3.906510234544186e-06, "loss": 0.1534, "step": 3061 }, { "epoch": 0.9922229423201555, "grad_norm": 0.7778910994529724, "learning_rate": 3.905787086749628e-06, "loss": 0.144, "step": 3062 }, { "epoch": 0.992546986390149, "grad_norm": 0.7847429513931274, "learning_rate": 3.90506376690034e-06, "loss": 0.133, "step": 3063 }, { "epoch": 0.9928710304601426, "grad_norm": 0.8366702198982239, "learning_rate": 3.904340275084848e-06, "loss": 0.1524, "step": 3064 }, { "epoch": 0.9931950745301361, "grad_norm": 0.7599114179611206, "learning_rate": 3.9036166113917015e-06, "loss": 0.1384, "step": 3065 }, { "epoch": 0.9935191186001296, "grad_norm": 0.7668454051017761, "learning_rate": 3.90289277590947e-06, "loss": 0.1434, "step": 3066 }, { "epoch": 0.9938431626701232, "grad_norm": 0.7598092555999756, "learning_rate": 3.902168768726745e-06, "loss": 0.1231, "step": 3067 }, { "epoch": 0.9941672067401166, "grad_norm": 0.8079851865768433, "learning_rate": 3.9014445899321355e-06, "loss": 0.1425, "step": 3068 }, { "epoch": 0.9944912508101101, "grad_norm": 0.833429217338562, "learning_rate": 3.900720239614275e-06, "loss": 0.1472, "step": 3069 }, { "epoch": 0.9948152948801037, "grad_norm": 0.7490724325180054, "learning_rate": 3.899995717861818e-06, "loss": 0.1418, "step": 3070 }, { "epoch": 0.9951393389500972, "grad_norm": 0.785024106502533, "learning_rate": 3.899271024763438e-06, "loss": 0.1423, "step": 3071 }, { "epoch": 0.9954633830200907, "grad_norm": 0.7634562849998474, "learning_rate": 3.89854616040783e-06, "loss": 0.1292, "step": 3072 }, { "epoch": 0.9957874270900843, "grad_norm": 0.8108155131340027, "learning_rate": 3.897821124883711e-06, "loss": 0.1467, "step": 3073 }, { "epoch": 0.9961114711600778, "grad_norm": 0.9205564856529236, "learning_rate": 3.897095918279818e-06, "loss": 0.1715, "step": 3074 }, { "epoch": 0.9964355152300713, "grad_norm": 0.8294411897659302, "learning_rate": 3.896370540684911e-06, "loss": 0.1392, "step": 3075 }, { "epoch": 0.9967595593000648, "grad_norm": 0.827343761920929, "learning_rate": 3.895644992187767e-06, "loss": 0.1623, "step": 3076 }, { "epoch": 0.9970836033700583, "grad_norm": 0.7698377370834351, "learning_rate": 3.894919272877187e-06, "loss": 0.1421, "step": 3077 }, { "epoch": 0.9974076474400518, "grad_norm": 0.7445449829101562, "learning_rate": 3.894193382841991e-06, "loss": 0.1374, "step": 3078 }, { "epoch": 0.9977316915100454, "grad_norm": 0.7607712745666504, "learning_rate": 3.893467322171022e-06, "loss": 0.1403, "step": 3079 }, { "epoch": 0.9980557355800389, "grad_norm": 0.8286640644073486, "learning_rate": 3.892741090953143e-06, "loss": 0.1451, "step": 3080 }, { "epoch": 0.9983797796500324, "grad_norm": 0.7811787128448486, "learning_rate": 3.892014689277238e-06, "loss": 0.1364, "step": 3081 }, { "epoch": 0.998703823720026, "grad_norm": 0.798345148563385, "learning_rate": 3.891288117232209e-06, "loss": 0.1535, "step": 3082 }, { "epoch": 0.9990278677900194, "grad_norm": 0.8324628472328186, "learning_rate": 3.890561374906985e-06, "loss": 0.1366, "step": 3083 }, { "epoch": 0.9993519118600129, "grad_norm": 0.7567122578620911, "learning_rate": 3.889834462390509e-06, "loss": 0.1342, "step": 3084 }, { "epoch": 0.9996759559300065, "grad_norm": 0.8021467328071594, "learning_rate": 3.889107379771749e-06, "loss": 0.1404, "step": 3085 }, { "epoch": 1.0, "grad_norm": 0.9288654923439026, "learning_rate": 3.888380127139695e-06, "loss": 0.1531, "step": 3086 }, { "epoch": 1.0003240440699934, "grad_norm": 0.7892737984657288, "learning_rate": 3.887652704583354e-06, "loss": 0.1268, "step": 3087 }, { "epoch": 1.000648088139987, "grad_norm": 0.722515344619751, "learning_rate": 3.886925112191754e-06, "loss": 0.1064, "step": 3088 }, { "epoch": 1.0009721322099805, "grad_norm": 0.743994951248169, "learning_rate": 3.886197350053948e-06, "loss": 0.1129, "step": 3089 }, { "epoch": 1.0012961762799741, "grad_norm": 0.786417543888092, "learning_rate": 3.885469418259005e-06, "loss": 0.1188, "step": 3090 }, { "epoch": 1.0016202203499676, "grad_norm": 0.7891602516174316, "learning_rate": 3.8847413168960175e-06, "loss": 0.1138, "step": 3091 }, { "epoch": 1.0019442644199612, "grad_norm": 0.8058298230171204, "learning_rate": 3.884013046054098e-06, "loss": 0.1159, "step": 3092 }, { "epoch": 1.0022683084899546, "grad_norm": 0.7223353385925293, "learning_rate": 3.8832846058223814e-06, "loss": 0.1061, "step": 3093 }, { "epoch": 1.002592352559948, "grad_norm": 0.8861434459686279, "learning_rate": 3.882555996290019e-06, "loss": 0.1278, "step": 3094 }, { "epoch": 1.0029163966299417, "grad_norm": 0.7787957787513733, "learning_rate": 3.881827217546187e-06, "loss": 0.1085, "step": 3095 }, { "epoch": 1.0032404406999351, "grad_norm": 0.8384479880332947, "learning_rate": 3.881098269680081e-06, "loss": 0.1168, "step": 3096 }, { "epoch": 1.0035644847699288, "grad_norm": 0.8060561418533325, "learning_rate": 3.880369152780916e-06, "loss": 0.1152, "step": 3097 }, { "epoch": 1.0038885288399222, "grad_norm": 0.8682321906089783, "learning_rate": 3.879639866937931e-06, "loss": 0.1127, "step": 3098 }, { "epoch": 1.0042125729099158, "grad_norm": 0.8855298757553101, "learning_rate": 3.8789104122403815e-06, "loss": 0.1269, "step": 3099 }, { "epoch": 1.0045366169799093, "grad_norm": 0.8725508451461792, "learning_rate": 3.878180788777546e-06, "loss": 0.1207, "step": 3100 }, { "epoch": 1.0048606610499027, "grad_norm": 0.8179731965065002, "learning_rate": 3.877450996638725e-06, "loss": 0.1175, "step": 3101 }, { "epoch": 1.0051847051198963, "grad_norm": 0.8003975749015808, "learning_rate": 3.876721035913236e-06, "loss": 0.1099, "step": 3102 }, { "epoch": 1.0055087491898898, "grad_norm": 0.8058536648750305, "learning_rate": 3.87599090669042e-06, "loss": 0.1163, "step": 3103 }, { "epoch": 1.0058327932598834, "grad_norm": 0.774759829044342, "learning_rate": 3.875260609059638e-06, "loss": 0.1048, "step": 3104 }, { "epoch": 1.0061568373298768, "grad_norm": 0.8399088382720947, "learning_rate": 3.87453014311027e-06, "loss": 0.121, "step": 3105 }, { "epoch": 1.0064808813998705, "grad_norm": 0.8479682803153992, "learning_rate": 3.87379950893172e-06, "loss": 0.1118, "step": 3106 }, { "epoch": 1.0068049254698639, "grad_norm": 0.8828272819519043, "learning_rate": 3.8730687066134086e-06, "loss": 0.132, "step": 3107 }, { "epoch": 1.0071289695398573, "grad_norm": 0.7902854084968567, "learning_rate": 3.8723377362447805e-06, "loss": 0.1061, "step": 3108 }, { "epoch": 1.007453013609851, "grad_norm": 0.8610204458236694, "learning_rate": 3.871606597915298e-06, "loss": 0.1216, "step": 3109 }, { "epoch": 1.0077770576798444, "grad_norm": 0.8818372488021851, "learning_rate": 3.870875291714448e-06, "loss": 0.1242, "step": 3110 }, { "epoch": 1.008101101749838, "grad_norm": 0.8453108668327332, "learning_rate": 3.870143817731732e-06, "loss": 0.1203, "step": 3111 }, { "epoch": 1.0084251458198314, "grad_norm": 0.7680081725120544, "learning_rate": 3.8694121760566765e-06, "loss": 0.1126, "step": 3112 }, { "epoch": 1.008749189889825, "grad_norm": 0.8433241248130798, "learning_rate": 3.868680366778828e-06, "loss": 0.1191, "step": 3113 }, { "epoch": 1.0090732339598185, "grad_norm": 0.8732882142066956, "learning_rate": 3.867948389987752e-06, "loss": 0.1221, "step": 3114 }, { "epoch": 1.0093972780298122, "grad_norm": 0.8452468514442444, "learning_rate": 3.8672162457730365e-06, "loss": 0.1186, "step": 3115 }, { "epoch": 1.0097213220998056, "grad_norm": 0.7950354814529419, "learning_rate": 3.866483934224288e-06, "loss": 0.1136, "step": 3116 }, { "epoch": 1.010045366169799, "grad_norm": 0.8962631225585938, "learning_rate": 3.865751455431134e-06, "loss": 0.1232, "step": 3117 }, { "epoch": 1.0103694102397927, "grad_norm": 0.7999354600906372, "learning_rate": 3.865018809483224e-06, "loss": 0.1216, "step": 3118 }, { "epoch": 1.010693454309786, "grad_norm": 0.8645737171173096, "learning_rate": 3.864285996470226e-06, "loss": 0.1284, "step": 3119 }, { "epoch": 1.0110174983797797, "grad_norm": 0.8569557070732117, "learning_rate": 3.863553016481829e-06, "loss": 0.1151, "step": 3120 }, { "epoch": 1.0113415424497731, "grad_norm": 0.812362015247345, "learning_rate": 3.862819869607743e-06, "loss": 0.1143, "step": 3121 }, { "epoch": 1.0116655865197668, "grad_norm": 0.8417925834655762, "learning_rate": 3.862086555937699e-06, "loss": 0.122, "step": 3122 }, { "epoch": 1.0119896305897602, "grad_norm": 0.8303039073944092, "learning_rate": 3.861353075561446e-06, "loss": 0.1218, "step": 3123 }, { "epoch": 1.0123136746597536, "grad_norm": 0.809136688709259, "learning_rate": 3.860619428568756e-06, "loss": 0.1159, "step": 3124 }, { "epoch": 1.0126377187297473, "grad_norm": 0.7998272180557251, "learning_rate": 3.859885615049419e-06, "loss": 0.1116, "step": 3125 }, { "epoch": 1.0129617627997407, "grad_norm": 0.8065343499183655, "learning_rate": 3.8591516350932476e-06, "loss": 0.1142, "step": 3126 }, { "epoch": 1.0132858068697344, "grad_norm": 0.7445245981216431, "learning_rate": 3.8584174887900735e-06, "loss": 0.1069, "step": 3127 }, { "epoch": 1.0136098509397278, "grad_norm": 0.8598371744155884, "learning_rate": 3.8576831762297495e-06, "loss": 0.1211, "step": 3128 }, { "epoch": 1.0139338950097214, "grad_norm": 0.7716249227523804, "learning_rate": 3.856948697502148e-06, "loss": 0.1109, "step": 3129 }, { "epoch": 1.0142579390797148, "grad_norm": 0.7559572458267212, "learning_rate": 3.8562140526971625e-06, "loss": 0.1062, "step": 3130 }, { "epoch": 1.0145819831497083, "grad_norm": 0.8070071935653687, "learning_rate": 3.855479241904705e-06, "loss": 0.1126, "step": 3131 }, { "epoch": 1.014906027219702, "grad_norm": 0.8478087782859802, "learning_rate": 3.8547442652147115e-06, "loss": 0.1198, "step": 3132 }, { "epoch": 1.0152300712896953, "grad_norm": 0.8656642436981201, "learning_rate": 3.854009122717135e-06, "loss": 0.1206, "step": 3133 }, { "epoch": 1.015554115359689, "grad_norm": 0.8849382996559143, "learning_rate": 3.8532738145019484e-06, "loss": 0.1165, "step": 3134 }, { "epoch": 1.0158781594296824, "grad_norm": 0.8415027856826782, "learning_rate": 3.852538340659149e-06, "loss": 0.1157, "step": 3135 }, { "epoch": 1.016202203499676, "grad_norm": 0.8097540736198425, "learning_rate": 3.85180270127875e-06, "loss": 0.1136, "step": 3136 }, { "epoch": 1.0165262475696695, "grad_norm": 0.8937448263168335, "learning_rate": 3.851066896450787e-06, "loss": 0.1208, "step": 3137 }, { "epoch": 1.016850291639663, "grad_norm": 0.7370225191116333, "learning_rate": 3.850330926265314e-06, "loss": 0.1026, "step": 3138 }, { "epoch": 1.0171743357096565, "grad_norm": 0.8523708581924438, "learning_rate": 3.849594790812409e-06, "loss": 0.1166, "step": 3139 }, { "epoch": 1.01749837977965, "grad_norm": 0.7796236872673035, "learning_rate": 3.848858490182167e-06, "loss": 0.1078, "step": 3140 }, { "epoch": 1.0178224238496436, "grad_norm": 0.7496045827865601, "learning_rate": 3.8481220244647025e-06, "loss": 0.1047, "step": 3141 }, { "epoch": 1.018146467919637, "grad_norm": 0.7842292785644531, "learning_rate": 3.847385393750154e-06, "loss": 0.1118, "step": 3142 }, { "epoch": 1.0184705119896307, "grad_norm": 0.8823502659797668, "learning_rate": 3.846648598128677e-06, "loss": 0.1218, "step": 3143 }, { "epoch": 1.018794556059624, "grad_norm": 0.7918041348457336, "learning_rate": 3.8459116376904475e-06, "loss": 0.1173, "step": 3144 }, { "epoch": 1.0191186001296175, "grad_norm": 0.8599136471748352, "learning_rate": 3.8451745125256635e-06, "loss": 0.1154, "step": 3145 }, { "epoch": 1.0194426441996112, "grad_norm": 0.8427619338035583, "learning_rate": 3.8444372227245415e-06, "loss": 0.1199, "step": 3146 }, { "epoch": 1.0197666882696046, "grad_norm": 0.8241166472434998, "learning_rate": 3.843699768377318e-06, "loss": 0.1149, "step": 3147 }, { "epoch": 1.0200907323395982, "grad_norm": 0.8806132674217224, "learning_rate": 3.842962149574252e-06, "loss": 0.1315, "step": 3148 }, { "epoch": 1.0204147764095917, "grad_norm": 0.8508759140968323, "learning_rate": 3.842224366405619e-06, "loss": 0.1203, "step": 3149 }, { "epoch": 1.0207388204795853, "grad_norm": 0.869519829750061, "learning_rate": 3.841486418961717e-06, "loss": 0.1294, "step": 3150 }, { "epoch": 1.0210628645495787, "grad_norm": 0.7835947275161743, "learning_rate": 3.840748307332865e-06, "loss": 0.1105, "step": 3151 }, { "epoch": 1.0213869086195722, "grad_norm": 0.8605433702468872, "learning_rate": 3.840010031609398e-06, "loss": 0.1303, "step": 3152 }, { "epoch": 1.0217109526895658, "grad_norm": 0.7887565493583679, "learning_rate": 3.8392715918816755e-06, "loss": 0.1112, "step": 3153 }, { "epoch": 1.0220349967595592, "grad_norm": 0.7533249258995056, "learning_rate": 3.838532988240077e-06, "loss": 0.1026, "step": 3154 }, { "epoch": 1.0223590408295529, "grad_norm": 0.8589524626731873, "learning_rate": 3.837794220774998e-06, "loss": 0.1211, "step": 3155 }, { "epoch": 1.0226830848995463, "grad_norm": 0.8438388109207153, "learning_rate": 3.8370552895768565e-06, "loss": 0.1182, "step": 3156 }, { "epoch": 1.02300712896954, "grad_norm": 0.8679496645927429, "learning_rate": 3.836316194736093e-06, "loss": 0.124, "step": 3157 }, { "epoch": 1.0233311730395334, "grad_norm": 0.9431676268577576, "learning_rate": 3.835576936343162e-06, "loss": 0.1277, "step": 3158 }, { "epoch": 1.023655217109527, "grad_norm": 0.797709584236145, "learning_rate": 3.8348375144885445e-06, "loss": 0.1122, "step": 3159 }, { "epoch": 1.0239792611795204, "grad_norm": 0.8156585693359375, "learning_rate": 3.834097929262737e-06, "loss": 0.1212, "step": 3160 }, { "epoch": 1.0243033052495139, "grad_norm": 0.7179230451583862, "learning_rate": 3.833358180756258e-06, "loss": 0.1027, "step": 3161 }, { "epoch": 1.0246273493195075, "grad_norm": 0.880363404750824, "learning_rate": 3.832618269059645e-06, "loss": 0.1201, "step": 3162 }, { "epoch": 1.024951393389501, "grad_norm": 0.826892077922821, "learning_rate": 3.831878194263458e-06, "loss": 0.1147, "step": 3163 }, { "epoch": 1.0252754374594946, "grad_norm": 0.8258995413780212, "learning_rate": 3.831137956458272e-06, "loss": 0.1219, "step": 3164 }, { "epoch": 1.025599481529488, "grad_norm": 0.8825317621231079, "learning_rate": 3.830397555734687e-06, "loss": 0.1307, "step": 3165 }, { "epoch": 1.0259235255994816, "grad_norm": 0.8365684151649475, "learning_rate": 3.8296569921833214e-06, "loss": 0.1133, "step": 3166 }, { "epoch": 1.026247569669475, "grad_norm": 0.8617843985557556, "learning_rate": 3.8289162658948114e-06, "loss": 0.1262, "step": 3167 }, { "epoch": 1.0265716137394685, "grad_norm": 0.74489825963974, "learning_rate": 3.828175376959815e-06, "loss": 0.1099, "step": 3168 }, { "epoch": 1.0268956578094621, "grad_norm": 0.7609895467758179, "learning_rate": 3.827434325469011e-06, "loss": 0.1073, "step": 3169 }, { "epoch": 1.0272197018794555, "grad_norm": 0.8371672034263611, "learning_rate": 3.8266931115130955e-06, "loss": 0.1189, "step": 3170 }, { "epoch": 1.0275437459494492, "grad_norm": 0.847689688205719, "learning_rate": 3.8259517351827866e-06, "loss": 0.1252, "step": 3171 }, { "epoch": 1.0278677900194426, "grad_norm": 0.8477571606636047, "learning_rate": 3.825210196568823e-06, "loss": 0.1165, "step": 3172 }, { "epoch": 1.0281918340894363, "grad_norm": 0.7831157445907593, "learning_rate": 3.824468495761958e-06, "loss": 0.1125, "step": 3173 }, { "epoch": 1.0285158781594297, "grad_norm": 0.8094596266746521, "learning_rate": 3.823726632852972e-06, "loss": 0.1223, "step": 3174 }, { "epoch": 1.028839922229423, "grad_norm": 0.8329768776893616, "learning_rate": 3.822984607932661e-06, "loss": 0.1106, "step": 3175 }, { "epoch": 1.0291639662994168, "grad_norm": 0.8299722671508789, "learning_rate": 3.8222424210918404e-06, "loss": 0.1174, "step": 3176 }, { "epoch": 1.0294880103694102, "grad_norm": 0.7693575620651245, "learning_rate": 3.821500072421349e-06, "loss": 0.1169, "step": 3177 }, { "epoch": 1.0298120544394038, "grad_norm": 0.7669704556465149, "learning_rate": 3.820757562012042e-06, "loss": 0.1147, "step": 3178 }, { "epoch": 1.0301360985093972, "grad_norm": 0.8572734594345093, "learning_rate": 3.820014889954794e-06, "loss": 0.1253, "step": 3179 }, { "epoch": 1.030460142579391, "grad_norm": 0.8561815619468689, "learning_rate": 3.819272056340504e-06, "loss": 0.1232, "step": 3180 }, { "epoch": 1.0307841866493843, "grad_norm": 0.7440727949142456, "learning_rate": 3.818529061260084e-06, "loss": 0.107, "step": 3181 }, { "epoch": 1.0311082307193777, "grad_norm": 0.8579633235931396, "learning_rate": 3.817785904804473e-06, "loss": 0.1214, "step": 3182 }, { "epoch": 1.0314322747893714, "grad_norm": 0.8033884763717651, "learning_rate": 3.817042587064623e-06, "loss": 0.108, "step": 3183 }, { "epoch": 1.0317563188593648, "grad_norm": 0.8052843809127808, "learning_rate": 3.81629910813151e-06, "loss": 0.117, "step": 3184 }, { "epoch": 1.0320803629293585, "grad_norm": 0.8760218620300293, "learning_rate": 3.815555468096131e-06, "loss": 0.1211, "step": 3185 }, { "epoch": 1.0324044069993519, "grad_norm": 0.8402146100997925, "learning_rate": 3.814811667049497e-06, "loss": 0.1151, "step": 3186 }, { "epoch": 1.0327284510693455, "grad_norm": 0.8820028901100159, "learning_rate": 3.814067705082643e-06, "loss": 0.1171, "step": 3187 }, { "epoch": 1.033052495139339, "grad_norm": 0.8354962468147278, "learning_rate": 3.8133235822866234e-06, "loss": 0.1094, "step": 3188 }, { "epoch": 1.0333765392093324, "grad_norm": 0.8769159913063049, "learning_rate": 3.812579298752511e-06, "loss": 0.118, "step": 3189 }, { "epoch": 1.033700583279326, "grad_norm": 0.8448501229286194, "learning_rate": 3.8118348545714e-06, "loss": 0.1182, "step": 3190 }, { "epoch": 1.0340246273493194, "grad_norm": 0.8219982385635376, "learning_rate": 3.8110902498344023e-06, "loss": 0.107, "step": 3191 }, { "epoch": 1.034348671419313, "grad_norm": 0.8816295266151428, "learning_rate": 3.8103454846326493e-06, "loss": 0.1138, "step": 3192 }, { "epoch": 1.0346727154893065, "grad_norm": 0.8417076468467712, "learning_rate": 3.809600559057295e-06, "loss": 0.1157, "step": 3193 }, { "epoch": 1.0349967595593002, "grad_norm": 0.8487566709518433, "learning_rate": 3.80885547319951e-06, "loss": 0.1152, "step": 3194 }, { "epoch": 1.0353208036292936, "grad_norm": 0.7868528366088867, "learning_rate": 3.808110227150485e-06, "loss": 0.1064, "step": 3195 }, { "epoch": 1.035644847699287, "grad_norm": 0.8207704424858093, "learning_rate": 3.8073648210014323e-06, "loss": 0.1108, "step": 3196 }, { "epoch": 1.0359688917692806, "grad_norm": 0.9189059734344482, "learning_rate": 3.806619254843582e-06, "loss": 0.1235, "step": 3197 }, { "epoch": 1.036292935839274, "grad_norm": 0.851455807685852, "learning_rate": 3.8058735287681835e-06, "loss": 0.1101, "step": 3198 }, { "epoch": 1.0366169799092677, "grad_norm": 0.9308487772941589, "learning_rate": 3.8051276428665074e-06, "loss": 0.1269, "step": 3199 }, { "epoch": 1.0369410239792611, "grad_norm": 0.8116505146026611, "learning_rate": 3.8043815972298424e-06, "loss": 0.1193, "step": 3200 }, { "epoch": 1.0372650680492548, "grad_norm": 0.8720618486404419, "learning_rate": 3.8036353919494973e-06, "loss": 0.1135, "step": 3201 }, { "epoch": 1.0375891121192482, "grad_norm": 0.8109154105186462, "learning_rate": 3.8028890271168e-06, "loss": 0.1125, "step": 3202 }, { "epoch": 1.0379131561892416, "grad_norm": 0.8150811195373535, "learning_rate": 3.8021425028230994e-06, "loss": 0.1062, "step": 3203 }, { "epoch": 1.0382372002592353, "grad_norm": 0.7590900659561157, "learning_rate": 3.801395819159761e-06, "loss": 0.106, "step": 3204 }, { "epoch": 1.0385612443292287, "grad_norm": 0.8100997805595398, "learning_rate": 3.8006489762181744e-06, "loss": 0.1129, "step": 3205 }, { "epoch": 1.0388852883992223, "grad_norm": 0.8717626333236694, "learning_rate": 3.7999019740897423e-06, "loss": 0.1146, "step": 3206 }, { "epoch": 1.0392093324692158, "grad_norm": 1.0034371614456177, "learning_rate": 3.799154812865894e-06, "loss": 0.1263, "step": 3207 }, { "epoch": 1.0395333765392094, "grad_norm": 0.8377565741539001, "learning_rate": 3.7984074926380733e-06, "loss": 0.1188, "step": 3208 }, { "epoch": 1.0398574206092028, "grad_norm": 0.881369948387146, "learning_rate": 3.7976600134977455e-06, "loss": 0.117, "step": 3209 }, { "epoch": 1.0401814646791965, "grad_norm": 0.8010326027870178, "learning_rate": 3.7969123755363935e-06, "loss": 0.1139, "step": 3210 }, { "epoch": 1.04050550874919, "grad_norm": 0.9041508436203003, "learning_rate": 3.7961645788455225e-06, "loss": 0.1213, "step": 3211 }, { "epoch": 1.0408295528191833, "grad_norm": 0.7512882351875305, "learning_rate": 3.7954166235166545e-06, "loss": 0.1059, "step": 3212 }, { "epoch": 1.041153596889177, "grad_norm": 0.8295339345932007, "learning_rate": 3.794668509641332e-06, "loss": 0.1212, "step": 3213 }, { "epoch": 1.0414776409591704, "grad_norm": 0.8528342247009277, "learning_rate": 3.793920237311118e-06, "loss": 0.1162, "step": 3214 }, { "epoch": 1.041801685029164, "grad_norm": 0.7838711142539978, "learning_rate": 3.793171806617593e-06, "loss": 0.1125, "step": 3215 }, { "epoch": 1.0421257290991575, "grad_norm": 0.8150649666786194, "learning_rate": 3.7924232176523574e-06, "loss": 0.1084, "step": 3216 }, { "epoch": 1.042449773169151, "grad_norm": 0.7916179299354553, "learning_rate": 3.7916744705070318e-06, "loss": 0.1068, "step": 3217 }, { "epoch": 1.0427738172391445, "grad_norm": 0.8389194011688232, "learning_rate": 3.790925565273255e-06, "loss": 0.1163, "step": 3218 }, { "epoch": 1.043097861309138, "grad_norm": 0.9675598740577698, "learning_rate": 3.790176502042686e-06, "loss": 0.1275, "step": 3219 }, { "epoch": 1.0434219053791316, "grad_norm": 0.8746939301490784, "learning_rate": 3.789427280907004e-06, "loss": 0.1245, "step": 3220 }, { "epoch": 1.043745949449125, "grad_norm": 0.7958021759986877, "learning_rate": 3.7886779019579045e-06, "loss": 0.1178, "step": 3221 }, { "epoch": 1.0440699935191187, "grad_norm": 0.8526121973991394, "learning_rate": 3.787928365287106e-06, "loss": 0.1142, "step": 3222 }, { "epoch": 1.044394037589112, "grad_norm": 0.8836873769760132, "learning_rate": 3.7871786709863435e-06, "loss": 0.1262, "step": 3223 }, { "epoch": 1.0447180816591057, "grad_norm": 0.8168222308158875, "learning_rate": 3.7864288191473718e-06, "loss": 0.1201, "step": 3224 }, { "epoch": 1.0450421257290992, "grad_norm": 0.86906898021698, "learning_rate": 3.7856788098619667e-06, "loss": 0.1158, "step": 3225 }, { "epoch": 1.0453661697990926, "grad_norm": 0.898335337638855, "learning_rate": 3.7849286432219216e-06, "loss": 0.1174, "step": 3226 }, { "epoch": 1.0456902138690862, "grad_norm": 0.8181985020637512, "learning_rate": 3.78417831931905e-06, "loss": 0.1177, "step": 3227 }, { "epoch": 1.0460142579390797, "grad_norm": 0.8221048712730408, "learning_rate": 3.783427838245184e-06, "loss": 0.1144, "step": 3228 }, { "epoch": 1.0463383020090733, "grad_norm": 0.8519485592842102, "learning_rate": 3.7826772000921742e-06, "loss": 0.1199, "step": 3229 }, { "epoch": 1.0466623460790667, "grad_norm": 0.8599885702133179, "learning_rate": 3.781926404951893e-06, "loss": 0.1152, "step": 3230 }, { "epoch": 1.0469863901490604, "grad_norm": 0.8111092448234558, "learning_rate": 3.7811754529162294e-06, "loss": 0.1144, "step": 3231 }, { "epoch": 1.0473104342190538, "grad_norm": 0.8220904469490051, "learning_rate": 3.7804243440770936e-06, "loss": 0.1152, "step": 3232 }, { "epoch": 1.0476344782890472, "grad_norm": 0.8551547527313232, "learning_rate": 3.779673078526414e-06, "loss": 0.1169, "step": 3233 }, { "epoch": 1.0479585223590409, "grad_norm": 0.8881143927574158, "learning_rate": 3.7789216563561373e-06, "loss": 0.1331, "step": 3234 }, { "epoch": 1.0482825664290343, "grad_norm": 0.8921029567718506, "learning_rate": 3.778170077658231e-06, "loss": 0.1165, "step": 3235 }, { "epoch": 1.048606610499028, "grad_norm": 0.7967270612716675, "learning_rate": 3.77741834252468e-06, "loss": 0.1103, "step": 3236 }, { "epoch": 1.0489306545690213, "grad_norm": 0.8333545327186584, "learning_rate": 3.7766664510474903e-06, "loss": 0.1178, "step": 3237 }, { "epoch": 1.049254698639015, "grad_norm": 0.8133196830749512, "learning_rate": 3.775914403318687e-06, "loss": 0.1088, "step": 3238 }, { "epoch": 1.0495787427090084, "grad_norm": 0.836877703666687, "learning_rate": 3.7751621994303123e-06, "loss": 0.121, "step": 3239 }, { "epoch": 1.0499027867790018, "grad_norm": 0.8581812977790833, "learning_rate": 3.7744098394744287e-06, "loss": 0.1215, "step": 3240 }, { "epoch": 1.0502268308489955, "grad_norm": 0.8775882720947266, "learning_rate": 3.7736573235431174e-06, "loss": 0.1312, "step": 3241 }, { "epoch": 1.050550874918989, "grad_norm": 0.8495439291000366, "learning_rate": 3.7729046517284805e-06, "loss": 0.1217, "step": 3242 }, { "epoch": 1.0508749189889826, "grad_norm": 0.8486558794975281, "learning_rate": 3.7721518241226375e-06, "loss": 0.112, "step": 3243 }, { "epoch": 1.051198963058976, "grad_norm": 0.7727014422416687, "learning_rate": 3.771398840817725e-06, "loss": 0.1024, "step": 3244 }, { "epoch": 1.0515230071289696, "grad_norm": 0.9136723279953003, "learning_rate": 3.770645701905904e-06, "loss": 0.1325, "step": 3245 }, { "epoch": 1.051847051198963, "grad_norm": 0.807131290435791, "learning_rate": 3.7698924074793484e-06, "loss": 0.1109, "step": 3246 }, { "epoch": 1.0521710952689567, "grad_norm": 0.7704994678497314, "learning_rate": 3.7691389576302567e-06, "loss": 0.1101, "step": 3247 }, { "epoch": 1.0524951393389501, "grad_norm": 0.775662362575531, "learning_rate": 3.768385352450842e-06, "loss": 0.1074, "step": 3248 }, { "epoch": 1.0528191834089435, "grad_norm": 0.8145730495452881, "learning_rate": 3.7676315920333396e-06, "loss": 0.1181, "step": 3249 }, { "epoch": 1.0531432274789372, "grad_norm": 0.8446400165557861, "learning_rate": 3.7668776764700023e-06, "loss": 0.1255, "step": 3250 }, { "epoch": 1.0534672715489306, "grad_norm": 0.8573490977287292, "learning_rate": 3.766123605853101e-06, "loss": 0.1225, "step": 3251 }, { "epoch": 1.0537913156189243, "grad_norm": 0.9274383187294006, "learning_rate": 3.765369380274928e-06, "loss": 0.1274, "step": 3252 }, { "epoch": 1.0541153596889177, "grad_norm": 0.8625757098197937, "learning_rate": 3.7646149998277924e-06, "loss": 0.1156, "step": 3253 }, { "epoch": 1.054439403758911, "grad_norm": 0.896124541759491, "learning_rate": 3.7638604646040232e-06, "loss": 0.1276, "step": 3254 }, { "epoch": 1.0547634478289047, "grad_norm": 0.8273859024047852, "learning_rate": 3.763105774695968e-06, "loss": 0.1146, "step": 3255 }, { "epoch": 1.0550874918988982, "grad_norm": 0.821397602558136, "learning_rate": 3.7623509301959935e-06, "loss": 0.113, "step": 3256 }, { "epoch": 1.0554115359688918, "grad_norm": 0.7991721630096436, "learning_rate": 3.7615959311964865e-06, "loss": 0.1137, "step": 3257 }, { "epoch": 1.0557355800388852, "grad_norm": 0.7959055304527283, "learning_rate": 3.760840777789851e-06, "loss": 0.1141, "step": 3258 }, { "epoch": 1.0560596241088789, "grad_norm": 0.8501467108726501, "learning_rate": 3.7600854700685095e-06, "loss": 0.1181, "step": 3259 }, { "epoch": 1.0563836681788723, "grad_norm": 0.8222145438194275, "learning_rate": 3.759330008124905e-06, "loss": 0.1161, "step": 3260 }, { "epoch": 1.056707712248866, "grad_norm": 0.7849128842353821, "learning_rate": 3.7585743920514985e-06, "loss": 0.1145, "step": 3261 }, { "epoch": 1.0570317563188594, "grad_norm": 0.7778906226158142, "learning_rate": 3.757818621940771e-06, "loss": 0.1163, "step": 3262 }, { "epoch": 1.0573558003888528, "grad_norm": 0.9160462021827698, "learning_rate": 3.7570626978852203e-06, "loss": 0.1322, "step": 3263 }, { "epoch": 1.0576798444588464, "grad_norm": 0.7777760028839111, "learning_rate": 3.7563066199773645e-06, "loss": 0.1106, "step": 3264 }, { "epoch": 1.0580038885288399, "grad_norm": 0.8116745948791504, "learning_rate": 3.7555503883097414e-06, "loss": 0.12, "step": 3265 }, { "epoch": 1.0583279325988335, "grad_norm": 0.8378960490226746, "learning_rate": 3.7547940029749054e-06, "loss": 0.1151, "step": 3266 }, { "epoch": 1.058651976668827, "grad_norm": 0.8337701559066772, "learning_rate": 3.75403746406543e-06, "loss": 0.1212, "step": 3267 }, { "epoch": 1.0589760207388206, "grad_norm": 0.8173056840896606, "learning_rate": 3.7532807716739082e-06, "loss": 0.1166, "step": 3268 }, { "epoch": 1.059300064808814, "grad_norm": 0.7868010401725769, "learning_rate": 3.752523925892954e-06, "loss": 0.1182, "step": 3269 }, { "epoch": 1.0596241088788074, "grad_norm": 0.7789453268051147, "learning_rate": 3.7517669268151967e-06, "loss": 0.0995, "step": 3270 }, { "epoch": 1.059948152948801, "grad_norm": 0.8659542798995972, "learning_rate": 3.751009774533285e-06, "loss": 0.1272, "step": 3271 }, { "epoch": 1.0602721970187945, "grad_norm": 0.8237266540527344, "learning_rate": 3.7502524691398877e-06, "loss": 0.1153, "step": 3272 }, { "epoch": 1.0605962410887881, "grad_norm": 0.8071155548095703, "learning_rate": 3.7494950107276917e-06, "loss": 0.1126, "step": 3273 }, { "epoch": 1.0609202851587816, "grad_norm": 0.8354878425598145, "learning_rate": 3.7487373993894027e-06, "loss": 0.1151, "step": 3274 }, { "epoch": 1.0612443292287752, "grad_norm": 0.8763176798820496, "learning_rate": 3.7479796352177445e-06, "loss": 0.116, "step": 3275 }, { "epoch": 1.0615683732987686, "grad_norm": 0.891167402267456, "learning_rate": 3.7472217183054605e-06, "loss": 0.1303, "step": 3276 }, { "epoch": 1.061892417368762, "grad_norm": 0.7942646145820618, "learning_rate": 3.7464636487453122e-06, "loss": 0.1087, "step": 3277 }, { "epoch": 1.0622164614387557, "grad_norm": 0.9152526259422302, "learning_rate": 3.74570542663008e-06, "loss": 0.1235, "step": 3278 }, { "epoch": 1.0625405055087491, "grad_norm": 0.7322328090667725, "learning_rate": 3.744947052052562e-06, "loss": 0.1007, "step": 3279 }, { "epoch": 1.0628645495787428, "grad_norm": 0.844443678855896, "learning_rate": 3.7441885251055774e-06, "loss": 0.1207, "step": 3280 }, { "epoch": 1.0631885936487362, "grad_norm": 0.813022792339325, "learning_rate": 3.7434298458819622e-06, "loss": 0.1127, "step": 3281 }, { "epoch": 1.0635126377187298, "grad_norm": 0.8554878234863281, "learning_rate": 3.7426710144745717e-06, "loss": 0.1213, "step": 3282 }, { "epoch": 1.0638366817887233, "grad_norm": 0.831193745136261, "learning_rate": 3.7419120309762787e-06, "loss": 0.1169, "step": 3283 }, { "epoch": 1.0641607258587167, "grad_norm": 0.8382897973060608, "learning_rate": 3.7411528954799752e-06, "loss": 0.123, "step": 3284 }, { "epoch": 1.0644847699287103, "grad_norm": 0.8006016612052917, "learning_rate": 3.740393608078573e-06, "loss": 0.116, "step": 3285 }, { "epoch": 1.0648088139987038, "grad_norm": 0.9068976640701294, "learning_rate": 3.739634168865001e-06, "loss": 0.1199, "step": 3286 }, { "epoch": 1.0651328580686974, "grad_norm": 0.7854377627372742, "learning_rate": 3.738874577932208e-06, "loss": 0.1088, "step": 3287 }, { "epoch": 1.0654569021386908, "grad_norm": 0.81007981300354, "learning_rate": 3.738114835373159e-06, "loss": 0.1219, "step": 3288 }, { "epoch": 1.0657809462086845, "grad_norm": 0.8615376353263855, "learning_rate": 3.73735494128084e-06, "loss": 0.1216, "step": 3289 }, { "epoch": 1.0661049902786779, "grad_norm": 0.7818930745124817, "learning_rate": 3.736594895748255e-06, "loss": 0.1137, "step": 3290 }, { "epoch": 1.0664290343486713, "grad_norm": 0.8900367617607117, "learning_rate": 3.7358346988684258e-06, "loss": 0.1304, "step": 3291 }, { "epoch": 1.066753078418665, "grad_norm": 0.8263948559761047, "learning_rate": 3.735074350734393e-06, "loss": 0.1197, "step": 3292 }, { "epoch": 1.0670771224886584, "grad_norm": 0.8178905248641968, "learning_rate": 3.734313851439217e-06, "loss": 0.1183, "step": 3293 }, { "epoch": 1.067401166558652, "grad_norm": 0.8497899174690247, "learning_rate": 3.7335532010759747e-06, "loss": 0.1206, "step": 3294 }, { "epoch": 1.0677252106286454, "grad_norm": 0.7759349942207336, "learning_rate": 3.732792399737761e-06, "loss": 0.1035, "step": 3295 }, { "epoch": 1.068049254698639, "grad_norm": 0.9050149321556091, "learning_rate": 3.7320314475176933e-06, "loss": 0.1125, "step": 3296 }, { "epoch": 1.0683732987686325, "grad_norm": 0.8377945423126221, "learning_rate": 3.731270344508903e-06, "loss": 0.1145, "step": 3297 }, { "epoch": 1.0686973428386262, "grad_norm": 0.8067528605461121, "learning_rate": 3.7305090908045422e-06, "loss": 0.1138, "step": 3298 }, { "epoch": 1.0690213869086196, "grad_norm": 0.7546223402023315, "learning_rate": 3.7297476864977805e-06, "loss": 0.1017, "step": 3299 }, { "epoch": 1.069345430978613, "grad_norm": 0.8082880973815918, "learning_rate": 3.7289861316818077e-06, "loss": 0.1181, "step": 3300 }, { "epoch": 1.0696694750486067, "grad_norm": 0.8667186498641968, "learning_rate": 3.728224426449829e-06, "loss": 0.1225, "step": 3301 }, { "epoch": 1.0699935191186, "grad_norm": 0.8660303354263306, "learning_rate": 3.7274625708950706e-06, "loss": 0.1213, "step": 3302 }, { "epoch": 1.0703175631885937, "grad_norm": 0.847798764705658, "learning_rate": 3.7267005651107763e-06, "loss": 0.1202, "step": 3303 }, { "epoch": 1.0706416072585871, "grad_norm": 0.8011866211891174, "learning_rate": 3.7259384091902085e-06, "loss": 0.105, "step": 3304 }, { "epoch": 1.0709656513285806, "grad_norm": 0.8448381423950195, "learning_rate": 3.7251761032266475e-06, "loss": 0.1194, "step": 3305 }, { "epoch": 1.0712896953985742, "grad_norm": 0.854239284992218, "learning_rate": 3.7244136473133924e-06, "loss": 0.1184, "step": 3306 }, { "epoch": 1.0716137394685676, "grad_norm": 0.7776972651481628, "learning_rate": 3.7236510415437598e-06, "loss": 0.1097, "step": 3307 }, { "epoch": 1.0719377835385613, "grad_norm": 0.8738824129104614, "learning_rate": 3.7228882860110856e-06, "loss": 0.1238, "step": 3308 }, { "epoch": 1.0722618276085547, "grad_norm": 0.7855773568153381, "learning_rate": 3.7221253808087234e-06, "loss": 0.1102, "step": 3309 }, { "epoch": 1.0725858716785484, "grad_norm": 0.8029606342315674, "learning_rate": 3.721362326030046e-06, "loss": 0.1086, "step": 3310 }, { "epoch": 1.0729099157485418, "grad_norm": 0.8558063507080078, "learning_rate": 3.720599121768443e-06, "loss": 0.1188, "step": 3311 }, { "epoch": 1.0732339598185354, "grad_norm": 0.8302557468414307, "learning_rate": 3.7198357681173247e-06, "loss": 0.114, "step": 3312 }, { "epoch": 1.0735580038885288, "grad_norm": 0.8412912487983704, "learning_rate": 3.7190722651701166e-06, "loss": 0.1216, "step": 3313 }, { "epoch": 1.0738820479585223, "grad_norm": 0.8054918646812439, "learning_rate": 3.718308613020265e-06, "loss": 0.1124, "step": 3314 }, { "epoch": 1.074206092028516, "grad_norm": 0.8104496002197266, "learning_rate": 3.717544811761233e-06, "loss": 0.1142, "step": 3315 }, { "epoch": 1.0745301360985093, "grad_norm": 0.8747921586036682, "learning_rate": 3.716780861486503e-06, "loss": 0.1192, "step": 3316 }, { "epoch": 1.074854180168503, "grad_norm": 0.7890901565551758, "learning_rate": 3.716016762289576e-06, "loss": 0.1104, "step": 3317 }, { "epoch": 1.0751782242384964, "grad_norm": 0.9319736957550049, "learning_rate": 3.7152525142639682e-06, "loss": 0.1261, "step": 3318 }, { "epoch": 1.07550226830849, "grad_norm": 0.8697876930236816, "learning_rate": 3.7144881175032178e-06, "loss": 0.1239, "step": 3319 }, { "epoch": 1.0758263123784835, "grad_norm": 0.7517074942588806, "learning_rate": 3.713723572100878e-06, "loss": 0.1055, "step": 3320 }, { "epoch": 1.076150356448477, "grad_norm": 0.8587399125099182, "learning_rate": 3.7129588781505232e-06, "loss": 0.1217, "step": 3321 }, { "epoch": 1.0764744005184705, "grad_norm": 0.7583026885986328, "learning_rate": 3.7121940357457438e-06, "loss": 0.1058, "step": 3322 }, { "epoch": 1.076798444588464, "grad_norm": 0.8852120637893677, "learning_rate": 3.7114290449801493e-06, "loss": 0.1194, "step": 3323 }, { "epoch": 1.0771224886584576, "grad_norm": 0.8140954971313477, "learning_rate": 3.7106639059473675e-06, "loss": 0.1113, "step": 3324 }, { "epoch": 1.077446532728451, "grad_norm": 0.9091130495071411, "learning_rate": 3.7098986187410447e-06, "loss": 0.1293, "step": 3325 }, { "epoch": 1.0777705767984447, "grad_norm": 0.8124028444290161, "learning_rate": 3.7091331834548427e-06, "loss": 0.1265, "step": 3326 }, { "epoch": 1.078094620868438, "grad_norm": 0.8255280256271362, "learning_rate": 3.7083676001824443e-06, "loss": 0.1163, "step": 3327 }, { "epoch": 1.0784186649384315, "grad_norm": 0.9926854968070984, "learning_rate": 3.70760186901755e-06, "loss": 0.1202, "step": 3328 }, { "epoch": 1.0787427090084252, "grad_norm": 0.8930680155754089, "learning_rate": 3.706835990053877e-06, "loss": 0.1185, "step": 3329 }, { "epoch": 1.0790667530784186, "grad_norm": 0.8199201822280884, "learning_rate": 3.7060699633851615e-06, "loss": 0.1073, "step": 3330 }, { "epoch": 1.0793907971484122, "grad_norm": 0.8079055547714233, "learning_rate": 3.7053037891051596e-06, "loss": 0.1172, "step": 3331 }, { "epoch": 1.0797148412184057, "grad_norm": 0.8031049370765686, "learning_rate": 3.704537467307641e-06, "loss": 0.1104, "step": 3332 }, { "epoch": 1.0800388852883993, "grad_norm": 0.8727356791496277, "learning_rate": 3.7037709980863974e-06, "loss": 0.1254, "step": 3333 }, { "epoch": 1.0803629293583927, "grad_norm": 0.8698641657829285, "learning_rate": 3.703004381535237e-06, "loss": 0.1155, "step": 3334 }, { "epoch": 1.0806869734283864, "grad_norm": 0.8368037939071655, "learning_rate": 3.7022376177479863e-06, "loss": 0.1196, "step": 3335 }, { "epoch": 1.0810110174983798, "grad_norm": 0.7489742040634155, "learning_rate": 3.7014707068184895e-06, "loss": 0.1045, "step": 3336 }, { "epoch": 1.0813350615683732, "grad_norm": 0.8222491145133972, "learning_rate": 3.70070364884061e-06, "loss": 0.1212, "step": 3337 }, { "epoch": 1.0816591056383669, "grad_norm": 0.761885941028595, "learning_rate": 3.6999364439082274e-06, "loss": 0.1163, "step": 3338 }, { "epoch": 1.0819831497083603, "grad_norm": 0.8204994201660156, "learning_rate": 3.6991690921152407e-06, "loss": 0.1124, "step": 3339 }, { "epoch": 1.082307193778354, "grad_norm": 1.0559394359588623, "learning_rate": 3.698401593555565e-06, "loss": 0.1241, "step": 3340 }, { "epoch": 1.0826312378483474, "grad_norm": 0.7862476110458374, "learning_rate": 3.697633948323136e-06, "loss": 0.1147, "step": 3341 }, { "epoch": 1.0829552819183408, "grad_norm": 0.871893584728241, "learning_rate": 3.6968661565119062e-06, "loss": 0.1168, "step": 3342 }, { "epoch": 1.0832793259883344, "grad_norm": 0.9019219279289246, "learning_rate": 3.6960982182158458e-06, "loss": 0.1303, "step": 3343 }, { "epoch": 1.0836033700583279, "grad_norm": 0.8220975995063782, "learning_rate": 3.6953301335289415e-06, "loss": 0.1243, "step": 3344 }, { "epoch": 1.0839274141283215, "grad_norm": 0.8278359770774841, "learning_rate": 3.6945619025452006e-06, "loss": 0.1202, "step": 3345 }, { "epoch": 1.084251458198315, "grad_norm": 0.7727410793304443, "learning_rate": 3.6937935253586475e-06, "loss": 0.1061, "step": 3346 }, { "epoch": 1.0845755022683086, "grad_norm": 0.836865246295929, "learning_rate": 3.6930250020633237e-06, "loss": 0.1206, "step": 3347 }, { "epoch": 1.084899546338302, "grad_norm": 0.8528717756271362, "learning_rate": 3.692256332753289e-06, "loss": 0.1193, "step": 3348 }, { "epoch": 1.0852235904082956, "grad_norm": 0.860572338104248, "learning_rate": 3.691487517522621e-06, "loss": 0.1228, "step": 3349 }, { "epoch": 1.085547634478289, "grad_norm": 0.8165038824081421, "learning_rate": 3.690718556465416e-06, "loss": 0.1206, "step": 3350 }, { "epoch": 1.0858716785482825, "grad_norm": 0.8471930623054504, "learning_rate": 3.689949449675786e-06, "loss": 0.1296, "step": 3351 }, { "epoch": 1.0861957226182761, "grad_norm": 0.7674320936203003, "learning_rate": 3.689180197247863e-06, "loss": 0.1062, "step": 3352 }, { "epoch": 1.0865197666882696, "grad_norm": 0.8666447401046753, "learning_rate": 3.688410799275796e-06, "loss": 0.1242, "step": 3353 }, { "epoch": 1.0868438107582632, "grad_norm": 0.8304182291030884, "learning_rate": 3.6876412558537524e-06, "loss": 0.1169, "step": 3354 }, { "epoch": 1.0871678548282566, "grad_norm": 0.838219404220581, "learning_rate": 3.686871567075916e-06, "loss": 0.1192, "step": 3355 }, { "epoch": 1.0874918988982503, "grad_norm": 0.859727144241333, "learning_rate": 3.6861017330364897e-06, "loss": 0.1202, "step": 3356 }, { "epoch": 1.0878159429682437, "grad_norm": 0.7956854701042175, "learning_rate": 3.685331753829693e-06, "loss": 0.11, "step": 3357 }, { "epoch": 1.088139987038237, "grad_norm": 0.7603848576545715, "learning_rate": 3.684561629549765e-06, "loss": 0.1057, "step": 3358 }, { "epoch": 1.0884640311082308, "grad_norm": 0.84071284532547, "learning_rate": 3.6837913602909615e-06, "loss": 0.1142, "step": 3359 }, { "epoch": 1.0887880751782242, "grad_norm": 0.8513961434364319, "learning_rate": 3.6830209461475554e-06, "loss": 0.1139, "step": 3360 }, { "epoch": 1.0891121192482178, "grad_norm": 0.845880389213562, "learning_rate": 3.6822503872138377e-06, "loss": 0.1218, "step": 3361 }, { "epoch": 1.0894361633182112, "grad_norm": 0.7607066631317139, "learning_rate": 3.6814796835841172e-06, "loss": 0.1039, "step": 3362 }, { "epoch": 1.089760207388205, "grad_norm": 0.9276735782623291, "learning_rate": 3.6807088353527216e-06, "loss": 0.1254, "step": 3363 }, { "epoch": 1.0900842514581983, "grad_norm": 0.768365740776062, "learning_rate": 3.6799378426139942e-06, "loss": 0.1011, "step": 3364 }, { "epoch": 1.0904082955281917, "grad_norm": 0.8057131767272949, "learning_rate": 3.679166705462298e-06, "loss": 0.1149, "step": 3365 }, { "epoch": 1.0907323395981854, "grad_norm": 0.8209421038627625, "learning_rate": 3.6783954239920118e-06, "loss": 0.1144, "step": 3366 }, { "epoch": 1.0910563836681788, "grad_norm": 0.8330515027046204, "learning_rate": 3.677623998297534e-06, "loss": 0.1146, "step": 3367 }, { "epoch": 1.0913804277381725, "grad_norm": 0.7507935762405396, "learning_rate": 3.6768524284732794e-06, "loss": 0.1078, "step": 3368 }, { "epoch": 1.0917044718081659, "grad_norm": 0.9468154907226562, "learning_rate": 3.6760807146136796e-06, "loss": 0.1243, "step": 3369 }, { "epoch": 1.0920285158781595, "grad_norm": 0.906830906867981, "learning_rate": 3.675308856813186e-06, "loss": 0.122, "step": 3370 }, { "epoch": 1.092352559948153, "grad_norm": 0.8906958699226379, "learning_rate": 3.6745368551662663e-06, "loss": 0.1191, "step": 3371 }, { "epoch": 1.0926766040181464, "grad_norm": 0.8567639589309692, "learning_rate": 3.6737647097674056e-06, "loss": 0.1204, "step": 3372 }, { "epoch": 1.09300064808814, "grad_norm": 0.7823848128318787, "learning_rate": 3.6729924207111077e-06, "loss": 0.1133, "step": 3373 }, { "epoch": 1.0933246921581334, "grad_norm": 0.8717378973960876, "learning_rate": 3.6722199880918928e-06, "loss": 0.1261, "step": 3374 }, { "epoch": 1.093648736228127, "grad_norm": 0.785054087638855, "learning_rate": 3.6714474120042993e-06, "loss": 0.1128, "step": 3375 }, { "epoch": 1.0939727802981205, "grad_norm": 0.8238503336906433, "learning_rate": 3.6706746925428833e-06, "loss": 0.1205, "step": 3376 }, { "epoch": 1.0942968243681142, "grad_norm": 0.7985695600509644, "learning_rate": 3.6699018298022173e-06, "loss": 0.113, "step": 3377 }, { "epoch": 1.0946208684381076, "grad_norm": 0.8084473013877869, "learning_rate": 3.6691288238768928e-06, "loss": 0.1211, "step": 3378 }, { "epoch": 1.094944912508101, "grad_norm": 0.7871827483177185, "learning_rate": 3.6683556748615196e-06, "loss": 0.1149, "step": 3379 }, { "epoch": 1.0952689565780946, "grad_norm": 0.7735625505447388, "learning_rate": 3.667582382850721e-06, "loss": 0.1085, "step": 3380 }, { "epoch": 1.095593000648088, "grad_norm": 0.8151714205741882, "learning_rate": 3.6668089479391433e-06, "loss": 0.1145, "step": 3381 }, { "epoch": 1.0959170447180817, "grad_norm": 0.8279069662094116, "learning_rate": 3.666035370221445e-06, "loss": 0.11, "step": 3382 }, { "epoch": 1.0962410887880751, "grad_norm": 0.8695924282073975, "learning_rate": 3.665261649792305e-06, "loss": 0.1205, "step": 3383 }, { "epoch": 1.0965651328580688, "grad_norm": 0.7513477802276611, "learning_rate": 3.66448778674642e-06, "loss": 0.1042, "step": 3384 }, { "epoch": 1.0968891769280622, "grad_norm": 0.7814244031906128, "learning_rate": 3.663713781178504e-06, "loss": 0.105, "step": 3385 }, { "epoch": 1.0972132209980558, "grad_norm": 0.8193655610084534, "learning_rate": 3.6629396331832854e-06, "loss": 0.1184, "step": 3386 }, { "epoch": 1.0975372650680493, "grad_norm": 0.8398494124412537, "learning_rate": 3.6621653428555144e-06, "loss": 0.1234, "step": 3387 }, { "epoch": 1.0978613091380427, "grad_norm": 0.8566270470619202, "learning_rate": 3.661390910289956e-06, "loss": 0.1151, "step": 3388 }, { "epoch": 1.0981853532080363, "grad_norm": 0.8087670207023621, "learning_rate": 3.6606163355813935e-06, "loss": 0.114, "step": 3389 }, { "epoch": 1.0985093972780298, "grad_norm": 0.8933915495872498, "learning_rate": 3.6598416188246265e-06, "loss": 0.1168, "step": 3390 }, { "epoch": 1.0988334413480234, "grad_norm": 0.834700345993042, "learning_rate": 3.6590667601144748e-06, "loss": 0.1172, "step": 3391 }, { "epoch": 1.0991574854180168, "grad_norm": 0.7665829658508301, "learning_rate": 3.6582917595457718e-06, "loss": 0.1144, "step": 3392 }, { "epoch": 1.0994815294880103, "grad_norm": 0.8383209705352783, "learning_rate": 3.6575166172133703e-06, "loss": 0.1162, "step": 3393 }, { "epoch": 1.099805573558004, "grad_norm": 0.8014138340950012, "learning_rate": 3.6567413332121402e-06, "loss": 0.1169, "step": 3394 }, { "epoch": 1.1001296176279973, "grad_norm": 0.8607217669487, "learning_rate": 3.655965907636969e-06, "loss": 0.1237, "step": 3395 }, { "epoch": 1.100453661697991, "grad_norm": 0.7848190665245056, "learning_rate": 3.6551903405827615e-06, "loss": 0.1114, "step": 3396 }, { "epoch": 1.1007777057679844, "grad_norm": 0.8079403638839722, "learning_rate": 3.6544146321444397e-06, "loss": 0.1114, "step": 3397 }, { "epoch": 1.101101749837978, "grad_norm": 0.8031437397003174, "learning_rate": 3.653638782416943e-06, "loss": 0.1104, "step": 3398 }, { "epoch": 1.1014257939079715, "grad_norm": 0.8697689175605774, "learning_rate": 3.6528627914952263e-06, "loss": 0.1196, "step": 3399 }, { "epoch": 1.101749837977965, "grad_norm": 0.8094655871391296, "learning_rate": 3.652086659474265e-06, "loss": 0.1161, "step": 3400 }, { "epoch": 1.1020738820479585, "grad_norm": 0.8157943487167358, "learning_rate": 3.6513103864490497e-06, "loss": 0.1213, "step": 3401 }, { "epoch": 1.102397926117952, "grad_norm": 0.806490957736969, "learning_rate": 3.650533972514589e-06, "loss": 0.1206, "step": 3402 }, { "epoch": 1.1027219701879456, "grad_norm": 0.8227203488349915, "learning_rate": 3.6497574177659073e-06, "loss": 0.1115, "step": 3403 }, { "epoch": 1.103046014257939, "grad_norm": 0.8440227508544922, "learning_rate": 3.6489807222980487e-06, "loss": 0.126, "step": 3404 }, { "epoch": 1.1033700583279327, "grad_norm": 0.8067148923873901, "learning_rate": 3.648203886206073e-06, "loss": 0.1138, "step": 3405 }, { "epoch": 1.103694102397926, "grad_norm": 0.8669979572296143, "learning_rate": 3.6474269095850568e-06, "loss": 0.1259, "step": 3406 }, { "epoch": 1.1040181464679197, "grad_norm": 0.8610507249832153, "learning_rate": 3.646649792530094e-06, "loss": 0.115, "step": 3407 }, { "epoch": 1.1043421905379132, "grad_norm": 0.8911367058753967, "learning_rate": 3.645872535136298e-06, "loss": 0.1235, "step": 3408 }, { "epoch": 1.1046662346079066, "grad_norm": 0.7985128164291382, "learning_rate": 3.6450951374987958e-06, "loss": 0.1059, "step": 3409 }, { "epoch": 1.1049902786779002, "grad_norm": 0.8526930809020996, "learning_rate": 3.6443175997127354e-06, "loss": 0.1152, "step": 3410 }, { "epoch": 1.1053143227478937, "grad_norm": 0.8289358019828796, "learning_rate": 3.6435399218732776e-06, "loss": 0.1218, "step": 3411 }, { "epoch": 1.1056383668178873, "grad_norm": 0.8703218102455139, "learning_rate": 3.642762104075604e-06, "loss": 0.1181, "step": 3412 }, { "epoch": 1.1059624108878807, "grad_norm": 0.834434986114502, "learning_rate": 3.641984146414912e-06, "loss": 0.1183, "step": 3413 }, { "epoch": 1.1062864549578744, "grad_norm": 0.8061531782150269, "learning_rate": 3.6412060489864155e-06, "loss": 0.119, "step": 3414 }, { "epoch": 1.1066104990278678, "grad_norm": 0.8192474246025085, "learning_rate": 3.640427811885346e-06, "loss": 0.1126, "step": 3415 }, { "epoch": 1.1069345430978612, "grad_norm": 0.8862019181251526, "learning_rate": 3.639649435206953e-06, "loss": 0.1197, "step": 3416 }, { "epoch": 1.1072585871678549, "grad_norm": 0.8898884057998657, "learning_rate": 3.6388709190465018e-06, "loss": 0.123, "step": 3417 }, { "epoch": 1.1075826312378483, "grad_norm": 0.8126233220100403, "learning_rate": 3.638092263499274e-06, "loss": 0.1195, "step": 3418 }, { "epoch": 1.107906675307842, "grad_norm": 0.7620636224746704, "learning_rate": 3.6373134686605722e-06, "loss": 0.1032, "step": 3419 }, { "epoch": 1.1082307193778353, "grad_norm": 0.7888167500495911, "learning_rate": 3.6365345346257112e-06, "loss": 0.1081, "step": 3420 }, { "epoch": 1.108554763447829, "grad_norm": 0.9147345423698425, "learning_rate": 3.635755461490026e-06, "loss": 0.133, "step": 3421 }, { "epoch": 1.1088788075178224, "grad_norm": 0.823191225528717, "learning_rate": 3.634976249348867e-06, "loss": 0.1205, "step": 3422 }, { "epoch": 1.1092028515878158, "grad_norm": 0.8202910423278809, "learning_rate": 3.6341968982976027e-06, "loss": 0.1154, "step": 3423 }, { "epoch": 1.1095268956578095, "grad_norm": 0.8147830367088318, "learning_rate": 3.6334174084316186e-06, "loss": 0.1082, "step": 3424 }, { "epoch": 1.109850939727803, "grad_norm": 0.8129696846008301, "learning_rate": 3.632637779846315e-06, "loss": 0.1149, "step": 3425 }, { "epoch": 1.1101749837977966, "grad_norm": 0.9183993339538574, "learning_rate": 3.6318580126371124e-06, "loss": 0.1357, "step": 3426 }, { "epoch": 1.11049902786779, "grad_norm": 0.7583103179931641, "learning_rate": 3.631078106899446e-06, "loss": 0.1048, "step": 3427 }, { "epoch": 1.1108230719377836, "grad_norm": 0.8466932773590088, "learning_rate": 3.630298062728769e-06, "loss": 0.1206, "step": 3428 }, { "epoch": 1.111147116007777, "grad_norm": 0.8395487070083618, "learning_rate": 3.6295178802205515e-06, "loss": 0.1238, "step": 3429 }, { "epoch": 1.1114711600777705, "grad_norm": 0.8088542222976685, "learning_rate": 3.62873755947028e-06, "loss": 0.1162, "step": 3430 }, { "epoch": 1.1117952041477641, "grad_norm": 0.9063001275062561, "learning_rate": 3.6279571005734583e-06, "loss": 0.1284, "step": 3431 }, { "epoch": 1.1121192482177575, "grad_norm": 0.774341881275177, "learning_rate": 3.6271765036256064e-06, "loss": 0.0993, "step": 3432 }, { "epoch": 1.1124432922877512, "grad_norm": 0.8491561412811279, "learning_rate": 3.6263957687222633e-06, "loss": 0.1114, "step": 3433 }, { "epoch": 1.1127673363577446, "grad_norm": 0.8198556900024414, "learning_rate": 3.625614895958982e-06, "loss": 0.113, "step": 3434 }, { "epoch": 1.1130913804277383, "grad_norm": 0.7903474569320679, "learning_rate": 3.624833885431334e-06, "loss": 0.113, "step": 3435 }, { "epoch": 1.1134154244977317, "grad_norm": 1.7318416833877563, "learning_rate": 3.624052737234908e-06, "loss": 0.1048, "step": 3436 }, { "epoch": 1.1137394685677253, "grad_norm": 0.8492423295974731, "learning_rate": 3.6232714514653082e-06, "loss": 0.1185, "step": 3437 }, { "epoch": 1.1140635126377187, "grad_norm": 0.7925660014152527, "learning_rate": 3.6224900282181574e-06, "loss": 0.1124, "step": 3438 }, { "epoch": 1.1143875567077122, "grad_norm": 0.847726047039032, "learning_rate": 3.6217084675890935e-06, "loss": 0.1211, "step": 3439 }, { "epoch": 1.1147116007777058, "grad_norm": 0.8837020397186279, "learning_rate": 3.6209267696737723e-06, "loss": 0.1312, "step": 3440 }, { "epoch": 1.1150356448476992, "grad_norm": 0.8512900471687317, "learning_rate": 3.6201449345678657e-06, "loss": 0.1186, "step": 3441 }, { "epoch": 1.1153596889176929, "grad_norm": 0.7909263968467712, "learning_rate": 3.6193629623670627e-06, "loss": 0.1098, "step": 3442 }, { "epoch": 1.1156837329876863, "grad_norm": 0.7999937534332275, "learning_rate": 3.6185808531670695e-06, "loss": 0.1119, "step": 3443 }, { "epoch": 1.1160077770576797, "grad_norm": 0.8300805687904358, "learning_rate": 3.617798607063609e-06, "loss": 0.1162, "step": 3444 }, { "epoch": 1.1163318211276734, "grad_norm": 0.7801008820533752, "learning_rate": 3.61701622415242e-06, "loss": 0.1079, "step": 3445 }, { "epoch": 1.1166558651976668, "grad_norm": 0.8645122051239014, "learning_rate": 3.616233704529259e-06, "loss": 0.1198, "step": 3446 }, { "epoch": 1.1169799092676604, "grad_norm": 0.7718315124511719, "learning_rate": 3.6154510482898973e-06, "loss": 0.1117, "step": 3447 }, { "epoch": 1.1173039533376539, "grad_norm": 0.8447999954223633, "learning_rate": 3.6146682555301266e-06, "loss": 0.13, "step": 3448 }, { "epoch": 1.1176279974076475, "grad_norm": 0.8865242004394531, "learning_rate": 3.613885326345752e-06, "loss": 0.1235, "step": 3449 }, { "epoch": 1.117952041477641, "grad_norm": 0.8368310332298279, "learning_rate": 3.6131022608325973e-06, "loss": 0.115, "step": 3450 }, { "epoch": 1.1182760855476346, "grad_norm": 0.8560866713523865, "learning_rate": 3.6123190590865e-06, "loss": 0.1211, "step": 3451 }, { "epoch": 1.118600129617628, "grad_norm": 0.8064238429069519, "learning_rate": 3.6115357212033196e-06, "loss": 0.1116, "step": 3452 }, { "epoch": 1.1189241736876214, "grad_norm": 0.808874249458313, "learning_rate": 3.610752247278927e-06, "loss": 0.1171, "step": 3453 }, { "epoch": 1.119248217757615, "grad_norm": 0.814598023891449, "learning_rate": 3.609968637409212e-06, "loss": 0.1143, "step": 3454 }, { "epoch": 1.1195722618276085, "grad_norm": 0.8557067513465881, "learning_rate": 3.6091848916900816e-06, "loss": 0.1184, "step": 3455 }, { "epoch": 1.1198963058976021, "grad_norm": 0.8691409230232239, "learning_rate": 3.6084010102174576e-06, "loss": 0.1223, "step": 3456 }, { "epoch": 1.1202203499675956, "grad_norm": 0.8462197780609131, "learning_rate": 3.6076169930872805e-06, "loss": 0.1197, "step": 3457 }, { "epoch": 1.1205443940375892, "grad_norm": 0.9942762851715088, "learning_rate": 3.606832840395506e-06, "loss": 0.1159, "step": 3458 }, { "epoch": 1.1208684381075826, "grad_norm": 0.8324168920516968, "learning_rate": 3.6060485522381067e-06, "loss": 0.12, "step": 3459 }, { "epoch": 1.121192482177576, "grad_norm": 0.8624020218849182, "learning_rate": 3.605264128711072e-06, "loss": 0.124, "step": 3460 }, { "epoch": 1.1215165262475697, "grad_norm": 0.7688712477684021, "learning_rate": 3.6044795699104074e-06, "loss": 0.1082, "step": 3461 }, { "epoch": 1.1218405703175631, "grad_norm": 0.8165678977966309, "learning_rate": 3.6036948759321357e-06, "loss": 0.1111, "step": 3462 }, { "epoch": 1.1221646143875568, "grad_norm": 0.8439948558807373, "learning_rate": 3.6029100468722954e-06, "loss": 0.1068, "step": 3463 }, { "epoch": 1.1224886584575502, "grad_norm": 0.863638699054718, "learning_rate": 3.602125082826944e-06, "loss": 0.1218, "step": 3464 }, { "epoch": 1.1228127025275438, "grad_norm": 0.8242613673210144, "learning_rate": 3.60133998389215e-06, "loss": 0.1168, "step": 3465 }, { "epoch": 1.1231367465975373, "grad_norm": 0.8749831318855286, "learning_rate": 3.600554750164005e-06, "loss": 0.12, "step": 3466 }, { "epoch": 1.1234607906675307, "grad_norm": 0.853952169418335, "learning_rate": 3.5997693817386128e-06, "loss": 0.1169, "step": 3467 }, { "epoch": 1.1237848347375243, "grad_norm": 0.8400661945343018, "learning_rate": 3.598983878712094e-06, "loss": 0.1206, "step": 3468 }, { "epoch": 1.1241088788075178, "grad_norm": 0.7379463315010071, "learning_rate": 3.598198241180588e-06, "loss": 0.1027, "step": 3469 }, { "epoch": 1.1244329228775114, "grad_norm": 0.8917940258979797, "learning_rate": 3.597412469240248e-06, "loss": 0.1169, "step": 3470 }, { "epoch": 1.1247569669475048, "grad_norm": 0.8408751487731934, "learning_rate": 3.5966265629872466e-06, "loss": 0.1189, "step": 3471 }, { "epoch": 1.1250810110174985, "grad_norm": 0.8500704765319824, "learning_rate": 3.595840522517769e-06, "loss": 0.123, "step": 3472 }, { "epoch": 1.125405055087492, "grad_norm": 0.7802397608757019, "learning_rate": 3.5950543479280205e-06, "loss": 0.1088, "step": 3473 }, { "epoch": 1.1257290991574855, "grad_norm": 0.8065983653068542, "learning_rate": 3.5942680393142203e-06, "loss": 0.1171, "step": 3474 }, { "epoch": 1.126053143227479, "grad_norm": 0.7985681891441345, "learning_rate": 3.593481596772606e-06, "loss": 0.1143, "step": 3475 }, { "epoch": 1.1263771872974724, "grad_norm": 0.8253538608551025, "learning_rate": 3.5926950203994303e-06, "loss": 0.1088, "step": 3476 }, { "epoch": 1.126701231367466, "grad_norm": 0.7611861824989319, "learning_rate": 3.5919083102909615e-06, "loss": 0.1148, "step": 3477 }, { "epoch": 1.1270252754374595, "grad_norm": 0.8118978142738342, "learning_rate": 3.591121466543487e-06, "loss": 0.1166, "step": 3478 }, { "epoch": 1.127349319507453, "grad_norm": 0.8605949282646179, "learning_rate": 3.5903344892533067e-06, "loss": 0.1075, "step": 3479 }, { "epoch": 1.1276733635774465, "grad_norm": 0.8666369915008545, "learning_rate": 3.5895473785167407e-06, "loss": 0.1152, "step": 3480 }, { "epoch": 1.12799740764744, "grad_norm": 0.8612379431724548, "learning_rate": 3.5887601344301228e-06, "loss": 0.1238, "step": 3481 }, { "epoch": 1.1283214517174336, "grad_norm": 0.8836613893508911, "learning_rate": 3.587972757089805e-06, "loss": 0.1226, "step": 3482 }, { "epoch": 1.128645495787427, "grad_norm": 0.8429580926895142, "learning_rate": 3.587185246592154e-06, "loss": 0.1138, "step": 3483 }, { "epoch": 1.1289695398574207, "grad_norm": 0.8613108396530151, "learning_rate": 3.5863976030335535e-06, "loss": 0.1205, "step": 3484 }, { "epoch": 1.129293583927414, "grad_norm": 0.8061350584030151, "learning_rate": 3.5856098265104033e-06, "loss": 0.1081, "step": 3485 }, { "epoch": 1.1296176279974077, "grad_norm": 0.755104124546051, "learning_rate": 3.58482191711912e-06, "loss": 0.1155, "step": 3486 }, { "epoch": 1.1299416720674011, "grad_norm": 0.9006972908973694, "learning_rate": 3.5840338749561365e-06, "loss": 0.1212, "step": 3487 }, { "epoch": 1.1302657161373948, "grad_norm": 0.7697506546974182, "learning_rate": 3.5832457001179e-06, "loss": 0.1079, "step": 3488 }, { "epoch": 1.1305897602073882, "grad_norm": 0.920031726360321, "learning_rate": 3.582457392700878e-06, "loss": 0.1285, "step": 3489 }, { "epoch": 1.1309138042773816, "grad_norm": 0.8720796704292297, "learning_rate": 3.5816689528015485e-06, "loss": 0.1183, "step": 3490 }, { "epoch": 1.1312378483473753, "grad_norm": 0.8361836075782776, "learning_rate": 3.580880380516411e-06, "loss": 0.1163, "step": 3491 }, { "epoch": 1.1315618924173687, "grad_norm": 0.900898814201355, "learning_rate": 3.5800916759419784e-06, "loss": 0.1269, "step": 3492 }, { "epoch": 1.1318859364873624, "grad_norm": 0.9403854608535767, "learning_rate": 3.579302839174781e-06, "loss": 0.1318, "step": 3493 }, { "epoch": 1.1322099805573558, "grad_norm": 0.8255032300949097, "learning_rate": 3.578513870311365e-06, "loss": 0.1165, "step": 3494 }, { "epoch": 1.1325340246273492, "grad_norm": 0.8372628092765808, "learning_rate": 3.577724769448292e-06, "loss": 0.1245, "step": 3495 }, { "epoch": 1.1328580686973428, "grad_norm": 0.801400363445282, "learning_rate": 3.57693553668214e-06, "loss": 0.1133, "step": 3496 }, { "epoch": 1.1331821127673363, "grad_norm": 0.832352340221405, "learning_rate": 3.5761461721095037e-06, "loss": 0.1279, "step": 3497 }, { "epoch": 1.13350615683733, "grad_norm": 0.9028059840202332, "learning_rate": 3.575356675826995e-06, "loss": 0.1193, "step": 3498 }, { "epoch": 1.1338302009073233, "grad_norm": 0.837671160697937, "learning_rate": 3.574567047931238e-06, "loss": 0.1244, "step": 3499 }, { "epoch": 1.134154244977317, "grad_norm": 0.7758857011795044, "learning_rate": 3.5737772885188777e-06, "loss": 0.1149, "step": 3500 }, { "epoch": 1.1344782890473104, "grad_norm": 0.8156300783157349, "learning_rate": 3.5729873976865726e-06, "loss": 0.1178, "step": 3501 }, { "epoch": 1.134802333117304, "grad_norm": 0.8366619944572449, "learning_rate": 3.5721973755309963e-06, "loss": 0.1186, "step": 3502 }, { "epoch": 1.1351263771872975, "grad_norm": 0.7583264708518982, "learning_rate": 3.5714072221488414e-06, "loss": 0.1086, "step": 3503 }, { "epoch": 1.135450421257291, "grad_norm": 0.8399912714958191, "learning_rate": 3.5706169376368143e-06, "loss": 0.1206, "step": 3504 }, { "epoch": 1.1357744653272845, "grad_norm": 0.8327789306640625, "learning_rate": 3.5698265220916388e-06, "loss": 0.123, "step": 3505 }, { "epoch": 1.136098509397278, "grad_norm": 0.8301259875297546, "learning_rate": 3.5690359756100532e-06, "loss": 0.1242, "step": 3506 }, { "epoch": 1.1364225534672716, "grad_norm": 0.8315457701683044, "learning_rate": 3.5682452982888143e-06, "loss": 0.1241, "step": 3507 }, { "epoch": 1.136746597537265, "grad_norm": 0.8232241868972778, "learning_rate": 3.5674544902246916e-06, "loss": 0.121, "step": 3508 }, { "epoch": 1.1370706416072587, "grad_norm": 0.8107770085334778, "learning_rate": 3.566663551514473e-06, "loss": 0.1149, "step": 3509 }, { "epoch": 1.137394685677252, "grad_norm": 0.7625928521156311, "learning_rate": 3.5658724822549624e-06, "loss": 0.1046, "step": 3510 }, { "epoch": 1.1377187297472457, "grad_norm": 0.8264610767364502, "learning_rate": 3.5650812825429774e-06, "loss": 0.1142, "step": 3511 }, { "epoch": 1.1380427738172392, "grad_norm": 0.8182802200317383, "learning_rate": 3.5642899524753548e-06, "loss": 0.1156, "step": 3512 }, { "epoch": 1.1383668178872326, "grad_norm": 0.8160684704780579, "learning_rate": 3.5634984921489455e-06, "loss": 0.1112, "step": 3513 }, { "epoch": 1.1386908619572262, "grad_norm": 0.8345882296562195, "learning_rate": 3.562706901660616e-06, "loss": 0.1198, "step": 3514 }, { "epoch": 1.1390149060272197, "grad_norm": 0.7987375259399414, "learning_rate": 3.561915181107249e-06, "loss": 0.1113, "step": 3515 }, { "epoch": 1.1393389500972133, "grad_norm": 0.8715707659721375, "learning_rate": 3.561123330585744e-06, "loss": 0.1209, "step": 3516 }, { "epoch": 1.1396629941672067, "grad_norm": 0.9261237978935242, "learning_rate": 3.560331350193016e-06, "loss": 0.1182, "step": 3517 }, { "epoch": 1.1399870382372002, "grad_norm": 0.8216531872749329, "learning_rate": 3.5595392400259963e-06, "loss": 0.1148, "step": 3518 }, { "epoch": 1.1403110823071938, "grad_norm": 0.7825983166694641, "learning_rate": 3.55874700018163e-06, "loss": 0.1094, "step": 3519 }, { "epoch": 1.1406351263771872, "grad_norm": 0.932933509349823, "learning_rate": 3.5579546307568807e-06, "loss": 0.1231, "step": 3520 }, { "epoch": 1.1409591704471809, "grad_norm": 0.8715762495994568, "learning_rate": 3.557162131848726e-06, "loss": 0.1248, "step": 3521 }, { "epoch": 1.1412832145171743, "grad_norm": 0.8369512557983398, "learning_rate": 3.5563695035541607e-06, "loss": 0.1064, "step": 3522 }, { "epoch": 1.141607258587168, "grad_norm": 0.739835262298584, "learning_rate": 3.5555767459701946e-06, "loss": 0.1117, "step": 3523 }, { "epoch": 1.1419313026571614, "grad_norm": 0.8349111676216125, "learning_rate": 3.554783859193853e-06, "loss": 0.1152, "step": 3524 }, { "epoch": 1.142255346727155, "grad_norm": 0.8169111013412476, "learning_rate": 3.5539908433221793e-06, "loss": 0.1142, "step": 3525 }, { "epoch": 1.1425793907971484, "grad_norm": 0.804642915725708, "learning_rate": 3.553197698452229e-06, "loss": 0.1133, "step": 3526 }, { "epoch": 1.1429034348671419, "grad_norm": 0.8334620594978333, "learning_rate": 3.5524044246810764e-06, "loss": 0.1196, "step": 3527 }, { "epoch": 1.1432274789371355, "grad_norm": 0.8267092704772949, "learning_rate": 3.5516110221058096e-06, "loss": 0.118, "step": 3528 }, { "epoch": 1.143551523007129, "grad_norm": 0.7908319234848022, "learning_rate": 3.550817490823535e-06, "loss": 0.1122, "step": 3529 }, { "epoch": 1.1438755670771226, "grad_norm": 0.8376598954200745, "learning_rate": 3.5500238309313717e-06, "loss": 0.1122, "step": 3530 }, { "epoch": 1.144199611147116, "grad_norm": 0.8465738296508789, "learning_rate": 3.5492300425264574e-06, "loss": 0.1159, "step": 3531 }, { "epoch": 1.1445236552171094, "grad_norm": 0.8763962388038635, "learning_rate": 3.5484361257059425e-06, "loss": 0.1256, "step": 3532 }, { "epoch": 1.144847699287103, "grad_norm": 0.8678879737854004, "learning_rate": 3.5476420805669953e-06, "loss": 0.1215, "step": 3533 }, { "epoch": 1.1451717433570965, "grad_norm": 0.8365668058395386, "learning_rate": 3.5468479072067996e-06, "loss": 0.1156, "step": 3534 }, { "epoch": 1.1454957874270901, "grad_norm": 0.817199170589447, "learning_rate": 3.5460536057225542e-06, "loss": 0.1117, "step": 3535 }, { "epoch": 1.1458198314970836, "grad_norm": 0.8301751017570496, "learning_rate": 3.545259176211474e-06, "loss": 0.1111, "step": 3536 }, { "epoch": 1.1461438755670772, "grad_norm": 0.845215916633606, "learning_rate": 3.5444646187707897e-06, "loss": 0.1216, "step": 3537 }, { "epoch": 1.1464679196370706, "grad_norm": 0.8913041353225708, "learning_rate": 3.5436699334977476e-06, "loss": 0.1364, "step": 3538 }, { "epoch": 1.1467919637070643, "grad_norm": 0.7893886566162109, "learning_rate": 3.5428751204896083e-06, "loss": 0.1106, "step": 3539 }, { "epoch": 1.1471160077770577, "grad_norm": 0.8129891753196716, "learning_rate": 3.542080179843651e-06, "loss": 0.1157, "step": 3540 }, { "epoch": 1.1474400518470511, "grad_norm": 0.8534232974052429, "learning_rate": 3.5412851116571673e-06, "loss": 0.1198, "step": 3541 }, { "epoch": 1.1477640959170448, "grad_norm": 0.8511255383491516, "learning_rate": 3.5404899160274664e-06, "loss": 0.1205, "step": 3542 }, { "epoch": 1.1480881399870382, "grad_norm": 0.8838348984718323, "learning_rate": 3.5396945930518722e-06, "loss": 0.1295, "step": 3543 }, { "epoch": 1.1484121840570318, "grad_norm": 0.8620544672012329, "learning_rate": 3.538899142827726e-06, "loss": 0.1107, "step": 3544 }, { "epoch": 1.1487362281270252, "grad_norm": 0.8188989162445068, "learning_rate": 3.538103565452381e-06, "loss": 0.1173, "step": 3545 }, { "epoch": 1.1490602721970187, "grad_norm": 0.8156138062477112, "learning_rate": 3.537307861023209e-06, "loss": 0.1171, "step": 3546 }, { "epoch": 1.1493843162670123, "grad_norm": 0.7772921323776245, "learning_rate": 3.536512029637597e-06, "loss": 0.1117, "step": 3547 }, { "epoch": 1.1497083603370057, "grad_norm": 0.9473240971565247, "learning_rate": 3.5357160713929473e-06, "loss": 0.123, "step": 3548 }, { "epoch": 1.1500324044069994, "grad_norm": 0.8777554035186768, "learning_rate": 3.534919986386676e-06, "loss": 0.1224, "step": 3549 }, { "epoch": 1.1503564484769928, "grad_norm": 0.825102686882019, "learning_rate": 3.5341237747162183e-06, "loss": 0.1251, "step": 3550 }, { "epoch": 1.1506804925469865, "grad_norm": 0.8877979516983032, "learning_rate": 3.533327436479021e-06, "loss": 0.1228, "step": 3551 }, { "epoch": 1.1510045366169799, "grad_norm": 0.9140046238899231, "learning_rate": 3.53253097177255e-06, "loss": 0.13, "step": 3552 }, { "epoch": 1.1513285806869735, "grad_norm": 0.9374979734420776, "learning_rate": 3.531734380694282e-06, "loss": 0.1281, "step": 3553 }, { "epoch": 1.151652624756967, "grad_norm": 0.8636061549186707, "learning_rate": 3.5309376633417146e-06, "loss": 0.1286, "step": 3554 }, { "epoch": 1.1519766688269604, "grad_norm": 0.7899109721183777, "learning_rate": 3.530140819812357e-06, "loss": 0.1173, "step": 3555 }, { "epoch": 1.152300712896954, "grad_norm": 0.819510281085968, "learning_rate": 3.5293438502037363e-06, "loss": 0.1186, "step": 3556 }, { "epoch": 1.1526247569669474, "grad_norm": 0.8103784322738647, "learning_rate": 3.5285467546133926e-06, "loss": 0.1182, "step": 3557 }, { "epoch": 1.152948801036941, "grad_norm": 0.8102884888648987, "learning_rate": 3.5277495331388835e-06, "loss": 0.1167, "step": 3558 }, { "epoch": 1.1532728451069345, "grad_norm": 0.8475920557975769, "learning_rate": 3.526952185877781e-06, "loss": 0.1234, "step": 3559 }, { "epoch": 1.1535968891769282, "grad_norm": 0.8501052856445312, "learning_rate": 3.526154712927672e-06, "loss": 0.1218, "step": 3560 }, { "epoch": 1.1539209332469216, "grad_norm": 0.7630634307861328, "learning_rate": 3.525357114386161e-06, "loss": 0.1099, "step": 3561 }, { "epoch": 1.1542449773169152, "grad_norm": 0.8789275884628296, "learning_rate": 3.524559390350865e-06, "loss": 0.12, "step": 3562 }, { "epoch": 1.1545690213869086, "grad_norm": 0.7459178566932678, "learning_rate": 3.523761540919418e-06, "loss": 0.1022, "step": 3563 }, { "epoch": 1.154893065456902, "grad_norm": 0.7126516103744507, "learning_rate": 3.5229635661894696e-06, "loss": 0.1033, "step": 3564 }, { "epoch": 1.1552171095268957, "grad_norm": 0.7776930928230286, "learning_rate": 3.5221654662586837e-06, "loss": 0.1093, "step": 3565 }, { "epoch": 1.1555411535968891, "grad_norm": 0.7800213694572449, "learning_rate": 3.521367241224739e-06, "loss": 0.1102, "step": 3566 }, { "epoch": 1.1558651976668828, "grad_norm": 0.7869005799293518, "learning_rate": 3.5205688911853326e-06, "loss": 0.105, "step": 3567 }, { "epoch": 1.1561892417368762, "grad_norm": 0.8360563516616821, "learning_rate": 3.5197704162381742e-06, "loss": 0.1201, "step": 3568 }, { "epoch": 1.1565132858068696, "grad_norm": 0.8422601222991943, "learning_rate": 3.5189718164809884e-06, "loss": 0.1286, "step": 3569 }, { "epoch": 1.1568373298768633, "grad_norm": 0.8678613901138306, "learning_rate": 3.5181730920115165e-06, "loss": 0.1163, "step": 3570 }, { "epoch": 1.1571613739468567, "grad_norm": 0.8137229681015015, "learning_rate": 3.517374242927514e-06, "loss": 0.1132, "step": 3571 }, { "epoch": 1.1574854180168503, "grad_norm": 0.7828996181488037, "learning_rate": 3.516575269326755e-06, "loss": 0.1156, "step": 3572 }, { "epoch": 1.1578094620868438, "grad_norm": 0.7514994740486145, "learning_rate": 3.515776171307023e-06, "loss": 0.1053, "step": 3573 }, { "epoch": 1.1581335061568374, "grad_norm": 0.7747782468795776, "learning_rate": 3.5149769489661216e-06, "loss": 0.1069, "step": 3574 }, { "epoch": 1.1584575502268308, "grad_norm": 0.7957112193107605, "learning_rate": 3.5141776024018676e-06, "loss": 0.1064, "step": 3575 }, { "epoch": 1.1587815942968245, "grad_norm": 0.8027356863021851, "learning_rate": 3.513378131712092e-06, "loss": 0.1126, "step": 3576 }, { "epoch": 1.159105638366818, "grad_norm": 0.9086324572563171, "learning_rate": 3.5125785369946442e-06, "loss": 0.1256, "step": 3577 }, { "epoch": 1.1594296824368113, "grad_norm": 0.7713324427604675, "learning_rate": 3.5117788183473856e-06, "loss": 0.1096, "step": 3578 }, { "epoch": 1.159753726506805, "grad_norm": 0.8583053350448608, "learning_rate": 3.5109789758681944e-06, "loss": 0.1113, "step": 3579 }, { "epoch": 1.1600777705767984, "grad_norm": 0.831057071685791, "learning_rate": 3.5101790096549643e-06, "loss": 0.1163, "step": 3580 }, { "epoch": 1.160401814646792, "grad_norm": 0.7998328804969788, "learning_rate": 3.509378919805602e-06, "loss": 0.1171, "step": 3581 }, { "epoch": 1.1607258587167855, "grad_norm": 0.8440855741500854, "learning_rate": 3.5085787064180317e-06, "loss": 0.122, "step": 3582 }, { "epoch": 1.1610499027867789, "grad_norm": 0.8473719358444214, "learning_rate": 3.5077783695901917e-06, "loss": 0.1072, "step": 3583 }, { "epoch": 1.1613739468567725, "grad_norm": 0.7954153418540955, "learning_rate": 3.506977909420035e-06, "loss": 0.1138, "step": 3584 }, { "epoch": 1.161697990926766, "grad_norm": 0.836928129196167, "learning_rate": 3.506177326005531e-06, "loss": 0.1263, "step": 3585 }, { "epoch": 1.1620220349967596, "grad_norm": 0.8505675196647644, "learning_rate": 3.5053766194446626e-06, "loss": 0.1155, "step": 3586 }, { "epoch": 1.162346079066753, "grad_norm": 0.8441529273986816, "learning_rate": 3.504575789835428e-06, "loss": 0.1185, "step": 3587 }, { "epoch": 1.1626701231367467, "grad_norm": 0.8499163389205933, "learning_rate": 3.503774837275843e-06, "loss": 0.115, "step": 3588 }, { "epoch": 1.16299416720674, "grad_norm": 0.8068932294845581, "learning_rate": 3.5029737618639344e-06, "loss": 0.1064, "step": 3589 }, { "epoch": 1.1633182112767337, "grad_norm": 0.8755378723144531, "learning_rate": 3.5021725636977466e-06, "loss": 0.1189, "step": 3590 }, { "epoch": 1.1636422553467272, "grad_norm": 0.8903499841690063, "learning_rate": 3.5013712428753392e-06, "loss": 0.1253, "step": 3591 }, { "epoch": 1.1639662994167206, "grad_norm": 0.7750681042671204, "learning_rate": 3.500569799494786e-06, "loss": 0.1071, "step": 3592 }, { "epoch": 1.1642903434867142, "grad_norm": 0.7885310649871826, "learning_rate": 3.4997682336541756e-06, "loss": 0.1155, "step": 3593 }, { "epoch": 1.1646143875567077, "grad_norm": 0.8886268138885498, "learning_rate": 3.498966545451612e-06, "loss": 0.1259, "step": 3594 }, { "epoch": 1.1649384316267013, "grad_norm": 0.8036131858825684, "learning_rate": 3.4981647349852137e-06, "loss": 0.1145, "step": 3595 }, { "epoch": 1.1652624756966947, "grad_norm": 0.7636457681655884, "learning_rate": 3.4973628023531146e-06, "loss": 0.0985, "step": 3596 }, { "epoch": 1.1655865197666881, "grad_norm": 0.8249123692512512, "learning_rate": 3.496560747653464e-06, "loss": 0.1203, "step": 3597 }, { "epoch": 1.1659105638366818, "grad_norm": 0.8131135702133179, "learning_rate": 3.4957585709844254e-06, "loss": 0.1138, "step": 3598 }, { "epoch": 1.1662346079066752, "grad_norm": 0.8088524341583252, "learning_rate": 3.494956272444177e-06, "loss": 0.1147, "step": 3599 }, { "epoch": 1.1665586519766689, "grad_norm": 0.8252901434898376, "learning_rate": 3.494153852130913e-06, "loss": 0.1206, "step": 3600 }, { "epoch": 1.1668826960466623, "grad_norm": 0.7895000576972961, "learning_rate": 3.4933513101428416e-06, "loss": 0.1061, "step": 3601 }, { "epoch": 1.167206740116656, "grad_norm": 0.8485018014907837, "learning_rate": 3.4925486465781865e-06, "loss": 0.1161, "step": 3602 }, { "epoch": 1.1675307841866494, "grad_norm": 0.9247622489929199, "learning_rate": 3.4917458615351853e-06, "loss": 0.1252, "step": 3603 }, { "epoch": 1.167854828256643, "grad_norm": 0.8563806414604187, "learning_rate": 3.490942955112092e-06, "loss": 0.114, "step": 3604 }, { "epoch": 1.1681788723266364, "grad_norm": 0.9615631699562073, "learning_rate": 3.490139927407174e-06, "loss": 0.1387, "step": 3605 }, { "epoch": 1.1685029163966298, "grad_norm": 0.8398370742797852, "learning_rate": 3.4893367785187137e-06, "loss": 0.122, "step": 3606 }, { "epoch": 1.1688269604666235, "grad_norm": 0.8580317497253418, "learning_rate": 3.4885335085450095e-06, "loss": 0.1137, "step": 3607 }, { "epoch": 1.169151004536617, "grad_norm": 0.8144624829292297, "learning_rate": 3.4877301175843735e-06, "loss": 0.1166, "step": 3608 }, { "epoch": 1.1694750486066106, "grad_norm": 0.8784302473068237, "learning_rate": 3.486926605735133e-06, "loss": 0.123, "step": 3609 }, { "epoch": 1.169799092676604, "grad_norm": 0.8242217898368835, "learning_rate": 3.486122973095631e-06, "loss": 0.1254, "step": 3610 }, { "epoch": 1.1701231367465976, "grad_norm": 0.8689938187599182, "learning_rate": 3.4853192197642226e-06, "loss": 0.1201, "step": 3611 }, { "epoch": 1.170447180816591, "grad_norm": 0.804336667060852, "learning_rate": 3.48451534583928e-06, "loss": 0.1263, "step": 3612 }, { "epoch": 1.1707712248865847, "grad_norm": 0.8793946504592896, "learning_rate": 3.4837113514191907e-06, "loss": 0.1355, "step": 3613 }, { "epoch": 1.1710952689565781, "grad_norm": 0.8660229444503784, "learning_rate": 3.482907236602354e-06, "loss": 0.1216, "step": 3614 }, { "epoch": 1.1714193130265715, "grad_norm": 0.7956066727638245, "learning_rate": 3.4821030014871886e-06, "loss": 0.1165, "step": 3615 }, { "epoch": 1.1717433570965652, "grad_norm": 0.8330211639404297, "learning_rate": 3.481298646172122e-06, "loss": 0.1127, "step": 3616 }, { "epoch": 1.1720674011665586, "grad_norm": 0.7992603182792664, "learning_rate": 3.480494170755602e-06, "loss": 0.112, "step": 3617 }, { "epoch": 1.1723914452365523, "grad_norm": 0.7594689726829529, "learning_rate": 3.479689575336086e-06, "loss": 0.1078, "step": 3618 }, { "epoch": 1.1727154893065457, "grad_norm": 0.8167518973350525, "learning_rate": 3.4788848600120507e-06, "loss": 0.1056, "step": 3619 }, { "epoch": 1.173039533376539, "grad_norm": 0.8602342009544373, "learning_rate": 3.4780800248819847e-06, "loss": 0.118, "step": 3620 }, { "epoch": 1.1733635774465327, "grad_norm": 0.8619323968887329, "learning_rate": 3.4772750700443923e-06, "loss": 0.1171, "step": 3621 }, { "epoch": 1.1736876215165262, "grad_norm": 0.8086685538291931, "learning_rate": 3.476469995597792e-06, "loss": 0.1093, "step": 3622 }, { "epoch": 1.1740116655865198, "grad_norm": 0.8759011030197144, "learning_rate": 3.4756648016407175e-06, "loss": 0.1131, "step": 3623 }, { "epoch": 1.1743357096565132, "grad_norm": 0.8564737439155579, "learning_rate": 3.4748594882717163e-06, "loss": 0.1182, "step": 3624 }, { "epoch": 1.1746597537265069, "grad_norm": 0.8285084962844849, "learning_rate": 3.474054055589351e-06, "loss": 0.1075, "step": 3625 }, { "epoch": 1.1749837977965003, "grad_norm": 0.932595431804657, "learning_rate": 3.473248503692199e-06, "loss": 0.1289, "step": 3626 }, { "epoch": 1.175307841866494, "grad_norm": 0.844597578048706, "learning_rate": 3.472442832678852e-06, "loss": 0.121, "step": 3627 }, { "epoch": 1.1756318859364874, "grad_norm": 0.9832401275634766, "learning_rate": 3.471637042647916e-06, "loss": 0.1271, "step": 3628 }, { "epoch": 1.1759559300064808, "grad_norm": 0.880580484867096, "learning_rate": 3.470831133698013e-06, "loss": 0.1166, "step": 3629 }, { "epoch": 1.1762799740764744, "grad_norm": 0.8617246150970459, "learning_rate": 3.470025105927777e-06, "loss": 0.1141, "step": 3630 }, { "epoch": 1.1766040181464679, "grad_norm": 0.8869532346725464, "learning_rate": 3.4692189594358578e-06, "loss": 0.1227, "step": 3631 }, { "epoch": 1.1769280622164615, "grad_norm": 0.8361830711364746, "learning_rate": 3.468412694320921e-06, "loss": 0.1171, "step": 3632 }, { "epoch": 1.177252106286455, "grad_norm": 0.8260972499847412, "learning_rate": 3.467606310681646e-06, "loss": 0.1133, "step": 3633 }, { "epoch": 1.1775761503564484, "grad_norm": 0.8462033867835999, "learning_rate": 3.4667998086167253e-06, "loss": 0.1181, "step": 3634 }, { "epoch": 1.177900194426442, "grad_norm": 0.826924204826355, "learning_rate": 3.465993188224868e-06, "loss": 0.1122, "step": 3635 }, { "epoch": 1.1782242384964354, "grad_norm": 0.7624189853668213, "learning_rate": 3.4651864496047952e-06, "loss": 0.1142, "step": 3636 }, { "epoch": 1.178548282566429, "grad_norm": 0.8189399242401123, "learning_rate": 3.464379592855246e-06, "loss": 0.1198, "step": 3637 }, { "epoch": 1.1788723266364225, "grad_norm": 0.8579363822937012, "learning_rate": 3.4635726180749698e-06, "loss": 0.1197, "step": 3638 }, { "epoch": 1.1791963707064161, "grad_norm": 0.8414760231971741, "learning_rate": 3.4627655253627324e-06, "loss": 0.1269, "step": 3639 }, { "epoch": 1.1795204147764096, "grad_norm": 0.8462185263633728, "learning_rate": 3.461958314817316e-06, "loss": 0.1205, "step": 3640 }, { "epoch": 1.1798444588464032, "grad_norm": 0.8419263958930969, "learning_rate": 3.4611509865375143e-06, "loss": 0.1262, "step": 3641 }, { "epoch": 1.1801685029163966, "grad_norm": 0.8019840121269226, "learning_rate": 3.4603435406221356e-06, "loss": 0.1171, "step": 3642 }, { "epoch": 1.18049254698639, "grad_norm": 0.850952684879303, "learning_rate": 3.4595359771700055e-06, "loss": 0.1245, "step": 3643 }, { "epoch": 1.1808165910563837, "grad_norm": 0.7618928551673889, "learning_rate": 3.4587282962799602e-06, "loss": 0.1056, "step": 3644 }, { "epoch": 1.1811406351263771, "grad_norm": 0.7782850861549377, "learning_rate": 3.4579204980508525e-06, "loss": 0.1109, "step": 3645 }, { "epoch": 1.1814646791963708, "grad_norm": 0.8100502490997314, "learning_rate": 3.45711258258155e-06, "loss": 0.1163, "step": 3646 }, { "epoch": 1.1817887232663642, "grad_norm": 0.9370281100273132, "learning_rate": 3.4563045499709324e-06, "loss": 0.1319, "step": 3647 }, { "epoch": 1.1821127673363578, "grad_norm": 0.8365497589111328, "learning_rate": 3.455496400317896e-06, "loss": 0.1188, "step": 3648 }, { "epoch": 1.1824368114063513, "grad_norm": 0.8149347305297852, "learning_rate": 3.45468813372135e-06, "loss": 0.1127, "step": 3649 }, { "epoch": 1.1827608554763447, "grad_norm": 0.815455436706543, "learning_rate": 3.453879750280218e-06, "loss": 0.1072, "step": 3650 }, { "epoch": 1.1830848995463383, "grad_norm": 0.8272018432617188, "learning_rate": 3.4530712500934393e-06, "loss": 0.1172, "step": 3651 }, { "epoch": 1.1834089436163318, "grad_norm": 0.8645578622817993, "learning_rate": 3.4522626332599657e-06, "loss": 0.1261, "step": 3652 }, { "epoch": 1.1837329876863254, "grad_norm": 0.8879179954528809, "learning_rate": 3.451453899878765e-06, "loss": 0.1229, "step": 3653 }, { "epoch": 1.1840570317563188, "grad_norm": 0.8350304961204529, "learning_rate": 3.450645050048817e-06, "loss": 0.116, "step": 3654 }, { "epoch": 1.1843810758263125, "grad_norm": 0.7642476558685303, "learning_rate": 3.449836083869118e-06, "loss": 0.1096, "step": 3655 }, { "epoch": 1.184705119896306, "grad_norm": 0.7937090396881104, "learning_rate": 3.449027001438678e-06, "loss": 0.1167, "step": 3656 }, { "epoch": 1.1850291639662993, "grad_norm": 0.9045467972755432, "learning_rate": 3.44821780285652e-06, "loss": 0.1261, "step": 3657 }, { "epoch": 1.185353208036293, "grad_norm": 0.8645219206809998, "learning_rate": 3.4474084882216826e-06, "loss": 0.1329, "step": 3658 }, { "epoch": 1.1856772521062864, "grad_norm": 0.8591427206993103, "learning_rate": 3.4465990576332177e-06, "loss": 0.1254, "step": 3659 }, { "epoch": 1.18600129617628, "grad_norm": 0.8770203590393066, "learning_rate": 3.445789511190192e-06, "loss": 0.122, "step": 3660 }, { "epoch": 1.1863253402462735, "grad_norm": 0.8709147572517395, "learning_rate": 3.4449798489916856e-06, "loss": 0.1274, "step": 3661 }, { "epoch": 1.186649384316267, "grad_norm": 0.8532130122184753, "learning_rate": 3.444170071136794e-06, "loss": 0.1276, "step": 3662 }, { "epoch": 1.1869734283862605, "grad_norm": 0.831161618232727, "learning_rate": 3.4433601777246263e-06, "loss": 0.122, "step": 3663 }, { "epoch": 1.1872974724562542, "grad_norm": 0.7930514812469482, "learning_rate": 3.442550168854305e-06, "loss": 0.1182, "step": 3664 }, { "epoch": 1.1876215165262476, "grad_norm": 0.7588174939155579, "learning_rate": 3.4417400446249684e-06, "loss": 0.1083, "step": 3665 }, { "epoch": 1.187945560596241, "grad_norm": 0.8680461645126343, "learning_rate": 3.440929805135766e-06, "loss": 0.1298, "step": 3666 }, { "epoch": 1.1882696046662347, "grad_norm": 0.8464110493659973, "learning_rate": 3.440119450485865e-06, "loss": 0.1155, "step": 3667 }, { "epoch": 1.188593648736228, "grad_norm": 0.8591915369033813, "learning_rate": 3.439308980774444e-06, "loss": 0.1267, "step": 3668 }, { "epoch": 1.1889176928062217, "grad_norm": 0.7415616512298584, "learning_rate": 3.438498396100697e-06, "loss": 0.0989, "step": 3669 }, { "epoch": 1.1892417368762151, "grad_norm": 0.8387213945388794, "learning_rate": 3.4376876965638317e-06, "loss": 0.1218, "step": 3670 }, { "epoch": 1.1895657809462086, "grad_norm": 0.8068972826004028, "learning_rate": 3.4368768822630705e-06, "loss": 0.112, "step": 3671 }, { "epoch": 1.1898898250162022, "grad_norm": 0.8905644416809082, "learning_rate": 3.4360659532976475e-06, "loss": 0.1264, "step": 3672 }, { "epoch": 1.1902138690861956, "grad_norm": 0.769451916217804, "learning_rate": 3.435254909766814e-06, "loss": 0.1135, "step": 3673 }, { "epoch": 1.1905379131561893, "grad_norm": 0.7571532726287842, "learning_rate": 3.4344437517698336e-06, "loss": 0.1142, "step": 3674 }, { "epoch": 1.1908619572261827, "grad_norm": 0.8844805955886841, "learning_rate": 3.433632479405984e-06, "loss": 0.1057, "step": 3675 }, { "epoch": 1.1911860012961764, "grad_norm": 0.8947396874427795, "learning_rate": 3.4328210927745577e-06, "loss": 0.1295, "step": 3676 }, { "epoch": 1.1915100453661698, "grad_norm": 0.7603358030319214, "learning_rate": 3.4320095919748596e-06, "loss": 0.1088, "step": 3677 }, { "epoch": 1.1918340894361634, "grad_norm": 0.8614794611930847, "learning_rate": 3.43119797710621e-06, "loss": 0.1288, "step": 3678 }, { "epoch": 1.1921581335061568, "grad_norm": 0.8844104409217834, "learning_rate": 3.4303862482679435e-06, "loss": 0.1293, "step": 3679 }, { "epoch": 1.1924821775761503, "grad_norm": 0.8741898536682129, "learning_rate": 3.429574405559406e-06, "loss": 0.1255, "step": 3680 }, { "epoch": 1.192806221646144, "grad_norm": 0.8146804571151733, "learning_rate": 3.4287624490799605e-06, "loss": 0.1139, "step": 3681 }, { "epoch": 1.1931302657161373, "grad_norm": 0.8065559267997742, "learning_rate": 3.4279503789289824e-06, "loss": 0.1118, "step": 3682 }, { "epoch": 1.193454309786131, "grad_norm": 0.8196713328361511, "learning_rate": 3.4271381952058607e-06, "loss": 0.118, "step": 3683 }, { "epoch": 1.1937783538561244, "grad_norm": 0.7994847893714905, "learning_rate": 3.42632589801e-06, "loss": 0.1161, "step": 3684 }, { "epoch": 1.1941023979261178, "grad_norm": 0.9280275702476501, "learning_rate": 3.425513487440817e-06, "loss": 0.1256, "step": 3685 }, { "epoch": 1.1944264419961115, "grad_norm": 0.8903563022613525, "learning_rate": 3.4247009635977425e-06, "loss": 0.1233, "step": 3686 }, { "epoch": 1.194750486066105, "grad_norm": 0.7753077745437622, "learning_rate": 3.4238883265802215e-06, "loss": 0.1099, "step": 3687 }, { "epoch": 1.1950745301360985, "grad_norm": 0.8520569205284119, "learning_rate": 3.4230755764877133e-06, "loss": 0.1233, "step": 3688 }, { "epoch": 1.195398574206092, "grad_norm": 0.8427757620811462, "learning_rate": 3.4222627134196917e-06, "loss": 0.1226, "step": 3689 }, { "epoch": 1.1957226182760856, "grad_norm": 0.7773596048355103, "learning_rate": 3.4214497374756415e-06, "loss": 0.1099, "step": 3690 }, { "epoch": 1.196046662346079, "grad_norm": 0.7584930658340454, "learning_rate": 3.4206366487550637e-06, "loss": 0.1096, "step": 3691 }, { "epoch": 1.1963707064160727, "grad_norm": 0.8986695408821106, "learning_rate": 3.419823447357472e-06, "loss": 0.1293, "step": 3692 }, { "epoch": 1.196694750486066, "grad_norm": 0.8784900307655334, "learning_rate": 3.4190101333823956e-06, "loss": 0.1246, "step": 3693 }, { "epoch": 1.1970187945560595, "grad_norm": 0.8179463148117065, "learning_rate": 3.4181967069293754e-06, "loss": 0.1115, "step": 3694 }, { "epoch": 1.1973428386260532, "grad_norm": 0.7988253831863403, "learning_rate": 3.417383168097967e-06, "loss": 0.1103, "step": 3695 }, { "epoch": 1.1976668826960466, "grad_norm": 0.826006293296814, "learning_rate": 3.41656951698774e-06, "loss": 0.1143, "step": 3696 }, { "epoch": 1.1979909267660402, "grad_norm": 0.8301533460617065, "learning_rate": 3.4157557536982773e-06, "loss": 0.115, "step": 3697 }, { "epoch": 1.1983149708360337, "grad_norm": 0.7946401238441467, "learning_rate": 3.414941878329175e-06, "loss": 0.1146, "step": 3698 }, { "epoch": 1.1986390149060273, "grad_norm": 0.836628258228302, "learning_rate": 3.4141278909800444e-06, "loss": 0.1147, "step": 3699 }, { "epoch": 1.1989630589760207, "grad_norm": 0.8173867464065552, "learning_rate": 3.41331379175051e-06, "loss": 0.1129, "step": 3700 }, { "epoch": 1.1992871030460144, "grad_norm": 0.8635963797569275, "learning_rate": 3.4124995807402082e-06, "loss": 0.1219, "step": 3701 }, { "epoch": 1.1996111471160078, "grad_norm": 0.8261188864707947, "learning_rate": 3.4116852580487925e-06, "loss": 0.112, "step": 3702 }, { "epoch": 1.1999351911860012, "grad_norm": 0.8579382300376892, "learning_rate": 3.4108708237759258e-06, "loss": 0.1121, "step": 3703 }, { "epoch": 1.2002592352559949, "grad_norm": 0.8296414017677307, "learning_rate": 3.4100562780212887e-06, "loss": 0.1129, "step": 3704 }, { "epoch": 1.2005832793259883, "grad_norm": 0.8090779185295105, "learning_rate": 3.4092416208845723e-06, "loss": 0.1141, "step": 3705 }, { "epoch": 1.200907323395982, "grad_norm": 0.8289951682090759, "learning_rate": 3.4084268524654847e-06, "loss": 0.1223, "step": 3706 }, { "epoch": 1.2012313674659754, "grad_norm": 0.8589972257614136, "learning_rate": 3.407611972863744e-06, "loss": 0.1205, "step": 3707 }, { "epoch": 1.2015554115359688, "grad_norm": 0.8730632662773132, "learning_rate": 3.406796982179085e-06, "loss": 0.1253, "step": 3708 }, { "epoch": 1.2018794556059624, "grad_norm": 0.8103494644165039, "learning_rate": 3.4059818805112534e-06, "loss": 0.1139, "step": 3709 }, { "epoch": 1.2022034996759559, "grad_norm": 0.7576596736907959, "learning_rate": 3.4051666679600105e-06, "loss": 0.104, "step": 3710 }, { "epoch": 1.2025275437459495, "grad_norm": 0.8215952515602112, "learning_rate": 3.40435134462513e-06, "loss": 0.1136, "step": 3711 }, { "epoch": 1.202851587815943, "grad_norm": 0.7837064862251282, "learning_rate": 3.403535910606399e-06, "loss": 0.1106, "step": 3712 }, { "epoch": 1.2031756318859366, "grad_norm": 0.7939320206642151, "learning_rate": 3.4027203660036202e-06, "loss": 0.1104, "step": 3713 }, { "epoch": 1.20349967595593, "grad_norm": 0.7713475227355957, "learning_rate": 3.4019047109166077e-06, "loss": 0.1086, "step": 3714 }, { "epoch": 1.2038237200259236, "grad_norm": 0.8262404203414917, "learning_rate": 3.401088945445189e-06, "loss": 0.1188, "step": 3715 }, { "epoch": 1.204147764095917, "grad_norm": 0.8336588144302368, "learning_rate": 3.4002730696892073e-06, "loss": 0.1182, "step": 3716 }, { "epoch": 1.2044718081659105, "grad_norm": 0.9291484951972961, "learning_rate": 3.3994570837485163e-06, "loss": 0.1332, "step": 3717 }, { "epoch": 1.2047958522359041, "grad_norm": 0.749964714050293, "learning_rate": 3.3986409877229863e-06, "loss": 0.1097, "step": 3718 }, { "epoch": 1.2051198963058976, "grad_norm": 0.8636124134063721, "learning_rate": 3.3978247817124986e-06, "loss": 0.1273, "step": 3719 }, { "epoch": 1.2054439403758912, "grad_norm": 0.7298382520675659, "learning_rate": 3.39700846581695e-06, "loss": 0.1015, "step": 3720 }, { "epoch": 1.2057679844458846, "grad_norm": 0.832988440990448, "learning_rate": 3.3961920401362488e-06, "loss": 0.1156, "step": 3721 }, { "epoch": 1.206092028515878, "grad_norm": 0.8105771541595459, "learning_rate": 3.3953755047703174e-06, "loss": 0.1164, "step": 3722 }, { "epoch": 1.2064160725858717, "grad_norm": 0.8326833844184875, "learning_rate": 3.394558859819092e-06, "loss": 0.1274, "step": 3723 }, { "epoch": 1.2067401166558651, "grad_norm": 0.8200793862342834, "learning_rate": 3.393742105382522e-06, "loss": 0.1213, "step": 3724 }, { "epoch": 1.2070641607258588, "grad_norm": 0.849841296672821, "learning_rate": 3.3929252415605708e-06, "loss": 0.1198, "step": 3725 }, { "epoch": 1.2073882047958522, "grad_norm": 0.8289008736610413, "learning_rate": 3.3921082684532143e-06, "loss": 0.1142, "step": 3726 }, { "epoch": 1.2077122488658458, "grad_norm": 0.7795475721359253, "learning_rate": 3.391291186160441e-06, "loss": 0.1029, "step": 3727 }, { "epoch": 1.2080362929358393, "grad_norm": 0.8882423639297485, "learning_rate": 3.3904739947822556e-06, "loss": 0.1276, "step": 3728 }, { "epoch": 1.208360337005833, "grad_norm": 0.7692775130271912, "learning_rate": 3.3896566944186737e-06, "loss": 0.1042, "step": 3729 }, { "epoch": 1.2086843810758263, "grad_norm": 0.8318784236907959, "learning_rate": 3.388839285169725e-06, "loss": 0.1188, "step": 3730 }, { "epoch": 1.2090084251458197, "grad_norm": 0.798616349697113, "learning_rate": 3.3880217671354527e-06, "loss": 0.1135, "step": 3731 }, { "epoch": 1.2093324692158134, "grad_norm": 0.8450761437416077, "learning_rate": 3.3872041404159124e-06, "loss": 0.1233, "step": 3732 }, { "epoch": 1.2096565132858068, "grad_norm": 0.8565431833267212, "learning_rate": 3.3863864051111744e-06, "loss": 0.1238, "step": 3733 }, { "epoch": 1.2099805573558005, "grad_norm": 0.7984777092933655, "learning_rate": 3.385568561321321e-06, "loss": 0.1144, "step": 3734 }, { "epoch": 1.2103046014257939, "grad_norm": 0.8732336759567261, "learning_rate": 3.3847506091464487e-06, "loss": 0.124, "step": 3735 }, { "epoch": 1.2106286454957873, "grad_norm": 0.8275569081306458, "learning_rate": 3.383932548686667e-06, "loss": 0.1198, "step": 3736 }, { "epoch": 1.210952689565781, "grad_norm": 0.8002777099609375, "learning_rate": 3.3831143800420983e-06, "loss": 0.116, "step": 3737 }, { "epoch": 1.2112767336357744, "grad_norm": 0.8314655423164368, "learning_rate": 3.3822961033128793e-06, "loss": 0.1161, "step": 3738 }, { "epoch": 1.211600777705768, "grad_norm": 0.8205680251121521, "learning_rate": 3.3814777185991577e-06, "loss": 0.1094, "step": 3739 }, { "epoch": 1.2119248217757614, "grad_norm": 0.8222754597663879, "learning_rate": 3.380659226001097e-06, "loss": 0.1182, "step": 3740 }, { "epoch": 1.212248865845755, "grad_norm": 0.8440394997596741, "learning_rate": 3.3798406256188725e-06, "loss": 0.1242, "step": 3741 }, { "epoch": 1.2125729099157485, "grad_norm": 0.8333975672721863, "learning_rate": 3.3790219175526733e-06, "loss": 0.115, "step": 3742 }, { "epoch": 1.2128969539857422, "grad_norm": 0.7962969541549683, "learning_rate": 3.3782031019027006e-06, "loss": 0.1148, "step": 3743 }, { "epoch": 1.2132209980557356, "grad_norm": 0.8393021821975708, "learning_rate": 3.3773841787691708e-06, "loss": 0.1196, "step": 3744 }, { "epoch": 1.213545042125729, "grad_norm": 0.8597471117973328, "learning_rate": 3.3765651482523097e-06, "loss": 0.1186, "step": 3745 }, { "epoch": 1.2138690861957226, "grad_norm": 0.837240993976593, "learning_rate": 3.375746010452361e-06, "loss": 0.1261, "step": 3746 }, { "epoch": 1.214193130265716, "grad_norm": 0.8484938144683838, "learning_rate": 3.374926765469578e-06, "loss": 0.1129, "step": 3747 }, { "epoch": 1.2145171743357097, "grad_norm": 0.8841095566749573, "learning_rate": 3.3741074134042297e-06, "loss": 0.1372, "step": 3748 }, { "epoch": 1.2148412184057031, "grad_norm": 0.782389223575592, "learning_rate": 3.3732879543565955e-06, "loss": 0.1139, "step": 3749 }, { "epoch": 1.2151652624756968, "grad_norm": 0.8806767463684082, "learning_rate": 3.3724683884269702e-06, "loss": 0.1382, "step": 3750 }, { "epoch": 1.2154893065456902, "grad_norm": 0.799345850944519, "learning_rate": 3.37164871571566e-06, "loss": 0.1162, "step": 3751 }, { "epoch": 1.2158133506156839, "grad_norm": 0.8000692129135132, "learning_rate": 3.370828936322985e-06, "loss": 0.1096, "step": 3752 }, { "epoch": 1.2161373946856773, "grad_norm": 0.7767576575279236, "learning_rate": 3.3700090503492795e-06, "loss": 0.1146, "step": 3753 }, { "epoch": 1.2164614387556707, "grad_norm": 0.7903865575790405, "learning_rate": 3.3691890578948876e-06, "loss": 0.1145, "step": 3754 }, { "epoch": 1.2167854828256643, "grad_norm": 0.8180772066116333, "learning_rate": 3.36836895906017e-06, "loss": 0.1103, "step": 3755 }, { "epoch": 1.2171095268956578, "grad_norm": 0.907408595085144, "learning_rate": 3.3675487539454972e-06, "loss": 0.1282, "step": 3756 }, { "epoch": 1.2174335709656514, "grad_norm": 0.7968785166740417, "learning_rate": 3.3667284426512565e-06, "loss": 0.1143, "step": 3757 }, { "epoch": 1.2177576150356448, "grad_norm": 0.7605866193771362, "learning_rate": 3.3659080252778446e-06, "loss": 0.1154, "step": 3758 }, { "epoch": 1.2180816591056383, "grad_norm": 0.9358341097831726, "learning_rate": 3.365087501925673e-06, "loss": 0.1308, "step": 3759 }, { "epoch": 1.218405703175632, "grad_norm": 0.8174268007278442, "learning_rate": 3.3642668726951657e-06, "loss": 0.1218, "step": 3760 }, { "epoch": 1.2187297472456253, "grad_norm": 0.8260276317596436, "learning_rate": 3.36344613768676e-06, "loss": 0.1175, "step": 3761 }, { "epoch": 1.219053791315619, "grad_norm": 0.8210480213165283, "learning_rate": 3.362625297000906e-06, "loss": 0.1207, "step": 3762 }, { "epoch": 1.2193778353856124, "grad_norm": 0.7981109619140625, "learning_rate": 3.3618043507380673e-06, "loss": 0.116, "step": 3763 }, { "epoch": 1.219701879455606, "grad_norm": 0.7980762720108032, "learning_rate": 3.3609832989987178e-06, "loss": 0.1181, "step": 3764 }, { "epoch": 1.2200259235255995, "grad_norm": 0.7661181688308716, "learning_rate": 3.360162141883348e-06, "loss": 0.1081, "step": 3765 }, { "epoch": 1.220349967595593, "grad_norm": 0.8313801884651184, "learning_rate": 3.3593408794924585e-06, "loss": 0.116, "step": 3766 }, { "epoch": 1.2206740116655865, "grad_norm": 0.7752500176429749, "learning_rate": 3.358519511926565e-06, "loss": 0.1102, "step": 3767 }, { "epoch": 1.22099805573558, "grad_norm": 0.8230960369110107, "learning_rate": 3.357698039286194e-06, "loss": 0.1212, "step": 3768 }, { "epoch": 1.2213220998055736, "grad_norm": 0.7976222634315491, "learning_rate": 3.356876461671887e-06, "loss": 0.1159, "step": 3769 }, { "epoch": 1.221646143875567, "grad_norm": 0.8018410801887512, "learning_rate": 3.3560547791841957e-06, "loss": 0.1112, "step": 3770 }, { "epoch": 1.2219701879455607, "grad_norm": 0.8810217976570129, "learning_rate": 3.3552329919236865e-06, "loss": 0.1152, "step": 3771 }, { "epoch": 1.222294232015554, "grad_norm": 0.8110620379447937, "learning_rate": 3.3544110999909385e-06, "loss": 0.1117, "step": 3772 }, { "epoch": 1.2226182760855475, "grad_norm": 0.8340097069740295, "learning_rate": 3.3535891034865433e-06, "loss": 0.1178, "step": 3773 }, { "epoch": 1.2229423201555412, "grad_norm": 0.8052523732185364, "learning_rate": 3.3527670025111046e-06, "loss": 0.1058, "step": 3774 }, { "epoch": 1.2232663642255346, "grad_norm": 0.7342526912689209, "learning_rate": 3.3519447971652407e-06, "loss": 0.1037, "step": 3775 }, { "epoch": 1.2235904082955282, "grad_norm": 0.8502779006958008, "learning_rate": 3.351122487549582e-06, "loss": 0.1146, "step": 3776 }, { "epoch": 1.2239144523655217, "grad_norm": 0.8023433685302734, "learning_rate": 3.3503000737647696e-06, "loss": 0.1193, "step": 3777 }, { "epoch": 1.2242384964355153, "grad_norm": 0.792868971824646, "learning_rate": 3.349477555911459e-06, "loss": 0.1089, "step": 3778 }, { "epoch": 1.2245625405055087, "grad_norm": 0.8314449191093445, "learning_rate": 3.3486549340903196e-06, "loss": 0.1228, "step": 3779 }, { "epoch": 1.2248865845755024, "grad_norm": 0.8645145893096924, "learning_rate": 3.3478322084020322e-06, "loss": 0.1152, "step": 3780 }, { "epoch": 1.2252106286454958, "grad_norm": 0.9096472859382629, "learning_rate": 3.34700937894729e-06, "loss": 0.1181, "step": 3781 }, { "epoch": 1.2255346727154892, "grad_norm": 0.8502877950668335, "learning_rate": 3.3461864458267996e-06, "loss": 0.1129, "step": 3782 }, { "epoch": 1.2258587167854829, "grad_norm": 0.8530779480934143, "learning_rate": 3.3453634091412795e-06, "loss": 0.1243, "step": 3783 }, { "epoch": 1.2261827608554763, "grad_norm": 0.9280954003334045, "learning_rate": 3.344540268991462e-06, "loss": 0.1348, "step": 3784 }, { "epoch": 1.22650680492547, "grad_norm": 0.785020649433136, "learning_rate": 3.343717025478092e-06, "loss": 0.1126, "step": 3785 }, { "epoch": 1.2268308489954634, "grad_norm": 0.8465002179145813, "learning_rate": 3.342893678701925e-06, "loss": 0.1169, "step": 3786 }, { "epoch": 1.227154893065457, "grad_norm": 0.8912972211837769, "learning_rate": 3.3420702287637325e-06, "loss": 0.1332, "step": 3787 }, { "epoch": 1.2274789371354504, "grad_norm": 0.8117028474807739, "learning_rate": 3.341246675764295e-06, "loss": 0.1202, "step": 3788 }, { "epoch": 1.2278029812054438, "grad_norm": 0.8363078832626343, "learning_rate": 3.3404230198044085e-06, "loss": 0.1237, "step": 3789 }, { "epoch": 1.2281270252754375, "grad_norm": 0.7812249660491943, "learning_rate": 3.3395992609848804e-06, "loss": 0.1153, "step": 3790 }, { "epoch": 1.228451069345431, "grad_norm": 0.858752965927124, "learning_rate": 3.338775399406531e-06, "loss": 0.1166, "step": 3791 }, { "epoch": 1.2287751134154246, "grad_norm": 0.7888984680175781, "learning_rate": 3.3379514351701924e-06, "loss": 0.1111, "step": 3792 }, { "epoch": 1.229099157485418, "grad_norm": 0.7987516522407532, "learning_rate": 3.3371273683767102e-06, "loss": 0.1086, "step": 3793 }, { "epoch": 1.2294232015554116, "grad_norm": 0.8837012648582458, "learning_rate": 3.3363031991269423e-06, "loss": 0.1211, "step": 3794 }, { "epoch": 1.229747245625405, "grad_norm": 0.8158565759658813, "learning_rate": 3.3354789275217587e-06, "loss": 0.1187, "step": 3795 }, { "epoch": 1.2300712896953985, "grad_norm": 0.8124872446060181, "learning_rate": 3.3346545536620425e-06, "loss": 0.1181, "step": 3796 }, { "epoch": 1.2303953337653921, "grad_norm": 0.8039227724075317, "learning_rate": 3.3338300776486886e-06, "loss": 0.1138, "step": 3797 }, { "epoch": 1.2307193778353855, "grad_norm": 0.8304813504219055, "learning_rate": 3.3330054995826056e-06, "loss": 0.1146, "step": 3798 }, { "epoch": 1.2310434219053792, "grad_norm": 0.7987209558486938, "learning_rate": 3.3321808195647144e-06, "loss": 0.1155, "step": 3799 }, { "epoch": 1.2313674659753726, "grad_norm": 0.7513888478279114, "learning_rate": 3.3313560376959456e-06, "loss": 0.1082, "step": 3800 }, { "epoch": 1.2316915100453663, "grad_norm": 0.851762592792511, "learning_rate": 3.3305311540772467e-06, "loss": 0.1147, "step": 3801 }, { "epoch": 1.2320155541153597, "grad_norm": 0.7502050399780273, "learning_rate": 3.3297061688095746e-06, "loss": 0.1051, "step": 3802 }, { "epoch": 1.2323395981853533, "grad_norm": 0.7800151109695435, "learning_rate": 3.3288810819938995e-06, "loss": 0.1141, "step": 3803 }, { "epoch": 1.2326636422553467, "grad_norm": 0.8294506669044495, "learning_rate": 3.3280558937312037e-06, "loss": 0.1031, "step": 3804 }, { "epoch": 1.2329876863253402, "grad_norm": 0.8127052783966064, "learning_rate": 3.327230604122484e-06, "loss": 0.1157, "step": 3805 }, { "epoch": 1.2333117303953338, "grad_norm": 0.8488887548446655, "learning_rate": 3.326405213268745e-06, "loss": 0.1138, "step": 3806 }, { "epoch": 1.2336357744653272, "grad_norm": 0.8581557869911194, "learning_rate": 3.3255797212710095e-06, "loss": 0.1199, "step": 3807 }, { "epoch": 1.2339598185353209, "grad_norm": 0.8989530801773071, "learning_rate": 3.3247541282303082e-06, "loss": 0.1269, "step": 3808 }, { "epoch": 1.2342838626053143, "grad_norm": 0.8879166841506958, "learning_rate": 3.3239284342476852e-06, "loss": 0.1292, "step": 3809 }, { "epoch": 1.2346079066753077, "grad_norm": 0.847872257232666, "learning_rate": 3.3231026394241983e-06, "loss": 0.1283, "step": 3810 }, { "epoch": 1.2349319507453014, "grad_norm": 0.7723486423492432, "learning_rate": 3.3222767438609166e-06, "loss": 0.1089, "step": 3811 }, { "epoch": 1.2352559948152948, "grad_norm": 0.8634940385818481, "learning_rate": 3.321450747658922e-06, "loss": 0.1284, "step": 3812 }, { "epoch": 1.2355800388852884, "grad_norm": 0.8183860182762146, "learning_rate": 3.3206246509193076e-06, "loss": 0.1219, "step": 3813 }, { "epoch": 1.2359040829552819, "grad_norm": 0.907188892364502, "learning_rate": 3.3197984537431797e-06, "loss": 0.1089, "step": 3814 }, { "epoch": 1.2362281270252755, "grad_norm": 0.8658711910247803, "learning_rate": 3.3189721562316585e-06, "loss": 0.1258, "step": 3815 }, { "epoch": 1.236552171095269, "grad_norm": 0.7768942713737488, "learning_rate": 3.3181457584858736e-06, "loss": 0.1107, "step": 3816 }, { "epoch": 1.2368762151652626, "grad_norm": 0.8284804821014404, "learning_rate": 3.3173192606069673e-06, "loss": 0.1197, "step": 3817 }, { "epoch": 1.237200259235256, "grad_norm": 0.8545773029327393, "learning_rate": 3.316492662696097e-06, "loss": 0.1232, "step": 3818 }, { "epoch": 1.2375243033052494, "grad_norm": 0.8069331645965576, "learning_rate": 3.3156659648544276e-06, "loss": 0.1193, "step": 3819 }, { "epoch": 1.237848347375243, "grad_norm": 0.8107204437255859, "learning_rate": 3.314839167183141e-06, "loss": 0.1113, "step": 3820 }, { "epoch": 1.2381723914452365, "grad_norm": 0.7912015914916992, "learning_rate": 3.3140122697834287e-06, "loss": 0.1072, "step": 3821 }, { "epoch": 1.2384964355152301, "grad_norm": 0.8278244733810425, "learning_rate": 3.3131852727564947e-06, "loss": 0.1166, "step": 3822 }, { "epoch": 1.2388204795852236, "grad_norm": 0.8750582337379456, "learning_rate": 3.3123581762035557e-06, "loss": 0.1253, "step": 3823 }, { "epoch": 1.239144523655217, "grad_norm": 0.860623836517334, "learning_rate": 3.31153098022584e-06, "loss": 0.1155, "step": 3824 }, { "epoch": 1.2394685677252106, "grad_norm": 0.8325600028038025, "learning_rate": 3.3107036849245883e-06, "loss": 0.1158, "step": 3825 }, { "epoch": 1.239792611795204, "grad_norm": 0.8544103503227234, "learning_rate": 3.309876290401054e-06, "loss": 0.1187, "step": 3826 }, { "epoch": 1.2401166558651977, "grad_norm": 0.8223888278007507, "learning_rate": 3.309048796756503e-06, "loss": 0.1204, "step": 3827 }, { "epoch": 1.2404406999351911, "grad_norm": 0.7503345608711243, "learning_rate": 3.3082212040922103e-06, "loss": 0.1153, "step": 3828 }, { "epoch": 1.2407647440051848, "grad_norm": 0.8012227416038513, "learning_rate": 3.307393512509466e-06, "loss": 0.1154, "step": 3829 }, { "epoch": 1.2410887880751782, "grad_norm": 0.724138081073761, "learning_rate": 3.3065657221095732e-06, "loss": 0.0955, "step": 3830 }, { "epoch": 1.2414128321451718, "grad_norm": 0.8285104036331177, "learning_rate": 3.3057378329938432e-06, "loss": 0.1181, "step": 3831 }, { "epoch": 1.2417368762151653, "grad_norm": 0.8077479004859924, "learning_rate": 3.304909845263603e-06, "loss": 0.1138, "step": 3832 }, { "epoch": 1.2420609202851587, "grad_norm": 0.7801032662391663, "learning_rate": 3.3040817590201897e-06, "loss": 0.1041, "step": 3833 }, { "epoch": 1.2423849643551523, "grad_norm": 0.8577712178230286, "learning_rate": 3.303253574364953e-06, "loss": 0.1206, "step": 3834 }, { "epoch": 1.2427090084251458, "grad_norm": 0.8118460178375244, "learning_rate": 3.3024252913992548e-06, "loss": 0.112, "step": 3835 }, { "epoch": 1.2430330524951394, "grad_norm": 0.9097766876220703, "learning_rate": 3.3015969102244704e-06, "loss": 0.1292, "step": 3836 }, { "epoch": 1.2433570965651328, "grad_norm": 0.7517203688621521, "learning_rate": 3.300768430941983e-06, "loss": 0.1056, "step": 3837 }, { "epoch": 1.2436811406351265, "grad_norm": 0.81373530626297, "learning_rate": 3.299939853653192e-06, "loss": 0.1121, "step": 3838 }, { "epoch": 1.24400518470512, "grad_norm": 0.8525338768959045, "learning_rate": 3.299111178459507e-06, "loss": 0.1279, "step": 3839 }, { "epoch": 1.2443292287751135, "grad_norm": 0.8729413747787476, "learning_rate": 3.29828240546235e-06, "loss": 0.1248, "step": 3840 }, { "epoch": 1.244653272845107, "grad_norm": 0.790283739566803, "learning_rate": 3.297453534763154e-06, "loss": 0.1146, "step": 3841 }, { "epoch": 1.2449773169151004, "grad_norm": 0.8346278667449951, "learning_rate": 3.2966245664633654e-06, "loss": 0.1189, "step": 3842 }, { "epoch": 1.245301360985094, "grad_norm": 0.8715471625328064, "learning_rate": 3.295795500664442e-06, "loss": 0.1221, "step": 3843 }, { "epoch": 1.2456254050550875, "grad_norm": 0.8614826202392578, "learning_rate": 3.294966337467853e-06, "loss": 0.1177, "step": 3844 }, { "epoch": 1.245949449125081, "grad_norm": 0.7593932151794434, "learning_rate": 3.2941370769750804e-06, "loss": 0.1124, "step": 3845 }, { "epoch": 1.2462734931950745, "grad_norm": 0.8406314253807068, "learning_rate": 3.293307719287617e-06, "loss": 0.1099, "step": 3846 }, { "epoch": 1.246597537265068, "grad_norm": 0.8011007308959961, "learning_rate": 3.2924782645069684e-06, "loss": 0.1129, "step": 3847 }, { "epoch": 1.2469215813350616, "grad_norm": 0.841182291507721, "learning_rate": 3.291648712734653e-06, "loss": 0.1089, "step": 3848 }, { "epoch": 1.247245625405055, "grad_norm": 0.84003746509552, "learning_rate": 3.290819064072198e-06, "loss": 0.1254, "step": 3849 }, { "epoch": 1.2475696694750487, "grad_norm": 0.920987606048584, "learning_rate": 3.289989318621146e-06, "loss": 0.1157, "step": 3850 }, { "epoch": 1.247893713545042, "grad_norm": 0.8493415117263794, "learning_rate": 3.289159476483049e-06, "loss": 0.1176, "step": 3851 }, { "epoch": 1.2482177576150357, "grad_norm": 0.9096932411193848, "learning_rate": 3.2883295377594716e-06, "loss": 0.127, "step": 3852 }, { "epoch": 1.2485418016850292, "grad_norm": 0.838018000125885, "learning_rate": 3.2874995025519897e-06, "loss": 0.1176, "step": 3853 }, { "epoch": 1.2488658457550228, "grad_norm": 0.804726779460907, "learning_rate": 3.2866693709621933e-06, "loss": 0.1116, "step": 3854 }, { "epoch": 1.2491898898250162, "grad_norm": 0.796380341053009, "learning_rate": 3.285839143091681e-06, "loss": 0.1067, "step": 3855 }, { "epoch": 1.2495139338950096, "grad_norm": 0.8139790296554565, "learning_rate": 3.2850088190420647e-06, "loss": 0.1152, "step": 3856 }, { "epoch": 1.2498379779650033, "grad_norm": 0.8421602845191956, "learning_rate": 3.284178398914969e-06, "loss": 0.1151, "step": 3857 }, { "epoch": 1.2501620220349967, "grad_norm": 0.811719536781311, "learning_rate": 3.283347882812028e-06, "loss": 0.1221, "step": 3858 }, { "epoch": 1.2504860661049904, "grad_norm": 0.9033892154693604, "learning_rate": 3.282517270834891e-06, "loss": 0.1241, "step": 3859 }, { "epoch": 1.2508101101749838, "grad_norm": 0.8108022809028625, "learning_rate": 3.281686563085214e-06, "loss": 0.1213, "step": 3860 }, { "epoch": 1.2511341542449772, "grad_norm": 0.8107488751411438, "learning_rate": 3.28085575966467e-06, "loss": 0.109, "step": 3861 }, { "epoch": 1.2514581983149708, "grad_norm": 0.8544443249702454, "learning_rate": 3.2800248606749395e-06, "loss": 0.1167, "step": 3862 }, { "epoch": 1.2517822423849643, "grad_norm": 0.8549750447273254, "learning_rate": 3.2791938662177174e-06, "loss": 0.1126, "step": 3863 }, { "epoch": 1.252106286454958, "grad_norm": 0.9143353700637817, "learning_rate": 3.278362776394709e-06, "loss": 0.1354, "step": 3864 }, { "epoch": 1.2524303305249513, "grad_norm": 0.838623583316803, "learning_rate": 3.277531591307632e-06, "loss": 0.116, "step": 3865 }, { "epoch": 1.252754374594945, "grad_norm": 0.7793409824371338, "learning_rate": 3.2767003110582164e-06, "loss": 0.1098, "step": 3866 }, { "epoch": 1.2530784186649384, "grad_norm": 0.8405201435089111, "learning_rate": 3.275868935748201e-06, "loss": 0.1283, "step": 3867 }, { "epoch": 1.253402462734932, "grad_norm": 0.8322092294692993, "learning_rate": 3.2750374654793387e-06, "loss": 0.1145, "step": 3868 }, { "epoch": 1.2537265068049255, "grad_norm": 0.894817054271698, "learning_rate": 3.2742059003533933e-06, "loss": 0.1262, "step": 3869 }, { "epoch": 1.254050550874919, "grad_norm": 0.7786268591880798, "learning_rate": 3.2733742404721413e-06, "loss": 0.1056, "step": 3870 }, { "epoch": 1.2543745949449125, "grad_norm": 0.744128406047821, "learning_rate": 3.272542485937369e-06, "loss": 0.1039, "step": 3871 }, { "epoch": 1.254698639014906, "grad_norm": 0.8040996789932251, "learning_rate": 3.2717106368508755e-06, "loss": 0.1115, "step": 3872 }, { "epoch": 1.2550226830848996, "grad_norm": 0.8093754649162292, "learning_rate": 3.27087869331447e-06, "loss": 0.108, "step": 3873 }, { "epoch": 1.255346727154893, "grad_norm": 0.8474469184875488, "learning_rate": 3.2700466554299755e-06, "loss": 0.1125, "step": 3874 }, { "epoch": 1.2556707712248865, "grad_norm": 0.8335444331169128, "learning_rate": 3.2692145232992244e-06, "loss": 0.121, "step": 3875 }, { "epoch": 1.25599481529488, "grad_norm": 0.7793686389923096, "learning_rate": 3.268382297024063e-06, "loss": 0.1073, "step": 3876 }, { "epoch": 1.2563188593648738, "grad_norm": 0.7746005654335022, "learning_rate": 3.2675499767063464e-06, "loss": 0.1127, "step": 3877 }, { "epoch": 1.2566429034348672, "grad_norm": 0.8334304094314575, "learning_rate": 3.266717562447944e-06, "loss": 0.1209, "step": 3878 }, { "epoch": 1.2569669475048606, "grad_norm": 0.8408777117729187, "learning_rate": 3.2658850543507336e-06, "loss": 0.1244, "step": 3879 }, { "epoch": 1.2572909915748542, "grad_norm": 0.7953009009361267, "learning_rate": 3.2650524525166064e-06, "loss": 0.1093, "step": 3880 }, { "epoch": 1.2576150356448477, "grad_norm": 0.8646888136863708, "learning_rate": 3.2642197570474665e-06, "loss": 0.1205, "step": 3881 }, { "epoch": 1.2579390797148413, "grad_norm": 0.8490650057792664, "learning_rate": 3.263386968045226e-06, "loss": 0.1093, "step": 3882 }, { "epoch": 1.2582631237848347, "grad_norm": 0.8932080268859863, "learning_rate": 3.262554085611811e-06, "loss": 0.1282, "step": 3883 }, { "epoch": 1.2585871678548282, "grad_norm": 0.7818010449409485, "learning_rate": 3.261721109849158e-06, "loss": 0.1078, "step": 3884 }, { "epoch": 1.2589112119248218, "grad_norm": 0.7877272963523865, "learning_rate": 3.2608880408592148e-06, "loss": 0.1067, "step": 3885 }, { "epoch": 1.2592352559948152, "grad_norm": 0.8062349557876587, "learning_rate": 3.2600548787439413e-06, "loss": 0.1154, "step": 3886 }, { "epoch": 1.2595593000648089, "grad_norm": 0.8956946730613708, "learning_rate": 3.2592216236053086e-06, "loss": 0.1166, "step": 3887 }, { "epoch": 1.2598833441348023, "grad_norm": 0.8361225724220276, "learning_rate": 3.2583882755452994e-06, "loss": 0.1152, "step": 3888 }, { "epoch": 1.2602073882047957, "grad_norm": 0.8042587041854858, "learning_rate": 3.257554834665907e-06, "loss": 0.1178, "step": 3889 }, { "epoch": 1.2605314322747894, "grad_norm": 0.8830363154411316, "learning_rate": 3.2567213010691367e-06, "loss": 0.1199, "step": 3890 }, { "epoch": 1.260855476344783, "grad_norm": 0.841013491153717, "learning_rate": 3.255887674857004e-06, "loss": 0.1174, "step": 3891 }, { "epoch": 1.2611795204147764, "grad_norm": 0.7718873620033264, "learning_rate": 3.2550539561315385e-06, "loss": 0.1115, "step": 3892 }, { "epoch": 1.2615035644847699, "grad_norm": 0.835184633731842, "learning_rate": 3.2542201449947774e-06, "loss": 0.122, "step": 3893 }, { "epoch": 1.2618276085547635, "grad_norm": 0.7849681973457336, "learning_rate": 3.2533862415487723e-06, "loss": 0.111, "step": 3894 }, { "epoch": 1.262151652624757, "grad_norm": 0.8814342617988586, "learning_rate": 3.2525522458955843e-06, "loss": 0.1277, "step": 3895 }, { "epoch": 1.2624756966947506, "grad_norm": 0.7500383257865906, "learning_rate": 3.251718158137287e-06, "loss": 0.109, "step": 3896 }, { "epoch": 1.262799740764744, "grad_norm": 0.7909573316574097, "learning_rate": 3.2508839783759642e-06, "loss": 0.1103, "step": 3897 }, { "epoch": 1.2631237848347374, "grad_norm": 0.7755079865455627, "learning_rate": 3.2500497067137116e-06, "loss": 0.1096, "step": 3898 }, { "epoch": 1.263447828904731, "grad_norm": 0.8379855155944824, "learning_rate": 3.2492153432526356e-06, "loss": 0.1125, "step": 3899 }, { "epoch": 1.2637718729747245, "grad_norm": 0.8379008769989014, "learning_rate": 3.2483808880948552e-06, "loss": 0.1344, "step": 3900 }, { "epoch": 1.2640959170447181, "grad_norm": 0.7528431415557861, "learning_rate": 3.2475463413424983e-06, "loss": 0.107, "step": 3901 }, { "epoch": 1.2644199611147116, "grad_norm": 0.806319534778595, "learning_rate": 3.246711703097707e-06, "loss": 0.1239, "step": 3902 }, { "epoch": 1.2647440051847052, "grad_norm": 0.8018485307693481, "learning_rate": 3.2458769734626315e-06, "loss": 0.126, "step": 3903 }, { "epoch": 1.2650680492546986, "grad_norm": 0.817126452922821, "learning_rate": 3.245042152539435e-06, "loss": 0.1187, "step": 3904 }, { "epoch": 1.2653920933246923, "grad_norm": 0.7525573372840881, "learning_rate": 3.2442072404302917e-06, "loss": 0.101, "step": 3905 }, { "epoch": 1.2657161373946857, "grad_norm": 0.8159139156341553, "learning_rate": 3.243372237237386e-06, "loss": 0.1137, "step": 3906 }, { "epoch": 1.2660401814646791, "grad_norm": 0.8026230931282043, "learning_rate": 3.2425371430629155e-06, "loss": 0.1156, "step": 3907 }, { "epoch": 1.2663642255346728, "grad_norm": 0.8402119874954224, "learning_rate": 3.241701958009087e-06, "loss": 0.1192, "step": 3908 }, { "epoch": 1.2666882696046662, "grad_norm": 0.8447930216789246, "learning_rate": 3.2408666821781186e-06, "loss": 0.1234, "step": 3909 }, { "epoch": 1.2670123136746598, "grad_norm": 0.8497375845909119, "learning_rate": 3.2400313156722414e-06, "loss": 0.1145, "step": 3910 }, { "epoch": 1.2673363577446533, "grad_norm": 0.7882909178733826, "learning_rate": 3.2391958585936946e-06, "loss": 0.1088, "step": 3911 }, { "epoch": 1.2676604018146467, "grad_norm": 0.8392199873924255, "learning_rate": 3.2383603110447304e-06, "loss": 0.1188, "step": 3912 }, { "epoch": 1.2679844458846403, "grad_norm": 0.7700950503349304, "learning_rate": 3.2375246731276122e-06, "loss": 0.1084, "step": 3913 }, { "epoch": 1.268308489954634, "grad_norm": 0.8718739151954651, "learning_rate": 3.236688944944614e-06, "loss": 0.1128, "step": 3914 }, { "epoch": 1.2686325340246274, "grad_norm": 0.8307285904884338, "learning_rate": 3.2358531265980207e-06, "loss": 0.1103, "step": 3915 }, { "epoch": 1.2689565780946208, "grad_norm": 0.843315064907074, "learning_rate": 3.2350172181901283e-06, "loss": 0.1171, "step": 3916 }, { "epoch": 1.2692806221646145, "grad_norm": 0.8017109632492065, "learning_rate": 3.2341812198232437e-06, "loss": 0.123, "step": 3917 }, { "epoch": 1.2696046662346079, "grad_norm": 0.8025103211402893, "learning_rate": 3.2333451315996857e-06, "loss": 0.1172, "step": 3918 }, { "epoch": 1.2699287103046015, "grad_norm": 0.8225618600845337, "learning_rate": 3.232508953621782e-06, "loss": 0.1195, "step": 3919 }, { "epoch": 1.270252754374595, "grad_norm": 0.8533084392547607, "learning_rate": 3.231672685991874e-06, "loss": 0.1228, "step": 3920 }, { "epoch": 1.2705767984445884, "grad_norm": 0.8867461681365967, "learning_rate": 3.2308363288123128e-06, "loss": 0.1232, "step": 3921 }, { "epoch": 1.270900842514582, "grad_norm": 0.7814192771911621, "learning_rate": 3.2299998821854593e-06, "loss": 0.1085, "step": 3922 }, { "epoch": 1.2712248865845754, "grad_norm": 0.804747998714447, "learning_rate": 3.229163346213688e-06, "loss": 0.1079, "step": 3923 }, { "epoch": 1.271548930654569, "grad_norm": 0.797062873840332, "learning_rate": 3.228326720999382e-06, "loss": 0.1124, "step": 3924 }, { "epoch": 1.2718729747245625, "grad_norm": 0.8368006944656372, "learning_rate": 3.2274900066449355e-06, "loss": 0.1222, "step": 3925 }, { "epoch": 1.272197018794556, "grad_norm": 0.8199383616447449, "learning_rate": 3.2266532032527548e-06, "loss": 0.1121, "step": 3926 }, { "epoch": 1.2725210628645496, "grad_norm": 0.7986612915992737, "learning_rate": 3.225816310925257e-06, "loss": 0.1106, "step": 3927 }, { "epoch": 1.2728451069345432, "grad_norm": 0.764630913734436, "learning_rate": 3.224979329764869e-06, "loss": 0.106, "step": 3928 }, { "epoch": 1.2731691510045366, "grad_norm": 0.8350912928581238, "learning_rate": 3.224142259874029e-06, "loss": 0.1207, "step": 3929 }, { "epoch": 1.27349319507453, "grad_norm": 0.8321165442466736, "learning_rate": 3.223305101355187e-06, "loss": 0.1245, "step": 3930 }, { "epoch": 1.2738172391445237, "grad_norm": 0.7997496128082275, "learning_rate": 3.2224678543108024e-06, "loss": 0.1121, "step": 3931 }, { "epoch": 1.2741412832145171, "grad_norm": 0.8008485436439514, "learning_rate": 3.221630518843347e-06, "loss": 0.1135, "step": 3932 }, { "epoch": 1.2744653272845108, "grad_norm": 0.798004150390625, "learning_rate": 3.2207930950553017e-06, "loss": 0.1134, "step": 3933 }, { "epoch": 1.2747893713545042, "grad_norm": 0.8458718061447144, "learning_rate": 3.2199555830491597e-06, "loss": 0.12, "step": 3934 }, { "epoch": 1.2751134154244976, "grad_norm": 0.8262900710105896, "learning_rate": 3.2191179829274244e-06, "loss": 0.1205, "step": 3935 }, { "epoch": 1.2754374594944913, "grad_norm": 0.8696123361587524, "learning_rate": 3.2182802947926086e-06, "loss": 0.1196, "step": 3936 }, { "epoch": 1.2757615035644847, "grad_norm": 0.8649618029594421, "learning_rate": 3.2174425187472387e-06, "loss": 0.1243, "step": 3937 }, { "epoch": 1.2760855476344783, "grad_norm": 0.7876730561256409, "learning_rate": 3.2166046548938497e-06, "loss": 0.1118, "step": 3938 }, { "epoch": 1.2764095917044718, "grad_norm": 0.8547011017799377, "learning_rate": 3.215766703334988e-06, "loss": 0.1213, "step": 3939 }, { "epoch": 1.2767336357744652, "grad_norm": 0.9488222599029541, "learning_rate": 3.214928664173211e-06, "loss": 0.1214, "step": 3940 }, { "epoch": 1.2770576798444588, "grad_norm": 0.8610497117042542, "learning_rate": 3.2140905375110875e-06, "loss": 0.1271, "step": 3941 }, { "epoch": 1.2773817239144525, "grad_norm": 0.793376088142395, "learning_rate": 3.2132523234511943e-06, "loss": 0.1095, "step": 3942 }, { "epoch": 1.277705767984446, "grad_norm": 0.8819077014923096, "learning_rate": 3.2124140220961215e-06, "loss": 0.1255, "step": 3943 }, { "epoch": 1.2780298120544393, "grad_norm": 0.8474619388580322, "learning_rate": 3.2115756335484694e-06, "loss": 0.1265, "step": 3944 }, { "epoch": 1.278353856124433, "grad_norm": 0.8497169613838196, "learning_rate": 3.210737157910848e-06, "loss": 0.1276, "step": 3945 }, { "epoch": 1.2786779001944264, "grad_norm": 0.8501853346824646, "learning_rate": 3.2098985952858796e-06, "loss": 0.1274, "step": 3946 }, { "epoch": 1.27900194426442, "grad_norm": 0.8726108074188232, "learning_rate": 3.209059945776195e-06, "loss": 0.1309, "step": 3947 }, { "epoch": 1.2793259883344135, "grad_norm": 0.7873273491859436, "learning_rate": 3.2082212094844374e-06, "loss": 0.1187, "step": 3948 }, { "epoch": 1.279650032404407, "grad_norm": 0.757980227470398, "learning_rate": 3.20738238651326e-06, "loss": 0.1116, "step": 3949 }, { "epoch": 1.2799740764744005, "grad_norm": 0.8315106630325317, "learning_rate": 3.206543476965326e-06, "loss": 0.1271, "step": 3950 }, { "epoch": 1.280298120544394, "grad_norm": 0.8244580626487732, "learning_rate": 3.2057044809433108e-06, "loss": 0.1221, "step": 3951 }, { "epoch": 1.2806221646143876, "grad_norm": 0.8443965315818787, "learning_rate": 3.2048653985498985e-06, "loss": 0.1241, "step": 3952 }, { "epoch": 1.280946208684381, "grad_norm": 0.7351576089859009, "learning_rate": 3.204026229887785e-06, "loss": 0.1099, "step": 3953 }, { "epoch": 1.2812702527543747, "grad_norm": 0.8433205485343933, "learning_rate": 3.203186975059677e-06, "loss": 0.1226, "step": 3954 }, { "epoch": 1.281594296824368, "grad_norm": 0.8397737145423889, "learning_rate": 3.2023476341682902e-06, "loss": 0.1119, "step": 3955 }, { "epoch": 1.2819183408943617, "grad_norm": 0.7775555849075317, "learning_rate": 3.2015082073163524e-06, "loss": 0.1221, "step": 3956 }, { "epoch": 1.2822423849643552, "grad_norm": 0.7749071717262268, "learning_rate": 3.2006686946066012e-06, "loss": 0.104, "step": 3957 }, { "epoch": 1.2825664290343486, "grad_norm": 0.8695152997970581, "learning_rate": 3.1998290961417844e-06, "loss": 0.1147, "step": 3958 }, { "epoch": 1.2828904731043422, "grad_norm": 0.8355270624160767, "learning_rate": 3.1989894120246613e-06, "loss": 0.1153, "step": 3959 }, { "epoch": 1.2832145171743357, "grad_norm": 0.8163199424743652, "learning_rate": 3.1981496423580012e-06, "loss": 0.1232, "step": 3960 }, { "epoch": 1.2835385612443293, "grad_norm": 0.8346755504608154, "learning_rate": 3.1973097872445828e-06, "loss": 0.1029, "step": 3961 }, { "epoch": 1.2838626053143227, "grad_norm": 0.859024167060852, "learning_rate": 3.1964698467871976e-06, "loss": 0.1304, "step": 3962 }, { "epoch": 1.2841866493843161, "grad_norm": 0.7759827971458435, "learning_rate": 3.1956298210886454e-06, "loss": 0.1074, "step": 3963 }, { "epoch": 1.2845106934543098, "grad_norm": 0.8474326729774475, "learning_rate": 3.1947897102517374e-06, "loss": 0.1157, "step": 3964 }, { "epoch": 1.2848347375243034, "grad_norm": 0.8403254151344299, "learning_rate": 3.1939495143792944e-06, "loss": 0.1182, "step": 3965 }, { "epoch": 1.2851587815942969, "grad_norm": 0.8455356359481812, "learning_rate": 3.1931092335741497e-06, "loss": 0.1211, "step": 3966 }, { "epoch": 1.2854828256642903, "grad_norm": 0.9320614337921143, "learning_rate": 3.192268867939144e-06, "loss": 0.1256, "step": 3967 }, { "epoch": 1.285806869734284, "grad_norm": 0.8611961007118225, "learning_rate": 3.1914284175771303e-06, "loss": 0.1158, "step": 3968 }, { "epoch": 1.2861309138042774, "grad_norm": 0.8324908018112183, "learning_rate": 3.1905878825909726e-06, "loss": 0.1133, "step": 3969 }, { "epoch": 1.286454957874271, "grad_norm": 0.8874091506004333, "learning_rate": 3.189747263083543e-06, "loss": 0.1223, "step": 3970 }, { "epoch": 1.2867790019442644, "grad_norm": 0.7869680523872375, "learning_rate": 3.188906559157725e-06, "loss": 0.1079, "step": 3971 }, { "epoch": 1.2871030460142578, "grad_norm": 0.8412497639656067, "learning_rate": 3.1880657709164144e-06, "loss": 0.1209, "step": 3972 }, { "epoch": 1.2874270900842515, "grad_norm": 0.8754928708076477, "learning_rate": 3.1872248984625135e-06, "loss": 0.1205, "step": 3973 }, { "epoch": 1.287751134154245, "grad_norm": 0.8010230660438538, "learning_rate": 3.1863839418989385e-06, "loss": 0.1072, "step": 3974 }, { "epoch": 1.2880751782242386, "grad_norm": 0.9506638646125793, "learning_rate": 3.185542901328613e-06, "loss": 0.1405, "step": 3975 }, { "epoch": 1.288399222294232, "grad_norm": 0.8250179290771484, "learning_rate": 3.184701776854474e-06, "loss": 0.1131, "step": 3976 }, { "epoch": 1.2887232663642254, "grad_norm": 0.8661324381828308, "learning_rate": 3.1838605685794665e-06, "loss": 0.1178, "step": 3977 }, { "epoch": 1.289047310434219, "grad_norm": 0.8399199843406677, "learning_rate": 3.1830192766065445e-06, "loss": 0.1167, "step": 3978 }, { "epoch": 1.2893713545042127, "grad_norm": 0.7626136541366577, "learning_rate": 3.1821779010386755e-06, "loss": 0.1067, "step": 3979 }, { "epoch": 1.2896953985742061, "grad_norm": 0.8195885419845581, "learning_rate": 3.181336441978835e-06, "loss": 0.1162, "step": 3980 }, { "epoch": 1.2900194426441995, "grad_norm": 0.7261697053909302, "learning_rate": 3.18049489953001e-06, "loss": 0.1101, "step": 3981 }, { "epoch": 1.2903434867141932, "grad_norm": 0.7609726190567017, "learning_rate": 3.1796532737951975e-06, "loss": 0.1089, "step": 3982 }, { "epoch": 1.2906675307841866, "grad_norm": 0.8558682799339294, "learning_rate": 3.1788115648774033e-06, "loss": 0.1262, "step": 3983 }, { "epoch": 1.2909915748541803, "grad_norm": 0.8411134481430054, "learning_rate": 3.177969772879645e-06, "loss": 0.113, "step": 3984 }, { "epoch": 1.2913156189241737, "grad_norm": 0.8650814890861511, "learning_rate": 3.1771278979049496e-06, "loss": 0.1182, "step": 3985 }, { "epoch": 1.291639662994167, "grad_norm": 0.8382723331451416, "learning_rate": 3.176285940056355e-06, "loss": 0.122, "step": 3986 }, { "epoch": 1.2919637070641607, "grad_norm": 0.7756894826889038, "learning_rate": 3.1754438994369087e-06, "loss": 0.1129, "step": 3987 }, { "epoch": 1.2922877511341542, "grad_norm": 0.8274821639060974, "learning_rate": 3.174601776149668e-06, "loss": 0.122, "step": 3988 }, { "epoch": 1.2926117952041478, "grad_norm": 0.8245730400085449, "learning_rate": 3.1737595702976996e-06, "loss": 0.1165, "step": 3989 }, { "epoch": 1.2929358392741412, "grad_norm": 0.833957850933075, "learning_rate": 3.1729172819840825e-06, "loss": 0.1195, "step": 3990 }, { "epoch": 1.2932598833441347, "grad_norm": 0.7777155041694641, "learning_rate": 3.1720749113119045e-06, "loss": 0.109, "step": 3991 }, { "epoch": 1.2935839274141283, "grad_norm": 0.8168273568153381, "learning_rate": 3.1712324583842637e-06, "loss": 0.1182, "step": 3992 }, { "epoch": 1.293907971484122, "grad_norm": 0.8285285234451294, "learning_rate": 3.1703899233042675e-06, "loss": 0.1202, "step": 3993 }, { "epoch": 1.2942320155541154, "grad_norm": 0.813170850276947, "learning_rate": 3.1695473061750353e-06, "loss": 0.1088, "step": 3994 }, { "epoch": 1.2945560596241088, "grad_norm": 0.8755943775177002, "learning_rate": 3.1687046070996942e-06, "loss": 0.1231, "step": 3995 }, { "epoch": 1.2948801036941024, "grad_norm": 0.848179042339325, "learning_rate": 3.1678618261813828e-06, "loss": 0.1127, "step": 3996 }, { "epoch": 1.2952041477640959, "grad_norm": 0.8618291020393372, "learning_rate": 3.167018963523249e-06, "loss": 0.1241, "step": 3997 }, { "epoch": 1.2955281918340895, "grad_norm": 0.8661099076271057, "learning_rate": 3.1661760192284518e-06, "loss": 0.1185, "step": 3998 }, { "epoch": 1.295852235904083, "grad_norm": 0.8482186794281006, "learning_rate": 3.165332993400159e-06, "loss": 0.1263, "step": 3999 }, { "epoch": 1.2961762799740764, "grad_norm": 0.8614206910133362, "learning_rate": 3.1644898861415484e-06, "loss": 0.1234, "step": 4000 }, { "epoch": 1.29650032404407, "grad_norm": 0.8205857276916504, "learning_rate": 3.163646697555809e-06, "loss": 0.1192, "step": 4001 }, { "epoch": 1.2968243681140634, "grad_norm": 0.8212650418281555, "learning_rate": 3.1628034277461376e-06, "loss": 0.1126, "step": 4002 }, { "epoch": 1.297148412184057, "grad_norm": 0.9054401516914368, "learning_rate": 3.161960076815743e-06, "loss": 0.1238, "step": 4003 }, { "epoch": 1.2974724562540505, "grad_norm": 0.7641319632530212, "learning_rate": 3.1611166448678445e-06, "loss": 0.1091, "step": 4004 }, { "epoch": 1.2977965003240441, "grad_norm": 0.9123237133026123, "learning_rate": 3.1602731320056675e-06, "loss": 0.1245, "step": 4005 }, { "epoch": 1.2981205443940376, "grad_norm": 0.8741422295570374, "learning_rate": 3.159429538332452e-06, "loss": 0.1302, "step": 4006 }, { "epoch": 1.2984445884640312, "grad_norm": 0.8232343792915344, "learning_rate": 3.1585858639514444e-06, "loss": 0.1146, "step": 4007 }, { "epoch": 1.2987686325340246, "grad_norm": 0.8644008636474609, "learning_rate": 3.1577421089659023e-06, "loss": 0.1153, "step": 4008 }, { "epoch": 1.299092676604018, "grad_norm": 0.8121967315673828, "learning_rate": 3.1568982734790943e-06, "loss": 0.1167, "step": 4009 }, { "epoch": 1.2994167206740117, "grad_norm": 0.8596533536911011, "learning_rate": 3.1560543575942958e-06, "loss": 0.1206, "step": 4010 }, { "epoch": 1.2997407647440051, "grad_norm": 0.8099182844161987, "learning_rate": 3.1552103614147955e-06, "loss": 0.1108, "step": 4011 }, { "epoch": 1.3000648088139988, "grad_norm": 0.8212376832962036, "learning_rate": 3.1543662850438905e-06, "loss": 0.1232, "step": 4012 }, { "epoch": 1.3003888528839922, "grad_norm": 0.8360647559165955, "learning_rate": 3.1535221285848866e-06, "loss": 0.1204, "step": 4013 }, { "epoch": 1.3007128969539856, "grad_norm": 0.8495321869850159, "learning_rate": 3.1526778921411006e-06, "loss": 0.1182, "step": 4014 }, { "epoch": 1.3010369410239793, "grad_norm": 0.7739179730415344, "learning_rate": 3.151833575815859e-06, "loss": 0.1107, "step": 4015 }, { "epoch": 1.301360985093973, "grad_norm": 0.8643922805786133, "learning_rate": 3.1509891797124977e-06, "loss": 0.1296, "step": 4016 }, { "epoch": 1.3016850291639663, "grad_norm": 0.8476508855819702, "learning_rate": 3.150144703934363e-06, "loss": 0.126, "step": 4017 }, { "epoch": 1.3020090732339598, "grad_norm": 0.8171755075454712, "learning_rate": 3.149300148584811e-06, "loss": 0.1062, "step": 4018 }, { "epoch": 1.3023331173039534, "grad_norm": 0.731436014175415, "learning_rate": 3.1484555137672063e-06, "loss": 0.1098, "step": 4019 }, { "epoch": 1.3026571613739468, "grad_norm": 0.7991498112678528, "learning_rate": 3.147610799584924e-06, "loss": 0.1142, "step": 4020 }, { "epoch": 1.3029812054439405, "grad_norm": 0.8196418285369873, "learning_rate": 3.1467660061413497e-06, "loss": 0.1178, "step": 4021 }, { "epoch": 1.303305249513934, "grad_norm": 0.8183781504631042, "learning_rate": 3.1459211335398765e-06, "loss": 0.1203, "step": 4022 }, { "epoch": 1.3036292935839273, "grad_norm": 0.8166159391403198, "learning_rate": 3.14507618188391e-06, "loss": 0.1229, "step": 4023 }, { "epoch": 1.303953337653921, "grad_norm": 0.8409057855606079, "learning_rate": 3.144231151276864e-06, "loss": 0.1155, "step": 4024 }, { "epoch": 1.3042773817239144, "grad_norm": 0.8440519571304321, "learning_rate": 3.143386041822162e-06, "loss": 0.1224, "step": 4025 }, { "epoch": 1.304601425793908, "grad_norm": 0.8403790593147278, "learning_rate": 3.142540853623236e-06, "loss": 0.1204, "step": 4026 }, { "epoch": 1.3049254698639015, "grad_norm": 0.8980937004089355, "learning_rate": 3.14169558678353e-06, "loss": 0.1178, "step": 4027 }, { "epoch": 1.3052495139338949, "grad_norm": 0.7783441543579102, "learning_rate": 3.1408502414064963e-06, "loss": 0.1024, "step": 4028 }, { "epoch": 1.3055735580038885, "grad_norm": 0.7647897601127625, "learning_rate": 3.140004817595597e-06, "loss": 0.1079, "step": 4029 }, { "epoch": 1.3058976020738822, "grad_norm": 0.8126063346862793, "learning_rate": 3.1391593154543043e-06, "loss": 0.1178, "step": 4030 }, { "epoch": 1.3062216461438756, "grad_norm": 0.9322881698608398, "learning_rate": 3.138313735086099e-06, "loss": 0.1259, "step": 4031 }, { "epoch": 1.306545690213869, "grad_norm": 0.8398449420928955, "learning_rate": 3.137468076594471e-06, "loss": 0.1201, "step": 4032 }, { "epoch": 1.3068697342838627, "grad_norm": 0.825153648853302, "learning_rate": 3.1366223400829215e-06, "loss": 0.1136, "step": 4033 }, { "epoch": 1.307193778353856, "grad_norm": 0.8654718995094299, "learning_rate": 3.135776525654961e-06, "loss": 0.1132, "step": 4034 }, { "epoch": 1.3075178224238497, "grad_norm": 0.8122419714927673, "learning_rate": 3.1349306334141084e-06, "loss": 0.113, "step": 4035 }, { "epoch": 1.3078418664938432, "grad_norm": 0.8519636988639832, "learning_rate": 3.134084663463894e-06, "loss": 0.1174, "step": 4036 }, { "epoch": 1.3081659105638366, "grad_norm": 0.8526609539985657, "learning_rate": 3.1332386159078536e-06, "loss": 0.113, "step": 4037 }, { "epoch": 1.3084899546338302, "grad_norm": 0.8123700618743896, "learning_rate": 3.132392490849537e-06, "loss": 0.1064, "step": 4038 }, { "epoch": 1.3088139987038236, "grad_norm": 0.8796939253807068, "learning_rate": 3.1315462883925026e-06, "loss": 0.1277, "step": 4039 }, { "epoch": 1.3091380427738173, "grad_norm": 0.8536302447319031, "learning_rate": 3.1307000086403162e-06, "loss": 0.1161, "step": 4040 }, { "epoch": 1.3094620868438107, "grad_norm": 0.8248025178909302, "learning_rate": 3.1298536516965537e-06, "loss": 0.1204, "step": 4041 }, { "epoch": 1.3097861309138044, "grad_norm": 0.8177707195281982, "learning_rate": 3.129007217664802e-06, "loss": 0.113, "step": 4042 }, { "epoch": 1.3101101749837978, "grad_norm": 0.8128197193145752, "learning_rate": 3.1281607066486565e-06, "loss": 0.1063, "step": 4043 }, { "epoch": 1.3104342190537914, "grad_norm": 0.7673478722572327, "learning_rate": 3.127314118751721e-06, "loss": 0.1068, "step": 4044 }, { "epoch": 1.3107582631237849, "grad_norm": 0.8692690134048462, "learning_rate": 3.12646745407761e-06, "loss": 0.1221, "step": 4045 }, { "epoch": 1.3110823071937783, "grad_norm": 0.8033125400543213, "learning_rate": 3.1256207127299475e-06, "loss": 0.117, "step": 4046 }, { "epoch": 1.311406351263772, "grad_norm": 0.8258150219917297, "learning_rate": 3.124773894812367e-06, "loss": 0.114, "step": 4047 }, { "epoch": 1.3117303953337653, "grad_norm": 0.8598066568374634, "learning_rate": 3.123927000428509e-06, "loss": 0.1243, "step": 4048 }, { "epoch": 1.312054439403759, "grad_norm": 0.8733276724815369, "learning_rate": 3.123080029682027e-06, "loss": 0.1285, "step": 4049 }, { "epoch": 1.3123784834737524, "grad_norm": 0.8112250566482544, "learning_rate": 3.1222329826765806e-06, "loss": 0.1169, "step": 4050 }, { "epoch": 1.3127025275437458, "grad_norm": 0.8250533938407898, "learning_rate": 3.121385859515842e-06, "loss": 0.13, "step": 4051 }, { "epoch": 1.3130265716137395, "grad_norm": 0.8050197958946228, "learning_rate": 3.1205386603034886e-06, "loss": 0.1154, "step": 4052 }, { "epoch": 1.3133506156837331, "grad_norm": 0.819603443145752, "learning_rate": 3.1196913851432108e-06, "loss": 0.1146, "step": 4053 }, { "epoch": 1.3136746597537265, "grad_norm": 0.7729926705360413, "learning_rate": 3.1188440341387063e-06, "loss": 0.1006, "step": 4054 }, { "epoch": 1.31399870382372, "grad_norm": 0.7933728694915771, "learning_rate": 3.1179966073936837e-06, "loss": 0.1112, "step": 4055 }, { "epoch": 1.3143227478937136, "grad_norm": 0.7561730146408081, "learning_rate": 3.117149105011858e-06, "loss": 0.1139, "step": 4056 }, { "epoch": 1.314646791963707, "grad_norm": 0.9096986055374146, "learning_rate": 3.1163015270969567e-06, "loss": 0.1286, "step": 4057 }, { "epoch": 1.3149708360337007, "grad_norm": 0.8904516100883484, "learning_rate": 3.115453873752714e-06, "loss": 0.1251, "step": 4058 }, { "epoch": 1.315294880103694, "grad_norm": 1.1391996145248413, "learning_rate": 3.114606145082876e-06, "loss": 0.1189, "step": 4059 }, { "epoch": 1.3156189241736875, "grad_norm": 0.841249406337738, "learning_rate": 3.1137583411911954e-06, "loss": 0.1183, "step": 4060 }, { "epoch": 1.3159429682436812, "grad_norm": 0.7857441902160645, "learning_rate": 3.1129104621814365e-06, "loss": 0.1144, "step": 4061 }, { "epoch": 1.3162670123136746, "grad_norm": 0.8113446235656738, "learning_rate": 3.1120625081573696e-06, "loss": 0.1098, "step": 4062 }, { "epoch": 1.3165910563836682, "grad_norm": 0.8641226887702942, "learning_rate": 3.1112144792227774e-06, "loss": 0.1207, "step": 4063 }, { "epoch": 1.3169151004536617, "grad_norm": 0.9279201030731201, "learning_rate": 3.1103663754814493e-06, "loss": 0.1371, "step": 4064 }, { "epoch": 1.317239144523655, "grad_norm": 0.8208470940589905, "learning_rate": 3.109518197037186e-06, "loss": 0.1121, "step": 4065 }, { "epoch": 1.3175631885936487, "grad_norm": 0.8214272856712341, "learning_rate": 3.1086699439937957e-06, "loss": 0.1141, "step": 4066 }, { "epoch": 1.3178872326636424, "grad_norm": 0.8766685724258423, "learning_rate": 3.1078216164550966e-06, "loss": 0.1273, "step": 4067 }, { "epoch": 1.3182112767336358, "grad_norm": 0.8116971254348755, "learning_rate": 3.1069732145249166e-06, "loss": 0.1043, "step": 4068 }, { "epoch": 1.3185353208036292, "grad_norm": 0.7683243751525879, "learning_rate": 3.1061247383070905e-06, "loss": 0.1127, "step": 4069 }, { "epoch": 1.3188593648736229, "grad_norm": 0.8518966436386108, "learning_rate": 3.1052761879054637e-06, "loss": 0.116, "step": 4070 }, { "epoch": 1.3191834089436163, "grad_norm": 0.8467736840248108, "learning_rate": 3.1044275634238913e-06, "loss": 0.1158, "step": 4071 }, { "epoch": 1.31950745301361, "grad_norm": 0.8552513718605042, "learning_rate": 3.103578864966237e-06, "loss": 0.1209, "step": 4072 }, { "epoch": 1.3198314970836034, "grad_norm": 0.928536593914032, "learning_rate": 3.1027300926363723e-06, "loss": 0.1257, "step": 4073 }, { "epoch": 1.3201555411535968, "grad_norm": 0.9464770555496216, "learning_rate": 3.1018812465381796e-06, "loss": 0.1207, "step": 4074 }, { "epoch": 1.3204795852235904, "grad_norm": 0.8095433115959167, "learning_rate": 3.1010323267755486e-06, "loss": 0.1136, "step": 4075 }, { "epoch": 1.3208036292935839, "grad_norm": 0.7639227509498596, "learning_rate": 3.100183333452379e-06, "loss": 0.1124, "step": 4076 }, { "epoch": 1.3211276733635775, "grad_norm": 0.8170154094696045, "learning_rate": 3.0993342666725803e-06, "loss": 0.1136, "step": 4077 }, { "epoch": 1.321451717433571, "grad_norm": 0.8701738119125366, "learning_rate": 3.0984851265400683e-06, "loss": 0.1177, "step": 4078 }, { "epoch": 1.3217757615035644, "grad_norm": 0.8065285086631775, "learning_rate": 3.097635913158772e-06, "loss": 0.1141, "step": 4079 }, { "epoch": 1.322099805573558, "grad_norm": 0.8607053160667419, "learning_rate": 3.096786626632624e-06, "loss": 0.1234, "step": 4080 }, { "epoch": 1.3224238496435516, "grad_norm": 0.8417751789093018, "learning_rate": 3.0959372670655714e-06, "loss": 0.1201, "step": 4081 }, { "epoch": 1.322747893713545, "grad_norm": 0.8256826996803284, "learning_rate": 3.0950878345615654e-06, "loss": 0.1238, "step": 4082 }, { "epoch": 1.3230719377835385, "grad_norm": 0.8309873938560486, "learning_rate": 3.0942383292245704e-06, "loss": 0.1177, "step": 4083 }, { "epoch": 1.3233959818535321, "grad_norm": 0.828330934047699, "learning_rate": 3.0933887511585564e-06, "loss": 0.117, "step": 4084 }, { "epoch": 1.3237200259235256, "grad_norm": 0.8285284638404846, "learning_rate": 3.0925391004675037e-06, "loss": 0.1148, "step": 4085 }, { "epoch": 1.3240440699935192, "grad_norm": 0.8505363464355469, "learning_rate": 3.0916893772554006e-06, "loss": 0.1135, "step": 4086 }, { "epoch": 1.3243681140635126, "grad_norm": 0.8414075374603271, "learning_rate": 3.0908395816262466e-06, "loss": 0.1201, "step": 4087 }, { "epoch": 1.324692158133506, "grad_norm": 0.9821286797523499, "learning_rate": 3.0899897136840468e-06, "loss": 0.1199, "step": 4088 }, { "epoch": 1.3250162022034997, "grad_norm": 0.8443735837936401, "learning_rate": 3.0891397735328176e-06, "loss": 0.1157, "step": 4089 }, { "epoch": 1.3253402462734931, "grad_norm": 0.8299155235290527, "learning_rate": 3.088289761276584e-06, "loss": 0.1094, "step": 4090 }, { "epoch": 1.3256642903434868, "grad_norm": 0.8571391701698303, "learning_rate": 3.0874396770193785e-06, "loss": 0.13, "step": 4091 }, { "epoch": 1.3259883344134802, "grad_norm": 0.8635405898094177, "learning_rate": 3.0865895208652436e-06, "loss": 0.1303, "step": 4092 }, { "epoch": 1.3263123784834738, "grad_norm": 0.8105101585388184, "learning_rate": 3.0857392929182296e-06, "loss": 0.113, "step": 4093 }, { "epoch": 1.3266364225534673, "grad_norm": 0.8883736729621887, "learning_rate": 3.084888993282397e-06, "loss": 0.1266, "step": 4094 }, { "epoch": 1.326960466623461, "grad_norm": 0.9142129421234131, "learning_rate": 3.0840386220618137e-06, "loss": 0.1251, "step": 4095 }, { "epoch": 1.3272845106934543, "grad_norm": 0.8064966201782227, "learning_rate": 3.083188179360556e-06, "loss": 0.1119, "step": 4096 }, { "epoch": 1.3276085547634477, "grad_norm": 0.8080044984817505, "learning_rate": 3.0823376652827123e-06, "loss": 0.1174, "step": 4097 }, { "epoch": 1.3279325988334414, "grad_norm": 0.8498368263244629, "learning_rate": 3.0814870799323748e-06, "loss": 0.1129, "step": 4098 }, { "epoch": 1.3282566429034348, "grad_norm": 0.8681304454803467, "learning_rate": 3.080636423413649e-06, "loss": 0.1259, "step": 4099 }, { "epoch": 1.3285806869734285, "grad_norm": 0.8149116635322571, "learning_rate": 3.079785695830645e-06, "loss": 0.1256, "step": 4100 }, { "epoch": 1.3289047310434219, "grad_norm": 0.8620883226394653, "learning_rate": 3.0789348972874844e-06, "loss": 0.1142, "step": 4101 }, { "epoch": 1.3292287751134153, "grad_norm": 0.790808379650116, "learning_rate": 3.0780840278882974e-06, "loss": 0.1068, "step": 4102 }, { "epoch": 1.329552819183409, "grad_norm": 0.8280158042907715, "learning_rate": 3.077233087737222e-06, "loss": 0.1182, "step": 4103 }, { "epoch": 1.3298768632534026, "grad_norm": 0.8379877805709839, "learning_rate": 3.0763820769384038e-06, "loss": 0.1151, "step": 4104 }, { "epoch": 1.330200907323396, "grad_norm": 0.7841100692749023, "learning_rate": 3.0755309955960007e-06, "loss": 0.1126, "step": 4105 }, { "epoch": 1.3305249513933894, "grad_norm": 0.8145238757133484, "learning_rate": 3.074679843814174e-06, "loss": 0.1166, "step": 4106 }, { "epoch": 1.330848995463383, "grad_norm": 0.8462419509887695, "learning_rate": 3.073828621697098e-06, "loss": 0.1243, "step": 4107 }, { "epoch": 1.3311730395333765, "grad_norm": 0.8405357003211975, "learning_rate": 3.072977329348954e-06, "loss": 0.1231, "step": 4108 }, { "epoch": 1.3314970836033702, "grad_norm": 0.9085520505905151, "learning_rate": 3.072125966873932e-06, "loss": 0.1308, "step": 4109 }, { "epoch": 1.3318211276733636, "grad_norm": 0.8673784136772156, "learning_rate": 3.0712745343762295e-06, "loss": 0.1177, "step": 4110 }, { "epoch": 1.332145171743357, "grad_norm": 0.7908552885055542, "learning_rate": 3.0704230319600547e-06, "loss": 0.1072, "step": 4111 }, { "epoch": 1.3324692158133506, "grad_norm": 0.7814868092536926, "learning_rate": 3.069571459729623e-06, "loss": 0.1146, "step": 4112 }, { "epoch": 1.332793259883344, "grad_norm": 0.7968869209289551, "learning_rate": 3.068719817789158e-06, "loss": 0.1108, "step": 4113 }, { "epoch": 1.3331173039533377, "grad_norm": 0.8128607869148254, "learning_rate": 3.067868106242894e-06, "loss": 0.1139, "step": 4114 }, { "epoch": 1.3334413480233311, "grad_norm": 0.8034228682518005, "learning_rate": 3.0670163251950703e-06, "loss": 0.1082, "step": 4115 }, { "epoch": 1.3337653920933246, "grad_norm": 0.8152320384979248, "learning_rate": 3.0661644747499385e-06, "loss": 0.1158, "step": 4116 }, { "epoch": 1.3340894361633182, "grad_norm": 0.8252043128013611, "learning_rate": 3.0653125550117547e-06, "loss": 0.1136, "step": 4117 }, { "epoch": 1.3344134802333119, "grad_norm": 0.8656401038169861, "learning_rate": 3.0644605660847875e-06, "loss": 0.1219, "step": 4118 }, { "epoch": 1.3347375243033053, "grad_norm": 0.7695584297180176, "learning_rate": 3.0636085080733113e-06, "loss": 0.1087, "step": 4119 }, { "epoch": 1.3350615683732987, "grad_norm": 0.8830423355102539, "learning_rate": 3.0627563810816097e-06, "loss": 0.1213, "step": 4120 }, { "epoch": 1.3353856124432923, "grad_norm": 0.8353223204612732, "learning_rate": 3.0619041852139746e-06, "loss": 0.1192, "step": 4121 }, { "epoch": 1.3357096565132858, "grad_norm": 0.7975737452507019, "learning_rate": 3.061051920574708e-06, "loss": 0.1153, "step": 4122 }, { "epoch": 1.3360337005832794, "grad_norm": 0.8361819386482239, "learning_rate": 3.0601995872681167e-06, "loss": 0.1178, "step": 4123 }, { "epoch": 1.3363577446532728, "grad_norm": 0.7925208806991577, "learning_rate": 3.0593471853985197e-06, "loss": 0.1085, "step": 4124 }, { "epoch": 1.3366817887232663, "grad_norm": 0.8033835887908936, "learning_rate": 3.058494715070242e-06, "loss": 0.1076, "step": 4125 }, { "epoch": 1.33700583279326, "grad_norm": 0.8789135813713074, "learning_rate": 3.0576421763876174e-06, "loss": 0.1258, "step": 4126 }, { "epoch": 1.3373298768632533, "grad_norm": 0.8176473379135132, "learning_rate": 3.056789569454989e-06, "loss": 0.1133, "step": 4127 }, { "epoch": 1.337653920933247, "grad_norm": 0.8783532381057739, "learning_rate": 3.055936894376708e-06, "loss": 0.1224, "step": 4128 }, { "epoch": 1.3379779650032404, "grad_norm": 0.8398758769035339, "learning_rate": 3.055084151257133e-06, "loss": 0.1184, "step": 4129 }, { "epoch": 1.3383020090732338, "grad_norm": 0.7387755513191223, "learning_rate": 3.054231340200631e-06, "loss": 0.0991, "step": 4130 }, { "epoch": 1.3386260531432275, "grad_norm": 0.7608300447463989, "learning_rate": 3.053378461311578e-06, "loss": 0.1077, "step": 4131 }, { "epoch": 1.3389500972132211, "grad_norm": 0.781328558921814, "learning_rate": 3.0525255146943582e-06, "loss": 0.1077, "step": 4132 }, { "epoch": 1.3392741412832145, "grad_norm": 0.8142044544219971, "learning_rate": 3.0516725004533648e-06, "loss": 0.1176, "step": 4133 }, { "epoch": 1.339598185353208, "grad_norm": 0.7998998165130615, "learning_rate": 3.0508194186929983e-06, "loss": 0.1114, "step": 4134 }, { "epoch": 1.3399222294232016, "grad_norm": 0.7751430869102478, "learning_rate": 3.0499662695176675e-06, "loss": 0.1102, "step": 4135 }, { "epoch": 1.340246273493195, "grad_norm": 0.8340958952903748, "learning_rate": 3.0491130530317887e-06, "loss": 0.1214, "step": 4136 }, { "epoch": 1.3405703175631887, "grad_norm": 0.869506299495697, "learning_rate": 3.0482597693397887e-06, "loss": 0.1162, "step": 4137 }, { "epoch": 1.340894361633182, "grad_norm": 0.8065547943115234, "learning_rate": 3.0474064185461e-06, "loss": 0.1193, "step": 4138 }, { "epoch": 1.3412184057031755, "grad_norm": 0.8128240704536438, "learning_rate": 3.0465530007551646e-06, "loss": 0.1194, "step": 4139 }, { "epoch": 1.3415424497731692, "grad_norm": 0.7759813666343689, "learning_rate": 3.0456995160714344e-06, "loss": 0.1123, "step": 4140 }, { "epoch": 1.3418664938431626, "grad_norm": 0.7582803964614868, "learning_rate": 3.044845964599365e-06, "loss": 0.1106, "step": 4141 }, { "epoch": 1.3421905379131562, "grad_norm": 0.7886553406715393, "learning_rate": 3.043992346443424e-06, "loss": 0.1079, "step": 4142 }, { "epoch": 1.3425145819831497, "grad_norm": 0.778815746307373, "learning_rate": 3.043138661708086e-06, "loss": 0.1089, "step": 4143 }, { "epoch": 1.3428386260531433, "grad_norm": 0.8232936859130859, "learning_rate": 3.042284910497834e-06, "loss": 0.1248, "step": 4144 }, { "epoch": 1.3431626701231367, "grad_norm": 0.8517846465110779, "learning_rate": 3.0414310929171587e-06, "loss": 0.1183, "step": 4145 }, { "epoch": 1.3434867141931304, "grad_norm": 0.8031988143920898, "learning_rate": 3.04057720907056e-06, "loss": 0.1153, "step": 4146 }, { "epoch": 1.3438107582631238, "grad_norm": 0.8768326640129089, "learning_rate": 3.039723259062543e-06, "loss": 0.1196, "step": 4147 }, { "epoch": 1.3441348023331172, "grad_norm": 0.8870144486427307, "learning_rate": 3.0388692429976247e-06, "loss": 0.12, "step": 4148 }, { "epoch": 1.3444588464031109, "grad_norm": 0.8968208432197571, "learning_rate": 3.038015160980327e-06, "loss": 0.1258, "step": 4149 }, { "epoch": 1.3447828904731043, "grad_norm": 0.8066931366920471, "learning_rate": 3.0371610131151823e-06, "loss": 0.1162, "step": 4150 }, { "epoch": 1.345106934543098, "grad_norm": 0.8446550965309143, "learning_rate": 3.0363067995067297e-06, "loss": 0.1218, "step": 4151 }, { "epoch": 1.3454309786130914, "grad_norm": 0.7766255736351013, "learning_rate": 3.035452520259517e-06, "loss": 0.1122, "step": 4152 }, { "epoch": 1.3457550226830848, "grad_norm": 0.8000936508178711, "learning_rate": 3.034598175478099e-06, "loss": 0.1174, "step": 4153 }, { "epoch": 1.3460790667530784, "grad_norm": 0.825092077255249, "learning_rate": 3.03374376526704e-06, "loss": 0.1217, "step": 4154 }, { "epoch": 1.346403110823072, "grad_norm": 0.8683377504348755, "learning_rate": 3.0328892897309105e-06, "loss": 0.1277, "step": 4155 }, { "epoch": 1.3467271548930655, "grad_norm": 0.8227855563163757, "learning_rate": 3.0320347489742905e-06, "loss": 0.1121, "step": 4156 }, { "epoch": 1.347051198963059, "grad_norm": 0.8627481460571289, "learning_rate": 3.031180143101769e-06, "loss": 0.1163, "step": 4157 }, { "epoch": 1.3473752430330526, "grad_norm": 0.8161624073982239, "learning_rate": 3.03032547221794e-06, "loss": 0.1209, "step": 4158 }, { "epoch": 1.347699287103046, "grad_norm": 0.7790018320083618, "learning_rate": 3.0294707364274066e-06, "loss": 0.1144, "step": 4159 }, { "epoch": 1.3480233311730396, "grad_norm": 0.8219239115715027, "learning_rate": 3.028615935834781e-06, "loss": 0.114, "step": 4160 }, { "epoch": 1.348347375243033, "grad_norm": 0.7680139541625977, "learning_rate": 3.027761070544682e-06, "loss": 0.1078, "step": 4161 }, { "epoch": 1.3486714193130265, "grad_norm": 0.8134229183197021, "learning_rate": 3.026906140661737e-06, "loss": 0.1065, "step": 4162 }, { "epoch": 1.3489954633830201, "grad_norm": 0.8216208815574646, "learning_rate": 3.026051146290581e-06, "loss": 0.1227, "step": 4163 }, { "epoch": 1.3493195074530135, "grad_norm": 0.8143205642700195, "learning_rate": 3.025196087535858e-06, "loss": 0.1202, "step": 4164 }, { "epoch": 1.3496435515230072, "grad_norm": 0.8093990087509155, "learning_rate": 3.024340964502218e-06, "loss": 0.1106, "step": 4165 }, { "epoch": 1.3499675955930006, "grad_norm": 0.7998570799827576, "learning_rate": 3.0234857772943197e-06, "loss": 0.1032, "step": 4166 }, { "epoch": 1.350291639662994, "grad_norm": 0.874220073223114, "learning_rate": 3.0226305260168298e-06, "loss": 0.129, "step": 4167 }, { "epoch": 1.3506156837329877, "grad_norm": 0.8281093239784241, "learning_rate": 3.0217752107744237e-06, "loss": 0.1122, "step": 4168 }, { "epoch": 1.3509397278029813, "grad_norm": 0.8350166082382202, "learning_rate": 3.0209198316717825e-06, "loss": 0.1187, "step": 4169 }, { "epoch": 1.3512637718729748, "grad_norm": 0.904016375541687, "learning_rate": 3.0200643888135973e-06, "loss": 0.13, "step": 4170 }, { "epoch": 1.3515878159429682, "grad_norm": 0.7951503396034241, "learning_rate": 3.019208882304565e-06, "loss": 0.1036, "step": 4171 }, { "epoch": 1.3519118600129618, "grad_norm": 0.9027987122535706, "learning_rate": 3.0183533122493917e-06, "loss": 0.1325, "step": 4172 }, { "epoch": 1.3522359040829552, "grad_norm": 0.879817545413971, "learning_rate": 3.017497678752791e-06, "loss": 0.1233, "step": 4173 }, { "epoch": 1.3525599481529489, "grad_norm": 0.8084651231765747, "learning_rate": 3.016641981919485e-06, "loss": 0.1125, "step": 4174 }, { "epoch": 1.3528839922229423, "grad_norm": 0.8651638031005859, "learning_rate": 3.0157862218542004e-06, "loss": 0.1302, "step": 4175 }, { "epoch": 1.3532080362929357, "grad_norm": 0.8165826201438904, "learning_rate": 3.0149303986616772e-06, "loss": 0.117, "step": 4176 }, { "epoch": 1.3535320803629294, "grad_norm": 0.8501429557800293, "learning_rate": 3.014074512446657e-06, "loss": 0.1255, "step": 4177 }, { "epoch": 1.3538561244329228, "grad_norm": 0.8126803636550903, "learning_rate": 3.0132185633138934e-06, "loss": 0.1159, "step": 4178 }, { "epoch": 1.3541801685029164, "grad_norm": 0.8380131721496582, "learning_rate": 3.0123625513681463e-06, "loss": 0.1199, "step": 4179 }, { "epoch": 1.3545042125729099, "grad_norm": 0.8589929938316345, "learning_rate": 3.0115064767141827e-06, "loss": 0.1251, "step": 4180 }, { "epoch": 1.3548282566429035, "grad_norm": 0.7794283032417297, "learning_rate": 3.0106503394567775e-06, "loss": 0.1142, "step": 4181 }, { "epoch": 1.355152300712897, "grad_norm": 0.7469875812530518, "learning_rate": 3.0097941397007156e-06, "loss": 0.1103, "step": 4182 }, { "epoch": 1.3554763447828906, "grad_norm": 0.8287574648857117, "learning_rate": 3.008937877550785e-06, "loss": 0.1221, "step": 4183 }, { "epoch": 1.355800388852884, "grad_norm": 0.8686206340789795, "learning_rate": 3.008081553111786e-06, "loss": 0.1297, "step": 4184 }, { "epoch": 1.3561244329228774, "grad_norm": 0.7942254543304443, "learning_rate": 3.0072251664885222e-06, "loss": 0.1146, "step": 4185 }, { "epoch": 1.356448476992871, "grad_norm": 0.7875716686248779, "learning_rate": 3.006368717785809e-06, "loss": 0.1089, "step": 4186 }, { "epoch": 1.3567725210628645, "grad_norm": 0.7777183651924133, "learning_rate": 3.005512207108467e-06, "loss": 0.1094, "step": 4187 }, { "epoch": 1.3570965651328581, "grad_norm": 0.8916992545127869, "learning_rate": 3.004655634561325e-06, "loss": 0.1252, "step": 4188 }, { "epoch": 1.3574206092028516, "grad_norm": 0.8388050198554993, "learning_rate": 3.003799000249218e-06, "loss": 0.1269, "step": 4189 }, { "epoch": 1.357744653272845, "grad_norm": 0.8051396608352661, "learning_rate": 3.002942304276991e-06, "loss": 0.1152, "step": 4190 }, { "epoch": 1.3580686973428386, "grad_norm": 0.8340529203414917, "learning_rate": 3.002085546749495e-06, "loss": 0.1157, "step": 4191 }, { "epoch": 1.3583927414128323, "grad_norm": 0.8595792651176453, "learning_rate": 3.001228727771588e-06, "loss": 0.129, "step": 4192 }, { "epoch": 1.3587167854828257, "grad_norm": 0.7768383026123047, "learning_rate": 3.000371847448137e-06, "loss": 0.1055, "step": 4193 }, { "epoch": 1.3590408295528191, "grad_norm": 0.8091722130775452, "learning_rate": 2.9995149058840157e-06, "loss": 0.1112, "step": 4194 }, { "epoch": 1.3593648736228128, "grad_norm": 0.809382975101471, "learning_rate": 2.998657903184107e-06, "loss": 0.1147, "step": 4195 }, { "epoch": 1.3596889176928062, "grad_norm": 0.8365122675895691, "learning_rate": 2.9978008394532966e-06, "loss": 0.1181, "step": 4196 }, { "epoch": 1.3600129617627998, "grad_norm": 0.8634129762649536, "learning_rate": 2.996943714796483e-06, "loss": 0.1212, "step": 4197 }, { "epoch": 1.3603370058327933, "grad_norm": 0.8216782808303833, "learning_rate": 2.9960865293185697e-06, "loss": 0.1161, "step": 4198 }, { "epoch": 1.3606610499027867, "grad_norm": 0.8733593225479126, "learning_rate": 2.995229283124468e-06, "loss": 0.1126, "step": 4199 }, { "epoch": 1.3609850939727803, "grad_norm": 0.7830566763877869, "learning_rate": 2.994371976319096e-06, "loss": 0.117, "step": 4200 }, { "epoch": 1.3613091380427738, "grad_norm": 0.8722835183143616, "learning_rate": 2.993514609007381e-06, "loss": 0.1305, "step": 4201 }, { "epoch": 1.3616331821127674, "grad_norm": 0.7920193672180176, "learning_rate": 2.992657181294254e-06, "loss": 0.109, "step": 4202 }, { "epoch": 1.3619572261827608, "grad_norm": 0.7887697815895081, "learning_rate": 2.9917996932846572e-06, "loss": 0.1066, "step": 4203 }, { "epoch": 1.3622812702527543, "grad_norm": 0.9010506868362427, "learning_rate": 2.99094214508354e-06, "loss": 0.1252, "step": 4204 }, { "epoch": 1.362605314322748, "grad_norm": 0.7866716384887695, "learning_rate": 2.990084536795856e-06, "loss": 0.1037, "step": 4205 }, { "epoch": 1.3629293583927415, "grad_norm": 0.7375853657722473, "learning_rate": 2.989226868526569e-06, "loss": 0.105, "step": 4206 }, { "epoch": 1.363253402462735, "grad_norm": 0.7753633260726929, "learning_rate": 2.98836914038065e-06, "loss": 0.111, "step": 4207 }, { "epoch": 1.3635774465327284, "grad_norm": 0.811918318271637, "learning_rate": 2.987511352463076e-06, "loss": 0.1184, "step": 4208 }, { "epoch": 1.363901490602722, "grad_norm": 0.7513830065727234, "learning_rate": 2.9866535048788314e-06, "loss": 0.1086, "step": 4209 }, { "epoch": 1.3642255346727155, "grad_norm": 0.8139031529426575, "learning_rate": 2.9857955977329095e-06, "loss": 0.1147, "step": 4210 }, { "epoch": 1.364549578742709, "grad_norm": 0.8000282645225525, "learning_rate": 2.9849376311303095e-06, "loss": 0.1095, "step": 4211 }, { "epoch": 1.3648736228127025, "grad_norm": 0.8604514598846436, "learning_rate": 2.984079605176038e-06, "loss": 0.1242, "step": 4212 }, { "epoch": 1.365197666882696, "grad_norm": 0.8277812004089355, "learning_rate": 2.9832215199751085e-06, "loss": 0.1174, "step": 4213 }, { "epoch": 1.3655217109526896, "grad_norm": 0.809662938117981, "learning_rate": 2.9823633756325433e-06, "loss": 0.1101, "step": 4214 }, { "epoch": 1.365845755022683, "grad_norm": 0.8288755416870117, "learning_rate": 2.9815051722533707e-06, "loss": 0.1194, "step": 4215 }, { "epoch": 1.3661697990926767, "grad_norm": 0.7696095108985901, "learning_rate": 2.9806469099426254e-06, "loss": 0.1036, "step": 4216 }, { "epoch": 1.36649384316267, "grad_norm": 0.8410941958427429, "learning_rate": 2.9797885888053517e-06, "loss": 0.1171, "step": 4217 }, { "epoch": 1.3668178872326635, "grad_norm": 0.8517903685569763, "learning_rate": 2.9789302089466e-06, "loss": 0.1236, "step": 4218 }, { "epoch": 1.3671419313026572, "grad_norm": 0.8763234615325928, "learning_rate": 2.978071770471427e-06, "loss": 0.1238, "step": 4219 }, { "epoch": 1.3674659753726508, "grad_norm": 0.8843665719032288, "learning_rate": 2.9772132734848974e-06, "loss": 0.1258, "step": 4220 }, { "epoch": 1.3677900194426442, "grad_norm": 0.8795833587646484, "learning_rate": 2.9763547180920825e-06, "loss": 0.1202, "step": 4221 }, { "epoch": 1.3681140635126376, "grad_norm": 0.7880746126174927, "learning_rate": 2.9754961043980623e-06, "loss": 0.1171, "step": 4222 }, { "epoch": 1.3684381075826313, "grad_norm": 0.9248366951942444, "learning_rate": 2.9746374325079213e-06, "loss": 0.1184, "step": 4223 }, { "epoch": 1.3687621516526247, "grad_norm": 0.9103997945785522, "learning_rate": 2.973778702526754e-06, "loss": 0.131, "step": 4224 }, { "epoch": 1.3690861957226184, "grad_norm": 0.8283572793006897, "learning_rate": 2.97291991455966e-06, "loss": 0.1162, "step": 4225 }, { "epoch": 1.3694102397926118, "grad_norm": 0.82844078540802, "learning_rate": 2.9720610687117462e-06, "loss": 0.1115, "step": 4226 }, { "epoch": 1.3697342838626052, "grad_norm": 0.8517773747444153, "learning_rate": 2.971202165088128e-06, "loss": 0.1209, "step": 4227 }, { "epoch": 1.3700583279325989, "grad_norm": 0.8081420063972473, "learning_rate": 2.9703432037939255e-06, "loss": 0.1171, "step": 4228 }, { "epoch": 1.3703823720025923, "grad_norm": 0.8540878295898438, "learning_rate": 2.9694841849342688e-06, "loss": 0.1222, "step": 4229 }, { "epoch": 1.370706416072586, "grad_norm": 0.8613157868385315, "learning_rate": 2.9686251086142927e-06, "loss": 0.118, "step": 4230 }, { "epoch": 1.3710304601425793, "grad_norm": 0.8323705196380615, "learning_rate": 2.9677659749391404e-06, "loss": 0.1134, "step": 4231 }, { "epoch": 1.371354504212573, "grad_norm": 0.8358492255210876, "learning_rate": 2.9669067840139603e-06, "loss": 0.1126, "step": 4232 }, { "epoch": 1.3716785482825664, "grad_norm": 0.8735930323600769, "learning_rate": 2.9660475359439113e-06, "loss": 0.1258, "step": 4233 }, { "epoch": 1.37200259235256, "grad_norm": 0.889940619468689, "learning_rate": 2.965188230834154e-06, "loss": 0.1188, "step": 4234 }, { "epoch": 1.3723266364225535, "grad_norm": 0.8550429344177246, "learning_rate": 2.9643288687898614e-06, "loss": 0.1218, "step": 4235 }, { "epoch": 1.372650680492547, "grad_norm": 0.8896758556365967, "learning_rate": 2.96346944991621e-06, "loss": 0.1309, "step": 4236 }, { "epoch": 1.3729747245625405, "grad_norm": 0.8091087341308594, "learning_rate": 2.962609974318385e-06, "loss": 0.1177, "step": 4237 }, { "epoch": 1.373298768632534, "grad_norm": 0.837196946144104, "learning_rate": 2.961750442101577e-06, "loss": 0.1116, "step": 4238 }, { "epoch": 1.3736228127025276, "grad_norm": 0.8127410411834717, "learning_rate": 2.9608908533709852e-06, "loss": 0.1112, "step": 4239 }, { "epoch": 1.373946856772521, "grad_norm": 0.8529530167579651, "learning_rate": 2.9600312082318144e-06, "loss": 0.1249, "step": 4240 }, { "epoch": 1.3742709008425145, "grad_norm": 0.8476062417030334, "learning_rate": 2.9591715067892777e-06, "loss": 0.1173, "step": 4241 }, { "epoch": 1.374594944912508, "grad_norm": 0.8822113275527954, "learning_rate": 2.958311749148594e-06, "loss": 0.1212, "step": 4242 }, { "epoch": 1.3749189889825018, "grad_norm": 0.8067193627357483, "learning_rate": 2.9574519354149884e-06, "loss": 0.1244, "step": 4243 }, { "epoch": 1.3752430330524952, "grad_norm": 0.8280297517776489, "learning_rate": 2.9565920656936947e-06, "loss": 0.1161, "step": 4244 }, { "epoch": 1.3755670771224886, "grad_norm": 0.791181743144989, "learning_rate": 2.9557321400899524e-06, "loss": 0.1201, "step": 4245 }, { "epoch": 1.3758911211924822, "grad_norm": 0.8039045333862305, "learning_rate": 2.9548721587090075e-06, "loss": 0.1093, "step": 4246 }, { "epoch": 1.3762151652624757, "grad_norm": 0.7414494752883911, "learning_rate": 2.954012121656114e-06, "loss": 0.0999, "step": 4247 }, { "epoch": 1.3765392093324693, "grad_norm": 0.7822027802467346, "learning_rate": 2.9531520290365316e-06, "loss": 0.1073, "step": 4248 }, { "epoch": 1.3768632534024627, "grad_norm": 0.8837736248970032, "learning_rate": 2.952291880955529e-06, "loss": 0.1187, "step": 4249 }, { "epoch": 1.3771872974724562, "grad_norm": 0.8309406638145447, "learning_rate": 2.9514316775183777e-06, "loss": 0.1246, "step": 4250 }, { "epoch": 1.3775113415424498, "grad_norm": 0.7587549090385437, "learning_rate": 2.950571418830359e-06, "loss": 0.1067, "step": 4251 }, { "epoch": 1.3778353856124432, "grad_norm": 0.8112297654151917, "learning_rate": 2.949711104996761e-06, "loss": 0.1227, "step": 4252 }, { "epoch": 1.3781594296824369, "grad_norm": 0.7933392524719238, "learning_rate": 2.948850736122878e-06, "loss": 0.1188, "step": 4253 }, { "epoch": 1.3784834737524303, "grad_norm": 0.8585779666900635, "learning_rate": 2.947990312314009e-06, "loss": 0.1321, "step": 4254 }, { "epoch": 1.3788075178224237, "grad_norm": 0.8090001940727234, "learning_rate": 2.9471298336754633e-06, "loss": 0.1171, "step": 4255 }, { "epoch": 1.3791315618924174, "grad_norm": 0.8341668844223022, "learning_rate": 2.9462693003125544e-06, "loss": 0.1124, "step": 4256 }, { "epoch": 1.379455605962411, "grad_norm": 0.7895886301994324, "learning_rate": 2.945408712330603e-06, "loss": 0.1157, "step": 4257 }, { "epoch": 1.3797796500324044, "grad_norm": 0.8023675084114075, "learning_rate": 2.944548069834937e-06, "loss": 0.1195, "step": 4258 }, { "epoch": 1.3801036941023979, "grad_norm": 0.8319817781448364, "learning_rate": 2.943687372930891e-06, "loss": 0.1146, "step": 4259 }, { "epoch": 1.3804277381723915, "grad_norm": 0.7980121374130249, "learning_rate": 2.942826621723806e-06, "loss": 0.1168, "step": 4260 }, { "epoch": 1.380751782242385, "grad_norm": 0.8253116607666016, "learning_rate": 2.9419658163190295e-06, "loss": 0.1193, "step": 4261 }, { "epoch": 1.3810758263123786, "grad_norm": 0.7385011911392212, "learning_rate": 2.9411049568219153e-06, "loss": 0.1051, "step": 4262 }, { "epoch": 1.381399870382372, "grad_norm": 0.7881051301956177, "learning_rate": 2.9402440433378247e-06, "loss": 0.1146, "step": 4263 }, { "epoch": 1.3817239144523654, "grad_norm": 0.8268387913703918, "learning_rate": 2.939383075972125e-06, "loss": 0.1144, "step": 4264 }, { "epoch": 1.382047958522359, "grad_norm": 0.8658704161643982, "learning_rate": 2.9385220548301906e-06, "loss": 0.1187, "step": 4265 }, { "epoch": 1.3823720025923525, "grad_norm": 0.8008397817611694, "learning_rate": 2.937660980017402e-06, "loss": 0.1109, "step": 4266 }, { "epoch": 1.3826960466623461, "grad_norm": 0.8876312971115112, "learning_rate": 2.936799851639146e-06, "loss": 0.1232, "step": 4267 }, { "epoch": 1.3830200907323396, "grad_norm": 0.8302385807037354, "learning_rate": 2.9359386698008172e-06, "loss": 0.1146, "step": 4268 }, { "epoch": 1.383344134802333, "grad_norm": 0.8758426904678345, "learning_rate": 2.935077434607815e-06, "loss": 0.1123, "step": 4269 }, { "epoch": 1.3836681788723266, "grad_norm": 0.8773407340049744, "learning_rate": 2.9342161461655468e-06, "loss": 0.1195, "step": 4270 }, { "epoch": 1.3839922229423203, "grad_norm": 0.7563143372535706, "learning_rate": 2.9333548045794253e-06, "loss": 0.1115, "step": 4271 }, { "epoch": 1.3843162670123137, "grad_norm": 0.8111708164215088, "learning_rate": 2.9324934099548713e-06, "loss": 0.1113, "step": 4272 }, { "epoch": 1.3846403110823071, "grad_norm": 0.922406792640686, "learning_rate": 2.931631962397311e-06, "loss": 0.1287, "step": 4273 }, { "epoch": 1.3849643551523008, "grad_norm": 0.7872484922409058, "learning_rate": 2.9307704620121775e-06, "loss": 0.1143, "step": 4274 }, { "epoch": 1.3852883992222942, "grad_norm": 0.8987158536911011, "learning_rate": 2.9299089089049092e-06, "loss": 0.1223, "step": 4275 }, { "epoch": 1.3856124432922878, "grad_norm": 0.7530018091201782, "learning_rate": 2.929047303180952e-06, "loss": 0.1081, "step": 4276 }, { "epoch": 1.3859364873622813, "grad_norm": 0.8436494469642639, "learning_rate": 2.9281856449457587e-06, "loss": 0.1177, "step": 4277 }, { "epoch": 1.3862605314322747, "grad_norm": 0.8935341238975525, "learning_rate": 2.927323934304787e-06, "loss": 0.1408, "step": 4278 }, { "epoch": 1.3865845755022683, "grad_norm": 0.750361979007721, "learning_rate": 2.926462171363503e-06, "loss": 0.1105, "step": 4279 }, { "epoch": 1.3869086195722617, "grad_norm": 0.8003627061843872, "learning_rate": 2.9256003562273784e-06, "loss": 0.1154, "step": 4280 }, { "epoch": 1.3872326636422554, "grad_norm": 0.8266944885253906, "learning_rate": 2.924738489001889e-06, "loss": 0.1148, "step": 4281 }, { "epoch": 1.3875567077122488, "grad_norm": 0.9039225578308105, "learning_rate": 2.923876569792521e-06, "loss": 0.1333, "step": 4282 }, { "epoch": 1.3878807517822425, "grad_norm": 0.8375152945518494, "learning_rate": 2.923014598704764e-06, "loss": 0.1231, "step": 4283 }, { "epoch": 1.3882047958522359, "grad_norm": 0.7849745154380798, "learning_rate": 2.9221525758441155e-06, "loss": 0.1195, "step": 4284 }, { "epoch": 1.3885288399222295, "grad_norm": 0.7857218384742737, "learning_rate": 2.9212905013160784e-06, "loss": 0.1123, "step": 4285 }, { "epoch": 1.388852883992223, "grad_norm": 0.7229386568069458, "learning_rate": 2.920428375226163e-06, "loss": 0.1031, "step": 4286 }, { "epoch": 1.3891769280622164, "grad_norm": 0.7687814831733704, "learning_rate": 2.9195661976798838e-06, "loss": 0.1049, "step": 4287 }, { "epoch": 1.38950097213221, "grad_norm": 0.8102664947509766, "learning_rate": 2.918703968782764e-06, "loss": 0.1169, "step": 4288 }, { "epoch": 1.3898250162022034, "grad_norm": 0.8313232660293579, "learning_rate": 2.9178416886403318e-06, "loss": 0.1241, "step": 4289 }, { "epoch": 1.390149060272197, "grad_norm": 0.76463782787323, "learning_rate": 2.916979357358121e-06, "loss": 0.1158, "step": 4290 }, { "epoch": 1.3904731043421905, "grad_norm": 0.8194743394851685, "learning_rate": 2.9161169750416746e-06, "loss": 0.1194, "step": 4291 }, { "epoch": 1.390797148412184, "grad_norm": 0.7722929120063782, "learning_rate": 2.915254541796539e-06, "loss": 0.113, "step": 4292 }, { "epoch": 1.3911211924821776, "grad_norm": 0.8249363303184509, "learning_rate": 2.914392057728267e-06, "loss": 0.1223, "step": 4293 }, { "epoch": 1.3914452365521712, "grad_norm": 0.8252852559089661, "learning_rate": 2.913529522942418e-06, "loss": 0.1171, "step": 4294 }, { "epoch": 1.3917692806221647, "grad_norm": 0.795011043548584, "learning_rate": 2.9126669375445595e-06, "loss": 0.1163, "step": 4295 }, { "epoch": 1.392093324692158, "grad_norm": 0.8001385927200317, "learning_rate": 2.911804301640263e-06, "loss": 0.1222, "step": 4296 }, { "epoch": 1.3924173687621517, "grad_norm": 0.8127326369285583, "learning_rate": 2.910941615335106e-06, "loss": 0.124, "step": 4297 }, { "epoch": 1.3927414128321451, "grad_norm": 0.851428747177124, "learning_rate": 2.9100788787346746e-06, "loss": 0.12, "step": 4298 }, { "epoch": 1.3930654569021388, "grad_norm": 0.8312917947769165, "learning_rate": 2.9092160919445566e-06, "loss": 0.1216, "step": 4299 }, { "epoch": 1.3933895009721322, "grad_norm": 0.7327541708946228, "learning_rate": 2.9083532550703515e-06, "loss": 0.1113, "step": 4300 }, { "epoch": 1.3937135450421256, "grad_norm": 0.8703411221504211, "learning_rate": 2.9074903682176607e-06, "loss": 0.1268, "step": 4301 }, { "epoch": 1.3940375891121193, "grad_norm": 0.8300853967666626, "learning_rate": 2.906627431492094e-06, "loss": 0.1112, "step": 4302 }, { "epoch": 1.3943616331821127, "grad_norm": 0.8130788207054138, "learning_rate": 2.9057644449992655e-06, "loss": 0.1144, "step": 4303 }, { "epoch": 1.3946856772521063, "grad_norm": 0.8016307353973389, "learning_rate": 2.904901408844798e-06, "loss": 0.1188, "step": 4304 }, { "epoch": 1.3950097213220998, "grad_norm": 0.7863853573799133, "learning_rate": 2.9040383231343173e-06, "loss": 0.1105, "step": 4305 }, { "epoch": 1.3953337653920932, "grad_norm": 0.8026547431945801, "learning_rate": 2.903175187973457e-06, "loss": 0.1144, "step": 4306 }, { "epoch": 1.3956578094620868, "grad_norm": 0.8447574973106384, "learning_rate": 2.9023120034678575e-06, "loss": 0.132, "step": 4307 }, { "epoch": 1.3959818535320805, "grad_norm": 0.8244043588638306, "learning_rate": 2.901448769723163e-06, "loss": 0.1069, "step": 4308 }, { "epoch": 1.396305897602074, "grad_norm": 0.8813815116882324, "learning_rate": 2.900585486845026e-06, "loss": 0.127, "step": 4309 }, { "epoch": 1.3966299416720673, "grad_norm": 0.8630741834640503, "learning_rate": 2.8997221549391025e-06, "loss": 0.1239, "step": 4310 }, { "epoch": 1.396953985742061, "grad_norm": 0.75046706199646, "learning_rate": 2.8988587741110575e-06, "loss": 0.0973, "step": 4311 }, { "epoch": 1.3972780298120544, "grad_norm": 0.7843610048294067, "learning_rate": 2.8979953444665585e-06, "loss": 0.1046, "step": 4312 }, { "epoch": 1.397602073882048, "grad_norm": 0.8384765386581421, "learning_rate": 2.8971318661112836e-06, "loss": 0.1163, "step": 4313 }, { "epoch": 1.3979261179520415, "grad_norm": 0.8430674076080322, "learning_rate": 2.896268339150912e-06, "loss": 0.114, "step": 4314 }, { "epoch": 1.398250162022035, "grad_norm": 0.8547707200050354, "learning_rate": 2.895404763691132e-06, "loss": 0.119, "step": 4315 }, { "epoch": 1.3985742060920285, "grad_norm": 0.7359633445739746, "learning_rate": 2.894541139837638e-06, "loss": 0.1078, "step": 4316 }, { "epoch": 1.398898250162022, "grad_norm": 0.8550858497619629, "learning_rate": 2.8936774676961264e-06, "loss": 0.1228, "step": 4317 }, { "epoch": 1.3992222942320156, "grad_norm": 0.8657211661338806, "learning_rate": 2.892813747372305e-06, "loss": 0.1265, "step": 4318 }, { "epoch": 1.399546338302009, "grad_norm": 0.8944101929664612, "learning_rate": 2.891949978971883e-06, "loss": 0.1146, "step": 4319 }, { "epoch": 1.3998703823720027, "grad_norm": 0.7339831590652466, "learning_rate": 2.8910861626005774e-06, "loss": 0.0996, "step": 4320 }, { "epoch": 1.400194426441996, "grad_norm": 0.839537501335144, "learning_rate": 2.890222298364112e-06, "loss": 0.1263, "step": 4321 }, { "epoch": 1.4005184705119897, "grad_norm": 0.7799286246299744, "learning_rate": 2.8893583863682157e-06, "loss": 0.1191, "step": 4322 }, { "epoch": 1.4008425145819832, "grad_norm": 0.9313471913337708, "learning_rate": 2.888494426718621e-06, "loss": 0.1292, "step": 4323 }, { "epoch": 1.4011665586519766, "grad_norm": 0.8260443210601807, "learning_rate": 2.8876304195210697e-06, "loss": 0.1134, "step": 4324 }, { "epoch": 1.4014906027219702, "grad_norm": 0.8529420495033264, "learning_rate": 2.8867663648813077e-06, "loss": 0.1257, "step": 4325 }, { "epoch": 1.4018146467919637, "grad_norm": 0.8417792916297913, "learning_rate": 2.885902262905087e-06, "loss": 0.1114, "step": 4326 }, { "epoch": 1.4021386908619573, "grad_norm": 0.8513123393058777, "learning_rate": 2.885038113698165e-06, "loss": 0.126, "step": 4327 }, { "epoch": 1.4024627349319507, "grad_norm": 0.8104535341262817, "learning_rate": 2.8841739173663057e-06, "loss": 0.1161, "step": 4328 }, { "epoch": 1.4027867790019442, "grad_norm": 0.7754889130592346, "learning_rate": 2.883309674015278e-06, "loss": 0.107, "step": 4329 }, { "epoch": 1.4031108230719378, "grad_norm": 0.8020490407943726, "learning_rate": 2.8824453837508563e-06, "loss": 0.1122, "step": 4330 }, { "epoch": 1.4034348671419314, "grad_norm": 0.8663510680198669, "learning_rate": 2.8815810466788225e-06, "loss": 0.1237, "step": 4331 }, { "epoch": 1.4037589112119249, "grad_norm": 0.9045275449752808, "learning_rate": 2.8807166629049623e-06, "loss": 0.132, "step": 4332 }, { "epoch": 1.4040829552819183, "grad_norm": 0.7936350703239441, "learning_rate": 2.8798522325350683e-06, "loss": 0.1106, "step": 4333 }, { "epoch": 1.404406999351912, "grad_norm": 0.8123335242271423, "learning_rate": 2.8789877556749383e-06, "loss": 0.107, "step": 4334 }, { "epoch": 1.4047310434219054, "grad_norm": 0.8923666477203369, "learning_rate": 2.8781232324303758e-06, "loss": 0.1217, "step": 4335 }, { "epoch": 1.405055087491899, "grad_norm": 0.7928508520126343, "learning_rate": 2.8772586629071902e-06, "loss": 0.1119, "step": 4336 }, { "epoch": 1.4053791315618924, "grad_norm": 0.7635928392410278, "learning_rate": 2.876394047211196e-06, "loss": 0.1124, "step": 4337 }, { "epoch": 1.4057031756318858, "grad_norm": 0.861503005027771, "learning_rate": 2.875529385448215e-06, "loss": 0.1146, "step": 4338 }, { "epoch": 1.4060272197018795, "grad_norm": 0.8829136490821838, "learning_rate": 2.8746646777240724e-06, "loss": 0.1275, "step": 4339 }, { "epoch": 1.406351263771873, "grad_norm": 0.8503878712654114, "learning_rate": 2.8737999241446e-06, "loss": 0.1244, "step": 4340 }, { "epoch": 1.4066753078418666, "grad_norm": 0.7719369530677795, "learning_rate": 2.8729351248156364e-06, "loss": 0.1028, "step": 4341 }, { "epoch": 1.40699935191186, "grad_norm": 0.8596400022506714, "learning_rate": 2.872070279843023e-06, "loss": 0.1246, "step": 4342 }, { "epoch": 1.4073233959818534, "grad_norm": 0.8827523589134216, "learning_rate": 2.8712053893326088e-06, "loss": 0.1285, "step": 4343 }, { "epoch": 1.407647440051847, "grad_norm": 0.8571576476097107, "learning_rate": 2.8703404533902492e-06, "loss": 0.1292, "step": 4344 }, { "epoch": 1.4079714841218407, "grad_norm": 0.7802562713623047, "learning_rate": 2.8694754721218027e-06, "loss": 0.1114, "step": 4345 }, { "epoch": 1.4082955281918341, "grad_norm": 0.8333742022514343, "learning_rate": 2.8686104456331356e-06, "loss": 0.108, "step": 4346 }, { "epoch": 1.4086195722618275, "grad_norm": 0.7979025840759277, "learning_rate": 2.8677453740301185e-06, "loss": 0.1129, "step": 4347 }, { "epoch": 1.4089436163318212, "grad_norm": 0.8137672543525696, "learning_rate": 2.8668802574186277e-06, "loss": 0.1217, "step": 4348 }, { "epoch": 1.4092676604018146, "grad_norm": 0.8151755332946777, "learning_rate": 2.8660150959045456e-06, "loss": 0.1226, "step": 4349 }, { "epoch": 1.4095917044718083, "grad_norm": 0.8417986631393433, "learning_rate": 2.865149889593758e-06, "loss": 0.1133, "step": 4350 }, { "epoch": 1.4099157485418017, "grad_norm": 0.8378992676734924, "learning_rate": 2.8642846385921593e-06, "loss": 0.1168, "step": 4351 }, { "epoch": 1.410239792611795, "grad_norm": 0.8362653255462646, "learning_rate": 2.863419343005647e-06, "loss": 0.1181, "step": 4352 }, { "epoch": 1.4105638366817888, "grad_norm": 0.8783177137374878, "learning_rate": 2.8625540029401262e-06, "loss": 0.126, "step": 4353 }, { "epoch": 1.4108878807517822, "grad_norm": 0.8419884443283081, "learning_rate": 2.8616886185015046e-06, "loss": 0.1275, "step": 4354 }, { "epoch": 1.4112119248217758, "grad_norm": 0.8621551394462585, "learning_rate": 2.860823189795697e-06, "loss": 0.1217, "step": 4355 }, { "epoch": 1.4115359688917692, "grad_norm": 0.8619939684867859, "learning_rate": 2.859957716928625e-06, "loss": 0.1229, "step": 4356 }, { "epoch": 1.4118600129617627, "grad_norm": 0.7918229699134827, "learning_rate": 2.8590922000062125e-06, "loss": 0.1133, "step": 4357 }, { "epoch": 1.4121840570317563, "grad_norm": 0.8201887011528015, "learning_rate": 2.858226639134391e-06, "loss": 0.114, "step": 4358 }, { "epoch": 1.41250810110175, "grad_norm": 0.8521756529808044, "learning_rate": 2.8573610344190978e-06, "loss": 0.1302, "step": 4359 }, { "epoch": 1.4128321451717434, "grad_norm": 0.8289884328842163, "learning_rate": 2.8564953859662725e-06, "loss": 0.1152, "step": 4360 }, { "epoch": 1.4131561892417368, "grad_norm": 0.8881711959838867, "learning_rate": 2.8556296938818632e-06, "loss": 0.1246, "step": 4361 }, { "epoch": 1.4134802333117304, "grad_norm": 0.7568867206573486, "learning_rate": 2.8547639582718223e-06, "loss": 0.1027, "step": 4362 }, { "epoch": 1.4138042773817239, "grad_norm": 0.8158787488937378, "learning_rate": 2.853898179242107e-06, "loss": 0.1159, "step": 4363 }, { "epoch": 1.4141283214517175, "grad_norm": 0.7525155544281006, "learning_rate": 2.8530323568986805e-06, "loss": 0.1044, "step": 4364 }, { "epoch": 1.414452365521711, "grad_norm": 0.8484021425247192, "learning_rate": 2.8521664913475123e-06, "loss": 0.1192, "step": 4365 }, { "epoch": 1.4147764095917044, "grad_norm": 0.7542396783828735, "learning_rate": 2.8513005826945733e-06, "loss": 0.1086, "step": 4366 }, { "epoch": 1.415100453661698, "grad_norm": 0.7980123162269592, "learning_rate": 2.8504346310458446e-06, "loss": 0.1173, "step": 4367 }, { "epoch": 1.4154244977316914, "grad_norm": 0.8765463829040527, "learning_rate": 2.8495686365073096e-06, "loss": 0.1198, "step": 4368 }, { "epoch": 1.415748541801685, "grad_norm": 0.7539858818054199, "learning_rate": 2.848702599184957e-06, "loss": 0.1024, "step": 4369 }, { "epoch": 1.4160725858716785, "grad_norm": 0.7840802073478699, "learning_rate": 2.8478365191847824e-06, "loss": 0.1206, "step": 4370 }, { "epoch": 1.4163966299416721, "grad_norm": 0.8659231066703796, "learning_rate": 2.8469703966127853e-06, "loss": 0.1203, "step": 4371 }, { "epoch": 1.4167206740116656, "grad_norm": 0.7570154070854187, "learning_rate": 2.8461042315749706e-06, "loss": 0.1065, "step": 4372 }, { "epoch": 1.4170447180816592, "grad_norm": 0.8071007132530212, "learning_rate": 2.845238024177348e-06, "loss": 0.1171, "step": 4373 }, { "epoch": 1.4173687621516526, "grad_norm": 0.8437150716781616, "learning_rate": 2.8443717745259335e-06, "loss": 0.1185, "step": 4374 }, { "epoch": 1.417692806221646, "grad_norm": 0.8691065907478333, "learning_rate": 2.8435054827267476e-06, "loss": 0.125, "step": 4375 }, { "epoch": 1.4180168502916397, "grad_norm": 0.8441025614738464, "learning_rate": 2.8426391488858163e-06, "loss": 0.1209, "step": 4376 }, { "epoch": 1.4183408943616331, "grad_norm": 0.8875846266746521, "learning_rate": 2.8417727731091705e-06, "loss": 0.1258, "step": 4377 }, { "epoch": 1.4186649384316268, "grad_norm": 0.8732943534851074, "learning_rate": 2.840906355502845e-06, "loss": 0.1215, "step": 4378 }, { "epoch": 1.4189889825016202, "grad_norm": 0.826084554195404, "learning_rate": 2.840039896172882e-06, "loss": 0.1104, "step": 4379 }, { "epoch": 1.4193130265716136, "grad_norm": 0.8853211402893066, "learning_rate": 2.8391733952253277e-06, "loss": 0.1232, "step": 4380 }, { "epoch": 1.4196370706416073, "grad_norm": 0.8420249223709106, "learning_rate": 2.838306852766234e-06, "loss": 0.118, "step": 4381 }, { "epoch": 1.419961114711601, "grad_norm": 0.8085249662399292, "learning_rate": 2.8374402689016557e-06, "loss": 0.1096, "step": 4382 }, { "epoch": 1.4202851587815943, "grad_norm": 0.8482363224029541, "learning_rate": 2.8365736437376555e-06, "loss": 0.1234, "step": 4383 }, { "epoch": 1.4206092028515878, "grad_norm": 0.857765257358551, "learning_rate": 2.8357069773802996e-06, "loss": 0.1207, "step": 4384 }, { "epoch": 1.4209332469215814, "grad_norm": 0.8329556584358215, "learning_rate": 2.834840269935659e-06, "loss": 0.1166, "step": 4385 }, { "epoch": 1.4212572909915748, "grad_norm": 0.8334675431251526, "learning_rate": 2.833973521509812e-06, "loss": 0.123, "step": 4386 }, { "epoch": 1.4215813350615685, "grad_norm": 0.7916951179504395, "learning_rate": 2.833106732208838e-06, "loss": 0.1183, "step": 4387 }, { "epoch": 1.421905379131562, "grad_norm": 0.8398131132125854, "learning_rate": 2.8322399021388248e-06, "loss": 0.118, "step": 4388 }, { "epoch": 1.4222294232015553, "grad_norm": 0.8872308731079102, "learning_rate": 2.8313730314058645e-06, "loss": 0.126, "step": 4389 }, { "epoch": 1.422553467271549, "grad_norm": 0.8836546540260315, "learning_rate": 2.830506120116053e-06, "loss": 0.1261, "step": 4390 }, { "epoch": 1.4228775113415424, "grad_norm": 0.8260588049888611, "learning_rate": 2.8296391683754916e-06, "loss": 0.1191, "step": 4391 }, { "epoch": 1.423201555411536, "grad_norm": 0.814530611038208, "learning_rate": 2.8287721762902877e-06, "loss": 0.1125, "step": 4392 }, { "epoch": 1.4235255994815295, "grad_norm": 0.8176182508468628, "learning_rate": 2.8279051439665516e-06, "loss": 0.1226, "step": 4393 }, { "epoch": 1.4238496435515229, "grad_norm": 0.8547371029853821, "learning_rate": 2.8270380715104e-06, "loss": 0.1196, "step": 4394 }, { "epoch": 1.4241736876215165, "grad_norm": 0.8309570550918579, "learning_rate": 2.826170959027956e-06, "loss": 0.1123, "step": 4395 }, { "epoch": 1.4244977316915102, "grad_norm": 0.8684561848640442, "learning_rate": 2.8253038066253423e-06, "loss": 0.1257, "step": 4396 }, { "epoch": 1.4248217757615036, "grad_norm": 0.8328901529312134, "learning_rate": 2.8244366144086926e-06, "loss": 0.1136, "step": 4397 }, { "epoch": 1.425145819831497, "grad_norm": 0.8921581506729126, "learning_rate": 2.823569382484142e-06, "loss": 0.1238, "step": 4398 }, { "epoch": 1.4254698639014907, "grad_norm": 0.8457754850387573, "learning_rate": 2.822702110957831e-06, "loss": 0.114, "step": 4399 }, { "epoch": 1.425793907971484, "grad_norm": 0.8151159882545471, "learning_rate": 2.8218347999359066e-06, "loss": 0.1187, "step": 4400 }, { "epoch": 1.4261179520414777, "grad_norm": 0.8689265251159668, "learning_rate": 2.8209674495245177e-06, "loss": 0.1183, "step": 4401 }, { "epoch": 1.4264419961114712, "grad_norm": 0.8540447950363159, "learning_rate": 2.82010005982982e-06, "loss": 0.1209, "step": 4402 }, { "epoch": 1.4267660401814646, "grad_norm": 0.9191943407058716, "learning_rate": 2.819232630957975e-06, "loss": 0.1325, "step": 4403 }, { "epoch": 1.4270900842514582, "grad_norm": 0.7965993881225586, "learning_rate": 2.818365163015145e-06, "loss": 0.1152, "step": 4404 }, { "epoch": 1.4274141283214516, "grad_norm": 0.8361888527870178, "learning_rate": 2.8174976561075013e-06, "loss": 0.1224, "step": 4405 }, { "epoch": 1.4277381723914453, "grad_norm": 0.7920622229576111, "learning_rate": 2.816630110341218e-06, "loss": 0.1131, "step": 4406 }, { "epoch": 1.4280622164614387, "grad_norm": 0.8130832314491272, "learning_rate": 2.8157625258224746e-06, "loss": 0.1178, "step": 4407 }, { "epoch": 1.4283862605314321, "grad_norm": 0.7392692565917969, "learning_rate": 2.814894902657456e-06, "loss": 0.103, "step": 4408 }, { "epoch": 1.4287103046014258, "grad_norm": 0.8898904919624329, "learning_rate": 2.814027240952348e-06, "loss": 0.1238, "step": 4409 }, { "epoch": 1.4290343486714194, "grad_norm": 0.8473485708236694, "learning_rate": 2.8131595408133467e-06, "loss": 0.1141, "step": 4410 }, { "epoch": 1.4293583927414129, "grad_norm": 0.8053525686264038, "learning_rate": 2.8122918023466485e-06, "loss": 0.1173, "step": 4411 }, { "epoch": 1.4296824368114063, "grad_norm": 0.8200134634971619, "learning_rate": 2.811424025658458e-06, "loss": 0.1042, "step": 4412 }, { "epoch": 1.4300064808814, "grad_norm": 0.9330074191093445, "learning_rate": 2.8105562108549807e-06, "loss": 0.13, "step": 4413 }, { "epoch": 1.4303305249513933, "grad_norm": 0.8630911111831665, "learning_rate": 2.80968835804243e-06, "loss": 0.1167, "step": 4414 }, { "epoch": 1.430654569021387, "grad_norm": 0.8359479904174805, "learning_rate": 2.808820467327022e-06, "loss": 0.1221, "step": 4415 }, { "epoch": 1.4309786130913804, "grad_norm": 0.8438312411308289, "learning_rate": 2.8079525388149787e-06, "loss": 0.1268, "step": 4416 }, { "epoch": 1.4313026571613738, "grad_norm": 0.8100758790969849, "learning_rate": 2.8070845726125257e-06, "loss": 0.1063, "step": 4417 }, { "epoch": 1.4316267012313675, "grad_norm": 0.9410319924354553, "learning_rate": 2.8062165688258934e-06, "loss": 0.1308, "step": 4418 }, { "epoch": 1.431950745301361, "grad_norm": 0.7617793083190918, "learning_rate": 2.8053485275613177e-06, "loss": 0.1006, "step": 4419 }, { "epoch": 1.4322747893713546, "grad_norm": 0.8048340082168579, "learning_rate": 2.804480448925039e-06, "loss": 0.1155, "step": 4420 }, { "epoch": 1.432598833441348, "grad_norm": 0.7600870728492737, "learning_rate": 2.8036123330233e-06, "loss": 0.1066, "step": 4421 }, { "epoch": 1.4329228775113416, "grad_norm": 0.8405733704566956, "learning_rate": 2.802744179962351e-06, "loss": 0.1167, "step": 4422 }, { "epoch": 1.433246921581335, "grad_norm": 0.7904203534126282, "learning_rate": 2.801875989848446e-06, "loss": 0.1155, "step": 4423 }, { "epoch": 1.4335709656513287, "grad_norm": 0.8001915812492371, "learning_rate": 2.8010077627878414e-06, "loss": 0.1098, "step": 4424 }, { "epoch": 1.4338950097213221, "grad_norm": 0.8362448811531067, "learning_rate": 2.8001394988868003e-06, "loss": 0.1179, "step": 4425 }, { "epoch": 1.4342190537913155, "grad_norm": 0.7693252563476562, "learning_rate": 2.7992711982515908e-06, "loss": 0.1128, "step": 4426 }, { "epoch": 1.4345430978613092, "grad_norm": 0.813224732875824, "learning_rate": 2.798402860988483e-06, "loss": 0.1061, "step": 4427 }, { "epoch": 1.4348671419313026, "grad_norm": 0.8760613799095154, "learning_rate": 2.797534487203755e-06, "loss": 0.1146, "step": 4428 }, { "epoch": 1.4351911860012962, "grad_norm": 0.8515808582305908, "learning_rate": 2.7966660770036845e-06, "loss": 0.1285, "step": 4429 }, { "epoch": 1.4355152300712897, "grad_norm": 0.8039884567260742, "learning_rate": 2.795797630494559e-06, "loss": 0.1116, "step": 4430 }, { "epoch": 1.435839274141283, "grad_norm": 0.8766940832138062, "learning_rate": 2.7949291477826666e-06, "loss": 0.1156, "step": 4431 }, { "epoch": 1.4361633182112767, "grad_norm": 0.7784311175346375, "learning_rate": 2.7940606289743026e-06, "loss": 0.1047, "step": 4432 }, { "epoch": 1.4364873622812704, "grad_norm": 0.8435165882110596, "learning_rate": 2.793192074175764e-06, "loss": 0.1232, "step": 4433 }, { "epoch": 1.4368114063512638, "grad_norm": 0.8447853326797485, "learning_rate": 2.792323483493354e-06, "loss": 0.1173, "step": 4434 }, { "epoch": 1.4371354504212572, "grad_norm": 0.7999764680862427, "learning_rate": 2.791454857033379e-06, "loss": 0.1116, "step": 4435 }, { "epoch": 1.4374594944912509, "grad_norm": 0.8022122979164124, "learning_rate": 2.790586194902151e-06, "loss": 0.1163, "step": 4436 }, { "epoch": 1.4377835385612443, "grad_norm": 0.7947648763656616, "learning_rate": 2.789717497205986e-06, "loss": 0.1068, "step": 4437 }, { "epoch": 1.438107582631238, "grad_norm": 0.8844077587127686, "learning_rate": 2.7888487640512046e-06, "loss": 0.1274, "step": 4438 }, { "epoch": 1.4384316267012314, "grad_norm": 0.7734432220458984, "learning_rate": 2.78797999554413e-06, "loss": 0.1074, "step": 4439 }, { "epoch": 1.4387556707712248, "grad_norm": 0.8220635056495667, "learning_rate": 2.787111191791092e-06, "loss": 0.1178, "step": 4440 }, { "epoch": 1.4390797148412184, "grad_norm": 0.7775998115539551, "learning_rate": 2.7862423528984233e-06, "loss": 0.1151, "step": 4441 }, { "epoch": 1.4394037589112119, "grad_norm": 0.7471057772636414, "learning_rate": 2.7853734789724618e-06, "loss": 0.1044, "step": 4442 }, { "epoch": 1.4397278029812055, "grad_norm": 0.9184688925743103, "learning_rate": 2.7845045701195494e-06, "loss": 0.1394, "step": 4443 }, { "epoch": 1.440051847051199, "grad_norm": 0.800000011920929, "learning_rate": 2.7836356264460316e-06, "loss": 0.1167, "step": 4444 }, { "epoch": 1.4403758911211924, "grad_norm": 0.8667371273040771, "learning_rate": 2.7827666480582593e-06, "loss": 0.1202, "step": 4445 }, { "epoch": 1.440699935191186, "grad_norm": 0.7411370873451233, "learning_rate": 2.7818976350625864e-06, "loss": 0.0965, "step": 4446 }, { "epoch": 1.4410239792611796, "grad_norm": 0.8143919706344604, "learning_rate": 2.781028587565372e-06, "loss": 0.1213, "step": 4447 }, { "epoch": 1.441348023331173, "grad_norm": 0.8431907892227173, "learning_rate": 2.780159505672979e-06, "loss": 0.1196, "step": 4448 }, { "epoch": 1.4416720674011665, "grad_norm": 0.7820329666137695, "learning_rate": 2.7792903894917746e-06, "loss": 0.1174, "step": 4449 }, { "epoch": 1.4419961114711601, "grad_norm": 0.8205739855766296, "learning_rate": 2.7784212391281307e-06, "loss": 0.1138, "step": 4450 }, { "epoch": 1.4423201555411536, "grad_norm": 0.9027796983718872, "learning_rate": 2.7775520546884216e-06, "loss": 0.1163, "step": 4451 }, { "epoch": 1.4426441996111472, "grad_norm": 0.8040214776992798, "learning_rate": 2.7766828362790283e-06, "loss": 0.1204, "step": 4452 }, { "epoch": 1.4429682436811406, "grad_norm": 0.8394964933395386, "learning_rate": 2.7758135840063344e-06, "loss": 0.1254, "step": 4453 }, { "epoch": 1.443292287751134, "grad_norm": 0.8752254843711853, "learning_rate": 2.7749442979767276e-06, "loss": 0.1226, "step": 4454 }, { "epoch": 1.4436163318211277, "grad_norm": 0.8331737518310547, "learning_rate": 2.7740749782966016e-06, "loss": 0.1139, "step": 4455 }, { "epoch": 1.4439403758911211, "grad_norm": 0.8385717868804932, "learning_rate": 2.7732056250723505e-06, "loss": 0.1175, "step": 4456 }, { "epoch": 1.4442644199611148, "grad_norm": 0.8438707590103149, "learning_rate": 2.7723362384103757e-06, "loss": 0.1213, "step": 4457 }, { "epoch": 1.4445884640311082, "grad_norm": 0.8456485867500305, "learning_rate": 2.771466818417082e-06, "loss": 0.1257, "step": 4458 }, { "epoch": 1.4449125081011016, "grad_norm": 0.7441908717155457, "learning_rate": 2.7705973651988777e-06, "loss": 0.1059, "step": 4459 }, { "epoch": 1.4452365521710953, "grad_norm": 0.8134219646453857, "learning_rate": 2.769727878862175e-06, "loss": 0.1182, "step": 4460 }, { "epoch": 1.445560596241089, "grad_norm": 0.8193073272705078, "learning_rate": 2.768858359513392e-06, "loss": 0.116, "step": 4461 }, { "epoch": 1.4458846403110823, "grad_norm": 0.9124169945716858, "learning_rate": 2.767988807258948e-06, "loss": 0.1266, "step": 4462 }, { "epoch": 1.4462086843810757, "grad_norm": 0.7838951945304871, "learning_rate": 2.7671192222052685e-06, "loss": 0.1128, "step": 4463 }, { "epoch": 1.4465327284510694, "grad_norm": 0.8049332499504089, "learning_rate": 2.7662496044587817e-06, "loss": 0.1112, "step": 4464 }, { "epoch": 1.4468567725210628, "grad_norm": 0.9545280933380127, "learning_rate": 2.765379954125921e-06, "loss": 0.1239, "step": 4465 }, { "epoch": 1.4471808165910565, "grad_norm": 0.781912088394165, "learning_rate": 2.764510271313123e-06, "loss": 0.1115, "step": 4466 }, { "epoch": 1.4475048606610499, "grad_norm": 0.8536630272865295, "learning_rate": 2.7636405561268286e-06, "loss": 0.1219, "step": 4467 }, { "epoch": 1.4478289047310433, "grad_norm": 0.8723033666610718, "learning_rate": 2.7627708086734827e-06, "loss": 0.1229, "step": 4468 }, { "epoch": 1.448152948801037, "grad_norm": 0.7570147514343262, "learning_rate": 2.7619010290595333e-06, "loss": 0.1005, "step": 4469 }, { "epoch": 1.4484769928710304, "grad_norm": 0.806489109992981, "learning_rate": 2.7610312173914334e-06, "loss": 0.1101, "step": 4470 }, { "epoch": 1.448801036941024, "grad_norm": 0.8391226530075073, "learning_rate": 2.760161373775639e-06, "loss": 0.1176, "step": 4471 }, { "epoch": 1.4491250810110174, "grad_norm": 0.8136021494865417, "learning_rate": 2.7592914983186113e-06, "loss": 0.1177, "step": 4472 }, { "epoch": 1.449449125081011, "grad_norm": 0.8229960203170776, "learning_rate": 2.758421591126814e-06, "loss": 0.1206, "step": 4473 }, { "epoch": 1.4497731691510045, "grad_norm": 0.7568755745887756, "learning_rate": 2.757551652306717e-06, "loss": 0.1093, "step": 4474 }, { "epoch": 1.4500972132209982, "grad_norm": 0.8672506213188171, "learning_rate": 2.7566816819647897e-06, "loss": 0.1248, "step": 4475 }, { "epoch": 1.4504212572909916, "grad_norm": 0.774766206741333, "learning_rate": 2.7558116802075095e-06, "loss": 0.1124, "step": 4476 }, { "epoch": 1.450745301360985, "grad_norm": 0.8504829406738281, "learning_rate": 2.754941647141357e-06, "loss": 0.1218, "step": 4477 }, { "epoch": 1.4510693454309787, "grad_norm": 0.8221288919448853, "learning_rate": 2.754071582872814e-06, "loss": 0.1166, "step": 4478 }, { "epoch": 1.451393389500972, "grad_norm": 0.8883230090141296, "learning_rate": 2.753201487508369e-06, "loss": 0.1232, "step": 4479 }, { "epoch": 1.4517174335709657, "grad_norm": 0.8256147503852844, "learning_rate": 2.7523313611545133e-06, "loss": 0.114, "step": 4480 }, { "epoch": 1.4520414776409591, "grad_norm": 0.8885819911956787, "learning_rate": 2.7514612039177422e-06, "loss": 0.1282, "step": 4481 }, { "epoch": 1.4523655217109526, "grad_norm": 0.8273267149925232, "learning_rate": 2.7505910159045534e-06, "loss": 0.1132, "step": 4482 }, { "epoch": 1.4526895657809462, "grad_norm": 0.8149267435073853, "learning_rate": 2.74972079722145e-06, "loss": 0.1222, "step": 4483 }, { "epoch": 1.4530136098509399, "grad_norm": 0.8353701233863831, "learning_rate": 2.7488505479749395e-06, "loss": 0.1161, "step": 4484 }, { "epoch": 1.4533376539209333, "grad_norm": 0.8027179837226868, "learning_rate": 2.74798026827153e-06, "loss": 0.116, "step": 4485 }, { "epoch": 1.4536616979909267, "grad_norm": 0.8460054993629456, "learning_rate": 2.747109958217737e-06, "loss": 0.1116, "step": 4486 }, { "epoch": 1.4539857420609203, "grad_norm": 0.8322781324386597, "learning_rate": 2.746239617920077e-06, "loss": 0.1108, "step": 4487 }, { "epoch": 1.4543097861309138, "grad_norm": 0.7815658450126648, "learning_rate": 2.745369247485072e-06, "loss": 0.1023, "step": 4488 }, { "epoch": 1.4546338302009074, "grad_norm": 0.7694299221038818, "learning_rate": 2.7444988470192457e-06, "loss": 0.11, "step": 4489 }, { "epoch": 1.4549578742709008, "grad_norm": 0.8041089177131653, "learning_rate": 2.743628416629128e-06, "loss": 0.1098, "step": 4490 }, { "epoch": 1.4552819183408943, "grad_norm": 0.7983806729316711, "learning_rate": 2.7427579564212496e-06, "loss": 0.1143, "step": 4491 }, { "epoch": 1.455605962410888, "grad_norm": 0.7699452638626099, "learning_rate": 2.7418874665021483e-06, "loss": 0.1135, "step": 4492 }, { "epoch": 1.4559300064808813, "grad_norm": 0.8244098424911499, "learning_rate": 2.7410169469783632e-06, "loss": 0.1186, "step": 4493 }, { "epoch": 1.456254050550875, "grad_norm": 0.77878338098526, "learning_rate": 2.7401463979564365e-06, "loss": 0.1096, "step": 4494 }, { "epoch": 1.4565780946208684, "grad_norm": 0.8096309304237366, "learning_rate": 2.7392758195429153e-06, "loss": 0.1079, "step": 4495 }, { "epoch": 1.4569021386908618, "grad_norm": 0.7888034582138062, "learning_rate": 2.73840521184435e-06, "loss": 0.1149, "step": 4496 }, { "epoch": 1.4572261827608555, "grad_norm": 0.8182387351989746, "learning_rate": 2.737534574967295e-06, "loss": 0.1187, "step": 4497 }, { "epoch": 1.4575502268308491, "grad_norm": 0.8907278776168823, "learning_rate": 2.7366639090183076e-06, "loss": 0.1335, "step": 4498 }, { "epoch": 1.4578742709008425, "grad_norm": 0.8672367334365845, "learning_rate": 2.7357932141039494e-06, "loss": 0.1185, "step": 4499 }, { "epoch": 1.458198314970836, "grad_norm": 0.8519613146781921, "learning_rate": 2.7349224903307836e-06, "loss": 0.1175, "step": 4500 }, { "epoch": 1.4585223590408296, "grad_norm": 0.8108680844306946, "learning_rate": 2.734051737805379e-06, "loss": 0.1129, "step": 4501 }, { "epoch": 1.458846403110823, "grad_norm": 0.8352702260017395, "learning_rate": 2.733180956634308e-06, "loss": 0.1165, "step": 4502 }, { "epoch": 1.4591704471808167, "grad_norm": 0.8059620261192322, "learning_rate": 2.7323101469241454e-06, "loss": 0.1156, "step": 4503 }, { "epoch": 1.45949449125081, "grad_norm": 0.8234224319458008, "learning_rate": 2.7314393087814693e-06, "loss": 0.1128, "step": 4504 }, { "epoch": 1.4598185353208035, "grad_norm": 0.7840672135353088, "learning_rate": 2.7305684423128633e-06, "loss": 0.1067, "step": 4505 }, { "epoch": 1.4601425793907972, "grad_norm": 0.8008546233177185, "learning_rate": 2.729697547624911e-06, "loss": 0.1062, "step": 4506 }, { "epoch": 1.4604666234607906, "grad_norm": 0.8446060419082642, "learning_rate": 2.7288266248242025e-06, "loss": 0.1157, "step": 4507 }, { "epoch": 1.4607906675307842, "grad_norm": 0.7840669751167297, "learning_rate": 2.7279556740173306e-06, "loss": 0.1023, "step": 4508 }, { "epoch": 1.4611147116007777, "grad_norm": 0.8466615080833435, "learning_rate": 2.7270846953108913e-06, "loss": 0.1214, "step": 4509 }, { "epoch": 1.4614387556707713, "grad_norm": 0.8785419464111328, "learning_rate": 2.7262136888114833e-06, "loss": 0.1154, "step": 4510 }, { "epoch": 1.4617627997407647, "grad_norm": 0.7884746789932251, "learning_rate": 2.72534265462571e-06, "loss": 0.1046, "step": 4511 }, { "epoch": 1.4620868438107584, "grad_norm": 0.8650237321853638, "learning_rate": 2.7244715928601774e-06, "loss": 0.1162, "step": 4512 }, { "epoch": 1.4624108878807518, "grad_norm": 0.7944271564483643, "learning_rate": 2.723600503621494e-06, "loss": 0.116, "step": 4513 }, { "epoch": 1.4627349319507452, "grad_norm": 0.8509773015975952, "learning_rate": 2.7227293870162742e-06, "loss": 0.1162, "step": 4514 }, { "epoch": 1.4630589760207389, "grad_norm": 0.8748196959495544, "learning_rate": 2.721858243151133e-06, "loss": 0.1258, "step": 4515 }, { "epoch": 1.4633830200907323, "grad_norm": 0.7907067537307739, "learning_rate": 2.7209870721326915e-06, "loss": 0.1078, "step": 4516 }, { "epoch": 1.463707064160726, "grad_norm": 0.8583985567092896, "learning_rate": 2.7201158740675714e-06, "loss": 0.1173, "step": 4517 }, { "epoch": 1.4640311082307194, "grad_norm": 0.8395696878433228, "learning_rate": 2.719244649062399e-06, "loss": 0.1142, "step": 4518 }, { "epoch": 1.4643551523007128, "grad_norm": 0.8139791488647461, "learning_rate": 2.718373397223804e-06, "loss": 0.111, "step": 4519 }, { "epoch": 1.4646791963707064, "grad_norm": 0.8256497979164124, "learning_rate": 2.71750211865842e-06, "loss": 0.1076, "step": 4520 }, { "epoch": 1.4650032404407, "grad_norm": 0.8206305503845215, "learning_rate": 2.7166308134728814e-06, "loss": 0.1139, "step": 4521 }, { "epoch": 1.4653272845106935, "grad_norm": 0.845827043056488, "learning_rate": 2.715759481773828e-06, "loss": 0.115, "step": 4522 }, { "epoch": 1.465651328580687, "grad_norm": 0.7799459099769592, "learning_rate": 2.7148881236679035e-06, "loss": 0.1124, "step": 4523 }, { "epoch": 1.4659753726506806, "grad_norm": 0.8159389495849609, "learning_rate": 2.7140167392617527e-06, "loss": 0.122, "step": 4524 }, { "epoch": 1.466299416720674, "grad_norm": 0.8140308260917664, "learning_rate": 2.7131453286620253e-06, "loss": 0.1178, "step": 4525 }, { "epoch": 1.4666234607906676, "grad_norm": 0.854189932346344, "learning_rate": 2.712273891975372e-06, "loss": 0.1239, "step": 4526 }, { "epoch": 1.466947504860661, "grad_norm": 0.7412453889846802, "learning_rate": 2.7114024293084502e-06, "loss": 0.0979, "step": 4527 }, { "epoch": 1.4672715489306545, "grad_norm": 0.8710846900939941, "learning_rate": 2.710530940767917e-06, "loss": 0.1207, "step": 4528 }, { "epoch": 1.4675955930006481, "grad_norm": 0.7715765237808228, "learning_rate": 2.7096594264604357e-06, "loss": 0.1087, "step": 4529 }, { "epoch": 1.4679196370706415, "grad_norm": 0.8217405080795288, "learning_rate": 2.7087878864926696e-06, "loss": 0.1121, "step": 4530 }, { "epoch": 1.4682436811406352, "grad_norm": 0.8400027751922607, "learning_rate": 2.707916320971288e-06, "loss": 0.1123, "step": 4531 }, { "epoch": 1.4685677252106286, "grad_norm": 0.8444027304649353, "learning_rate": 2.7070447300029607e-06, "loss": 0.1208, "step": 4532 }, { "epoch": 1.468891769280622, "grad_norm": 0.8026435375213623, "learning_rate": 2.706173113694363e-06, "loss": 0.1087, "step": 4533 }, { "epoch": 1.4692158133506157, "grad_norm": 0.8625495433807373, "learning_rate": 2.705301472152172e-06, "loss": 0.121, "step": 4534 }, { "epoch": 1.4695398574206093, "grad_norm": 0.8635041117668152, "learning_rate": 2.7044298054830687e-06, "loss": 0.1159, "step": 4535 }, { "epoch": 1.4698639014906028, "grad_norm": 0.8940765857696533, "learning_rate": 2.703558113793736e-06, "loss": 0.1188, "step": 4536 }, { "epoch": 1.4701879455605962, "grad_norm": 0.816907525062561, "learning_rate": 2.7026863971908607e-06, "loss": 0.1149, "step": 4537 }, { "epoch": 1.4705119896305898, "grad_norm": 0.8096557259559631, "learning_rate": 2.7018146557811325e-06, "loss": 0.1164, "step": 4538 }, { "epoch": 1.4708360337005832, "grad_norm": 0.8419772386550903, "learning_rate": 2.7009428896712443e-06, "loss": 0.1158, "step": 4539 }, { "epoch": 1.471160077770577, "grad_norm": 0.832172155380249, "learning_rate": 2.700071098967892e-06, "loss": 0.1164, "step": 4540 }, { "epoch": 1.4714841218405703, "grad_norm": 0.769187867641449, "learning_rate": 2.699199283777773e-06, "loss": 0.1026, "step": 4541 }, { "epoch": 1.4718081659105637, "grad_norm": 0.8220400810241699, "learning_rate": 2.6983274442075914e-06, "loss": 0.1177, "step": 4542 }, { "epoch": 1.4721322099805574, "grad_norm": 0.8324995040893555, "learning_rate": 2.69745558036405e-06, "loss": 0.1198, "step": 4543 }, { "epoch": 1.4724562540505508, "grad_norm": 0.7806642055511475, "learning_rate": 2.6965836923538568e-06, "loss": 0.1133, "step": 4544 }, { "epoch": 1.4727802981205445, "grad_norm": 0.8086503148078918, "learning_rate": 2.695711780283723e-06, "loss": 0.1151, "step": 4545 }, { "epoch": 1.4731043421905379, "grad_norm": 0.8214067816734314, "learning_rate": 2.694839844260361e-06, "loss": 0.1197, "step": 4546 }, { "epoch": 1.4734283862605313, "grad_norm": 0.9072923064231873, "learning_rate": 2.6939678843904897e-06, "loss": 0.1356, "step": 4547 }, { "epoch": 1.473752430330525, "grad_norm": 0.7834147214889526, "learning_rate": 2.6930959007808268e-06, "loss": 0.1171, "step": 4548 }, { "epoch": 1.4740764744005186, "grad_norm": 1.084670901298523, "learning_rate": 2.6922238935380946e-06, "loss": 0.1128, "step": 4549 }, { "epoch": 1.474400518470512, "grad_norm": 0.8274711966514587, "learning_rate": 2.691351862769018e-06, "loss": 0.1135, "step": 4550 }, { "epoch": 1.4747245625405054, "grad_norm": 0.7669548392295837, "learning_rate": 2.6904798085803276e-06, "loss": 0.1088, "step": 4551 }, { "epoch": 1.475048606610499, "grad_norm": 0.7579964995384216, "learning_rate": 2.689607731078751e-06, "loss": 0.1123, "step": 4552 }, { "epoch": 1.4753726506804925, "grad_norm": 0.8058258891105652, "learning_rate": 2.688735630371024e-06, "loss": 0.1128, "step": 4553 }, { "epoch": 1.4756966947504861, "grad_norm": 0.7897571921348572, "learning_rate": 2.6878635065638843e-06, "loss": 0.1126, "step": 4554 }, { "epoch": 1.4760207388204796, "grad_norm": 0.8144134283065796, "learning_rate": 2.6869913597640686e-06, "loss": 0.1117, "step": 4555 }, { "epoch": 1.476344782890473, "grad_norm": 0.8493690490722656, "learning_rate": 2.6861191900783213e-06, "loss": 0.1187, "step": 4556 }, { "epoch": 1.4766688269604666, "grad_norm": 0.7807652950286865, "learning_rate": 2.685246997613386e-06, "loss": 0.1168, "step": 4557 }, { "epoch": 1.47699287103046, "grad_norm": 0.8303132057189941, "learning_rate": 2.6843747824760125e-06, "loss": 0.1106, "step": 4558 }, { "epoch": 1.4773169151004537, "grad_norm": 0.7779312133789062, "learning_rate": 2.6835025447729495e-06, "loss": 0.112, "step": 4559 }, { "epoch": 1.4776409591704471, "grad_norm": 0.7840691804885864, "learning_rate": 2.682630284610953e-06, "loss": 0.1129, "step": 4560 }, { "epoch": 1.4779650032404408, "grad_norm": 0.8038629293441772, "learning_rate": 2.6817580020967767e-06, "loss": 0.1118, "step": 4561 }, { "epoch": 1.4782890473104342, "grad_norm": 0.7459808588027954, "learning_rate": 2.680885697337181e-06, "loss": 0.1016, "step": 4562 }, { "epoch": 1.4786130913804278, "grad_norm": 0.8536557555198669, "learning_rate": 2.6800133704389263e-06, "loss": 0.1199, "step": 4563 }, { "epoch": 1.4789371354504213, "grad_norm": 0.8047173023223877, "learning_rate": 2.6791410215087783e-06, "loss": 0.1109, "step": 4564 }, { "epoch": 1.4792611795204147, "grad_norm": 0.7865987420082092, "learning_rate": 2.678268650653503e-06, "loss": 0.1076, "step": 4565 }, { "epoch": 1.4795852235904083, "grad_norm": 0.9128715991973877, "learning_rate": 2.6773962579798713e-06, "loss": 0.124, "step": 4566 }, { "epoch": 1.4799092676604018, "grad_norm": 0.8672195076942444, "learning_rate": 2.6765238435946543e-06, "loss": 0.1202, "step": 4567 }, { "epoch": 1.4802333117303954, "grad_norm": 0.7601926922798157, "learning_rate": 2.675651407604628e-06, "loss": 0.1028, "step": 4568 }, { "epoch": 1.4805573558003888, "grad_norm": 0.7621402144432068, "learning_rate": 2.67477895011657e-06, "loss": 0.1144, "step": 4569 }, { "epoch": 1.4808813998703823, "grad_norm": 0.8962377309799194, "learning_rate": 2.6739064712372596e-06, "loss": 0.1272, "step": 4570 }, { "epoch": 1.481205443940376, "grad_norm": 0.803502082824707, "learning_rate": 2.6730339710734815e-06, "loss": 0.1136, "step": 4571 }, { "epoch": 1.4815294880103695, "grad_norm": 0.7923281192779541, "learning_rate": 2.672161449732021e-06, "loss": 0.1106, "step": 4572 }, { "epoch": 1.481853532080363, "grad_norm": 0.8307749032974243, "learning_rate": 2.671288907319666e-06, "loss": 0.1184, "step": 4573 }, { "epoch": 1.4821775761503564, "grad_norm": 0.8426263332366943, "learning_rate": 2.670416343943205e-06, "loss": 0.1167, "step": 4574 }, { "epoch": 1.48250162022035, "grad_norm": 0.7670769691467285, "learning_rate": 2.669543759709434e-06, "loss": 0.1101, "step": 4575 }, { "epoch": 1.4828256642903435, "grad_norm": 0.7515110969543457, "learning_rate": 2.668671154725149e-06, "loss": 0.1086, "step": 4576 }, { "epoch": 1.483149708360337, "grad_norm": 0.8248686790466309, "learning_rate": 2.6677985290971464e-06, "loss": 0.1147, "step": 4577 }, { "epoch": 1.4834737524303305, "grad_norm": 0.7573780417442322, "learning_rate": 2.666925882932229e-06, "loss": 0.1088, "step": 4578 }, { "epoch": 1.483797796500324, "grad_norm": 0.7721401453018188, "learning_rate": 2.6660532163371995e-06, "loss": 0.1085, "step": 4579 }, { "epoch": 1.4841218405703176, "grad_norm": 0.7526261806488037, "learning_rate": 2.665180529418863e-06, "loss": 0.1062, "step": 4580 }, { "epoch": 1.484445884640311, "grad_norm": 0.8702643513679504, "learning_rate": 2.6643078222840295e-06, "loss": 0.125, "step": 4581 }, { "epoch": 1.4847699287103047, "grad_norm": 0.8288251161575317, "learning_rate": 2.6634350950395096e-06, "loss": 0.118, "step": 4582 }, { "epoch": 1.485093972780298, "grad_norm": 0.8676289319992065, "learning_rate": 2.662562347792116e-06, "loss": 0.1221, "step": 4583 }, { "epoch": 1.4854180168502915, "grad_norm": 0.8323357105255127, "learning_rate": 2.6616895806486644e-06, "loss": 0.1181, "step": 4584 }, { "epoch": 1.4857420609202852, "grad_norm": 0.793228268623352, "learning_rate": 2.6608167937159735e-06, "loss": 0.113, "step": 4585 }, { "epoch": 1.4860661049902788, "grad_norm": 0.7849512100219727, "learning_rate": 2.6599439871008636e-06, "loss": 0.1086, "step": 4586 }, { "epoch": 1.4863901490602722, "grad_norm": 0.7768422961235046, "learning_rate": 2.659071160910158e-06, "loss": 0.104, "step": 4587 }, { "epoch": 1.4867141931302656, "grad_norm": 0.817353367805481, "learning_rate": 2.6581983152506825e-06, "loss": 0.1171, "step": 4588 }, { "epoch": 1.4870382372002593, "grad_norm": 0.8006489872932434, "learning_rate": 2.6573254502292644e-06, "loss": 0.1126, "step": 4589 }, { "epoch": 1.4873622812702527, "grad_norm": 0.782650887966156, "learning_rate": 2.656452565952735e-06, "loss": 0.1123, "step": 4590 }, { "epoch": 1.4876863253402464, "grad_norm": 0.8069577217102051, "learning_rate": 2.6555796625279257e-06, "loss": 0.1129, "step": 4591 }, { "epoch": 1.4880103694102398, "grad_norm": 0.7839584350585938, "learning_rate": 2.6547067400616717e-06, "loss": 0.1076, "step": 4592 }, { "epoch": 1.4883344134802332, "grad_norm": 0.8803831338882446, "learning_rate": 2.6538337986608105e-06, "loss": 0.1188, "step": 4593 }, { "epoch": 1.4886584575502269, "grad_norm": 0.7832203507423401, "learning_rate": 2.6529608384321815e-06, "loss": 0.1104, "step": 4594 }, { "epoch": 1.4889825016202203, "grad_norm": 0.8302662968635559, "learning_rate": 2.6520878594826268e-06, "loss": 0.1216, "step": 4595 }, { "epoch": 1.489306545690214, "grad_norm": 0.8601539134979248, "learning_rate": 2.651214861918991e-06, "loss": 0.1208, "step": 4596 }, { "epoch": 1.4896305897602073, "grad_norm": 0.7589988708496094, "learning_rate": 2.6503418458481188e-06, "loss": 0.106, "step": 4597 }, { "epoch": 1.4899546338302008, "grad_norm": 0.8155503273010254, "learning_rate": 2.649468811376861e-06, "loss": 0.1228, "step": 4598 }, { "epoch": 1.4902786779001944, "grad_norm": 0.8368939161300659, "learning_rate": 2.6485957586120664e-06, "loss": 0.1175, "step": 4599 }, { "epoch": 1.490602721970188, "grad_norm": 0.7997182011604309, "learning_rate": 2.6477226876605903e-06, "loss": 0.1142, "step": 4600 }, { "epoch": 1.4909267660401815, "grad_norm": 0.8174132704734802, "learning_rate": 2.646849598629287e-06, "loss": 0.1163, "step": 4601 }, { "epoch": 1.491250810110175, "grad_norm": 0.8421667814254761, "learning_rate": 2.645976491625015e-06, "loss": 0.1193, "step": 4602 }, { "epoch": 1.4915748541801686, "grad_norm": 0.8165988326072693, "learning_rate": 2.645103366754633e-06, "loss": 0.1169, "step": 4603 }, { "epoch": 1.491898898250162, "grad_norm": 0.8092917203903198, "learning_rate": 2.6442302241250047e-06, "loss": 0.1065, "step": 4604 }, { "epoch": 1.4922229423201556, "grad_norm": 0.8170256614685059, "learning_rate": 2.6433570638429923e-06, "loss": 0.1088, "step": 4605 }, { "epoch": 1.492546986390149, "grad_norm": 0.805540919303894, "learning_rate": 2.6424838860154633e-06, "loss": 0.1164, "step": 4606 }, { "epoch": 1.4928710304601425, "grad_norm": 0.9216932058334351, "learning_rate": 2.641610690749286e-06, "loss": 0.1325, "step": 4607 }, { "epoch": 1.4931950745301361, "grad_norm": 0.7840494513511658, "learning_rate": 2.640737478151331e-06, "loss": 0.1074, "step": 4608 }, { "epoch": 1.4935191186001295, "grad_norm": 0.9483461380004883, "learning_rate": 2.6398642483284716e-06, "loss": 0.1233, "step": 4609 }, { "epoch": 1.4938431626701232, "grad_norm": 0.8062817454338074, "learning_rate": 2.6389910013875814e-06, "loss": 0.1109, "step": 4610 }, { "epoch": 1.4941672067401166, "grad_norm": 0.7685247659683228, "learning_rate": 2.638117737435538e-06, "loss": 0.1091, "step": 4611 }, { "epoch": 1.4944912508101102, "grad_norm": 0.8100350499153137, "learning_rate": 2.637244456579221e-06, "loss": 0.1165, "step": 4612 }, { "epoch": 1.4948152948801037, "grad_norm": 0.7939400672912598, "learning_rate": 2.6363711589255115e-06, "loss": 0.1099, "step": 4613 }, { "epoch": 1.4951393389500973, "grad_norm": 0.8388355374336243, "learning_rate": 2.6354978445812923e-06, "loss": 0.116, "step": 4614 }, { "epoch": 1.4954633830200907, "grad_norm": 0.7846883535385132, "learning_rate": 2.6346245136534483e-06, "loss": 0.111, "step": 4615 }, { "epoch": 1.4957874270900842, "grad_norm": 0.853891134262085, "learning_rate": 2.6337511662488678e-06, "loss": 0.1192, "step": 4616 }, { "epoch": 1.4961114711600778, "grad_norm": 0.7976066470146179, "learning_rate": 2.6328778024744384e-06, "loss": 0.1149, "step": 4617 }, { "epoch": 1.4964355152300712, "grad_norm": 0.7755050659179688, "learning_rate": 2.6320044224370526e-06, "loss": 0.1081, "step": 4618 }, { "epoch": 1.4967595593000649, "grad_norm": 0.9004369974136353, "learning_rate": 2.6311310262436035e-06, "loss": 0.1294, "step": 4619 }, { "epoch": 1.4970836033700583, "grad_norm": 0.8410223722457886, "learning_rate": 2.6302576140009866e-06, "loss": 0.1154, "step": 4620 }, { "epoch": 1.4974076474400517, "grad_norm": 0.7805401682853699, "learning_rate": 2.6293841858160983e-06, "loss": 0.1081, "step": 4621 }, { "epoch": 1.4977316915100454, "grad_norm": 0.7736295461654663, "learning_rate": 2.6285107417958385e-06, "loss": 0.1062, "step": 4622 }, { "epoch": 1.498055735580039, "grad_norm": 0.8259331583976746, "learning_rate": 2.6276372820471073e-06, "loss": 0.114, "step": 4623 }, { "epoch": 1.4983797796500324, "grad_norm": 0.8400769829750061, "learning_rate": 2.6267638066768087e-06, "loss": 0.1187, "step": 4624 }, { "epoch": 1.4987038237200259, "grad_norm": 0.8498058319091797, "learning_rate": 2.625890315791848e-06, "loss": 0.1117, "step": 4625 }, { "epoch": 1.4990278677900195, "grad_norm": 0.7712481021881104, "learning_rate": 2.625016809499131e-06, "loss": 0.1056, "step": 4626 }, { "epoch": 1.499351911860013, "grad_norm": 0.7691459059715271, "learning_rate": 2.6241432879055667e-06, "loss": 0.1115, "step": 4627 }, { "epoch": 1.4996759559300066, "grad_norm": 0.8329920768737793, "learning_rate": 2.6232697511180654e-06, "loss": 0.1141, "step": 4628 }, { "epoch": 1.5, "grad_norm": 0.8468537926673889, "learning_rate": 2.6223961992435406e-06, "loss": 0.1103, "step": 4629 }, { "epoch": 1.5003240440699934, "grad_norm": 0.782757580280304, "learning_rate": 2.6215226323889048e-06, "loss": 0.1064, "step": 4630 }, { "epoch": 1.500648088139987, "grad_norm": 0.8492832779884338, "learning_rate": 2.620649050661076e-06, "loss": 0.1166, "step": 4631 }, { "epoch": 1.5009721322099807, "grad_norm": 0.8164941668510437, "learning_rate": 2.6197754541669714e-06, "loss": 0.1172, "step": 4632 }, { "epoch": 1.5012961762799741, "grad_norm": 0.8901158571243286, "learning_rate": 2.6189018430135106e-06, "loss": 0.1259, "step": 4633 }, { "epoch": 1.5016202203499676, "grad_norm": 0.7792893052101135, "learning_rate": 2.6180282173076156e-06, "loss": 0.1082, "step": 4634 }, { "epoch": 1.501944264419961, "grad_norm": 0.8170306086540222, "learning_rate": 2.6171545771562085e-06, "loss": 0.1148, "step": 4635 }, { "epoch": 1.5022683084899546, "grad_norm": 0.8025646209716797, "learning_rate": 2.6162809226662167e-06, "loss": 0.1127, "step": 4636 }, { "epoch": 1.5025923525599483, "grad_norm": 0.8521740436553955, "learning_rate": 2.6154072539445645e-06, "loss": 0.1278, "step": 4637 }, { "epoch": 1.5029163966299417, "grad_norm": 0.7798749208450317, "learning_rate": 2.6145335710981817e-06, "loss": 0.1058, "step": 4638 }, { "epoch": 1.5032404406999351, "grad_norm": 0.8448235988616943, "learning_rate": 2.613659874233999e-06, "loss": 0.1181, "step": 4639 }, { "epoch": 1.5035644847699285, "grad_norm": 0.8113897442817688, "learning_rate": 2.612786163458948e-06, "loss": 0.1179, "step": 4640 }, { "epoch": 1.5038885288399222, "grad_norm": 0.9807034730911255, "learning_rate": 2.611912438879962e-06, "loss": 0.1293, "step": 4641 }, { "epoch": 1.5042125729099158, "grad_norm": 0.753908097743988, "learning_rate": 2.611038700603977e-06, "loss": 0.109, "step": 4642 }, { "epoch": 1.5045366169799093, "grad_norm": 0.8391751050949097, "learning_rate": 2.6101649487379304e-06, "loss": 0.1214, "step": 4643 }, { "epoch": 1.5048606610499027, "grad_norm": 0.9241876006126404, "learning_rate": 2.6092911833887602e-06, "loss": 0.1219, "step": 4644 }, { "epoch": 1.5051847051198963, "grad_norm": 0.7995864748954773, "learning_rate": 2.6084174046634075e-06, "loss": 0.1159, "step": 4645 }, { "epoch": 1.50550874918989, "grad_norm": 0.7828188538551331, "learning_rate": 2.607543612668814e-06, "loss": 0.1143, "step": 4646 }, { "epoch": 1.5058327932598834, "grad_norm": 0.7762099504470825, "learning_rate": 2.6066698075119237e-06, "loss": 0.1087, "step": 4647 }, { "epoch": 1.5061568373298768, "grad_norm": 0.8002234101295471, "learning_rate": 2.605795989299681e-06, "loss": 0.1118, "step": 4648 }, { "epoch": 1.5064808813998702, "grad_norm": 0.800934910774231, "learning_rate": 2.604922158139033e-06, "loss": 0.1068, "step": 4649 }, { "epoch": 1.5068049254698639, "grad_norm": 0.8457976579666138, "learning_rate": 2.6040483141369293e-06, "loss": 0.1187, "step": 4650 }, { "epoch": 1.5071289695398575, "grad_norm": 0.8208997249603271, "learning_rate": 2.603174457400319e-06, "loss": 0.115, "step": 4651 }, { "epoch": 1.507453013609851, "grad_norm": 0.7759538292884827, "learning_rate": 2.602300588036154e-06, "loss": 0.104, "step": 4652 }, { "epoch": 1.5077770576798444, "grad_norm": 0.8319365978240967, "learning_rate": 2.6014267061513875e-06, "loss": 0.1132, "step": 4653 }, { "epoch": 1.508101101749838, "grad_norm": 0.8539026975631714, "learning_rate": 2.6005528118529738e-06, "loss": 0.1196, "step": 4654 }, { "epoch": 1.5084251458198314, "grad_norm": 0.8303791880607605, "learning_rate": 2.5996789052478693e-06, "loss": 0.1211, "step": 4655 }, { "epoch": 1.508749189889825, "grad_norm": 0.807961642742157, "learning_rate": 2.5988049864430314e-06, "loss": 0.1053, "step": 4656 }, { "epoch": 1.5090732339598185, "grad_norm": 0.7976993322372437, "learning_rate": 2.597931055545421e-06, "loss": 0.1118, "step": 4657 }, { "epoch": 1.509397278029812, "grad_norm": 0.8033688068389893, "learning_rate": 2.597057112661997e-06, "loss": 0.1179, "step": 4658 }, { "epoch": 1.5097213220998056, "grad_norm": 0.798485517501831, "learning_rate": 2.5961831578997214e-06, "loss": 0.1162, "step": 4659 }, { "epoch": 1.5100453661697992, "grad_norm": 0.79729825258255, "learning_rate": 2.5953091913655586e-06, "loss": 0.1147, "step": 4660 }, { "epoch": 1.5103694102397927, "grad_norm": 0.8427738547325134, "learning_rate": 2.594435213166473e-06, "loss": 0.1186, "step": 4661 }, { "epoch": 1.510693454309786, "grad_norm": 0.7964630126953125, "learning_rate": 2.593561223409432e-06, "loss": 0.1198, "step": 4662 }, { "epoch": 1.5110174983797795, "grad_norm": 0.7886890172958374, "learning_rate": 2.592687222201403e-06, "loss": 0.1071, "step": 4663 }, { "epoch": 1.5113415424497731, "grad_norm": 0.8665766716003418, "learning_rate": 2.5918132096493552e-06, "loss": 0.124, "step": 4664 }, { "epoch": 1.5116655865197668, "grad_norm": 0.7874413728713989, "learning_rate": 2.5909391858602596e-06, "loss": 0.1137, "step": 4665 }, { "epoch": 1.5119896305897602, "grad_norm": 0.7725497484207153, "learning_rate": 2.5900651509410875e-06, "loss": 0.1111, "step": 4666 }, { "epoch": 1.5123136746597536, "grad_norm": 0.8019454479217529, "learning_rate": 2.5891911049988133e-06, "loss": 0.1153, "step": 4667 }, { "epoch": 1.5126377187297473, "grad_norm": 0.8188468813896179, "learning_rate": 2.5883170481404112e-06, "loss": 0.1121, "step": 4668 }, { "epoch": 1.512961762799741, "grad_norm": 0.788652241230011, "learning_rate": 2.587442980472858e-06, "loss": 0.1109, "step": 4669 }, { "epoch": 1.5132858068697344, "grad_norm": 0.8207998275756836, "learning_rate": 2.5865689021031292e-06, "loss": 0.108, "step": 4670 }, { "epoch": 1.5136098509397278, "grad_norm": 0.868406355381012, "learning_rate": 2.5856948131382055e-06, "loss": 0.1246, "step": 4671 }, { "epoch": 1.5139338950097212, "grad_norm": 0.7927680611610413, "learning_rate": 2.584820713685066e-06, "loss": 0.11, "step": 4672 }, { "epoch": 1.5142579390797148, "grad_norm": 0.7637700438499451, "learning_rate": 2.5839466038506927e-06, "loss": 0.1093, "step": 4673 }, { "epoch": 1.5145819831497085, "grad_norm": 0.875170111656189, "learning_rate": 2.5830724837420675e-06, "loss": 0.1272, "step": 4674 }, { "epoch": 1.514906027219702, "grad_norm": 0.8147414326667786, "learning_rate": 2.582198353466175e-06, "loss": 0.1123, "step": 4675 }, { "epoch": 1.5152300712896953, "grad_norm": 0.8192980885505676, "learning_rate": 2.5813242131299986e-06, "loss": 0.1124, "step": 4676 }, { "epoch": 1.5155541153596888, "grad_norm": 0.8011279106140137, "learning_rate": 2.5804500628405265e-06, "loss": 0.1178, "step": 4677 }, { "epoch": 1.5158781594296824, "grad_norm": 0.8356201648712158, "learning_rate": 2.5795759027047457e-06, "loss": 0.1149, "step": 4678 }, { "epoch": 1.516202203499676, "grad_norm": 0.8754128217697144, "learning_rate": 2.578701732829645e-06, "loss": 0.1221, "step": 4679 }, { "epoch": 1.5165262475696695, "grad_norm": 0.7901120781898499, "learning_rate": 2.5778275533222135e-06, "loss": 0.1132, "step": 4680 }, { "epoch": 1.516850291639663, "grad_norm": 0.8232377767562866, "learning_rate": 2.5769533642894433e-06, "loss": 0.1104, "step": 4681 }, { "epoch": 1.5171743357096565, "grad_norm": 0.8965370655059814, "learning_rate": 2.576079165838326e-06, "loss": 0.125, "step": 4682 }, { "epoch": 1.5174983797796502, "grad_norm": 0.830436646938324, "learning_rate": 2.5752049580758555e-06, "loss": 0.1243, "step": 4683 }, { "epoch": 1.5178224238496436, "grad_norm": 0.8122559785842896, "learning_rate": 2.5743307411090255e-06, "loss": 0.1151, "step": 4684 }, { "epoch": 1.518146467919637, "grad_norm": 0.7352414727210999, "learning_rate": 2.5734565150448325e-06, "loss": 0.1014, "step": 4685 }, { "epoch": 1.5184705119896305, "grad_norm": 0.8098716139793396, "learning_rate": 2.5725822799902738e-06, "loss": 0.1172, "step": 4686 }, { "epoch": 1.518794556059624, "grad_norm": 0.8782811760902405, "learning_rate": 2.5717080360523464e-06, "loss": 0.1174, "step": 4687 }, { "epoch": 1.5191186001296177, "grad_norm": 0.7974719405174255, "learning_rate": 2.57083378333805e-06, "loss": 0.1174, "step": 4688 }, { "epoch": 1.5194426441996112, "grad_norm": 0.8503957986831665, "learning_rate": 2.5699595219543838e-06, "loss": 0.1233, "step": 4689 }, { "epoch": 1.5197666882696046, "grad_norm": 0.7533683776855469, "learning_rate": 2.5690852520083496e-06, "loss": 0.1092, "step": 4690 }, { "epoch": 1.5200907323395982, "grad_norm": 0.8622933030128479, "learning_rate": 2.5682109736069492e-06, "loss": 0.1289, "step": 4691 }, { "epoch": 1.5204147764095917, "grad_norm": 0.7616762518882751, "learning_rate": 2.5673366868571858e-06, "loss": 0.1098, "step": 4692 }, { "epoch": 1.5207388204795853, "grad_norm": 0.8017957806587219, "learning_rate": 2.566462391866064e-06, "loss": 0.1185, "step": 4693 }, { "epoch": 1.5210628645495787, "grad_norm": 0.8263230323791504, "learning_rate": 2.5655880887405893e-06, "loss": 0.12, "step": 4694 }, { "epoch": 1.5213869086195722, "grad_norm": 0.7774035930633545, "learning_rate": 2.564713777587767e-06, "loss": 0.111, "step": 4695 }, { "epoch": 1.5217109526895658, "grad_norm": 0.7920206189155579, "learning_rate": 2.5638394585146044e-06, "loss": 0.1148, "step": 4696 }, { "epoch": 1.5220349967595594, "grad_norm": 0.8174133896827698, "learning_rate": 2.56296513162811e-06, "loss": 0.115, "step": 4697 }, { "epoch": 1.5223590408295529, "grad_norm": 0.8442100882530212, "learning_rate": 2.5620907970352937e-06, "loss": 0.118, "step": 4698 }, { "epoch": 1.5226830848995463, "grad_norm": 0.7956017255783081, "learning_rate": 2.561216454843165e-06, "loss": 0.1117, "step": 4699 }, { "epoch": 1.5230071289695397, "grad_norm": 0.850167453289032, "learning_rate": 2.5603421051587344e-06, "loss": 0.1192, "step": 4700 }, { "epoch": 1.5233311730395334, "grad_norm": 0.8265661001205444, "learning_rate": 2.5594677480890152e-06, "loss": 0.1171, "step": 4701 }, { "epoch": 1.523655217109527, "grad_norm": 0.8158977031707764, "learning_rate": 2.558593383741018e-06, "loss": 0.1166, "step": 4702 }, { "epoch": 1.5239792611795204, "grad_norm": 0.8036544322967529, "learning_rate": 2.5577190122217583e-06, "loss": 0.1111, "step": 4703 }, { "epoch": 1.5243033052495139, "grad_norm": 0.7271086573600769, "learning_rate": 2.55684463363825e-06, "loss": 0.1089, "step": 4704 }, { "epoch": 1.5246273493195075, "grad_norm": 0.8072844743728638, "learning_rate": 2.5559702480975094e-06, "loss": 0.1158, "step": 4705 }, { "epoch": 1.524951393389501, "grad_norm": 0.8877519369125366, "learning_rate": 2.5550958557065523e-06, "loss": 0.1264, "step": 4706 }, { "epoch": 1.5252754374594946, "grad_norm": 0.7614017724990845, "learning_rate": 2.554221456572396e-06, "loss": 0.1148, "step": 4707 }, { "epoch": 1.525599481529488, "grad_norm": 0.8140348196029663, "learning_rate": 2.553347050802058e-06, "loss": 0.1167, "step": 4708 }, { "epoch": 1.5259235255994814, "grad_norm": 0.8409748077392578, "learning_rate": 2.552472638502557e-06, "loss": 0.1194, "step": 4709 }, { "epoch": 1.526247569669475, "grad_norm": 0.9039139151573181, "learning_rate": 2.5515982197809142e-06, "loss": 0.1203, "step": 4710 }, { "epoch": 1.5265716137394687, "grad_norm": 0.8296209573745728, "learning_rate": 2.5507237947441478e-06, "loss": 0.1166, "step": 4711 }, { "epoch": 1.5268956578094621, "grad_norm": 0.9392574429512024, "learning_rate": 2.5498493634992803e-06, "loss": 0.1247, "step": 4712 }, { "epoch": 1.5272197018794555, "grad_norm": 0.8392185568809509, "learning_rate": 2.5489749261533333e-06, "loss": 0.1126, "step": 4713 }, { "epoch": 1.527543745949449, "grad_norm": 0.8599982857704163, "learning_rate": 2.548100482813329e-06, "loss": 0.1168, "step": 4714 }, { "epoch": 1.5278677900194426, "grad_norm": 0.8504279851913452, "learning_rate": 2.5472260335862915e-06, "loss": 0.117, "step": 4715 }, { "epoch": 1.5281918340894363, "grad_norm": 0.7987934947013855, "learning_rate": 2.546351578579245e-06, "loss": 0.108, "step": 4716 }, { "epoch": 1.5285158781594297, "grad_norm": 0.7965323328971863, "learning_rate": 2.545477117899213e-06, "loss": 0.118, "step": 4717 }, { "epoch": 1.528839922229423, "grad_norm": 0.9307451844215393, "learning_rate": 2.5446026516532235e-06, "loss": 0.1273, "step": 4718 }, { "epoch": 1.5291639662994168, "grad_norm": 0.8149518966674805, "learning_rate": 2.5437281799483005e-06, "loss": 0.1226, "step": 4719 }, { "epoch": 1.5294880103694104, "grad_norm": 0.8553066849708557, "learning_rate": 2.542853702891471e-06, "loss": 0.1218, "step": 4720 }, { "epoch": 1.5298120544394038, "grad_norm": 0.9042952656745911, "learning_rate": 2.541979220589765e-06, "loss": 0.1204, "step": 4721 }, { "epoch": 1.5301360985093972, "grad_norm": 0.7949498295783997, "learning_rate": 2.541104733150207e-06, "loss": 0.1123, "step": 4722 }, { "epoch": 1.5304601425793907, "grad_norm": 0.8241522312164307, "learning_rate": 2.540230240679828e-06, "loss": 0.1149, "step": 4723 }, { "epoch": 1.5307841866493843, "grad_norm": 0.7348130941390991, "learning_rate": 2.5393557432856575e-06, "loss": 0.1083, "step": 4724 }, { "epoch": 1.531108230719378, "grad_norm": 0.8246307969093323, "learning_rate": 2.5384812410747244e-06, "loss": 0.1158, "step": 4725 }, { "epoch": 1.5314322747893714, "grad_norm": 0.8028252720832825, "learning_rate": 2.53760673415406e-06, "loss": 0.1165, "step": 4726 }, { "epoch": 1.5317563188593648, "grad_norm": 0.76541668176651, "learning_rate": 2.5367322226306956e-06, "loss": 0.1147, "step": 4727 }, { "epoch": 1.5320803629293582, "grad_norm": 0.8166850805282593, "learning_rate": 2.5358577066116622e-06, "loss": 0.1162, "step": 4728 }, { "epoch": 1.5324044069993519, "grad_norm": 0.7746853232383728, "learning_rate": 2.534983186203993e-06, "loss": 0.1112, "step": 4729 }, { "epoch": 1.5327284510693455, "grad_norm": 0.8185518383979797, "learning_rate": 2.5341086615147207e-06, "loss": 0.1201, "step": 4730 }, { "epoch": 1.533052495139339, "grad_norm": 0.7633078694343567, "learning_rate": 2.5332341326508786e-06, "loss": 0.1064, "step": 4731 }, { "epoch": 1.5333765392093324, "grad_norm": 0.7728112936019897, "learning_rate": 2.5323595997195005e-06, "loss": 0.1065, "step": 4732 }, { "epoch": 1.533700583279326, "grad_norm": 0.8007860779762268, "learning_rate": 2.53148506282762e-06, "loss": 0.1181, "step": 4733 }, { "epoch": 1.5340246273493197, "grad_norm": 0.8603343367576599, "learning_rate": 2.530610522082273e-06, "loss": 0.1144, "step": 4734 }, { "epoch": 1.534348671419313, "grad_norm": 0.8246845006942749, "learning_rate": 2.529735977590494e-06, "loss": 0.1179, "step": 4735 }, { "epoch": 1.5346727154893065, "grad_norm": 0.8384710550308228, "learning_rate": 2.52886142945932e-06, "loss": 0.1242, "step": 4736 }, { "epoch": 1.5349967595593, "grad_norm": 0.7882184386253357, "learning_rate": 2.527986877795786e-06, "loss": 0.108, "step": 4737 }, { "epoch": 1.5353208036292936, "grad_norm": 0.8187207579612732, "learning_rate": 2.527112322706929e-06, "loss": 0.1177, "step": 4738 }, { "epoch": 1.5356448476992872, "grad_norm": 0.8378166556358337, "learning_rate": 2.526237764299786e-06, "loss": 0.1099, "step": 4739 }, { "epoch": 1.5359688917692806, "grad_norm": 0.7764092683792114, "learning_rate": 2.5253632026813945e-06, "loss": 0.1131, "step": 4740 }, { "epoch": 1.536292935839274, "grad_norm": 0.8988218307495117, "learning_rate": 2.524488637958793e-06, "loss": 0.1217, "step": 4741 }, { "epoch": 1.5366169799092677, "grad_norm": 0.7971240878105164, "learning_rate": 2.5236140702390194e-06, "loss": 0.1072, "step": 4742 }, { "epoch": 1.5369410239792611, "grad_norm": 0.7888842225074768, "learning_rate": 2.522739499629112e-06, "loss": 0.1167, "step": 4743 }, { "epoch": 1.5372650680492548, "grad_norm": 0.8259699940681458, "learning_rate": 2.5218649262361104e-06, "loss": 0.1183, "step": 4744 }, { "epoch": 1.5375891121192482, "grad_norm": 0.8481448888778687, "learning_rate": 2.520990350167053e-06, "loss": 0.1183, "step": 4745 }, { "epoch": 1.5379131561892416, "grad_norm": 0.7942140102386475, "learning_rate": 2.5201157715289796e-06, "loss": 0.1144, "step": 4746 }, { "epoch": 1.5382372002592353, "grad_norm": 0.8272114992141724, "learning_rate": 2.519241190428931e-06, "loss": 0.1237, "step": 4747 }, { "epoch": 1.538561244329229, "grad_norm": 0.841238260269165, "learning_rate": 2.518366606973947e-06, "loss": 0.1244, "step": 4748 }, { "epoch": 1.5388852883992223, "grad_norm": 0.8029258251190186, "learning_rate": 2.517492021271068e-06, "loss": 0.1112, "step": 4749 }, { "epoch": 1.5392093324692158, "grad_norm": 0.7682906985282898, "learning_rate": 2.5166174334273347e-06, "loss": 0.1106, "step": 4750 }, { "epoch": 1.5395333765392092, "grad_norm": 0.7744021415710449, "learning_rate": 2.5157428435497887e-06, "loss": 0.1129, "step": 4751 }, { "epoch": 1.5398574206092028, "grad_norm": 0.810088038444519, "learning_rate": 2.5148682517454707e-06, "loss": 0.1172, "step": 4752 }, { "epoch": 1.5401814646791965, "grad_norm": 0.8204680681228638, "learning_rate": 2.5139936581214235e-06, "loss": 0.1038, "step": 4753 }, { "epoch": 1.54050550874919, "grad_norm": 0.8239283561706543, "learning_rate": 2.5131190627846875e-06, "loss": 0.1216, "step": 4754 }, { "epoch": 1.5408295528191833, "grad_norm": 0.8344904780387878, "learning_rate": 2.512244465842305e-06, "loss": 0.1204, "step": 4755 }, { "epoch": 1.541153596889177, "grad_norm": 0.830840528011322, "learning_rate": 2.5113698674013186e-06, "loss": 0.1169, "step": 4756 }, { "epoch": 1.5414776409591704, "grad_norm": 0.8880487084388733, "learning_rate": 2.5104952675687706e-06, "loss": 0.1265, "step": 4757 }, { "epoch": 1.541801685029164, "grad_norm": 0.8078792095184326, "learning_rate": 2.509620666451703e-06, "loss": 0.1146, "step": 4758 }, { "epoch": 1.5421257290991575, "grad_norm": 0.8384366631507874, "learning_rate": 2.5087460641571594e-06, "loss": 0.1212, "step": 4759 }, { "epoch": 1.5424497731691509, "grad_norm": 0.8105331063270569, "learning_rate": 2.5078714607921825e-06, "loss": 0.1169, "step": 4760 }, { "epoch": 1.5427738172391445, "grad_norm": 0.8387223482131958, "learning_rate": 2.506996856463814e-06, "loss": 0.1172, "step": 4761 }, { "epoch": 1.5430978613091382, "grad_norm": 0.9171087741851807, "learning_rate": 2.506122251279099e-06, "loss": 0.1261, "step": 4762 }, { "epoch": 1.5434219053791316, "grad_norm": 0.8017227053642273, "learning_rate": 2.5052476453450788e-06, "loss": 0.1142, "step": 4763 }, { "epoch": 1.543745949449125, "grad_norm": 0.8722697496414185, "learning_rate": 2.504373038768799e-06, "loss": 0.127, "step": 4764 }, { "epoch": 1.5440699935191184, "grad_norm": 0.8668619394302368, "learning_rate": 2.5034984316573003e-06, "loss": 0.1272, "step": 4765 }, { "epoch": 1.544394037589112, "grad_norm": 0.8164231777191162, "learning_rate": 2.5026238241176283e-06, "loss": 0.1133, "step": 4766 }, { "epoch": 1.5447180816591057, "grad_norm": 0.8477160930633545, "learning_rate": 2.5017492162568246e-06, "loss": 0.1191, "step": 4767 }, { "epoch": 1.5450421257290992, "grad_norm": 0.9103198647499084, "learning_rate": 2.5008746081819345e-06, "loss": 0.1168, "step": 4768 }, { "epoch": 1.5453661697990926, "grad_norm": 0.7986923456192017, "learning_rate": 2.5e-06, "loss": 0.1182, "step": 4769 }, { "epoch": 1.5456902138690862, "grad_norm": 0.8324925899505615, "learning_rate": 2.4991253918180668e-06, "loss": 0.1238, "step": 4770 }, { "epoch": 1.5460142579390799, "grad_norm": 0.8424703478813171, "learning_rate": 2.498250783743176e-06, "loss": 0.1205, "step": 4771 }, { "epoch": 1.5463383020090733, "grad_norm": 0.8214755058288574, "learning_rate": 2.4973761758823734e-06, "loss": 0.1153, "step": 4772 }, { "epoch": 1.5466623460790667, "grad_norm": 0.850267767906189, "learning_rate": 2.4965015683427005e-06, "loss": 0.1092, "step": 4773 }, { "epoch": 1.5469863901490601, "grad_norm": 0.7865439653396606, "learning_rate": 2.4956269612312025e-06, "loss": 0.1159, "step": 4774 }, { "epoch": 1.5473104342190538, "grad_norm": 0.772476851940155, "learning_rate": 2.494752354654921e-06, "loss": 0.112, "step": 4775 }, { "epoch": 1.5476344782890474, "grad_norm": 0.8782417178153992, "learning_rate": 2.4938777487209022e-06, "loss": 0.1306, "step": 4776 }, { "epoch": 1.5479585223590409, "grad_norm": 0.809949517250061, "learning_rate": 2.493003143536187e-06, "loss": 0.1152, "step": 4777 }, { "epoch": 1.5482825664290343, "grad_norm": 0.7971104979515076, "learning_rate": 2.4921285392078184e-06, "loss": 0.1096, "step": 4778 }, { "epoch": 1.5486066104990277, "grad_norm": 0.8603529930114746, "learning_rate": 2.491253935842842e-06, "loss": 0.1264, "step": 4779 }, { "epoch": 1.5489306545690213, "grad_norm": 0.882556140422821, "learning_rate": 2.490379333548297e-06, "loss": 0.1173, "step": 4780 }, { "epoch": 1.549254698639015, "grad_norm": 0.788023829460144, "learning_rate": 2.4895047324312303e-06, "loss": 0.1108, "step": 4781 }, { "epoch": 1.5495787427090084, "grad_norm": 0.7869258522987366, "learning_rate": 2.4886301325986827e-06, "loss": 0.1076, "step": 4782 }, { "epoch": 1.5499027867790018, "grad_norm": 0.7824283838272095, "learning_rate": 2.4877555341576955e-06, "loss": 0.104, "step": 4783 }, { "epoch": 1.5502268308489955, "grad_norm": 0.8321191668510437, "learning_rate": 2.4868809372153137e-06, "loss": 0.119, "step": 4784 }, { "epoch": 1.5505508749189891, "grad_norm": 0.7406947612762451, "learning_rate": 2.4860063418785773e-06, "loss": 0.1015, "step": 4785 }, { "epoch": 1.5508749189889826, "grad_norm": 0.8466529846191406, "learning_rate": 2.4851317482545297e-06, "loss": 0.12, "step": 4786 }, { "epoch": 1.551198963058976, "grad_norm": 0.7870974540710449, "learning_rate": 2.4842571564502117e-06, "loss": 0.1097, "step": 4787 }, { "epoch": 1.5515230071289694, "grad_norm": 0.8245130777359009, "learning_rate": 2.4833825665726657e-06, "loss": 0.1146, "step": 4788 }, { "epoch": 1.551847051198963, "grad_norm": 0.7951235175132751, "learning_rate": 2.482507978728933e-06, "loss": 0.1213, "step": 4789 }, { "epoch": 1.5521710952689567, "grad_norm": 0.7778697609901428, "learning_rate": 2.4816333930260535e-06, "loss": 0.11, "step": 4790 }, { "epoch": 1.5524951393389501, "grad_norm": 0.7982646822929382, "learning_rate": 2.4807588095710696e-06, "loss": 0.11, "step": 4791 }, { "epoch": 1.5528191834089435, "grad_norm": 0.8741667866706848, "learning_rate": 2.4798842284710203e-06, "loss": 0.1156, "step": 4792 }, { "epoch": 1.5531432274789372, "grad_norm": 0.8188022971153259, "learning_rate": 2.4790096498329477e-06, "loss": 0.1172, "step": 4793 }, { "epoch": 1.5534672715489306, "grad_norm": 0.8138577938079834, "learning_rate": 2.478135073763891e-06, "loss": 0.1167, "step": 4794 }, { "epoch": 1.5537913156189243, "grad_norm": 0.8361755609512329, "learning_rate": 2.4772605003708885e-06, "loss": 0.121, "step": 4795 }, { "epoch": 1.5541153596889177, "grad_norm": 0.8536872863769531, "learning_rate": 2.476385929760981e-06, "loss": 0.1223, "step": 4796 }, { "epoch": 1.554439403758911, "grad_norm": 0.754604697227478, "learning_rate": 2.475511362041207e-06, "loss": 0.0987, "step": 4797 }, { "epoch": 1.5547634478289047, "grad_norm": 0.8049080967903137, "learning_rate": 2.4746367973186063e-06, "loss": 0.1185, "step": 4798 }, { "epoch": 1.5550874918988984, "grad_norm": 0.7643322944641113, "learning_rate": 2.473762235700214e-06, "loss": 0.1061, "step": 4799 }, { "epoch": 1.5554115359688918, "grad_norm": 0.8364583849906921, "learning_rate": 2.472887677293072e-06, "loss": 0.1135, "step": 4800 }, { "epoch": 1.5557355800388852, "grad_norm": 0.8260913491249084, "learning_rate": 2.4720131222042156e-06, "loss": 0.1139, "step": 4801 }, { "epoch": 1.5560596241088787, "grad_norm": 0.805766224861145, "learning_rate": 2.4711385705406805e-06, "loss": 0.1171, "step": 4802 }, { "epoch": 1.5563836681788723, "grad_norm": 0.8735179901123047, "learning_rate": 2.4702640224095066e-06, "loss": 0.1242, "step": 4803 }, { "epoch": 1.556707712248866, "grad_norm": 0.7554102540016174, "learning_rate": 2.469389477917727e-06, "loss": 0.1104, "step": 4804 }, { "epoch": 1.5570317563188594, "grad_norm": 0.7954150438308716, "learning_rate": 2.4685149371723806e-06, "loss": 0.1133, "step": 4805 }, { "epoch": 1.5573558003888528, "grad_norm": 0.7790459394454956, "learning_rate": 2.467640400280501e-06, "loss": 0.1042, "step": 4806 }, { "epoch": 1.5576798444588464, "grad_norm": 0.8028296232223511, "learning_rate": 2.466765867349122e-06, "loss": 0.1188, "step": 4807 }, { "epoch": 1.55800388852884, "grad_norm": 0.804160475730896, "learning_rate": 2.46589133848528e-06, "loss": 0.1187, "step": 4808 }, { "epoch": 1.5583279325988335, "grad_norm": 0.7990769743919373, "learning_rate": 2.465016813796007e-06, "loss": 0.121, "step": 4809 }, { "epoch": 1.558651976668827, "grad_norm": 0.7471599578857422, "learning_rate": 2.464142293388338e-06, "loss": 0.1058, "step": 4810 }, { "epoch": 1.5589760207388204, "grad_norm": 0.7916015982627869, "learning_rate": 2.4632677773693048e-06, "loss": 0.1057, "step": 4811 }, { "epoch": 1.559300064808814, "grad_norm": 0.8223981857299805, "learning_rate": 2.4623932658459406e-06, "loss": 0.1215, "step": 4812 }, { "epoch": 1.5596241088788076, "grad_norm": 0.7924101948738098, "learning_rate": 2.461518758925277e-06, "loss": 0.1134, "step": 4813 }, { "epoch": 1.559948152948801, "grad_norm": 0.8664736151695251, "learning_rate": 2.4606442567143434e-06, "loss": 0.1199, "step": 4814 }, { "epoch": 1.5602721970187945, "grad_norm": 0.7690613865852356, "learning_rate": 2.4597697593201728e-06, "loss": 0.1054, "step": 4815 }, { "epoch": 1.560596241088788, "grad_norm": 0.7565768957138062, "learning_rate": 2.4588952668497937e-06, "loss": 0.1106, "step": 4816 }, { "epoch": 1.5609202851587816, "grad_norm": 0.7539846301078796, "learning_rate": 2.4580207794102364e-06, "loss": 0.1077, "step": 4817 }, { "epoch": 1.5612443292287752, "grad_norm": 0.8341878056526184, "learning_rate": 2.4571462971085293e-06, "loss": 0.115, "step": 4818 }, { "epoch": 1.5615683732987686, "grad_norm": 0.8036201000213623, "learning_rate": 2.4562718200517003e-06, "loss": 0.1145, "step": 4819 }, { "epoch": 1.561892417368762, "grad_norm": 0.786769688129425, "learning_rate": 2.4553973483467778e-06, "loss": 0.1103, "step": 4820 }, { "epoch": 1.5622164614387557, "grad_norm": 0.8251572251319885, "learning_rate": 2.454522882100787e-06, "loss": 0.114, "step": 4821 }, { "epoch": 1.5625405055087493, "grad_norm": 0.8083153367042542, "learning_rate": 2.453648421420756e-06, "loss": 0.1152, "step": 4822 }, { "epoch": 1.5628645495787428, "grad_norm": 0.7225403785705566, "learning_rate": 2.4527739664137085e-06, "loss": 0.1001, "step": 4823 }, { "epoch": 1.5631885936487362, "grad_norm": 0.7972891926765442, "learning_rate": 2.4518995171866717e-06, "loss": 0.1162, "step": 4824 }, { "epoch": 1.5635126377187296, "grad_norm": 0.8370020985603333, "learning_rate": 2.451025073846668e-06, "loss": 0.1191, "step": 4825 }, { "epoch": 1.5638366817887233, "grad_norm": 0.7791475057601929, "learning_rate": 2.45015063650072e-06, "loss": 0.1151, "step": 4826 }, { "epoch": 1.564160725858717, "grad_norm": 0.8660632371902466, "learning_rate": 2.449276205255853e-06, "loss": 0.1156, "step": 4827 }, { "epoch": 1.5644847699287103, "grad_norm": 0.8165311813354492, "learning_rate": 2.448401780219087e-06, "loss": 0.1074, "step": 4828 }, { "epoch": 1.5648088139987038, "grad_norm": 0.8229710459709167, "learning_rate": 2.4475273614974437e-06, "loss": 0.1185, "step": 4829 }, { "epoch": 1.5651328580686974, "grad_norm": 0.9050421714782715, "learning_rate": 2.4466529491979437e-06, "loss": 0.1183, "step": 4830 }, { "epoch": 1.5654569021386908, "grad_norm": 0.8065712451934814, "learning_rate": 2.445778543427605e-06, "loss": 0.1073, "step": 4831 }, { "epoch": 1.5657809462086845, "grad_norm": 0.7765482664108276, "learning_rate": 2.4449041442934485e-06, "loss": 0.1133, "step": 4832 }, { "epoch": 1.5661049902786779, "grad_norm": 0.8295376300811768, "learning_rate": 2.4440297519024906e-06, "loss": 0.1208, "step": 4833 }, { "epoch": 1.5664290343486713, "grad_norm": 0.7947652339935303, "learning_rate": 2.4431553663617502e-06, "loss": 0.109, "step": 4834 }, { "epoch": 1.566753078418665, "grad_norm": 0.8637305498123169, "learning_rate": 2.4422809877782417e-06, "loss": 0.1207, "step": 4835 }, { "epoch": 1.5670771224886586, "grad_norm": 0.7860732674598694, "learning_rate": 2.4414066162589823e-06, "loss": 0.1154, "step": 4836 }, { "epoch": 1.567401166558652, "grad_norm": 0.7914908528327942, "learning_rate": 2.4405322519109864e-06, "loss": 0.1124, "step": 4837 }, { "epoch": 1.5677252106286454, "grad_norm": 0.8570221662521362, "learning_rate": 2.4396578948412664e-06, "loss": 0.1285, "step": 4838 }, { "epoch": 1.5680492546986389, "grad_norm": 0.8589009642601013, "learning_rate": 2.4387835451568355e-06, "loss": 0.1199, "step": 4839 }, { "epoch": 1.5683732987686325, "grad_norm": 0.798724353313446, "learning_rate": 2.4379092029647067e-06, "loss": 0.1061, "step": 4840 }, { "epoch": 1.5686973428386262, "grad_norm": 0.772853434085846, "learning_rate": 2.4370348683718906e-06, "loss": 0.1117, "step": 4841 }, { "epoch": 1.5690213869086196, "grad_norm": 0.8315975069999695, "learning_rate": 2.436160541485396e-06, "loss": 0.114, "step": 4842 }, { "epoch": 1.569345430978613, "grad_norm": 0.893140971660614, "learning_rate": 2.4352862224122344e-06, "loss": 0.1195, "step": 4843 }, { "epoch": 1.5696694750486067, "grad_norm": 0.7216249108314514, "learning_rate": 2.4344119112594124e-06, "loss": 0.097, "step": 4844 }, { "epoch": 1.5699935191186, "grad_norm": 0.7690826654434204, "learning_rate": 2.4335376081339364e-06, "loss": 0.114, "step": 4845 }, { "epoch": 1.5703175631885937, "grad_norm": 0.7511160969734192, "learning_rate": 2.4326633131428147e-06, "loss": 0.1046, "step": 4846 }, { "epoch": 1.5706416072585871, "grad_norm": 0.7963749170303345, "learning_rate": 2.4317890263930516e-06, "loss": 0.1108, "step": 4847 }, { "epoch": 1.5709656513285806, "grad_norm": 0.7963820695877075, "learning_rate": 2.430914747991651e-06, "loss": 0.1189, "step": 4848 }, { "epoch": 1.5712896953985742, "grad_norm": 0.7745254039764404, "learning_rate": 2.430040478045617e-06, "loss": 0.1064, "step": 4849 }, { "epoch": 1.5716137394685679, "grad_norm": 0.9512690901756287, "learning_rate": 2.429166216661951e-06, "loss": 0.1161, "step": 4850 }, { "epoch": 1.5719377835385613, "grad_norm": 0.8008152842521667, "learning_rate": 2.4282919639476544e-06, "loss": 0.1096, "step": 4851 }, { "epoch": 1.5722618276085547, "grad_norm": 0.8169487714767456, "learning_rate": 2.4274177200097266e-06, "loss": 0.1197, "step": 4852 }, { "epoch": 1.5725858716785481, "grad_norm": 0.8935542702674866, "learning_rate": 2.426543484955168e-06, "loss": 0.1219, "step": 4853 }, { "epoch": 1.5729099157485418, "grad_norm": 0.824174165725708, "learning_rate": 2.425669258890975e-06, "loss": 0.1223, "step": 4854 }, { "epoch": 1.5732339598185354, "grad_norm": 0.8504593372344971, "learning_rate": 2.4247950419241457e-06, "loss": 0.1183, "step": 4855 }, { "epoch": 1.5735580038885288, "grad_norm": 0.8114284873008728, "learning_rate": 2.4239208341616755e-06, "loss": 0.1142, "step": 4856 }, { "epoch": 1.5738820479585223, "grad_norm": 0.8362430334091187, "learning_rate": 2.4230466357105575e-06, "loss": 0.1182, "step": 4857 }, { "epoch": 1.574206092028516, "grad_norm": 0.8267870545387268, "learning_rate": 2.4221724466777874e-06, "loss": 0.1129, "step": 4858 }, { "epoch": 1.5745301360985096, "grad_norm": 0.829098641872406, "learning_rate": 2.421298267170356e-06, "loss": 0.1185, "step": 4859 }, { "epoch": 1.574854180168503, "grad_norm": 0.8627114295959473, "learning_rate": 2.420424097295255e-06, "loss": 0.1207, "step": 4860 }, { "epoch": 1.5751782242384964, "grad_norm": 0.8436676263809204, "learning_rate": 2.419549937159474e-06, "loss": 0.1097, "step": 4861 }, { "epoch": 1.5755022683084898, "grad_norm": 0.8668633699417114, "learning_rate": 2.418675786870002e-06, "loss": 0.1217, "step": 4862 }, { "epoch": 1.5758263123784835, "grad_norm": 0.7808005809783936, "learning_rate": 2.4178016465338266e-06, "loss": 0.1154, "step": 4863 }, { "epoch": 1.5761503564484771, "grad_norm": 0.8143179416656494, "learning_rate": 2.416927516257933e-06, "loss": 0.1228, "step": 4864 }, { "epoch": 1.5764744005184705, "grad_norm": 0.8253973722457886, "learning_rate": 2.416053396149308e-06, "loss": 0.1144, "step": 4865 }, { "epoch": 1.576798444588464, "grad_norm": 0.833735466003418, "learning_rate": 2.415179286314934e-06, "loss": 0.1172, "step": 4866 }, { "epoch": 1.5771224886584574, "grad_norm": 0.8309965133666992, "learning_rate": 2.414305186861795e-06, "loss": 0.1228, "step": 4867 }, { "epoch": 1.577446532728451, "grad_norm": 0.8067078590393066, "learning_rate": 2.4134310978968716e-06, "loss": 0.1125, "step": 4868 }, { "epoch": 1.5777705767984447, "grad_norm": 0.8044711947441101, "learning_rate": 2.412557019527143e-06, "loss": 0.1147, "step": 4869 }, { "epoch": 1.578094620868438, "grad_norm": 0.7666822075843811, "learning_rate": 2.4116829518595896e-06, "loss": 0.1057, "step": 4870 }, { "epoch": 1.5784186649384315, "grad_norm": 0.7714723348617554, "learning_rate": 2.410808895001187e-06, "loss": 0.1156, "step": 4871 }, { "epoch": 1.5787427090084252, "grad_norm": 0.828205406665802, "learning_rate": 2.409934849058913e-06, "loss": 0.1247, "step": 4872 }, { "epoch": 1.5790667530784188, "grad_norm": 0.8341466784477234, "learning_rate": 2.4090608141397417e-06, "loss": 0.1127, "step": 4873 }, { "epoch": 1.5793907971484122, "grad_norm": 0.7735393047332764, "learning_rate": 2.408186790350645e-06, "loss": 0.1051, "step": 4874 }, { "epoch": 1.5797148412184057, "grad_norm": 0.7524586915969849, "learning_rate": 2.4073127777985982e-06, "loss": 0.0987, "step": 4875 }, { "epoch": 1.580038885288399, "grad_norm": 0.7769646644592285, "learning_rate": 2.406438776590568e-06, "loss": 0.1116, "step": 4876 }, { "epoch": 1.5803629293583927, "grad_norm": 0.7380020022392273, "learning_rate": 2.4055647868335273e-06, "loss": 0.1086, "step": 4877 }, { "epoch": 1.5806869734283864, "grad_norm": 0.8171250224113464, "learning_rate": 2.404690808634442e-06, "loss": 0.12, "step": 4878 }, { "epoch": 1.5810110174983798, "grad_norm": 0.8413975834846497, "learning_rate": 2.4038168421002795e-06, "loss": 0.1187, "step": 4879 }, { "epoch": 1.5813350615683732, "grad_norm": 0.7690439820289612, "learning_rate": 2.4029428873380044e-06, "loss": 0.1027, "step": 4880 }, { "epoch": 1.5816591056383669, "grad_norm": 0.8208738565444946, "learning_rate": 2.4020689444545796e-06, "loss": 0.1173, "step": 4881 }, { "epoch": 1.5819831497083603, "grad_norm": 0.8537747263908386, "learning_rate": 2.401195013556969e-06, "loss": 0.1144, "step": 4882 }, { "epoch": 1.582307193778354, "grad_norm": 0.8122548460960388, "learning_rate": 2.400321094752131e-06, "loss": 0.1181, "step": 4883 }, { "epoch": 1.5826312378483474, "grad_norm": 0.8134654760360718, "learning_rate": 2.399447188147027e-06, "loss": 0.1079, "step": 4884 }, { "epoch": 1.5829552819183408, "grad_norm": 0.8235934376716614, "learning_rate": 2.3985732938486137e-06, "loss": 0.1192, "step": 4885 }, { "epoch": 1.5832793259883344, "grad_norm": 0.8062383532524109, "learning_rate": 2.3976994119638464e-06, "loss": 0.1149, "step": 4886 }, { "epoch": 1.583603370058328, "grad_norm": 0.8052951693534851, "learning_rate": 2.3968255425996817e-06, "loss": 0.1108, "step": 4887 }, { "epoch": 1.5839274141283215, "grad_norm": 0.8194572925567627, "learning_rate": 2.3959516858630707e-06, "loss": 0.115, "step": 4888 }, { "epoch": 1.584251458198315, "grad_norm": 0.7949249744415283, "learning_rate": 2.3950778418609676e-06, "loss": 0.1005, "step": 4889 }, { "epoch": 1.5845755022683083, "grad_norm": 0.8598880767822266, "learning_rate": 2.39420401070032e-06, "loss": 0.1188, "step": 4890 }, { "epoch": 1.584899546338302, "grad_norm": 0.7758145928382874, "learning_rate": 2.3933301924880768e-06, "loss": 0.1084, "step": 4891 }, { "epoch": 1.5852235904082956, "grad_norm": 0.9253159761428833, "learning_rate": 2.3924563873311868e-06, "loss": 0.1175, "step": 4892 }, { "epoch": 1.585547634478289, "grad_norm": 0.9324074983596802, "learning_rate": 2.391582595336593e-06, "loss": 0.1233, "step": 4893 }, { "epoch": 1.5858716785482825, "grad_norm": 0.8482661843299866, "learning_rate": 2.3907088166112406e-06, "loss": 0.1168, "step": 4894 }, { "epoch": 1.5861957226182761, "grad_norm": 0.8450517058372498, "learning_rate": 2.3898350512620696e-06, "loss": 0.1251, "step": 4895 }, { "epoch": 1.5865197666882696, "grad_norm": 0.8631404042243958, "learning_rate": 2.3889612993960233e-06, "loss": 0.1223, "step": 4896 }, { "epoch": 1.5868438107582632, "grad_norm": 0.8064488768577576, "learning_rate": 2.3880875611200387e-06, "loss": 0.1152, "step": 4897 }, { "epoch": 1.5871678548282566, "grad_norm": 0.862596869468689, "learning_rate": 2.3872138365410525e-06, "loss": 0.1194, "step": 4898 }, { "epoch": 1.58749189889825, "grad_norm": 0.7267577052116394, "learning_rate": 2.3863401257660016e-06, "loss": 0.1091, "step": 4899 }, { "epoch": 1.5878159429682437, "grad_norm": 0.8517301082611084, "learning_rate": 2.3854664289018182e-06, "loss": 0.1258, "step": 4900 }, { "epoch": 1.5881399870382373, "grad_norm": 0.8594855666160583, "learning_rate": 2.3845927460554363e-06, "loss": 0.1183, "step": 4901 }, { "epoch": 1.5884640311082308, "grad_norm": 0.8139765858650208, "learning_rate": 2.383719077333784e-06, "loss": 0.1216, "step": 4902 }, { "epoch": 1.5887880751782242, "grad_norm": 0.7673820853233337, "learning_rate": 2.382845422843792e-06, "loss": 0.1149, "step": 4903 }, { "epoch": 1.5891121192482176, "grad_norm": 0.790939211845398, "learning_rate": 2.381971782692386e-06, "loss": 0.1075, "step": 4904 }, { "epoch": 1.5894361633182112, "grad_norm": 0.7800084948539734, "learning_rate": 2.3810981569864898e-06, "loss": 0.1068, "step": 4905 }, { "epoch": 1.589760207388205, "grad_norm": 0.8078755736351013, "learning_rate": 2.38022454583303e-06, "loss": 0.1169, "step": 4906 }, { "epoch": 1.5900842514581983, "grad_norm": 0.7707123756408691, "learning_rate": 2.379350949338924e-06, "loss": 0.1197, "step": 4907 }, { "epoch": 1.5904082955281917, "grad_norm": 0.7820994257926941, "learning_rate": 2.378477367611096e-06, "loss": 0.1103, "step": 4908 }, { "epoch": 1.5907323395981854, "grad_norm": 0.8051702976226807, "learning_rate": 2.377603800756461e-06, "loss": 0.1158, "step": 4909 }, { "epoch": 1.591056383668179, "grad_norm": 0.8018122911453247, "learning_rate": 2.376730248881935e-06, "loss": 0.1121, "step": 4910 }, { "epoch": 1.5913804277381725, "grad_norm": 0.8079573512077332, "learning_rate": 2.3758567120944345e-06, "loss": 0.1095, "step": 4911 }, { "epoch": 1.5917044718081659, "grad_norm": 0.7187642455101013, "learning_rate": 2.3749831905008704e-06, "loss": 0.1, "step": 4912 }, { "epoch": 1.5920285158781593, "grad_norm": 0.8316268920898438, "learning_rate": 2.374109684208153e-06, "loss": 0.1127, "step": 4913 }, { "epoch": 1.592352559948153, "grad_norm": 0.8374061584472656, "learning_rate": 2.3732361933231917e-06, "loss": 0.1121, "step": 4914 }, { "epoch": 1.5926766040181466, "grad_norm": 0.7737603187561035, "learning_rate": 2.3723627179528935e-06, "loss": 0.1065, "step": 4915 }, { "epoch": 1.59300064808814, "grad_norm": 0.7794497609138489, "learning_rate": 2.371489258204163e-06, "loss": 0.1103, "step": 4916 }, { "epoch": 1.5933246921581334, "grad_norm": 0.8009302020072937, "learning_rate": 2.3706158141839025e-06, "loss": 0.1145, "step": 4917 }, { "epoch": 1.5936487362281269, "grad_norm": 1.001829743385315, "learning_rate": 2.3697423859990147e-06, "loss": 0.1099, "step": 4918 }, { "epoch": 1.5939727802981205, "grad_norm": 0.8271702527999878, "learning_rate": 2.3688689737563965e-06, "loss": 0.1202, "step": 4919 }, { "epoch": 1.5942968243681142, "grad_norm": 0.8045162558555603, "learning_rate": 2.367995577562948e-06, "loss": 0.1101, "step": 4920 }, { "epoch": 1.5946208684381076, "grad_norm": 0.8168801069259644, "learning_rate": 2.3671221975255616e-06, "loss": 0.1161, "step": 4921 }, { "epoch": 1.594944912508101, "grad_norm": 0.8279063701629639, "learning_rate": 2.366248833751133e-06, "loss": 0.1223, "step": 4922 }, { "epoch": 1.5952689565780946, "grad_norm": 0.8146759271621704, "learning_rate": 2.365375486346552e-06, "loss": 0.1133, "step": 4923 }, { "epoch": 1.5955930006480883, "grad_norm": 0.8911711573600769, "learning_rate": 2.3645021554187086e-06, "loss": 0.1244, "step": 4924 }, { "epoch": 1.5959170447180817, "grad_norm": 0.7659531235694885, "learning_rate": 2.3636288410744894e-06, "loss": 0.1093, "step": 4925 }, { "epoch": 1.5962410887880751, "grad_norm": 0.8901681303977966, "learning_rate": 2.3627555434207787e-06, "loss": 0.1236, "step": 4926 }, { "epoch": 1.5965651328580686, "grad_norm": 0.8137619495391846, "learning_rate": 2.3618822625644624e-06, "loss": 0.1165, "step": 4927 }, { "epoch": 1.5968891769280622, "grad_norm": 0.749087393283844, "learning_rate": 2.36100899861242e-06, "loss": 0.1126, "step": 4928 }, { "epoch": 1.5972132209980558, "grad_norm": 0.7478780746459961, "learning_rate": 2.3601357516715297e-06, "loss": 0.1111, "step": 4929 }, { "epoch": 1.5975372650680493, "grad_norm": 0.8310571908950806, "learning_rate": 2.35926252184867e-06, "loss": 0.1178, "step": 4930 }, { "epoch": 1.5978613091380427, "grad_norm": 0.7701750993728638, "learning_rate": 2.3583893092507144e-06, "loss": 0.1099, "step": 4931 }, { "epoch": 1.5981853532080363, "grad_norm": 0.7579521536827087, "learning_rate": 2.3575161139845375e-06, "loss": 0.1044, "step": 4932 }, { "epoch": 1.5985093972780298, "grad_norm": 0.7884305715560913, "learning_rate": 2.356642936157008e-06, "loss": 0.1026, "step": 4933 }, { "epoch": 1.5988334413480234, "grad_norm": 0.8843073844909668, "learning_rate": 2.3557697758749966e-06, "loss": 0.1258, "step": 4934 }, { "epoch": 1.5991574854180168, "grad_norm": 0.8237454891204834, "learning_rate": 2.3548966332453673e-06, "loss": 0.1171, "step": 4935 }, { "epoch": 1.5994815294880103, "grad_norm": 0.8186402320861816, "learning_rate": 2.3540235083749853e-06, "loss": 0.1168, "step": 4936 }, { "epoch": 1.599805573558004, "grad_norm": 0.8118522763252258, "learning_rate": 2.3531504013707134e-06, "loss": 0.1116, "step": 4937 }, { "epoch": 1.6001296176279975, "grad_norm": 0.7732279896736145, "learning_rate": 2.35227731233941e-06, "loss": 0.1142, "step": 4938 }, { "epoch": 1.600453661697991, "grad_norm": 0.8047366738319397, "learning_rate": 2.3514042413879344e-06, "loss": 0.1122, "step": 4939 }, { "epoch": 1.6007777057679844, "grad_norm": 0.845105767250061, "learning_rate": 2.350531188623141e-06, "loss": 0.1134, "step": 4940 }, { "epoch": 1.6011017498379778, "grad_norm": 0.7973531484603882, "learning_rate": 2.349658154151882e-06, "loss": 0.1211, "step": 4941 }, { "epoch": 1.6014257939079715, "grad_norm": 0.8800602555274963, "learning_rate": 2.3487851380810106e-06, "loss": 0.1208, "step": 4942 }, { "epoch": 1.601749837977965, "grad_norm": 0.7945040464401245, "learning_rate": 2.3479121405173736e-06, "loss": 0.1104, "step": 4943 }, { "epoch": 1.6020738820479585, "grad_norm": 0.8744386434555054, "learning_rate": 2.347039161567819e-06, "loss": 0.1204, "step": 4944 }, { "epoch": 1.602397926117952, "grad_norm": 0.7569018006324768, "learning_rate": 2.34616620133919e-06, "loss": 0.1081, "step": 4945 }, { "epoch": 1.6027219701879456, "grad_norm": 0.7882606387138367, "learning_rate": 2.345293259938329e-06, "loss": 0.1089, "step": 4946 }, { "epoch": 1.6030460142579392, "grad_norm": 0.7308951616287231, "learning_rate": 2.3444203374720755e-06, "loss": 0.1065, "step": 4947 }, { "epoch": 1.6033700583279327, "grad_norm": 0.8240647912025452, "learning_rate": 2.3435474340472657e-06, "loss": 0.1205, "step": 4948 }, { "epoch": 1.603694102397926, "grad_norm": 0.8487390875816345, "learning_rate": 2.3426745497707364e-06, "loss": 0.1189, "step": 4949 }, { "epoch": 1.6040181464679195, "grad_norm": 0.7914772033691406, "learning_rate": 2.341801684749318e-06, "loss": 0.1119, "step": 4950 }, { "epoch": 1.6043421905379132, "grad_norm": 0.7847076058387756, "learning_rate": 2.3409288390898427e-06, "loss": 0.1144, "step": 4951 }, { "epoch": 1.6046662346079068, "grad_norm": 0.8344978094100952, "learning_rate": 2.3400560128991377e-06, "loss": 0.1169, "step": 4952 }, { "epoch": 1.6049902786779002, "grad_norm": 0.780342698097229, "learning_rate": 2.3391832062840273e-06, "loss": 0.1088, "step": 4953 }, { "epoch": 1.6053143227478937, "grad_norm": 0.752775251865387, "learning_rate": 2.338310419351337e-06, "loss": 0.1006, "step": 4954 }, { "epoch": 1.605638366817887, "grad_norm": 0.8063361644744873, "learning_rate": 2.3374376522078852e-06, "loss": 0.1119, "step": 4955 }, { "epoch": 1.6059624108878807, "grad_norm": 0.8843941688537598, "learning_rate": 2.3365649049604917e-06, "loss": 0.1319, "step": 4956 }, { "epoch": 1.6062864549578744, "grad_norm": 0.7759780883789062, "learning_rate": 2.3356921777159705e-06, "loss": 0.1078, "step": 4957 }, { "epoch": 1.6066104990278678, "grad_norm": 0.8233708143234253, "learning_rate": 2.334819470581137e-06, "loss": 0.1178, "step": 4958 }, { "epoch": 1.6069345430978612, "grad_norm": 0.8936583399772644, "learning_rate": 2.3339467836628018e-06, "loss": 0.1333, "step": 4959 }, { "epoch": 1.6072585871678549, "grad_norm": 0.8358684182167053, "learning_rate": 2.3330741170677713e-06, "loss": 0.1252, "step": 4960 }, { "epoch": 1.6075826312378485, "grad_norm": 0.7811366319656372, "learning_rate": 2.3322014709028545e-06, "loss": 0.1106, "step": 4961 }, { "epoch": 1.607906675307842, "grad_norm": 0.8231356739997864, "learning_rate": 2.3313288452748515e-06, "loss": 0.1177, "step": 4962 }, { "epoch": 1.6082307193778353, "grad_norm": 0.8499090075492859, "learning_rate": 2.3304562402905662e-06, "loss": 0.1125, "step": 4963 }, { "epoch": 1.6085547634478288, "grad_norm": 0.8415418863296509, "learning_rate": 2.329583656056796e-06, "loss": 0.125, "step": 4964 }, { "epoch": 1.6088788075178224, "grad_norm": 0.8120545744895935, "learning_rate": 2.3287110926803354e-06, "loss": 0.1154, "step": 4965 }, { "epoch": 1.609202851587816, "grad_norm": 0.8539736270904541, "learning_rate": 2.32783855026798e-06, "loss": 0.1258, "step": 4966 }, { "epoch": 1.6095268956578095, "grad_norm": 0.9000766277313232, "learning_rate": 2.3269660289265184e-06, "loss": 0.1315, "step": 4967 }, { "epoch": 1.609850939727803, "grad_norm": 0.8382875919342041, "learning_rate": 2.3260935287627408e-06, "loss": 0.1247, "step": 4968 }, { "epoch": 1.6101749837977966, "grad_norm": 0.8455458283424377, "learning_rate": 2.3252210498834306e-06, "loss": 0.1123, "step": 4969 }, { "epoch": 1.61049902786779, "grad_norm": 0.8859215974807739, "learning_rate": 2.3243485923953725e-06, "loss": 0.1233, "step": 4970 }, { "epoch": 1.6108230719377836, "grad_norm": 0.818753182888031, "learning_rate": 2.323476156405347e-06, "loss": 0.1148, "step": 4971 }, { "epoch": 1.611147116007777, "grad_norm": 0.8630556464195251, "learning_rate": 2.3226037420201296e-06, "loss": 0.1189, "step": 4972 }, { "epoch": 1.6114711600777705, "grad_norm": 0.7116557955741882, "learning_rate": 2.3217313493464977e-06, "loss": 0.0991, "step": 4973 }, { "epoch": 1.6117952041477641, "grad_norm": 0.822060763835907, "learning_rate": 2.320858978491222e-06, "loss": 0.1177, "step": 4974 }, { "epoch": 1.6121192482177578, "grad_norm": 0.8356267213821411, "learning_rate": 2.319986629561074e-06, "loss": 0.1149, "step": 4975 }, { "epoch": 1.6124432922877512, "grad_norm": 0.8050525784492493, "learning_rate": 2.3191143026628206e-06, "loss": 0.1135, "step": 4976 }, { "epoch": 1.6127673363577446, "grad_norm": 0.826977014541626, "learning_rate": 2.318241997903224e-06, "loss": 0.1191, "step": 4977 }, { "epoch": 1.613091380427738, "grad_norm": 0.7832971215248108, "learning_rate": 2.3173697153890486e-06, "loss": 0.1048, "step": 4978 }, { "epoch": 1.6134154244977317, "grad_norm": 0.7937376499176025, "learning_rate": 2.31649745522705e-06, "loss": 0.1147, "step": 4979 }, { "epoch": 1.6137394685677253, "grad_norm": 0.734635591506958, "learning_rate": 2.3156252175239883e-06, "loss": 0.1054, "step": 4980 }, { "epoch": 1.6140635126377187, "grad_norm": 0.9116652011871338, "learning_rate": 2.3147530023866136e-06, "loss": 0.1301, "step": 4981 }, { "epoch": 1.6143875567077122, "grad_norm": 0.7477267384529114, "learning_rate": 2.3138808099216796e-06, "loss": 0.1043, "step": 4982 }, { "epoch": 1.6147116007777058, "grad_norm": 0.8117696046829224, "learning_rate": 2.3130086402359327e-06, "loss": 0.1129, "step": 4983 }, { "epoch": 1.6150356448476992, "grad_norm": 0.86134934425354, "learning_rate": 2.312136493436117e-06, "loss": 0.124, "step": 4984 }, { "epoch": 1.6153596889176929, "grad_norm": 0.7768048644065857, "learning_rate": 2.311264369628976e-06, "loss": 0.1131, "step": 4985 }, { "epoch": 1.6156837329876863, "grad_norm": 0.782417356967926, "learning_rate": 2.3103922689212494e-06, "loss": 0.1112, "step": 4986 }, { "epoch": 1.6160077770576797, "grad_norm": 0.8572040796279907, "learning_rate": 2.3095201914196732e-06, "loss": 0.1081, "step": 4987 }, { "epoch": 1.6163318211276734, "grad_norm": 0.8067081570625305, "learning_rate": 2.308648137230982e-06, "loss": 0.1144, "step": 4988 }, { "epoch": 1.616655865197667, "grad_norm": 0.8214498162269592, "learning_rate": 2.3077761064619062e-06, "loss": 0.1079, "step": 4989 }, { "epoch": 1.6169799092676604, "grad_norm": 0.7868883609771729, "learning_rate": 2.3069040992191745e-06, "loss": 0.106, "step": 4990 }, { "epoch": 1.6173039533376539, "grad_norm": 0.7447760105133057, "learning_rate": 2.3060321156095107e-06, "loss": 0.0981, "step": 4991 }, { "epoch": 1.6176279974076473, "grad_norm": 0.8617889881134033, "learning_rate": 2.3051601557396393e-06, "loss": 0.1238, "step": 4992 }, { "epoch": 1.617952041477641, "grad_norm": 0.8899185061454773, "learning_rate": 2.3042882197162776e-06, "loss": 0.1225, "step": 4993 }, { "epoch": 1.6182760855476346, "grad_norm": 0.8807141780853271, "learning_rate": 2.303416307646144e-06, "loss": 0.1192, "step": 4994 }, { "epoch": 1.618600129617628, "grad_norm": 0.7890043258666992, "learning_rate": 2.3025444196359513e-06, "loss": 0.1076, "step": 4995 }, { "epoch": 1.6189241736876214, "grad_norm": 0.8740971684455872, "learning_rate": 2.3016725557924095e-06, "loss": 0.1248, "step": 4996 }, { "epoch": 1.619248217757615, "grad_norm": 0.7967113852500916, "learning_rate": 2.3008007162222273e-06, "loss": 0.1136, "step": 4997 }, { "epoch": 1.6195722618276087, "grad_norm": 0.7795601487159729, "learning_rate": 2.2999289010321092e-06, "loss": 0.1077, "step": 4998 }, { "epoch": 1.6198963058976021, "grad_norm": 0.7632074356079102, "learning_rate": 2.299057110328757e-06, "loss": 0.1107, "step": 4999 }, { "epoch": 1.6202203499675956, "grad_norm": 0.8201764822006226, "learning_rate": 2.298185344218868e-06, "loss": 0.1121, "step": 5000 }, { "epoch": 1.620544394037589, "grad_norm": 0.8989620804786682, "learning_rate": 2.29731360280914e-06, "loss": 0.1314, "step": 5001 }, { "epoch": 1.6208684381075826, "grad_norm": 0.7793564200401306, "learning_rate": 2.2964418862062655e-06, "loss": 0.1135, "step": 5002 }, { "epoch": 1.6211924821775763, "grad_norm": 0.8126095533370972, "learning_rate": 2.2955701945169317e-06, "loss": 0.1193, "step": 5003 }, { "epoch": 1.6215165262475697, "grad_norm": 0.7722181081771851, "learning_rate": 2.294698527847829e-06, "loss": 0.1091, "step": 5004 }, { "epoch": 1.6218405703175631, "grad_norm": 0.850853443145752, "learning_rate": 2.2938268863056373e-06, "loss": 0.1197, "step": 5005 }, { "epoch": 1.6221646143875565, "grad_norm": 0.8804651498794556, "learning_rate": 2.29295526999704e-06, "loss": 0.1214, "step": 5006 }, { "epoch": 1.6224886584575502, "grad_norm": 0.7808780074119568, "learning_rate": 2.2920836790287134e-06, "loss": 0.1112, "step": 5007 }, { "epoch": 1.6228127025275438, "grad_norm": 0.7318097352981567, "learning_rate": 2.291212113507331e-06, "loss": 0.1057, "step": 5008 }, { "epoch": 1.6231367465975373, "grad_norm": 0.8634607195854187, "learning_rate": 2.290340573539565e-06, "loss": 0.1229, "step": 5009 }, { "epoch": 1.6234607906675307, "grad_norm": 0.6968221664428711, "learning_rate": 2.2894690592320827e-06, "loss": 0.0988, "step": 5010 }, { "epoch": 1.6237848347375243, "grad_norm": 0.816426694393158, "learning_rate": 2.2885975706915506e-06, "loss": 0.1122, "step": 5011 }, { "epoch": 1.624108878807518, "grad_norm": 0.8346845507621765, "learning_rate": 2.287726108024628e-06, "loss": 0.1226, "step": 5012 }, { "epoch": 1.6244329228775114, "grad_norm": 0.8421067595481873, "learning_rate": 2.2868546713379755e-06, "loss": 0.115, "step": 5013 }, { "epoch": 1.6247569669475048, "grad_norm": 0.8678304553031921, "learning_rate": 2.285983260738248e-06, "loss": 0.1262, "step": 5014 }, { "epoch": 1.6250810110174982, "grad_norm": 0.7817627191543579, "learning_rate": 2.285111876332097e-06, "loss": 0.1139, "step": 5015 }, { "epoch": 1.625405055087492, "grad_norm": 0.8300804495811462, "learning_rate": 2.2842405182261725e-06, "loss": 0.1191, "step": 5016 }, { "epoch": 1.6257290991574855, "grad_norm": 0.783602237701416, "learning_rate": 2.283369186527119e-06, "loss": 0.1142, "step": 5017 }, { "epoch": 1.626053143227479, "grad_norm": 0.7819046974182129, "learning_rate": 2.282497881341581e-06, "loss": 0.1108, "step": 5018 }, { "epoch": 1.6263771872974724, "grad_norm": 0.7916659116744995, "learning_rate": 2.2816266027761965e-06, "loss": 0.1134, "step": 5019 }, { "epoch": 1.626701231367466, "grad_norm": 0.7798147201538086, "learning_rate": 2.280755350937602e-06, "loss": 0.1078, "step": 5020 }, { "epoch": 1.6270252754374595, "grad_norm": 0.8133070468902588, "learning_rate": 2.27988412593243e-06, "loss": 0.116, "step": 5021 }, { "epoch": 1.627349319507453, "grad_norm": 0.8206139802932739, "learning_rate": 2.279012927867309e-06, "loss": 0.1211, "step": 5022 }, { "epoch": 1.6276733635774465, "grad_norm": 0.8827968835830688, "learning_rate": 2.2781417568488677e-06, "loss": 0.1236, "step": 5023 }, { "epoch": 1.62799740764744, "grad_norm": 0.8431521654129028, "learning_rate": 2.277270612983726e-06, "loss": 0.1168, "step": 5024 }, { "epoch": 1.6283214517174336, "grad_norm": 0.7578520774841309, "learning_rate": 2.2763994963785066e-06, "loss": 0.1064, "step": 5025 }, { "epoch": 1.6286454957874272, "grad_norm": 0.7708545923233032, "learning_rate": 2.2755284071398243e-06, "loss": 0.1068, "step": 5026 }, { "epoch": 1.6289695398574207, "grad_norm": 0.8634827733039856, "learning_rate": 2.2746573453742905e-06, "loss": 0.1206, "step": 5027 }, { "epoch": 1.629293583927414, "grad_norm": 0.7665538191795349, "learning_rate": 2.2737863111885175e-06, "loss": 0.1117, "step": 5028 }, { "epoch": 1.6296176279974075, "grad_norm": 0.7242918610572815, "learning_rate": 2.2729153046891095e-06, "loss": 0.1006, "step": 5029 }, { "epoch": 1.6299416720674011, "grad_norm": 0.9581153988838196, "learning_rate": 2.2720443259826702e-06, "loss": 0.1225, "step": 5030 }, { "epoch": 1.6302657161373948, "grad_norm": 0.8096961975097656, "learning_rate": 2.2711733751757983e-06, "loss": 0.109, "step": 5031 }, { "epoch": 1.6305897602073882, "grad_norm": 0.8381730318069458, "learning_rate": 2.27030245237509e-06, "loss": 0.1175, "step": 5032 }, { "epoch": 1.6309138042773816, "grad_norm": 0.7741770148277283, "learning_rate": 2.2694315576871384e-06, "loss": 0.1063, "step": 5033 }, { "epoch": 1.6312378483473753, "grad_norm": 0.962214469909668, "learning_rate": 2.268560691218531e-06, "loss": 0.1262, "step": 5034 }, { "epoch": 1.6315618924173687, "grad_norm": 0.8154774308204651, "learning_rate": 2.2676898530758554e-06, "loss": 0.1133, "step": 5035 }, { "epoch": 1.6318859364873624, "grad_norm": 0.7884747385978699, "learning_rate": 2.266819043365692e-06, "loss": 0.111, "step": 5036 }, { "epoch": 1.6322099805573558, "grad_norm": 0.8193156123161316, "learning_rate": 2.265948262194621e-06, "loss": 0.1207, "step": 5037 }, { "epoch": 1.6325340246273492, "grad_norm": 0.7977531552314758, "learning_rate": 2.2650775096692176e-06, "loss": 0.1132, "step": 5038 }, { "epoch": 1.6328580686973428, "grad_norm": 0.8377412557601929, "learning_rate": 2.2642067858960514e-06, "loss": 0.116, "step": 5039 }, { "epoch": 1.6331821127673365, "grad_norm": 0.8771550059318542, "learning_rate": 2.263336090981693e-06, "loss": 0.1228, "step": 5040 }, { "epoch": 1.63350615683733, "grad_norm": 0.8469054698944092, "learning_rate": 2.2624654250327054e-06, "loss": 0.1166, "step": 5041 }, { "epoch": 1.6338302009073233, "grad_norm": 0.7819681167602539, "learning_rate": 2.2615947881556506e-06, "loss": 0.111, "step": 5042 }, { "epoch": 1.6341542449773168, "grad_norm": 0.7739814519882202, "learning_rate": 2.2607241804570864e-06, "loss": 0.1136, "step": 5043 }, { "epoch": 1.6344782890473104, "grad_norm": 0.7743594646453857, "learning_rate": 2.2598536020435644e-06, "loss": 0.1103, "step": 5044 }, { "epoch": 1.634802333117304, "grad_norm": 0.8755719065666199, "learning_rate": 2.258983053021638e-06, "loss": 0.1293, "step": 5045 }, { "epoch": 1.6351263771872975, "grad_norm": 0.8436084985733032, "learning_rate": 2.2581125334978517e-06, "loss": 0.1123, "step": 5046 }, { "epoch": 1.635450421257291, "grad_norm": 0.7783282995223999, "learning_rate": 2.257242043578751e-06, "loss": 0.1098, "step": 5047 }, { "epoch": 1.6357744653272845, "grad_norm": 0.7676630616188049, "learning_rate": 2.2563715833708726e-06, "loss": 0.109, "step": 5048 }, { "epoch": 1.6360985093972782, "grad_norm": 0.8583880662918091, "learning_rate": 2.255501152980755e-06, "loss": 0.1196, "step": 5049 }, { "epoch": 1.6364225534672716, "grad_norm": 0.8024161458015442, "learning_rate": 2.2546307525149293e-06, "loss": 0.1156, "step": 5050 }, { "epoch": 1.636746597537265, "grad_norm": 0.8013902306556702, "learning_rate": 2.253760382079924e-06, "loss": 0.1083, "step": 5051 }, { "epoch": 1.6370706416072585, "grad_norm": 0.8168145418167114, "learning_rate": 2.2528900417822636e-06, "loss": 0.1235, "step": 5052 }, { "epoch": 1.637394685677252, "grad_norm": 0.8024635910987854, "learning_rate": 2.2520197317284702e-06, "loss": 0.1162, "step": 5053 }, { "epoch": 1.6377187297472457, "grad_norm": 0.7219820618629456, "learning_rate": 2.2511494520250613e-06, "loss": 0.1, "step": 5054 }, { "epoch": 1.6380427738172392, "grad_norm": 0.8405579328536987, "learning_rate": 2.2502792027785508e-06, "loss": 0.1219, "step": 5055 }, { "epoch": 1.6383668178872326, "grad_norm": 0.8250084519386292, "learning_rate": 2.249408984095447e-06, "loss": 0.1162, "step": 5056 }, { "epoch": 1.638690861957226, "grad_norm": 0.8255623579025269, "learning_rate": 2.248538796082259e-06, "loss": 0.1113, "step": 5057 }, { "epoch": 1.6390149060272197, "grad_norm": 0.8642399311065674, "learning_rate": 2.2476686388454867e-06, "loss": 0.1145, "step": 5058 }, { "epoch": 1.6393389500972133, "grad_norm": 0.7692127823829651, "learning_rate": 2.2467985124916314e-06, "loss": 0.1139, "step": 5059 }, { "epoch": 1.6396629941672067, "grad_norm": 0.866449236869812, "learning_rate": 2.2459284171271863e-06, "loss": 0.1216, "step": 5060 }, { "epoch": 1.6399870382372002, "grad_norm": 0.8447216153144836, "learning_rate": 2.2450583528586437e-06, "loss": 0.1135, "step": 5061 }, { "epoch": 1.6403110823071938, "grad_norm": 0.8008463978767395, "learning_rate": 2.244188319792491e-06, "loss": 0.109, "step": 5062 }, { "epoch": 1.6406351263771874, "grad_norm": 0.8315277695655823, "learning_rate": 2.243318318035211e-06, "loss": 0.1131, "step": 5063 }, { "epoch": 1.6409591704471809, "grad_norm": 0.7836526036262512, "learning_rate": 2.2424483476932847e-06, "loss": 0.1083, "step": 5064 }, { "epoch": 1.6412832145171743, "grad_norm": 0.7713335752487183, "learning_rate": 2.241578408873186e-06, "loss": 0.1083, "step": 5065 }, { "epoch": 1.6416072585871677, "grad_norm": 0.8240923881530762, "learning_rate": 2.2407085016813895e-06, "loss": 0.1143, "step": 5066 }, { "epoch": 1.6419313026571614, "grad_norm": 0.8240248560905457, "learning_rate": 2.239838626224361e-06, "loss": 0.1105, "step": 5067 }, { "epoch": 1.642255346727155, "grad_norm": 0.8505247235298157, "learning_rate": 2.2389687826085675e-06, "loss": 0.1148, "step": 5068 }, { "epoch": 1.6425793907971484, "grad_norm": 0.7987221479415894, "learning_rate": 2.238098970940468e-06, "loss": 0.1047, "step": 5069 }, { "epoch": 1.6429034348671419, "grad_norm": 0.8047895431518555, "learning_rate": 2.2372291913265177e-06, "loss": 0.12, "step": 5070 }, { "epoch": 1.6432274789371355, "grad_norm": 0.8510376811027527, "learning_rate": 2.236359443873172e-06, "loss": 0.1134, "step": 5071 }, { "epoch": 1.643551523007129, "grad_norm": 0.7813231348991394, "learning_rate": 2.2354897286868773e-06, "loss": 0.1093, "step": 5072 }, { "epoch": 1.6438755670771226, "grad_norm": 0.7600345611572266, "learning_rate": 2.23462004587408e-06, "loss": 0.1041, "step": 5073 }, { "epoch": 1.644199611147116, "grad_norm": 0.8008465766906738, "learning_rate": 2.233750395541219e-06, "loss": 0.1094, "step": 5074 }, { "epoch": 1.6445236552171094, "grad_norm": 0.7862761616706848, "learning_rate": 2.2328807777947323e-06, "loss": 0.1045, "step": 5075 }, { "epoch": 1.644847699287103, "grad_norm": 0.9118563532829285, "learning_rate": 2.232011192741053e-06, "loss": 0.1293, "step": 5076 }, { "epoch": 1.6451717433570967, "grad_norm": 0.8136278986930847, "learning_rate": 2.2311416404866085e-06, "loss": 0.113, "step": 5077 }, { "epoch": 1.6454957874270901, "grad_norm": 0.7324303388595581, "learning_rate": 2.2302721211378254e-06, "loss": 0.1044, "step": 5078 }, { "epoch": 1.6458198314970836, "grad_norm": 0.8289517760276794, "learning_rate": 2.2294026348011223e-06, "loss": 0.1239, "step": 5079 }, { "epoch": 1.646143875567077, "grad_norm": 0.8013531565666199, "learning_rate": 2.2285331815829187e-06, "loss": 0.1104, "step": 5080 }, { "epoch": 1.6464679196370706, "grad_norm": 0.791636049747467, "learning_rate": 2.227663761589625e-06, "loss": 0.1078, "step": 5081 }, { "epoch": 1.6467919637070643, "grad_norm": 0.7839667797088623, "learning_rate": 2.2267943749276503e-06, "loss": 0.1129, "step": 5082 }, { "epoch": 1.6471160077770577, "grad_norm": 0.674390435218811, "learning_rate": 2.225925021703399e-06, "loss": 0.095, "step": 5083 }, { "epoch": 1.6474400518470511, "grad_norm": 0.8988636136054993, "learning_rate": 2.2250557020232724e-06, "loss": 0.122, "step": 5084 }, { "epoch": 1.6477640959170448, "grad_norm": 0.8070955872535706, "learning_rate": 2.2241864159936664e-06, "loss": 0.1136, "step": 5085 }, { "epoch": 1.6480881399870384, "grad_norm": 0.8262961506843567, "learning_rate": 2.223317163720973e-06, "loss": 0.118, "step": 5086 }, { "epoch": 1.6484121840570318, "grad_norm": 0.9083569645881653, "learning_rate": 2.222447945311579e-06, "loss": 0.1346, "step": 5087 }, { "epoch": 1.6487362281270252, "grad_norm": 0.8316741585731506, "learning_rate": 2.2215787608718706e-06, "loss": 0.1166, "step": 5088 }, { "epoch": 1.6490602721970187, "grad_norm": 0.7819686532020569, "learning_rate": 2.220709610508226e-06, "loss": 0.1106, "step": 5089 }, { "epoch": 1.6493843162670123, "grad_norm": 0.7735289931297302, "learning_rate": 2.2198404943270217e-06, "loss": 0.1057, "step": 5090 }, { "epoch": 1.649708360337006, "grad_norm": 0.8702659606933594, "learning_rate": 2.218971412434628e-06, "loss": 0.12, "step": 5091 }, { "epoch": 1.6500324044069994, "grad_norm": 0.8448833227157593, "learning_rate": 2.218102364937414e-06, "loss": 0.1131, "step": 5092 }, { "epoch": 1.6503564484769928, "grad_norm": 0.8449171185493469, "learning_rate": 2.2172333519417415e-06, "loss": 0.1238, "step": 5093 }, { "epoch": 1.6506804925469862, "grad_norm": 0.7623543739318848, "learning_rate": 2.2163643735539688e-06, "loss": 0.1144, "step": 5094 }, { "epoch": 1.6510045366169799, "grad_norm": 0.7137896418571472, "learning_rate": 2.2154954298804514e-06, "loss": 0.0932, "step": 5095 }, { "epoch": 1.6513285806869735, "grad_norm": 0.7739506363868713, "learning_rate": 2.214626521027538e-06, "loss": 0.115, "step": 5096 }, { "epoch": 1.651652624756967, "grad_norm": 0.8516456484794617, "learning_rate": 2.213757647101577e-06, "loss": 0.1153, "step": 5097 }, { "epoch": 1.6519766688269604, "grad_norm": 0.7973065972328186, "learning_rate": 2.2128888082089093e-06, "loss": 0.1111, "step": 5098 }, { "epoch": 1.652300712896954, "grad_norm": 0.8301618695259094, "learning_rate": 2.2120200044558705e-06, "loss": 0.1171, "step": 5099 }, { "epoch": 1.6526247569669477, "grad_norm": 0.8303407430648804, "learning_rate": 2.2111512359487967e-06, "loss": 0.1091, "step": 5100 }, { "epoch": 1.652948801036941, "grad_norm": 0.8207636475563049, "learning_rate": 2.2102825027940143e-06, "loss": 0.118, "step": 5101 }, { "epoch": 1.6532728451069345, "grad_norm": 0.7522189617156982, "learning_rate": 2.2094138050978496e-06, "loss": 0.1047, "step": 5102 }, { "epoch": 1.653596889176928, "grad_norm": 1.0848859548568726, "learning_rate": 2.2085451429666215e-06, "loss": 0.118, "step": 5103 }, { "epoch": 1.6539209332469216, "grad_norm": 0.8184290528297424, "learning_rate": 2.207676516506647e-06, "loss": 0.1085, "step": 5104 }, { "epoch": 1.6542449773169152, "grad_norm": 0.8451368808746338, "learning_rate": 2.206807925824237e-06, "loss": 0.1212, "step": 5105 }, { "epoch": 1.6545690213869086, "grad_norm": 0.8918312788009644, "learning_rate": 2.205939371025698e-06, "loss": 0.1222, "step": 5106 }, { "epoch": 1.654893065456902, "grad_norm": 0.8274011015892029, "learning_rate": 2.205070852217334e-06, "loss": 0.1167, "step": 5107 }, { "epoch": 1.6552171095268955, "grad_norm": 0.84331214427948, "learning_rate": 2.204202369505441e-06, "loss": 0.1171, "step": 5108 }, { "epoch": 1.6555411535968891, "grad_norm": 0.7344668507575989, "learning_rate": 2.203333922996316e-06, "loss": 0.0974, "step": 5109 }, { "epoch": 1.6558651976668828, "grad_norm": 0.8344709277153015, "learning_rate": 2.202465512796247e-06, "loss": 0.1152, "step": 5110 }, { "epoch": 1.6561892417368762, "grad_norm": 0.7639608383178711, "learning_rate": 2.2015971390115172e-06, "loss": 0.1025, "step": 5111 }, { "epoch": 1.6565132858068696, "grad_norm": 0.7677879929542542, "learning_rate": 2.2007288017484105e-06, "loss": 0.1072, "step": 5112 }, { "epoch": 1.6568373298768633, "grad_norm": 0.8520178198814392, "learning_rate": 2.1998605011131997e-06, "loss": 0.1175, "step": 5113 }, { "epoch": 1.657161373946857, "grad_norm": 0.8265737891197205, "learning_rate": 2.19899223721216e-06, "loss": 0.1137, "step": 5114 }, { "epoch": 1.6574854180168503, "grad_norm": 0.7737930417060852, "learning_rate": 2.1981240101515548e-06, "loss": 0.1098, "step": 5115 }, { "epoch": 1.6578094620868438, "grad_norm": 0.8008612394332886, "learning_rate": 2.1972558200376497e-06, "loss": 0.1057, "step": 5116 }, { "epoch": 1.6581335061568372, "grad_norm": 0.8462890982627869, "learning_rate": 2.1963876669767008e-06, "loss": 0.1181, "step": 5117 }, { "epoch": 1.6584575502268308, "grad_norm": 0.8264334797859192, "learning_rate": 2.1955195510749614e-06, "loss": 0.124, "step": 5118 }, { "epoch": 1.6587815942968245, "grad_norm": 0.8477981090545654, "learning_rate": 2.1946514724386827e-06, "loss": 0.1155, "step": 5119 }, { "epoch": 1.659105638366818, "grad_norm": 0.7597894668579102, "learning_rate": 2.1937834311741066e-06, "loss": 0.1052, "step": 5120 }, { "epoch": 1.6594296824368113, "grad_norm": 0.7452861666679382, "learning_rate": 2.192915427387475e-06, "loss": 0.1054, "step": 5121 }, { "epoch": 1.659753726506805, "grad_norm": 0.7864974141120911, "learning_rate": 2.1920474611850225e-06, "loss": 0.109, "step": 5122 }, { "epoch": 1.6600777705767984, "grad_norm": 0.769801914691925, "learning_rate": 2.1911795326729784e-06, "loss": 0.1022, "step": 5123 }, { "epoch": 1.660401814646792, "grad_norm": 0.7793107032775879, "learning_rate": 2.190311641957571e-06, "loss": 0.105, "step": 5124 }, { "epoch": 1.6607258587167855, "grad_norm": 0.8600584864616394, "learning_rate": 2.18944378914502e-06, "loss": 0.1248, "step": 5125 }, { "epoch": 1.6610499027867789, "grad_norm": 0.714015007019043, "learning_rate": 2.188575974341543e-06, "loss": 0.0992, "step": 5126 }, { "epoch": 1.6613739468567725, "grad_norm": 0.8324199914932251, "learning_rate": 2.1877081976533515e-06, "loss": 0.12, "step": 5127 }, { "epoch": 1.6616979909267662, "grad_norm": 0.7645648121833801, "learning_rate": 2.186840459186654e-06, "loss": 0.1041, "step": 5128 }, { "epoch": 1.6620220349967596, "grad_norm": 0.9017804265022278, "learning_rate": 2.185972759047653e-06, "loss": 0.13, "step": 5129 }, { "epoch": 1.662346079066753, "grad_norm": 0.7904400825500488, "learning_rate": 2.1851050973425454e-06, "loss": 0.1126, "step": 5130 }, { "epoch": 1.6626701231367464, "grad_norm": 0.7225543856620789, "learning_rate": 2.1842374741775262e-06, "loss": 0.1025, "step": 5131 }, { "epoch": 1.66299416720674, "grad_norm": 0.8221871256828308, "learning_rate": 2.1833698896587816e-06, "loss": 0.1158, "step": 5132 }, { "epoch": 1.6633182112767337, "grad_norm": 0.8409931659698486, "learning_rate": 2.1825023438924995e-06, "loss": 0.1221, "step": 5133 }, { "epoch": 1.6636422553467272, "grad_norm": 0.7728251814842224, "learning_rate": 2.1816348369848555e-06, "loss": 0.1082, "step": 5134 }, { "epoch": 1.6639662994167206, "grad_norm": 0.7423127889633179, "learning_rate": 2.180767369042026e-06, "loss": 0.0987, "step": 5135 }, { "epoch": 1.6642903434867142, "grad_norm": 0.8032506108283997, "learning_rate": 2.1798999401701802e-06, "loss": 0.1088, "step": 5136 }, { "epoch": 1.6646143875567079, "grad_norm": 0.8562729954719543, "learning_rate": 2.1790325504754827e-06, "loss": 0.1244, "step": 5137 }, { "epoch": 1.6649384316267013, "grad_norm": 0.7597046494483948, "learning_rate": 2.1781652000640947e-06, "loss": 0.1047, "step": 5138 }, { "epoch": 1.6652624756966947, "grad_norm": 0.8073447942733765, "learning_rate": 2.177297889042169e-06, "loss": 0.1147, "step": 5139 }, { "epoch": 1.6655865197666881, "grad_norm": 0.7578743696212769, "learning_rate": 2.1764306175158588e-06, "loss": 0.1047, "step": 5140 }, { "epoch": 1.6659105638366818, "grad_norm": 0.8077635765075684, "learning_rate": 2.1755633855913086e-06, "loss": 0.1157, "step": 5141 }, { "epoch": 1.6662346079066754, "grad_norm": 0.7816113233566284, "learning_rate": 2.174696193374658e-06, "loss": 0.1126, "step": 5142 }, { "epoch": 1.6665586519766689, "grad_norm": 0.8228937387466431, "learning_rate": 2.173829040972046e-06, "loss": 0.1208, "step": 5143 }, { "epoch": 1.6668826960466623, "grad_norm": 0.8054512143135071, "learning_rate": 2.1729619284896e-06, "loss": 0.1165, "step": 5144 }, { "epoch": 1.6672067401166557, "grad_norm": 0.712684690952301, "learning_rate": 2.1720948560334492e-06, "loss": 0.0987, "step": 5145 }, { "epoch": 1.6675307841866494, "grad_norm": 0.9595630764961243, "learning_rate": 2.171227823709713e-06, "loss": 0.1372, "step": 5146 }, { "epoch": 1.667854828256643, "grad_norm": 0.8455243706703186, "learning_rate": 2.1703608316245092e-06, "loss": 0.1212, "step": 5147 }, { "epoch": 1.6681788723266364, "grad_norm": 0.7891972661018372, "learning_rate": 2.169493879883948e-06, "loss": 0.1104, "step": 5148 }, { "epoch": 1.6685029163966298, "grad_norm": 0.8356092572212219, "learning_rate": 2.168626968594136e-06, "loss": 0.1129, "step": 5149 }, { "epoch": 1.6688269604666235, "grad_norm": 0.7961825728416443, "learning_rate": 2.167760097861176e-06, "loss": 0.1114, "step": 5150 }, { "epoch": 1.6691510045366171, "grad_norm": 0.7900418043136597, "learning_rate": 2.1668932677911624e-06, "loss": 0.1059, "step": 5151 }, { "epoch": 1.6694750486066106, "grad_norm": 0.7676316499710083, "learning_rate": 2.166026478490189e-06, "loss": 0.1045, "step": 5152 }, { "epoch": 1.669799092676604, "grad_norm": 0.8411489725112915, "learning_rate": 2.1651597300643418e-06, "loss": 0.1065, "step": 5153 }, { "epoch": 1.6701231367465974, "grad_norm": 0.7867658138275146, "learning_rate": 2.1642930226197012e-06, "loss": 0.1154, "step": 5154 }, { "epoch": 1.670447180816591, "grad_norm": 0.8002599477767944, "learning_rate": 2.1634263562623454e-06, "loss": 0.1102, "step": 5155 }, { "epoch": 1.6707712248865847, "grad_norm": 0.703038215637207, "learning_rate": 2.162559731098345e-06, "loss": 0.0976, "step": 5156 }, { "epoch": 1.6710952689565781, "grad_norm": 0.8850612044334412, "learning_rate": 2.161693147233767e-06, "loss": 0.1279, "step": 5157 }, { "epoch": 1.6714193130265715, "grad_norm": 0.890364408493042, "learning_rate": 2.1608266047746723e-06, "loss": 0.1266, "step": 5158 }, { "epoch": 1.6717433570965652, "grad_norm": 0.7292555570602417, "learning_rate": 2.1599601038271186e-06, "loss": 0.1019, "step": 5159 }, { "epoch": 1.6720674011665586, "grad_norm": 0.8944790959358215, "learning_rate": 2.1590936444971563e-06, "loss": 0.123, "step": 5160 }, { "epoch": 1.6723914452365523, "grad_norm": 0.8260093331336975, "learning_rate": 2.1582272268908307e-06, "loss": 0.122, "step": 5161 }, { "epoch": 1.6727154893065457, "grad_norm": 0.7920061945915222, "learning_rate": 2.1573608511141845e-06, "loss": 0.1176, "step": 5162 }, { "epoch": 1.673039533376539, "grad_norm": 0.8595673441886902, "learning_rate": 2.1564945172732523e-06, "loss": 0.1229, "step": 5163 }, { "epoch": 1.6733635774465327, "grad_norm": 0.7942947745323181, "learning_rate": 2.155628225474067e-06, "loss": 0.1139, "step": 5164 }, { "epoch": 1.6736876215165264, "grad_norm": 0.808169960975647, "learning_rate": 2.154761975822653e-06, "loss": 0.1129, "step": 5165 }, { "epoch": 1.6740116655865198, "grad_norm": 0.7921257019042969, "learning_rate": 2.1538957684250303e-06, "loss": 0.1118, "step": 5166 }, { "epoch": 1.6743357096565132, "grad_norm": 0.8302102088928223, "learning_rate": 2.1530296033872155e-06, "loss": 0.1158, "step": 5167 }, { "epoch": 1.6746597537265067, "grad_norm": 0.7993295788764954, "learning_rate": 2.152163480815218e-06, "loss": 0.1146, "step": 5168 }, { "epoch": 1.6749837977965003, "grad_norm": 0.785004198551178, "learning_rate": 2.151297400815044e-06, "loss": 0.115, "step": 5169 }, { "epoch": 1.675307841866494, "grad_norm": 0.7763941884040833, "learning_rate": 2.150431363492691e-06, "loss": 0.1143, "step": 5170 }, { "epoch": 1.6756318859364874, "grad_norm": 0.8561198711395264, "learning_rate": 2.1495653689541562e-06, "loss": 0.1224, "step": 5171 }, { "epoch": 1.6759559300064808, "grad_norm": 0.7910784482955933, "learning_rate": 2.1486994173054276e-06, "loss": 0.1122, "step": 5172 }, { "epoch": 1.6762799740764744, "grad_norm": 0.821010947227478, "learning_rate": 2.1478335086524885e-06, "loss": 0.1088, "step": 5173 }, { "epoch": 1.6766040181464679, "grad_norm": 0.8397283554077148, "learning_rate": 2.14696764310132e-06, "loss": 0.1194, "step": 5174 }, { "epoch": 1.6769280622164615, "grad_norm": 0.7711231112480164, "learning_rate": 2.1461018207578932e-06, "loss": 0.1017, "step": 5175 }, { "epoch": 1.677252106286455, "grad_norm": 0.7861514091491699, "learning_rate": 2.1452360417281786e-06, "loss": 0.1092, "step": 5176 }, { "epoch": 1.6775761503564484, "grad_norm": 0.7565597891807556, "learning_rate": 2.144370306118138e-06, "loss": 0.1043, "step": 5177 }, { "epoch": 1.677900194426442, "grad_norm": 0.7293825149536133, "learning_rate": 2.143504614033728e-06, "loss": 0.1027, "step": 5178 }, { "epoch": 1.6782242384964356, "grad_norm": 0.8587285280227661, "learning_rate": 2.142638965580903e-06, "loss": 0.1151, "step": 5179 }, { "epoch": 1.678548282566429, "grad_norm": 0.8521298766136169, "learning_rate": 2.141773360865609e-06, "loss": 0.1197, "step": 5180 }, { "epoch": 1.6788723266364225, "grad_norm": 0.8533197641372681, "learning_rate": 2.1409077999937883e-06, "loss": 0.1212, "step": 5181 }, { "epoch": 1.679196370706416, "grad_norm": 0.8288674354553223, "learning_rate": 2.1400422830713752e-06, "loss": 0.1158, "step": 5182 }, { "epoch": 1.6795204147764096, "grad_norm": 0.8852332234382629, "learning_rate": 2.1391768102043032e-06, "loss": 0.1265, "step": 5183 }, { "epoch": 1.6798444588464032, "grad_norm": 0.8235684037208557, "learning_rate": 2.1383113814984967e-06, "loss": 0.1112, "step": 5184 }, { "epoch": 1.6801685029163966, "grad_norm": 0.7521533370018005, "learning_rate": 2.137445997059874e-06, "loss": 0.1086, "step": 5185 }, { "epoch": 1.68049254698639, "grad_norm": 0.8826772570610046, "learning_rate": 2.1365806569943533e-06, "loss": 0.1226, "step": 5186 }, { "epoch": 1.6808165910563837, "grad_norm": 0.8674874901771545, "learning_rate": 2.1357153614078407e-06, "loss": 0.1196, "step": 5187 }, { "epoch": 1.6811406351263773, "grad_norm": 0.8353680372238159, "learning_rate": 2.1348501104062423e-06, "loss": 0.1225, "step": 5188 }, { "epoch": 1.6814646791963708, "grad_norm": 0.8163774609565735, "learning_rate": 2.1339849040954556e-06, "loss": 0.1132, "step": 5189 }, { "epoch": 1.6817887232663642, "grad_norm": 0.7950161695480347, "learning_rate": 2.133119742581373e-06, "loss": 0.1117, "step": 5190 }, { "epoch": 1.6821127673363576, "grad_norm": 0.8247337341308594, "learning_rate": 2.1322546259698823e-06, "loss": 0.1144, "step": 5191 }, { "epoch": 1.6824368114063513, "grad_norm": 0.7793399095535278, "learning_rate": 2.1313895543668644e-06, "loss": 0.1107, "step": 5192 }, { "epoch": 1.682760855476345, "grad_norm": 0.8173040747642517, "learning_rate": 2.1305245278781977e-06, "loss": 0.1139, "step": 5193 }, { "epoch": 1.6830848995463383, "grad_norm": 0.8045807480812073, "learning_rate": 2.129659546609751e-06, "loss": 0.1136, "step": 5194 }, { "epoch": 1.6834089436163318, "grad_norm": 0.80312180519104, "learning_rate": 2.1287946106673916e-06, "loss": 0.1108, "step": 5195 }, { "epoch": 1.6837329876863252, "grad_norm": 0.836544930934906, "learning_rate": 2.1279297201569787e-06, "loss": 0.1209, "step": 5196 }, { "epoch": 1.6840570317563188, "grad_norm": 0.7888853549957275, "learning_rate": 2.127064875184365e-06, "loss": 0.106, "step": 5197 }, { "epoch": 1.6843810758263125, "grad_norm": 0.8119872808456421, "learning_rate": 2.126200075855401e-06, "loss": 0.12, "step": 5198 }, { "epoch": 1.684705119896306, "grad_norm": 0.7502855062484741, "learning_rate": 2.125335322275928e-06, "loss": 0.1081, "step": 5199 }, { "epoch": 1.6850291639662993, "grad_norm": 0.7704909443855286, "learning_rate": 2.1244706145517853e-06, "loss": 0.1046, "step": 5200 }, { "epoch": 1.685353208036293, "grad_norm": 0.8156105279922485, "learning_rate": 2.1236059527888044e-06, "loss": 0.1142, "step": 5201 }, { "epoch": 1.6856772521062866, "grad_norm": 0.8128482103347778, "learning_rate": 2.1227413370928106e-06, "loss": 0.1135, "step": 5202 }, { "epoch": 1.68600129617628, "grad_norm": 0.7718719840049744, "learning_rate": 2.1218767675696255e-06, "loss": 0.1082, "step": 5203 }, { "epoch": 1.6863253402462735, "grad_norm": 0.7750940918922424, "learning_rate": 2.1210122443250625e-06, "loss": 0.1143, "step": 5204 }, { "epoch": 1.6866493843162669, "grad_norm": 0.840144693851471, "learning_rate": 2.1201477674649326e-06, "loss": 0.1135, "step": 5205 }, { "epoch": 1.6869734283862605, "grad_norm": 0.8162680864334106, "learning_rate": 2.119283337095038e-06, "loss": 0.1122, "step": 5206 }, { "epoch": 1.6872974724562542, "grad_norm": 0.7928407192230225, "learning_rate": 2.1184189533211783e-06, "loss": 0.118, "step": 5207 }, { "epoch": 1.6876215165262476, "grad_norm": 0.8174422979354858, "learning_rate": 2.117554616249145e-06, "loss": 0.1202, "step": 5208 }, { "epoch": 1.687945560596241, "grad_norm": 0.7704837918281555, "learning_rate": 2.1166903259847228e-06, "loss": 0.1016, "step": 5209 }, { "epoch": 1.6882696046662347, "grad_norm": 0.8021647930145264, "learning_rate": 2.115826082633695e-06, "loss": 0.1123, "step": 5210 }, { "epoch": 1.688593648736228, "grad_norm": 0.7987725138664246, "learning_rate": 2.114961886301835e-06, "loss": 0.1127, "step": 5211 }, { "epoch": 1.6889176928062217, "grad_norm": 0.8358885049819946, "learning_rate": 2.114097737094914e-06, "loss": 0.1216, "step": 5212 }, { "epoch": 1.6892417368762151, "grad_norm": 0.8821001648902893, "learning_rate": 2.1132336351186923e-06, "loss": 0.1176, "step": 5213 }, { "epoch": 1.6895657809462086, "grad_norm": 0.8151198029518127, "learning_rate": 2.1123695804789307e-06, "loss": 0.1104, "step": 5214 }, { "epoch": 1.6898898250162022, "grad_norm": 0.8548397421836853, "learning_rate": 2.11150557328138e-06, "loss": 0.1244, "step": 5215 }, { "epoch": 1.6902138690861959, "grad_norm": 0.7912091016769409, "learning_rate": 2.110641613631785e-06, "loss": 0.1117, "step": 5216 }, { "epoch": 1.6905379131561893, "grad_norm": 0.8448325395584106, "learning_rate": 2.109777701635889e-06, "loss": 0.1182, "step": 5217 }, { "epoch": 1.6908619572261827, "grad_norm": 0.8200645446777344, "learning_rate": 2.1089138373994226e-06, "loss": 0.1068, "step": 5218 }, { "epoch": 1.6911860012961761, "grad_norm": 0.767809271812439, "learning_rate": 2.108050021028118e-06, "loss": 0.1083, "step": 5219 }, { "epoch": 1.6915100453661698, "grad_norm": 0.7847505807876587, "learning_rate": 2.1071862526276963e-06, "loss": 0.1012, "step": 5220 }, { "epoch": 1.6918340894361634, "grad_norm": 0.7471839785575867, "learning_rate": 2.1063225323038744e-06, "loss": 0.1055, "step": 5221 }, { "epoch": 1.6921581335061568, "grad_norm": 0.8450305461883545, "learning_rate": 2.1054588601623634e-06, "loss": 0.1166, "step": 5222 }, { "epoch": 1.6924821775761503, "grad_norm": 0.8114377856254578, "learning_rate": 2.104595236308868e-06, "loss": 0.1092, "step": 5223 }, { "epoch": 1.692806221646144, "grad_norm": 0.8460206389427185, "learning_rate": 2.1037316608490886e-06, "loss": 0.1171, "step": 5224 }, { "epoch": 1.6931302657161373, "grad_norm": 0.8369781374931335, "learning_rate": 2.1028681338887164e-06, "loss": 0.1207, "step": 5225 }, { "epoch": 1.693454309786131, "grad_norm": 0.7532182931900024, "learning_rate": 2.102004655533442e-06, "loss": 0.1019, "step": 5226 }, { "epoch": 1.6937783538561244, "grad_norm": 0.7661359906196594, "learning_rate": 2.101141225888944e-06, "loss": 0.1109, "step": 5227 }, { "epoch": 1.6941023979261178, "grad_norm": 0.7669044137001038, "learning_rate": 2.100277845060898e-06, "loss": 0.1078, "step": 5228 }, { "epoch": 1.6944264419961115, "grad_norm": 0.8359112739562988, "learning_rate": 2.0994145131549755e-06, "loss": 0.1174, "step": 5229 }, { "epoch": 1.6947504860661051, "grad_norm": 0.7608208060264587, "learning_rate": 2.0985512302768366e-06, "loss": 0.1007, "step": 5230 }, { "epoch": 1.6950745301360985, "grad_norm": 0.7669379711151123, "learning_rate": 2.097687996532143e-06, "loss": 0.1005, "step": 5231 }, { "epoch": 1.695398574206092, "grad_norm": 0.8257039785385132, "learning_rate": 2.0968248120265433e-06, "loss": 0.1183, "step": 5232 }, { "epoch": 1.6957226182760854, "grad_norm": 0.792262852191925, "learning_rate": 2.095961676865683e-06, "loss": 0.1116, "step": 5233 }, { "epoch": 1.696046662346079, "grad_norm": 0.7693544626235962, "learning_rate": 2.095098591155203e-06, "loss": 0.1106, "step": 5234 }, { "epoch": 1.6963707064160727, "grad_norm": 0.8442361354827881, "learning_rate": 2.094235555000734e-06, "loss": 0.1116, "step": 5235 }, { "epoch": 1.696694750486066, "grad_norm": 0.7691715955734253, "learning_rate": 2.093372568507907e-06, "loss": 0.1071, "step": 5236 }, { "epoch": 1.6970187945560595, "grad_norm": 0.8054088354110718, "learning_rate": 2.0925096317823393e-06, "loss": 0.111, "step": 5237 }, { "epoch": 1.6973428386260532, "grad_norm": 0.7808986902236938, "learning_rate": 2.091646744929649e-06, "loss": 0.1039, "step": 5238 }, { "epoch": 1.6976668826960468, "grad_norm": 0.8026829361915588, "learning_rate": 2.0907839080554443e-06, "loss": 0.1158, "step": 5239 }, { "epoch": 1.6979909267660402, "grad_norm": 0.7938512563705444, "learning_rate": 2.0899211212653262e-06, "loss": 0.1135, "step": 5240 }, { "epoch": 1.6983149708360337, "grad_norm": 0.7803632616996765, "learning_rate": 2.0890583846648945e-06, "loss": 0.1089, "step": 5241 }, { "epoch": 1.698639014906027, "grad_norm": 0.8693332672119141, "learning_rate": 2.0881956983597375e-06, "loss": 0.1149, "step": 5242 }, { "epoch": 1.6989630589760207, "grad_norm": 0.8382584452629089, "learning_rate": 2.087333062455441e-06, "loss": 0.1217, "step": 5243 }, { "epoch": 1.6992871030460144, "grad_norm": 0.8314406275749207, "learning_rate": 2.0864704770575824e-06, "loss": 0.1168, "step": 5244 }, { "epoch": 1.6996111471160078, "grad_norm": 0.848581075668335, "learning_rate": 2.085607942271734e-06, "loss": 0.1153, "step": 5245 }, { "epoch": 1.6999351911860012, "grad_norm": 0.8251847624778748, "learning_rate": 2.0847454582034625e-06, "loss": 0.1206, "step": 5246 }, { "epoch": 1.7002592352559946, "grad_norm": 0.750935971736908, "learning_rate": 2.0838830249583254e-06, "loss": 0.1015, "step": 5247 }, { "epoch": 1.7005832793259883, "grad_norm": 0.8350731134414673, "learning_rate": 2.0830206426418794e-06, "loss": 0.1158, "step": 5248 }, { "epoch": 1.700907323395982, "grad_norm": 0.8031424880027771, "learning_rate": 2.0821583113596686e-06, "loss": 0.1155, "step": 5249 }, { "epoch": 1.7012313674659754, "grad_norm": 0.904130220413208, "learning_rate": 2.081296031217237e-06, "loss": 0.1181, "step": 5250 }, { "epoch": 1.7015554115359688, "grad_norm": 0.7820038795471191, "learning_rate": 2.080433802320117e-06, "loss": 0.1104, "step": 5251 }, { "epoch": 1.7018794556059624, "grad_norm": 0.7897928953170776, "learning_rate": 2.0795716247738374e-06, "loss": 0.1116, "step": 5252 }, { "epoch": 1.702203499675956, "grad_norm": 0.7536505460739136, "learning_rate": 2.078709498683922e-06, "loss": 0.1046, "step": 5253 }, { "epoch": 1.7025275437459495, "grad_norm": 0.7892839312553406, "learning_rate": 2.0778474241558845e-06, "loss": 0.1101, "step": 5254 }, { "epoch": 1.702851587815943, "grad_norm": 0.745802640914917, "learning_rate": 2.0769854012952368e-06, "loss": 0.102, "step": 5255 }, { "epoch": 1.7031756318859363, "grad_norm": 0.8427780866622925, "learning_rate": 2.0761234302074803e-06, "loss": 0.1191, "step": 5256 }, { "epoch": 1.70349967595593, "grad_norm": 0.714269757270813, "learning_rate": 2.0752615109981116e-06, "loss": 0.0992, "step": 5257 }, { "epoch": 1.7038237200259236, "grad_norm": 0.7593355774879456, "learning_rate": 2.0743996437726233e-06, "loss": 0.0979, "step": 5258 }, { "epoch": 1.704147764095917, "grad_norm": 0.7677283883094788, "learning_rate": 2.073537828636497e-06, "loss": 0.0983, "step": 5259 }, { "epoch": 1.7044718081659105, "grad_norm": 0.7660530805587769, "learning_rate": 2.0726760656952137e-06, "loss": 0.1055, "step": 5260 }, { "epoch": 1.7047958522359041, "grad_norm": 0.8433137536048889, "learning_rate": 2.0718143550542418e-06, "loss": 0.113, "step": 5261 }, { "epoch": 1.7051198963058976, "grad_norm": 0.8808390498161316, "learning_rate": 2.0709526968190483e-06, "loss": 0.1149, "step": 5262 }, { "epoch": 1.7054439403758912, "grad_norm": 0.853110671043396, "learning_rate": 2.070091091095092e-06, "loss": 0.116, "step": 5263 }, { "epoch": 1.7057679844458846, "grad_norm": 0.8006988167762756, "learning_rate": 2.0692295379878237e-06, "loss": 0.1183, "step": 5264 }, { "epoch": 1.706092028515878, "grad_norm": 0.8483065366744995, "learning_rate": 2.0683680376026897e-06, "loss": 0.1277, "step": 5265 }, { "epoch": 1.7064160725858717, "grad_norm": 0.8355550169944763, "learning_rate": 2.0675065900451287e-06, "loss": 0.1146, "step": 5266 }, { "epoch": 1.7067401166558653, "grad_norm": 0.8079224228858948, "learning_rate": 2.066645195420575e-06, "loss": 0.1138, "step": 5267 }, { "epoch": 1.7070641607258588, "grad_norm": 0.90150386095047, "learning_rate": 2.0657838538344545e-06, "loss": 0.1182, "step": 5268 }, { "epoch": 1.7073882047958522, "grad_norm": 0.7275080680847168, "learning_rate": 2.0649225653921855e-06, "loss": 0.1079, "step": 5269 }, { "epoch": 1.7077122488658456, "grad_norm": 0.7734233736991882, "learning_rate": 2.064061330199184e-06, "loss": 0.1117, "step": 5270 }, { "epoch": 1.7080362929358393, "grad_norm": 0.747999370098114, "learning_rate": 2.0632001483608544e-06, "loss": 0.1078, "step": 5271 }, { "epoch": 1.708360337005833, "grad_norm": 0.7794125080108643, "learning_rate": 2.062339019982599e-06, "loss": 0.1177, "step": 5272 }, { "epoch": 1.7086843810758263, "grad_norm": 0.8238285183906555, "learning_rate": 2.06147794516981e-06, "loss": 0.1164, "step": 5273 }, { "epoch": 1.7090084251458197, "grad_norm": 0.8097456097602844, "learning_rate": 2.0606169240278752e-06, "loss": 0.1167, "step": 5274 }, { "epoch": 1.7093324692158134, "grad_norm": 0.8134164214134216, "learning_rate": 2.059755956662176e-06, "loss": 0.1173, "step": 5275 }, { "epoch": 1.709656513285807, "grad_norm": 0.7630842924118042, "learning_rate": 2.058895043178085e-06, "loss": 0.108, "step": 5276 }, { "epoch": 1.7099805573558005, "grad_norm": 0.7667798399925232, "learning_rate": 2.0580341836809718e-06, "loss": 0.1113, "step": 5277 }, { "epoch": 1.7103046014257939, "grad_norm": 0.821010947227478, "learning_rate": 2.0571733782761943e-06, "loss": 0.1101, "step": 5278 }, { "epoch": 1.7106286454957873, "grad_norm": 0.8166499137878418, "learning_rate": 2.0563126270691097e-06, "loss": 0.1195, "step": 5279 }, { "epoch": 1.710952689565781, "grad_norm": 0.7887958884239197, "learning_rate": 2.055451930165063e-06, "loss": 0.105, "step": 5280 }, { "epoch": 1.7112767336357746, "grad_norm": 0.8705652356147766, "learning_rate": 2.054591287669398e-06, "loss": 0.1162, "step": 5281 }, { "epoch": 1.711600777705768, "grad_norm": 0.8159167170524597, "learning_rate": 2.053730699687447e-06, "loss": 0.1121, "step": 5282 }, { "epoch": 1.7119248217757614, "grad_norm": 0.8327007293701172, "learning_rate": 2.052870166324537e-06, "loss": 0.1124, "step": 5283 }, { "epoch": 1.7122488658457549, "grad_norm": 0.8116735816001892, "learning_rate": 2.0520096876859918e-06, "loss": 0.1156, "step": 5284 }, { "epoch": 1.7125729099157485, "grad_norm": 0.7572367191314697, "learning_rate": 2.051149263877123e-06, "loss": 0.1156, "step": 5285 }, { "epoch": 1.7128969539857422, "grad_norm": 0.8980776071548462, "learning_rate": 2.0502888950032396e-06, "loss": 0.1274, "step": 5286 }, { "epoch": 1.7132209980557356, "grad_norm": 0.7499247193336487, "learning_rate": 2.0494285811696417e-06, "loss": 0.1088, "step": 5287 }, { "epoch": 1.713545042125729, "grad_norm": 0.9621228575706482, "learning_rate": 2.048568322481623e-06, "loss": 0.1082, "step": 5288 }, { "epoch": 1.7138690861957226, "grad_norm": 0.7719082236289978, "learning_rate": 2.0477081190444724e-06, "loss": 0.1068, "step": 5289 }, { "epoch": 1.7141931302657163, "grad_norm": 0.847591757774353, "learning_rate": 2.046847970963468e-06, "loss": 0.1174, "step": 5290 }, { "epoch": 1.7145171743357097, "grad_norm": 0.7991353869438171, "learning_rate": 2.0459878783438867e-06, "loss": 0.1075, "step": 5291 }, { "epoch": 1.7148412184057031, "grad_norm": 0.8025398254394531, "learning_rate": 2.045127841290993e-06, "loss": 0.1116, "step": 5292 }, { "epoch": 1.7151652624756966, "grad_norm": 0.8297464847564697, "learning_rate": 2.0442678599100484e-06, "loss": 0.1166, "step": 5293 }, { "epoch": 1.7154893065456902, "grad_norm": 0.9035441279411316, "learning_rate": 2.043407934306306e-06, "loss": 0.1201, "step": 5294 }, { "epoch": 1.7158133506156839, "grad_norm": 0.809662938117981, "learning_rate": 2.0425480645850124e-06, "loss": 0.1137, "step": 5295 }, { "epoch": 1.7161373946856773, "grad_norm": 0.8593390583992004, "learning_rate": 2.041688250851407e-06, "loss": 0.1164, "step": 5296 }, { "epoch": 1.7164614387556707, "grad_norm": 0.8075687885284424, "learning_rate": 2.0408284932107227e-06, "loss": 0.1164, "step": 5297 }, { "epoch": 1.7167854828256643, "grad_norm": 0.8436287641525269, "learning_rate": 2.039968791768186e-06, "loss": 0.1125, "step": 5298 }, { "epoch": 1.7171095268956578, "grad_norm": 0.8198383450508118, "learning_rate": 2.039109146629016e-06, "loss": 0.1095, "step": 5299 }, { "epoch": 1.7174335709656514, "grad_norm": 0.8079723119735718, "learning_rate": 2.0382495578984236e-06, "loss": 0.1109, "step": 5300 }, { "epoch": 1.7177576150356448, "grad_norm": 0.7642526030540466, "learning_rate": 2.0373900256816166e-06, "loss": 0.1099, "step": 5301 }, { "epoch": 1.7180816591056383, "grad_norm": 0.7780331373214722, "learning_rate": 2.0365305500837906e-06, "loss": 0.1121, "step": 5302 }, { "epoch": 1.718405703175632, "grad_norm": 0.8425008058547974, "learning_rate": 2.0356711312101394e-06, "loss": 0.1149, "step": 5303 }, { "epoch": 1.7187297472456255, "grad_norm": 0.799623429775238, "learning_rate": 2.0348117691658463e-06, "loss": 0.1125, "step": 5304 }, { "epoch": 1.719053791315619, "grad_norm": 0.7548736333847046, "learning_rate": 2.03395246405609e-06, "loss": 0.1032, "step": 5305 }, { "epoch": 1.7193778353856124, "grad_norm": 0.7889924645423889, "learning_rate": 2.03309321598604e-06, "loss": 0.1098, "step": 5306 }, { "epoch": 1.7197018794556058, "grad_norm": 0.7848178744316101, "learning_rate": 2.03223402506086e-06, "loss": 0.1077, "step": 5307 }, { "epoch": 1.7200259235255995, "grad_norm": 0.7917930483818054, "learning_rate": 2.031374891385708e-06, "loss": 0.1116, "step": 5308 }, { "epoch": 1.720349967595593, "grad_norm": 0.819743275642395, "learning_rate": 2.0305158150657316e-06, "loss": 0.1153, "step": 5309 }, { "epoch": 1.7206740116655865, "grad_norm": 0.7816472053527832, "learning_rate": 2.0296567962060753e-06, "loss": 0.1096, "step": 5310 }, { "epoch": 1.72099805573558, "grad_norm": 0.8776841759681702, "learning_rate": 2.0287978349118737e-06, "loss": 0.1221, "step": 5311 }, { "epoch": 1.7213220998055736, "grad_norm": 0.8157861828804016, "learning_rate": 2.0279389312882546e-06, "loss": 0.1112, "step": 5312 }, { "epoch": 1.721646143875567, "grad_norm": 0.7488312125205994, "learning_rate": 2.027080085440341e-06, "loss": 0.1014, "step": 5313 }, { "epoch": 1.7219701879455607, "grad_norm": 0.8361231088638306, "learning_rate": 2.0262212974732465e-06, "loss": 0.1071, "step": 5314 }, { "epoch": 1.722294232015554, "grad_norm": 0.8565608859062195, "learning_rate": 2.0253625674920795e-06, "loss": 0.1147, "step": 5315 }, { "epoch": 1.7226182760855475, "grad_norm": 0.8289151787757874, "learning_rate": 2.0245038956019386e-06, "loss": 0.1124, "step": 5316 }, { "epoch": 1.7229423201555412, "grad_norm": 0.7731626629829407, "learning_rate": 2.0236452819079183e-06, "loss": 0.1099, "step": 5317 }, { "epoch": 1.7232663642255348, "grad_norm": 0.8253824710845947, "learning_rate": 2.0227867265151035e-06, "loss": 0.1169, "step": 5318 }, { "epoch": 1.7235904082955282, "grad_norm": 0.7757041454315186, "learning_rate": 2.0219282295285734e-06, "loss": 0.1063, "step": 5319 }, { "epoch": 1.7239144523655217, "grad_norm": 0.7756922841072083, "learning_rate": 2.021069791053401e-06, "loss": 0.1052, "step": 5320 }, { "epoch": 1.724238496435515, "grad_norm": 0.9245814681053162, "learning_rate": 2.0202114111946483e-06, "loss": 0.1267, "step": 5321 }, { "epoch": 1.7245625405055087, "grad_norm": 0.7585616707801819, "learning_rate": 2.019353090057375e-06, "loss": 0.1021, "step": 5322 }, { "epoch": 1.7248865845755024, "grad_norm": 0.8026843667030334, "learning_rate": 2.018494827746631e-06, "loss": 0.1194, "step": 5323 }, { "epoch": 1.7252106286454958, "grad_norm": 0.7823585867881775, "learning_rate": 2.0176366243674575e-06, "loss": 0.1073, "step": 5324 }, { "epoch": 1.7255346727154892, "grad_norm": 0.8478108048439026, "learning_rate": 2.0167784800248924e-06, "loss": 0.1177, "step": 5325 }, { "epoch": 1.7258587167854829, "grad_norm": 0.8905182480812073, "learning_rate": 2.0159203948239624e-06, "loss": 0.121, "step": 5326 }, { "epoch": 1.7261827608554765, "grad_norm": 0.830605149269104, "learning_rate": 2.015062368869691e-06, "loss": 0.1163, "step": 5327 }, { "epoch": 1.72650680492547, "grad_norm": 0.8245023488998413, "learning_rate": 2.0142044022670905e-06, "loss": 0.1167, "step": 5328 }, { "epoch": 1.7268308489954634, "grad_norm": 0.8123703002929688, "learning_rate": 2.013346495121169e-06, "loss": 0.1167, "step": 5329 }, { "epoch": 1.7271548930654568, "grad_norm": 0.8318195939064026, "learning_rate": 2.012488647536925e-06, "loss": 0.1167, "step": 5330 }, { "epoch": 1.7274789371354504, "grad_norm": 0.8157840967178345, "learning_rate": 2.0116308596193502e-06, "loss": 0.1087, "step": 5331 }, { "epoch": 1.727802981205444, "grad_norm": 0.8502197265625, "learning_rate": 2.0107731314734316e-06, "loss": 0.1228, "step": 5332 }, { "epoch": 1.7281270252754375, "grad_norm": 0.8437539935112, "learning_rate": 2.0099154632041446e-06, "loss": 0.1133, "step": 5333 }, { "epoch": 1.728451069345431, "grad_norm": 0.777830183506012, "learning_rate": 2.0090578549164614e-06, "loss": 0.1145, "step": 5334 }, { "epoch": 1.7287751134154243, "grad_norm": 0.8191279172897339, "learning_rate": 2.0082003067153436e-06, "loss": 0.1099, "step": 5335 }, { "epoch": 1.729099157485418, "grad_norm": 0.8253933191299438, "learning_rate": 2.007342818705747e-06, "loss": 0.1137, "step": 5336 }, { "epoch": 1.7294232015554116, "grad_norm": 0.7542148232460022, "learning_rate": 2.006485390992621e-06, "loss": 0.1002, "step": 5337 }, { "epoch": 1.729747245625405, "grad_norm": 0.8375315070152283, "learning_rate": 2.0056280236809044e-06, "loss": 0.1187, "step": 5338 }, { "epoch": 1.7300712896953985, "grad_norm": 0.8146992325782776, "learning_rate": 2.004770716875533e-06, "loss": 0.1155, "step": 5339 }, { "epoch": 1.7303953337653921, "grad_norm": 0.8450291752815247, "learning_rate": 2.0039134706814303e-06, "loss": 0.1193, "step": 5340 }, { "epoch": 1.7307193778353858, "grad_norm": 0.7564694881439209, "learning_rate": 2.0030562852035175e-06, "loss": 0.1044, "step": 5341 }, { "epoch": 1.7310434219053792, "grad_norm": 0.7646568417549133, "learning_rate": 2.0021991605467043e-06, "loss": 0.108, "step": 5342 }, { "epoch": 1.7313674659753726, "grad_norm": 0.8570701479911804, "learning_rate": 2.0013420968158944e-06, "loss": 0.114, "step": 5343 }, { "epoch": 1.731691510045366, "grad_norm": 0.7767725586891174, "learning_rate": 2.0004850941159847e-06, "loss": 0.1087, "step": 5344 }, { "epoch": 1.7320155541153597, "grad_norm": 0.7576577067375183, "learning_rate": 1.999628152551863e-06, "loss": 0.1071, "step": 5345 }, { "epoch": 1.7323395981853533, "grad_norm": 0.7547813057899475, "learning_rate": 1.9987712722284132e-06, "loss": 0.1012, "step": 5346 }, { "epoch": 1.7326636422553467, "grad_norm": 0.8170945644378662, "learning_rate": 1.9979144532505064e-06, "loss": 0.1113, "step": 5347 }, { "epoch": 1.7329876863253402, "grad_norm": 0.8752892017364502, "learning_rate": 1.9970576957230094e-06, "loss": 0.1209, "step": 5348 }, { "epoch": 1.7333117303953338, "grad_norm": 0.7312746644020081, "learning_rate": 1.996200999750783e-06, "loss": 0.1034, "step": 5349 }, { "epoch": 1.7336357744653272, "grad_norm": 0.850387692451477, "learning_rate": 1.995344365438676e-06, "loss": 0.1226, "step": 5350 }, { "epoch": 1.7339598185353209, "grad_norm": 0.8289128541946411, "learning_rate": 1.994487792891534e-06, "loss": 0.1199, "step": 5351 }, { "epoch": 1.7342838626053143, "grad_norm": 0.7456474304199219, "learning_rate": 1.993631282214191e-06, "loss": 0.112, "step": 5352 }, { "epoch": 1.7346079066753077, "grad_norm": 0.8394036889076233, "learning_rate": 1.992774833511478e-06, "loss": 0.1312, "step": 5353 }, { "epoch": 1.7349319507453014, "grad_norm": 0.8686239123344421, "learning_rate": 1.991918446888216e-06, "loss": 0.1235, "step": 5354 }, { "epoch": 1.735255994815295, "grad_norm": 0.8267319798469543, "learning_rate": 1.9910621224492154e-06, "loss": 0.1126, "step": 5355 }, { "epoch": 1.7355800388852884, "grad_norm": 0.7727353572845459, "learning_rate": 1.9902058602992856e-06, "loss": 0.1056, "step": 5356 }, { "epoch": 1.7359040829552819, "grad_norm": 0.7727401852607727, "learning_rate": 1.989349660543222e-06, "loss": 0.0959, "step": 5357 }, { "epoch": 1.7362281270252753, "grad_norm": 0.8392402529716492, "learning_rate": 1.988493523285818e-06, "loss": 0.1186, "step": 5358 }, { "epoch": 1.736552171095269, "grad_norm": 0.8880442380905151, "learning_rate": 1.9876374486318545e-06, "loss": 0.1243, "step": 5359 }, { "epoch": 1.7368762151652626, "grad_norm": 0.756595253944397, "learning_rate": 1.9867814366861075e-06, "loss": 0.1028, "step": 5360 }, { "epoch": 1.737200259235256, "grad_norm": 0.7915642261505127, "learning_rate": 1.9859254875533435e-06, "loss": 0.1118, "step": 5361 }, { "epoch": 1.7375243033052494, "grad_norm": 0.8473532199859619, "learning_rate": 1.9850696013383236e-06, "loss": 0.1188, "step": 5362 }, { "epoch": 1.737848347375243, "grad_norm": 0.7980912327766418, "learning_rate": 1.9842137781458e-06, "loss": 0.1066, "step": 5363 }, { "epoch": 1.7381723914452365, "grad_norm": 0.7806613445281982, "learning_rate": 1.9833580180805155e-06, "loss": 0.1127, "step": 5364 }, { "epoch": 1.7384964355152301, "grad_norm": 0.8259003758430481, "learning_rate": 1.9825023212472095e-06, "loss": 0.1118, "step": 5365 }, { "epoch": 1.7388204795852236, "grad_norm": 0.7611026167869568, "learning_rate": 1.9816466877506095e-06, "loss": 0.1056, "step": 5366 }, { "epoch": 1.739144523655217, "grad_norm": 0.7931367754936218, "learning_rate": 1.9807911176954357e-06, "loss": 0.112, "step": 5367 }, { "epoch": 1.7394685677252106, "grad_norm": 0.7729525566101074, "learning_rate": 1.9799356111864036e-06, "loss": 0.1119, "step": 5368 }, { "epoch": 1.7397926117952043, "grad_norm": 0.78594970703125, "learning_rate": 1.979080168328218e-06, "loss": 0.1131, "step": 5369 }, { "epoch": 1.7401166558651977, "grad_norm": 0.8103339672088623, "learning_rate": 1.9782247892255767e-06, "loss": 0.1094, "step": 5370 }, { "epoch": 1.7404406999351911, "grad_norm": 0.8295299410820007, "learning_rate": 1.9773694739831702e-06, "loss": 0.1165, "step": 5371 }, { "epoch": 1.7407647440051845, "grad_norm": 0.7763726115226746, "learning_rate": 1.976514222705681e-06, "loss": 0.1077, "step": 5372 }, { "epoch": 1.7410887880751782, "grad_norm": 0.8518121838569641, "learning_rate": 1.975659035497783e-06, "loss": 0.116, "step": 5373 }, { "epoch": 1.7414128321451718, "grad_norm": 0.7763303518295288, "learning_rate": 1.9748039124641426e-06, "loss": 0.1081, "step": 5374 }, { "epoch": 1.7417368762151653, "grad_norm": 0.8390318155288696, "learning_rate": 1.9739488537094197e-06, "loss": 0.1155, "step": 5375 }, { "epoch": 1.7420609202851587, "grad_norm": 0.8260394930839539, "learning_rate": 1.973093859338263e-06, "loss": 0.1085, "step": 5376 }, { "epoch": 1.7423849643551523, "grad_norm": 0.7894601225852966, "learning_rate": 1.9722389294553188e-06, "loss": 0.1084, "step": 5377 }, { "epoch": 1.742709008425146, "grad_norm": 0.8820915818214417, "learning_rate": 1.9713840641652206e-06, "loss": 0.1255, "step": 5378 }, { "epoch": 1.7430330524951394, "grad_norm": 0.8178963661193848, "learning_rate": 1.970529263572594e-06, "loss": 0.1186, "step": 5379 }, { "epoch": 1.7433570965651328, "grad_norm": 0.836940586566925, "learning_rate": 1.9696745277820613e-06, "loss": 0.1125, "step": 5380 }, { "epoch": 1.7436811406351262, "grad_norm": 0.9130598306655884, "learning_rate": 1.9688198568982316e-06, "loss": 0.1297, "step": 5381 }, { "epoch": 1.74400518470512, "grad_norm": 0.7412441968917847, "learning_rate": 1.96796525102571e-06, "loss": 0.1025, "step": 5382 }, { "epoch": 1.7443292287751135, "grad_norm": 0.7745087146759033, "learning_rate": 1.96711071026909e-06, "loss": 0.1054, "step": 5383 }, { "epoch": 1.744653272845107, "grad_norm": 0.7893478870391846, "learning_rate": 1.9662562347329613e-06, "loss": 0.1165, "step": 5384 }, { "epoch": 1.7449773169151004, "grad_norm": 0.8026453256607056, "learning_rate": 1.9654018245219024e-06, "loss": 0.111, "step": 5385 }, { "epoch": 1.7453013609850938, "grad_norm": 0.8066617846488953, "learning_rate": 1.9645474797404838e-06, "loss": 0.1152, "step": 5386 }, { "epoch": 1.7456254050550875, "grad_norm": 0.7359598278999329, "learning_rate": 1.963693200493271e-06, "loss": 0.1042, "step": 5387 }, { "epoch": 1.745949449125081, "grad_norm": 0.8136652708053589, "learning_rate": 1.962838986884818e-06, "loss": 0.1208, "step": 5388 }, { "epoch": 1.7462734931950745, "grad_norm": 0.8009673953056335, "learning_rate": 1.9619848390196734e-06, "loss": 0.1083, "step": 5389 }, { "epoch": 1.746597537265068, "grad_norm": 0.749184250831604, "learning_rate": 1.9611307570023766e-06, "loss": 0.1048, "step": 5390 }, { "epoch": 1.7469215813350616, "grad_norm": 0.8438735008239746, "learning_rate": 1.960276740937458e-06, "loss": 0.1162, "step": 5391 }, { "epoch": 1.7472456254050552, "grad_norm": 0.7352275848388672, "learning_rate": 1.959422790929441e-06, "loss": 0.1023, "step": 5392 }, { "epoch": 1.7475696694750487, "grad_norm": 0.8380963206291199, "learning_rate": 1.9585689070828413e-06, "loss": 0.1136, "step": 5393 }, { "epoch": 1.747893713545042, "grad_norm": 0.8800554275512695, "learning_rate": 1.9577150895021664e-06, "loss": 0.1219, "step": 5394 }, { "epoch": 1.7482177576150355, "grad_norm": 0.7962919473648071, "learning_rate": 1.9568613382919142e-06, "loss": 0.1144, "step": 5395 }, { "epoch": 1.7485418016850292, "grad_norm": 0.8999350666999817, "learning_rate": 1.9560076535565766e-06, "loss": 0.1117, "step": 5396 }, { "epoch": 1.7488658457550228, "grad_norm": 0.8126070499420166, "learning_rate": 1.9551540354006366e-06, "loss": 0.1117, "step": 5397 }, { "epoch": 1.7491898898250162, "grad_norm": 0.8283818364143372, "learning_rate": 1.954300483928567e-06, "loss": 0.1188, "step": 5398 }, { "epoch": 1.7495139338950096, "grad_norm": 0.8194707036018372, "learning_rate": 1.953446999244836e-06, "loss": 0.1105, "step": 5399 }, { "epoch": 1.7498379779650033, "grad_norm": 0.7576519846916199, "learning_rate": 1.9525935814539e-06, "loss": 0.1008, "step": 5400 }, { "epoch": 1.7501620220349967, "grad_norm": 0.8350682854652405, "learning_rate": 1.951740230660212e-06, "loss": 0.1184, "step": 5401 }, { "epoch": 1.7504860661049904, "grad_norm": 0.8378635048866272, "learning_rate": 1.950886946968212e-06, "loss": 0.1135, "step": 5402 }, { "epoch": 1.7508101101749838, "grad_norm": 0.7591724395751953, "learning_rate": 1.9500337304823333e-06, "loss": 0.1009, "step": 5403 }, { "epoch": 1.7511341542449772, "grad_norm": 0.8226900696754456, "learning_rate": 1.9491805813070025e-06, "loss": 0.1155, "step": 5404 }, { "epoch": 1.7514581983149708, "grad_norm": 0.8173897862434387, "learning_rate": 1.948327499546635e-06, "loss": 0.1143, "step": 5405 }, { "epoch": 1.7517822423849645, "grad_norm": 0.735022783279419, "learning_rate": 1.947474485305642e-06, "loss": 0.1036, "step": 5406 }, { "epoch": 1.752106286454958, "grad_norm": 0.8631489872932434, "learning_rate": 1.9466215386884223e-06, "loss": 0.1249, "step": 5407 }, { "epoch": 1.7524303305249513, "grad_norm": 0.8307135105133057, "learning_rate": 1.9457686597993704e-06, "loss": 0.1137, "step": 5408 }, { "epoch": 1.7527543745949448, "grad_norm": 0.8436553478240967, "learning_rate": 1.9449158487428688e-06, "loss": 0.1183, "step": 5409 }, { "epoch": 1.7530784186649384, "grad_norm": 0.8238299489021301, "learning_rate": 1.9440631056232926e-06, "loss": 0.1132, "step": 5410 }, { "epoch": 1.753402462734932, "grad_norm": 0.8608657121658325, "learning_rate": 1.9432104305450117e-06, "loss": 0.1219, "step": 5411 }, { "epoch": 1.7537265068049255, "grad_norm": 0.8268369436264038, "learning_rate": 1.942357823612383e-06, "loss": 0.1212, "step": 5412 }, { "epoch": 1.754050550874919, "grad_norm": 0.9069793820381165, "learning_rate": 1.9415052849297585e-06, "loss": 0.1331, "step": 5413 }, { "epoch": 1.7543745949449125, "grad_norm": 0.8022398352622986, "learning_rate": 1.9406528146014815e-06, "loss": 0.1126, "step": 5414 }, { "epoch": 1.7546986390149062, "grad_norm": 0.8379114270210266, "learning_rate": 1.939800412731884e-06, "loss": 0.1239, "step": 5415 }, { "epoch": 1.7550226830848996, "grad_norm": 0.8638489842414856, "learning_rate": 1.9389480794252933e-06, "loss": 0.1207, "step": 5416 }, { "epoch": 1.755346727154893, "grad_norm": 0.7857935428619385, "learning_rate": 1.9380958147860254e-06, "loss": 0.1119, "step": 5417 }, { "epoch": 1.7556707712248865, "grad_norm": 0.8815799355506897, "learning_rate": 1.937243618918391e-06, "loss": 0.1176, "step": 5418 }, { "epoch": 1.75599481529488, "grad_norm": 0.7748388648033142, "learning_rate": 1.936391491926689e-06, "loss": 0.1063, "step": 5419 }, { "epoch": 1.7563188593648738, "grad_norm": 0.812292754650116, "learning_rate": 1.9355394339152133e-06, "loss": 0.1105, "step": 5420 }, { "epoch": 1.7566429034348672, "grad_norm": 0.7713181376457214, "learning_rate": 1.9346874449882465e-06, "loss": 0.1145, "step": 5421 }, { "epoch": 1.7569669475048606, "grad_norm": 0.8215756416320801, "learning_rate": 1.9338355252500624e-06, "loss": 0.1151, "step": 5422 }, { "epoch": 1.757290991574854, "grad_norm": 0.7613236904144287, "learning_rate": 1.93298367480493e-06, "loss": 0.1037, "step": 5423 }, { "epoch": 1.7576150356448477, "grad_norm": 0.809416651725769, "learning_rate": 1.932131893757107e-06, "loss": 0.1012, "step": 5424 }, { "epoch": 1.7579390797148413, "grad_norm": 0.8261358141899109, "learning_rate": 1.9312801822108425e-06, "loss": 0.1164, "step": 5425 }, { "epoch": 1.7582631237848347, "grad_norm": 0.805388331413269, "learning_rate": 1.9304285402703775e-06, "loss": 0.114, "step": 5426 }, { "epoch": 1.7585871678548282, "grad_norm": 0.8319758176803589, "learning_rate": 1.929576968039946e-06, "loss": 0.1232, "step": 5427 }, { "epoch": 1.7589112119248218, "grad_norm": 0.8471801280975342, "learning_rate": 1.928725465623772e-06, "loss": 0.1236, "step": 5428 }, { "epoch": 1.7592352559948155, "grad_norm": 0.8064789772033691, "learning_rate": 1.927874033126069e-06, "loss": 0.1191, "step": 5429 }, { "epoch": 1.7595593000648089, "grad_norm": 0.8756115436553955, "learning_rate": 1.927022670651047e-06, "loss": 0.1263, "step": 5430 }, { "epoch": 1.7598833441348023, "grad_norm": 0.7553970813751221, "learning_rate": 1.9261713783029024e-06, "loss": 0.1064, "step": 5431 }, { "epoch": 1.7602073882047957, "grad_norm": 0.7707648277282715, "learning_rate": 1.9253201561858266e-06, "loss": 0.1038, "step": 5432 }, { "epoch": 1.7605314322747894, "grad_norm": 0.7251192927360535, "learning_rate": 1.924469004404001e-06, "loss": 0.1036, "step": 5433 }, { "epoch": 1.760855476344783, "grad_norm": 0.7130362391471863, "learning_rate": 1.9236179230615967e-06, "loss": 0.099, "step": 5434 }, { "epoch": 1.7611795204147764, "grad_norm": 0.7608776688575745, "learning_rate": 1.922766912262779e-06, "loss": 0.1005, "step": 5435 }, { "epoch": 1.7615035644847699, "grad_norm": 0.8560105562210083, "learning_rate": 1.921915972111703e-06, "loss": 0.1186, "step": 5436 }, { "epoch": 1.7618276085547635, "grad_norm": 0.7603005766868591, "learning_rate": 1.9210651027125164e-06, "loss": 0.1112, "step": 5437 }, { "epoch": 1.762151652624757, "grad_norm": 0.7880005240440369, "learning_rate": 1.9202143041693554e-06, "loss": 0.1141, "step": 5438 }, { "epoch": 1.7624756966947506, "grad_norm": 0.8264434933662415, "learning_rate": 1.919363576586352e-06, "loss": 0.1116, "step": 5439 }, { "epoch": 1.762799740764744, "grad_norm": 0.8080835342407227, "learning_rate": 1.918512920067626e-06, "loss": 0.1137, "step": 5440 }, { "epoch": 1.7631237848347374, "grad_norm": 0.8233956694602966, "learning_rate": 1.9176623347172885e-06, "loss": 0.1129, "step": 5441 }, { "epoch": 1.763447828904731, "grad_norm": 0.8224033713340759, "learning_rate": 1.9168118206394443e-06, "loss": 0.1191, "step": 5442 }, { "epoch": 1.7637718729747247, "grad_norm": 0.7948696613311768, "learning_rate": 1.915961377938187e-06, "loss": 0.1141, "step": 5443 }, { "epoch": 1.7640959170447181, "grad_norm": 0.9159996509552002, "learning_rate": 1.9151110067176038e-06, "loss": 0.1365, "step": 5444 }, { "epoch": 1.7644199611147116, "grad_norm": 0.7687258720397949, "learning_rate": 1.914260707081771e-06, "loss": 0.1043, "step": 5445 }, { "epoch": 1.764744005184705, "grad_norm": 0.8619191646575928, "learning_rate": 1.913410479134757e-06, "loss": 0.123, "step": 5446 }, { "epoch": 1.7650680492546986, "grad_norm": 0.881009578704834, "learning_rate": 1.9125603229806223e-06, "loss": 0.1185, "step": 5447 }, { "epoch": 1.7653920933246923, "grad_norm": 0.7797643542289734, "learning_rate": 1.9117102387234165e-06, "loss": 0.1069, "step": 5448 }, { "epoch": 1.7657161373946857, "grad_norm": 0.9099947214126587, "learning_rate": 1.910860226467183e-06, "loss": 0.1206, "step": 5449 }, { "epoch": 1.7660401814646791, "grad_norm": 0.8067218065261841, "learning_rate": 1.910010286315953e-06, "loss": 0.1021, "step": 5450 }, { "epoch": 1.7663642255346728, "grad_norm": 0.7724782228469849, "learning_rate": 1.9091604183737546e-06, "loss": 0.1118, "step": 5451 }, { "epoch": 1.7666882696046662, "grad_norm": 0.8174054622650146, "learning_rate": 1.9083106227446e-06, "loss": 0.1098, "step": 5452 }, { "epoch": 1.7670123136746598, "grad_norm": 0.8142311573028564, "learning_rate": 1.907460899532497e-06, "loss": 0.1042, "step": 5453 }, { "epoch": 1.7673363577446533, "grad_norm": 0.8318405151367188, "learning_rate": 1.9066112488414445e-06, "loss": 0.111, "step": 5454 }, { "epoch": 1.7676604018146467, "grad_norm": 0.8179137110710144, "learning_rate": 1.90576167077543e-06, "loss": 0.1048, "step": 5455 }, { "epoch": 1.7679844458846403, "grad_norm": 0.7973840832710266, "learning_rate": 1.904912165438435e-06, "loss": 0.118, "step": 5456 }, { "epoch": 1.768308489954634, "grad_norm": 0.794683039188385, "learning_rate": 1.9040627329344296e-06, "loss": 0.1086, "step": 5457 }, { "epoch": 1.7686325340246274, "grad_norm": 0.8206287026405334, "learning_rate": 1.9032133733673764e-06, "loss": 0.1183, "step": 5458 }, { "epoch": 1.7689565780946208, "grad_norm": 0.7799130082130432, "learning_rate": 1.9023640868412297e-06, "loss": 0.1059, "step": 5459 }, { "epoch": 1.7692806221646142, "grad_norm": 0.7698259949684143, "learning_rate": 1.9015148734599317e-06, "loss": 0.1103, "step": 5460 }, { "epoch": 1.7696046662346079, "grad_norm": 0.8091194033622742, "learning_rate": 1.900665733327421e-06, "loss": 0.115, "step": 5461 }, { "epoch": 1.7699287103046015, "grad_norm": 0.8662896752357483, "learning_rate": 1.899816666547621e-06, "loss": 0.1244, "step": 5462 }, { "epoch": 1.770252754374595, "grad_norm": 0.8698745369911194, "learning_rate": 1.8989676732244522e-06, "loss": 0.1276, "step": 5463 }, { "epoch": 1.7705767984445884, "grad_norm": 0.863068163394928, "learning_rate": 1.8981187534618217e-06, "loss": 0.1183, "step": 5464 }, { "epoch": 1.770900842514582, "grad_norm": 0.7592810988426208, "learning_rate": 1.8972699073636283e-06, "loss": 0.1053, "step": 5465 }, { "epoch": 1.7712248865845757, "grad_norm": 0.8105905652046204, "learning_rate": 1.8964211350337637e-06, "loss": 0.112, "step": 5466 }, { "epoch": 1.771548930654569, "grad_norm": 0.7767974138259888, "learning_rate": 1.895572436576109e-06, "loss": 0.1049, "step": 5467 }, { "epoch": 1.7718729747245625, "grad_norm": 0.8776288628578186, "learning_rate": 1.8947238120945372e-06, "loss": 0.1234, "step": 5468 }, { "epoch": 1.772197018794556, "grad_norm": 0.8377476334571838, "learning_rate": 1.8938752616929112e-06, "loss": 0.1157, "step": 5469 }, { "epoch": 1.7725210628645496, "grad_norm": 0.8418101072311401, "learning_rate": 1.8930267854750845e-06, "loss": 0.1247, "step": 5470 }, { "epoch": 1.7728451069345432, "grad_norm": 0.7262491583824158, "learning_rate": 1.8921783835449042e-06, "loss": 0.1037, "step": 5471 }, { "epoch": 1.7731691510045366, "grad_norm": 0.8218994140625, "learning_rate": 1.8913300560062047e-06, "loss": 0.1125, "step": 5472 }, { "epoch": 1.77349319507453, "grad_norm": 0.7634402513504028, "learning_rate": 1.890481802962815e-06, "loss": 0.1026, "step": 5473 }, { "epoch": 1.7738172391445235, "grad_norm": 0.8064968585968018, "learning_rate": 1.889633624518551e-06, "loss": 0.116, "step": 5474 }, { "epoch": 1.7741412832145171, "grad_norm": 0.8574610948562622, "learning_rate": 1.8887855207772235e-06, "loss": 0.1254, "step": 5475 }, { "epoch": 1.7744653272845108, "grad_norm": 0.7670876383781433, "learning_rate": 1.8879374918426312e-06, "loss": 0.1119, "step": 5476 }, { "epoch": 1.7747893713545042, "grad_norm": 0.8037365078926086, "learning_rate": 1.8870895378185643e-06, "loss": 0.1106, "step": 5477 }, { "epoch": 1.7751134154244976, "grad_norm": 0.7313063740730286, "learning_rate": 1.886241658808805e-06, "loss": 0.1012, "step": 5478 }, { "epoch": 1.7754374594944913, "grad_norm": 0.8549699187278748, "learning_rate": 1.8853938549171242e-06, "loss": 0.1195, "step": 5479 }, { "epoch": 1.775761503564485, "grad_norm": 0.8040359020233154, "learning_rate": 1.8845461262472863e-06, "loss": 0.1167, "step": 5480 }, { "epoch": 1.7760855476344783, "grad_norm": 0.7298676371574402, "learning_rate": 1.883698472903045e-06, "loss": 0.1053, "step": 5481 }, { "epoch": 1.7764095917044718, "grad_norm": 0.7766726016998291, "learning_rate": 1.882850894988143e-06, "loss": 0.1121, "step": 5482 }, { "epoch": 1.7767336357744652, "grad_norm": 0.7604732513427734, "learning_rate": 1.882003392606318e-06, "loss": 0.1104, "step": 5483 }, { "epoch": 1.7770576798444588, "grad_norm": 0.757429301738739, "learning_rate": 1.8811559658612941e-06, "loss": 0.1065, "step": 5484 }, { "epoch": 1.7773817239144525, "grad_norm": 0.8506981134414673, "learning_rate": 1.88030861485679e-06, "loss": 0.1186, "step": 5485 }, { "epoch": 1.777705767984446, "grad_norm": 0.772424578666687, "learning_rate": 1.879461339696512e-06, "loss": 0.1101, "step": 5486 }, { "epoch": 1.7780298120544393, "grad_norm": 0.777412474155426, "learning_rate": 1.8786141404841587e-06, "loss": 0.1094, "step": 5487 }, { "epoch": 1.778353856124433, "grad_norm": 0.8396077752113342, "learning_rate": 1.8777670173234198e-06, "loss": 0.1134, "step": 5488 }, { "epoch": 1.7786779001944264, "grad_norm": 0.7566655874252319, "learning_rate": 1.8769199703179736e-06, "loss": 0.1046, "step": 5489 }, { "epoch": 1.77900194426442, "grad_norm": 0.8267093896865845, "learning_rate": 1.8760729995714916e-06, "loss": 0.1147, "step": 5490 }, { "epoch": 1.7793259883344135, "grad_norm": 0.8138669729232788, "learning_rate": 1.8752261051876337e-06, "loss": 0.117, "step": 5491 }, { "epoch": 1.779650032404407, "grad_norm": 0.8392179012298584, "learning_rate": 1.8743792872700529e-06, "loss": 0.1089, "step": 5492 }, { "epoch": 1.7799740764744005, "grad_norm": 0.7915463447570801, "learning_rate": 1.873532545922391e-06, "loss": 0.109, "step": 5493 }, { "epoch": 1.7802981205443942, "grad_norm": 0.7900585532188416, "learning_rate": 1.8726858812482798e-06, "loss": 0.1122, "step": 5494 }, { "epoch": 1.7806221646143876, "grad_norm": 0.7889156937599182, "learning_rate": 1.871839293351345e-06, "loss": 0.1055, "step": 5495 }, { "epoch": 1.780946208684381, "grad_norm": 0.8391129970550537, "learning_rate": 1.870992782335198e-06, "loss": 0.1161, "step": 5496 }, { "epoch": 1.7812702527543745, "grad_norm": 0.8119585514068604, "learning_rate": 1.8701463483034471e-06, "loss": 0.1159, "step": 5497 }, { "epoch": 1.781594296824368, "grad_norm": 0.9002379775047302, "learning_rate": 1.8692999913596846e-06, "loss": 0.1303, "step": 5498 }, { "epoch": 1.7819183408943617, "grad_norm": 0.8775905966758728, "learning_rate": 1.8684537116074983e-06, "loss": 0.1182, "step": 5499 }, { "epoch": 1.7822423849643552, "grad_norm": 0.7602542042732239, "learning_rate": 1.8676075091504637e-06, "loss": 0.101, "step": 5500 }, { "epoch": 1.7825664290343486, "grad_norm": 0.8325319886207581, "learning_rate": 1.866761384092147e-06, "loss": 0.1114, "step": 5501 }, { "epoch": 1.7828904731043422, "grad_norm": 0.8351833820343018, "learning_rate": 1.8659153365361076e-06, "loss": 0.1161, "step": 5502 }, { "epoch": 1.7832145171743357, "grad_norm": 0.8513324856758118, "learning_rate": 1.8650693665858916e-06, "loss": 0.1202, "step": 5503 }, { "epoch": 1.7835385612443293, "grad_norm": 0.7680867910385132, "learning_rate": 1.8642234743450394e-06, "loss": 0.1019, "step": 5504 }, { "epoch": 1.7838626053143227, "grad_norm": 0.8985635638237, "learning_rate": 1.8633776599170783e-06, "loss": 0.1187, "step": 5505 }, { "epoch": 1.7841866493843161, "grad_norm": 0.8368175029754639, "learning_rate": 1.86253192340553e-06, "loss": 0.1177, "step": 5506 }, { "epoch": 1.7845106934543098, "grad_norm": 0.7756860852241516, "learning_rate": 1.8616862649139024e-06, "loss": 0.1057, "step": 5507 }, { "epoch": 1.7848347375243034, "grad_norm": 0.7564083933830261, "learning_rate": 1.8608406845456968e-06, "loss": 0.106, "step": 5508 }, { "epoch": 1.7851587815942969, "grad_norm": 0.7947510480880737, "learning_rate": 1.8599951824044033e-06, "loss": 0.1036, "step": 5509 }, { "epoch": 1.7854828256642903, "grad_norm": 0.8901932239532471, "learning_rate": 1.8591497585935041e-06, "loss": 0.1292, "step": 5510 }, { "epoch": 1.7858068697342837, "grad_norm": 0.8269218802452087, "learning_rate": 1.858304413216471e-06, "loss": 0.1167, "step": 5511 }, { "epoch": 1.7861309138042774, "grad_norm": 0.782346785068512, "learning_rate": 1.8574591463767656e-06, "loss": 0.1083, "step": 5512 }, { "epoch": 1.786454957874271, "grad_norm": 0.8424670696258545, "learning_rate": 1.8566139581778392e-06, "loss": 0.1178, "step": 5513 }, { "epoch": 1.7867790019442644, "grad_norm": 0.8284109830856323, "learning_rate": 1.855768848723137e-06, "loss": 0.1123, "step": 5514 }, { "epoch": 1.7871030460142578, "grad_norm": 0.816237211227417, "learning_rate": 1.85492381811609e-06, "loss": 0.1181, "step": 5515 }, { "epoch": 1.7874270900842515, "grad_norm": 0.8606086373329163, "learning_rate": 1.854078866460124e-06, "loss": 0.1094, "step": 5516 }, { "epoch": 1.7877511341542451, "grad_norm": 0.8743523359298706, "learning_rate": 1.8532339938586513e-06, "loss": 0.1208, "step": 5517 }, { "epoch": 1.7880751782242386, "grad_norm": 0.7890545725822449, "learning_rate": 1.8523892004150765e-06, "loss": 0.1117, "step": 5518 }, { "epoch": 1.788399222294232, "grad_norm": 0.7640580534934998, "learning_rate": 1.8515444862327947e-06, "loss": 0.1057, "step": 5519 }, { "epoch": 1.7887232663642254, "grad_norm": 0.7732981443405151, "learning_rate": 1.8506998514151896e-06, "loss": 0.1088, "step": 5520 }, { "epoch": 1.789047310434219, "grad_norm": 0.7903842329978943, "learning_rate": 1.8498552960656378e-06, "loss": 0.1075, "step": 5521 }, { "epoch": 1.7893713545042127, "grad_norm": 0.8699726462364197, "learning_rate": 1.8490108202875023e-06, "loss": 0.1256, "step": 5522 }, { "epoch": 1.7896953985742061, "grad_norm": 0.7327967882156372, "learning_rate": 1.848166424184142e-06, "loss": 0.1058, "step": 5523 }, { "epoch": 1.7900194426441995, "grad_norm": 0.8472350239753723, "learning_rate": 1.8473221078589006e-06, "loss": 0.1179, "step": 5524 }, { "epoch": 1.790343486714193, "grad_norm": 0.7945756316184998, "learning_rate": 1.846477871415114e-06, "loss": 0.108, "step": 5525 }, { "epoch": 1.7906675307841866, "grad_norm": 0.8109987378120422, "learning_rate": 1.8456337149561105e-06, "loss": 0.1162, "step": 5526 }, { "epoch": 1.7909915748541803, "grad_norm": 0.808334469795227, "learning_rate": 1.8447896385852043e-06, "loss": 0.1114, "step": 5527 }, { "epoch": 1.7913156189241737, "grad_norm": 0.8291633725166321, "learning_rate": 1.8439456424057044e-06, "loss": 0.1116, "step": 5528 }, { "epoch": 1.791639662994167, "grad_norm": 0.822845995426178, "learning_rate": 1.8431017265209067e-06, "loss": 0.1138, "step": 5529 }, { "epoch": 1.7919637070641607, "grad_norm": 0.7952374815940857, "learning_rate": 1.8422578910340985e-06, "loss": 0.1073, "step": 5530 }, { "epoch": 1.7922877511341544, "grad_norm": 0.8398154973983765, "learning_rate": 1.8414141360485565e-06, "loss": 0.1159, "step": 5531 }, { "epoch": 1.7926117952041478, "grad_norm": 0.7845988273620605, "learning_rate": 1.840570461667549e-06, "loss": 0.1043, "step": 5532 }, { "epoch": 1.7929358392741412, "grad_norm": 0.8346614241600037, "learning_rate": 1.8397268679943333e-06, "loss": 0.1109, "step": 5533 }, { "epoch": 1.7932598833441347, "grad_norm": 0.8257557153701782, "learning_rate": 1.8388833551321562e-06, "loss": 0.1118, "step": 5534 }, { "epoch": 1.7935839274141283, "grad_norm": 0.8189712762832642, "learning_rate": 1.838039923184257e-06, "loss": 0.1122, "step": 5535 }, { "epoch": 1.793907971484122, "grad_norm": 0.7669243812561035, "learning_rate": 1.8371965722538636e-06, "loss": 0.1116, "step": 5536 }, { "epoch": 1.7942320155541154, "grad_norm": 0.7271428108215332, "learning_rate": 1.836353302444192e-06, "loss": 0.1006, "step": 5537 }, { "epoch": 1.7945560596241088, "grad_norm": 0.8294802308082581, "learning_rate": 1.8355101138584524e-06, "loss": 0.1216, "step": 5538 }, { "epoch": 1.7948801036941024, "grad_norm": 0.747330904006958, "learning_rate": 1.8346670065998411e-06, "loss": 0.1061, "step": 5539 }, { "epoch": 1.7952041477640959, "grad_norm": 0.8953630328178406, "learning_rate": 1.8338239807715486e-06, "loss": 0.1247, "step": 5540 }, { "epoch": 1.7955281918340895, "grad_norm": 0.8005939722061157, "learning_rate": 1.8329810364767511e-06, "loss": 0.111, "step": 5541 }, { "epoch": 1.795852235904083, "grad_norm": 0.7868248224258423, "learning_rate": 1.8321381738186178e-06, "loss": 0.1054, "step": 5542 }, { "epoch": 1.7961762799740764, "grad_norm": 0.7667168974876404, "learning_rate": 1.8312953929003068e-06, "loss": 0.1025, "step": 5543 }, { "epoch": 1.79650032404407, "grad_norm": 0.7552728652954102, "learning_rate": 1.8304526938249653e-06, "loss": 0.1005, "step": 5544 }, { "epoch": 1.7968243681140637, "grad_norm": 0.8472879528999329, "learning_rate": 1.8296100766957331e-06, "loss": 0.118, "step": 5545 }, { "epoch": 1.797148412184057, "grad_norm": 0.7923120260238647, "learning_rate": 1.828767541615737e-06, "loss": 0.1094, "step": 5546 }, { "epoch": 1.7974724562540505, "grad_norm": 0.8863158822059631, "learning_rate": 1.8279250886880962e-06, "loss": 0.1308, "step": 5547 }, { "epoch": 1.797796500324044, "grad_norm": 0.8249463438987732, "learning_rate": 1.827082718015919e-06, "loss": 0.1177, "step": 5548 }, { "epoch": 1.7981205443940376, "grad_norm": 0.7819499373435974, "learning_rate": 1.8262404297023013e-06, "loss": 0.1148, "step": 5549 }, { "epoch": 1.7984445884640312, "grad_norm": 0.7605578303337097, "learning_rate": 1.8253982238503338e-06, "loss": 0.1037, "step": 5550 }, { "epoch": 1.7987686325340246, "grad_norm": 0.778826117515564, "learning_rate": 1.8245561005630921e-06, "loss": 0.108, "step": 5551 }, { "epoch": 1.799092676604018, "grad_norm": 0.8250377178192139, "learning_rate": 1.823714059943646e-06, "loss": 0.1149, "step": 5552 }, { "epoch": 1.7994167206740117, "grad_norm": 0.776631772518158, "learning_rate": 1.8228721020950504e-06, "loss": 0.1045, "step": 5553 }, { "epoch": 1.7997407647440054, "grad_norm": 0.8652671575546265, "learning_rate": 1.8220302271203557e-06, "loss": 0.1172, "step": 5554 }, { "epoch": 1.8000648088139988, "grad_norm": 0.7777321934700012, "learning_rate": 1.8211884351225978e-06, "loss": 0.105, "step": 5555 }, { "epoch": 1.8003888528839922, "grad_norm": 0.8324203491210938, "learning_rate": 1.8203467262048033e-06, "loss": 0.1198, "step": 5556 }, { "epoch": 1.8007128969539856, "grad_norm": 0.7479450702667236, "learning_rate": 1.819505100469991e-06, "loss": 0.1027, "step": 5557 }, { "epoch": 1.8010369410239793, "grad_norm": 0.8221516013145447, "learning_rate": 1.8186635580211654e-06, "loss": 0.1082, "step": 5558 }, { "epoch": 1.801360985093973, "grad_norm": 0.8015259504318237, "learning_rate": 1.8178220989613255e-06, "loss": 0.1073, "step": 5559 }, { "epoch": 1.8016850291639663, "grad_norm": 0.817034125328064, "learning_rate": 1.8169807233934567e-06, "loss": 0.1109, "step": 5560 }, { "epoch": 1.8020090732339598, "grad_norm": 0.8351563215255737, "learning_rate": 1.8161394314205343e-06, "loss": 0.1189, "step": 5561 }, { "epoch": 1.8023331173039532, "grad_norm": 0.7715746760368347, "learning_rate": 1.8152982231455262e-06, "loss": 0.1047, "step": 5562 }, { "epoch": 1.8026571613739468, "grad_norm": 0.8527897596359253, "learning_rate": 1.8144570986713867e-06, "loss": 0.1201, "step": 5563 }, { "epoch": 1.8029812054439405, "grad_norm": 0.8445289134979248, "learning_rate": 1.8136160581010624e-06, "loss": 0.1186, "step": 5564 }, { "epoch": 1.803305249513934, "grad_norm": 0.877656102180481, "learning_rate": 1.8127751015374865e-06, "loss": 0.1211, "step": 5565 }, { "epoch": 1.8036292935839273, "grad_norm": 0.7629379034042358, "learning_rate": 1.8119342290835864e-06, "loss": 0.1053, "step": 5566 }, { "epoch": 1.803953337653921, "grad_norm": 0.8069267868995667, "learning_rate": 1.8110934408422758e-06, "loss": 0.1132, "step": 5567 }, { "epoch": 1.8042773817239146, "grad_norm": 0.899695873260498, "learning_rate": 1.810252736916458e-06, "loss": 0.1216, "step": 5568 }, { "epoch": 1.804601425793908, "grad_norm": 0.8216001391410828, "learning_rate": 1.8094121174090288e-06, "loss": 0.1135, "step": 5569 }, { "epoch": 1.8049254698639015, "grad_norm": 0.8484271168708801, "learning_rate": 1.80857158242287e-06, "loss": 0.1267, "step": 5570 }, { "epoch": 1.8052495139338949, "grad_norm": 0.845133364200592, "learning_rate": 1.8077311320608571e-06, "loss": 0.1172, "step": 5571 }, { "epoch": 1.8055735580038885, "grad_norm": 0.8173267841339111, "learning_rate": 1.806890766425851e-06, "loss": 0.1136, "step": 5572 }, { "epoch": 1.8058976020738822, "grad_norm": 0.8459123373031616, "learning_rate": 1.8060504856207062e-06, "loss": 0.1203, "step": 5573 }, { "epoch": 1.8062216461438756, "grad_norm": 0.8414890170097351, "learning_rate": 1.8052102897482643e-06, "loss": 0.1127, "step": 5574 }, { "epoch": 1.806545690213869, "grad_norm": 0.7746798396110535, "learning_rate": 1.8043701789113552e-06, "loss": 0.1093, "step": 5575 }, { "epoch": 1.8068697342838627, "grad_norm": 0.8486313223838806, "learning_rate": 1.8035301532128032e-06, "loss": 0.1098, "step": 5576 }, { "epoch": 1.807193778353856, "grad_norm": 0.8131248950958252, "learning_rate": 1.8026902127554172e-06, "loss": 0.1156, "step": 5577 }, { "epoch": 1.8075178224238497, "grad_norm": 0.846875786781311, "learning_rate": 1.8018503576419996e-06, "loss": 0.1163, "step": 5578 }, { "epoch": 1.8078418664938432, "grad_norm": 0.8525400161743164, "learning_rate": 1.8010105879753398e-06, "loss": 0.1169, "step": 5579 }, { "epoch": 1.8081659105638366, "grad_norm": 0.7956811785697937, "learning_rate": 1.800170903858216e-06, "loss": 0.107, "step": 5580 }, { "epoch": 1.8084899546338302, "grad_norm": 0.7707834243774414, "learning_rate": 1.7993313053933998e-06, "loss": 0.1065, "step": 5581 }, { "epoch": 1.8088139987038239, "grad_norm": 0.7616016864776611, "learning_rate": 1.7984917926836484e-06, "loss": 0.1078, "step": 5582 }, { "epoch": 1.8091380427738173, "grad_norm": 0.8728845715522766, "learning_rate": 1.7976523658317104e-06, "loss": 0.1256, "step": 5583 }, { "epoch": 1.8094620868438107, "grad_norm": 0.8702101707458496, "learning_rate": 1.7968130249403238e-06, "loss": 0.1167, "step": 5584 }, { "epoch": 1.8097861309138041, "grad_norm": 0.8324860334396362, "learning_rate": 1.7959737701122157e-06, "loss": 0.1242, "step": 5585 }, { "epoch": 1.8101101749837978, "grad_norm": 0.8261635303497314, "learning_rate": 1.7951346014501027e-06, "loss": 0.1085, "step": 5586 }, { "epoch": 1.8104342190537914, "grad_norm": 0.8950084447860718, "learning_rate": 1.7942955190566899e-06, "loss": 0.1043, "step": 5587 }, { "epoch": 1.8107582631237849, "grad_norm": 0.7961917519569397, "learning_rate": 1.7934565230346752e-06, "loss": 0.1083, "step": 5588 }, { "epoch": 1.8110823071937783, "grad_norm": 0.7906010746955872, "learning_rate": 1.7926176134867408e-06, "loss": 0.1068, "step": 5589 }, { "epoch": 1.811406351263772, "grad_norm": 0.841482937335968, "learning_rate": 1.7917787905155634e-06, "loss": 0.1244, "step": 5590 }, { "epoch": 1.8117303953337653, "grad_norm": 0.802306592464447, "learning_rate": 1.790940054223806e-06, "loss": 0.1149, "step": 5591 }, { "epoch": 1.812054439403759, "grad_norm": 0.8944019675254822, "learning_rate": 1.7901014047141208e-06, "loss": 0.1219, "step": 5592 }, { "epoch": 1.8123784834737524, "grad_norm": 0.8466723561286926, "learning_rate": 1.7892628420891526e-06, "loss": 0.1077, "step": 5593 }, { "epoch": 1.8127025275437458, "grad_norm": 0.8002016544342041, "learning_rate": 1.788424366451531e-06, "loss": 0.1126, "step": 5594 }, { "epoch": 1.8130265716137395, "grad_norm": 0.7788607478141785, "learning_rate": 1.7875859779038796e-06, "loss": 0.1081, "step": 5595 }, { "epoch": 1.8133506156837331, "grad_norm": 0.7892195582389832, "learning_rate": 1.7867476765488061e-06, "loss": 0.1089, "step": 5596 }, { "epoch": 1.8136746597537265, "grad_norm": 0.8240311741828918, "learning_rate": 1.7859094624889135e-06, "loss": 0.1165, "step": 5597 }, { "epoch": 1.81399870382372, "grad_norm": 0.8330913186073303, "learning_rate": 1.7850713358267897e-06, "loss": 0.1157, "step": 5598 }, { "epoch": 1.8143227478937134, "grad_norm": 0.8133293390274048, "learning_rate": 1.7842332966650122e-06, "loss": 0.1139, "step": 5599 }, { "epoch": 1.814646791963707, "grad_norm": 0.8080258369445801, "learning_rate": 1.7833953451061513e-06, "loss": 0.1074, "step": 5600 }, { "epoch": 1.8149708360337007, "grad_norm": 0.7805373072624207, "learning_rate": 1.7825574812527617e-06, "loss": 0.1061, "step": 5601 }, { "epoch": 1.815294880103694, "grad_norm": 0.7952004671096802, "learning_rate": 1.781719705207392e-06, "loss": 0.1103, "step": 5602 }, { "epoch": 1.8156189241736875, "grad_norm": 0.7933663725852966, "learning_rate": 1.7808820170725772e-06, "loss": 0.1052, "step": 5603 }, { "epoch": 1.8159429682436812, "grad_norm": 0.857387900352478, "learning_rate": 1.7800444169508414e-06, "loss": 0.1174, "step": 5604 }, { "epoch": 1.8162670123136748, "grad_norm": 0.8322799205780029, "learning_rate": 1.7792069049446987e-06, "loss": 0.1169, "step": 5605 }, { "epoch": 1.8165910563836682, "grad_norm": 0.7579730749130249, "learning_rate": 1.7783694811566534e-06, "loss": 0.1065, "step": 5606 }, { "epoch": 1.8169151004536617, "grad_norm": 0.7945507764816284, "learning_rate": 1.777532145689198e-06, "loss": 0.1095, "step": 5607 }, { "epoch": 1.817239144523655, "grad_norm": 0.7895393967628479, "learning_rate": 1.7766948986448131e-06, "loss": 0.1058, "step": 5608 }, { "epoch": 1.8175631885936487, "grad_norm": 0.7503500580787659, "learning_rate": 1.7758577401259716e-06, "loss": 0.1079, "step": 5609 }, { "epoch": 1.8178872326636424, "grad_norm": 0.7820692658424377, "learning_rate": 1.7750206702351325e-06, "loss": 0.1079, "step": 5610 }, { "epoch": 1.8182112767336358, "grad_norm": 0.8591213226318359, "learning_rate": 1.7741836890747438e-06, "loss": 0.1171, "step": 5611 }, { "epoch": 1.8185353208036292, "grad_norm": 0.7695344090461731, "learning_rate": 1.7733467967472459e-06, "loss": 0.1051, "step": 5612 }, { "epoch": 1.8188593648736227, "grad_norm": 0.8064881563186646, "learning_rate": 1.7725099933550649e-06, "loss": 0.1082, "step": 5613 }, { "epoch": 1.8191834089436163, "grad_norm": 0.8662197589874268, "learning_rate": 1.7716732790006188e-06, "loss": 0.1145, "step": 5614 }, { "epoch": 1.81950745301361, "grad_norm": 0.8277799487113953, "learning_rate": 1.7708366537863129e-06, "loss": 0.1185, "step": 5615 }, { "epoch": 1.8198314970836034, "grad_norm": 0.7557022571563721, "learning_rate": 1.7700001178145409e-06, "loss": 0.1029, "step": 5616 }, { "epoch": 1.8201555411535968, "grad_norm": 0.7699225544929504, "learning_rate": 1.7691636711876883e-06, "loss": 0.1046, "step": 5617 }, { "epoch": 1.8204795852235904, "grad_norm": 0.7422550916671753, "learning_rate": 1.768327314008126e-06, "loss": 0.1037, "step": 5618 }, { "epoch": 1.820803629293584, "grad_norm": 0.8538484573364258, "learning_rate": 1.7674910463782186e-06, "loss": 0.1219, "step": 5619 }, { "epoch": 1.8211276733635775, "grad_norm": 0.7328668236732483, "learning_rate": 1.766654868400315e-06, "loss": 0.108, "step": 5620 }, { "epoch": 1.821451717433571, "grad_norm": 0.7712154388427734, "learning_rate": 1.7658187801767568e-06, "loss": 0.1009, "step": 5621 }, { "epoch": 1.8217757615035644, "grad_norm": 0.7927885055541992, "learning_rate": 1.7649827818098727e-06, "loss": 0.1069, "step": 5622 }, { "epoch": 1.822099805573558, "grad_norm": 0.7272517681121826, "learning_rate": 1.7641468734019795e-06, "loss": 0.0989, "step": 5623 }, { "epoch": 1.8224238496435516, "grad_norm": 0.7580925822257996, "learning_rate": 1.7633110550553867e-06, "loss": 0.1062, "step": 5624 }, { "epoch": 1.822747893713545, "grad_norm": 0.8520054817199707, "learning_rate": 1.7624753268723882e-06, "loss": 0.1196, "step": 5625 }, { "epoch": 1.8230719377835385, "grad_norm": 0.8394516706466675, "learning_rate": 1.7616396889552706e-06, "loss": 0.115, "step": 5626 }, { "epoch": 1.8233959818535321, "grad_norm": 0.9501622319221497, "learning_rate": 1.7608041414063065e-06, "loss": 0.1275, "step": 5627 }, { "epoch": 1.8237200259235256, "grad_norm": 0.8643268942832947, "learning_rate": 1.7599686843277596e-06, "loss": 0.1206, "step": 5628 }, { "epoch": 1.8240440699935192, "grad_norm": 0.8088141083717346, "learning_rate": 1.7591333178218823e-06, "loss": 0.1037, "step": 5629 }, { "epoch": 1.8243681140635126, "grad_norm": 0.8530080318450928, "learning_rate": 1.7582980419909135e-06, "loss": 0.1229, "step": 5630 }, { "epoch": 1.824692158133506, "grad_norm": 0.8482834696769714, "learning_rate": 1.7574628569370855e-06, "loss": 0.1144, "step": 5631 }, { "epoch": 1.8250162022034997, "grad_norm": 0.8686640858650208, "learning_rate": 1.756627762762614e-06, "loss": 0.1174, "step": 5632 }, { "epoch": 1.8253402462734933, "grad_norm": 0.812243640422821, "learning_rate": 1.7557927595697094e-06, "loss": 0.1156, "step": 5633 }, { "epoch": 1.8256642903434868, "grad_norm": 0.8343957662582397, "learning_rate": 1.7549578474605661e-06, "loss": 0.1134, "step": 5634 }, { "epoch": 1.8259883344134802, "grad_norm": 0.7635195255279541, "learning_rate": 1.754123026537369e-06, "loss": 0.1032, "step": 5635 }, { "epoch": 1.8263123784834736, "grad_norm": 0.8139019012451172, "learning_rate": 1.7532882969022941e-06, "loss": 0.1145, "step": 5636 }, { "epoch": 1.8266364225534673, "grad_norm": 0.7639754414558411, "learning_rate": 1.752453658657502e-06, "loss": 0.1065, "step": 5637 }, { "epoch": 1.826960466623461, "grad_norm": 0.7924251556396484, "learning_rate": 1.7516191119051456e-06, "loss": 0.1129, "step": 5638 }, { "epoch": 1.8272845106934543, "grad_norm": 0.796831488609314, "learning_rate": 1.7507846567473643e-06, "loss": 0.112, "step": 5639 }, { "epoch": 1.8276085547634477, "grad_norm": 0.7968273758888245, "learning_rate": 1.749950293286289e-06, "loss": 0.1103, "step": 5640 }, { "epoch": 1.8279325988334414, "grad_norm": 0.7729365825653076, "learning_rate": 1.7491160216240368e-06, "loss": 0.1157, "step": 5641 }, { "epoch": 1.8282566429034348, "grad_norm": 0.7678266167640686, "learning_rate": 1.7482818418627134e-06, "loss": 0.1115, "step": 5642 }, { "epoch": 1.8285806869734285, "grad_norm": 0.7949190735816956, "learning_rate": 1.7474477541044165e-06, "loss": 0.1096, "step": 5643 }, { "epoch": 1.8289047310434219, "grad_norm": 0.7390647530555725, "learning_rate": 1.746613758451228e-06, "loss": 0.105, "step": 5644 }, { "epoch": 1.8292287751134153, "grad_norm": 0.8184527158737183, "learning_rate": 1.7457798550052232e-06, "loss": 0.1188, "step": 5645 }, { "epoch": 1.829552819183409, "grad_norm": 0.848542332649231, "learning_rate": 1.744946043868463e-06, "loss": 0.116, "step": 5646 }, { "epoch": 1.8298768632534026, "grad_norm": 0.8174238204956055, "learning_rate": 1.7441123251429968e-06, "loss": 0.1102, "step": 5647 }, { "epoch": 1.830200907323396, "grad_norm": 0.7834118008613586, "learning_rate": 1.7432786989308648e-06, "loss": 0.1089, "step": 5648 }, { "epoch": 1.8305249513933894, "grad_norm": 0.7657251954078674, "learning_rate": 1.7424451653340934e-06, "loss": 0.1096, "step": 5649 }, { "epoch": 1.8308489954633829, "grad_norm": 0.8082907795906067, "learning_rate": 1.7416117244547014e-06, "loss": 0.1201, "step": 5650 }, { "epoch": 1.8311730395333765, "grad_norm": 0.8496717810630798, "learning_rate": 1.7407783763946911e-06, "loss": 0.1235, "step": 5651 }, { "epoch": 1.8314970836033702, "grad_norm": 0.8051583766937256, "learning_rate": 1.7399451212560593e-06, "loss": 0.1154, "step": 5652 }, { "epoch": 1.8318211276733636, "grad_norm": 0.8396188020706177, "learning_rate": 1.7391119591407863e-06, "loss": 0.1161, "step": 5653 }, { "epoch": 1.832145171743357, "grad_norm": 0.847876787185669, "learning_rate": 1.7382788901508426e-06, "loss": 0.1184, "step": 5654 }, { "epoch": 1.8324692158133506, "grad_norm": 0.8422421813011169, "learning_rate": 1.7374459143881899e-06, "loss": 0.104, "step": 5655 }, { "epoch": 1.8327932598833443, "grad_norm": 0.7739959955215454, "learning_rate": 1.7366130319547747e-06, "loss": 0.1131, "step": 5656 }, { "epoch": 1.8331173039533377, "grad_norm": 0.7968919277191162, "learning_rate": 1.735780242952534e-06, "loss": 0.1105, "step": 5657 }, { "epoch": 1.8334413480233311, "grad_norm": 0.8058465123176575, "learning_rate": 1.7349475474833938e-06, "loss": 0.1196, "step": 5658 }, { "epoch": 1.8337653920933246, "grad_norm": 0.8363388180732727, "learning_rate": 1.7341149456492672e-06, "loss": 0.1148, "step": 5659 }, { "epoch": 1.8340894361633182, "grad_norm": 0.7761878371238708, "learning_rate": 1.7332824375520574e-06, "loss": 0.1007, "step": 5660 }, { "epoch": 1.8344134802333119, "grad_norm": 0.8327564597129822, "learning_rate": 1.7324500232936536e-06, "loss": 0.1218, "step": 5661 }, { "epoch": 1.8347375243033053, "grad_norm": 0.8729468584060669, "learning_rate": 1.731617702975938e-06, "loss": 0.1299, "step": 5662 }, { "epoch": 1.8350615683732987, "grad_norm": 0.8369905352592468, "learning_rate": 1.7307854767007756e-06, "loss": 0.1154, "step": 5663 }, { "epoch": 1.8353856124432921, "grad_norm": 0.8091652989387512, "learning_rate": 1.7299533445700253e-06, "loss": 0.1108, "step": 5664 }, { "epoch": 1.8357096565132858, "grad_norm": 0.8384531736373901, "learning_rate": 1.7291213066855312e-06, "loss": 0.1189, "step": 5665 }, { "epoch": 1.8360337005832794, "grad_norm": 0.8815416693687439, "learning_rate": 1.7282893631491253e-06, "loss": 0.117, "step": 5666 }, { "epoch": 1.8363577446532728, "grad_norm": 0.7908858060836792, "learning_rate": 1.7274575140626318e-06, "loss": 0.1106, "step": 5667 }, { "epoch": 1.8366817887232663, "grad_norm": 0.8137184977531433, "learning_rate": 1.7266257595278591e-06, "loss": 0.1109, "step": 5668 }, { "epoch": 1.83700583279326, "grad_norm": 0.7663424015045166, "learning_rate": 1.725794099646607e-06, "loss": 0.1098, "step": 5669 }, { "epoch": 1.8373298768632536, "grad_norm": 0.782175600528717, "learning_rate": 1.7249625345206623e-06, "loss": 0.1107, "step": 5670 }, { "epoch": 1.837653920933247, "grad_norm": 0.8185158967971802, "learning_rate": 1.7241310642517998e-06, "loss": 0.1171, "step": 5671 }, { "epoch": 1.8379779650032404, "grad_norm": 0.7788330316543579, "learning_rate": 1.7232996889417846e-06, "loss": 0.1057, "step": 5672 }, { "epoch": 1.8383020090732338, "grad_norm": 0.7830475568771362, "learning_rate": 1.7224684086923677e-06, "loss": 0.1092, "step": 5673 }, { "epoch": 1.8386260531432275, "grad_norm": 0.7791877388954163, "learning_rate": 1.7216372236052914e-06, "loss": 0.1124, "step": 5674 }, { "epoch": 1.8389500972132211, "grad_norm": 0.7709876298904419, "learning_rate": 1.7208061337822828e-06, "loss": 0.1102, "step": 5675 }, { "epoch": 1.8392741412832145, "grad_norm": 0.8033519387245178, "learning_rate": 1.7199751393250614e-06, "loss": 0.1087, "step": 5676 }, { "epoch": 1.839598185353208, "grad_norm": 0.8064829707145691, "learning_rate": 1.7191442403353314e-06, "loss": 0.1106, "step": 5677 }, { "epoch": 1.8399222294232016, "grad_norm": 0.8984112739562988, "learning_rate": 1.7183134369147866e-06, "loss": 0.1151, "step": 5678 }, { "epoch": 1.840246273493195, "grad_norm": 0.8125988245010376, "learning_rate": 1.71748272916511e-06, "loss": 0.1144, "step": 5679 }, { "epoch": 1.8405703175631887, "grad_norm": 0.8261982202529907, "learning_rate": 1.716652117187972e-06, "loss": 0.108, "step": 5680 }, { "epoch": 1.840894361633182, "grad_norm": 0.7781147956848145, "learning_rate": 1.7158216010850318e-06, "loss": 0.1156, "step": 5681 }, { "epoch": 1.8412184057031755, "grad_norm": 0.8234754800796509, "learning_rate": 1.7149911809579361e-06, "loss": 0.1121, "step": 5682 }, { "epoch": 1.8415424497731692, "grad_norm": 0.7627200484275818, "learning_rate": 1.7141608569083195e-06, "loss": 0.1027, "step": 5683 }, { "epoch": 1.8418664938431628, "grad_norm": 0.7426549196243286, "learning_rate": 1.7133306290378077e-06, "loss": 0.0976, "step": 5684 }, { "epoch": 1.8421905379131562, "grad_norm": 0.8514507412910461, "learning_rate": 1.7125004974480102e-06, "loss": 0.1075, "step": 5685 }, { "epoch": 1.8425145819831497, "grad_norm": 0.7836448550224304, "learning_rate": 1.7116704622405295e-06, "loss": 0.1102, "step": 5686 }, { "epoch": 1.842838626053143, "grad_norm": 0.7563350796699524, "learning_rate": 1.7108405235169511e-06, "loss": 0.0993, "step": 5687 }, { "epoch": 1.8431626701231367, "grad_norm": 0.8214438557624817, "learning_rate": 1.7100106813788544e-06, "loss": 0.117, "step": 5688 }, { "epoch": 1.8434867141931304, "grad_norm": 0.8818809390068054, "learning_rate": 1.7091809359278025e-06, "loss": 0.1211, "step": 5689 }, { "epoch": 1.8438107582631238, "grad_norm": 0.8602452278137207, "learning_rate": 1.7083512872653477e-06, "loss": 0.1177, "step": 5690 }, { "epoch": 1.8441348023331172, "grad_norm": 0.8079312443733215, "learning_rate": 1.7075217354930324e-06, "loss": 0.1103, "step": 5691 }, { "epoch": 1.8444588464031109, "grad_norm": 0.742443323135376, "learning_rate": 1.7066922807123834e-06, "loss": 0.1041, "step": 5692 }, { "epoch": 1.8447828904731045, "grad_norm": 0.7868834733963013, "learning_rate": 1.7058629230249207e-06, "loss": 0.1048, "step": 5693 }, { "epoch": 1.845106934543098, "grad_norm": 0.7643986940383911, "learning_rate": 1.7050336625321484e-06, "loss": 0.1067, "step": 5694 }, { "epoch": 1.8454309786130914, "grad_norm": 0.8305909037590027, "learning_rate": 1.704204499335559e-06, "loss": 0.1121, "step": 5695 }, { "epoch": 1.8457550226830848, "grad_norm": 0.8438430428504944, "learning_rate": 1.7033754335366356e-06, "loss": 0.1189, "step": 5696 }, { "epoch": 1.8460790667530784, "grad_norm": 0.8111346364021301, "learning_rate": 1.7025464652368464e-06, "loss": 0.1134, "step": 5697 }, { "epoch": 1.846403110823072, "grad_norm": 0.8159440755844116, "learning_rate": 1.701717594537651e-06, "loss": 0.1141, "step": 5698 }, { "epoch": 1.8467271548930655, "grad_norm": 0.7892287969589233, "learning_rate": 1.7008888215404933e-06, "loss": 0.1072, "step": 5699 }, { "epoch": 1.847051198963059, "grad_norm": 0.7901312112808228, "learning_rate": 1.7000601463468088e-06, "loss": 0.1073, "step": 5700 }, { "epoch": 1.8473752430330523, "grad_norm": 0.7535051703453064, "learning_rate": 1.6992315690580178e-06, "loss": 0.0971, "step": 5701 }, { "epoch": 1.847699287103046, "grad_norm": 0.798143208026886, "learning_rate": 1.6984030897755304e-06, "loss": 0.1134, "step": 5702 }, { "epoch": 1.8480233311730396, "grad_norm": 0.8003287315368652, "learning_rate": 1.6975747086007454e-06, "loss": 0.1073, "step": 5703 }, { "epoch": 1.848347375243033, "grad_norm": 0.8201723694801331, "learning_rate": 1.6967464256350468e-06, "loss": 0.1188, "step": 5704 }, { "epoch": 1.8486714193130265, "grad_norm": 0.8480156660079956, "learning_rate": 1.6959182409798111e-06, "loss": 0.116, "step": 5705 }, { "epoch": 1.8489954633830201, "grad_norm": 0.783407986164093, "learning_rate": 1.695090154736398e-06, "loss": 0.108, "step": 5706 }, { "epoch": 1.8493195074530138, "grad_norm": 0.7597818970680237, "learning_rate": 1.6942621670061574e-06, "loss": 0.1106, "step": 5707 }, { "epoch": 1.8496435515230072, "grad_norm": 0.7512392401695251, "learning_rate": 1.693434277890428e-06, "loss": 0.1107, "step": 5708 }, { "epoch": 1.8499675955930006, "grad_norm": 0.7646990418434143, "learning_rate": 1.692606487490534e-06, "loss": 0.1071, "step": 5709 }, { "epoch": 1.850291639662994, "grad_norm": 0.8326711058616638, "learning_rate": 1.6917787959077907e-06, "loss": 0.1266, "step": 5710 }, { "epoch": 1.8506156837329877, "grad_norm": 0.8679319620132446, "learning_rate": 1.6909512032434984e-06, "loss": 0.1271, "step": 5711 }, { "epoch": 1.8509397278029813, "grad_norm": 0.733222484588623, "learning_rate": 1.6901237095989464e-06, "loss": 0.0994, "step": 5712 }, { "epoch": 1.8512637718729748, "grad_norm": 0.8290033340454102, "learning_rate": 1.6892963150754128e-06, "loss": 0.1091, "step": 5713 }, { "epoch": 1.8515878159429682, "grad_norm": 0.8612515926361084, "learning_rate": 1.6884690197741608e-06, "loss": 0.1217, "step": 5714 }, { "epoch": 1.8519118600129616, "grad_norm": 0.8125770092010498, "learning_rate": 1.6876418237964453e-06, "loss": 0.1133, "step": 5715 }, { "epoch": 1.8522359040829552, "grad_norm": 0.7832993268966675, "learning_rate": 1.6868147272435057e-06, "loss": 0.107, "step": 5716 }, { "epoch": 1.8525599481529489, "grad_norm": 0.8174955248832703, "learning_rate": 1.6859877302165723e-06, "loss": 0.1116, "step": 5717 }, { "epoch": 1.8528839922229423, "grad_norm": 0.8022476434707642, "learning_rate": 1.6851608328168589e-06, "loss": 0.112, "step": 5718 }, { "epoch": 1.8532080362929357, "grad_norm": 0.7933598160743713, "learning_rate": 1.6843340351455728e-06, "loss": 0.1077, "step": 5719 }, { "epoch": 1.8535320803629294, "grad_norm": 0.9144831895828247, "learning_rate": 1.6835073373039045e-06, "loss": 0.1126, "step": 5720 }, { "epoch": 1.853856124432923, "grad_norm": 0.7254115343093872, "learning_rate": 1.6826807393930334e-06, "loss": 0.0942, "step": 5721 }, { "epoch": 1.8541801685029164, "grad_norm": 0.8766010999679565, "learning_rate": 1.6818542415141273e-06, "loss": 0.1158, "step": 5722 }, { "epoch": 1.8545042125729099, "grad_norm": 0.8294383883476257, "learning_rate": 1.6810278437683419e-06, "loss": 0.1195, "step": 5723 }, { "epoch": 1.8548282566429033, "grad_norm": 0.8535876274108887, "learning_rate": 1.6802015462568205e-06, "loss": 0.1182, "step": 5724 }, { "epoch": 1.855152300712897, "grad_norm": 0.7217211723327637, "learning_rate": 1.6793753490806939e-06, "loss": 0.097, "step": 5725 }, { "epoch": 1.8554763447828906, "grad_norm": 0.800193727016449, "learning_rate": 1.678549252341079e-06, "loss": 0.115, "step": 5726 }, { "epoch": 1.855800388852884, "grad_norm": 0.7480499148368835, "learning_rate": 1.6777232561390844e-06, "loss": 0.1029, "step": 5727 }, { "epoch": 1.8561244329228774, "grad_norm": 0.7266390919685364, "learning_rate": 1.6768973605758021e-06, "loss": 0.1011, "step": 5728 }, { "epoch": 1.856448476992871, "grad_norm": 0.8023138642311096, "learning_rate": 1.6760715657523158e-06, "loss": 0.1112, "step": 5729 }, { "epoch": 1.8567725210628645, "grad_norm": 0.7607341408729553, "learning_rate": 1.6752458717696928e-06, "loss": 0.1078, "step": 5730 }, { "epoch": 1.8570965651328581, "grad_norm": 0.7758510112762451, "learning_rate": 1.674420278728991e-06, "loss": 0.1066, "step": 5731 }, { "epoch": 1.8574206092028516, "grad_norm": 0.8236150145530701, "learning_rate": 1.6735947867312553e-06, "loss": 0.1162, "step": 5732 }, { "epoch": 1.857744653272845, "grad_norm": 0.826967716217041, "learning_rate": 1.6727693958775172e-06, "loss": 0.1165, "step": 5733 }, { "epoch": 1.8580686973428386, "grad_norm": 0.8800385594367981, "learning_rate": 1.671944106268797e-06, "loss": 0.124, "step": 5734 }, { "epoch": 1.8583927414128323, "grad_norm": 0.7806604504585266, "learning_rate": 1.671118918006101e-06, "loss": 0.1119, "step": 5735 }, { "epoch": 1.8587167854828257, "grad_norm": 0.8205379843711853, "learning_rate": 1.6702938311904262e-06, "loss": 0.1139, "step": 5736 }, { "epoch": 1.8590408295528191, "grad_norm": 0.7946128845214844, "learning_rate": 1.6694688459227545e-06, "loss": 0.1129, "step": 5737 }, { "epoch": 1.8593648736228126, "grad_norm": 0.733856737613678, "learning_rate": 1.6686439623040548e-06, "loss": 0.1001, "step": 5738 }, { "epoch": 1.8596889176928062, "grad_norm": 0.8636114001274109, "learning_rate": 1.6678191804352873e-06, "loss": 0.1161, "step": 5739 }, { "epoch": 1.8600129617627998, "grad_norm": 0.8229653239250183, "learning_rate": 1.6669945004173944e-06, "loss": 0.1152, "step": 5740 }, { "epoch": 1.8603370058327933, "grad_norm": 0.7979140281677246, "learning_rate": 1.6661699223513118e-06, "loss": 0.1155, "step": 5741 }, { "epoch": 1.8606610499027867, "grad_norm": 0.8589354753494263, "learning_rate": 1.6653454463379582e-06, "loss": 0.1226, "step": 5742 }, { "epoch": 1.8609850939727803, "grad_norm": 0.8426529765129089, "learning_rate": 1.6645210724782423e-06, "loss": 0.1196, "step": 5743 }, { "epoch": 1.861309138042774, "grad_norm": 0.7011165618896484, "learning_rate": 1.6636968008730586e-06, "loss": 0.0934, "step": 5744 }, { "epoch": 1.8616331821127674, "grad_norm": 0.7157084941864014, "learning_rate": 1.6628726316232902e-06, "loss": 0.0959, "step": 5745 }, { "epoch": 1.8619572261827608, "grad_norm": 0.8379035592079163, "learning_rate": 1.6620485648298084e-06, "loss": 0.1115, "step": 5746 }, { "epoch": 1.8622812702527543, "grad_norm": 0.728182315826416, "learning_rate": 1.6612246005934694e-06, "loss": 0.1021, "step": 5747 }, { "epoch": 1.862605314322748, "grad_norm": 0.832934558391571, "learning_rate": 1.66040073901512e-06, "loss": 0.1214, "step": 5748 }, { "epoch": 1.8629293583927415, "grad_norm": 0.7622983455657959, "learning_rate": 1.6595769801955925e-06, "loss": 0.103, "step": 5749 }, { "epoch": 1.863253402462735, "grad_norm": 0.9064056873321533, "learning_rate": 1.6587533242357053e-06, "loss": 0.104, "step": 5750 }, { "epoch": 1.8635774465327284, "grad_norm": 0.8469486832618713, "learning_rate": 1.6579297712362686e-06, "loss": 0.1173, "step": 5751 }, { "epoch": 1.8639014906027218, "grad_norm": 0.8399779796600342, "learning_rate": 1.6571063212980753e-06, "loss": 0.1172, "step": 5752 }, { "epoch": 1.8642255346727155, "grad_norm": 0.9038817286491394, "learning_rate": 1.6562829745219089e-06, "loss": 0.1134, "step": 5753 }, { "epoch": 1.864549578742709, "grad_norm": 0.7293316125869751, "learning_rate": 1.6554597310085383e-06, "loss": 0.0983, "step": 5754 }, { "epoch": 1.8648736228127025, "grad_norm": 0.7801869511604309, "learning_rate": 1.6546365908587213e-06, "loss": 0.1103, "step": 5755 }, { "epoch": 1.865197666882696, "grad_norm": 0.8423866033554077, "learning_rate": 1.653813554173202e-06, "loss": 0.1205, "step": 5756 }, { "epoch": 1.8655217109526896, "grad_norm": 0.8145995736122131, "learning_rate": 1.6529906210527107e-06, "loss": 0.1146, "step": 5757 }, { "epoch": 1.8658457550226832, "grad_norm": 0.8038000464439392, "learning_rate": 1.6521677915979688e-06, "loss": 0.1106, "step": 5758 }, { "epoch": 1.8661697990926767, "grad_norm": 0.8118459582328796, "learning_rate": 1.6513450659096804e-06, "loss": 0.1099, "step": 5759 }, { "epoch": 1.86649384316267, "grad_norm": 0.9116185307502747, "learning_rate": 1.6505224440885414e-06, "loss": 0.122, "step": 5760 }, { "epoch": 1.8668178872326635, "grad_norm": 0.7717702984809875, "learning_rate": 1.649699926235232e-06, "loss": 0.1033, "step": 5761 }, { "epoch": 1.8671419313026572, "grad_norm": 0.8235916495323181, "learning_rate": 1.6488775124504188e-06, "loss": 0.1252, "step": 5762 }, { "epoch": 1.8674659753726508, "grad_norm": 0.8452646732330322, "learning_rate": 1.6480552028347597e-06, "loss": 0.1179, "step": 5763 }, { "epoch": 1.8677900194426442, "grad_norm": 0.7542973160743713, "learning_rate": 1.6472329974888956e-06, "loss": 0.1077, "step": 5764 }, { "epoch": 1.8681140635126376, "grad_norm": 0.8130019307136536, "learning_rate": 1.6464108965134578e-06, "loss": 0.1183, "step": 5765 }, { "epoch": 1.8684381075826313, "grad_norm": 0.8335098028182983, "learning_rate": 1.645588900009062e-06, "loss": 0.1148, "step": 5766 }, { "epoch": 1.8687621516526247, "grad_norm": 0.8066989779472351, "learning_rate": 1.6447670080763146e-06, "loss": 0.1193, "step": 5767 }, { "epoch": 1.8690861957226184, "grad_norm": 0.8658890724182129, "learning_rate": 1.6439452208158058e-06, "loss": 0.1165, "step": 5768 }, { "epoch": 1.8694102397926118, "grad_norm": 0.786740779876709, "learning_rate": 1.6431235383281135e-06, "loss": 0.1164, "step": 5769 }, { "epoch": 1.8697342838626052, "grad_norm": 0.8632078766822815, "learning_rate": 1.6423019607138064e-06, "loss": 0.1092, "step": 5770 }, { "epoch": 1.8700583279325989, "grad_norm": 0.777319610118866, "learning_rate": 1.641480488073435e-06, "loss": 0.1151, "step": 5771 }, { "epoch": 1.8703823720025925, "grad_norm": 0.7908869385719299, "learning_rate": 1.6406591205075417e-06, "loss": 0.1074, "step": 5772 }, { "epoch": 1.870706416072586, "grad_norm": 0.7509408593177795, "learning_rate": 1.639837858116653e-06, "loss": 0.1022, "step": 5773 }, { "epoch": 1.8710304601425793, "grad_norm": 0.8177096247673035, "learning_rate": 1.6390167010012824e-06, "loss": 0.1157, "step": 5774 }, { "epoch": 1.8713545042125728, "grad_norm": 0.7938538193702698, "learning_rate": 1.638195649261934e-06, "loss": 0.1048, "step": 5775 }, { "epoch": 1.8716785482825664, "grad_norm": 0.8329545855522156, "learning_rate": 1.6373747029990943e-06, "loss": 0.12, "step": 5776 }, { "epoch": 1.87200259235256, "grad_norm": 0.7647078633308411, "learning_rate": 1.6365538623132405e-06, "loss": 0.0998, "step": 5777 }, { "epoch": 1.8723266364225535, "grad_norm": 0.7947881817817688, "learning_rate": 1.6357331273048343e-06, "loss": 0.1172, "step": 5778 }, { "epoch": 1.872650680492547, "grad_norm": 0.8033885359764099, "learning_rate": 1.6349124980743278e-06, "loss": 0.1067, "step": 5779 }, { "epoch": 1.8729747245625405, "grad_norm": 0.8645670413970947, "learning_rate": 1.6340919747221568e-06, "loss": 0.1126, "step": 5780 }, { "epoch": 1.873298768632534, "grad_norm": 0.6997979879379272, "learning_rate": 1.633271557348744e-06, "loss": 0.097, "step": 5781 }, { "epoch": 1.8736228127025276, "grad_norm": 0.7694612145423889, "learning_rate": 1.6324512460545034e-06, "loss": 0.1058, "step": 5782 }, { "epoch": 1.873946856772521, "grad_norm": 0.8525017499923706, "learning_rate": 1.6316310409398306e-06, "loss": 0.123, "step": 5783 }, { "epoch": 1.8742709008425145, "grad_norm": 0.7532781362533569, "learning_rate": 1.6308109421051132e-06, "loss": 0.1098, "step": 5784 }, { "epoch": 1.874594944912508, "grad_norm": 0.887808084487915, "learning_rate": 1.6299909496507214e-06, "loss": 0.1232, "step": 5785 }, { "epoch": 1.8749189889825018, "grad_norm": 0.7432640194892883, "learning_rate": 1.6291710636770152e-06, "loss": 0.1042, "step": 5786 }, { "epoch": 1.8752430330524952, "grad_norm": 0.8639612197875977, "learning_rate": 1.628351284284341e-06, "loss": 0.1128, "step": 5787 }, { "epoch": 1.8755670771224886, "grad_norm": 0.8335579633712769, "learning_rate": 1.6275316115730302e-06, "loss": 0.1175, "step": 5788 }, { "epoch": 1.875891121192482, "grad_norm": 0.8358171582221985, "learning_rate": 1.626712045643405e-06, "loss": 0.1141, "step": 5789 }, { "epoch": 1.8762151652624757, "grad_norm": 0.765986442565918, "learning_rate": 1.6258925865957703e-06, "loss": 0.1085, "step": 5790 }, { "epoch": 1.8765392093324693, "grad_norm": 0.8088056445121765, "learning_rate": 1.625073234530422e-06, "loss": 0.1107, "step": 5791 }, { "epoch": 1.8768632534024627, "grad_norm": 0.7557681202888489, "learning_rate": 1.62425398954764e-06, "loss": 0.1057, "step": 5792 }, { "epoch": 1.8771872974724562, "grad_norm": 0.7989634275436401, "learning_rate": 1.6234348517476905e-06, "loss": 0.1215, "step": 5793 }, { "epoch": 1.8775113415424498, "grad_norm": 1.2016243934631348, "learning_rate": 1.6226158212308307e-06, "loss": 0.1141, "step": 5794 }, { "epoch": 1.8778353856124435, "grad_norm": 0.7669769525527954, "learning_rate": 1.6217968980972998e-06, "loss": 0.0992, "step": 5795 }, { "epoch": 1.8781594296824369, "grad_norm": 0.8094942569732666, "learning_rate": 1.620978082447327e-06, "loss": 0.1138, "step": 5796 }, { "epoch": 1.8784834737524303, "grad_norm": 0.8035215139389038, "learning_rate": 1.6201593743811275e-06, "loss": 0.11, "step": 5797 }, { "epoch": 1.8788075178224237, "grad_norm": 0.8170638084411621, "learning_rate": 1.6193407739989037e-06, "loss": 0.1106, "step": 5798 }, { "epoch": 1.8791315618924174, "grad_norm": 0.7591819763183594, "learning_rate": 1.6185222814008434e-06, "loss": 0.1116, "step": 5799 }, { "epoch": 1.879455605962411, "grad_norm": 0.8090928196907043, "learning_rate": 1.6177038966871213e-06, "loss": 0.1098, "step": 5800 }, { "epoch": 1.8797796500324044, "grad_norm": 0.7584290504455566, "learning_rate": 1.6168856199579025e-06, "loss": 0.1066, "step": 5801 }, { "epoch": 1.8801036941023979, "grad_norm": 0.8811301589012146, "learning_rate": 1.6160674513133332e-06, "loss": 0.1213, "step": 5802 }, { "epoch": 1.8804277381723913, "grad_norm": 0.8295065760612488, "learning_rate": 1.615249390853552e-06, "loss": 0.1057, "step": 5803 }, { "epoch": 1.880751782242385, "grad_norm": 0.8181267380714417, "learning_rate": 1.61443143867868e-06, "loss": 0.115, "step": 5804 }, { "epoch": 1.8810758263123786, "grad_norm": 0.7951749563217163, "learning_rate": 1.613613594888826e-06, "loss": 0.1131, "step": 5805 }, { "epoch": 1.881399870382372, "grad_norm": 0.7731319665908813, "learning_rate": 1.612795859584088e-06, "loss": 0.1056, "step": 5806 }, { "epoch": 1.8817239144523654, "grad_norm": 0.8112607598304749, "learning_rate": 1.611978232864548e-06, "loss": 0.115, "step": 5807 }, { "epoch": 1.882047958522359, "grad_norm": 0.7796767354011536, "learning_rate": 1.6111607148302758e-06, "loss": 0.1063, "step": 5808 }, { "epoch": 1.8823720025923527, "grad_norm": 0.7607157230377197, "learning_rate": 1.6103433055813265e-06, "loss": 0.1002, "step": 5809 }, { "epoch": 1.8826960466623461, "grad_norm": 0.7605647444725037, "learning_rate": 1.6095260052177446e-06, "loss": 0.1037, "step": 5810 }, { "epoch": 1.8830200907323396, "grad_norm": 0.8884912133216858, "learning_rate": 1.6087088138395598e-06, "loss": 0.122, "step": 5811 }, { "epoch": 1.883344134802333, "grad_norm": 0.7630835175514221, "learning_rate": 1.6078917315467867e-06, "loss": 0.1037, "step": 5812 }, { "epoch": 1.8836681788723266, "grad_norm": 0.8057610988616943, "learning_rate": 1.6070747584394303e-06, "loss": 0.1156, "step": 5813 }, { "epoch": 1.8839922229423203, "grad_norm": 0.8627750277519226, "learning_rate": 1.6062578946174785e-06, "loss": 0.1265, "step": 5814 }, { "epoch": 1.8843162670123137, "grad_norm": 0.8453134894371033, "learning_rate": 1.605441140180909e-06, "loss": 0.1056, "step": 5815 }, { "epoch": 1.8846403110823071, "grad_norm": 0.7545762658119202, "learning_rate": 1.6046244952296839e-06, "loss": 0.1004, "step": 5816 }, { "epoch": 1.8849643551523008, "grad_norm": 0.9063136577606201, "learning_rate": 1.6038079598637523e-06, "loss": 0.1143, "step": 5817 }, { "epoch": 1.8852883992222942, "grad_norm": 1.0915850400924683, "learning_rate": 1.6029915341830503e-06, "loss": 0.1489, "step": 5818 }, { "epoch": 1.8856124432922878, "grad_norm": 0.7500689625740051, "learning_rate": 1.6021752182875012e-06, "loss": 0.1045, "step": 5819 }, { "epoch": 1.8859364873622813, "grad_norm": 0.8040003180503845, "learning_rate": 1.6013590122770143e-06, "loss": 0.1113, "step": 5820 }, { "epoch": 1.8862605314322747, "grad_norm": 0.7638602256774902, "learning_rate": 1.6005429162514834e-06, "loss": 0.1029, "step": 5821 }, { "epoch": 1.8865845755022683, "grad_norm": 0.7854073643684387, "learning_rate": 1.5997269303107937e-06, "loss": 0.1125, "step": 5822 }, { "epoch": 1.886908619572262, "grad_norm": 0.808712363243103, "learning_rate": 1.598911054554812e-06, "loss": 0.1047, "step": 5823 }, { "epoch": 1.8872326636422554, "grad_norm": 0.7914475202560425, "learning_rate": 1.5980952890833929e-06, "loss": 0.1034, "step": 5824 }, { "epoch": 1.8875567077122488, "grad_norm": 0.8212109208106995, "learning_rate": 1.5972796339963806e-06, "loss": 0.1155, "step": 5825 }, { "epoch": 1.8878807517822422, "grad_norm": 0.8299123644828796, "learning_rate": 1.5964640893936015e-06, "loss": 0.1205, "step": 5826 }, { "epoch": 1.8882047958522359, "grad_norm": 0.8492401838302612, "learning_rate": 1.595648655374871e-06, "loss": 0.1188, "step": 5827 }, { "epoch": 1.8885288399222295, "grad_norm": 0.7753995656967163, "learning_rate": 1.5948333320399905e-06, "loss": 0.1108, "step": 5828 }, { "epoch": 1.888852883992223, "grad_norm": 0.7334645986557007, "learning_rate": 1.5940181194887472e-06, "loss": 0.0975, "step": 5829 }, { "epoch": 1.8891769280622164, "grad_norm": 0.7638627290725708, "learning_rate": 1.5932030178209163e-06, "loss": 0.1067, "step": 5830 }, { "epoch": 1.88950097213221, "grad_norm": 0.7731084227561951, "learning_rate": 1.592388027136256e-06, "loss": 0.1142, "step": 5831 }, { "epoch": 1.8898250162022034, "grad_norm": 0.792477548122406, "learning_rate": 1.591573147534516e-06, "loss": 0.1125, "step": 5832 }, { "epoch": 1.890149060272197, "grad_norm": 0.7775505781173706, "learning_rate": 1.5907583791154275e-06, "loss": 0.1097, "step": 5833 }, { "epoch": 1.8904731043421905, "grad_norm": 0.7763543128967285, "learning_rate": 1.5899437219787124e-06, "loss": 0.1057, "step": 5834 }, { "epoch": 1.890797148412184, "grad_norm": 0.7588939666748047, "learning_rate": 1.5891291762240757e-06, "loss": 0.1072, "step": 5835 }, { "epoch": 1.8911211924821776, "grad_norm": 0.839514970779419, "learning_rate": 1.5883147419512086e-06, "loss": 0.1125, "step": 5836 }, { "epoch": 1.8914452365521712, "grad_norm": 0.8462491035461426, "learning_rate": 1.5875004192597926e-06, "loss": 0.114, "step": 5837 }, { "epoch": 1.8917692806221647, "grad_norm": 0.7633974552154541, "learning_rate": 1.5866862082494907e-06, "loss": 0.1008, "step": 5838 }, { "epoch": 1.892093324692158, "grad_norm": 0.7939409017562866, "learning_rate": 1.5858721090199564e-06, "loss": 0.11, "step": 5839 }, { "epoch": 1.8924173687621515, "grad_norm": 0.806594967842102, "learning_rate": 1.5850581216708254e-06, "loss": 0.1167, "step": 5840 }, { "epoch": 1.8927414128321451, "grad_norm": 0.8261216878890991, "learning_rate": 1.5842442463017235e-06, "loss": 0.1074, "step": 5841 }, { "epoch": 1.8930654569021388, "grad_norm": 0.7876153588294983, "learning_rate": 1.583430483012261e-06, "loss": 0.1097, "step": 5842 }, { "epoch": 1.8933895009721322, "grad_norm": 0.7754987478256226, "learning_rate": 1.5826168319020332e-06, "loss": 0.1062, "step": 5843 }, { "epoch": 1.8937135450421256, "grad_norm": 0.8216641545295715, "learning_rate": 1.5818032930706254e-06, "loss": 0.1115, "step": 5844 }, { "epoch": 1.8940375891121193, "grad_norm": 0.8169437050819397, "learning_rate": 1.5809898666176044e-06, "loss": 0.112, "step": 5845 }, { "epoch": 1.894361633182113, "grad_norm": 0.7897141575813293, "learning_rate": 1.5801765526425283e-06, "loss": 0.1121, "step": 5846 }, { "epoch": 1.8946856772521063, "grad_norm": 0.8244640827178955, "learning_rate": 1.5793633512449374e-06, "loss": 0.1151, "step": 5847 }, { "epoch": 1.8950097213220998, "grad_norm": 0.792438805103302, "learning_rate": 1.578550262524359e-06, "loss": 0.1074, "step": 5848 }, { "epoch": 1.8953337653920932, "grad_norm": 0.796003520488739, "learning_rate": 1.5777372865803091e-06, "loss": 0.1065, "step": 5849 }, { "epoch": 1.8956578094620868, "grad_norm": 0.8183282017707825, "learning_rate": 1.5769244235122867e-06, "loss": 0.1153, "step": 5850 }, { "epoch": 1.8959818535320805, "grad_norm": 0.8319751024246216, "learning_rate": 1.576111673419779e-06, "loss": 0.1194, "step": 5851 }, { "epoch": 1.896305897602074, "grad_norm": 0.7649463415145874, "learning_rate": 1.5752990364022588e-06, "loss": 0.1042, "step": 5852 }, { "epoch": 1.8966299416720673, "grad_norm": 0.8287633657455444, "learning_rate": 1.5744865125591837e-06, "loss": 0.1141, "step": 5853 }, { "epoch": 1.8969539857420608, "grad_norm": 0.7885811924934387, "learning_rate": 1.573674101990001e-06, "loss": 0.1142, "step": 5854 }, { "epoch": 1.8972780298120544, "grad_norm": 0.839322566986084, "learning_rate": 1.5728618047941393e-06, "loss": 0.1066, "step": 5855 }, { "epoch": 1.897602073882048, "grad_norm": 0.820233166217804, "learning_rate": 1.5720496210710185e-06, "loss": 0.1101, "step": 5856 }, { "epoch": 1.8979261179520415, "grad_norm": 0.8723364472389221, "learning_rate": 1.5712375509200397e-06, "loss": 0.1142, "step": 5857 }, { "epoch": 1.898250162022035, "grad_norm": 0.8589418530464172, "learning_rate": 1.5704255944405947e-06, "loss": 0.1131, "step": 5858 }, { "epoch": 1.8985742060920285, "grad_norm": 0.8543845415115356, "learning_rate": 1.5696137517320582e-06, "loss": 0.1165, "step": 5859 }, { "epoch": 1.8988982501620222, "grad_norm": 0.78272545337677, "learning_rate": 1.5688020228937905e-06, "loss": 0.1114, "step": 5860 }, { "epoch": 1.8992222942320156, "grad_norm": 0.879530668258667, "learning_rate": 1.5679904080251414e-06, "loss": 0.1238, "step": 5861 }, { "epoch": 1.899546338302009, "grad_norm": 0.8408613801002502, "learning_rate": 1.567178907225443e-06, "loss": 0.1219, "step": 5862 }, { "epoch": 1.8998703823720025, "grad_norm": 0.8422929048538208, "learning_rate": 1.5663675205940164e-06, "loss": 0.112, "step": 5863 }, { "epoch": 1.900194426441996, "grad_norm": 0.8757169842720032, "learning_rate": 1.5655562482301664e-06, "loss": 0.1196, "step": 5864 }, { "epoch": 1.9005184705119897, "grad_norm": 0.8293363451957703, "learning_rate": 1.5647450902331866e-06, "loss": 0.1174, "step": 5865 }, { "epoch": 1.9008425145819832, "grad_norm": 0.7704747319221497, "learning_rate": 1.5639340467023534e-06, "loss": 0.1063, "step": 5866 }, { "epoch": 1.9011665586519766, "grad_norm": 0.7849206924438477, "learning_rate": 1.5631231177369305e-06, "loss": 0.1112, "step": 5867 }, { "epoch": 1.9014906027219702, "grad_norm": 0.8597646951675415, "learning_rate": 1.562312303436169e-06, "loss": 0.1212, "step": 5868 }, { "epoch": 1.9018146467919637, "grad_norm": 0.8411148190498352, "learning_rate": 1.5615016038993036e-06, "loss": 0.1117, "step": 5869 }, { "epoch": 1.9021386908619573, "grad_norm": 0.8403557538986206, "learning_rate": 1.5606910192255565e-06, "loss": 0.1155, "step": 5870 }, { "epoch": 1.9024627349319507, "grad_norm": 0.759688138961792, "learning_rate": 1.5598805495141362e-06, "loss": 0.1069, "step": 5871 }, { "epoch": 1.9027867790019442, "grad_norm": 0.799876868724823, "learning_rate": 1.5590701948642348e-06, "loss": 0.1136, "step": 5872 }, { "epoch": 1.9031108230719378, "grad_norm": 0.8051289319992065, "learning_rate": 1.5582599553750332e-06, "loss": 0.1179, "step": 5873 }, { "epoch": 1.9034348671419314, "grad_norm": 0.7589281797409058, "learning_rate": 1.5574498311456953e-06, "loss": 0.1034, "step": 5874 }, { "epoch": 1.9037589112119249, "grad_norm": 0.797426700592041, "learning_rate": 1.5566398222753745e-06, "loss": 0.1056, "step": 5875 }, { "epoch": 1.9040829552819183, "grad_norm": 0.7796162366867065, "learning_rate": 1.5558299288632061e-06, "loss": 0.105, "step": 5876 }, { "epoch": 1.9044069993519117, "grad_norm": 0.8247846364974976, "learning_rate": 1.555020151008315e-06, "loss": 0.1214, "step": 5877 }, { "epoch": 1.9047310434219054, "grad_norm": 0.7716207504272461, "learning_rate": 1.5542104888098093e-06, "loss": 0.1049, "step": 5878 }, { "epoch": 1.905055087491899, "grad_norm": 0.8307933807373047, "learning_rate": 1.553400942366783e-06, "loss": 0.1152, "step": 5879 }, { "epoch": 1.9053791315618924, "grad_norm": 0.8331568241119385, "learning_rate": 1.5525915117783182e-06, "loss": 0.1159, "step": 5880 }, { "epoch": 1.9057031756318858, "grad_norm": 0.8303659558296204, "learning_rate": 1.5517821971434804e-06, "loss": 0.1094, "step": 5881 }, { "epoch": 1.9060272197018795, "grad_norm": 0.8347376585006714, "learning_rate": 1.5509729985613232e-06, "loss": 0.1157, "step": 5882 }, { "epoch": 1.9063512637718731, "grad_norm": 0.8244526386260986, "learning_rate": 1.5501639161308829e-06, "loss": 0.1142, "step": 5883 }, { "epoch": 1.9066753078418666, "grad_norm": 0.8219770789146423, "learning_rate": 1.5493549499511834e-06, "loss": 0.1195, "step": 5884 }, { "epoch": 1.90699935191186, "grad_norm": 0.8310133218765259, "learning_rate": 1.5485461001212365e-06, "loss": 0.116, "step": 5885 }, { "epoch": 1.9073233959818534, "grad_norm": 0.807929515838623, "learning_rate": 1.5477373667400347e-06, "loss": 0.1096, "step": 5886 }, { "epoch": 1.907647440051847, "grad_norm": 0.7839474678039551, "learning_rate": 1.5469287499065615e-06, "loss": 0.1069, "step": 5887 }, { "epoch": 1.9079714841218407, "grad_norm": 0.8028229475021362, "learning_rate": 1.5461202497197821e-06, "loss": 0.11, "step": 5888 }, { "epoch": 1.9082955281918341, "grad_norm": 0.8570075631141663, "learning_rate": 1.5453118662786509e-06, "loss": 0.1234, "step": 5889 }, { "epoch": 1.9086195722618275, "grad_norm": 0.7751265168190002, "learning_rate": 1.544503599682105e-06, "loss": 0.1067, "step": 5890 }, { "epoch": 1.908943616331821, "grad_norm": 0.827232837677002, "learning_rate": 1.5436954500290684e-06, "loss": 0.1127, "step": 5891 }, { "epoch": 1.9092676604018146, "grad_norm": 0.8176938891410828, "learning_rate": 1.5428874174184509e-06, "loss": 0.1181, "step": 5892 }, { "epoch": 1.9095917044718083, "grad_norm": 0.7876701951026917, "learning_rate": 1.5420795019491475e-06, "loss": 0.1095, "step": 5893 }, { "epoch": 1.9099157485418017, "grad_norm": 0.781857430934906, "learning_rate": 1.5412717037200406e-06, "loss": 0.1003, "step": 5894 }, { "epoch": 1.910239792611795, "grad_norm": 0.8700427412986755, "learning_rate": 1.540464022829996e-06, "loss": 0.1094, "step": 5895 }, { "epoch": 1.9105638366817888, "grad_norm": 0.8291754722595215, "learning_rate": 1.5396564593778646e-06, "loss": 0.1135, "step": 5896 }, { "epoch": 1.9108878807517824, "grad_norm": 0.800238311290741, "learning_rate": 1.538849013462487e-06, "loss": 0.1076, "step": 5897 }, { "epoch": 1.9112119248217758, "grad_norm": 0.8612180948257446, "learning_rate": 1.5380416851826845e-06, "loss": 0.1196, "step": 5898 }, { "epoch": 1.9115359688917692, "grad_norm": 0.8104245662689209, "learning_rate": 1.537234474637268e-06, "loss": 0.1122, "step": 5899 }, { "epoch": 1.9118600129617627, "grad_norm": 0.8221279382705688, "learning_rate": 1.5364273819250308e-06, "loss": 0.1145, "step": 5900 }, { "epoch": 1.9121840570317563, "grad_norm": 0.7910533547401428, "learning_rate": 1.535620407144755e-06, "loss": 0.114, "step": 5901 }, { "epoch": 1.91250810110175, "grad_norm": 0.8792036175727844, "learning_rate": 1.534813550395205e-06, "loss": 0.1158, "step": 5902 }, { "epoch": 1.9128321451717434, "grad_norm": 0.7558249235153198, "learning_rate": 1.5340068117751329e-06, "loss": 0.1039, "step": 5903 }, { "epoch": 1.9131561892417368, "grad_norm": 0.8300741910934448, "learning_rate": 1.5332001913832754e-06, "loss": 0.1175, "step": 5904 }, { "epoch": 1.9134802333117304, "grad_norm": 0.8217713832855225, "learning_rate": 1.5323936893183542e-06, "loss": 0.1116, "step": 5905 }, { "epoch": 1.9138042773817239, "grad_norm": 0.7724614143371582, "learning_rate": 1.5315873056790791e-06, "loss": 0.1054, "step": 5906 }, { "epoch": 1.9141283214517175, "grad_norm": 0.8543126583099365, "learning_rate": 1.5307810405641433e-06, "loss": 0.1141, "step": 5907 }, { "epoch": 1.914452365521711, "grad_norm": 0.848846971988678, "learning_rate": 1.5299748940722241e-06, "loss": 0.1095, "step": 5908 }, { "epoch": 1.9147764095917044, "grad_norm": 0.8107044696807861, "learning_rate": 1.5291688663019885e-06, "loss": 0.1089, "step": 5909 }, { "epoch": 1.915100453661698, "grad_norm": 0.777558445930481, "learning_rate": 1.5283629573520841e-06, "loss": 0.1079, "step": 5910 }, { "epoch": 1.9154244977316917, "grad_norm": 0.9644023776054382, "learning_rate": 1.5275571673211487e-06, "loss": 0.1273, "step": 5911 }, { "epoch": 1.915748541801685, "grad_norm": 0.7175261974334717, "learning_rate": 1.5267514963078014e-06, "loss": 0.0976, "step": 5912 }, { "epoch": 1.9160725858716785, "grad_norm": 0.7565993070602417, "learning_rate": 1.5259459444106497e-06, "loss": 0.1074, "step": 5913 }, { "epoch": 1.916396629941672, "grad_norm": 0.84763503074646, "learning_rate": 1.5251405117282843e-06, "loss": 0.1177, "step": 5914 }, { "epoch": 1.9167206740116656, "grad_norm": 0.8428905606269836, "learning_rate": 1.524335198359283e-06, "loss": 0.1194, "step": 5915 }, { "epoch": 1.9170447180816592, "grad_norm": 0.7854635119438171, "learning_rate": 1.5235300044022088e-06, "loss": 0.1123, "step": 5916 }, { "epoch": 1.9173687621516526, "grad_norm": 0.7823472023010254, "learning_rate": 1.522724929955608e-06, "loss": 0.1067, "step": 5917 }, { "epoch": 1.917692806221646, "grad_norm": 0.8037146329879761, "learning_rate": 1.5219199751180162e-06, "loss": 0.1032, "step": 5918 }, { "epoch": 1.9180168502916397, "grad_norm": 0.7649844288825989, "learning_rate": 1.5211151399879505e-06, "loss": 0.112, "step": 5919 }, { "epoch": 1.9183408943616331, "grad_norm": 0.8710806965827942, "learning_rate": 1.5203104246639144e-06, "loss": 0.125, "step": 5920 }, { "epoch": 1.9186649384316268, "grad_norm": 0.7725611329078674, "learning_rate": 1.5195058292443996e-06, "loss": 0.1097, "step": 5921 }, { "epoch": 1.9189889825016202, "grad_norm": 0.8272144198417664, "learning_rate": 1.518701353827878e-06, "loss": 0.1106, "step": 5922 }, { "epoch": 1.9193130265716136, "grad_norm": 0.8756875395774841, "learning_rate": 1.5178969985128122e-06, "loss": 0.1208, "step": 5923 }, { "epoch": 1.9196370706416073, "grad_norm": 0.8208725452423096, "learning_rate": 1.5170927633976457e-06, "loss": 0.1136, "step": 5924 }, { "epoch": 1.919961114711601, "grad_norm": 0.7696161270141602, "learning_rate": 1.5162886485808102e-06, "loss": 0.1129, "step": 5925 }, { "epoch": 1.9202851587815943, "grad_norm": 0.9552154541015625, "learning_rate": 1.515484654160721e-06, "loss": 0.1238, "step": 5926 }, { "epoch": 1.9206092028515878, "grad_norm": 0.8473532199859619, "learning_rate": 1.5146807802357782e-06, "loss": 0.1185, "step": 5927 }, { "epoch": 1.9209332469215812, "grad_norm": 0.8460490107536316, "learning_rate": 1.5138770269043704e-06, "loss": 0.1154, "step": 5928 }, { "epoch": 1.9212572909915748, "grad_norm": 0.7778971791267395, "learning_rate": 1.513073394264867e-06, "loss": 0.1023, "step": 5929 }, { "epoch": 1.9215813350615685, "grad_norm": 0.7810094952583313, "learning_rate": 1.5122698824156271e-06, "loss": 0.1075, "step": 5930 }, { "epoch": 1.921905379131562, "grad_norm": 0.7526049613952637, "learning_rate": 1.5114664914549903e-06, "loss": 0.1046, "step": 5931 }, { "epoch": 1.9222294232015553, "grad_norm": 0.7893481254577637, "learning_rate": 1.5106632214812865e-06, "loss": 0.1111, "step": 5932 }, { "epoch": 1.922553467271549, "grad_norm": 0.8875093460083008, "learning_rate": 1.5098600725928269e-06, "loss": 0.117, "step": 5933 }, { "epoch": 1.9228775113415426, "grad_norm": 0.7732181549072266, "learning_rate": 1.5090570448879088e-06, "loss": 0.1081, "step": 5934 }, { "epoch": 1.923201555411536, "grad_norm": 0.8007689118385315, "learning_rate": 1.5082541384648154e-06, "loss": 0.1089, "step": 5935 }, { "epoch": 1.9235255994815295, "grad_norm": 0.7940258383750916, "learning_rate": 1.5074513534218137e-06, "loss": 0.1079, "step": 5936 }, { "epoch": 1.9238496435515229, "grad_norm": 0.7803616523742676, "learning_rate": 1.5066486898571588e-06, "loss": 0.1085, "step": 5937 }, { "epoch": 1.9241736876215165, "grad_norm": 0.7531672120094299, "learning_rate": 1.5058461478690878e-06, "loss": 0.1014, "step": 5938 }, { "epoch": 1.9244977316915102, "grad_norm": 0.8603652715682983, "learning_rate": 1.5050437275558233e-06, "loss": 0.1164, "step": 5939 }, { "epoch": 1.9248217757615036, "grad_norm": 0.8175323605537415, "learning_rate": 1.5042414290155754e-06, "loss": 0.1079, "step": 5940 }, { "epoch": 1.925145819831497, "grad_norm": 0.7698320150375366, "learning_rate": 1.5034392523465364e-06, "loss": 0.0941, "step": 5941 }, { "epoch": 1.9254698639014904, "grad_norm": 0.7824234962463379, "learning_rate": 1.502637197646886e-06, "loss": 0.1125, "step": 5942 }, { "epoch": 1.925793907971484, "grad_norm": 0.794787585735321, "learning_rate": 1.5018352650147872e-06, "loss": 0.1062, "step": 5943 }, { "epoch": 1.9261179520414777, "grad_norm": 0.8701479434967041, "learning_rate": 1.5010334545483885e-06, "loss": 0.1243, "step": 5944 }, { "epoch": 1.9264419961114712, "grad_norm": 0.813524067401886, "learning_rate": 1.500231766345825e-06, "loss": 0.1221, "step": 5945 }, { "epoch": 1.9267660401814646, "grad_norm": 0.7518372535705566, "learning_rate": 1.4994302005052141e-06, "loss": 0.1048, "step": 5946 }, { "epoch": 1.9270900842514582, "grad_norm": 0.8398098945617676, "learning_rate": 1.4986287571246614e-06, "loss": 0.1189, "step": 5947 }, { "epoch": 1.9274141283214519, "grad_norm": 0.782597005367279, "learning_rate": 1.4978274363022532e-06, "loss": 0.1132, "step": 5948 }, { "epoch": 1.9277381723914453, "grad_norm": 0.7935764789581299, "learning_rate": 1.4970262381360664e-06, "loss": 0.1054, "step": 5949 }, { "epoch": 1.9280622164614387, "grad_norm": 0.8103215098381042, "learning_rate": 1.4962251627241583e-06, "loss": 0.112, "step": 5950 }, { "epoch": 1.9283862605314321, "grad_norm": 0.7776728272438049, "learning_rate": 1.4954242101645722e-06, "loss": 0.1062, "step": 5951 }, { "epoch": 1.9287103046014258, "grad_norm": 0.7960215210914612, "learning_rate": 1.4946233805553387e-06, "loss": 0.1115, "step": 5952 }, { "epoch": 1.9290343486714194, "grad_norm": 0.8163950443267822, "learning_rate": 1.4938226739944694e-06, "loss": 0.1115, "step": 5953 }, { "epoch": 1.9293583927414129, "grad_norm": 0.788702130317688, "learning_rate": 1.4930220905799652e-06, "loss": 0.1158, "step": 5954 }, { "epoch": 1.9296824368114063, "grad_norm": 0.8842043280601501, "learning_rate": 1.4922216304098085e-06, "loss": 0.122, "step": 5955 }, { "epoch": 1.9300064808814, "grad_norm": 0.8993422985076904, "learning_rate": 1.4914212935819689e-06, "loss": 0.1249, "step": 5956 }, { "epoch": 1.9303305249513933, "grad_norm": 0.7884942889213562, "learning_rate": 1.4906210801943985e-06, "loss": 0.1128, "step": 5957 }, { "epoch": 1.930654569021387, "grad_norm": 0.8368211388587952, "learning_rate": 1.4898209903450361e-06, "loss": 0.1194, "step": 5958 }, { "epoch": 1.9309786130913804, "grad_norm": 0.7727013230323792, "learning_rate": 1.489021024131806e-06, "loss": 0.1048, "step": 5959 }, { "epoch": 1.9313026571613738, "grad_norm": 0.7856116890907288, "learning_rate": 1.4882211816526144e-06, "loss": 0.1057, "step": 5960 }, { "epoch": 1.9316267012313675, "grad_norm": 0.8511420488357544, "learning_rate": 1.4874214630053562e-06, "loss": 0.1149, "step": 5961 }, { "epoch": 1.9319507453013611, "grad_norm": 0.7858923077583313, "learning_rate": 1.4866218682879088e-06, "loss": 0.1119, "step": 5962 }, { "epoch": 1.9322747893713546, "grad_norm": 0.8455827236175537, "learning_rate": 1.4858223975981334e-06, "loss": 0.1151, "step": 5963 }, { "epoch": 1.932598833441348, "grad_norm": 0.759452223777771, "learning_rate": 1.4850230510338792e-06, "loss": 0.1092, "step": 5964 }, { "epoch": 1.9329228775113414, "grad_norm": 0.8318132758140564, "learning_rate": 1.4842238286929777e-06, "loss": 0.1105, "step": 5965 }, { "epoch": 1.933246921581335, "grad_norm": 0.8317754864692688, "learning_rate": 1.4834247306732457e-06, "loss": 0.1139, "step": 5966 }, { "epoch": 1.9335709656513287, "grad_norm": 0.7857781052589417, "learning_rate": 1.4826257570724856e-06, "loss": 0.1036, "step": 5967 }, { "epoch": 1.9338950097213221, "grad_norm": 0.8519753217697144, "learning_rate": 1.4818269079884845e-06, "loss": 0.1156, "step": 5968 }, { "epoch": 1.9342190537913155, "grad_norm": 0.7977727055549622, "learning_rate": 1.4810281835190132e-06, "loss": 0.1126, "step": 5969 }, { "epoch": 1.9345430978613092, "grad_norm": 0.846114993095398, "learning_rate": 1.4802295837618268e-06, "loss": 0.1158, "step": 5970 }, { "epoch": 1.9348671419313026, "grad_norm": 0.8024095296859741, "learning_rate": 1.479431108814668e-06, "loss": 0.1027, "step": 5971 }, { "epoch": 1.9351911860012962, "grad_norm": 0.7943775653839111, "learning_rate": 1.4786327587752608e-06, "loss": 0.1087, "step": 5972 }, { "epoch": 1.9355152300712897, "grad_norm": 0.8726453185081482, "learning_rate": 1.4778345337413174e-06, "loss": 0.1194, "step": 5973 }, { "epoch": 1.935839274141283, "grad_norm": 0.7967553734779358, "learning_rate": 1.4770364338105315e-06, "loss": 0.1114, "step": 5974 }, { "epoch": 1.9361633182112767, "grad_norm": 0.7967367172241211, "learning_rate": 1.4762384590805823e-06, "loss": 0.111, "step": 5975 }, { "epoch": 1.9364873622812704, "grad_norm": 0.8494221568107605, "learning_rate": 1.475440609649136e-06, "loss": 0.1203, "step": 5976 }, { "epoch": 1.9368114063512638, "grad_norm": 0.8069921135902405, "learning_rate": 1.4746428856138395e-06, "loss": 0.1079, "step": 5977 }, { "epoch": 1.9371354504212572, "grad_norm": 0.7718681693077087, "learning_rate": 1.4738452870723286e-06, "loss": 0.1092, "step": 5978 }, { "epoch": 1.9374594944912507, "grad_norm": 0.881232500076294, "learning_rate": 1.4730478141222194e-06, "loss": 0.1189, "step": 5979 }, { "epoch": 1.9377835385612443, "grad_norm": 0.8377132415771484, "learning_rate": 1.4722504668611172e-06, "loss": 0.1098, "step": 5980 }, { "epoch": 1.938107582631238, "grad_norm": 0.8156023621559143, "learning_rate": 1.4714532453866084e-06, "loss": 0.1097, "step": 5981 }, { "epoch": 1.9384316267012314, "grad_norm": 0.8057281374931335, "learning_rate": 1.4706561497962644e-06, "loss": 0.1108, "step": 5982 }, { "epoch": 1.9387556707712248, "grad_norm": 0.8118177056312561, "learning_rate": 1.4698591801876435e-06, "loss": 0.1214, "step": 5983 }, { "epoch": 1.9390797148412184, "grad_norm": 0.8250911831855774, "learning_rate": 1.4690623366582856e-06, "loss": 0.1141, "step": 5984 }, { "epoch": 1.939403758911212, "grad_norm": 0.7952162027359009, "learning_rate": 1.4682656193057189e-06, "loss": 0.107, "step": 5985 }, { "epoch": 1.9397278029812055, "grad_norm": 0.8406686782836914, "learning_rate": 1.4674690282274517e-06, "loss": 0.1182, "step": 5986 }, { "epoch": 1.940051847051199, "grad_norm": 0.8226285576820374, "learning_rate": 1.4666725635209794e-06, "loss": 0.1098, "step": 5987 }, { "epoch": 1.9403758911211924, "grad_norm": 0.835455596446991, "learning_rate": 1.4658762252837821e-06, "loss": 0.116, "step": 5988 }, { "epoch": 1.940699935191186, "grad_norm": 0.7987916469573975, "learning_rate": 1.4650800136133238e-06, "loss": 0.1066, "step": 5989 }, { "epoch": 1.9410239792611796, "grad_norm": 0.7740179896354675, "learning_rate": 1.4642839286070537e-06, "loss": 0.119, "step": 5990 }, { "epoch": 1.941348023331173, "grad_norm": 0.8301352262496948, "learning_rate": 1.4634879703624027e-06, "loss": 0.1089, "step": 5991 }, { "epoch": 1.9416720674011665, "grad_norm": 0.7866767644882202, "learning_rate": 1.4626921389767915e-06, "loss": 0.1096, "step": 5992 }, { "epoch": 1.94199611147116, "grad_norm": 0.783625066280365, "learning_rate": 1.4618964345476203e-06, "loss": 0.1066, "step": 5993 }, { "epoch": 1.9423201555411536, "grad_norm": 0.789161741733551, "learning_rate": 1.4611008571722748e-06, "loss": 0.1108, "step": 5994 }, { "epoch": 1.9426441996111472, "grad_norm": 0.7411786317825317, "learning_rate": 1.4603054069481282e-06, "loss": 0.0965, "step": 5995 }, { "epoch": 1.9429682436811406, "grad_norm": 0.8316660523414612, "learning_rate": 1.4595100839725338e-06, "loss": 0.1132, "step": 5996 }, { "epoch": 1.943292287751134, "grad_norm": 0.733279287815094, "learning_rate": 1.4587148883428337e-06, "loss": 0.1051, "step": 5997 }, { "epoch": 1.9436163318211277, "grad_norm": 0.7861854434013367, "learning_rate": 1.45791982015635e-06, "loss": 0.1092, "step": 5998 }, { "epoch": 1.9439403758911213, "grad_norm": 0.795117974281311, "learning_rate": 1.4571248795103921e-06, "loss": 0.1044, "step": 5999 }, { "epoch": 1.9442644199611148, "grad_norm": 0.7990328073501587, "learning_rate": 1.4563300665022534e-06, "loss": 0.1061, "step": 6000 }, { "epoch": 1.9445884640311082, "grad_norm": 0.8901631236076355, "learning_rate": 1.4555353812292105e-06, "loss": 0.1181, "step": 6001 }, { "epoch": 1.9449125081011016, "grad_norm": 0.7094663977622986, "learning_rate": 1.4547408237885262e-06, "loss": 0.0938, "step": 6002 }, { "epoch": 1.9452365521710953, "grad_norm": 0.8745992183685303, "learning_rate": 1.4539463942774462e-06, "loss": 0.1169, "step": 6003 }, { "epoch": 1.945560596241089, "grad_norm": 0.8128504157066345, "learning_rate": 1.4531520927932017e-06, "loss": 0.1117, "step": 6004 }, { "epoch": 1.9458846403110823, "grad_norm": 0.8003115057945251, "learning_rate": 1.452357919433006e-06, "loss": 0.1132, "step": 6005 }, { "epoch": 1.9462086843810757, "grad_norm": 0.7621777057647705, "learning_rate": 1.4515638742940585e-06, "loss": 0.1032, "step": 6006 }, { "epoch": 1.9465327284510694, "grad_norm": 0.794258713722229, "learning_rate": 1.4507699574735436e-06, "loss": 0.1058, "step": 6007 }, { "epoch": 1.9468567725210628, "grad_norm": 0.8062702417373657, "learning_rate": 1.4499761690686287e-06, "loss": 0.1082, "step": 6008 }, { "epoch": 1.9471808165910565, "grad_norm": 0.7925530672073364, "learning_rate": 1.4491825091764656e-06, "loss": 0.1128, "step": 6009 }, { "epoch": 1.9475048606610499, "grad_norm": 0.7610135078430176, "learning_rate": 1.4483889778941904e-06, "loss": 0.1008, "step": 6010 }, { "epoch": 1.9478289047310433, "grad_norm": 0.8870550394058228, "learning_rate": 1.447595575318924e-06, "loss": 0.1269, "step": 6011 }, { "epoch": 1.948152948801037, "grad_norm": 0.7672936320304871, "learning_rate": 1.4468023015477722e-06, "loss": 0.1077, "step": 6012 }, { "epoch": 1.9484769928710306, "grad_norm": 0.8201618790626526, "learning_rate": 1.446009156677822e-06, "loss": 0.1079, "step": 6013 }, { "epoch": 1.948801036941024, "grad_norm": 0.8383089303970337, "learning_rate": 1.4452161408061478e-06, "loss": 0.1203, "step": 6014 }, { "epoch": 1.9491250810110174, "grad_norm": 0.8672509789466858, "learning_rate": 1.4444232540298064e-06, "loss": 0.1179, "step": 6015 }, { "epoch": 1.9494491250810109, "grad_norm": 0.7988051176071167, "learning_rate": 1.44363049644584e-06, "loss": 0.1131, "step": 6016 }, { "epoch": 1.9497731691510045, "grad_norm": 0.7616187930107117, "learning_rate": 1.4428378681512755e-06, "loss": 0.0999, "step": 6017 }, { "epoch": 1.9500972132209982, "grad_norm": 0.7699881792068481, "learning_rate": 1.4420453692431197e-06, "loss": 0.1111, "step": 6018 }, { "epoch": 1.9504212572909916, "grad_norm": 0.7896310091018677, "learning_rate": 1.441252999818371e-06, "loss": 0.1061, "step": 6019 }, { "epoch": 1.950745301360985, "grad_norm": 0.7674881219863892, "learning_rate": 1.440460759974004e-06, "loss": 0.1035, "step": 6020 }, { "epoch": 1.9510693454309787, "grad_norm": 0.8250930309295654, "learning_rate": 1.4396686498069844e-06, "loss": 0.1178, "step": 6021 }, { "epoch": 1.9513933895009723, "grad_norm": 0.7929973006248474, "learning_rate": 1.4388766694142553e-06, "loss": 0.1058, "step": 6022 }, { "epoch": 1.9517174335709657, "grad_norm": 0.8182612061500549, "learning_rate": 1.4380848188927516e-06, "loss": 0.1129, "step": 6023 }, { "epoch": 1.9520414776409591, "grad_norm": 1.7397314310073853, "learning_rate": 1.4372930983393849e-06, "loss": 0.1156, "step": 6024 }, { "epoch": 1.9523655217109526, "grad_norm": 0.7889402508735657, "learning_rate": 1.4365015078510553e-06, "loss": 0.1028, "step": 6025 }, { "epoch": 1.9526895657809462, "grad_norm": 0.8027127385139465, "learning_rate": 1.4357100475246463e-06, "loss": 0.1084, "step": 6026 }, { "epoch": 1.9530136098509399, "grad_norm": 0.7677908539772034, "learning_rate": 1.4349187174570226e-06, "loss": 0.1017, "step": 6027 }, { "epoch": 1.9533376539209333, "grad_norm": 0.7761096954345703, "learning_rate": 1.4341275177450389e-06, "loss": 0.1089, "step": 6028 }, { "epoch": 1.9536616979909267, "grad_norm": 0.7991950511932373, "learning_rate": 1.4333364484855277e-06, "loss": 0.11, "step": 6029 }, { "epoch": 1.9539857420609201, "grad_norm": 0.7403672337532043, "learning_rate": 1.432545509775309e-06, "loss": 0.1019, "step": 6030 }, { "epoch": 1.9543097861309138, "grad_norm": 0.8399749398231506, "learning_rate": 1.4317547017111865e-06, "loss": 0.1143, "step": 6031 }, { "epoch": 1.9546338302009074, "grad_norm": 0.8288451433181763, "learning_rate": 1.4309640243899467e-06, "loss": 0.1136, "step": 6032 }, { "epoch": 1.9549578742709008, "grad_norm": 0.8094178438186646, "learning_rate": 1.4301734779083614e-06, "loss": 0.1064, "step": 6033 }, { "epoch": 1.9552819183408943, "grad_norm": 0.8080481290817261, "learning_rate": 1.4293830623631857e-06, "loss": 0.1077, "step": 6034 }, { "epoch": 1.955605962410888, "grad_norm": 0.8804945349693298, "learning_rate": 1.4285927778511598e-06, "loss": 0.1185, "step": 6035 }, { "epoch": 1.9559300064808816, "grad_norm": 0.7942977547645569, "learning_rate": 1.4278026244690046e-06, "loss": 0.1089, "step": 6036 }, { "epoch": 1.956254050550875, "grad_norm": 0.7964348196983337, "learning_rate": 1.427012602313429e-06, "loss": 0.1145, "step": 6037 }, { "epoch": 1.9565780946208684, "grad_norm": 0.8437153697013855, "learning_rate": 1.4262227114811233e-06, "loss": 0.1127, "step": 6038 }, { "epoch": 1.9569021386908618, "grad_norm": 0.7795710563659668, "learning_rate": 1.4254329520687626e-06, "loss": 0.109, "step": 6039 }, { "epoch": 1.9572261827608555, "grad_norm": 0.760061502456665, "learning_rate": 1.4246433241730062e-06, "loss": 0.111, "step": 6040 }, { "epoch": 1.9575502268308491, "grad_norm": 0.8100674152374268, "learning_rate": 1.4238538278904973e-06, "loss": 0.1036, "step": 6041 }, { "epoch": 1.9578742709008425, "grad_norm": 0.7496659159660339, "learning_rate": 1.4230644633178603e-06, "loss": 0.1059, "step": 6042 }, { "epoch": 1.958198314970836, "grad_norm": 0.7997440695762634, "learning_rate": 1.4222752305517093e-06, "loss": 0.1098, "step": 6043 }, { "epoch": 1.9585223590408296, "grad_norm": 0.885632336139679, "learning_rate": 1.421486129688635e-06, "loss": 0.1222, "step": 6044 }, { "epoch": 1.958846403110823, "grad_norm": 0.7634774446487427, "learning_rate": 1.4206971608252196e-06, "loss": 0.1074, "step": 6045 }, { "epoch": 1.9591704471808167, "grad_norm": 0.8368650078773499, "learning_rate": 1.4199083240580218e-06, "loss": 0.1131, "step": 6046 }, { "epoch": 1.95949449125081, "grad_norm": 0.8191322684288025, "learning_rate": 1.41911961948359e-06, "loss": 0.1075, "step": 6047 }, { "epoch": 1.9598185353208035, "grad_norm": 0.8034201860427856, "learning_rate": 1.4183310471984532e-06, "loss": 0.1084, "step": 6048 }, { "epoch": 1.9601425793907972, "grad_norm": 0.7873152494430542, "learning_rate": 1.4175426072991234e-06, "loss": 0.1093, "step": 6049 }, { "epoch": 1.9604666234607908, "grad_norm": 0.7949634790420532, "learning_rate": 1.416754299882101e-06, "loss": 0.109, "step": 6050 }, { "epoch": 1.9607906675307842, "grad_norm": 0.838327944278717, "learning_rate": 1.415966125043864e-06, "loss": 0.1069, "step": 6051 }, { "epoch": 1.9611147116007777, "grad_norm": 0.7817742824554443, "learning_rate": 1.415178082880881e-06, "loss": 0.1005, "step": 6052 }, { "epoch": 1.961438755670771, "grad_norm": 0.7917423844337463, "learning_rate": 1.4143901734895973e-06, "loss": 0.1064, "step": 6053 }, { "epoch": 1.9617627997407647, "grad_norm": 0.8067330718040466, "learning_rate": 1.4136023969664471e-06, "loss": 0.1107, "step": 6054 }, { "epoch": 1.9620868438107584, "grad_norm": 0.7707548141479492, "learning_rate": 1.4128147534078469e-06, "loss": 0.1064, "step": 6055 }, { "epoch": 1.9624108878807518, "grad_norm": 0.8027534484863281, "learning_rate": 1.4120272429101955e-06, "loss": 0.1102, "step": 6056 }, { "epoch": 1.9627349319507452, "grad_norm": 0.7992814779281616, "learning_rate": 1.4112398655698772e-06, "loss": 0.1092, "step": 6057 }, { "epoch": 1.9630589760207389, "grad_norm": 0.7964221835136414, "learning_rate": 1.4104526214832595e-06, "loss": 0.1078, "step": 6058 }, { "epoch": 1.9633830200907323, "grad_norm": 0.8293501138687134, "learning_rate": 1.4096655107466943e-06, "loss": 0.113, "step": 6059 }, { "epoch": 1.963707064160726, "grad_norm": 0.7957487106323242, "learning_rate": 1.4088785334565145e-06, "loss": 0.1143, "step": 6060 }, { "epoch": 1.9640311082307194, "grad_norm": 0.8345206379890442, "learning_rate": 1.4080916897090391e-06, "loss": 0.1147, "step": 6061 }, { "epoch": 1.9643551523007128, "grad_norm": 0.8262994289398193, "learning_rate": 1.4073049796005705e-06, "loss": 0.1181, "step": 6062 }, { "epoch": 1.9646791963707064, "grad_norm": 0.8111175894737244, "learning_rate": 1.4065184032273942e-06, "loss": 0.1137, "step": 6063 }, { "epoch": 1.9650032404407, "grad_norm": 0.8582708239555359, "learning_rate": 1.4057319606857795e-06, "loss": 0.1181, "step": 6064 }, { "epoch": 1.9653272845106935, "grad_norm": 0.7551923394203186, "learning_rate": 1.4049456520719805e-06, "loss": 0.1078, "step": 6065 }, { "epoch": 1.965651328580687, "grad_norm": 0.7485328316688538, "learning_rate": 1.404159477482231e-06, "loss": 0.0979, "step": 6066 }, { "epoch": 1.9659753726506803, "grad_norm": 0.7505809664726257, "learning_rate": 1.403373437012755e-06, "loss": 0.1012, "step": 6067 }, { "epoch": 1.966299416720674, "grad_norm": 0.8598964214324951, "learning_rate": 1.4025875307597528e-06, "loss": 0.1112, "step": 6068 }, { "epoch": 1.9666234607906676, "grad_norm": 0.7594893574714661, "learning_rate": 1.4018017588194132e-06, "loss": 0.098, "step": 6069 }, { "epoch": 1.966947504860661, "grad_norm": 0.7838597893714905, "learning_rate": 1.401016121287907e-06, "loss": 0.1124, "step": 6070 }, { "epoch": 1.9672715489306545, "grad_norm": 0.802146852016449, "learning_rate": 1.4002306182613885e-06, "loss": 0.1161, "step": 6071 }, { "epoch": 1.9675955930006481, "grad_norm": 0.7836012840270996, "learning_rate": 1.3994452498359963e-06, "loss": 0.1055, "step": 6072 }, { "epoch": 1.9679196370706418, "grad_norm": 0.7942901253700256, "learning_rate": 1.39866001610785e-06, "loss": 0.115, "step": 6073 }, { "epoch": 1.9682436811406352, "grad_norm": 0.765285313129425, "learning_rate": 1.3978749171730577e-06, "loss": 0.1062, "step": 6074 }, { "epoch": 1.9685677252106286, "grad_norm": 0.749472975730896, "learning_rate": 1.397089953127704e-06, "loss": 0.1019, "step": 6075 }, { "epoch": 1.968891769280622, "grad_norm": 0.8121153116226196, "learning_rate": 1.3963051240678652e-06, "loss": 0.1119, "step": 6076 }, { "epoch": 1.9692158133506157, "grad_norm": 0.8156499862670898, "learning_rate": 1.3955204300895937e-06, "loss": 0.1091, "step": 6077 }, { "epoch": 1.9695398574206093, "grad_norm": 0.796762228012085, "learning_rate": 1.3947358712889292e-06, "loss": 0.1114, "step": 6078 }, { "epoch": 1.9698639014906028, "grad_norm": 0.7823910713195801, "learning_rate": 1.3939514477618944e-06, "loss": 0.1058, "step": 6079 }, { "epoch": 1.9701879455605962, "grad_norm": 0.798762321472168, "learning_rate": 1.3931671596044946e-06, "loss": 0.1052, "step": 6080 }, { "epoch": 1.9705119896305896, "grad_norm": 0.761111319065094, "learning_rate": 1.392383006912721e-06, "loss": 0.1038, "step": 6081 }, { "epoch": 1.9708360337005832, "grad_norm": 0.8409186005592346, "learning_rate": 1.3915989897825424e-06, "loss": 0.1129, "step": 6082 }, { "epoch": 1.971160077770577, "grad_norm": 0.7887939214706421, "learning_rate": 1.3908151083099195e-06, "loss": 0.1089, "step": 6083 }, { "epoch": 1.9714841218405703, "grad_norm": 0.8472685217857361, "learning_rate": 1.3900313625907886e-06, "loss": 0.116, "step": 6084 }, { "epoch": 1.9718081659105637, "grad_norm": 0.8228740096092224, "learning_rate": 1.3892477527210734e-06, "loss": 0.1149, "step": 6085 }, { "epoch": 1.9721322099805574, "grad_norm": 0.8421080708503723, "learning_rate": 1.3884642787966806e-06, "loss": 0.1144, "step": 6086 }, { "epoch": 1.972456254050551, "grad_norm": 0.8418703675270081, "learning_rate": 1.3876809409134994e-06, "loss": 0.1235, "step": 6087 }, { "epoch": 1.9727802981205445, "grad_norm": 0.7663489580154419, "learning_rate": 1.3868977391674033e-06, "loss": 0.1077, "step": 6088 }, { "epoch": 1.9731043421905379, "grad_norm": 0.7076537013053894, "learning_rate": 1.386114673654248e-06, "loss": 0.1008, "step": 6089 }, { "epoch": 1.9734283862605313, "grad_norm": 0.904552698135376, "learning_rate": 1.3853317444698744e-06, "loss": 0.1294, "step": 6090 }, { "epoch": 1.973752430330525, "grad_norm": 0.8387356400489807, "learning_rate": 1.3845489517101036e-06, "loss": 0.1183, "step": 6091 }, { "epoch": 1.9740764744005186, "grad_norm": 0.824189305305481, "learning_rate": 1.3837662954707426e-06, "loss": 0.1127, "step": 6092 }, { "epoch": 1.974400518470512, "grad_norm": 0.8098284006118774, "learning_rate": 1.3829837758475808e-06, "loss": 0.113, "step": 6093 }, { "epoch": 1.9747245625405054, "grad_norm": 0.752780020236969, "learning_rate": 1.3822013929363914e-06, "loss": 0.1019, "step": 6094 }, { "epoch": 1.975048606610499, "grad_norm": 0.8036346435546875, "learning_rate": 1.3814191468329307e-06, "loss": 0.1091, "step": 6095 }, { "epoch": 1.9753726506804925, "grad_norm": 0.8747115135192871, "learning_rate": 1.3806370376329388e-06, "loss": 0.1105, "step": 6096 }, { "epoch": 1.9756966947504861, "grad_norm": 0.8383525013923645, "learning_rate": 1.3798550654321347e-06, "loss": 0.1199, "step": 6097 }, { "epoch": 1.9760207388204796, "grad_norm": 0.852014422416687, "learning_rate": 1.379073230326229e-06, "loss": 0.1255, "step": 6098 }, { "epoch": 1.976344782890473, "grad_norm": 0.8316348791122437, "learning_rate": 1.3782915324109075e-06, "loss": 0.1131, "step": 6099 }, { "epoch": 1.9766688269604666, "grad_norm": 0.8360430598258972, "learning_rate": 1.3775099717818432e-06, "loss": 0.1112, "step": 6100 }, { "epoch": 1.9769928710304603, "grad_norm": 0.8059060573577881, "learning_rate": 1.376728548534692e-06, "loss": 0.1118, "step": 6101 }, { "epoch": 1.9773169151004537, "grad_norm": 0.8584257364273071, "learning_rate": 1.3759472627650926e-06, "loss": 0.117, "step": 6102 }, { "epoch": 1.9776409591704471, "grad_norm": 0.8232131004333496, "learning_rate": 1.3751661145686673e-06, "loss": 0.1163, "step": 6103 }, { "epoch": 1.9779650032404406, "grad_norm": 0.770917534828186, "learning_rate": 1.3743851040410183e-06, "loss": 0.1104, "step": 6104 }, { "epoch": 1.9782890473104342, "grad_norm": 0.7977389097213745, "learning_rate": 1.3736042312777381e-06, "loss": 0.1052, "step": 6105 }, { "epoch": 1.9786130913804278, "grad_norm": 0.7602249979972839, "learning_rate": 1.3728234963743931e-06, "loss": 0.1044, "step": 6106 }, { "epoch": 1.9789371354504213, "grad_norm": 0.7684023380279541, "learning_rate": 1.3720428994265427e-06, "loss": 0.1059, "step": 6107 }, { "epoch": 1.9792611795204147, "grad_norm": 0.8109222054481506, "learning_rate": 1.3712624405297209e-06, "loss": 0.1185, "step": 6108 }, { "epoch": 1.9795852235904083, "grad_norm": 0.7657626271247864, "learning_rate": 1.3704821197794491e-06, "loss": 0.1007, "step": 6109 }, { "epoch": 1.9799092676604018, "grad_norm": 0.831357479095459, "learning_rate": 1.369701937271231e-06, "loss": 0.1095, "step": 6110 }, { "epoch": 1.9802333117303954, "grad_norm": 0.832591712474823, "learning_rate": 1.3689218931005543e-06, "loss": 0.1175, "step": 6111 }, { "epoch": 1.9805573558003888, "grad_norm": 0.7991769313812256, "learning_rate": 1.368141987362889e-06, "loss": 0.1078, "step": 6112 }, { "epoch": 1.9808813998703823, "grad_norm": 0.8150991797447205, "learning_rate": 1.3673622201536852e-06, "loss": 0.1111, "step": 6113 }, { "epoch": 1.981205443940376, "grad_norm": 0.849617600440979, "learning_rate": 1.3665825915683829e-06, "loss": 0.1197, "step": 6114 }, { "epoch": 1.9815294880103695, "grad_norm": 0.8102957606315613, "learning_rate": 1.3658031017023977e-06, "loss": 0.1076, "step": 6115 }, { "epoch": 1.981853532080363, "grad_norm": 0.8073728680610657, "learning_rate": 1.3650237506511333e-06, "loss": 0.1071, "step": 6116 }, { "epoch": 1.9821775761503564, "grad_norm": 0.7590360045433044, "learning_rate": 1.3642445385099746e-06, "loss": 0.1061, "step": 6117 }, { "epoch": 1.9825016202203498, "grad_norm": 0.7506588101387024, "learning_rate": 1.363465465374289e-06, "loss": 0.0982, "step": 6118 }, { "epoch": 1.9828256642903435, "grad_norm": 0.8684452772140503, "learning_rate": 1.362686531339428e-06, "loss": 0.1182, "step": 6119 }, { "epoch": 1.983149708360337, "grad_norm": 0.8147026896476746, "learning_rate": 1.3619077365007266e-06, "loss": 0.1093, "step": 6120 }, { "epoch": 1.9834737524303305, "grad_norm": 0.796436607837677, "learning_rate": 1.3611290809534997e-06, "loss": 0.1095, "step": 6121 }, { "epoch": 1.983797796500324, "grad_norm": 0.7356460094451904, "learning_rate": 1.3603505647930481e-06, "loss": 0.1034, "step": 6122 }, { "epoch": 1.9841218405703176, "grad_norm": 0.7705609202384949, "learning_rate": 1.3595721881146548e-06, "loss": 0.1086, "step": 6123 }, { "epoch": 1.9844458846403112, "grad_norm": 0.7775025963783264, "learning_rate": 1.3587939510135856e-06, "loss": 0.1098, "step": 6124 }, { "epoch": 1.9847699287103047, "grad_norm": 0.8204125761985779, "learning_rate": 1.3580158535850884e-06, "loss": 0.1138, "step": 6125 }, { "epoch": 1.985093972780298, "grad_norm": 0.7840316891670227, "learning_rate": 1.357237895924396e-06, "loss": 0.1069, "step": 6126 }, { "epoch": 1.9854180168502915, "grad_norm": 0.730146586894989, "learning_rate": 1.3564600781267234e-06, "loss": 0.1001, "step": 6127 }, { "epoch": 1.9857420609202852, "grad_norm": 0.776727557182312, "learning_rate": 1.3556824002872648e-06, "loss": 0.109, "step": 6128 }, { "epoch": 1.9860661049902788, "grad_norm": 0.8463088274002075, "learning_rate": 1.3549048625012046e-06, "loss": 0.1204, "step": 6129 }, { "epoch": 1.9863901490602722, "grad_norm": 0.7711324691772461, "learning_rate": 1.354127464863703e-06, "loss": 0.1085, "step": 6130 }, { "epoch": 1.9867141931302656, "grad_norm": 0.8014845848083496, "learning_rate": 1.3533502074699065e-06, "loss": 0.1058, "step": 6131 }, { "epoch": 1.987038237200259, "grad_norm": 0.8500660061836243, "learning_rate": 1.3525730904149443e-06, "loss": 0.1129, "step": 6132 }, { "epoch": 1.9873622812702527, "grad_norm": 0.7671992778778076, "learning_rate": 1.351796113793928e-06, "loss": 0.1037, "step": 6133 }, { "epoch": 1.9876863253402464, "grad_norm": 0.7462871074676514, "learning_rate": 1.3510192777019527e-06, "loss": 0.1031, "step": 6134 }, { "epoch": 1.9880103694102398, "grad_norm": 0.7898198366165161, "learning_rate": 1.3502425822340925e-06, "loss": 0.1099, "step": 6135 }, { "epoch": 1.9883344134802332, "grad_norm": 0.8464086651802063, "learning_rate": 1.3494660274854122e-06, "loss": 0.1139, "step": 6136 }, { "epoch": 1.9886584575502269, "grad_norm": 0.7823293209075928, "learning_rate": 1.3486896135509503e-06, "loss": 0.1068, "step": 6137 }, { "epoch": 1.9889825016202205, "grad_norm": 0.7849881052970886, "learning_rate": 1.3479133405257355e-06, "loss": 0.1032, "step": 6138 }, { "epoch": 1.989306545690214, "grad_norm": 0.7832697629928589, "learning_rate": 1.3471372085047743e-06, "loss": 0.1061, "step": 6139 }, { "epoch": 1.9896305897602073, "grad_norm": 0.8261182308197021, "learning_rate": 1.3463612175830578e-06, "loss": 0.1112, "step": 6140 }, { "epoch": 1.9899546338302008, "grad_norm": 0.777273952960968, "learning_rate": 1.3455853678555605e-06, "loss": 0.1028, "step": 6141 }, { "epoch": 1.9902786779001944, "grad_norm": 0.7343865036964417, "learning_rate": 1.3448096594172383e-06, "loss": 0.0994, "step": 6142 }, { "epoch": 1.990602721970188, "grad_norm": 0.8520564436912537, "learning_rate": 1.344034092363032e-06, "loss": 0.1064, "step": 6143 }, { "epoch": 1.9909267660401815, "grad_norm": 0.7399676442146301, "learning_rate": 1.343258666787861e-06, "loss": 0.1009, "step": 6144 }, { "epoch": 1.991250810110175, "grad_norm": 0.8225117325782776, "learning_rate": 1.3424833827866312e-06, "loss": 0.1151, "step": 6145 }, { "epoch": 1.9915748541801686, "grad_norm": 0.8109341859817505, "learning_rate": 1.3417082404542295e-06, "loss": 0.1099, "step": 6146 }, { "epoch": 1.991898898250162, "grad_norm": 0.795943021774292, "learning_rate": 1.3409332398855263e-06, "loss": 0.107, "step": 6147 }, { "epoch": 1.9922229423201556, "grad_norm": 0.797545313835144, "learning_rate": 1.3401583811753735e-06, "loss": 0.1094, "step": 6148 }, { "epoch": 1.992546986390149, "grad_norm": 0.7752687931060791, "learning_rate": 1.339383664418607e-06, "loss": 0.0992, "step": 6149 }, { "epoch": 1.9928710304601425, "grad_norm": 0.7549872398376465, "learning_rate": 1.3386090897100442e-06, "loss": 0.0952, "step": 6150 }, { "epoch": 1.9931950745301361, "grad_norm": 0.7405936121940613, "learning_rate": 1.3378346571444866e-06, "loss": 0.1015, "step": 6151 }, { "epoch": 1.9935191186001298, "grad_norm": 0.7260972261428833, "learning_rate": 1.3370603668167156e-06, "loss": 0.1032, "step": 6152 }, { "epoch": 1.9938431626701232, "grad_norm": 0.8044086694717407, "learning_rate": 1.3362862188214977e-06, "loss": 0.1104, "step": 6153 }, { "epoch": 1.9941672067401166, "grad_norm": 0.7475235462188721, "learning_rate": 1.3355122132535806e-06, "loss": 0.1031, "step": 6154 }, { "epoch": 1.99449125081011, "grad_norm": 0.7913709282875061, "learning_rate": 1.3347383502076955e-06, "loss": 0.1115, "step": 6155 }, { "epoch": 1.9948152948801037, "grad_norm": 0.8162972927093506, "learning_rate": 1.333964629778556e-06, "loss": 0.1131, "step": 6156 }, { "epoch": 1.9951393389500973, "grad_norm": 0.8377811908721924, "learning_rate": 1.3331910520608576e-06, "loss": 0.1199, "step": 6157 }, { "epoch": 1.9954633830200907, "grad_norm": 0.8220179677009583, "learning_rate": 1.3324176171492798e-06, "loss": 0.117, "step": 6158 }, { "epoch": 1.9957874270900842, "grad_norm": 0.8617610335350037, "learning_rate": 1.3316443251384808e-06, "loss": 0.1204, "step": 6159 }, { "epoch": 1.9961114711600778, "grad_norm": 0.8488910794258118, "learning_rate": 1.3308711761231074e-06, "loss": 0.1157, "step": 6160 }, { "epoch": 1.9964355152300715, "grad_norm": 0.7747528553009033, "learning_rate": 1.3300981701977834e-06, "loss": 0.105, "step": 6161 }, { "epoch": 1.9967595593000649, "grad_norm": 0.796110212802887, "learning_rate": 1.3293253074571178e-06, "loss": 0.1033, "step": 6162 }, { "epoch": 1.9970836033700583, "grad_norm": 0.8537262082099915, "learning_rate": 1.3285525879957011e-06, "loss": 0.1172, "step": 6163 }, { "epoch": 1.9974076474400517, "grad_norm": 0.7868740558624268, "learning_rate": 1.3277800119081077e-06, "loss": 0.1106, "step": 6164 }, { "epoch": 1.9977316915100454, "grad_norm": 0.8078975677490234, "learning_rate": 1.3270075792888937e-06, "loss": 0.113, "step": 6165 }, { "epoch": 1.998055735580039, "grad_norm": 0.8577303886413574, "learning_rate": 1.3262352902325944e-06, "loss": 0.1162, "step": 6166 }, { "epoch": 1.9983797796500324, "grad_norm": 0.7342814803123474, "learning_rate": 1.325463144833735e-06, "loss": 0.1032, "step": 6167 }, { "epoch": 1.9987038237200259, "grad_norm": 0.7626562118530273, "learning_rate": 1.324691143186814e-06, "loss": 0.1019, "step": 6168 }, { "epoch": 1.9990278677900193, "grad_norm": 0.829145073890686, "learning_rate": 1.323919285386321e-06, "loss": 0.1126, "step": 6169 }, { "epoch": 1.999351911860013, "grad_norm": 0.7580152153968811, "learning_rate": 1.3231475715267217e-06, "loss": 0.11, "step": 6170 }, { "epoch": 1.9996759559300066, "grad_norm": 0.7596506476402283, "learning_rate": 1.3223760017024661e-06, "loss": 0.1, "step": 6171 }, { "epoch": 2.0, "grad_norm": 0.8046398758888245, "learning_rate": 1.3216045760079882e-06, "loss": 0.1231, "step": 6172 }, { "epoch": 2.0003240440699934, "grad_norm": 0.6925712823867798, "learning_rate": 1.3208332945377022e-06, "loss": 0.0823, "step": 6173 }, { "epoch": 2.000648088139987, "grad_norm": 0.6641772985458374, "learning_rate": 1.3200621573860068e-06, "loss": 0.0814, "step": 6174 }, { "epoch": 2.0009721322099807, "grad_norm": 0.6670467257499695, "learning_rate": 1.3192911646472796e-06, "loss": 0.0787, "step": 6175 }, { "epoch": 2.001296176279974, "grad_norm": 0.6554123759269714, "learning_rate": 1.3185203164158838e-06, "loss": 0.0778, "step": 6176 }, { "epoch": 2.0016202203499676, "grad_norm": 0.7232511639595032, "learning_rate": 1.3177496127861635e-06, "loss": 0.0843, "step": 6177 }, { "epoch": 2.001944264419961, "grad_norm": 0.6921700835227966, "learning_rate": 1.3169790538524457e-06, "loss": 0.0815, "step": 6178 }, { "epoch": 2.002268308489955, "grad_norm": 0.6646423935890198, "learning_rate": 1.316208639709039e-06, "loss": 0.0795, "step": 6179 }, { "epoch": 2.0025923525599483, "grad_norm": 0.6958689093589783, "learning_rate": 1.3154383704502349e-06, "loss": 0.082, "step": 6180 }, { "epoch": 2.0029163966299417, "grad_norm": 0.6785232424736023, "learning_rate": 1.3146682461703069e-06, "loss": 0.0738, "step": 6181 }, { "epoch": 2.003240440699935, "grad_norm": 0.6926654577255249, "learning_rate": 1.3138982669635117e-06, "loss": 0.0751, "step": 6182 }, { "epoch": 2.0035644847699285, "grad_norm": 0.6905853152275085, "learning_rate": 1.313128432924084e-06, "loss": 0.0783, "step": 6183 }, { "epoch": 2.0038885288399224, "grad_norm": 0.731368362903595, "learning_rate": 1.3123587441462487e-06, "loss": 0.0816, "step": 6184 }, { "epoch": 2.004212572909916, "grad_norm": 0.7648592591285706, "learning_rate": 1.3115892007242046e-06, "loss": 0.0748, "step": 6185 }, { "epoch": 2.0045366169799093, "grad_norm": 0.706275999546051, "learning_rate": 1.3108198027521374e-06, "loss": 0.0732, "step": 6186 }, { "epoch": 2.0048606610499027, "grad_norm": 0.7441841959953308, "learning_rate": 1.3100505503242156e-06, "loss": 0.0762, "step": 6187 }, { "epoch": 2.005184705119896, "grad_norm": 0.7099791765213013, "learning_rate": 1.3092814435345845e-06, "loss": 0.0733, "step": 6188 }, { "epoch": 2.00550874918989, "grad_norm": 0.781180739402771, "learning_rate": 1.3085124824773797e-06, "loss": 0.0818, "step": 6189 }, { "epoch": 2.0058327932598834, "grad_norm": 0.7897968888282776, "learning_rate": 1.307743667246711e-06, "loss": 0.084, "step": 6190 }, { "epoch": 2.006156837329877, "grad_norm": 0.7949253916740417, "learning_rate": 1.306974997936677e-06, "loss": 0.0843, "step": 6191 }, { "epoch": 2.0064808813998702, "grad_norm": 0.8092707395553589, "learning_rate": 1.3062064746413522e-06, "loss": 0.0783, "step": 6192 }, { "epoch": 2.006804925469864, "grad_norm": 0.9016462564468384, "learning_rate": 1.3054380974547998e-06, "loss": 0.0845, "step": 6193 }, { "epoch": 2.0071289695398575, "grad_norm": 0.8049711585044861, "learning_rate": 1.3046698664710595e-06, "loss": 0.076, "step": 6194 }, { "epoch": 2.007453013609851, "grad_norm": 0.8476788401603699, "learning_rate": 1.3039017817841553e-06, "loss": 0.0785, "step": 6195 }, { "epoch": 2.0077770576798444, "grad_norm": 0.860939621925354, "learning_rate": 1.3031338434880952e-06, "loss": 0.0837, "step": 6196 }, { "epoch": 2.008101101749838, "grad_norm": 0.8430168032646179, "learning_rate": 1.3023660516768638e-06, "loss": 0.088, "step": 6197 }, { "epoch": 2.0084251458198317, "grad_norm": 0.8520883917808533, "learning_rate": 1.301598406444436e-06, "loss": 0.0808, "step": 6198 }, { "epoch": 2.008749189889825, "grad_norm": 0.8094761967658997, "learning_rate": 1.3008309078847605e-06, "loss": 0.0755, "step": 6199 }, { "epoch": 2.0090732339598185, "grad_norm": 0.8996002674102783, "learning_rate": 1.3000635560917735e-06, "loss": 0.0789, "step": 6200 }, { "epoch": 2.009397278029812, "grad_norm": 0.8273560404777527, "learning_rate": 1.2992963511593904e-06, "loss": 0.0801, "step": 6201 }, { "epoch": 2.0097213220998054, "grad_norm": 0.9030359983444214, "learning_rate": 1.2985292931815105e-06, "loss": 0.0764, "step": 6202 }, { "epoch": 2.0100453661697992, "grad_norm": 0.9174606800079346, "learning_rate": 1.2977623822520141e-06, "loss": 0.083, "step": 6203 }, { "epoch": 2.0103694102397927, "grad_norm": 0.8109633326530457, "learning_rate": 1.296995618464763e-06, "loss": 0.0764, "step": 6204 }, { "epoch": 2.010693454309786, "grad_norm": 0.7600975632667542, "learning_rate": 1.2962290019136028e-06, "loss": 0.0746, "step": 6205 }, { "epoch": 2.0110174983797795, "grad_norm": 0.7989012002944946, "learning_rate": 1.2954625326923602e-06, "loss": 0.0745, "step": 6206 }, { "epoch": 2.0113415424497734, "grad_norm": 0.8223599195480347, "learning_rate": 1.294696210894842e-06, "loss": 0.08, "step": 6207 }, { "epoch": 2.011665586519767, "grad_norm": 0.786533772945404, "learning_rate": 1.2939300366148389e-06, "loss": 0.0758, "step": 6208 }, { "epoch": 2.01198963058976, "grad_norm": 0.913048505783081, "learning_rate": 1.2931640099461237e-06, "loss": 0.0861, "step": 6209 }, { "epoch": 2.0123136746597536, "grad_norm": 0.8590562343597412, "learning_rate": 1.2923981309824507e-06, "loss": 0.0855, "step": 6210 }, { "epoch": 2.012637718729747, "grad_norm": 0.819560706615448, "learning_rate": 1.291632399817557e-06, "loss": 0.0767, "step": 6211 }, { "epoch": 2.012961762799741, "grad_norm": 0.797325849533081, "learning_rate": 1.2908668165451577e-06, "loss": 0.0761, "step": 6212 }, { "epoch": 2.0132858068697344, "grad_norm": 0.8787040710449219, "learning_rate": 1.290101381258957e-06, "loss": 0.0814, "step": 6213 }, { "epoch": 2.0136098509397278, "grad_norm": 0.823753297328949, "learning_rate": 1.289336094052632e-06, "loss": 0.0816, "step": 6214 }, { "epoch": 2.013933895009721, "grad_norm": 0.8768038749694824, "learning_rate": 1.288570955019851e-06, "loss": 0.0862, "step": 6215 }, { "epoch": 2.0142579390797146, "grad_norm": 0.7695865035057068, "learning_rate": 1.2878059642542566e-06, "loss": 0.0798, "step": 6216 }, { "epoch": 2.0145819831497085, "grad_norm": 0.7729963064193726, "learning_rate": 1.2870411218494778e-06, "loss": 0.074, "step": 6217 }, { "epoch": 2.014906027219702, "grad_norm": 0.8531014919281006, "learning_rate": 1.2862764278991236e-06, "loss": 0.0798, "step": 6218 }, { "epoch": 2.0152300712896953, "grad_norm": 0.8081734776496887, "learning_rate": 1.2855118824967833e-06, "loss": 0.0779, "step": 6219 }, { "epoch": 2.0155541153596888, "grad_norm": 0.8807827830314636, "learning_rate": 1.2847474857360332e-06, "loss": 0.0787, "step": 6220 }, { "epoch": 2.0158781594296826, "grad_norm": 0.8906676769256592, "learning_rate": 1.2839832377104245e-06, "loss": 0.0909, "step": 6221 }, { "epoch": 2.016202203499676, "grad_norm": 0.7742086052894592, "learning_rate": 1.2832191385134972e-06, "loss": 0.0733, "step": 6222 }, { "epoch": 2.0165262475696695, "grad_norm": 0.917332112789154, "learning_rate": 1.2824551882387664e-06, "loss": 0.0847, "step": 6223 }, { "epoch": 2.016850291639663, "grad_norm": 0.787459135055542, "learning_rate": 1.2816913869797353e-06, "loss": 0.0773, "step": 6224 }, { "epoch": 2.0171743357096563, "grad_norm": 0.7695854902267456, "learning_rate": 1.2809277348298838e-06, "loss": 0.0735, "step": 6225 }, { "epoch": 2.01749837977965, "grad_norm": 0.8414837718009949, "learning_rate": 1.2801642318826759e-06, "loss": 0.0858, "step": 6226 }, { "epoch": 2.0178224238496436, "grad_norm": 0.7882809042930603, "learning_rate": 1.279400878231557e-06, "loss": 0.077, "step": 6227 }, { "epoch": 2.018146467919637, "grad_norm": 0.883886992931366, "learning_rate": 1.2786376739699547e-06, "loss": 0.0865, "step": 6228 }, { "epoch": 2.0184705119896305, "grad_norm": 0.7620802521705627, "learning_rate": 1.2778746191912778e-06, "loss": 0.0714, "step": 6229 }, { "epoch": 2.0187945560596243, "grad_norm": 0.7754296064376831, "learning_rate": 1.2771117139889155e-06, "loss": 0.0763, "step": 6230 }, { "epoch": 2.0191186001296177, "grad_norm": 0.8185173869132996, "learning_rate": 1.276348958456241e-06, "loss": 0.0796, "step": 6231 }, { "epoch": 2.019442644199611, "grad_norm": 0.8952303528785706, "learning_rate": 1.2755863526866087e-06, "loss": 0.0823, "step": 6232 }, { "epoch": 2.0197666882696046, "grad_norm": 0.8771202564239502, "learning_rate": 1.2748238967733529e-06, "loss": 0.0848, "step": 6233 }, { "epoch": 2.020090732339598, "grad_norm": 0.8356162309646606, "learning_rate": 1.2740615908097915e-06, "loss": 0.077, "step": 6234 }, { "epoch": 2.020414776409592, "grad_norm": 0.8438013195991516, "learning_rate": 1.2732994348892237e-06, "loss": 0.0797, "step": 6235 }, { "epoch": 2.0207388204795853, "grad_norm": 0.8168169856071472, "learning_rate": 1.2725374291049296e-06, "loss": 0.0781, "step": 6236 }, { "epoch": 2.0210628645495787, "grad_norm": 0.8462203741073608, "learning_rate": 1.2717755735501725e-06, "loss": 0.0829, "step": 6237 }, { "epoch": 2.021386908619572, "grad_norm": 0.8509506583213806, "learning_rate": 1.2710138683181937e-06, "loss": 0.079, "step": 6238 }, { "epoch": 2.0217109526895656, "grad_norm": 0.808458149433136, "learning_rate": 1.2702523135022205e-06, "loss": 0.072, "step": 6239 }, { "epoch": 2.0220349967595594, "grad_norm": 0.8539109826087952, "learning_rate": 1.2694909091954588e-06, "loss": 0.0844, "step": 6240 }, { "epoch": 2.022359040829553, "grad_norm": 0.9025552272796631, "learning_rate": 1.2687296554910978e-06, "loss": 0.0766, "step": 6241 }, { "epoch": 2.0226830848995463, "grad_norm": 0.8735927939414978, "learning_rate": 1.2679685524823082e-06, "loss": 0.078, "step": 6242 }, { "epoch": 2.0230071289695397, "grad_norm": 0.8082770705223083, "learning_rate": 1.2672076002622386e-06, "loss": 0.0741, "step": 6243 }, { "epoch": 2.0233311730395336, "grad_norm": 0.8505511283874512, "learning_rate": 1.2664467989240265e-06, "loss": 0.0788, "step": 6244 }, { "epoch": 2.023655217109527, "grad_norm": 0.854013979434967, "learning_rate": 1.2656861485607828e-06, "loss": 0.0793, "step": 6245 }, { "epoch": 2.0239792611795204, "grad_norm": 0.9275755882263184, "learning_rate": 1.264925649265607e-06, "loss": 0.081, "step": 6246 }, { "epoch": 2.024303305249514, "grad_norm": 0.8417896032333374, "learning_rate": 1.2641653011315746e-06, "loss": 0.0758, "step": 6247 }, { "epoch": 2.0246273493195073, "grad_norm": 0.8762497901916504, "learning_rate": 1.2634051042517453e-06, "loss": 0.0795, "step": 6248 }, { "epoch": 2.024951393389501, "grad_norm": 0.861675500869751, "learning_rate": 1.2626450587191602e-06, "loss": 0.0775, "step": 6249 }, { "epoch": 2.0252754374594946, "grad_norm": 0.9877171516418457, "learning_rate": 1.2618851646268416e-06, "loss": 0.0827, "step": 6250 }, { "epoch": 2.025599481529488, "grad_norm": 0.8101239800453186, "learning_rate": 1.2611254220677937e-06, "loss": 0.0698, "step": 6251 }, { "epoch": 2.0259235255994814, "grad_norm": 0.7930020689964294, "learning_rate": 1.260365831134999e-06, "loss": 0.0721, "step": 6252 }, { "epoch": 2.026247569669475, "grad_norm": 0.9465885162353516, "learning_rate": 1.259606391921428e-06, "loss": 0.0766, "step": 6253 }, { "epoch": 2.0265716137394687, "grad_norm": 0.8918383717536926, "learning_rate": 1.2588471045200256e-06, "loss": 0.0717, "step": 6254 }, { "epoch": 2.026895657809462, "grad_norm": 0.8400719165802002, "learning_rate": 1.2580879690237224e-06, "loss": 0.0785, "step": 6255 }, { "epoch": 2.0272197018794555, "grad_norm": 0.854532778263092, "learning_rate": 1.257328985525429e-06, "loss": 0.0799, "step": 6256 }, { "epoch": 2.027543745949449, "grad_norm": 0.8931845426559448, "learning_rate": 1.256570154118038e-06, "loss": 0.0827, "step": 6257 }, { "epoch": 2.027867790019443, "grad_norm": 0.9417962431907654, "learning_rate": 1.2558114748944226e-06, "loss": 0.0821, "step": 6258 }, { "epoch": 2.0281918340894363, "grad_norm": 0.8137034773826599, "learning_rate": 1.2550529479474383e-06, "loss": 0.0775, "step": 6259 }, { "epoch": 2.0285158781594297, "grad_norm": 0.8253341913223267, "learning_rate": 1.2542945733699216e-06, "loss": 0.0745, "step": 6260 }, { "epoch": 2.028839922229423, "grad_norm": 0.9062314629554749, "learning_rate": 1.2535363512546892e-06, "loss": 0.0814, "step": 6261 }, { "epoch": 2.0291639662994165, "grad_norm": 0.7367941737174988, "learning_rate": 1.2527782816945405e-06, "loss": 0.0683, "step": 6262 }, { "epoch": 2.0294880103694104, "grad_norm": 0.8357585668563843, "learning_rate": 1.2520203647822563e-06, "loss": 0.0791, "step": 6263 }, { "epoch": 2.029812054439404, "grad_norm": 0.9064425826072693, "learning_rate": 1.2512626006105977e-06, "loss": 0.086, "step": 6264 }, { "epoch": 2.0301360985093972, "grad_norm": 0.7778387665748596, "learning_rate": 1.2505049892723083e-06, "loss": 0.0769, "step": 6265 }, { "epoch": 2.0304601425793907, "grad_norm": 0.8687406778335571, "learning_rate": 1.2497475308601134e-06, "loss": 0.0844, "step": 6266 }, { "epoch": 2.0307841866493845, "grad_norm": 0.8010364174842834, "learning_rate": 1.248990225466715e-06, "loss": 0.077, "step": 6267 }, { "epoch": 2.031108230719378, "grad_norm": 0.8573675751686096, "learning_rate": 1.2482330731848044e-06, "loss": 0.0789, "step": 6268 }, { "epoch": 2.0314322747893714, "grad_norm": 0.8595082759857178, "learning_rate": 1.2474760741070465e-06, "loss": 0.0807, "step": 6269 }, { "epoch": 2.031756318859365, "grad_norm": 0.9239386320114136, "learning_rate": 1.246719228326092e-06, "loss": 0.082, "step": 6270 }, { "epoch": 2.0320803629293582, "grad_norm": 0.8387404680252075, "learning_rate": 1.2459625359345712e-06, "loss": 0.0746, "step": 6271 }, { "epoch": 2.032404406999352, "grad_norm": 0.8717005848884583, "learning_rate": 1.2452059970250957e-06, "loss": 0.0818, "step": 6272 }, { "epoch": 2.0327284510693455, "grad_norm": 0.9566702842712402, "learning_rate": 1.2444496116902602e-06, "loss": 0.0867, "step": 6273 }, { "epoch": 2.033052495139339, "grad_norm": 0.7944853901863098, "learning_rate": 1.2436933800226352e-06, "loss": 0.0757, "step": 6274 }, { "epoch": 2.0333765392093324, "grad_norm": 0.9047906398773193, "learning_rate": 1.2429373021147808e-06, "loss": 0.0883, "step": 6275 }, { "epoch": 2.033700583279326, "grad_norm": 0.8332332372665405, "learning_rate": 1.2421813780592294e-06, "loss": 0.0774, "step": 6276 }, { "epoch": 2.0340246273493197, "grad_norm": 0.8721747398376465, "learning_rate": 1.2414256079485021e-06, "loss": 0.0821, "step": 6277 }, { "epoch": 2.034348671419313, "grad_norm": 0.9223491549491882, "learning_rate": 1.240669991875096e-06, "loss": 0.0817, "step": 6278 }, { "epoch": 2.0346727154893065, "grad_norm": 0.8711641430854797, "learning_rate": 1.2399145299314913e-06, "loss": 0.0867, "step": 6279 }, { "epoch": 2.0349967595593, "grad_norm": 0.8048623204231262, "learning_rate": 1.2391592222101497e-06, "loss": 0.0727, "step": 6280 }, { "epoch": 2.035320803629294, "grad_norm": 0.8184431195259094, "learning_rate": 1.2384040688035135e-06, "loss": 0.0767, "step": 6281 }, { "epoch": 2.035644847699287, "grad_norm": 0.7848328948020935, "learning_rate": 1.2376490698040069e-06, "loss": 0.0702, "step": 6282 }, { "epoch": 2.0359688917692806, "grad_norm": 0.8464064002037048, "learning_rate": 1.236894225304032e-06, "loss": 0.0756, "step": 6283 }, { "epoch": 2.036292935839274, "grad_norm": 0.8497661352157593, "learning_rate": 1.2361395353959776e-06, "loss": 0.0767, "step": 6284 }, { "epoch": 2.0366169799092675, "grad_norm": 0.8617527484893799, "learning_rate": 1.2353850001722084e-06, "loss": 0.0808, "step": 6285 }, { "epoch": 2.0369410239792614, "grad_norm": 0.8274610638618469, "learning_rate": 1.2346306197250727e-06, "loss": 0.0733, "step": 6286 }, { "epoch": 2.037265068049255, "grad_norm": 0.9231866002082825, "learning_rate": 1.2338763941468993e-06, "loss": 0.0873, "step": 6287 }, { "epoch": 2.037589112119248, "grad_norm": 0.9184801578521729, "learning_rate": 1.2331223235299983e-06, "loss": 0.0836, "step": 6288 }, { "epoch": 2.0379131561892416, "grad_norm": 0.7702471017837524, "learning_rate": 1.2323684079666604e-06, "loss": 0.0724, "step": 6289 }, { "epoch": 2.038237200259235, "grad_norm": 0.8048983812332153, "learning_rate": 1.2316146475491578e-06, "loss": 0.0746, "step": 6290 }, { "epoch": 2.038561244329229, "grad_norm": 0.8604027032852173, "learning_rate": 1.2308610423697446e-06, "loss": 0.0788, "step": 6291 }, { "epoch": 2.0388852883992223, "grad_norm": 0.8295606374740601, "learning_rate": 1.2301075925206524e-06, "loss": 0.0766, "step": 6292 }, { "epoch": 2.0392093324692158, "grad_norm": 0.7977343201637268, "learning_rate": 1.2293542980940974e-06, "loss": 0.0726, "step": 6293 }, { "epoch": 2.039533376539209, "grad_norm": 0.9230608940124512, "learning_rate": 1.2286011591822756e-06, "loss": 0.0883, "step": 6294 }, { "epoch": 2.039857420609203, "grad_norm": 0.8658449649810791, "learning_rate": 1.2278481758773636e-06, "loss": 0.0813, "step": 6295 }, { "epoch": 2.0401814646791965, "grad_norm": 0.857531726360321, "learning_rate": 1.2270953482715197e-06, "loss": 0.0796, "step": 6296 }, { "epoch": 2.04050550874919, "grad_norm": 0.9500970840454102, "learning_rate": 1.2263426764568835e-06, "loss": 0.0836, "step": 6297 }, { "epoch": 2.0408295528191833, "grad_norm": 0.8607531785964966, "learning_rate": 1.2255901605255715e-06, "loss": 0.0777, "step": 6298 }, { "epoch": 2.0411535968891767, "grad_norm": 0.8002095818519592, "learning_rate": 1.224837800569689e-06, "loss": 0.07, "step": 6299 }, { "epoch": 2.0414776409591706, "grad_norm": 0.9007039666175842, "learning_rate": 1.224085596681314e-06, "loss": 0.082, "step": 6300 }, { "epoch": 2.041801685029164, "grad_norm": 0.8380284309387207, "learning_rate": 1.22333354895251e-06, "loss": 0.0774, "step": 6301 }, { "epoch": 2.0421257290991575, "grad_norm": 0.8431714773178101, "learning_rate": 1.2225816574753208e-06, "loss": 0.0763, "step": 6302 }, { "epoch": 2.042449773169151, "grad_norm": 0.8380304574966431, "learning_rate": 1.2218299223417702e-06, "loss": 0.0738, "step": 6303 }, { "epoch": 2.0427738172391443, "grad_norm": 0.930404782295227, "learning_rate": 1.2210783436438644e-06, "loss": 0.0801, "step": 6304 }, { "epoch": 2.043097861309138, "grad_norm": 0.8345676064491272, "learning_rate": 1.2203269214735866e-06, "loss": 0.0766, "step": 6305 }, { "epoch": 2.0434219053791316, "grad_norm": 0.8367553949356079, "learning_rate": 1.2195756559229072e-06, "loss": 0.074, "step": 6306 }, { "epoch": 2.043745949449125, "grad_norm": 0.9327186346054077, "learning_rate": 1.2188245470837702e-06, "loss": 0.0865, "step": 6307 }, { "epoch": 2.0440699935191184, "grad_norm": 0.7962414622306824, "learning_rate": 1.218073595048108e-06, "loss": 0.0751, "step": 6308 }, { "epoch": 2.0443940375891123, "grad_norm": 0.8408752679824829, "learning_rate": 1.2173227999078264e-06, "loss": 0.0775, "step": 6309 }, { "epoch": 2.0447180816591057, "grad_norm": 0.8453505039215088, "learning_rate": 1.2165721617548172e-06, "loss": 0.0811, "step": 6310 }, { "epoch": 2.045042125729099, "grad_norm": 0.8683754801750183, "learning_rate": 1.2158216806809505e-06, "loss": 0.079, "step": 6311 }, { "epoch": 2.0453661697990926, "grad_norm": 0.8050029277801514, "learning_rate": 1.2150713567780786e-06, "loss": 0.0739, "step": 6312 }, { "epoch": 2.045690213869086, "grad_norm": 0.8375382423400879, "learning_rate": 1.2143211901380341e-06, "loss": 0.0755, "step": 6313 }, { "epoch": 2.04601425793908, "grad_norm": 0.8136559724807739, "learning_rate": 1.2135711808526282e-06, "loss": 0.0765, "step": 6314 }, { "epoch": 2.0463383020090733, "grad_norm": 0.8717918395996094, "learning_rate": 1.2128213290136578e-06, "loss": 0.0789, "step": 6315 }, { "epoch": 2.0466623460790667, "grad_norm": 0.8089046478271484, "learning_rate": 1.212071634712895e-06, "loss": 0.069, "step": 6316 }, { "epoch": 2.04698639014906, "grad_norm": 0.8941287398338318, "learning_rate": 1.211322098042096e-06, "loss": 0.0812, "step": 6317 }, { "epoch": 2.047310434219054, "grad_norm": 0.9220302700996399, "learning_rate": 1.2105727190929967e-06, "loss": 0.0816, "step": 6318 }, { "epoch": 2.0476344782890474, "grad_norm": 0.9743109345436096, "learning_rate": 1.209823497957314e-06, "loss": 0.0868, "step": 6319 }, { "epoch": 2.047958522359041, "grad_norm": 0.8121354579925537, "learning_rate": 1.2090744347267452e-06, "loss": 0.0762, "step": 6320 }, { "epoch": 2.0482825664290343, "grad_norm": 0.830605149269104, "learning_rate": 1.2083255294929697e-06, "loss": 0.0782, "step": 6321 }, { "epoch": 2.0486066104990277, "grad_norm": 0.9353241920471191, "learning_rate": 1.2075767823476439e-06, "loss": 0.0848, "step": 6322 }, { "epoch": 2.0489306545690216, "grad_norm": 1.0210410356521606, "learning_rate": 1.2068281933824084e-06, "loss": 0.0916, "step": 6323 }, { "epoch": 2.049254698639015, "grad_norm": 0.8150740265846252, "learning_rate": 1.2060797626888828e-06, "loss": 0.0701, "step": 6324 }, { "epoch": 2.0495787427090084, "grad_norm": 0.849547266960144, "learning_rate": 1.2053314903586685e-06, "loss": 0.0799, "step": 6325 }, { "epoch": 2.049902786779002, "grad_norm": 0.8484644889831543, "learning_rate": 1.2045833764833461e-06, "loss": 0.0792, "step": 6326 }, { "epoch": 2.0502268308489953, "grad_norm": 0.861136257648468, "learning_rate": 1.2038354211544781e-06, "loss": 0.0801, "step": 6327 }, { "epoch": 2.050550874918989, "grad_norm": 0.8917108178138733, "learning_rate": 1.2030876244636078e-06, "loss": 0.0771, "step": 6328 }, { "epoch": 2.0508749189889826, "grad_norm": 0.8093529343605042, "learning_rate": 1.202339986502255e-06, "loss": 0.0726, "step": 6329 }, { "epoch": 2.051198963058976, "grad_norm": 0.8013548254966736, "learning_rate": 1.2015925073619275e-06, "loss": 0.0736, "step": 6330 }, { "epoch": 2.0515230071289694, "grad_norm": 0.7830600142478943, "learning_rate": 1.2008451871341056e-06, "loss": 0.073, "step": 6331 }, { "epoch": 2.0518470511989633, "grad_norm": 0.8527432084083557, "learning_rate": 1.200098025910258e-06, "loss": 0.0811, "step": 6332 }, { "epoch": 2.0521710952689567, "grad_norm": 0.8012470006942749, "learning_rate": 1.1993510237818269e-06, "loss": 0.0738, "step": 6333 }, { "epoch": 2.05249513933895, "grad_norm": 0.8954493999481201, "learning_rate": 1.1986041808402393e-06, "loss": 0.0867, "step": 6334 }, { "epoch": 2.0528191834089435, "grad_norm": 0.8696557283401489, "learning_rate": 1.1978574971769025e-06, "loss": 0.082, "step": 6335 }, { "epoch": 2.053143227478937, "grad_norm": 0.7633064389228821, "learning_rate": 1.1971109728832003e-06, "loss": 0.074, "step": 6336 }, { "epoch": 2.053467271548931, "grad_norm": 1.0557372570037842, "learning_rate": 1.196364608050504e-06, "loss": 0.0797, "step": 6337 }, { "epoch": 2.0537913156189243, "grad_norm": 0.772982656955719, "learning_rate": 1.1956184027701576e-06, "loss": 0.0729, "step": 6338 }, { "epoch": 2.0541153596889177, "grad_norm": 0.8109322190284729, "learning_rate": 1.1948723571334932e-06, "loss": 0.076, "step": 6339 }, { "epoch": 2.054439403758911, "grad_norm": 0.870343804359436, "learning_rate": 1.1941264712318167e-06, "loss": 0.0764, "step": 6340 }, { "epoch": 2.0547634478289045, "grad_norm": 0.9311466813087463, "learning_rate": 1.1933807451564186e-06, "loss": 0.0773, "step": 6341 }, { "epoch": 2.0550874918988984, "grad_norm": 0.8491466641426086, "learning_rate": 1.192635178998568e-06, "loss": 0.0706, "step": 6342 }, { "epoch": 2.055411535968892, "grad_norm": 0.7975406050682068, "learning_rate": 1.191889772849515e-06, "loss": 0.0729, "step": 6343 }, { "epoch": 2.0557355800388852, "grad_norm": 0.8268863558769226, "learning_rate": 1.1911445268004917e-06, "loss": 0.077, "step": 6344 }, { "epoch": 2.0560596241088787, "grad_norm": 0.9013711214065552, "learning_rate": 1.1903994409427063e-06, "loss": 0.0801, "step": 6345 }, { "epoch": 2.0563836681788725, "grad_norm": 0.7981849908828735, "learning_rate": 1.1896545153673517e-06, "loss": 0.0741, "step": 6346 }, { "epoch": 2.056707712248866, "grad_norm": 0.8823484182357788, "learning_rate": 1.1889097501655991e-06, "loss": 0.0776, "step": 6347 }, { "epoch": 2.0570317563188594, "grad_norm": 0.9027769565582275, "learning_rate": 1.1881651454286008e-06, "loss": 0.0792, "step": 6348 }, { "epoch": 2.057355800388853, "grad_norm": 0.8280683755874634, "learning_rate": 1.1874207012474891e-06, "loss": 0.0769, "step": 6349 }, { "epoch": 2.057679844458846, "grad_norm": 0.8139511346817017, "learning_rate": 1.186676417713377e-06, "loss": 0.0754, "step": 6350 }, { "epoch": 2.05800388852884, "grad_norm": 0.8425285816192627, "learning_rate": 1.1859322949173572e-06, "loss": 0.0801, "step": 6351 }, { "epoch": 2.0583279325988335, "grad_norm": 0.9451056122779846, "learning_rate": 1.1851883329505043e-06, "loss": 0.0838, "step": 6352 }, { "epoch": 2.058651976668827, "grad_norm": 0.8416407704353333, "learning_rate": 1.1844445319038694e-06, "loss": 0.0758, "step": 6353 }, { "epoch": 2.0589760207388204, "grad_norm": 0.8476356267929077, "learning_rate": 1.18370089186849e-06, "loss": 0.0823, "step": 6354 }, { "epoch": 2.059300064808814, "grad_norm": 0.9370405077934265, "learning_rate": 1.1829574129353777e-06, "loss": 0.0773, "step": 6355 }, { "epoch": 2.0596241088788076, "grad_norm": 0.865960955619812, "learning_rate": 1.182214095195528e-06, "loss": 0.074, "step": 6356 }, { "epoch": 2.059948152948801, "grad_norm": 0.8611675500869751, "learning_rate": 1.181470938739917e-06, "loss": 0.0848, "step": 6357 }, { "epoch": 2.0602721970187945, "grad_norm": 0.9232220649719238, "learning_rate": 1.1807279436594967e-06, "loss": 0.0815, "step": 6358 }, { "epoch": 2.060596241088788, "grad_norm": 0.9129186272621155, "learning_rate": 1.1799851100452067e-06, "loss": 0.0798, "step": 6359 }, { "epoch": 2.060920285158782, "grad_norm": 0.8891286849975586, "learning_rate": 1.1792424379879582e-06, "loss": 0.081, "step": 6360 }, { "epoch": 2.061244329228775, "grad_norm": 0.8557460308074951, "learning_rate": 1.1784999275786515e-06, "loss": 0.0764, "step": 6361 }, { "epoch": 2.0615683732987686, "grad_norm": 0.8832924365997314, "learning_rate": 1.177757578908159e-06, "loss": 0.0825, "step": 6362 }, { "epoch": 2.061892417368762, "grad_norm": 0.859259843826294, "learning_rate": 1.17701539206734e-06, "loss": 0.0855, "step": 6363 }, { "epoch": 2.0622164614387555, "grad_norm": 0.804772138595581, "learning_rate": 1.1762733671470285e-06, "loss": 0.0733, "step": 6364 }, { "epoch": 2.0625405055087493, "grad_norm": 0.8585007190704346, "learning_rate": 1.1755315042380425e-06, "loss": 0.0821, "step": 6365 }, { "epoch": 2.0628645495787428, "grad_norm": 0.8764128684997559, "learning_rate": 1.1747898034311782e-06, "loss": 0.0857, "step": 6366 }, { "epoch": 2.063188593648736, "grad_norm": 0.858251690864563, "learning_rate": 1.1740482648172132e-06, "loss": 0.0817, "step": 6367 }, { "epoch": 2.0635126377187296, "grad_norm": 0.8610502481460571, "learning_rate": 1.1733068884869053e-06, "loss": 0.0709, "step": 6368 }, { "epoch": 2.0638366817887235, "grad_norm": 0.8864075541496277, "learning_rate": 1.172565674530989e-06, "loss": 0.0862, "step": 6369 }, { "epoch": 2.064160725858717, "grad_norm": 0.8807039260864258, "learning_rate": 1.1718246230401856e-06, "loss": 0.0837, "step": 6370 }, { "epoch": 2.0644847699287103, "grad_norm": 0.913631796836853, "learning_rate": 1.1710837341051892e-06, "loss": 0.0887, "step": 6371 }, { "epoch": 2.0648088139987038, "grad_norm": 0.8298909664154053, "learning_rate": 1.1703430078166792e-06, "loss": 0.0807, "step": 6372 }, { "epoch": 2.065132858068697, "grad_norm": 0.9504325985908508, "learning_rate": 1.169602444265313e-06, "loss": 0.0878, "step": 6373 }, { "epoch": 2.065456902138691, "grad_norm": 0.8303631544113159, "learning_rate": 1.168862043541728e-06, "loss": 0.0778, "step": 6374 }, { "epoch": 2.0657809462086845, "grad_norm": 0.860530436038971, "learning_rate": 1.1681218057365429e-06, "loss": 0.081, "step": 6375 }, { "epoch": 2.066104990278678, "grad_norm": 0.8380438685417175, "learning_rate": 1.167381730940356e-06, "loss": 0.0781, "step": 6376 }, { "epoch": 2.0664290343486713, "grad_norm": 0.8239679336547852, "learning_rate": 1.1666418192437434e-06, "loss": 0.0773, "step": 6377 }, { "epoch": 2.0667530784186647, "grad_norm": 0.8223795294761658, "learning_rate": 1.1659020707372643e-06, "loss": 0.0753, "step": 6378 }, { "epoch": 2.0670771224886586, "grad_norm": 0.7889320850372314, "learning_rate": 1.1651624855114565e-06, "loss": 0.0788, "step": 6379 }, { "epoch": 2.067401166558652, "grad_norm": 0.886277973651886, "learning_rate": 1.1644230636568384e-06, "loss": 0.0752, "step": 6380 }, { "epoch": 2.0677252106286454, "grad_norm": 0.8053793907165527, "learning_rate": 1.1636838052639081e-06, "loss": 0.0779, "step": 6381 }, { "epoch": 2.068049254698639, "grad_norm": 0.8326542377471924, "learning_rate": 1.1629447104231435e-06, "loss": 0.0796, "step": 6382 }, { "epoch": 2.0683732987686327, "grad_norm": 0.865424394607544, "learning_rate": 1.1622057792250033e-06, "loss": 0.0824, "step": 6383 }, { "epoch": 2.068697342838626, "grad_norm": 0.8067560791969299, "learning_rate": 1.1614670117599231e-06, "loss": 0.0732, "step": 6384 }, { "epoch": 2.0690213869086196, "grad_norm": 0.9387754201889038, "learning_rate": 1.1607284081183245e-06, "loss": 0.0818, "step": 6385 }, { "epoch": 2.069345430978613, "grad_norm": 0.8114498853683472, "learning_rate": 1.1599899683906026e-06, "loss": 0.074, "step": 6386 }, { "epoch": 2.0696694750486064, "grad_norm": 0.8556808233261108, "learning_rate": 1.1592516926671367e-06, "loss": 0.0783, "step": 6387 }, { "epoch": 2.0699935191186003, "grad_norm": 0.904638946056366, "learning_rate": 1.1585135810382836e-06, "loss": 0.0811, "step": 6388 }, { "epoch": 2.0703175631885937, "grad_norm": 0.8684148788452148, "learning_rate": 1.1577756335943818e-06, "loss": 0.0751, "step": 6389 }, { "epoch": 2.070641607258587, "grad_norm": 0.9171475768089294, "learning_rate": 1.1570378504257499e-06, "loss": 0.0775, "step": 6390 }, { "epoch": 2.0709656513285806, "grad_norm": 0.949954092502594, "learning_rate": 1.156300231622682e-06, "loss": 0.0844, "step": 6391 }, { "epoch": 2.071289695398574, "grad_norm": 0.9226496815681458, "learning_rate": 1.1555627772754595e-06, "loss": 0.0763, "step": 6392 }, { "epoch": 2.071613739468568, "grad_norm": 0.888752281665802, "learning_rate": 1.1548254874743365e-06, "loss": 0.0773, "step": 6393 }, { "epoch": 2.0719377835385613, "grad_norm": 0.8955729007720947, "learning_rate": 1.154088362309553e-06, "loss": 0.0805, "step": 6394 }, { "epoch": 2.0722618276085547, "grad_norm": 0.8570363521575928, "learning_rate": 1.1533514018713238e-06, "loss": 0.0815, "step": 6395 }, { "epoch": 2.072585871678548, "grad_norm": 0.9176596403121948, "learning_rate": 1.1526146062498464e-06, "loss": 0.0831, "step": 6396 }, { "epoch": 2.072909915748542, "grad_norm": 0.8681780695915222, "learning_rate": 1.1518779755352977e-06, "loss": 0.0785, "step": 6397 }, { "epoch": 2.0732339598185354, "grad_norm": 0.8597813844680786, "learning_rate": 1.1511415098178336e-06, "loss": 0.0769, "step": 6398 }, { "epoch": 2.073558003888529, "grad_norm": 0.8584122061729431, "learning_rate": 1.1504052091875917e-06, "loss": 0.0763, "step": 6399 }, { "epoch": 2.0738820479585223, "grad_norm": 0.9270933270454407, "learning_rate": 1.1496690737346864e-06, "loss": 0.0823, "step": 6400 }, { "epoch": 2.0742060920285157, "grad_norm": 0.8876338601112366, "learning_rate": 1.148933103549214e-06, "loss": 0.0782, "step": 6401 }, { "epoch": 2.0745301360985096, "grad_norm": 0.8715948462486267, "learning_rate": 1.1481972987212505e-06, "loss": 0.0806, "step": 6402 }, { "epoch": 2.074854180168503, "grad_norm": 0.8495489358901978, "learning_rate": 1.1474616593408513e-06, "loss": 0.0784, "step": 6403 }, { "epoch": 2.0751782242384964, "grad_norm": 0.8601410388946533, "learning_rate": 1.1467261854980513e-06, "loss": 0.0773, "step": 6404 }, { "epoch": 2.07550226830849, "grad_norm": 0.9512822031974792, "learning_rate": 1.1459908772828658e-06, "loss": 0.0792, "step": 6405 }, { "epoch": 2.0758263123784833, "grad_norm": 0.8868715167045593, "learning_rate": 1.1452557347852885e-06, "loss": 0.0814, "step": 6406 }, { "epoch": 2.076150356448477, "grad_norm": 0.9470981359481812, "learning_rate": 1.1445207580952956e-06, "loss": 0.0904, "step": 6407 }, { "epoch": 2.0764744005184705, "grad_norm": 0.9002374410629272, "learning_rate": 1.143785947302839e-06, "loss": 0.088, "step": 6408 }, { "epoch": 2.076798444588464, "grad_norm": 0.7991862893104553, "learning_rate": 1.143051302497853e-06, "loss": 0.0712, "step": 6409 }, { "epoch": 2.0771224886584574, "grad_norm": 0.8354719281196594, "learning_rate": 1.1423168237702515e-06, "loss": 0.0699, "step": 6410 }, { "epoch": 2.0774465327284513, "grad_norm": 0.8705977201461792, "learning_rate": 1.1415825112099274e-06, "loss": 0.0742, "step": 6411 }, { "epoch": 2.0777705767984447, "grad_norm": 0.7786136269569397, "learning_rate": 1.1408483649067541e-06, "loss": 0.0743, "step": 6412 }, { "epoch": 2.078094620868438, "grad_norm": 0.8944233059883118, "learning_rate": 1.1401143849505816e-06, "loss": 0.0845, "step": 6413 }, { "epoch": 2.0784186649384315, "grad_norm": 0.8579782843589783, "learning_rate": 1.1393805714312456e-06, "loss": 0.0816, "step": 6414 }, { "epoch": 2.078742709008425, "grad_norm": 0.8879421949386597, "learning_rate": 1.138646924438554e-06, "loss": 0.0819, "step": 6415 }, { "epoch": 2.079066753078419, "grad_norm": 0.8416301608085632, "learning_rate": 1.1379134440623018e-06, "loss": 0.073, "step": 6416 }, { "epoch": 2.0793907971484122, "grad_norm": 0.9148510694503784, "learning_rate": 1.137180130392257e-06, "loss": 0.0858, "step": 6417 }, { "epoch": 2.0797148412184057, "grad_norm": 0.8215204477310181, "learning_rate": 1.1364469835181712e-06, "loss": 0.0745, "step": 6418 }, { "epoch": 2.080038885288399, "grad_norm": 0.86520916223526, "learning_rate": 1.1357140035297745e-06, "loss": 0.0767, "step": 6419 }, { "epoch": 2.080362929358393, "grad_norm": 0.837408721446991, "learning_rate": 1.1349811905167762e-06, "loss": 0.0743, "step": 6420 }, { "epoch": 2.0806869734283864, "grad_norm": 0.8289994597434998, "learning_rate": 1.134248544568867e-06, "loss": 0.0785, "step": 6421 }, { "epoch": 2.08101101749838, "grad_norm": 0.9177697896957397, "learning_rate": 1.1335160657757121e-06, "loss": 0.0806, "step": 6422 }, { "epoch": 2.0813350615683732, "grad_norm": 0.9063735604286194, "learning_rate": 1.1327837542269645e-06, "loss": 0.0825, "step": 6423 }, { "epoch": 2.0816591056383666, "grad_norm": 0.9097846150398254, "learning_rate": 1.1320516100122487e-06, "loss": 0.0817, "step": 6424 }, { "epoch": 2.0819831497083605, "grad_norm": 1.008061408996582, "learning_rate": 1.1313196332211728e-06, "loss": 0.0807, "step": 6425 }, { "epoch": 2.082307193778354, "grad_norm": 0.872922956943512, "learning_rate": 1.130587823943324e-06, "loss": 0.0818, "step": 6426 }, { "epoch": 2.0826312378483474, "grad_norm": 0.8754384517669678, "learning_rate": 1.1298561822682687e-06, "loss": 0.0786, "step": 6427 }, { "epoch": 2.082955281918341, "grad_norm": 0.8794349431991577, "learning_rate": 1.1291247082855528e-06, "loss": 0.074, "step": 6428 }, { "epoch": 2.083279325988334, "grad_norm": 0.82512366771698, "learning_rate": 1.1283934020847015e-06, "loss": 0.0758, "step": 6429 }, { "epoch": 2.083603370058328, "grad_norm": 0.8695098161697388, "learning_rate": 1.1276622637552203e-06, "loss": 0.08, "step": 6430 }, { "epoch": 2.0839274141283215, "grad_norm": 0.9111599922180176, "learning_rate": 1.126931293386592e-06, "loss": 0.0805, "step": 6431 }, { "epoch": 2.084251458198315, "grad_norm": 0.8794967532157898, "learning_rate": 1.1262004910682811e-06, "loss": 0.0809, "step": 6432 }, { "epoch": 2.0845755022683083, "grad_norm": 0.8865387439727783, "learning_rate": 1.1254698568897308e-06, "loss": 0.0822, "step": 6433 }, { "epoch": 2.084899546338302, "grad_norm": 0.8278342485427856, "learning_rate": 1.124739390940363e-06, "loss": 0.0758, "step": 6434 }, { "epoch": 2.0852235904082956, "grad_norm": 0.821538507938385, "learning_rate": 1.1240090933095806e-06, "loss": 0.0719, "step": 6435 }, { "epoch": 2.085547634478289, "grad_norm": 0.8098189830780029, "learning_rate": 1.1232789640867644e-06, "loss": 0.0741, "step": 6436 }, { "epoch": 2.0858716785482825, "grad_norm": 0.8238601088523865, "learning_rate": 1.1225490033612755e-06, "loss": 0.0785, "step": 6437 }, { "epoch": 2.086195722618276, "grad_norm": 0.8251599669456482, "learning_rate": 1.1218192112224547e-06, "loss": 0.0754, "step": 6438 }, { "epoch": 2.0865197666882698, "grad_norm": 0.9041087627410889, "learning_rate": 1.1210895877596195e-06, "loss": 0.0812, "step": 6439 }, { "epoch": 2.086843810758263, "grad_norm": 0.8868924975395203, "learning_rate": 1.12036013306207e-06, "loss": 0.0779, "step": 6440 }, { "epoch": 2.0871678548282566, "grad_norm": 0.9086484313011169, "learning_rate": 1.1196308472190845e-06, "loss": 0.0798, "step": 6441 }, { "epoch": 2.08749189889825, "grad_norm": 0.9073233604431152, "learning_rate": 1.1189017303199198e-06, "loss": 0.0798, "step": 6442 }, { "epoch": 2.087815942968244, "grad_norm": 1.000227928161621, "learning_rate": 1.1181727824538147e-06, "loss": 0.0824, "step": 6443 }, { "epoch": 2.0881399870382373, "grad_norm": 0.980191171169281, "learning_rate": 1.1174440037099815e-06, "loss": 0.0814, "step": 6444 }, { "epoch": 2.0884640311082308, "grad_norm": 0.863726794719696, "learning_rate": 1.1167153941776205e-06, "loss": 0.0773, "step": 6445 }, { "epoch": 2.088788075178224, "grad_norm": 0.9686128497123718, "learning_rate": 1.1159869539459018e-06, "loss": 0.0832, "step": 6446 }, { "epoch": 2.0891121192482176, "grad_norm": 0.8538342118263245, "learning_rate": 1.1152586831039835e-06, "loss": 0.0772, "step": 6447 }, { "epoch": 2.0894361633182115, "grad_norm": 0.8319849967956543, "learning_rate": 1.1145305817409962e-06, "loss": 0.0735, "step": 6448 }, { "epoch": 2.089760207388205, "grad_norm": 0.8991792798042297, "learning_rate": 1.1138026499460532e-06, "loss": 0.0805, "step": 6449 }, { "epoch": 2.0900842514581983, "grad_norm": 0.8539870381355286, "learning_rate": 1.1130748878082467e-06, "loss": 0.0689, "step": 6450 }, { "epoch": 2.0904082955281917, "grad_norm": 0.8225018382072449, "learning_rate": 1.1123472954166473e-06, "loss": 0.0737, "step": 6451 }, { "epoch": 2.090732339598185, "grad_norm": 0.8963720798492432, "learning_rate": 1.1116198728603061e-06, "loss": 0.0788, "step": 6452 }, { "epoch": 2.091056383668179, "grad_norm": 0.8966756463050842, "learning_rate": 1.1108926202282505e-06, "loss": 0.0821, "step": 6453 }, { "epoch": 2.0913804277381725, "grad_norm": 0.8887714147567749, "learning_rate": 1.110165537609492e-06, "loss": 0.0745, "step": 6454 }, { "epoch": 2.091704471808166, "grad_norm": 0.9101707935333252, "learning_rate": 1.1094386250930164e-06, "loss": 0.0791, "step": 6455 }, { "epoch": 2.0920285158781593, "grad_norm": 0.8202990889549255, "learning_rate": 1.1087118827677915e-06, "loss": 0.0758, "step": 6456 }, { "epoch": 2.0923525599481527, "grad_norm": 0.8937187790870667, "learning_rate": 1.1079853107227634e-06, "loss": 0.0853, "step": 6457 }, { "epoch": 2.0926766040181466, "grad_norm": 0.9171735644340515, "learning_rate": 1.1072589090468571e-06, "loss": 0.0819, "step": 6458 }, { "epoch": 2.09300064808814, "grad_norm": 0.9766994714736938, "learning_rate": 1.1065326778289782e-06, "loss": 0.0824, "step": 6459 }, { "epoch": 2.0933246921581334, "grad_norm": 0.8692915439605713, "learning_rate": 1.1058066171580092e-06, "loss": 0.0762, "step": 6460 }, { "epoch": 2.093648736228127, "grad_norm": 0.8715638518333435, "learning_rate": 1.1050807271228146e-06, "loss": 0.0753, "step": 6461 }, { "epoch": 2.0939727802981207, "grad_norm": 0.9047412872314453, "learning_rate": 1.1043550078122342e-06, "loss": 0.0803, "step": 6462 }, { "epoch": 2.094296824368114, "grad_norm": 0.8451228737831116, "learning_rate": 1.1036294593150898e-06, "loss": 0.0753, "step": 6463 }, { "epoch": 2.0946208684381076, "grad_norm": 0.8436135649681091, "learning_rate": 1.1029040817201819e-06, "loss": 0.0772, "step": 6464 }, { "epoch": 2.094944912508101, "grad_norm": 0.8356603980064392, "learning_rate": 1.1021788751162893e-06, "loss": 0.0804, "step": 6465 }, { "epoch": 2.0952689565780944, "grad_norm": 0.8406388163566589, "learning_rate": 1.1014538395921704e-06, "loss": 0.0778, "step": 6466 }, { "epoch": 2.0955930006480883, "grad_norm": 0.8398995995521545, "learning_rate": 1.1007289752365635e-06, "loss": 0.0731, "step": 6467 }, { "epoch": 2.0959170447180817, "grad_norm": 0.8790268898010254, "learning_rate": 1.1000042821381823e-06, "loss": 0.075, "step": 6468 }, { "epoch": 2.096241088788075, "grad_norm": 0.8896059393882751, "learning_rate": 1.0992797603857257e-06, "loss": 0.0794, "step": 6469 }, { "epoch": 2.0965651328580686, "grad_norm": 0.9420241713523865, "learning_rate": 1.0985554100678647e-06, "loss": 0.0787, "step": 6470 }, { "epoch": 2.0968891769280624, "grad_norm": 0.8144295811653137, "learning_rate": 1.0978312312732562e-06, "loss": 0.075, "step": 6471 }, { "epoch": 2.097213220998056, "grad_norm": 0.8950752019882202, "learning_rate": 1.09710722409053e-06, "loss": 0.084, "step": 6472 }, { "epoch": 2.0975372650680493, "grad_norm": 0.8596192598342896, "learning_rate": 1.0963833886082987e-06, "loss": 0.0787, "step": 6473 }, { "epoch": 2.0978613091380427, "grad_norm": 0.8208886981010437, "learning_rate": 1.0956597249151532e-06, "loss": 0.0747, "step": 6474 }, { "epoch": 2.098185353208036, "grad_norm": 0.8358070254325867, "learning_rate": 1.0949362330996605e-06, "loss": 0.0718, "step": 6475 }, { "epoch": 2.09850939727803, "grad_norm": 0.8843006491661072, "learning_rate": 1.094212913250373e-06, "loss": 0.0749, "step": 6476 }, { "epoch": 2.0988334413480234, "grad_norm": 0.8269924521446228, "learning_rate": 1.0934897654558134e-06, "loss": 0.0763, "step": 6477 }, { "epoch": 2.099157485418017, "grad_norm": 0.8224289417266846, "learning_rate": 1.0927667898044927e-06, "loss": 0.076, "step": 6478 }, { "epoch": 2.0994815294880103, "grad_norm": 0.7836449146270752, "learning_rate": 1.092043986384893e-06, "loss": 0.0702, "step": 6479 }, { "epoch": 2.0998055735580037, "grad_norm": 0.9487519264221191, "learning_rate": 1.091321355285479e-06, "loss": 0.0868, "step": 6480 }, { "epoch": 2.1001296176279975, "grad_norm": 0.8501242995262146, "learning_rate": 1.0905988965946942e-06, "loss": 0.0752, "step": 6481 }, { "epoch": 2.100453661697991, "grad_norm": 0.9248630404472351, "learning_rate": 1.0898766104009606e-06, "loss": 0.087, "step": 6482 }, { "epoch": 2.1007777057679844, "grad_norm": 0.8161444067955017, "learning_rate": 1.0891544967926795e-06, "loss": 0.0741, "step": 6483 }, { "epoch": 2.101101749837978, "grad_norm": 0.8928951025009155, "learning_rate": 1.0884325558582283e-06, "loss": 0.0764, "step": 6484 }, { "epoch": 2.1014257939079717, "grad_norm": 0.9648940563201904, "learning_rate": 1.0877107876859688e-06, "loss": 0.0839, "step": 6485 }, { "epoch": 2.101749837977965, "grad_norm": 0.9580192565917969, "learning_rate": 1.086989192364236e-06, "loss": 0.0772, "step": 6486 }, { "epoch": 2.1020738820479585, "grad_norm": 0.8301624655723572, "learning_rate": 1.0862677699813471e-06, "loss": 0.0742, "step": 6487 }, { "epoch": 2.102397926117952, "grad_norm": 0.8573307394981384, "learning_rate": 1.0855465206255972e-06, "loss": 0.0776, "step": 6488 }, { "epoch": 2.1027219701879454, "grad_norm": 0.8289283514022827, "learning_rate": 1.0848254443852602e-06, "loss": 0.0796, "step": 6489 }, { "epoch": 2.1030460142579392, "grad_norm": 0.8256188035011292, "learning_rate": 1.084104541348589e-06, "loss": 0.0715, "step": 6490 }, { "epoch": 2.1033700583279327, "grad_norm": 0.9029165506362915, "learning_rate": 1.0833838116038156e-06, "loss": 0.0785, "step": 6491 }, { "epoch": 2.103694102397926, "grad_norm": 0.8527599573135376, "learning_rate": 1.0826632552391484e-06, "loss": 0.0781, "step": 6492 }, { "epoch": 2.1040181464679195, "grad_norm": 0.8875585198402405, "learning_rate": 1.081942872342779e-06, "loss": 0.0807, "step": 6493 }, { "epoch": 2.1043421905379134, "grad_norm": 0.8848504424095154, "learning_rate": 1.0812226630028738e-06, "loss": 0.0773, "step": 6494 }, { "epoch": 2.104666234607907, "grad_norm": 0.8722944855690002, "learning_rate": 1.0805026273075797e-06, "loss": 0.0819, "step": 6495 }, { "epoch": 2.1049902786779002, "grad_norm": 0.9383862018585205, "learning_rate": 1.0797827653450222e-06, "loss": 0.0837, "step": 6496 }, { "epoch": 2.1053143227478937, "grad_norm": 0.8965055346488953, "learning_rate": 1.0790630772033057e-06, "loss": 0.0843, "step": 6497 }, { "epoch": 2.105638366817887, "grad_norm": 0.8149116635322571, "learning_rate": 1.0783435629705134e-06, "loss": 0.0697, "step": 6498 }, { "epoch": 2.105962410887881, "grad_norm": 0.8215212225914001, "learning_rate": 1.0776242227347044e-06, "loss": 0.0753, "step": 6499 }, { "epoch": 2.1062864549578744, "grad_norm": 0.9240198135375977, "learning_rate": 1.0769050565839228e-06, "loss": 0.0829, "step": 6500 }, { "epoch": 2.106610499027868, "grad_norm": 0.8391976356506348, "learning_rate": 1.0761860646061838e-06, "loss": 0.075, "step": 6501 }, { "epoch": 2.106934543097861, "grad_norm": 0.8725175857543945, "learning_rate": 1.0754672468894889e-06, "loss": 0.0838, "step": 6502 }, { "epoch": 2.1072585871678546, "grad_norm": 0.861038088798523, "learning_rate": 1.0747486035218116e-06, "loss": 0.0774, "step": 6503 }, { "epoch": 2.1075826312378485, "grad_norm": 0.9436843991279602, "learning_rate": 1.0740301345911075e-06, "loss": 0.0855, "step": 6504 }, { "epoch": 2.107906675307842, "grad_norm": 0.8671337962150574, "learning_rate": 1.0733118401853112e-06, "loss": 0.0792, "step": 6505 }, { "epoch": 2.1082307193778353, "grad_norm": 0.8089661598205566, "learning_rate": 1.0725937203923327e-06, "loss": 0.073, "step": 6506 }, { "epoch": 2.1085547634478288, "grad_norm": 0.9231501221656799, "learning_rate": 1.0718757753000665e-06, "loss": 0.0797, "step": 6507 }, { "epoch": 2.108878807517822, "grad_norm": 0.8823448419570923, "learning_rate": 1.071158004996378e-06, "loss": 0.0775, "step": 6508 }, { "epoch": 2.109202851587816, "grad_norm": 0.8127901554107666, "learning_rate": 1.070440409569119e-06, "loss": 0.0745, "step": 6509 }, { "epoch": 2.1095268956578095, "grad_norm": 0.8558254241943359, "learning_rate": 1.0697229891061141e-06, "loss": 0.0778, "step": 6510 }, { "epoch": 2.109850939727803, "grad_norm": 0.8050652742385864, "learning_rate": 1.0690057436951689e-06, "loss": 0.0712, "step": 6511 }, { "epoch": 2.1101749837977963, "grad_norm": 0.8715561032295227, "learning_rate": 1.068288673424068e-06, "loss": 0.0712, "step": 6512 }, { "epoch": 2.11049902786779, "grad_norm": 0.8177485466003418, "learning_rate": 1.067571778380573e-06, "loss": 0.0771, "step": 6513 }, { "epoch": 2.1108230719377836, "grad_norm": 0.7821464538574219, "learning_rate": 1.0668550586524256e-06, "loss": 0.0701, "step": 6514 }, { "epoch": 2.111147116007777, "grad_norm": 0.860443651676178, "learning_rate": 1.066138514327345e-06, "loss": 0.0755, "step": 6515 }, { "epoch": 2.1114711600777705, "grad_norm": 0.8350194692611694, "learning_rate": 1.0654221454930305e-06, "loss": 0.0736, "step": 6516 }, { "epoch": 2.111795204147764, "grad_norm": 0.9321835041046143, "learning_rate": 1.0647059522371565e-06, "loss": 0.0906, "step": 6517 }, { "epoch": 2.1121192482177578, "grad_norm": 0.8878209590911865, "learning_rate": 1.0639899346473792e-06, "loss": 0.0741, "step": 6518 }, { "epoch": 2.112443292287751, "grad_norm": 0.8689842224121094, "learning_rate": 1.0632740928113323e-06, "loss": 0.0777, "step": 6519 }, { "epoch": 2.1127673363577446, "grad_norm": 0.8841999769210815, "learning_rate": 1.0625584268166278e-06, "loss": 0.0792, "step": 6520 }, { "epoch": 2.113091380427738, "grad_norm": 0.8802910447120667, "learning_rate": 1.0618429367508564e-06, "loss": 0.0782, "step": 6521 }, { "epoch": 2.113415424497732, "grad_norm": 0.780536413192749, "learning_rate": 1.061127622701588e-06, "loss": 0.0698, "step": 6522 }, { "epoch": 2.1137394685677253, "grad_norm": 0.9865183234214783, "learning_rate": 1.0604124847563674e-06, "loss": 0.0846, "step": 6523 }, { "epoch": 2.1140635126377187, "grad_norm": 0.868303656578064, "learning_rate": 1.0596975230027243e-06, "loss": 0.0799, "step": 6524 }, { "epoch": 2.114387556707712, "grad_norm": 0.865825355052948, "learning_rate": 1.05898273752816e-06, "loss": 0.0759, "step": 6525 }, { "epoch": 2.1147116007777056, "grad_norm": 0.8290408849716187, "learning_rate": 1.0582681284201587e-06, "loss": 0.0761, "step": 6526 }, { "epoch": 2.1150356448476995, "grad_norm": 0.7881754636764526, "learning_rate": 1.0575536957661814e-06, "loss": 0.0694, "step": 6527 }, { "epoch": 2.115359688917693, "grad_norm": 0.9243281483650208, "learning_rate": 1.056839439653668e-06, "loss": 0.0826, "step": 6528 }, { "epoch": 2.1156837329876863, "grad_norm": 0.8762599229812622, "learning_rate": 1.056125360170037e-06, "loss": 0.0782, "step": 6529 }, { "epoch": 2.1160077770576797, "grad_norm": 0.9436377286911011, "learning_rate": 1.0554114574026823e-06, "loss": 0.0877, "step": 6530 }, { "epoch": 2.116331821127673, "grad_norm": 0.8819335103034973, "learning_rate": 1.0546977314389822e-06, "loss": 0.0803, "step": 6531 }, { "epoch": 2.116655865197667, "grad_norm": 0.8602144718170166, "learning_rate": 1.0539841823662867e-06, "loss": 0.0821, "step": 6532 }, { "epoch": 2.1169799092676604, "grad_norm": 0.8705927729606628, "learning_rate": 1.0532708102719303e-06, "loss": 0.0798, "step": 6533 }, { "epoch": 2.117303953337654, "grad_norm": 0.9389498233795166, "learning_rate": 1.0525576152432204e-06, "loss": 0.0813, "step": 6534 }, { "epoch": 2.1176279974076473, "grad_norm": 0.9420464634895325, "learning_rate": 1.051844597367446e-06, "loss": 0.0812, "step": 6535 }, { "epoch": 2.117952041477641, "grad_norm": 0.8560028076171875, "learning_rate": 1.0511317567318737e-06, "loss": 0.0802, "step": 6536 }, { "epoch": 2.1182760855476346, "grad_norm": 0.8696438670158386, "learning_rate": 1.0504190934237484e-06, "loss": 0.0747, "step": 6537 }, { "epoch": 2.118600129617628, "grad_norm": 0.9260076284408569, "learning_rate": 1.0497066075302939e-06, "loss": 0.0838, "step": 6538 }, { "epoch": 2.1189241736876214, "grad_norm": 0.8828380107879639, "learning_rate": 1.0489942991387088e-06, "loss": 0.0757, "step": 6539 }, { "epoch": 2.119248217757615, "grad_norm": 0.8743027448654175, "learning_rate": 1.0482821683361767e-06, "loss": 0.0775, "step": 6540 }, { "epoch": 2.1195722618276087, "grad_norm": 0.8967780470848083, "learning_rate": 1.0475702152098522e-06, "loss": 0.0787, "step": 6541 }, { "epoch": 2.119896305897602, "grad_norm": 0.7822275757789612, "learning_rate": 1.0468584398468729e-06, "loss": 0.0706, "step": 6542 }, { "epoch": 2.1202203499675956, "grad_norm": 0.8575571775436401, "learning_rate": 1.0461468423343532e-06, "loss": 0.0796, "step": 6543 }, { "epoch": 2.120544394037589, "grad_norm": 0.8331258893013, "learning_rate": 1.0454354227593855e-06, "loss": 0.0745, "step": 6544 }, { "epoch": 2.120868438107583, "grad_norm": 0.8735781908035278, "learning_rate": 1.0447241812090408e-06, "loss": 0.0797, "step": 6545 }, { "epoch": 2.1211924821775763, "grad_norm": 0.8512864112854004, "learning_rate": 1.0440131177703692e-06, "loss": 0.0795, "step": 6546 }, { "epoch": 2.1215165262475697, "grad_norm": 0.8513296246528625, "learning_rate": 1.0433022325303956e-06, "loss": 0.0754, "step": 6547 }, { "epoch": 2.121840570317563, "grad_norm": 0.8489466905593872, "learning_rate": 1.042591525576127e-06, "loss": 0.0775, "step": 6548 }, { "epoch": 2.1221646143875565, "grad_norm": 0.8595914840698242, "learning_rate": 1.041880996994547e-06, "loss": 0.0787, "step": 6549 }, { "epoch": 2.1224886584575504, "grad_norm": 0.8622096180915833, "learning_rate": 1.0411706468726173e-06, "loss": 0.0804, "step": 6550 }, { "epoch": 2.122812702527544, "grad_norm": 0.874594509601593, "learning_rate": 1.040460475297278e-06, "loss": 0.0791, "step": 6551 }, { "epoch": 2.1231367465975373, "grad_norm": 0.8095867037773132, "learning_rate": 1.039750482355447e-06, "loss": 0.0788, "step": 6552 }, { "epoch": 2.1234607906675307, "grad_norm": 0.8671919107437134, "learning_rate": 1.0390406681340212e-06, "loss": 0.0759, "step": 6553 }, { "epoch": 2.123784834737524, "grad_norm": 0.8312748074531555, "learning_rate": 1.0383310327198728e-06, "loss": 0.077, "step": 6554 }, { "epoch": 2.124108878807518, "grad_norm": 0.8587348461151123, "learning_rate": 1.0376215761998578e-06, "loss": 0.0744, "step": 6555 }, { "epoch": 2.1244329228775114, "grad_norm": 0.8469531536102295, "learning_rate": 1.0369122986608044e-06, "loss": 0.0803, "step": 6556 }, { "epoch": 2.124756966947505, "grad_norm": 0.8027523756027222, "learning_rate": 1.0362032001895214e-06, "loss": 0.0767, "step": 6557 }, { "epoch": 2.1250810110174982, "grad_norm": 0.8883047103881836, "learning_rate": 1.0354942808727962e-06, "loss": 0.0837, "step": 6558 }, { "epoch": 2.1254050550874917, "grad_norm": 0.8956378698348999, "learning_rate": 1.0347855407973933e-06, "loss": 0.0811, "step": 6559 }, { "epoch": 2.1257290991574855, "grad_norm": 0.813138484954834, "learning_rate": 1.034076980050057e-06, "loss": 0.071, "step": 6560 }, { "epoch": 2.126053143227479, "grad_norm": 0.8464450836181641, "learning_rate": 1.0333685987175052e-06, "loss": 0.0788, "step": 6561 }, { "epoch": 2.1263771872974724, "grad_norm": 0.8753923773765564, "learning_rate": 1.0326603968864407e-06, "loss": 0.0759, "step": 6562 }, { "epoch": 2.126701231367466, "grad_norm": 0.9201774001121521, "learning_rate": 1.0319523746435367e-06, "loss": 0.082, "step": 6563 }, { "epoch": 2.1270252754374597, "grad_norm": 0.8545686602592468, "learning_rate": 1.0312445320754522e-06, "loss": 0.0812, "step": 6564 }, { "epoch": 2.127349319507453, "grad_norm": 0.9130364060401917, "learning_rate": 1.0305368692688175e-06, "loss": 0.0795, "step": 6565 }, { "epoch": 2.1276733635774465, "grad_norm": 0.8865534067153931, "learning_rate": 1.0298293863102444e-06, "loss": 0.0792, "step": 6566 }, { "epoch": 2.12799740764744, "grad_norm": 0.8899914026260376, "learning_rate": 1.0291220832863219e-06, "loss": 0.0793, "step": 6567 }, { "epoch": 2.1283214517174334, "grad_norm": 0.8158039450645447, "learning_rate": 1.0284149602836174e-06, "loss": 0.0769, "step": 6568 }, { "epoch": 2.1286454957874272, "grad_norm": 0.8327584266662598, "learning_rate": 1.0277080173886766e-06, "loss": 0.0752, "step": 6569 }, { "epoch": 2.1289695398574207, "grad_norm": 0.8870092630386353, "learning_rate": 1.0270012546880207e-06, "loss": 0.0802, "step": 6570 }, { "epoch": 2.129293583927414, "grad_norm": 0.8395013213157654, "learning_rate": 1.0262946722681513e-06, "loss": 0.0752, "step": 6571 }, { "epoch": 2.1296176279974075, "grad_norm": 0.8073970079421997, "learning_rate": 1.0255882702155476e-06, "loss": 0.0723, "step": 6572 }, { "epoch": 2.1299416720674014, "grad_norm": 0.8960281610488892, "learning_rate": 1.024882048616666e-06, "loss": 0.0795, "step": 6573 }, { "epoch": 2.130265716137395, "grad_norm": 0.8593527674674988, "learning_rate": 1.0241760075579418e-06, "loss": 0.0766, "step": 6574 }, { "epoch": 2.130589760207388, "grad_norm": 0.8314931988716125, "learning_rate": 1.0234701471257868e-06, "loss": 0.0757, "step": 6575 }, { "epoch": 2.1309138042773816, "grad_norm": 0.842562735080719, "learning_rate": 1.0227644674065923e-06, "loss": 0.0728, "step": 6576 }, { "epoch": 2.131237848347375, "grad_norm": 0.9939832091331482, "learning_rate": 1.0220589684867269e-06, "loss": 0.0761, "step": 6577 }, { "epoch": 2.131561892417369, "grad_norm": 0.8881149291992188, "learning_rate": 1.021353650452535e-06, "loss": 0.0752, "step": 6578 }, { "epoch": 2.1318859364873624, "grad_norm": 0.8410991430282593, "learning_rate": 1.0206485133903424e-06, "loss": 0.0735, "step": 6579 }, { "epoch": 2.1322099805573558, "grad_norm": 0.8679512143135071, "learning_rate": 1.0199435573864502e-06, "loss": 0.0777, "step": 6580 }, { "epoch": 2.132534024627349, "grad_norm": 0.954770028591156, "learning_rate": 1.0192387825271384e-06, "loss": 0.0841, "step": 6581 }, { "epoch": 2.1328580686973426, "grad_norm": 0.9145404100418091, "learning_rate": 1.018534188898665e-06, "loss": 0.0809, "step": 6582 }, { "epoch": 2.1331821127673365, "grad_norm": 0.8095046281814575, "learning_rate": 1.0178297765872651e-06, "loss": 0.076, "step": 6583 }, { "epoch": 2.13350615683733, "grad_norm": 0.9143222570419312, "learning_rate": 1.0171255456791531e-06, "loss": 0.0817, "step": 6584 }, { "epoch": 2.1338302009073233, "grad_norm": 0.8669990301132202, "learning_rate": 1.016421496260517e-06, "loss": 0.0811, "step": 6585 }, { "epoch": 2.1341542449773168, "grad_norm": 0.8360460996627808, "learning_rate": 1.0157176284175293e-06, "loss": 0.0687, "step": 6586 }, { "epoch": 2.1344782890473106, "grad_norm": 0.8764594793319702, "learning_rate": 1.0150139422363342e-06, "loss": 0.0768, "step": 6587 }, { "epoch": 2.134802333117304, "grad_norm": 0.9600023627281189, "learning_rate": 1.0143104378030565e-06, "loss": 0.0873, "step": 6588 }, { "epoch": 2.1351263771872975, "grad_norm": 0.9232833385467529, "learning_rate": 1.013607115203799e-06, "loss": 0.0801, "step": 6589 }, { "epoch": 2.135450421257291, "grad_norm": 0.9177942276000977, "learning_rate": 1.012903974524641e-06, "loss": 0.0792, "step": 6590 }, { "epoch": 2.1357744653272843, "grad_norm": 0.9922571182250977, "learning_rate": 1.0122010158516412e-06, "loss": 0.0898, "step": 6591 }, { "epoch": 2.136098509397278, "grad_norm": 0.8474758267402649, "learning_rate": 1.0114982392708325e-06, "loss": 0.0775, "step": 6592 }, { "epoch": 2.1364225534672716, "grad_norm": 0.8396010398864746, "learning_rate": 1.010795644868231e-06, "loss": 0.0799, "step": 6593 }, { "epoch": 2.136746597537265, "grad_norm": 0.8584730625152588, "learning_rate": 1.0100932327298244e-06, "loss": 0.0732, "step": 6594 }, { "epoch": 2.1370706416072585, "grad_norm": 0.8349241018295288, "learning_rate": 1.0093910029415843e-06, "loss": 0.0753, "step": 6595 }, { "epoch": 2.1373946856772523, "grad_norm": 0.8902902603149414, "learning_rate": 1.0086889555894545e-06, "loss": 0.0806, "step": 6596 }, { "epoch": 2.1377187297472457, "grad_norm": 0.89394211769104, "learning_rate": 1.0079870907593592e-06, "loss": 0.08, "step": 6597 }, { "epoch": 2.138042773817239, "grad_norm": 0.8607198596000671, "learning_rate": 1.0072854085372005e-06, "loss": 0.0767, "step": 6598 }, { "epoch": 2.1383668178872326, "grad_norm": 0.8974934816360474, "learning_rate": 1.0065839090088572e-06, "loss": 0.0791, "step": 6599 }, { "epoch": 2.138690861957226, "grad_norm": 0.92155522108078, "learning_rate": 1.0058825922601866e-06, "loss": 0.0852, "step": 6600 }, { "epoch": 2.13901490602722, "grad_norm": 0.7993993759155273, "learning_rate": 1.005181458377022e-06, "loss": 0.0703, "step": 6601 }, { "epoch": 2.1393389500972133, "grad_norm": 0.8989080786705017, "learning_rate": 1.0044805074451757e-06, "loss": 0.0828, "step": 6602 }, { "epoch": 2.1396629941672067, "grad_norm": 0.8937354683876038, "learning_rate": 1.003779739550438e-06, "loss": 0.0802, "step": 6603 }, { "epoch": 2.1399870382372, "grad_norm": 0.8435376882553101, "learning_rate": 1.003079154778575e-06, "loss": 0.0763, "step": 6604 }, { "epoch": 2.1403110823071936, "grad_norm": 0.8782016038894653, "learning_rate": 1.0023787532153325e-06, "loss": 0.08, "step": 6605 }, { "epoch": 2.1406351263771874, "grad_norm": 0.8370432257652283, "learning_rate": 1.0016785349464326e-06, "loss": 0.0724, "step": 6606 }, { "epoch": 2.140959170447181, "grad_norm": 0.86859530210495, "learning_rate": 1.0009785000575747e-06, "loss": 0.0791, "step": 6607 }, { "epoch": 2.1412832145171743, "grad_norm": 0.8456296324729919, "learning_rate": 1.0002786486344379e-06, "loss": 0.0682, "step": 6608 }, { "epoch": 2.1416072585871677, "grad_norm": 0.8785818219184875, "learning_rate": 9.995789807626754e-07, "loss": 0.0787, "step": 6609 }, { "epoch": 2.141931302657161, "grad_norm": 0.876229465007782, "learning_rate": 9.988794965279203e-07, "loss": 0.0824, "step": 6610 }, { "epoch": 2.142255346727155, "grad_norm": 0.8579014539718628, "learning_rate": 9.981801960157827e-07, "loss": 0.08, "step": 6611 }, { "epoch": 2.1425793907971484, "grad_norm": 0.844199538230896, "learning_rate": 9.974810793118505e-07, "loss": 0.0755, "step": 6612 }, { "epoch": 2.142903434867142, "grad_norm": 0.8682866096496582, "learning_rate": 9.967821465016893e-07, "loss": 0.0719, "step": 6613 }, { "epoch": 2.1432274789371353, "grad_norm": 0.8322014808654785, "learning_rate": 9.960833976708398e-07, "loss": 0.0719, "step": 6614 }, { "epoch": 2.143551523007129, "grad_norm": 0.9430346488952637, "learning_rate": 9.953848329048248e-07, "loss": 0.0817, "step": 6615 }, { "epoch": 2.1438755670771226, "grad_norm": 0.9577280879020691, "learning_rate": 9.94686452289139e-07, "loss": 0.0837, "step": 6616 }, { "epoch": 2.144199611147116, "grad_norm": 0.917645275592804, "learning_rate": 9.939882559092604e-07, "loss": 0.0821, "step": 6617 }, { "epoch": 2.1445236552171094, "grad_norm": 0.8027808666229248, "learning_rate": 9.93290243850638e-07, "loss": 0.0721, "step": 6618 }, { "epoch": 2.144847699287103, "grad_norm": 0.862285315990448, "learning_rate": 9.925924161987057e-07, "loss": 0.0782, "step": 6619 }, { "epoch": 2.1451717433570967, "grad_norm": 0.9676426649093628, "learning_rate": 9.918947730388682e-07, "loss": 0.0891, "step": 6620 }, { "epoch": 2.14549578742709, "grad_norm": 0.8734449148178101, "learning_rate": 9.911973144565105e-07, "loss": 0.0862, "step": 6621 }, { "epoch": 2.1458198314970836, "grad_norm": 0.9051758050918579, "learning_rate": 9.90500040536996e-07, "loss": 0.0715, "step": 6622 }, { "epoch": 2.146143875567077, "grad_norm": 0.976852536201477, "learning_rate": 9.898029513656618e-07, "loss": 0.0819, "step": 6623 }, { "epoch": 2.146467919637071, "grad_norm": 0.8941224217414856, "learning_rate": 9.891060470278286e-07, "loss": 0.0824, "step": 6624 }, { "epoch": 2.1467919637070643, "grad_norm": 0.958651065826416, "learning_rate": 9.884093276087871e-07, "loss": 0.0833, "step": 6625 }, { "epoch": 2.1471160077770577, "grad_norm": 0.9196736812591553, "learning_rate": 9.877127931938111e-07, "loss": 0.0773, "step": 6626 }, { "epoch": 2.147440051847051, "grad_norm": 0.8458701968193054, "learning_rate": 9.87016443868149e-07, "loss": 0.0756, "step": 6627 }, { "epoch": 2.1477640959170445, "grad_norm": 0.8649837374687195, "learning_rate": 9.863202797170273e-07, "loss": 0.0764, "step": 6628 }, { "epoch": 2.1480881399870384, "grad_norm": 1.1385384798049927, "learning_rate": 9.8562430082565e-07, "loss": 0.0797, "step": 6629 }, { "epoch": 2.148412184057032, "grad_norm": 0.8109810948371887, "learning_rate": 9.849285072791978e-07, "loss": 0.0715, "step": 6630 }, { "epoch": 2.1487362281270252, "grad_norm": 0.8902484774589539, "learning_rate": 9.8423289916283e-07, "loss": 0.078, "step": 6631 }, { "epoch": 2.1490602721970187, "grad_norm": 0.8315941095352173, "learning_rate": 9.835374765616809e-07, "loss": 0.0742, "step": 6632 }, { "epoch": 2.149384316267012, "grad_norm": 0.8010660409927368, "learning_rate": 9.82842239560864e-07, "loss": 0.0734, "step": 6633 }, { "epoch": 2.149708360337006, "grad_norm": 0.8385948538780212, "learning_rate": 9.821471882454703e-07, "loss": 0.0769, "step": 6634 }, { "epoch": 2.1500324044069994, "grad_norm": 0.8368885517120361, "learning_rate": 9.814523227005662e-07, "loss": 0.0806, "step": 6635 }, { "epoch": 2.150356448476993, "grad_norm": 0.9209007620811462, "learning_rate": 9.807576430111975e-07, "loss": 0.0696, "step": 6636 }, { "epoch": 2.1506804925469862, "grad_norm": 0.8243295550346375, "learning_rate": 9.800631492623867e-07, "loss": 0.0712, "step": 6637 }, { "epoch": 2.15100453661698, "grad_norm": 0.8927811980247498, "learning_rate": 9.793688415391304e-07, "loss": 0.0759, "step": 6638 }, { "epoch": 2.1513285806869735, "grad_norm": 0.8509796261787415, "learning_rate": 9.786747199264088e-07, "loss": 0.076, "step": 6639 }, { "epoch": 2.151652624756967, "grad_norm": 0.8079892992973328, "learning_rate": 9.779807845091722e-07, "loss": 0.0723, "step": 6640 }, { "epoch": 2.1519766688269604, "grad_norm": 0.993626058101654, "learning_rate": 9.77287035372355e-07, "loss": 0.0811, "step": 6641 }, { "epoch": 2.152300712896954, "grad_norm": 0.8776823282241821, "learning_rate": 9.76593472600863e-07, "loss": 0.0759, "step": 6642 }, { "epoch": 2.1526247569669477, "grad_norm": 0.9392797946929932, "learning_rate": 9.75900096279582e-07, "loss": 0.0822, "step": 6643 }, { "epoch": 2.152948801036941, "grad_norm": 0.8545043468475342, "learning_rate": 9.752069064933758e-07, "loss": 0.0795, "step": 6644 }, { "epoch": 2.1532728451069345, "grad_norm": 0.9270698428153992, "learning_rate": 9.745139033270812e-07, "loss": 0.0839, "step": 6645 }, { "epoch": 2.153596889176928, "grad_norm": 0.896757185459137, "learning_rate": 9.738210868655187e-07, "loss": 0.0772, "step": 6646 }, { "epoch": 2.153920933246922, "grad_norm": 0.8868404626846313, "learning_rate": 9.73128457193479e-07, "loss": 0.0796, "step": 6647 }, { "epoch": 2.154244977316915, "grad_norm": 0.8353792428970337, "learning_rate": 9.724360143957367e-07, "loss": 0.0741, "step": 6648 }, { "epoch": 2.1545690213869086, "grad_norm": 0.9920429587364197, "learning_rate": 9.717437585570375e-07, "loss": 0.0852, "step": 6649 }, { "epoch": 2.154893065456902, "grad_norm": 0.8376013040542603, "learning_rate": 9.710516897621072e-07, "loss": 0.0719, "step": 6650 }, { "epoch": 2.1552171095268955, "grad_norm": 0.8551664352416992, "learning_rate": 9.703598080956488e-07, "loss": 0.0782, "step": 6651 }, { "epoch": 2.1555411535968894, "grad_norm": 0.8405658006668091, "learning_rate": 9.696681136423422e-07, "loss": 0.0779, "step": 6652 }, { "epoch": 2.155865197666883, "grad_norm": 0.9270169138908386, "learning_rate": 9.689766064868434e-07, "loss": 0.0805, "step": 6653 }, { "epoch": 2.156189241736876, "grad_norm": 0.8944373726844788, "learning_rate": 9.682852867137865e-07, "loss": 0.0836, "step": 6654 }, { "epoch": 2.1565132858068696, "grad_norm": 0.8218947052955627, "learning_rate": 9.675941544077833e-07, "loss": 0.0726, "step": 6655 }, { "epoch": 2.156837329876863, "grad_norm": 0.8564376831054688, "learning_rate": 9.6690320965342e-07, "loss": 0.0757, "step": 6656 }, { "epoch": 2.157161373946857, "grad_norm": 0.8816162347793579, "learning_rate": 9.66212452535262e-07, "loss": 0.079, "step": 6657 }, { "epoch": 2.1574854180168503, "grad_norm": 0.8961756825447083, "learning_rate": 9.655218831378518e-07, "loss": 0.0798, "step": 6658 }, { "epoch": 2.1578094620868438, "grad_norm": 0.8354091048240662, "learning_rate": 9.648315015457083e-07, "loss": 0.0713, "step": 6659 }, { "epoch": 2.158133506156837, "grad_norm": 0.8975980281829834, "learning_rate": 9.641413078433274e-07, "loss": 0.0757, "step": 6660 }, { "epoch": 2.158457550226831, "grad_norm": 0.8505983352661133, "learning_rate": 9.63451302115182e-07, "loss": 0.0717, "step": 6661 }, { "epoch": 2.1587815942968245, "grad_norm": 0.8575515747070312, "learning_rate": 9.627614844457222e-07, "loss": 0.0771, "step": 6662 }, { "epoch": 2.159105638366818, "grad_norm": 0.9212492108345032, "learning_rate": 9.620718549193764e-07, "loss": 0.0781, "step": 6663 }, { "epoch": 2.1594296824368113, "grad_norm": 0.8683608174324036, "learning_rate": 9.61382413620546e-07, "loss": 0.0807, "step": 6664 }, { "epoch": 2.1597537265068047, "grad_norm": 0.8901230096817017, "learning_rate": 9.606931606336134e-07, "loss": 0.0773, "step": 6665 }, { "epoch": 2.1600777705767986, "grad_norm": 0.8463370203971863, "learning_rate": 9.60004096042936e-07, "loss": 0.0687, "step": 6666 }, { "epoch": 2.160401814646792, "grad_norm": 0.8447796702384949, "learning_rate": 9.593152199328494e-07, "loss": 0.0747, "step": 6667 }, { "epoch": 2.1607258587167855, "grad_norm": 0.9252559542655945, "learning_rate": 9.586265323876653e-07, "loss": 0.0866, "step": 6668 }, { "epoch": 2.161049902786779, "grad_norm": 0.8271765112876892, "learning_rate": 9.579380334916704e-07, "loss": 0.0729, "step": 6669 }, { "epoch": 2.1613739468567728, "grad_norm": 0.9104677438735962, "learning_rate": 9.572497233291337e-07, "loss": 0.0845, "step": 6670 }, { "epoch": 2.161697990926766, "grad_norm": 0.8818185925483704, "learning_rate": 9.56561601984294e-07, "loss": 0.0786, "step": 6671 }, { "epoch": 2.1620220349967596, "grad_norm": 0.8366871476173401, "learning_rate": 9.558736695413745e-07, "loss": 0.0789, "step": 6672 }, { "epoch": 2.162346079066753, "grad_norm": 0.9518574476242065, "learning_rate": 9.551859260845686e-07, "loss": 0.0734, "step": 6673 }, { "epoch": 2.1626701231367464, "grad_norm": 0.9152442812919617, "learning_rate": 9.544983716980505e-07, "loss": 0.0801, "step": 6674 }, { "epoch": 2.1629941672067403, "grad_norm": 0.8730852603912354, "learning_rate": 9.5381100646597e-07, "loss": 0.0755, "step": 6675 }, { "epoch": 2.1633182112767337, "grad_norm": 0.8248627185821533, "learning_rate": 9.531238304724538e-07, "loss": 0.074, "step": 6676 }, { "epoch": 2.163642255346727, "grad_norm": 0.8867030739784241, "learning_rate": 9.524368438016071e-07, "loss": 0.0783, "step": 6677 }, { "epoch": 2.1639662994167206, "grad_norm": 0.8418394923210144, "learning_rate": 9.517500465375071e-07, "loss": 0.0737, "step": 6678 }, { "epoch": 2.164290343486714, "grad_norm": 0.9146268963813782, "learning_rate": 9.510634387642151e-07, "loss": 0.0805, "step": 6679 }, { "epoch": 2.164614387556708, "grad_norm": 0.907317578792572, "learning_rate": 9.503770205657625e-07, "loss": 0.0743, "step": 6680 }, { "epoch": 2.1649384316267013, "grad_norm": 0.9126441478729248, "learning_rate": 9.496907920261609e-07, "loss": 0.0778, "step": 6681 }, { "epoch": 2.1652624756966947, "grad_norm": 0.929598331451416, "learning_rate": 9.490047532293984e-07, "loss": 0.0799, "step": 6682 }, { "epoch": 2.165586519766688, "grad_norm": 0.8832554817199707, "learning_rate": 9.48318904259439e-07, "loss": 0.0764, "step": 6683 }, { "epoch": 2.1659105638366816, "grad_norm": 0.8561263084411621, "learning_rate": 9.476332452002245e-07, "loss": 0.0788, "step": 6684 }, { "epoch": 2.1662346079066754, "grad_norm": 0.8597761392593384, "learning_rate": 9.469477761356727e-07, "loss": 0.0754, "step": 6685 }, { "epoch": 2.166558651976669, "grad_norm": 0.9028628468513489, "learning_rate": 9.462624971496793e-07, "loss": 0.0819, "step": 6686 }, { "epoch": 2.1668826960466623, "grad_norm": 0.89371258020401, "learning_rate": 9.455774083261138e-07, "loss": 0.0739, "step": 6687 }, { "epoch": 2.1672067401166557, "grad_norm": 0.806632399559021, "learning_rate": 9.448925097488257e-07, "loss": 0.0744, "step": 6688 }, { "epoch": 2.1675307841866496, "grad_norm": 0.9763009548187256, "learning_rate": 9.442078015016398e-07, "loss": 0.086, "step": 6689 }, { "epoch": 2.167854828256643, "grad_norm": 0.9614084959030151, "learning_rate": 9.435232836683577e-07, "loss": 0.089, "step": 6690 }, { "epoch": 2.1681788723266364, "grad_norm": 0.9859657287597656, "learning_rate": 9.42838956332758e-07, "loss": 0.0826, "step": 6691 }, { "epoch": 2.16850291639663, "grad_norm": 0.8706166744232178, "learning_rate": 9.421548195785962e-07, "loss": 0.0781, "step": 6692 }, { "epoch": 2.1688269604666233, "grad_norm": 0.928634524345398, "learning_rate": 9.414708734896019e-07, "loss": 0.0798, "step": 6693 }, { "epoch": 2.169151004536617, "grad_norm": 0.9566361904144287, "learning_rate": 9.407871181494865e-07, "loss": 0.0887, "step": 6694 }, { "epoch": 2.1694750486066106, "grad_norm": 0.8379427790641785, "learning_rate": 9.401035536419326e-07, "loss": 0.0746, "step": 6695 }, { "epoch": 2.169799092676604, "grad_norm": 0.8841421008110046, "learning_rate": 9.394201800506028e-07, "loss": 0.0803, "step": 6696 }, { "epoch": 2.1701231367465974, "grad_norm": 0.8510726094245911, "learning_rate": 9.387369974591353e-07, "loss": 0.0761, "step": 6697 }, { "epoch": 2.1704471808165913, "grad_norm": 0.874298632144928, "learning_rate": 9.380540059511453e-07, "loss": 0.0805, "step": 6698 }, { "epoch": 2.1707712248865847, "grad_norm": 1.0128662586212158, "learning_rate": 9.373712056102249e-07, "loss": 0.0888, "step": 6699 }, { "epoch": 2.171095268956578, "grad_norm": 0.842639148235321, "learning_rate": 9.366885965199398e-07, "loss": 0.0772, "step": 6700 }, { "epoch": 2.1714193130265715, "grad_norm": 0.8456000089645386, "learning_rate": 9.360061787638383e-07, "loss": 0.0776, "step": 6701 }, { "epoch": 2.171743357096565, "grad_norm": 0.8387840390205383, "learning_rate": 9.353239524254382e-07, "loss": 0.0755, "step": 6702 }, { "epoch": 2.172067401166559, "grad_norm": 0.8434891700744629, "learning_rate": 9.346419175882407e-07, "loss": 0.0772, "step": 6703 }, { "epoch": 2.1723914452365523, "grad_norm": 0.8305788040161133, "learning_rate": 9.339600743357177e-07, "loss": 0.0731, "step": 6704 }, { "epoch": 2.1727154893065457, "grad_norm": 0.8565192222595215, "learning_rate": 9.332784227513212e-07, "loss": 0.0745, "step": 6705 }, { "epoch": 2.173039533376539, "grad_norm": 0.8777684569358826, "learning_rate": 9.325969629184789e-07, "loss": 0.0762, "step": 6706 }, { "epoch": 2.1733635774465325, "grad_norm": 0.8476821184158325, "learning_rate": 9.319156949205943e-07, "loss": 0.0786, "step": 6707 }, { "epoch": 2.1736876215165264, "grad_norm": 0.9548823833465576, "learning_rate": 9.312346188410496e-07, "loss": 0.0743, "step": 6708 }, { "epoch": 2.17401166558652, "grad_norm": 0.9136479496955872, "learning_rate": 9.30553734763199e-07, "loss": 0.0792, "step": 6709 }, { "epoch": 2.1743357096565132, "grad_norm": 0.896507203578949, "learning_rate": 9.298730427703795e-07, "loss": 0.0808, "step": 6710 }, { "epoch": 2.1746597537265067, "grad_norm": 0.9299559593200684, "learning_rate": 9.291925429458987e-07, "loss": 0.0768, "step": 6711 }, { "epoch": 2.1749837977965005, "grad_norm": 0.9667069315910339, "learning_rate": 9.285122353730439e-07, "loss": 0.0793, "step": 6712 }, { "epoch": 2.175307841866494, "grad_norm": 0.8196617364883423, "learning_rate": 9.278321201350784e-07, "loss": 0.0742, "step": 6713 }, { "epoch": 2.1756318859364874, "grad_norm": 0.9729592204093933, "learning_rate": 9.271521973152418e-07, "loss": 0.0806, "step": 6714 }, { "epoch": 2.175955930006481, "grad_norm": 1.6799136400222778, "learning_rate": 9.264724669967498e-07, "loss": 0.1204, "step": 6715 }, { "epoch": 2.176279974076474, "grad_norm": 0.9227179288864136, "learning_rate": 9.257929292627956e-07, "loss": 0.0829, "step": 6716 }, { "epoch": 2.176604018146468, "grad_norm": 0.877239465713501, "learning_rate": 9.251135841965467e-07, "loss": 0.0788, "step": 6717 }, { "epoch": 2.1769280622164615, "grad_norm": 0.776756227016449, "learning_rate": 9.244344318811491e-07, "loss": 0.0711, "step": 6718 }, { "epoch": 2.177252106286455, "grad_norm": 0.9062384963035583, "learning_rate": 9.237554723997242e-07, "loss": 0.079, "step": 6719 }, { "epoch": 2.1775761503564484, "grad_norm": 0.8527910113334656, "learning_rate": 9.230767058353701e-07, "loss": 0.0787, "step": 6720 }, { "epoch": 2.1779001944264422, "grad_norm": 0.8512275218963623, "learning_rate": 9.223981322711617e-07, "loss": 0.0759, "step": 6721 }, { "epoch": 2.1782242384964356, "grad_norm": 0.8467569351196289, "learning_rate": 9.217197517901494e-07, "loss": 0.0738, "step": 6722 }, { "epoch": 2.178548282566429, "grad_norm": 0.8667898178100586, "learning_rate": 9.210415644753615e-07, "loss": 0.0753, "step": 6723 }, { "epoch": 2.1788723266364225, "grad_norm": 0.9238751530647278, "learning_rate": 9.203635704097988e-07, "loss": 0.0782, "step": 6724 }, { "epoch": 2.179196370706416, "grad_norm": 0.8899182677268982, "learning_rate": 9.196857696764446e-07, "loss": 0.0813, "step": 6725 }, { "epoch": 2.17952041477641, "grad_norm": 0.8934040665626526, "learning_rate": 9.190081623582531e-07, "loss": 0.0813, "step": 6726 }, { "epoch": 2.179844458846403, "grad_norm": 0.8986812829971313, "learning_rate": 9.183307485381571e-07, "loss": 0.0833, "step": 6727 }, { "epoch": 2.1801685029163966, "grad_norm": 0.9125000834465027, "learning_rate": 9.176535282990656e-07, "loss": 0.0782, "step": 6728 }, { "epoch": 2.18049254698639, "grad_norm": 0.8446953296661377, "learning_rate": 9.169765017238641e-07, "loss": 0.0761, "step": 6729 }, { "epoch": 2.1808165910563835, "grad_norm": 0.9098793268203735, "learning_rate": 9.162996688954148e-07, "loss": 0.0786, "step": 6730 }, { "epoch": 2.1811406351263773, "grad_norm": 0.8848891258239746, "learning_rate": 9.156230298965529e-07, "loss": 0.0786, "step": 6731 }, { "epoch": 2.1814646791963708, "grad_norm": 0.920576274394989, "learning_rate": 9.149465848100958e-07, "loss": 0.0821, "step": 6732 }, { "epoch": 2.181788723266364, "grad_norm": 0.8231469988822937, "learning_rate": 9.142703337188305e-07, "loss": 0.0769, "step": 6733 }, { "epoch": 2.1821127673363576, "grad_norm": 0.8333210349082947, "learning_rate": 9.135942767055272e-07, "loss": 0.0774, "step": 6734 }, { "epoch": 2.182436811406351, "grad_norm": 0.885479211807251, "learning_rate": 9.129184138529259e-07, "loss": 0.077, "step": 6735 }, { "epoch": 2.182760855476345, "grad_norm": 0.940333366394043, "learning_rate": 9.122427452437465e-07, "loss": 0.0842, "step": 6736 }, { "epoch": 2.1830848995463383, "grad_norm": 0.9102086424827576, "learning_rate": 9.115672709606846e-07, "loss": 0.0846, "step": 6737 }, { "epoch": 2.1834089436163318, "grad_norm": 0.9427852034568787, "learning_rate": 9.108919910864111e-07, "loss": 0.0812, "step": 6738 }, { "epoch": 2.183732987686325, "grad_norm": 0.8577998876571655, "learning_rate": 9.102169057035753e-07, "loss": 0.0743, "step": 6739 }, { "epoch": 2.184057031756319, "grad_norm": 0.8352759480476379, "learning_rate": 9.095420148947984e-07, "loss": 0.0749, "step": 6740 }, { "epoch": 2.1843810758263125, "grad_norm": 0.8783653378486633, "learning_rate": 9.088673187426836e-07, "loss": 0.0756, "step": 6741 }, { "epoch": 2.184705119896306, "grad_norm": 0.9297550916671753, "learning_rate": 9.081928173298046e-07, "loss": 0.0805, "step": 6742 }, { "epoch": 2.1850291639662993, "grad_norm": 0.9048861265182495, "learning_rate": 9.075185107387149e-07, "loss": 0.0832, "step": 6743 }, { "epoch": 2.1853532080362927, "grad_norm": 0.869208574295044, "learning_rate": 9.068443990519432e-07, "loss": 0.0796, "step": 6744 }, { "epoch": 2.1856772521062866, "grad_norm": 0.8934289813041687, "learning_rate": 9.061704823519943e-07, "loss": 0.0795, "step": 6745 }, { "epoch": 2.18600129617628, "grad_norm": 0.856629490852356, "learning_rate": 9.054967607213486e-07, "loss": 0.0773, "step": 6746 }, { "epoch": 2.1863253402462735, "grad_norm": 0.9060944318771362, "learning_rate": 9.048232342424642e-07, "loss": 0.0839, "step": 6747 }, { "epoch": 2.186649384316267, "grad_norm": 0.9247906804084778, "learning_rate": 9.04149902997773e-07, "loss": 0.0853, "step": 6748 }, { "epoch": 2.1869734283862607, "grad_norm": 0.8763548135757446, "learning_rate": 9.034767670696842e-07, "loss": 0.0825, "step": 6749 }, { "epoch": 2.187297472456254, "grad_norm": 0.9106340408325195, "learning_rate": 9.028038265405836e-07, "loss": 0.0807, "step": 6750 }, { "epoch": 2.1876215165262476, "grad_norm": 0.8381937742233276, "learning_rate": 9.021310814928328e-07, "loss": 0.0751, "step": 6751 }, { "epoch": 2.187945560596241, "grad_norm": 0.8870315551757812, "learning_rate": 9.01458532008769e-07, "loss": 0.0776, "step": 6752 }, { "epoch": 2.1882696046662344, "grad_norm": 0.8370722532272339, "learning_rate": 9.007861781707056e-07, "loss": 0.0757, "step": 6753 }, { "epoch": 2.1885936487362283, "grad_norm": 0.9604190587997437, "learning_rate": 9.001140200609334e-07, "loss": 0.0829, "step": 6754 }, { "epoch": 2.1889176928062217, "grad_norm": 0.9112382531166077, "learning_rate": 8.994420577617155e-07, "loss": 0.0829, "step": 6755 }, { "epoch": 2.189241736876215, "grad_norm": 0.8749806880950928, "learning_rate": 8.987702913552964e-07, "loss": 0.0755, "step": 6756 }, { "epoch": 2.1895657809462086, "grad_norm": 0.8946632146835327, "learning_rate": 8.980987209238922e-07, "loss": 0.0771, "step": 6757 }, { "epoch": 2.189889825016202, "grad_norm": 0.8695065975189209, "learning_rate": 8.974273465496966e-07, "loss": 0.0739, "step": 6758 }, { "epoch": 2.190213869086196, "grad_norm": 0.7892318964004517, "learning_rate": 8.967561683148798e-07, "loss": 0.0708, "step": 6759 }, { "epoch": 2.1905379131561893, "grad_norm": 0.8877748250961304, "learning_rate": 8.960851863015874e-07, "loss": 0.0767, "step": 6760 }, { "epoch": 2.1908619572261827, "grad_norm": 0.9287748336791992, "learning_rate": 8.954144005919422e-07, "loss": 0.0806, "step": 6761 }, { "epoch": 2.191186001296176, "grad_norm": 0.8311761617660522, "learning_rate": 8.947438112680387e-07, "loss": 0.0729, "step": 6762 }, { "epoch": 2.19151004536617, "grad_norm": 0.8091285824775696, "learning_rate": 8.940734184119542e-07, "loss": 0.0716, "step": 6763 }, { "epoch": 2.1918340894361634, "grad_norm": 0.9159359335899353, "learning_rate": 8.934032221057354e-07, "loss": 0.0796, "step": 6764 }, { "epoch": 2.192158133506157, "grad_norm": 0.8579771518707275, "learning_rate": 8.927332224314106e-07, "loss": 0.0765, "step": 6765 }, { "epoch": 2.1924821775761503, "grad_norm": 0.9172267317771912, "learning_rate": 8.92063419470979e-07, "loss": 0.0767, "step": 6766 }, { "epoch": 2.1928062216461437, "grad_norm": 0.8143701553344727, "learning_rate": 8.91393813306419e-07, "loss": 0.0715, "step": 6767 }, { "epoch": 2.1931302657161376, "grad_norm": 0.8438109159469604, "learning_rate": 8.907244040196836e-07, "loss": 0.0751, "step": 6768 }, { "epoch": 2.193454309786131, "grad_norm": 0.960784912109375, "learning_rate": 8.900551916927022e-07, "loss": 0.0819, "step": 6769 }, { "epoch": 2.1937783538561244, "grad_norm": 0.8121520280838013, "learning_rate": 8.893861764073808e-07, "loss": 0.0717, "step": 6770 }, { "epoch": 2.194102397926118, "grad_norm": 0.8404099941253662, "learning_rate": 8.887173582455985e-07, "loss": 0.0752, "step": 6771 }, { "epoch": 2.1944264419961117, "grad_norm": 0.898887574672699, "learning_rate": 8.88048737289213e-07, "loss": 0.0817, "step": 6772 }, { "epoch": 2.194750486066105, "grad_norm": 0.9333533644676208, "learning_rate": 8.873803136200574e-07, "loss": 0.0818, "step": 6773 }, { "epoch": 2.1950745301360985, "grad_norm": 0.8780669569969177, "learning_rate": 8.8671208731994e-07, "loss": 0.0763, "step": 6774 }, { "epoch": 2.195398574206092, "grad_norm": 0.9539688229560852, "learning_rate": 8.860440584706451e-07, "loss": 0.0791, "step": 6775 }, { "epoch": 2.1957226182760854, "grad_norm": 0.9045437574386597, "learning_rate": 8.853762271539332e-07, "loss": 0.0721, "step": 6776 }, { "epoch": 2.1960466623460793, "grad_norm": 0.8967257142066956, "learning_rate": 8.847085934515404e-07, "loss": 0.0753, "step": 6777 }, { "epoch": 2.1963707064160727, "grad_norm": 0.8401694297790527, "learning_rate": 8.840411574451793e-07, "loss": 0.0737, "step": 6778 }, { "epoch": 2.196694750486066, "grad_norm": 0.7956202030181885, "learning_rate": 8.833739192165352e-07, "loss": 0.0689, "step": 6779 }, { "epoch": 2.1970187945560595, "grad_norm": 0.8320735096931458, "learning_rate": 8.827068788472751e-07, "loss": 0.0773, "step": 6780 }, { "epoch": 2.197342838626053, "grad_norm": 0.8158857822418213, "learning_rate": 8.820400364190351e-07, "loss": 0.0726, "step": 6781 }, { "epoch": 2.197666882696047, "grad_norm": 0.9041940569877625, "learning_rate": 8.813733920134321e-07, "loss": 0.08, "step": 6782 }, { "epoch": 2.1979909267660402, "grad_norm": 0.8967098593711853, "learning_rate": 8.807069457120571e-07, "loss": 0.0809, "step": 6783 }, { "epoch": 2.1983149708360337, "grad_norm": 0.908525288105011, "learning_rate": 8.80040697596474e-07, "loss": 0.079, "step": 6784 }, { "epoch": 2.198639014906027, "grad_norm": 0.8507987856864929, "learning_rate": 8.79374647748229e-07, "loss": 0.0752, "step": 6785 }, { "epoch": 2.1989630589760205, "grad_norm": 0.9298058152198792, "learning_rate": 8.787087962488367e-07, "loss": 0.0854, "step": 6786 }, { "epoch": 2.1992871030460144, "grad_norm": 0.9183042645454407, "learning_rate": 8.780431431797937e-07, "loss": 0.0795, "step": 6787 }, { "epoch": 2.199611147116008, "grad_norm": 0.8680285215377808, "learning_rate": 8.773776886225668e-07, "loss": 0.0757, "step": 6788 }, { "epoch": 2.1999351911860012, "grad_norm": 0.9786904454231262, "learning_rate": 8.767124326586043e-07, "loss": 0.0822, "step": 6789 }, { "epoch": 2.2002592352559946, "grad_norm": 0.8955831527709961, "learning_rate": 8.760473753693243e-07, "loss": 0.0764, "step": 6790 }, { "epoch": 2.2005832793259885, "grad_norm": 0.8659034967422485, "learning_rate": 8.753825168361249e-07, "loss": 0.0783, "step": 6791 }, { "epoch": 2.200907323395982, "grad_norm": 0.8939731121063232, "learning_rate": 8.747178571403786e-07, "loss": 0.0758, "step": 6792 }, { "epoch": 2.2012313674659754, "grad_norm": 0.8434916734695435, "learning_rate": 8.74053396363431e-07, "loss": 0.0772, "step": 6793 }, { "epoch": 2.201555411535969, "grad_norm": 0.8473688960075378, "learning_rate": 8.733891345866088e-07, "loss": 0.0751, "step": 6794 }, { "epoch": 2.201879455605962, "grad_norm": 0.9216592907905579, "learning_rate": 8.727250718912089e-07, "loss": 0.0774, "step": 6795 }, { "epoch": 2.202203499675956, "grad_norm": 0.9039542078971863, "learning_rate": 8.72061208358507e-07, "loss": 0.0773, "step": 6796 }, { "epoch": 2.2025275437459495, "grad_norm": 0.8723130226135254, "learning_rate": 8.713975440697536e-07, "loss": 0.0784, "step": 6797 }, { "epoch": 2.202851587815943, "grad_norm": 0.9859674572944641, "learning_rate": 8.707340791061747e-07, "loss": 0.0848, "step": 6798 }, { "epoch": 2.2031756318859363, "grad_norm": 0.9083540439605713, "learning_rate": 8.700708135489722e-07, "loss": 0.0791, "step": 6799 }, { "epoch": 2.20349967595593, "grad_norm": 0.8758490085601807, "learning_rate": 8.694077474793227e-07, "loss": 0.0807, "step": 6800 }, { "epoch": 2.2038237200259236, "grad_norm": 0.9906332492828369, "learning_rate": 8.687448809783799e-07, "loss": 0.0829, "step": 6801 }, { "epoch": 2.204147764095917, "grad_norm": 0.8625491857528687, "learning_rate": 8.680822141272727e-07, "loss": 0.0741, "step": 6802 }, { "epoch": 2.2044718081659105, "grad_norm": 0.9207634925842285, "learning_rate": 8.674197470071033e-07, "loss": 0.0819, "step": 6803 }, { "epoch": 2.204795852235904, "grad_norm": 0.8922463655471802, "learning_rate": 8.667574796989526e-07, "loss": 0.0779, "step": 6804 }, { "epoch": 2.2051198963058978, "grad_norm": 0.8703399896621704, "learning_rate": 8.66095412283875e-07, "loss": 0.0763, "step": 6805 }, { "epoch": 2.205443940375891, "grad_norm": 0.8407284617424011, "learning_rate": 8.654335448429016e-07, "loss": 0.0749, "step": 6806 }, { "epoch": 2.2057679844458846, "grad_norm": 0.8342950344085693, "learning_rate": 8.647718774570385e-07, "loss": 0.0733, "step": 6807 }, { "epoch": 2.206092028515878, "grad_norm": 0.8918887376785278, "learning_rate": 8.641104102072676e-07, "loss": 0.0794, "step": 6808 }, { "epoch": 2.2064160725858715, "grad_norm": 0.8364049196243286, "learning_rate": 8.634491431745465e-07, "loss": 0.0695, "step": 6809 }, { "epoch": 2.2067401166558653, "grad_norm": 0.8273484110832214, "learning_rate": 8.627880764398055e-07, "loss": 0.0723, "step": 6810 }, { "epoch": 2.2070641607258588, "grad_norm": 0.8052671551704407, "learning_rate": 8.621272100839562e-07, "loss": 0.0719, "step": 6811 }, { "epoch": 2.207388204795852, "grad_norm": 1.0396753549575806, "learning_rate": 8.614665441878798e-07, "loss": 0.0945, "step": 6812 }, { "epoch": 2.2077122488658456, "grad_norm": 0.8524155616760254, "learning_rate": 8.60806078832436e-07, "loss": 0.076, "step": 6813 }, { "epoch": 2.2080362929358395, "grad_norm": 0.7909897565841675, "learning_rate": 8.601458140984606e-07, "loss": 0.0706, "step": 6814 }, { "epoch": 2.208360337005833, "grad_norm": 0.897506833076477, "learning_rate": 8.594857500667606e-07, "loss": 0.079, "step": 6815 }, { "epoch": 2.2086843810758263, "grad_norm": 0.8871533274650574, "learning_rate": 8.588258868181251e-07, "loss": 0.0832, "step": 6816 }, { "epoch": 2.2090084251458197, "grad_norm": 0.8382236957550049, "learning_rate": 8.581662244333116e-07, "loss": 0.0723, "step": 6817 }, { "epoch": 2.209332469215813, "grad_norm": 0.85988450050354, "learning_rate": 8.575067629930601e-07, "loss": 0.075, "step": 6818 }, { "epoch": 2.209656513285807, "grad_norm": 0.9264865517616272, "learning_rate": 8.568475025780781e-07, "loss": 0.081, "step": 6819 }, { "epoch": 2.2099805573558005, "grad_norm": 0.9004557728767395, "learning_rate": 8.561884432690568e-07, "loss": 0.0802, "step": 6820 }, { "epoch": 2.210304601425794, "grad_norm": 0.8373770713806152, "learning_rate": 8.555295851466556e-07, "loss": 0.0776, "step": 6821 }, { "epoch": 2.2106286454957873, "grad_norm": 1.0056724548339844, "learning_rate": 8.548709282915135e-07, "loss": 0.0845, "step": 6822 }, { "epoch": 2.210952689565781, "grad_norm": 0.7826520800590515, "learning_rate": 8.542124727842438e-07, "loss": 0.0699, "step": 6823 }, { "epoch": 2.2112767336357746, "grad_norm": 0.9300687909126282, "learning_rate": 8.535542187054352e-07, "loss": 0.0808, "step": 6824 }, { "epoch": 2.211600777705768, "grad_norm": 0.8985550999641418, "learning_rate": 8.528961661356519e-07, "loss": 0.0791, "step": 6825 }, { "epoch": 2.2119248217757614, "grad_norm": 0.8403812646865845, "learning_rate": 8.52238315155432e-07, "loss": 0.0727, "step": 6826 }, { "epoch": 2.212248865845755, "grad_norm": 0.8673394918441772, "learning_rate": 8.515806658452908e-07, "loss": 0.0763, "step": 6827 }, { "epoch": 2.2125729099157487, "grad_norm": 0.9250020980834961, "learning_rate": 8.50923218285718e-07, "loss": 0.0803, "step": 6828 }, { "epoch": 2.212896953985742, "grad_norm": 0.8896529674530029, "learning_rate": 8.502659725571791e-07, "loss": 0.0769, "step": 6829 }, { "epoch": 2.2132209980557356, "grad_norm": 0.903784990310669, "learning_rate": 8.496089287401144e-07, "loss": 0.0782, "step": 6830 }, { "epoch": 2.213545042125729, "grad_norm": 0.8488913178443909, "learning_rate": 8.489520869149398e-07, "loss": 0.0757, "step": 6831 }, { "epoch": 2.2138690861957224, "grad_norm": 0.8580282926559448, "learning_rate": 8.482954471620464e-07, "loss": 0.0709, "step": 6832 }, { "epoch": 2.2141931302657163, "grad_norm": 0.9183516502380371, "learning_rate": 8.476390095618015e-07, "loss": 0.0809, "step": 6833 }, { "epoch": 2.2145171743357097, "grad_norm": 0.878735363483429, "learning_rate": 8.469827741945447e-07, "loss": 0.0762, "step": 6834 }, { "epoch": 2.214841218405703, "grad_norm": 0.88670814037323, "learning_rate": 8.46326741140594e-07, "loss": 0.0742, "step": 6835 }, { "epoch": 2.2151652624756966, "grad_norm": 0.8494538068771362, "learning_rate": 8.456709104802413e-07, "loss": 0.0772, "step": 6836 }, { "epoch": 2.21548930654569, "grad_norm": 0.8244561553001404, "learning_rate": 8.450152822937541e-07, "loss": 0.074, "step": 6837 }, { "epoch": 2.215813350615684, "grad_norm": 0.9178814888000488, "learning_rate": 8.443598566613756e-07, "loss": 0.0767, "step": 6838 }, { "epoch": 2.2161373946856773, "grad_norm": 0.8819333910942078, "learning_rate": 8.437046336633212e-07, "loss": 0.0808, "step": 6839 }, { "epoch": 2.2164614387556707, "grad_norm": 0.8642560839653015, "learning_rate": 8.430496133797872e-07, "loss": 0.0767, "step": 6840 }, { "epoch": 2.216785482825664, "grad_norm": 0.8757349252700806, "learning_rate": 8.423947958909381e-07, "loss": 0.0792, "step": 6841 }, { "epoch": 2.217109526895658, "grad_norm": 0.939451277256012, "learning_rate": 8.41740181276921e-07, "loss": 0.0846, "step": 6842 }, { "epoch": 2.2174335709656514, "grad_norm": 0.9106187224388123, "learning_rate": 8.410857696178518e-07, "loss": 0.0788, "step": 6843 }, { "epoch": 2.217757615035645, "grad_norm": 0.8261914849281311, "learning_rate": 8.404315609938246e-07, "loss": 0.0746, "step": 6844 }, { "epoch": 2.2180816591056383, "grad_norm": 0.9008349180221558, "learning_rate": 8.397775554849086e-07, "loss": 0.081, "step": 6845 }, { "epoch": 2.2184057031756317, "grad_norm": 0.9082056283950806, "learning_rate": 8.391237531711474e-07, "loss": 0.0825, "step": 6846 }, { "epoch": 2.2187297472456255, "grad_norm": 0.9769788980484009, "learning_rate": 8.384701541325612e-07, "loss": 0.0867, "step": 6847 }, { "epoch": 2.219053791315619, "grad_norm": 0.8257579803466797, "learning_rate": 8.378167584491417e-07, "loss": 0.0738, "step": 6848 }, { "epoch": 2.2193778353856124, "grad_norm": 0.9038711190223694, "learning_rate": 8.371635662008615e-07, "loss": 0.0738, "step": 6849 }, { "epoch": 2.219701879455606, "grad_norm": 0.9552277326583862, "learning_rate": 8.365105774676624e-07, "loss": 0.078, "step": 6850 }, { "epoch": 2.2200259235255997, "grad_norm": 0.9162812232971191, "learning_rate": 8.358577923294647e-07, "loss": 0.0839, "step": 6851 }, { "epoch": 2.220349967595593, "grad_norm": 0.8380052447319031, "learning_rate": 8.352052108661634e-07, "loss": 0.0762, "step": 6852 }, { "epoch": 2.2206740116655865, "grad_norm": 0.8755866885185242, "learning_rate": 8.345528331576275e-07, "loss": 0.0779, "step": 6853 }, { "epoch": 2.22099805573558, "grad_norm": 0.9222939014434814, "learning_rate": 8.339006592837021e-07, "loss": 0.0866, "step": 6854 }, { "epoch": 2.2213220998055734, "grad_norm": 0.853366494178772, "learning_rate": 8.33248689324207e-07, "loss": 0.0782, "step": 6855 }, { "epoch": 2.2216461438755672, "grad_norm": 0.8806123733520508, "learning_rate": 8.325969233589376e-07, "loss": 0.076, "step": 6856 }, { "epoch": 2.2219701879455607, "grad_norm": 0.8320830464363098, "learning_rate": 8.319453614676626e-07, "loss": 0.0744, "step": 6857 }, { "epoch": 2.222294232015554, "grad_norm": 0.9103955626487732, "learning_rate": 8.31294003730127e-07, "loss": 0.0827, "step": 6858 }, { "epoch": 2.2226182760855475, "grad_norm": 0.8920407891273499, "learning_rate": 8.306428502260511e-07, "loss": 0.0771, "step": 6859 }, { "epoch": 2.222942320155541, "grad_norm": 0.8198265433311462, "learning_rate": 8.299919010351296e-07, "loss": 0.0722, "step": 6860 }, { "epoch": 2.223266364225535, "grad_norm": 0.8588047027587891, "learning_rate": 8.293411562370327e-07, "loss": 0.0776, "step": 6861 }, { "epoch": 2.2235904082955282, "grad_norm": 0.9530218243598938, "learning_rate": 8.286906159114058e-07, "loss": 0.0795, "step": 6862 }, { "epoch": 2.2239144523655217, "grad_norm": 0.9290475845336914, "learning_rate": 8.280402801378662e-07, "loss": 0.0838, "step": 6863 }, { "epoch": 2.224238496435515, "grad_norm": 0.9210637807846069, "learning_rate": 8.27390148996012e-07, "loss": 0.0817, "step": 6864 }, { "epoch": 2.224562540505509, "grad_norm": 0.8271251320838928, "learning_rate": 8.267402225654112e-07, "loss": 0.0722, "step": 6865 }, { "epoch": 2.2248865845755024, "grad_norm": 0.9257084131240845, "learning_rate": 8.260905009256081e-07, "loss": 0.0842, "step": 6866 }, { "epoch": 2.225210628645496, "grad_norm": 0.8730802536010742, "learning_rate": 8.254409841561234e-07, "loss": 0.0773, "step": 6867 }, { "epoch": 2.225534672715489, "grad_norm": 0.9253896474838257, "learning_rate": 8.24791672336451e-07, "loss": 0.0816, "step": 6868 }, { "epoch": 2.2258587167854826, "grad_norm": 0.8534382581710815, "learning_rate": 8.241425655460616e-07, "loss": 0.0757, "step": 6869 }, { "epoch": 2.2261827608554765, "grad_norm": 0.8908569812774658, "learning_rate": 8.23493663864397e-07, "loss": 0.0805, "step": 6870 }, { "epoch": 2.22650680492547, "grad_norm": 0.8594645261764526, "learning_rate": 8.228449673708797e-07, "loss": 0.0773, "step": 6871 }, { "epoch": 2.2268308489954634, "grad_norm": 0.8712368607521057, "learning_rate": 8.221964761449008e-07, "loss": 0.0772, "step": 6872 }, { "epoch": 2.2271548930654568, "grad_norm": 0.8286442160606384, "learning_rate": 8.215481902658323e-07, "loss": 0.0725, "step": 6873 }, { "epoch": 2.2274789371354506, "grad_norm": 0.8417304754257202, "learning_rate": 8.209001098130157e-07, "loss": 0.0778, "step": 6874 }, { "epoch": 2.227802981205444, "grad_norm": 0.8697782754898071, "learning_rate": 8.20252234865771e-07, "loss": 0.0789, "step": 6875 }, { "epoch": 2.2281270252754375, "grad_norm": 0.833076000213623, "learning_rate": 8.196045655033913e-07, "loss": 0.0754, "step": 6876 }, { "epoch": 2.228451069345431, "grad_norm": 0.8312671780586243, "learning_rate": 8.189571018051454e-07, "loss": 0.0749, "step": 6877 }, { "epoch": 2.2287751134154243, "grad_norm": 0.9436811208724976, "learning_rate": 8.183098438502771e-07, "loss": 0.0847, "step": 6878 }, { "epoch": 2.229099157485418, "grad_norm": 0.898650586605072, "learning_rate": 8.176627917180025e-07, "loss": 0.0808, "step": 6879 }, { "epoch": 2.2294232015554116, "grad_norm": 0.842548131942749, "learning_rate": 8.170159454875173e-07, "loss": 0.077, "step": 6880 }, { "epoch": 2.229747245625405, "grad_norm": 0.9235697984695435, "learning_rate": 8.163693052379873e-07, "loss": 0.0834, "step": 6881 }, { "epoch": 2.2300712896953985, "grad_norm": 0.8513306975364685, "learning_rate": 8.157228710485554e-07, "loss": 0.0716, "step": 6882 }, { "epoch": 2.230395333765392, "grad_norm": 0.8318166136741638, "learning_rate": 8.15076642998339e-07, "loss": 0.0729, "step": 6883 }, { "epoch": 2.2307193778353858, "grad_norm": 0.9229317903518677, "learning_rate": 8.144306211664302e-07, "loss": 0.0802, "step": 6884 }, { "epoch": 2.231043421905379, "grad_norm": 0.945536732673645, "learning_rate": 8.137848056318959e-07, "loss": 0.0848, "step": 6885 }, { "epoch": 2.2313674659753726, "grad_norm": 0.938944399356842, "learning_rate": 8.131391964737773e-07, "loss": 0.0819, "step": 6886 }, { "epoch": 2.231691510045366, "grad_norm": 0.9072917103767395, "learning_rate": 8.12493793771092e-07, "loss": 0.0767, "step": 6887 }, { "epoch": 2.2320155541153595, "grad_norm": 0.8586976528167725, "learning_rate": 8.118485976028292e-07, "loss": 0.0773, "step": 6888 }, { "epoch": 2.2323395981853533, "grad_norm": 0.918808102607727, "learning_rate": 8.112036080479554e-07, "loss": 0.0816, "step": 6889 }, { "epoch": 2.2326636422553467, "grad_norm": 0.8832508325576782, "learning_rate": 8.10558825185411e-07, "loss": 0.0805, "step": 6890 }, { "epoch": 2.23298768632534, "grad_norm": 0.9577889442443848, "learning_rate": 8.099142490941117e-07, "loss": 0.0767, "step": 6891 }, { "epoch": 2.2333117303953336, "grad_norm": 0.8775278925895691, "learning_rate": 8.09269879852947e-07, "loss": 0.0769, "step": 6892 }, { "epoch": 2.2336357744653275, "grad_norm": 0.8546897172927856, "learning_rate": 8.086257175407819e-07, "loss": 0.0775, "step": 6893 }, { "epoch": 2.233959818535321, "grad_norm": 0.9491926431655884, "learning_rate": 8.079817622364539e-07, "loss": 0.0835, "step": 6894 }, { "epoch": 2.2342838626053143, "grad_norm": 0.9256629943847656, "learning_rate": 8.073380140187795e-07, "loss": 0.0813, "step": 6895 }, { "epoch": 2.2346079066753077, "grad_norm": 0.8432714939117432, "learning_rate": 8.066944729665455e-07, "loss": 0.0735, "step": 6896 }, { "epoch": 2.234931950745301, "grad_norm": 0.9228755235671997, "learning_rate": 8.060511391585152e-07, "loss": 0.0847, "step": 6897 }, { "epoch": 2.235255994815295, "grad_norm": 0.9158481955528259, "learning_rate": 8.054080126734271e-07, "loss": 0.0815, "step": 6898 }, { "epoch": 2.2355800388852884, "grad_norm": 0.9194527864456177, "learning_rate": 8.047650935899931e-07, "loss": 0.0792, "step": 6899 }, { "epoch": 2.235904082955282, "grad_norm": 0.9050752520561218, "learning_rate": 8.041223819869015e-07, "loss": 0.0754, "step": 6900 }, { "epoch": 2.2362281270252753, "grad_norm": 0.8906585574150085, "learning_rate": 8.034798779428113e-07, "loss": 0.0802, "step": 6901 }, { "epoch": 2.236552171095269, "grad_norm": 0.9296242594718933, "learning_rate": 8.02837581536362e-07, "loss": 0.0753, "step": 6902 }, { "epoch": 2.2368762151652626, "grad_norm": 0.8792465329170227, "learning_rate": 8.021954928461611e-07, "loss": 0.0787, "step": 6903 }, { "epoch": 2.237200259235256, "grad_norm": 0.9047431349754333, "learning_rate": 8.015536119507977e-07, "loss": 0.0749, "step": 6904 }, { "epoch": 2.2375243033052494, "grad_norm": 0.9151782393455505, "learning_rate": 8.009119389288292e-07, "loss": 0.0756, "step": 6905 }, { "epoch": 2.237848347375243, "grad_norm": 0.8479213118553162, "learning_rate": 8.002704738587911e-07, "loss": 0.0714, "step": 6906 }, { "epoch": 2.2381723914452367, "grad_norm": 0.8420944809913635, "learning_rate": 7.996292168191919e-07, "loss": 0.0767, "step": 6907 }, { "epoch": 2.23849643551523, "grad_norm": 0.8398191332817078, "learning_rate": 7.989881678885158e-07, "loss": 0.0722, "step": 6908 }, { "epoch": 2.2388204795852236, "grad_norm": 0.9042385816574097, "learning_rate": 7.983473271452219e-07, "loss": 0.0768, "step": 6909 }, { "epoch": 2.239144523655217, "grad_norm": 0.8790053129196167, "learning_rate": 7.977066946677404e-07, "loss": 0.0777, "step": 6910 }, { "epoch": 2.2394685677252104, "grad_norm": 0.8296760320663452, "learning_rate": 7.970662705344812e-07, "loss": 0.0712, "step": 6911 }, { "epoch": 2.2397926117952043, "grad_norm": 0.8679847717285156, "learning_rate": 7.964260548238242e-07, "loss": 0.0754, "step": 6912 }, { "epoch": 2.2401166558651977, "grad_norm": 0.8999646902084351, "learning_rate": 7.957860476141261e-07, "loss": 0.0784, "step": 6913 }, { "epoch": 2.240440699935191, "grad_norm": 0.888016402721405, "learning_rate": 7.951462489837178e-07, "loss": 0.083, "step": 6914 }, { "epoch": 2.2407647440051845, "grad_norm": 0.9187142252922058, "learning_rate": 7.945066590109044e-07, "loss": 0.0781, "step": 6915 }, { "epoch": 2.2410887880751784, "grad_norm": 0.8380825519561768, "learning_rate": 7.938672777739654e-07, "loss": 0.0762, "step": 6916 }, { "epoch": 2.241412832145172, "grad_norm": 0.9074355959892273, "learning_rate": 7.932281053511559e-07, "loss": 0.0815, "step": 6917 }, { "epoch": 2.2417368762151653, "grad_norm": 0.8893736600875854, "learning_rate": 7.925891418207024e-07, "loss": 0.0788, "step": 6918 }, { "epoch": 2.2420609202851587, "grad_norm": 0.9274340271949768, "learning_rate": 7.919503872608092e-07, "loss": 0.0852, "step": 6919 }, { "epoch": 2.242384964355152, "grad_norm": 0.9161635041236877, "learning_rate": 7.913118417496532e-07, "loss": 0.0793, "step": 6920 }, { "epoch": 2.242709008425146, "grad_norm": 0.8371251821517944, "learning_rate": 7.906735053653866e-07, "loss": 0.0734, "step": 6921 }, { "epoch": 2.2430330524951394, "grad_norm": 0.880815863609314, "learning_rate": 7.900353781861353e-07, "loss": 0.0789, "step": 6922 }, { "epoch": 2.243357096565133, "grad_norm": 0.8050634860992432, "learning_rate": 7.893974602899998e-07, "loss": 0.0719, "step": 6923 }, { "epoch": 2.2436811406351262, "grad_norm": 0.9162620902061462, "learning_rate": 7.887597517550564e-07, "loss": 0.0804, "step": 6924 }, { "epoch": 2.24400518470512, "grad_norm": 0.8453664183616638, "learning_rate": 7.881222526593513e-07, "loss": 0.0709, "step": 6925 }, { "epoch": 2.2443292287751135, "grad_norm": 0.8469733595848083, "learning_rate": 7.87484963080912e-07, "loss": 0.0742, "step": 6926 }, { "epoch": 2.244653272845107, "grad_norm": 0.8893758058547974, "learning_rate": 7.868478830977331e-07, "loss": 0.0832, "step": 6927 }, { "epoch": 2.2449773169151004, "grad_norm": 0.8658952713012695, "learning_rate": 7.862110127877903e-07, "loss": 0.0757, "step": 6928 }, { "epoch": 2.245301360985094, "grad_norm": 0.9522693157196045, "learning_rate": 7.855743522290283e-07, "loss": 0.0762, "step": 6929 }, { "epoch": 2.2456254050550877, "grad_norm": 0.9310401678085327, "learning_rate": 7.849379014993683e-07, "loss": 0.0814, "step": 6930 }, { "epoch": 2.245949449125081, "grad_norm": 0.878420352935791, "learning_rate": 7.84301660676707e-07, "loss": 0.0753, "step": 6931 }, { "epoch": 2.2462734931950745, "grad_norm": 0.8977034687995911, "learning_rate": 7.836656298389114e-07, "loss": 0.0761, "step": 6932 }, { "epoch": 2.246597537265068, "grad_norm": 0.8511933088302612, "learning_rate": 7.830298090638291e-07, "loss": 0.077, "step": 6933 }, { "epoch": 2.2469215813350614, "grad_norm": 0.9201487302780151, "learning_rate": 7.823941984292752e-07, "loss": 0.0838, "step": 6934 }, { "epoch": 2.2472456254050552, "grad_norm": 0.8686670660972595, "learning_rate": 7.817587980130451e-07, "loss": 0.0747, "step": 6935 }, { "epoch": 2.2475696694750487, "grad_norm": 0.9665684103965759, "learning_rate": 7.811236078929033e-07, "loss": 0.0828, "step": 6936 }, { "epoch": 2.247893713545042, "grad_norm": 0.938249945640564, "learning_rate": 7.80488628146592e-07, "loss": 0.0831, "step": 6937 }, { "epoch": 2.2482177576150355, "grad_norm": 0.9339914321899414, "learning_rate": 7.798538588518265e-07, "loss": 0.0858, "step": 6938 }, { "epoch": 2.248541801685029, "grad_norm": 0.8758047819137573, "learning_rate": 7.792193000862964e-07, "loss": 0.0719, "step": 6939 }, { "epoch": 2.248865845755023, "grad_norm": 0.8981780409812927, "learning_rate": 7.785849519276661e-07, "loss": 0.0785, "step": 6940 }, { "epoch": 2.249189889825016, "grad_norm": 0.8714284896850586, "learning_rate": 7.779508144535725e-07, "loss": 0.0779, "step": 6941 }, { "epoch": 2.2495139338950096, "grad_norm": 0.9096783399581909, "learning_rate": 7.773168877416285e-07, "loss": 0.076, "step": 6942 }, { "epoch": 2.249837977965003, "grad_norm": 0.8372735977172852, "learning_rate": 7.766831718694204e-07, "loss": 0.0806, "step": 6943 }, { "epoch": 2.250162022034997, "grad_norm": 0.8659960031509399, "learning_rate": 7.760496669145093e-07, "loss": 0.0769, "step": 6944 }, { "epoch": 2.2504860661049904, "grad_norm": 0.8610251545906067, "learning_rate": 7.754163729544297e-07, "loss": 0.081, "step": 6945 }, { "epoch": 2.250810110174984, "grad_norm": 0.9168947339057922, "learning_rate": 7.747832900666907e-07, "loss": 0.0815, "step": 6946 }, { "epoch": 2.251134154244977, "grad_norm": 0.844121515750885, "learning_rate": 7.741504183287757e-07, "loss": 0.071, "step": 6947 }, { "epoch": 2.251458198314971, "grad_norm": 0.905340850353241, "learning_rate": 7.73517757818143e-07, "loss": 0.0842, "step": 6948 }, { "epoch": 2.2517822423849645, "grad_norm": 0.9762375950813293, "learning_rate": 7.728853086122212e-07, "loss": 0.09, "step": 6949 }, { "epoch": 2.252106286454958, "grad_norm": 0.8414440155029297, "learning_rate": 7.722530707884196e-07, "loss": 0.0731, "step": 6950 }, { "epoch": 2.2524303305249513, "grad_norm": 0.8716568946838379, "learning_rate": 7.716210444241154e-07, "loss": 0.0737, "step": 6951 }, { "epoch": 2.2527543745949448, "grad_norm": 0.8804923892021179, "learning_rate": 7.709892295966634e-07, "loss": 0.0807, "step": 6952 }, { "epoch": 2.2530784186649386, "grad_norm": 0.8454858064651489, "learning_rate": 7.703576263833915e-07, "loss": 0.0759, "step": 6953 }, { "epoch": 2.253402462734932, "grad_norm": 0.8796680569648743, "learning_rate": 7.697262348616019e-07, "loss": 0.0783, "step": 6954 }, { "epoch": 2.2537265068049255, "grad_norm": 0.8306034803390503, "learning_rate": 7.690950551085716e-07, "loss": 0.0716, "step": 6955 }, { "epoch": 2.254050550874919, "grad_norm": 0.8274477124214172, "learning_rate": 7.684640872015484e-07, "loss": 0.0737, "step": 6956 }, { "epoch": 2.2543745949449123, "grad_norm": 0.954513430595398, "learning_rate": 7.678333312177602e-07, "loss": 0.0817, "step": 6957 }, { "epoch": 2.254698639014906, "grad_norm": 0.8665919899940491, "learning_rate": 7.672027872344017e-07, "loss": 0.0754, "step": 6958 }, { "epoch": 2.2550226830848996, "grad_norm": 0.8250190615653992, "learning_rate": 7.665724553286491e-07, "loss": 0.0759, "step": 6959 }, { "epoch": 2.255346727154893, "grad_norm": 0.870201826095581, "learning_rate": 7.659423355776463e-07, "loss": 0.0772, "step": 6960 }, { "epoch": 2.2556707712248865, "grad_norm": 0.9322313070297241, "learning_rate": 7.653124280585145e-07, "loss": 0.0788, "step": 6961 }, { "epoch": 2.25599481529488, "grad_norm": 0.9001493453979492, "learning_rate": 7.646827328483486e-07, "loss": 0.0808, "step": 6962 }, { "epoch": 2.2563188593648738, "grad_norm": 0.9827464818954468, "learning_rate": 7.64053250024217e-07, "loss": 0.0774, "step": 6963 }, { "epoch": 2.256642903434867, "grad_norm": 0.9482652544975281, "learning_rate": 7.634239796631629e-07, "loss": 0.0885, "step": 6964 }, { "epoch": 2.2569669475048606, "grad_norm": 0.8908950090408325, "learning_rate": 7.62794921842201e-07, "loss": 0.0751, "step": 6965 }, { "epoch": 2.257290991574854, "grad_norm": 0.8292078971862793, "learning_rate": 7.621660766383246e-07, "loss": 0.0678, "step": 6966 }, { "epoch": 2.257615035644848, "grad_norm": 0.9161243438720703, "learning_rate": 7.615374441284962e-07, "loss": 0.0778, "step": 6967 }, { "epoch": 2.2579390797148413, "grad_norm": 0.9270373582839966, "learning_rate": 7.60909024389655e-07, "loss": 0.0809, "step": 6968 }, { "epoch": 2.2582631237848347, "grad_norm": 0.8143619298934937, "learning_rate": 7.602808174987137e-07, "loss": 0.0735, "step": 6969 }, { "epoch": 2.258587167854828, "grad_norm": 0.8208263516426086, "learning_rate": 7.596528235325582e-07, "loss": 0.0738, "step": 6970 }, { "epoch": 2.2589112119248216, "grad_norm": 0.9438315033912659, "learning_rate": 7.590250425680496e-07, "loss": 0.085, "step": 6971 }, { "epoch": 2.2592352559948155, "grad_norm": 0.8808038830757141, "learning_rate": 7.583974746820222e-07, "loss": 0.0782, "step": 6972 }, { "epoch": 2.259559300064809, "grad_norm": 0.8868935704231262, "learning_rate": 7.577701199512835e-07, "loss": 0.0781, "step": 6973 }, { "epoch": 2.2598833441348023, "grad_norm": 0.8319234848022461, "learning_rate": 7.571429784526157e-07, "loss": 0.0716, "step": 6974 }, { "epoch": 2.2602073882047957, "grad_norm": 0.9218267798423767, "learning_rate": 7.565160502627752e-07, "loss": 0.0759, "step": 6975 }, { "epoch": 2.2605314322747896, "grad_norm": 0.9264833927154541, "learning_rate": 7.558893354584923e-07, "loss": 0.0782, "step": 6976 }, { "epoch": 2.260855476344783, "grad_norm": 0.8697494268417358, "learning_rate": 7.5526283411647e-07, "loss": 0.079, "step": 6977 }, { "epoch": 2.2611795204147764, "grad_norm": 0.892841100692749, "learning_rate": 7.546365463133867e-07, "loss": 0.0778, "step": 6978 }, { "epoch": 2.26150356448477, "grad_norm": 0.92507404088974, "learning_rate": 7.540104721258945e-07, "loss": 0.0789, "step": 6979 }, { "epoch": 2.2618276085547633, "grad_norm": 0.8921122550964355, "learning_rate": 7.533846116306162e-07, "loss": 0.0797, "step": 6980 }, { "epoch": 2.262151652624757, "grad_norm": 0.9586029648780823, "learning_rate": 7.527589649041548e-07, "loss": 0.0803, "step": 6981 }, { "epoch": 2.2624756966947506, "grad_norm": 0.822803258895874, "learning_rate": 7.521335320230804e-07, "loss": 0.0721, "step": 6982 }, { "epoch": 2.262799740764744, "grad_norm": 0.9236354231834412, "learning_rate": 7.515083130639411e-07, "loss": 0.0795, "step": 6983 }, { "epoch": 2.2631237848347374, "grad_norm": 0.999817967414856, "learning_rate": 7.508833081032577e-07, "loss": 0.085, "step": 6984 }, { "epoch": 2.263447828904731, "grad_norm": 0.8854063153266907, "learning_rate": 7.502585172175244e-07, "loss": 0.0722, "step": 6985 }, { "epoch": 2.2637718729747247, "grad_norm": 1.0334446430206299, "learning_rate": 7.496339404832109e-07, "loss": 0.0834, "step": 6986 }, { "epoch": 2.264095917044718, "grad_norm": 0.8735396862030029, "learning_rate": 7.490095779767564e-07, "loss": 0.0726, "step": 6987 }, { "epoch": 2.2644199611147116, "grad_norm": 0.943081796169281, "learning_rate": 7.483854297745805e-07, "loss": 0.0785, "step": 6988 }, { "epoch": 2.264744005184705, "grad_norm": 0.901913583278656, "learning_rate": 7.47761495953069e-07, "loss": 0.0777, "step": 6989 }, { "epoch": 2.2650680492546984, "grad_norm": 0.870795488357544, "learning_rate": 7.471377765885893e-07, "loss": 0.0803, "step": 6990 }, { "epoch": 2.2653920933246923, "grad_norm": 0.9563770294189453, "learning_rate": 7.465142717574761e-07, "loss": 0.0827, "step": 6991 }, { "epoch": 2.2657161373946857, "grad_norm": 0.8497862815856934, "learning_rate": 7.458909815360407e-07, "loss": 0.0727, "step": 6992 }, { "epoch": 2.266040181464679, "grad_norm": 0.873889684677124, "learning_rate": 7.45267906000568e-07, "loss": 0.0787, "step": 6993 }, { "epoch": 2.2663642255346725, "grad_norm": 0.8586767911911011, "learning_rate": 7.446450452273168e-07, "loss": 0.0758, "step": 6994 }, { "epoch": 2.2666882696046664, "grad_norm": 0.9471902251243591, "learning_rate": 7.440223992925194e-07, "loss": 0.0819, "step": 6995 }, { "epoch": 2.26701231367466, "grad_norm": 0.8423433303833008, "learning_rate": 7.433999682723805e-07, "loss": 0.0707, "step": 6996 }, { "epoch": 2.2673363577446533, "grad_norm": 0.8941042423248291, "learning_rate": 7.427777522430804e-07, "loss": 0.0761, "step": 6997 }, { "epoch": 2.2676604018146467, "grad_norm": 0.923392653465271, "learning_rate": 7.42155751280772e-07, "loss": 0.074, "step": 6998 }, { "epoch": 2.2679844458846405, "grad_norm": 0.867560088634491, "learning_rate": 7.415339654615824e-07, "loss": 0.0731, "step": 6999 }, { "epoch": 2.268308489954634, "grad_norm": 0.8599602580070496, "learning_rate": 7.409123948616123e-07, "loss": 0.0772, "step": 7000 }, { "epoch": 2.2686325340246274, "grad_norm": 0.8821036219596863, "learning_rate": 7.402910395569357e-07, "loss": 0.0772, "step": 7001 }, { "epoch": 2.268956578094621, "grad_norm": 0.9057403802871704, "learning_rate": 7.396698996236004e-07, "loss": 0.0803, "step": 7002 }, { "epoch": 2.2692806221646142, "grad_norm": 0.87221759557724, "learning_rate": 7.39048975137629e-07, "loss": 0.0733, "step": 7003 }, { "epoch": 2.269604666234608, "grad_norm": 0.9132773876190186, "learning_rate": 7.38428266175015e-07, "loss": 0.0799, "step": 7004 }, { "epoch": 2.2699287103046015, "grad_norm": 0.9301595091819763, "learning_rate": 7.378077728117277e-07, "loss": 0.0705, "step": 7005 }, { "epoch": 2.270252754374595, "grad_norm": 0.8766486644744873, "learning_rate": 7.371874951237099e-07, "loss": 0.0772, "step": 7006 }, { "epoch": 2.2705767984445884, "grad_norm": 0.8661054968833923, "learning_rate": 7.365674331868772e-07, "loss": 0.0746, "step": 7007 }, { "epoch": 2.270900842514582, "grad_norm": 0.8911915421485901, "learning_rate": 7.359475870771202e-07, "loss": 0.0753, "step": 7008 }, { "epoch": 2.2712248865845757, "grad_norm": 0.8957100510597229, "learning_rate": 7.353279568702995e-07, "loss": 0.0747, "step": 7009 }, { "epoch": 2.271548930654569, "grad_norm": 0.9232888221740723, "learning_rate": 7.347085426422551e-07, "loss": 0.0777, "step": 7010 }, { "epoch": 2.2718729747245625, "grad_norm": 0.9190528988838196, "learning_rate": 7.340893444687944e-07, "loss": 0.0815, "step": 7011 }, { "epoch": 2.272197018794556, "grad_norm": 0.8859702348709106, "learning_rate": 7.334703624257039e-07, "loss": 0.0778, "step": 7012 }, { "epoch": 2.2725210628645494, "grad_norm": 0.8363940119743347, "learning_rate": 7.328515965887389e-07, "loss": 0.072, "step": 7013 }, { "epoch": 2.2728451069345432, "grad_norm": 0.8972896337509155, "learning_rate": 7.322330470336314e-07, "loss": 0.0725, "step": 7014 }, { "epoch": 2.2731691510045366, "grad_norm": 0.8751944899559021, "learning_rate": 7.316147138360855e-07, "loss": 0.0777, "step": 7015 }, { "epoch": 2.27349319507453, "grad_norm": 0.9271724224090576, "learning_rate": 7.309965970717795e-07, "loss": 0.0822, "step": 7016 }, { "epoch": 2.2738172391445235, "grad_norm": 0.8555557131767273, "learning_rate": 7.303786968163651e-07, "loss": 0.0749, "step": 7017 }, { "epoch": 2.2741412832145174, "grad_norm": 0.8882536292076111, "learning_rate": 7.297610131454657e-07, "loss": 0.0797, "step": 7018 }, { "epoch": 2.274465327284511, "grad_norm": 0.8157416582107544, "learning_rate": 7.291435461346827e-07, "loss": 0.0772, "step": 7019 }, { "epoch": 2.274789371354504, "grad_norm": 1.0363208055496216, "learning_rate": 7.285262958595846e-07, "loss": 0.0758, "step": 7020 }, { "epoch": 2.2751134154244976, "grad_norm": 0.9158379435539246, "learning_rate": 7.279092623957204e-07, "loss": 0.0813, "step": 7021 }, { "epoch": 2.2754374594944915, "grad_norm": 0.9131003022193909, "learning_rate": 7.272924458186064e-07, "loss": 0.0831, "step": 7022 }, { "epoch": 2.275761503564485, "grad_norm": 0.8216702938079834, "learning_rate": 7.26675846203736e-07, "loss": 0.0757, "step": 7023 }, { "epoch": 2.2760855476344783, "grad_norm": 0.8345896005630493, "learning_rate": 7.26059463626575e-07, "loss": 0.0729, "step": 7024 }, { "epoch": 2.2764095917044718, "grad_norm": 0.8480775952339172, "learning_rate": 7.254432981625626e-07, "loss": 0.0707, "step": 7025 }, { "epoch": 2.276733635774465, "grad_norm": 0.8742204308509827, "learning_rate": 7.248273498871119e-07, "loss": 0.073, "step": 7026 }, { "epoch": 2.277057679844459, "grad_norm": 0.933005154132843, "learning_rate": 7.242116188756082e-07, "loss": 0.0865, "step": 7027 }, { "epoch": 2.2773817239144525, "grad_norm": 0.880244791507721, "learning_rate": 7.235961052034113e-07, "loss": 0.078, "step": 7028 }, { "epoch": 2.277705767984446, "grad_norm": 0.8771071434020996, "learning_rate": 7.22980808945854e-07, "loss": 0.081, "step": 7029 }, { "epoch": 2.2780298120544393, "grad_norm": 0.8915297389030457, "learning_rate": 7.22365730178243e-07, "loss": 0.0824, "step": 7030 }, { "epoch": 2.2783538561244328, "grad_norm": 0.8193331956863403, "learning_rate": 7.217508689758576e-07, "loss": 0.0777, "step": 7031 }, { "epoch": 2.2786779001944266, "grad_norm": 0.8842501640319824, "learning_rate": 7.211362254139512e-07, "loss": 0.0792, "step": 7032 }, { "epoch": 2.27900194426442, "grad_norm": 0.80404132604599, "learning_rate": 7.205217995677502e-07, "loss": 0.0728, "step": 7033 }, { "epoch": 2.2793259883344135, "grad_norm": 0.8987725973129272, "learning_rate": 7.199075915124548e-07, "loss": 0.0773, "step": 7034 }, { "epoch": 2.279650032404407, "grad_norm": 0.8705540895462036, "learning_rate": 7.192936013232368e-07, "loss": 0.0809, "step": 7035 }, { "epoch": 2.2799740764744003, "grad_norm": 0.8713969588279724, "learning_rate": 7.186798290752436e-07, "loss": 0.0782, "step": 7036 }, { "epoch": 2.280298120544394, "grad_norm": 0.8617414236068726, "learning_rate": 7.180662748435946e-07, "loss": 0.0748, "step": 7037 }, { "epoch": 2.2806221646143876, "grad_norm": 0.8548529148101807, "learning_rate": 7.174529387033832e-07, "loss": 0.0764, "step": 7038 }, { "epoch": 2.280946208684381, "grad_norm": 0.858833909034729, "learning_rate": 7.168398207296764e-07, "loss": 0.0757, "step": 7039 }, { "epoch": 2.2812702527543745, "grad_norm": 0.8891960978507996, "learning_rate": 7.162269209975117e-07, "loss": 0.0763, "step": 7040 }, { "epoch": 2.281594296824368, "grad_norm": 0.8929044008255005, "learning_rate": 7.156142395819055e-07, "loss": 0.0788, "step": 7041 }, { "epoch": 2.2819183408943617, "grad_norm": 0.9119802117347717, "learning_rate": 7.150017765578401e-07, "loss": 0.0863, "step": 7042 }, { "epoch": 2.282242384964355, "grad_norm": 0.882005512714386, "learning_rate": 7.143895320002789e-07, "loss": 0.0825, "step": 7043 }, { "epoch": 2.2825664290343486, "grad_norm": 0.9207246899604797, "learning_rate": 7.137775059841523e-07, "loss": 0.0849, "step": 7044 }, { "epoch": 2.282890473104342, "grad_norm": 0.9211490750312805, "learning_rate": 7.131656985843669e-07, "loss": 0.0803, "step": 7045 }, { "epoch": 2.283214517174336, "grad_norm": 0.909271776676178, "learning_rate": 7.125541098758021e-07, "loss": 0.0818, "step": 7046 }, { "epoch": 2.2835385612443293, "grad_norm": 0.8487682938575745, "learning_rate": 7.119427399333104e-07, "loss": 0.0725, "step": 7047 }, { "epoch": 2.2838626053143227, "grad_norm": 1.0248647928237915, "learning_rate": 7.113315888317182e-07, "loss": 0.0883, "step": 7048 }, { "epoch": 2.284186649384316, "grad_norm": 0.9240375757217407, "learning_rate": 7.107206566458225e-07, "loss": 0.0771, "step": 7049 }, { "epoch": 2.28451069345431, "grad_norm": 0.8063546419143677, "learning_rate": 7.101099434503986e-07, "loss": 0.0724, "step": 7050 }, { "epoch": 2.2848347375243034, "grad_norm": 0.9139791131019592, "learning_rate": 7.09499449320189e-07, "loss": 0.0846, "step": 7051 }, { "epoch": 2.285158781594297, "grad_norm": 0.877644419670105, "learning_rate": 7.088891743299136e-07, "loss": 0.0788, "step": 7052 }, { "epoch": 2.2854828256642903, "grad_norm": 0.9822368621826172, "learning_rate": 7.08279118554264e-07, "loss": 0.0813, "step": 7053 }, { "epoch": 2.2858068697342837, "grad_norm": 0.9277673363685608, "learning_rate": 7.076692820679051e-07, "loss": 0.0823, "step": 7054 }, { "epoch": 2.2861309138042776, "grad_norm": 0.9683315753936768, "learning_rate": 7.070596649454748e-07, "loss": 0.076, "step": 7055 }, { "epoch": 2.286454957874271, "grad_norm": 0.8959435224533081, "learning_rate": 7.064502672615847e-07, "loss": 0.0811, "step": 7056 }, { "epoch": 2.2867790019442644, "grad_norm": 0.9008287787437439, "learning_rate": 7.058410890908196e-07, "loss": 0.0741, "step": 7057 }, { "epoch": 2.287103046014258, "grad_norm": 0.9168748259544373, "learning_rate": 7.052321305077356e-07, "loss": 0.0799, "step": 7058 }, { "epoch": 2.2874270900842513, "grad_norm": 0.9024057984352112, "learning_rate": 7.046233915868642e-07, "loss": 0.0723, "step": 7059 }, { "epoch": 2.287751134154245, "grad_norm": 0.9057157039642334, "learning_rate": 7.04014872402709e-07, "loss": 0.0794, "step": 7060 }, { "epoch": 2.2880751782242386, "grad_norm": 0.8857600688934326, "learning_rate": 7.034065730297471e-07, "loss": 0.079, "step": 7061 }, { "epoch": 2.288399222294232, "grad_norm": 0.9752936959266663, "learning_rate": 7.027984935424284e-07, "loss": 0.0859, "step": 7062 }, { "epoch": 2.2887232663642254, "grad_norm": 0.9727655053138733, "learning_rate": 7.021906340151763e-07, "loss": 0.0856, "step": 7063 }, { "epoch": 2.289047310434219, "grad_norm": 0.8868271708488464, "learning_rate": 7.015829945223851e-07, "loss": 0.0802, "step": 7064 }, { "epoch": 2.2893713545042127, "grad_norm": 0.9148418307304382, "learning_rate": 7.009755751384267e-07, "loss": 0.0787, "step": 7065 }, { "epoch": 2.289695398574206, "grad_norm": 0.8575038313865662, "learning_rate": 7.003683759376415e-07, "loss": 0.0737, "step": 7066 }, { "epoch": 2.2900194426441995, "grad_norm": 0.7997493743896484, "learning_rate": 6.997613969943451e-07, "loss": 0.0717, "step": 7067 }, { "epoch": 2.290343486714193, "grad_norm": 0.95135498046875, "learning_rate": 6.99154638382826e-07, "loss": 0.0864, "step": 7068 }, { "epoch": 2.290667530784187, "grad_norm": 0.8583988547325134, "learning_rate": 6.985481001773456e-07, "loss": 0.0747, "step": 7069 }, { "epoch": 2.2909915748541803, "grad_norm": 0.8334120512008667, "learning_rate": 6.979417824521393e-07, "loss": 0.0741, "step": 7070 }, { "epoch": 2.2913156189241737, "grad_norm": 0.8967745900154114, "learning_rate": 6.97335685281412e-07, "loss": 0.0838, "step": 7071 }, { "epoch": 2.291639662994167, "grad_norm": 0.8889458179473877, "learning_rate": 6.967298087393471e-07, "loss": 0.0747, "step": 7072 }, { "epoch": 2.291963707064161, "grad_norm": 0.9700846076011658, "learning_rate": 6.96124152900095e-07, "loss": 0.0783, "step": 7073 }, { "epoch": 2.2922877511341544, "grad_norm": 0.8384163975715637, "learning_rate": 6.955187178377853e-07, "loss": 0.076, "step": 7074 }, { "epoch": 2.292611795204148, "grad_norm": 0.8633262515068054, "learning_rate": 6.949135036265153e-07, "loss": 0.0745, "step": 7075 }, { "epoch": 2.2929358392741412, "grad_norm": 0.9053198099136353, "learning_rate": 6.943085103403577e-07, "loss": 0.0759, "step": 7076 }, { "epoch": 2.2932598833441347, "grad_norm": 0.8445454239845276, "learning_rate": 6.937037380533579e-07, "loss": 0.0737, "step": 7077 }, { "epoch": 2.2935839274141285, "grad_norm": 0.8526014089584351, "learning_rate": 6.930991868395343e-07, "loss": 0.0763, "step": 7078 }, { "epoch": 2.293907971484122, "grad_norm": 0.8953704833984375, "learning_rate": 6.924948567728787e-07, "loss": 0.0803, "step": 7079 }, { "epoch": 2.2942320155541154, "grad_norm": 0.8394033312797546, "learning_rate": 6.918907479273535e-07, "loss": 0.0736, "step": 7080 }, { "epoch": 2.294556059624109, "grad_norm": 0.9278076887130737, "learning_rate": 6.912868603768979e-07, "loss": 0.0836, "step": 7081 }, { "epoch": 2.2948801036941022, "grad_norm": 0.8417161107063293, "learning_rate": 6.906831941954206e-07, "loss": 0.0768, "step": 7082 }, { "epoch": 2.295204147764096, "grad_norm": 0.873684287071228, "learning_rate": 6.900797494568045e-07, "loss": 0.0789, "step": 7083 }, { "epoch": 2.2955281918340895, "grad_norm": 0.9292371869087219, "learning_rate": 6.894765262349056e-07, "loss": 0.0797, "step": 7084 }, { "epoch": 2.295852235904083, "grad_norm": 0.8600883483886719, "learning_rate": 6.88873524603553e-07, "loss": 0.0759, "step": 7085 }, { "epoch": 2.2961762799740764, "grad_norm": 0.9637444019317627, "learning_rate": 6.882707446365477e-07, "loss": 0.087, "step": 7086 }, { "epoch": 2.29650032404407, "grad_norm": 0.9064836502075195, "learning_rate": 6.876681864076646e-07, "loss": 0.077, "step": 7087 }, { "epoch": 2.2968243681140637, "grad_norm": 0.9461975693702698, "learning_rate": 6.870658499906505e-07, "loss": 0.0814, "step": 7088 }, { "epoch": 2.297148412184057, "grad_norm": 0.8657816052436829, "learning_rate": 6.864637354592266e-07, "loss": 0.0801, "step": 7089 }, { "epoch": 2.2974724562540505, "grad_norm": 0.8060582280158997, "learning_rate": 6.858618428870842e-07, "loss": 0.0702, "step": 7090 }, { "epoch": 2.297796500324044, "grad_norm": 0.905968964099884, "learning_rate": 6.852601723478902e-07, "loss": 0.08, "step": 7091 }, { "epoch": 2.2981205443940373, "grad_norm": 0.8682804107666016, "learning_rate": 6.84658723915283e-07, "loss": 0.0763, "step": 7092 }, { "epoch": 2.298444588464031, "grad_norm": 0.8451237678527832, "learning_rate": 6.840574976628741e-07, "loss": 0.0792, "step": 7093 }, { "epoch": 2.2987686325340246, "grad_norm": 0.880070686340332, "learning_rate": 6.834564936642488e-07, "loss": 0.077, "step": 7094 }, { "epoch": 2.299092676604018, "grad_norm": 0.9359896779060364, "learning_rate": 6.828557119929613e-07, "loss": 0.0838, "step": 7095 }, { "epoch": 2.2994167206740115, "grad_norm": 0.8509678840637207, "learning_rate": 6.822551527225452e-07, "loss": 0.0741, "step": 7096 }, { "epoch": 2.2997407647440054, "grad_norm": 0.8210530281066895, "learning_rate": 6.816548159264993e-07, "loss": 0.0713, "step": 7097 }, { "epoch": 2.3000648088139988, "grad_norm": 0.8998435139656067, "learning_rate": 6.810547016783029e-07, "loss": 0.0779, "step": 7098 }, { "epoch": 2.300388852883992, "grad_norm": 0.8631594777107239, "learning_rate": 6.804548100514013e-07, "loss": 0.0773, "step": 7099 }, { "epoch": 2.3007128969539856, "grad_norm": 0.8914357423782349, "learning_rate": 6.798551411192165e-07, "loss": 0.0814, "step": 7100 }, { "epoch": 2.3010369410239795, "grad_norm": 0.9626676440238953, "learning_rate": 6.792556949551426e-07, "loss": 0.0868, "step": 7101 }, { "epoch": 2.301360985093973, "grad_norm": 0.7938775420188904, "learning_rate": 6.786564716325441e-07, "loss": 0.0731, "step": 7102 }, { "epoch": 2.3016850291639663, "grad_norm": 0.8719715476036072, "learning_rate": 6.780574712247632e-07, "loss": 0.0738, "step": 7103 }, { "epoch": 2.3020090732339598, "grad_norm": 0.8483251333236694, "learning_rate": 6.774586938051084e-07, "loss": 0.0767, "step": 7104 }, { "epoch": 2.302333117303953, "grad_norm": 0.8604863286018372, "learning_rate": 6.768601394468674e-07, "loss": 0.0791, "step": 7105 }, { "epoch": 2.302657161373947, "grad_norm": 0.8614899516105652, "learning_rate": 6.762618082232952e-07, "loss": 0.0803, "step": 7106 }, { "epoch": 2.3029812054439405, "grad_norm": 0.8272896409034729, "learning_rate": 6.756637002076225e-07, "loss": 0.0766, "step": 7107 }, { "epoch": 2.303305249513934, "grad_norm": 0.9013105034828186, "learning_rate": 6.750658154730522e-07, "loss": 0.0779, "step": 7108 }, { "epoch": 2.3036292935839273, "grad_norm": 0.9156434535980225, "learning_rate": 6.744681540927588e-07, "loss": 0.0818, "step": 7109 }, { "epoch": 2.3039533376539207, "grad_norm": 0.8647904992103577, "learning_rate": 6.738707161398914e-07, "loss": 0.0819, "step": 7110 }, { "epoch": 2.3042773817239146, "grad_norm": 0.9575430750846863, "learning_rate": 6.732735016875697e-07, "loss": 0.0802, "step": 7111 }, { "epoch": 2.304601425793908, "grad_norm": 0.8224124312400818, "learning_rate": 6.726765108088881e-07, "loss": 0.0706, "step": 7112 }, { "epoch": 2.3049254698639015, "grad_norm": 0.8639494180679321, "learning_rate": 6.720797435769111e-07, "loss": 0.0799, "step": 7113 }, { "epoch": 2.305249513933895, "grad_norm": 0.9154525995254517, "learning_rate": 6.714832000646778e-07, "loss": 0.0777, "step": 7114 }, { "epoch": 2.3055735580038883, "grad_norm": 0.8499510884284973, "learning_rate": 6.708868803451992e-07, "loss": 0.0753, "step": 7115 }, { "epoch": 2.305897602073882, "grad_norm": 0.8317762613296509, "learning_rate": 6.702907844914597e-07, "loss": 0.0748, "step": 7116 }, { "epoch": 2.3062216461438756, "grad_norm": 0.9097682237625122, "learning_rate": 6.696949125764149e-07, "loss": 0.082, "step": 7117 }, { "epoch": 2.306545690213869, "grad_norm": 0.8897013068199158, "learning_rate": 6.690992646729949e-07, "loss": 0.0825, "step": 7118 }, { "epoch": 2.3068697342838624, "grad_norm": 0.8689218163490295, "learning_rate": 6.685038408540989e-07, "loss": 0.0808, "step": 7119 }, { "epoch": 2.3071937783538563, "grad_norm": 0.8567068576812744, "learning_rate": 6.679086411926039e-07, "loss": 0.0765, "step": 7120 }, { "epoch": 2.3075178224238497, "grad_norm": 0.9129802584648132, "learning_rate": 6.673136657613547e-07, "loss": 0.0805, "step": 7121 }, { "epoch": 2.307841866493843, "grad_norm": 0.9538443684577942, "learning_rate": 6.667189146331707e-07, "loss": 0.0819, "step": 7122 }, { "epoch": 2.3081659105638366, "grad_norm": 0.8463201522827148, "learning_rate": 6.661243878808443e-07, "loss": 0.0706, "step": 7123 }, { "epoch": 2.3084899546338304, "grad_norm": 0.8989773988723755, "learning_rate": 6.655300855771393e-07, "loss": 0.0759, "step": 7124 }, { "epoch": 2.308813998703824, "grad_norm": 0.9400907158851624, "learning_rate": 6.649360077947939e-07, "loss": 0.0796, "step": 7125 }, { "epoch": 2.3091380427738173, "grad_norm": 0.8975268602371216, "learning_rate": 6.643421546065146e-07, "loss": 0.0813, "step": 7126 }, { "epoch": 2.3094620868438107, "grad_norm": 0.9153731465339661, "learning_rate": 6.637485260849866e-07, "loss": 0.0739, "step": 7127 }, { "epoch": 2.309786130913804, "grad_norm": 0.9203081130981445, "learning_rate": 6.63155122302861e-07, "loss": 0.0748, "step": 7128 }, { "epoch": 2.310110174983798, "grad_norm": 0.8517372012138367, "learning_rate": 6.625619433327681e-07, "loss": 0.0753, "step": 7129 }, { "epoch": 2.3104342190537914, "grad_norm": 0.8861249089241028, "learning_rate": 6.619689892473046e-07, "loss": 0.078, "step": 7130 }, { "epoch": 2.310758263123785, "grad_norm": 0.9046693444252014, "learning_rate": 6.613762601190435e-07, "loss": 0.0819, "step": 7131 }, { "epoch": 2.3110823071937783, "grad_norm": 0.8858131170272827, "learning_rate": 6.60783756020529e-07, "loss": 0.0739, "step": 7132 }, { "epoch": 2.3114063512637717, "grad_norm": 0.9197781085968018, "learning_rate": 6.601914770242776e-07, "loss": 0.0803, "step": 7133 }, { "epoch": 2.3117303953337656, "grad_norm": 0.7731859683990479, "learning_rate": 6.595994232027794e-07, "loss": 0.0658, "step": 7134 }, { "epoch": 2.312054439403759, "grad_norm": 0.8590306639671326, "learning_rate": 6.590075946284941e-07, "loss": 0.0697, "step": 7135 }, { "epoch": 2.3123784834737524, "grad_norm": 0.9065210223197937, "learning_rate": 6.584159913738583e-07, "loss": 0.0851, "step": 7136 }, { "epoch": 2.312702527543746, "grad_norm": 0.8334217667579651, "learning_rate": 6.578246135112765e-07, "loss": 0.0666, "step": 7137 }, { "epoch": 2.3130265716137393, "grad_norm": 0.8901119232177734, "learning_rate": 6.572334611131284e-07, "loss": 0.0822, "step": 7138 }, { "epoch": 2.313350615683733, "grad_norm": 0.9318228960037231, "learning_rate": 6.566425342517652e-07, "loss": 0.0764, "step": 7139 }, { "epoch": 2.3136746597537265, "grad_norm": 0.8824783563613892, "learning_rate": 6.560518329995108e-07, "loss": 0.0824, "step": 7140 }, { "epoch": 2.31399870382372, "grad_norm": 0.8565016984939575, "learning_rate": 6.554613574286614e-07, "loss": 0.0759, "step": 7141 }, { "epoch": 2.3143227478937134, "grad_norm": 0.9348742961883545, "learning_rate": 6.548711076114858e-07, "loss": 0.0855, "step": 7142 }, { "epoch": 2.314646791963707, "grad_norm": 0.8648019433021545, "learning_rate": 6.542810836202237e-07, "loss": 0.0764, "step": 7143 }, { "epoch": 2.3149708360337007, "grad_norm": 0.9514819979667664, "learning_rate": 6.536912855270894e-07, "loss": 0.0861, "step": 7144 }, { "epoch": 2.315294880103694, "grad_norm": 0.8484213948249817, "learning_rate": 6.531017134042678e-07, "loss": 0.0725, "step": 7145 }, { "epoch": 2.3156189241736875, "grad_norm": 0.850391149520874, "learning_rate": 6.52512367323917e-07, "loss": 0.0698, "step": 7146 }, { "epoch": 2.315942968243681, "grad_norm": 0.8870947360992432, "learning_rate": 6.519232473581675e-07, "loss": 0.081, "step": 7147 }, { "epoch": 2.316267012313675, "grad_norm": 0.9229629635810852, "learning_rate": 6.513343535791216e-07, "loss": 0.0778, "step": 7148 }, { "epoch": 2.3165910563836682, "grad_norm": 0.9507783651351929, "learning_rate": 6.507456860588554e-07, "loss": 0.0783, "step": 7149 }, { "epoch": 2.3169151004536617, "grad_norm": 0.8440315127372742, "learning_rate": 6.501572448694135e-07, "loss": 0.0726, "step": 7150 }, { "epoch": 2.317239144523655, "grad_norm": 0.8955333232879639, "learning_rate": 6.495690300828183e-07, "loss": 0.0771, "step": 7151 }, { "epoch": 2.317563188593649, "grad_norm": 0.8333034515380859, "learning_rate": 6.489810417710596e-07, "loss": 0.0721, "step": 7152 }, { "epoch": 2.3178872326636424, "grad_norm": 0.8312009572982788, "learning_rate": 6.483932800061021e-07, "loss": 0.0732, "step": 7153 }, { "epoch": 2.318211276733636, "grad_norm": 0.9005463719367981, "learning_rate": 6.478057448598821e-07, "loss": 0.0803, "step": 7154 }, { "epoch": 2.3185353208036292, "grad_norm": 0.9170337915420532, "learning_rate": 6.472184364043085e-07, "loss": 0.0806, "step": 7155 }, { "epoch": 2.3188593648736227, "grad_norm": 0.9278320074081421, "learning_rate": 6.466313547112627e-07, "loss": 0.0755, "step": 7156 }, { "epoch": 2.3191834089436165, "grad_norm": 0.8766685128211975, "learning_rate": 6.460444998525953e-07, "loss": 0.0784, "step": 7157 }, { "epoch": 2.31950745301361, "grad_norm": 0.9274651408195496, "learning_rate": 6.454578719001353e-07, "loss": 0.0803, "step": 7158 }, { "epoch": 2.3198314970836034, "grad_norm": 0.9497063755989075, "learning_rate": 6.448714709256768e-07, "loss": 0.0826, "step": 7159 }, { "epoch": 2.320155541153597, "grad_norm": 0.9060593843460083, "learning_rate": 6.442852970009925e-07, "loss": 0.0832, "step": 7160 }, { "epoch": 2.32047958522359, "grad_norm": 0.8644150495529175, "learning_rate": 6.436993501978226e-07, "loss": 0.0739, "step": 7161 }, { "epoch": 2.320803629293584, "grad_norm": 0.9196017980575562, "learning_rate": 6.431136305878819e-07, "loss": 0.0771, "step": 7162 }, { "epoch": 2.3211276733635775, "grad_norm": 0.8810617327690125, "learning_rate": 6.425281382428566e-07, "loss": 0.081, "step": 7163 }, { "epoch": 2.321451717433571, "grad_norm": 0.8554599285125732, "learning_rate": 6.419428732344055e-07, "loss": 0.0773, "step": 7164 }, { "epoch": 2.3217757615035644, "grad_norm": 0.8686216473579407, "learning_rate": 6.413578356341602e-07, "loss": 0.075, "step": 7165 }, { "epoch": 2.3220998055735578, "grad_norm": 0.8910344839096069, "learning_rate": 6.407730255137212e-07, "loss": 0.0846, "step": 7166 }, { "epoch": 2.3224238496435516, "grad_norm": 0.909019947052002, "learning_rate": 6.401884429446667e-07, "loss": 0.0808, "step": 7167 }, { "epoch": 2.322747893713545, "grad_norm": 0.9781714677810669, "learning_rate": 6.396040879985416e-07, "loss": 0.0804, "step": 7168 }, { "epoch": 2.3230719377835385, "grad_norm": 0.8202382922172546, "learning_rate": 6.390199607468661e-07, "loss": 0.0725, "step": 7169 }, { "epoch": 2.323395981853532, "grad_norm": 0.8984230160713196, "learning_rate": 6.384360612611317e-07, "loss": 0.0815, "step": 7170 }, { "epoch": 2.323720025923526, "grad_norm": 0.8639374375343323, "learning_rate": 6.378523896128022e-07, "loss": 0.0767, "step": 7171 }, { "epoch": 2.324044069993519, "grad_norm": 0.8666595816612244, "learning_rate": 6.37268945873313e-07, "loss": 0.0745, "step": 7172 }, { "epoch": 2.3243681140635126, "grad_norm": 0.8796083927154541, "learning_rate": 6.36685730114073e-07, "loss": 0.0782, "step": 7173 }, { "epoch": 2.324692158133506, "grad_norm": 0.956018328666687, "learning_rate": 6.361027424064609e-07, "loss": 0.0861, "step": 7174 }, { "epoch": 2.3250162022035, "grad_norm": 0.9067574143409729, "learning_rate": 6.355199828218289e-07, "loss": 0.0766, "step": 7175 }, { "epoch": 2.3253402462734933, "grad_norm": 0.9012507200241089, "learning_rate": 6.349374514315015e-07, "loss": 0.0801, "step": 7176 }, { "epoch": 2.3256642903434868, "grad_norm": 0.8362342119216919, "learning_rate": 6.343551483067751e-07, "loss": 0.0734, "step": 7177 }, { "epoch": 2.32598833441348, "grad_norm": 0.8647367358207703, "learning_rate": 6.337730735189174e-07, "loss": 0.0754, "step": 7178 }, { "epoch": 2.3263123784834736, "grad_norm": 0.9567950367927551, "learning_rate": 6.331912271391688e-07, "loss": 0.0785, "step": 7179 }, { "epoch": 2.3266364225534675, "grad_norm": 0.8517213463783264, "learning_rate": 6.326096092387429e-07, "loss": 0.0775, "step": 7180 }, { "epoch": 2.326960466623461, "grad_norm": 0.8583937287330627, "learning_rate": 6.320282198888217e-07, "loss": 0.0757, "step": 7181 }, { "epoch": 2.3272845106934543, "grad_norm": 0.919492244720459, "learning_rate": 6.314470591605646e-07, "loss": 0.0824, "step": 7182 }, { "epoch": 2.3276085547634477, "grad_norm": 0.8579474091529846, "learning_rate": 6.308661271250974e-07, "loss": 0.0771, "step": 7183 }, { "epoch": 2.327932598833441, "grad_norm": 0.9200006723403931, "learning_rate": 6.302854238535219e-07, "loss": 0.0801, "step": 7184 }, { "epoch": 2.328256642903435, "grad_norm": 0.8708798885345459, "learning_rate": 6.2970494941691e-07, "loss": 0.0802, "step": 7185 }, { "epoch": 2.3285806869734285, "grad_norm": 0.8774047493934631, "learning_rate": 6.291247038863066e-07, "loss": 0.0805, "step": 7186 }, { "epoch": 2.328904731043422, "grad_norm": 0.866742730140686, "learning_rate": 6.285446873327289e-07, "loss": 0.0756, "step": 7187 }, { "epoch": 2.3292287751134153, "grad_norm": 0.9446413516998291, "learning_rate": 6.279648998271626e-07, "loss": 0.0804, "step": 7188 }, { "epoch": 2.3295528191834087, "grad_norm": 0.8982147574424744, "learning_rate": 6.273853414405715e-07, "loss": 0.0781, "step": 7189 }, { "epoch": 2.3298768632534026, "grad_norm": 0.8736512660980225, "learning_rate": 6.268060122438846e-07, "loss": 0.0783, "step": 7190 }, { "epoch": 2.330200907323396, "grad_norm": 0.9103041291236877, "learning_rate": 6.262269123080095e-07, "loss": 0.0775, "step": 7191 }, { "epoch": 2.3305249513933894, "grad_norm": 0.9528464078903198, "learning_rate": 6.256480417038202e-07, "loss": 0.081, "step": 7192 }, { "epoch": 2.330848995463383, "grad_norm": 0.82795649766922, "learning_rate": 6.250694005021651e-07, "loss": 0.0743, "step": 7193 }, { "epoch": 2.3311730395333763, "grad_norm": 1.019910454750061, "learning_rate": 6.244909887738651e-07, "loss": 0.0845, "step": 7194 }, { "epoch": 2.33149708360337, "grad_norm": 0.8735139966011047, "learning_rate": 6.239128065897113e-07, "loss": 0.0805, "step": 7195 }, { "epoch": 2.3318211276733636, "grad_norm": 0.8908354043960571, "learning_rate": 6.233348540204689e-07, "loss": 0.0829, "step": 7196 }, { "epoch": 2.332145171743357, "grad_norm": 0.9360215663909912, "learning_rate": 6.227571311368724e-07, "loss": 0.0812, "step": 7197 }, { "epoch": 2.3324692158133504, "grad_norm": 0.8365440368652344, "learning_rate": 6.221796380096298e-07, "loss": 0.0698, "step": 7198 }, { "epoch": 2.3327932598833443, "grad_norm": 0.8395832777023315, "learning_rate": 6.216023747094207e-07, "loss": 0.0766, "step": 7199 }, { "epoch": 2.3331173039533377, "grad_norm": 0.8404845595359802, "learning_rate": 6.210253413068964e-07, "loss": 0.07, "step": 7200 }, { "epoch": 2.333441348023331, "grad_norm": 0.9587187767028809, "learning_rate": 6.20448537872681e-07, "loss": 0.0864, "step": 7201 }, { "epoch": 2.3337653920933246, "grad_norm": 0.8404291868209839, "learning_rate": 6.198719644773687e-07, "loss": 0.0724, "step": 7202 }, { "epoch": 2.3340894361633184, "grad_norm": 0.8552994132041931, "learning_rate": 6.192956211915269e-07, "loss": 0.0766, "step": 7203 }, { "epoch": 2.334413480233312, "grad_norm": 0.8210543990135193, "learning_rate": 6.187195080856953e-07, "loss": 0.0723, "step": 7204 }, { "epoch": 2.3347375243033053, "grad_norm": 0.9449579119682312, "learning_rate": 6.181436252303829e-07, "loss": 0.0801, "step": 7205 }, { "epoch": 2.3350615683732987, "grad_norm": 0.8949571251869202, "learning_rate": 6.175679726960731e-07, "loss": 0.0786, "step": 7206 }, { "epoch": 2.335385612443292, "grad_norm": 0.8811914920806885, "learning_rate": 6.169925505532201e-07, "loss": 0.0784, "step": 7207 }, { "epoch": 2.335709656513286, "grad_norm": 0.8562451004981995, "learning_rate": 6.164173588722497e-07, "loss": 0.0728, "step": 7208 }, { "epoch": 2.3360337005832794, "grad_norm": 0.895576536655426, "learning_rate": 6.158423977235611e-07, "loss": 0.0799, "step": 7209 }, { "epoch": 2.336357744653273, "grad_norm": 0.8703263401985168, "learning_rate": 6.152676671775215e-07, "loss": 0.0737, "step": 7210 }, { "epoch": 2.3366817887232663, "grad_norm": 0.9055336713790894, "learning_rate": 6.146931673044751e-07, "loss": 0.0796, "step": 7211 }, { "epoch": 2.3370058327932597, "grad_norm": 0.9409792423248291, "learning_rate": 6.141188981747323e-07, "loss": 0.0804, "step": 7212 }, { "epoch": 2.3373298768632536, "grad_norm": 0.8985840082168579, "learning_rate": 6.135448598585814e-07, "loss": 0.081, "step": 7213 }, { "epoch": 2.337653920933247, "grad_norm": 0.8504696488380432, "learning_rate": 6.129710524262758e-07, "loss": 0.0721, "step": 7214 }, { "epoch": 2.3379779650032404, "grad_norm": 0.9190945625305176, "learning_rate": 6.123974759480469e-07, "loss": 0.0776, "step": 7215 }, { "epoch": 2.338302009073234, "grad_norm": 0.9012274742126465, "learning_rate": 6.118241304940928e-07, "loss": 0.0747, "step": 7216 }, { "epoch": 2.3386260531432272, "grad_norm": 0.8527746796607971, "learning_rate": 6.112510161345861e-07, "loss": 0.0742, "step": 7217 }, { "epoch": 2.338950097213221, "grad_norm": 0.9016045928001404, "learning_rate": 6.106781329396714e-07, "loss": 0.0717, "step": 7218 }, { "epoch": 2.3392741412832145, "grad_norm": 0.8989761471748352, "learning_rate": 6.101054809794615e-07, "loss": 0.0788, "step": 7219 }, { "epoch": 2.339598185353208, "grad_norm": 0.9128497242927551, "learning_rate": 6.095330603240468e-07, "loss": 0.0778, "step": 7220 }, { "epoch": 2.3399222294232014, "grad_norm": 1.0037459135055542, "learning_rate": 6.089608710434836e-07, "loss": 0.0694, "step": 7221 }, { "epoch": 2.3402462734931953, "grad_norm": 0.7942954897880554, "learning_rate": 6.083889132078033e-07, "loss": 0.0699, "step": 7222 }, { "epoch": 2.3405703175631887, "grad_norm": 0.949350893497467, "learning_rate": 6.078171868870075e-07, "loss": 0.0783, "step": 7223 }, { "epoch": 2.340894361633182, "grad_norm": 0.8463143706321716, "learning_rate": 6.072456921510703e-07, "loss": 0.0696, "step": 7224 }, { "epoch": 2.3412184057031755, "grad_norm": 0.9158247113227844, "learning_rate": 6.066744290699372e-07, "loss": 0.078, "step": 7225 }, { "epoch": 2.3415424497731694, "grad_norm": 0.8678821325302124, "learning_rate": 6.061033977135253e-07, "loss": 0.0799, "step": 7226 }, { "epoch": 2.341866493843163, "grad_norm": 0.9464151263237, "learning_rate": 6.055325981517238e-07, "loss": 0.0845, "step": 7227 }, { "epoch": 2.3421905379131562, "grad_norm": 0.8548157811164856, "learning_rate": 6.049620304543916e-07, "loss": 0.0744, "step": 7228 }, { "epoch": 2.3425145819831497, "grad_norm": 0.9701404571533203, "learning_rate": 6.043916946913613e-07, "loss": 0.0882, "step": 7229 }, { "epoch": 2.342838626053143, "grad_norm": 0.8720276951789856, "learning_rate": 6.038215909324372e-07, "loss": 0.0744, "step": 7230 }, { "epoch": 2.343162670123137, "grad_norm": 0.8316831588745117, "learning_rate": 6.032517192473935e-07, "loss": 0.0749, "step": 7231 }, { "epoch": 2.3434867141931304, "grad_norm": 0.8839612007141113, "learning_rate": 6.026820797059777e-07, "loss": 0.0755, "step": 7232 }, { "epoch": 2.343810758263124, "grad_norm": 0.9230669736862183, "learning_rate": 6.021126723779075e-07, "loss": 0.085, "step": 7233 }, { "epoch": 2.344134802333117, "grad_norm": 0.8618535399436951, "learning_rate": 6.015434973328735e-07, "loss": 0.0746, "step": 7234 }, { "epoch": 2.3444588464031106, "grad_norm": 0.9741597771644592, "learning_rate": 6.009745546405377e-07, "loss": 0.0699, "step": 7235 }, { "epoch": 2.3447828904731045, "grad_norm": 0.8314618468284607, "learning_rate": 6.00405844370531e-07, "loss": 0.0711, "step": 7236 }, { "epoch": 2.345106934543098, "grad_norm": 0.9069364070892334, "learning_rate": 5.998373665924606e-07, "loss": 0.0817, "step": 7237 }, { "epoch": 2.3454309786130914, "grad_norm": 0.8206673264503479, "learning_rate": 5.992691213759011e-07, "loss": 0.071, "step": 7238 }, { "epoch": 2.345755022683085, "grad_norm": 0.8871822357177734, "learning_rate": 5.987011087904007e-07, "loss": 0.0785, "step": 7239 }, { "epoch": 2.346079066753078, "grad_norm": 0.8879174590110779, "learning_rate": 5.981333289054792e-07, "loss": 0.0764, "step": 7240 }, { "epoch": 2.346403110823072, "grad_norm": 0.884364664554596, "learning_rate": 5.975657817906253e-07, "loss": 0.0777, "step": 7241 }, { "epoch": 2.3467271548930655, "grad_norm": 0.8927558660507202, "learning_rate": 5.96998467515304e-07, "loss": 0.0831, "step": 7242 }, { "epoch": 2.347051198963059, "grad_norm": 0.8751205205917358, "learning_rate": 5.964313861489466e-07, "loss": 0.0721, "step": 7243 }, { "epoch": 2.3473752430330523, "grad_norm": 0.9098830819129944, "learning_rate": 5.958645377609606e-07, "loss": 0.08, "step": 7244 }, { "epoch": 2.347699287103046, "grad_norm": 0.9423938393592834, "learning_rate": 5.952979224207205e-07, "loss": 0.0823, "step": 7245 }, { "epoch": 2.3480233311730396, "grad_norm": 0.8223200440406799, "learning_rate": 5.947315401975773e-07, "loss": 0.0715, "step": 7246 }, { "epoch": 2.348347375243033, "grad_norm": 0.8894121646881104, "learning_rate": 5.941653911608486e-07, "loss": 0.0779, "step": 7247 }, { "epoch": 2.3486714193130265, "grad_norm": 0.8771853446960449, "learning_rate": 5.935994753798258e-07, "loss": 0.0764, "step": 7248 }, { "epoch": 2.34899546338302, "grad_norm": 0.8687610030174255, "learning_rate": 5.930337929237726e-07, "loss": 0.0744, "step": 7249 }, { "epoch": 2.3493195074530138, "grad_norm": 0.9485291242599487, "learning_rate": 5.924683438619208e-07, "loss": 0.0777, "step": 7250 }, { "epoch": 2.349643551523007, "grad_norm": 0.9002734422683716, "learning_rate": 5.91903128263479e-07, "loss": 0.072, "step": 7251 }, { "epoch": 2.3499675955930006, "grad_norm": 0.8892845511436462, "learning_rate": 5.913381461976217e-07, "loss": 0.0741, "step": 7252 }, { "epoch": 2.350291639662994, "grad_norm": 0.9703330397605896, "learning_rate": 5.907733977334978e-07, "loss": 0.0824, "step": 7253 }, { "epoch": 2.350615683732988, "grad_norm": 0.8633927702903748, "learning_rate": 5.902088829402274e-07, "loss": 0.07, "step": 7254 }, { "epoch": 2.3509397278029813, "grad_norm": 0.8836617469787598, "learning_rate": 5.896446018869018e-07, "loss": 0.0734, "step": 7255 }, { "epoch": 2.3512637718729748, "grad_norm": 0.9725565314292908, "learning_rate": 5.890805546425832e-07, "loss": 0.0798, "step": 7256 }, { "epoch": 2.351587815942968, "grad_norm": 0.930892825126648, "learning_rate": 5.885167412763051e-07, "loss": 0.0788, "step": 7257 }, { "epoch": 2.3519118600129616, "grad_norm": 0.883513867855072, "learning_rate": 5.879531618570738e-07, "loss": 0.0811, "step": 7258 }, { "epoch": 2.3522359040829555, "grad_norm": 0.9249931573867798, "learning_rate": 5.873898164538658e-07, "loss": 0.0767, "step": 7259 }, { "epoch": 2.352559948152949, "grad_norm": 0.8391486406326294, "learning_rate": 5.868267051356283e-07, "loss": 0.0742, "step": 7260 }, { "epoch": 2.3528839922229423, "grad_norm": 0.9068962335586548, "learning_rate": 5.86263827971281e-07, "loss": 0.076, "step": 7261 }, { "epoch": 2.3532080362929357, "grad_norm": 0.904548704624176, "learning_rate": 5.857011850297148e-07, "loss": 0.0786, "step": 7262 }, { "epoch": 2.353532080362929, "grad_norm": 0.8462362289428711, "learning_rate": 5.851387763797916e-07, "loss": 0.0788, "step": 7263 }, { "epoch": 2.353856124432923, "grad_norm": 0.8829976916313171, "learning_rate": 5.845766020903459e-07, "loss": 0.0765, "step": 7264 }, { "epoch": 2.3541801685029164, "grad_norm": 0.9879518747329712, "learning_rate": 5.840146622301796e-07, "loss": 0.0889, "step": 7265 }, { "epoch": 2.35450421257291, "grad_norm": 0.907825767993927, "learning_rate": 5.834529568680722e-07, "loss": 0.0775, "step": 7266 }, { "epoch": 2.3548282566429033, "grad_norm": 0.8694019913673401, "learning_rate": 5.828914860727674e-07, "loss": 0.0776, "step": 7267 }, { "epoch": 2.3551523007128967, "grad_norm": 0.8807018995285034, "learning_rate": 5.823302499129873e-07, "loss": 0.0794, "step": 7268 }, { "epoch": 2.3554763447828906, "grad_norm": 0.8571588397026062, "learning_rate": 5.817692484574197e-07, "loss": 0.0733, "step": 7269 }, { "epoch": 2.355800388852884, "grad_norm": 0.8664126396179199, "learning_rate": 5.81208481774726e-07, "loss": 0.075, "step": 7270 }, { "epoch": 2.3561244329228774, "grad_norm": 0.8779671788215637, "learning_rate": 5.806479499335385e-07, "loss": 0.075, "step": 7271 }, { "epoch": 2.356448476992871, "grad_norm": 0.9118459224700928, "learning_rate": 5.800876530024615e-07, "loss": 0.0799, "step": 7272 }, { "epoch": 2.3567725210628647, "grad_norm": 0.8571300506591797, "learning_rate": 5.795275910500703e-07, "loss": 0.074, "step": 7273 }, { "epoch": 2.357096565132858, "grad_norm": 0.8532280325889587, "learning_rate": 5.789677641449087e-07, "loss": 0.076, "step": 7274 }, { "epoch": 2.3574206092028516, "grad_norm": 0.8807914853096008, "learning_rate": 5.784081723554971e-07, "loss": 0.0749, "step": 7275 }, { "epoch": 2.357744653272845, "grad_norm": 0.9329232573509216, "learning_rate": 5.778488157503223e-07, "loss": 0.0825, "step": 7276 }, { "epoch": 2.358068697342839, "grad_norm": 0.8534782528877258, "learning_rate": 5.772896943978446e-07, "loss": 0.0748, "step": 7277 }, { "epoch": 2.3583927414128323, "grad_norm": 0.9644832015037537, "learning_rate": 5.767308083664949e-07, "loss": 0.0815, "step": 7278 }, { "epoch": 2.3587167854828257, "grad_norm": 0.9723904132843018, "learning_rate": 5.761721577246754e-07, "loss": 0.0836, "step": 7279 }, { "epoch": 2.359040829552819, "grad_norm": 0.8773126602172852, "learning_rate": 5.756137425407598e-07, "loss": 0.0798, "step": 7280 }, { "epoch": 2.3593648736228126, "grad_norm": 0.9173557162284851, "learning_rate": 5.750555628830928e-07, "loss": 0.0787, "step": 7281 }, { "epoch": 2.3596889176928064, "grad_norm": 0.9228742718696594, "learning_rate": 5.744976188199905e-07, "loss": 0.0773, "step": 7282 }, { "epoch": 2.3600129617628, "grad_norm": 0.9630638360977173, "learning_rate": 5.739399104197388e-07, "loss": 0.0814, "step": 7283 }, { "epoch": 2.3603370058327933, "grad_norm": 0.8754711747169495, "learning_rate": 5.733824377505965e-07, "loss": 0.0739, "step": 7284 }, { "epoch": 2.3606610499027867, "grad_norm": 0.828157365322113, "learning_rate": 5.728252008807925e-07, "loss": 0.0729, "step": 7285 }, { "epoch": 2.36098509397278, "grad_norm": 0.8452654480934143, "learning_rate": 5.722681998785273e-07, "loss": 0.0762, "step": 7286 }, { "epoch": 2.361309138042774, "grad_norm": 1.0112296342849731, "learning_rate": 5.717114348119726e-07, "loss": 0.0846, "step": 7287 }, { "epoch": 2.3616331821127674, "grad_norm": 0.8676653504371643, "learning_rate": 5.711549057492718e-07, "loss": 0.0693, "step": 7288 }, { "epoch": 2.361957226182761, "grad_norm": 0.8431695699691772, "learning_rate": 5.705986127585364e-07, "loss": 0.0721, "step": 7289 }, { "epoch": 2.3622812702527543, "grad_norm": 0.8941037058830261, "learning_rate": 5.700425559078543e-07, "loss": 0.0732, "step": 7290 }, { "epoch": 2.3626053143227477, "grad_norm": 0.8676804304122925, "learning_rate": 5.694867352652791e-07, "loss": 0.0797, "step": 7291 }, { "epoch": 2.3629293583927415, "grad_norm": 0.8437778949737549, "learning_rate": 5.689311508988385e-07, "loss": 0.0774, "step": 7292 }, { "epoch": 2.363253402462735, "grad_norm": 0.8584735989570618, "learning_rate": 5.68375802876531e-07, "loss": 0.0709, "step": 7293 }, { "epoch": 2.3635774465327284, "grad_norm": 0.901317834854126, "learning_rate": 5.678206912663259e-07, "loss": 0.0752, "step": 7294 }, { "epoch": 2.363901490602722, "grad_norm": 0.9817501306533813, "learning_rate": 5.672658161361636e-07, "loss": 0.084, "step": 7295 }, { "epoch": 2.3642255346727157, "grad_norm": 0.8984825611114502, "learning_rate": 5.667111775539538e-07, "loss": 0.0792, "step": 7296 }, { "epoch": 2.364549578742709, "grad_norm": 0.903588056564331, "learning_rate": 5.661567755875816e-07, "loss": 0.0761, "step": 7297 }, { "epoch": 2.3648736228127025, "grad_norm": 0.9453230500221252, "learning_rate": 5.656026103048975e-07, "loss": 0.0767, "step": 7298 }, { "epoch": 2.365197666882696, "grad_norm": 0.8390913009643555, "learning_rate": 5.650486817737291e-07, "loss": 0.0739, "step": 7299 }, { "epoch": 2.3655217109526894, "grad_norm": 0.8238028287887573, "learning_rate": 5.644949900618696e-07, "loss": 0.0686, "step": 7300 }, { "epoch": 2.3658457550226832, "grad_norm": 0.8137726187705994, "learning_rate": 5.639415352370858e-07, "loss": 0.0707, "step": 7301 }, { "epoch": 2.3661697990926767, "grad_norm": 0.8957241773605347, "learning_rate": 5.633883173671159e-07, "loss": 0.0747, "step": 7302 }, { "epoch": 2.36649384316267, "grad_norm": 0.9324427843093872, "learning_rate": 5.628353365196682e-07, "loss": 0.0825, "step": 7303 }, { "epoch": 2.3668178872326635, "grad_norm": 0.8149759769439697, "learning_rate": 5.622825927624226e-07, "loss": 0.0717, "step": 7304 }, { "epoch": 2.3671419313026574, "grad_norm": 0.8785021901130676, "learning_rate": 5.617300861630276e-07, "loss": 0.0781, "step": 7305 }, { "epoch": 2.367465975372651, "grad_norm": 0.9200155735015869, "learning_rate": 5.611778167891077e-07, "loss": 0.0813, "step": 7306 }, { "epoch": 2.3677900194426442, "grad_norm": 0.9415361881256104, "learning_rate": 5.60625784708253e-07, "loss": 0.0797, "step": 7307 }, { "epoch": 2.3681140635126376, "grad_norm": 0.8852391839027405, "learning_rate": 5.600739899880275e-07, "loss": 0.0765, "step": 7308 }, { "epoch": 2.368438107582631, "grad_norm": 0.9021180868148804, "learning_rate": 5.595224326959662e-07, "loss": 0.0767, "step": 7309 }, { "epoch": 2.368762151652625, "grad_norm": 0.809059202671051, "learning_rate": 5.589711128995734e-07, "loss": 0.0706, "step": 7310 }, { "epoch": 2.3690861957226184, "grad_norm": 0.9455664753913879, "learning_rate": 5.584200306663259e-07, "loss": 0.078, "step": 7311 }, { "epoch": 2.369410239792612, "grad_norm": 0.8230124115943909, "learning_rate": 5.578691860636706e-07, "loss": 0.0735, "step": 7312 }, { "epoch": 2.369734283862605, "grad_norm": 0.8970810174942017, "learning_rate": 5.573185791590266e-07, "loss": 0.0771, "step": 7313 }, { "epoch": 2.3700583279325986, "grad_norm": 0.8559962511062622, "learning_rate": 5.567682100197808e-07, "loss": 0.0742, "step": 7314 }, { "epoch": 2.3703823720025925, "grad_norm": 0.769929826259613, "learning_rate": 5.562180787132945e-07, "loss": 0.0634, "step": 7315 }, { "epoch": 2.370706416072586, "grad_norm": 0.8632463216781616, "learning_rate": 5.55668185306898e-07, "loss": 0.0743, "step": 7316 }, { "epoch": 2.3710304601425793, "grad_norm": 0.857993483543396, "learning_rate": 5.551185298678929e-07, "loss": 0.075, "step": 7317 }, { "epoch": 2.3713545042125728, "grad_norm": 0.9158240556716919, "learning_rate": 5.545691124635518e-07, "loss": 0.077, "step": 7318 }, { "epoch": 2.371678548282566, "grad_norm": 0.9267992377281189, "learning_rate": 5.54019933161119e-07, "loss": 0.0814, "step": 7319 }, { "epoch": 2.37200259235256, "grad_norm": 0.961631715297699, "learning_rate": 5.534709920278064e-07, "loss": 0.0796, "step": 7320 }, { "epoch": 2.3723266364225535, "grad_norm": 0.8961342573165894, "learning_rate": 5.52922289130802e-07, "loss": 0.0798, "step": 7321 }, { "epoch": 2.372650680492547, "grad_norm": 0.9417958855628967, "learning_rate": 5.523738245372596e-07, "loss": 0.0781, "step": 7322 }, { "epoch": 2.3729747245625403, "grad_norm": 0.9764118194580078, "learning_rate": 5.518255983143061e-07, "loss": 0.0829, "step": 7323 }, { "epoch": 2.373298768632534, "grad_norm": 0.8964194059371948, "learning_rate": 5.512776105290402e-07, "loss": 0.077, "step": 7324 }, { "epoch": 2.3736228127025276, "grad_norm": 0.8937922716140747, "learning_rate": 5.507298612485293e-07, "loss": 0.0792, "step": 7325 }, { "epoch": 2.373946856772521, "grad_norm": 0.8468626737594604, "learning_rate": 5.501823505398137e-07, "loss": 0.0732, "step": 7326 }, { "epoch": 2.3742709008425145, "grad_norm": 0.8795669674873352, "learning_rate": 5.496350784699015e-07, "loss": 0.0729, "step": 7327 }, { "epoch": 2.3745949449125083, "grad_norm": 0.9298455715179443, "learning_rate": 5.490880451057759e-07, "loss": 0.0791, "step": 7328 }, { "epoch": 2.3749189889825018, "grad_norm": 0.8860503435134888, "learning_rate": 5.485412505143858e-07, "loss": 0.0755, "step": 7329 }, { "epoch": 2.375243033052495, "grad_norm": 0.9270402193069458, "learning_rate": 5.479946947626566e-07, "loss": 0.0825, "step": 7330 }, { "epoch": 2.3755670771224886, "grad_norm": 0.9208892583847046, "learning_rate": 5.474483779174791e-07, "loss": 0.0741, "step": 7331 }, { "epoch": 2.375891121192482, "grad_norm": 0.8634201884269714, "learning_rate": 5.469023000457183e-07, "loss": 0.0761, "step": 7332 }, { "epoch": 2.376215165262476, "grad_norm": 0.8918706178665161, "learning_rate": 5.463564612142083e-07, "loss": 0.0763, "step": 7333 }, { "epoch": 2.3765392093324693, "grad_norm": 0.9424406886100769, "learning_rate": 5.458108614897545e-07, "loss": 0.0848, "step": 7334 }, { "epoch": 2.3768632534024627, "grad_norm": 0.8506817817687988, "learning_rate": 5.452655009391341e-07, "loss": 0.0749, "step": 7335 }, { "epoch": 2.377187297472456, "grad_norm": 0.8940818905830383, "learning_rate": 5.447203796290918e-07, "loss": 0.082, "step": 7336 }, { "epoch": 2.3775113415424496, "grad_norm": 0.9235643148422241, "learning_rate": 5.441754976263478e-07, "loss": 0.0852, "step": 7337 }, { "epoch": 2.3778353856124435, "grad_norm": 0.875626266002655, "learning_rate": 5.436308549975883e-07, "loss": 0.0747, "step": 7338 }, { "epoch": 2.378159429682437, "grad_norm": 0.8669414520263672, "learning_rate": 5.430864518094731e-07, "loss": 0.0719, "step": 7339 }, { "epoch": 2.3784834737524303, "grad_norm": 0.8698946833610535, "learning_rate": 5.425422881286319e-07, "loss": 0.0818, "step": 7340 }, { "epoch": 2.3788075178224237, "grad_norm": 0.937790036201477, "learning_rate": 5.419983640216647e-07, "loss": 0.0846, "step": 7341 }, { "epoch": 2.379131561892417, "grad_norm": 0.8682368397712708, "learning_rate": 5.414546795551429e-07, "loss": 0.078, "step": 7342 }, { "epoch": 2.379455605962411, "grad_norm": 0.8425775766372681, "learning_rate": 5.409112347956089e-07, "loss": 0.0746, "step": 7343 }, { "epoch": 2.3797796500324044, "grad_norm": 0.9196928143501282, "learning_rate": 5.403680298095737e-07, "loss": 0.0747, "step": 7344 }, { "epoch": 2.380103694102398, "grad_norm": 0.8911183476448059, "learning_rate": 5.398250646635209e-07, "loss": 0.082, "step": 7345 }, { "epoch": 2.3804277381723913, "grad_norm": 0.9871136546134949, "learning_rate": 5.392823394239042e-07, "loss": 0.083, "step": 7346 }, { "epoch": 2.380751782242385, "grad_norm": 0.8932740688323975, "learning_rate": 5.387398541571479e-07, "loss": 0.0797, "step": 7347 }, { "epoch": 2.3810758263123786, "grad_norm": 0.902044415473938, "learning_rate": 5.381976089296467e-07, "loss": 0.0784, "step": 7348 }, { "epoch": 2.381399870382372, "grad_norm": 0.8423619866371155, "learning_rate": 5.376556038077668e-07, "loss": 0.0747, "step": 7349 }, { "epoch": 2.3817239144523654, "grad_norm": 1.0458214282989502, "learning_rate": 5.371138388578448e-07, "loss": 0.0808, "step": 7350 }, { "epoch": 2.3820479585223593, "grad_norm": 0.9807311296463013, "learning_rate": 5.365723141461851e-07, "loss": 0.0848, "step": 7351 }, { "epoch": 2.3823720025923527, "grad_norm": 0.8166460990905762, "learning_rate": 5.360310297390681e-07, "loss": 0.0718, "step": 7352 }, { "epoch": 2.382696046662346, "grad_norm": 0.8361016511917114, "learning_rate": 5.354899857027398e-07, "loss": 0.0745, "step": 7353 }, { "epoch": 2.3830200907323396, "grad_norm": 0.8787776827812195, "learning_rate": 5.349491821034192e-07, "loss": 0.0782, "step": 7354 }, { "epoch": 2.383344134802333, "grad_norm": 0.8866326212882996, "learning_rate": 5.344086190072955e-07, "loss": 0.0734, "step": 7355 }, { "epoch": 2.383668178872327, "grad_norm": 0.9183595776557922, "learning_rate": 5.338682964805286e-07, "loss": 0.0826, "step": 7356 }, { "epoch": 2.3839922229423203, "grad_norm": 0.8560160398483276, "learning_rate": 5.333282145892493e-07, "loss": 0.0719, "step": 7357 }, { "epoch": 2.3843162670123137, "grad_norm": 0.8655824065208435, "learning_rate": 5.327883733995562e-07, "loss": 0.0793, "step": 7358 }, { "epoch": 2.384640311082307, "grad_norm": 0.9186474084854126, "learning_rate": 5.322487729775233e-07, "loss": 0.0791, "step": 7359 }, { "epoch": 2.3849643551523005, "grad_norm": 0.8937341570854187, "learning_rate": 5.317094133891903e-07, "loss": 0.0776, "step": 7360 }, { "epoch": 2.3852883992222944, "grad_norm": 0.9676603078842163, "learning_rate": 5.311702947005718e-07, "loss": 0.0764, "step": 7361 }, { "epoch": 2.385612443292288, "grad_norm": 0.8829269409179688, "learning_rate": 5.306314169776486e-07, "loss": 0.0728, "step": 7362 }, { "epoch": 2.3859364873622813, "grad_norm": 0.9241729378700256, "learning_rate": 5.30092780286375e-07, "loss": 0.0774, "step": 7363 }, { "epoch": 2.3862605314322747, "grad_norm": 0.8636072278022766, "learning_rate": 5.295543846926752e-07, "loss": 0.077, "step": 7364 }, { "epoch": 2.386584575502268, "grad_norm": 0.882157564163208, "learning_rate": 5.290162302624433e-07, "loss": 0.0787, "step": 7365 }, { "epoch": 2.386908619572262, "grad_norm": 0.9587984681129456, "learning_rate": 5.284783170615446e-07, "loss": 0.086, "step": 7366 }, { "epoch": 2.3872326636422554, "grad_norm": 0.8575683832168579, "learning_rate": 5.279406451558136e-07, "loss": 0.0726, "step": 7367 }, { "epoch": 2.387556707712249, "grad_norm": 0.8097689151763916, "learning_rate": 5.274032146110567e-07, "loss": 0.071, "step": 7368 }, { "epoch": 2.3878807517822422, "grad_norm": 0.9363576769828796, "learning_rate": 5.268660254930499e-07, "loss": 0.0774, "step": 7369 }, { "epoch": 2.3882047958522357, "grad_norm": 0.8731128573417664, "learning_rate": 5.263290778675401e-07, "loss": 0.0754, "step": 7370 }, { "epoch": 2.3885288399222295, "grad_norm": 0.9303240180015564, "learning_rate": 5.257923718002447e-07, "loss": 0.0767, "step": 7371 }, { "epoch": 2.388852883992223, "grad_norm": 0.88297039270401, "learning_rate": 5.252559073568514e-07, "loss": 0.0738, "step": 7372 }, { "epoch": 2.3891769280622164, "grad_norm": 0.8268675208091736, "learning_rate": 5.247196846030178e-07, "loss": 0.0696, "step": 7373 }, { "epoch": 2.38950097213221, "grad_norm": 0.9614165425300598, "learning_rate": 5.241837036043731e-07, "loss": 0.0773, "step": 7374 }, { "epoch": 2.3898250162022037, "grad_norm": 0.9003193974494934, "learning_rate": 5.236479644265153e-07, "loss": 0.0764, "step": 7375 }, { "epoch": 2.390149060272197, "grad_norm": 0.8745166063308716, "learning_rate": 5.231124671350141e-07, "loss": 0.0763, "step": 7376 }, { "epoch": 2.3904731043421905, "grad_norm": 0.8800874948501587, "learning_rate": 5.225772117954089e-07, "loss": 0.0757, "step": 7377 }, { "epoch": 2.390797148412184, "grad_norm": 0.8205560445785522, "learning_rate": 5.220421984732104e-07, "loss": 0.0746, "step": 7378 }, { "epoch": 2.391121192482178, "grad_norm": 0.9015682935714722, "learning_rate": 5.215074272338986e-07, "loss": 0.0766, "step": 7379 }, { "epoch": 2.3914452365521712, "grad_norm": 0.8613619208335876, "learning_rate": 5.20972898142924e-07, "loss": 0.0771, "step": 7380 }, { "epoch": 2.3917692806221647, "grad_norm": 0.8701586127281189, "learning_rate": 5.204386112657095e-07, "loss": 0.0797, "step": 7381 }, { "epoch": 2.392093324692158, "grad_norm": 0.8837924599647522, "learning_rate": 5.199045666676436e-07, "loss": 0.0712, "step": 7382 }, { "epoch": 2.3924173687621515, "grad_norm": 0.9213774800300598, "learning_rate": 5.193707644140913e-07, "loss": 0.085, "step": 7383 }, { "epoch": 2.3927414128321454, "grad_norm": 0.8316073417663574, "learning_rate": 5.188372045703824e-07, "loss": 0.069, "step": 7384 }, { "epoch": 2.393065456902139, "grad_norm": 0.8606321811676025, "learning_rate": 5.183038872018215e-07, "loss": 0.0767, "step": 7385 }, { "epoch": 2.393389500972132, "grad_norm": 0.9383647441864014, "learning_rate": 5.1777081237368e-07, "loss": 0.0792, "step": 7386 }, { "epoch": 2.3937135450421256, "grad_norm": 0.8869799971580505, "learning_rate": 5.172379801512014e-07, "loss": 0.0786, "step": 7387 }, { "epoch": 2.394037589112119, "grad_norm": 0.8742722272872925, "learning_rate": 5.167053905996003e-07, "loss": 0.0808, "step": 7388 }, { "epoch": 2.394361633182113, "grad_norm": 0.960090696811676, "learning_rate": 5.161730437840585e-07, "loss": 0.0822, "step": 7389 }, { "epoch": 2.3946856772521063, "grad_norm": 0.8900645971298218, "learning_rate": 5.15640939769732e-07, "loss": 0.0793, "step": 7390 }, { "epoch": 2.3950097213220998, "grad_norm": 0.8405351638793945, "learning_rate": 5.151090786217433e-07, "loss": 0.0768, "step": 7391 }, { "epoch": 2.395333765392093, "grad_norm": 0.8822963237762451, "learning_rate": 5.145774604051895e-07, "loss": 0.0766, "step": 7392 }, { "epoch": 2.3956578094620866, "grad_norm": 1.0235251188278198, "learning_rate": 5.140460851851336e-07, "loss": 0.0812, "step": 7393 }, { "epoch": 2.3959818535320805, "grad_norm": 0.9306867122650146, "learning_rate": 5.135149530266112e-07, "loss": 0.0772, "step": 7394 }, { "epoch": 2.396305897602074, "grad_norm": 0.9007172584533691, "learning_rate": 5.129840639946279e-07, "loss": 0.0726, "step": 7395 }, { "epoch": 2.3966299416720673, "grad_norm": 0.8892046213150024, "learning_rate": 5.124534181541596e-07, "loss": 0.0761, "step": 7396 }, { "epoch": 2.3969539857420608, "grad_norm": 0.9353534579277039, "learning_rate": 5.119230155701515e-07, "loss": 0.0809, "step": 7397 }, { "epoch": 2.3972780298120546, "grad_norm": 0.9226713180541992, "learning_rate": 5.113928563075213e-07, "loss": 0.0803, "step": 7398 }, { "epoch": 2.397602073882048, "grad_norm": 0.9027561545372009, "learning_rate": 5.108629404311535e-07, "loss": 0.0785, "step": 7399 }, { "epoch": 2.3979261179520415, "grad_norm": 0.8203698396682739, "learning_rate": 5.103332680059053e-07, "loss": 0.0717, "step": 7400 }, { "epoch": 2.398250162022035, "grad_norm": 0.9251837134361267, "learning_rate": 5.098038390966039e-07, "loss": 0.0805, "step": 7401 }, { "epoch": 2.3985742060920288, "grad_norm": 0.871580958366394, "learning_rate": 5.09274653768046e-07, "loss": 0.0773, "step": 7402 }, { "epoch": 2.398898250162022, "grad_norm": 0.8269453644752502, "learning_rate": 5.087457120849984e-07, "loss": 0.0713, "step": 7403 }, { "epoch": 2.3992222942320156, "grad_norm": 0.8618307113647461, "learning_rate": 5.082170141121992e-07, "loss": 0.0769, "step": 7404 }, { "epoch": 2.399546338302009, "grad_norm": 0.8813310861587524, "learning_rate": 5.076885599143558e-07, "loss": 0.075, "step": 7405 }, { "epoch": 2.3998703823720025, "grad_norm": 0.8791375160217285, "learning_rate": 5.071603495561444e-07, "loss": 0.0776, "step": 7406 }, { "epoch": 2.4001944264419963, "grad_norm": 0.9294980764389038, "learning_rate": 5.066323831022155e-07, "loss": 0.0752, "step": 7407 }, { "epoch": 2.4005184705119897, "grad_norm": 0.8981596827507019, "learning_rate": 5.061046606171849e-07, "loss": 0.0741, "step": 7408 }, { "epoch": 2.400842514581983, "grad_norm": 0.9037610292434692, "learning_rate": 5.055771821656416e-07, "loss": 0.0753, "step": 7409 }, { "epoch": 2.4011665586519766, "grad_norm": 0.8545142412185669, "learning_rate": 5.05049947812144e-07, "loss": 0.0744, "step": 7410 }, { "epoch": 2.40149060272197, "grad_norm": 0.9342654347419739, "learning_rate": 5.045229576212191e-07, "loss": 0.0821, "step": 7411 }, { "epoch": 2.401814646791964, "grad_norm": 0.9164658188819885, "learning_rate": 5.039962116573676e-07, "loss": 0.0774, "step": 7412 }, { "epoch": 2.4021386908619573, "grad_norm": 0.8833655118942261, "learning_rate": 5.034697099850557e-07, "loss": 0.0759, "step": 7413 }, { "epoch": 2.4024627349319507, "grad_norm": 0.8713326454162598, "learning_rate": 5.029434526687249e-07, "loss": 0.0817, "step": 7414 }, { "epoch": 2.402786779001944, "grad_norm": 0.9473174214363098, "learning_rate": 5.02417439772781e-07, "loss": 0.0757, "step": 7415 }, { "epoch": 2.4031108230719376, "grad_norm": 0.9125643968582153, "learning_rate": 5.01891671361606e-07, "loss": 0.0807, "step": 7416 }, { "epoch": 2.4034348671419314, "grad_norm": 0.9707140922546387, "learning_rate": 5.013661474995463e-07, "loss": 0.084, "step": 7417 }, { "epoch": 2.403758911211925, "grad_norm": 0.8440059423446655, "learning_rate": 5.008408682509219e-07, "loss": 0.0736, "step": 7418 }, { "epoch": 2.4040829552819183, "grad_norm": 0.9085173606872559, "learning_rate": 5.003158336800218e-07, "loss": 0.0772, "step": 7419 }, { "epoch": 2.4044069993519117, "grad_norm": 0.8836762309074402, "learning_rate": 4.997910438511052e-07, "loss": 0.0792, "step": 7420 }, { "epoch": 2.404731043421905, "grad_norm": 0.9048094749450684, "learning_rate": 4.992664988284021e-07, "loss": 0.077, "step": 7421 }, { "epoch": 2.405055087491899, "grad_norm": 0.8553664088249207, "learning_rate": 4.987421986761101e-07, "loss": 0.0742, "step": 7422 }, { "epoch": 2.4053791315618924, "grad_norm": 0.9607149958610535, "learning_rate": 4.982181434583996e-07, "loss": 0.0833, "step": 7423 }, { "epoch": 2.405703175631886, "grad_norm": 0.8982037305831909, "learning_rate": 4.976943332394093e-07, "loss": 0.0718, "step": 7424 }, { "epoch": 2.4060272197018793, "grad_norm": 0.8702731728553772, "learning_rate": 4.971707680832491e-07, "loss": 0.0737, "step": 7425 }, { "epoch": 2.406351263771873, "grad_norm": 0.8381253480911255, "learning_rate": 4.966474480539976e-07, "loss": 0.0773, "step": 7426 }, { "epoch": 2.4066753078418666, "grad_norm": 0.851047158241272, "learning_rate": 4.961243732157048e-07, "loss": 0.072, "step": 7427 }, { "epoch": 2.40699935191186, "grad_norm": 0.8857436776161194, "learning_rate": 4.956015436323897e-07, "loss": 0.0812, "step": 7428 }, { "epoch": 2.4073233959818534, "grad_norm": 0.9682055115699768, "learning_rate": 4.950789593680422e-07, "loss": 0.0776, "step": 7429 }, { "epoch": 2.4076474400518473, "grad_norm": 0.8536563515663147, "learning_rate": 4.945566204866201e-07, "loss": 0.0733, "step": 7430 }, { "epoch": 2.4079714841218407, "grad_norm": 0.9816359281539917, "learning_rate": 4.940345270520536e-07, "loss": 0.0837, "step": 7431 }, { "epoch": 2.408295528191834, "grad_norm": 0.8802758455276489, "learning_rate": 4.935126791282419e-07, "loss": 0.0764, "step": 7432 }, { "epoch": 2.4086195722618275, "grad_norm": 0.76444411277771, "learning_rate": 4.929910767790536e-07, "loss": 0.0652, "step": 7433 }, { "epoch": 2.408943616331821, "grad_norm": 0.9820328950881958, "learning_rate": 4.92469720068329e-07, "loss": 0.0814, "step": 7434 }, { "epoch": 2.409267660401815, "grad_norm": 0.8797508478164673, "learning_rate": 4.919486090598749e-07, "loss": 0.0721, "step": 7435 }, { "epoch": 2.4095917044718083, "grad_norm": 0.8678030967712402, "learning_rate": 4.91427743817473e-07, "loss": 0.0716, "step": 7436 }, { "epoch": 2.4099157485418017, "grad_norm": 0.8386548757553101, "learning_rate": 4.909071244048694e-07, "loss": 0.075, "step": 7437 }, { "epoch": 2.410239792611795, "grad_norm": 0.8689800500869751, "learning_rate": 4.903867508857857e-07, "loss": 0.0774, "step": 7438 }, { "epoch": 2.4105638366817885, "grad_norm": 0.9039058685302734, "learning_rate": 4.898666233239083e-07, "loss": 0.0809, "step": 7439 }, { "epoch": 2.4108878807517824, "grad_norm": 0.8700675368309021, "learning_rate": 4.893467417828967e-07, "loss": 0.077, "step": 7440 }, { "epoch": 2.411211924821776, "grad_norm": 0.8966511487960815, "learning_rate": 4.888271063263791e-07, "loss": 0.0792, "step": 7441 }, { "epoch": 2.4115359688917692, "grad_norm": 0.904863178730011, "learning_rate": 4.883077170179542e-07, "loss": 0.0772, "step": 7442 }, { "epoch": 2.4118600129617627, "grad_norm": 0.9016261696815491, "learning_rate": 4.877885739211907e-07, "loss": 0.0735, "step": 7443 }, { "epoch": 2.412184057031756, "grad_norm": 0.8624441027641296, "learning_rate": 4.872696770996246e-07, "loss": 0.078, "step": 7444 }, { "epoch": 2.41250810110175, "grad_norm": 0.8437727093696594, "learning_rate": 4.867510266167669e-07, "loss": 0.073, "step": 7445 }, { "epoch": 2.4128321451717434, "grad_norm": 0.8561378121376038, "learning_rate": 4.862326225360927e-07, "loss": 0.0799, "step": 7446 }, { "epoch": 2.413156189241737, "grad_norm": 0.8445996642112732, "learning_rate": 4.85714464921051e-07, "loss": 0.072, "step": 7447 }, { "epoch": 2.4134802333117302, "grad_norm": 0.8771276473999023, "learning_rate": 4.851965538350589e-07, "loss": 0.0772, "step": 7448 }, { "epoch": 2.413804277381724, "grad_norm": 0.875706672668457, "learning_rate": 4.846788893415038e-07, "loss": 0.0793, "step": 7449 }, { "epoch": 2.4141283214517175, "grad_norm": 0.9223209619522095, "learning_rate": 4.841614715037429e-07, "loss": 0.0832, "step": 7450 }, { "epoch": 2.414452365521711, "grad_norm": 0.8857447504997253, "learning_rate": 4.83644300385103e-07, "loss": 0.0809, "step": 7451 }, { "epoch": 2.4147764095917044, "grad_norm": 0.9135066866874695, "learning_rate": 4.831273760488816e-07, "loss": 0.0811, "step": 7452 }, { "epoch": 2.4151004536616982, "grad_norm": 0.8678467869758606, "learning_rate": 4.82610698558344e-07, "loss": 0.0724, "step": 7453 }, { "epoch": 2.4154244977316917, "grad_norm": 0.9949643015861511, "learning_rate": 4.820942679767268e-07, "loss": 0.0755, "step": 7454 }, { "epoch": 2.415748541801685, "grad_norm": 0.7823389768600464, "learning_rate": 4.815780843672366e-07, "loss": 0.0701, "step": 7455 }, { "epoch": 2.4160725858716785, "grad_norm": 0.8314511179924011, "learning_rate": 4.810621477930488e-07, "loss": 0.0706, "step": 7456 }, { "epoch": 2.416396629941672, "grad_norm": 0.937160849571228, "learning_rate": 4.805464583173094e-07, "loss": 0.0782, "step": 7457 }, { "epoch": 2.416720674011666, "grad_norm": 0.9175649881362915, "learning_rate": 4.800310160031335e-07, "loss": 0.0718, "step": 7458 }, { "epoch": 2.417044718081659, "grad_norm": 0.8398677110671997, "learning_rate": 4.795158209136067e-07, "loss": 0.0718, "step": 7459 }, { "epoch": 2.4173687621516526, "grad_norm": 0.8556437492370605, "learning_rate": 4.79000873111784e-07, "loss": 0.0711, "step": 7460 }, { "epoch": 2.417692806221646, "grad_norm": 0.8410119414329529, "learning_rate": 4.784861726606893e-07, "loss": 0.0727, "step": 7461 }, { "epoch": 2.4180168502916395, "grad_norm": 0.8888702392578125, "learning_rate": 4.779717196233169e-07, "loss": 0.0771, "step": 7462 }, { "epoch": 2.4183408943616334, "grad_norm": 0.881953775882721, "learning_rate": 4.774575140626317e-07, "loss": 0.0769, "step": 7463 }, { "epoch": 2.4186649384316268, "grad_norm": 0.8769305944442749, "learning_rate": 4.769435560415666e-07, "loss": 0.0735, "step": 7464 }, { "epoch": 2.41898898250162, "grad_norm": 0.8964110612869263, "learning_rate": 4.764298456230265e-07, "loss": 0.076, "step": 7465 }, { "epoch": 2.4193130265716136, "grad_norm": 0.8946002125740051, "learning_rate": 4.7591638286988234e-07, "loss": 0.0747, "step": 7466 }, { "epoch": 2.419637070641607, "grad_norm": 0.9408652782440186, "learning_rate": 4.754031678449794e-07, "loss": 0.0811, "step": 7467 }, { "epoch": 2.419961114711601, "grad_norm": 0.8090646266937256, "learning_rate": 4.7489020061112805e-07, "loss": 0.0685, "step": 7468 }, { "epoch": 2.4202851587815943, "grad_norm": 0.7830265164375305, "learning_rate": 4.743774812311125e-07, "loss": 0.0681, "step": 7469 }, { "epoch": 2.4206092028515878, "grad_norm": 0.8681769967079163, "learning_rate": 4.7386500976768337e-07, "loss": 0.0783, "step": 7470 }, { "epoch": 2.420933246921581, "grad_norm": 0.9414371252059937, "learning_rate": 4.733527862835624e-07, "loss": 0.0794, "step": 7471 }, { "epoch": 2.4212572909915746, "grad_norm": 0.9063982963562012, "learning_rate": 4.728408108414409e-07, "loss": 0.0799, "step": 7472 }, { "epoch": 2.4215813350615685, "grad_norm": 0.8320420980453491, "learning_rate": 4.7232908350397984e-07, "loss": 0.0723, "step": 7473 }, { "epoch": 2.421905379131562, "grad_norm": 0.9368253350257874, "learning_rate": 4.7181760433381017e-07, "loss": 0.0796, "step": 7474 }, { "epoch": 2.4222294232015553, "grad_norm": 0.8854978680610657, "learning_rate": 4.7130637339352995e-07, "loss": 0.0759, "step": 7475 }, { "epoch": 2.4225534672715487, "grad_norm": 0.8954752087593079, "learning_rate": 4.707953907457119e-07, "loss": 0.0801, "step": 7476 }, { "epoch": 2.4228775113415426, "grad_norm": 0.8707118630409241, "learning_rate": 4.702846564528929e-07, "loss": 0.0724, "step": 7477 }, { "epoch": 2.423201555411536, "grad_norm": 0.8651489019393921, "learning_rate": 4.6977417057758297e-07, "loss": 0.0747, "step": 7478 }, { "epoch": 2.4235255994815295, "grad_norm": 0.9725545048713684, "learning_rate": 4.6926393318226045e-07, "loss": 0.0816, "step": 7479 }, { "epoch": 2.423849643551523, "grad_norm": 0.8611765503883362, "learning_rate": 4.6875394432937345e-07, "loss": 0.0763, "step": 7480 }, { "epoch": 2.4241736876215167, "grad_norm": 0.9670865535736084, "learning_rate": 4.6824420408133953e-07, "loss": 0.085, "step": 7481 }, { "epoch": 2.42449773169151, "grad_norm": 0.9267457127571106, "learning_rate": 4.677347125005463e-07, "loss": 0.0829, "step": 7482 }, { "epoch": 2.4248217757615036, "grad_norm": 0.9122713208198547, "learning_rate": 4.6722546964935114e-07, "loss": 0.0747, "step": 7483 }, { "epoch": 2.425145819831497, "grad_norm": 0.913609504699707, "learning_rate": 4.6671647559007884e-07, "loss": 0.0811, "step": 7484 }, { "epoch": 2.4254698639014904, "grad_norm": 0.9412457346916199, "learning_rate": 4.6620773038502625e-07, "loss": 0.0756, "step": 7485 }, { "epoch": 2.4257939079714843, "grad_norm": 0.9006617665290833, "learning_rate": 4.656992340964589e-07, "loss": 0.0763, "step": 7486 }, { "epoch": 2.4261179520414777, "grad_norm": 0.8255707621574402, "learning_rate": 4.651909867866117e-07, "loss": 0.0725, "step": 7487 }, { "epoch": 2.426441996111471, "grad_norm": 0.8910168409347534, "learning_rate": 4.64682988517689e-07, "loss": 0.0737, "step": 7488 }, { "epoch": 2.4267660401814646, "grad_norm": 0.868298351764679, "learning_rate": 4.641752393518661e-07, "loss": 0.0737, "step": 7489 }, { "epoch": 2.427090084251458, "grad_norm": 0.9420347809791565, "learning_rate": 4.6366773935128423e-07, "loss": 0.0761, "step": 7490 }, { "epoch": 2.427414128321452, "grad_norm": 0.8676708340644836, "learning_rate": 4.631604885780591e-07, "loss": 0.0755, "step": 7491 }, { "epoch": 2.4277381723914453, "grad_norm": 0.8301357626914978, "learning_rate": 4.6265348709427146e-07, "loss": 0.07, "step": 7492 }, { "epoch": 2.4280622164614387, "grad_norm": 0.8298184871673584, "learning_rate": 4.621467349619738e-07, "loss": 0.0741, "step": 7493 }, { "epoch": 2.428386260531432, "grad_norm": 0.8731589317321777, "learning_rate": 4.6164023224318786e-07, "loss": 0.0742, "step": 7494 }, { "epoch": 2.4287103046014256, "grad_norm": 0.960504412651062, "learning_rate": 4.6113397899990474e-07, "loss": 0.0854, "step": 7495 }, { "epoch": 2.4290343486714194, "grad_norm": 0.8900678157806396, "learning_rate": 4.6062797529408537e-07, "loss": 0.0763, "step": 7496 }, { "epoch": 2.429358392741413, "grad_norm": 0.9121445417404175, "learning_rate": 4.6012222118765796e-07, "loss": 0.0855, "step": 7497 }, { "epoch": 2.4296824368114063, "grad_norm": 0.9721124768257141, "learning_rate": 4.5961671674252447e-07, "loss": 0.0817, "step": 7498 }, { "epoch": 2.4300064808813997, "grad_norm": 0.8571234345436096, "learning_rate": 4.5911146202055113e-07, "loss": 0.0742, "step": 7499 }, { "epoch": 2.4303305249513936, "grad_norm": 0.9155340790748596, "learning_rate": 4.5860645708357855e-07, "loss": 0.0755, "step": 7500 }, { "epoch": 2.430654569021387, "grad_norm": 0.8868303298950195, "learning_rate": 4.581017019934131e-07, "loss": 0.0793, "step": 7501 }, { "epoch": 2.4309786130913804, "grad_norm": 0.884955644607544, "learning_rate": 4.57597196811832e-07, "loss": 0.0747, "step": 7502 }, { "epoch": 2.431302657161374, "grad_norm": 0.9260040521621704, "learning_rate": 4.5709294160058204e-07, "loss": 0.0853, "step": 7503 }, { "epoch": 2.4316267012313677, "grad_norm": 0.9470503926277161, "learning_rate": 4.565889364213791e-07, "loss": 0.0772, "step": 7504 }, { "epoch": 2.431950745301361, "grad_norm": 0.8989836573600769, "learning_rate": 4.5608518133590933e-07, "loss": 0.075, "step": 7505 }, { "epoch": 2.4322747893713546, "grad_norm": 0.931161105632782, "learning_rate": 4.5558167640582545e-07, "loss": 0.0788, "step": 7506 }, { "epoch": 2.432598833441348, "grad_norm": 0.9857456088066101, "learning_rate": 4.550784216927542e-07, "loss": 0.0817, "step": 7507 }, { "epoch": 2.4329228775113414, "grad_norm": 0.8618913888931274, "learning_rate": 4.5457541725828696e-07, "loss": 0.0778, "step": 7508 }, { "epoch": 2.4332469215813353, "grad_norm": 0.9804918169975281, "learning_rate": 4.5407266316398745e-07, "loss": 0.076, "step": 7509 }, { "epoch": 2.4335709656513287, "grad_norm": 0.8626995086669922, "learning_rate": 4.5357015947138786e-07, "loss": 0.0722, "step": 7510 }, { "epoch": 2.433895009721322, "grad_norm": 0.9694250822067261, "learning_rate": 4.530679062419899e-07, "loss": 0.0836, "step": 7511 }, { "epoch": 2.4342190537913155, "grad_norm": 0.9092100262641907, "learning_rate": 4.5256590353726426e-07, "loss": 0.0753, "step": 7512 }, { "epoch": 2.434543097861309, "grad_norm": 0.8885576128959656, "learning_rate": 4.520641514186522e-07, "loss": 0.0825, "step": 7513 }, { "epoch": 2.434867141931303, "grad_norm": 0.8387184739112854, "learning_rate": 4.5156264994756144e-07, "loss": 0.0771, "step": 7514 }, { "epoch": 2.4351911860012962, "grad_norm": 0.8722485303878784, "learning_rate": 4.510613991853721e-07, "loss": 0.0747, "step": 7515 }, { "epoch": 2.4355152300712897, "grad_norm": 0.921558678150177, "learning_rate": 4.5056039919343236e-07, "loss": 0.0811, "step": 7516 }, { "epoch": 2.435839274141283, "grad_norm": 0.9149647951126099, "learning_rate": 4.5005965003305953e-07, "loss": 0.0766, "step": 7517 }, { "epoch": 2.4361633182112765, "grad_norm": 0.9356411099433899, "learning_rate": 4.4955915176554065e-07, "loss": 0.0772, "step": 7518 }, { "epoch": 2.4364873622812704, "grad_norm": 0.8513500690460205, "learning_rate": 4.490589044521315e-07, "loss": 0.0774, "step": 7519 }, { "epoch": 2.436811406351264, "grad_norm": 0.889763355255127, "learning_rate": 4.4855890815405867e-07, "loss": 0.0774, "step": 7520 }, { "epoch": 2.4371354504212572, "grad_norm": 0.8515815138816833, "learning_rate": 4.4805916293251486e-07, "loss": 0.0752, "step": 7521 }, { "epoch": 2.4374594944912507, "grad_norm": 0.8402429819107056, "learning_rate": 4.4755966884866606e-07, "loss": 0.0702, "step": 7522 }, { "epoch": 2.4377835385612445, "grad_norm": 0.8742625117301941, "learning_rate": 4.470604259636438e-07, "loss": 0.0729, "step": 7523 }, { "epoch": 2.438107582631238, "grad_norm": 0.877052366733551, "learning_rate": 4.465614343385524e-07, "loss": 0.0752, "step": 7524 }, { "epoch": 2.4384316267012314, "grad_norm": 0.8343896865844727, "learning_rate": 4.46062694034462e-07, "loss": 0.0757, "step": 7525 }, { "epoch": 2.438755670771225, "grad_norm": 0.8815245628356934, "learning_rate": 4.455642051124143e-07, "loss": 0.0768, "step": 7526 }, { "epoch": 2.439079714841218, "grad_norm": 0.8473525047302246, "learning_rate": 4.4506596763341985e-07, "loss": 0.0715, "step": 7527 }, { "epoch": 2.439403758911212, "grad_norm": 0.8910574316978455, "learning_rate": 4.445679816584567e-07, "loss": 0.0757, "step": 7528 }, { "epoch": 2.4397278029812055, "grad_norm": 0.9735031127929688, "learning_rate": 4.4407024724847534e-07, "loss": 0.0831, "step": 7529 }, { "epoch": 2.440051847051199, "grad_norm": 0.9022817611694336, "learning_rate": 4.4357276446439197e-07, "loss": 0.078, "step": 7530 }, { "epoch": 2.4403758911211924, "grad_norm": 0.9188567996025085, "learning_rate": 4.4307553336709525e-07, "loss": 0.0773, "step": 7531 }, { "epoch": 2.440699935191186, "grad_norm": 0.854328453540802, "learning_rate": 4.4257855401744044e-07, "loss": 0.0743, "step": 7532 }, { "epoch": 2.4410239792611796, "grad_norm": 0.8932430744171143, "learning_rate": 4.42081826476253e-07, "loss": 0.0767, "step": 7533 }, { "epoch": 2.441348023331173, "grad_norm": 0.8871773481369019, "learning_rate": 4.4158535080432803e-07, "loss": 0.0731, "step": 7534 }, { "epoch": 2.4416720674011665, "grad_norm": 0.8521711826324463, "learning_rate": 4.4108912706242876e-07, "loss": 0.0697, "step": 7535 }, { "epoch": 2.44199611147116, "grad_norm": 0.8758248090744019, "learning_rate": 4.405931553112894e-07, "loss": 0.0703, "step": 7536 }, { "epoch": 2.442320155541154, "grad_norm": 0.9381188750267029, "learning_rate": 4.4009743561161e-07, "loss": 0.0814, "step": 7537 }, { "epoch": 2.442644199611147, "grad_norm": 0.888369083404541, "learning_rate": 4.396019680240643e-07, "loss": 0.0791, "step": 7538 }, { "epoch": 2.4429682436811406, "grad_norm": 0.8075094819068909, "learning_rate": 4.3910675260929096e-07, "loss": 0.0729, "step": 7539 }, { "epoch": 2.443292287751134, "grad_norm": 0.9342337250709534, "learning_rate": 4.386117894278999e-07, "loss": 0.0769, "step": 7540 }, { "epoch": 2.4436163318211275, "grad_norm": 0.8695838451385498, "learning_rate": 4.381170785404704e-07, "loss": 0.0783, "step": 7541 }, { "epoch": 2.4439403758911213, "grad_norm": 1.10112464427948, "learning_rate": 4.376226200075495e-07, "loss": 0.0892, "step": 7542 }, { "epoch": 2.4442644199611148, "grad_norm": 0.9249840378761292, "learning_rate": 4.3712841388965476e-07, "loss": 0.0785, "step": 7543 }, { "epoch": 2.444588464031108, "grad_norm": 0.8668658137321472, "learning_rate": 4.3663446024727247e-07, "loss": 0.0733, "step": 7544 }, { "epoch": 2.4449125081011016, "grad_norm": 0.9287576079368591, "learning_rate": 4.3614075914085617e-07, "loss": 0.079, "step": 7545 }, { "epoch": 2.445236552171095, "grad_norm": 0.8992236256599426, "learning_rate": 4.356473106308326e-07, "loss": 0.0833, "step": 7546 }, { "epoch": 2.445560596241089, "grad_norm": 0.9160580039024353, "learning_rate": 4.351541147775931e-07, "loss": 0.0805, "step": 7547 }, { "epoch": 2.4458846403110823, "grad_norm": 0.8093187808990479, "learning_rate": 4.346611716415006e-07, "loss": 0.0675, "step": 7548 }, { "epoch": 2.4462086843810757, "grad_norm": 0.9260373115539551, "learning_rate": 4.341684812828867e-07, "loss": 0.0807, "step": 7549 }, { "epoch": 2.446532728451069, "grad_norm": 0.859801709651947, "learning_rate": 4.336760437620519e-07, "loss": 0.0742, "step": 7550 }, { "epoch": 2.446856772521063, "grad_norm": 0.8447008728981018, "learning_rate": 4.331838591392662e-07, "loss": 0.0717, "step": 7551 }, { "epoch": 2.4471808165910565, "grad_norm": 0.9389599561691284, "learning_rate": 4.326919274747668e-07, "loss": 0.0794, "step": 7552 }, { "epoch": 2.44750486066105, "grad_norm": 0.8455019593238831, "learning_rate": 4.322002488287635e-07, "loss": 0.0755, "step": 7553 }, { "epoch": 2.4478289047310433, "grad_norm": 0.8933506608009338, "learning_rate": 4.317088232614308e-07, "loss": 0.081, "step": 7554 }, { "epoch": 2.448152948801037, "grad_norm": 0.9255868792533875, "learning_rate": 4.3121765083291663e-07, "loss": 0.0815, "step": 7555 }, { "epoch": 2.4484769928710306, "grad_norm": 0.9462663531303406, "learning_rate": 4.307267316033342e-07, "loss": 0.0837, "step": 7556 }, { "epoch": 2.448801036941024, "grad_norm": 0.9613263607025146, "learning_rate": 4.3023606563276753e-07, "loss": 0.0836, "step": 7557 }, { "epoch": 2.4491250810110174, "grad_norm": 0.8939090967178345, "learning_rate": 4.297456529812702e-07, "loss": 0.0757, "step": 7558 }, { "epoch": 2.449449125081011, "grad_norm": 0.9026023149490356, "learning_rate": 4.292554937088622e-07, "loss": 0.0752, "step": 7559 }, { "epoch": 2.4497731691510047, "grad_norm": 0.9327993392944336, "learning_rate": 4.287655878755365e-07, "loss": 0.0784, "step": 7560 }, { "epoch": 2.450097213220998, "grad_norm": 0.8403828740119934, "learning_rate": 4.282759355412505e-07, "loss": 0.0742, "step": 7561 }, { "epoch": 2.4504212572909916, "grad_norm": 0.8635743260383606, "learning_rate": 4.2778653676593534e-07, "loss": 0.074, "step": 7562 }, { "epoch": 2.450745301360985, "grad_norm": 0.9350886344909668, "learning_rate": 4.272973916094872e-07, "loss": 0.0831, "step": 7563 }, { "epoch": 2.4510693454309784, "grad_norm": 0.9209923148155212, "learning_rate": 4.268085001317726e-07, "loss": 0.0791, "step": 7564 }, { "epoch": 2.4513933895009723, "grad_norm": 0.9279548525810242, "learning_rate": 4.263198623926279e-07, "loss": 0.0814, "step": 7565 }, { "epoch": 2.4517174335709657, "grad_norm": 0.8781946897506714, "learning_rate": 4.258314784518569e-07, "loss": 0.0767, "step": 7566 }, { "epoch": 2.452041477640959, "grad_norm": 0.8827422857284546, "learning_rate": 4.253433483692337e-07, "loss": 0.0772, "step": 7567 }, { "epoch": 2.4523655217109526, "grad_norm": 0.8563030958175659, "learning_rate": 4.248554722045009e-07, "loss": 0.0717, "step": 7568 }, { "epoch": 2.452689565780946, "grad_norm": 0.8876157999038696, "learning_rate": 4.2436785001736896e-07, "loss": 0.0746, "step": 7569 }, { "epoch": 2.45301360985094, "grad_norm": 0.8552733659744263, "learning_rate": 4.2388048186751823e-07, "loss": 0.0748, "step": 7570 }, { "epoch": 2.4533376539209333, "grad_norm": 0.9103438258171082, "learning_rate": 4.233933678145982e-07, "loss": 0.0773, "step": 7571 }, { "epoch": 2.4536616979909267, "grad_norm": 0.985542893409729, "learning_rate": 4.229065079182268e-07, "loss": 0.0763, "step": 7572 }, { "epoch": 2.45398574206092, "grad_norm": 0.8953328728675842, "learning_rate": 4.224199022379913e-07, "loss": 0.0772, "step": 7573 }, { "epoch": 2.454309786130914, "grad_norm": 0.8356739282608032, "learning_rate": 4.2193355083344684e-07, "loss": 0.0724, "step": 7574 }, { "epoch": 2.4546338302009074, "grad_norm": 0.8867468237876892, "learning_rate": 4.2144745376411946e-07, "loss": 0.0726, "step": 7575 }, { "epoch": 2.454957874270901, "grad_norm": 0.8875829577445984, "learning_rate": 4.2096161108950015e-07, "loss": 0.0739, "step": 7576 }, { "epoch": 2.4552819183408943, "grad_norm": 0.8456739187240601, "learning_rate": 4.204760228690546e-07, "loss": 0.0714, "step": 7577 }, { "epoch": 2.4556059624108877, "grad_norm": 0.7902773022651672, "learning_rate": 4.1999068916221184e-07, "loss": 0.0668, "step": 7578 }, { "epoch": 2.4559300064808816, "grad_norm": 0.8657953143119812, "learning_rate": 4.1950561002837257e-07, "loss": 0.0737, "step": 7579 }, { "epoch": 2.456254050550875, "grad_norm": 0.8508570194244385, "learning_rate": 4.1902078552690573e-07, "loss": 0.072, "step": 7580 }, { "epoch": 2.4565780946208684, "grad_norm": 0.867751955986023, "learning_rate": 4.185362157171496e-07, "loss": 0.0761, "step": 7581 }, { "epoch": 2.456902138690862, "grad_norm": 0.9046393632888794, "learning_rate": 4.1805190065841107e-07, "loss": 0.0764, "step": 7582 }, { "epoch": 2.4572261827608557, "grad_norm": 0.9099305272102356, "learning_rate": 4.175678404099637e-07, "loss": 0.081, "step": 7583 }, { "epoch": 2.457550226830849, "grad_norm": 0.8977386951446533, "learning_rate": 4.1708403503105456e-07, "loss": 0.0759, "step": 7584 }, { "epoch": 2.4578742709008425, "grad_norm": 0.8734598159790039, "learning_rate": 4.166004845808941e-07, "loss": 0.0714, "step": 7585 }, { "epoch": 2.458198314970836, "grad_norm": 0.8450359106063843, "learning_rate": 4.1611718911866663e-07, "loss": 0.0708, "step": 7586 }, { "epoch": 2.4585223590408294, "grad_norm": 0.8886892199516296, "learning_rate": 4.1563414870352093e-07, "loss": 0.0749, "step": 7587 }, { "epoch": 2.4588464031108233, "grad_norm": 0.8860898017883301, "learning_rate": 4.1515136339457725e-07, "loss": 0.0758, "step": 7588 }, { "epoch": 2.4591704471808167, "grad_norm": 0.8982911109924316, "learning_rate": 4.146688332509241e-07, "loss": 0.0784, "step": 7589 }, { "epoch": 2.45949449125081, "grad_norm": 0.8104112148284912, "learning_rate": 4.1418655833161794e-07, "loss": 0.0663, "step": 7590 }, { "epoch": 2.4598185353208035, "grad_norm": 0.8376603126525879, "learning_rate": 4.137045386956853e-07, "loss": 0.0751, "step": 7591 }, { "epoch": 2.460142579390797, "grad_norm": 0.9051219820976257, "learning_rate": 4.1322277440211973e-07, "loss": 0.075, "step": 7592 }, { "epoch": 2.460466623460791, "grad_norm": 0.9663034677505493, "learning_rate": 4.1274126550988505e-07, "loss": 0.0786, "step": 7593 }, { "epoch": 2.4607906675307842, "grad_norm": 0.9784535765647888, "learning_rate": 4.1226001207791327e-07, "loss": 0.0798, "step": 7594 }, { "epoch": 2.4611147116007777, "grad_norm": 0.93511962890625, "learning_rate": 4.1177901416510485e-07, "loss": 0.0793, "step": 7595 }, { "epoch": 2.461438755670771, "grad_norm": 0.9316219687461853, "learning_rate": 4.112982718303299e-07, "loss": 0.08, "step": 7596 }, { "epoch": 2.4617627997407645, "grad_norm": 0.8901696801185608, "learning_rate": 4.1081778513242606e-07, "loss": 0.0755, "step": 7597 }, { "epoch": 2.4620868438107584, "grad_norm": 0.8933848142623901, "learning_rate": 4.103375541302007e-07, "loss": 0.0787, "step": 7598 }, { "epoch": 2.462410887880752, "grad_norm": 0.8804818391799927, "learning_rate": 4.0985757888242965e-07, "loss": 0.0797, "step": 7599 }, { "epoch": 2.462734931950745, "grad_norm": 0.8956480026245117, "learning_rate": 4.0937785944785617e-07, "loss": 0.0733, "step": 7600 }, { "epoch": 2.4630589760207386, "grad_norm": 0.8888890743255615, "learning_rate": 4.0889839588519386e-07, "loss": 0.0777, "step": 7601 }, { "epoch": 2.4633830200907325, "grad_norm": 0.9150384068489075, "learning_rate": 4.0841918825312465e-07, "loss": 0.0815, "step": 7602 }, { "epoch": 2.463707064160726, "grad_norm": 0.8889130353927612, "learning_rate": 4.0794023661029856e-07, "loss": 0.0794, "step": 7603 }, { "epoch": 2.4640311082307194, "grad_norm": 0.8525159358978271, "learning_rate": 4.0746154101533485e-07, "loss": 0.0778, "step": 7604 }, { "epoch": 2.464355152300713, "grad_norm": 0.8394224047660828, "learning_rate": 4.0698310152682107e-07, "loss": 0.0727, "step": 7605 }, { "epoch": 2.4646791963707066, "grad_norm": 0.8760896921157837, "learning_rate": 4.065049182033146e-07, "loss": 0.0716, "step": 7606 }, { "epoch": 2.4650032404407, "grad_norm": 0.8698969483375549, "learning_rate": 4.0602699110333795e-07, "loss": 0.0713, "step": 7607 }, { "epoch": 2.4653272845106935, "grad_norm": 0.9061896204948425, "learning_rate": 4.0554932028538774e-07, "loss": 0.0797, "step": 7608 }, { "epoch": 2.465651328580687, "grad_norm": 0.8686105608940125, "learning_rate": 4.050719058079244e-07, "loss": 0.0752, "step": 7609 }, { "epoch": 2.4659753726506803, "grad_norm": 0.8417704701423645, "learning_rate": 4.045947477293791e-07, "loss": 0.0717, "step": 7610 }, { "epoch": 2.466299416720674, "grad_norm": 0.8493486642837524, "learning_rate": 4.041178461081519e-07, "loss": 0.0736, "step": 7611 }, { "epoch": 2.4666234607906676, "grad_norm": 0.8493620157241821, "learning_rate": 4.036412010026103e-07, "loss": 0.0761, "step": 7612 }, { "epoch": 2.466947504860661, "grad_norm": 0.8524096012115479, "learning_rate": 4.0316481247109215e-07, "loss": 0.0755, "step": 7613 }, { "epoch": 2.4672715489306545, "grad_norm": 0.8808268904685974, "learning_rate": 4.0268868057190075e-07, "loss": 0.0798, "step": 7614 }, { "epoch": 2.467595593000648, "grad_norm": 0.8217577338218689, "learning_rate": 4.022128053633123e-07, "loss": 0.0657, "step": 7615 }, { "epoch": 2.4679196370706418, "grad_norm": 0.9325581192970276, "learning_rate": 4.017371869035674e-07, "loss": 0.0794, "step": 7616 }, { "epoch": 2.468243681140635, "grad_norm": 0.8738077878952026, "learning_rate": 4.01261825250879e-07, "loss": 0.0732, "step": 7617 }, { "epoch": 2.4685677252106286, "grad_norm": 0.8512647747993469, "learning_rate": 4.0078672046342553e-07, "loss": 0.0738, "step": 7618 }, { "epoch": 2.468891769280622, "grad_norm": 0.9815152287483215, "learning_rate": 4.0031187259935546e-07, "loss": 0.0859, "step": 7619 }, { "epoch": 2.4692158133506155, "grad_norm": 0.8624076247215271, "learning_rate": 3.998372817167856e-07, "loss": 0.0729, "step": 7620 }, { "epoch": 2.4695398574206093, "grad_norm": 0.8829675316810608, "learning_rate": 3.993629478738012e-07, "loss": 0.0728, "step": 7621 }, { "epoch": 2.4698639014906028, "grad_norm": 0.9542384743690491, "learning_rate": 3.988888711284569e-07, "loss": 0.0772, "step": 7622 }, { "epoch": 2.470187945560596, "grad_norm": 0.899666965007782, "learning_rate": 3.9841505153877387e-07, "loss": 0.0792, "step": 7623 }, { "epoch": 2.4705119896305896, "grad_norm": 0.8875526785850525, "learning_rate": 3.9794148916274365e-07, "loss": 0.0734, "step": 7624 }, { "epoch": 2.4708360337005835, "grad_norm": 0.9024166464805603, "learning_rate": 3.974681840583255e-07, "loss": 0.0757, "step": 7625 }, { "epoch": 2.471160077770577, "grad_norm": 0.9139639139175415, "learning_rate": 3.969951362834476e-07, "loss": 0.0785, "step": 7626 }, { "epoch": 2.4714841218405703, "grad_norm": 0.9424812197685242, "learning_rate": 3.965223458960063e-07, "loss": 0.0789, "step": 7627 }, { "epoch": 2.4718081659105637, "grad_norm": 0.8154076933860779, "learning_rate": 3.9604981295386673e-07, "loss": 0.0729, "step": 7628 }, { "epoch": 2.4721322099805576, "grad_norm": 0.9030548334121704, "learning_rate": 3.9557753751486237e-07, "loss": 0.0774, "step": 7629 }, { "epoch": 2.472456254050551, "grad_norm": 0.9466914534568787, "learning_rate": 3.9510551963679534e-07, "loss": 0.0798, "step": 7630 }, { "epoch": 2.4727802981205445, "grad_norm": 0.8710300922393799, "learning_rate": 3.9463375937743546e-07, "loss": 0.0785, "step": 7631 }, { "epoch": 2.473104342190538, "grad_norm": 0.9093683958053589, "learning_rate": 3.941622567945216e-07, "loss": 0.0805, "step": 7632 }, { "epoch": 2.4734283862605313, "grad_norm": 0.8014646768569946, "learning_rate": 3.9369101194576156e-07, "loss": 0.0689, "step": 7633 }, { "epoch": 2.473752430330525, "grad_norm": 0.8499481678009033, "learning_rate": 3.93220024888831e-07, "loss": 0.0765, "step": 7634 }, { "epoch": 2.4740764744005186, "grad_norm": 0.9159678816795349, "learning_rate": 3.927492956813747e-07, "loss": 0.0752, "step": 7635 }, { "epoch": 2.474400518470512, "grad_norm": 0.9946656227111816, "learning_rate": 3.922788243810038e-07, "loss": 0.0856, "step": 7636 }, { "epoch": 2.4747245625405054, "grad_norm": 0.9459431171417236, "learning_rate": 3.918086110453015e-07, "loss": 0.0742, "step": 7637 }, { "epoch": 2.475048606610499, "grad_norm": 0.9478768706321716, "learning_rate": 3.9133865573181524e-07, "loss": 0.0778, "step": 7638 }, { "epoch": 2.4753726506804927, "grad_norm": 0.8506389856338501, "learning_rate": 3.9086895849806547e-07, "loss": 0.0737, "step": 7639 }, { "epoch": 2.475696694750486, "grad_norm": 0.8261045217514038, "learning_rate": 3.903995194015364e-07, "loss": 0.0735, "step": 7640 }, { "epoch": 2.4760207388204796, "grad_norm": 0.8218585848808289, "learning_rate": 3.899303384996836e-07, "loss": 0.0756, "step": 7641 }, { "epoch": 2.476344782890473, "grad_norm": 0.9573899507522583, "learning_rate": 3.894614158499302e-07, "loss": 0.0828, "step": 7642 }, { "epoch": 2.4766688269604664, "grad_norm": 0.8530287146568298, "learning_rate": 3.889927515096681e-07, "loss": 0.0715, "step": 7643 }, { "epoch": 2.4769928710304603, "grad_norm": 0.9650495648384094, "learning_rate": 3.885243455362578e-07, "loss": 0.0821, "step": 7644 }, { "epoch": 2.4773169151004537, "grad_norm": 0.877484142780304, "learning_rate": 3.8805619798702565e-07, "loss": 0.0707, "step": 7645 }, { "epoch": 2.477640959170447, "grad_norm": 0.9321907162666321, "learning_rate": 3.8758830891927056e-07, "loss": 0.0803, "step": 7646 }, { "epoch": 2.4779650032404406, "grad_norm": 0.9718896746635437, "learning_rate": 3.8712067839025647e-07, "loss": 0.0789, "step": 7647 }, { "epoch": 2.478289047310434, "grad_norm": 0.942742109298706, "learning_rate": 3.86653306457217e-07, "loss": 0.0786, "step": 7648 }, { "epoch": 2.478613091380428, "grad_norm": 0.9233473539352417, "learning_rate": 3.861861931773542e-07, "loss": 0.0781, "step": 7649 }, { "epoch": 2.4789371354504213, "grad_norm": 0.8698999285697937, "learning_rate": 3.8571933860783785e-07, "loss": 0.0681, "step": 7650 }, { "epoch": 2.4792611795204147, "grad_norm": 0.9471836090087891, "learning_rate": 3.8525274280580646e-07, "loss": 0.0738, "step": 7651 }, { "epoch": 2.479585223590408, "grad_norm": 0.9754993915557861, "learning_rate": 3.8478640582836733e-07, "loss": 0.0794, "step": 7652 }, { "epoch": 2.479909267660402, "grad_norm": 0.9015049934387207, "learning_rate": 3.8432032773259574e-07, "loss": 0.0806, "step": 7653 }, { "epoch": 2.4802333117303954, "grad_norm": 0.8969537019729614, "learning_rate": 3.838545085755341e-07, "loss": 0.0801, "step": 7654 }, { "epoch": 2.480557355800389, "grad_norm": 0.8950188755989075, "learning_rate": 3.8338894841419476e-07, "loss": 0.0739, "step": 7655 }, { "epoch": 2.4808813998703823, "grad_norm": 0.8588986992835999, "learning_rate": 3.8292364730555754e-07, "loss": 0.0748, "step": 7656 }, { "epoch": 2.481205443940376, "grad_norm": 0.9033975601196289, "learning_rate": 3.8245860530657126e-07, "loss": 0.0758, "step": 7657 }, { "epoch": 2.4815294880103695, "grad_norm": 0.9325078725814819, "learning_rate": 3.8199382247415236e-07, "loss": 0.0837, "step": 7658 }, { "epoch": 2.481853532080363, "grad_norm": 0.8054845333099365, "learning_rate": 3.8152929886518587e-07, "loss": 0.0657, "step": 7659 }, { "epoch": 2.4821775761503564, "grad_norm": 0.8823132514953613, "learning_rate": 3.810650345365241e-07, "loss": 0.0747, "step": 7660 }, { "epoch": 2.48250162022035, "grad_norm": 0.8961814045906067, "learning_rate": 3.8060102954499024e-07, "loss": 0.0737, "step": 7661 }, { "epoch": 2.4828256642903437, "grad_norm": 0.9573402404785156, "learning_rate": 3.8013728394737216e-07, "loss": 0.0805, "step": 7662 }, { "epoch": 2.483149708360337, "grad_norm": 0.8844099640846252, "learning_rate": 3.796737978004289e-07, "loss": 0.079, "step": 7663 }, { "epoch": 2.4834737524303305, "grad_norm": 0.8763425350189209, "learning_rate": 3.792105711608865e-07, "loss": 0.076, "step": 7664 }, { "epoch": 2.483797796500324, "grad_norm": 0.8611736297607422, "learning_rate": 3.7874760408543933e-07, "loss": 0.0784, "step": 7665 }, { "epoch": 2.4841218405703174, "grad_norm": 0.9122564792633057, "learning_rate": 3.7828489663075065e-07, "loss": 0.0801, "step": 7666 }, { "epoch": 2.4844458846403112, "grad_norm": 0.9059411287307739, "learning_rate": 3.778224488534496e-07, "loss": 0.0789, "step": 7667 }, { "epoch": 2.4847699287103047, "grad_norm": 0.881196916103363, "learning_rate": 3.773602608101376e-07, "loss": 0.0751, "step": 7668 }, { "epoch": 2.485093972780298, "grad_norm": 0.8495291471481323, "learning_rate": 3.7689833255737995e-07, "loss": 0.0765, "step": 7669 }, { "epoch": 2.4854180168502915, "grad_norm": 0.9652096033096313, "learning_rate": 3.764366641517145e-07, "loss": 0.0818, "step": 7670 }, { "epoch": 2.485742060920285, "grad_norm": 0.8796998858451843, "learning_rate": 3.759752556496421e-07, "loss": 0.0694, "step": 7671 }, { "epoch": 2.486066104990279, "grad_norm": 0.9088475704193115, "learning_rate": 3.7551410710763764e-07, "loss": 0.0767, "step": 7672 }, { "epoch": 2.4863901490602722, "grad_norm": 0.9164015054702759, "learning_rate": 3.7505321858213926e-07, "loss": 0.078, "step": 7673 }, { "epoch": 2.4867141931302656, "grad_norm": 0.941716194152832, "learning_rate": 3.7459259012955606e-07, "loss": 0.0779, "step": 7674 }, { "epoch": 2.487038237200259, "grad_norm": 0.9306308031082153, "learning_rate": 3.7413222180626455e-07, "loss": 0.0756, "step": 7675 }, { "epoch": 2.487362281270253, "grad_norm": 0.8866762518882751, "learning_rate": 3.736721136686081e-07, "loss": 0.0742, "step": 7676 }, { "epoch": 2.4876863253402464, "grad_norm": 0.8724895715713501, "learning_rate": 3.7321226577290147e-07, "loss": 0.0736, "step": 7677 }, { "epoch": 2.48801036941024, "grad_norm": 0.7774869203567505, "learning_rate": 3.7275267817542425e-07, "loss": 0.0683, "step": 7678 }, { "epoch": 2.488334413480233, "grad_norm": 0.9494317173957825, "learning_rate": 3.7229335093242587e-07, "loss": 0.0802, "step": 7679 }, { "epoch": 2.488658457550227, "grad_norm": 0.8791287541389465, "learning_rate": 3.7183428410012326e-07, "loss": 0.0781, "step": 7680 }, { "epoch": 2.4889825016202205, "grad_norm": 0.9582348465919495, "learning_rate": 3.713754777347023e-07, "loss": 0.0751, "step": 7681 }, { "epoch": 2.489306545690214, "grad_norm": 0.8857023119926453, "learning_rate": 3.7091693189231615e-07, "loss": 0.0724, "step": 7682 }, { "epoch": 2.4896305897602073, "grad_norm": 0.8695082068443298, "learning_rate": 3.704586466290863e-07, "loss": 0.0777, "step": 7683 }, { "epoch": 2.4899546338302008, "grad_norm": 1.0032886266708374, "learning_rate": 3.7000062200110266e-07, "loss": 0.0815, "step": 7684 }, { "epoch": 2.4902786779001946, "grad_norm": 0.9594873189926147, "learning_rate": 3.6954285806442337e-07, "loss": 0.0804, "step": 7685 }, { "epoch": 2.490602721970188, "grad_norm": 0.854714035987854, "learning_rate": 3.6908535487507335e-07, "loss": 0.0725, "step": 7686 }, { "epoch": 2.4909267660401815, "grad_norm": 0.8480677604675293, "learning_rate": 3.68628112489047e-07, "loss": 0.0729, "step": 7687 }, { "epoch": 2.491250810110175, "grad_norm": 0.9920457601547241, "learning_rate": 3.681711309623065e-07, "loss": 0.0751, "step": 7688 }, { "epoch": 2.4915748541801683, "grad_norm": 0.8475012183189392, "learning_rate": 3.677144103507818e-07, "loss": 0.0735, "step": 7689 }, { "epoch": 2.491898898250162, "grad_norm": 0.9704048037528992, "learning_rate": 3.672579507103716e-07, "loss": 0.0799, "step": 7690 }, { "epoch": 2.4922229423201556, "grad_norm": 0.9394801259040833, "learning_rate": 3.668017520969405e-07, "loss": 0.0808, "step": 7691 }, { "epoch": 2.492546986390149, "grad_norm": 0.9730172753334045, "learning_rate": 3.663458145663254e-07, "loss": 0.0847, "step": 7692 }, { "epoch": 2.4928710304601425, "grad_norm": 0.9635066986083984, "learning_rate": 3.65890138174326e-07, "loss": 0.077, "step": 7693 }, { "epoch": 2.493195074530136, "grad_norm": 0.8948413729667664, "learning_rate": 3.6543472297671495e-07, "loss": 0.0779, "step": 7694 }, { "epoch": 2.4935191186001298, "grad_norm": 0.8416469097137451, "learning_rate": 3.6497956902922904e-07, "loss": 0.0733, "step": 7695 }, { "epoch": 2.493843162670123, "grad_norm": 0.8402621150016785, "learning_rate": 3.645246763875754e-07, "loss": 0.0705, "step": 7696 }, { "epoch": 2.4941672067401166, "grad_norm": 0.9553708434104919, "learning_rate": 3.640700451074289e-07, "loss": 0.0795, "step": 7697 }, { "epoch": 2.49449125081011, "grad_norm": 0.8238723278045654, "learning_rate": 3.636156752444303e-07, "loss": 0.0689, "step": 7698 }, { "epoch": 2.4948152948801035, "grad_norm": 0.8835716843605042, "learning_rate": 3.631615668541921e-07, "loss": 0.0758, "step": 7699 }, { "epoch": 2.4951393389500973, "grad_norm": 0.8276665210723877, "learning_rate": 3.6270771999229124e-07, "loss": 0.0724, "step": 7700 }, { "epoch": 2.4954633830200907, "grad_norm": 0.9201515316963196, "learning_rate": 3.622541347142758e-07, "loss": 0.082, "step": 7701 }, { "epoch": 2.495787427090084, "grad_norm": 0.9209465384483337, "learning_rate": 3.618008110756588e-07, "loss": 0.0825, "step": 7702 }, { "epoch": 2.4961114711600776, "grad_norm": 0.9255928993225098, "learning_rate": 3.6134774913192314e-07, "loss": 0.0785, "step": 7703 }, { "epoch": 2.4964355152300715, "grad_norm": 0.8700760006904602, "learning_rate": 3.608949489385191e-07, "loss": 0.0711, "step": 7704 }, { "epoch": 2.496759559300065, "grad_norm": 0.8912569284439087, "learning_rate": 3.6044241055086525e-07, "loss": 0.0759, "step": 7705 }, { "epoch": 2.4970836033700583, "grad_norm": 0.9345059990882874, "learning_rate": 3.599901340243478e-07, "loss": 0.0758, "step": 7706 }, { "epoch": 2.4974076474400517, "grad_norm": 0.8387685418128967, "learning_rate": 3.5953811941432104e-07, "loss": 0.0718, "step": 7707 }, { "epoch": 2.4977316915100456, "grad_norm": 0.9619724154472351, "learning_rate": 3.590863667761077e-07, "loss": 0.08, "step": 7708 }, { "epoch": 2.498055735580039, "grad_norm": 0.8170272707939148, "learning_rate": 3.5863487616499713e-07, "loss": 0.068, "step": 7709 }, { "epoch": 2.4983797796500324, "grad_norm": 0.9006374478340149, "learning_rate": 3.581836476362474e-07, "loss": 0.0817, "step": 7710 }, { "epoch": 2.498703823720026, "grad_norm": 0.8181907534599304, "learning_rate": 3.5773268124508485e-07, "loss": 0.0697, "step": 7711 }, { "epoch": 2.4990278677900193, "grad_norm": 0.8882855176925659, "learning_rate": 3.5728197704670344e-07, "loss": 0.0795, "step": 7712 }, { "epoch": 2.499351911860013, "grad_norm": 0.9369103908538818, "learning_rate": 3.5683153509626504e-07, "loss": 0.0806, "step": 7713 }, { "epoch": 2.4996759559300066, "grad_norm": 0.8526079654693604, "learning_rate": 3.563813554488996e-07, "loss": 0.0784, "step": 7714 }, { "epoch": 2.5, "grad_norm": 0.8463532328605652, "learning_rate": 3.559314381597034e-07, "loss": 0.0697, "step": 7715 }, { "epoch": 2.5003240440699934, "grad_norm": 1.0224663019180298, "learning_rate": 3.55481783283744e-07, "loss": 0.0814, "step": 7716 }, { "epoch": 2.500648088139987, "grad_norm": 0.8725597858428955, "learning_rate": 3.5503239087605337e-07, "loss": 0.0755, "step": 7717 }, { "epoch": 2.5009721322099807, "grad_norm": 0.9240145087242126, "learning_rate": 3.54583260991633e-07, "loss": 0.0751, "step": 7718 }, { "epoch": 2.501296176279974, "grad_norm": 0.8926657438278198, "learning_rate": 3.541343936854524e-07, "loss": 0.0756, "step": 7719 }, { "epoch": 2.5016202203499676, "grad_norm": 0.9064965844154358, "learning_rate": 3.5368578901244843e-07, "loss": 0.0736, "step": 7720 }, { "epoch": 2.501944264419961, "grad_norm": 0.8855481743812561, "learning_rate": 3.5323744702752657e-07, "loss": 0.0742, "step": 7721 }, { "epoch": 2.5022683084899544, "grad_norm": 0.8861925601959229, "learning_rate": 3.5278936778555763e-07, "loss": 0.0752, "step": 7722 }, { "epoch": 2.5025923525599483, "grad_norm": 0.8892893195152283, "learning_rate": 3.523415513413847e-07, "loss": 0.0763, "step": 7723 }, { "epoch": 2.5029163966299417, "grad_norm": 0.9149244427680969, "learning_rate": 3.518939977498137e-07, "loss": 0.0763, "step": 7724 }, { "epoch": 2.503240440699935, "grad_norm": 0.897413969039917, "learning_rate": 3.514467070656233e-07, "loss": 0.0775, "step": 7725 }, { "epoch": 2.5035644847699285, "grad_norm": 0.9678398370742798, "learning_rate": 3.509996793435558e-07, "loss": 0.0826, "step": 7726 }, { "epoch": 2.503888528839922, "grad_norm": 0.8727210164070129, "learning_rate": 3.505529146383235e-07, "loss": 0.0726, "step": 7727 }, { "epoch": 2.504212572909916, "grad_norm": 0.8535559773445129, "learning_rate": 3.501064130046064e-07, "loss": 0.0716, "step": 7728 }, { "epoch": 2.5045366169799093, "grad_norm": 0.8813205361366272, "learning_rate": 3.496601744970518e-07, "loss": 0.0734, "step": 7729 }, { "epoch": 2.5048606610499027, "grad_norm": 0.8789255023002625, "learning_rate": 3.492141991702752e-07, "loss": 0.0756, "step": 7730 }, { "epoch": 2.5051847051198965, "grad_norm": 0.9259575009346008, "learning_rate": 3.4876848707885854e-07, "loss": 0.0787, "step": 7731 }, { "epoch": 2.50550874918989, "grad_norm": 0.9527920484542847, "learning_rate": 3.483230382773545e-07, "loss": 0.076, "step": 7732 }, { "epoch": 2.5058327932598834, "grad_norm": 0.8948280215263367, "learning_rate": 3.478778528202803e-07, "loss": 0.0753, "step": 7733 }, { "epoch": 2.506156837329877, "grad_norm": 0.9487646222114563, "learning_rate": 3.474329307621227e-07, "loss": 0.0781, "step": 7734 }, { "epoch": 2.5064808813998702, "grad_norm": 0.9494906663894653, "learning_rate": 3.469882721573356e-07, "loss": 0.0825, "step": 7735 }, { "epoch": 2.506804925469864, "grad_norm": 0.8777042031288147, "learning_rate": 3.465438770603416e-07, "loss": 0.0758, "step": 7736 }, { "epoch": 2.5071289695398575, "grad_norm": 0.9567082524299622, "learning_rate": 3.4609974552552993e-07, "loss": 0.0814, "step": 7737 }, { "epoch": 2.507453013609851, "grad_norm": 0.8560287952423096, "learning_rate": 3.456558776072585e-07, "loss": 0.0738, "step": 7738 }, { "epoch": 2.5077770576798444, "grad_norm": 0.9227958917617798, "learning_rate": 3.4521227335985146e-07, "loss": 0.0746, "step": 7739 }, { "epoch": 2.508101101749838, "grad_norm": 0.929495632648468, "learning_rate": 3.447689328376022e-07, "loss": 0.0823, "step": 7740 }, { "epoch": 2.5084251458198317, "grad_norm": 0.8940978050231934, "learning_rate": 3.4432585609477125e-07, "loss": 0.0738, "step": 7741 }, { "epoch": 2.508749189889825, "grad_norm": 0.9019840955734253, "learning_rate": 3.438830431855872e-07, "loss": 0.0772, "step": 7742 }, { "epoch": 2.5090732339598185, "grad_norm": 0.8739924430847168, "learning_rate": 3.434404941642455e-07, "loss": 0.0754, "step": 7743 }, { "epoch": 2.509397278029812, "grad_norm": 0.8102080225944519, "learning_rate": 3.4299820908491045e-07, "loss": 0.071, "step": 7744 }, { "epoch": 2.5097213220998054, "grad_norm": 0.9312887191772461, "learning_rate": 3.4255618800171366e-07, "loss": 0.0817, "step": 7745 }, { "epoch": 2.5100453661697992, "grad_norm": 0.9610565304756165, "learning_rate": 3.421144309687527e-07, "loss": 0.084, "step": 7746 }, { "epoch": 2.5103694102397927, "grad_norm": 0.9115961790084839, "learning_rate": 3.4167293804009656e-07, "loss": 0.0737, "step": 7747 }, { "epoch": 2.510693454309786, "grad_norm": 0.8804581165313721, "learning_rate": 3.412317092697781e-07, "loss": 0.077, "step": 7748 }, { "epoch": 2.5110174983797795, "grad_norm": 0.8988832831382751, "learning_rate": 3.407907447117997e-07, "loss": 0.0783, "step": 7749 }, { "epoch": 2.511341542449773, "grad_norm": 0.9219678640365601, "learning_rate": 3.4035004442013157e-07, "loss": 0.0739, "step": 7750 }, { "epoch": 2.511665586519767, "grad_norm": 0.868253767490387, "learning_rate": 3.399096084487108e-07, "loss": 0.0753, "step": 7751 }, { "epoch": 2.51198963058976, "grad_norm": 0.8362921476364136, "learning_rate": 3.394694368514434e-07, "loss": 0.0707, "step": 7752 }, { "epoch": 2.5123136746597536, "grad_norm": 0.9166553020477295, "learning_rate": 3.390295296822002e-07, "loss": 0.079, "step": 7753 }, { "epoch": 2.5126377187297475, "grad_norm": 0.8944741487503052, "learning_rate": 3.3858988699482397e-07, "loss": 0.0769, "step": 7754 }, { "epoch": 2.512961762799741, "grad_norm": 0.987773060798645, "learning_rate": 3.381505088431203e-07, "loss": 0.0813, "step": 7755 }, { "epoch": 2.5132858068697344, "grad_norm": 0.9104339480400085, "learning_rate": 3.377113952808669e-07, "loss": 0.0811, "step": 7756 }, { "epoch": 2.5136098509397278, "grad_norm": 0.932121753692627, "learning_rate": 3.3727254636180597e-07, "loss": 0.0782, "step": 7757 }, { "epoch": 2.513933895009721, "grad_norm": 0.93451988697052, "learning_rate": 3.3683396213964826e-07, "loss": 0.0737, "step": 7758 }, { "epoch": 2.514257939079715, "grad_norm": 0.8424649834632874, "learning_rate": 3.363956426680728e-07, "loss": 0.073, "step": 7759 }, { "epoch": 2.5145819831497085, "grad_norm": 0.8437464237213135, "learning_rate": 3.3595758800072515e-07, "loss": 0.0735, "step": 7760 }, { "epoch": 2.514906027219702, "grad_norm": 0.877597451210022, "learning_rate": 3.355197981912198e-07, "loss": 0.0731, "step": 7761 }, { "epoch": 2.5152300712896953, "grad_norm": 0.8775132894515991, "learning_rate": 3.350822732931361e-07, "loss": 0.0726, "step": 7762 }, { "epoch": 2.5155541153596888, "grad_norm": 0.8069689273834229, "learning_rate": 3.3464501336002544e-07, "loss": 0.0703, "step": 7763 }, { "epoch": 2.5158781594296826, "grad_norm": 0.9043518900871277, "learning_rate": 3.342080184454022e-07, "loss": 0.0787, "step": 7764 }, { "epoch": 2.516202203499676, "grad_norm": 0.8696305155754089, "learning_rate": 3.337712886027511e-07, "loss": 0.0705, "step": 7765 }, { "epoch": 2.5165262475696695, "grad_norm": 0.9685964584350586, "learning_rate": 3.3333482388552356e-07, "loss": 0.0765, "step": 7766 }, { "epoch": 2.516850291639663, "grad_norm": 1.0007911920547485, "learning_rate": 3.3289862434713857e-07, "loss": 0.083, "step": 7767 }, { "epoch": 2.5171743357096563, "grad_norm": 0.9157785177230835, "learning_rate": 3.3246269004098275e-07, "loss": 0.0797, "step": 7768 }, { "epoch": 2.51749837977965, "grad_norm": 0.8096321821212769, "learning_rate": 3.320270210204107e-07, "loss": 0.0695, "step": 7769 }, { "epoch": 2.5178224238496436, "grad_norm": 0.9370676279067993, "learning_rate": 3.3159161733874347e-07, "loss": 0.0757, "step": 7770 }, { "epoch": 2.518146467919637, "grad_norm": 0.9458437561988831, "learning_rate": 3.311564790492702e-07, "loss": 0.0779, "step": 7771 }, { "epoch": 2.5184705119896305, "grad_norm": 0.942054271697998, "learning_rate": 3.307216062052479e-07, "loss": 0.0758, "step": 7772 }, { "epoch": 2.518794556059624, "grad_norm": 0.8612393736839294, "learning_rate": 3.3028699885990085e-07, "loss": 0.0741, "step": 7773 }, { "epoch": 2.5191186001296177, "grad_norm": 0.90648353099823, "learning_rate": 3.298526570664207e-07, "loss": 0.0795, "step": 7774 }, { "epoch": 2.519442644199611, "grad_norm": 0.8954811096191406, "learning_rate": 3.294185808779665e-07, "loss": 0.0764, "step": 7775 }, { "epoch": 2.5197666882696046, "grad_norm": 0.900047779083252, "learning_rate": 3.289847703476659e-07, "loss": 0.0791, "step": 7776 }, { "epoch": 2.5200907323395985, "grad_norm": 0.8750745058059692, "learning_rate": 3.285512255286111e-07, "loss": 0.0747, "step": 7777 }, { "epoch": 2.5204147764095914, "grad_norm": 0.8622852563858032, "learning_rate": 3.2811794647386625e-07, "loss": 0.077, "step": 7778 }, { "epoch": 2.5207388204795853, "grad_norm": 0.8372640609741211, "learning_rate": 3.276849332364587e-07, "loss": 0.0732, "step": 7779 }, { "epoch": 2.5210628645495787, "grad_norm": 0.8605118989944458, "learning_rate": 3.2725218586938584e-07, "loss": 0.0746, "step": 7780 }, { "epoch": 2.521386908619572, "grad_norm": 0.9245160818099976, "learning_rate": 3.2681970442561134e-07, "loss": 0.0733, "step": 7781 }, { "epoch": 2.521710952689566, "grad_norm": 0.9335709810256958, "learning_rate": 3.2638748895806705e-07, "loss": 0.081, "step": 7782 }, { "epoch": 2.5220349967595594, "grad_norm": 0.914212167263031, "learning_rate": 3.259555395196526e-07, "loss": 0.0799, "step": 7783 }, { "epoch": 2.522359040829553, "grad_norm": 0.9667627215385437, "learning_rate": 3.255238561632326e-07, "loss": 0.0784, "step": 7784 }, { "epoch": 2.5226830848995463, "grad_norm": 0.9132807850837708, "learning_rate": 3.250924389416432e-07, "loss": 0.0822, "step": 7785 }, { "epoch": 2.5230071289695397, "grad_norm": 0.9615965485572815, "learning_rate": 3.2466128790768327e-07, "loss": 0.0795, "step": 7786 }, { "epoch": 2.5233311730395336, "grad_norm": 0.8738017678260803, "learning_rate": 3.2423040311412384e-07, "loss": 0.0746, "step": 7787 }, { "epoch": 2.523655217109527, "grad_norm": 0.8721928000450134, "learning_rate": 3.2379978461369976e-07, "loss": 0.0766, "step": 7788 }, { "epoch": 2.5239792611795204, "grad_norm": 0.9483674764633179, "learning_rate": 3.233694324591144e-07, "loss": 0.0831, "step": 7789 }, { "epoch": 2.524303305249514, "grad_norm": 0.8579971194267273, "learning_rate": 3.229393467030395e-07, "loss": 0.0734, "step": 7790 }, { "epoch": 2.5246273493195073, "grad_norm": 0.9196363687515259, "learning_rate": 3.225095273981127e-07, "loss": 0.0777, "step": 7791 }, { "epoch": 2.524951393389501, "grad_norm": 0.9274152517318726, "learning_rate": 3.2207997459694053e-07, "loss": 0.0823, "step": 7792 }, { "epoch": 2.5252754374594946, "grad_norm": 1.0055663585662842, "learning_rate": 3.2165068835209506e-07, "loss": 0.0832, "step": 7793 }, { "epoch": 2.525599481529488, "grad_norm": 0.8837597370147705, "learning_rate": 3.2122166871611736e-07, "loss": 0.0719, "step": 7794 }, { "epoch": 2.5259235255994814, "grad_norm": 0.9010149240493774, "learning_rate": 3.207929157415152e-07, "loss": 0.0727, "step": 7795 }, { "epoch": 2.526247569669475, "grad_norm": 0.9038609862327576, "learning_rate": 3.2036442948076395e-07, "loss": 0.0782, "step": 7796 }, { "epoch": 2.5265716137394687, "grad_norm": 0.9510405659675598, "learning_rate": 3.199362099863057e-07, "loss": 0.077, "step": 7797 }, { "epoch": 2.526895657809462, "grad_norm": 0.9556993246078491, "learning_rate": 3.19508257310551e-07, "loss": 0.0768, "step": 7798 }, { "epoch": 2.5272197018794555, "grad_norm": 0.8914854526519775, "learning_rate": 3.190805715058765e-07, "loss": 0.0772, "step": 7799 }, { "epoch": 2.527543745949449, "grad_norm": 0.8729838728904724, "learning_rate": 3.1865315262462783e-07, "loss": 0.0731, "step": 7800 }, { "epoch": 2.5278677900194424, "grad_norm": 0.9543265700340271, "learning_rate": 3.182260007191157e-07, "loss": 0.0803, "step": 7801 }, { "epoch": 2.5281918340894363, "grad_norm": 0.8208547234535217, "learning_rate": 3.1779911584161963e-07, "loss": 0.0695, "step": 7802 }, { "epoch": 2.5285158781594297, "grad_norm": 0.8887255191802979, "learning_rate": 3.173724980443868e-07, "loss": 0.0766, "step": 7803 }, { "epoch": 2.528839922229423, "grad_norm": 0.8995735049247742, "learning_rate": 3.1694614737963036e-07, "loss": 0.071, "step": 7804 }, { "epoch": 2.529163966299417, "grad_norm": 0.8485842943191528, "learning_rate": 3.165200638995328e-07, "loss": 0.0709, "step": 7805 }, { "epoch": 2.5294880103694104, "grad_norm": 0.8586344718933105, "learning_rate": 3.160942476562404e-07, "loss": 0.0716, "step": 7806 }, { "epoch": 2.529812054439404, "grad_norm": 0.8358325958251953, "learning_rate": 3.1566869870187115e-07, "loss": 0.0732, "step": 7807 }, { "epoch": 2.5301360985093972, "grad_norm": 0.9183731079101562, "learning_rate": 3.1524341708850633e-07, "loss": 0.0829, "step": 7808 }, { "epoch": 2.5304601425793907, "grad_norm": 0.8194887638092041, "learning_rate": 3.148184028681983e-07, "loss": 0.0709, "step": 7809 }, { "epoch": 2.5307841866493845, "grad_norm": 0.8380728363990784, "learning_rate": 3.1439365609296253e-07, "loss": 0.0782, "step": 7810 }, { "epoch": 2.531108230719378, "grad_norm": 0.8186984658241272, "learning_rate": 3.1396917681478595e-07, "loss": 0.0712, "step": 7811 }, { "epoch": 2.5314322747893714, "grad_norm": 0.8302901387214661, "learning_rate": 3.13544965085619e-07, "loss": 0.0681, "step": 7812 }, { "epoch": 2.531756318859365, "grad_norm": 0.911170244216919, "learning_rate": 3.1312102095738205e-07, "loss": 0.0755, "step": 7813 }, { "epoch": 2.5320803629293582, "grad_norm": 0.9269139766693115, "learning_rate": 3.12697344481962e-07, "loss": 0.0769, "step": 7814 }, { "epoch": 2.532404406999352, "grad_norm": 0.9284564256668091, "learning_rate": 3.1227393571121117e-07, "loss": 0.0753, "step": 7815 }, { "epoch": 2.5327284510693455, "grad_norm": 0.9173905253410339, "learning_rate": 3.1185079469695263e-07, "loss": 0.078, "step": 7816 }, { "epoch": 2.533052495139339, "grad_norm": 0.97370845079422, "learning_rate": 3.1142792149097297e-07, "loss": 0.0832, "step": 7817 }, { "epoch": 2.5333765392093324, "grad_norm": 0.9200071096420288, "learning_rate": 3.110053161450299e-07, "loss": 0.0781, "step": 7818 }, { "epoch": 2.533700583279326, "grad_norm": 0.9102277755737305, "learning_rate": 3.105829787108444e-07, "loss": 0.0759, "step": 7819 }, { "epoch": 2.5340246273493197, "grad_norm": 0.8209194540977478, "learning_rate": 3.10160909240107e-07, "loss": 0.0703, "step": 7820 }, { "epoch": 2.534348671419313, "grad_norm": 0.824000895023346, "learning_rate": 3.0973910778447523e-07, "loss": 0.0712, "step": 7821 }, { "epoch": 2.5346727154893065, "grad_norm": 0.8389688730239868, "learning_rate": 3.0931757439557313e-07, "loss": 0.0693, "step": 7822 }, { "epoch": 2.5349967595593, "grad_norm": 0.9663325548171997, "learning_rate": 3.08896309124993e-07, "loss": 0.0794, "step": 7823 }, { "epoch": 2.5353208036292934, "grad_norm": 0.902708888053894, "learning_rate": 3.084753120242928e-07, "loss": 0.0775, "step": 7824 }, { "epoch": 2.535644847699287, "grad_norm": 0.8854089379310608, "learning_rate": 3.0805458314499855e-07, "loss": 0.0725, "step": 7825 }, { "epoch": 2.5359688917692806, "grad_norm": 0.8439732789993286, "learning_rate": 3.076341225386037e-07, "loss": 0.0745, "step": 7826 }, { "epoch": 2.536292935839274, "grad_norm": 0.915687084197998, "learning_rate": 3.0721393025656853e-07, "loss": 0.0714, "step": 7827 }, { "epoch": 2.536616979909268, "grad_norm": 0.9396669268608093, "learning_rate": 3.0679400635032053e-07, "loss": 0.0804, "step": 7828 }, { "epoch": 2.536941023979261, "grad_norm": 0.8426708579063416, "learning_rate": 3.063743508712544e-07, "loss": 0.0727, "step": 7829 }, { "epoch": 2.537265068049255, "grad_norm": 1.0279992818832397, "learning_rate": 3.059549638707315e-07, "loss": 0.0823, "step": 7830 }, { "epoch": 2.537589112119248, "grad_norm": 0.9124258160591125, "learning_rate": 3.0553584540008176e-07, "loss": 0.0826, "step": 7831 }, { "epoch": 2.5379131561892416, "grad_norm": 0.9020891785621643, "learning_rate": 3.0511699551059927e-07, "loss": 0.0766, "step": 7832 }, { "epoch": 2.5382372002592355, "grad_norm": 0.877143144607544, "learning_rate": 3.0469841425354945e-07, "loss": 0.0779, "step": 7833 }, { "epoch": 2.538561244329229, "grad_norm": 0.9416757822036743, "learning_rate": 3.0428010168016107e-07, "loss": 0.0785, "step": 7834 }, { "epoch": 2.5388852883992223, "grad_norm": 0.9122092127799988, "learning_rate": 3.0386205784163207e-07, "loss": 0.0755, "step": 7835 }, { "epoch": 2.5392093324692158, "grad_norm": 0.8774349689483643, "learning_rate": 3.0344428278912765e-07, "loss": 0.0727, "step": 7836 }, { "epoch": 2.539533376539209, "grad_norm": 0.8674144744873047, "learning_rate": 3.030267765737774e-07, "loss": 0.0749, "step": 7837 }, { "epoch": 2.539857420609203, "grad_norm": 0.9305141568183899, "learning_rate": 3.026095392466824e-07, "loss": 0.0856, "step": 7838 }, { "epoch": 2.5401814646791965, "grad_norm": 0.8678079843521118, "learning_rate": 3.021925708589066e-07, "loss": 0.0758, "step": 7839 }, { "epoch": 2.54050550874919, "grad_norm": 0.9064074754714966, "learning_rate": 3.0177587146148435e-07, "loss": 0.0787, "step": 7840 }, { "epoch": 2.5408295528191833, "grad_norm": 0.9406875371932983, "learning_rate": 3.013594411054144e-07, "loss": 0.0781, "step": 7841 }, { "epoch": 2.5411535968891767, "grad_norm": 0.883948564529419, "learning_rate": 3.0094327984166506e-07, "loss": 0.0786, "step": 7842 }, { "epoch": 2.5414776409591706, "grad_norm": 0.9419253468513489, "learning_rate": 3.0052738772116925e-07, "loss": 0.0809, "step": 7843 }, { "epoch": 2.541801685029164, "grad_norm": 0.8636225461959839, "learning_rate": 3.001117647948287e-07, "loss": 0.0756, "step": 7844 }, { "epoch": 2.5421257290991575, "grad_norm": 0.8698762059211731, "learning_rate": 2.996964111135123e-07, "loss": 0.0747, "step": 7845 }, { "epoch": 2.542449773169151, "grad_norm": 0.9199694395065308, "learning_rate": 2.992813267280531e-07, "loss": 0.0767, "step": 7846 }, { "epoch": 2.5427738172391443, "grad_norm": 0.931075394153595, "learning_rate": 2.988665116892564e-07, "loss": 0.0843, "step": 7847 }, { "epoch": 2.543097861309138, "grad_norm": 0.8880044221878052, "learning_rate": 2.9845196604788935e-07, "loss": 0.0744, "step": 7848 }, { "epoch": 2.5434219053791316, "grad_norm": 0.9354466199874878, "learning_rate": 2.980376898546888e-07, "loss": 0.0771, "step": 7849 }, { "epoch": 2.543745949449125, "grad_norm": 0.8996589183807373, "learning_rate": 2.976236831603588e-07, "loss": 0.0777, "step": 7850 }, { "epoch": 2.5440699935191184, "grad_norm": 0.9524653553962708, "learning_rate": 2.972099460155689e-07, "loss": 0.0783, "step": 7851 }, { "epoch": 2.544394037589112, "grad_norm": 0.928536057472229, "learning_rate": 2.9679647847095735e-07, "loss": 0.0809, "step": 7852 }, { "epoch": 2.5447180816591057, "grad_norm": 0.8683675527572632, "learning_rate": 2.9638328057712775e-07, "loss": 0.0703, "step": 7853 }, { "epoch": 2.545042125729099, "grad_norm": 0.9104581475257874, "learning_rate": 2.9597035238465214e-07, "loss": 0.0828, "step": 7854 }, { "epoch": 2.5453661697990926, "grad_norm": 0.9258098602294922, "learning_rate": 2.9555769394406934e-07, "loss": 0.0819, "step": 7855 }, { "epoch": 2.5456902138690864, "grad_norm": 0.8759539723396301, "learning_rate": 2.9514530530588367e-07, "loss": 0.0726, "step": 7856 }, { "epoch": 2.54601425793908, "grad_norm": 0.9606726169586182, "learning_rate": 2.947331865205677e-07, "loss": 0.079, "step": 7857 }, { "epoch": 2.5463383020090733, "grad_norm": 0.9050784111022949, "learning_rate": 2.943213376385612e-07, "loss": 0.0774, "step": 7858 }, { "epoch": 2.5466623460790667, "grad_norm": 0.9557004570960999, "learning_rate": 2.9390975871027046e-07, "loss": 0.0804, "step": 7859 }, { "epoch": 2.54698639014906, "grad_norm": 0.8810763955116272, "learning_rate": 2.934984497860691e-07, "loss": 0.0795, "step": 7860 }, { "epoch": 2.547310434219054, "grad_norm": 0.8604434728622437, "learning_rate": 2.9308741091629596e-07, "loss": 0.0784, "step": 7861 }, { "epoch": 2.5476344782890474, "grad_norm": 0.8944520950317383, "learning_rate": 2.9267664215126e-07, "loss": 0.075, "step": 7862 }, { "epoch": 2.547958522359041, "grad_norm": 0.9683983325958252, "learning_rate": 2.9226614354123356e-07, "loss": 0.0823, "step": 7863 }, { "epoch": 2.5482825664290343, "grad_norm": 0.8846125602722168, "learning_rate": 2.9185591513645947e-07, "loss": 0.0787, "step": 7864 }, { "epoch": 2.5486066104990277, "grad_norm": 0.9127246737480164, "learning_rate": 2.914459569871447e-07, "loss": 0.0744, "step": 7865 }, { "epoch": 2.5489306545690216, "grad_norm": 0.8919868469238281, "learning_rate": 2.91036269143464e-07, "loss": 0.0749, "step": 7866 }, { "epoch": 2.549254698639015, "grad_norm": 0.9504753351211548, "learning_rate": 2.9062685165555963e-07, "loss": 0.0729, "step": 7867 }, { "epoch": 2.5495787427090084, "grad_norm": 1.0182647705078125, "learning_rate": 2.9021770457354046e-07, "loss": 0.077, "step": 7868 }, { "epoch": 2.549902786779002, "grad_norm": 0.923927366733551, "learning_rate": 2.8980882794748227e-07, "loss": 0.0802, "step": 7869 }, { "epoch": 2.5502268308489953, "grad_norm": 0.9612078666687012, "learning_rate": 2.894002218274261e-07, "loss": 0.0803, "step": 7870 }, { "epoch": 2.550550874918989, "grad_norm": 0.9394988417625427, "learning_rate": 2.8899188626338363e-07, "loss": 0.0846, "step": 7871 }, { "epoch": 2.5508749189889826, "grad_norm": 0.9127728343009949, "learning_rate": 2.8858382130532965e-07, "loss": 0.0777, "step": 7872 }, { "epoch": 2.551198963058976, "grad_norm": 0.8843898773193359, "learning_rate": 2.8817602700320747e-07, "loss": 0.078, "step": 7873 }, { "epoch": 2.5515230071289694, "grad_norm": 0.9183049201965332, "learning_rate": 2.8776850340692777e-07, "loss": 0.0765, "step": 7874 }, { "epoch": 2.551847051198963, "grad_norm": 0.9552394151687622, "learning_rate": 2.87361250566367e-07, "loss": 0.0776, "step": 7875 }, { "epoch": 2.5521710952689567, "grad_norm": 0.9023454189300537, "learning_rate": 2.869542685313692e-07, "loss": 0.0791, "step": 7876 }, { "epoch": 2.55249513933895, "grad_norm": 0.908761203289032, "learning_rate": 2.865475573517451e-07, "loss": 0.073, "step": 7877 }, { "epoch": 2.5528191834089435, "grad_norm": 0.954011082649231, "learning_rate": 2.8614111707727267e-07, "loss": 0.0789, "step": 7878 }, { "epoch": 2.5531432274789374, "grad_norm": 0.9677690267562866, "learning_rate": 2.8573494775769485e-07, "loss": 0.0789, "step": 7879 }, { "epoch": 2.5534672715489304, "grad_norm": 0.9998055696487427, "learning_rate": 2.853290494427238e-07, "loss": 0.0818, "step": 7880 }, { "epoch": 2.5537913156189243, "grad_norm": 0.9223211407661438, "learning_rate": 2.8492342218203766e-07, "loss": 0.0832, "step": 7881 }, { "epoch": 2.5541153596889177, "grad_norm": 0.8618203997612, "learning_rate": 2.845180660252808e-07, "loss": 0.0725, "step": 7882 }, { "epoch": 2.554439403758911, "grad_norm": 0.920971691608429, "learning_rate": 2.8411298102206524e-07, "loss": 0.08, "step": 7883 }, { "epoch": 2.554763447828905, "grad_norm": 0.7992531061172485, "learning_rate": 2.837081672219694e-07, "loss": 0.0707, "step": 7884 }, { "epoch": 2.5550874918988984, "grad_norm": 0.869141161441803, "learning_rate": 2.833036246745385e-07, "loss": 0.0752, "step": 7885 }, { "epoch": 2.555411535968892, "grad_norm": 0.9022712707519531, "learning_rate": 2.828993534292851e-07, "loss": 0.0765, "step": 7886 }, { "epoch": 2.5557355800388852, "grad_norm": 0.938575804233551, "learning_rate": 2.824953535356872e-07, "loss": 0.0774, "step": 7887 }, { "epoch": 2.5560596241088787, "grad_norm": 0.8692349791526794, "learning_rate": 2.820916250431907e-07, "loss": 0.0685, "step": 7888 }, { "epoch": 2.5563836681788725, "grad_norm": 0.8855343461036682, "learning_rate": 2.8168816800120845e-07, "loss": 0.0721, "step": 7889 }, { "epoch": 2.556707712248866, "grad_norm": 0.863601803779602, "learning_rate": 2.812849824591196e-07, "loss": 0.0754, "step": 7890 }, { "epoch": 2.5570317563188594, "grad_norm": 0.883549690246582, "learning_rate": 2.808820684662705e-07, "loss": 0.0752, "step": 7891 }, { "epoch": 2.557355800388853, "grad_norm": 0.8747949004173279, "learning_rate": 2.804794260719726e-07, "loss": 0.0797, "step": 7892 }, { "epoch": 2.557679844458846, "grad_norm": 0.8760228157043457, "learning_rate": 2.800770553255072e-07, "loss": 0.0759, "step": 7893 }, { "epoch": 2.55800388852884, "grad_norm": 1.0065068006515503, "learning_rate": 2.796749562761186e-07, "loss": 0.0788, "step": 7894 }, { "epoch": 2.5583279325988335, "grad_norm": 0.9806517958641052, "learning_rate": 2.7927312897302217e-07, "loss": 0.0827, "step": 7895 }, { "epoch": 2.558651976668827, "grad_norm": 0.9483767747879028, "learning_rate": 2.7887157346539574e-07, "loss": 0.0805, "step": 7896 }, { "epoch": 2.5589760207388204, "grad_norm": 0.8807239532470703, "learning_rate": 2.7847028980238666e-07, "loss": 0.0723, "step": 7897 }, { "epoch": 2.559300064808814, "grad_norm": 0.8338807225227356, "learning_rate": 2.780692780331079e-07, "loss": 0.0682, "step": 7898 }, { "epoch": 2.5596241088788076, "grad_norm": 0.9363901019096375, "learning_rate": 2.7766853820663963e-07, "loss": 0.0807, "step": 7899 }, { "epoch": 2.559948152948801, "grad_norm": 0.8420773148536682, "learning_rate": 2.7726807037202903e-07, "loss": 0.0716, "step": 7900 }, { "epoch": 2.5602721970187945, "grad_norm": 0.9283404350280762, "learning_rate": 2.7686787457828796e-07, "loss": 0.0797, "step": 7901 }, { "epoch": 2.560596241088788, "grad_norm": 0.864181399345398, "learning_rate": 2.764679508743981e-07, "loss": 0.0729, "step": 7902 }, { "epoch": 2.5609202851587813, "grad_norm": 0.89631187915802, "learning_rate": 2.7606829930930555e-07, "loss": 0.0727, "step": 7903 }, { "epoch": 2.561244329228775, "grad_norm": 0.8723975419998169, "learning_rate": 2.7566891993192347e-07, "loss": 0.0751, "step": 7904 }, { "epoch": 2.5615683732987686, "grad_norm": 0.827847957611084, "learning_rate": 2.752698127911327e-07, "loss": 0.0709, "step": 7905 }, { "epoch": 2.561892417368762, "grad_norm": 0.8541545271873474, "learning_rate": 2.748709779357794e-07, "loss": 0.0735, "step": 7906 }, { "epoch": 2.562216461438756, "grad_norm": 0.9275196194648743, "learning_rate": 2.744724154146777e-07, "loss": 0.0831, "step": 7907 }, { "epoch": 2.5625405055087493, "grad_norm": 0.8933717012405396, "learning_rate": 2.740741252766077e-07, "loss": 0.0717, "step": 7908 }, { "epoch": 2.5628645495787428, "grad_norm": 0.8624947667121887, "learning_rate": 2.736761075703165e-07, "loss": 0.0727, "step": 7909 }, { "epoch": 2.563188593648736, "grad_norm": 0.874412477016449, "learning_rate": 2.732783623445168e-07, "loss": 0.0767, "step": 7910 }, { "epoch": 2.5635126377187296, "grad_norm": 0.9391658902168274, "learning_rate": 2.728808896478891e-07, "loss": 0.0822, "step": 7911 }, { "epoch": 2.5638366817887235, "grad_norm": 0.8865946531295776, "learning_rate": 2.7248368952908055e-07, "loss": 0.0781, "step": 7912 }, { "epoch": 2.564160725858717, "grad_norm": 0.8825016021728516, "learning_rate": 2.7208676203670406e-07, "loss": 0.0784, "step": 7913 }, { "epoch": 2.5644847699287103, "grad_norm": 0.8830437660217285, "learning_rate": 2.716901072193404e-07, "loss": 0.0709, "step": 7914 }, { "epoch": 2.5648088139987038, "grad_norm": 0.9725536704063416, "learning_rate": 2.71293725125536e-07, "loss": 0.0849, "step": 7915 }, { "epoch": 2.565132858068697, "grad_norm": 0.9594155550003052, "learning_rate": 2.7089761580380346e-07, "loss": 0.0754, "step": 7916 }, { "epoch": 2.565456902138691, "grad_norm": 0.8194584250450134, "learning_rate": 2.7050177930262406e-07, "loss": 0.0711, "step": 7917 }, { "epoch": 2.5657809462086845, "grad_norm": 0.8426803350448608, "learning_rate": 2.701062156704434e-07, "loss": 0.0744, "step": 7918 }, { "epoch": 2.566104990278678, "grad_norm": 0.9032554030418396, "learning_rate": 2.697109249556748e-07, "loss": 0.0805, "step": 7919 }, { "epoch": 2.5664290343486713, "grad_norm": 0.9481576681137085, "learning_rate": 2.6931590720669807e-07, "loss": 0.085, "step": 7920 }, { "epoch": 2.5667530784186647, "grad_norm": 0.9876210689544678, "learning_rate": 2.6892116247185964e-07, "loss": 0.0689, "step": 7921 }, { "epoch": 2.5670771224886586, "grad_norm": 0.8956534266471863, "learning_rate": 2.6852669079947294e-07, "loss": 0.0766, "step": 7922 }, { "epoch": 2.567401166558652, "grad_norm": 0.9033135771751404, "learning_rate": 2.681324922378159e-07, "loss": 0.0742, "step": 7923 }, { "epoch": 2.5677252106286454, "grad_norm": 0.8424497246742249, "learning_rate": 2.6773856683513677e-07, "loss": 0.0726, "step": 7924 }, { "epoch": 2.568049254698639, "grad_norm": 0.8959171772003174, "learning_rate": 2.673449146396459e-07, "loss": 0.0754, "step": 7925 }, { "epoch": 2.5683732987686323, "grad_norm": 0.9140695929527283, "learning_rate": 2.6695153569952475e-07, "loss": 0.0819, "step": 7926 }, { "epoch": 2.568697342838626, "grad_norm": 0.8918660283088684, "learning_rate": 2.665584300629176e-07, "loss": 0.0733, "step": 7927 }, { "epoch": 2.5690213869086196, "grad_norm": 0.819005012512207, "learning_rate": 2.661655977779373e-07, "loss": 0.0703, "step": 7928 }, { "epoch": 2.569345430978613, "grad_norm": 0.8803487420082092, "learning_rate": 2.6577303889266244e-07, "loss": 0.0717, "step": 7929 }, { "epoch": 2.569669475048607, "grad_norm": 0.9302169680595398, "learning_rate": 2.6538075345513864e-07, "loss": 0.0779, "step": 7930 }, { "epoch": 2.5699935191186, "grad_norm": 0.9317216277122498, "learning_rate": 2.6498874151337865e-07, "loss": 0.0785, "step": 7931 }, { "epoch": 2.5703175631885937, "grad_norm": 0.910908579826355, "learning_rate": 2.6459700311535885e-07, "loss": 0.0759, "step": 7932 }, { "epoch": 2.570641607258587, "grad_norm": 0.8003696203231812, "learning_rate": 2.642055383090264e-07, "loss": 0.0688, "step": 7933 }, { "epoch": 2.5709656513285806, "grad_norm": 0.8143637776374817, "learning_rate": 2.638143471422916e-07, "loss": 0.0736, "step": 7934 }, { "epoch": 2.5712896953985744, "grad_norm": 0.8914068341255188, "learning_rate": 2.634234296630328e-07, "loss": 0.0732, "step": 7935 }, { "epoch": 2.571613739468568, "grad_norm": 0.8780104517936707, "learning_rate": 2.6303278591909426e-07, "loss": 0.0765, "step": 7936 }, { "epoch": 2.5719377835385613, "grad_norm": 0.9786894917488098, "learning_rate": 2.626424159582872e-07, "loss": 0.0779, "step": 7937 }, { "epoch": 2.5722618276085547, "grad_norm": 0.892594575881958, "learning_rate": 2.622523198283894e-07, "loss": 0.0763, "step": 7938 }, { "epoch": 2.572585871678548, "grad_norm": 0.9260560870170593, "learning_rate": 2.6186249757714474e-07, "loss": 0.0812, "step": 7939 }, { "epoch": 2.572909915748542, "grad_norm": 0.8858899474143982, "learning_rate": 2.614729492522633e-07, "loss": 0.0703, "step": 7940 }, { "epoch": 2.5732339598185354, "grad_norm": 0.9430214762687683, "learning_rate": 2.61083674901422e-07, "loss": 0.0809, "step": 7941 }, { "epoch": 2.573558003888529, "grad_norm": 0.841761589050293, "learning_rate": 2.6069467457226467e-07, "loss": 0.073, "step": 7942 }, { "epoch": 2.5738820479585223, "grad_norm": 0.9282339215278625, "learning_rate": 2.6030594831240094e-07, "loss": 0.0765, "step": 7943 }, { "epoch": 2.5742060920285157, "grad_norm": 0.875710666179657, "learning_rate": 2.599174961694073e-07, "loss": 0.0785, "step": 7944 }, { "epoch": 2.5745301360985096, "grad_norm": 0.8414289355278015, "learning_rate": 2.595293181908265e-07, "loss": 0.0697, "step": 7945 }, { "epoch": 2.574854180168503, "grad_norm": 0.9343950152397156, "learning_rate": 2.59141414424168e-07, "loss": 0.0788, "step": 7946 }, { "epoch": 2.5751782242384964, "grad_norm": 0.9397087693214417, "learning_rate": 2.587537849169064e-07, "loss": 0.0778, "step": 7947 }, { "epoch": 2.57550226830849, "grad_norm": 0.9125270843505859, "learning_rate": 2.5836642971648534e-07, "loss": 0.0782, "step": 7948 }, { "epoch": 2.5758263123784833, "grad_norm": 0.8976326584815979, "learning_rate": 2.579793488703122e-07, "loss": 0.0714, "step": 7949 }, { "epoch": 2.576150356448477, "grad_norm": 0.8638294339179993, "learning_rate": 2.5759254242576246e-07, "loss": 0.0724, "step": 7950 }, { "epoch": 2.5764744005184705, "grad_norm": 0.88809734582901, "learning_rate": 2.572060104301771e-07, "loss": 0.077, "step": 7951 }, { "epoch": 2.576798444588464, "grad_norm": 0.9091419577598572, "learning_rate": 2.5681975293086443e-07, "loss": 0.0752, "step": 7952 }, { "epoch": 2.5771224886584574, "grad_norm": 0.9178985953330994, "learning_rate": 2.564337699750985e-07, "loss": 0.0796, "step": 7953 }, { "epoch": 2.577446532728451, "grad_norm": 0.9933173656463623, "learning_rate": 2.560480616101191e-07, "loss": 0.08, "step": 7954 }, { "epoch": 2.5777705767984447, "grad_norm": 0.8052213191986084, "learning_rate": 2.556626278831345e-07, "loss": 0.0671, "step": 7955 }, { "epoch": 2.578094620868438, "grad_norm": 0.916199266910553, "learning_rate": 2.552774688413165e-07, "loss": 0.073, "step": 7956 }, { "epoch": 2.5784186649384315, "grad_norm": 0.8220997452735901, "learning_rate": 2.5489258453180676e-07, "loss": 0.0688, "step": 7957 }, { "epoch": 2.5787427090084254, "grad_norm": 0.8971430063247681, "learning_rate": 2.545079750017099e-07, "loss": 0.0735, "step": 7958 }, { "epoch": 2.579066753078419, "grad_norm": 0.9529185891151428, "learning_rate": 2.541236402980987e-07, "loss": 0.0812, "step": 7959 }, { "epoch": 2.5793907971484122, "grad_norm": 0.8507676124572754, "learning_rate": 2.5373958046801207e-07, "loss": 0.0739, "step": 7960 }, { "epoch": 2.5797148412184057, "grad_norm": 1.0006403923034668, "learning_rate": 2.5335579555845563e-07, "loss": 0.0805, "step": 7961 }, { "epoch": 2.580038885288399, "grad_norm": 0.91471928358078, "learning_rate": 2.5297228561640075e-07, "loss": 0.0799, "step": 7962 }, { "epoch": 2.580362929358393, "grad_norm": 0.8677946925163269, "learning_rate": 2.5258905068878433e-07, "loss": 0.0774, "step": 7963 }, { "epoch": 2.5806869734283864, "grad_norm": 0.916735827922821, "learning_rate": 2.522060908225127e-07, "loss": 0.074, "step": 7964 }, { "epoch": 2.58101101749838, "grad_norm": 0.9161584377288818, "learning_rate": 2.518234060644545e-07, "loss": 0.0721, "step": 7965 }, { "epoch": 2.5813350615683732, "grad_norm": 0.9978834390640259, "learning_rate": 2.5144099646144724e-07, "loss": 0.0773, "step": 7966 }, { "epoch": 2.5816591056383666, "grad_norm": 0.843064546585083, "learning_rate": 2.510588620602947e-07, "loss": 0.0709, "step": 7967 }, { "epoch": 2.5819831497083605, "grad_norm": 0.8937796354293823, "learning_rate": 2.506770029077657e-07, "loss": 0.076, "step": 7968 }, { "epoch": 2.582307193778354, "grad_norm": 0.8655698299407959, "learning_rate": 2.502954190505963e-07, "loss": 0.0783, "step": 7969 }, { "epoch": 2.5826312378483474, "grad_norm": 0.8840304017066956, "learning_rate": 2.499141105354894e-07, "loss": 0.0797, "step": 7970 }, { "epoch": 2.582955281918341, "grad_norm": 0.8878140449523926, "learning_rate": 2.495330774091126e-07, "loss": 0.0743, "step": 7971 }, { "epoch": 2.583279325988334, "grad_norm": 0.9055677056312561, "learning_rate": 2.4915231971810064e-07, "loss": 0.0733, "step": 7972 }, { "epoch": 2.583603370058328, "grad_norm": 0.8808376789093018, "learning_rate": 2.4877183750905475e-07, "loss": 0.075, "step": 7973 }, { "epoch": 2.5839274141283215, "grad_norm": 0.8990373611450195, "learning_rate": 2.483916308285425e-07, "loss": 0.0744, "step": 7974 }, { "epoch": 2.584251458198315, "grad_norm": 0.8307132720947266, "learning_rate": 2.4801169972309745e-07, "loss": 0.066, "step": 7975 }, { "epoch": 2.5845755022683083, "grad_norm": 0.9437209963798523, "learning_rate": 2.4763204423921937e-07, "loss": 0.0841, "step": 7976 }, { "epoch": 2.5848995463383018, "grad_norm": 0.9493826627731323, "learning_rate": 2.47252664423375e-07, "loss": 0.0777, "step": 7977 }, { "epoch": 2.5852235904082956, "grad_norm": 0.8011552691459656, "learning_rate": 2.4687356032199516e-07, "loss": 0.0667, "step": 7978 }, { "epoch": 2.585547634478289, "grad_norm": 0.8402678966522217, "learning_rate": 2.464947319814806e-07, "loss": 0.0706, "step": 7979 }, { "epoch": 2.5858716785482825, "grad_norm": 0.9520148634910583, "learning_rate": 2.461161794481945e-07, "loss": 0.0821, "step": 7980 }, { "epoch": 2.5861957226182763, "grad_norm": 0.8773624897003174, "learning_rate": 2.4573790276846947e-07, "loss": 0.0786, "step": 7981 }, { "epoch": 2.5865197666882693, "grad_norm": 0.8360825181007385, "learning_rate": 2.453599019886016e-07, "loss": 0.0694, "step": 7982 }, { "epoch": 2.586843810758263, "grad_norm": 0.9423222541809082, "learning_rate": 2.449821771548552e-07, "loss": 0.0773, "step": 7983 }, { "epoch": 2.5871678548282566, "grad_norm": 0.8453056216239929, "learning_rate": 2.446047283134606e-07, "loss": 0.0694, "step": 7984 }, { "epoch": 2.58749189889825, "grad_norm": 0.8733270764350891, "learning_rate": 2.4422755551061246e-07, "loss": 0.0732, "step": 7985 }, { "epoch": 2.587815942968244, "grad_norm": 0.879425048828125, "learning_rate": 2.4385065879247466e-07, "loss": 0.076, "step": 7986 }, { "epoch": 2.5881399870382373, "grad_norm": 0.8516666889190674, "learning_rate": 2.4347403820517423e-07, "loss": 0.0745, "step": 7987 }, { "epoch": 2.5884640311082308, "grad_norm": 0.8790467977523804, "learning_rate": 2.4309769379480764e-07, "loss": 0.0691, "step": 7988 }, { "epoch": 2.588788075178224, "grad_norm": 0.9171347618103027, "learning_rate": 2.427216256074341e-07, "loss": 0.0751, "step": 7989 }, { "epoch": 2.5891121192482176, "grad_norm": 0.9451388120651245, "learning_rate": 2.423458336890816e-07, "loss": 0.0833, "step": 7990 }, { "epoch": 2.5894361633182115, "grad_norm": 0.8528581857681274, "learning_rate": 2.4197031808574327e-07, "loss": 0.0739, "step": 7991 }, { "epoch": 2.589760207388205, "grad_norm": 0.8192191123962402, "learning_rate": 2.4159507884337877e-07, "loss": 0.0716, "step": 7992 }, { "epoch": 2.5900842514581983, "grad_norm": 0.9669711589813232, "learning_rate": 2.4122011600791334e-07, "loss": 0.0794, "step": 7993 }, { "epoch": 2.5904082955281917, "grad_norm": 0.9040258526802063, "learning_rate": 2.408454296252397e-07, "loss": 0.081, "step": 7994 }, { "epoch": 2.590732339598185, "grad_norm": 0.9025174975395203, "learning_rate": 2.404710197412144e-07, "loss": 0.0758, "step": 7995 }, { "epoch": 2.591056383668179, "grad_norm": 0.9596523642539978, "learning_rate": 2.4009688640166257e-07, "loss": 0.0827, "step": 7996 }, { "epoch": 2.5913804277381725, "grad_norm": 0.915219783782959, "learning_rate": 2.397230296523742e-07, "loss": 0.0773, "step": 7997 }, { "epoch": 2.591704471808166, "grad_norm": 0.8500992059707642, "learning_rate": 2.3934944953910576e-07, "loss": 0.0725, "step": 7998 }, { "epoch": 2.5920285158781593, "grad_norm": 0.8785587549209595, "learning_rate": 2.3897614610757984e-07, "loss": 0.0747, "step": 7999 }, { "epoch": 2.5923525599481527, "grad_norm": 0.8524248003959656, "learning_rate": 2.386031194034855e-07, "loss": 0.0693, "step": 8000 }, { "epoch": 2.5926766040181466, "grad_norm": 0.9006550312042236, "learning_rate": 2.3823036947247773e-07, "loss": 0.0764, "step": 8001 }, { "epoch": 2.59300064808814, "grad_norm": 0.8533654808998108, "learning_rate": 2.3785789636017604e-07, "loss": 0.0735, "step": 8002 }, { "epoch": 2.5933246921581334, "grad_norm": 0.8423067927360535, "learning_rate": 2.374857001121697e-07, "loss": 0.0731, "step": 8003 }, { "epoch": 2.593648736228127, "grad_norm": 0.9366042017936707, "learning_rate": 2.371137807740101e-07, "loss": 0.0797, "step": 8004 }, { "epoch": 2.5939727802981203, "grad_norm": 1.1670465469360352, "learning_rate": 2.3674213839121745e-07, "loss": 0.0781, "step": 8005 }, { "epoch": 2.594296824368114, "grad_norm": 0.9376868009567261, "learning_rate": 2.3637077300927762e-07, "loss": 0.0815, "step": 8006 }, { "epoch": 2.5946208684381076, "grad_norm": 0.8395928740501404, "learning_rate": 2.3599968467364037e-07, "loss": 0.0699, "step": 8007 }, { "epoch": 2.594944912508101, "grad_norm": 0.8741575479507446, "learning_rate": 2.3562887342972574e-07, "loss": 0.075, "step": 8008 }, { "epoch": 2.595268956578095, "grad_norm": 0.9370665550231934, "learning_rate": 2.3525833932291491e-07, "loss": 0.0759, "step": 8009 }, { "epoch": 2.5955930006480883, "grad_norm": 0.9461312890052795, "learning_rate": 2.3488808239855998e-07, "loss": 0.0786, "step": 8010 }, { "epoch": 2.5959170447180817, "grad_norm": 0.8575646281242371, "learning_rate": 2.3451810270197494e-07, "loss": 0.0711, "step": 8011 }, { "epoch": 2.596241088788075, "grad_norm": 0.9234625697135925, "learning_rate": 2.341484002784436e-07, "loss": 0.0805, "step": 8012 }, { "epoch": 2.5965651328580686, "grad_norm": 0.8345510959625244, "learning_rate": 2.3377897517321224e-07, "loss": 0.0734, "step": 8013 }, { "epoch": 2.5968891769280624, "grad_norm": 0.8022050261497498, "learning_rate": 2.3340982743149582e-07, "loss": 0.0669, "step": 8014 }, { "epoch": 2.597213220998056, "grad_norm": 0.9139599800109863, "learning_rate": 2.3304095709847402e-07, "loss": 0.0778, "step": 8015 }, { "epoch": 2.5975372650680493, "grad_norm": 0.863365888595581, "learning_rate": 2.3267236421929323e-07, "loss": 0.0714, "step": 8016 }, { "epoch": 2.5978613091380427, "grad_norm": 0.9220430254936218, "learning_rate": 2.3230404883906626e-07, "loss": 0.0783, "step": 8017 }, { "epoch": 2.598185353208036, "grad_norm": 0.9168446660041809, "learning_rate": 2.319360110028701e-07, "loss": 0.0742, "step": 8018 }, { "epoch": 2.59850939727803, "grad_norm": 0.9191902875900269, "learning_rate": 2.3156825075574956e-07, "loss": 0.0682, "step": 8019 }, { "epoch": 2.5988334413480234, "grad_norm": 0.9062588810920715, "learning_rate": 2.312007681427153e-07, "loss": 0.0841, "step": 8020 }, { "epoch": 2.599157485418017, "grad_norm": 0.8564749956130981, "learning_rate": 2.30833563208743e-07, "loss": 0.077, "step": 8021 }, { "epoch": 2.5994815294880103, "grad_norm": 0.8729365468025208, "learning_rate": 2.304666359987756e-07, "loss": 0.0756, "step": 8022 }, { "epoch": 2.5998055735580037, "grad_norm": 0.9407651424407959, "learning_rate": 2.300999865577211e-07, "loss": 0.0796, "step": 8023 }, { "epoch": 2.6001296176279975, "grad_norm": 0.9075953364372253, "learning_rate": 2.2973361493045382e-07, "loss": 0.0782, "step": 8024 }, { "epoch": 2.600453661697991, "grad_norm": 0.8575737476348877, "learning_rate": 2.293675211618146e-07, "loss": 0.0762, "step": 8025 }, { "epoch": 2.6007777057679844, "grad_norm": 0.8725225329399109, "learning_rate": 2.2900170529660898e-07, "loss": 0.072, "step": 8026 }, { "epoch": 2.601101749837978, "grad_norm": 0.8768883943557739, "learning_rate": 2.2863616737960976e-07, "loss": 0.0757, "step": 8027 }, { "epoch": 2.6014257939079712, "grad_norm": 0.8702290654182434, "learning_rate": 2.2827090745555502e-07, "loss": 0.0729, "step": 8028 }, { "epoch": 2.601749837977965, "grad_norm": 0.9248967170715332, "learning_rate": 2.279059255691493e-07, "loss": 0.0831, "step": 8029 }, { "epoch": 2.6020738820479585, "grad_norm": 0.8153299689292908, "learning_rate": 2.2754122176506244e-07, "loss": 0.0705, "step": 8030 }, { "epoch": 2.602397926117952, "grad_norm": 0.8644693493843079, "learning_rate": 2.271767960879312e-07, "loss": 0.0724, "step": 8031 }, { "epoch": 2.602721970187946, "grad_norm": 0.8582781553268433, "learning_rate": 2.2681264858235797e-07, "loss": 0.0707, "step": 8032 }, { "epoch": 2.6030460142579392, "grad_norm": 0.8408800959587097, "learning_rate": 2.2644877929290932e-07, "loss": 0.0761, "step": 8033 }, { "epoch": 2.6033700583279327, "grad_norm": 0.8595169186592102, "learning_rate": 2.2608518826412128e-07, "loss": 0.0719, "step": 8034 }, { "epoch": 2.603694102397926, "grad_norm": 1.0227017402648926, "learning_rate": 2.2572187554049274e-07, "loss": 0.0763, "step": 8035 }, { "epoch": 2.6040181464679195, "grad_norm": 0.8645336627960205, "learning_rate": 2.2535884116648976e-07, "loss": 0.0726, "step": 8036 }, { "epoch": 2.6043421905379134, "grad_norm": 0.8675413727760315, "learning_rate": 2.2499608518654432e-07, "loss": 0.0754, "step": 8037 }, { "epoch": 2.604666234607907, "grad_norm": 0.9440889358520508, "learning_rate": 2.2463360764505448e-07, "loss": 0.073, "step": 8038 }, { "epoch": 2.6049902786779002, "grad_norm": 0.9420521855354309, "learning_rate": 2.2427140858638424e-07, "loss": 0.0767, "step": 8039 }, { "epoch": 2.6053143227478937, "grad_norm": 0.8823314905166626, "learning_rate": 2.2390948805486174e-07, "loss": 0.0723, "step": 8040 }, { "epoch": 2.605638366817887, "grad_norm": 0.7878745794296265, "learning_rate": 2.2354784609478485e-07, "loss": 0.0685, "step": 8041 }, { "epoch": 2.605962410887881, "grad_norm": 0.9195688366889954, "learning_rate": 2.2318648275041267e-07, "loss": 0.0742, "step": 8042 }, { "epoch": 2.6062864549578744, "grad_norm": 0.8763304352760315, "learning_rate": 2.2282539806597476e-07, "loss": 0.0712, "step": 8043 }, { "epoch": 2.606610499027868, "grad_norm": 0.8439639806747437, "learning_rate": 2.22464592085663e-07, "loss": 0.0722, "step": 8044 }, { "epoch": 2.606934543097861, "grad_norm": 0.8869640231132507, "learning_rate": 2.2210406485363656e-07, "loss": 0.0726, "step": 8045 }, { "epoch": 2.6072585871678546, "grad_norm": 0.9663493633270264, "learning_rate": 2.217438164140212e-07, "loss": 0.0801, "step": 8046 }, { "epoch": 2.6075826312378485, "grad_norm": 0.9520936608314514, "learning_rate": 2.213838468109075e-07, "loss": 0.0728, "step": 8047 }, { "epoch": 2.607906675307842, "grad_norm": 0.9151571989059448, "learning_rate": 2.210241560883525e-07, "loss": 0.0816, "step": 8048 }, { "epoch": 2.6082307193778353, "grad_norm": 0.9211403131484985, "learning_rate": 2.206647442903781e-07, "loss": 0.0747, "step": 8049 }, { "epoch": 2.6085547634478288, "grad_norm": 0.8817241787910461, "learning_rate": 2.2030561146097363e-07, "loss": 0.0784, "step": 8050 }, { "epoch": 2.608878807517822, "grad_norm": 0.8810299634933472, "learning_rate": 2.199467576440928e-07, "loss": 0.0756, "step": 8051 }, { "epoch": 2.609202851587816, "grad_norm": 0.9325541853904724, "learning_rate": 2.195881828836563e-07, "loss": 0.0813, "step": 8052 }, { "epoch": 2.6095268956578095, "grad_norm": 0.9286937117576599, "learning_rate": 2.1922988722355044e-07, "loss": 0.0797, "step": 8053 }, { "epoch": 2.609850939727803, "grad_norm": 0.8863601684570312, "learning_rate": 2.188718707076265e-07, "loss": 0.0763, "step": 8054 }, { "epoch": 2.6101749837977968, "grad_norm": 0.9030467867851257, "learning_rate": 2.185141333797025e-07, "loss": 0.0777, "step": 8055 }, { "epoch": 2.6104990278677898, "grad_norm": 0.8651726841926575, "learning_rate": 2.181566752835626e-07, "loss": 0.0719, "step": 8056 }, { "epoch": 2.6108230719377836, "grad_norm": 0.9293838739395142, "learning_rate": 2.177994964629554e-07, "loss": 0.0827, "step": 8057 }, { "epoch": 2.611147116007777, "grad_norm": 0.8936684727668762, "learning_rate": 2.174425969615962e-07, "loss": 0.0734, "step": 8058 }, { "epoch": 2.6114711600777705, "grad_norm": 0.8786556720733643, "learning_rate": 2.1708597682316645e-07, "loss": 0.0698, "step": 8059 }, { "epoch": 2.6117952041477643, "grad_norm": 0.9012254476547241, "learning_rate": 2.1672963609131292e-07, "loss": 0.0762, "step": 8060 }, { "epoch": 2.6121192482177578, "grad_norm": 0.8727022409439087, "learning_rate": 2.1637357480964821e-07, "loss": 0.0766, "step": 8061 }, { "epoch": 2.612443292287751, "grad_norm": 0.8324522376060486, "learning_rate": 2.1601779302175026e-07, "loss": 0.0743, "step": 8062 }, { "epoch": 2.6127673363577446, "grad_norm": 0.9140172004699707, "learning_rate": 2.1566229077116445e-07, "loss": 0.079, "step": 8063 }, { "epoch": 2.613091380427738, "grad_norm": 0.9623644948005676, "learning_rate": 2.1530706810139913e-07, "loss": 0.0846, "step": 8064 }, { "epoch": 2.613415424497732, "grad_norm": 0.9511628746986389, "learning_rate": 2.1495212505593221e-07, "loss": 0.0781, "step": 8065 }, { "epoch": 2.6137394685677253, "grad_norm": 0.900143563747406, "learning_rate": 2.1459746167820372e-07, "loss": 0.0783, "step": 8066 }, { "epoch": 2.6140635126377187, "grad_norm": 0.9561235904693604, "learning_rate": 2.142430780116214e-07, "loss": 0.079, "step": 8067 }, { "epoch": 2.614387556707712, "grad_norm": 0.8517616391181946, "learning_rate": 2.1388897409955867e-07, "loss": 0.072, "step": 8068 }, { "epoch": 2.6147116007777056, "grad_norm": 0.9002208113670349, "learning_rate": 2.1353514998535414e-07, "loss": 0.0777, "step": 8069 }, { "epoch": 2.6150356448476995, "grad_norm": 0.918381929397583, "learning_rate": 2.1318160571231316e-07, "loss": 0.0763, "step": 8070 }, { "epoch": 2.615359688917693, "grad_norm": 0.864824652671814, "learning_rate": 2.128283413237045e-07, "loss": 0.0759, "step": 8071 }, { "epoch": 2.6156837329876863, "grad_norm": 0.9191448092460632, "learning_rate": 2.1247535686276632e-07, "loss": 0.0793, "step": 8072 }, { "epoch": 2.6160077770576797, "grad_norm": 0.8766631484031677, "learning_rate": 2.121226523726988e-07, "loss": 0.0725, "step": 8073 }, { "epoch": 2.616331821127673, "grad_norm": 0.8818141222000122, "learning_rate": 2.1177022789667045e-07, "loss": 0.0762, "step": 8074 }, { "epoch": 2.616655865197667, "grad_norm": 0.8716321587562561, "learning_rate": 2.1141808347781428e-07, "loss": 0.0733, "step": 8075 }, { "epoch": 2.6169799092676604, "grad_norm": 0.9057003259658813, "learning_rate": 2.110662191592297e-07, "loss": 0.0791, "step": 8076 }, { "epoch": 2.617303953337654, "grad_norm": 0.8317652344703674, "learning_rate": 2.1071463498398114e-07, "loss": 0.0728, "step": 8077 }, { "epoch": 2.6176279974076473, "grad_norm": 0.9148250222206116, "learning_rate": 2.103633309950995e-07, "loss": 0.0758, "step": 8078 }, { "epoch": 2.6179520414776407, "grad_norm": 0.8296970129013062, "learning_rate": 2.1001230723558087e-07, "loss": 0.0729, "step": 8079 }, { "epoch": 2.6182760855476346, "grad_norm": 0.9037541747093201, "learning_rate": 2.0966156374838677e-07, "loss": 0.0737, "step": 8080 }, { "epoch": 2.618600129617628, "grad_norm": 1.0087367296218872, "learning_rate": 2.0931110057644505e-07, "loss": 0.0821, "step": 8081 }, { "epoch": 2.6189241736876214, "grad_norm": 0.9129602909088135, "learning_rate": 2.089609177626492e-07, "loss": 0.0733, "step": 8082 }, { "epoch": 2.6192482177576153, "grad_norm": 0.9093072414398193, "learning_rate": 2.0861101534985774e-07, "loss": 0.084, "step": 8083 }, { "epoch": 2.6195722618276087, "grad_norm": 0.8904097080230713, "learning_rate": 2.082613933808958e-07, "loss": 0.077, "step": 8084 }, { "epoch": 2.619896305897602, "grad_norm": 0.8340948224067688, "learning_rate": 2.079120518985539e-07, "loss": 0.07, "step": 8085 }, { "epoch": 2.6202203499675956, "grad_norm": 0.8659706711769104, "learning_rate": 2.07562990945587e-07, "loss": 0.0713, "step": 8086 }, { "epoch": 2.620544394037589, "grad_norm": 0.8465261459350586, "learning_rate": 2.0721421056471818e-07, "loss": 0.0757, "step": 8087 }, { "epoch": 2.620868438107583, "grad_norm": 0.9324637055397034, "learning_rate": 2.0686571079863383e-07, "loss": 0.0799, "step": 8088 }, { "epoch": 2.6211924821775763, "grad_norm": 0.9742748141288757, "learning_rate": 2.0651749168998703e-07, "loss": 0.0832, "step": 8089 }, { "epoch": 2.6215165262475697, "grad_norm": 0.8751499652862549, "learning_rate": 2.0616955328139675e-07, "loss": 0.0719, "step": 8090 }, { "epoch": 2.621840570317563, "grad_norm": 0.9169896245002747, "learning_rate": 2.058218956154473e-07, "loss": 0.0795, "step": 8091 }, { "epoch": 2.6221646143875565, "grad_norm": 0.8359293937683105, "learning_rate": 2.0547451873468877e-07, "loss": 0.0737, "step": 8092 }, { "epoch": 2.6224886584575504, "grad_norm": 0.8692148327827454, "learning_rate": 2.051274226816355e-07, "loss": 0.0772, "step": 8093 }, { "epoch": 2.622812702527544, "grad_norm": 0.8982337713241577, "learning_rate": 2.0478060749877044e-07, "loss": 0.0739, "step": 8094 }, { "epoch": 2.6231367465975373, "grad_norm": 0.8163446187973022, "learning_rate": 2.0443407322853882e-07, "loss": 0.0681, "step": 8095 }, { "epoch": 2.6234607906675307, "grad_norm": 0.8136507272720337, "learning_rate": 2.0408781991335446e-07, "loss": 0.0718, "step": 8096 }, { "epoch": 2.623784834737524, "grad_norm": 0.9225520491600037, "learning_rate": 2.0374184759559463e-07, "loss": 0.0756, "step": 8097 }, { "epoch": 2.624108878807518, "grad_norm": 0.8884847164154053, "learning_rate": 2.033961563176029e-07, "loss": 0.0728, "step": 8098 }, { "epoch": 2.6244329228775114, "grad_norm": 0.8490235805511475, "learning_rate": 2.0305074612168906e-07, "loss": 0.0729, "step": 8099 }, { "epoch": 2.624756966947505, "grad_norm": 0.9549583196640015, "learning_rate": 2.0270561705012765e-07, "loss": 0.0855, "step": 8100 }, { "epoch": 2.6250810110174982, "grad_norm": 0.8930221796035767, "learning_rate": 2.0236076914515956e-07, "loss": 0.0778, "step": 8101 }, { "epoch": 2.6254050550874917, "grad_norm": 0.8836579918861389, "learning_rate": 2.020162024489894e-07, "loss": 0.0766, "step": 8102 }, { "epoch": 2.6257290991574855, "grad_norm": 0.888401448726654, "learning_rate": 2.0167191700379092e-07, "loss": 0.075, "step": 8103 }, { "epoch": 2.626053143227479, "grad_norm": 0.8471822142601013, "learning_rate": 2.0132791285169985e-07, "loss": 0.07, "step": 8104 }, { "epoch": 2.6263771872974724, "grad_norm": 0.8982986807823181, "learning_rate": 2.0098419003481946e-07, "loss": 0.077, "step": 8105 }, { "epoch": 2.6267012313674662, "grad_norm": 0.9074251651763916, "learning_rate": 2.0064074859521777e-07, "loss": 0.0791, "step": 8106 }, { "epoch": 2.6270252754374592, "grad_norm": 0.8768463730812073, "learning_rate": 2.0029758857492893e-07, "loss": 0.0765, "step": 8107 }, { "epoch": 2.627349319507453, "grad_norm": 0.8786301016807556, "learning_rate": 1.9995471001595267e-07, "loss": 0.0731, "step": 8108 }, { "epoch": 2.6276733635774465, "grad_norm": 0.9357177019119263, "learning_rate": 1.9961211296025352e-07, "loss": 0.0816, "step": 8109 }, { "epoch": 2.62799740764744, "grad_norm": 0.900934100151062, "learning_rate": 1.992697974497629e-07, "loss": 0.0779, "step": 8110 }, { "epoch": 2.628321451717434, "grad_norm": 0.8593557476997375, "learning_rate": 1.989277635263756e-07, "loss": 0.0727, "step": 8111 }, { "epoch": 2.6286454957874272, "grad_norm": 0.8760018348693848, "learning_rate": 1.9858601123195403e-07, "loss": 0.0765, "step": 8112 }, { "epoch": 2.6289695398574207, "grad_norm": 0.8476484417915344, "learning_rate": 1.9824454060832526e-07, "loss": 0.0741, "step": 8113 }, { "epoch": 2.629293583927414, "grad_norm": 0.9107114672660828, "learning_rate": 1.9790335169728197e-07, "loss": 0.077, "step": 8114 }, { "epoch": 2.6296176279974075, "grad_norm": 0.8905239701271057, "learning_rate": 1.9756244454058244e-07, "loss": 0.0753, "step": 8115 }, { "epoch": 2.6299416720674014, "grad_norm": 0.9123937487602234, "learning_rate": 1.9722181917995103e-07, "loss": 0.0777, "step": 8116 }, { "epoch": 2.630265716137395, "grad_norm": 0.8795695304870605, "learning_rate": 1.9688147565707528e-07, "loss": 0.0694, "step": 8117 }, { "epoch": 2.630589760207388, "grad_norm": 0.886893093585968, "learning_rate": 1.9654141401361183e-07, "loss": 0.0743, "step": 8118 }, { "epoch": 2.6309138042773816, "grad_norm": 0.8590728640556335, "learning_rate": 1.9620163429117906e-07, "loss": 0.0748, "step": 8119 }, { "epoch": 2.631237848347375, "grad_norm": 0.8743232488632202, "learning_rate": 1.958621365313648e-07, "loss": 0.0776, "step": 8120 }, { "epoch": 2.631561892417369, "grad_norm": 0.92427659034729, "learning_rate": 1.9552292077571894e-07, "loss": 0.0805, "step": 8121 }, { "epoch": 2.6318859364873624, "grad_norm": 0.8138688802719116, "learning_rate": 1.9518398706575846e-07, "loss": 0.0691, "step": 8122 }, { "epoch": 2.6322099805573558, "grad_norm": 0.8546965718269348, "learning_rate": 1.948453354429661e-07, "loss": 0.0748, "step": 8123 }, { "epoch": 2.632534024627349, "grad_norm": 0.9168205857276917, "learning_rate": 1.9450696594878804e-07, "loss": 0.0775, "step": 8124 }, { "epoch": 2.6328580686973426, "grad_norm": 0.8308161497116089, "learning_rate": 1.941688786246393e-07, "loss": 0.0719, "step": 8125 }, { "epoch": 2.6331821127673365, "grad_norm": 0.9307392239570618, "learning_rate": 1.9383107351189672e-07, "loss": 0.0791, "step": 8126 }, { "epoch": 2.63350615683733, "grad_norm": 0.8862630128860474, "learning_rate": 1.9349355065190618e-07, "loss": 0.0738, "step": 8127 }, { "epoch": 2.6338302009073233, "grad_norm": 0.8637521266937256, "learning_rate": 1.9315631008597596e-07, "loss": 0.0716, "step": 8128 }, { "epoch": 2.6341542449773168, "grad_norm": 0.8959033489227295, "learning_rate": 1.9281935185538141e-07, "loss": 0.0738, "step": 8129 }, { "epoch": 2.63447828904731, "grad_norm": 0.9268590211868286, "learning_rate": 1.9248267600136317e-07, "loss": 0.083, "step": 8130 }, { "epoch": 2.634802333117304, "grad_norm": 0.9983721971511841, "learning_rate": 1.9214628256512656e-07, "loss": 0.0865, "step": 8131 }, { "epoch": 2.6351263771872975, "grad_norm": 0.8777361512184143, "learning_rate": 1.918101715878437e-07, "loss": 0.0715, "step": 8132 }, { "epoch": 2.635450421257291, "grad_norm": 0.8931231498718262, "learning_rate": 1.9147434311065028e-07, "loss": 0.0742, "step": 8133 }, { "epoch": 2.6357744653272848, "grad_norm": 0.8385959267616272, "learning_rate": 1.911387971746495e-07, "loss": 0.0697, "step": 8134 }, { "epoch": 2.636098509397278, "grad_norm": 0.9038499593734741, "learning_rate": 1.9080353382090798e-07, "loss": 0.077, "step": 8135 }, { "epoch": 2.6364225534672716, "grad_norm": 0.8563414216041565, "learning_rate": 1.9046855309045957e-07, "loss": 0.0759, "step": 8136 }, { "epoch": 2.636746597537265, "grad_norm": 0.9571034908294678, "learning_rate": 1.9013385502430175e-07, "loss": 0.0775, "step": 8137 }, { "epoch": 2.6370706416072585, "grad_norm": 0.9299083352088928, "learning_rate": 1.8979943966339924e-07, "loss": 0.0743, "step": 8138 }, { "epoch": 2.6373946856772523, "grad_norm": 0.8955670595169067, "learning_rate": 1.8946530704868072e-07, "loss": 0.0721, "step": 8139 }, { "epoch": 2.6377187297472457, "grad_norm": 0.9154726266860962, "learning_rate": 1.891314572210412e-07, "loss": 0.0757, "step": 8140 }, { "epoch": 2.638042773817239, "grad_norm": 0.9504216313362122, "learning_rate": 1.887978902213397e-07, "loss": 0.0827, "step": 8141 }, { "epoch": 2.6383668178872326, "grad_norm": 0.8857448697090149, "learning_rate": 1.8846460609040302e-07, "loss": 0.0727, "step": 8142 }, { "epoch": 2.638690861957226, "grad_norm": 0.9364683628082275, "learning_rate": 1.881316048690207e-07, "loss": 0.075, "step": 8143 }, { "epoch": 2.63901490602722, "grad_norm": 0.8507384657859802, "learning_rate": 1.8779888659794937e-07, "loss": 0.0733, "step": 8144 }, { "epoch": 2.6393389500972133, "grad_norm": 0.889306366443634, "learning_rate": 1.874664513179106e-07, "loss": 0.0781, "step": 8145 }, { "epoch": 2.6396629941672067, "grad_norm": 0.9445775151252747, "learning_rate": 1.8713429906959097e-07, "loss": 0.0788, "step": 8146 }, { "epoch": 2.6399870382372, "grad_norm": 0.8926195502281189, "learning_rate": 1.8680242989364327e-07, "loss": 0.0717, "step": 8147 }, { "epoch": 2.6403110823071936, "grad_norm": 0.93767249584198, "learning_rate": 1.8647084383068393e-07, "loss": 0.0792, "step": 8148 }, { "epoch": 2.6406351263771874, "grad_norm": 0.8567405343055725, "learning_rate": 1.8613954092129738e-07, "loss": 0.0762, "step": 8149 }, { "epoch": 2.640959170447181, "grad_norm": 0.8923830986022949, "learning_rate": 1.858085212060304e-07, "loss": 0.074, "step": 8150 }, { "epoch": 2.6412832145171743, "grad_norm": 0.8800657391548157, "learning_rate": 1.85477784725398e-07, "loss": 0.0727, "step": 8151 }, { "epoch": 2.6416072585871677, "grad_norm": 0.9234258532524109, "learning_rate": 1.851473315198782e-07, "loss": 0.0783, "step": 8152 }, { "epoch": 2.641931302657161, "grad_norm": 0.9874683618545532, "learning_rate": 1.848171616299152e-07, "loss": 0.077, "step": 8153 }, { "epoch": 2.642255346727155, "grad_norm": 0.8514154553413391, "learning_rate": 1.8448727509591951e-07, "loss": 0.0728, "step": 8154 }, { "epoch": 2.6425793907971484, "grad_norm": 0.8893564939498901, "learning_rate": 1.8415767195826468e-07, "loss": 0.0739, "step": 8155 }, { "epoch": 2.642903434867142, "grad_norm": 0.8647539019584656, "learning_rate": 1.8382835225729256e-07, "loss": 0.0739, "step": 8156 }, { "epoch": 2.6432274789371357, "grad_norm": 0.8942198157310486, "learning_rate": 1.834993160333068e-07, "loss": 0.0779, "step": 8157 }, { "epoch": 2.6435515230071287, "grad_norm": 0.9216142296791077, "learning_rate": 1.831705633265804e-07, "loss": 0.0782, "step": 8158 }, { "epoch": 2.6438755670771226, "grad_norm": 0.9345462322235107, "learning_rate": 1.8284209417734762e-07, "loss": 0.0765, "step": 8159 }, { "epoch": 2.644199611147116, "grad_norm": 0.8918818831443787, "learning_rate": 1.8251390862581097e-07, "loss": 0.0756, "step": 8160 }, { "epoch": 2.6445236552171094, "grad_norm": 0.8900420069694519, "learning_rate": 1.8218600671213698e-07, "loss": 0.0719, "step": 8161 }, { "epoch": 2.6448476992871033, "grad_norm": 1.0553818941116333, "learning_rate": 1.8185838847645743e-07, "loss": 0.0802, "step": 8162 }, { "epoch": 2.6451717433570967, "grad_norm": 0.8427667617797852, "learning_rate": 1.8153105395886967e-07, "loss": 0.0705, "step": 8163 }, { "epoch": 2.64549578742709, "grad_norm": 0.8833165764808655, "learning_rate": 1.8120400319943692e-07, "loss": 0.0756, "step": 8164 }, { "epoch": 2.6458198314970836, "grad_norm": 0.8940858244895935, "learning_rate": 1.8087723623818608e-07, "loss": 0.0751, "step": 8165 }, { "epoch": 2.646143875567077, "grad_norm": 0.9112303853034973, "learning_rate": 1.805507531151107e-07, "loss": 0.0717, "step": 8166 }, { "epoch": 2.646467919637071, "grad_norm": 0.8989704251289368, "learning_rate": 1.8022455387016913e-07, "loss": 0.0718, "step": 8167 }, { "epoch": 2.6467919637070643, "grad_norm": 0.8399885296821594, "learning_rate": 1.7989863854328492e-07, "loss": 0.0698, "step": 8168 }, { "epoch": 2.6471160077770577, "grad_norm": 0.9487384557723999, "learning_rate": 1.7957300717434706e-07, "loss": 0.0804, "step": 8169 }, { "epoch": 2.647440051847051, "grad_norm": 0.9109880924224854, "learning_rate": 1.7924765980320974e-07, "loss": 0.075, "step": 8170 }, { "epoch": 2.6477640959170445, "grad_norm": 0.901547372341156, "learning_rate": 1.7892259646969278e-07, "loss": 0.0751, "step": 8171 }, { "epoch": 2.6480881399870384, "grad_norm": 0.9755034446716309, "learning_rate": 1.785978172135791e-07, "loss": 0.0749, "step": 8172 }, { "epoch": 2.648412184057032, "grad_norm": 0.9883907437324524, "learning_rate": 1.782733220746205e-07, "loss": 0.0868, "step": 8173 }, { "epoch": 2.6487362281270252, "grad_norm": 0.8916382193565369, "learning_rate": 1.7794911109253105e-07, "loss": 0.076, "step": 8174 }, { "epoch": 2.6490602721970187, "grad_norm": 0.8999601602554321, "learning_rate": 1.7762518430699122e-07, "loss": 0.0766, "step": 8175 }, { "epoch": 2.649384316267012, "grad_norm": 0.8907502889633179, "learning_rate": 1.7730154175764623e-07, "loss": 0.0748, "step": 8176 }, { "epoch": 2.649708360337006, "grad_norm": 0.9117937684059143, "learning_rate": 1.7697818348410722e-07, "loss": 0.0861, "step": 8177 }, { "epoch": 2.6500324044069994, "grad_norm": 0.9239206314086914, "learning_rate": 1.7665510952595027e-07, "loss": 0.0715, "step": 8178 }, { "epoch": 2.650356448476993, "grad_norm": 0.9190769195556641, "learning_rate": 1.7633231992271572e-07, "loss": 0.0763, "step": 8179 }, { "epoch": 2.6506804925469862, "grad_norm": 0.8508328795433044, "learning_rate": 1.7600981471391083e-07, "loss": 0.0718, "step": 8180 }, { "epoch": 2.6510045366169797, "grad_norm": 0.9987004995346069, "learning_rate": 1.7568759393900597e-07, "loss": 0.0826, "step": 8181 }, { "epoch": 2.6513285806869735, "grad_norm": 0.8714659214019775, "learning_rate": 1.7536565763743934e-07, "loss": 0.0751, "step": 8182 }, { "epoch": 2.651652624756967, "grad_norm": 0.9042683243751526, "learning_rate": 1.7504400584861137e-07, "loss": 0.0777, "step": 8183 }, { "epoch": 2.6519766688269604, "grad_norm": 0.8759123086929321, "learning_rate": 1.7472263861189e-07, "loss": 0.0709, "step": 8184 }, { "epoch": 2.6523007128969542, "grad_norm": 0.9868377447128296, "learning_rate": 1.7440155596660735e-07, "loss": 0.0781, "step": 8185 }, { "epoch": 2.6526247569669477, "grad_norm": 1.0885810852050781, "learning_rate": 1.7408075795206037e-07, "loss": 0.0899, "step": 8186 }, { "epoch": 2.652948801036941, "grad_norm": 0.8971470594406128, "learning_rate": 1.7376024460751262e-07, "loss": 0.0759, "step": 8187 }, { "epoch": 2.6532728451069345, "grad_norm": 0.9231646060943604, "learning_rate": 1.7344001597219024e-07, "loss": 0.0789, "step": 8188 }, { "epoch": 2.653596889176928, "grad_norm": 0.8947360515594482, "learning_rate": 1.7312007208528796e-07, "loss": 0.0782, "step": 8189 }, { "epoch": 2.653920933246922, "grad_norm": 0.8938325643539429, "learning_rate": 1.7280041298596257e-07, "loss": 0.0746, "step": 8190 }, { "epoch": 2.654244977316915, "grad_norm": 0.8647114038467407, "learning_rate": 1.7248103871333743e-07, "loss": 0.0722, "step": 8191 }, { "epoch": 2.6545690213869086, "grad_norm": 0.9191124439239502, "learning_rate": 1.7216194930650105e-07, "loss": 0.0764, "step": 8192 }, { "epoch": 2.654893065456902, "grad_norm": 0.9956697225570679, "learning_rate": 1.7184314480450713e-07, "loss": 0.0865, "step": 8193 }, { "epoch": 2.6552171095268955, "grad_norm": 0.8507396578788757, "learning_rate": 1.715246252463737e-07, "loss": 0.0738, "step": 8194 }, { "epoch": 2.6555411535968894, "grad_norm": 0.9137740135192871, "learning_rate": 1.7120639067108508e-07, "loss": 0.0797, "step": 8195 }, { "epoch": 2.655865197666883, "grad_norm": 0.8848572969436646, "learning_rate": 1.7088844111758956e-07, "loss": 0.0751, "step": 8196 }, { "epoch": 2.656189241736876, "grad_norm": 0.9681692719459534, "learning_rate": 1.7057077662480131e-07, "loss": 0.0783, "step": 8197 }, { "epoch": 2.6565132858068696, "grad_norm": 0.8566903471946716, "learning_rate": 1.7025339723159924e-07, "loss": 0.0746, "step": 8198 }, { "epoch": 2.656837329876863, "grad_norm": 0.8328977227210999, "learning_rate": 1.6993630297682778e-07, "loss": 0.0696, "step": 8199 }, { "epoch": 2.657161373946857, "grad_norm": 0.8919262290000916, "learning_rate": 1.6961949389929593e-07, "loss": 0.0793, "step": 8200 }, { "epoch": 2.6574854180168503, "grad_norm": 0.898391604423523, "learning_rate": 1.693029700377785e-07, "loss": 0.0735, "step": 8201 }, { "epoch": 2.6578094620868438, "grad_norm": 0.8991305232048035, "learning_rate": 1.6898673143101479e-07, "loss": 0.0758, "step": 8202 }, { "epoch": 2.658133506156837, "grad_norm": 0.8869103789329529, "learning_rate": 1.6867077811770826e-07, "loss": 0.0771, "step": 8203 }, { "epoch": 2.6584575502268306, "grad_norm": 0.8388248682022095, "learning_rate": 1.683551101365305e-07, "loss": 0.0676, "step": 8204 }, { "epoch": 2.6587815942968245, "grad_norm": 0.8820380568504333, "learning_rate": 1.6803972752611475e-07, "loss": 0.0733, "step": 8205 }, { "epoch": 2.659105638366818, "grad_norm": 0.9473530650138855, "learning_rate": 1.6772463032506126e-07, "loss": 0.0744, "step": 8206 }, { "epoch": 2.6594296824368113, "grad_norm": 0.829882025718689, "learning_rate": 1.6740981857193471e-07, "loss": 0.0712, "step": 8207 }, { "epoch": 2.659753726506805, "grad_norm": 0.9000360369682312, "learning_rate": 1.6709529230526544e-07, "loss": 0.0782, "step": 8208 }, { "epoch": 2.660077770576798, "grad_norm": 0.9191071391105652, "learning_rate": 1.667810515635482e-07, "loss": 0.0772, "step": 8209 }, { "epoch": 2.660401814646792, "grad_norm": 0.9518386125564575, "learning_rate": 1.6646709638524216e-07, "loss": 0.0757, "step": 8210 }, { "epoch": 2.6607258587167855, "grad_norm": 0.9048896431922913, "learning_rate": 1.6615342680877417e-07, "loss": 0.0738, "step": 8211 }, { "epoch": 2.661049902786779, "grad_norm": 1.0090075731277466, "learning_rate": 1.6584004287253235e-07, "loss": 0.0852, "step": 8212 }, { "epoch": 2.6613739468567728, "grad_norm": 0.8919034004211426, "learning_rate": 1.6552694461487385e-07, "loss": 0.0799, "step": 8213 }, { "epoch": 2.661697990926766, "grad_norm": 0.9021881818771362, "learning_rate": 1.652141320741174e-07, "loss": 0.0765, "step": 8214 }, { "epoch": 2.6620220349967596, "grad_norm": 0.9298197627067566, "learning_rate": 1.6490160528854855e-07, "loss": 0.0791, "step": 8215 }, { "epoch": 2.662346079066753, "grad_norm": 0.9251485466957092, "learning_rate": 1.6458936429641803e-07, "loss": 0.0746, "step": 8216 }, { "epoch": 2.6626701231367464, "grad_norm": 0.9725478887557983, "learning_rate": 1.642774091359406e-07, "loss": 0.0808, "step": 8217 }, { "epoch": 2.6629941672067403, "grad_norm": 0.8733885884284973, "learning_rate": 1.6396573984529707e-07, "loss": 0.0739, "step": 8218 }, { "epoch": 2.6633182112767337, "grad_norm": 0.9233888387680054, "learning_rate": 1.6365435646263223e-07, "loss": 0.0786, "step": 8219 }, { "epoch": 2.663642255346727, "grad_norm": 0.8216432332992554, "learning_rate": 1.6334325902605642e-07, "loss": 0.0716, "step": 8220 }, { "epoch": 2.6639662994167206, "grad_norm": 0.9246054887771606, "learning_rate": 1.63032447573645e-07, "loss": 0.0745, "step": 8221 }, { "epoch": 2.664290343486714, "grad_norm": 0.9085432291030884, "learning_rate": 1.6272192214343868e-07, "loss": 0.0795, "step": 8222 }, { "epoch": 2.664614387556708, "grad_norm": 0.9263147711753845, "learning_rate": 1.6241168277344232e-07, "loss": 0.0809, "step": 8223 }, { "epoch": 2.6649384316267013, "grad_norm": 0.8804634213447571, "learning_rate": 1.6210172950162639e-07, "loss": 0.076, "step": 8224 }, { "epoch": 2.6652624756966947, "grad_norm": 0.845128059387207, "learning_rate": 1.617920623659261e-07, "loss": 0.0737, "step": 8225 }, { "epoch": 2.665586519766688, "grad_norm": 0.935748815536499, "learning_rate": 1.6148268140424224e-07, "loss": 0.0789, "step": 8226 }, { "epoch": 2.6659105638366816, "grad_norm": 0.9177601933479309, "learning_rate": 1.6117358665443922e-07, "loss": 0.0818, "step": 8227 }, { "epoch": 2.6662346079066754, "grad_norm": 1.0410524606704712, "learning_rate": 1.6086477815434763e-07, "loss": 0.0862, "step": 8228 }, { "epoch": 2.666558651976669, "grad_norm": 0.927940845489502, "learning_rate": 1.6055625594176254e-07, "loss": 0.0824, "step": 8229 }, { "epoch": 2.6668826960466623, "grad_norm": 0.9064210653305054, "learning_rate": 1.602480200544443e-07, "loss": 0.0824, "step": 8230 }, { "epoch": 2.6672067401166557, "grad_norm": 0.9253581166267395, "learning_rate": 1.5994007053011796e-07, "loss": 0.08, "step": 8231 }, { "epoch": 2.667530784186649, "grad_norm": 0.9229341149330139, "learning_rate": 1.5963240740647285e-07, "loss": 0.0813, "step": 8232 }, { "epoch": 2.667854828256643, "grad_norm": 0.9699390530586243, "learning_rate": 1.5932503072116524e-07, "loss": 0.0793, "step": 8233 }, { "epoch": 2.6681788723266364, "grad_norm": 0.8560695052146912, "learning_rate": 1.5901794051181362e-07, "loss": 0.0739, "step": 8234 }, { "epoch": 2.66850291639663, "grad_norm": 0.924231767654419, "learning_rate": 1.5871113681600464e-07, "loss": 0.0779, "step": 8235 }, { "epoch": 2.6688269604666237, "grad_norm": 0.9453070163726807, "learning_rate": 1.5840461967128628e-07, "loss": 0.0789, "step": 8236 }, { "epoch": 2.669151004536617, "grad_norm": 0.9044185876846313, "learning_rate": 1.5809838911517438e-07, "loss": 0.0771, "step": 8237 }, { "epoch": 2.6694750486066106, "grad_norm": 0.8933587670326233, "learning_rate": 1.5779244518514813e-07, "loss": 0.0749, "step": 8238 }, { "epoch": 2.669799092676604, "grad_norm": 0.883310854434967, "learning_rate": 1.574867879186523e-07, "loss": 0.0769, "step": 8239 }, { "epoch": 2.6701231367465974, "grad_norm": 0.8501051068305969, "learning_rate": 1.5718141735309695e-07, "loss": 0.0702, "step": 8240 }, { "epoch": 2.6704471808165913, "grad_norm": 0.9186578392982483, "learning_rate": 1.5687633352585467e-07, "loss": 0.0821, "step": 8241 }, { "epoch": 2.6707712248865847, "grad_norm": 0.8473071455955505, "learning_rate": 1.5657153647426703e-07, "loss": 0.0718, "step": 8242 }, { "epoch": 2.671095268956578, "grad_norm": 0.9314771294593811, "learning_rate": 1.5626702623563694e-07, "loss": 0.0765, "step": 8243 }, { "epoch": 2.6714193130265715, "grad_norm": 0.9406991600990295, "learning_rate": 1.5596280284723348e-07, "loss": 0.08, "step": 8244 }, { "epoch": 2.671743357096565, "grad_norm": 0.8677089810371399, "learning_rate": 1.5565886634629102e-07, "loss": 0.0721, "step": 8245 }, { "epoch": 2.672067401166559, "grad_norm": 0.8874663710594177, "learning_rate": 1.5535521677000813e-07, "loss": 0.0771, "step": 8246 }, { "epoch": 2.6723914452365523, "grad_norm": 0.9458112120628357, "learning_rate": 1.5505185415554903e-07, "loss": 0.0728, "step": 8247 }, { "epoch": 2.6727154893065457, "grad_norm": 0.8996087312698364, "learning_rate": 1.54748778540042e-07, "loss": 0.0798, "step": 8248 }, { "epoch": 2.673039533376539, "grad_norm": 0.8373512625694275, "learning_rate": 1.544459899605813e-07, "loss": 0.0695, "step": 8249 }, { "epoch": 2.6733635774465325, "grad_norm": 0.960312008857727, "learning_rate": 1.5414348845422394e-07, "loss": 0.0808, "step": 8250 }, { "epoch": 2.6736876215165264, "grad_norm": 0.9382338523864746, "learning_rate": 1.538412740579942e-07, "loss": 0.0818, "step": 8251 }, { "epoch": 2.67401166558652, "grad_norm": 0.9835246205329895, "learning_rate": 1.5353934680888e-07, "loss": 0.0764, "step": 8252 }, { "epoch": 2.6743357096565132, "grad_norm": 0.8817523121833801, "learning_rate": 1.5323770674383398e-07, "loss": 0.0725, "step": 8253 }, { "epoch": 2.6746597537265067, "grad_norm": 0.9373438358306885, "learning_rate": 1.529363538997744e-07, "loss": 0.0793, "step": 8254 }, { "epoch": 2.6749837977965, "grad_norm": 0.9446477293968201, "learning_rate": 1.526352883135837e-07, "loss": 0.0751, "step": 8255 }, { "epoch": 2.675307841866494, "grad_norm": 0.8968738317489624, "learning_rate": 1.5233451002210964e-07, "loss": 0.0786, "step": 8256 }, { "epoch": 2.6756318859364874, "grad_norm": 0.8211001753807068, "learning_rate": 1.520340190621647e-07, "loss": 0.0704, "step": 8257 }, { "epoch": 2.675955930006481, "grad_norm": 0.8999126553535461, "learning_rate": 1.5173381547052528e-07, "loss": 0.0739, "step": 8258 }, { "epoch": 2.6762799740764747, "grad_norm": 0.8938018679618835, "learning_rate": 1.5143389928393398e-07, "loss": 0.0775, "step": 8259 }, { "epoch": 2.6766040181464676, "grad_norm": 0.8835743069648743, "learning_rate": 1.5113427053909725e-07, "loss": 0.0747, "step": 8260 }, { "epoch": 2.6769280622164615, "grad_norm": 0.8831462264060974, "learning_rate": 1.508349292726874e-07, "loss": 0.0682, "step": 8261 }, { "epoch": 2.677252106286455, "grad_norm": 0.8573110103607178, "learning_rate": 1.505358755213407e-07, "loss": 0.0685, "step": 8262 }, { "epoch": 2.6775761503564484, "grad_norm": 0.8490044474601746, "learning_rate": 1.5023710932165758e-07, "loss": 0.0758, "step": 8263 }, { "epoch": 2.6779001944264422, "grad_norm": 0.9448397755622864, "learning_rate": 1.4993863071020548e-07, "loss": 0.0759, "step": 8264 }, { "epoch": 2.6782242384964356, "grad_norm": 0.8347598314285278, "learning_rate": 1.4964043972351377e-07, "loss": 0.0688, "step": 8265 }, { "epoch": 2.678548282566429, "grad_norm": 0.8894073367118835, "learning_rate": 1.4934253639807994e-07, "loss": 0.0748, "step": 8266 }, { "epoch": 2.6788723266364225, "grad_norm": 0.8460086584091187, "learning_rate": 1.4904492077036286e-07, "loss": 0.0695, "step": 8267 }, { "epoch": 2.679196370706416, "grad_norm": 0.8650538921356201, "learning_rate": 1.4874759287678898e-07, "loss": 0.0716, "step": 8268 }, { "epoch": 2.67952041477641, "grad_norm": 1.0882456302642822, "learning_rate": 1.484505527537475e-07, "loss": 0.0779, "step": 8269 }, { "epoch": 2.679844458846403, "grad_norm": 0.891619861125946, "learning_rate": 1.4815380043759374e-07, "loss": 0.0729, "step": 8270 }, { "epoch": 2.6801685029163966, "grad_norm": 0.9411903619766235, "learning_rate": 1.4785733596464736e-07, "loss": 0.0789, "step": 8271 }, { "epoch": 2.68049254698639, "grad_norm": 0.887125551700592, "learning_rate": 1.4756115937119202e-07, "loss": 0.0711, "step": 8272 }, { "epoch": 2.6808165910563835, "grad_norm": 0.8783424496650696, "learning_rate": 1.4726527069347796e-07, "loss": 0.0773, "step": 8273 }, { "epoch": 2.6811406351263773, "grad_norm": 0.8920451998710632, "learning_rate": 1.4696966996771838e-07, "loss": 0.0746, "step": 8274 }, { "epoch": 2.6814646791963708, "grad_norm": 0.8676562905311584, "learning_rate": 1.4667435723009187e-07, "loss": 0.0767, "step": 8275 }, { "epoch": 2.681788723266364, "grad_norm": 0.9486330151557922, "learning_rate": 1.463793325167423e-07, "loss": 0.0845, "step": 8276 }, { "epoch": 2.6821127673363576, "grad_norm": 0.9392081499099731, "learning_rate": 1.4608459586377743e-07, "loss": 0.08, "step": 8277 }, { "epoch": 2.682436811406351, "grad_norm": 0.880453884601593, "learning_rate": 1.4579014730727037e-07, "loss": 0.074, "step": 8278 }, { "epoch": 2.682760855476345, "grad_norm": 0.9142937064170837, "learning_rate": 1.4549598688325896e-07, "loss": 0.0768, "step": 8279 }, { "epoch": 2.6830848995463383, "grad_norm": 0.9541929364204407, "learning_rate": 1.4520211462774548e-07, "loss": 0.0793, "step": 8280 }, { "epoch": 2.6834089436163318, "grad_norm": 0.8086893558502197, "learning_rate": 1.4490853057669675e-07, "loss": 0.07, "step": 8281 }, { "epoch": 2.683732987686325, "grad_norm": 0.8273548483848572, "learning_rate": 1.4461523476604482e-07, "loss": 0.0775, "step": 8282 }, { "epoch": 2.6840570317563186, "grad_norm": 0.9654455780982971, "learning_rate": 1.4432222723168632e-07, "loss": 0.0723, "step": 8283 }, { "epoch": 2.6843810758263125, "grad_norm": 0.8216709494590759, "learning_rate": 1.4402950800948223e-07, "loss": 0.0723, "step": 8284 }, { "epoch": 2.684705119896306, "grad_norm": 1.047164797782898, "learning_rate": 1.437370771352589e-07, "loss": 0.0794, "step": 8285 }, { "epoch": 2.6850291639662993, "grad_norm": 0.8784307837486267, "learning_rate": 1.4344493464480745e-07, "loss": 0.0776, "step": 8286 }, { "epoch": 2.685353208036293, "grad_norm": 0.9339602589607239, "learning_rate": 1.4315308057388206e-07, "loss": 0.0711, "step": 8287 }, { "epoch": 2.6856772521062866, "grad_norm": 0.8273693323135376, "learning_rate": 1.428615149582041e-07, "loss": 0.0707, "step": 8288 }, { "epoch": 2.68600129617628, "grad_norm": 0.8852351903915405, "learning_rate": 1.425702378334573e-07, "loss": 0.0792, "step": 8289 }, { "epoch": 2.6863253402462735, "grad_norm": 0.970825731754303, "learning_rate": 1.4227924923529228e-07, "loss": 0.084, "step": 8290 }, { "epoch": 2.686649384316267, "grad_norm": 0.9002171754837036, "learning_rate": 1.4198854919932225e-07, "loss": 0.0799, "step": 8291 }, { "epoch": 2.6869734283862607, "grad_norm": 0.9297456741333008, "learning_rate": 1.4169813776112652e-07, "loss": 0.0801, "step": 8292 }, { "epoch": 2.687297472456254, "grad_norm": 0.9804297685623169, "learning_rate": 1.4140801495624913e-07, "loss": 0.0758, "step": 8293 }, { "epoch": 2.6876215165262476, "grad_norm": 0.8457171320915222, "learning_rate": 1.4111818082019696e-07, "loss": 0.0754, "step": 8294 }, { "epoch": 2.687945560596241, "grad_norm": 0.9038613438606262, "learning_rate": 1.4082863538844444e-07, "loss": 0.0792, "step": 8295 }, { "epoch": 2.6882696046662344, "grad_norm": 0.9129397869110107, "learning_rate": 1.4053937869642737e-07, "loss": 0.0764, "step": 8296 }, { "epoch": 2.6885936487362283, "grad_norm": 0.9389241337776184, "learning_rate": 1.402504107795502e-07, "loss": 0.0786, "step": 8297 }, { "epoch": 2.6889176928062217, "grad_norm": 0.8619992733001709, "learning_rate": 1.39961731673178e-07, "loss": 0.0741, "step": 8298 }, { "epoch": 2.689241736876215, "grad_norm": 0.9079556465148926, "learning_rate": 1.3967334141264277e-07, "loss": 0.0786, "step": 8299 }, { "epoch": 2.6895657809462086, "grad_norm": 0.8391004204750061, "learning_rate": 1.39385240033241e-07, "loss": 0.0763, "step": 8300 }, { "epoch": 2.689889825016202, "grad_norm": 0.890021800994873, "learning_rate": 1.3909742757023336e-07, "loss": 0.0748, "step": 8301 }, { "epoch": 2.690213869086196, "grad_norm": 0.9386758208274841, "learning_rate": 1.3880990405884532e-07, "loss": 0.0769, "step": 8302 }, { "epoch": 2.6905379131561893, "grad_norm": 0.8605058789253235, "learning_rate": 1.3852266953426674e-07, "loss": 0.0778, "step": 8303 }, { "epoch": 2.6908619572261827, "grad_norm": 0.8319392800331116, "learning_rate": 1.3823572403165285e-07, "loss": 0.0707, "step": 8304 }, { "epoch": 2.691186001296176, "grad_norm": 0.9255814552307129, "learning_rate": 1.3794906758612252e-07, "loss": 0.0805, "step": 8305 }, { "epoch": 2.6915100453661696, "grad_norm": 0.8639987111091614, "learning_rate": 1.376627002327599e-07, "loss": 0.0804, "step": 8306 }, { "epoch": 2.6918340894361634, "grad_norm": 0.8433483839035034, "learning_rate": 1.373766220066136e-07, "loss": 0.0735, "step": 8307 }, { "epoch": 2.692158133506157, "grad_norm": 0.9205812811851501, "learning_rate": 1.3709083294269676e-07, "loss": 0.0761, "step": 8308 }, { "epoch": 2.6924821775761503, "grad_norm": 0.8764326572418213, "learning_rate": 1.368053330759872e-07, "loss": 0.0779, "step": 8309 }, { "epoch": 2.692806221646144, "grad_norm": 0.8411714434623718, "learning_rate": 1.3652012244142754e-07, "loss": 0.0706, "step": 8310 }, { "epoch": 2.693130265716137, "grad_norm": 0.8792476058006287, "learning_rate": 1.362352010739243e-07, "loss": 0.0788, "step": 8311 }, { "epoch": 2.693454309786131, "grad_norm": 0.9278955459594727, "learning_rate": 1.3595056900834986e-07, "loss": 0.0807, "step": 8312 }, { "epoch": 2.6937783538561244, "grad_norm": 0.9351165294647217, "learning_rate": 1.3566622627953968e-07, "loss": 0.0781, "step": 8313 }, { "epoch": 2.694102397926118, "grad_norm": 0.9058972597122192, "learning_rate": 1.3538217292229482e-07, "loss": 0.0781, "step": 8314 }, { "epoch": 2.6944264419961117, "grad_norm": 0.8710022568702698, "learning_rate": 1.3509840897138083e-07, "loss": 0.076, "step": 8315 }, { "epoch": 2.694750486066105, "grad_norm": 0.9338528513908386, "learning_rate": 1.3481493446152766e-07, "loss": 0.0745, "step": 8316 }, { "epoch": 2.6950745301360985, "grad_norm": 0.8894299864768982, "learning_rate": 1.3453174942743008e-07, "loss": 0.0784, "step": 8317 }, { "epoch": 2.695398574206092, "grad_norm": 0.8650580048561096, "learning_rate": 1.3424885390374593e-07, "loss": 0.0744, "step": 8318 }, { "epoch": 2.6957226182760854, "grad_norm": 1.0599915981292725, "learning_rate": 1.3396624792510082e-07, "loss": 0.0808, "step": 8319 }, { "epoch": 2.6960466623460793, "grad_norm": 0.9517320990562439, "learning_rate": 1.33683931526081e-07, "loss": 0.0806, "step": 8320 }, { "epoch": 2.6963707064160727, "grad_norm": 0.8994131684303284, "learning_rate": 1.3340190474124104e-07, "loss": 0.076, "step": 8321 }, { "epoch": 2.696694750486066, "grad_norm": 0.8776226043701172, "learning_rate": 1.3312016760509722e-07, "loss": 0.0708, "step": 8322 }, { "epoch": 2.6970187945560595, "grad_norm": 0.869220495223999, "learning_rate": 1.3283872015213168e-07, "loss": 0.0753, "step": 8323 }, { "epoch": 2.697342838626053, "grad_norm": 0.9218916893005371, "learning_rate": 1.3255756241679102e-07, "loss": 0.0783, "step": 8324 }, { "epoch": 2.697666882696047, "grad_norm": 0.9783483743667603, "learning_rate": 1.3227669443348578e-07, "loss": 0.082, "step": 8325 }, { "epoch": 2.6979909267660402, "grad_norm": 0.9013862609863281, "learning_rate": 1.3199611623659235e-07, "loss": 0.08, "step": 8326 }, { "epoch": 2.6983149708360337, "grad_norm": 0.9175854921340942, "learning_rate": 1.3171582786044968e-07, "loss": 0.0744, "step": 8327 }, { "epoch": 2.698639014906027, "grad_norm": 0.8921010494232178, "learning_rate": 1.3143582933936333e-07, "loss": 0.0722, "step": 8328 }, { "epoch": 2.6989630589760205, "grad_norm": 0.9263914227485657, "learning_rate": 1.3115612070760174e-07, "loss": 0.0738, "step": 8329 }, { "epoch": 2.6992871030460144, "grad_norm": 0.8688660860061646, "learning_rate": 1.3087670199939894e-07, "loss": 0.0731, "step": 8330 }, { "epoch": 2.699611147116008, "grad_norm": 0.9509294033050537, "learning_rate": 1.3059757324895283e-07, "loss": 0.0757, "step": 8331 }, { "epoch": 2.6999351911860012, "grad_norm": 0.8982728123664856, "learning_rate": 1.303187344904261e-07, "loss": 0.0762, "step": 8332 }, { "epoch": 2.7002592352559946, "grad_norm": 0.9074303507804871, "learning_rate": 1.3004018575794586e-07, "loss": 0.0788, "step": 8333 }, { "epoch": 2.700583279325988, "grad_norm": 0.9729928374290466, "learning_rate": 1.2976192708560432e-07, "loss": 0.0804, "step": 8334 }, { "epoch": 2.700907323395982, "grad_norm": 0.9070225358009338, "learning_rate": 1.2948395850745726e-07, "loss": 0.077, "step": 8335 }, { "epoch": 2.7012313674659754, "grad_norm": 0.8616864085197449, "learning_rate": 1.29206280057525e-07, "loss": 0.0711, "step": 8336 }, { "epoch": 2.701555411535969, "grad_norm": 0.8884169459342957, "learning_rate": 1.2892889176979284e-07, "loss": 0.075, "step": 8337 }, { "epoch": 2.7018794556059627, "grad_norm": 0.9448602795600891, "learning_rate": 1.2865179367821083e-07, "loss": 0.0793, "step": 8338 }, { "epoch": 2.702203499675956, "grad_norm": 0.8624501824378967, "learning_rate": 1.283749858166927e-07, "loss": 0.0771, "step": 8339 }, { "epoch": 2.7025275437459495, "grad_norm": 0.9130458831787109, "learning_rate": 1.280984682191172e-07, "loss": 0.0735, "step": 8340 }, { "epoch": 2.702851587815943, "grad_norm": 0.9405670762062073, "learning_rate": 1.2782224091932775e-07, "loss": 0.0797, "step": 8341 }, { "epoch": 2.7031756318859363, "grad_norm": 0.8795937299728394, "learning_rate": 1.2754630395113098e-07, "loss": 0.0748, "step": 8342 }, { "epoch": 2.70349967595593, "grad_norm": 0.9006626605987549, "learning_rate": 1.2727065734830013e-07, "loss": 0.0781, "step": 8343 }, { "epoch": 2.7038237200259236, "grad_norm": 0.9724311232566833, "learning_rate": 1.269953011445707e-07, "loss": 0.078, "step": 8344 }, { "epoch": 2.704147764095917, "grad_norm": 0.8542706370353699, "learning_rate": 1.267202353736438e-07, "loss": 0.0688, "step": 8345 }, { "epoch": 2.7044718081659105, "grad_norm": 0.8407845497131348, "learning_rate": 1.26445460069185e-07, "loss": 0.0743, "step": 8346 }, { "epoch": 2.704795852235904, "grad_norm": 0.8969268202781677, "learning_rate": 1.2617097526482407e-07, "loss": 0.0806, "step": 8347 }, { "epoch": 2.7051198963058978, "grad_norm": 0.8536497354507446, "learning_rate": 1.2589678099415582e-07, "loss": 0.0747, "step": 8348 }, { "epoch": 2.705443940375891, "grad_norm": 0.9546661376953125, "learning_rate": 1.256228772907378e-07, "loss": 0.0842, "step": 8349 }, { "epoch": 2.7057679844458846, "grad_norm": 0.9164236187934875, "learning_rate": 1.2534926418809433e-07, "loss": 0.0775, "step": 8350 }, { "epoch": 2.706092028515878, "grad_norm": 0.9462824463844299, "learning_rate": 1.2507594171971198e-07, "loss": 0.0798, "step": 8351 }, { "epoch": 2.7064160725858715, "grad_norm": 0.8498296141624451, "learning_rate": 1.2480290991904398e-07, "loss": 0.0735, "step": 8352 }, { "epoch": 2.7067401166558653, "grad_norm": 0.9814295172691345, "learning_rate": 1.245301688195058e-07, "loss": 0.0735, "step": 8353 }, { "epoch": 2.7070641607258588, "grad_norm": 0.9598404765129089, "learning_rate": 1.2425771845447853e-07, "loss": 0.0789, "step": 8354 }, { "epoch": 2.707388204795852, "grad_norm": 0.8988109827041626, "learning_rate": 1.2398555885730774e-07, "loss": 0.0806, "step": 8355 }, { "epoch": 2.7077122488658456, "grad_norm": 0.8295194506645203, "learning_rate": 1.2371369006130256e-07, "loss": 0.0776, "step": 8356 }, { "epoch": 2.708036292935839, "grad_norm": 0.9095755815505981, "learning_rate": 1.2344211209973811e-07, "loss": 0.0747, "step": 8357 }, { "epoch": 2.708360337005833, "grad_norm": 0.9850380420684814, "learning_rate": 1.2317082500585163e-07, "loss": 0.0831, "step": 8358 }, { "epoch": 2.7086843810758263, "grad_norm": 0.9201284050941467, "learning_rate": 1.2289982881284718e-07, "loss": 0.076, "step": 8359 }, { "epoch": 2.7090084251458197, "grad_norm": 0.8760098814964294, "learning_rate": 1.226291235538915e-07, "loss": 0.0787, "step": 8360 }, { "epoch": 2.7093324692158136, "grad_norm": 0.8688327074050903, "learning_rate": 1.223587092621162e-07, "loss": 0.0774, "step": 8361 }, { "epoch": 2.709656513285807, "grad_norm": 0.8596488237380981, "learning_rate": 1.2208858597061752e-07, "loss": 0.0744, "step": 8362 }, { "epoch": 2.7099805573558005, "grad_norm": 0.8696610331535339, "learning_rate": 1.21818753712456e-07, "loss": 0.0775, "step": 8363 }, { "epoch": 2.710304601425794, "grad_norm": 0.8679714798927307, "learning_rate": 1.2154921252065633e-07, "loss": 0.0711, "step": 8364 }, { "epoch": 2.7106286454957873, "grad_norm": 0.9504297375679016, "learning_rate": 1.2127996242820822e-07, "loss": 0.0822, "step": 8365 }, { "epoch": 2.710952689565781, "grad_norm": 0.8958672285079956, "learning_rate": 1.2101100346806478e-07, "loss": 0.0749, "step": 8366 }, { "epoch": 2.7112767336357746, "grad_norm": 0.8755009174346924, "learning_rate": 1.2074233567314408e-07, "loss": 0.0745, "step": 8367 }, { "epoch": 2.711600777705768, "grad_norm": 0.9226860404014587, "learning_rate": 1.2047395907632818e-07, "loss": 0.0808, "step": 8368 }, { "epoch": 2.7119248217757614, "grad_norm": 0.9615294933319092, "learning_rate": 1.2020587371046445e-07, "loss": 0.08, "step": 8369 }, { "epoch": 2.712248865845755, "grad_norm": 0.956706702709198, "learning_rate": 1.1993807960836322e-07, "loss": 0.0772, "step": 8370 }, { "epoch": 2.7125729099157487, "grad_norm": 0.896210253238678, "learning_rate": 1.1967057680280058e-07, "loss": 0.075, "step": 8371 }, { "epoch": 2.712896953985742, "grad_norm": 0.9272021055221558, "learning_rate": 1.1940336532651614e-07, "loss": 0.0735, "step": 8372 }, { "epoch": 2.7132209980557356, "grad_norm": 0.9543355703353882, "learning_rate": 1.1913644521221345e-07, "loss": 0.0809, "step": 8373 }, { "epoch": 2.713545042125729, "grad_norm": 0.8474852442741394, "learning_rate": 1.1886981649256169e-07, "loss": 0.0731, "step": 8374 }, { "epoch": 2.7138690861957224, "grad_norm": 0.9196550250053406, "learning_rate": 1.1860347920019304e-07, "loss": 0.0784, "step": 8375 }, { "epoch": 2.7141931302657163, "grad_norm": 0.879184365272522, "learning_rate": 1.1833743336770482e-07, "loss": 0.0735, "step": 8376 }, { "epoch": 2.7145171743357097, "grad_norm": 0.9110177755355835, "learning_rate": 1.1807167902765843e-07, "loss": 0.0745, "step": 8377 }, { "epoch": 2.714841218405703, "grad_norm": 0.8994605541229248, "learning_rate": 1.1780621621257953e-07, "loss": 0.0757, "step": 8378 }, { "epoch": 2.7151652624756966, "grad_norm": 0.8339632153511047, "learning_rate": 1.1754104495495882e-07, "loss": 0.0705, "step": 8379 }, { "epoch": 2.71548930654569, "grad_norm": 0.8231309652328491, "learning_rate": 1.1727616528724949e-07, "loss": 0.0689, "step": 8380 }, { "epoch": 2.715813350615684, "grad_norm": 0.9211947917938232, "learning_rate": 1.1701157724187173e-07, "loss": 0.0828, "step": 8381 }, { "epoch": 2.7161373946856773, "grad_norm": 0.9162659645080566, "learning_rate": 1.1674728085120713e-07, "loss": 0.0784, "step": 8382 }, { "epoch": 2.7164614387556707, "grad_norm": 0.9284588694572449, "learning_rate": 1.1648327614760452e-07, "loss": 0.0834, "step": 8383 }, { "epoch": 2.7167854828256646, "grad_norm": 0.8979219198226929, "learning_rate": 1.1621956316337391e-07, "loss": 0.0778, "step": 8384 }, { "epoch": 2.7171095268956575, "grad_norm": 0.7882916927337646, "learning_rate": 1.1595614193079224e-07, "loss": 0.067, "step": 8385 }, { "epoch": 2.7174335709656514, "grad_norm": 0.8570539355278015, "learning_rate": 1.1569301248209958e-07, "loss": 0.0701, "step": 8386 }, { "epoch": 2.717757615035645, "grad_norm": 0.9228929877281189, "learning_rate": 1.1543017484950015e-07, "loss": 0.0758, "step": 8387 }, { "epoch": 2.7180816591056383, "grad_norm": 0.8897868990898132, "learning_rate": 1.1516762906516322e-07, "loss": 0.0745, "step": 8388 }, { "epoch": 2.718405703175632, "grad_norm": 0.874020516872406, "learning_rate": 1.1490537516122141e-07, "loss": 0.078, "step": 8389 }, { "epoch": 2.7187297472456255, "grad_norm": 0.8467276096343994, "learning_rate": 1.1464341316977184e-07, "loss": 0.0707, "step": 8390 }, { "epoch": 2.719053791315619, "grad_norm": 0.9447891712188721, "learning_rate": 1.1438174312287664e-07, "loss": 0.0795, "step": 8391 }, { "epoch": 2.7193778353856124, "grad_norm": 0.9125964641571045, "learning_rate": 1.1412036505256158e-07, "loss": 0.0781, "step": 8392 }, { "epoch": 2.719701879455606, "grad_norm": 0.893666684627533, "learning_rate": 1.1385927899081661e-07, "loss": 0.0769, "step": 8393 }, { "epoch": 2.7200259235255997, "grad_norm": 0.9132059812545776, "learning_rate": 1.1359848496959618e-07, "loss": 0.0725, "step": 8394 }, { "epoch": 2.720349967595593, "grad_norm": 0.8375158905982971, "learning_rate": 1.1333798302081922e-07, "loss": 0.0701, "step": 8395 }, { "epoch": 2.7206740116655865, "grad_norm": 0.9415892362594604, "learning_rate": 1.1307777317636882e-07, "loss": 0.0827, "step": 8396 }, { "epoch": 2.72099805573558, "grad_norm": 0.8136382699012756, "learning_rate": 1.1281785546809115e-07, "loss": 0.0695, "step": 8397 }, { "epoch": 2.7213220998055734, "grad_norm": 0.8608342409133911, "learning_rate": 1.1255822992779858e-07, "loss": 0.071, "step": 8398 }, { "epoch": 2.7216461438755672, "grad_norm": 0.8565046787261963, "learning_rate": 1.1229889658726623e-07, "loss": 0.0757, "step": 8399 }, { "epoch": 2.7219701879455607, "grad_norm": 0.875421404838562, "learning_rate": 1.1203985547823427e-07, "loss": 0.0747, "step": 8400 }, { "epoch": 2.722294232015554, "grad_norm": 0.886735200881958, "learning_rate": 1.1178110663240676e-07, "loss": 0.0787, "step": 8401 }, { "epoch": 2.7226182760855475, "grad_norm": 0.937952995300293, "learning_rate": 1.1152265008145202e-07, "loss": 0.0784, "step": 8402 }, { "epoch": 2.722942320155541, "grad_norm": 0.9049589037895203, "learning_rate": 1.1126448585700306e-07, "loss": 0.0774, "step": 8403 }, { "epoch": 2.723266364225535, "grad_norm": 0.9026086330413818, "learning_rate": 1.110066139906557e-07, "loss": 0.0744, "step": 8404 }, { "epoch": 2.7235904082955282, "grad_norm": 0.9097543358802795, "learning_rate": 1.1074903451397195e-07, "loss": 0.0777, "step": 8405 }, { "epoch": 2.7239144523655217, "grad_norm": 0.8582219481468201, "learning_rate": 1.1049174745847657e-07, "loss": 0.0689, "step": 8406 }, { "epoch": 2.724238496435515, "grad_norm": 0.8198797106742859, "learning_rate": 1.1023475285565882e-07, "loss": 0.0707, "step": 8407 }, { "epoch": 2.7245625405055085, "grad_norm": 0.9629983305931091, "learning_rate": 1.099780507369727e-07, "loss": 0.0816, "step": 8408 }, { "epoch": 2.7248865845755024, "grad_norm": 0.9551554918289185, "learning_rate": 1.0972164113383616e-07, "loss": 0.0772, "step": 8409 }, { "epoch": 2.725210628645496, "grad_norm": 0.8462727665901184, "learning_rate": 1.09465524077631e-07, "loss": 0.0693, "step": 8410 }, { "epoch": 2.725534672715489, "grad_norm": 1.0670876502990723, "learning_rate": 1.0920969959970301e-07, "loss": 0.0793, "step": 8411 }, { "epoch": 2.725858716785483, "grad_norm": 0.9187403321266174, "learning_rate": 1.0895416773136408e-07, "loss": 0.0821, "step": 8412 }, { "epoch": 2.7261827608554765, "grad_norm": 0.939841091632843, "learning_rate": 1.0869892850388697e-07, "loss": 0.077, "step": 8413 }, { "epoch": 2.72650680492547, "grad_norm": 0.9433113932609558, "learning_rate": 1.0844398194851197e-07, "loss": 0.0807, "step": 8414 }, { "epoch": 2.7268308489954634, "grad_norm": 0.96625155210495, "learning_rate": 1.0818932809644161e-07, "loss": 0.0771, "step": 8415 }, { "epoch": 2.7271548930654568, "grad_norm": 1.2603490352630615, "learning_rate": 1.0793496697884265e-07, "loss": 0.0754, "step": 8416 }, { "epoch": 2.7274789371354506, "grad_norm": 0.8630130290985107, "learning_rate": 1.0768089862684684e-07, "loss": 0.0787, "step": 8417 }, { "epoch": 2.727802981205444, "grad_norm": 0.9122354984283447, "learning_rate": 1.0742712307154957e-07, "loss": 0.0814, "step": 8418 }, { "epoch": 2.7281270252754375, "grad_norm": 0.8623340129852295, "learning_rate": 1.0717364034401073e-07, "loss": 0.0741, "step": 8419 }, { "epoch": 2.728451069345431, "grad_norm": 0.9016427993774414, "learning_rate": 1.0692045047525384e-07, "loss": 0.075, "step": 8420 }, { "epoch": 2.7287751134154243, "grad_norm": 0.8372197151184082, "learning_rate": 1.066675534962669e-07, "loss": 0.0736, "step": 8421 }, { "epoch": 2.729099157485418, "grad_norm": 0.9791613817214966, "learning_rate": 1.0641494943800234e-07, "loss": 0.0818, "step": 8422 }, { "epoch": 2.7294232015554116, "grad_norm": 0.9533630609512329, "learning_rate": 1.0616263833137602e-07, "loss": 0.0758, "step": 8423 }, { "epoch": 2.729747245625405, "grad_norm": 0.924326479434967, "learning_rate": 1.0591062020726878e-07, "loss": 0.0781, "step": 8424 }, { "epoch": 2.7300712896953985, "grad_norm": 0.9117130041122437, "learning_rate": 1.0565889509652483e-07, "loss": 0.0769, "step": 8425 }, { "epoch": 2.730395333765392, "grad_norm": 0.7807494401931763, "learning_rate": 1.0540746302995341e-07, "loss": 0.0665, "step": 8426 }, { "epoch": 2.7307193778353858, "grad_norm": 0.8621374368667603, "learning_rate": 1.0515632403832715e-07, "loss": 0.073, "step": 8427 }, { "epoch": 2.731043421905379, "grad_norm": 1.0108366012573242, "learning_rate": 1.0490547815238228e-07, "loss": 0.081, "step": 8428 }, { "epoch": 2.7313674659753726, "grad_norm": 0.8846213817596436, "learning_rate": 1.0465492540282146e-07, "loss": 0.0788, "step": 8429 }, { "epoch": 2.731691510045366, "grad_norm": 0.8477645516395569, "learning_rate": 1.044046658203085e-07, "loss": 0.0723, "step": 8430 }, { "epoch": 2.7320155541153595, "grad_norm": 1.0279101133346558, "learning_rate": 1.0415469943547335e-07, "loss": 0.0787, "step": 8431 }, { "epoch": 2.7323395981853533, "grad_norm": 0.8763721585273743, "learning_rate": 1.0390502627890986e-07, "loss": 0.0769, "step": 8432 }, { "epoch": 2.7326636422553467, "grad_norm": 0.8510463833808899, "learning_rate": 1.0365564638117442e-07, "loss": 0.075, "step": 8433 }, { "epoch": 2.73298768632534, "grad_norm": 0.891975462436676, "learning_rate": 1.0340655977279012e-07, "loss": 0.0804, "step": 8434 }, { "epoch": 2.733311730395334, "grad_norm": 0.8500216007232666, "learning_rate": 1.0315776648424119e-07, "loss": 0.0746, "step": 8435 }, { "epoch": 2.733635774465327, "grad_norm": 0.8794539570808411, "learning_rate": 1.0290926654597938e-07, "loss": 0.0736, "step": 8436 }, { "epoch": 2.733959818535321, "grad_norm": 0.871335506439209, "learning_rate": 1.0266105998841702e-07, "loss": 0.072, "step": 8437 }, { "epoch": 2.7342838626053143, "grad_norm": 0.9088922739028931, "learning_rate": 1.0241314684193343e-07, "loss": 0.0765, "step": 8438 }, { "epoch": 2.7346079066753077, "grad_norm": 0.889601469039917, "learning_rate": 1.0216552713686989e-07, "loss": 0.0759, "step": 8439 }, { "epoch": 2.7349319507453016, "grad_norm": 0.8953149914741516, "learning_rate": 1.01918200903533e-07, "loss": 0.0782, "step": 8440 }, { "epoch": 2.735255994815295, "grad_norm": 0.9992877840995789, "learning_rate": 1.0167116817219325e-07, "loss": 0.0872, "step": 8441 }, { "epoch": 2.7355800388852884, "grad_norm": 0.90580815076828, "learning_rate": 1.0142442897308453e-07, "loss": 0.0779, "step": 8442 }, { "epoch": 2.735904082955282, "grad_norm": 1.0264875888824463, "learning_rate": 1.0117798333640627e-07, "loss": 0.082, "step": 8443 }, { "epoch": 2.7362281270252753, "grad_norm": 0.864154577255249, "learning_rate": 1.0093183129231993e-07, "loss": 0.0738, "step": 8444 }, { "epoch": 2.736552171095269, "grad_norm": 0.8238134384155273, "learning_rate": 1.0068597287095305e-07, "loss": 0.0756, "step": 8445 }, { "epoch": 2.7368762151652626, "grad_norm": 0.8123138546943665, "learning_rate": 1.0044040810239547e-07, "loss": 0.0729, "step": 8446 }, { "epoch": 2.737200259235256, "grad_norm": 0.8799177408218384, "learning_rate": 1.0019513701670285e-07, "loss": 0.0752, "step": 8447 }, { "epoch": 2.7375243033052494, "grad_norm": 0.9700337648391724, "learning_rate": 9.995015964389315e-08, "loss": 0.0769, "step": 8448 }, { "epoch": 2.737848347375243, "grad_norm": 0.9147710204124451, "learning_rate": 9.970547601394986e-08, "loss": 0.0766, "step": 8449 }, { "epoch": 2.7381723914452367, "grad_norm": 0.9626017212867737, "learning_rate": 9.94610861568196e-08, "loss": 0.0794, "step": 8450 }, { "epoch": 2.73849643551523, "grad_norm": 0.8789365291595459, "learning_rate": 9.92169901024137e-08, "loss": 0.0753, "step": 8451 }, { "epoch": 2.7388204795852236, "grad_norm": 0.8948490023612976, "learning_rate": 9.897318788060662e-08, "loss": 0.0762, "step": 8452 }, { "epoch": 2.739144523655217, "grad_norm": 0.867906928062439, "learning_rate": 9.872967952123752e-08, "loss": 0.0745, "step": 8453 }, { "epoch": 2.7394685677252104, "grad_norm": 0.8962223529815674, "learning_rate": 9.848646505410953e-08, "loss": 0.0736, "step": 8454 }, { "epoch": 2.7397926117952043, "grad_norm": 0.8685925602912903, "learning_rate": 9.824354450898966e-08, "loss": 0.0709, "step": 8455 }, { "epoch": 2.7401166558651977, "grad_norm": 0.955605685710907, "learning_rate": 9.800091791560939e-08, "loss": 0.0813, "step": 8456 }, { "epoch": 2.740440699935191, "grad_norm": 0.8607534170150757, "learning_rate": 9.775858530366334e-08, "loss": 0.0706, "step": 8457 }, { "epoch": 2.7407647440051845, "grad_norm": 1.0163851976394653, "learning_rate": 9.751654670281135e-08, "loss": 0.0786, "step": 8458 }, { "epoch": 2.741088788075178, "grad_norm": 0.9263463020324707, "learning_rate": 9.727480214267559e-08, "loss": 0.0811, "step": 8459 }, { "epoch": 2.741412832145172, "grad_norm": 0.9529585838317871, "learning_rate": 9.70333516528446e-08, "loss": 0.0847, "step": 8460 }, { "epoch": 2.7417368762151653, "grad_norm": 0.9520207643508911, "learning_rate": 9.679219526286837e-08, "loss": 0.0809, "step": 8461 }, { "epoch": 2.7420609202851587, "grad_norm": 0.8074455261230469, "learning_rate": 9.655133300226271e-08, "loss": 0.0715, "step": 8462 }, { "epoch": 2.7423849643551526, "grad_norm": 0.9240773916244507, "learning_rate": 9.631076490050684e-08, "loss": 0.0797, "step": 8463 }, { "epoch": 2.742709008425146, "grad_norm": 0.9265851378440857, "learning_rate": 9.60704909870433e-08, "loss": 0.0794, "step": 8464 }, { "epoch": 2.7430330524951394, "grad_norm": 0.865867018699646, "learning_rate": 9.583051129128051e-08, "loss": 0.0718, "step": 8465 }, { "epoch": 2.743357096565133, "grad_norm": 0.8869981169700623, "learning_rate": 9.559082584258833e-08, "loss": 0.0818, "step": 8466 }, { "epoch": 2.7436811406351262, "grad_norm": 0.9116880297660828, "learning_rate": 9.535143467030327e-08, "loss": 0.0793, "step": 8467 }, { "epoch": 2.74400518470512, "grad_norm": 0.8475185632705688, "learning_rate": 9.511233780372303e-08, "loss": 0.0705, "step": 8468 }, { "epoch": 2.7443292287751135, "grad_norm": 0.8898075819015503, "learning_rate": 9.487353527211223e-08, "loss": 0.0798, "step": 8469 }, { "epoch": 2.744653272845107, "grad_norm": 0.9375563263893127, "learning_rate": 9.463502710469697e-08, "loss": 0.0784, "step": 8470 }, { "epoch": 2.7449773169151004, "grad_norm": 0.8835859298706055, "learning_rate": 9.439681333066858e-08, "loss": 0.0717, "step": 8471 }, { "epoch": 2.745301360985094, "grad_norm": 0.8493410348892212, "learning_rate": 9.415889397918238e-08, "loss": 0.0708, "step": 8472 }, { "epoch": 2.7456254050550877, "grad_norm": 0.8702937960624695, "learning_rate": 9.3921269079357e-08, "loss": 0.075, "step": 8473 }, { "epoch": 2.745949449125081, "grad_norm": 0.8226486444473267, "learning_rate": 9.368393866027614e-08, "loss": 0.0681, "step": 8474 }, { "epoch": 2.7462734931950745, "grad_norm": 0.8785573244094849, "learning_rate": 9.344690275098573e-08, "loss": 0.0749, "step": 8475 }, { "epoch": 2.746597537265068, "grad_norm": 0.8903120160102844, "learning_rate": 9.321016138049727e-08, "loss": 0.0722, "step": 8476 }, { "epoch": 2.7469215813350614, "grad_norm": 0.9668460488319397, "learning_rate": 9.297371457778565e-08, "loss": 0.0811, "step": 8477 }, { "epoch": 2.7472456254050552, "grad_norm": 0.9164747595787048, "learning_rate": 9.273756237178938e-08, "loss": 0.0802, "step": 8478 }, { "epoch": 2.7475696694750487, "grad_norm": 0.9257664084434509, "learning_rate": 9.250170479141146e-08, "loss": 0.0764, "step": 8479 }, { "epoch": 2.747893713545042, "grad_norm": 0.8608869314193726, "learning_rate": 9.226614186551852e-08, "loss": 0.0742, "step": 8480 }, { "epoch": 2.7482177576150355, "grad_norm": 0.8738452196121216, "learning_rate": 9.20308736229411e-08, "loss": 0.0737, "step": 8481 }, { "epoch": 2.748541801685029, "grad_norm": 0.9163529276847839, "learning_rate": 9.179590009247397e-08, "loss": 0.0807, "step": 8482 }, { "epoch": 2.748865845755023, "grad_norm": 0.9178310632705688, "learning_rate": 9.15612213028752e-08, "loss": 0.074, "step": 8483 }, { "epoch": 2.749189889825016, "grad_norm": 0.8706983923912048, "learning_rate": 9.132683728286767e-08, "loss": 0.0756, "step": 8484 }, { "epoch": 2.7495139338950096, "grad_norm": 0.9542733430862427, "learning_rate": 9.109274806113732e-08, "loss": 0.0796, "step": 8485 }, { "epoch": 2.7498379779650035, "grad_norm": 0.8895235061645508, "learning_rate": 9.085895366633457e-08, "loss": 0.0785, "step": 8486 }, { "epoch": 2.7501620220349965, "grad_norm": 0.9087440967559814, "learning_rate": 9.062545412707375e-08, "loss": 0.0707, "step": 8487 }, { "epoch": 2.7504860661049904, "grad_norm": 0.998375415802002, "learning_rate": 9.039224947193254e-08, "loss": 0.0817, "step": 8488 }, { "epoch": 2.750810110174984, "grad_norm": 0.8863471746444702, "learning_rate": 9.01593397294534e-08, "loss": 0.0761, "step": 8489 }, { "epoch": 2.751134154244977, "grad_norm": 0.9317454099655151, "learning_rate": 8.992672492814158e-08, "loss": 0.0776, "step": 8490 }, { "epoch": 2.751458198314971, "grad_norm": 0.900424063205719, "learning_rate": 8.969440509646821e-08, "loss": 0.0793, "step": 8491 }, { "epoch": 2.7517822423849645, "grad_norm": 0.8822380900382996, "learning_rate": 8.946238026286552e-08, "loss": 0.0736, "step": 8492 }, { "epoch": 2.752106286454958, "grad_norm": 0.8611971139907837, "learning_rate": 8.923065045573165e-08, "loss": 0.076, "step": 8493 }, { "epoch": 2.7524303305249513, "grad_norm": 0.8159151077270508, "learning_rate": 8.899921570342807e-08, "loss": 0.0683, "step": 8494 }, { "epoch": 2.7527543745949448, "grad_norm": 0.7950197458267212, "learning_rate": 8.876807603428017e-08, "loss": 0.068, "step": 8495 }, { "epoch": 2.7530784186649386, "grad_norm": 0.9587838649749756, "learning_rate": 8.853723147657755e-08, "loss": 0.0804, "step": 8496 }, { "epoch": 2.753402462734932, "grad_norm": 0.9382262825965881, "learning_rate": 8.830668205857263e-08, "loss": 0.0795, "step": 8497 }, { "epoch": 2.7537265068049255, "grad_norm": 0.9049416184425354, "learning_rate": 8.807642780848335e-08, "loss": 0.0774, "step": 8498 }, { "epoch": 2.754050550874919, "grad_norm": 0.8489470481872559, "learning_rate": 8.784646875448971e-08, "loss": 0.0745, "step": 8499 }, { "epoch": 2.7543745949449123, "grad_norm": 0.8656203150749207, "learning_rate": 8.761680492473668e-08, "loss": 0.0756, "step": 8500 }, { "epoch": 2.754698639014906, "grad_norm": 0.824772298336029, "learning_rate": 8.738743634733316e-08, "loss": 0.0738, "step": 8501 }, { "epoch": 2.7550226830848996, "grad_norm": 0.8814231753349304, "learning_rate": 8.715836305035169e-08, "loss": 0.0725, "step": 8502 }, { "epoch": 2.755346727154893, "grad_norm": 0.85878586769104, "learning_rate": 8.692958506182847e-08, "loss": 0.0776, "step": 8503 }, { "epoch": 2.7556707712248865, "grad_norm": 0.8849053978919983, "learning_rate": 8.67011024097636e-08, "loss": 0.0712, "step": 8504 }, { "epoch": 2.75599481529488, "grad_norm": 0.9141733050346375, "learning_rate": 8.647291512212136e-08, "loss": 0.0816, "step": 8505 }, { "epoch": 2.7563188593648738, "grad_norm": 0.8470417261123657, "learning_rate": 8.624502322682942e-08, "loss": 0.0681, "step": 8506 }, { "epoch": 2.756642903434867, "grad_norm": 0.856762707233429, "learning_rate": 8.601742675177993e-08, "loss": 0.0709, "step": 8507 }, { "epoch": 2.7569669475048606, "grad_norm": 0.8500531315803528, "learning_rate": 8.57901257248278e-08, "loss": 0.0693, "step": 8508 }, { "epoch": 2.757290991574854, "grad_norm": 0.8620449304580688, "learning_rate": 8.556312017379332e-08, "loss": 0.0778, "step": 8509 }, { "epoch": 2.7576150356448474, "grad_norm": 0.9037514925003052, "learning_rate": 8.533641012645921e-08, "loss": 0.0777, "step": 8510 }, { "epoch": 2.7579390797148413, "grad_norm": 0.871510922908783, "learning_rate": 8.510999561057276e-08, "loss": 0.0746, "step": 8511 }, { "epoch": 2.7582631237848347, "grad_norm": 0.8849137425422668, "learning_rate": 8.488387665384457e-08, "loss": 0.0758, "step": 8512 }, { "epoch": 2.758587167854828, "grad_norm": 1.0339986085891724, "learning_rate": 8.465805328395055e-08, "loss": 0.0714, "step": 8513 }, { "epoch": 2.758911211924822, "grad_norm": 0.8823992609977722, "learning_rate": 8.443252552852776e-08, "loss": 0.0776, "step": 8514 }, { "epoch": 2.7592352559948155, "grad_norm": 0.9128614068031311, "learning_rate": 8.42072934151797e-08, "loss": 0.0755, "step": 8515 }, { "epoch": 2.759559300064809, "grad_norm": 0.8525954484939575, "learning_rate": 8.398235697147205e-08, "loss": 0.068, "step": 8516 }, { "epoch": 2.7598833441348023, "grad_norm": 0.9029262661933899, "learning_rate": 8.375771622493506e-08, "loss": 0.0798, "step": 8517 }, { "epoch": 2.7602073882047957, "grad_norm": 0.9318966269493103, "learning_rate": 8.353337120306282e-08, "loss": 0.0786, "step": 8518 }, { "epoch": 2.7605314322747896, "grad_norm": 0.9503961205482483, "learning_rate": 8.330932193331226e-08, "loss": 0.0827, "step": 8519 }, { "epoch": 2.760855476344783, "grad_norm": 0.8519180417060852, "learning_rate": 8.308556844310589e-08, "loss": 0.0688, "step": 8520 }, { "epoch": 2.7611795204147764, "grad_norm": 0.8932039141654968, "learning_rate": 8.286211075982764e-08, "loss": 0.078, "step": 8521 }, { "epoch": 2.76150356448477, "grad_norm": 0.8670885562896729, "learning_rate": 8.263894891082813e-08, "loss": 0.0763, "step": 8522 }, { "epoch": 2.7618276085547633, "grad_norm": 0.8525798320770264, "learning_rate": 8.241608292341913e-08, "loss": 0.074, "step": 8523 }, { "epoch": 2.762151652624757, "grad_norm": 0.8913243412971497, "learning_rate": 8.219351282487742e-08, "loss": 0.0743, "step": 8524 }, { "epoch": 2.7624756966947506, "grad_norm": 0.7880865931510925, "learning_rate": 8.197123864244344e-08, "loss": 0.0682, "step": 8525 }, { "epoch": 2.762799740764744, "grad_norm": 0.8847666382789612, "learning_rate": 8.174926040332182e-08, "loss": 0.0758, "step": 8526 }, { "epoch": 2.7631237848347374, "grad_norm": 0.8482584953308105, "learning_rate": 8.152757813468027e-08, "loss": 0.0698, "step": 8527 }, { "epoch": 2.763447828904731, "grad_norm": 0.8862192630767822, "learning_rate": 8.130619186365012e-08, "loss": 0.0785, "step": 8528 }, { "epoch": 2.7637718729747247, "grad_norm": 0.9533929228782654, "learning_rate": 8.10851016173278e-08, "loss": 0.0805, "step": 8529 }, { "epoch": 2.764095917044718, "grad_norm": 0.9363769292831421, "learning_rate": 8.086430742277191e-08, "loss": 0.0752, "step": 8530 }, { "epoch": 2.7644199611147116, "grad_norm": 0.8216090798377991, "learning_rate": 8.064380930700556e-08, "loss": 0.0719, "step": 8531 }, { "epoch": 2.764744005184705, "grad_norm": 0.836811363697052, "learning_rate": 8.042360729701604e-08, "loss": 0.078, "step": 8532 }, { "epoch": 2.7650680492546984, "grad_norm": 0.8772767186164856, "learning_rate": 8.020370141975347e-08, "loss": 0.0774, "step": 8533 }, { "epoch": 2.7653920933246923, "grad_norm": 0.9011791944503784, "learning_rate": 7.998409170213245e-08, "loss": 0.0757, "step": 8534 }, { "epoch": 2.7657161373946857, "grad_norm": 0.9040393233299255, "learning_rate": 7.976477817103117e-08, "loss": 0.0782, "step": 8535 }, { "epoch": 2.766040181464679, "grad_norm": 0.877589762210846, "learning_rate": 7.954576085329152e-08, "loss": 0.0731, "step": 8536 }, { "epoch": 2.766364225534673, "grad_norm": 0.8785209655761719, "learning_rate": 7.93270397757187e-08, "loss": 0.0741, "step": 8537 }, { "epoch": 2.766688269604666, "grad_norm": 0.9807142019271851, "learning_rate": 7.910861496508216e-08, "loss": 0.0821, "step": 8538 }, { "epoch": 2.76701231367466, "grad_norm": 0.8884080648422241, "learning_rate": 7.88904864481152e-08, "loss": 0.0752, "step": 8539 }, { "epoch": 2.7673363577446533, "grad_norm": 0.9433414340019226, "learning_rate": 7.867265425151454e-08, "loss": 0.0815, "step": 8540 }, { "epoch": 2.7676604018146467, "grad_norm": 0.9098288416862488, "learning_rate": 7.845511840194081e-08, "loss": 0.0772, "step": 8541 }, { "epoch": 2.7679844458846405, "grad_norm": 0.976300835609436, "learning_rate": 7.823787892601825e-08, "loss": 0.0837, "step": 8542 }, { "epoch": 2.768308489954634, "grad_norm": 0.9695088863372803, "learning_rate": 7.802093585033449e-08, "loss": 0.0816, "step": 8543 }, { "epoch": 2.7686325340246274, "grad_norm": 0.8716114163398743, "learning_rate": 7.780428920144217e-08, "loss": 0.0727, "step": 8544 }, { "epoch": 2.768956578094621, "grad_norm": 0.8254791498184204, "learning_rate": 7.758793900585565e-08, "loss": 0.0727, "step": 8545 }, { "epoch": 2.7692806221646142, "grad_norm": 0.88580721616745, "learning_rate": 7.737188529005484e-08, "loss": 0.0765, "step": 8546 }, { "epoch": 2.769604666234608, "grad_norm": 0.963691771030426, "learning_rate": 7.715612808048251e-08, "loss": 0.0782, "step": 8547 }, { "epoch": 2.7699287103046015, "grad_norm": 0.894769549369812, "learning_rate": 7.6940667403545e-08, "loss": 0.0757, "step": 8548 }, { "epoch": 2.770252754374595, "grad_norm": 0.8622497916221619, "learning_rate": 7.672550328561318e-08, "loss": 0.0733, "step": 8549 }, { "epoch": 2.7705767984445884, "grad_norm": 0.9186211824417114, "learning_rate": 7.651063575301986e-08, "loss": 0.0802, "step": 8550 }, { "epoch": 2.770900842514582, "grad_norm": 0.8643956780433655, "learning_rate": 7.62960648320643e-08, "loss": 0.0755, "step": 8551 }, { "epoch": 2.7712248865845757, "grad_norm": 1.073041319847107, "learning_rate": 7.608179054900634e-08, "loss": 0.088, "step": 8552 }, { "epoch": 2.771548930654569, "grad_norm": 0.878182590007782, "learning_rate": 7.586781293007273e-08, "loss": 0.0751, "step": 8553 }, { "epoch": 2.7718729747245625, "grad_norm": 0.9077216982841492, "learning_rate": 7.565413200145089e-08, "loss": 0.078, "step": 8554 }, { "epoch": 2.772197018794556, "grad_norm": 0.9496373534202576, "learning_rate": 7.544074778929378e-08, "loss": 0.0785, "step": 8555 }, { "epoch": 2.7725210628645494, "grad_norm": 1.0797755718231201, "learning_rate": 7.522766031971774e-08, "loss": 0.0777, "step": 8556 }, { "epoch": 2.7728451069345432, "grad_norm": 0.903744101524353, "learning_rate": 7.501486961880245e-08, "loss": 0.0752, "step": 8557 }, { "epoch": 2.7731691510045366, "grad_norm": 0.8617210388183594, "learning_rate": 7.480237571259153e-08, "loss": 0.0697, "step": 8558 }, { "epoch": 2.77349319507453, "grad_norm": 0.9265851378440857, "learning_rate": 7.459017862709194e-08, "loss": 0.0775, "step": 8559 }, { "epoch": 2.7738172391445235, "grad_norm": 0.8909482359886169, "learning_rate": 7.437827838827488e-08, "loss": 0.0731, "step": 8560 }, { "epoch": 2.774141283214517, "grad_norm": 0.866847038269043, "learning_rate": 7.416667502207458e-08, "loss": 0.0715, "step": 8561 }, { "epoch": 2.774465327284511, "grad_norm": 0.8507097959518433, "learning_rate": 7.395536855438923e-08, "loss": 0.0721, "step": 8562 }, { "epoch": 2.774789371354504, "grad_norm": 0.8779752850532532, "learning_rate": 7.37443590110809e-08, "loss": 0.0779, "step": 8563 }, { "epoch": 2.7751134154244976, "grad_norm": 0.8747959733009338, "learning_rate": 7.353364641797533e-08, "loss": 0.0752, "step": 8564 }, { "epoch": 2.7754374594944915, "grad_norm": 0.9139300584793091, "learning_rate": 7.332323080086106e-08, "loss": 0.0761, "step": 8565 }, { "epoch": 2.775761503564485, "grad_norm": 0.9525585770606995, "learning_rate": 7.311311218549166e-08, "loss": 0.0815, "step": 8566 }, { "epoch": 2.7760855476344783, "grad_norm": 0.9190864562988281, "learning_rate": 7.290329059758294e-08, "loss": 0.0834, "step": 8567 }, { "epoch": 2.7764095917044718, "grad_norm": 0.8622193336486816, "learning_rate": 7.269376606281547e-08, "loss": 0.0754, "step": 8568 }, { "epoch": 2.776733635774465, "grad_norm": 0.9313884973526001, "learning_rate": 7.248453860683291e-08, "loss": 0.0809, "step": 8569 }, { "epoch": 2.777057679844459, "grad_norm": 0.941539466381073, "learning_rate": 7.227560825524255e-08, "loss": 0.0815, "step": 8570 }, { "epoch": 2.7773817239144525, "grad_norm": 0.8436852097511292, "learning_rate": 7.20669750336156e-08, "loss": 0.0732, "step": 8571 }, { "epoch": 2.777705767984446, "grad_norm": 0.9402943253517151, "learning_rate": 7.185863896748662e-08, "loss": 0.0792, "step": 8572 }, { "epoch": 2.7780298120544393, "grad_norm": 0.917573094367981, "learning_rate": 7.165060008235414e-08, "loss": 0.0725, "step": 8573 }, { "epoch": 2.7783538561244328, "grad_norm": 0.9383273124694824, "learning_rate": 7.14428584036797e-08, "loss": 0.0735, "step": 8574 }, { "epoch": 2.7786779001944266, "grad_norm": 0.8470904231071472, "learning_rate": 7.123541395688966e-08, "loss": 0.0689, "step": 8575 }, { "epoch": 2.77900194426442, "grad_norm": 0.8494551181793213, "learning_rate": 7.102826676737202e-08, "loss": 0.0759, "step": 8576 }, { "epoch": 2.7793259883344135, "grad_norm": 0.9772122502326965, "learning_rate": 7.082141686048066e-08, "loss": 0.0809, "step": 8577 }, { "epoch": 2.779650032404407, "grad_norm": 0.9359027743339539, "learning_rate": 7.061486426153146e-08, "loss": 0.0775, "step": 8578 }, { "epoch": 2.7799740764744003, "grad_norm": 0.8508344292640686, "learning_rate": 7.040860899580475e-08, "loss": 0.073, "step": 8579 }, { "epoch": 2.780298120544394, "grad_norm": 0.9112252593040466, "learning_rate": 7.020265108854423e-08, "loss": 0.078, "step": 8580 }, { "epoch": 2.7806221646143876, "grad_norm": 0.8710126280784607, "learning_rate": 6.99969905649564e-08, "loss": 0.0665, "step": 8581 }, { "epoch": 2.780946208684381, "grad_norm": 0.8951093554496765, "learning_rate": 6.979162745021306e-08, "loss": 0.0749, "step": 8582 }, { "epoch": 2.7812702527543745, "grad_norm": 0.8741493821144104, "learning_rate": 6.958656176944801e-08, "loss": 0.0747, "step": 8583 }, { "epoch": 2.781594296824368, "grad_norm": 0.9016504883766174, "learning_rate": 6.938179354776003e-08, "loss": 0.0765, "step": 8584 }, { "epoch": 2.7819183408943617, "grad_norm": 0.8614570498466492, "learning_rate": 6.917732281020995e-08, "loss": 0.0734, "step": 8585 }, { "epoch": 2.782242384964355, "grad_norm": 0.8869245052337646, "learning_rate": 6.897314958182327e-08, "loss": 0.076, "step": 8586 }, { "epoch": 2.7825664290343486, "grad_norm": 0.877219557762146, "learning_rate": 6.87692738875892e-08, "loss": 0.0757, "step": 8587 }, { "epoch": 2.7828904731043425, "grad_norm": 0.8669131994247437, "learning_rate": 6.856569575245969e-08, "loss": 0.077, "step": 8588 }, { "epoch": 2.7832145171743354, "grad_norm": 0.9033114910125732, "learning_rate": 6.836241520135123e-08, "loss": 0.0776, "step": 8589 }, { "epoch": 2.7835385612443293, "grad_norm": 1.0447516441345215, "learning_rate": 6.815943225914278e-08, "loss": 0.0828, "step": 8590 }, { "epoch": 2.7838626053143227, "grad_norm": 0.8430342078208923, "learning_rate": 6.795674695067783e-08, "loss": 0.0679, "step": 8591 }, { "epoch": 2.784186649384316, "grad_norm": 0.9234492182731628, "learning_rate": 6.77543593007629e-08, "loss": 0.0769, "step": 8592 }, { "epoch": 2.78451069345431, "grad_norm": 0.9635306596755981, "learning_rate": 6.755226933416876e-08, "loss": 0.0751, "step": 8593 }, { "epoch": 2.7848347375243034, "grad_norm": 0.9842798709869385, "learning_rate": 6.735047707562863e-08, "loss": 0.0839, "step": 8594 }, { "epoch": 2.785158781594297, "grad_norm": 0.8382657766342163, "learning_rate": 6.714898254984031e-08, "loss": 0.0679, "step": 8595 }, { "epoch": 2.7854828256642903, "grad_norm": 0.9571649432182312, "learning_rate": 6.69477857814646e-08, "loss": 0.0812, "step": 8596 }, { "epoch": 2.7858068697342837, "grad_norm": 0.8997218608856201, "learning_rate": 6.674688679512654e-08, "loss": 0.0781, "step": 8597 }, { "epoch": 2.7861309138042776, "grad_norm": 0.9701622128486633, "learning_rate": 6.654628561541337e-08, "loss": 0.0777, "step": 8598 }, { "epoch": 2.786454957874271, "grad_norm": 1.0179133415222168, "learning_rate": 6.634598226687772e-08, "loss": 0.08, "step": 8599 }, { "epoch": 2.7867790019442644, "grad_norm": 0.8601292371749878, "learning_rate": 6.614597677403384e-08, "loss": 0.0739, "step": 8600 }, { "epoch": 2.787103046014258, "grad_norm": 0.8769491910934448, "learning_rate": 6.594626916136077e-08, "loss": 0.076, "step": 8601 }, { "epoch": 2.7874270900842513, "grad_norm": 0.9738726019859314, "learning_rate": 6.574685945330145e-08, "loss": 0.0784, "step": 8602 }, { "epoch": 2.787751134154245, "grad_norm": 0.8933708667755127, "learning_rate": 6.554774767426026e-08, "loss": 0.0796, "step": 8603 }, { "epoch": 2.7880751782242386, "grad_norm": 0.9039463400840759, "learning_rate": 6.534893384860824e-08, "loss": 0.0783, "step": 8604 }, { "epoch": 2.788399222294232, "grad_norm": 0.8605583906173706, "learning_rate": 6.515041800067678e-08, "loss": 0.0728, "step": 8605 }, { "epoch": 2.7887232663642254, "grad_norm": 0.9187542200088501, "learning_rate": 6.495220015476366e-08, "loss": 0.0771, "step": 8606 }, { "epoch": 2.789047310434219, "grad_norm": 0.8536416888237, "learning_rate": 6.475428033512754e-08, "loss": 0.0666, "step": 8607 }, { "epoch": 2.7893713545042127, "grad_norm": 0.9643122553825378, "learning_rate": 6.455665856599291e-08, "loss": 0.0805, "step": 8608 }, { "epoch": 2.789695398574206, "grad_norm": 0.945328950881958, "learning_rate": 6.435933487154627e-08, "loss": 0.078, "step": 8609 }, { "epoch": 2.7900194426441995, "grad_norm": 0.8264806866645813, "learning_rate": 6.416230927593803e-08, "loss": 0.0706, "step": 8610 }, { "epoch": 2.790343486714193, "grad_norm": 0.9553369879722595, "learning_rate": 6.39655818032825e-08, "loss": 0.0743, "step": 8611 }, { "epoch": 2.7906675307841864, "grad_norm": 0.8676332235336304, "learning_rate": 6.376915247765735e-08, "loss": 0.0691, "step": 8612 }, { "epoch": 2.7909915748541803, "grad_norm": 0.8517956733703613, "learning_rate": 6.357302132310338e-08, "loss": 0.0711, "step": 8613 }, { "epoch": 2.7913156189241737, "grad_norm": 0.896984338760376, "learning_rate": 6.337718836362473e-08, "loss": 0.0777, "step": 8614 }, { "epoch": 2.791639662994167, "grad_norm": 0.9082828760147095, "learning_rate": 6.318165362319023e-08, "loss": 0.0751, "step": 8615 }, { "epoch": 2.791963707064161, "grad_norm": 0.840907871723175, "learning_rate": 6.298641712573105e-08, "loss": 0.0696, "step": 8616 }, { "epoch": 2.7922877511341544, "grad_norm": 0.8931449055671692, "learning_rate": 6.279147889514226e-08, "loss": 0.0739, "step": 8617 }, { "epoch": 2.792611795204148, "grad_norm": 0.9458181858062744, "learning_rate": 6.259683895528251e-08, "loss": 0.0794, "step": 8618 }, { "epoch": 2.7929358392741412, "grad_norm": 0.9642060399055481, "learning_rate": 6.24024973299736e-08, "loss": 0.084, "step": 8619 }, { "epoch": 2.7932598833441347, "grad_norm": 0.84830641746521, "learning_rate": 6.220845404300124e-08, "loss": 0.0746, "step": 8620 }, { "epoch": 2.7935839274141285, "grad_norm": 0.8820691108703613, "learning_rate": 6.201470911811474e-08, "loss": 0.0767, "step": 8621 }, { "epoch": 2.793907971484122, "grad_norm": 0.9251769185066223, "learning_rate": 6.182126257902626e-08, "loss": 0.0783, "step": 8622 }, { "epoch": 2.7942320155541154, "grad_norm": 0.8165448307991028, "learning_rate": 6.162811444941159e-08, "loss": 0.0677, "step": 8623 }, { "epoch": 2.794556059624109, "grad_norm": 0.9533488750457764, "learning_rate": 6.143526475291067e-08, "loss": 0.0765, "step": 8624 }, { "epoch": 2.7948801036941022, "grad_norm": 0.9226420521736145, "learning_rate": 6.124271351312605e-08, "loss": 0.0778, "step": 8625 }, { "epoch": 2.795204147764096, "grad_norm": 0.8921147584915161, "learning_rate": 6.105046075362441e-08, "loss": 0.0766, "step": 8626 }, { "epoch": 2.7955281918340895, "grad_norm": 0.9103520512580872, "learning_rate": 6.085850649793529e-08, "loss": 0.0762, "step": 8627 }, { "epoch": 2.795852235904083, "grad_norm": 0.9070742726325989, "learning_rate": 6.066685076955264e-08, "loss": 0.0747, "step": 8628 }, { "epoch": 2.7961762799740764, "grad_norm": 0.8707572221755981, "learning_rate": 6.047549359193245e-08, "loss": 0.0719, "step": 8629 }, { "epoch": 2.79650032404407, "grad_norm": 0.8317505717277527, "learning_rate": 6.028443498849596e-08, "loss": 0.0694, "step": 8630 }, { "epoch": 2.7968243681140637, "grad_norm": 0.8567790389060974, "learning_rate": 6.009367498262587e-08, "loss": 0.0693, "step": 8631 }, { "epoch": 2.797148412184057, "grad_norm": 0.926906406879425, "learning_rate": 5.990321359767015e-08, "loss": 0.0761, "step": 8632 }, { "epoch": 2.7974724562540505, "grad_norm": 0.8667235970497131, "learning_rate": 5.97130508569388e-08, "loss": 0.0714, "step": 8633 }, { "epoch": 2.797796500324044, "grad_norm": 0.9117993116378784, "learning_rate": 5.95231867837065e-08, "loss": 0.0782, "step": 8634 }, { "epoch": 2.7981205443940373, "grad_norm": 0.8850106596946716, "learning_rate": 5.933362140121052e-08, "loss": 0.0771, "step": 8635 }, { "epoch": 2.798444588464031, "grad_norm": 0.878083348274231, "learning_rate": 5.9144354732651455e-08, "loss": 0.0781, "step": 8636 }, { "epoch": 2.7987686325340246, "grad_norm": 0.8513649106025696, "learning_rate": 5.8955386801194394e-08, "loss": 0.071, "step": 8637 }, { "epoch": 2.799092676604018, "grad_norm": 0.8753370642662048, "learning_rate": 5.8766717629966387e-08, "loss": 0.0764, "step": 8638 }, { "epoch": 2.799416720674012, "grad_norm": 0.8825068473815918, "learning_rate": 5.857834724205979e-08, "loss": 0.0749, "step": 8639 }, { "epoch": 2.7997407647440054, "grad_norm": 0.8680891394615173, "learning_rate": 5.839027566052841e-08, "loss": 0.0749, "step": 8640 }, { "epoch": 2.8000648088139988, "grad_norm": 0.9579867124557495, "learning_rate": 5.820250290839047e-08, "loss": 0.0761, "step": 8641 }, { "epoch": 2.800388852883992, "grad_norm": 0.9145108461380005, "learning_rate": 5.801502900862788e-08, "loss": 0.0731, "step": 8642 }, { "epoch": 2.8007128969539856, "grad_norm": 0.926467776298523, "learning_rate": 5.782785398418561e-08, "loss": 0.08, "step": 8643 }, { "epoch": 2.8010369410239795, "grad_norm": 0.9767326712608337, "learning_rate": 5.7640977857972016e-08, "loss": 0.0781, "step": 8644 }, { "epoch": 2.801360985093973, "grad_norm": 0.9075636267662048, "learning_rate": 5.745440065285879e-08, "loss": 0.0828, "step": 8645 }, { "epoch": 2.8016850291639663, "grad_norm": 0.8939979076385498, "learning_rate": 5.726812239168128e-08, "loss": 0.078, "step": 8646 }, { "epoch": 2.8020090732339598, "grad_norm": 0.8266425728797913, "learning_rate": 5.708214309723792e-08, "loss": 0.0732, "step": 8647 }, { "epoch": 2.802333117303953, "grad_norm": 0.7988861799240112, "learning_rate": 5.689646279229105e-08, "loss": 0.0642, "step": 8648 }, { "epoch": 2.802657161373947, "grad_norm": 0.9102234840393066, "learning_rate": 5.671108149956611e-08, "loss": 0.0711, "step": 8649 }, { "epoch": 2.8029812054439405, "grad_norm": 0.983685314655304, "learning_rate": 5.6525999241751894e-08, "loss": 0.0815, "step": 8650 }, { "epoch": 2.803305249513934, "grad_norm": 0.8759168386459351, "learning_rate": 5.6341216041500555e-08, "loss": 0.071, "step": 8651 }, { "epoch": 2.8036292935839273, "grad_norm": 0.8909490704536438, "learning_rate": 5.6156731921428455e-08, "loss": 0.078, "step": 8652 }, { "epoch": 2.8039533376539207, "grad_norm": 0.9161653518676758, "learning_rate": 5.597254690411363e-08, "loss": 0.0755, "step": 8653 }, { "epoch": 2.8042773817239146, "grad_norm": 0.8215521574020386, "learning_rate": 5.5788661012099176e-08, "loss": 0.073, "step": 8654 }, { "epoch": 2.804601425793908, "grad_norm": 0.8546934723854065, "learning_rate": 5.560507426789069e-08, "loss": 0.0679, "step": 8655 }, { "epoch": 2.8049254698639015, "grad_norm": 0.9193974733352661, "learning_rate": 5.5421786693957705e-08, "loss": 0.0754, "step": 8656 }, { "epoch": 2.805249513933895, "grad_norm": 0.9443433880805969, "learning_rate": 5.523879831273282e-08, "loss": 0.0774, "step": 8657 }, { "epoch": 2.8055735580038883, "grad_norm": 1.0062669515609741, "learning_rate": 5.505610914661147e-08, "loss": 0.0806, "step": 8658 }, { "epoch": 2.805897602073882, "grad_norm": 0.8628509044647217, "learning_rate": 5.487371921795381e-08, "loss": 0.0766, "step": 8659 }, { "epoch": 2.8062216461438756, "grad_norm": 0.8530628085136414, "learning_rate": 5.4691628549082e-08, "loss": 0.0747, "step": 8660 }, { "epoch": 2.806545690213869, "grad_norm": 0.8226529955863953, "learning_rate": 5.450983716228292e-08, "loss": 0.0721, "step": 8661 }, { "epoch": 2.806869734283863, "grad_norm": 0.9194059371948242, "learning_rate": 5.4328345079805164e-08, "loss": 0.0806, "step": 8662 }, { "epoch": 2.807193778353856, "grad_norm": 0.9187507629394531, "learning_rate": 5.4147152323862085e-08, "loss": 0.0722, "step": 8663 }, { "epoch": 2.8075178224238497, "grad_norm": 0.9742519855499268, "learning_rate": 5.3966258916629824e-08, "loss": 0.08, "step": 8664 }, { "epoch": 2.807841866493843, "grad_norm": 0.8697577714920044, "learning_rate": 5.378566488024817e-08, "loss": 0.0716, "step": 8665 }, { "epoch": 2.8081659105638366, "grad_norm": 0.9710431694984436, "learning_rate": 5.3605370236820276e-08, "loss": 0.0849, "step": 8666 }, { "epoch": 2.8084899546338304, "grad_norm": 0.8214179873466492, "learning_rate": 5.3425375008411276e-08, "loss": 0.0707, "step": 8667 }, { "epoch": 2.808813998703824, "grad_norm": 0.9119242429733276, "learning_rate": 5.3245679217052424e-08, "loss": 0.077, "step": 8668 }, { "epoch": 2.8091380427738173, "grad_norm": 0.929480791091919, "learning_rate": 5.3066282884735863e-08, "loss": 0.078, "step": 8669 }, { "epoch": 2.8094620868438107, "grad_norm": 0.8982275128364563, "learning_rate": 5.2887186033417914e-08, "loss": 0.0771, "step": 8670 }, { "epoch": 2.809786130913804, "grad_norm": 0.8926584124565125, "learning_rate": 5.270838868501854e-08, "loss": 0.0734, "step": 8671 }, { "epoch": 2.810110174983798, "grad_norm": 0.9076064229011536, "learning_rate": 5.252989086142107e-08, "loss": 0.0772, "step": 8672 }, { "epoch": 2.8104342190537914, "grad_norm": 0.9674218893051147, "learning_rate": 5.235169258447137e-08, "loss": 0.0856, "step": 8673 }, { "epoch": 2.810758263123785, "grad_norm": 1.0249892473220825, "learning_rate": 5.2173793875979204e-08, "loss": 0.0851, "step": 8674 }, { "epoch": 2.8110823071937783, "grad_norm": 0.8544679284095764, "learning_rate": 5.199619475771856e-08, "loss": 0.0683, "step": 8675 }, { "epoch": 2.8114063512637717, "grad_norm": 0.9031746983528137, "learning_rate": 5.181889525142453e-08, "loss": 0.0784, "step": 8676 }, { "epoch": 2.8117303953337656, "grad_norm": 0.8899961113929749, "learning_rate": 5.164189537879782e-08, "loss": 0.0747, "step": 8677 }, { "epoch": 2.812054439403759, "grad_norm": 0.8602359294891357, "learning_rate": 5.146519516150084e-08, "loss": 0.0742, "step": 8678 }, { "epoch": 2.8123784834737524, "grad_norm": 0.9854103326797485, "learning_rate": 5.128879462116071e-08, "loss": 0.0832, "step": 8679 }, { "epoch": 2.812702527543746, "grad_norm": 0.8897486329078674, "learning_rate": 5.111269377936656e-08, "loss": 0.0766, "step": 8680 }, { "epoch": 2.8130265716137393, "grad_norm": 0.8990198969841003, "learning_rate": 5.093689265767143e-08, "loss": 0.0767, "step": 8681 }, { "epoch": 2.813350615683733, "grad_norm": 0.91489577293396, "learning_rate": 5.0761391277591996e-08, "loss": 0.0772, "step": 8682 }, { "epoch": 2.8136746597537265, "grad_norm": 0.88707035779953, "learning_rate": 5.05861896606083e-08, "loss": 0.0782, "step": 8683 }, { "epoch": 2.81399870382372, "grad_norm": 0.9026859402656555, "learning_rate": 5.0411287828162346e-08, "loss": 0.0708, "step": 8684 }, { "epoch": 2.8143227478937134, "grad_norm": 1.0039856433868408, "learning_rate": 5.023668580166091e-08, "loss": 0.0772, "step": 8685 }, { "epoch": 2.814646791963707, "grad_norm": 0.9419729113578796, "learning_rate": 5.0062383602473566e-08, "loss": 0.0777, "step": 8686 }, { "epoch": 2.8149708360337007, "grad_norm": 0.8873862624168396, "learning_rate": 4.9888381251933237e-08, "loss": 0.0732, "step": 8687 }, { "epoch": 2.815294880103694, "grad_norm": 0.9214391708374023, "learning_rate": 4.971467877133651e-08, "loss": 0.078, "step": 8688 }, { "epoch": 2.8156189241736875, "grad_norm": 0.8661234378814697, "learning_rate": 4.954127618194193e-08, "loss": 0.0751, "step": 8689 }, { "epoch": 2.8159429682436814, "grad_norm": 0.8473151922225952, "learning_rate": 4.936817350497336e-08, "loss": 0.0711, "step": 8690 }, { "epoch": 2.816267012313675, "grad_norm": 0.867581307888031, "learning_rate": 4.919537076161579e-08, "loss": 0.0784, "step": 8691 }, { "epoch": 2.8165910563836682, "grad_norm": 0.968896210193634, "learning_rate": 4.90228679730198e-08, "loss": 0.0714, "step": 8692 }, { "epoch": 2.8169151004536617, "grad_norm": 0.8834143280982971, "learning_rate": 4.8850665160297406e-08, "loss": 0.0701, "step": 8693 }, { "epoch": 2.817239144523655, "grad_norm": 0.8912068009376526, "learning_rate": 4.867876234452423e-08, "loss": 0.0774, "step": 8694 }, { "epoch": 2.817563188593649, "grad_norm": 0.8768351078033447, "learning_rate": 4.85071595467404e-08, "loss": 0.0749, "step": 8695 }, { "epoch": 2.8178872326636424, "grad_norm": 0.9168806672096252, "learning_rate": 4.8335856787947447e-08, "loss": 0.076, "step": 8696 }, { "epoch": 2.818211276733636, "grad_norm": 0.8847835659980774, "learning_rate": 4.81648540891122e-08, "loss": 0.0708, "step": 8697 }, { "epoch": 2.8185353208036292, "grad_norm": 0.9171305298805237, "learning_rate": 4.799415147116265e-08, "loss": 0.0824, "step": 8698 }, { "epoch": 2.8188593648736227, "grad_norm": 0.8939773440361023, "learning_rate": 4.782374895499236e-08, "loss": 0.0752, "step": 8699 }, { "epoch": 2.8191834089436165, "grad_norm": 0.9174849390983582, "learning_rate": 4.7653646561455767e-08, "loss": 0.0823, "step": 8700 }, { "epoch": 2.81950745301361, "grad_norm": 0.87873774766922, "learning_rate": 4.7483844311372594e-08, "loss": 0.0725, "step": 8701 }, { "epoch": 2.8198314970836034, "grad_norm": 0.8400615453720093, "learning_rate": 4.731434222552456e-08, "loss": 0.0749, "step": 8702 }, { "epoch": 2.820155541153597, "grad_norm": 0.8082554340362549, "learning_rate": 4.7145140324657e-08, "loss": 0.0714, "step": 8703 }, { "epoch": 2.82047958522359, "grad_norm": 0.8531041145324707, "learning_rate": 4.697623862947892e-08, "loss": 0.073, "step": 8704 }, { "epoch": 2.820803629293584, "grad_norm": 0.9330157041549683, "learning_rate": 4.680763716066239e-08, "loss": 0.0787, "step": 8705 }, { "epoch": 2.8211276733635775, "grad_norm": 0.9409183859825134, "learning_rate": 4.663933593884229e-08, "loss": 0.0773, "step": 8706 }, { "epoch": 2.821451717433571, "grad_norm": 0.8692289590835571, "learning_rate": 4.6471334984616866e-08, "loss": 0.073, "step": 8707 }, { "epoch": 2.8217757615035644, "grad_norm": 0.8960288166999817, "learning_rate": 4.6303634318548006e-08, "loss": 0.0765, "step": 8708 }, { "epoch": 2.8220998055735578, "grad_norm": 0.9870824813842773, "learning_rate": 4.613623396116068e-08, "loss": 0.0898, "step": 8709 }, { "epoch": 2.8224238496435516, "grad_norm": 0.8814342617988586, "learning_rate": 4.596913393294322e-08, "loss": 0.0755, "step": 8710 }, { "epoch": 2.822747893713545, "grad_norm": 0.8680152297019958, "learning_rate": 4.580233425434677e-08, "loss": 0.0758, "step": 8711 }, { "epoch": 2.8230719377835385, "grad_norm": 0.7859067320823669, "learning_rate": 4.563583494578638e-08, "loss": 0.0718, "step": 8712 }, { "epoch": 2.8233959818535324, "grad_norm": 0.8998501896858215, "learning_rate": 4.546963602763937e-08, "loss": 0.077, "step": 8713 }, { "epoch": 2.8237200259235253, "grad_norm": 0.8794705867767334, "learning_rate": 4.530373752024753e-08, "loss": 0.0736, "step": 8714 }, { "epoch": 2.824044069993519, "grad_norm": 0.8842076063156128, "learning_rate": 4.51381394439146e-08, "loss": 0.0693, "step": 8715 }, { "epoch": 2.8243681140635126, "grad_norm": 0.8637332916259766, "learning_rate": 4.497284181890882e-08, "loss": 0.0725, "step": 8716 }, { "epoch": 2.824692158133506, "grad_norm": 0.9013912677764893, "learning_rate": 4.480784466546068e-08, "loss": 0.0785, "step": 8717 }, { "epoch": 2.8250162022035, "grad_norm": 0.963401734828949, "learning_rate": 4.4643148003764015e-08, "loss": 0.0809, "step": 8718 }, { "epoch": 2.8253402462734933, "grad_norm": 0.8618778586387634, "learning_rate": 4.44787518539766e-08, "loss": 0.0737, "step": 8719 }, { "epoch": 2.8256642903434868, "grad_norm": 0.939586877822876, "learning_rate": 4.4314656236218444e-08, "loss": 0.0678, "step": 8720 }, { "epoch": 2.82598833441348, "grad_norm": 0.851174533367157, "learning_rate": 4.415086117057377e-08, "loss": 0.0748, "step": 8721 }, { "epoch": 2.8263123784834736, "grad_norm": 0.9462675452232361, "learning_rate": 4.398736667708875e-08, "loss": 0.0831, "step": 8722 }, { "epoch": 2.8266364225534675, "grad_norm": 0.9516298174858093, "learning_rate": 4.382417277577433e-08, "loss": 0.0782, "step": 8723 }, { "epoch": 2.826960466623461, "grad_norm": 0.9504342079162598, "learning_rate": 4.3661279486603424e-08, "loss": 0.0772, "step": 8724 }, { "epoch": 2.8272845106934543, "grad_norm": 0.9191964864730835, "learning_rate": 4.349868682951286e-08, "loss": 0.0805, "step": 8725 }, { "epoch": 2.8276085547634477, "grad_norm": 0.9080502390861511, "learning_rate": 4.333639482440199e-08, "loss": 0.0817, "step": 8726 }, { "epoch": 2.827932598833441, "grad_norm": 0.812075674533844, "learning_rate": 4.3174403491134385e-08, "loss": 0.069, "step": 8727 }, { "epoch": 2.828256642903435, "grad_norm": 0.8327502608299255, "learning_rate": 4.301271284953584e-08, "loss": 0.0694, "step": 8728 }, { "epoch": 2.8285806869734285, "grad_norm": 0.9250109791755676, "learning_rate": 4.285132291939526e-08, "loss": 0.0786, "step": 8729 }, { "epoch": 2.828904731043422, "grad_norm": 0.8713056445121765, "learning_rate": 4.2690233720466265e-08, "loss": 0.0728, "step": 8730 }, { "epoch": 2.8292287751134153, "grad_norm": 0.9858421087265015, "learning_rate": 4.2529445272463946e-08, "loss": 0.0834, "step": 8731 }, { "epoch": 2.8295528191834087, "grad_norm": 0.8879863619804382, "learning_rate": 4.2368957595067264e-08, "loss": 0.0778, "step": 8732 }, { "epoch": 2.8298768632534026, "grad_norm": 0.9293619394302368, "learning_rate": 4.220877070791857e-08, "loss": 0.0757, "step": 8733 }, { "epoch": 2.830200907323396, "grad_norm": 0.8157750964164734, "learning_rate": 4.204888463062273e-08, "loss": 0.0692, "step": 8734 }, { "epoch": 2.8305249513933894, "grad_norm": 0.8670499324798584, "learning_rate": 4.188929938274911e-08, "loss": 0.0732, "step": 8735 }, { "epoch": 2.830848995463383, "grad_norm": 0.9903301000595093, "learning_rate": 4.1730014983828724e-08, "loss": 0.084, "step": 8736 }, { "epoch": 2.8311730395333763, "grad_norm": 0.9433955550193787, "learning_rate": 4.157103145335628e-08, "loss": 0.079, "step": 8737 }, { "epoch": 2.83149708360337, "grad_norm": 1.0336216688156128, "learning_rate": 4.141234881079065e-08, "loss": 0.0803, "step": 8738 }, { "epoch": 2.8318211276733636, "grad_norm": 0.9306154251098633, "learning_rate": 4.125396707555213e-08, "loss": 0.0789, "step": 8739 }, { "epoch": 2.832145171743357, "grad_norm": 0.9157119393348694, "learning_rate": 4.109588626702576e-08, "loss": 0.0738, "step": 8740 }, { "epoch": 2.832469215813351, "grad_norm": 0.8765474557876587, "learning_rate": 4.0938106404558864e-08, "loss": 0.0739, "step": 8741 }, { "epoch": 2.8327932598833443, "grad_norm": 0.8910435438156128, "learning_rate": 4.078062750746209e-08, "loss": 0.0785, "step": 8742 }, { "epoch": 2.8331173039533377, "grad_norm": 0.9361657500267029, "learning_rate": 4.062344959500947e-08, "loss": 0.0835, "step": 8743 }, { "epoch": 2.833441348023331, "grad_norm": 0.9637318253517151, "learning_rate": 4.0466572686437833e-08, "loss": 0.079, "step": 8744 }, { "epoch": 2.8337653920933246, "grad_norm": 0.8398253917694092, "learning_rate": 4.0309996800947936e-08, "loss": 0.0742, "step": 8745 }, { "epoch": 2.8340894361633184, "grad_norm": 0.8814817070960999, "learning_rate": 4.0153721957702504e-08, "loss": 0.0756, "step": 8746 }, { "epoch": 2.834413480233312, "grad_norm": 0.8335652351379395, "learning_rate": 3.9997748175828467e-08, "loss": 0.0705, "step": 8747 }, { "epoch": 2.8347375243033053, "grad_norm": 0.9374161958694458, "learning_rate": 3.9842075474415545e-08, "loss": 0.0806, "step": 8748 }, { "epoch": 2.8350615683732987, "grad_norm": 0.8186621069908142, "learning_rate": 3.9686703872516e-08, "loss": 0.0706, "step": 8749 }, { "epoch": 2.835385612443292, "grad_norm": 0.9040276408195496, "learning_rate": 3.953163338914656e-08, "loss": 0.0762, "step": 8750 }, { "epoch": 2.835709656513286, "grad_norm": 0.8741912245750427, "learning_rate": 3.9376864043285943e-08, "loss": 0.0759, "step": 8751 }, { "epoch": 2.8360337005832794, "grad_norm": 0.8934181928634644, "learning_rate": 3.922239585387649e-08, "loss": 0.0789, "step": 8752 }, { "epoch": 2.836357744653273, "grad_norm": 0.9757236242294312, "learning_rate": 3.906822883982336e-08, "loss": 0.0792, "step": 8753 }, { "epoch": 2.8366817887232663, "grad_norm": 0.9433469772338867, "learning_rate": 3.891436301999563e-08, "loss": 0.0748, "step": 8754 }, { "epoch": 2.8370058327932597, "grad_norm": 0.8428290486335754, "learning_rate": 3.876079841322461e-08, "loss": 0.0735, "step": 8755 }, { "epoch": 2.8373298768632536, "grad_norm": 0.9037506580352783, "learning_rate": 3.8607535038305276e-08, "loss": 0.0731, "step": 8756 }, { "epoch": 2.837653920933247, "grad_norm": 0.8666014075279236, "learning_rate": 3.84545729139954e-08, "loss": 0.0707, "step": 8757 }, { "epoch": 2.8379779650032404, "grad_norm": 0.8894285559654236, "learning_rate": 3.83019120590164e-08, "loss": 0.0735, "step": 8758 }, { "epoch": 2.838302009073234, "grad_norm": 0.9272712469100952, "learning_rate": 3.814955249205221e-08, "loss": 0.0781, "step": 8759 }, { "epoch": 2.8386260531432272, "grad_norm": 0.9276229739189148, "learning_rate": 3.7997494231750145e-08, "loss": 0.0764, "step": 8760 }, { "epoch": 2.838950097213221, "grad_norm": 0.9098535180091858, "learning_rate": 3.784573729672086e-08, "loss": 0.08, "step": 8761 }, { "epoch": 2.8392741412832145, "grad_norm": 0.8889827728271484, "learning_rate": 3.769428170553785e-08, "loss": 0.0759, "step": 8762 }, { "epoch": 2.839598185353208, "grad_norm": 0.8861127495765686, "learning_rate": 3.754312747673766e-08, "loss": 0.074, "step": 8763 }, { "epoch": 2.839922229423202, "grad_norm": 0.9090628623962402, "learning_rate": 3.739227462882022e-08, "loss": 0.0791, "step": 8764 }, { "epoch": 2.840246273493195, "grad_norm": 0.9150841236114502, "learning_rate": 3.724172318024854e-08, "loss": 0.0775, "step": 8765 }, { "epoch": 2.8405703175631887, "grad_norm": 0.9157789349555969, "learning_rate": 3.709147314944872e-08, "loss": 0.0811, "step": 8766 }, { "epoch": 2.840894361633182, "grad_norm": 0.9196391105651855, "learning_rate": 3.6941524554809924e-08, "loss": 0.0744, "step": 8767 }, { "epoch": 2.8412184057031755, "grad_norm": 0.9282419085502625, "learning_rate": 3.6791877414683594e-08, "loss": 0.0788, "step": 8768 }, { "epoch": 2.8415424497731694, "grad_norm": 0.858267068862915, "learning_rate": 3.664253174738647e-08, "loss": 0.0717, "step": 8769 }, { "epoch": 2.841866493843163, "grad_norm": 0.9045819044113159, "learning_rate": 3.649348757119614e-08, "loss": 0.0741, "step": 8770 }, { "epoch": 2.8421905379131562, "grad_norm": 0.926044762134552, "learning_rate": 3.634474490435413e-08, "loss": 0.0765, "step": 8771 }, { "epoch": 2.8425145819831497, "grad_norm": 0.892652690410614, "learning_rate": 3.6196303765065333e-08, "loss": 0.0766, "step": 8772 }, { "epoch": 2.842838626053143, "grad_norm": 1.0337949991226196, "learning_rate": 3.60481641714977e-08, "loss": 0.0813, "step": 8773 }, { "epoch": 2.843162670123137, "grad_norm": 0.8592774271965027, "learning_rate": 3.590032614178174e-08, "loss": 0.0746, "step": 8774 }, { "epoch": 2.8434867141931304, "grad_norm": 0.8674032092094421, "learning_rate": 3.57527896940113e-08, "loss": 0.0741, "step": 8775 }, { "epoch": 2.843810758263124, "grad_norm": 0.9188392162322998, "learning_rate": 3.560555484624417e-08, "loss": 0.0733, "step": 8776 }, { "epoch": 2.844134802333117, "grad_norm": 0.9348613023757935, "learning_rate": 3.545862161649927e-08, "loss": 0.0835, "step": 8777 }, { "epoch": 2.8444588464031106, "grad_norm": 0.8782564997673035, "learning_rate": 3.531199002276109e-08, "loss": 0.0729, "step": 8778 }, { "epoch": 2.8447828904731045, "grad_norm": 0.8679888844490051, "learning_rate": 3.5165660082975006e-08, "loss": 0.0746, "step": 8779 }, { "epoch": 2.845106934543098, "grad_norm": 0.9066128134727478, "learning_rate": 3.501963181505058e-08, "loss": 0.0723, "step": 8780 }, { "epoch": 2.8454309786130914, "grad_norm": 0.8842100501060486, "learning_rate": 3.487390523686074e-08, "loss": 0.0772, "step": 8781 }, { "epoch": 2.845755022683085, "grad_norm": 0.9143519997596741, "learning_rate": 3.472848036624038e-08, "loss": 0.0754, "step": 8782 }, { "epoch": 2.846079066753078, "grad_norm": 0.8060352802276611, "learning_rate": 3.4583357220988326e-08, "loss": 0.0651, "step": 8783 }, { "epoch": 2.846403110823072, "grad_norm": 0.8758019804954529, "learning_rate": 3.443853581886619e-08, "loss": 0.0753, "step": 8784 }, { "epoch": 2.8467271548930655, "grad_norm": 0.884460985660553, "learning_rate": 3.4294016177598974e-08, "loss": 0.0766, "step": 8785 }, { "epoch": 2.847051198963059, "grad_norm": 0.8895008563995361, "learning_rate": 3.4149798314874195e-08, "loss": 0.0784, "step": 8786 }, { "epoch": 2.8473752430330523, "grad_norm": 0.8915991187095642, "learning_rate": 3.4005882248343e-08, "loss": 0.0747, "step": 8787 }, { "epoch": 2.8476992871030458, "grad_norm": 0.9081102013587952, "learning_rate": 3.3862267995618817e-08, "loss": 0.0789, "step": 8788 }, { "epoch": 2.8480233311730396, "grad_norm": 0.8390566110610962, "learning_rate": 3.3718955574279234e-08, "loss": 0.0707, "step": 8789 }, { "epoch": 2.848347375243033, "grad_norm": 0.8644533157348633, "learning_rate": 3.357594500186384e-08, "loss": 0.0693, "step": 8790 }, { "epoch": 2.8486714193130265, "grad_norm": 0.8662945628166199, "learning_rate": 3.3433236295876134e-08, "loss": 0.0774, "step": 8791 }, { "epoch": 2.8489954633830203, "grad_norm": 0.9425113797187805, "learning_rate": 3.329082947378215e-08, "loss": 0.0799, "step": 8792 }, { "epoch": 2.8493195074530138, "grad_norm": 0.8594256043434143, "learning_rate": 3.314872455301071e-08, "loss": 0.0727, "step": 8793 }, { "epoch": 2.849643551523007, "grad_norm": 0.8771162033081055, "learning_rate": 3.300692155095458e-08, "loss": 0.0759, "step": 8794 }, { "epoch": 2.8499675955930006, "grad_norm": 0.8600313067436218, "learning_rate": 3.286542048496904e-08, "loss": 0.0733, "step": 8795 }, { "epoch": 2.850291639662994, "grad_norm": 0.821404218673706, "learning_rate": 3.272422137237219e-08, "loss": 0.0691, "step": 8796 }, { "epoch": 2.850615683732988, "grad_norm": 0.9205437302589417, "learning_rate": 3.258332423044547e-08, "loss": 0.0769, "step": 8797 }, { "epoch": 2.8509397278029813, "grad_norm": 0.8754212260246277, "learning_rate": 3.2442729076433697e-08, "loss": 0.0762, "step": 8798 }, { "epoch": 2.8512637718729748, "grad_norm": 0.9110721349716187, "learning_rate": 3.230243592754368e-08, "loss": 0.0787, "step": 8799 }, { "epoch": 2.851587815942968, "grad_norm": 0.8736501932144165, "learning_rate": 3.2162444800946655e-08, "loss": 0.0663, "step": 8800 }, { "epoch": 2.8519118600129616, "grad_norm": 0.8907871842384338, "learning_rate": 3.202275571377589e-08, "loss": 0.0794, "step": 8801 }, { "epoch": 2.8522359040829555, "grad_norm": 0.9323838353157043, "learning_rate": 3.188336868312769e-08, "loss": 0.0746, "step": 8802 }, { "epoch": 2.852559948152949, "grad_norm": 0.9358437657356262, "learning_rate": 3.1744283726062306e-08, "loss": 0.0787, "step": 8803 }, { "epoch": 2.8528839922229423, "grad_norm": 0.870239794254303, "learning_rate": 3.160550085960168e-08, "loss": 0.0754, "step": 8804 }, { "epoch": 2.8532080362929357, "grad_norm": 0.8820590376853943, "learning_rate": 3.1467020100732215e-08, "loss": 0.0759, "step": 8805 }, { "epoch": 2.853532080362929, "grad_norm": 0.8466381430625916, "learning_rate": 3.1328841466401746e-08, "loss": 0.0722, "step": 8806 }, { "epoch": 2.853856124432923, "grad_norm": 0.9091260433197021, "learning_rate": 3.1190964973522865e-08, "loss": 0.0795, "step": 8807 }, { "epoch": 2.8541801685029164, "grad_norm": 0.9665597081184387, "learning_rate": 3.105339063896956e-08, "loss": 0.0816, "step": 8808 }, { "epoch": 2.85450421257291, "grad_norm": 0.8894763588905334, "learning_rate": 3.0916118479580593e-08, "loss": 0.0751, "step": 8809 }, { "epoch": 2.8548282566429033, "grad_norm": 0.8645616769790649, "learning_rate": 3.077914851215585e-08, "loss": 0.075, "step": 8810 }, { "epoch": 2.8551523007128967, "grad_norm": 0.9180304408073425, "learning_rate": 3.064248075345916e-08, "loss": 0.0797, "step": 8811 }, { "epoch": 2.8554763447828906, "grad_norm": 0.9625119566917419, "learning_rate": 3.050611522021796e-08, "loss": 0.0794, "step": 8812 }, { "epoch": 2.855800388852884, "grad_norm": 0.9530915021896362, "learning_rate": 3.0370051929121405e-08, "loss": 0.0805, "step": 8813 }, { "epoch": 2.8561244329228774, "grad_norm": 0.9082760810852051, "learning_rate": 3.023429089682284e-08, "loss": 0.0717, "step": 8814 }, { "epoch": 2.8564484769928713, "grad_norm": 0.8581361174583435, "learning_rate": 3.009883213993786e-08, "loss": 0.0719, "step": 8815 }, { "epoch": 2.8567725210628643, "grad_norm": 0.8786942362785339, "learning_rate": 2.996367567504544e-08, "loss": 0.0764, "step": 8816 }, { "epoch": 2.857096565132858, "grad_norm": 0.8706663846969604, "learning_rate": 2.9828821518687045e-08, "loss": 0.075, "step": 8817 }, { "epoch": 2.8574206092028516, "grad_norm": 0.9187700152397156, "learning_rate": 2.9694269687367826e-08, "loss": 0.077, "step": 8818 }, { "epoch": 2.857744653272845, "grad_norm": 0.8860181570053101, "learning_rate": 2.9560020197555716e-08, "loss": 0.0768, "step": 8819 }, { "epoch": 2.858068697342839, "grad_norm": 0.8907939791679382, "learning_rate": 2.9426073065681183e-08, "loss": 0.0751, "step": 8820 }, { "epoch": 2.8583927414128323, "grad_norm": 0.8291096687316895, "learning_rate": 2.929242830813861e-08, "loss": 0.0725, "step": 8821 }, { "epoch": 2.8587167854828257, "grad_norm": 0.9561754465103149, "learning_rate": 2.915908594128436e-08, "loss": 0.0772, "step": 8822 }, { "epoch": 2.859040829552819, "grad_norm": 0.9024383425712585, "learning_rate": 2.9026045981438434e-08, "loss": 0.0768, "step": 8823 }, { "epoch": 2.8593648736228126, "grad_norm": 0.8623805046081543, "learning_rate": 2.889330844488364e-08, "loss": 0.0742, "step": 8824 }, { "epoch": 2.8596889176928064, "grad_norm": 0.8919248580932617, "learning_rate": 2.8760873347865593e-08, "loss": 0.0745, "step": 8825 }, { "epoch": 2.8600129617628, "grad_norm": 0.9097840785980225, "learning_rate": 2.862874070659327e-08, "loss": 0.0785, "step": 8826 }, { "epoch": 2.8603370058327933, "grad_norm": 0.8863422274589539, "learning_rate": 2.8496910537238185e-08, "loss": 0.0747, "step": 8827 }, { "epoch": 2.8606610499027867, "grad_norm": 0.8750684261322021, "learning_rate": 2.8365382855935487e-08, "loss": 0.0758, "step": 8828 }, { "epoch": 2.86098509397278, "grad_norm": 0.8420607447624207, "learning_rate": 2.8234157678782846e-08, "loss": 0.0684, "step": 8829 }, { "epoch": 2.861309138042774, "grad_norm": 0.8596554398536682, "learning_rate": 2.8103235021840204e-08, "loss": 0.0742, "step": 8830 }, { "epoch": 2.8616331821127674, "grad_norm": 0.908790647983551, "learning_rate": 2.7972614901132235e-08, "loss": 0.0815, "step": 8831 }, { "epoch": 2.861957226182761, "grad_norm": 0.8810282945632935, "learning_rate": 2.784229733264504e-08, "loss": 0.0791, "step": 8832 }, { "epoch": 2.8622812702527543, "grad_norm": 0.9018552899360657, "learning_rate": 2.771228233232809e-08, "loss": 0.0773, "step": 8833 }, { "epoch": 2.8626053143227477, "grad_norm": 0.823049783706665, "learning_rate": 2.7582569916094205e-08, "loss": 0.0712, "step": 8834 }, { "epoch": 2.8629293583927415, "grad_norm": 0.9358600974082947, "learning_rate": 2.745316009981902e-08, "loss": 0.0789, "step": 8835 }, { "epoch": 2.863253402462735, "grad_norm": 0.9165016412734985, "learning_rate": 2.732405289934098e-08, "loss": 0.0759, "step": 8836 }, { "epoch": 2.8635774465327284, "grad_norm": 0.8944207429885864, "learning_rate": 2.719524833046133e-08, "loss": 0.0779, "step": 8837 }, { "epoch": 2.863901490602722, "grad_norm": 0.8423044681549072, "learning_rate": 2.7066746408944968e-08, "loss": 0.0691, "step": 8838 }, { "epoch": 2.8642255346727152, "grad_norm": 0.7926920652389526, "learning_rate": 2.6938547150518746e-08, "loss": 0.0654, "step": 8839 }, { "epoch": 2.864549578742709, "grad_norm": 0.9157641530036926, "learning_rate": 2.6810650570873454e-08, "loss": 0.0831, "step": 8840 }, { "epoch": 2.8648736228127025, "grad_norm": 0.866848349571228, "learning_rate": 2.6683056685662122e-08, "loss": 0.0742, "step": 8841 }, { "epoch": 2.865197666882696, "grad_norm": 0.8142363429069519, "learning_rate": 2.6555765510500875e-08, "loss": 0.0728, "step": 8842 }, { "epoch": 2.86552171095269, "grad_norm": 0.8857007026672363, "learning_rate": 2.6428777060969468e-08, "loss": 0.0799, "step": 8843 }, { "epoch": 2.8658457550226832, "grad_norm": 0.9255932569503784, "learning_rate": 2.6302091352609637e-08, "loss": 0.0773, "step": 8844 }, { "epoch": 2.8661697990926767, "grad_norm": 0.9063679575920105, "learning_rate": 2.617570840092648e-08, "loss": 0.0747, "step": 8845 }, { "epoch": 2.86649384316267, "grad_norm": 0.9214964509010315, "learning_rate": 2.604962822138818e-08, "loss": 0.0805, "step": 8846 }, { "epoch": 2.8668178872326635, "grad_norm": 0.9403132796287537, "learning_rate": 2.5923850829425723e-08, "loss": 0.0808, "step": 8847 }, { "epoch": 2.8671419313026574, "grad_norm": 0.8666374683380127, "learning_rate": 2.579837624043291e-08, "loss": 0.0748, "step": 8848 }, { "epoch": 2.867465975372651, "grad_norm": 0.918161153793335, "learning_rate": 2.5673204469766898e-08, "loss": 0.0767, "step": 8849 }, { "epoch": 2.8677900194426442, "grad_norm": 0.9361032843589783, "learning_rate": 2.5548335532747105e-08, "loss": 0.0775, "step": 8850 }, { "epoch": 2.8681140635126376, "grad_norm": 0.8582929968833923, "learning_rate": 2.5423769444656575e-08, "loss": 0.0747, "step": 8851 }, { "epoch": 2.868438107582631, "grad_norm": 0.8770160675048828, "learning_rate": 2.52995062207409e-08, "loss": 0.0758, "step": 8852 }, { "epoch": 2.868762151652625, "grad_norm": 0.8568238615989685, "learning_rate": 2.517554587620874e-08, "loss": 0.0724, "step": 8853 }, { "epoch": 2.8690861957226184, "grad_norm": 0.903739869594574, "learning_rate": 2.5051888426231574e-08, "loss": 0.0743, "step": 8854 }, { "epoch": 2.869410239792612, "grad_norm": 0.8782305121421814, "learning_rate": 2.492853388594396e-08, "loss": 0.0777, "step": 8855 }, { "epoch": 2.869734283862605, "grad_norm": 0.9001045823097229, "learning_rate": 2.480548227044327e-08, "loss": 0.079, "step": 8856 }, { "epoch": 2.8700583279325986, "grad_norm": 0.9648503065109253, "learning_rate": 2.4682733594789677e-08, "loss": 0.0811, "step": 8857 }, { "epoch": 2.8703823720025925, "grad_norm": 0.8991042971611023, "learning_rate": 2.4560287874006716e-08, "loss": 0.0732, "step": 8858 }, { "epoch": 2.870706416072586, "grad_norm": 0.8516407012939453, "learning_rate": 2.443814512308018e-08, "loss": 0.0704, "step": 8859 }, { "epoch": 2.8710304601425793, "grad_norm": 0.9465522766113281, "learning_rate": 2.431630535695978e-08, "loss": 0.0783, "step": 8860 }, { "epoch": 2.8713545042125728, "grad_norm": 0.9010804295539856, "learning_rate": 2.419476859055664e-08, "loss": 0.0747, "step": 8861 }, { "epoch": 2.871678548282566, "grad_norm": 0.9947733283042908, "learning_rate": 2.4073534838746637e-08, "loss": 0.0796, "step": 8862 }, { "epoch": 2.87200259235256, "grad_norm": 0.8678649663925171, "learning_rate": 2.3952604116366795e-08, "loss": 0.0744, "step": 8863 }, { "epoch": 2.8723266364225535, "grad_norm": 0.9155958294868469, "learning_rate": 2.383197643821833e-08, "loss": 0.0753, "step": 8864 }, { "epoch": 2.872650680492547, "grad_norm": 0.8620344400405884, "learning_rate": 2.3711651819064984e-08, "loss": 0.0733, "step": 8865 }, { "epoch": 2.8729747245625408, "grad_norm": 0.8952129483222961, "learning_rate": 2.359163027363276e-08, "loss": 0.0787, "step": 8866 }, { "epoch": 2.8732987686325338, "grad_norm": 0.837651789188385, "learning_rate": 2.3471911816611846e-08, "loss": 0.0733, "step": 8867 }, { "epoch": 2.8736228127025276, "grad_norm": 0.8996900916099548, "learning_rate": 2.335249646265414e-08, "loss": 0.0779, "step": 8868 }, { "epoch": 2.873946856772521, "grad_norm": 1.0499439239501953, "learning_rate": 2.3233384226375167e-08, "loss": 0.0813, "step": 8869 }, { "epoch": 2.8742709008425145, "grad_norm": 0.9339820742607117, "learning_rate": 2.311457512235271e-08, "loss": 0.0768, "step": 8870 }, { "epoch": 2.8745949449125083, "grad_norm": 0.8691450357437134, "learning_rate": 2.2996069165128198e-08, "loss": 0.0759, "step": 8871 }, { "epoch": 2.8749189889825018, "grad_norm": 0.8384815454483032, "learning_rate": 2.2877866369205858e-08, "loss": 0.0684, "step": 8872 }, { "epoch": 2.875243033052495, "grad_norm": 0.9416871070861816, "learning_rate": 2.2759966749051897e-08, "loss": 0.0825, "step": 8873 }, { "epoch": 2.8755670771224886, "grad_norm": 0.8636833429336548, "learning_rate": 2.2642370319096718e-08, "loss": 0.076, "step": 8874 }, { "epoch": 2.875891121192482, "grad_norm": 0.9424716830253601, "learning_rate": 2.2525077093732695e-08, "loss": 0.0786, "step": 8875 }, { "epoch": 2.876215165262476, "grad_norm": 0.8951495885848999, "learning_rate": 2.2408087087315567e-08, "loss": 0.0795, "step": 8876 }, { "epoch": 2.8765392093324693, "grad_norm": 0.8318362832069397, "learning_rate": 2.2291400314163325e-08, "loss": 0.0735, "step": 8877 }, { "epoch": 2.8768632534024627, "grad_norm": 0.829992413520813, "learning_rate": 2.217501678855788e-08, "loss": 0.0717, "step": 8878 }, { "epoch": 2.877187297472456, "grad_norm": 0.8823353052139282, "learning_rate": 2.2058936524742835e-08, "loss": 0.0726, "step": 8879 }, { "epoch": 2.8775113415424496, "grad_norm": 0.9791154861450195, "learning_rate": 2.1943159536925994e-08, "loss": 0.0774, "step": 8880 }, { "epoch": 2.8778353856124435, "grad_norm": 0.9327330589294434, "learning_rate": 2.1827685839276856e-08, "loss": 0.0765, "step": 8881 }, { "epoch": 2.878159429682437, "grad_norm": 1.139756202697754, "learning_rate": 2.1712515445928285e-08, "loss": 0.0779, "step": 8882 }, { "epoch": 2.8784834737524303, "grad_norm": 0.9503368735313416, "learning_rate": 2.159764837097622e-08, "loss": 0.0787, "step": 8883 }, { "epoch": 2.8788075178224237, "grad_norm": 0.8720259666442871, "learning_rate": 2.1483084628479145e-08, "loss": 0.0695, "step": 8884 }, { "epoch": 2.879131561892417, "grad_norm": 0.8664884567260742, "learning_rate": 2.1368824232458618e-08, "loss": 0.0734, "step": 8885 }, { "epoch": 2.879455605962411, "grad_norm": 0.9471036195755005, "learning_rate": 2.125486719689929e-08, "loss": 0.0789, "step": 8886 }, { "epoch": 2.8797796500324044, "grad_norm": 0.9130653142929077, "learning_rate": 2.1141213535747772e-08, "loss": 0.0806, "step": 8887 }, { "epoch": 2.880103694102398, "grad_norm": 0.8658857941627502, "learning_rate": 2.1027863262914617e-08, "loss": 0.071, "step": 8888 }, { "epoch": 2.8804277381723913, "grad_norm": 0.9986435770988464, "learning_rate": 2.0914816392272608e-08, "loss": 0.0697, "step": 8889 }, { "epoch": 2.8807517822423847, "grad_norm": 0.8567577600479126, "learning_rate": 2.0802072937657624e-08, "loss": 0.0725, "step": 8890 }, { "epoch": 2.8810758263123786, "grad_norm": 0.9769272208213806, "learning_rate": 2.068963291286863e-08, "loss": 0.078, "step": 8891 }, { "epoch": 2.881399870382372, "grad_norm": 0.9042466878890991, "learning_rate": 2.0577496331666837e-08, "loss": 0.0742, "step": 8892 }, { "epoch": 2.8817239144523654, "grad_norm": 0.8782934546470642, "learning_rate": 2.046566320777682e-08, "loss": 0.0782, "step": 8893 }, { "epoch": 2.8820479585223593, "grad_norm": 0.8273100256919861, "learning_rate": 2.0354133554885967e-08, "loss": 0.068, "step": 8894 }, { "epoch": 2.8823720025923527, "grad_norm": 0.8472905158996582, "learning_rate": 2.0242907386644195e-08, "loss": 0.0714, "step": 8895 }, { "epoch": 2.882696046662346, "grad_norm": 0.9096794128417969, "learning_rate": 2.0131984716664776e-08, "loss": 0.0804, "step": 8896 }, { "epoch": 2.8830200907323396, "grad_norm": 0.9694199562072754, "learning_rate": 2.002136555852352e-08, "loss": 0.0741, "step": 8897 }, { "epoch": 2.883344134802333, "grad_norm": 0.8909012675285339, "learning_rate": 1.9911049925758765e-08, "loss": 0.0724, "step": 8898 }, { "epoch": 2.883668178872327, "grad_norm": 0.835223376750946, "learning_rate": 1.9801037831872482e-08, "loss": 0.0714, "step": 8899 }, { "epoch": 2.8839922229423203, "grad_norm": 0.918133020401001, "learning_rate": 1.9691329290329185e-08, "loss": 0.0773, "step": 8900 }, { "epoch": 2.8843162670123137, "grad_norm": 0.9661774039268494, "learning_rate": 1.958192431455591e-08, "loss": 0.0806, "step": 8901 }, { "epoch": 2.884640311082307, "grad_norm": 0.9486656785011292, "learning_rate": 1.9472822917942778e-08, "loss": 0.075, "step": 8902 }, { "epoch": 2.8849643551523005, "grad_norm": 0.8935655355453491, "learning_rate": 1.9364025113842444e-08, "loss": 0.0754, "step": 8903 }, { "epoch": 2.8852883992222944, "grad_norm": 0.9863356947898865, "learning_rate": 1.9255530915571197e-08, "loss": 0.0809, "step": 8904 }, { "epoch": 2.885612443292288, "grad_norm": 0.9524531364440918, "learning_rate": 1.9147340336407584e-08, "loss": 0.0837, "step": 8905 }, { "epoch": 2.8859364873622813, "grad_norm": 0.9335321187973022, "learning_rate": 1.9039453389592954e-08, "loss": 0.0768, "step": 8906 }, { "epoch": 2.8862605314322747, "grad_norm": 0.8417344093322754, "learning_rate": 1.893187008833175e-08, "loss": 0.0757, "step": 8907 }, { "epoch": 2.886584575502268, "grad_norm": 0.9135453104972839, "learning_rate": 1.8824590445790935e-08, "loss": 0.0754, "step": 8908 }, { "epoch": 2.886908619572262, "grad_norm": 0.9432932734489441, "learning_rate": 1.871761447510084e-08, "loss": 0.0763, "step": 8909 }, { "epoch": 2.8872326636422554, "grad_norm": 0.8661041259765625, "learning_rate": 1.8610942189353777e-08, "loss": 0.0719, "step": 8910 }, { "epoch": 2.887556707712249, "grad_norm": 0.8979371786117554, "learning_rate": 1.850457360160568e-08, "loss": 0.0748, "step": 8911 }, { "epoch": 2.8878807517822422, "grad_norm": 0.8953410387039185, "learning_rate": 1.839850872487503e-08, "loss": 0.0753, "step": 8912 }, { "epoch": 2.8882047958522357, "grad_norm": 0.8552532196044922, "learning_rate": 1.829274757214339e-08, "loss": 0.0735, "step": 8913 }, { "epoch": 2.8885288399222295, "grad_norm": 0.9590696096420288, "learning_rate": 1.8187290156354565e-08, "loss": 0.0753, "step": 8914 }, { "epoch": 2.888852883992223, "grad_norm": 0.9155333638191223, "learning_rate": 1.808213649041546e-08, "loss": 0.0748, "step": 8915 }, { "epoch": 2.8891769280622164, "grad_norm": 0.9266859889030457, "learning_rate": 1.7977286587196053e-08, "loss": 0.0801, "step": 8916 }, { "epoch": 2.8895009721322102, "grad_norm": 0.9092302918434143, "learning_rate": 1.7872740459529135e-08, "loss": 0.0774, "step": 8917 }, { "epoch": 2.8898250162022032, "grad_norm": 0.8608276844024658, "learning_rate": 1.7768498120209755e-08, "loss": 0.0765, "step": 8918 }, { "epoch": 2.890149060272197, "grad_norm": 0.9430409669876099, "learning_rate": 1.766455958199631e-08, "loss": 0.0771, "step": 8919 }, { "epoch": 2.8904731043421905, "grad_norm": 0.8961855173110962, "learning_rate": 1.7560924857610016e-08, "loss": 0.0726, "step": 8920 }, { "epoch": 2.890797148412184, "grad_norm": 0.8819207549095154, "learning_rate": 1.745759395973462e-08, "loss": 0.0788, "step": 8921 }, { "epoch": 2.891121192482178, "grad_norm": 0.8897761106491089, "learning_rate": 1.7354566901016944e-08, "loss": 0.0744, "step": 8922 }, { "epoch": 2.8914452365521712, "grad_norm": 0.8832787871360779, "learning_rate": 1.7251843694066074e-08, "loss": 0.0732, "step": 8923 }, { "epoch": 2.8917692806221647, "grad_norm": 0.9169782996177673, "learning_rate": 1.7149424351455003e-08, "loss": 0.0779, "step": 8924 }, { "epoch": 2.892093324692158, "grad_norm": 1.0113013982772827, "learning_rate": 1.7047308885718427e-08, "loss": 0.0836, "step": 8925 }, { "epoch": 2.8924173687621515, "grad_norm": 0.928467869758606, "learning_rate": 1.694549730935441e-08, "loss": 0.0792, "step": 8926 }, { "epoch": 2.8927414128321454, "grad_norm": 0.9160869121551514, "learning_rate": 1.684398963482381e-08, "loss": 0.0765, "step": 8927 }, { "epoch": 2.893065456902139, "grad_norm": 0.9572937488555908, "learning_rate": 1.674278587454975e-08, "loss": 0.0787, "step": 8928 }, { "epoch": 2.893389500972132, "grad_norm": 0.9312818646430969, "learning_rate": 1.6641886040919263e-08, "loss": 0.0751, "step": 8929 }, { "epoch": 2.8937135450421256, "grad_norm": 0.8742788434028625, "learning_rate": 1.654129014628081e-08, "loss": 0.0771, "step": 8930 }, { "epoch": 2.894037589112119, "grad_norm": 0.8427819609642029, "learning_rate": 1.6440998202947034e-08, "loss": 0.0747, "step": 8931 }, { "epoch": 2.894361633182113, "grad_norm": 0.934770941734314, "learning_rate": 1.634101022319229e-08, "loss": 0.0737, "step": 8932 }, { "epoch": 2.8946856772521063, "grad_norm": 0.9312441945075989, "learning_rate": 1.6241326219254006e-08, "loss": 0.0798, "step": 8933 }, { "epoch": 2.8950097213220998, "grad_norm": 0.9229906797409058, "learning_rate": 1.6141946203332703e-08, "loss": 0.0798, "step": 8934 }, { "epoch": 2.895333765392093, "grad_norm": 0.9197176098823547, "learning_rate": 1.6042870187591985e-08, "loss": 0.0769, "step": 8935 }, { "epoch": 2.8956578094620866, "grad_norm": 0.8258293867111206, "learning_rate": 1.5944098184156876e-08, "loss": 0.0704, "step": 8936 }, { "epoch": 2.8959818535320805, "grad_norm": 0.9437922239303589, "learning_rate": 1.5845630205117147e-08, "loss": 0.0793, "step": 8937 }, { "epoch": 2.896305897602074, "grad_norm": 0.8437861800193787, "learning_rate": 1.5747466262523438e-08, "loss": 0.0678, "step": 8938 }, { "epoch": 2.8966299416720673, "grad_norm": 0.8374882936477661, "learning_rate": 1.5649606368390578e-08, "loss": 0.07, "step": 8939 }, { "epoch": 2.8969539857420608, "grad_norm": 0.8916781544685364, "learning_rate": 1.5552050534695383e-08, "loss": 0.076, "step": 8940 }, { "epoch": 2.897278029812054, "grad_norm": 0.951287031173706, "learning_rate": 1.5454798773378023e-08, "loss": 0.0762, "step": 8941 }, { "epoch": 2.897602073882048, "grad_norm": 0.9072433114051819, "learning_rate": 1.5357851096340915e-08, "loss": 0.0766, "step": 8942 }, { "epoch": 2.8979261179520415, "grad_norm": 0.8875364065170288, "learning_rate": 1.526120751544985e-08, "loss": 0.0753, "step": 8943 }, { "epoch": 2.898250162022035, "grad_norm": 0.9446718096733093, "learning_rate": 1.5164868042532864e-08, "loss": 0.0797, "step": 8944 }, { "epoch": 2.8985742060920288, "grad_norm": 0.8039813041687012, "learning_rate": 1.506883268938053e-08, "loss": 0.0686, "step": 8945 }, { "epoch": 2.898898250162022, "grad_norm": 0.9617775082588196, "learning_rate": 1.4973101467747608e-08, "loss": 0.0756, "step": 8946 }, { "epoch": 2.8992222942320156, "grad_norm": 0.8695583343505859, "learning_rate": 1.4877674389349728e-08, "loss": 0.0708, "step": 8947 }, { "epoch": 2.899546338302009, "grad_norm": 0.8433423042297363, "learning_rate": 1.4782551465866713e-08, "loss": 0.0711, "step": 8948 }, { "epoch": 2.8998703823720025, "grad_norm": 0.844932496547699, "learning_rate": 1.4687732708940916e-08, "loss": 0.0705, "step": 8949 }, { "epoch": 2.9001944264419963, "grad_norm": 0.8735789060592651, "learning_rate": 1.4593218130176668e-08, "loss": 0.0734, "step": 8950 }, { "epoch": 2.9005184705119897, "grad_norm": 0.9091753363609314, "learning_rate": 1.4499007741141934e-08, "loss": 0.0773, "step": 8951 }, { "epoch": 2.900842514581983, "grad_norm": 0.935634970664978, "learning_rate": 1.4405101553367218e-08, "loss": 0.0738, "step": 8952 }, { "epoch": 2.9011665586519766, "grad_norm": 0.8336095213890076, "learning_rate": 1.4311499578345821e-08, "loss": 0.0711, "step": 8953 }, { "epoch": 2.90149060272197, "grad_norm": 0.9978169798851013, "learning_rate": 1.421820182753303e-08, "loss": 0.0811, "step": 8954 }, { "epoch": 2.901814646791964, "grad_norm": 0.9416351318359375, "learning_rate": 1.4125208312348593e-08, "loss": 0.0807, "step": 8955 }, { "epoch": 2.9021386908619573, "grad_norm": 0.88267582654953, "learning_rate": 1.403251904417341e-08, "loss": 0.0718, "step": 8956 }, { "epoch": 2.9024627349319507, "grad_norm": 0.9784106016159058, "learning_rate": 1.3940134034351738e-08, "loss": 0.0809, "step": 8957 }, { "epoch": 2.902786779001944, "grad_norm": 0.8735864758491516, "learning_rate": 1.3848053294190922e-08, "loss": 0.0742, "step": 8958 }, { "epoch": 2.9031108230719376, "grad_norm": 0.892750084400177, "learning_rate": 1.3756276834960558e-08, "loss": 0.0772, "step": 8959 }, { "epoch": 2.9034348671419314, "grad_norm": 0.9043106436729431, "learning_rate": 1.366480466789305e-08, "loss": 0.0715, "step": 8960 }, { "epoch": 2.903758911211925, "grad_norm": 0.826783299446106, "learning_rate": 1.3573636804183887e-08, "loss": 0.073, "step": 8961 }, { "epoch": 2.9040829552819183, "grad_norm": 0.8980486392974854, "learning_rate": 1.3482773254991365e-08, "loss": 0.0747, "step": 8962 }, { "epoch": 2.9044069993519117, "grad_norm": 0.9460495710372925, "learning_rate": 1.3392214031435757e-08, "loss": 0.0796, "step": 8963 }, { "epoch": 2.904731043421905, "grad_norm": 0.8458935022354126, "learning_rate": 1.3301959144600974e-08, "loss": 0.0707, "step": 8964 }, { "epoch": 2.905055087491899, "grad_norm": 1.1474437713623047, "learning_rate": 1.3212008605533177e-08, "loss": 0.0799, "step": 8965 }, { "epoch": 2.9053791315618924, "grad_norm": 0.8916202783584595, "learning_rate": 1.312236242524162e-08, "loss": 0.0785, "step": 8966 }, { "epoch": 2.905703175631886, "grad_norm": 0.9507387280464172, "learning_rate": 1.3033020614698078e-08, "loss": 0.08, "step": 8967 }, { "epoch": 2.9060272197018797, "grad_norm": 0.9159635305404663, "learning_rate": 1.2943983184837417e-08, "loss": 0.0736, "step": 8968 }, { "epoch": 2.906351263771873, "grad_norm": 0.8017541766166687, "learning_rate": 1.2855250146556197e-08, "loss": 0.0638, "step": 8969 }, { "epoch": 2.9066753078418666, "grad_norm": 0.9174900054931641, "learning_rate": 1.2766821510715177e-08, "loss": 0.0783, "step": 8970 }, { "epoch": 2.90699935191186, "grad_norm": 0.8530569076538086, "learning_rate": 1.2678697288136809e-08, "loss": 0.0733, "step": 8971 }, { "epoch": 2.9073233959818534, "grad_norm": 0.8402195572853088, "learning_rate": 1.2590877489606911e-08, "loss": 0.0709, "step": 8972 }, { "epoch": 2.9076474400518473, "grad_norm": 0.9232442378997803, "learning_rate": 1.2503362125873552e-08, "loss": 0.0761, "step": 8973 }, { "epoch": 2.9079714841218407, "grad_norm": 0.8968020081520081, "learning_rate": 1.241615120764761e-08, "loss": 0.0748, "step": 8974 }, { "epoch": 2.908295528191834, "grad_norm": 0.8251527547836304, "learning_rate": 1.2329244745603596e-08, "loss": 0.07, "step": 8975 }, { "epoch": 2.9086195722618275, "grad_norm": 0.9001858830451965, "learning_rate": 1.2242642750376899e-08, "loss": 0.0788, "step": 8976 }, { "epoch": 2.908943616331821, "grad_norm": 0.8136048316955566, "learning_rate": 1.2156345232567923e-08, "loss": 0.0658, "step": 8977 }, { "epoch": 2.909267660401815, "grad_norm": 0.9805194139480591, "learning_rate": 1.2070352202737668e-08, "loss": 0.085, "step": 8978 }, { "epoch": 2.9095917044718083, "grad_norm": 0.9120486378669739, "learning_rate": 1.19846636714116e-08, "loss": 0.0792, "step": 8979 }, { "epoch": 2.9099157485418017, "grad_norm": 0.8749681711196899, "learning_rate": 1.1899279649076612e-08, "loss": 0.0758, "step": 8980 }, { "epoch": 2.910239792611795, "grad_norm": 0.8955467343330383, "learning_rate": 1.181420014618323e-08, "loss": 0.0757, "step": 8981 }, { "epoch": 2.9105638366817885, "grad_norm": 0.8159177303314209, "learning_rate": 1.172942517314396e-08, "loss": 0.069, "step": 8982 }, { "epoch": 2.9108878807517824, "grad_norm": 0.8539730906486511, "learning_rate": 1.1644954740334946e-08, "loss": 0.0723, "step": 8983 }, { "epoch": 2.911211924821776, "grad_norm": 0.8731797337532043, "learning_rate": 1.1560788858094584e-08, "loss": 0.0707, "step": 8984 }, { "epoch": 2.9115359688917692, "grad_norm": 0.8900837898254395, "learning_rate": 1.1476927536723248e-08, "loss": 0.0783, "step": 8985 }, { "epoch": 2.9118600129617627, "grad_norm": 0.9353973269462585, "learning_rate": 1.1393370786485502e-08, "loss": 0.0749, "step": 8986 }, { "epoch": 2.912184057031756, "grad_norm": 1.0125935077667236, "learning_rate": 1.1310118617607613e-08, "loss": 0.0734, "step": 8987 }, { "epoch": 2.91250810110175, "grad_norm": 0.8920942544937134, "learning_rate": 1.122717104027865e-08, "loss": 0.0751, "step": 8988 }, { "epoch": 2.9128321451717434, "grad_norm": 0.8845387697219849, "learning_rate": 1.1144528064650772e-08, "loss": 0.0738, "step": 8989 }, { "epoch": 2.913156189241737, "grad_norm": 0.8738760352134705, "learning_rate": 1.1062189700838944e-08, "loss": 0.0751, "step": 8990 }, { "epoch": 2.9134802333117307, "grad_norm": 0.9137491583824158, "learning_rate": 1.0980155958920103e-08, "loss": 0.0756, "step": 8991 }, { "epoch": 2.9138042773817237, "grad_norm": 0.9099265336990356, "learning_rate": 1.089842684893455e-08, "loss": 0.0775, "step": 8992 }, { "epoch": 2.9141283214517175, "grad_norm": 0.9011944532394409, "learning_rate": 1.0817002380885123e-08, "loss": 0.0768, "step": 8993 }, { "epoch": 2.914452365521711, "grad_norm": 0.8906083106994629, "learning_rate": 1.0735882564737732e-08, "loss": 0.0737, "step": 8994 }, { "epoch": 2.9147764095917044, "grad_norm": 0.8653832674026489, "learning_rate": 1.0655067410419994e-08, "loss": 0.0744, "step": 8995 }, { "epoch": 2.9151004536616982, "grad_norm": 0.9044657349586487, "learning_rate": 1.057455692782372e-08, "loss": 0.0776, "step": 8996 }, { "epoch": 2.9154244977316917, "grad_norm": 0.8942205309867859, "learning_rate": 1.049435112680186e-08, "loss": 0.0729, "step": 8997 }, { "epoch": 2.915748541801685, "grad_norm": 0.8953902721405029, "learning_rate": 1.0414450017171007e-08, "loss": 0.0782, "step": 8998 }, { "epoch": 2.9160725858716785, "grad_norm": 0.923188328742981, "learning_rate": 1.0334853608710838e-08, "loss": 0.0765, "step": 8999 }, { "epoch": 2.916396629941672, "grad_norm": 0.8497654795646667, "learning_rate": 1.0255561911162449e-08, "loss": 0.0725, "step": 9000 }, { "epoch": 2.916720674011666, "grad_norm": 0.8314547538757324, "learning_rate": 1.0176574934230854e-08, "loss": 0.0688, "step": 9001 }, { "epoch": 2.917044718081659, "grad_norm": 0.9178106188774109, "learning_rate": 1.0097892687583044e-08, "loss": 0.0754, "step": 9002 }, { "epoch": 2.9173687621516526, "grad_norm": 0.8695831298828125, "learning_rate": 1.0019515180849094e-08, "loss": 0.0704, "step": 9003 }, { "epoch": 2.917692806221646, "grad_norm": 0.884425163269043, "learning_rate": 9.941442423621606e-09, "loss": 0.0756, "step": 9004 }, { "epoch": 2.9180168502916395, "grad_norm": 0.877781093120575, "learning_rate": 9.863674425455716e-09, "loss": 0.0717, "step": 9005 }, { "epoch": 2.9183408943616334, "grad_norm": 0.9392322301864624, "learning_rate": 9.78621119586992e-09, "loss": 0.0812, "step": 9006 }, { "epoch": 2.9186649384316268, "grad_norm": 0.9074358344078064, "learning_rate": 9.709052744344694e-09, "loss": 0.074, "step": 9007 }, { "epoch": 2.91898898250162, "grad_norm": 0.8570203185081482, "learning_rate": 9.63219908032359e-09, "loss": 0.0724, "step": 9008 }, { "epoch": 2.9193130265716136, "grad_norm": 0.8743963837623596, "learning_rate": 9.55565021321242e-09, "loss": 0.072, "step": 9009 }, { "epoch": 2.919637070641607, "grad_norm": 0.9489037990570068, "learning_rate": 9.479406152380632e-09, "loss": 0.0769, "step": 9010 }, { "epoch": 2.919961114711601, "grad_norm": 0.9726477861404419, "learning_rate": 9.403466907159375e-09, "loss": 0.0763, "step": 9011 }, { "epoch": 2.9202851587815943, "grad_norm": 0.9409971833229065, "learning_rate": 9.327832486842603e-09, "loss": 0.0794, "step": 9012 }, { "epoch": 2.9206092028515878, "grad_norm": 0.9196006059646606, "learning_rate": 9.25250290068791e-09, "loss": 0.0735, "step": 9013 }, { "epoch": 2.920933246921581, "grad_norm": 0.8963923454284668, "learning_rate": 9.17747815791431e-09, "loss": 0.0765, "step": 9014 }, { "epoch": 2.9212572909915746, "grad_norm": 0.9616303443908691, "learning_rate": 9.102758267704736e-09, "loss": 0.0803, "step": 9015 }, { "epoch": 2.9215813350615685, "grad_norm": 0.9092485308647156, "learning_rate": 9.02834323920354e-09, "loss": 0.0712, "step": 9016 }, { "epoch": 2.921905379131562, "grad_norm": 0.838982105255127, "learning_rate": 8.954233081518438e-09, "loss": 0.072, "step": 9017 }, { "epoch": 2.9222294232015553, "grad_norm": 0.8191535472869873, "learning_rate": 8.880427803720226e-09, "loss": 0.0703, "step": 9018 }, { "epoch": 2.922553467271549, "grad_norm": 0.8949090838432312, "learning_rate": 8.806927414841959e-09, "loss": 0.0784, "step": 9019 }, { "epoch": 2.9228775113415426, "grad_norm": 0.9300609827041626, "learning_rate": 8.73373192387894e-09, "loss": 0.0806, "step": 9020 }, { "epoch": 2.923201555411536, "grad_norm": 0.8566632270812988, "learning_rate": 8.660841339789561e-09, "loss": 0.0694, "step": 9021 }, { "epoch": 2.9235255994815295, "grad_norm": 0.9081147313117981, "learning_rate": 8.58825567149557e-09, "loss": 0.0752, "step": 9022 }, { "epoch": 2.923849643551523, "grad_norm": 0.876440167427063, "learning_rate": 8.515974927880144e-09, "loss": 0.0728, "step": 9023 }, { "epoch": 2.9241736876215167, "grad_norm": 0.9899942278862, "learning_rate": 8.443999117790091e-09, "loss": 0.0767, "step": 9024 }, { "epoch": 2.92449773169151, "grad_norm": 0.8901786208152771, "learning_rate": 8.3723282500342e-09, "loss": 0.0758, "step": 9025 }, { "epoch": 2.9248217757615036, "grad_norm": 0.9070116281509399, "learning_rate": 8.3009623333849e-09, "loss": 0.0752, "step": 9026 }, { "epoch": 2.925145819831497, "grad_norm": 0.8795725107192993, "learning_rate": 8.229901376575755e-09, "loss": 0.0714, "step": 9027 }, { "epoch": 2.9254698639014904, "grad_norm": 0.8601377606391907, "learning_rate": 8.15914538830509e-09, "loss": 0.0753, "step": 9028 }, { "epoch": 2.9257939079714843, "grad_norm": 0.9134733080863953, "learning_rate": 8.088694377231532e-09, "loss": 0.0781, "step": 9029 }, { "epoch": 2.9261179520414777, "grad_norm": 0.9644197821617126, "learning_rate": 8.018548351978738e-09, "loss": 0.0774, "step": 9030 }, { "epoch": 2.926441996111471, "grad_norm": 0.8478583693504333, "learning_rate": 7.948707321130956e-09, "loss": 0.0697, "step": 9031 }, { "epoch": 2.9267660401814646, "grad_norm": 0.8698458671569824, "learning_rate": 7.879171293236621e-09, "loss": 0.073, "step": 9032 }, { "epoch": 2.927090084251458, "grad_norm": 0.8994610905647278, "learning_rate": 7.80994027680615e-09, "loss": 0.074, "step": 9033 }, { "epoch": 2.927414128321452, "grad_norm": 0.8590171337127686, "learning_rate": 7.741014280312765e-09, "loss": 0.0762, "step": 9034 }, { "epoch": 2.9277381723914453, "grad_norm": 0.8662635684013367, "learning_rate": 7.672393312192218e-09, "loss": 0.0708, "step": 9035 }, { "epoch": 2.9280622164614387, "grad_norm": 0.8634116649627686, "learning_rate": 7.604077380843067e-09, "loss": 0.0696, "step": 9036 }, { "epoch": 2.928386260531432, "grad_norm": 0.9314274787902832, "learning_rate": 7.536066494626681e-09, "loss": 0.0795, "step": 9037 }, { "epoch": 2.9287103046014256, "grad_norm": 0.9240031242370605, "learning_rate": 7.468360661866957e-09, "loss": 0.0763, "step": 9038 }, { "epoch": 2.9290343486714194, "grad_norm": 0.9062063694000244, "learning_rate": 7.400959890850046e-09, "loss": 0.0756, "step": 9039 }, { "epoch": 2.929358392741413, "grad_norm": 0.8826825022697449, "learning_rate": 7.333864189825735e-09, "loss": 0.0753, "step": 9040 }, { "epoch": 2.9296824368114063, "grad_norm": 0.8756586909294128, "learning_rate": 7.267073567005234e-09, "loss": 0.075, "step": 9041 }, { "epoch": 2.9300064808814, "grad_norm": 0.8725214004516602, "learning_rate": 7.2005880305636714e-09, "loss": 0.0749, "step": 9042 }, { "epoch": 2.930330524951393, "grad_norm": 0.893915057182312, "learning_rate": 7.134407588637871e-09, "loss": 0.0748, "step": 9043 }, { "epoch": 2.930654569021387, "grad_norm": 0.888831377029419, "learning_rate": 7.068532249327742e-09, "loss": 0.0767, "step": 9044 }, { "epoch": 2.9309786130913804, "grad_norm": 0.9237712025642395, "learning_rate": 7.002962020695725e-09, "loss": 0.0777, "step": 9045 }, { "epoch": 2.931302657161374, "grad_norm": 0.9445968866348267, "learning_rate": 6.937696910767067e-09, "loss": 0.0789, "step": 9046 }, { "epoch": 2.9316267012313677, "grad_norm": 0.9167876243591309, "learning_rate": 6.872736927529822e-09, "loss": 0.0734, "step": 9047 }, { "epoch": 2.931950745301361, "grad_norm": 0.8834000825881958, "learning_rate": 6.8080820789340195e-09, "loss": 0.0711, "step": 9048 }, { "epoch": 2.9322747893713546, "grad_norm": 0.820752739906311, "learning_rate": 6.743732372893053e-09, "loss": 0.0645, "step": 9049 }, { "epoch": 2.932598833441348, "grad_norm": 0.8337461948394775, "learning_rate": 6.679687817282843e-09, "loss": 0.0708, "step": 9050 }, { "epoch": 2.9329228775113414, "grad_norm": 0.825693666934967, "learning_rate": 6.615948419941565e-09, "loss": 0.0689, "step": 9051 }, { "epoch": 2.9332469215813353, "grad_norm": 0.9408130049705505, "learning_rate": 6.5525141886702005e-09, "loss": 0.0824, "step": 9052 }, { "epoch": 2.9335709656513287, "grad_norm": 0.9178377389907837, "learning_rate": 6.489385131232817e-09, "loss": 0.0797, "step": 9053 }, { "epoch": 2.933895009721322, "grad_norm": 0.8189285397529602, "learning_rate": 6.426561255355457e-09, "loss": 0.0652, "step": 9054 }, { "epoch": 2.9342190537913155, "grad_norm": 0.9435027241706848, "learning_rate": 6.364042568727524e-09, "loss": 0.0835, "step": 9055 }, { "epoch": 2.934543097861309, "grad_norm": 0.8830663561820984, "learning_rate": 6.301829079000399e-09, "loss": 0.0733, "step": 9056 }, { "epoch": 2.934867141931303, "grad_norm": 0.8811532258987427, "learning_rate": 6.239920793788546e-09, "loss": 0.0783, "step": 9057 }, { "epoch": 2.9351911860012962, "grad_norm": 0.8556127548217773, "learning_rate": 6.178317720668958e-09, "loss": 0.0755, "step": 9058 }, { "epoch": 2.9355152300712897, "grad_norm": 0.9733546376228333, "learning_rate": 6.117019867181162e-09, "loss": 0.0823, "step": 9059 }, { "epoch": 2.935839274141283, "grad_norm": 0.8635657429695129, "learning_rate": 6.056027240827489e-09, "loss": 0.0771, "step": 9060 }, { "epoch": 2.9361633182112765, "grad_norm": 0.8800527453422546, "learning_rate": 5.995339849073079e-09, "loss": 0.0762, "step": 9061 }, { "epoch": 2.9364873622812704, "grad_norm": 0.9648496508598328, "learning_rate": 5.9349576993447675e-09, "loss": 0.0749, "step": 9062 }, { "epoch": 2.936811406351264, "grad_norm": 0.9104852080345154, "learning_rate": 5.874880799033589e-09, "loss": 0.077, "step": 9063 }, { "epoch": 2.9371354504212572, "grad_norm": 1.2932277917861938, "learning_rate": 5.815109155491716e-09, "loss": 0.08, "step": 9064 }, { "epoch": 2.9374594944912507, "grad_norm": 0.9217556715011597, "learning_rate": 5.755642776035242e-09, "loss": 0.0768, "step": 9065 }, { "epoch": 2.937783538561244, "grad_norm": 0.9468752145767212, "learning_rate": 5.696481667941678e-09, "loss": 0.0814, "step": 9066 }, { "epoch": 2.938107582631238, "grad_norm": 0.8518416881561279, "learning_rate": 5.637625838452176e-09, "loss": 0.072, "step": 9067 }, { "epoch": 2.9384316267012314, "grad_norm": 0.884737491607666, "learning_rate": 5.579075294769864e-09, "loss": 0.0755, "step": 9068 }, { "epoch": 2.938755670771225, "grad_norm": 0.8714679479598999, "learning_rate": 5.520830044060677e-09, "loss": 0.0683, "step": 9069 }, { "epoch": 2.9390797148412187, "grad_norm": 0.8819865584373474, "learning_rate": 5.46289009345391e-09, "loss": 0.0747, "step": 9070 }, { "epoch": 2.939403758911212, "grad_norm": 0.9256206750869751, "learning_rate": 5.405255450040003e-09, "loss": 0.0767, "step": 9071 }, { "epoch": 2.9397278029812055, "grad_norm": 0.9435095191001892, "learning_rate": 5.347926120873592e-09, "loss": 0.0751, "step": 9072 }, { "epoch": 2.940051847051199, "grad_norm": 0.9027978181838989, "learning_rate": 5.290902112970731e-09, "loss": 0.0725, "step": 9073 }, { "epoch": 2.9403758911211924, "grad_norm": 0.9899285435676575, "learning_rate": 5.234183433310835e-09, "loss": 0.079, "step": 9074 }, { "epoch": 2.940699935191186, "grad_norm": 0.9293980598449707, "learning_rate": 5.177770088835854e-09, "loss": 0.0799, "step": 9075 }, { "epoch": 2.9410239792611796, "grad_norm": 0.873587429523468, "learning_rate": 5.121662086449708e-09, "loss": 0.0732, "step": 9076 }, { "epoch": 2.941348023331173, "grad_norm": 0.8625730276107788, "learning_rate": 5.065859433019959e-09, "loss": 0.0709, "step": 9077 }, { "epoch": 2.9416720674011665, "grad_norm": 0.9578263163566589, "learning_rate": 5.010362135376423e-09, "loss": 0.0792, "step": 9078 }, { "epoch": 2.94199611147116, "grad_norm": 0.86197429895401, "learning_rate": 4.955170200310888e-09, "loss": 0.0748, "step": 9079 }, { "epoch": 2.942320155541154, "grad_norm": 0.8910972476005554, "learning_rate": 4.9002836345787845e-09, "loss": 0.0769, "step": 9080 }, { "epoch": 2.942644199611147, "grad_norm": 0.9671840071678162, "learning_rate": 4.845702444897515e-09, "loss": 0.0748, "step": 9081 }, { "epoch": 2.9429682436811406, "grad_norm": 0.950499951839447, "learning_rate": 4.791426637947294e-09, "loss": 0.0832, "step": 9082 }, { "epoch": 2.943292287751134, "grad_norm": 0.8624812364578247, "learning_rate": 4.7374562203708615e-09, "loss": 0.074, "step": 9083 }, { "epoch": 2.9436163318211275, "grad_norm": 0.8232616782188416, "learning_rate": 4.683791198773768e-09, "loss": 0.0725, "step": 9084 }, { "epoch": 2.9439403758911213, "grad_norm": 0.8955888748168945, "learning_rate": 4.630431579724371e-09, "loss": 0.0796, "step": 9085 }, { "epoch": 2.9442644199611148, "grad_norm": 0.9281444549560547, "learning_rate": 4.577377369752722e-09, "loss": 0.0725, "step": 9086 }, { "epoch": 2.944588464031108, "grad_norm": 0.9185742735862732, "learning_rate": 4.524628575352796e-09, "loss": 0.0808, "step": 9087 }, { "epoch": 2.9449125081011016, "grad_norm": 0.8552374839782715, "learning_rate": 4.472185202980261e-09, "loss": 0.0713, "step": 9088 }, { "epoch": 2.945236552171095, "grad_norm": 0.9765703678131104, "learning_rate": 4.420047259053595e-09, "loss": 0.0811, "step": 9089 }, { "epoch": 2.945560596241089, "grad_norm": 0.8993111848831177, "learning_rate": 4.36821474995408e-09, "loss": 0.0735, "step": 9090 }, { "epoch": 2.9458846403110823, "grad_norm": 0.8508222103118896, "learning_rate": 4.316687682025256e-09, "loss": 0.0722, "step": 9091 }, { "epoch": 2.9462086843810757, "grad_norm": 0.8553758263587952, "learning_rate": 4.26546606157402e-09, "loss": 0.0702, "step": 9092 }, { "epoch": 2.9465327284510696, "grad_norm": 0.8335778713226318, "learning_rate": 4.2145498948692465e-09, "loss": 0.0704, "step": 9093 }, { "epoch": 2.9468567725210626, "grad_norm": 0.8840744495391846, "learning_rate": 4.163939188142341e-09, "loss": 0.0749, "step": 9094 }, { "epoch": 2.9471808165910565, "grad_norm": 0.7947471737861633, "learning_rate": 4.113633947587792e-09, "loss": 0.0645, "step": 9095 }, { "epoch": 2.94750486066105, "grad_norm": 0.9484292268753052, "learning_rate": 4.063634179362341e-09, "loss": 0.0805, "step": 9096 }, { "epoch": 2.9478289047310433, "grad_norm": 0.9598378539085388, "learning_rate": 4.013939889585538e-09, "loss": 0.085, "step": 9097 }, { "epoch": 2.948152948801037, "grad_norm": 0.8770298361778259, "learning_rate": 3.964551084339463e-09, "loss": 0.0748, "step": 9098 }, { "epoch": 2.9484769928710306, "grad_norm": 0.8788001537322998, "learning_rate": 3.915467769668724e-09, "loss": 0.0736, "step": 9099 }, { "epoch": 2.948801036941024, "grad_norm": 0.8909417390823364, "learning_rate": 3.866689951580738e-09, "loss": 0.0748, "step": 9100 }, { "epoch": 2.9491250810110174, "grad_norm": 0.8614071011543274, "learning_rate": 3.818217636045729e-09, "loss": 0.0742, "step": 9101 }, { "epoch": 2.949449125081011, "grad_norm": 0.8587439060211182, "learning_rate": 3.770050828995897e-09, "loss": 0.0741, "step": 9102 }, { "epoch": 2.9497731691510047, "grad_norm": 0.9640584588050842, "learning_rate": 3.7221895363262485e-09, "loss": 0.0776, "step": 9103 }, { "epoch": 2.950097213220998, "grad_norm": 0.893592119216919, "learning_rate": 3.674633763894875e-09, "loss": 0.0729, "step": 9104 }, { "epoch": 2.9504212572909916, "grad_norm": 0.9449043869972229, "learning_rate": 3.6273835175221204e-09, "loss": 0.0827, "step": 9105 }, { "epoch": 2.950745301360985, "grad_norm": 0.8701707124710083, "learning_rate": 3.58043880299086e-09, "loss": 0.0779, "step": 9106 }, { "epoch": 2.9510693454309784, "grad_norm": 0.890649676322937, "learning_rate": 3.533799626046497e-09, "loss": 0.0773, "step": 9107 }, { "epoch": 2.9513933895009723, "grad_norm": 0.9598814249038696, "learning_rate": 3.487465992397521e-09, "loss": 0.0737, "step": 9108 }, { "epoch": 2.9517174335709657, "grad_norm": 0.8655813932418823, "learning_rate": 3.4414379077146733e-09, "loss": 0.0735, "step": 9109 }, { "epoch": 2.952041477640959, "grad_norm": 0.8902621865272522, "learning_rate": 3.3957153776312257e-09, "loss": 0.0765, "step": 9110 }, { "epoch": 2.9523655217109526, "grad_norm": 0.957643985748291, "learning_rate": 3.3502984077429803e-09, "loss": 0.077, "step": 9111 }, { "epoch": 2.952689565780946, "grad_norm": 0.9296534657478333, "learning_rate": 3.3051870036091004e-09, "loss": 0.0731, "step": 9112 }, { "epoch": 2.95301360985094, "grad_norm": 0.8171263933181763, "learning_rate": 3.260381170750171e-09, "loss": 0.069, "step": 9113 }, { "epoch": 2.9533376539209333, "grad_norm": 0.918368935585022, "learning_rate": 3.215880914650693e-09, "loss": 0.0774, "step": 9114 }, { "epoch": 2.9536616979909267, "grad_norm": 0.9412216544151306, "learning_rate": 3.171686240756033e-09, "loss": 0.0747, "step": 9115 }, { "epoch": 2.95398574206092, "grad_norm": 0.8553744554519653, "learning_rate": 3.1277971544763088e-09, "loss": 0.0745, "step": 9116 }, { "epoch": 2.9543097861309136, "grad_norm": 0.9307593703269958, "learning_rate": 3.0842136611825004e-09, "loss": 0.0809, "step": 9117 }, { "epoch": 2.9546338302009074, "grad_norm": 0.8458048701286316, "learning_rate": 3.0409357662086748e-09, "loss": 0.0703, "step": 9118 }, { "epoch": 2.954957874270901, "grad_norm": 0.8757748007774353, "learning_rate": 2.997963474852261e-09, "loss": 0.0726, "step": 9119 }, { "epoch": 2.9552819183408943, "grad_norm": 0.8589341640472412, "learning_rate": 2.9552967923721086e-09, "loss": 0.0681, "step": 9120 }, { "epoch": 2.955605962410888, "grad_norm": 0.8835259079933167, "learning_rate": 2.9129357239901514e-09, "loss": 0.0714, "step": 9121 }, { "epoch": 2.9559300064808816, "grad_norm": 0.962489128112793, "learning_rate": 2.8708802748914077e-09, "loss": 0.0749, "step": 9122 }, { "epoch": 2.956254050550875, "grad_norm": 0.9141943454742432, "learning_rate": 2.829130450222872e-09, "loss": 0.0805, "step": 9123 }, { "epoch": 2.9565780946208684, "grad_norm": 0.8826735019683838, "learning_rate": 2.7876862550940685e-09, "loss": 0.0726, "step": 9124 }, { "epoch": 2.956902138690862, "grad_norm": 0.9047395586967468, "learning_rate": 2.7465476945778835e-09, "loss": 0.0787, "step": 9125 }, { "epoch": 2.9572261827608557, "grad_norm": 0.8255360126495361, "learning_rate": 2.705714773708623e-09, "loss": 0.0691, "step": 9126 }, { "epoch": 2.957550226830849, "grad_norm": 0.8987961411476135, "learning_rate": 2.6651874974845115e-09, "loss": 0.0771, "step": 9127 }, { "epoch": 2.9578742709008425, "grad_norm": 0.9574360847473145, "learning_rate": 2.6249658708651928e-09, "loss": 0.0798, "step": 9128 }, { "epoch": 2.958198314970836, "grad_norm": 0.9309230446815491, "learning_rate": 2.5850498987733952e-09, "loss": 0.078, "step": 9129 }, { "epoch": 2.9585223590408294, "grad_norm": 0.8887049555778503, "learning_rate": 2.545439586094933e-09, "loss": 0.0735, "step": 9130 }, { "epoch": 2.9588464031108233, "grad_norm": 0.8663487434387207, "learning_rate": 2.506134937677318e-09, "loss": 0.0737, "step": 9131 }, { "epoch": 2.9591704471808167, "grad_norm": 0.9009921550750732, "learning_rate": 2.4671359583314237e-09, "loss": 0.0763, "step": 9132 }, { "epoch": 2.95949449125081, "grad_norm": 0.8658737540245056, "learning_rate": 2.4284426528298212e-09, "loss": 0.0745, "step": 9133 }, { "epoch": 2.9598185353208035, "grad_norm": 0.8811216354370117, "learning_rate": 2.3900550259084445e-09, "loss": 0.0735, "step": 9134 }, { "epoch": 2.960142579390797, "grad_norm": 0.9001662731170654, "learning_rate": 2.351973082265757e-09, "loss": 0.0747, "step": 9135 }, { "epoch": 2.960466623460791, "grad_norm": 0.8873869776725769, "learning_rate": 2.314196826562476e-09, "loss": 0.0767, "step": 9136 }, { "epoch": 2.9607906675307842, "grad_norm": 0.8550229072570801, "learning_rate": 2.2767262634218466e-09, "loss": 0.0712, "step": 9137 }, { "epoch": 2.9611147116007777, "grad_norm": 0.8947347402572632, "learning_rate": 2.239561397430201e-09, "loss": 0.0723, "step": 9138 }, { "epoch": 2.961438755670771, "grad_norm": 0.8800020217895508, "learning_rate": 2.2027022331361226e-09, "loss": 0.075, "step": 9139 }, { "epoch": 2.9617627997407645, "grad_norm": 0.9262352585792542, "learning_rate": 2.1661487750504473e-09, "loss": 0.0776, "step": 9140 }, { "epoch": 2.9620868438107584, "grad_norm": 0.8233925700187683, "learning_rate": 2.129901027647652e-09, "loss": 0.07, "step": 9141 }, { "epoch": 2.962410887880752, "grad_norm": 1.0221971273422241, "learning_rate": 2.0939589953633542e-09, "loss": 0.0802, "step": 9142 }, { "epoch": 2.962734931950745, "grad_norm": 0.9081821441650391, "learning_rate": 2.0583226825970915e-09, "loss": 0.0765, "step": 9143 }, { "epoch": 2.963058976020739, "grad_norm": 0.8890132904052734, "learning_rate": 2.022992093710097e-09, "loss": 0.0722, "step": 9144 }, { "epoch": 2.963383020090732, "grad_norm": 1.019970178604126, "learning_rate": 1.9879672330266886e-09, "loss": 0.0771, "step": 9145 }, { "epoch": 2.963707064160726, "grad_norm": 0.9550405144691467, "learning_rate": 1.9532481048334383e-09, "loss": 0.0753, "step": 9146 }, { "epoch": 2.9640311082307194, "grad_norm": 0.8712491989135742, "learning_rate": 1.918834713379447e-09, "loss": 0.0777, "step": 9147 }, { "epoch": 2.964355152300713, "grad_norm": 0.9841358065605164, "learning_rate": 1.884727062876901e-09, "loss": 0.0801, "step": 9148 }, { "epoch": 2.9646791963707066, "grad_norm": 0.8419924378395081, "learning_rate": 1.8509251575002386e-09, "loss": 0.0753, "step": 9149 }, { "epoch": 2.9650032404407, "grad_norm": 0.8987525701522827, "learning_rate": 1.8174290013864282e-09, "loss": 0.0763, "step": 9150 }, { "epoch": 2.9653272845106935, "grad_norm": 0.9673410654067993, "learning_rate": 1.784238598634691e-09, "loss": 0.0864, "step": 9151 }, { "epoch": 2.965651328580687, "grad_norm": 0.8701542615890503, "learning_rate": 1.7513539533078882e-09, "loss": 0.0739, "step": 9152 }, { "epoch": 2.9659753726506803, "grad_norm": 0.8537030816078186, "learning_rate": 1.7187750694303007e-09, "loss": 0.0737, "step": 9153 }, { "epoch": 2.966299416720674, "grad_norm": 0.8703850507736206, "learning_rate": 1.686501950989572e-09, "loss": 0.0735, "step": 9154 }, { "epoch": 2.9666234607906676, "grad_norm": 0.909617006778717, "learning_rate": 1.6545346019350427e-09, "loss": 0.0799, "step": 9155 }, { "epoch": 2.966947504860661, "grad_norm": 0.8157674074172974, "learning_rate": 1.6228730261799718e-09, "loss": 0.0682, "step": 9156 }, { "epoch": 2.9672715489306545, "grad_norm": 1.0222302675247192, "learning_rate": 1.5915172275990375e-09, "loss": 0.0821, "step": 9157 }, { "epoch": 2.967595593000648, "grad_norm": 0.9033092260360718, "learning_rate": 1.5604672100297258e-09, "loss": 0.0754, "step": 9158 }, { "epoch": 2.9679196370706418, "grad_norm": 0.9216779470443726, "learning_rate": 1.5297229772726075e-09, "loss": 0.0773, "step": 9159 }, { "epoch": 2.968243681140635, "grad_norm": 0.8437354564666748, "learning_rate": 1.499284533090506e-09, "loss": 0.0717, "step": 9160 }, { "epoch": 2.9685677252106286, "grad_norm": 0.9266118407249451, "learning_rate": 1.469151881208497e-09, "loss": 0.0744, "step": 9161 }, { "epoch": 2.968891769280622, "grad_norm": 0.8228974342346191, "learning_rate": 1.4393250253144642e-09, "loss": 0.0707, "step": 9162 }, { "epoch": 2.9692158133506155, "grad_norm": 0.9221566319465637, "learning_rate": 1.4098039690593756e-09, "loss": 0.0741, "step": 9163 }, { "epoch": 2.9695398574206093, "grad_norm": 0.8759603500366211, "learning_rate": 1.3805887160558973e-09, "loss": 0.0726, "step": 9164 }, { "epoch": 2.9698639014906028, "grad_norm": 0.8887774348258972, "learning_rate": 1.3516792698797797e-09, "loss": 0.077, "step": 9165 }, { "epoch": 2.970187945560596, "grad_norm": 0.8909733891487122, "learning_rate": 1.323075634069304e-09, "loss": 0.076, "step": 9166 }, { "epoch": 2.9705119896305896, "grad_norm": 0.8457849621772766, "learning_rate": 1.2947778121255584e-09, "loss": 0.0691, "step": 9167 }, { "epoch": 2.970836033700583, "grad_norm": 0.9030209183692932, "learning_rate": 1.2667858075113281e-09, "loss": 0.0733, "step": 9168 }, { "epoch": 2.971160077770577, "grad_norm": 0.8783456683158875, "learning_rate": 1.239099623653317e-09, "loss": 0.0711, "step": 9169 }, { "epoch": 2.9714841218405703, "grad_norm": 0.9108428359031677, "learning_rate": 1.2117192639393704e-09, "loss": 0.0746, "step": 9170 }, { "epoch": 2.9718081659105637, "grad_norm": 1.0172836780548096, "learning_rate": 1.1846447317206967e-09, "loss": 0.0755, "step": 9171 }, { "epoch": 2.9721322099805576, "grad_norm": 0.849394679069519, "learning_rate": 1.1578760303113113e-09, "loss": 0.0711, "step": 9172 }, { "epoch": 2.972456254050551, "grad_norm": 0.9604840874671936, "learning_rate": 1.131413162987205e-09, "loss": 0.0752, "step": 9173 }, { "epoch": 2.9727802981205445, "grad_norm": 0.8981707096099854, "learning_rate": 1.1052561329871757e-09, "loss": 0.0768, "step": 9174 }, { "epoch": 2.973104342190538, "grad_norm": 0.9010798931121826, "learning_rate": 1.0794049435128296e-09, "loss": 0.0777, "step": 9175 }, { "epoch": 2.9734283862605313, "grad_norm": 0.9054232239723206, "learning_rate": 1.0538595977277466e-09, "loss": 0.0755, "step": 9176 }, { "epoch": 2.973752430330525, "grad_norm": 0.9445099830627441, "learning_rate": 1.028620098758315e-09, "loss": 0.0802, "step": 9177 }, { "epoch": 2.9740764744005186, "grad_norm": 0.8789932727813721, "learning_rate": 1.0036864496942856e-09, "loss": 0.0764, "step": 9178 }, { "epoch": 2.974400518470512, "grad_norm": 0.8579067587852478, "learning_rate": 9.79058653586551e-10, "loss": 0.0727, "step": 9179 }, { "epoch": 2.9747245625405054, "grad_norm": 0.7822898626327515, "learning_rate": 9.54736713449922e-10, "loss": 0.0699, "step": 9180 }, { "epoch": 2.975048606610499, "grad_norm": 0.8219559788703918, "learning_rate": 9.307206322606288e-10, "loss": 0.0704, "step": 9181 }, { "epoch": 2.9753726506804927, "grad_norm": 0.9843807816505432, "learning_rate": 9.070104129582647e-10, "loss": 0.0795, "step": 9182 }, { "epoch": 2.975696694750486, "grad_norm": 0.8420466184616089, "learning_rate": 8.836060584449524e-10, "loss": 0.0702, "step": 9183 }, { "epoch": 2.9760207388204796, "grad_norm": 0.8671947121620178, "learning_rate": 8.6050757158479e-10, "loss": 0.0741, "step": 9184 }, { "epoch": 2.976344782890473, "grad_norm": 0.8479039072990417, "learning_rate": 8.377149552049602e-10, "loss": 0.0718, "step": 9185 }, { "epoch": 2.9766688269604664, "grad_norm": 0.9086983799934387, "learning_rate": 8.15228212095176e-10, "loss": 0.0828, "step": 9186 }, { "epoch": 2.9769928710304603, "grad_norm": 0.8832724690437317, "learning_rate": 7.930473450074028e-10, "loss": 0.0757, "step": 9187 }, { "epoch": 2.9773169151004537, "grad_norm": 0.9127610325813293, "learning_rate": 7.711723566564133e-10, "loss": 0.0773, "step": 9188 }, { "epoch": 2.977640959170447, "grad_norm": 0.8464711904525757, "learning_rate": 7.496032497195105e-10, "loss": 0.0687, "step": 9189 }, { "epoch": 2.9779650032404406, "grad_norm": 0.8912038803100586, "learning_rate": 7.283400268365271e-10, "loss": 0.074, "step": 9190 }, { "epoch": 2.978289047310434, "grad_norm": 0.8227494359016418, "learning_rate": 7.073826906098258e-10, "loss": 0.0739, "step": 9191 }, { "epoch": 2.978613091380428, "grad_norm": 0.8756099939346313, "learning_rate": 6.867312436045769e-10, "loss": 0.0773, "step": 9192 }, { "epoch": 2.9789371354504213, "grad_norm": 0.8831959962844849, "learning_rate": 6.663856883482034e-10, "loss": 0.0825, "step": 9193 }, { "epoch": 2.9792611795204147, "grad_norm": 0.8920246958732605, "learning_rate": 6.463460273306577e-10, "loss": 0.0719, "step": 9194 }, { "epoch": 2.9795852235904086, "grad_norm": 0.8495813012123108, "learning_rate": 6.266122630049776e-10, "loss": 0.072, "step": 9195 }, { "epoch": 2.9799092676604015, "grad_norm": 0.9280455112457275, "learning_rate": 6.071843977861758e-10, "loss": 0.0774, "step": 9196 }, { "epoch": 2.9802333117303954, "grad_norm": 0.8548804521560669, "learning_rate": 5.880624340517948e-10, "loss": 0.0727, "step": 9197 }, { "epoch": 2.980557355800389, "grad_norm": 0.8589762449264526, "learning_rate": 5.692463741424625e-10, "loss": 0.0755, "step": 9198 }, { "epoch": 2.9808813998703823, "grad_norm": 0.9303783178329468, "learning_rate": 5.507362203607814e-10, "loss": 0.0791, "step": 9199 }, { "epoch": 2.981205443940376, "grad_norm": 0.9959030151367188, "learning_rate": 5.325319749727165e-10, "loss": 0.0807, "step": 9200 }, { "epoch": 2.9815294880103695, "grad_norm": 0.8675582408905029, "learning_rate": 5.146336402059304e-10, "loss": 0.0704, "step": 9201 }, { "epoch": 2.981853532080363, "grad_norm": 0.9374891519546509, "learning_rate": 4.970412182511708e-10, "loss": 0.0801, "step": 9202 }, { "epoch": 2.9821775761503564, "grad_norm": 0.931382417678833, "learning_rate": 4.797547112614376e-10, "loss": 0.0776, "step": 9203 }, { "epoch": 2.98250162022035, "grad_norm": 0.9373607635498047, "learning_rate": 4.627741213525383e-10, "loss": 0.0757, "step": 9204 }, { "epoch": 2.9828256642903437, "grad_norm": 0.9142207503318787, "learning_rate": 4.460994506028105e-10, "loss": 0.0796, "step": 9205 }, { "epoch": 2.983149708360337, "grad_norm": 0.9252228736877441, "learning_rate": 4.2973070105256643e-10, "loss": 0.0803, "step": 9206 }, { "epoch": 2.9834737524303305, "grad_norm": 0.8329778909683228, "learning_rate": 4.136678747060363e-10, "loss": 0.0701, "step": 9207 }, { "epoch": 2.983797796500324, "grad_norm": 0.8641289472579956, "learning_rate": 3.9791097352831487e-10, "loss": 0.0733, "step": 9208 }, { "epoch": 2.9841218405703174, "grad_norm": 0.9849048852920532, "learning_rate": 3.824599994484146e-10, "loss": 0.0825, "step": 9209 }, { "epoch": 2.9844458846403112, "grad_norm": 0.943912148475647, "learning_rate": 3.673149543573229e-10, "loss": 0.0816, "step": 9210 }, { "epoch": 2.9847699287103047, "grad_norm": 0.8786362409591675, "learning_rate": 3.5247584010827953e-10, "loss": 0.0729, "step": 9211 }, { "epoch": 2.985093972780298, "grad_norm": 0.8849793672561646, "learning_rate": 3.3794265851816444e-10, "loss": 0.0729, "step": 9212 }, { "epoch": 2.9854180168502915, "grad_norm": 0.8750109672546387, "learning_rate": 3.237154113649998e-10, "loss": 0.0716, "step": 9213 }, { "epoch": 2.985742060920285, "grad_norm": 0.9164808392524719, "learning_rate": 3.0979410039017053e-10, "loss": 0.079, "step": 9214 }, { "epoch": 2.986066104990279, "grad_norm": 0.8990651965141296, "learning_rate": 2.961787272978689e-10, "loss": 0.0741, "step": 9215 }, { "epoch": 2.9863901490602722, "grad_norm": 0.8636859059333801, "learning_rate": 2.828692937542621e-10, "loss": 0.0776, "step": 9216 }, { "epoch": 2.9867141931302656, "grad_norm": 0.9257625937461853, "learning_rate": 2.6986580138832487e-10, "loss": 0.0764, "step": 9217 }, { "epoch": 2.987038237200259, "grad_norm": 0.8682085275650024, "learning_rate": 2.571682517915619e-10, "loss": 0.0687, "step": 9218 }, { "epoch": 2.9873622812702525, "grad_norm": 0.8867075443267822, "learning_rate": 2.447766465180079e-10, "loss": 0.0683, "step": 9219 }, { "epoch": 2.9876863253402464, "grad_norm": 0.8165310025215149, "learning_rate": 2.3269098708422754e-10, "loss": 0.0705, "step": 9220 }, { "epoch": 2.98801036941024, "grad_norm": 0.8468376398086548, "learning_rate": 2.2091127496959298e-10, "loss": 0.0734, "step": 9221 }, { "epoch": 2.988334413480233, "grad_norm": 0.9028320908546448, "learning_rate": 2.0943751161545122e-10, "loss": 0.0772, "step": 9222 }, { "epoch": 2.988658457550227, "grad_norm": 0.8585754632949829, "learning_rate": 1.98269698426512e-10, "loss": 0.0772, "step": 9223 }, { "epoch": 2.9889825016202205, "grad_norm": 0.8857394456863403, "learning_rate": 1.8740783676945984e-10, "loss": 0.0774, "step": 9224 }, { "epoch": 2.989306545690214, "grad_norm": 0.9060828685760498, "learning_rate": 1.768519279732317e-10, "loss": 0.0781, "step": 9225 }, { "epoch": 2.9896305897602073, "grad_norm": 0.8916698694229126, "learning_rate": 1.666019733306823e-10, "loss": 0.0747, "step": 9226 }, { "epoch": 2.9899546338302008, "grad_norm": 0.9118140935897827, "learning_rate": 1.5665797409553097e-10, "loss": 0.0743, "step": 9227 }, { "epoch": 2.9902786779001946, "grad_norm": 0.8600789904594421, "learning_rate": 1.4701993148485972e-10, "loss": 0.0761, "step": 9228 }, { "epoch": 2.990602721970188, "grad_norm": 0.8507561087608337, "learning_rate": 1.3768784667883562e-10, "loss": 0.0763, "step": 9229 }, { "epoch": 2.9909267660401815, "grad_norm": 0.922105610370636, "learning_rate": 1.2866172081904548e-10, "loss": 0.0756, "step": 9230 }, { "epoch": 2.991250810110175, "grad_norm": 0.8797658085823059, "learning_rate": 1.1994155501071636e-10, "loss": 0.0753, "step": 9231 }, { "epoch": 2.9915748541801683, "grad_norm": 0.9012943506240845, "learning_rate": 1.1152735032077255e-10, "loss": 0.0758, "step": 9232 }, { "epoch": 2.991898898250162, "grad_norm": 0.8567914962768555, "learning_rate": 1.0341910777894593e-10, "loss": 0.0726, "step": 9233 }, { "epoch": 2.9922229423201556, "grad_norm": 0.8917068839073181, "learning_rate": 9.561682837777586e-11, "loss": 0.073, "step": 9234 }, { "epoch": 2.992546986390149, "grad_norm": 0.9291151165962219, "learning_rate": 8.812051307205416e-11, "loss": 0.079, "step": 9235 }, { "epoch": 2.9928710304601425, "grad_norm": 0.9489535689353943, "learning_rate": 8.093016277938015e-11, "loss": 0.0809, "step": 9236 }, { "epoch": 2.993195074530136, "grad_norm": 0.8829405903816223, "learning_rate": 7.404577837988313e-11, "loss": 0.0781, "step": 9237 }, { "epoch": 2.9935191186001298, "grad_norm": 0.905556321144104, "learning_rate": 6.746736071594484e-11, "loss": 0.0801, "step": 9238 }, { "epoch": 2.993843162670123, "grad_norm": 0.8819782137870789, "learning_rate": 6.119491059303206e-11, "loss": 0.0757, "step": 9239 }, { "epoch": 2.9941672067401166, "grad_norm": 0.8596265912055969, "learning_rate": 5.522842877830892e-11, "loss": 0.0718, "step": 9240 }, { "epoch": 2.99449125081011, "grad_norm": 0.8243787288665771, "learning_rate": 4.956791600230215e-11, "loss": 0.0697, "step": 9241 }, { "epoch": 2.9948152948801035, "grad_norm": 0.8741186857223511, "learning_rate": 4.4213372957790935e-11, "loss": 0.0742, "step": 9242 }, { "epoch": 2.9951393389500973, "grad_norm": 0.9056291580200195, "learning_rate": 3.9164800300084404e-11, "loss": 0.0766, "step": 9243 }, { "epoch": 2.9954633830200907, "grad_norm": 0.9046027660369873, "learning_rate": 3.442219864729923e-11, "loss": 0.0743, "step": 9244 }, { "epoch": 2.995787427090084, "grad_norm": 0.9297699332237244, "learning_rate": 2.998556857952695e-11, "loss": 0.0782, "step": 9245 }, { "epoch": 2.996111471160078, "grad_norm": 0.8244128823280334, "learning_rate": 2.5854910639944165e-11, "loss": 0.0684, "step": 9246 }, { "epoch": 2.9964355152300715, "grad_norm": 0.8610013723373413, "learning_rate": 2.203022533425747e-11, "loss": 0.0778, "step": 9247 }, { "epoch": 2.996759559300065, "grad_norm": 0.9346343278884888, "learning_rate": 1.8511513130148317e-11, "loss": 0.0835, "step": 9248 }, { "epoch": 2.9970836033700583, "grad_norm": 0.8872416019439697, "learning_rate": 1.529877445866079e-11, "loss": 0.0726, "step": 9249 }, { "epoch": 2.9974076474400517, "grad_norm": 0.9257415533065796, "learning_rate": 1.2392009713091402e-11, "loss": 0.0748, "step": 9250 }, { "epoch": 2.9977316915100456, "grad_norm": 0.9216912984848022, "learning_rate": 9.791219248711515e-12, "loss": 0.075, "step": 9251 }, { "epoch": 2.998055735580039, "grad_norm": 0.9601297378540039, "learning_rate": 7.496403384155137e-12, "loss": 0.0854, "step": 9252 }, { "epoch": 2.9983797796500324, "grad_norm": 0.9359740614891052, "learning_rate": 5.507562400308697e-12, "loss": 0.0747, "step": 9253 }, { "epoch": 2.998703823720026, "grad_norm": 0.8407431840896606, "learning_rate": 3.8246965403110344e-12, "loss": 0.067, "step": 9254 }, { "epoch": 2.9990278677900193, "grad_norm": 0.8539666533470154, "learning_rate": 2.4478060103860777e-12, "loss": 0.0713, "step": 9255 }, { "epoch": 2.999351911860013, "grad_norm": 0.881554365158081, "learning_rate": 1.376890979287726e-12, "loss": 0.0703, "step": 9256 }, { "epoch": 2.9996759559300066, "grad_norm": 0.9617207646369934, "learning_rate": 6.119515774671847e-13, "loss": 0.0793, "step": 9257 }, { "epoch": 3.0, "grad_norm": 0.8892857432365417, "learning_rate": 1.529878990158551e-13, "loss": 0.076, "step": 9258 } ], "logging_steps": 1.0, "max_steps": 9258, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.876319338303901e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }