| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1004, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00099601593625498, | |
| "grad_norm": 0.4463295638561249, | |
| "learning_rate": 9.900990099009901e-08, | |
| "loss": 2.6197, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00199203187250996, | |
| "grad_norm": 0.8444207906723022, | |
| "learning_rate": 1.9801980198019803e-07, | |
| "loss": 2.7588, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00298804780876494, | |
| "grad_norm": 0.6591606140136719, | |
| "learning_rate": 2.9702970297029703e-07, | |
| "loss": 2.9263, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00398406374501992, | |
| "grad_norm": 0.5656299591064453, | |
| "learning_rate": 3.9603960396039606e-07, | |
| "loss": 2.4296, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0049800796812749, | |
| "grad_norm": 0.5051721930503845, | |
| "learning_rate": 4.950495049504951e-07, | |
| "loss": 2.3939, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.00597609561752988, | |
| "grad_norm": 0.7762399911880493, | |
| "learning_rate": 5.940594059405941e-07, | |
| "loss": 2.6638, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0069721115537848604, | |
| "grad_norm": 0.5301679968833923, | |
| "learning_rate": 6.930693069306931e-07, | |
| "loss": 2.2199, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.00796812749003984, | |
| "grad_norm": 0.6617525219917297, | |
| "learning_rate": 7.920792079207921e-07, | |
| "loss": 2.8019, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.008964143426294821, | |
| "grad_norm": 0.7944237589836121, | |
| "learning_rate": 8.910891089108911e-07, | |
| "loss": 2.7146, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0099601593625498, | |
| "grad_norm": 0.6918312907218933, | |
| "learning_rate": 9.900990099009902e-07, | |
| "loss": 2.6128, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.010956175298804782, | |
| "grad_norm": 0.550072193145752, | |
| "learning_rate": 1.0891089108910893e-06, | |
| "loss": 2.5252, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.01195219123505976, | |
| "grad_norm": 0.4420550763607025, | |
| "learning_rate": 1.1881188118811881e-06, | |
| "loss": 2.6964, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.012948207171314742, | |
| "grad_norm": 0.4683515429496765, | |
| "learning_rate": 1.2871287128712872e-06, | |
| "loss": 2.6433, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.013944223107569721, | |
| "grad_norm": 0.5689812898635864, | |
| "learning_rate": 1.3861386138613863e-06, | |
| "loss": 2.3309, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.014940239043824702, | |
| "grad_norm": 0.5711223483085632, | |
| "learning_rate": 1.4851485148514852e-06, | |
| "loss": 2.4396, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.01593625498007968, | |
| "grad_norm": 0.4562544822692871, | |
| "learning_rate": 1.5841584158415842e-06, | |
| "loss": 2.234, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.01693227091633466, | |
| "grad_norm": 0.33882570266723633, | |
| "learning_rate": 1.6831683168316833e-06, | |
| "loss": 2.5468, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.017928286852589643, | |
| "grad_norm": 0.46446338295936584, | |
| "learning_rate": 1.7821782178217822e-06, | |
| "loss": 2.6143, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.018924302788844622, | |
| "grad_norm": 0.625619649887085, | |
| "learning_rate": 1.8811881188118813e-06, | |
| "loss": 2.5565, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0199203187250996, | |
| "grad_norm": 0.5139931440353394, | |
| "learning_rate": 1.9801980198019803e-06, | |
| "loss": 2.8371, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02091633466135458, | |
| "grad_norm": 0.45826011896133423, | |
| "learning_rate": 2.0792079207920794e-06, | |
| "loss": 2.5883, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.021912350597609563, | |
| "grad_norm": 0.5945838093757629, | |
| "learning_rate": 2.1782178217821785e-06, | |
| "loss": 2.6956, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.022908366533864542, | |
| "grad_norm": 0.6705940961837769, | |
| "learning_rate": 2.2772277227722776e-06, | |
| "loss": 3.1889, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.02390438247011952, | |
| "grad_norm": 0.489014208316803, | |
| "learning_rate": 2.3762376237623762e-06, | |
| "loss": 2.646, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0249003984063745, | |
| "grad_norm": 0.5540168285369873, | |
| "learning_rate": 2.4752475247524753e-06, | |
| "loss": 2.6909, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.025896414342629483, | |
| "grad_norm": 0.34993091225624084, | |
| "learning_rate": 2.5742574257425744e-06, | |
| "loss": 2.5703, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.026892430278884463, | |
| "grad_norm": 0.6204649209976196, | |
| "learning_rate": 2.6732673267326735e-06, | |
| "loss": 2.6973, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.027888446215139442, | |
| "grad_norm": 0.6948006749153137, | |
| "learning_rate": 2.7722772277227726e-06, | |
| "loss": 2.8826, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.02888446215139442, | |
| "grad_norm": 0.4865665137767792, | |
| "learning_rate": 2.8712871287128712e-06, | |
| "loss": 2.6566, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.029880478087649404, | |
| "grad_norm": 0.7654755711555481, | |
| "learning_rate": 2.9702970297029703e-06, | |
| "loss": 2.9627, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.030876494023904383, | |
| "grad_norm": 0.636715829372406, | |
| "learning_rate": 3.0693069306930694e-06, | |
| "loss": 2.3846, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.03187250996015936, | |
| "grad_norm": 0.3698335289955139, | |
| "learning_rate": 3.1683168316831685e-06, | |
| "loss": 2.6724, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03286852589641434, | |
| "grad_norm": 0.7592146396636963, | |
| "learning_rate": 3.2673267326732676e-06, | |
| "loss": 2.9239, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.03386454183266932, | |
| "grad_norm": 0.7194887399673462, | |
| "learning_rate": 3.3663366336633666e-06, | |
| "loss": 2.4727, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0348605577689243, | |
| "grad_norm": 0.4150688350200653, | |
| "learning_rate": 3.4653465346534653e-06, | |
| "loss": 2.6407, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.035856573705179286, | |
| "grad_norm": 0.4450097382068634, | |
| "learning_rate": 3.5643564356435644e-06, | |
| "loss": 2.6709, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.036852589641434265, | |
| "grad_norm": 0.4754781424999237, | |
| "learning_rate": 3.6633663366336635e-06, | |
| "loss": 2.6848, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.037848605577689244, | |
| "grad_norm": 0.41208407282829285, | |
| "learning_rate": 3.7623762376237625e-06, | |
| "loss": 2.5176, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.03884462151394422, | |
| "grad_norm": 0.41328731179237366, | |
| "learning_rate": 3.861386138613862e-06, | |
| "loss": 2.3275, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0398406374501992, | |
| "grad_norm": 0.5368106365203857, | |
| "learning_rate": 3.960396039603961e-06, | |
| "loss": 2.4877, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04083665338645418, | |
| "grad_norm": 0.37100547552108765, | |
| "learning_rate": 4.05940594059406e-06, | |
| "loss": 2.5933, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.04183266932270916, | |
| "grad_norm": 0.4816776216030121, | |
| "learning_rate": 4.158415841584159e-06, | |
| "loss": 2.844, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04282868525896414, | |
| "grad_norm": 0.4209342896938324, | |
| "learning_rate": 4.2574257425742575e-06, | |
| "loss": 2.5422, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.043824701195219126, | |
| "grad_norm": 0.6714078783988953, | |
| "learning_rate": 4.356435643564357e-06, | |
| "loss": 2.7081, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.044820717131474105, | |
| "grad_norm": 0.26568883657455444, | |
| "learning_rate": 4.455445544554456e-06, | |
| "loss": 2.9217, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.045816733067729085, | |
| "grad_norm": 0.37946802377700806, | |
| "learning_rate": 4.554455445544555e-06, | |
| "loss": 2.4118, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.046812749003984064, | |
| "grad_norm": 0.6484291553497314, | |
| "learning_rate": 4.653465346534654e-06, | |
| "loss": 2.547, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.04780876494023904, | |
| "grad_norm": 0.29198533296585083, | |
| "learning_rate": 4.7524752475247525e-06, | |
| "loss": 2.519, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.04880478087649402, | |
| "grad_norm": 0.7583147883415222, | |
| "learning_rate": 4.851485148514852e-06, | |
| "loss": 2.9767, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.049800796812749, | |
| "grad_norm": 0.5130609273910522, | |
| "learning_rate": 4.950495049504951e-06, | |
| "loss": 2.7004, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05079681274900399, | |
| "grad_norm": 0.31620916724205017, | |
| "learning_rate": 5.04950495049505e-06, | |
| "loss": 2.4462, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.05179282868525897, | |
| "grad_norm": 0.8395189046859741, | |
| "learning_rate": 5.148514851485149e-06, | |
| "loss": 2.5588, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.052788844621513946, | |
| "grad_norm": 0.387138307094574, | |
| "learning_rate": 5.247524752475248e-06, | |
| "loss": 2.1448, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.053784860557768925, | |
| "grad_norm": 1.2158163785934448, | |
| "learning_rate": 5.346534653465347e-06, | |
| "loss": 2.3136, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.054780876494023904, | |
| "grad_norm": 0.2387009710073471, | |
| "learning_rate": 5.4455445544554465e-06, | |
| "loss": 2.1985, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.055776892430278883, | |
| "grad_norm": 0.3181290924549103, | |
| "learning_rate": 5.544554455445545e-06, | |
| "loss": 2.3419, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.05677290836653386, | |
| "grad_norm": 0.37027299404144287, | |
| "learning_rate": 5.643564356435644e-06, | |
| "loss": 2.6245, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.05776892430278884, | |
| "grad_norm": 1.2025309801101685, | |
| "learning_rate": 5.7425742574257425e-06, | |
| "loss": 3.9658, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.05876494023904383, | |
| "grad_norm": 0.2420024573802948, | |
| "learning_rate": 5.841584158415842e-06, | |
| "loss": 2.6491, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.05976095617529881, | |
| "grad_norm": 0.5019764304161072, | |
| "learning_rate": 5.940594059405941e-06, | |
| "loss": 2.5349, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.060756972111553786, | |
| "grad_norm": 0.42150792479515076, | |
| "learning_rate": 6.03960396039604e-06, | |
| "loss": 3.0192, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.061752988047808766, | |
| "grad_norm": 0.21461670100688934, | |
| "learning_rate": 6.138613861386139e-06, | |
| "loss": 2.4131, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06274900398406374, | |
| "grad_norm": 0.27715393900871277, | |
| "learning_rate": 6.237623762376238e-06, | |
| "loss": 2.0699, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.06374501992031872, | |
| "grad_norm": 0.23175019025802612, | |
| "learning_rate": 6.336633663366337e-06, | |
| "loss": 2.454, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0647410358565737, | |
| "grad_norm": 1.4909917116165161, | |
| "learning_rate": 6.4356435643564364e-06, | |
| "loss": 2.394, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.06573705179282868, | |
| "grad_norm": 0.25836002826690674, | |
| "learning_rate": 6.534653465346535e-06, | |
| "loss": 2.1991, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.06673306772908366, | |
| "grad_norm": 0.24367666244506836, | |
| "learning_rate": 6.633663366336635e-06, | |
| "loss": 2.1038, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.06772908366533864, | |
| "grad_norm": 0.2366018295288086, | |
| "learning_rate": 6.732673267326733e-06, | |
| "loss": 2.4102, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.06872509960159362, | |
| "grad_norm": 0.2741665542125702, | |
| "learning_rate": 6.831683168316833e-06, | |
| "loss": 2.4163, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.0697211155378486, | |
| "grad_norm": 1.1350017786026, | |
| "learning_rate": 6.930693069306931e-06, | |
| "loss": 3.1909, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07071713147410359, | |
| "grad_norm": 0.7466657161712646, | |
| "learning_rate": 7.02970297029703e-06, | |
| "loss": 3.0505, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.07171314741035857, | |
| "grad_norm": 0.6016573309898376, | |
| "learning_rate": 7.128712871287129e-06, | |
| "loss": 2.523, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07270916334661355, | |
| "grad_norm": 0.29950985312461853, | |
| "learning_rate": 7.227722772277228e-06, | |
| "loss": 2.2338, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.07370517928286853, | |
| "grad_norm": 0.3367365598678589, | |
| "learning_rate": 7.326732673267327e-06, | |
| "loss": 2.3668, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.07470119521912351, | |
| "grad_norm": 0.23957543075084686, | |
| "learning_rate": 7.425742574257426e-06, | |
| "loss": 2.3929, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07569721115537849, | |
| "grad_norm": 0.2996574342250824, | |
| "learning_rate": 7.524752475247525e-06, | |
| "loss": 2.4929, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.07669322709163347, | |
| "grad_norm": 1.1166422367095947, | |
| "learning_rate": 7.6237623762376246e-06, | |
| "loss": 2.2256, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.07768924302788845, | |
| "grad_norm": 0.3733150362968445, | |
| "learning_rate": 7.722772277227724e-06, | |
| "loss": 2.4912, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.07868525896414343, | |
| "grad_norm": 0.3624296486377716, | |
| "learning_rate": 7.821782178217822e-06, | |
| "loss": 2.7605, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.0796812749003984, | |
| "grad_norm": 0.47846829891204834, | |
| "learning_rate": 7.920792079207921e-06, | |
| "loss": 2.7636, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08067729083665338, | |
| "grad_norm": 0.3782709836959839, | |
| "learning_rate": 8.019801980198021e-06, | |
| "loss": 2.4481, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.08167330677290836, | |
| "grad_norm": 0.5004844665527344, | |
| "learning_rate": 8.11881188118812e-06, | |
| "loss": 2.4166, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.08266932270916334, | |
| "grad_norm": 0.20830737054347992, | |
| "learning_rate": 8.217821782178218e-06, | |
| "loss": 2.4728, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.08366533864541832, | |
| "grad_norm": 0.2479114830493927, | |
| "learning_rate": 8.316831683168318e-06, | |
| "loss": 2.2449, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.0846613545816733, | |
| "grad_norm": 0.42911332845687866, | |
| "learning_rate": 8.415841584158416e-06, | |
| "loss": 2.2295, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.08565737051792828, | |
| "grad_norm": 0.44220131635665894, | |
| "learning_rate": 8.514851485148515e-06, | |
| "loss": 2.4495, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.08665338645418327, | |
| "grad_norm": 0.23947738111019135, | |
| "learning_rate": 8.613861386138615e-06, | |
| "loss": 2.1415, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.08764940239043825, | |
| "grad_norm": 0.42801541090011597, | |
| "learning_rate": 8.712871287128714e-06, | |
| "loss": 2.3226, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.08864541832669323, | |
| "grad_norm": 0.39098042249679565, | |
| "learning_rate": 8.811881188118812e-06, | |
| "loss": 2.3063, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.08964143426294821, | |
| "grad_norm": 0.29923197627067566, | |
| "learning_rate": 8.910891089108911e-06, | |
| "loss": 2.6163, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09063745019920319, | |
| "grad_norm": 0.2684191167354584, | |
| "learning_rate": 9.009900990099011e-06, | |
| "loss": 2.6845, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.09163346613545817, | |
| "grad_norm": 0.27097082138061523, | |
| "learning_rate": 9.10891089108911e-06, | |
| "loss": 2.598, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.09262948207171315, | |
| "grad_norm": 0.2647894024848938, | |
| "learning_rate": 9.20792079207921e-06, | |
| "loss": 2.2659, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.09362549800796813, | |
| "grad_norm": 0.38580745458602905, | |
| "learning_rate": 9.306930693069308e-06, | |
| "loss": 2.5818, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.0946215139442231, | |
| "grad_norm": 0.23376502096652985, | |
| "learning_rate": 9.405940594059405e-06, | |
| "loss": 2.207, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.09561752988047809, | |
| "grad_norm": 0.7030872702598572, | |
| "learning_rate": 9.504950495049505e-06, | |
| "loss": 2.1312, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.09661354581673307, | |
| "grad_norm": 0.23579809069633484, | |
| "learning_rate": 9.603960396039604e-06, | |
| "loss": 2.0685, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.09760956175298804, | |
| "grad_norm": 0.3901154100894928, | |
| "learning_rate": 9.702970297029704e-06, | |
| "loss": 2.662, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.09860557768924302, | |
| "grad_norm": 0.2687411606311798, | |
| "learning_rate": 9.801980198019802e-06, | |
| "loss": 2.9062, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.099601593625498, | |
| "grad_norm": 0.1916651427745819, | |
| "learning_rate": 9.900990099009901e-06, | |
| "loss": 2.1874, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10059760956175298, | |
| "grad_norm": 0.3915342688560486, | |
| "learning_rate": 1e-05, | |
| "loss": 2.4196, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.10159362549800798, | |
| "grad_norm": 0.4256736636161804, | |
| "learning_rate": 9.999969740355927e-06, | |
| "loss": 2.3229, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.10258964143426295, | |
| "grad_norm": 0.24454592168331146, | |
| "learning_rate": 9.999878961789962e-06, | |
| "loss": 2.4725, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.10358565737051793, | |
| "grad_norm": 0.4549209475517273, | |
| "learning_rate": 9.999727665400876e-06, | |
| "loss": 2.6802, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.10458167330677291, | |
| "grad_norm": 0.27968448400497437, | |
| "learning_rate": 9.999515853019941e-06, | |
| "loss": 2.154, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.10557768924302789, | |
| "grad_norm": 0.30781856179237366, | |
| "learning_rate": 9.999243527210904e-06, | |
| "loss": 2.1358, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.10657370517928287, | |
| "grad_norm": 0.22190262377262115, | |
| "learning_rate": 9.998910691269957e-06, | |
| "loss": 2.1951, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.10756972111553785, | |
| "grad_norm": 0.36984801292419434, | |
| "learning_rate": 9.998517349225698e-06, | |
| "loss": 2.2283, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.10856573705179283, | |
| "grad_norm": 0.24837668240070343, | |
| "learning_rate": 9.998063505839084e-06, | |
| "loss": 2.4556, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.10956175298804781, | |
| "grad_norm": 0.17527616024017334, | |
| "learning_rate": 9.99754916660337e-06, | |
| "loss": 2.2414, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11055776892430279, | |
| "grad_norm": 0.2681446969509125, | |
| "learning_rate": 9.996974337744047e-06, | |
| "loss": 2.6504, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.11155378486055777, | |
| "grad_norm": 0.41997164487838745, | |
| "learning_rate": 9.99633902621876e-06, | |
| "loss": 2.4704, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.11254980079681275, | |
| "grad_norm": 0.43319636583328247, | |
| "learning_rate": 9.995643239717228e-06, | |
| "loss": 2.4391, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.11354581673306773, | |
| "grad_norm": 0.3344462811946869, | |
| "learning_rate": 9.994886986661155e-06, | |
| "loss": 2.4113, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.1145418326693227, | |
| "grad_norm": 0.2086816132068634, | |
| "learning_rate": 9.994070276204115e-06, | |
| "loss": 2.1469, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.11553784860557768, | |
| "grad_norm": 0.35499969124794006, | |
| "learning_rate": 9.993193118231463e-06, | |
| "loss": 2.6212, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.11653386454183266, | |
| "grad_norm": 0.4640713036060333, | |
| "learning_rate": 9.992255523360187e-06, | |
| "loss": 2.5926, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.11752988047808766, | |
| "grad_norm": 0.3302168548107147, | |
| "learning_rate": 9.991257502938805e-06, | |
| "loss": 2.0769, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.11852589641434264, | |
| "grad_norm": 0.4918990135192871, | |
| "learning_rate": 9.990199069047216e-06, | |
| "loss": 2.4818, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.11952191235059761, | |
| "grad_norm": 0.25973260402679443, | |
| "learning_rate": 9.989080234496548e-06, | |
| "loss": 2.3236, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1205179282868526, | |
| "grad_norm": 0.26280921697616577, | |
| "learning_rate": 9.98790101282902e-06, | |
| "loss": 2.4276, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.12151394422310757, | |
| "grad_norm": 0.21018964052200317, | |
| "learning_rate": 9.986661418317759e-06, | |
| "loss": 2.3006, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.12250996015936255, | |
| "grad_norm": 0.30688363313674927, | |
| "learning_rate": 9.985361465966644e-06, | |
| "loss": 2.3402, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.12350597609561753, | |
| "grad_norm": 0.27469494938850403, | |
| "learning_rate": 9.984001171510112e-06, | |
| "loss": 2.7987, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.12450199203187251, | |
| "grad_norm": 0.2825527489185333, | |
| "learning_rate": 9.982580551412972e-06, | |
| "loss": 2.4642, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1254980079681275, | |
| "grad_norm": 0.6109227538108826, | |
| "learning_rate": 9.98109962287021e-06, | |
| "loss": 2.6041, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.12649402390438247, | |
| "grad_norm": 0.3983345031738281, | |
| "learning_rate": 9.979558403806773e-06, | |
| "loss": 2.4599, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.12749003984063745, | |
| "grad_norm": 0.3466341495513916, | |
| "learning_rate": 9.977956912877356e-06, | |
| "loss": 2.1902, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.12848605577689243, | |
| "grad_norm": 0.3762282729148865, | |
| "learning_rate": 9.97629516946618e-06, | |
| "loss": 2.2643, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.1294820717131474, | |
| "grad_norm": 0.523991048336029, | |
| "learning_rate": 9.974573193686747e-06, | |
| "loss": 2.196, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.13047808764940239, | |
| "grad_norm": 0.23254041373729706, | |
| "learning_rate": 9.97279100638161e-06, | |
| "loss": 2.4402, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.13147410358565736, | |
| "grad_norm": 0.3137255311012268, | |
| "learning_rate": 9.970948629122108e-06, | |
| "loss": 2.5905, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.13247011952191234, | |
| "grad_norm": 0.21106691658496857, | |
| "learning_rate": 9.969046084208116e-06, | |
| "loss": 2.3683, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.13346613545816732, | |
| "grad_norm": 0.4183836877346039, | |
| "learning_rate": 9.967083394667763e-06, | |
| "loss": 2.0614, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.1344621513944223, | |
| "grad_norm": 0.4468408226966858, | |
| "learning_rate": 9.965060584257165e-06, | |
| "loss": 2.4639, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.13545816733067728, | |
| "grad_norm": 0.22207094728946686, | |
| "learning_rate": 9.962977677460132e-06, | |
| "loss": 2.2261, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.13645418326693226, | |
| "grad_norm": 0.2465856373310089, | |
| "learning_rate": 9.960834699487873e-06, | |
| "loss": 2.2444, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.13745019920318724, | |
| "grad_norm": 0.3648821711540222, | |
| "learning_rate": 9.958631676278686e-06, | |
| "loss": 2.6132, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.13844621513944222, | |
| "grad_norm": 0.21419532597064972, | |
| "learning_rate": 9.956368634497648e-06, | |
| "loss": 2.4863, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.1394422310756972, | |
| "grad_norm": 0.5358874797821045, | |
| "learning_rate": 9.9540456015363e-06, | |
| "loss": 2.401, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.14043824701195218, | |
| "grad_norm": 0.25208160281181335, | |
| "learning_rate": 9.951662605512298e-06, | |
| "loss": 2.5901, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.14143426294820718, | |
| "grad_norm": 0.2659305930137634, | |
| "learning_rate": 9.949219675269089e-06, | |
| "loss": 2.3229, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.14243027888446216, | |
| "grad_norm": 0.5667638778686523, | |
| "learning_rate": 9.946716840375552e-06, | |
| "loss": 2.4998, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.14342629482071714, | |
| "grad_norm": 0.315893292427063, | |
| "learning_rate": 9.944154131125643e-06, | |
| "loss": 2.4017, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.14442231075697212, | |
| "grad_norm": 0.43832895159721375, | |
| "learning_rate": 9.941531578538032e-06, | |
| "loss": 2.473, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.1454183266932271, | |
| "grad_norm": 0.2750052511692047, | |
| "learning_rate": 9.938849214355722e-06, | |
| "loss": 2.3208, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.14641434262948208, | |
| "grad_norm": 0.26100143790245056, | |
| "learning_rate": 9.936107071045665e-06, | |
| "loss": 2.2151, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.14741035856573706, | |
| "grad_norm": 0.3080121576786041, | |
| "learning_rate": 9.933305181798374e-06, | |
| "loss": 2.0868, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.14840637450199204, | |
| "grad_norm": 0.24721984565258026, | |
| "learning_rate": 9.93044358052752e-06, | |
| "loss": 2.3312, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.14940239043824702, | |
| "grad_norm": 0.5916289687156677, | |
| "learning_rate": 9.927522301869515e-06, | |
| "loss": 2.6542, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.150398406374502, | |
| "grad_norm": 0.22790588438510895, | |
| "learning_rate": 9.924541381183099e-06, | |
| "loss": 2.3121, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.15139442231075698, | |
| "grad_norm": 0.3201110064983368, | |
| "learning_rate": 9.921500854548916e-06, | |
| "loss": 2.8776, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.15239043824701196, | |
| "grad_norm": 0.3063032627105713, | |
| "learning_rate": 9.918400758769063e-06, | |
| "loss": 2.3604, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.15338645418326693, | |
| "grad_norm": 0.41891732811927795, | |
| "learning_rate": 9.915241131366657e-06, | |
| "loss": 2.1036, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.15438247011952191, | |
| "grad_norm": 1.0909143686294556, | |
| "learning_rate": 9.912022010585385e-06, | |
| "loss": 2.5247, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.1553784860557769, | |
| "grad_norm": 0.26673582196235657, | |
| "learning_rate": 9.90874343538902e-06, | |
| "loss": 2.159, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.15637450199203187, | |
| "grad_norm": 0.3614170551300049, | |
| "learning_rate": 9.905405445460972e-06, | |
| "loss": 2.4383, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.15737051792828685, | |
| "grad_norm": 0.5078898668289185, | |
| "learning_rate": 9.902008081203796e-06, | |
| "loss": 2.2543, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.15836653386454183, | |
| "grad_norm": 0.3936934769153595, | |
| "learning_rate": 9.898551383738707e-06, | |
| "loss": 2.538, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.1593625498007968, | |
| "grad_norm": 0.6516975164413452, | |
| "learning_rate": 9.895035394905073e-06, | |
| "loss": 2.5301, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1603585657370518, | |
| "grad_norm": 0.26518794894218445, | |
| "learning_rate": 9.89146015725993e-06, | |
| "loss": 2.2118, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.16135458167330677, | |
| "grad_norm": 0.41366127133369446, | |
| "learning_rate": 9.887825714077439e-06, | |
| "loss": 2.4799, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.16235059760956175, | |
| "grad_norm": 0.5400006771087646, | |
| "learning_rate": 9.884132109348386e-06, | |
| "loss": 2.5019, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.16334661354581673, | |
| "grad_norm": 0.36508408188819885, | |
| "learning_rate": 9.880379387779637e-06, | |
| "loss": 2.7938, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.1643426294820717, | |
| "grad_norm": 0.4240388572216034, | |
| "learning_rate": 9.876567594793597e-06, | |
| "loss": 2.5002, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.16533864541832669, | |
| "grad_norm": 0.277864933013916, | |
| "learning_rate": 9.87269677652767e-06, | |
| "loss": 2.5436, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.16633466135458166, | |
| "grad_norm": 0.25840163230895996, | |
| "learning_rate": 9.868766979833686e-06, | |
| "loss": 2.2811, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.16733067729083664, | |
| "grad_norm": 0.32198214530944824, | |
| "learning_rate": 9.864778252277344e-06, | |
| "loss": 2.3215, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.16832669322709162, | |
| "grad_norm": 0.613046407699585, | |
| "learning_rate": 9.86073064213764e-06, | |
| "loss": 2.8133, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.1693227091633466, | |
| "grad_norm": 0.38470038771629333, | |
| "learning_rate": 9.856624198406262e-06, | |
| "loss": 2.4133, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.17031872509960158, | |
| "grad_norm": 0.3675747811794281, | |
| "learning_rate": 9.852458970787027e-06, | |
| "loss": 2.0617, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.17131474103585656, | |
| "grad_norm": 0.26074767112731934, | |
| "learning_rate": 9.848235009695255e-06, | |
| "loss": 2.132, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.17231075697211157, | |
| "grad_norm": 0.5974801778793335, | |
| "learning_rate": 9.84395236625717e-06, | |
| "loss": 2.3888, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.17330677290836655, | |
| "grad_norm": 0.2652048170566559, | |
| "learning_rate": 9.839611092309278e-06, | |
| "loss": 2.4468, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.17430278884462153, | |
| "grad_norm": 0.6336271166801453, | |
| "learning_rate": 9.835211240397743e-06, | |
| "loss": 2.3256, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.1752988047808765, | |
| "grad_norm": 0.3853505849838257, | |
| "learning_rate": 9.830752863777741e-06, | |
| "loss": 2.5527, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.17629482071713148, | |
| "grad_norm": 0.25374558568000793, | |
| "learning_rate": 9.826236016412833e-06, | |
| "loss": 2.5593, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.17729083665338646, | |
| "grad_norm": 0.29101264476776123, | |
| "learning_rate": 9.821660752974294e-06, | |
| "loss": 2.6399, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.17828685258964144, | |
| "grad_norm": 0.7464101910591125, | |
| "learning_rate": 9.817027128840462e-06, | |
| "loss": 2.1674, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.17928286852589642, | |
| "grad_norm": 0.28557366132736206, | |
| "learning_rate": 9.812335200096064e-06, | |
| "loss": 2.7127, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1802788844621514, | |
| "grad_norm": 0.5655897259712219, | |
| "learning_rate": 9.807585023531536e-06, | |
| "loss": 2.0397, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.18127490039840638, | |
| "grad_norm": 0.2831386625766754, | |
| "learning_rate": 9.802776656642341e-06, | |
| "loss": 2.1947, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.18227091633466136, | |
| "grad_norm": 0.30917420983314514, | |
| "learning_rate": 9.797910157628265e-06, | |
| "loss": 2.3951, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.18326693227091634, | |
| "grad_norm": 0.3886703550815582, | |
| "learning_rate": 9.792985585392722e-06, | |
| "loss": 2.4107, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.18426294820717132, | |
| "grad_norm": 0.51981121301651, | |
| "learning_rate": 9.78800299954203e-06, | |
| "loss": 2.3655, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.1852589641434263, | |
| "grad_norm": 0.31090375781059265, | |
| "learning_rate": 9.782962460384701e-06, | |
| "loss": 1.984, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.18625498007968128, | |
| "grad_norm": 0.4561314880847931, | |
| "learning_rate": 9.777864028930705e-06, | |
| "loss": 3.0161, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.18725099601593626, | |
| "grad_norm": 0.3265978693962097, | |
| "learning_rate": 9.772707766890726e-06, | |
| "loss": 2.6738, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.18824701195219123, | |
| "grad_norm": 0.5627899765968323, | |
| "learning_rate": 9.767493736675429e-06, | |
| "loss": 2.4544, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.1892430278884462, | |
| "grad_norm": 0.3551636338233948, | |
| "learning_rate": 9.762222001394692e-06, | |
| "loss": 2.4696, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1902390438247012, | |
| "grad_norm": 0.27445298433303833, | |
| "learning_rate": 9.756892624856848e-06, | |
| "loss": 2.5626, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.19123505976095617, | |
| "grad_norm": 0.4346907436847687, | |
| "learning_rate": 9.751505671567914e-06, | |
| "loss": 2.6588, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.19223107569721115, | |
| "grad_norm": 1.7177170515060425, | |
| "learning_rate": 9.746061206730801e-06, | |
| "loss": 3.3538, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.19322709163346613, | |
| "grad_norm": 0.294007807970047, | |
| "learning_rate": 9.740559296244543e-06, | |
| "loss": 2.7963, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.1942231075697211, | |
| "grad_norm": 0.3322044014930725, | |
| "learning_rate": 9.735000006703475e-06, | |
| "loss": 2.1763, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.1952191235059761, | |
| "grad_norm": 0.2852723002433777, | |
| "learning_rate": 9.72938340539645e-06, | |
| "loss": 2.2182, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.19621513944223107, | |
| "grad_norm": 0.2600834369659424, | |
| "learning_rate": 9.723709560306009e-06, | |
| "loss": 2.7632, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.19721115537848605, | |
| "grad_norm": 0.27677562832832336, | |
| "learning_rate": 9.717978540107566e-06, | |
| "loss": 2.3831, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.19820717131474103, | |
| "grad_norm": 0.4312080144882202, | |
| "learning_rate": 9.712190414168573e-06, | |
| "loss": 2.4096, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.199203187250996, | |
| "grad_norm": 0.7516922950744629, | |
| "learning_rate": 9.706345252547681e-06, | |
| "loss": 3.0072, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.20019920318725098, | |
| "grad_norm": 0.47257497906684875, | |
| "learning_rate": 9.700443125993897e-06, | |
| "loss": 2.4537, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.20119521912350596, | |
| "grad_norm": 0.5163850784301758, | |
| "learning_rate": 9.694484105945719e-06, | |
| "loss": 2.4488, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.20219123505976094, | |
| "grad_norm": 0.2632780373096466, | |
| "learning_rate": 9.688468264530278e-06, | |
| "loss": 2.5477, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.20318725099601595, | |
| "grad_norm": 1.0932762622833252, | |
| "learning_rate": 9.682395674562459e-06, | |
| "loss": 2.8381, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.20418326693227093, | |
| "grad_norm": 0.568217396736145, | |
| "learning_rate": 9.676266409544031e-06, | |
| "loss": 2.2398, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.2051792828685259, | |
| "grad_norm": 0.5864899754524231, | |
| "learning_rate": 9.670080543662742e-06, | |
| "loss": 2.5067, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.2061752988047809, | |
| "grad_norm": 0.38742220401763916, | |
| "learning_rate": 9.663838151791431e-06, | |
| "loss": 2.3831, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.20717131474103587, | |
| "grad_norm": 0.441034197807312, | |
| "learning_rate": 9.657539309487123e-06, | |
| "loss": 2.3785, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.20816733067729085, | |
| "grad_norm": 1.6438182592391968, | |
| "learning_rate": 9.651184092990109e-06, | |
| "loss": 3.6952, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.20916334661354583, | |
| "grad_norm": 0.35267430543899536, | |
| "learning_rate": 9.644772579223023e-06, | |
| "loss": 2.4354, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2101593625498008, | |
| "grad_norm": 0.27790936827659607, | |
| "learning_rate": 9.638304845789916e-06, | |
| "loss": 2.3407, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.21115537848605578, | |
| "grad_norm": 0.4317843019962311, | |
| "learning_rate": 9.631780970975311e-06, | |
| "loss": 2.2805, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.21215139442231076, | |
| "grad_norm": 0.35801681876182556, | |
| "learning_rate": 9.625201033743262e-06, | |
| "loss": 2.3219, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.21314741035856574, | |
| "grad_norm": 0.3666556477546692, | |
| "learning_rate": 9.618565113736388e-06, | |
| "loss": 2.1962, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.21414342629482072, | |
| "grad_norm": 0.36347630620002747, | |
| "learning_rate": 9.611873291274927e-06, | |
| "loss": 2.1945, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.2151394422310757, | |
| "grad_norm": 0.47142109274864197, | |
| "learning_rate": 9.60512564735574e-06, | |
| "loss": 2.1907, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.21613545816733068, | |
| "grad_norm": 0.3300761580467224, | |
| "learning_rate": 9.598322263651352e-06, | |
| "loss": 2.0638, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.21713147410358566, | |
| "grad_norm": 0.3918429911136627, | |
| "learning_rate": 9.591463222508947e-06, | |
| "loss": 2.4349, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.21812749003984064, | |
| "grad_norm": 0.3837280869483948, | |
| "learning_rate": 9.584548606949384e-06, | |
| "loss": 2.2359, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.21912350597609562, | |
| "grad_norm": 0.47225990891456604, | |
| "learning_rate": 9.577578500666187e-06, | |
| "loss": 2.4696, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2201195219123506, | |
| "grad_norm": 0.3448033630847931, | |
| "learning_rate": 9.570552988024527e-06, | |
| "loss": 2.3639, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.22111553784860558, | |
| "grad_norm": 0.39937150478363037, | |
| "learning_rate": 9.563472154060212e-06, | |
| "loss": 2.3513, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.22211155378486055, | |
| "grad_norm": 0.3486849367618561, | |
| "learning_rate": 9.556336084478645e-06, | |
| "loss": 2.3674, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.22310756972111553, | |
| "grad_norm": 0.4388813376426697, | |
| "learning_rate": 9.5491448656538e-06, | |
| "loss": 2.4748, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.2241035856573705, | |
| "grad_norm": 0.4307428300380707, | |
| "learning_rate": 9.541898584627164e-06, | |
| "loss": 2.1206, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.2250996015936255, | |
| "grad_norm": 0.5265683531761169, | |
| "learning_rate": 9.534597329106688e-06, | |
| "loss": 2.589, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.22609561752988047, | |
| "grad_norm": 0.5943540930747986, | |
| "learning_rate": 9.527241187465735e-06, | |
| "loss": 2.8641, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.22709163346613545, | |
| "grad_norm": 0.3554113209247589, | |
| "learning_rate": 9.519830248741991e-06, | |
| "loss": 2.0978, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.22808764940239043, | |
| "grad_norm": 0.43764352798461914, | |
| "learning_rate": 9.512364602636405e-06, | |
| "loss": 2.2777, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.2290836653386454, | |
| "grad_norm": 0.27372264862060547, | |
| "learning_rate": 9.504844339512096e-06, | |
| "loss": 2.3366, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.2300796812749004, | |
| "grad_norm": 0.5419708490371704, | |
| "learning_rate": 9.497269550393257e-06, | |
| "loss": 2.5115, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.23107569721115537, | |
| "grad_norm": 0.3294195234775543, | |
| "learning_rate": 9.489640326964058e-06, | |
| "loss": 2.3812, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.23207171314741035, | |
| "grad_norm": 0.3676604926586151, | |
| "learning_rate": 9.481956761567531e-06, | |
| "loss": 2.1645, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.23306772908366533, | |
| "grad_norm": 0.24499647319316864, | |
| "learning_rate": 9.47421894720446e-06, | |
| "loss": 2.436, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.2340637450199203, | |
| "grad_norm": 0.4562065601348877, | |
| "learning_rate": 9.466426977532246e-06, | |
| "loss": 2.4614, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.2350597609561753, | |
| "grad_norm": 0.4152824580669403, | |
| "learning_rate": 9.458580946863784e-06, | |
| "loss": 2.6406, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2360557768924303, | |
| "grad_norm": 0.2812240421772003, | |
| "learning_rate": 9.45068095016631e-06, | |
| "loss": 2.4129, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.23705179282868527, | |
| "grad_norm": 0.37695300579071045, | |
| "learning_rate": 9.442727083060258e-06, | |
| "loss": 2.8288, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.23804780876494025, | |
| "grad_norm": 0.29094114899635315, | |
| "learning_rate": 9.434719441818106e-06, | |
| "loss": 2.2392, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.23904382470119523, | |
| "grad_norm": 0.6004308462142944, | |
| "learning_rate": 9.426658123363202e-06, | |
| "loss": 2.6978, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2400398406374502, | |
| "grad_norm": 0.824376106262207, | |
| "learning_rate": 9.418543225268598e-06, | |
| "loss": 2.8384, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2410358565737052, | |
| "grad_norm": 0.37361350655555725, | |
| "learning_rate": 9.410374845755862e-06, | |
| "loss": 2.7737, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.24203187250996017, | |
| "grad_norm": 0.3311799168586731, | |
| "learning_rate": 9.402153083693898e-06, | |
| "loss": 2.6569, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.24302788844621515, | |
| "grad_norm": 0.31674501299858093, | |
| "learning_rate": 9.393878038597748e-06, | |
| "loss": 2.4791, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.24402390438247012, | |
| "grad_norm": 0.5512855052947998, | |
| "learning_rate": 9.385549810627374e-06, | |
| "loss": 2.0412, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.2450199203187251, | |
| "grad_norm": 0.48202139139175415, | |
| "learning_rate": 9.377168500586465e-06, | |
| "loss": 2.4472, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.24601593625498008, | |
| "grad_norm": 0.29134997725486755, | |
| "learning_rate": 9.3687342099212e-06, | |
| "loss": 2.1911, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.24701195219123506, | |
| "grad_norm": 0.27262917160987854, | |
| "learning_rate": 9.36024704071904e-06, | |
| "loss": 2.3633, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.24800796812749004, | |
| "grad_norm": 0.2618001699447632, | |
| "learning_rate": 9.351707095707465e-06, | |
| "loss": 2.3556, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.24900398406374502, | |
| "grad_norm": 0.4861814081668854, | |
| "learning_rate": 9.343114478252758e-06, | |
| "loss": 2.2809, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.3331791162490845, | |
| "learning_rate": 9.334469292358736e-06, | |
| "loss": 2.2747, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.250996015936255, | |
| "grad_norm": 0.3599317669868469, | |
| "learning_rate": 9.3257716426655e-06, | |
| "loss": 2.3204, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.25199203187250996, | |
| "grad_norm": 0.28026479482650757, | |
| "learning_rate": 9.317021634448162e-06, | |
| "loss": 2.5997, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.25298804780876494, | |
| "grad_norm": 0.4968087077140808, | |
| "learning_rate": 9.308219373615574e-06, | |
| "loss": 2.4623, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2539840637450199, | |
| "grad_norm": 0.5899234414100647, | |
| "learning_rate": 9.299364966709051e-06, | |
| "loss": 2.4678, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2549800796812749, | |
| "grad_norm": 0.27741050720214844, | |
| "learning_rate": 9.290458520901072e-06, | |
| "loss": 2.4373, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.2559760956175299, | |
| "grad_norm": 0.44141483306884766, | |
| "learning_rate": 9.28150014399399e-06, | |
| "loss": 2.3013, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.25697211155378485, | |
| "grad_norm": 0.4108343720436096, | |
| "learning_rate": 9.272489944418724e-06, | |
| "loss": 2.6281, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.25796812749003983, | |
| "grad_norm": 0.4309611916542053, | |
| "learning_rate": 9.263428031233444e-06, | |
| "loss": 2.6192, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.2589641434262948, | |
| "grad_norm": 0.3191240727901459, | |
| "learning_rate": 9.25431451412226e-06, | |
| "loss": 2.3667, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2599601593625498, | |
| "grad_norm": 0.4311404824256897, | |
| "learning_rate": 9.245149503393884e-06, | |
| "loss": 2.5286, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.26095617529880477, | |
| "grad_norm": 0.8753085136413574, | |
| "learning_rate": 9.235933109980302e-06, | |
| "loss": 2.6609, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.26195219123505975, | |
| "grad_norm": 0.679023265838623, | |
| "learning_rate": 9.226665445435428e-06, | |
| "loss": 2.4715, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.26294820717131473, | |
| "grad_norm": 0.4910929501056671, | |
| "learning_rate": 9.217346621933753e-06, | |
| "loss": 2.0939, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.2639442231075697, | |
| "grad_norm": 1.3370636701583862, | |
| "learning_rate": 9.207976752268992e-06, | |
| "loss": 2.4367, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2649402390438247, | |
| "grad_norm": 0.4148995578289032, | |
| "learning_rate": 9.19855594985271e-06, | |
| "loss": 2.5403, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.26593625498007967, | |
| "grad_norm": 0.5130553841590881, | |
| "learning_rate": 9.189084328712961e-06, | |
| "loss": 2.654, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.26693227091633465, | |
| "grad_norm": 0.5000612139701843, | |
| "learning_rate": 9.179562003492898e-06, | |
| "loss": 2.2451, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.2679282868525896, | |
| "grad_norm": 1.3651481866836548, | |
| "learning_rate": 9.16998908944939e-06, | |
| "loss": 2.3566, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.2689243027888446, | |
| "grad_norm": 0.4688972532749176, | |
| "learning_rate": 9.160365702451625e-06, | |
| "loss": 2.4274, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2699203187250996, | |
| "grad_norm": 0.44729602336883545, | |
| "learning_rate": 9.150691958979712e-06, | |
| "loss": 2.3431, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.27091633466135456, | |
| "grad_norm": 0.4126404821872711, | |
| "learning_rate": 9.14096797612326e-06, | |
| "loss": 2.4358, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.27191235059760954, | |
| "grad_norm": 0.8661454319953918, | |
| "learning_rate": 9.131193871579975e-06, | |
| "loss": 2.6125, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.2729083665338645, | |
| "grad_norm": 0.35947325825691223, | |
| "learning_rate": 9.121369763654228e-06, | |
| "loss": 2.2618, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2739043824701195, | |
| "grad_norm": 0.3399883210659027, | |
| "learning_rate": 9.111495771255623e-06, | |
| "loss": 2.1576, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.2749003984063745, | |
| "grad_norm": 0.4308667778968811, | |
| "learning_rate": 9.101572013897555e-06, | |
| "loss": 2.0927, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.27589641434262946, | |
| "grad_norm": 0.3370983302593231, | |
| "learning_rate": 9.091598611695774e-06, | |
| "loss": 2.3073, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.27689243027888444, | |
| "grad_norm": 0.30387991666793823, | |
| "learning_rate": 9.081575685366919e-06, | |
| "loss": 2.5888, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.2778884462151394, | |
| "grad_norm": 0.6190817952156067, | |
| "learning_rate": 9.071503356227063e-06, | |
| "loss": 2.5076, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.2788844621513944, | |
| "grad_norm": 0.43932202458381653, | |
| "learning_rate": 9.061381746190243e-06, | |
| "loss": 2.3828, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2798804780876494, | |
| "grad_norm": 0.4108044505119324, | |
| "learning_rate": 9.051210977766987e-06, | |
| "loss": 2.4859, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.28087649402390436, | |
| "grad_norm": 0.7741344571113586, | |
| "learning_rate": 9.040991174062827e-06, | |
| "loss": 2.332, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.2818725099601594, | |
| "grad_norm": 0.42518022656440735, | |
| "learning_rate": 9.030722458776815e-06, | |
| "loss": 2.5427, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.28286852589641437, | |
| "grad_norm": 0.3040229082107544, | |
| "learning_rate": 9.020404956200016e-06, | |
| "loss": 2.5173, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.28386454183266935, | |
| "grad_norm": 0.4257875680923462, | |
| "learning_rate": 9.010038791214012e-06, | |
| "loss": 2.3872, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.2848605577689243, | |
| "grad_norm": 0.6529532074928284, | |
| "learning_rate": 8.999624089289389e-06, | |
| "loss": 2.2936, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.2858565737051793, | |
| "grad_norm": 0.2723180055618286, | |
| "learning_rate": 8.989160976484218e-06, | |
| "loss": 2.5736, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.2868525896414343, | |
| "grad_norm": 0.3371571898460388, | |
| "learning_rate": 8.978649579442525e-06, | |
| "loss": 2.3213, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.28784860557768926, | |
| "grad_norm": 0.4722624719142914, | |
| "learning_rate": 8.968090025392763e-06, | |
| "loss": 2.4039, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.28884462151394424, | |
| "grad_norm": 0.6963698863983154, | |
| "learning_rate": 8.957482442146271e-06, | |
| "loss": 2.4849, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2898406374501992, | |
| "grad_norm": 0.41670724749565125, | |
| "learning_rate": 8.946826958095726e-06, | |
| "loss": 2.4297, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.2908366533864542, | |
| "grad_norm": 0.44924449920654297, | |
| "learning_rate": 8.936123702213593e-06, | |
| "loss": 2.29, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.2918326693227092, | |
| "grad_norm": 0.5405289530754089, | |
| "learning_rate": 8.925372804050554e-06, | |
| "loss": 2.4732, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.29282868525896416, | |
| "grad_norm": 0.5333283543586731, | |
| "learning_rate": 8.914574393733953e-06, | |
| "loss": 2.3553, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.29382470119521914, | |
| "grad_norm": 0.4173821806907654, | |
| "learning_rate": 8.903728601966206e-06, | |
| "loss": 2.4729, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.2948207171314741, | |
| "grad_norm": 0.6668480634689331, | |
| "learning_rate": 8.892835560023236e-06, | |
| "loss": 2.7302, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.2958167330677291, | |
| "grad_norm": 0.5601832270622253, | |
| "learning_rate": 8.881895399752873e-06, | |
| "loss": 2.8201, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.2968127490039841, | |
| "grad_norm": 0.7715175151824951, | |
| "learning_rate": 8.870908253573255e-06, | |
| "loss": 2.439, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.29780876494023906, | |
| "grad_norm": 0.6411163210868835, | |
| "learning_rate": 8.85987425447124e-06, | |
| "loss": 2.2098, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.29880478087649404, | |
| "grad_norm": 1.8174595832824707, | |
| "learning_rate": 8.848793536000779e-06, | |
| "loss": 2.862, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.299800796812749, | |
| "grad_norm": 0.4861983060836792, | |
| "learning_rate": 8.837666232281312e-06, | |
| "loss": 1.964, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.300796812749004, | |
| "grad_norm": 0.629531979560852, | |
| "learning_rate": 8.826492477996138e-06, | |
| "loss": 2.4866, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.301792828685259, | |
| "grad_norm": 0.33506232500076294, | |
| "learning_rate": 8.81527240839079e-06, | |
| "loss": 2.1813, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.30278884462151395, | |
| "grad_norm": 0.5065098404884338, | |
| "learning_rate": 8.80400615927139e-06, | |
| "loss": 2.2313, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.30378486055776893, | |
| "grad_norm": 0.31633898615837097, | |
| "learning_rate": 8.792693867003017e-06, | |
| "loss": 2.5764, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3047808764940239, | |
| "grad_norm": 0.6082801818847656, | |
| "learning_rate": 8.781335668508044e-06, | |
| "loss": 2.1408, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.3057768924302789, | |
| "grad_norm": 0.3309324383735657, | |
| "learning_rate": 8.76993170126449e-06, | |
| "loss": 2.5198, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.30677290836653387, | |
| "grad_norm": 0.38401421904563904, | |
| "learning_rate": 8.758482103304348e-06, | |
| "loss": 2.2784, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.30776892430278885, | |
| "grad_norm": 0.4021207392215729, | |
| "learning_rate": 8.746987013211924e-06, | |
| "loss": 2.1789, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.30876494023904383, | |
| "grad_norm": 1.5585757493972778, | |
| "learning_rate": 8.735446570122151e-06, | |
| "loss": 2.429, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3097609561752988, | |
| "grad_norm": 0.5734106302261353, | |
| "learning_rate": 8.72386091371891e-06, | |
| "loss": 2.3943, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.3107569721115538, | |
| "grad_norm": 0.5181722044944763, | |
| "learning_rate": 8.712230184233337e-06, | |
| "loss": 2.4501, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.31175298804780877, | |
| "grad_norm": 0.42989951372146606, | |
| "learning_rate": 8.700554522442124e-06, | |
| "loss": 2.3612, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.31274900398406374, | |
| "grad_norm": 0.8372073769569397, | |
| "learning_rate": 8.688834069665819e-06, | |
| "loss": 2.6138, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3137450199203187, | |
| "grad_norm": 0.5447811484336853, | |
| "learning_rate": 8.677068967767117e-06, | |
| "loss": 2.4036, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3147410358565737, | |
| "grad_norm": 0.28892412781715393, | |
| "learning_rate": 8.665259359149132e-06, | |
| "loss": 2.4249, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3157370517928287, | |
| "grad_norm": 0.33981916308403015, | |
| "learning_rate": 8.653405386753688e-06, | |
| "loss": 2.8255, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.31673306772908366, | |
| "grad_norm": 0.37266361713409424, | |
| "learning_rate": 8.64150719405958e-06, | |
| "loss": 2.4619, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.31772908366533864, | |
| "grad_norm": 0.611991286277771, | |
| "learning_rate": 8.629564925080838e-06, | |
| "loss": 2.6266, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.3187250996015936, | |
| "grad_norm": 0.7753398418426514, | |
| "learning_rate": 8.617578724364984e-06, | |
| "loss": 2.5113, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3197211155378486, | |
| "grad_norm": 0.7812793850898743, | |
| "learning_rate": 8.605548736991284e-06, | |
| "loss": 2.1736, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.3207171314741036, | |
| "grad_norm": 0.5114774703979492, | |
| "learning_rate": 8.593475108568995e-06, | |
| "loss": 2.7169, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.32171314741035856, | |
| "grad_norm": 0.3701231777667999, | |
| "learning_rate": 8.581357985235595e-06, | |
| "loss": 2.2407, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.32270916334661354, | |
| "grad_norm": 1.135130763053894, | |
| "learning_rate": 8.569197513655022e-06, | |
| "loss": 2.0901, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3237051792828685, | |
| "grad_norm": 0.2848869562149048, | |
| "learning_rate": 8.55699384101589e-06, | |
| "loss": 2.592, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.3247011952191235, | |
| "grad_norm": 0.5609592795372009, | |
| "learning_rate": 8.544747115029717e-06, | |
| "loss": 2.3673, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.3256972111553785, | |
| "grad_norm": 0.3470471203327179, | |
| "learning_rate": 8.53245748392913e-06, | |
| "loss": 2.377, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.32669322709163345, | |
| "grad_norm": 0.9178757667541504, | |
| "learning_rate": 8.520125096466072e-06, | |
| "loss": 2.7617, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.32768924302788843, | |
| "grad_norm": 0.46402791142463684, | |
| "learning_rate": 8.50775010191001e-06, | |
| "loss": 2.2848, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.3286852589641434, | |
| "grad_norm": 0.4459151327610016, | |
| "learning_rate": 8.495332650046112e-06, | |
| "loss": 2.1105, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3296812749003984, | |
| "grad_norm": 0.7026370763778687, | |
| "learning_rate": 8.48287289117345e-06, | |
| "loss": 2.6823, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.33067729083665337, | |
| "grad_norm": 0.7429327964782715, | |
| "learning_rate": 8.470370976103171e-06, | |
| "loss": 2.2689, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.33167330677290835, | |
| "grad_norm": 0.37948814034461975, | |
| "learning_rate": 8.457827056156673e-06, | |
| "loss": 2.628, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.33266932270916333, | |
| "grad_norm": 0.3749179244041443, | |
| "learning_rate": 8.44524128316378e-06, | |
| "loss": 2.4552, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.3336653386454183, | |
| "grad_norm": 0.421303391456604, | |
| "learning_rate": 8.432613809460895e-06, | |
| "loss": 2.1797, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3346613545816733, | |
| "grad_norm": 0.5645405054092407, | |
| "learning_rate": 8.419944787889162e-06, | |
| "loss": 2.4308, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.33565737051792827, | |
| "grad_norm": 0.7171806693077087, | |
| "learning_rate": 8.407234371792614e-06, | |
| "loss": 2.3868, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.33665338645418325, | |
| "grad_norm": 0.7937443852424622, | |
| "learning_rate": 8.394482715016318e-06, | |
| "loss": 2.433, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.3376494023904382, | |
| "grad_norm": 0.5325895547866821, | |
| "learning_rate": 8.381689971904514e-06, | |
| "loss": 2.5116, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.3386454183266932, | |
| "grad_norm": 0.3550787568092346, | |
| "learning_rate": 8.368856297298742e-06, | |
| "loss": 2.4187, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3396414342629482, | |
| "grad_norm": 0.5118217468261719, | |
| "learning_rate": 8.355981846535972e-06, | |
| "loss": 2.1325, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.34063745019920316, | |
| "grad_norm": 0.35231295228004456, | |
| "learning_rate": 8.343066775446724e-06, | |
| "loss": 2.3751, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.34163346613545814, | |
| "grad_norm": 0.6949347853660583, | |
| "learning_rate": 8.330111240353178e-06, | |
| "loss": 2.5615, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.3426294820717131, | |
| "grad_norm": 0.5718231797218323, | |
| "learning_rate": 8.317115398067289e-06, | |
| "loss": 2.2858, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.3436254980079681, | |
| "grad_norm": 0.6337103843688965, | |
| "learning_rate": 8.30407940588888e-06, | |
| "loss": 2.5088, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.34462151394422313, | |
| "grad_norm": 0.4129788875579834, | |
| "learning_rate": 8.29100342160374e-06, | |
| "loss": 2.2748, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.3456175298804781, | |
| "grad_norm": 0.3462570607662201, | |
| "learning_rate": 8.27788760348173e-06, | |
| "loss": 2.3205, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.3466135458167331, | |
| "grad_norm": 0.4417884349822998, | |
| "learning_rate": 8.26473211027484e-06, | |
| "loss": 2.3901, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.34760956175298807, | |
| "grad_norm": 0.45579978823661804, | |
| "learning_rate": 8.251537101215287e-06, | |
| "loss": 2.2336, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.34860557768924305, | |
| "grad_norm": 0.6957226991653442, | |
| "learning_rate": 8.238302736013587e-06, | |
| "loss": 2.7518, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.34960159362549803, | |
| "grad_norm": 0.696114718914032, | |
| "learning_rate": 8.225029174856602e-06, | |
| "loss": 2.0373, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.350597609561753, | |
| "grad_norm": 0.40747031569480896, | |
| "learning_rate": 8.211716578405635e-06, | |
| "loss": 2.4626, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.351593625498008, | |
| "grad_norm": 0.47290411591529846, | |
| "learning_rate": 8.198365107794457e-06, | |
| "loss": 2.5871, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.35258964143426297, | |
| "grad_norm": 0.592217206954956, | |
| "learning_rate": 8.184974924627365e-06, | |
| "loss": 2.3886, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.35358565737051795, | |
| "grad_norm": 0.8985310196876526, | |
| "learning_rate": 8.171546190977231e-06, | |
| "loss": 2.6021, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3545816733067729, | |
| "grad_norm": 0.7999231815338135, | |
| "learning_rate": 8.158079069383535e-06, | |
| "loss": 2.2757, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.3555776892430279, | |
| "grad_norm": 0.6230331063270569, | |
| "learning_rate": 8.1445737228504e-06, | |
| "loss": 2.4343, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.3565737051792829, | |
| "grad_norm": 0.43640607595443726, | |
| "learning_rate": 8.131030314844617e-06, | |
| "loss": 1.9181, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.35756972111553786, | |
| "grad_norm": 0.5745819211006165, | |
| "learning_rate": 8.117449009293668e-06, | |
| "loss": 2.9202, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.35856573705179284, | |
| "grad_norm": 0.46643760800361633, | |
| "learning_rate": 8.103829970583742e-06, | |
| "loss": 2.5197, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3595617529880478, | |
| "grad_norm": 0.5368001461029053, | |
| "learning_rate": 8.090173363557748e-06, | |
| "loss": 2.3562, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.3605577689243028, | |
| "grad_norm": 0.5117561221122742, | |
| "learning_rate": 8.076479353513308e-06, | |
| "loss": 2.2398, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3615537848605578, | |
| "grad_norm": 1.2853957414627075, | |
| "learning_rate": 8.06274810620077e-06, | |
| "loss": 2.041, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.36254980079681276, | |
| "grad_norm": 0.5592566132545471, | |
| "learning_rate": 8.0489797878212e-06, | |
| "loss": 2.614, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.36354581673306774, | |
| "grad_norm": 0.3889990448951721, | |
| "learning_rate": 8.035174565024362e-06, | |
| "loss": 2.4095, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.3645418326693227, | |
| "grad_norm": 0.39642640948295593, | |
| "learning_rate": 8.021332604906709e-06, | |
| "loss": 1.9971, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.3655378486055777, | |
| "grad_norm": 0.5725635290145874, | |
| "learning_rate": 8.007454075009352e-06, | |
| "loss": 2.4322, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.3665338645418327, | |
| "grad_norm": 0.538329541683197, | |
| "learning_rate": 7.993539143316044e-06, | |
| "loss": 2.3927, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.36752988047808766, | |
| "grad_norm": 0.8502817153930664, | |
| "learning_rate": 7.979587978251136e-06, | |
| "loss": 2.6487, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.36852589641434264, | |
| "grad_norm": 0.5096033811569214, | |
| "learning_rate": 7.965600748677545e-06, | |
| "loss": 2.4393, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3695219123505976, | |
| "grad_norm": 0.39716872572898865, | |
| "learning_rate": 7.951577623894701e-06, | |
| "loss": 2.9555, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.3705179282868526, | |
| "grad_norm": 0.6120476126670837, | |
| "learning_rate": 7.937518773636518e-06, | |
| "loss": 2.1758, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.3715139442231076, | |
| "grad_norm": 0.4519326984882355, | |
| "learning_rate": 7.923424368069312e-06, | |
| "loss": 2.4508, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.37250996015936255, | |
| "grad_norm": 0.4252610504627228, | |
| "learning_rate": 7.909294577789765e-06, | |
| "loss": 2.134, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.37350597609561753, | |
| "grad_norm": 0.5511481761932373, | |
| "learning_rate": 7.895129573822844e-06, | |
| "loss": 2.4735, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.3745019920318725, | |
| "grad_norm": 1.1340324878692627, | |
| "learning_rate": 7.880929527619742e-06, | |
| "loss": 2.7602, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.3754980079681275, | |
| "grad_norm": 0.45862501859664917, | |
| "learning_rate": 7.866694611055796e-06, | |
| "loss": 2.5242, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.37649402390438247, | |
| "grad_norm": 0.48843666911125183, | |
| "learning_rate": 7.852424996428412e-06, | |
| "loss": 2.3878, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.37749003984063745, | |
| "grad_norm": 1.07990562915802, | |
| "learning_rate": 7.838120856454967e-06, | |
| "loss": 2.2745, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.3784860557768924, | |
| "grad_norm": 0.466766893863678, | |
| "learning_rate": 7.823782364270743e-06, | |
| "loss": 2.5844, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3794820717131474, | |
| "grad_norm": 0.6437628865242004, | |
| "learning_rate": 7.809409693426803e-06, | |
| "loss": 2.381, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.3804780876494024, | |
| "grad_norm": 0.6378084421157837, | |
| "learning_rate": 7.79500301788791e-06, | |
| "loss": 2.4076, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.38147410358565736, | |
| "grad_norm": 0.6559402346611023, | |
| "learning_rate": 7.780562512030414e-06, | |
| "loss": 2.2115, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.38247011952191234, | |
| "grad_norm": 0.4882892668247223, | |
| "learning_rate": 7.766088350640141e-06, | |
| "loss": 2.179, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.3834661354581673, | |
| "grad_norm": 0.37981244921684265, | |
| "learning_rate": 7.75158070891028e-06, | |
| "loss": 2.3806, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3844621513944223, | |
| "grad_norm": 0.6670547723770142, | |
| "learning_rate": 7.737039762439263e-06, | |
| "loss": 2.0258, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.3854581673306773, | |
| "grad_norm": 0.5870895981788635, | |
| "learning_rate": 7.722465687228634e-06, | |
| "loss": 2.4609, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.38645418326693226, | |
| "grad_norm": 0.5210617184638977, | |
| "learning_rate": 7.707858659680924e-06, | |
| "loss": 2.0152, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.38745019920318724, | |
| "grad_norm": 0.8872121572494507, | |
| "learning_rate": 7.693218856597515e-06, | |
| "loss": 2.4107, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.3884462151394422, | |
| "grad_norm": 0.408750057220459, | |
| "learning_rate": 7.6785464551765e-06, | |
| "loss": 2.4381, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3894422310756972, | |
| "grad_norm": 0.4789107143878937, | |
| "learning_rate": 7.663841633010539e-06, | |
| "loss": 2.3057, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.3904382470119522, | |
| "grad_norm": 0.37599480152130127, | |
| "learning_rate": 7.649104568084701e-06, | |
| "loss": 2.4101, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.39143426294820716, | |
| "grad_norm": 0.6352246403694153, | |
| "learning_rate": 7.634335438774325e-06, | |
| "loss": 2.2202, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.39243027888446214, | |
| "grad_norm": 0.5684521198272705, | |
| "learning_rate": 7.619534423842852e-06, | |
| "loss": 2.7473, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.3934262948207171, | |
| "grad_norm": 0.5998817682266235, | |
| "learning_rate": 7.604701702439652e-06, | |
| "loss": 2.109, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3944223107569721, | |
| "grad_norm": 0.69579017162323, | |
| "learning_rate": 7.589837454097879e-06, | |
| "loss": 2.6706, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.3954183266932271, | |
| "grad_norm": 0.5617753267288208, | |
| "learning_rate": 7.574941858732279e-06, | |
| "loss": 2.4175, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.39641434262948205, | |
| "grad_norm": 0.509094774723053, | |
| "learning_rate": 7.560015096637015e-06, | |
| "loss": 2.4367, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.39741035856573703, | |
| "grad_norm": 0.5854381322860718, | |
| "learning_rate": 7.54505734848349e-06, | |
| "loss": 2.3366, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.398406374501992, | |
| "grad_norm": 0.7329273819923401, | |
| "learning_rate": 7.53006879531816e-06, | |
| "loss": 2.2252, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.399402390438247, | |
| "grad_norm": 0.7855085730552673, | |
| "learning_rate": 7.515049618560337e-06, | |
| "loss": 2.2244, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.40039840637450197, | |
| "grad_norm": 0.608400821685791, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 2.2232, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.40139442231075695, | |
| "grad_norm": 0.5910929441452026, | |
| "learning_rate": 7.484920121795589e-06, | |
| "loss": 2.4034, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.40239043824701193, | |
| "grad_norm": 0.5254145264625549, | |
| "learning_rate": 7.469810166471802e-06, | |
| "loss": 2.3053, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.4033864541832669, | |
| "grad_norm": 0.5197448134422302, | |
| "learning_rate": 7.454670316917387e-06, | |
| "loss": 2.2416, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.4043824701195219, | |
| "grad_norm": 0.759675145149231, | |
| "learning_rate": 7.4395007563829295e-06, | |
| "loss": 2.4197, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.40537848605577687, | |
| "grad_norm": 0.4646972417831421, | |
| "learning_rate": 7.424301668478626e-06, | |
| "loss": 2.5638, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.4063745019920319, | |
| "grad_norm": 0.5564824938774109, | |
| "learning_rate": 7.4090732371720774e-06, | |
| "loss": 2.5038, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.4073705179282869, | |
| "grad_norm": 0.8897591829299927, | |
| "learning_rate": 7.393815646786047e-06, | |
| "loss": 2.6751, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.40836653386454186, | |
| "grad_norm": 0.8237727284431458, | |
| "learning_rate": 7.378529081996233e-06, | |
| "loss": 2.2649, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.40936254980079684, | |
| "grad_norm": 0.4922022521495819, | |
| "learning_rate": 7.363213727829045e-06, | |
| "loss": 2.2894, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.4103585657370518, | |
| "grad_norm": 0.48476412892341614, | |
| "learning_rate": 7.347869769659346e-06, | |
| "loss": 2.4317, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.4113545816733068, | |
| "grad_norm": 0.7627730965614319, | |
| "learning_rate": 7.332497393208221e-06, | |
| "loss": 3.0171, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.4123505976095618, | |
| "grad_norm": 1.4489892721176147, | |
| "learning_rate": 7.317096784540728e-06, | |
| "loss": 3.0745, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.41334661354581675, | |
| "grad_norm": 0.44949018955230713, | |
| "learning_rate": 7.301668130063639e-06, | |
| "loss": 2.4086, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.41434262948207173, | |
| "grad_norm": 0.9026828408241272, | |
| "learning_rate": 7.286211616523193e-06, | |
| "loss": 2.4219, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.4153386454183267, | |
| "grad_norm": 0.5528742074966431, | |
| "learning_rate": 7.2707274310028306e-06, | |
| "loss": 2.069, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.4163346613545817, | |
| "grad_norm": 0.8069695830345154, | |
| "learning_rate": 7.255215760920925e-06, | |
| "loss": 2.2901, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.41733067729083667, | |
| "grad_norm": 0.9854758977890015, | |
| "learning_rate": 7.239676794028526e-06, | |
| "loss": 2.533, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.41832669322709165, | |
| "grad_norm": 0.8304996490478516, | |
| "learning_rate": 7.224110718407075e-06, | |
| "loss": 2.4076, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.41932270916334663, | |
| "grad_norm": 0.5824740529060364, | |
| "learning_rate": 7.208517722466135e-06, | |
| "loss": 2.2298, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4203187250996016, | |
| "grad_norm": 0.740998387336731, | |
| "learning_rate": 7.192897994941111e-06, | |
| "loss": 2.2991, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.4213147410358566, | |
| "grad_norm": 0.9516714215278625, | |
| "learning_rate": 7.177251724890957e-06, | |
| "loss": 2.2348, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.42231075697211157, | |
| "grad_norm": 1.330517053604126, | |
| "learning_rate": 7.1615791016959024e-06, | |
| "loss": 2.5969, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.42330677290836655, | |
| "grad_norm": 0.8636577129364014, | |
| "learning_rate": 7.145880315055145e-06, | |
| "loss": 2.4203, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.4243027888446215, | |
| "grad_norm": 0.40839532017707825, | |
| "learning_rate": 7.1301555549845634e-06, | |
| "loss": 2.428, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4252988047808765, | |
| "grad_norm": 0.6337350010871887, | |
| "learning_rate": 7.114405011814415e-06, | |
| "loss": 2.3018, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.4262948207171315, | |
| "grad_norm": 0.8653415441513062, | |
| "learning_rate": 7.098628876187031e-06, | |
| "loss": 2.8947, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.42729083665338646, | |
| "grad_norm": 0.7558097839355469, | |
| "learning_rate": 7.082827339054513e-06, | |
| "loss": 2.2061, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.42828685258964144, | |
| "grad_norm": 0.8000844717025757, | |
| "learning_rate": 7.067000591676416e-06, | |
| "loss": 2.4076, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4292828685258964, | |
| "grad_norm": 0.7074631452560425, | |
| "learning_rate": 7.051148825617435e-06, | |
| "loss": 2.7278, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.4302788844621514, | |
| "grad_norm": 1.1026921272277832, | |
| "learning_rate": 7.035272232745093e-06, | |
| "loss": 2.5602, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.4312749003984064, | |
| "grad_norm": 0.8589175939559937, | |
| "learning_rate": 7.019371005227407e-06, | |
| "loss": 2.3789, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.43227091633466136, | |
| "grad_norm": 0.8680172562599182, | |
| "learning_rate": 7.003445335530572e-06, | |
| "loss": 2.6989, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.43326693227091634, | |
| "grad_norm": 0.7565051317214966, | |
| "learning_rate": 6.987495416416627e-06, | |
| "loss": 2.3688, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4342629482071713, | |
| "grad_norm": 1.050288438796997, | |
| "learning_rate": 6.9715214409411204e-06, | |
| "loss": 2.1948, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.4352589641434263, | |
| "grad_norm": 0.5728120803833008, | |
| "learning_rate": 6.95552360245078e-06, | |
| "loss": 2.2716, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.4362549800796813, | |
| "grad_norm": 1.0542654991149902, | |
| "learning_rate": 6.939502094581164e-06, | |
| "loss": 2.5845, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.43725099601593626, | |
| "grad_norm": 0.7242105603218079, | |
| "learning_rate": 6.923457111254322e-06, | |
| "loss": 2.2048, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.43824701195219123, | |
| "grad_norm": 1.0688732862472534, | |
| "learning_rate": 6.9073888466764495e-06, | |
| "loss": 2.1883, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4392430278884462, | |
| "grad_norm": 0.8276563286781311, | |
| "learning_rate": 6.891297495335531e-06, | |
| "loss": 2.2532, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.4402390438247012, | |
| "grad_norm": 0.7350841760635376, | |
| "learning_rate": 6.875183251998993e-06, | |
| "loss": 2.2517, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.44123505976095617, | |
| "grad_norm": 0.7074620127677917, | |
| "learning_rate": 6.859046311711344e-06, | |
| "loss": 2.1943, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.44223107569721115, | |
| "grad_norm": 0.9273977279663086, | |
| "learning_rate": 6.84288686979181e-06, | |
| "loss": 2.4566, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.44322709163346613, | |
| "grad_norm": 0.54817795753479, | |
| "learning_rate": 6.8267051218319766e-06, | |
| "loss": 2.1578, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.4442231075697211, | |
| "grad_norm": 0.8614233136177063, | |
| "learning_rate": 6.810501263693416e-06, | |
| "loss": 2.3546, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.4452191235059761, | |
| "grad_norm": 0.5919457077980042, | |
| "learning_rate": 6.7942754915053225e-06, | |
| "loss": 2.4907, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.44621513944223107, | |
| "grad_norm": 0.7229816317558289, | |
| "learning_rate": 6.77802800166213e-06, | |
| "loss": 2.2884, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.44721115537848605, | |
| "grad_norm": 1.1251389980316162, | |
| "learning_rate": 6.761758990821143e-06, | |
| "loss": 2.508, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.448207171314741, | |
| "grad_norm": 0.7033310532569885, | |
| "learning_rate": 6.745468655900156e-06, | |
| "loss": 2.4315, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.449203187250996, | |
| "grad_norm": 1.2203772068023682, | |
| "learning_rate": 6.7291571940750575e-06, | |
| "loss": 2.6156, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.450199203187251, | |
| "grad_norm": 0.7150283455848694, | |
| "learning_rate": 6.712824802777465e-06, | |
| "loss": 2.2121, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.45119521912350596, | |
| "grad_norm": 0.9422833323478699, | |
| "learning_rate": 6.696471679692313e-06, | |
| "loss": 2.2294, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.45219123505976094, | |
| "grad_norm": 0.6846040487289429, | |
| "learning_rate": 6.680098022755478e-06, | |
| "loss": 2.5567, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.4531872509960159, | |
| "grad_norm": 0.5450727343559265, | |
| "learning_rate": 6.66370403015137e-06, | |
| "loss": 2.4599, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.4541832669322709, | |
| "grad_norm": 0.5914618968963623, | |
| "learning_rate": 6.647289900310545e-06, | |
| "loss": 2.5134, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.4551792828685259, | |
| "grad_norm": 0.8993861079216003, | |
| "learning_rate": 6.63085583190729e-06, | |
| "loss": 2.5729, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.45617529880478086, | |
| "grad_norm": 0.5783509016036987, | |
| "learning_rate": 6.614402023857231e-06, | |
| "loss": 2.5881, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.45717131474103584, | |
| "grad_norm": 0.6340298652648926, | |
| "learning_rate": 6.597928675314918e-06, | |
| "loss": 2.4509, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.4581673306772908, | |
| "grad_norm": 0.7813217043876648, | |
| "learning_rate": 6.581435985671418e-06, | |
| "loss": 2.242, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4591633466135458, | |
| "grad_norm": 0.8051680326461792, | |
| "learning_rate": 6.564924154551895e-06, | |
| "loss": 2.398, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.4601593625498008, | |
| "grad_norm": 0.6447633504867554, | |
| "learning_rate": 6.548393381813205e-06, | |
| "loss": 2.9214, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.46115537848605576, | |
| "grad_norm": 0.5684821605682373, | |
| "learning_rate": 6.5318438675414665e-06, | |
| "loss": 2.2545, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.46215139442231074, | |
| "grad_norm": 0.6067225337028503, | |
| "learning_rate": 6.515275812049644e-06, | |
| "loss": 2.5541, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.4631474103585657, | |
| "grad_norm": 0.5634474754333496, | |
| "learning_rate": 6.498689415875121e-06, | |
| "loss": 2.581, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.4641434262948207, | |
| "grad_norm": 0.4764470160007477, | |
| "learning_rate": 6.48208487977728e-06, | |
| "loss": 2.2492, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.4651394422310757, | |
| "grad_norm": 0.6636649370193481, | |
| "learning_rate": 6.4654624047350575e-06, | |
| "loss": 2.418, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.46613545816733065, | |
| "grad_norm": 0.6568376421928406, | |
| "learning_rate": 6.448822191944526e-06, | |
| "loss": 2.4608, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.46713147410358563, | |
| "grad_norm": 0.5867657661437988, | |
| "learning_rate": 6.432164442816452e-06, | |
| "loss": 2.7101, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.4681274900398406, | |
| "grad_norm": 0.5845500826835632, | |
| "learning_rate": 6.41548935897386e-06, | |
| "loss": 2.1822, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.46912350597609564, | |
| "grad_norm": 0.9894917011260986, | |
| "learning_rate": 6.398797142249591e-06, | |
| "loss": 2.4232, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.4701195219123506, | |
| "grad_norm": 0.5987226366996765, | |
| "learning_rate": 6.3820879946838585e-06, | |
| "loss": 2.6506, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.4711155378486056, | |
| "grad_norm": 1.552355408668518, | |
| "learning_rate": 6.365362118521807e-06, | |
| "loss": 3.0233, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.4721115537848606, | |
| "grad_norm": 0.6667497158050537, | |
| "learning_rate": 6.348619716211058e-06, | |
| "loss": 2.6748, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.47310756972111556, | |
| "grad_norm": 0.970600962638855, | |
| "learning_rate": 6.33186099039927e-06, | |
| "loss": 2.4542, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.47410358565737054, | |
| "grad_norm": 0.6683152914047241, | |
| "learning_rate": 6.31508614393167e-06, | |
| "loss": 2.4034, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.4750996015936255, | |
| "grad_norm": 0.9013263583183289, | |
| "learning_rate": 6.2982953798486124e-06, | |
| "loss": 2.4747, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.4760956175298805, | |
| "grad_norm": 0.8827518820762634, | |
| "learning_rate": 6.2814889013831174e-06, | |
| "loss": 2.5649, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.4770916334661355, | |
| "grad_norm": 0.8040870428085327, | |
| "learning_rate": 6.264666911958404e-06, | |
| "loss": 2.2855, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.47808764940239046, | |
| "grad_norm": 0.9028819799423218, | |
| "learning_rate": 6.247829615185441e-06, | |
| "loss": 2.3607, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.47908366533864544, | |
| "grad_norm": 0.8722829818725586, | |
| "learning_rate": 6.230977214860468e-06, | |
| "loss": 2.2346, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.4800796812749004, | |
| "grad_norm": 0.6933993697166443, | |
| "learning_rate": 6.214109914962542e-06, | |
| "loss": 2.7604, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.4810756972111554, | |
| "grad_norm": 0.6176011562347412, | |
| "learning_rate": 6.1972279196510565e-06, | |
| "loss": 2.8278, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.4820717131474104, | |
| "grad_norm": 0.7204033732414246, | |
| "learning_rate": 6.180331433263283e-06, | |
| "loss": 2.4275, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.48306772908366535, | |
| "grad_norm": 1.1777395009994507, | |
| "learning_rate": 6.1634206603118844e-06, | |
| "loss": 2.501, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.48406374501992033, | |
| "grad_norm": 1.3905079364776611, | |
| "learning_rate": 6.146495805482451e-06, | |
| "loss": 2.9668, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.4850597609561753, | |
| "grad_norm": 1.8294525146484375, | |
| "learning_rate": 6.129557073631013e-06, | |
| "loss": 2.0932, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.4860557768924303, | |
| "grad_norm": 1.0543792247772217, | |
| "learning_rate": 6.112604669781572e-06, | |
| "loss": 2.1609, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.48705179282868527, | |
| "grad_norm": 0.6805559992790222, | |
| "learning_rate": 6.09563879912361e-06, | |
| "loss": 2.5514, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.48804780876494025, | |
| "grad_norm": 0.913993239402771, | |
| "learning_rate": 6.07865966700961e-06, | |
| "loss": 2.1365, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.48904382470119523, | |
| "grad_norm": 1.127585768699646, | |
| "learning_rate": 6.06166747895257e-06, | |
| "loss": 2.5846, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.4900398406374502, | |
| "grad_norm": 0.5136232376098633, | |
| "learning_rate": 6.044662440623512e-06, | |
| "loss": 2.3032, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.4910358565737052, | |
| "grad_norm": 1.1162834167480469, | |
| "learning_rate": 6.027644757849004e-06, | |
| "loss": 2.4506, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.49203187250996017, | |
| "grad_norm": 0.8343062996864319, | |
| "learning_rate": 6.0106146366086514e-06, | |
| "loss": 2.5259, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.49302788844621515, | |
| "grad_norm": 1.2255266904830933, | |
| "learning_rate": 5.99357228303262e-06, | |
| "loss": 2.4964, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.4940239043824701, | |
| "grad_norm": 0.8280947804450989, | |
| "learning_rate": 5.976517903399128e-06, | |
| "loss": 2.3576, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.4950199203187251, | |
| "grad_norm": 0.5358011722564697, | |
| "learning_rate": 5.959451704131962e-06, | |
| "loss": 2.4103, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.4960159362549801, | |
| "grad_norm": 0.7300974130630493, | |
| "learning_rate": 5.9423738917979655e-06, | |
| "loss": 2.4792, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.49701195219123506, | |
| "grad_norm": 1.1368849277496338, | |
| "learning_rate": 5.92528467310455e-06, | |
| "loss": 2.7343, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.49800796812749004, | |
| "grad_norm": 1.1836261749267578, | |
| "learning_rate": 5.908184254897183e-06, | |
| "loss": 2.263, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.499003984063745, | |
| "grad_norm": 0.7548766136169434, | |
| "learning_rate": 5.891072844156895e-06, | |
| "loss": 2.5232, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.5676743984222412, | |
| "learning_rate": 5.87395064799776e-06, | |
| "loss": 2.532, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.500996015936255, | |
| "grad_norm": 0.6243408918380737, | |
| "learning_rate": 5.856817873664409e-06, | |
| "loss": 2.6337, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.50199203187251, | |
| "grad_norm": 0.6920816898345947, | |
| "learning_rate": 5.839674728529499e-06, | |
| "loss": 2.3761, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5029880478087649, | |
| "grad_norm": 0.773199737071991, | |
| "learning_rate": 5.8225214200912195e-06, | |
| "loss": 2.3334, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5039840637450199, | |
| "grad_norm": 0.8243865370750427, | |
| "learning_rate": 5.8053581559707754e-06, | |
| "loss": 2.2201, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5049800796812749, | |
| "grad_norm": 0.6483944654464722, | |
| "learning_rate": 5.788185143909868e-06, | |
| "loss": 2.2019, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5059760956175299, | |
| "grad_norm": 1.1562165021896362, | |
| "learning_rate": 5.7710025917681954e-06, | |
| "loss": 2.6738, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5069721115537849, | |
| "grad_norm": 0.7714682221412659, | |
| "learning_rate": 5.753810707520918e-06, | |
| "loss": 2.5662, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5079681274900398, | |
| "grad_norm": 1.8939898014068604, | |
| "learning_rate": 5.736609699256158e-06, | |
| "loss": 2.4841, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5089641434262948, | |
| "grad_norm": 1.0769683122634888, | |
| "learning_rate": 5.719399775172468e-06, | |
| "loss": 2.2339, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5099601593625498, | |
| "grad_norm": 0.7231705188751221, | |
| "learning_rate": 5.702181143576323e-06, | |
| "loss": 2.2754, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5109561752988048, | |
| "grad_norm": 1.0154212713241577, | |
| "learning_rate": 5.684954012879583e-06, | |
| "loss": 2.5029, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5119521912350598, | |
| "grad_norm": 1.130210041999817, | |
| "learning_rate": 5.66771859159699e-06, | |
| "loss": 2.5272, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5129482071713147, | |
| "grad_norm": 0.6206554770469666, | |
| "learning_rate": 5.6504750883436275e-06, | |
| "loss": 2.3235, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5139442231075697, | |
| "grad_norm": 1.0481131076812744, | |
| "learning_rate": 5.633223711832403e-06, | |
| "loss": 2.2866, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5149402390438247, | |
| "grad_norm": 0.7321045398712158, | |
| "learning_rate": 5.615964670871524e-06, | |
| "loss": 2.4402, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5159362549800797, | |
| "grad_norm": 0.7684382796287537, | |
| "learning_rate": 5.5986981743619615e-06, | |
| "loss": 2.3404, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5169322709163346, | |
| "grad_norm": 1.1246601343154907, | |
| "learning_rate": 5.581424431294936e-06, | |
| "loss": 2.2032, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5179282868525896, | |
| "grad_norm": 0.617168128490448, | |
| "learning_rate": 5.56414365074937e-06, | |
| "loss": 2.5379, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5189243027888446, | |
| "grad_norm": 0.7718055248260498, | |
| "learning_rate": 5.546856041889374e-06, | |
| "loss": 2.4803, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5199203187250996, | |
| "grad_norm": 1.5422130823135376, | |
| "learning_rate": 5.5295618139617e-06, | |
| "loss": 2.3014, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5209163346613546, | |
| "grad_norm": 0.5689607262611389, | |
| "learning_rate": 5.512261176293226e-06, | |
| "loss": 2.1094, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5219123505976095, | |
| "grad_norm": 0.6268129944801331, | |
| "learning_rate": 5.494954338288404e-06, | |
| "loss": 2.4562, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5229083665338645, | |
| "grad_norm": 1.1070692539215088, | |
| "learning_rate": 5.477641509426739e-06, | |
| "loss": 2.0365, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5239043824701195, | |
| "grad_norm": 1.2830649614334106, | |
| "learning_rate": 5.460322899260245e-06, | |
| "loss": 2.2455, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5249003984063745, | |
| "grad_norm": 0.6706056594848633, | |
| "learning_rate": 5.442998717410916e-06, | |
| "loss": 2.3095, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5258964143426295, | |
| "grad_norm": 0.6565206050872803, | |
| "learning_rate": 5.425669173568179e-06, | |
| "loss": 2.5873, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5268924302788844, | |
| "grad_norm": 0.8396487236022949, | |
| "learning_rate": 5.408334477486369e-06, | |
| "loss": 2.4121, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.5278884462151394, | |
| "grad_norm": 0.8442867398262024, | |
| "learning_rate": 5.390994838982178e-06, | |
| "loss": 2.1112, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5288844621513944, | |
| "grad_norm": 0.6712931990623474, | |
| "learning_rate": 5.373650467932122e-06, | |
| "loss": 2.5735, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.5298804780876494, | |
| "grad_norm": 0.8547630906105042, | |
| "learning_rate": 5.356301574269998e-06, | |
| "loss": 2.5899, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5308764940239044, | |
| "grad_norm": 1.1971904039382935, | |
| "learning_rate": 5.338948367984347e-06, | |
| "loss": 2.7327, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5318725099601593, | |
| "grad_norm": 0.8025546669960022, | |
| "learning_rate": 5.321591059115906e-06, | |
| "loss": 2.3269, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5328685258964143, | |
| "grad_norm": 0.9815241098403931, | |
| "learning_rate": 5.30422985775507e-06, | |
| "loss": 2.4555, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5338645418326693, | |
| "grad_norm": 0.9032710194587708, | |
| "learning_rate": 5.286864974039349e-06, | |
| "loss": 2.4246, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5348605577689243, | |
| "grad_norm": 1.508058786392212, | |
| "learning_rate": 5.269496618150823e-06, | |
| "loss": 2.1206, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.5358565737051793, | |
| "grad_norm": 1.0455362796783447, | |
| "learning_rate": 5.2521250003136005e-06, | |
| "loss": 2.4406, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5368525896414342, | |
| "grad_norm": 0.8184682726860046, | |
| "learning_rate": 5.234750330791268e-06, | |
| "loss": 2.1303, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.5378486055776892, | |
| "grad_norm": 0.8355326056480408, | |
| "learning_rate": 5.217372819884353e-06, | |
| "loss": 2.3468, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5388446215139442, | |
| "grad_norm": 1.0594871044158936, | |
| "learning_rate": 5.199992677927775e-06, | |
| "loss": 2.2558, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5398406374501992, | |
| "grad_norm": 1.3626909255981445, | |
| "learning_rate": 5.182610115288296e-06, | |
| "loss": 2.4741, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5408366533864541, | |
| "grad_norm": 0.6529922485351562, | |
| "learning_rate": 5.165225342361978e-06, | |
| "loss": 2.3131, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5418326693227091, | |
| "grad_norm": 0.8616756796836853, | |
| "learning_rate": 5.147838569571642e-06, | |
| "loss": 2.2786, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.5428286852589641, | |
| "grad_norm": 1.9104212522506714, | |
| "learning_rate": 5.1304500073643045e-06, | |
| "loss": 2.2784, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5438247011952191, | |
| "grad_norm": 0.7414434552192688, | |
| "learning_rate": 5.1130598662086514e-06, | |
| "loss": 2.1551, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5448207171314741, | |
| "grad_norm": 0.889681339263916, | |
| "learning_rate": 5.095668356592474e-06, | |
| "loss": 1.9545, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.545816733067729, | |
| "grad_norm": 0.8562681674957275, | |
| "learning_rate": 5.078275689020129e-06, | |
| "loss": 2.1665, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.546812749003984, | |
| "grad_norm": 0.810234010219574, | |
| "learning_rate": 5.060882074009988e-06, | |
| "loss": 2.5324, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.547808764940239, | |
| "grad_norm": 1.867493748664856, | |
| "learning_rate": 5.043487722091891e-06, | |
| "loss": 2.6972, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.548804780876494, | |
| "grad_norm": 0.8987257480621338, | |
| "learning_rate": 5.026092843804599e-06, | |
| "loss": 2.3632, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.549800796812749, | |
| "grad_norm": 0.9021519422531128, | |
| "learning_rate": 5.0086976496932374e-06, | |
| "loss": 2.2825, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.5507968127490039, | |
| "grad_norm": 1.0589499473571777, | |
| "learning_rate": 4.991302350306764e-06, | |
| "loss": 2.6046, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.5517928286852589, | |
| "grad_norm": 0.8575243949890137, | |
| "learning_rate": 4.973907156195405e-06, | |
| "loss": 2.6962, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.5527888446215139, | |
| "grad_norm": 1.7027395963668823, | |
| "learning_rate": 4.956512277908109e-06, | |
| "loss": 2.4405, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5537848605577689, | |
| "grad_norm": 0.8842843770980835, | |
| "learning_rate": 4.939117925990013e-06, | |
| "loss": 2.4694, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.5547808764940239, | |
| "grad_norm": 0.9840981364250183, | |
| "learning_rate": 4.921724310979872e-06, | |
| "loss": 2.5034, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.5557768924302788, | |
| "grad_norm": 0.8198688626289368, | |
| "learning_rate": 4.904331643407527e-06, | |
| "loss": 2.5664, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.5567729083665338, | |
| "grad_norm": 1.1393426656723022, | |
| "learning_rate": 4.886940133791349e-06, | |
| "loss": 2.4415, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.5577689243027888, | |
| "grad_norm": 0.835932731628418, | |
| "learning_rate": 4.869549992635697e-06, | |
| "loss": 2.3719, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5587649402390438, | |
| "grad_norm": 0.952167272567749, | |
| "learning_rate": 4.8521614304283615e-06, | |
| "loss": 2.3092, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.5597609561752988, | |
| "grad_norm": 0.8146136999130249, | |
| "learning_rate": 4.834774657638023e-06, | |
| "loss": 2.5503, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.5607569721115537, | |
| "grad_norm": 0.7990023493766785, | |
| "learning_rate": 4.817389884711706e-06, | |
| "loss": 2.3319, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.5617529880478087, | |
| "grad_norm": 0.5947994589805603, | |
| "learning_rate": 4.800007322072226e-06, | |
| "loss": 2.1591, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.5627490039840638, | |
| "grad_norm": 0.8183361291885376, | |
| "learning_rate": 4.7826271801156485e-06, | |
| "loss": 2.431, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5637450199203188, | |
| "grad_norm": 0.896101713180542, | |
| "learning_rate": 4.765249669208733e-06, | |
| "loss": 2.3313, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.5647410358565738, | |
| "grad_norm": 1.7690149545669556, | |
| "learning_rate": 4.747874999686401e-06, | |
| "loss": 2.1365, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.5657370517928287, | |
| "grad_norm": 0.8712881803512573, | |
| "learning_rate": 4.730503381849179e-06, | |
| "loss": 2.4139, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.5667330677290837, | |
| "grad_norm": 1.0946391820907593, | |
| "learning_rate": 4.713135025960652e-06, | |
| "loss": 2.1844, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.5677290836653387, | |
| "grad_norm": 0.9877662658691406, | |
| "learning_rate": 4.695770142244931e-06, | |
| "loss": 2.4189, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5687250996015937, | |
| "grad_norm": 3.4181249141693115, | |
| "learning_rate": 4.6784089408840955e-06, | |
| "loss": 2.7926, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.5697211155378487, | |
| "grad_norm": 0.7407424449920654, | |
| "learning_rate": 4.661051632015655e-06, | |
| "loss": 2.4036, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.5707171314741036, | |
| "grad_norm": 0.8603093028068542, | |
| "learning_rate": 4.643698425730004e-06, | |
| "loss": 2.0895, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.5717131474103586, | |
| "grad_norm": 1.5615819692611694, | |
| "learning_rate": 4.626349532067879e-06, | |
| "loss": 2.6668, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.5727091633466136, | |
| "grad_norm": 0.9211438298225403, | |
| "learning_rate": 4.609005161017824e-06, | |
| "loss": 2.7534, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.5737051792828686, | |
| "grad_norm": 0.6863355040550232, | |
| "learning_rate": 4.591665522513633e-06, | |
| "loss": 2.4823, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.5747011952191236, | |
| "grad_norm": 1.1497968435287476, | |
| "learning_rate": 4.574330826431822e-06, | |
| "loss": 2.7213, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.5756972111553785, | |
| "grad_norm": 0.6396723389625549, | |
| "learning_rate": 4.557001282589086e-06, | |
| "loss": 2.342, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.5766932270916335, | |
| "grad_norm": 0.7893930673599243, | |
| "learning_rate": 4.5396771007397565e-06, | |
| "loss": 2.426, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.5776892430278885, | |
| "grad_norm": 1.0288350582122803, | |
| "learning_rate": 4.5223584905732635e-06, | |
| "loss": 2.3415, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5786852589641435, | |
| "grad_norm": 1.215003252029419, | |
| "learning_rate": 4.505045661711596e-06, | |
| "loss": 2.2311, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.5796812749003984, | |
| "grad_norm": 1.2418211698532104, | |
| "learning_rate": 4.487738823706775e-06, | |
| "loss": 2.6043, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.5806772908366534, | |
| "grad_norm": 0.658145546913147, | |
| "learning_rate": 4.470438186038301e-06, | |
| "loss": 2.5859, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.5816733067729084, | |
| "grad_norm": 0.9392750859260559, | |
| "learning_rate": 4.4531439581106295e-06, | |
| "loss": 2.4185, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.5826693227091634, | |
| "grad_norm": 0.726355254650116, | |
| "learning_rate": 4.43585634925063e-06, | |
| "loss": 2.656, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.5836653386454184, | |
| "grad_norm": 0.8283027410507202, | |
| "learning_rate": 4.418575568705066e-06, | |
| "loss": 2.6963, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.5846613545816733, | |
| "grad_norm": 2.0478458404541016, | |
| "learning_rate": 4.401301825638039e-06, | |
| "loss": 2.8958, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.5856573705179283, | |
| "grad_norm": 0.9227387309074402, | |
| "learning_rate": 4.3840353291284776e-06, | |
| "loss": 2.5498, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.5866533864541833, | |
| "grad_norm": 1.2917126417160034, | |
| "learning_rate": 4.366776288167598e-06, | |
| "loss": 2.4996, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.5876494023904383, | |
| "grad_norm": 0.9034551978111267, | |
| "learning_rate": 4.349524911656373e-06, | |
| "loss": 2.4281, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5886454183266933, | |
| "grad_norm": 1.010878324508667, | |
| "learning_rate": 4.332281408403011e-06, | |
| "loss": 2.2912, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.5896414342629482, | |
| "grad_norm": 1.7873375415802002, | |
| "learning_rate": 4.315045987120417e-06, | |
| "loss": 2.5171, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.5906374501992032, | |
| "grad_norm": 0.8005262613296509, | |
| "learning_rate": 4.297818856423679e-06, | |
| "loss": 2.33, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.5916334661354582, | |
| "grad_norm": 0.6728765368461609, | |
| "learning_rate": 4.280600224827533e-06, | |
| "loss": 2.1524, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.5926294820717132, | |
| "grad_norm": 0.8610662221908569, | |
| "learning_rate": 4.2633903007438445e-06, | |
| "loss": 2.7524, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.5936254980079682, | |
| "grad_norm": 1.0232973098754883, | |
| "learning_rate": 4.2461892924790825e-06, | |
| "loss": 2.6512, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.5946215139442231, | |
| "grad_norm": 1.0610368251800537, | |
| "learning_rate": 4.228997408231806e-06, | |
| "loss": 2.4996, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.5956175298804781, | |
| "grad_norm": 1.2796133756637573, | |
| "learning_rate": 4.2118148560901325e-06, | |
| "loss": 2.2488, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.5966135458167331, | |
| "grad_norm": 1.5423349142074585, | |
| "learning_rate": 4.194641844029227e-06, | |
| "loss": 2.6293, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.5976095617529881, | |
| "grad_norm": 1.5228114128112793, | |
| "learning_rate": 4.1774785799087805e-06, | |
| "loss": 2.3751, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.598605577689243, | |
| "grad_norm": 0.9803175330162048, | |
| "learning_rate": 4.160325271470502e-06, | |
| "loss": 2.5003, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.599601593625498, | |
| "grad_norm": 1.0139139890670776, | |
| "learning_rate": 4.143182126335594e-06, | |
| "loss": 2.5435, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.600597609561753, | |
| "grad_norm": 0.8577011227607727, | |
| "learning_rate": 4.12604935200224e-06, | |
| "loss": 2.6227, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.601593625498008, | |
| "grad_norm": 1.015549659729004, | |
| "learning_rate": 4.108927155843108e-06, | |
| "loss": 2.3803, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.602589641434263, | |
| "grad_norm": 1.5675932168960571, | |
| "learning_rate": 4.091815745102818e-06, | |
| "loss": 2.364, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.603585657370518, | |
| "grad_norm": 0.7266266345977783, | |
| "learning_rate": 4.074715326895453e-06, | |
| "loss": 2.6198, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6045816733067729, | |
| "grad_norm": 0.617874026298523, | |
| "learning_rate": 4.0576261082020345e-06, | |
| "loss": 2.6864, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.6055776892430279, | |
| "grad_norm": 1.3024420738220215, | |
| "learning_rate": 4.040548295868039e-06, | |
| "loss": 2.4742, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6065737051792829, | |
| "grad_norm": 0.7109612822532654, | |
| "learning_rate": 4.023482096600873e-06, | |
| "loss": 2.6456, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.6075697211155379, | |
| "grad_norm": 1.2119102478027344, | |
| "learning_rate": 4.006427716967382e-06, | |
| "loss": 2.3745, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6085657370517928, | |
| "grad_norm": 1.3014880418777466, | |
| "learning_rate": 3.9893853633913485e-06, | |
| "loss": 2.6845, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.6095617529880478, | |
| "grad_norm": 1.3628534078598022, | |
| "learning_rate": 3.9723552421509975e-06, | |
| "loss": 2.6973, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6105577689243028, | |
| "grad_norm": 0.9777284860610962, | |
| "learning_rate": 3.955337559376489e-06, | |
| "loss": 2.4989, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6115537848605578, | |
| "grad_norm": 0.6070024371147156, | |
| "learning_rate": 3.938332521047434e-06, | |
| "loss": 2.0082, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6125498007968128, | |
| "grad_norm": 0.6223677396774292, | |
| "learning_rate": 3.921340332990392e-06, | |
| "loss": 2.2016, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6135458167330677, | |
| "grad_norm": 1.2076197862625122, | |
| "learning_rate": 3.904361200876391e-06, | |
| "loss": 2.7328, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6145418326693227, | |
| "grad_norm": 0.7502063512802124, | |
| "learning_rate": 3.887395330218429e-06, | |
| "loss": 2.1634, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.6155378486055777, | |
| "grad_norm": 1.090084195137024, | |
| "learning_rate": 3.8704429263689865e-06, | |
| "loss": 2.2409, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6165338645418327, | |
| "grad_norm": 1.7830555438995361, | |
| "learning_rate": 3.853504194517551e-06, | |
| "loss": 2.5541, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.6175298804780877, | |
| "grad_norm": 1.0715655088424683, | |
| "learning_rate": 3.836579339688116e-06, | |
| "loss": 2.7304, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6185258964143426, | |
| "grad_norm": 0.7255896925926208, | |
| "learning_rate": 3.819668566736719e-06, | |
| "loss": 2.5671, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.6195219123505976, | |
| "grad_norm": 1.475665807723999, | |
| "learning_rate": 3.802772080348943e-06, | |
| "loss": 2.2374, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6205179282868526, | |
| "grad_norm": 1.1244341135025024, | |
| "learning_rate": 3.7858900850374596e-06, | |
| "loss": 2.2705, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.6215139442231076, | |
| "grad_norm": 1.270950436592102, | |
| "learning_rate": 3.769022785139534e-06, | |
| "loss": 2.427, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6225099601593626, | |
| "grad_norm": 0.9996942281723022, | |
| "learning_rate": 3.752170384814562e-06, | |
| "loss": 2.3181, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6235059760956175, | |
| "grad_norm": 0.9702761173248291, | |
| "learning_rate": 3.7353330880415963e-06, | |
| "loss": 2.4871, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6245019920318725, | |
| "grad_norm": 0.7174897193908691, | |
| "learning_rate": 3.7185110986168842e-06, | |
| "loss": 2.6481, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6254980079681275, | |
| "grad_norm": 1.0198302268981934, | |
| "learning_rate": 3.701704620151389e-06, | |
| "loss": 2.4368, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6264940239043825, | |
| "grad_norm": 0.6317278742790222, | |
| "learning_rate": 3.6849138560683305e-06, | |
| "loss": 2.2506, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.6274900398406374, | |
| "grad_norm": 1.6083205938339233, | |
| "learning_rate": 3.6681390096007315e-06, | |
| "loss": 2.441, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6284860557768924, | |
| "grad_norm": 1.1788543462753296, | |
| "learning_rate": 3.651380283788942e-06, | |
| "loss": 2.0867, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.6294820717131474, | |
| "grad_norm": 1.6041985750198364, | |
| "learning_rate": 3.634637881478196e-06, | |
| "loss": 2.7786, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6304780876494024, | |
| "grad_norm": 0.7498704195022583, | |
| "learning_rate": 3.617912005316142e-06, | |
| "loss": 2.5885, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.6314741035856574, | |
| "grad_norm": 1.2260042428970337, | |
| "learning_rate": 3.6012028577504106e-06, | |
| "loss": 2.5491, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.6324701195219123, | |
| "grad_norm": 0.766639232635498, | |
| "learning_rate": 3.5845106410261417e-06, | |
| "loss": 2.6436, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6334661354581673, | |
| "grad_norm": 0.8522284626960754, | |
| "learning_rate": 3.56783555718355e-06, | |
| "loss": 2.361, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6344621513944223, | |
| "grad_norm": 1.09912109375, | |
| "learning_rate": 3.551177808055476e-06, | |
| "loss": 2.5303, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.6354581673306773, | |
| "grad_norm": 1.4560422897338867, | |
| "learning_rate": 3.534537595264944e-06, | |
| "loss": 2.6122, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.6364541832669323, | |
| "grad_norm": 0.858035147190094, | |
| "learning_rate": 3.5179151202227214e-06, | |
| "loss": 2.3591, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.6374501992031872, | |
| "grad_norm": 4.184999942779541, | |
| "learning_rate": 3.5013105841248794e-06, | |
| "loss": 2.3339, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6384462151394422, | |
| "grad_norm": 1.2636277675628662, | |
| "learning_rate": 3.4847241879503574e-06, | |
| "loss": 2.6084, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.6394422310756972, | |
| "grad_norm": 1.3735069036483765, | |
| "learning_rate": 3.4681561324585356e-06, | |
| "loss": 2.4582, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.6404382470119522, | |
| "grad_norm": 1.3198506832122803, | |
| "learning_rate": 3.451606618186796e-06, | |
| "loss": 2.2207, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.6414342629482072, | |
| "grad_norm": 0.895077109336853, | |
| "learning_rate": 3.435075845448105e-06, | |
| "loss": 2.141, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.6424302788844621, | |
| "grad_norm": 1.5022435188293457, | |
| "learning_rate": 3.418564014328583e-06, | |
| "loss": 2.5608, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6434262948207171, | |
| "grad_norm": 0.9838452935218811, | |
| "learning_rate": 3.402071324685082e-06, | |
| "loss": 2.372, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.6444223107569721, | |
| "grad_norm": 0.6322600841522217, | |
| "learning_rate": 3.3855979761427705e-06, | |
| "loss": 2.5499, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.6454183266932271, | |
| "grad_norm": 1.3608890771865845, | |
| "learning_rate": 3.3691441680927105e-06, | |
| "loss": 2.4689, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.646414342629482, | |
| "grad_norm": 0.9520907998085022, | |
| "learning_rate": 3.352710099689457e-06, | |
| "loss": 2.5012, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.647410358565737, | |
| "grad_norm": 3.0419979095458984, | |
| "learning_rate": 3.3362959698486307e-06, | |
| "loss": 2.2773, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.648406374501992, | |
| "grad_norm": 1.0915313959121704, | |
| "learning_rate": 3.3199019772445253e-06, | |
| "loss": 2.3744, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.649402390438247, | |
| "grad_norm": 1.543050765991211, | |
| "learning_rate": 3.3035283203076877e-06, | |
| "loss": 2.3499, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.650398406374502, | |
| "grad_norm": 1.0574357509613037, | |
| "learning_rate": 3.287175197222537e-06, | |
| "loss": 2.516, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.651394422310757, | |
| "grad_norm": 1.3127410411834717, | |
| "learning_rate": 3.2708428059249437e-06, | |
| "loss": 2.4012, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.6523904382470119, | |
| "grad_norm": 0.9456487894058228, | |
| "learning_rate": 3.254531344099847e-06, | |
| "loss": 2.773, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6533864541832669, | |
| "grad_norm": 1.1509116888046265, | |
| "learning_rate": 3.2382410091788567e-06, | |
| "loss": 2.7622, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.6543824701195219, | |
| "grad_norm": 1.0328110456466675, | |
| "learning_rate": 3.221971998337872e-06, | |
| "loss": 2.5343, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.6553784860557769, | |
| "grad_norm": 1.723029613494873, | |
| "learning_rate": 3.2057245084946796e-06, | |
| "loss": 2.8968, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.6563745019920318, | |
| "grad_norm": 1.13263738155365, | |
| "learning_rate": 3.189498736306584e-06, | |
| "loss": 2.4689, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.6573705179282868, | |
| "grad_norm": 1.4411126375198364, | |
| "learning_rate": 3.173294878168025e-06, | |
| "loss": 2.0715, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6583665338645418, | |
| "grad_norm": 2.4463119506835938, | |
| "learning_rate": 3.1571131302081916e-06, | |
| "loss": 2.5004, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.6593625498007968, | |
| "grad_norm": 1.063270926475525, | |
| "learning_rate": 3.140953688288658e-06, | |
| "loss": 2.4079, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.6603585657370518, | |
| "grad_norm": 1.860757827758789, | |
| "learning_rate": 3.1248167480010083e-06, | |
| "loss": 2.6755, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.6613545816733067, | |
| "grad_norm": 1.5435043573379517, | |
| "learning_rate": 3.1087025046644704e-06, | |
| "loss": 2.7499, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.6623505976095617, | |
| "grad_norm": 0.7970728278160095, | |
| "learning_rate": 3.0926111533235526e-06, | |
| "loss": 2.1911, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.6633466135458167, | |
| "grad_norm": 1.1135482788085938, | |
| "learning_rate": 3.0765428887456794e-06, | |
| "loss": 2.6387, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.6643426294820717, | |
| "grad_norm": 1.2876728773117065, | |
| "learning_rate": 3.0604979054188367e-06, | |
| "loss": 2.6715, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.6653386454183267, | |
| "grad_norm": 0.6579734683036804, | |
| "learning_rate": 3.044476397549221e-06, | |
| "loss": 2.1833, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.6663346613545816, | |
| "grad_norm": 1.7546638250350952, | |
| "learning_rate": 3.0284785590588804e-06, | |
| "loss": 2.5761, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.6673306772908366, | |
| "grad_norm": 1.1617887020111084, | |
| "learning_rate": 3.012504583583374e-06, | |
| "loss": 2.4205, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6683266932270916, | |
| "grad_norm": 1.4457294940948486, | |
| "learning_rate": 2.9965546644694287e-06, | |
| "loss": 2.178, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.6693227091633466, | |
| "grad_norm": 0.9334515333175659, | |
| "learning_rate": 2.9806289947725947e-06, | |
| "loss": 2.5343, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.6703187250996016, | |
| "grad_norm": 1.115212082862854, | |
| "learning_rate": 2.9647277672549093e-06, | |
| "loss": 2.1731, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.6713147410358565, | |
| "grad_norm": 1.1038217544555664, | |
| "learning_rate": 2.948851174382565e-06, | |
| "loss": 2.3589, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.6723107569721115, | |
| "grad_norm": 1.4897500276565552, | |
| "learning_rate": 2.9329994083235857e-06, | |
| "loss": 2.4302, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.6733067729083665, | |
| "grad_norm": 1.7196754217147827, | |
| "learning_rate": 2.9171726609454875e-06, | |
| "loss": 2.5387, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.6743027888446215, | |
| "grad_norm": 1.271872878074646, | |
| "learning_rate": 2.9013711238129693e-06, | |
| "loss": 2.1938, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.6752988047808764, | |
| "grad_norm": 1.0383085012435913, | |
| "learning_rate": 2.885594988185587e-06, | |
| "loss": 2.5842, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.6762948207171314, | |
| "grad_norm": 1.9233471155166626, | |
| "learning_rate": 2.8698444450154395e-06, | |
| "loss": 2.4421, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.6772908366533864, | |
| "grad_norm": 0.9812890291213989, | |
| "learning_rate": 2.8541196849448582e-06, | |
| "loss": 2.1876, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6782868525896414, | |
| "grad_norm": 1.217011570930481, | |
| "learning_rate": 2.8384208983040997e-06, | |
| "loss": 2.5115, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.6792828685258964, | |
| "grad_norm": 1.4093648195266724, | |
| "learning_rate": 2.8227482751090445e-06, | |
| "loss": 2.5296, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.6802788844621513, | |
| "grad_norm": 1.0479772090911865, | |
| "learning_rate": 2.8071020050588927e-06, | |
| "loss": 2.3801, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.6812749003984063, | |
| "grad_norm": 0.761779248714447, | |
| "learning_rate": 2.7914822775338678e-06, | |
| "loss": 2.397, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.6822709163346613, | |
| "grad_norm": 0.7536188364028931, | |
| "learning_rate": 2.775889281592927e-06, | |
| "loss": 2.2802, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.6832669322709163, | |
| "grad_norm": 1.1621276140213013, | |
| "learning_rate": 2.760323205971476e-06, | |
| "loss": 2.3802, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.6842629482071713, | |
| "grad_norm": 1.2401965856552124, | |
| "learning_rate": 2.744784239079077e-06, | |
| "loss": 1.9567, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.6852589641434262, | |
| "grad_norm": 0.9456545114517212, | |
| "learning_rate": 2.7292725689971732e-06, | |
| "loss": 2.547, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.6862549800796812, | |
| "grad_norm": 1.3253943920135498, | |
| "learning_rate": 2.7137883834768076e-06, | |
| "loss": 2.2105, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.6872509960159362, | |
| "grad_norm": 1.525397777557373, | |
| "learning_rate": 2.6983318699363627e-06, | |
| "loss": 2.3682, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6882470119521913, | |
| "grad_norm": 0.9517590403556824, | |
| "learning_rate": 2.6829032154592745e-06, | |
| "loss": 2.2159, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.6892430278884463, | |
| "grad_norm": 0.8040021061897278, | |
| "learning_rate": 2.6675026067917808e-06, | |
| "loss": 2.3967, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.6902390438247012, | |
| "grad_norm": 1.6833242177963257, | |
| "learning_rate": 2.652130230340655e-06, | |
| "loss": 2.9864, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.6912350597609562, | |
| "grad_norm": 1.104771614074707, | |
| "learning_rate": 2.636786272170956e-06, | |
| "loss": 2.5124, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.6922310756972112, | |
| "grad_norm": 1.1763907670974731, | |
| "learning_rate": 2.621470918003768e-06, | |
| "loss": 2.6426, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.6932270916334662, | |
| "grad_norm": 2.304222345352173, | |
| "learning_rate": 2.6061843532139563e-06, | |
| "loss": 2.4522, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.6942231075697212, | |
| "grad_norm": 0.8599796891212463, | |
| "learning_rate": 2.5909267628279234e-06, | |
| "loss": 2.6796, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.6952191235059761, | |
| "grad_norm": 1.0061733722686768, | |
| "learning_rate": 2.5756983315213748e-06, | |
| "loss": 2.5076, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.6962151394422311, | |
| "grad_norm": 1.392606258392334, | |
| "learning_rate": 2.560499243617074e-06, | |
| "loss": 2.5134, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.6972111553784861, | |
| "grad_norm": 1.2116351127624512, | |
| "learning_rate": 2.5453296830826135e-06, | |
| "loss": 2.0634, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6982071713147411, | |
| "grad_norm": 0.7071558237075806, | |
| "learning_rate": 2.5301898335281994e-06, | |
| "loss": 2.1104, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.6992031872509961, | |
| "grad_norm": 1.8307946920394897, | |
| "learning_rate": 2.5150798782044123e-06, | |
| "loss": 2.8147, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.700199203187251, | |
| "grad_norm": 0.9716182351112366, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 2.4836, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.701195219123506, | |
| "grad_norm": 0.7655389308929443, | |
| "learning_rate": 2.4849503814396624e-06, | |
| "loss": 2.2803, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.702191235059761, | |
| "grad_norm": 1.1354485750198364, | |
| "learning_rate": 2.469931204681841e-06, | |
| "loss": 2.5936, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.703187250996016, | |
| "grad_norm": 0.9272159337997437, | |
| "learning_rate": 2.4549426515165116e-06, | |
| "loss": 2.6629, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.704183266932271, | |
| "grad_norm": 1.20318603515625, | |
| "learning_rate": 2.439984903362988e-06, | |
| "loss": 2.341, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.7051792828685259, | |
| "grad_norm": 1.0813405513763428, | |
| "learning_rate": 2.425058141267722e-06, | |
| "loss": 2.5484, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7061752988047809, | |
| "grad_norm": 0.6365978121757507, | |
| "learning_rate": 2.4101625459021212e-06, | |
| "loss": 2.2276, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.7071713147410359, | |
| "grad_norm": 1.4600951671600342, | |
| "learning_rate": 2.3952982975603494e-06, | |
| "loss": 2.7489, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7081673306772909, | |
| "grad_norm": 1.0905722379684448, | |
| "learning_rate": 2.3804655761571517e-06, | |
| "loss": 2.7045, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.7091633466135459, | |
| "grad_norm": 1.2118492126464844, | |
| "learning_rate": 2.3656645612256747e-06, | |
| "loss": 2.2625, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7101593625498008, | |
| "grad_norm": 2.2730562686920166, | |
| "learning_rate": 2.3508954319153e-06, | |
| "loss": 2.9233, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.7111553784860558, | |
| "grad_norm": 1.1532260179519653, | |
| "learning_rate": 2.3361583669894634e-06, | |
| "loss": 2.6882, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7121513944223108, | |
| "grad_norm": 1.74001944065094, | |
| "learning_rate": 2.321453544823499e-06, | |
| "loss": 2.296, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.7131474103585658, | |
| "grad_norm": 1.6281747817993164, | |
| "learning_rate": 2.306781143402485e-06, | |
| "loss": 2.5453, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.7141434262948207, | |
| "grad_norm": 1.2188794612884521, | |
| "learning_rate": 2.2921413403190774e-06, | |
| "loss": 2.3351, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.7151394422310757, | |
| "grad_norm": 1.180245280265808, | |
| "learning_rate": 2.2775343127713685e-06, | |
| "loss": 2.8909, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7161354581673307, | |
| "grad_norm": 1.207853078842163, | |
| "learning_rate": 2.2629602375607373e-06, | |
| "loss": 2.2249, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.7171314741035857, | |
| "grad_norm": 1.1911535263061523, | |
| "learning_rate": 2.24841929108972e-06, | |
| "loss": 1.9728, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7181274900398407, | |
| "grad_norm": 2.913970470428467, | |
| "learning_rate": 2.23391164935986e-06, | |
| "loss": 3.3, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.7191235059760956, | |
| "grad_norm": 1.0136604309082031, | |
| "learning_rate": 2.219437487969588e-06, | |
| "loss": 2.3078, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.7201195219123506, | |
| "grad_norm": 1.5370888710021973, | |
| "learning_rate": 2.20499698211209e-06, | |
| "loss": 2.4226, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.7211155378486056, | |
| "grad_norm": 0.8609825968742371, | |
| "learning_rate": 2.190590306573198e-06, | |
| "loss": 2.5837, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.7221115537848606, | |
| "grad_norm": 0.7969903945922852, | |
| "learning_rate": 2.1762176357292582e-06, | |
| "loss": 2.5065, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7231075697211156, | |
| "grad_norm": 0.6895061135292053, | |
| "learning_rate": 2.1618791435450334e-06, | |
| "loss": 2.4443, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.7241035856573705, | |
| "grad_norm": 1.005803108215332, | |
| "learning_rate": 2.1475750035715914e-06, | |
| "loss": 2.8449, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.7250996015936255, | |
| "grad_norm": 1.464055061340332, | |
| "learning_rate": 2.1333053889442033e-06, | |
| "loss": 2.436, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.7260956175298805, | |
| "grad_norm": 0.7166134715080261, | |
| "learning_rate": 2.1190704723802587e-06, | |
| "loss": 2.6141, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.7270916334661355, | |
| "grad_norm": 1.2269198894500732, | |
| "learning_rate": 2.104870426177157e-06, | |
| "loss": 2.0039, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7280876494023905, | |
| "grad_norm": 1.233473539352417, | |
| "learning_rate": 2.0907054222102367e-06, | |
| "loss": 2.4503, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.7290836653386454, | |
| "grad_norm": 0.8751947283744812, | |
| "learning_rate": 2.0765756319306897e-06, | |
| "loss": 2.482, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7300796812749004, | |
| "grad_norm": 1.954285979270935, | |
| "learning_rate": 2.0624812263634847e-06, | |
| "loss": 2.6237, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.7310756972111554, | |
| "grad_norm": 0.7244362235069275, | |
| "learning_rate": 2.048422376105299e-06, | |
| "loss": 2.3371, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.7320717131474104, | |
| "grad_norm": 0.7712534666061401, | |
| "learning_rate": 2.034399251322458e-06, | |
| "loss": 2.6775, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.7330677290836654, | |
| "grad_norm": 1.0466793775558472, | |
| "learning_rate": 2.020412021748866e-06, | |
| "loss": 2.2773, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.7340637450199203, | |
| "grad_norm": 1.417794942855835, | |
| "learning_rate": 2.0064608566839584e-06, | |
| "loss": 2.7359, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.7350597609561753, | |
| "grad_norm": 0.5706871747970581, | |
| "learning_rate": 1.9925459249906488e-06, | |
| "loss": 2.246, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.7360557768924303, | |
| "grad_norm": 1.112219214439392, | |
| "learning_rate": 1.978667395093293e-06, | |
| "loss": 2.5444, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.7370517928286853, | |
| "grad_norm": 1.5537924766540527, | |
| "learning_rate": 1.964825434975639e-06, | |
| "loss": 2.2497, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7380478087649402, | |
| "grad_norm": 0.7418034672737122, | |
| "learning_rate": 1.9510202121788003e-06, | |
| "loss": 2.4711, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.7390438247011952, | |
| "grad_norm": 1.0376439094543457, | |
| "learning_rate": 1.9372518937992306e-06, | |
| "loss": 2.1369, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.7400398406374502, | |
| "grad_norm": 1.6938295364379883, | |
| "learning_rate": 1.923520646486695e-06, | |
| "loss": 2.7013, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.7410358565737052, | |
| "grad_norm": 1.1227657794952393, | |
| "learning_rate": 1.9098266364422554e-06, | |
| "loss": 2.1956, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.7420318725099602, | |
| "grad_norm": 0.8521560430526733, | |
| "learning_rate": 1.8961700294162578e-06, | |
| "loss": 2.7621, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7430278884462151, | |
| "grad_norm": 1.3367222547531128, | |
| "learning_rate": 1.8825509907063328e-06, | |
| "loss": 2.3669, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.7440239043824701, | |
| "grad_norm": 1.0971968173980713, | |
| "learning_rate": 1.8689696851553847e-06, | |
| "loss": 2.2727, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.7450199203187251, | |
| "grad_norm": 0.7232230305671692, | |
| "learning_rate": 1.8554262771496017e-06, | |
| "loss": 2.4247, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.7460159362549801, | |
| "grad_norm": 0.779901921749115, | |
| "learning_rate": 1.8419209306164653e-06, | |
| "loss": 2.4956, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.7470119521912351, | |
| "grad_norm": 0.9150820970535278, | |
| "learning_rate": 1.82845380902277e-06, | |
| "loss": 2.6319, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.74800796812749, | |
| "grad_norm": 1.0264116525650024, | |
| "learning_rate": 1.8150250753726363e-06, | |
| "loss": 2.537, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.749003984063745, | |
| "grad_norm": 1.3325294256210327, | |
| "learning_rate": 1.8016348922055448e-06, | |
| "loss": 2.5891, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.9217858910560608, | |
| "learning_rate": 1.7882834215943645e-06, | |
| "loss": 2.3572, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.750996015936255, | |
| "grad_norm": 1.020738959312439, | |
| "learning_rate": 1.7749708251433983e-06, | |
| "loss": 2.4734, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.75199203187251, | |
| "grad_norm": 0.9455721378326416, | |
| "learning_rate": 1.7616972639864166e-06, | |
| "loss": 2.4533, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.7529880478087649, | |
| "grad_norm": 1.7625263929367065, | |
| "learning_rate": 1.7484628987847125e-06, | |
| "loss": 2.5292, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.7539840637450199, | |
| "grad_norm": 1.2456424236297607, | |
| "learning_rate": 1.7352678897251606e-06, | |
| "loss": 2.5379, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.7549800796812749, | |
| "grad_norm": 1.9081121683120728, | |
| "learning_rate": 1.7221123965182712e-06, | |
| "loss": 2.413, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.7559760956175299, | |
| "grad_norm": 1.062225341796875, | |
| "learning_rate": 1.7089965783962608e-06, | |
| "loss": 2.458, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.7569721115537849, | |
| "grad_norm": 1.1116987466812134, | |
| "learning_rate": 1.6959205941111228e-06, | |
| "loss": 2.4556, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7579681274900398, | |
| "grad_norm": 1.6234967708587646, | |
| "learning_rate": 1.6828846019327128e-06, | |
| "loss": 2.5499, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.7589641434262948, | |
| "grad_norm": 0.741877555847168, | |
| "learning_rate": 1.6698887596468232e-06, | |
| "loss": 2.1629, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.7599601593625498, | |
| "grad_norm": 2.112726926803589, | |
| "learning_rate": 1.6569332245532777e-06, | |
| "loss": 2.85, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.7609561752988048, | |
| "grad_norm": 0.9073076844215393, | |
| "learning_rate": 1.6440181534640277e-06, | |
| "loss": 2.4195, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.7619521912350598, | |
| "grad_norm": 1.2635924816131592, | |
| "learning_rate": 1.6311437027012582e-06, | |
| "loss": 2.3853, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.7629482071713147, | |
| "grad_norm": 1.6688510179519653, | |
| "learning_rate": 1.618310028095486e-06, | |
| "loss": 2.186, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.7639442231075697, | |
| "grad_norm": 1.076957106590271, | |
| "learning_rate": 1.6055172849836826e-06, | |
| "loss": 2.3887, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.7649402390438247, | |
| "grad_norm": 1.0081124305725098, | |
| "learning_rate": 1.5927656282073861e-06, | |
| "loss": 2.4315, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.7659362549800797, | |
| "grad_norm": 1.2986465692520142, | |
| "learning_rate": 1.5800552121108392e-06, | |
| "loss": 2.6633, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.7669322709163346, | |
| "grad_norm": 0.7207338213920593, | |
| "learning_rate": 1.567386190539107e-06, | |
| "loss": 2.6924, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7679282868525896, | |
| "grad_norm": 0.6458574533462524, | |
| "learning_rate": 1.5547587168362204e-06, | |
| "loss": 2.7688, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.7689243027888446, | |
| "grad_norm": 1.0633124113082886, | |
| "learning_rate": 1.5421729438433274e-06, | |
| "loss": 2.1328, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.7699203187250996, | |
| "grad_norm": 2.143666982650757, | |
| "learning_rate": 1.5296290238968303e-06, | |
| "loss": 2.29, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.7709163346613546, | |
| "grad_norm": 0.5651401281356812, | |
| "learning_rate": 1.517127108826551e-06, | |
| "loss": 2.4732, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.7719123505976095, | |
| "grad_norm": 0.8489325642585754, | |
| "learning_rate": 1.5046673499538893e-06, | |
| "loss": 2.3174, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.7729083665338645, | |
| "grad_norm": 1.1251336336135864, | |
| "learning_rate": 1.4922498980899907e-06, | |
| "loss": 2.2915, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.7739043824701195, | |
| "grad_norm": 0.7484387755393982, | |
| "learning_rate": 1.4798749035339278e-06, | |
| "loss": 2.3685, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.7749003984063745, | |
| "grad_norm": 1.1463130712509155, | |
| "learning_rate": 1.4675425160708723e-06, | |
| "loss": 2.468, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.7758964143426295, | |
| "grad_norm": 1.5645790100097656, | |
| "learning_rate": 1.4552528849702852e-06, | |
| "loss": 2.6442, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.7768924302788844, | |
| "grad_norm": 1.8811829090118408, | |
| "learning_rate": 1.4430061589841122e-06, | |
| "loss": 2.5609, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7778884462151394, | |
| "grad_norm": 0.8737534284591675, | |
| "learning_rate": 1.4308024863449805e-06, | |
| "loss": 2.6824, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.7788844621513944, | |
| "grad_norm": 1.1957892179489136, | |
| "learning_rate": 1.4186420147644053e-06, | |
| "loss": 2.3529, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.7798804780876494, | |
| "grad_norm": 1.2302711009979248, | |
| "learning_rate": 1.4065248914310066e-06, | |
| "loss": 2.513, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.7808764940239044, | |
| "grad_norm": 0.5240752100944519, | |
| "learning_rate": 1.3944512630087182e-06, | |
| "loss": 2.4043, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.7818725099601593, | |
| "grad_norm": 1.9195410013198853, | |
| "learning_rate": 1.3824212756350196e-06, | |
| "loss": 2.8095, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.7828685258964143, | |
| "grad_norm": 0.9604887962341309, | |
| "learning_rate": 1.3704350749191642e-06, | |
| "loss": 2.3252, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.7838645418326693, | |
| "grad_norm": 0.9721193313598633, | |
| "learning_rate": 1.3584928059404207e-06, | |
| "loss": 2.4578, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.7848605577689243, | |
| "grad_norm": 1.9194726943969727, | |
| "learning_rate": 1.3465946132463125e-06, | |
| "loss": 2.623, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.7858565737051793, | |
| "grad_norm": 1.482784390449524, | |
| "learning_rate": 1.3347406408508695e-06, | |
| "loss": 2.7708, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.7868525896414342, | |
| "grad_norm": 0.7451381683349609, | |
| "learning_rate": 1.3229310322328847e-06, | |
| "loss": 2.4386, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7878486055776892, | |
| "grad_norm": 0.6679832339286804, | |
| "learning_rate": 1.3111659303341824e-06, | |
| "loss": 2.37, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.7888446215139442, | |
| "grad_norm": 0.8974138498306274, | |
| "learning_rate": 1.2994454775578785e-06, | |
| "loss": 2.2855, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.7898406374501992, | |
| "grad_norm": 1.3459084033966064, | |
| "learning_rate": 1.2877698157666663e-06, | |
| "loss": 2.7191, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.7908366533864541, | |
| "grad_norm": 1.0975403785705566, | |
| "learning_rate": 1.2761390862810907e-06, | |
| "loss": 2.2521, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.7918326693227091, | |
| "grad_norm": 0.9908530712127686, | |
| "learning_rate": 1.2645534298778506e-06, | |
| "loss": 2.603, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.7928286852589641, | |
| "grad_norm": 0.7879658937454224, | |
| "learning_rate": 1.253012986788078e-06, | |
| "loss": 2.6744, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.7938247011952191, | |
| "grad_norm": 0.9611647129058838, | |
| "learning_rate": 1.2415178966956531e-06, | |
| "loss": 2.3191, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.7948207171314741, | |
| "grad_norm": 2.035386085510254, | |
| "learning_rate": 1.2300682987355122e-06, | |
| "loss": 2.46, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.795816733067729, | |
| "grad_norm": 1.4089851379394531, | |
| "learning_rate": 1.2186643314919571e-06, | |
| "loss": 2.164, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.796812749003984, | |
| "grad_norm": 0.944324254989624, | |
| "learning_rate": 1.2073061329969843e-06, | |
| "loss": 2.4692, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.797808764940239, | |
| "grad_norm": 1.0982811450958252, | |
| "learning_rate": 1.1959938407286099e-06, | |
| "loss": 2.2721, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.798804780876494, | |
| "grad_norm": 0.8596687316894531, | |
| "learning_rate": 1.1847275916092116e-06, | |
| "loss": 2.3065, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.799800796812749, | |
| "grad_norm": 0.8045834898948669, | |
| "learning_rate": 1.1735075220038634e-06, | |
| "loss": 2.1781, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.8007968127490039, | |
| "grad_norm": 1.5618336200714111, | |
| "learning_rate": 1.1623337677186902e-06, | |
| "loss": 2.4166, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.8017928286852589, | |
| "grad_norm": 1.3380889892578125, | |
| "learning_rate": 1.151206463999222e-06, | |
| "loss": 2.5112, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.8027888446215139, | |
| "grad_norm": 1.37197744846344, | |
| "learning_rate": 1.1401257455287612e-06, | |
| "loss": 2.4657, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.8037848605577689, | |
| "grad_norm": 1.3427671194076538, | |
| "learning_rate": 1.1290917464267458e-06, | |
| "loss": 2.5771, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.8047808764940239, | |
| "grad_norm": 0.5480353832244873, | |
| "learning_rate": 1.1181046002471292e-06, | |
| "loss": 2.2615, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.8057768924302788, | |
| "grad_norm": 1.03799569606781, | |
| "learning_rate": 1.107164439976764e-06, | |
| "loss": 2.1258, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.8067729083665338, | |
| "grad_norm": 0.874638020992279, | |
| "learning_rate": 1.0962713980337947e-06, | |
| "loss": 2.3982, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8077689243027888, | |
| "grad_norm": 0.7668205499649048, | |
| "learning_rate": 1.085425606266049e-06, | |
| "loss": 2.5981, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.8087649402390438, | |
| "grad_norm": 1.033339262008667, | |
| "learning_rate": 1.0746271959494453e-06, | |
| "loss": 2.425, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.8097609561752988, | |
| "grad_norm": 0.7686687707901001, | |
| "learning_rate": 1.063876297786407e-06, | |
| "loss": 2.287, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.8107569721115537, | |
| "grad_norm": 0.8868098855018616, | |
| "learning_rate": 1.0531730419042736e-06, | |
| "loss": 2.4047, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.8117529880478087, | |
| "grad_norm": 0.9515554308891296, | |
| "learning_rate": 1.04251755785373e-06, | |
| "loss": 2.2864, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.8127490039840638, | |
| "grad_norm": 0.96048903465271, | |
| "learning_rate": 1.0319099746072375e-06, | |
| "loss": 2.305, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.8137450199203188, | |
| "grad_norm": 1.2892875671386719, | |
| "learning_rate": 1.0213504205574758e-06, | |
| "loss": 2.8046, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.8147410358565738, | |
| "grad_norm": 1.2894792556762695, | |
| "learning_rate": 1.0108390235157828e-06, | |
| "loss": 2.3662, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.8157370517928287, | |
| "grad_norm": 0.894437849521637, | |
| "learning_rate": 1.0003759107106116e-06, | |
| "loss": 2.3213, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.8167330677290837, | |
| "grad_norm": 0.8480390906333923, | |
| "learning_rate": 9.899612087859883e-07, | |
| "loss": 2.2743, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8177290836653387, | |
| "grad_norm": 0.6957425475120544, | |
| "learning_rate": 9.795950437999852e-07, | |
| "loss": 2.6014, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.8187250996015937, | |
| "grad_norm": 1.1951571702957153, | |
| "learning_rate": 9.692775412231863e-07, | |
| "loss": 2.5359, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.8197211155378487, | |
| "grad_norm": 1.5518149137496948, | |
| "learning_rate": 9.590088259371738e-07, | |
| "loss": 2.5717, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.8207171314741036, | |
| "grad_norm": 0.9850301146507263, | |
| "learning_rate": 9.487890222330137e-07, | |
| "loss": 2.3225, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.8217131474103586, | |
| "grad_norm": 1.0830625295639038, | |
| "learning_rate": 9.386182538097582e-07, | |
| "loss": 2.49, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.8227091633466136, | |
| "grad_norm": 1.1903777122497559, | |
| "learning_rate": 9.284966437729387e-07, | |
| "loss": 2.3532, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.8237051792828686, | |
| "grad_norm": 1.2485320568084717, | |
| "learning_rate": 9.184243146330829e-07, | |
| "loss": 2.286, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.8247011952191236, | |
| "grad_norm": 1.5328834056854248, | |
| "learning_rate": 9.084013883042276e-07, | |
| "loss": 2.5148, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.8256972111553785, | |
| "grad_norm": 1.7866473197937012, | |
| "learning_rate": 8.984279861024453e-07, | |
| "loss": 2.7636, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.8266932270916335, | |
| "grad_norm": 0.8635814785957336, | |
| "learning_rate": 8.885042287443785e-07, | |
| "loss": 2.579, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8276892430278885, | |
| "grad_norm": 1.480765461921692, | |
| "learning_rate": 8.786302363457733e-07, | |
| "loss": 2.6228, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.8286852589641435, | |
| "grad_norm": 0.6803283095359802, | |
| "learning_rate": 8.688061284200266e-07, | |
| "loss": 2.4377, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.8296812749003984, | |
| "grad_norm": 1.2872114181518555, | |
| "learning_rate": 8.590320238767425e-07, | |
| "loss": 2.4269, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.8306772908366534, | |
| "grad_norm": 1.054971694946289, | |
| "learning_rate": 8.493080410202914e-07, | |
| "loss": 2.4407, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.8316733067729084, | |
| "grad_norm": 0.8980826735496521, | |
| "learning_rate": 8.396342975483751e-07, | |
| "loss": 2.352, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.8326693227091634, | |
| "grad_norm": 0.7561918497085571, | |
| "learning_rate": 8.30010910550611e-07, | |
| "loss": 2.3562, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.8336653386454184, | |
| "grad_norm": 0.6130694150924683, | |
| "learning_rate": 8.204379965071036e-07, | |
| "loss": 2.3059, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.8346613545816733, | |
| "grad_norm": 1.1262505054473877, | |
| "learning_rate": 8.109156712870397e-07, | |
| "loss": 2.7065, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.8356573705179283, | |
| "grad_norm": 2.2383527755737305, | |
| "learning_rate": 8.014440501472909e-07, | |
| "loss": 2.6112, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.8366533864541833, | |
| "grad_norm": 1.0032474994659424, | |
| "learning_rate": 7.920232477310102e-07, | |
| "loss": 2.4155, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8376494023904383, | |
| "grad_norm": 1.5110204219818115, | |
| "learning_rate": 7.826533780662481e-07, | |
| "loss": 2.2312, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.8386454183266933, | |
| "grad_norm": 1.206811785697937, | |
| "learning_rate": 7.733345545645726e-07, | |
| "loss": 2.7882, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.8396414342629482, | |
| "grad_norm": 1.1954952478408813, | |
| "learning_rate": 7.640668900196985e-07, | |
| "loss": 2.5765, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.8406374501992032, | |
| "grad_norm": 0.8079789280891418, | |
| "learning_rate": 7.54850496606117e-07, | |
| "loss": 2.0988, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.8416334661354582, | |
| "grad_norm": 1.204300880432129, | |
| "learning_rate": 7.456854858777418e-07, | |
| "loss": 2.6726, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.8426294820717132, | |
| "grad_norm": 2.1050732135772705, | |
| "learning_rate": 7.365719687665568e-07, | |
| "loss": 2.4657, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.8436254980079682, | |
| "grad_norm": 1.1028344631195068, | |
| "learning_rate": 7.27510055581278e-07, | |
| "loss": 2.7165, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.8446215139442231, | |
| "grad_norm": 0.9454997777938843, | |
| "learning_rate": 7.184998560060114e-07, | |
| "loss": 2.4538, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.8456175298804781, | |
| "grad_norm": 1.000157117843628, | |
| "learning_rate": 7.095414790989292e-07, | |
| "loss": 2.2186, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.8466135458167331, | |
| "grad_norm": 1.7517778873443604, | |
| "learning_rate": 7.006350332909495e-07, | |
| "loss": 2.4932, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8476095617529881, | |
| "grad_norm": 1.2720731496810913, | |
| "learning_rate": 6.917806263844268e-07, | |
| "loss": 2.287, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.848605577689243, | |
| "grad_norm": 1.1185457706451416, | |
| "learning_rate": 6.829783655518402e-07, | |
| "loss": 2.4009, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.849601593625498, | |
| "grad_norm": 1.228535771369934, | |
| "learning_rate": 6.742283573345004e-07, | |
| "loss": 2.4729, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.850597609561753, | |
| "grad_norm": 1.3845924139022827, | |
| "learning_rate": 6.655307076412637e-07, | |
| "loss": 2.5723, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.851593625498008, | |
| "grad_norm": 1.2278180122375488, | |
| "learning_rate": 6.568855217472425e-07, | |
| "loss": 2.4958, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.852589641434263, | |
| "grad_norm": 0.7195264101028442, | |
| "learning_rate": 6.482929042925363e-07, | |
| "loss": 2.5879, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.853585657370518, | |
| "grad_norm": 1.1685850620269775, | |
| "learning_rate": 6.397529592809615e-07, | |
| "loss": 2.3411, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.8545816733067729, | |
| "grad_norm": 0.935212254524231, | |
| "learning_rate": 6.312657900788e-07, | |
| "loss": 2.3266, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.8555776892430279, | |
| "grad_norm": 1.0523936748504639, | |
| "learning_rate": 6.228314994135376e-07, | |
| "loss": 2.5915, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.8565737051792829, | |
| "grad_norm": 0.7740164399147034, | |
| "learning_rate": 6.14450189372628e-07, | |
| "loss": 2.7521, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8575697211155379, | |
| "grad_norm": 2.0991780757904053, | |
| "learning_rate": 6.061219614022535e-07, | |
| "loss": 2.2285, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.8585657370517928, | |
| "grad_norm": 0.990088164806366, | |
| "learning_rate": 5.978469163061018e-07, | |
| "loss": 2.3091, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.8595617529880478, | |
| "grad_norm": 1.3237099647521973, | |
| "learning_rate": 5.896251542441395e-07, | |
| "loss": 2.4856, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.8605577689243028, | |
| "grad_norm": 0.9690184593200684, | |
| "learning_rate": 5.814567747314049e-07, | |
| "loss": 2.3608, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.8615537848605578, | |
| "grad_norm": 2.3030495643615723, | |
| "learning_rate": 5.733418766367988e-07, | |
| "loss": 2.4188, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.8625498007968128, | |
| "grad_norm": 1.1400495767593384, | |
| "learning_rate": 5.652805581818943e-07, | |
| "loss": 2.2339, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.8635458167330677, | |
| "grad_norm": 1.5023630857467651, | |
| "learning_rate": 5.572729169397422e-07, | |
| "loss": 2.1393, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.8645418326693227, | |
| "grad_norm": 0.9161491394042969, | |
| "learning_rate": 5.493190498336903e-07, | |
| "loss": 2.3602, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.8655378486055777, | |
| "grad_norm": 0.9129965901374817, | |
| "learning_rate": 5.414190531362162e-07, | |
| "loss": 2.3639, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.8665338645418327, | |
| "grad_norm": 1.9090954065322876, | |
| "learning_rate": 5.335730224677538e-07, | |
| "loss": 2.2505, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8675298804780877, | |
| "grad_norm": 0.8173048496246338, | |
| "learning_rate": 5.25781052795541e-07, | |
| "loss": 2.2072, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.8685258964143426, | |
| "grad_norm": 2.2648603916168213, | |
| "learning_rate": 5.180432384324691e-07, | |
| "loss": 2.458, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.8695219123505976, | |
| "grad_norm": 0.906757652759552, | |
| "learning_rate": 5.103596730359428e-07, | |
| "loss": 2.5185, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.8705179282868526, | |
| "grad_norm": 0.9674282073974609, | |
| "learning_rate": 5.027304496067431e-07, | |
| "loss": 2.3208, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.8715139442231076, | |
| "grad_norm": 1.4019418954849243, | |
| "learning_rate": 4.951556604879049e-07, | |
| "loss": 2.531, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.8725099601593626, | |
| "grad_norm": 1.1170932054519653, | |
| "learning_rate": 4.876353973635955e-07, | |
| "loss": 2.2724, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.8735059760956175, | |
| "grad_norm": 0.8795150518417358, | |
| "learning_rate": 4.8016975125801e-07, | |
| "loss": 2.4447, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.8745019920318725, | |
| "grad_norm": 1.4134328365325928, | |
| "learning_rate": 4.727588125342669e-07, | |
| "loss": 2.4728, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.8754980079681275, | |
| "grad_norm": 0.8537651300430298, | |
| "learning_rate": 4.6540267089331294e-07, | |
| "loss": 1.8693, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.8764940239043825, | |
| "grad_norm": 0.973147988319397, | |
| "learning_rate": 4.581014153728386e-07, | |
| "loss": 2.6805, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.8774900398406374, | |
| "grad_norm": 2.0054306983947754, | |
| "learning_rate": 4.508551343462014e-07, | |
| "loss": 2.8345, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.8784860557768924, | |
| "grad_norm": 1.3600786924362183, | |
| "learning_rate": 4.4366391552135567e-07, | |
| "loss": 2.4685, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.8794820717131474, | |
| "grad_norm": 1.042197823524475, | |
| "learning_rate": 4.3652784593978927e-07, | |
| "loss": 2.2609, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.8804780876494024, | |
| "grad_norm": 1.342214822769165, | |
| "learning_rate": 4.29447011975474e-07, | |
| "loss": 2.3443, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.8814741035856574, | |
| "grad_norm": 1.0559214353561401, | |
| "learning_rate": 4.224214993338149e-07, | |
| "loss": 2.5197, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.8824701195219123, | |
| "grad_norm": 0.9082587361335754, | |
| "learning_rate": 4.154513930506171e-07, | |
| "loss": 2.4609, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.8834661354581673, | |
| "grad_norm": 1.555578589439392, | |
| "learning_rate": 4.0853677749105426e-07, | |
| "loss": 2.5883, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.8844621513944223, | |
| "grad_norm": 0.6700481176376343, | |
| "learning_rate": 4.0167773634865017e-07, | |
| "loss": 2.4754, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.8854581673306773, | |
| "grad_norm": 1.7728710174560547, | |
| "learning_rate": 3.9487435264426056e-07, | |
| "loss": 2.5669, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.8864541832669323, | |
| "grad_norm": 0.8938197493553162, | |
| "learning_rate": 3.8812670872507454e-07, | |
| "loss": 2.7976, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8874501992031872, | |
| "grad_norm": 1.6636402606964111, | |
| "learning_rate": 3.8143488626361135e-07, | |
| "loss": 2.4172, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.8884462151394422, | |
| "grad_norm": 1.0676062107086182, | |
| "learning_rate": 3.747989662567403e-07, | |
| "loss": 2.582, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.8894422310756972, | |
| "grad_norm": 1.280415654182434, | |
| "learning_rate": 3.6821902902469066e-07, | |
| "loss": 2.2295, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.8904382470119522, | |
| "grad_norm": 1.0814982652664185, | |
| "learning_rate": 3.6169515421008494e-07, | |
| "loss": 2.7001, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.8914342629482072, | |
| "grad_norm": 1.077160120010376, | |
| "learning_rate": 3.5522742077697734e-07, | |
| "loss": 2.49, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.8924302788844621, | |
| "grad_norm": 1.2483303546905518, | |
| "learning_rate": 3.4881590700989175e-07, | |
| "loss": 2.3397, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.8934262948207171, | |
| "grad_norm": 0.6463543772697449, | |
| "learning_rate": 3.4246069051287747e-07, | |
| "loss": 2.3176, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.8944223107569721, | |
| "grad_norm": 0.8918944001197815, | |
| "learning_rate": 3.3616184820856936e-07, | |
| "loss": 2.5445, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.8954183266932271, | |
| "grad_norm": 0.963518500328064, | |
| "learning_rate": 3.299194563372604e-07, | |
| "loss": 2.3521, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.896414342629482, | |
| "grad_norm": 1.8490091562271118, | |
| "learning_rate": 3.237335904559713e-07, | |
| "loss": 1.8583, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.897410358565737, | |
| "grad_norm": 1.2349917888641357, | |
| "learning_rate": 3.176043254375422e-07, | |
| "loss": 2.5093, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.898406374501992, | |
| "grad_norm": 1.3500816822052002, | |
| "learning_rate": 3.1153173546972395e-07, | |
| "loss": 2.3507, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.899402390438247, | |
| "grad_norm": 1.2740628719329834, | |
| "learning_rate": 3.055158940542818e-07, | |
| "loss": 2.4008, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.900398406374502, | |
| "grad_norm": 33.82315444946289, | |
| "learning_rate": 2.9955687400610336e-07, | |
| "loss": 2.3794, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.901394422310757, | |
| "grad_norm": 0.8968676328659058, | |
| "learning_rate": 2.9365474745231935e-07, | |
| "loss": 2.3304, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.9023904382470119, | |
| "grad_norm": 1.474859356880188, | |
| "learning_rate": 2.878095858314278e-07, | |
| "loss": 2.2761, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.9033864541832669, | |
| "grad_norm": 1.0338733196258545, | |
| "learning_rate": 2.820214598924348e-07, | |
| "loss": 2.222, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.9043824701195219, | |
| "grad_norm": 1.2461026906967163, | |
| "learning_rate": 2.7629043969399193e-07, | |
| "loss": 2.129, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.9053784860557769, | |
| "grad_norm": 1.0711873769760132, | |
| "learning_rate": 2.7061659460355047e-07, | |
| "loss": 2.5671, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.9063745019920318, | |
| "grad_norm": 2.0772130489349365, | |
| "learning_rate": 2.6499999329652525e-07, | |
| "loss": 2.4514, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9073705179282868, | |
| "grad_norm": 0.8682186603546143, | |
| "learning_rate": 2.594407037554586e-07, | |
| "loss": 2.4655, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.9083665338645418, | |
| "grad_norm": 2.363909959793091, | |
| "learning_rate": 2.539387932691995e-07, | |
| "loss": 3.0087, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.9093625498007968, | |
| "grad_norm": 1.1324294805526733, | |
| "learning_rate": 2.4849432843208786e-07, | |
| "loss": 2.6718, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.9103585657370518, | |
| "grad_norm": 1.2291409969329834, | |
| "learning_rate": 2.431073751431529e-07, | |
| "loss": 2.4459, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.9113545816733067, | |
| "grad_norm": 1.1384942531585693, | |
| "learning_rate": 2.377779986053097e-07, | |
| "loss": 2.3891, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.9123505976095617, | |
| "grad_norm": 0.9154942631721497, | |
| "learning_rate": 2.3250626332457226e-07, | |
| "loss": 2.3965, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.9133466135458167, | |
| "grad_norm": 3.3389575481414795, | |
| "learning_rate": 2.2729223310927473e-07, | |
| "loss": 2.4395, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.9143426294820717, | |
| "grad_norm": 1.6811953783035278, | |
| "learning_rate": 2.2213597106929608e-07, | |
| "loss": 2.6017, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.9153386454183267, | |
| "grad_norm": 0.8396251201629639, | |
| "learning_rate": 2.1703753961529906e-07, | |
| "loss": 2.7736, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.9163346613545816, | |
| "grad_norm": 1.4626351594924927, | |
| "learning_rate": 2.1199700045797077e-07, | |
| "loss": 2.2861, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9173306772908366, | |
| "grad_norm": 1.5617847442626953, | |
| "learning_rate": 2.070144146072789e-07, | |
| "loss": 2.6273, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.9183266932270916, | |
| "grad_norm": 1.4151337146759033, | |
| "learning_rate": 2.0208984237173546e-07, | |
| "loss": 2.1617, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.9193227091633466, | |
| "grad_norm": 0.9167352318763733, | |
| "learning_rate": 1.9722334335766092e-07, | |
| "loss": 2.3805, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.9203187250996016, | |
| "grad_norm": 0.840559720993042, | |
| "learning_rate": 1.9241497646846463e-07, | |
| "loss": 2.3247, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.9213147410358565, | |
| "grad_norm": 0.9646689891815186, | |
| "learning_rate": 1.876647999039377e-07, | |
| "loss": 2.5301, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.9223107569721115, | |
| "grad_norm": 0.7615554928779602, | |
| "learning_rate": 1.829728711595391e-07, | |
| "loss": 2.3153, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.9233067729083665, | |
| "grad_norm": 1.138389229774475, | |
| "learning_rate": 1.7833924702570725e-07, | |
| "loss": 2.573, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.9243027888446215, | |
| "grad_norm": 1.0490334033966064, | |
| "learning_rate": 1.7376398358716852e-07, | |
| "loss": 2.6447, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.9252988047808764, | |
| "grad_norm": 0.9057884812355042, | |
| "learning_rate": 1.6924713622225975e-07, | |
| "loss": 2.6325, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.9262948207171314, | |
| "grad_norm": 2.5391931533813477, | |
| "learning_rate": 1.6478875960225904e-07, | |
| "loss": 2.7326, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.9272908366533864, | |
| "grad_norm": 1.9867279529571533, | |
| "learning_rate": 1.6038890769072223e-07, | |
| "loss": 2.4295, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.9282868525896414, | |
| "grad_norm": 0.7426679730415344, | |
| "learning_rate": 1.5604763374283073e-07, | |
| "loss": 2.3705, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.9292828685258964, | |
| "grad_norm": 1.033311367034912, | |
| "learning_rate": 1.5176499030474578e-07, | |
| "loss": 2.2442, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.9302788844621513, | |
| "grad_norm": 1.394702672958374, | |
| "learning_rate": 1.4754102921297363e-07, | |
| "loss": 2.2417, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.9312749003984063, | |
| "grad_norm": 1.109711766242981, | |
| "learning_rate": 1.4337580159373864e-07, | |
| "loss": 2.1194, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.9322709163346613, | |
| "grad_norm": 0.810978353023529, | |
| "learning_rate": 1.3926935786236218e-07, | |
| "loss": 2.251, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.9332669322709163, | |
| "grad_norm": 0.9075368642807007, | |
| "learning_rate": 1.3522174772265585e-07, | |
| "loss": 2.3295, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.9342629482071713, | |
| "grad_norm": 1.0174400806427002, | |
| "learning_rate": 1.3123302016631477e-07, | |
| "loss": 2.4231, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.9352589641434262, | |
| "grad_norm": 1.4848259687423706, | |
| "learning_rate": 1.2730322347233037e-07, | |
| "loss": 2.327, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.9362549800796812, | |
| "grad_norm": 0.7531813979148865, | |
| "learning_rate": 1.2343240520640287e-07, | |
| "loss": 2.5221, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9372509960159362, | |
| "grad_norm": 2.318554162979126, | |
| "learning_rate": 1.196206122203647e-07, | |
| "loss": 2.3946, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.9382470119521913, | |
| "grad_norm": 0.8973721861839294, | |
| "learning_rate": 1.158678906516153e-07, | |
| "loss": 2.1919, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.9392430278884463, | |
| "grad_norm": 1.4439376592636108, | |
| "learning_rate": 1.1217428592256218e-07, | |
| "loss": 2.3653, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.9402390438247012, | |
| "grad_norm": 1.6569935083389282, | |
| "learning_rate": 1.0853984274007246e-07, | |
| "loss": 2.6982, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.9412350597609562, | |
| "grad_norm": 1.5078299045562744, | |
| "learning_rate": 1.0496460509492767e-07, | |
| "loss": 2.4708, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.9422310756972112, | |
| "grad_norm": 2.2158310413360596, | |
| "learning_rate": 1.0144861626129599e-07, | |
| "loss": 2.5999, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.9432270916334662, | |
| "grad_norm": 1.4059021472930908, | |
| "learning_rate": 9.799191879620474e-08, | |
| "loss": 2.4373, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.9442231075697212, | |
| "grad_norm": 2.9164271354675293, | |
| "learning_rate": 9.459455453902866e-08, | |
| "loss": 2.6697, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.9452191235059761, | |
| "grad_norm": 1.275817632675171, | |
| "learning_rate": 9.125656461098142e-08, | |
| "loss": 2.48, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.9462151394422311, | |
| "grad_norm": 0.821499764919281, | |
| "learning_rate": 8.797798941461655e-08, | |
| "loss": 2.4301, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9472111553784861, | |
| "grad_norm": 1.4214954376220703, | |
| "learning_rate": 8.475886863334282e-08, | |
| "loss": 2.2847, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.9482071713147411, | |
| "grad_norm": 0.9953071475028992, | |
| "learning_rate": 8.15992412309391e-08, | |
| "loss": 2.464, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.9492031872509961, | |
| "grad_norm": 1.4973928928375244, | |
| "learning_rate": 7.84991454510864e-08, | |
| "loss": 2.0293, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.950199203187251, | |
| "grad_norm": 1.326232671737671, | |
| "learning_rate": 7.545861881690097e-08, | |
| "loss": 2.4445, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.951195219123506, | |
| "grad_norm": 1.0779331922531128, | |
| "learning_rate": 7.247769813048644e-08, | |
| "loss": 2.7232, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.952191235059761, | |
| "grad_norm": 1.0593082904815674, | |
| "learning_rate": 6.955641947248127e-08, | |
| "loss": 2.8634, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.953187250996016, | |
| "grad_norm": 0.8761929869651794, | |
| "learning_rate": 6.669481820162638e-08, | |
| "loss": 2.3328, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.954183266932271, | |
| "grad_norm": 0.9143054485321045, | |
| "learning_rate": 6.389292895433608e-08, | |
| "loss": 2.2261, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.9551792828685259, | |
| "grad_norm": 0.8204777240753174, | |
| "learning_rate": 6.115078564427946e-08, | |
| "loss": 2.4155, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.9561752988047809, | |
| "grad_norm": 0.7546234726905823, | |
| "learning_rate": 5.8468421461968517e-08, | |
| "loss": 2.267, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9571713147410359, | |
| "grad_norm": 1.0376356840133667, | |
| "learning_rate": 5.584586887435739e-08, | |
| "loss": 2.4496, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.9581673306772909, | |
| "grad_norm": 0.6573870182037354, | |
| "learning_rate": 5.3283159624448745e-08, | |
| "loss": 2.4904, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.9591633466135459, | |
| "grad_norm": 1.3613762855529785, | |
| "learning_rate": 5.0780324730911877e-08, | |
| "loss": 2.4824, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.9601593625498008, | |
| "grad_norm": 1.4304169416427612, | |
| "learning_rate": 4.833739448770247e-08, | |
| "loss": 2.9062, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.9611553784860558, | |
| "grad_norm": 0.9547715783119202, | |
| "learning_rate": 4.5954398463700647e-08, | |
| "loss": 2.4977, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.9621513944223108, | |
| "grad_norm": 1.3909553289413452, | |
| "learning_rate": 4.3631365502351805e-08, | |
| "loss": 2.2116, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.9631474103585658, | |
| "grad_norm": 0.7387050986289978, | |
| "learning_rate": 4.136832372131583e-08, | |
| "loss": 2.5225, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.9641434262948207, | |
| "grad_norm": 1.2469770908355713, | |
| "learning_rate": 3.916530051212841e-08, | |
| "loss": 2.4759, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.9651394422310757, | |
| "grad_norm": 1.3780826330184937, | |
| "learning_rate": 3.702232253986804e-08, | |
| "loss": 2.0538, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.9661354581673307, | |
| "grad_norm": 0.9699292778968811, | |
| "learning_rate": 3.4939415742835655e-08, | |
| "loss": 2.3441, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.9671314741035857, | |
| "grad_norm": 1.147615671157837, | |
| "learning_rate": 3.2916605332238284e-08, | |
| "loss": 2.4042, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.9681274900398407, | |
| "grad_norm": 1.1738359928131104, | |
| "learning_rate": 3.095391579188589e-08, | |
| "loss": 2.539, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.9691235059760956, | |
| "grad_norm": 0.640042781829834, | |
| "learning_rate": 2.9051370877892226e-08, | |
| "loss": 2.3044, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.9701195219123506, | |
| "grad_norm": 0.7774790525436401, | |
| "learning_rate": 2.7208993618390578e-08, | |
| "loss": 2.2616, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.9711155378486056, | |
| "grad_norm": 1.0389803647994995, | |
| "learning_rate": 2.5426806313252895e-08, | |
| "loss": 2.4425, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.9721115537848606, | |
| "grad_norm": 1.3041914701461792, | |
| "learning_rate": 2.370483053382111e-08, | |
| "loss": 2.39, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.9731075697211156, | |
| "grad_norm": 0.6934490203857422, | |
| "learning_rate": 2.2043087122644023e-08, | |
| "loss": 2.2232, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.9741035856573705, | |
| "grad_norm": 0.6868986487388611, | |
| "learning_rate": 2.0441596193227497e-08, | |
| "loss": 2.4806, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.9750996015936255, | |
| "grad_norm": 1.1924256086349487, | |
| "learning_rate": 1.8900377129790205e-08, | |
| "loss": 2.4314, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.9760956175298805, | |
| "grad_norm": 0.7938891053199768, | |
| "learning_rate": 1.741944858702771e-08, | |
| "loss": 2.4715, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9770916334661355, | |
| "grad_norm": 0.9900745749473572, | |
| "learning_rate": 1.5998828489888762e-08, | |
| "loss": 2.0915, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.9780876494023905, | |
| "grad_norm": 3.0510518550872803, | |
| "learning_rate": 1.4638534033356578e-08, | |
| "loss": 3.3239, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.9790836653386454, | |
| "grad_norm": 1.1174182891845703, | |
| "learning_rate": 1.333858168224178e-08, | |
| "loss": 2.1631, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.9800796812749004, | |
| "grad_norm": 0.7828091979026794, | |
| "learning_rate": 1.2098987170982013e-08, | |
| "loss": 2.2998, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.9810756972111554, | |
| "grad_norm": 0.8114204406738281, | |
| "learning_rate": 1.0919765503453195e-08, | |
| "loss": 2.3996, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.9820717131474104, | |
| "grad_norm": 0.625230073928833, | |
| "learning_rate": 9.800930952786336e-09, | |
| "loss": 2.6785, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.9830677290836654, | |
| "grad_norm": 1.6107351779937744, | |
| "learning_rate": 8.742497061195455e-09, | |
| "loss": 2.6999, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.9840637450199203, | |
| "grad_norm": 1.5219416618347168, | |
| "learning_rate": 7.744476639813814e-09, | |
| "loss": 2.3396, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.9850597609561753, | |
| "grad_norm": 1.0381386280059814, | |
| "learning_rate": 6.806881768539053e-09, | |
| "loss": 2.2097, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.9860557768924303, | |
| "grad_norm": 1.1309791803359985, | |
| "learning_rate": 5.929723795884967e-09, | |
| "loss": 2.4901, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.9870517928286853, | |
| "grad_norm": 0.979224443435669, | |
| "learning_rate": 5.113013338847173e-09, | |
| "loss": 2.355, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.9880478087649402, | |
| "grad_norm": 0.9343250393867493, | |
| "learning_rate": 4.356760282773209e-09, | |
| "loss": 2.682, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.9890438247011952, | |
| "grad_norm": 4.211667060852051, | |
| "learning_rate": 3.660973781242083e-09, | |
| "loss": 3.9138, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.9900398406374502, | |
| "grad_norm": 0.9402066469192505, | |
| "learning_rate": 3.0256622559543537e-09, | |
| "loss": 2.1841, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.9910358565737052, | |
| "grad_norm": 1.136916995048523, | |
| "learning_rate": 2.4508333966305473e-09, | |
| "loss": 2.2469, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.9920318725099602, | |
| "grad_norm": 1.080809473991394, | |
| "learning_rate": 1.936494160916791e-09, | |
| "loss": 2.4922, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.9930278884462151, | |
| "grad_norm": 0.9956486821174622, | |
| "learning_rate": 1.4826507743032071e-09, | |
| "loss": 2.5901, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.9940239043824701, | |
| "grad_norm": 2.1008529663085938, | |
| "learning_rate": 1.089308730043981e-09, | |
| "loss": 2.8828, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.9950199203187251, | |
| "grad_norm": 1.3167147636413574, | |
| "learning_rate": 7.564727890968515e-10, | |
| "loss": 2.3331, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.9960159362549801, | |
| "grad_norm": 1.1669059991836548, | |
| "learning_rate": 4.841469800592746e-10, | |
| "loss": 1.9942, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9970119521912351, | |
| "grad_norm": 0.9161001443862915, | |
| "learning_rate": 2.723345991245685e-10, | |
| "loss": 2.2932, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.99800796812749, | |
| "grad_norm": 0.8516436815261841, | |
| "learning_rate": 1.210382100397256e-10, | |
| "loss": 2.2065, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.999003984063745, | |
| "grad_norm": 0.7772925496101379, | |
| "learning_rate": 3.0259644074326355e-11, | |
| "loss": 2.4638, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.244535207748413, | |
| "learning_rate": 0.0, | |
| "loss": 2.3463, | |
| "step": 1004 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1004, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.5282748355775386e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |