{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5119897602047959, "eval_steps": 500, "global_step": 6400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.999840003199936e-05, "grad_norm": 0.33082761638311287, "learning_rate": 0.0, "loss": 0.3004, "step": 1 }, { "epoch": 0.00015999680006399873, "grad_norm": 0.2913215150810986, "learning_rate": 1.3333333333333334e-08, "loss": 0.3327, "step": 2 }, { "epoch": 0.00023999520009599807, "grad_norm": 0.37934142492177475, "learning_rate": 2.6666666666666667e-08, "loss": 0.3023, "step": 3 }, { "epoch": 0.00031999360012799745, "grad_norm": 0.25570776418714214, "learning_rate": 4e-08, "loss": 0.3572, "step": 4 }, { "epoch": 0.0003999920001599968, "grad_norm": 0.4048563084271804, "learning_rate": 5.3333333333333334e-08, "loss": 0.2973, "step": 5 }, { "epoch": 0.00047999040019199615, "grad_norm": 0.294296710431263, "learning_rate": 6.666666666666668e-08, "loss": 0.3336, "step": 6 }, { "epoch": 0.0005599888002239956, "grad_norm": 0.2970810285479718, "learning_rate": 8e-08, "loss": 0.3015, "step": 7 }, { "epoch": 0.0006399872002559949, "grad_norm": 0.2801138817171433, "learning_rate": 9.333333333333335e-08, "loss": 0.3129, "step": 8 }, { "epoch": 0.0007199856002879943, "grad_norm": 0.2967514174191538, "learning_rate": 1.0666666666666667e-07, "loss": 0.311, "step": 9 }, { "epoch": 0.0007999840003199936, "grad_norm": 0.3437099305921091, "learning_rate": 1.2000000000000002e-07, "loss": 0.3253, "step": 10 }, { "epoch": 0.000879982400351993, "grad_norm": 0.5397417630661548, "learning_rate": 1.3333333333333336e-07, "loss": 0.3175, "step": 11 }, { "epoch": 0.0009599808003839923, "grad_norm": 0.3224658652779924, "learning_rate": 1.4666666666666668e-07, "loss": 0.289, "step": 12 }, { "epoch": 0.0010399792004159916, "grad_norm": 0.2897167268442966, "learning_rate": 1.6e-07, "loss": 0.3194, "step": 13 }, { "epoch": 0.001119977600447991, "grad_norm": 0.24959442619549038, "learning_rate": 1.7333333333333335e-07, "loss": 0.3221, "step": 14 }, { "epoch": 0.0011999760004799903, "grad_norm": 0.29492771533023515, "learning_rate": 1.866666666666667e-07, "loss": 0.2977, "step": 15 }, { "epoch": 0.0012799744005119898, "grad_norm": 0.22293301820824413, "learning_rate": 2.0000000000000002e-07, "loss": 0.3693, "step": 16 }, { "epoch": 0.001359972800543989, "grad_norm": 0.6168528884798137, "learning_rate": 2.1333333333333334e-07, "loss": 0.3079, "step": 17 }, { "epoch": 0.0014399712005759885, "grad_norm": 0.41567461031521735, "learning_rate": 2.266666666666667e-07, "loss": 0.3335, "step": 18 }, { "epoch": 0.0015199696006079877, "grad_norm": 0.292472875842971, "learning_rate": 2.4000000000000003e-07, "loss": 0.3392, "step": 19 }, { "epoch": 0.0015999680006399872, "grad_norm": 0.34853964356449135, "learning_rate": 2.533333333333333e-07, "loss": 0.2795, "step": 20 }, { "epoch": 0.0016799664006719867, "grad_norm": 0.2780087291513081, "learning_rate": 2.666666666666667e-07, "loss": 0.3044, "step": 21 }, { "epoch": 0.001759964800703986, "grad_norm": 0.2827647097216766, "learning_rate": 2.8e-07, "loss": 0.3239, "step": 22 }, { "epoch": 0.0018399632007359854, "grad_norm": 0.28414530182467007, "learning_rate": 2.9333333333333337e-07, "loss": 0.3287, "step": 23 }, { "epoch": 0.0019199616007679846, "grad_norm": 0.31806015664635406, "learning_rate": 3.0666666666666666e-07, "loss": 0.3391, "step": 24 }, { "epoch": 0.001999960000799984, "grad_norm": 0.2520307529365584, "learning_rate": 3.2e-07, "loss": 0.3457, "step": 25 }, { "epoch": 0.0020799584008319833, "grad_norm": 0.23216855173011924, "learning_rate": 3.3333333333333335e-07, "loss": 0.3713, "step": 26 }, { "epoch": 0.0021599568008639825, "grad_norm": 0.31912182901643477, "learning_rate": 3.466666666666667e-07, "loss": 0.3166, "step": 27 }, { "epoch": 0.002239955200895982, "grad_norm": 0.32092495180662667, "learning_rate": 3.6e-07, "loss": 0.3344, "step": 28 }, { "epoch": 0.0023199536009279815, "grad_norm": 0.319796041750652, "learning_rate": 3.733333333333334e-07, "loss": 0.303, "step": 29 }, { "epoch": 0.0023999520009599807, "grad_norm": 0.3035068407364502, "learning_rate": 3.8666666666666674e-07, "loss": 0.3238, "step": 30 }, { "epoch": 0.0024799504009919804, "grad_norm": 0.5933202783110759, "learning_rate": 4.0000000000000003e-07, "loss": 0.2862, "step": 31 }, { "epoch": 0.0025599488010239796, "grad_norm": 0.41253800734855595, "learning_rate": 4.133333333333334e-07, "loss": 0.3092, "step": 32 }, { "epoch": 0.002639947201055979, "grad_norm": 0.293001215764292, "learning_rate": 4.266666666666667e-07, "loss": 0.3142, "step": 33 }, { "epoch": 0.002719945601087978, "grad_norm": 0.2510822578099165, "learning_rate": 4.4e-07, "loss": 0.3713, "step": 34 }, { "epoch": 0.0027999440011199778, "grad_norm": 0.23189303424493002, "learning_rate": 4.533333333333334e-07, "loss": 0.3416, "step": 35 }, { "epoch": 0.002879942401151977, "grad_norm": 0.30229246491134404, "learning_rate": 4.666666666666667e-07, "loss": 0.2961, "step": 36 }, { "epoch": 0.0029599408011839762, "grad_norm": 0.3205739844872136, "learning_rate": 4.800000000000001e-07, "loss": 0.3484, "step": 37 }, { "epoch": 0.0030399392012159755, "grad_norm": 0.3273582334140839, "learning_rate": 4.933333333333334e-07, "loss": 0.3024, "step": 38 }, { "epoch": 0.003119937601247975, "grad_norm": 0.26615517021880947, "learning_rate": 5.066666666666667e-07, "loss": 0.3244, "step": 39 }, { "epoch": 0.0031999360012799744, "grad_norm": 0.3068949619534162, "learning_rate": 5.2e-07, "loss": 0.3079, "step": 40 }, { "epoch": 0.0032799344013119736, "grad_norm": 0.2803283985047437, "learning_rate": 5.333333333333335e-07, "loss": 0.337, "step": 41 }, { "epoch": 0.0033599328013439733, "grad_norm": 0.31672734122837837, "learning_rate": 5.466666666666667e-07, "loss": 0.338, "step": 42 }, { "epoch": 0.0034399312013759726, "grad_norm": 0.3393482800519383, "learning_rate": 5.6e-07, "loss": 0.2953, "step": 43 }, { "epoch": 0.003519929601407972, "grad_norm": 0.3025747711461973, "learning_rate": 5.733333333333334e-07, "loss": 0.304, "step": 44 }, { "epoch": 0.003599928001439971, "grad_norm": 0.2720493658372177, "learning_rate": 5.866666666666667e-07, "loss": 0.3414, "step": 45 }, { "epoch": 0.0036799264014719707, "grad_norm": 0.3142370602638329, "learning_rate": 6.000000000000001e-07, "loss": 0.3004, "step": 46 }, { "epoch": 0.00375992480150397, "grad_norm": 0.3323679423072982, "learning_rate": 6.133333333333333e-07, "loss": 0.3186, "step": 47 }, { "epoch": 0.003839923201535969, "grad_norm": 0.32612763196974376, "learning_rate": 6.266666666666667e-07, "loss": 0.3048, "step": 48 }, { "epoch": 0.003919921601567969, "grad_norm": 0.28751297529858044, "learning_rate": 6.4e-07, "loss": 0.3247, "step": 49 }, { "epoch": 0.003999920001599968, "grad_norm": 0.3824429393686504, "learning_rate": 6.533333333333334e-07, "loss": 0.2935, "step": 50 }, { "epoch": 0.004079918401631967, "grad_norm": 0.3018533488251579, "learning_rate": 6.666666666666667e-07, "loss": 0.316, "step": 51 }, { "epoch": 0.004159916801663967, "grad_norm": 0.30195343332232594, "learning_rate": 6.800000000000001e-07, "loss": 0.3115, "step": 52 }, { "epoch": 0.004239915201695966, "grad_norm": 0.3506805684458278, "learning_rate": 6.933333333333334e-07, "loss": 0.3171, "step": 53 }, { "epoch": 0.004319913601727965, "grad_norm": 0.3185810107485491, "learning_rate": 7.066666666666667e-07, "loss": 0.3006, "step": 54 }, { "epoch": 0.004399912001759965, "grad_norm": 0.2693358810664699, "learning_rate": 7.2e-07, "loss": 0.3206, "step": 55 }, { "epoch": 0.004479910401791964, "grad_norm": 0.24392659455219035, "learning_rate": 7.333333333333334e-07, "loss": 0.3347, "step": 56 }, { "epoch": 0.004559908801823964, "grad_norm": 0.31015163744900837, "learning_rate": 7.466666666666668e-07, "loss": 0.3381, "step": 57 }, { "epoch": 0.004639907201855963, "grad_norm": 0.3102769361351094, "learning_rate": 7.6e-07, "loss": 0.3075, "step": 58 }, { "epoch": 0.004719905601887962, "grad_norm": 0.5538706349504963, "learning_rate": 7.733333333333335e-07, "loss": 0.2794, "step": 59 }, { "epoch": 0.004799904001919961, "grad_norm": 0.2523934291589158, "learning_rate": 7.866666666666667e-07, "loss": 0.3431, "step": 60 }, { "epoch": 0.004879902401951961, "grad_norm": 0.32621842899414777, "learning_rate": 8.000000000000001e-07, "loss": 0.2905, "step": 61 }, { "epoch": 0.004959900801983961, "grad_norm": 0.29215921145765517, "learning_rate": 8.133333333333333e-07, "loss": 0.3196, "step": 62 }, { "epoch": 0.00503989920201596, "grad_norm": 0.2780688840346979, "learning_rate": 8.266666666666668e-07, "loss": 0.3203, "step": 63 }, { "epoch": 0.005119897602047959, "grad_norm": 0.3351603443264391, "learning_rate": 8.400000000000001e-07, "loss": 0.2977, "step": 64 }, { "epoch": 0.0051998960020799585, "grad_norm": 0.26823219956448324, "learning_rate": 8.533333333333334e-07, "loss": 0.3367, "step": 65 }, { "epoch": 0.005279894402111958, "grad_norm": 0.25880892021145024, "learning_rate": 8.666666666666668e-07, "loss": 0.3182, "step": 66 }, { "epoch": 0.005359892802143957, "grad_norm": 0.3010709774746063, "learning_rate": 8.8e-07, "loss": 0.323, "step": 67 }, { "epoch": 0.005439891202175956, "grad_norm": 0.3162791184817396, "learning_rate": 8.933333333333334e-07, "loss": 0.3075, "step": 68 }, { "epoch": 0.005519889602207955, "grad_norm": 0.34011821812360454, "learning_rate": 9.066666666666668e-07, "loss": 0.3203, "step": 69 }, { "epoch": 0.0055998880022399555, "grad_norm": 0.31415942388867163, "learning_rate": 9.200000000000001e-07, "loss": 0.2991, "step": 70 }, { "epoch": 0.005679886402271955, "grad_norm": 0.6463148407796293, "learning_rate": 9.333333333333334e-07, "loss": 0.3208, "step": 71 }, { "epoch": 0.005759884802303954, "grad_norm": 0.29028764402440893, "learning_rate": 9.466666666666667e-07, "loss": 0.2996, "step": 72 }, { "epoch": 0.005839883202335953, "grad_norm": 0.26343528569421293, "learning_rate": 9.600000000000001e-07, "loss": 0.2955, "step": 73 }, { "epoch": 0.0059198816023679525, "grad_norm": 0.24518518576843773, "learning_rate": 9.733333333333333e-07, "loss": 0.3423, "step": 74 }, { "epoch": 0.005999880002399952, "grad_norm": 0.3090146188968856, "learning_rate": 9.866666666666668e-07, "loss": 0.2769, "step": 75 }, { "epoch": 0.006079878402431951, "grad_norm": 0.32206118968734543, "learning_rate": 1.0000000000000002e-06, "loss": 0.2957, "step": 76 }, { "epoch": 0.006159876802463951, "grad_norm": 0.31197354570045155, "learning_rate": 1.0133333333333333e-06, "loss": 0.3135, "step": 77 }, { "epoch": 0.00623987520249595, "grad_norm": 0.26084204240920733, "learning_rate": 1.0266666666666669e-06, "loss": 0.3344, "step": 78 }, { "epoch": 0.0063198736025279496, "grad_norm": 0.5554863834434538, "learning_rate": 1.04e-06, "loss": 0.2986, "step": 79 }, { "epoch": 0.006399872002559949, "grad_norm": 0.3437186634680115, "learning_rate": 1.0533333333333333e-06, "loss": 0.2904, "step": 80 }, { "epoch": 0.006479870402591948, "grad_norm": 0.3449567726972603, "learning_rate": 1.066666666666667e-06, "loss": 0.3552, "step": 81 }, { "epoch": 0.006559868802623947, "grad_norm": 0.34546186732465545, "learning_rate": 1.08e-06, "loss": 0.3182, "step": 82 }, { "epoch": 0.0066398672026559465, "grad_norm": 0.2914646622905985, "learning_rate": 1.0933333333333334e-06, "loss": 0.3257, "step": 83 }, { "epoch": 0.006719865602687947, "grad_norm": 0.31653187802750293, "learning_rate": 1.1066666666666667e-06, "loss": 0.3203, "step": 84 }, { "epoch": 0.006799864002719946, "grad_norm": 0.24428581351680245, "learning_rate": 1.12e-06, "loss": 0.3329, "step": 85 }, { "epoch": 0.006879862402751945, "grad_norm": 0.3264501191696231, "learning_rate": 1.1333333333333334e-06, "loss": 0.2985, "step": 86 }, { "epoch": 0.006959860802783944, "grad_norm": 0.331657711723914, "learning_rate": 1.1466666666666668e-06, "loss": 0.296, "step": 87 }, { "epoch": 0.007039859202815944, "grad_norm": 0.3482337100975519, "learning_rate": 1.1600000000000001e-06, "loss": 0.3218, "step": 88 }, { "epoch": 0.007119857602847943, "grad_norm": 0.3231501703266636, "learning_rate": 1.1733333333333335e-06, "loss": 0.328, "step": 89 }, { "epoch": 0.007199856002879942, "grad_norm": 0.26271769233482944, "learning_rate": 1.1866666666666668e-06, "loss": 0.3523, "step": 90 }, { "epoch": 0.007279854402911942, "grad_norm": 0.3271718103928566, "learning_rate": 1.2000000000000002e-06, "loss": 0.2787, "step": 91 }, { "epoch": 0.007359852802943941, "grad_norm": 0.2926708153372138, "learning_rate": 1.2133333333333335e-06, "loss": 0.3081, "step": 92 }, { "epoch": 0.007439851202975941, "grad_norm": 0.35069411873426115, "learning_rate": 1.2266666666666666e-06, "loss": 0.3011, "step": 93 }, { "epoch": 0.00751984960300794, "grad_norm": 0.2632834455201133, "learning_rate": 1.2400000000000002e-06, "loss": 0.3406, "step": 94 }, { "epoch": 0.007599848003039939, "grad_norm": 0.28331254478310913, "learning_rate": 1.2533333333333333e-06, "loss": 0.3247, "step": 95 }, { "epoch": 0.007679846403071938, "grad_norm": 0.38123973784502396, "learning_rate": 1.2666666666666669e-06, "loss": 0.295, "step": 96 }, { "epoch": 0.007759844803103938, "grad_norm": 0.26652668798241846, "learning_rate": 1.28e-06, "loss": 0.3176, "step": 97 }, { "epoch": 0.007839843203135938, "grad_norm": 0.24613725839199013, "learning_rate": 1.2933333333333334e-06, "loss": 0.3442, "step": 98 }, { "epoch": 0.007919841603167936, "grad_norm": 0.3396773807987067, "learning_rate": 1.3066666666666667e-06, "loss": 0.3068, "step": 99 }, { "epoch": 0.007999840003199936, "grad_norm": 0.2711808553445019, "learning_rate": 1.32e-06, "loss": 0.3127, "step": 100 }, { "epoch": 0.008079838403231935, "grad_norm": 0.32447170953166365, "learning_rate": 1.3333333333333334e-06, "loss": 0.298, "step": 101 }, { "epoch": 0.008159836803263935, "grad_norm": 0.34664107574619685, "learning_rate": 1.3466666666666668e-06, "loss": 0.2921, "step": 102 }, { "epoch": 0.008239835203295935, "grad_norm": 0.3120193430526548, "learning_rate": 1.3600000000000001e-06, "loss": 0.3122, "step": 103 }, { "epoch": 0.008319833603327933, "grad_norm": 0.22406001378841364, "learning_rate": 1.3733333333333335e-06, "loss": 0.3299, "step": 104 }, { "epoch": 0.008399832003359933, "grad_norm": 0.2073963799155983, "learning_rate": 1.3866666666666668e-06, "loss": 0.3748, "step": 105 }, { "epoch": 0.008479830403391932, "grad_norm": 0.3018638438274108, "learning_rate": 1.4000000000000001e-06, "loss": 0.3232, "step": 106 }, { "epoch": 0.008559828803423932, "grad_norm": 0.2682173287690338, "learning_rate": 1.4133333333333335e-06, "loss": 0.303, "step": 107 }, { "epoch": 0.00863982720345593, "grad_norm": 0.3308859366521956, "learning_rate": 1.4266666666666668e-06, "loss": 0.265, "step": 108 }, { "epoch": 0.00871982560348793, "grad_norm": 0.3261373953947699, "learning_rate": 1.44e-06, "loss": 0.3287, "step": 109 }, { "epoch": 0.00879982400351993, "grad_norm": 0.24759434596082106, "learning_rate": 1.4533333333333335e-06, "loss": 0.3327, "step": 110 }, { "epoch": 0.008879822403551929, "grad_norm": 0.2861526656612504, "learning_rate": 1.4666666666666669e-06, "loss": 0.331, "step": 111 }, { "epoch": 0.008959820803583929, "grad_norm": 0.3907065597658347, "learning_rate": 1.48e-06, "loss": 0.3278, "step": 112 }, { "epoch": 0.009039819203615927, "grad_norm": 0.3361061876380628, "learning_rate": 1.4933333333333336e-06, "loss": 0.2867, "step": 113 }, { "epoch": 0.009119817603647927, "grad_norm": 0.2737033370360045, "learning_rate": 1.506666666666667e-06, "loss": 0.3141, "step": 114 }, { "epoch": 0.009199816003679926, "grad_norm": 0.32057287945733126, "learning_rate": 1.52e-06, "loss": 0.3078, "step": 115 }, { "epoch": 0.009279814403711926, "grad_norm": 0.2880083112543209, "learning_rate": 1.5333333333333334e-06, "loss": 0.3265, "step": 116 }, { "epoch": 0.009359812803743926, "grad_norm": 0.3068585166271396, "learning_rate": 1.546666666666667e-06, "loss": 0.3072, "step": 117 }, { "epoch": 0.009439811203775924, "grad_norm": 0.3310708719534575, "learning_rate": 1.56e-06, "loss": 0.3308, "step": 118 }, { "epoch": 0.009519809603807924, "grad_norm": 0.35319095655987787, "learning_rate": 1.5733333333333334e-06, "loss": 0.2979, "step": 119 }, { "epoch": 0.009599808003839923, "grad_norm": 0.38291022014761195, "learning_rate": 1.586666666666667e-06, "loss": 0.3106, "step": 120 }, { "epoch": 0.009679806403871923, "grad_norm": 0.28039860675214856, "learning_rate": 1.6000000000000001e-06, "loss": 0.3451, "step": 121 }, { "epoch": 0.009759804803903921, "grad_norm": 0.30434607561402777, "learning_rate": 1.6133333333333335e-06, "loss": 0.2993, "step": 122 }, { "epoch": 0.009839803203935921, "grad_norm": 0.2814533190285778, "learning_rate": 1.6266666666666666e-06, "loss": 0.3162, "step": 123 }, { "epoch": 0.009919801603967921, "grad_norm": 0.3068835840774817, "learning_rate": 1.6400000000000002e-06, "loss": 0.3153, "step": 124 }, { "epoch": 0.00999980000399992, "grad_norm": 0.30208460253548947, "learning_rate": 1.6533333333333335e-06, "loss": 0.3212, "step": 125 }, { "epoch": 0.01007979840403192, "grad_norm": 0.23661646141165826, "learning_rate": 1.6666666666666667e-06, "loss": 0.3918, "step": 126 }, { "epoch": 0.010159796804063918, "grad_norm": 0.2982549924909667, "learning_rate": 1.6800000000000002e-06, "loss": 0.3343, "step": 127 }, { "epoch": 0.010239795204095918, "grad_norm": 0.2762048716470555, "learning_rate": 1.6933333333333336e-06, "loss": 0.3016, "step": 128 }, { "epoch": 0.010319793604127917, "grad_norm": 0.28168626997008966, "learning_rate": 1.7066666666666667e-06, "loss": 0.3308, "step": 129 }, { "epoch": 0.010399792004159917, "grad_norm": 0.2717980676095484, "learning_rate": 1.72e-06, "loss": 0.3179, "step": 130 }, { "epoch": 0.010479790404191917, "grad_norm": 0.2769762403645166, "learning_rate": 1.7333333333333336e-06, "loss": 0.3048, "step": 131 }, { "epoch": 0.010559788804223915, "grad_norm": 0.7155079454198295, "learning_rate": 1.7466666666666667e-06, "loss": 0.3132, "step": 132 }, { "epoch": 0.010639787204255916, "grad_norm": 0.32604722004289993, "learning_rate": 1.76e-06, "loss": 0.2984, "step": 133 }, { "epoch": 0.010719785604287914, "grad_norm": 0.28082026435241464, "learning_rate": 1.7733333333333336e-06, "loss": 0.3271, "step": 134 }, { "epoch": 0.010799784004319914, "grad_norm": 0.2580492809834986, "learning_rate": 1.7866666666666668e-06, "loss": 0.3271, "step": 135 }, { "epoch": 0.010879782404351912, "grad_norm": 0.23186860912525978, "learning_rate": 1.8000000000000001e-06, "loss": 0.3393, "step": 136 }, { "epoch": 0.010959780804383912, "grad_norm": 0.24050673417079582, "learning_rate": 1.8133333333333337e-06, "loss": 0.3286, "step": 137 }, { "epoch": 0.01103977920441591, "grad_norm": 0.33944889101431586, "learning_rate": 1.8266666666666668e-06, "loss": 0.3256, "step": 138 }, { "epoch": 0.011119777604447911, "grad_norm": 0.34861335103444774, "learning_rate": 1.8400000000000002e-06, "loss": 0.3147, "step": 139 }, { "epoch": 0.011199776004479911, "grad_norm": 0.3054947283984174, "learning_rate": 1.8533333333333333e-06, "loss": 0.277, "step": 140 }, { "epoch": 0.01127977440451191, "grad_norm": 0.25707925448175856, "learning_rate": 1.8666666666666669e-06, "loss": 0.3382, "step": 141 }, { "epoch": 0.01135977280454391, "grad_norm": 0.33699829514137974, "learning_rate": 1.8800000000000002e-06, "loss": 0.2822, "step": 142 }, { "epoch": 0.011439771204575908, "grad_norm": 0.35804014393151257, "learning_rate": 1.8933333333333333e-06, "loss": 0.3177, "step": 143 }, { "epoch": 0.011519769604607908, "grad_norm": 0.32390316520716644, "learning_rate": 1.906666666666667e-06, "loss": 0.2732, "step": 144 }, { "epoch": 0.011599768004639906, "grad_norm": 0.2814263862802696, "learning_rate": 1.9200000000000003e-06, "loss": 0.3068, "step": 145 }, { "epoch": 0.011679766404671907, "grad_norm": 0.2824335737318314, "learning_rate": 1.9333333333333336e-06, "loss": 0.3458, "step": 146 }, { "epoch": 0.011759764804703907, "grad_norm": 0.2526526201951397, "learning_rate": 1.9466666666666665e-06, "loss": 0.3573, "step": 147 }, { "epoch": 0.011839763204735905, "grad_norm": 0.3315184280731441, "learning_rate": 1.9600000000000003e-06, "loss": 0.327, "step": 148 }, { "epoch": 0.011919761604767905, "grad_norm": 0.2996394862715356, "learning_rate": 1.9733333333333336e-06, "loss": 0.297, "step": 149 }, { "epoch": 0.011999760004799903, "grad_norm": 0.28557116476251826, "learning_rate": 1.9866666666666666e-06, "loss": 0.3164, "step": 150 }, { "epoch": 0.012079758404831904, "grad_norm": 0.3474585203182206, "learning_rate": 2.0000000000000003e-06, "loss": 0.3063, "step": 151 }, { "epoch": 0.012159756804863902, "grad_norm": 0.2838233309413948, "learning_rate": 2.0133333333333337e-06, "loss": 0.3179, "step": 152 }, { "epoch": 0.012239755204895902, "grad_norm": 0.27615612365628756, "learning_rate": 2.0266666666666666e-06, "loss": 0.3108, "step": 153 }, { "epoch": 0.012319753604927902, "grad_norm": 0.24142417286479195, "learning_rate": 2.04e-06, "loss": 0.3283, "step": 154 }, { "epoch": 0.0123997520049599, "grad_norm": 0.290926430945227, "learning_rate": 2.0533333333333337e-06, "loss": 0.3385, "step": 155 }, { "epoch": 0.0124797504049919, "grad_norm": 0.3132276711601257, "learning_rate": 2.0666666666666666e-06, "loss": 0.2807, "step": 156 }, { "epoch": 0.012559748805023899, "grad_norm": 0.34458617055163127, "learning_rate": 2.08e-06, "loss": 0.3206, "step": 157 }, { "epoch": 0.012639747205055899, "grad_norm": 0.3023273595363232, "learning_rate": 2.0933333333333338e-06, "loss": 0.3014, "step": 158 }, { "epoch": 0.012719745605087898, "grad_norm": 0.25745242084237335, "learning_rate": 2.1066666666666667e-06, "loss": 0.3254, "step": 159 }, { "epoch": 0.012799744005119898, "grad_norm": 0.32493655477608646, "learning_rate": 2.12e-06, "loss": 0.2883, "step": 160 }, { "epoch": 0.012879742405151898, "grad_norm": 0.28080304349663776, "learning_rate": 2.133333333333334e-06, "loss": 0.3022, "step": 161 }, { "epoch": 0.012959740805183896, "grad_norm": 0.32367069405436416, "learning_rate": 2.1466666666666667e-06, "loss": 0.298, "step": 162 }, { "epoch": 0.013039739205215896, "grad_norm": 0.4864549162381663, "learning_rate": 2.16e-06, "loss": 0.328, "step": 163 }, { "epoch": 0.013119737605247895, "grad_norm": 0.29610513405004935, "learning_rate": 2.1733333333333334e-06, "loss": 0.3253, "step": 164 }, { "epoch": 0.013199736005279895, "grad_norm": 0.3151967084316128, "learning_rate": 2.1866666666666668e-06, "loss": 0.2984, "step": 165 }, { "epoch": 0.013279734405311893, "grad_norm": 0.3916853549551865, "learning_rate": 2.2e-06, "loss": 0.2801, "step": 166 }, { "epoch": 0.013359732805343893, "grad_norm": 0.30601304519609385, "learning_rate": 2.2133333333333335e-06, "loss": 0.2888, "step": 167 }, { "epoch": 0.013439731205375893, "grad_norm": 0.3381241678567355, "learning_rate": 2.226666666666667e-06, "loss": 0.2906, "step": 168 }, { "epoch": 0.013519729605407892, "grad_norm": 0.28894608440476305, "learning_rate": 2.24e-06, "loss": 0.3187, "step": 169 }, { "epoch": 0.013599728005439892, "grad_norm": 0.22955562250062986, "learning_rate": 2.2533333333333335e-06, "loss": 0.3267, "step": 170 }, { "epoch": 0.01367972640547189, "grad_norm": 0.30934833995723104, "learning_rate": 2.266666666666667e-06, "loss": 0.2846, "step": 171 }, { "epoch": 0.01375972480550389, "grad_norm": 0.30865767644143693, "learning_rate": 2.28e-06, "loss": 0.2863, "step": 172 }, { "epoch": 0.013839723205535889, "grad_norm": 0.34305395987637777, "learning_rate": 2.2933333333333335e-06, "loss": 0.3204, "step": 173 }, { "epoch": 0.013919721605567889, "grad_norm": 0.6126986499342134, "learning_rate": 2.306666666666667e-06, "loss": 0.2926, "step": 174 }, { "epoch": 0.013999720005599889, "grad_norm": 0.2469095808190779, "learning_rate": 2.3200000000000002e-06, "loss": 0.3504, "step": 175 }, { "epoch": 0.014079718405631887, "grad_norm": 0.5492167929499058, "learning_rate": 2.3333333333333336e-06, "loss": 0.3077, "step": 176 }, { "epoch": 0.014159716805663887, "grad_norm": 0.25598488910292205, "learning_rate": 2.346666666666667e-06, "loss": 0.3102, "step": 177 }, { "epoch": 0.014239715205695886, "grad_norm": 0.3168172064990585, "learning_rate": 2.3600000000000003e-06, "loss": 0.2991, "step": 178 }, { "epoch": 0.014319713605727886, "grad_norm": 0.3075481865433467, "learning_rate": 2.3733333333333336e-06, "loss": 0.2994, "step": 179 }, { "epoch": 0.014399712005759884, "grad_norm": 0.3251897713263359, "learning_rate": 2.386666666666667e-06, "loss": 0.2996, "step": 180 }, { "epoch": 0.014479710405791884, "grad_norm": 0.27217892447144404, "learning_rate": 2.4000000000000003e-06, "loss": 0.3358, "step": 181 }, { "epoch": 0.014559708805823884, "grad_norm": 0.3455998928690932, "learning_rate": 2.4133333333333337e-06, "loss": 0.3328, "step": 182 }, { "epoch": 0.014639707205855883, "grad_norm": 0.2731181827584468, "learning_rate": 2.426666666666667e-06, "loss": 0.3282, "step": 183 }, { "epoch": 0.014719705605887883, "grad_norm": 0.28232584736296523, "learning_rate": 2.4400000000000004e-06, "loss": 0.3025, "step": 184 }, { "epoch": 0.014799704005919881, "grad_norm": 0.30617747120959193, "learning_rate": 2.4533333333333333e-06, "loss": 0.2767, "step": 185 }, { "epoch": 0.014879702405951881, "grad_norm": 0.26242050632790365, "learning_rate": 2.466666666666667e-06, "loss": 0.386, "step": 186 }, { "epoch": 0.01495970080598388, "grad_norm": 0.33121529206856665, "learning_rate": 2.4800000000000004e-06, "loss": 0.2838, "step": 187 }, { "epoch": 0.01503969920601588, "grad_norm": 0.249645253775799, "learning_rate": 2.4933333333333333e-06, "loss": 0.3549, "step": 188 }, { "epoch": 0.015119697606047878, "grad_norm": 0.2342341076903019, "learning_rate": 2.5066666666666667e-06, "loss": 0.3365, "step": 189 }, { "epoch": 0.015199696006079878, "grad_norm": 0.3104847568722879, "learning_rate": 2.52e-06, "loss": 0.2963, "step": 190 }, { "epoch": 0.015279694406111878, "grad_norm": 0.3170305859768765, "learning_rate": 2.5333333333333338e-06, "loss": 0.276, "step": 191 }, { "epoch": 0.015359692806143877, "grad_norm": 0.3483537982466923, "learning_rate": 2.5466666666666667e-06, "loss": 0.2738, "step": 192 }, { "epoch": 0.015439691206175877, "grad_norm": 0.3105215828837361, "learning_rate": 2.56e-06, "loss": 0.33, "step": 193 }, { "epoch": 0.015519689606207875, "grad_norm": 0.27326728991656857, "learning_rate": 2.573333333333334e-06, "loss": 0.3052, "step": 194 }, { "epoch": 0.015599688006239875, "grad_norm": 0.33989069089174095, "learning_rate": 2.5866666666666667e-06, "loss": 0.3009, "step": 195 }, { "epoch": 0.015679686406271875, "grad_norm": 0.28612282268628964, "learning_rate": 2.6e-06, "loss": 0.3059, "step": 196 }, { "epoch": 0.015759684806303874, "grad_norm": 0.2718603913846913, "learning_rate": 2.6133333333333334e-06, "loss": 0.3011, "step": 197 }, { "epoch": 0.015839683206335872, "grad_norm": 0.2614884604702425, "learning_rate": 2.6266666666666668e-06, "loss": 0.3548, "step": 198 }, { "epoch": 0.015919681606367874, "grad_norm": 0.27931581114935067, "learning_rate": 2.64e-06, "loss": 0.3019, "step": 199 }, { "epoch": 0.015999680006399872, "grad_norm": 0.2915159304805085, "learning_rate": 2.6533333333333335e-06, "loss": 0.3341, "step": 200 }, { "epoch": 0.01607967840643187, "grad_norm": 0.2926927148294065, "learning_rate": 2.666666666666667e-06, "loss": 0.319, "step": 201 }, { "epoch": 0.01615967680646387, "grad_norm": 0.3312925133896497, "learning_rate": 2.68e-06, "loss": 0.2729, "step": 202 }, { "epoch": 0.01623967520649587, "grad_norm": 0.2179060164387047, "learning_rate": 2.6933333333333335e-06, "loss": 0.3681, "step": 203 }, { "epoch": 0.01631967360652787, "grad_norm": 0.3347584423990397, "learning_rate": 2.706666666666667e-06, "loss": 0.2767, "step": 204 }, { "epoch": 0.016399672006559868, "grad_norm": 0.33366562549913675, "learning_rate": 2.7200000000000002e-06, "loss": 0.2604, "step": 205 }, { "epoch": 0.01647967040659187, "grad_norm": 0.26367098633160124, "learning_rate": 2.7333333333333336e-06, "loss": 0.3546, "step": 206 }, { "epoch": 0.016559668806623868, "grad_norm": 0.27008295534598054, "learning_rate": 2.746666666666667e-06, "loss": 0.3559, "step": 207 }, { "epoch": 0.016639667206655866, "grad_norm": 0.3291417108207968, "learning_rate": 2.7600000000000003e-06, "loss": 0.2895, "step": 208 }, { "epoch": 0.016719665606687865, "grad_norm": 0.32160900992127883, "learning_rate": 2.7733333333333336e-06, "loss": 0.3265, "step": 209 }, { "epoch": 0.016799664006719867, "grad_norm": 0.3248210046270003, "learning_rate": 2.786666666666667e-06, "loss": 0.2655, "step": 210 }, { "epoch": 0.016879662406751865, "grad_norm": 0.3115821033219484, "learning_rate": 2.8000000000000003e-06, "loss": 0.3139, "step": 211 }, { "epoch": 0.016959660806783863, "grad_norm": 0.3563420955853021, "learning_rate": 2.8133333333333336e-06, "loss": 0.2991, "step": 212 }, { "epoch": 0.017039659206815865, "grad_norm": 0.3328634897760365, "learning_rate": 2.826666666666667e-06, "loss": 0.2986, "step": 213 }, { "epoch": 0.017119657606847864, "grad_norm": 0.33190746734616233, "learning_rate": 2.84e-06, "loss": 0.3162, "step": 214 }, { "epoch": 0.017199656006879862, "grad_norm": 0.3198933164469127, "learning_rate": 2.8533333333333337e-06, "loss": 0.3012, "step": 215 }, { "epoch": 0.01727965440691186, "grad_norm": 0.27769053245211817, "learning_rate": 2.866666666666667e-06, "loss": 0.2983, "step": 216 }, { "epoch": 0.017359652806943862, "grad_norm": 0.2343062711718492, "learning_rate": 2.88e-06, "loss": 0.34, "step": 217 }, { "epoch": 0.01743965120697586, "grad_norm": 0.24325308505926416, "learning_rate": 2.8933333333333337e-06, "loss": 0.3422, "step": 218 }, { "epoch": 0.01751964960700786, "grad_norm": 0.308235214312061, "learning_rate": 2.906666666666667e-06, "loss": 0.3263, "step": 219 }, { "epoch": 0.01759964800703986, "grad_norm": 0.34781277296217233, "learning_rate": 2.92e-06, "loss": 0.2939, "step": 220 }, { "epoch": 0.01767964640707186, "grad_norm": 0.34775821006879976, "learning_rate": 2.9333333333333338e-06, "loss": 0.3055, "step": 221 }, { "epoch": 0.017759644807103857, "grad_norm": 0.27283654468903656, "learning_rate": 2.946666666666667e-06, "loss": 0.3121, "step": 222 }, { "epoch": 0.017839643207135856, "grad_norm": 0.2615045670797844, "learning_rate": 2.96e-06, "loss": 0.3394, "step": 223 }, { "epoch": 0.017919641607167858, "grad_norm": 0.28460403063393813, "learning_rate": 2.973333333333334e-06, "loss": 0.2963, "step": 224 }, { "epoch": 0.017999640007199856, "grad_norm": 0.2769691341015105, "learning_rate": 2.986666666666667e-06, "loss": 0.3057, "step": 225 }, { "epoch": 0.018079638407231854, "grad_norm": 0.3349651366664022, "learning_rate": 3e-06, "loss": 0.3228, "step": 226 }, { "epoch": 0.018159636807263856, "grad_norm": 0.26785345594532933, "learning_rate": 3.013333333333334e-06, "loss": 0.3615, "step": 227 }, { "epoch": 0.018239635207295855, "grad_norm": 0.2591169067604971, "learning_rate": 3.0266666666666668e-06, "loss": 0.3542, "step": 228 }, { "epoch": 0.018319633607327853, "grad_norm": 0.22763209752794938, "learning_rate": 3.04e-06, "loss": 0.3627, "step": 229 }, { "epoch": 0.01839963200735985, "grad_norm": 0.3218481945467067, "learning_rate": 3.053333333333334e-06, "loss": 0.3161, "step": 230 }, { "epoch": 0.018479630407391853, "grad_norm": 0.3462518883470296, "learning_rate": 3.066666666666667e-06, "loss": 0.2941, "step": 231 }, { "epoch": 0.01855962880742385, "grad_norm": 0.3051645638246147, "learning_rate": 3.08e-06, "loss": 0.2911, "step": 232 }, { "epoch": 0.01863962720745585, "grad_norm": 0.28833639974298064, "learning_rate": 3.093333333333334e-06, "loss": 0.3289, "step": 233 }, { "epoch": 0.018719625607487852, "grad_norm": 0.2919252438417406, "learning_rate": 3.106666666666667e-06, "loss": 0.3028, "step": 234 }, { "epoch": 0.01879962400751985, "grad_norm": 0.3060051978435512, "learning_rate": 3.12e-06, "loss": 0.3081, "step": 235 }, { "epoch": 0.01887962240755185, "grad_norm": 0.26465653917640974, "learning_rate": 3.133333333333334e-06, "loss": 0.3343, "step": 236 }, { "epoch": 0.018959620807583847, "grad_norm": 0.26619948422474793, "learning_rate": 3.146666666666667e-06, "loss": 0.3242, "step": 237 }, { "epoch": 0.01903961920761585, "grad_norm": 0.4546845613794587, "learning_rate": 3.1600000000000002e-06, "loss": 0.2977, "step": 238 }, { "epoch": 0.019119617607647847, "grad_norm": 0.2817255172513679, "learning_rate": 3.173333333333334e-06, "loss": 0.3271, "step": 239 }, { "epoch": 0.019199616007679846, "grad_norm": 0.19494477035554048, "learning_rate": 3.186666666666667e-06, "loss": 0.3957, "step": 240 }, { "epoch": 0.019279614407711847, "grad_norm": 0.31053646058844525, "learning_rate": 3.2000000000000003e-06, "loss": 0.2888, "step": 241 }, { "epoch": 0.019359612807743846, "grad_norm": 0.3102409308782587, "learning_rate": 3.213333333333334e-06, "loss": 0.2894, "step": 242 }, { "epoch": 0.019439611207775844, "grad_norm": 0.31258675349254783, "learning_rate": 3.226666666666667e-06, "loss": 0.3442, "step": 243 }, { "epoch": 0.019519609607807843, "grad_norm": 0.24097887922992603, "learning_rate": 3.2400000000000003e-06, "loss": 0.3371, "step": 244 }, { "epoch": 0.019599608007839844, "grad_norm": 0.2714303907685429, "learning_rate": 3.2533333333333332e-06, "loss": 0.3193, "step": 245 }, { "epoch": 0.019679606407871843, "grad_norm": 0.3043463393020328, "learning_rate": 3.266666666666667e-06, "loss": 0.3245, "step": 246 }, { "epoch": 0.01975960480790384, "grad_norm": 0.22279006432469095, "learning_rate": 3.2800000000000004e-06, "loss": 0.3263, "step": 247 }, { "epoch": 0.019839603207935843, "grad_norm": 0.27346275057272, "learning_rate": 3.2933333333333333e-06, "loss": 0.2966, "step": 248 }, { "epoch": 0.01991960160796784, "grad_norm": 0.2338181782737284, "learning_rate": 3.306666666666667e-06, "loss": 0.3484, "step": 249 }, { "epoch": 0.01999960000799984, "grad_norm": 0.5860536705677688, "learning_rate": 3.3200000000000004e-06, "loss": 0.3222, "step": 250 }, { "epoch": 0.020079598408031838, "grad_norm": 0.285745082606006, "learning_rate": 3.3333333333333333e-06, "loss": 0.3388, "step": 251 }, { "epoch": 0.02015959680806384, "grad_norm": 0.28754198669729975, "learning_rate": 3.346666666666667e-06, "loss": 0.3248, "step": 252 }, { "epoch": 0.02023959520809584, "grad_norm": 0.29454098602666395, "learning_rate": 3.3600000000000004e-06, "loss": 0.3337, "step": 253 }, { "epoch": 0.020319593608127837, "grad_norm": 0.31440723855089225, "learning_rate": 3.3733333333333334e-06, "loss": 0.2931, "step": 254 }, { "epoch": 0.02039959200815984, "grad_norm": 0.2466671934492438, "learning_rate": 3.386666666666667e-06, "loss": 0.3158, "step": 255 }, { "epoch": 0.020479590408191837, "grad_norm": 0.35549219828394024, "learning_rate": 3.4000000000000005e-06, "loss": 0.2902, "step": 256 }, { "epoch": 0.020559588808223835, "grad_norm": 0.26736274497055573, "learning_rate": 3.4133333333333334e-06, "loss": 0.3105, "step": 257 }, { "epoch": 0.020639587208255834, "grad_norm": 0.24379641065263255, "learning_rate": 3.426666666666667e-06, "loss": 0.337, "step": 258 }, { "epoch": 0.020719585608287835, "grad_norm": 0.26720088550300264, "learning_rate": 3.44e-06, "loss": 0.2836, "step": 259 }, { "epoch": 0.020799584008319834, "grad_norm": 0.29918995189544784, "learning_rate": 3.4533333333333334e-06, "loss": 0.3187, "step": 260 }, { "epoch": 0.020879582408351832, "grad_norm": 0.2908575401893748, "learning_rate": 3.4666666666666672e-06, "loss": 0.3534, "step": 261 }, { "epoch": 0.020959580808383834, "grad_norm": 0.20321947169654234, "learning_rate": 3.48e-06, "loss": 0.3518, "step": 262 }, { "epoch": 0.021039579208415832, "grad_norm": 0.27410965008950094, "learning_rate": 3.4933333333333335e-06, "loss": 0.308, "step": 263 }, { "epoch": 0.02111957760844783, "grad_norm": 0.3236580142952976, "learning_rate": 3.5066666666666673e-06, "loss": 0.3167, "step": 264 }, { "epoch": 0.02119957600847983, "grad_norm": 0.33185429800955873, "learning_rate": 3.52e-06, "loss": 0.3114, "step": 265 }, { "epoch": 0.02127957440851183, "grad_norm": 0.2561620835312766, "learning_rate": 3.5333333333333335e-06, "loss": 0.3494, "step": 266 }, { "epoch": 0.02135957280854383, "grad_norm": 0.3412288207221602, "learning_rate": 3.5466666666666673e-06, "loss": 0.3035, "step": 267 }, { "epoch": 0.021439571208575828, "grad_norm": 0.29847707968616566, "learning_rate": 3.5600000000000002e-06, "loss": 0.2871, "step": 268 }, { "epoch": 0.021519569608607826, "grad_norm": 0.28849358223805194, "learning_rate": 3.5733333333333336e-06, "loss": 0.3141, "step": 269 }, { "epoch": 0.021599568008639828, "grad_norm": 0.3262848070532352, "learning_rate": 3.5866666666666673e-06, "loss": 0.2829, "step": 270 }, { "epoch": 0.021679566408671826, "grad_norm": 0.2810840132422428, "learning_rate": 3.6000000000000003e-06, "loss": 0.3146, "step": 271 }, { "epoch": 0.021759564808703825, "grad_norm": 0.2507903447564924, "learning_rate": 3.6133333333333336e-06, "loss": 0.3319, "step": 272 }, { "epoch": 0.021839563208735827, "grad_norm": 0.25539464054896616, "learning_rate": 3.6266666666666674e-06, "loss": 0.3488, "step": 273 }, { "epoch": 0.021919561608767825, "grad_norm": 0.2850274726617135, "learning_rate": 3.6400000000000003e-06, "loss": 0.318, "step": 274 }, { "epoch": 0.021999560008799823, "grad_norm": 0.2888100552701197, "learning_rate": 3.6533333333333336e-06, "loss": 0.3178, "step": 275 }, { "epoch": 0.02207955840883182, "grad_norm": 0.31353112975835506, "learning_rate": 3.6666666666666666e-06, "loss": 0.3249, "step": 276 }, { "epoch": 0.022159556808863824, "grad_norm": 0.2468922489531051, "learning_rate": 3.6800000000000003e-06, "loss": 0.3302, "step": 277 }, { "epoch": 0.022239555208895822, "grad_norm": 0.25082212114864855, "learning_rate": 3.6933333333333337e-06, "loss": 0.348, "step": 278 }, { "epoch": 0.02231955360892782, "grad_norm": 0.2829664029581924, "learning_rate": 3.7066666666666666e-06, "loss": 0.2986, "step": 279 }, { "epoch": 0.022399552008959822, "grad_norm": 0.32273623185519507, "learning_rate": 3.7200000000000004e-06, "loss": 0.2948, "step": 280 }, { "epoch": 0.02247955040899182, "grad_norm": 0.23166747886010355, "learning_rate": 3.7333333333333337e-06, "loss": 0.3714, "step": 281 }, { "epoch": 0.02255954880902382, "grad_norm": 0.29795140352711585, "learning_rate": 3.7466666666666667e-06, "loss": 0.3234, "step": 282 }, { "epoch": 0.022639547209055817, "grad_norm": 0.2843383420956478, "learning_rate": 3.7600000000000004e-06, "loss": 0.3229, "step": 283 }, { "epoch": 0.02271954560908782, "grad_norm": 0.3224715369377984, "learning_rate": 3.7733333333333338e-06, "loss": 0.2782, "step": 284 }, { "epoch": 0.022799544009119817, "grad_norm": 0.25562952206538647, "learning_rate": 3.7866666666666667e-06, "loss": 0.3489, "step": 285 }, { "epoch": 0.022879542409151816, "grad_norm": 0.3166754235113625, "learning_rate": 3.8000000000000005e-06, "loss": 0.2673, "step": 286 }, { "epoch": 0.022959540809183818, "grad_norm": 0.2997155781122687, "learning_rate": 3.813333333333334e-06, "loss": 0.3084, "step": 287 }, { "epoch": 0.023039539209215816, "grad_norm": 0.30596102593439195, "learning_rate": 3.826666666666667e-06, "loss": 0.3125, "step": 288 }, { "epoch": 0.023119537609247814, "grad_norm": 0.3209867136184037, "learning_rate": 3.8400000000000005e-06, "loss": 0.2846, "step": 289 }, { "epoch": 0.023199536009279813, "grad_norm": 0.19881600823457748, "learning_rate": 3.853333333333334e-06, "loss": 0.382, "step": 290 }, { "epoch": 0.023279534409311815, "grad_norm": 0.310407455101293, "learning_rate": 3.866666666666667e-06, "loss": 0.291, "step": 291 }, { "epoch": 0.023359532809343813, "grad_norm": 0.3007535639216627, "learning_rate": 3.88e-06, "loss": 0.3378, "step": 292 }, { "epoch": 0.02343953120937581, "grad_norm": 0.29921480775916937, "learning_rate": 3.893333333333333e-06, "loss": 0.3118, "step": 293 }, { "epoch": 0.023519529609407813, "grad_norm": 0.38457980029073435, "learning_rate": 3.906666666666667e-06, "loss": 0.2927, "step": 294 }, { "epoch": 0.02359952800943981, "grad_norm": 0.30980457406830697, "learning_rate": 3.920000000000001e-06, "loss": 0.2821, "step": 295 }, { "epoch": 0.02367952640947181, "grad_norm": 0.31204294392915943, "learning_rate": 3.9333333333333335e-06, "loss": 0.3232, "step": 296 }, { "epoch": 0.02375952480950381, "grad_norm": 0.37429849506928003, "learning_rate": 3.946666666666667e-06, "loss": 0.2985, "step": 297 }, { "epoch": 0.02383952320953581, "grad_norm": 0.28736993796258586, "learning_rate": 3.96e-06, "loss": 0.3193, "step": 298 }, { "epoch": 0.02391952160956781, "grad_norm": 0.29400874352009126, "learning_rate": 3.973333333333333e-06, "loss": 0.3001, "step": 299 }, { "epoch": 0.023999520009599807, "grad_norm": 0.30070683796841075, "learning_rate": 3.986666666666667e-06, "loss": 0.3235, "step": 300 }, { "epoch": 0.02407951840963181, "grad_norm": 0.33673323996864846, "learning_rate": 4.000000000000001e-06, "loss": 0.3149, "step": 301 }, { "epoch": 0.024159516809663807, "grad_norm": 0.5915871723118653, "learning_rate": 4.013333333333334e-06, "loss": 0.2946, "step": 302 }, { "epoch": 0.024239515209695806, "grad_norm": 0.34529717455075243, "learning_rate": 4.026666666666667e-06, "loss": 0.3, "step": 303 }, { "epoch": 0.024319513609727804, "grad_norm": 0.3446956777151793, "learning_rate": 4.04e-06, "loss": 0.2934, "step": 304 }, { "epoch": 0.024399512009759806, "grad_norm": 0.2984152835975952, "learning_rate": 4.053333333333333e-06, "loss": 0.3182, "step": 305 }, { "epoch": 0.024479510409791804, "grad_norm": 0.2508654937902377, "learning_rate": 4.066666666666667e-06, "loss": 0.3331, "step": 306 }, { "epoch": 0.024559508809823802, "grad_norm": 0.3086005504622366, "learning_rate": 4.08e-06, "loss": 0.28, "step": 307 }, { "epoch": 0.024639507209855804, "grad_norm": 0.3250461069503589, "learning_rate": 4.093333333333334e-06, "loss": 0.3038, "step": 308 }, { "epoch": 0.024719505609887803, "grad_norm": 0.2974487401261438, "learning_rate": 4.1066666666666674e-06, "loss": 0.281, "step": 309 }, { "epoch": 0.0247995040099198, "grad_norm": 0.2826114481260454, "learning_rate": 4.12e-06, "loss": 0.3027, "step": 310 }, { "epoch": 0.0248795024099518, "grad_norm": 0.26723447852426074, "learning_rate": 4.133333333333333e-06, "loss": 0.3315, "step": 311 }, { "epoch": 0.0249595008099838, "grad_norm": 0.3308253861904821, "learning_rate": 4.146666666666667e-06, "loss": 0.3034, "step": 312 }, { "epoch": 0.0250394992100158, "grad_norm": 0.3011186680573111, "learning_rate": 4.16e-06, "loss": 0.3112, "step": 313 }, { "epoch": 0.025119497610047798, "grad_norm": 0.29767619437973025, "learning_rate": 4.173333333333334e-06, "loss": 0.315, "step": 314 }, { "epoch": 0.0251994960100798, "grad_norm": 0.3317550541125557, "learning_rate": 4.1866666666666675e-06, "loss": 0.2821, "step": 315 }, { "epoch": 0.025279494410111798, "grad_norm": 0.29864154407830745, "learning_rate": 4.2000000000000004e-06, "loss": 0.3086, "step": 316 }, { "epoch": 0.025359492810143797, "grad_norm": 0.31372143566377436, "learning_rate": 4.213333333333333e-06, "loss": 0.334, "step": 317 }, { "epoch": 0.025439491210175795, "grad_norm": 0.28286029132169016, "learning_rate": 4.226666666666667e-06, "loss": 0.3124, "step": 318 }, { "epoch": 0.025519489610207797, "grad_norm": 0.28343166514808793, "learning_rate": 4.24e-06, "loss": 0.2962, "step": 319 }, { "epoch": 0.025599488010239795, "grad_norm": 0.30091351251625587, "learning_rate": 4.253333333333334e-06, "loss": 0.3192, "step": 320 }, { "epoch": 0.025679486410271794, "grad_norm": 0.4069227954473359, "learning_rate": 4.266666666666668e-06, "loss": 0.3123, "step": 321 }, { "epoch": 0.025759484810303795, "grad_norm": 0.30520424644249466, "learning_rate": 4.2800000000000005e-06, "loss": 0.3165, "step": 322 }, { "epoch": 0.025839483210335794, "grad_norm": 0.3106393666632331, "learning_rate": 4.2933333333333334e-06, "loss": 0.3106, "step": 323 }, { "epoch": 0.025919481610367792, "grad_norm": 0.31385237368859015, "learning_rate": 4.306666666666666e-06, "loss": 0.2776, "step": 324 }, { "epoch": 0.02599948001039979, "grad_norm": 0.3595754742533826, "learning_rate": 4.32e-06, "loss": 0.2775, "step": 325 }, { "epoch": 0.026079478410431792, "grad_norm": 0.29918408080553893, "learning_rate": 4.333333333333334e-06, "loss": 0.2779, "step": 326 }, { "epoch": 0.02615947681046379, "grad_norm": 0.3137959946957865, "learning_rate": 4.346666666666667e-06, "loss": 0.2743, "step": 327 }, { "epoch": 0.02623947521049579, "grad_norm": 0.2971425585858211, "learning_rate": 4.360000000000001e-06, "loss": 0.3164, "step": 328 }, { "epoch": 0.02631947361052779, "grad_norm": 0.3002236460836409, "learning_rate": 4.3733333333333335e-06, "loss": 0.3046, "step": 329 }, { "epoch": 0.02639947201055979, "grad_norm": 0.32795878075933393, "learning_rate": 4.3866666666666665e-06, "loss": 0.298, "step": 330 }, { "epoch": 0.026479470410591788, "grad_norm": 0.3198183211880077, "learning_rate": 4.4e-06, "loss": 0.2778, "step": 331 }, { "epoch": 0.026559468810623786, "grad_norm": 0.8442752941754909, "learning_rate": 4.413333333333334e-06, "loss": 0.316, "step": 332 }, { "epoch": 0.026639467210655788, "grad_norm": 0.3285862245822231, "learning_rate": 4.426666666666667e-06, "loss": 0.3014, "step": 333 }, { "epoch": 0.026719465610687786, "grad_norm": 0.3268362991602705, "learning_rate": 4.440000000000001e-06, "loss": 0.2932, "step": 334 }, { "epoch": 0.026799464010719785, "grad_norm": 0.34828548322000935, "learning_rate": 4.453333333333334e-06, "loss": 0.2936, "step": 335 }, { "epoch": 0.026879462410751787, "grad_norm": 0.36401067477247345, "learning_rate": 4.4666666666666665e-06, "loss": 0.2669, "step": 336 }, { "epoch": 0.026959460810783785, "grad_norm": 0.30458351664481537, "learning_rate": 4.48e-06, "loss": 0.2808, "step": 337 }, { "epoch": 0.027039459210815783, "grad_norm": 0.2827353121459644, "learning_rate": 4.493333333333333e-06, "loss": 0.3226, "step": 338 }, { "epoch": 0.02711945761084778, "grad_norm": 0.3917300253429823, "learning_rate": 4.506666666666667e-06, "loss": 0.3077, "step": 339 }, { "epoch": 0.027199456010879784, "grad_norm": 0.35563710777596164, "learning_rate": 4.520000000000001e-06, "loss": 0.2813, "step": 340 }, { "epoch": 0.027279454410911782, "grad_norm": 0.36915547194942855, "learning_rate": 4.533333333333334e-06, "loss": 0.286, "step": 341 }, { "epoch": 0.02735945281094378, "grad_norm": 0.33409295314600185, "learning_rate": 4.546666666666667e-06, "loss": 0.2822, "step": 342 }, { "epoch": 0.027439451210975782, "grad_norm": 0.31416386957118053, "learning_rate": 4.56e-06, "loss": 0.3153, "step": 343 }, { "epoch": 0.02751944961100778, "grad_norm": 0.3019158422957072, "learning_rate": 4.573333333333333e-06, "loss": 0.3024, "step": 344 }, { "epoch": 0.02759944801103978, "grad_norm": 0.3100351093467754, "learning_rate": 4.586666666666667e-06, "loss": 0.3185, "step": 345 }, { "epoch": 0.027679446411071777, "grad_norm": 0.3014305733934458, "learning_rate": 4.600000000000001e-06, "loss": 0.3339, "step": 346 }, { "epoch": 0.02775944481110378, "grad_norm": 0.28706731839929256, "learning_rate": 4.613333333333334e-06, "loss": 0.2886, "step": 347 }, { "epoch": 0.027839443211135777, "grad_norm": 0.29222632215345756, "learning_rate": 4.626666666666667e-06, "loss": 0.3016, "step": 348 }, { "epoch": 0.027919441611167776, "grad_norm": 0.2782406768670366, "learning_rate": 4.6400000000000005e-06, "loss": 0.3402, "step": 349 }, { "epoch": 0.027999440011199778, "grad_norm": 0.38339644650291627, "learning_rate": 4.653333333333333e-06, "loss": 0.2998, "step": 350 }, { "epoch": 0.028079438411231776, "grad_norm": 0.3317118638530234, "learning_rate": 4.666666666666667e-06, "loss": 0.3235, "step": 351 }, { "epoch": 0.028159436811263774, "grad_norm": 0.32472311190940556, "learning_rate": 4.680000000000001e-06, "loss": 0.326, "step": 352 }, { "epoch": 0.028239435211295773, "grad_norm": 0.3076013504127327, "learning_rate": 4.693333333333334e-06, "loss": 0.2994, "step": 353 }, { "epoch": 0.028319433611327775, "grad_norm": 0.3304973225502543, "learning_rate": 4.706666666666667e-06, "loss": 0.3097, "step": 354 }, { "epoch": 0.028399432011359773, "grad_norm": 1.3978856065414307, "learning_rate": 4.7200000000000005e-06, "loss": 0.3172, "step": 355 }, { "epoch": 0.02847943041139177, "grad_norm": 0.3491918327195412, "learning_rate": 4.7333333333333335e-06, "loss": 0.307, "step": 356 }, { "epoch": 0.028559428811423773, "grad_norm": 0.25924244706907446, "learning_rate": 4.746666666666667e-06, "loss": 0.3225, "step": 357 }, { "epoch": 0.02863942721145577, "grad_norm": 0.3101495324155655, "learning_rate": 4.76e-06, "loss": 0.3293, "step": 358 }, { "epoch": 0.02871942561148777, "grad_norm": 0.35678900095091365, "learning_rate": 4.773333333333334e-06, "loss": 0.2805, "step": 359 }, { "epoch": 0.02879942401151977, "grad_norm": 0.3067994625744902, "learning_rate": 4.786666666666667e-06, "loss": 0.3176, "step": 360 }, { "epoch": 0.02887942241155177, "grad_norm": 0.24092945275116656, "learning_rate": 4.800000000000001e-06, "loss": 0.3469, "step": 361 }, { "epoch": 0.02895942081158377, "grad_norm": 0.34334792749258597, "learning_rate": 4.8133333333333336e-06, "loss": 0.295, "step": 362 }, { "epoch": 0.029039419211615767, "grad_norm": 0.3321900781436285, "learning_rate": 4.826666666666667e-06, "loss": 0.2995, "step": 363 }, { "epoch": 0.02911941761164777, "grad_norm": 0.25049811240530573, "learning_rate": 4.84e-06, "loss": 0.3508, "step": 364 }, { "epoch": 0.029199416011679767, "grad_norm": 0.3607572045401755, "learning_rate": 4.853333333333334e-06, "loss": 0.3096, "step": 365 }, { "epoch": 0.029279414411711766, "grad_norm": 0.3294829442818441, "learning_rate": 4.866666666666667e-06, "loss": 0.2691, "step": 366 }, { "epoch": 0.029359412811743764, "grad_norm": 0.5526739565126829, "learning_rate": 4.880000000000001e-06, "loss": 0.298, "step": 367 }, { "epoch": 0.029439411211775766, "grad_norm": 0.3335696256707955, "learning_rate": 4.893333333333334e-06, "loss": 0.2933, "step": 368 }, { "epoch": 0.029519409611807764, "grad_norm": 0.3663640552692995, "learning_rate": 4.9066666666666666e-06, "loss": 0.3128, "step": 369 }, { "epoch": 0.029599408011839762, "grad_norm": 0.37571911507081424, "learning_rate": 4.92e-06, "loss": 0.3072, "step": 370 }, { "epoch": 0.02967940641187176, "grad_norm": 0.31555521230821104, "learning_rate": 4.933333333333334e-06, "loss": 0.2768, "step": 371 }, { "epoch": 0.029759404811903763, "grad_norm": 0.245298954856371, "learning_rate": 4.946666666666667e-06, "loss": 0.3602, "step": 372 }, { "epoch": 0.02983940321193576, "grad_norm": 0.2456806518200736, "learning_rate": 4.960000000000001e-06, "loss": 0.3594, "step": 373 }, { "epoch": 0.02991940161196776, "grad_norm": 0.3005615123583751, "learning_rate": 4.973333333333334e-06, "loss": 0.3053, "step": 374 }, { "epoch": 0.02999940001199976, "grad_norm": 0.3201933337711833, "learning_rate": 4.986666666666667e-06, "loss": 0.2999, "step": 375 }, { "epoch": 0.03007939841203176, "grad_norm": 0.3502857133296534, "learning_rate": 5e-06, "loss": 0.3083, "step": 376 }, { "epoch": 0.030159396812063758, "grad_norm": 0.35536163911687196, "learning_rate": 5.013333333333333e-06, "loss": 0.2998, "step": 377 }, { "epoch": 0.030239395212095756, "grad_norm": 0.2929929658030338, "learning_rate": 5.026666666666667e-06, "loss": 0.3055, "step": 378 }, { "epoch": 0.030319393612127758, "grad_norm": 0.2841828850712374, "learning_rate": 5.04e-06, "loss": 0.3142, "step": 379 }, { "epoch": 0.030399392012159757, "grad_norm": 0.2610288400838785, "learning_rate": 5.053333333333334e-06, "loss": 0.3478, "step": 380 }, { "epoch": 0.030479390412191755, "grad_norm": 0.31758189304705964, "learning_rate": 5.0666666666666676e-06, "loss": 0.2909, "step": 381 }, { "epoch": 0.030559388812223757, "grad_norm": 0.2931222908516946, "learning_rate": 5.0800000000000005e-06, "loss": 0.3001, "step": 382 }, { "epoch": 0.030639387212255755, "grad_norm": 0.3203454950065711, "learning_rate": 5.093333333333333e-06, "loss": 0.2707, "step": 383 }, { "epoch": 0.030719385612287754, "grad_norm": 0.3714027708987862, "learning_rate": 5.106666666666667e-06, "loss": 0.2929, "step": 384 }, { "epoch": 0.030799384012319752, "grad_norm": 0.31815491416426595, "learning_rate": 5.12e-06, "loss": 0.3094, "step": 385 }, { "epoch": 0.030879382412351754, "grad_norm": 0.3342354971381128, "learning_rate": 5.133333333333334e-06, "loss": 0.2717, "step": 386 }, { "epoch": 0.030959380812383752, "grad_norm": 0.3082120376726215, "learning_rate": 5.146666666666668e-06, "loss": 0.3074, "step": 387 }, { "epoch": 0.03103937921241575, "grad_norm": 0.3066371516027656, "learning_rate": 5.1600000000000006e-06, "loss": 0.3079, "step": 388 }, { "epoch": 0.031119377612447752, "grad_norm": 0.281534543660837, "learning_rate": 5.1733333333333335e-06, "loss": 0.3537, "step": 389 }, { "epoch": 0.03119937601247975, "grad_norm": 0.357506865511471, "learning_rate": 5.186666666666667e-06, "loss": 0.2948, "step": 390 }, { "epoch": 0.03127937441251175, "grad_norm": 0.33973068675563084, "learning_rate": 5.2e-06, "loss": 0.2879, "step": 391 }, { "epoch": 0.03135937281254375, "grad_norm": 0.43378779358682595, "learning_rate": 5.213333333333334e-06, "loss": 0.2907, "step": 392 }, { "epoch": 0.03143937121257575, "grad_norm": 0.2753200794392037, "learning_rate": 5.226666666666667e-06, "loss": 0.3461, "step": 393 }, { "epoch": 0.03151936961260775, "grad_norm": 0.31597559998812197, "learning_rate": 5.240000000000001e-06, "loss": 0.3296, "step": 394 }, { "epoch": 0.031599368012639746, "grad_norm": 1.9657686448382365, "learning_rate": 5.2533333333333336e-06, "loss": 0.2876, "step": 395 }, { "epoch": 0.031679366412671744, "grad_norm": 0.6104971373102391, "learning_rate": 5.2666666666666665e-06, "loss": 0.332, "step": 396 }, { "epoch": 0.03175936481270374, "grad_norm": 0.5223884639865138, "learning_rate": 5.28e-06, "loss": 0.2847, "step": 397 }, { "epoch": 0.03183936321273575, "grad_norm": 1.0127908583483225, "learning_rate": 5.293333333333334e-06, "loss": 0.3041, "step": 398 }, { "epoch": 0.031919361612767747, "grad_norm": 0.2896248684319602, "learning_rate": 5.306666666666667e-06, "loss": 0.3153, "step": 399 }, { "epoch": 0.031999360012799745, "grad_norm": 0.3111725505787792, "learning_rate": 5.320000000000001e-06, "loss": 0.3069, "step": 400 }, { "epoch": 0.03207935841283174, "grad_norm": 0.2612742530323411, "learning_rate": 5.333333333333334e-06, "loss": 0.3485, "step": 401 }, { "epoch": 0.03215935681286374, "grad_norm": 0.355851186485992, "learning_rate": 5.346666666666667e-06, "loss": 0.2903, "step": 402 }, { "epoch": 0.03223935521289574, "grad_norm": 0.3321772891090357, "learning_rate": 5.36e-06, "loss": 0.303, "step": 403 }, { "epoch": 0.03231935361292774, "grad_norm": 0.31126794542814573, "learning_rate": 5.373333333333334e-06, "loss": 0.3146, "step": 404 }, { "epoch": 0.032399352012959744, "grad_norm": 0.22591448085272395, "learning_rate": 5.386666666666667e-06, "loss": 0.3594, "step": 405 }, { "epoch": 0.03247935041299174, "grad_norm": 0.3308646714495262, "learning_rate": 5.400000000000001e-06, "loss": 0.2796, "step": 406 }, { "epoch": 0.03255934881302374, "grad_norm": 0.292154591870389, "learning_rate": 5.413333333333334e-06, "loss": 0.3175, "step": 407 }, { "epoch": 0.03263934721305574, "grad_norm": 0.3788761336953055, "learning_rate": 5.426666666666667e-06, "loss": 0.3106, "step": 408 }, { "epoch": 0.03271934561308774, "grad_norm": 0.23266665825540928, "learning_rate": 5.4400000000000004e-06, "loss": 0.3814, "step": 409 }, { "epoch": 0.032799344013119736, "grad_norm": 0.37785222072422875, "learning_rate": 5.453333333333334e-06, "loss": 0.2828, "step": 410 }, { "epoch": 0.032879342413151734, "grad_norm": 0.3752555153860843, "learning_rate": 5.466666666666667e-06, "loss": 0.2992, "step": 411 }, { "epoch": 0.03295934081318374, "grad_norm": 0.3241619726591749, "learning_rate": 5.480000000000001e-06, "loss": 0.2767, "step": 412 }, { "epoch": 0.03303933921321574, "grad_norm": 0.20911832526144952, "learning_rate": 5.493333333333334e-06, "loss": 0.3501, "step": 413 }, { "epoch": 0.033119337613247736, "grad_norm": 0.35445576794204775, "learning_rate": 5.506666666666667e-06, "loss": 0.285, "step": 414 }, { "epoch": 0.033199336013279734, "grad_norm": 0.3490736265715893, "learning_rate": 5.5200000000000005e-06, "loss": 0.3003, "step": 415 }, { "epoch": 0.03327933441331173, "grad_norm": 0.296556437643436, "learning_rate": 5.533333333333334e-06, "loss": 0.324, "step": 416 }, { "epoch": 0.03335933281334373, "grad_norm": 0.29025738851167604, "learning_rate": 5.546666666666667e-06, "loss": 0.3215, "step": 417 }, { "epoch": 0.03343933121337573, "grad_norm": 0.28164931440429375, "learning_rate": 5.560000000000001e-06, "loss": 0.3412, "step": 418 }, { "epoch": 0.033519329613407735, "grad_norm": 0.31967845861110017, "learning_rate": 5.573333333333334e-06, "loss": 0.3107, "step": 419 }, { "epoch": 0.03359932801343973, "grad_norm": 0.358408808675009, "learning_rate": 5.586666666666667e-06, "loss": 0.2924, "step": 420 }, { "epoch": 0.03367932641347173, "grad_norm": 0.35010371693620035, "learning_rate": 5.600000000000001e-06, "loss": 0.308, "step": 421 }, { "epoch": 0.03375932481350373, "grad_norm": 0.2989354809093555, "learning_rate": 5.613333333333334e-06, "loss": 0.3066, "step": 422 }, { "epoch": 0.03383932321353573, "grad_norm": 0.47528340359375043, "learning_rate": 5.626666666666667e-06, "loss": 0.3008, "step": 423 }, { "epoch": 0.03391932161356773, "grad_norm": 0.35241261808016133, "learning_rate": 5.64e-06, "loss": 0.2965, "step": 424 }, { "epoch": 0.033999320013599725, "grad_norm": 0.33834186394947136, "learning_rate": 5.653333333333334e-06, "loss": 0.3227, "step": 425 }, { "epoch": 0.03407931841363173, "grad_norm": 0.35873235767341694, "learning_rate": 5.666666666666667e-06, "loss": 0.2735, "step": 426 }, { "epoch": 0.03415931681366373, "grad_norm": 0.361019283306883, "learning_rate": 5.68e-06, "loss": 0.318, "step": 427 }, { "epoch": 0.03423931521369573, "grad_norm": 0.3491865418351152, "learning_rate": 5.6933333333333344e-06, "loss": 0.3186, "step": 428 }, { "epoch": 0.034319313613727725, "grad_norm": 0.27729888276670156, "learning_rate": 5.706666666666667e-06, "loss": 0.3601, "step": 429 }, { "epoch": 0.034399312013759724, "grad_norm": 0.29064256849850584, "learning_rate": 5.72e-06, "loss": 0.3138, "step": 430 }, { "epoch": 0.03447931041379172, "grad_norm": 0.3508156937459735, "learning_rate": 5.733333333333334e-06, "loss": 0.2651, "step": 431 }, { "epoch": 0.03455930881382372, "grad_norm": 0.28511748103168333, "learning_rate": 5.746666666666667e-06, "loss": 0.3234, "step": 432 }, { "epoch": 0.034639307213855726, "grad_norm": 0.3144218865965763, "learning_rate": 5.76e-06, "loss": 0.2989, "step": 433 }, { "epoch": 0.034719305613887724, "grad_norm": 0.3366749921569193, "learning_rate": 5.7733333333333345e-06, "loss": 0.2801, "step": 434 }, { "epoch": 0.03479930401391972, "grad_norm": 0.31012021904677867, "learning_rate": 5.7866666666666674e-06, "loss": 0.2949, "step": 435 }, { "epoch": 0.03487930241395172, "grad_norm": 0.3347669197981209, "learning_rate": 5.8e-06, "loss": 0.2909, "step": 436 }, { "epoch": 0.03495930081398372, "grad_norm": 0.3075000005306856, "learning_rate": 5.813333333333334e-06, "loss": 0.2854, "step": 437 }, { "epoch": 0.03503929921401572, "grad_norm": 0.27842169718598414, "learning_rate": 5.826666666666667e-06, "loss": 0.3276, "step": 438 }, { "epoch": 0.035119297614047716, "grad_norm": 0.332358261271771, "learning_rate": 5.84e-06, "loss": 0.3129, "step": 439 }, { "epoch": 0.03519929601407972, "grad_norm": 0.3250165592387918, "learning_rate": 5.853333333333335e-06, "loss": 0.2736, "step": 440 }, { "epoch": 0.03527929441411172, "grad_norm": 0.2903957305908445, "learning_rate": 5.8666666666666675e-06, "loss": 0.3028, "step": 441 }, { "epoch": 0.03535929281414372, "grad_norm": 0.3022783193404657, "learning_rate": 5.8800000000000005e-06, "loss": 0.3237, "step": 442 }, { "epoch": 0.03543929121417572, "grad_norm": 0.3501135428170432, "learning_rate": 5.893333333333334e-06, "loss": 0.2902, "step": 443 }, { "epoch": 0.035519289614207715, "grad_norm": 0.2827949040230491, "learning_rate": 5.906666666666667e-06, "loss": 0.3109, "step": 444 }, { "epoch": 0.03559928801423971, "grad_norm": 0.27880451846861004, "learning_rate": 5.92e-06, "loss": 0.3103, "step": 445 }, { "epoch": 0.03567928641427171, "grad_norm": 0.2890679709946553, "learning_rate": 5.933333333333335e-06, "loss": 0.2921, "step": 446 }, { "epoch": 0.03575928481430372, "grad_norm": 0.3712738466813804, "learning_rate": 5.946666666666668e-06, "loss": 0.2905, "step": 447 }, { "epoch": 0.035839283214335715, "grad_norm": 0.27790778029213054, "learning_rate": 5.9600000000000005e-06, "loss": 0.3553, "step": 448 }, { "epoch": 0.035919281614367714, "grad_norm": 0.24502036618265147, "learning_rate": 5.973333333333334e-06, "loss": 0.3341, "step": 449 }, { "epoch": 0.03599928001439971, "grad_norm": 0.27160629781154755, "learning_rate": 5.986666666666667e-06, "loss": 0.3496, "step": 450 }, { "epoch": 0.03607927841443171, "grad_norm": 0.33303463131843214, "learning_rate": 6e-06, "loss": 0.3185, "step": 451 }, { "epoch": 0.03615927681446371, "grad_norm": 0.3316125161555937, "learning_rate": 6.013333333333335e-06, "loss": 0.3098, "step": 452 }, { "epoch": 0.03623927521449571, "grad_norm": 0.3397618672938498, "learning_rate": 6.026666666666668e-06, "loss": 0.2824, "step": 453 }, { "epoch": 0.03631927361452771, "grad_norm": 0.33597321735886654, "learning_rate": 6.040000000000001e-06, "loss": 0.3003, "step": 454 }, { "epoch": 0.03639927201455971, "grad_norm": 0.3125718610236431, "learning_rate": 6.0533333333333335e-06, "loss": 0.2748, "step": 455 }, { "epoch": 0.03647927041459171, "grad_norm": 0.2870651408313308, "learning_rate": 6.066666666666667e-06, "loss": 0.3474, "step": 456 }, { "epoch": 0.03655926881462371, "grad_norm": 0.33301081350451345, "learning_rate": 6.08e-06, "loss": 0.274, "step": 457 }, { "epoch": 0.036639267214655706, "grad_norm": 0.3354120476704053, "learning_rate": 6.093333333333333e-06, "loss": 0.3103, "step": 458 }, { "epoch": 0.036719265614687704, "grad_norm": 0.30463034017489843, "learning_rate": 6.106666666666668e-06, "loss": 0.3112, "step": 459 }, { "epoch": 0.0367992640147197, "grad_norm": 0.4397832576246099, "learning_rate": 6.120000000000001e-06, "loss": 0.2616, "step": 460 }, { "epoch": 0.03687926241475171, "grad_norm": 0.3266197113937444, "learning_rate": 6.133333333333334e-06, "loss": 0.3131, "step": 461 }, { "epoch": 0.036959260814783707, "grad_norm": 0.33484207703049224, "learning_rate": 6.146666666666667e-06, "loss": 0.3084, "step": 462 }, { "epoch": 0.037039259214815705, "grad_norm": 0.29669930459354066, "learning_rate": 6.16e-06, "loss": 0.3237, "step": 463 }, { "epoch": 0.0371192576148477, "grad_norm": 0.2974988619046092, "learning_rate": 6.173333333333333e-06, "loss": 0.314, "step": 464 }, { "epoch": 0.0371992560148797, "grad_norm": 0.2705470479205864, "learning_rate": 6.186666666666668e-06, "loss": 0.3229, "step": 465 }, { "epoch": 0.0372792544149117, "grad_norm": 0.29645599925760746, "learning_rate": 6.200000000000001e-06, "loss": 0.2933, "step": 466 }, { "epoch": 0.0373592528149437, "grad_norm": 0.37792065493147536, "learning_rate": 6.213333333333334e-06, "loss": 0.2998, "step": 467 }, { "epoch": 0.037439251214975704, "grad_norm": 0.31915362480869564, "learning_rate": 6.2266666666666675e-06, "loss": 0.2722, "step": 468 }, { "epoch": 0.0375192496150077, "grad_norm": 0.3349686116525976, "learning_rate": 6.24e-06, "loss": 0.2854, "step": 469 }, { "epoch": 0.0375992480150397, "grad_norm": 0.31042995091475517, "learning_rate": 6.253333333333333e-06, "loss": 0.2925, "step": 470 }, { "epoch": 0.0376792464150717, "grad_norm": 0.2588527472137994, "learning_rate": 6.266666666666668e-06, "loss": 0.3326, "step": 471 }, { "epoch": 0.0377592448151037, "grad_norm": 0.4097898202167709, "learning_rate": 6.280000000000001e-06, "loss": 0.2951, "step": 472 }, { "epoch": 0.037839243215135696, "grad_norm": 0.31931766298662156, "learning_rate": 6.293333333333334e-06, "loss": 0.3149, "step": 473 }, { "epoch": 0.037919241615167694, "grad_norm": 0.28354492178943036, "learning_rate": 6.3066666666666676e-06, "loss": 0.2982, "step": 474 }, { "epoch": 0.0379992400151997, "grad_norm": 0.5919782257025219, "learning_rate": 6.3200000000000005e-06, "loss": 0.2767, "step": 475 }, { "epoch": 0.0380792384152317, "grad_norm": 0.3497273734790228, "learning_rate": 6.333333333333333e-06, "loss": 0.3049, "step": 476 }, { "epoch": 0.038159236815263696, "grad_norm": 0.33520106800548843, "learning_rate": 6.346666666666668e-06, "loss": 0.2928, "step": 477 }, { "epoch": 0.038239235215295694, "grad_norm": 0.2997709642492014, "learning_rate": 6.360000000000001e-06, "loss": 0.3049, "step": 478 }, { "epoch": 0.03831923361532769, "grad_norm": 0.3176268010674167, "learning_rate": 6.373333333333334e-06, "loss": 0.3095, "step": 479 }, { "epoch": 0.03839923201535969, "grad_norm": 0.35906242550966316, "learning_rate": 6.386666666666668e-06, "loss": 0.3206, "step": 480 }, { "epoch": 0.03847923041539169, "grad_norm": 0.33384146928757163, "learning_rate": 6.4000000000000006e-06, "loss": 0.3095, "step": 481 }, { "epoch": 0.038559228815423695, "grad_norm": 0.2982741594194618, "learning_rate": 6.4133333333333335e-06, "loss": 0.31, "step": 482 }, { "epoch": 0.03863922721545569, "grad_norm": 0.3019238865273718, "learning_rate": 6.426666666666668e-06, "loss": 0.3168, "step": 483 }, { "epoch": 0.03871922561548769, "grad_norm": 0.3541025012123874, "learning_rate": 6.440000000000001e-06, "loss": 0.293, "step": 484 }, { "epoch": 0.03879922401551969, "grad_norm": 0.2921419503259735, "learning_rate": 6.453333333333334e-06, "loss": 0.3191, "step": 485 }, { "epoch": 0.03887922241555169, "grad_norm": 0.33533179895737614, "learning_rate": 6.466666666666667e-06, "loss": 0.3178, "step": 486 }, { "epoch": 0.03895922081558369, "grad_norm": 0.303441875254397, "learning_rate": 6.480000000000001e-06, "loss": 0.2938, "step": 487 }, { "epoch": 0.039039219215615685, "grad_norm": 0.41029584738632285, "learning_rate": 6.4933333333333336e-06, "loss": 0.3036, "step": 488 }, { "epoch": 0.03911921761564769, "grad_norm": 0.43390006219856647, "learning_rate": 6.5066666666666665e-06, "loss": 0.3036, "step": 489 }, { "epoch": 0.03919921601567969, "grad_norm": 0.2962197281284441, "learning_rate": 6.520000000000001e-06, "loss": 0.317, "step": 490 }, { "epoch": 0.03927921441571169, "grad_norm": 0.3029760826691946, "learning_rate": 6.533333333333334e-06, "loss": 0.3189, "step": 491 }, { "epoch": 0.039359212815743685, "grad_norm": 0.3043303861548939, "learning_rate": 6.546666666666667e-06, "loss": 0.318, "step": 492 }, { "epoch": 0.039439211215775684, "grad_norm": 0.2607504640285908, "learning_rate": 6.560000000000001e-06, "loss": 0.3419, "step": 493 }, { "epoch": 0.03951920961580768, "grad_norm": 0.3363186674243666, "learning_rate": 6.573333333333334e-06, "loss": 0.3118, "step": 494 }, { "epoch": 0.03959920801583968, "grad_norm": 0.28470788566264854, "learning_rate": 6.5866666666666666e-06, "loss": 0.3438, "step": 495 }, { "epoch": 0.039679206415871686, "grad_norm": 0.346026521635282, "learning_rate": 6.600000000000001e-06, "loss": 0.2824, "step": 496 }, { "epoch": 0.039759204815903684, "grad_norm": 0.34707869461125024, "learning_rate": 6.613333333333334e-06, "loss": 0.2756, "step": 497 }, { "epoch": 0.03983920321593568, "grad_norm": 0.39467823751709674, "learning_rate": 6.626666666666667e-06, "loss": 0.3012, "step": 498 }, { "epoch": 0.03991920161596768, "grad_norm": 0.3358019278275616, "learning_rate": 6.640000000000001e-06, "loss": 0.3089, "step": 499 }, { "epoch": 0.03999920001599968, "grad_norm": 0.24376858347215066, "learning_rate": 6.653333333333334e-06, "loss": 0.365, "step": 500 }, { "epoch": 0.04007919841603168, "grad_norm": 0.3046401025688762, "learning_rate": 6.666666666666667e-06, "loss": 0.331, "step": 501 }, { "epoch": 0.040159196816063676, "grad_norm": 0.2693221662620079, "learning_rate": 6.680000000000001e-06, "loss": 0.3293, "step": 502 }, { "epoch": 0.04023919521609568, "grad_norm": 0.25247424245155686, "learning_rate": 6.693333333333334e-06, "loss": 0.333, "step": 503 }, { "epoch": 0.04031919361612768, "grad_norm": 0.3169724396118525, "learning_rate": 6.706666666666667e-06, "loss": 0.3187, "step": 504 }, { "epoch": 0.04039919201615968, "grad_norm": 0.5881998972475462, "learning_rate": 6.720000000000001e-06, "loss": 0.2752, "step": 505 }, { "epoch": 0.04047919041619168, "grad_norm": 0.3002963988601855, "learning_rate": 6.733333333333334e-06, "loss": 0.2935, "step": 506 }, { "epoch": 0.040559188816223675, "grad_norm": 0.29552095066249523, "learning_rate": 6.746666666666667e-06, "loss": 0.3152, "step": 507 }, { "epoch": 0.04063918721625567, "grad_norm": 0.2537159447710243, "learning_rate": 6.760000000000001e-06, "loss": 0.3399, "step": 508 }, { "epoch": 0.04071918561628767, "grad_norm": 0.33234160937933743, "learning_rate": 6.773333333333334e-06, "loss": 0.2989, "step": 509 }, { "epoch": 0.04079918401631968, "grad_norm": 0.32268573186034016, "learning_rate": 6.786666666666667e-06, "loss": 0.2855, "step": 510 }, { "epoch": 0.040879182416351675, "grad_norm": 0.30733727664969035, "learning_rate": 6.800000000000001e-06, "loss": 0.3271, "step": 511 }, { "epoch": 0.040959180816383674, "grad_norm": 0.2888758745237012, "learning_rate": 6.813333333333334e-06, "loss": 0.3099, "step": 512 }, { "epoch": 0.04103917921641567, "grad_norm": 0.258533115813079, "learning_rate": 6.826666666666667e-06, "loss": 0.3634, "step": 513 }, { "epoch": 0.04111917761644767, "grad_norm": 0.32797077931041857, "learning_rate": 6.8400000000000014e-06, "loss": 0.2737, "step": 514 }, { "epoch": 0.04119917601647967, "grad_norm": 0.33155121183837055, "learning_rate": 6.853333333333334e-06, "loss": 0.2865, "step": 515 }, { "epoch": 0.04127917441651167, "grad_norm": 0.3108357589344553, "learning_rate": 6.866666666666667e-06, "loss": 0.3167, "step": 516 }, { "epoch": 0.04135917281654367, "grad_norm": 0.24991574927473204, "learning_rate": 6.88e-06, "loss": 0.328, "step": 517 }, { "epoch": 0.04143917121657567, "grad_norm": 0.43485053216596936, "learning_rate": 6.893333333333334e-06, "loss": 0.2946, "step": 518 }, { "epoch": 0.04151916961660767, "grad_norm": 0.8428664264426944, "learning_rate": 6.906666666666667e-06, "loss": 0.3041, "step": 519 }, { "epoch": 0.04159916801663967, "grad_norm": 0.4140750267696265, "learning_rate": 6.92e-06, "loss": 0.2796, "step": 520 }, { "epoch": 0.041679166416671666, "grad_norm": 0.28133627665652994, "learning_rate": 6.9333333333333344e-06, "loss": 0.3108, "step": 521 }, { "epoch": 0.041759164816703664, "grad_norm": 0.29082274810357817, "learning_rate": 6.946666666666667e-06, "loss": 0.3402, "step": 522 }, { "epoch": 0.04183916321673566, "grad_norm": 0.3733187336059807, "learning_rate": 6.96e-06, "loss": 0.2879, "step": 523 }, { "epoch": 0.04191916161676767, "grad_norm": 0.318706739739197, "learning_rate": 6.973333333333334e-06, "loss": 0.308, "step": 524 }, { "epoch": 0.041999160016799666, "grad_norm": 0.2832623430930091, "learning_rate": 6.986666666666667e-06, "loss": 0.2935, "step": 525 }, { "epoch": 0.042079158416831665, "grad_norm": 0.3362264764755571, "learning_rate": 7e-06, "loss": 0.2954, "step": 526 }, { "epoch": 0.04215915681686366, "grad_norm": 0.314784032088021, "learning_rate": 7.0133333333333345e-06, "loss": 0.3209, "step": 527 }, { "epoch": 0.04223915521689566, "grad_norm": 0.28819551985251074, "learning_rate": 7.0266666666666674e-06, "loss": 0.3094, "step": 528 }, { "epoch": 0.04231915361692766, "grad_norm": 0.29946809520844103, "learning_rate": 7.04e-06, "loss": 0.3222, "step": 529 }, { "epoch": 0.04239915201695966, "grad_norm": 0.2786109400664497, "learning_rate": 7.053333333333334e-06, "loss": 0.3049, "step": 530 }, { "epoch": 0.04247915041699166, "grad_norm": 0.3197022273747613, "learning_rate": 7.066666666666667e-06, "loss": 0.298, "step": 531 }, { "epoch": 0.04255914881702366, "grad_norm": 0.33746979463956367, "learning_rate": 7.08e-06, "loss": 0.2831, "step": 532 }, { "epoch": 0.04263914721705566, "grad_norm": 0.31856036505044, "learning_rate": 7.093333333333335e-06, "loss": 0.309, "step": 533 }, { "epoch": 0.04271914561708766, "grad_norm": 0.3660656461056129, "learning_rate": 7.1066666666666675e-06, "loss": 0.3041, "step": 534 }, { "epoch": 0.04279914401711966, "grad_norm": 0.2954128931866404, "learning_rate": 7.1200000000000004e-06, "loss": 0.328, "step": 535 }, { "epoch": 0.042879142417151656, "grad_norm": 0.3363128939125697, "learning_rate": 7.133333333333334e-06, "loss": 0.3146, "step": 536 }, { "epoch": 0.042959140817183654, "grad_norm": 0.29953174973848057, "learning_rate": 7.146666666666667e-06, "loss": 0.3333, "step": 537 }, { "epoch": 0.04303913921721565, "grad_norm": 0.36975823450622436, "learning_rate": 7.16e-06, "loss": 0.2887, "step": 538 }, { "epoch": 0.04311913761724766, "grad_norm": 0.32247062717335323, "learning_rate": 7.173333333333335e-06, "loss": 0.2782, "step": 539 }, { "epoch": 0.043199136017279656, "grad_norm": 0.2786142635386788, "learning_rate": 7.186666666666668e-06, "loss": 0.3361, "step": 540 }, { "epoch": 0.043279134417311654, "grad_norm": 0.27537515037340293, "learning_rate": 7.2000000000000005e-06, "loss": 0.3135, "step": 541 }, { "epoch": 0.04335913281734365, "grad_norm": 0.3589920861126592, "learning_rate": 7.213333333333334e-06, "loss": 0.2829, "step": 542 }, { "epoch": 0.04343913121737565, "grad_norm": 0.29938013437954697, "learning_rate": 7.226666666666667e-06, "loss": 0.2973, "step": 543 }, { "epoch": 0.04351912961740765, "grad_norm": 0.30316694415023465, "learning_rate": 7.24e-06, "loss": 0.3121, "step": 544 }, { "epoch": 0.04359912801743965, "grad_norm": 0.31917214871252914, "learning_rate": 7.253333333333335e-06, "loss": 0.3232, "step": 545 }, { "epoch": 0.04367912641747165, "grad_norm": 0.35032567603107734, "learning_rate": 7.266666666666668e-06, "loss": 0.287, "step": 546 }, { "epoch": 0.04375912481750365, "grad_norm": 0.40656293117119846, "learning_rate": 7.280000000000001e-06, "loss": 0.2785, "step": 547 }, { "epoch": 0.04383912321753565, "grad_norm": 0.3141356092079716, "learning_rate": 7.2933333333333335e-06, "loss": 0.3143, "step": 548 }, { "epoch": 0.04391912161756765, "grad_norm": 0.7744596350911961, "learning_rate": 7.306666666666667e-06, "loss": 0.2694, "step": 549 }, { "epoch": 0.04399912001759965, "grad_norm": 0.31971476112755937, "learning_rate": 7.32e-06, "loss": 0.2868, "step": 550 }, { "epoch": 0.044079118417631645, "grad_norm": 0.28496312422337494, "learning_rate": 7.333333333333333e-06, "loss": 0.3111, "step": 551 }, { "epoch": 0.04415911681766364, "grad_norm": 0.3567713398018572, "learning_rate": 7.346666666666668e-06, "loss": 0.2888, "step": 552 }, { "epoch": 0.04423911521769565, "grad_norm": 0.29427192379710804, "learning_rate": 7.360000000000001e-06, "loss": 0.3384, "step": 553 }, { "epoch": 0.04431911361772765, "grad_norm": 0.24743843822184564, "learning_rate": 7.373333333333334e-06, "loss": 0.3318, "step": 554 }, { "epoch": 0.044399112017759645, "grad_norm": 0.358752055188852, "learning_rate": 7.386666666666667e-06, "loss": 0.2932, "step": 555 }, { "epoch": 0.044479110417791644, "grad_norm": 0.3206421976582925, "learning_rate": 7.4e-06, "loss": 0.3094, "step": 556 }, { "epoch": 0.04455910881782364, "grad_norm": 0.26761742225044916, "learning_rate": 7.413333333333333e-06, "loss": 0.3399, "step": 557 }, { "epoch": 0.04463910721785564, "grad_norm": 0.27064350330456893, "learning_rate": 7.426666666666668e-06, "loss": 0.3345, "step": 558 }, { "epoch": 0.04471910561788764, "grad_norm": 0.29101192174270596, "learning_rate": 7.440000000000001e-06, "loss": 0.3305, "step": 559 }, { "epoch": 0.044799104017919644, "grad_norm": 0.3917772721807302, "learning_rate": 7.453333333333334e-06, "loss": 0.2923, "step": 560 }, { "epoch": 0.04487910241795164, "grad_norm": 0.3620528877940992, "learning_rate": 7.4666666666666675e-06, "loss": 0.311, "step": 561 }, { "epoch": 0.04495910081798364, "grad_norm": 0.3230945435774983, "learning_rate": 7.48e-06, "loss": 0.3032, "step": 562 }, { "epoch": 0.04503909921801564, "grad_norm": 0.350005362355763, "learning_rate": 7.493333333333333e-06, "loss": 0.3022, "step": 563 }, { "epoch": 0.04511909761804764, "grad_norm": 0.2958772164710638, "learning_rate": 7.506666666666668e-06, "loss": 0.2971, "step": 564 }, { "epoch": 0.045199096018079636, "grad_norm": 0.3247961180824908, "learning_rate": 7.520000000000001e-06, "loss": 0.3245, "step": 565 }, { "epoch": 0.045279094418111634, "grad_norm": 0.2694177380329182, "learning_rate": 7.533333333333334e-06, "loss": 0.2914, "step": 566 }, { "epoch": 0.04535909281814364, "grad_norm": 0.2518577316045925, "learning_rate": 7.5466666666666675e-06, "loss": 0.3827, "step": 567 }, { "epoch": 0.04543909121817564, "grad_norm": 0.3502034514282393, "learning_rate": 7.5600000000000005e-06, "loss": 0.2766, "step": 568 }, { "epoch": 0.04551908961820764, "grad_norm": 0.3091282899274977, "learning_rate": 7.573333333333333e-06, "loss": 0.3095, "step": 569 }, { "epoch": 0.045599088018239635, "grad_norm": 0.2442219176585478, "learning_rate": 7.586666666666668e-06, "loss": 0.3272, "step": 570 }, { "epoch": 0.04567908641827163, "grad_norm": 0.32889910123831523, "learning_rate": 7.600000000000001e-06, "loss": 0.3205, "step": 571 }, { "epoch": 0.04575908481830363, "grad_norm": 0.25534793694505276, "learning_rate": 7.613333333333334e-06, "loss": 0.3391, "step": 572 }, { "epoch": 0.04583908321833563, "grad_norm": 0.3545177108243395, "learning_rate": 7.626666666666668e-06, "loss": 0.2839, "step": 573 }, { "epoch": 0.045919081618367635, "grad_norm": 0.32473986777049285, "learning_rate": 7.640000000000001e-06, "loss": 0.2834, "step": 574 }, { "epoch": 0.045999080018399634, "grad_norm": 0.31906170470992173, "learning_rate": 7.653333333333333e-06, "loss": 0.3231, "step": 575 }, { "epoch": 0.04607907841843163, "grad_norm": 0.2687398201356283, "learning_rate": 7.666666666666667e-06, "loss": 0.3427, "step": 576 }, { "epoch": 0.04615907681846363, "grad_norm": 0.27100988341265847, "learning_rate": 7.680000000000001e-06, "loss": 0.3388, "step": 577 }, { "epoch": 0.04623907521849563, "grad_norm": 0.31030817272159894, "learning_rate": 7.693333333333333e-06, "loss": 0.3132, "step": 578 }, { "epoch": 0.04631907361852763, "grad_norm": 0.36354142939370093, "learning_rate": 7.706666666666669e-06, "loss": 0.2946, "step": 579 }, { "epoch": 0.046399072018559626, "grad_norm": 0.3426449246017012, "learning_rate": 7.72e-06, "loss": 0.3292, "step": 580 }, { "epoch": 0.04647907041859163, "grad_norm": 0.3088370390840374, "learning_rate": 7.733333333333334e-06, "loss": 0.3173, "step": 581 }, { "epoch": 0.04655906881862363, "grad_norm": 0.28759815818156115, "learning_rate": 7.746666666666666e-06, "loss": 0.3194, "step": 582 }, { "epoch": 0.04663906721865563, "grad_norm": 0.344707868764643, "learning_rate": 7.76e-06, "loss": 0.2831, "step": 583 }, { "epoch": 0.046719065618687626, "grad_norm": 0.29742355530798625, "learning_rate": 7.773333333333334e-06, "loss": 0.3244, "step": 584 }, { "epoch": 0.046799064018719624, "grad_norm": 0.2836383142386181, "learning_rate": 7.786666666666666e-06, "loss": 0.3128, "step": 585 }, { "epoch": 0.04687906241875162, "grad_norm": 0.2545638071639253, "learning_rate": 7.800000000000002e-06, "loss": 0.3374, "step": 586 }, { "epoch": 0.04695906081878362, "grad_norm": 0.3261965294333678, "learning_rate": 7.813333333333334e-06, "loss": 0.2777, "step": 587 }, { "epoch": 0.047039059218815626, "grad_norm": 0.2852167773669578, "learning_rate": 7.826666666666667e-06, "loss": 0.3279, "step": 588 }, { "epoch": 0.047119057618847625, "grad_norm": 0.3393587054160582, "learning_rate": 7.840000000000001e-06, "loss": 0.2837, "step": 589 }, { "epoch": 0.04719905601887962, "grad_norm": 0.3247099685645798, "learning_rate": 7.853333333333333e-06, "loss": 0.2864, "step": 590 }, { "epoch": 0.04727905441891162, "grad_norm": 0.2944077670255197, "learning_rate": 7.866666666666667e-06, "loss": 0.305, "step": 591 }, { "epoch": 0.04735905281894362, "grad_norm": 0.2954680839159076, "learning_rate": 7.88e-06, "loss": 0.2996, "step": 592 }, { "epoch": 0.04743905121897562, "grad_norm": 0.2985561061963635, "learning_rate": 7.893333333333335e-06, "loss": 0.3208, "step": 593 }, { "epoch": 0.04751904961900762, "grad_norm": 0.3007988746030726, "learning_rate": 7.906666666666667e-06, "loss": 0.3007, "step": 594 }, { "epoch": 0.04759904801903962, "grad_norm": 0.25398057694135, "learning_rate": 7.92e-06, "loss": 0.344, "step": 595 }, { "epoch": 0.04767904641907162, "grad_norm": 0.2679593313964034, "learning_rate": 7.933333333333334e-06, "loss": 0.3506, "step": 596 }, { "epoch": 0.04775904481910362, "grad_norm": 0.301174678656074, "learning_rate": 7.946666666666666e-06, "loss": 0.3115, "step": 597 }, { "epoch": 0.04783904321913562, "grad_norm": 0.3163709906123511, "learning_rate": 7.960000000000002e-06, "loss": 0.334, "step": 598 }, { "epoch": 0.047919041619167616, "grad_norm": 0.2835034971944169, "learning_rate": 7.973333333333334e-06, "loss": 0.3268, "step": 599 }, { "epoch": 0.047999040019199614, "grad_norm": 0.3274329488304935, "learning_rate": 7.986666666666668e-06, "loss": 0.2909, "step": 600 }, { "epoch": 0.04807903841923161, "grad_norm": 0.2991466638559298, "learning_rate": 8.000000000000001e-06, "loss": 0.333, "step": 601 }, { "epoch": 0.04815903681926362, "grad_norm": 0.24739835322229659, "learning_rate": 8.013333333333333e-06, "loss": 0.3557, "step": 602 }, { "epoch": 0.048239035219295616, "grad_norm": 0.3268090809918588, "learning_rate": 8.026666666666667e-06, "loss": 0.3015, "step": 603 }, { "epoch": 0.048319033619327614, "grad_norm": 0.2529108025233396, "learning_rate": 8.040000000000001e-06, "loss": 0.3354, "step": 604 }, { "epoch": 0.04839903201935961, "grad_norm": 0.28297194060246894, "learning_rate": 8.053333333333335e-06, "loss": 0.3263, "step": 605 }, { "epoch": 0.04847903041939161, "grad_norm": 0.27272261674002124, "learning_rate": 8.066666666666667e-06, "loss": 0.3036, "step": 606 }, { "epoch": 0.04855902881942361, "grad_norm": 0.2910811610239878, "learning_rate": 8.08e-06, "loss": 0.3054, "step": 607 }, { "epoch": 0.04863902721945561, "grad_norm": 0.3253114566305926, "learning_rate": 8.093333333333334e-06, "loss": 0.3206, "step": 608 }, { "epoch": 0.04871902561948761, "grad_norm": 0.3152132203792218, "learning_rate": 8.106666666666666e-06, "loss": 0.2833, "step": 609 }, { "epoch": 0.04879902401951961, "grad_norm": 0.30025638241841135, "learning_rate": 8.120000000000002e-06, "loss": 0.324, "step": 610 }, { "epoch": 0.04887902241955161, "grad_norm": 0.28742975904796864, "learning_rate": 8.133333333333334e-06, "loss": 0.3175, "step": 611 }, { "epoch": 0.04895902081958361, "grad_norm": 0.3359495630777932, "learning_rate": 8.146666666666668e-06, "loss": 0.2843, "step": 612 }, { "epoch": 0.04903901921961561, "grad_norm": 0.26493287194458254, "learning_rate": 8.16e-06, "loss": 0.3458, "step": 613 }, { "epoch": 0.049119017619647605, "grad_norm": 0.3408797805899529, "learning_rate": 8.173333333333334e-06, "loss": 0.2916, "step": 614 }, { "epoch": 0.0491990160196796, "grad_norm": 0.33809001556753493, "learning_rate": 8.186666666666667e-06, "loss": 0.3062, "step": 615 }, { "epoch": 0.04927901441971161, "grad_norm": 0.2845092295614473, "learning_rate": 8.2e-06, "loss": 0.3206, "step": 616 }, { "epoch": 0.04935901281974361, "grad_norm": 0.25195126164698894, "learning_rate": 8.213333333333335e-06, "loss": 0.3354, "step": 617 }, { "epoch": 0.049439011219775605, "grad_norm": 0.3394270003618235, "learning_rate": 8.226666666666667e-06, "loss": 0.3106, "step": 618 }, { "epoch": 0.049519009619807604, "grad_norm": 0.2935637552361525, "learning_rate": 8.24e-06, "loss": 0.29, "step": 619 }, { "epoch": 0.0495990080198396, "grad_norm": 0.26339841567707817, "learning_rate": 8.253333333333334e-06, "loss": 0.3446, "step": 620 }, { "epoch": 0.0496790064198716, "grad_norm": 0.3438598627089339, "learning_rate": 8.266666666666667e-06, "loss": 0.2687, "step": 621 }, { "epoch": 0.0497590048199036, "grad_norm": 0.31207737407635, "learning_rate": 8.28e-06, "loss": 0.2871, "step": 622 }, { "epoch": 0.049839003219935604, "grad_norm": 0.31511180418472057, "learning_rate": 8.293333333333334e-06, "loss": 0.3274, "step": 623 }, { "epoch": 0.0499190016199676, "grad_norm": 0.29772904984139775, "learning_rate": 8.306666666666668e-06, "loss": 0.3299, "step": 624 }, { "epoch": 0.0499990000199996, "grad_norm": 0.33777227246528174, "learning_rate": 8.32e-06, "loss": 0.3214, "step": 625 }, { "epoch": 0.0500789984200316, "grad_norm": 0.3364393345256382, "learning_rate": 8.333333333333334e-06, "loss": 0.308, "step": 626 }, { "epoch": 0.0501589968200636, "grad_norm": 0.34732079135653326, "learning_rate": 8.346666666666668e-06, "loss": 0.2888, "step": 627 }, { "epoch": 0.050238995220095596, "grad_norm": 0.2930826081982636, "learning_rate": 8.36e-06, "loss": 0.3033, "step": 628 }, { "epoch": 0.050318993620127594, "grad_norm": 0.25469903467158445, "learning_rate": 8.373333333333335e-06, "loss": 0.3325, "step": 629 }, { "epoch": 0.0503989920201596, "grad_norm": 0.2842973464502169, "learning_rate": 8.386666666666667e-06, "loss": 0.3279, "step": 630 }, { "epoch": 0.0504789904201916, "grad_norm": 0.22195792662400626, "learning_rate": 8.400000000000001e-06, "loss": 0.3773, "step": 631 }, { "epoch": 0.050558988820223597, "grad_norm": 0.27740979336171634, "learning_rate": 8.413333333333335e-06, "loss": 0.3384, "step": 632 }, { "epoch": 0.050638987220255595, "grad_norm": 0.3732154224552938, "learning_rate": 8.426666666666667e-06, "loss": 0.2965, "step": 633 }, { "epoch": 0.05071898562028759, "grad_norm": 0.28266568420922794, "learning_rate": 8.44e-06, "loss": 0.3302, "step": 634 }, { "epoch": 0.05079898402031959, "grad_norm": 0.27510971592016004, "learning_rate": 8.453333333333334e-06, "loss": 0.3511, "step": 635 }, { "epoch": 0.05087898242035159, "grad_norm": 0.28020366293303306, "learning_rate": 8.466666666666668e-06, "loss": 0.3422, "step": 636 }, { "epoch": 0.050958980820383595, "grad_norm": 0.8196942413526801, "learning_rate": 8.48e-06, "loss": 0.3521, "step": 637 }, { "epoch": 0.051038979220415594, "grad_norm": 0.36368109926784503, "learning_rate": 8.493333333333334e-06, "loss": 0.3071, "step": 638 }, { "epoch": 0.05111897762044759, "grad_norm": 0.35538100915584425, "learning_rate": 8.506666666666668e-06, "loss": 0.3404, "step": 639 }, { "epoch": 0.05119897602047959, "grad_norm": 0.3619070679729844, "learning_rate": 8.52e-06, "loss": 0.2795, "step": 640 }, { "epoch": 0.05127897442051159, "grad_norm": 0.31923621030184124, "learning_rate": 8.533333333333335e-06, "loss": 0.3077, "step": 641 }, { "epoch": 0.05135897282054359, "grad_norm": 0.41536988325033963, "learning_rate": 8.546666666666667e-06, "loss": 0.2871, "step": 642 }, { "epoch": 0.051438971220575586, "grad_norm": 0.29831981671162405, "learning_rate": 8.560000000000001e-06, "loss": 0.3182, "step": 643 }, { "epoch": 0.05151896962060759, "grad_norm": 0.3253842821903802, "learning_rate": 8.573333333333333e-06, "loss": 0.29, "step": 644 }, { "epoch": 0.05159896802063959, "grad_norm": 0.3124173721292992, "learning_rate": 8.586666666666667e-06, "loss": 0.3298, "step": 645 }, { "epoch": 0.05167896642067159, "grad_norm": 0.29722686222543593, "learning_rate": 8.6e-06, "loss": 0.3067, "step": 646 }, { "epoch": 0.051758964820703586, "grad_norm": 0.2283915129641728, "learning_rate": 8.613333333333333e-06, "loss": 0.368, "step": 647 }, { "epoch": 0.051838963220735584, "grad_norm": 0.340931476791012, "learning_rate": 8.626666666666668e-06, "loss": 0.3222, "step": 648 }, { "epoch": 0.05191896162076758, "grad_norm": 0.3005000002803097, "learning_rate": 8.64e-06, "loss": 0.362, "step": 649 }, { "epoch": 0.05199896002079958, "grad_norm": 0.2879090524411462, "learning_rate": 8.653333333333334e-06, "loss": 0.3257, "step": 650 }, { "epoch": 0.052078958420831586, "grad_norm": 0.256151410774499, "learning_rate": 8.666666666666668e-06, "loss": 0.3463, "step": 651 }, { "epoch": 0.052158956820863585, "grad_norm": 0.35244110018552693, "learning_rate": 8.68e-06, "loss": 0.3078, "step": 652 }, { "epoch": 0.05223895522089558, "grad_norm": 0.29949410599902526, "learning_rate": 8.693333333333334e-06, "loss": 0.3236, "step": 653 }, { "epoch": 0.05231895362092758, "grad_norm": 0.31571726448551496, "learning_rate": 8.706666666666667e-06, "loss": 0.3203, "step": 654 }, { "epoch": 0.05239895202095958, "grad_norm": 0.3009203092976193, "learning_rate": 8.720000000000001e-06, "loss": 0.3388, "step": 655 }, { "epoch": 0.05247895042099158, "grad_norm": 0.27707513386666965, "learning_rate": 8.733333333333333e-06, "loss": 0.3357, "step": 656 }, { "epoch": 0.05255894882102358, "grad_norm": 0.3327018041628904, "learning_rate": 8.746666666666667e-06, "loss": 0.2939, "step": 657 }, { "epoch": 0.05263894722105558, "grad_norm": 0.31569376380067976, "learning_rate": 8.76e-06, "loss": 0.2864, "step": 658 }, { "epoch": 0.05271894562108758, "grad_norm": 0.3372875049163226, "learning_rate": 8.773333333333333e-06, "loss": 0.2851, "step": 659 }, { "epoch": 0.05279894402111958, "grad_norm": 0.35105142744664414, "learning_rate": 8.786666666666668e-06, "loss": 0.3019, "step": 660 }, { "epoch": 0.05287894242115158, "grad_norm": 0.2992783020773157, "learning_rate": 8.8e-06, "loss": 0.3409, "step": 661 }, { "epoch": 0.052958940821183575, "grad_norm": 0.29530772009626577, "learning_rate": 8.813333333333334e-06, "loss": 0.339, "step": 662 }, { "epoch": 0.053038939221215574, "grad_norm": 0.34242968981634675, "learning_rate": 8.826666666666668e-06, "loss": 0.3031, "step": 663 }, { "epoch": 0.05311893762124757, "grad_norm": 0.3276982950077085, "learning_rate": 8.84e-06, "loss": 0.3344, "step": 664 }, { "epoch": 0.05319893602127958, "grad_norm": 0.25124228591032477, "learning_rate": 8.853333333333334e-06, "loss": 0.345, "step": 665 }, { "epoch": 0.053278934421311576, "grad_norm": 0.2707954693498505, "learning_rate": 8.866666666666668e-06, "loss": 0.3589, "step": 666 }, { "epoch": 0.053358932821343574, "grad_norm": 0.27919039500251086, "learning_rate": 8.880000000000001e-06, "loss": 0.3086, "step": 667 }, { "epoch": 0.05343893122137557, "grad_norm": 0.3299186580410715, "learning_rate": 8.893333333333333e-06, "loss": 0.3082, "step": 668 }, { "epoch": 0.05351892962140757, "grad_norm": 0.41990335225219644, "learning_rate": 8.906666666666667e-06, "loss": 0.3102, "step": 669 }, { "epoch": 0.05359892802143957, "grad_norm": 0.28599965643564496, "learning_rate": 8.920000000000001e-06, "loss": 0.3164, "step": 670 }, { "epoch": 0.05367892642147157, "grad_norm": 0.34808391734402944, "learning_rate": 8.933333333333333e-06, "loss": 0.3217, "step": 671 }, { "epoch": 0.05375892482150357, "grad_norm": 0.254439022609339, "learning_rate": 8.946666666666669e-06, "loss": 0.322, "step": 672 }, { "epoch": 0.05383892322153557, "grad_norm": 0.37020766567084756, "learning_rate": 8.96e-06, "loss": 0.2955, "step": 673 }, { "epoch": 0.05391892162156757, "grad_norm": 0.2849948791622441, "learning_rate": 8.973333333333334e-06, "loss": 0.3409, "step": 674 }, { "epoch": 0.05399892002159957, "grad_norm": 0.2507800065229427, "learning_rate": 8.986666666666666e-06, "loss": 0.3189, "step": 675 }, { "epoch": 0.05407891842163157, "grad_norm": 0.3572472676386684, "learning_rate": 9e-06, "loss": 0.2764, "step": 676 }, { "epoch": 0.054158916821663565, "grad_norm": 0.2720468222677146, "learning_rate": 9.013333333333334e-06, "loss": 0.3474, "step": 677 }, { "epoch": 0.05423891522169556, "grad_norm": 0.3092085221001216, "learning_rate": 9.026666666666666e-06, "loss": 0.3042, "step": 678 }, { "epoch": 0.05431891362172757, "grad_norm": 0.5220723866063631, "learning_rate": 9.040000000000002e-06, "loss": 0.3025, "step": 679 }, { "epoch": 0.05439891202175957, "grad_norm": 0.2952834986414631, "learning_rate": 9.053333333333334e-06, "loss": 0.3322, "step": 680 }, { "epoch": 0.054478910421791565, "grad_norm": 0.28083119974941356, "learning_rate": 9.066666666666667e-06, "loss": 0.2991, "step": 681 }, { "epoch": 0.054558908821823564, "grad_norm": 0.33732197148992027, "learning_rate": 9.080000000000001e-06, "loss": 0.2905, "step": 682 }, { "epoch": 0.05463890722185556, "grad_norm": 0.3984458691919837, "learning_rate": 9.093333333333333e-06, "loss": 0.2884, "step": 683 }, { "epoch": 0.05471890562188756, "grad_norm": 0.2510279050525228, "learning_rate": 9.106666666666667e-06, "loss": 0.3415, "step": 684 }, { "epoch": 0.05479890402191956, "grad_norm": 0.32958914924248883, "learning_rate": 9.12e-06, "loss": 0.283, "step": 685 }, { "epoch": 0.054878902421951564, "grad_norm": 0.30974303957340654, "learning_rate": 9.133333333333335e-06, "loss": 0.3015, "step": 686 }, { "epoch": 0.05495890082198356, "grad_norm": 0.34455977206231336, "learning_rate": 9.146666666666667e-06, "loss": 0.2768, "step": 687 }, { "epoch": 0.05503889922201556, "grad_norm": 0.3332914051398539, "learning_rate": 9.16e-06, "loss": 0.3043, "step": 688 }, { "epoch": 0.05511889762204756, "grad_norm": 0.25870417417674735, "learning_rate": 9.173333333333334e-06, "loss": 0.3549, "step": 689 }, { "epoch": 0.05519889602207956, "grad_norm": 0.27113924945741075, "learning_rate": 9.186666666666666e-06, "loss": 0.3326, "step": 690 }, { "epoch": 0.055278894422111556, "grad_norm": 0.35059384838302976, "learning_rate": 9.200000000000002e-06, "loss": 0.2838, "step": 691 }, { "epoch": 0.055358892822143554, "grad_norm": 1.8528370686227036, "learning_rate": 9.213333333333334e-06, "loss": 0.3087, "step": 692 }, { "epoch": 0.05543889122217556, "grad_norm": 0.4118964907754694, "learning_rate": 9.226666666666668e-06, "loss": 0.2844, "step": 693 }, { "epoch": 0.05551888962220756, "grad_norm": 0.30237810948877875, "learning_rate": 9.240000000000001e-06, "loss": 0.3092, "step": 694 }, { "epoch": 0.055598888022239557, "grad_norm": 0.3392299230775422, "learning_rate": 9.253333333333333e-06, "loss": 0.2874, "step": 695 }, { "epoch": 0.055678886422271555, "grad_norm": 0.3128477107597521, "learning_rate": 9.266666666666667e-06, "loss": 0.2906, "step": 696 }, { "epoch": 0.05575888482230355, "grad_norm": 0.307323897115656, "learning_rate": 9.280000000000001e-06, "loss": 0.3326, "step": 697 }, { "epoch": 0.05583888322233555, "grad_norm": 0.3244991064595367, "learning_rate": 9.293333333333335e-06, "loss": 0.3306, "step": 698 }, { "epoch": 0.05591888162236755, "grad_norm": 0.2926053870136065, "learning_rate": 9.306666666666667e-06, "loss": 0.3141, "step": 699 }, { "epoch": 0.055998880022399555, "grad_norm": 0.2421416783310131, "learning_rate": 9.32e-06, "loss": 0.3441, "step": 700 }, { "epoch": 0.056078878422431554, "grad_norm": 0.3690062702559569, "learning_rate": 9.333333333333334e-06, "loss": 0.3224, "step": 701 }, { "epoch": 0.05615887682246355, "grad_norm": 0.36388521151178416, "learning_rate": 9.346666666666666e-06, "loss": 0.3045, "step": 702 }, { "epoch": 0.05623887522249555, "grad_norm": 0.2527855713869621, "learning_rate": 9.360000000000002e-06, "loss": 0.3288, "step": 703 }, { "epoch": 0.05631887362252755, "grad_norm": 0.2585172517011842, "learning_rate": 9.373333333333334e-06, "loss": 0.3428, "step": 704 }, { "epoch": 0.05639887202255955, "grad_norm": 0.2604854393414402, "learning_rate": 9.386666666666668e-06, "loss": 0.3439, "step": 705 }, { "epoch": 0.056478870422591546, "grad_norm": 0.3183776494016085, "learning_rate": 9.4e-06, "loss": 0.3031, "step": 706 }, { "epoch": 0.05655886882262355, "grad_norm": 0.23453220047838064, "learning_rate": 9.413333333333334e-06, "loss": 0.3445, "step": 707 }, { "epoch": 0.05663886722265555, "grad_norm": 0.3072292931743912, "learning_rate": 9.426666666666667e-06, "loss": 0.3194, "step": 708 }, { "epoch": 0.05671886562268755, "grad_norm": 0.29486187302040784, "learning_rate": 9.440000000000001e-06, "loss": 0.3, "step": 709 }, { "epoch": 0.056798864022719546, "grad_norm": 0.30440180669587297, "learning_rate": 9.453333333333335e-06, "loss": 0.3104, "step": 710 }, { "epoch": 0.056878862422751544, "grad_norm": 0.33928836943813184, "learning_rate": 9.466666666666667e-06, "loss": 0.3202, "step": 711 }, { "epoch": 0.05695886082278354, "grad_norm": 0.32879885154079164, "learning_rate": 9.48e-06, "loss": 0.2803, "step": 712 }, { "epoch": 0.05703885922281554, "grad_norm": 0.26156127123572537, "learning_rate": 9.493333333333334e-06, "loss": 0.3397, "step": 713 }, { "epoch": 0.057118857622847546, "grad_norm": 0.2962002522808446, "learning_rate": 9.506666666666667e-06, "loss": 0.3196, "step": 714 }, { "epoch": 0.057198856022879545, "grad_norm": 0.25384069215962385, "learning_rate": 9.52e-06, "loss": 0.3138, "step": 715 }, { "epoch": 0.05727885442291154, "grad_norm": 0.350693396161863, "learning_rate": 9.533333333333334e-06, "loss": 0.2698, "step": 716 }, { "epoch": 0.05735885282294354, "grad_norm": 0.28837578069701103, "learning_rate": 9.546666666666668e-06, "loss": 0.3151, "step": 717 }, { "epoch": 0.05743885122297554, "grad_norm": 0.27555420357623844, "learning_rate": 9.56e-06, "loss": 0.2947, "step": 718 }, { "epoch": 0.05751884962300754, "grad_norm": 0.33600184027855773, "learning_rate": 9.573333333333334e-06, "loss": 0.2912, "step": 719 }, { "epoch": 0.05759884802303954, "grad_norm": 0.3131343156054618, "learning_rate": 9.586666666666667e-06, "loss": 0.2845, "step": 720 }, { "epoch": 0.05767884642307154, "grad_norm": 0.32839701241404035, "learning_rate": 9.600000000000001e-06, "loss": 0.2799, "step": 721 }, { "epoch": 0.05775884482310354, "grad_norm": 0.3212106140341321, "learning_rate": 9.613333333333335e-06, "loss": 0.3217, "step": 722 }, { "epoch": 0.05783884322313554, "grad_norm": 0.25252461776454266, "learning_rate": 9.626666666666667e-06, "loss": 0.3353, "step": 723 }, { "epoch": 0.05791884162316754, "grad_norm": 0.24037883844139904, "learning_rate": 9.640000000000001e-06, "loss": 0.3451, "step": 724 }, { "epoch": 0.057998840023199535, "grad_norm": 0.31095952895557355, "learning_rate": 9.653333333333335e-06, "loss": 0.305, "step": 725 }, { "epoch": 0.058078838423231534, "grad_norm": 0.33485933628816067, "learning_rate": 9.666666666666667e-06, "loss": 0.3334, "step": 726 }, { "epoch": 0.05815883682326353, "grad_norm": 0.31985505907968914, "learning_rate": 9.68e-06, "loss": 0.328, "step": 727 }, { "epoch": 0.05823883522329554, "grad_norm": 0.3774757991027118, "learning_rate": 9.693333333333334e-06, "loss": 0.3069, "step": 728 }, { "epoch": 0.058318833623327536, "grad_norm": 0.32543648008073467, "learning_rate": 9.706666666666668e-06, "loss": 0.3202, "step": 729 }, { "epoch": 0.058398832023359534, "grad_norm": 0.35242843957213776, "learning_rate": 9.72e-06, "loss": 0.2975, "step": 730 }, { "epoch": 0.05847883042339153, "grad_norm": 0.27290230400053705, "learning_rate": 9.733333333333334e-06, "loss": 0.3033, "step": 731 }, { "epoch": 0.05855882882342353, "grad_norm": 0.21370320862999415, "learning_rate": 9.746666666666668e-06, "loss": 0.4041, "step": 732 }, { "epoch": 0.05863882722345553, "grad_norm": 0.3063261628055901, "learning_rate": 9.760000000000001e-06, "loss": 0.2943, "step": 733 }, { "epoch": 0.05871882562348753, "grad_norm": 0.2958569286224122, "learning_rate": 9.773333333333335e-06, "loss": 0.3331, "step": 734 }, { "epoch": 0.05879882402351953, "grad_norm": 0.5851582264072526, "learning_rate": 9.786666666666667e-06, "loss": 0.2883, "step": 735 }, { "epoch": 0.05887882242355153, "grad_norm": 0.3256544060883519, "learning_rate": 9.800000000000001e-06, "loss": 0.2764, "step": 736 }, { "epoch": 0.05895882082358353, "grad_norm": 0.3329371427491858, "learning_rate": 9.813333333333333e-06, "loss": 0.2837, "step": 737 }, { "epoch": 0.05903881922361553, "grad_norm": 0.3101892312902451, "learning_rate": 9.826666666666667e-06, "loss": 0.3351, "step": 738 }, { "epoch": 0.05911881762364753, "grad_norm": 0.33160958979871064, "learning_rate": 9.84e-06, "loss": 0.272, "step": 739 }, { "epoch": 0.059198816023679525, "grad_norm": 0.3329108268349368, "learning_rate": 9.853333333333334e-06, "loss": 0.3371, "step": 740 }, { "epoch": 0.05927881442371152, "grad_norm": 0.3233304504191216, "learning_rate": 9.866666666666668e-06, "loss": 0.2868, "step": 741 }, { "epoch": 0.05935881282374352, "grad_norm": 0.30563238052838554, "learning_rate": 9.88e-06, "loss": 0.2778, "step": 742 }, { "epoch": 0.05943881122377553, "grad_norm": 0.33017617138630784, "learning_rate": 9.893333333333334e-06, "loss": 0.3032, "step": 743 }, { "epoch": 0.059518809623807525, "grad_norm": 0.34472193365708464, "learning_rate": 9.906666666666668e-06, "loss": 0.278, "step": 744 }, { "epoch": 0.059598808023839524, "grad_norm": 0.33325594383614676, "learning_rate": 9.920000000000002e-06, "loss": 0.2658, "step": 745 }, { "epoch": 0.05967880642387152, "grad_norm": 0.3160160101487113, "learning_rate": 9.933333333333334e-06, "loss": 0.317, "step": 746 }, { "epoch": 0.05975880482390352, "grad_norm": 0.2902744659795111, "learning_rate": 9.946666666666667e-06, "loss": 0.3025, "step": 747 }, { "epoch": 0.05983880322393552, "grad_norm": 0.2791714670562879, "learning_rate": 9.960000000000001e-06, "loss": 0.3458, "step": 748 }, { "epoch": 0.05991880162396752, "grad_norm": 0.33271426430696055, "learning_rate": 9.973333333333333e-06, "loss": 0.2788, "step": 749 }, { "epoch": 0.05999880002399952, "grad_norm": 0.28766011324461654, "learning_rate": 9.986666666666667e-06, "loss": 0.3159, "step": 750 }, { "epoch": 0.06007879842403152, "grad_norm": 0.3526844716665321, "learning_rate": 1e-05, "loss": 0.2866, "step": 751 }, { "epoch": 0.06015879682406352, "grad_norm": 0.34454130736303923, "learning_rate": 9.999999958041858e-06, "loss": 0.3015, "step": 752 }, { "epoch": 0.06023879522409552, "grad_norm": 0.30040506594110644, "learning_rate": 9.999999832167426e-06, "loss": 0.3348, "step": 753 }, { "epoch": 0.060318793624127516, "grad_norm": 0.304284929154205, "learning_rate": 9.99999962237671e-06, "loss": 0.3215, "step": 754 }, { "epoch": 0.060398792024159514, "grad_norm": 0.29279485871493055, "learning_rate": 9.999999328669713e-06, "loss": 0.3369, "step": 755 }, { "epoch": 0.06047879042419151, "grad_norm": 0.3386829020012594, "learning_rate": 9.999998951046439e-06, "loss": 0.2944, "step": 756 }, { "epoch": 0.06055878882422352, "grad_norm": 0.29152306993283794, "learning_rate": 9.999998489506897e-06, "loss": 0.3564, "step": 757 }, { "epoch": 0.060638787224255516, "grad_norm": 0.33479080452143656, "learning_rate": 9.999997944051089e-06, "loss": 0.2823, "step": 758 }, { "epoch": 0.060718785624287515, "grad_norm": 0.29891410542596814, "learning_rate": 9.999997314679031e-06, "loss": 0.2931, "step": 759 }, { "epoch": 0.06079878402431951, "grad_norm": 0.3065257606257566, "learning_rate": 9.99999660139073e-06, "loss": 0.285, "step": 760 }, { "epoch": 0.06087878242435151, "grad_norm": 0.3465903381136881, "learning_rate": 9.999995804186196e-06, "loss": 0.2694, "step": 761 }, { "epoch": 0.06095878082438351, "grad_norm": 0.2949167845266175, "learning_rate": 9.999994923065446e-06, "loss": 0.3105, "step": 762 }, { "epoch": 0.06103877922441551, "grad_norm": 0.35584238865904855, "learning_rate": 9.999993958028495e-06, "loss": 0.3081, "step": 763 }, { "epoch": 0.061118777624447514, "grad_norm": 0.33811528199246277, "learning_rate": 9.999992909075355e-06, "loss": 0.2744, "step": 764 }, { "epoch": 0.06119877602447951, "grad_norm": 0.28601475491219797, "learning_rate": 9.999991776206049e-06, "loss": 0.3231, "step": 765 }, { "epoch": 0.06127877442451151, "grad_norm": 0.32422075004554857, "learning_rate": 9.999990559420591e-06, "loss": 0.32, "step": 766 }, { "epoch": 0.06135877282454351, "grad_norm": 0.3026685579515361, "learning_rate": 9.999989258719005e-06, "loss": 0.3241, "step": 767 }, { "epoch": 0.06143877122457551, "grad_norm": 0.3338005490502487, "learning_rate": 9.999987874101312e-06, "loss": 0.2935, "step": 768 }, { "epoch": 0.061518769624607506, "grad_norm": 0.27249934087171196, "learning_rate": 9.999986405567535e-06, "loss": 0.3425, "step": 769 }, { "epoch": 0.061598768024639504, "grad_norm": 0.4260320079450853, "learning_rate": 9.999984853117697e-06, "loss": 0.3268, "step": 770 }, { "epoch": 0.06167876642467151, "grad_norm": 0.4692360536772432, "learning_rate": 9.999983216751826e-06, "loss": 0.303, "step": 771 }, { "epoch": 0.06175876482470351, "grad_norm": 0.5665946202088298, "learning_rate": 9.99998149646995e-06, "loss": 0.3169, "step": 772 }, { "epoch": 0.061838763224735506, "grad_norm": 0.3633678361199125, "learning_rate": 9.999979692272095e-06, "loss": 0.2867, "step": 773 }, { "epoch": 0.061918761624767504, "grad_norm": 0.27166581987426736, "learning_rate": 9.999977804158294e-06, "loss": 0.3083, "step": 774 }, { "epoch": 0.0619987600247995, "grad_norm": 0.2731532669064598, "learning_rate": 9.999975832128578e-06, "loss": 0.3336, "step": 775 }, { "epoch": 0.0620787584248315, "grad_norm": 0.28301758958492174, "learning_rate": 9.99997377618298e-06, "loss": 0.3149, "step": 776 }, { "epoch": 0.0621587568248635, "grad_norm": 0.28343763143867584, "learning_rate": 9.999971636321535e-06, "loss": 0.3325, "step": 777 }, { "epoch": 0.062238755224895505, "grad_norm": 0.34574030222504953, "learning_rate": 9.999969412544278e-06, "loss": 0.2844, "step": 778 }, { "epoch": 0.0623187536249275, "grad_norm": 0.2830608915094326, "learning_rate": 9.999967104851244e-06, "loss": 0.3435, "step": 779 }, { "epoch": 0.0623987520249595, "grad_norm": 0.3365611821912759, "learning_rate": 9.999964713242478e-06, "loss": 0.3173, "step": 780 }, { "epoch": 0.0624787504249915, "grad_norm": 0.2858577253840155, "learning_rate": 9.999962237718015e-06, "loss": 0.3093, "step": 781 }, { "epoch": 0.0625587488250235, "grad_norm": 0.4533288064383276, "learning_rate": 9.9999596782779e-06, "loss": 0.3051, "step": 782 }, { "epoch": 0.0626387472250555, "grad_norm": 0.34049192356213587, "learning_rate": 9.99995703492217e-06, "loss": 0.2879, "step": 783 }, { "epoch": 0.0627187456250875, "grad_norm": 0.3012409240608873, "learning_rate": 9.999954307650876e-06, "loss": 0.34, "step": 784 }, { "epoch": 0.0627987440251195, "grad_norm": 0.2968526314610915, "learning_rate": 9.999951496464062e-06, "loss": 0.3266, "step": 785 }, { "epoch": 0.0628787424251515, "grad_norm": 0.3210616140957018, "learning_rate": 9.999948601361773e-06, "loss": 0.3067, "step": 786 }, { "epoch": 0.06295874082518349, "grad_norm": 0.3521520582792855, "learning_rate": 9.999945622344058e-06, "loss": 0.3015, "step": 787 }, { "epoch": 0.0630387392252155, "grad_norm": 0.31400764020618854, "learning_rate": 9.99994255941097e-06, "loss": 0.2778, "step": 788 }, { "epoch": 0.0631187376252475, "grad_norm": 1.1320894505713925, "learning_rate": 9.999939412562558e-06, "loss": 0.296, "step": 789 }, { "epoch": 0.06319873602527949, "grad_norm": 0.4474003210841208, "learning_rate": 9.999936181798874e-06, "loss": 0.2894, "step": 790 }, { "epoch": 0.0632787344253115, "grad_norm": 0.33719606241734273, "learning_rate": 9.999932867119974e-06, "loss": 0.2848, "step": 791 }, { "epoch": 0.06335873282534349, "grad_norm": 0.32164119945542424, "learning_rate": 9.999929468525914e-06, "loss": 0.2966, "step": 792 }, { "epoch": 0.0634387312253755, "grad_norm": 0.35482592497914334, "learning_rate": 9.999925986016748e-06, "loss": 0.3034, "step": 793 }, { "epoch": 0.06351872962540749, "grad_norm": 0.32487914889490105, "learning_rate": 9.999922419592537e-06, "loss": 0.3028, "step": 794 }, { "epoch": 0.06359872802543949, "grad_norm": 0.3343689325929624, "learning_rate": 9.99991876925334e-06, "loss": 0.3105, "step": 795 }, { "epoch": 0.0636787264254715, "grad_norm": 0.26959506618921114, "learning_rate": 9.99991503499922e-06, "loss": 0.3778, "step": 796 }, { "epoch": 0.06375872482550349, "grad_norm": 0.2615678084045503, "learning_rate": 9.999911216830239e-06, "loss": 0.3158, "step": 797 }, { "epoch": 0.06383872322553549, "grad_norm": 0.29678374459825013, "learning_rate": 9.999907314746457e-06, "loss": 0.3116, "step": 798 }, { "epoch": 0.06391872162556748, "grad_norm": 0.3122237853890146, "learning_rate": 9.999903328747946e-06, "loss": 0.3171, "step": 799 }, { "epoch": 0.06399872002559949, "grad_norm": 0.35289122075626556, "learning_rate": 9.999899258834769e-06, "loss": 0.2967, "step": 800 }, { "epoch": 0.06407871842563148, "grad_norm": 0.3339393468164776, "learning_rate": 9.999895105006995e-06, "loss": 0.2837, "step": 801 }, { "epoch": 0.06415871682566349, "grad_norm": 0.33901067013543945, "learning_rate": 9.999890867264693e-06, "loss": 0.2929, "step": 802 }, { "epoch": 0.06423871522569549, "grad_norm": 0.49212948840384874, "learning_rate": 9.999886545607935e-06, "loss": 0.2754, "step": 803 }, { "epoch": 0.06431871362572748, "grad_norm": 0.3613725725946296, "learning_rate": 9.999882140036794e-06, "loss": 0.3092, "step": 804 }, { "epoch": 0.06439871202575949, "grad_norm": 0.3122380182663793, "learning_rate": 9.999877650551344e-06, "loss": 0.2954, "step": 805 }, { "epoch": 0.06447871042579148, "grad_norm": 0.35452566924458495, "learning_rate": 9.999873077151659e-06, "loss": 0.2986, "step": 806 }, { "epoch": 0.06455870882582349, "grad_norm": 0.32091417039466086, "learning_rate": 9.999868419837818e-06, "loss": 0.3032, "step": 807 }, { "epoch": 0.06463870722585548, "grad_norm": 0.33589821110072005, "learning_rate": 9.999863678609895e-06, "loss": 0.2753, "step": 808 }, { "epoch": 0.06471870562588748, "grad_norm": 0.3885732555710655, "learning_rate": 9.999858853467972e-06, "loss": 0.2885, "step": 809 }, { "epoch": 0.06479870402591949, "grad_norm": 0.3000688135501107, "learning_rate": 9.999853944412133e-06, "loss": 0.3396, "step": 810 }, { "epoch": 0.06487870242595148, "grad_norm": 0.31908088386782457, "learning_rate": 9.999848951442455e-06, "loss": 0.2678, "step": 811 }, { "epoch": 0.06495870082598348, "grad_norm": 0.32998759142285744, "learning_rate": 9.999843874559026e-06, "loss": 0.2746, "step": 812 }, { "epoch": 0.06503869922601548, "grad_norm": 0.28369174102258066, "learning_rate": 9.99983871376193e-06, "loss": 0.3381, "step": 813 }, { "epoch": 0.06511869762604748, "grad_norm": 0.36153579901620225, "learning_rate": 9.999833469051251e-06, "loss": 0.2793, "step": 814 }, { "epoch": 0.06519869602607947, "grad_norm": 0.304507852677711, "learning_rate": 9.999828140427082e-06, "loss": 0.3347, "step": 815 }, { "epoch": 0.06527869442611148, "grad_norm": 0.3749989558189015, "learning_rate": 9.999822727889507e-06, "loss": 0.2984, "step": 816 }, { "epoch": 0.06535869282614348, "grad_norm": 0.3097756171081361, "learning_rate": 9.99981723143862e-06, "loss": 0.3062, "step": 817 }, { "epoch": 0.06543869122617547, "grad_norm": 0.31722382003466015, "learning_rate": 9.999811651074513e-06, "loss": 0.292, "step": 818 }, { "epoch": 0.06551868962620748, "grad_norm": 0.2597025573948709, "learning_rate": 9.99980598679728e-06, "loss": 0.3213, "step": 819 }, { "epoch": 0.06559868802623947, "grad_norm": 0.21773321585146505, "learning_rate": 9.999800238607017e-06, "loss": 0.376, "step": 820 }, { "epoch": 0.06567868642627148, "grad_norm": 0.24386620927410277, "learning_rate": 9.999794406503816e-06, "loss": 0.3245, "step": 821 }, { "epoch": 0.06575868482630347, "grad_norm": 0.2836226912888357, "learning_rate": 9.99978849048778e-06, "loss": 0.3109, "step": 822 }, { "epoch": 0.06583868322633547, "grad_norm": 0.30392952183385813, "learning_rate": 9.999782490559004e-06, "loss": 0.3184, "step": 823 }, { "epoch": 0.06591868162636748, "grad_norm": 0.25329092060401626, "learning_rate": 9.999776406717592e-06, "loss": 0.3421, "step": 824 }, { "epoch": 0.06599868002639947, "grad_norm": 0.3286974482373493, "learning_rate": 9.999770238963646e-06, "loss": 0.3007, "step": 825 }, { "epoch": 0.06607867842643148, "grad_norm": 0.3357618677234781, "learning_rate": 9.999763987297266e-06, "loss": 0.2904, "step": 826 }, { "epoch": 0.06615867682646347, "grad_norm": 0.288379407718952, "learning_rate": 9.999757651718562e-06, "loss": 0.2949, "step": 827 }, { "epoch": 0.06623867522649547, "grad_norm": 0.3307194139454798, "learning_rate": 9.999751232227636e-06, "loss": 0.3177, "step": 828 }, { "epoch": 0.06631867362652746, "grad_norm": 0.2858103872043677, "learning_rate": 9.999744728824599e-06, "loss": 0.3374, "step": 829 }, { "epoch": 0.06639867202655947, "grad_norm": 0.28295822925883035, "learning_rate": 9.999738141509557e-06, "loss": 0.3543, "step": 830 }, { "epoch": 0.06647867042659147, "grad_norm": 0.3407271227598823, "learning_rate": 9.999731470282621e-06, "loss": 0.2914, "step": 831 }, { "epoch": 0.06655866882662347, "grad_norm": 0.2771287086614404, "learning_rate": 9.999724715143908e-06, "loss": 0.3129, "step": 832 }, { "epoch": 0.06663866722665547, "grad_norm": 0.28489926698097295, "learning_rate": 9.999717876093525e-06, "loss": 0.3131, "step": 833 }, { "epoch": 0.06671866562668746, "grad_norm": 0.3325025628924701, "learning_rate": 9.999710953131589e-06, "loss": 0.2799, "step": 834 }, { "epoch": 0.06679866402671947, "grad_norm": 0.30464812119369733, "learning_rate": 9.999703946258217e-06, "loss": 0.2861, "step": 835 }, { "epoch": 0.06687866242675146, "grad_norm": 0.26117995376252917, "learning_rate": 9.999696855473525e-06, "loss": 0.3587, "step": 836 }, { "epoch": 0.06695866082678346, "grad_norm": 0.30527132252110845, "learning_rate": 9.999689680777634e-06, "loss": 0.3159, "step": 837 }, { "epoch": 0.06703865922681547, "grad_norm": 0.2961056326196707, "learning_rate": 9.999682422170663e-06, "loss": 0.3187, "step": 838 }, { "epoch": 0.06711865762684746, "grad_norm": 0.316315169906044, "learning_rate": 9.999675079652736e-06, "loss": 0.3196, "step": 839 }, { "epoch": 0.06719865602687947, "grad_norm": 0.31220143140115564, "learning_rate": 9.999667653223972e-06, "loss": 0.3285, "step": 840 }, { "epoch": 0.06727865442691146, "grad_norm": 0.3268976513845286, "learning_rate": 9.9996601428845e-06, "loss": 0.2869, "step": 841 }, { "epoch": 0.06735865282694346, "grad_norm": 0.24482625318580367, "learning_rate": 9.999652548634443e-06, "loss": 0.3519, "step": 842 }, { "epoch": 0.06743865122697545, "grad_norm": 0.20743034223042126, "learning_rate": 9.99964487047393e-06, "loss": 0.3782, "step": 843 }, { "epoch": 0.06751864962700746, "grad_norm": 0.4346332051521387, "learning_rate": 9.999637108403091e-06, "loss": 0.3216, "step": 844 }, { "epoch": 0.06759864802703947, "grad_norm": 0.2880547520027332, "learning_rate": 9.999629262422053e-06, "loss": 0.3263, "step": 845 }, { "epoch": 0.06767864642707146, "grad_norm": 0.30184781898213153, "learning_rate": 9.99962133253095e-06, "loss": 0.3083, "step": 846 }, { "epoch": 0.06775864482710346, "grad_norm": 0.29343891247936804, "learning_rate": 9.999613318729915e-06, "loss": 0.3287, "step": 847 }, { "epoch": 0.06783864322713545, "grad_norm": 0.3073275633377766, "learning_rate": 9.999605221019082e-06, "loss": 0.3199, "step": 848 }, { "epoch": 0.06791864162716746, "grad_norm": 0.3560461036806395, "learning_rate": 9.999597039398586e-06, "loss": 0.3202, "step": 849 }, { "epoch": 0.06799864002719945, "grad_norm": 0.6114206388246446, "learning_rate": 9.999588773868566e-06, "loss": 0.2802, "step": 850 }, { "epoch": 0.06807863842723146, "grad_norm": 0.2923793972898796, "learning_rate": 9.99958042442916e-06, "loss": 0.3103, "step": 851 }, { "epoch": 0.06815863682726346, "grad_norm": 0.2984755197747074, "learning_rate": 9.999571991080508e-06, "loss": 0.2977, "step": 852 }, { "epoch": 0.06823863522729545, "grad_norm": 0.32157105170811334, "learning_rate": 9.999563473822752e-06, "loss": 0.3207, "step": 853 }, { "epoch": 0.06831863362732746, "grad_norm": 0.32848645814233124, "learning_rate": 9.999554872656034e-06, "loss": 0.2515, "step": 854 }, { "epoch": 0.06839863202735945, "grad_norm": 0.3585774135921888, "learning_rate": 9.9995461875805e-06, "loss": 0.3213, "step": 855 }, { "epoch": 0.06847863042739145, "grad_norm": 0.3349066780603757, "learning_rate": 9.999537418596294e-06, "loss": 0.3034, "step": 856 }, { "epoch": 0.06855862882742345, "grad_norm": 0.250654871136222, "learning_rate": 9.999528565703564e-06, "loss": 0.333, "step": 857 }, { "epoch": 0.06863862722745545, "grad_norm": 0.2797080485039279, "learning_rate": 9.99951962890246e-06, "loss": 0.3367, "step": 858 }, { "epoch": 0.06871862562748746, "grad_norm": 0.35543046820929536, "learning_rate": 9.999510608193128e-06, "loss": 0.2826, "step": 859 }, { "epoch": 0.06879862402751945, "grad_norm": 0.47537950747569424, "learning_rate": 9.999501503575723e-06, "loss": 0.3151, "step": 860 }, { "epoch": 0.06887862242755145, "grad_norm": 0.2941357899505503, "learning_rate": 9.999492315050396e-06, "loss": 0.327, "step": 861 }, { "epoch": 0.06895862082758344, "grad_norm": 0.3710929799698191, "learning_rate": 9.999483042617304e-06, "loss": 0.3071, "step": 862 }, { "epoch": 0.06903861922761545, "grad_norm": 0.3155009973514162, "learning_rate": 9.999473686276598e-06, "loss": 0.2707, "step": 863 }, { "epoch": 0.06911861762764744, "grad_norm": 0.4628012801933148, "learning_rate": 9.999464246028439e-06, "loss": 0.2817, "step": 864 }, { "epoch": 0.06919861602767945, "grad_norm": 0.32032826179603335, "learning_rate": 9.999454721872983e-06, "loss": 0.2963, "step": 865 }, { "epoch": 0.06927861442771145, "grad_norm": 0.7661699979234026, "learning_rate": 9.999445113810392e-06, "loss": 0.3102, "step": 866 }, { "epoch": 0.06935861282774344, "grad_norm": 0.321374265349741, "learning_rate": 9.999435421840826e-06, "loss": 0.3147, "step": 867 }, { "epoch": 0.06943861122777545, "grad_norm": 0.31874388243033475, "learning_rate": 9.999425645964447e-06, "loss": 0.3316, "step": 868 }, { "epoch": 0.06951860962780744, "grad_norm": 0.29860104988455355, "learning_rate": 9.99941578618142e-06, "loss": 0.319, "step": 869 }, { "epoch": 0.06959860802783945, "grad_norm": 0.30054519230834287, "learning_rate": 9.999405842491912e-06, "loss": 0.3456, "step": 870 }, { "epoch": 0.06967860642787144, "grad_norm": 0.41801898325084913, "learning_rate": 9.999395814896086e-06, "loss": 0.3022, "step": 871 }, { "epoch": 0.06975860482790344, "grad_norm": 0.3626328267978701, "learning_rate": 9.999385703394113e-06, "loss": 0.277, "step": 872 }, { "epoch": 0.06983860322793545, "grad_norm": 0.3015972166288483, "learning_rate": 9.999375507986163e-06, "loss": 0.3279, "step": 873 }, { "epoch": 0.06991860162796744, "grad_norm": 0.28319933045952733, "learning_rate": 9.999365228672404e-06, "loss": 0.3538, "step": 874 }, { "epoch": 0.06999860002799944, "grad_norm": 0.29970802715120304, "learning_rate": 9.999354865453012e-06, "loss": 0.31, "step": 875 }, { "epoch": 0.07007859842803144, "grad_norm": 0.30350342791761536, "learning_rate": 9.999344418328161e-06, "loss": 0.3384, "step": 876 }, { "epoch": 0.07015859682806344, "grad_norm": 0.31810227023067783, "learning_rate": 9.999333887298025e-06, "loss": 0.3004, "step": 877 }, { "epoch": 0.07023859522809543, "grad_norm": 0.6866076307206432, "learning_rate": 9.999323272362779e-06, "loss": 0.2921, "step": 878 }, { "epoch": 0.07031859362812744, "grad_norm": 0.29716429362144453, "learning_rate": 9.999312573522606e-06, "loss": 0.3121, "step": 879 }, { "epoch": 0.07039859202815944, "grad_norm": 0.2925023029285221, "learning_rate": 9.99930179077768e-06, "loss": 0.2964, "step": 880 }, { "epoch": 0.07047859042819143, "grad_norm": 0.29065379543897074, "learning_rate": 9.999290924128186e-06, "loss": 0.3079, "step": 881 }, { "epoch": 0.07055858882822344, "grad_norm": 0.3273714302332476, "learning_rate": 9.999279973574303e-06, "loss": 0.3218, "step": 882 }, { "epoch": 0.07063858722825543, "grad_norm": 0.28966850261552146, "learning_rate": 9.999268939116218e-06, "loss": 0.3225, "step": 883 }, { "epoch": 0.07071858562828744, "grad_norm": 0.33603343368876815, "learning_rate": 9.999257820754116e-06, "loss": 0.2906, "step": 884 }, { "epoch": 0.07079858402831943, "grad_norm": 0.22707881918763348, "learning_rate": 9.999246618488181e-06, "loss": 0.359, "step": 885 }, { "epoch": 0.07087858242835143, "grad_norm": 0.23913340314025489, "learning_rate": 9.999235332318603e-06, "loss": 0.3883, "step": 886 }, { "epoch": 0.07095858082838344, "grad_norm": 0.39838311567870205, "learning_rate": 9.99922396224557e-06, "loss": 0.3028, "step": 887 }, { "epoch": 0.07103857922841543, "grad_norm": 0.3186128109962625, "learning_rate": 9.999212508269274e-06, "loss": 0.3126, "step": 888 }, { "epoch": 0.07111857762844744, "grad_norm": 0.2825226338939764, "learning_rate": 9.999200970389909e-06, "loss": 0.315, "step": 889 }, { "epoch": 0.07119857602847943, "grad_norm": 0.31523118531663585, "learning_rate": 9.999189348607664e-06, "loss": 0.2787, "step": 890 }, { "epoch": 0.07127857442851143, "grad_norm": 0.2847158304797025, "learning_rate": 9.999177642922736e-06, "loss": 0.3411, "step": 891 }, { "epoch": 0.07135857282854342, "grad_norm": 0.33357590620540634, "learning_rate": 9.999165853335325e-06, "loss": 0.2822, "step": 892 }, { "epoch": 0.07143857122857543, "grad_norm": 0.34322851374654223, "learning_rate": 9.999153979845625e-06, "loss": 0.3483, "step": 893 }, { "epoch": 0.07151856962860743, "grad_norm": 0.30048330516808835, "learning_rate": 9.999142022453836e-06, "loss": 0.3176, "step": 894 }, { "epoch": 0.07159856802863943, "grad_norm": 0.35094029625908313, "learning_rate": 9.999129981160159e-06, "loss": 0.3135, "step": 895 }, { "epoch": 0.07167856642867143, "grad_norm": 0.3405289734734757, "learning_rate": 9.999117855964797e-06, "loss": 0.3066, "step": 896 }, { "epoch": 0.07175856482870342, "grad_norm": 0.531654700275743, "learning_rate": 9.99910564686795e-06, "loss": 0.33, "step": 897 }, { "epoch": 0.07183856322873543, "grad_norm": 0.36689265164701906, "learning_rate": 9.999093353869828e-06, "loss": 0.3317, "step": 898 }, { "epoch": 0.07191856162876742, "grad_norm": 0.2884737955362762, "learning_rate": 9.999080976970635e-06, "loss": 0.3278, "step": 899 }, { "epoch": 0.07199856002879942, "grad_norm": 0.28923390447366376, "learning_rate": 9.999068516170577e-06, "loss": 0.3086, "step": 900 }, { "epoch": 0.07207855842883143, "grad_norm": 0.3538448417863541, "learning_rate": 9.999055971469864e-06, "loss": 0.3066, "step": 901 }, { "epoch": 0.07215855682886342, "grad_norm": 0.2694161396908422, "learning_rate": 9.999043342868708e-06, "loss": 0.3449, "step": 902 }, { "epoch": 0.07223855522889543, "grad_norm": 0.26685647147984126, "learning_rate": 9.99903063036732e-06, "loss": 0.3352, "step": 903 }, { "epoch": 0.07231855362892742, "grad_norm": 0.3259867131932054, "learning_rate": 9.999017833965914e-06, "loss": 0.3023, "step": 904 }, { "epoch": 0.07239855202895942, "grad_norm": 0.28033383984958504, "learning_rate": 9.999004953664703e-06, "loss": 0.3279, "step": 905 }, { "epoch": 0.07247855042899141, "grad_norm": 0.2516773645713914, "learning_rate": 9.998991989463906e-06, "loss": 0.3197, "step": 906 }, { "epoch": 0.07255854882902342, "grad_norm": 0.36273981028380786, "learning_rate": 9.998978941363739e-06, "loss": 0.2977, "step": 907 }, { "epoch": 0.07263854722905543, "grad_norm": 0.30825315485564986, "learning_rate": 9.998965809364421e-06, "loss": 0.3185, "step": 908 }, { "epoch": 0.07271854562908742, "grad_norm": 0.23737112162017282, "learning_rate": 9.998952593466171e-06, "loss": 0.3293, "step": 909 }, { "epoch": 0.07279854402911942, "grad_norm": 0.2833876914512136, "learning_rate": 9.998939293669213e-06, "loss": 0.3038, "step": 910 }, { "epoch": 0.07287854242915141, "grad_norm": 0.42855843550103295, "learning_rate": 9.998925909973769e-06, "loss": 0.3134, "step": 911 }, { "epoch": 0.07295854082918342, "grad_norm": 0.28711370902613625, "learning_rate": 9.998912442380065e-06, "loss": 0.3365, "step": 912 }, { "epoch": 0.07303853922921541, "grad_norm": 0.317298845026507, "learning_rate": 9.998898890888325e-06, "loss": 0.3232, "step": 913 }, { "epoch": 0.07311853762924742, "grad_norm": 0.3042676846050397, "learning_rate": 9.998885255498778e-06, "loss": 0.3413, "step": 914 }, { "epoch": 0.07319853602927942, "grad_norm": 0.2641136670631214, "learning_rate": 9.998871536211652e-06, "loss": 0.3665, "step": 915 }, { "epoch": 0.07327853442931141, "grad_norm": 0.3202256601343914, "learning_rate": 9.998857733027179e-06, "loss": 0.3122, "step": 916 }, { "epoch": 0.07335853282934342, "grad_norm": 0.36247827930015253, "learning_rate": 9.998843845945587e-06, "loss": 0.257, "step": 917 }, { "epoch": 0.07343853122937541, "grad_norm": 0.28295246226779514, "learning_rate": 9.998829874967114e-06, "loss": 0.3201, "step": 918 }, { "epoch": 0.07351852962940741, "grad_norm": 0.30030632259180257, "learning_rate": 9.99881582009199e-06, "loss": 0.3289, "step": 919 }, { "epoch": 0.0735985280294394, "grad_norm": 0.3642857165626749, "learning_rate": 9.998801681320452e-06, "loss": 0.3055, "step": 920 }, { "epoch": 0.07367852642947141, "grad_norm": 0.35244807062529443, "learning_rate": 9.99878745865274e-06, "loss": 0.2926, "step": 921 }, { "epoch": 0.07375852482950342, "grad_norm": 0.26925829576299365, "learning_rate": 9.99877315208909e-06, "loss": 0.3355, "step": 922 }, { "epoch": 0.07383852322953541, "grad_norm": 0.31693613062825665, "learning_rate": 9.99875876162974e-06, "loss": 0.2737, "step": 923 }, { "epoch": 0.07391852162956741, "grad_norm": 0.3339030755538431, "learning_rate": 9.998744287274937e-06, "loss": 0.2828, "step": 924 }, { "epoch": 0.0739985200295994, "grad_norm": 0.3091429003619955, "learning_rate": 9.998729729024922e-06, "loss": 0.3198, "step": 925 }, { "epoch": 0.07407851842963141, "grad_norm": 0.3029008515891064, "learning_rate": 9.998715086879938e-06, "loss": 0.3425, "step": 926 }, { "epoch": 0.0741585168296634, "grad_norm": 0.2743957663640204, "learning_rate": 9.998700360840231e-06, "loss": 0.3498, "step": 927 }, { "epoch": 0.0742385152296954, "grad_norm": 0.3194125909333428, "learning_rate": 9.998685550906048e-06, "loss": 0.2926, "step": 928 }, { "epoch": 0.07431851362972741, "grad_norm": 0.3153345989993005, "learning_rate": 9.998670657077638e-06, "loss": 0.3284, "step": 929 }, { "epoch": 0.0743985120297594, "grad_norm": 0.2962340588450138, "learning_rate": 9.998655679355252e-06, "loss": 0.3281, "step": 930 }, { "epoch": 0.07447851042979141, "grad_norm": 0.34204540319260884, "learning_rate": 9.99864061773914e-06, "loss": 0.3392, "step": 931 }, { "epoch": 0.0745585088298234, "grad_norm": 0.3460179890160004, "learning_rate": 9.998625472229555e-06, "loss": 0.3106, "step": 932 }, { "epoch": 0.0746385072298554, "grad_norm": 0.35612331114946627, "learning_rate": 9.998610242826752e-06, "loss": 0.2849, "step": 933 }, { "epoch": 0.0747185056298874, "grad_norm": 0.2805762961982535, "learning_rate": 9.998594929530985e-06, "loss": 0.3151, "step": 934 }, { "epoch": 0.0747985040299194, "grad_norm": 0.3305900574893122, "learning_rate": 9.998579532342511e-06, "loss": 0.2812, "step": 935 }, { "epoch": 0.07487850242995141, "grad_norm": 0.31614811632016304, "learning_rate": 9.998564051261593e-06, "loss": 0.3121, "step": 936 }, { "epoch": 0.0749585008299834, "grad_norm": 0.32616497993418414, "learning_rate": 9.998548486288483e-06, "loss": 0.3382, "step": 937 }, { "epoch": 0.0750384992300154, "grad_norm": 0.32374662058488274, "learning_rate": 9.998532837423448e-06, "loss": 0.2949, "step": 938 }, { "epoch": 0.0751184976300474, "grad_norm": 0.2639322522516908, "learning_rate": 9.998517104666749e-06, "loss": 0.3375, "step": 939 }, { "epoch": 0.0751984960300794, "grad_norm": 0.3429512898494253, "learning_rate": 9.998501288018651e-06, "loss": 0.3107, "step": 940 }, { "epoch": 0.07527849443011139, "grad_norm": 0.2961043802990138, "learning_rate": 9.998485387479418e-06, "loss": 0.3151, "step": 941 }, { "epoch": 0.0753584928301434, "grad_norm": 0.3207619441986614, "learning_rate": 9.998469403049318e-06, "loss": 0.3191, "step": 942 }, { "epoch": 0.0754384912301754, "grad_norm": 0.25259197092792135, "learning_rate": 9.998453334728619e-06, "loss": 0.3492, "step": 943 }, { "epoch": 0.0755184896302074, "grad_norm": 0.26607035336731205, "learning_rate": 9.998437182517589e-06, "loss": 0.3109, "step": 944 }, { "epoch": 0.0755984880302394, "grad_norm": 0.3095723706810741, "learning_rate": 9.9984209464165e-06, "loss": 0.3197, "step": 945 }, { "epoch": 0.07567848643027139, "grad_norm": 0.2906994902380521, "learning_rate": 9.998404626425627e-06, "loss": 0.3208, "step": 946 }, { "epoch": 0.0757584848303034, "grad_norm": 0.37095484390605793, "learning_rate": 9.998388222545242e-06, "loss": 0.2908, "step": 947 }, { "epoch": 0.07583848323033539, "grad_norm": 0.33311119159043867, "learning_rate": 9.998371734775618e-06, "loss": 0.2865, "step": 948 }, { "epoch": 0.0759184816303674, "grad_norm": 0.31047094509315915, "learning_rate": 9.998355163117035e-06, "loss": 0.3425, "step": 949 }, { "epoch": 0.0759984800303994, "grad_norm": 0.34316326352164156, "learning_rate": 9.99833850756977e-06, "loss": 0.3123, "step": 950 }, { "epoch": 0.07607847843043139, "grad_norm": 0.26416866802349737, "learning_rate": 9.998321768134101e-06, "loss": 0.3353, "step": 951 }, { "epoch": 0.0761584768304634, "grad_norm": 0.31700262119304584, "learning_rate": 9.998304944810314e-06, "loss": 0.3215, "step": 952 }, { "epoch": 0.07623847523049539, "grad_norm": 0.3323728655426343, "learning_rate": 9.998288037598684e-06, "loss": 0.291, "step": 953 }, { "epoch": 0.07631847363052739, "grad_norm": 0.300428098076248, "learning_rate": 9.998271046499501e-06, "loss": 0.312, "step": 954 }, { "epoch": 0.07639847203055938, "grad_norm": 0.40117205074825596, "learning_rate": 9.998253971513048e-06, "loss": 0.2845, "step": 955 }, { "epoch": 0.07647847043059139, "grad_norm": 0.2934713167443252, "learning_rate": 9.99823681263961e-06, "loss": 0.336, "step": 956 }, { "epoch": 0.0765584688306234, "grad_norm": 0.3046555655299077, "learning_rate": 9.998219569879476e-06, "loss": 0.2986, "step": 957 }, { "epoch": 0.07663846723065539, "grad_norm": 0.34447567097937876, "learning_rate": 9.998202243232937e-06, "loss": 0.3142, "step": 958 }, { "epoch": 0.07671846563068739, "grad_norm": 0.32552140178348066, "learning_rate": 9.998184832700282e-06, "loss": 0.2872, "step": 959 }, { "epoch": 0.07679846403071938, "grad_norm": 0.414640257479251, "learning_rate": 9.998167338281803e-06, "loss": 0.3002, "step": 960 }, { "epoch": 0.07687846243075139, "grad_norm": 0.3005059383881909, "learning_rate": 9.998149759977795e-06, "loss": 0.3236, "step": 961 }, { "epoch": 0.07695846083078338, "grad_norm": 0.28314142435755596, "learning_rate": 9.998132097788554e-06, "loss": 0.3271, "step": 962 }, { "epoch": 0.07703845923081538, "grad_norm": 0.330800544738117, "learning_rate": 9.998114351714373e-06, "loss": 0.3339, "step": 963 }, { "epoch": 0.07711845763084739, "grad_norm": 0.35807438473415937, "learning_rate": 9.998096521755552e-06, "loss": 0.3101, "step": 964 }, { "epoch": 0.07719845603087938, "grad_norm": 0.26743430200408586, "learning_rate": 9.99807860791239e-06, "loss": 0.334, "step": 965 }, { "epoch": 0.07727845443091139, "grad_norm": 0.3060667747402942, "learning_rate": 9.998060610185187e-06, "loss": 0.331, "step": 966 }, { "epoch": 0.07735845283094338, "grad_norm": 0.2954348052423245, "learning_rate": 9.998042528574246e-06, "loss": 0.3258, "step": 967 }, { "epoch": 0.07743845123097538, "grad_norm": 0.31044693004102286, "learning_rate": 9.99802436307987e-06, "loss": 0.3329, "step": 968 }, { "epoch": 0.07751844963100737, "grad_norm": 0.2884319489948687, "learning_rate": 9.998006113702363e-06, "loss": 0.308, "step": 969 }, { "epoch": 0.07759844803103938, "grad_norm": 0.3128652725221715, "learning_rate": 9.997987780442033e-06, "loss": 0.3143, "step": 970 }, { "epoch": 0.07767844643107139, "grad_norm": 0.27977003156194064, "learning_rate": 9.997969363299187e-06, "loss": 0.303, "step": 971 }, { "epoch": 0.07775844483110338, "grad_norm": 0.30627445888831595, "learning_rate": 9.997950862274134e-06, "loss": 0.3093, "step": 972 }, { "epoch": 0.07783844323113538, "grad_norm": 0.25513555037593205, "learning_rate": 9.997932277367183e-06, "loss": 0.343, "step": 973 }, { "epoch": 0.07791844163116737, "grad_norm": 0.27506729459930485, "learning_rate": 9.997913608578651e-06, "loss": 0.3421, "step": 974 }, { "epoch": 0.07799844003119938, "grad_norm": 0.30285205423854594, "learning_rate": 9.997894855908844e-06, "loss": 0.3244, "step": 975 }, { "epoch": 0.07807843843123137, "grad_norm": 0.3408747757138861, "learning_rate": 9.997876019358083e-06, "loss": 0.2707, "step": 976 }, { "epoch": 0.07815843683126338, "grad_norm": 0.23530762933875393, "learning_rate": 9.997857098926679e-06, "loss": 0.3429, "step": 977 }, { "epoch": 0.07823843523129538, "grad_norm": 0.38361673504863586, "learning_rate": 9.997838094614956e-06, "loss": 0.3373, "step": 978 }, { "epoch": 0.07831843363132737, "grad_norm": 0.2881876184207208, "learning_rate": 9.997819006423227e-06, "loss": 0.3015, "step": 979 }, { "epoch": 0.07839843203135938, "grad_norm": 0.28910724951259187, "learning_rate": 9.997799834351814e-06, "loss": 0.3093, "step": 980 }, { "epoch": 0.07847843043139137, "grad_norm": 0.34745793904767547, "learning_rate": 9.99778057840104e-06, "loss": 0.3017, "step": 981 }, { "epoch": 0.07855842883142337, "grad_norm": 0.3217752144288568, "learning_rate": 9.997761238571227e-06, "loss": 0.2913, "step": 982 }, { "epoch": 0.07863842723145537, "grad_norm": 0.3298380423069833, "learning_rate": 9.9977418148627e-06, "loss": 0.2823, "step": 983 }, { "epoch": 0.07871842563148737, "grad_norm": 0.33207326996160563, "learning_rate": 9.997722307275785e-06, "loss": 0.2762, "step": 984 }, { "epoch": 0.07879842403151938, "grad_norm": 0.3369495613230937, "learning_rate": 9.99770271581081e-06, "loss": 0.3013, "step": 985 }, { "epoch": 0.07887842243155137, "grad_norm": 0.3078921477860902, "learning_rate": 9.997683040468103e-06, "loss": 0.3285, "step": 986 }, { "epoch": 0.07895842083158337, "grad_norm": 0.2906765881690975, "learning_rate": 9.997663281247993e-06, "loss": 0.3404, "step": 987 }, { "epoch": 0.07903841923161536, "grad_norm": 0.3200641043001228, "learning_rate": 9.997643438150814e-06, "loss": 0.3293, "step": 988 }, { "epoch": 0.07911841763164737, "grad_norm": 0.27999887105401894, "learning_rate": 9.9976235111769e-06, "loss": 0.3069, "step": 989 }, { "epoch": 0.07919841603167936, "grad_norm": 0.35132035991432753, "learning_rate": 9.99760350032658e-06, "loss": 0.3143, "step": 990 }, { "epoch": 0.07927841443171137, "grad_norm": 0.36487130571936177, "learning_rate": 9.997583405600194e-06, "loss": 0.3009, "step": 991 }, { "epoch": 0.07935841283174337, "grad_norm": 0.3386057666259531, "learning_rate": 9.997563226998082e-06, "loss": 0.3071, "step": 992 }, { "epoch": 0.07943841123177536, "grad_norm": 0.29096601909665687, "learning_rate": 9.997542964520576e-06, "loss": 0.338, "step": 993 }, { "epoch": 0.07951840963180737, "grad_norm": 0.2799918723267478, "learning_rate": 9.99752261816802e-06, "loss": 0.3246, "step": 994 }, { "epoch": 0.07959840803183936, "grad_norm": 0.3305568355038778, "learning_rate": 9.997502187940757e-06, "loss": 0.3111, "step": 995 }, { "epoch": 0.07967840643187137, "grad_norm": 0.29763518156057117, "learning_rate": 9.997481673839125e-06, "loss": 0.3686, "step": 996 }, { "epoch": 0.07975840483190336, "grad_norm": 0.30780614834520464, "learning_rate": 9.997461075863473e-06, "loss": 0.3406, "step": 997 }, { "epoch": 0.07983840323193536, "grad_norm": 0.339429540242941, "learning_rate": 9.997440394014143e-06, "loss": 0.3098, "step": 998 }, { "epoch": 0.07991840163196737, "grad_norm": 0.31230674056657237, "learning_rate": 9.997419628291485e-06, "loss": 0.319, "step": 999 }, { "epoch": 0.07999840003199936, "grad_norm": 0.32994648197852433, "learning_rate": 9.997398778695847e-06, "loss": 0.2853, "step": 1000 }, { "epoch": 0.08007839843203136, "grad_norm": 0.33815039042345024, "learning_rate": 9.997377845227577e-06, "loss": 0.2982, "step": 1001 }, { "epoch": 0.08015839683206336, "grad_norm": 0.29187753039875036, "learning_rate": 9.997356827887026e-06, "loss": 0.3394, "step": 1002 }, { "epoch": 0.08023839523209536, "grad_norm": 0.279755247934047, "learning_rate": 9.99733572667455e-06, "loss": 0.3221, "step": 1003 }, { "epoch": 0.08031839363212735, "grad_norm": 0.25185514220515154, "learning_rate": 9.997314541590502e-06, "loss": 0.3441, "step": 1004 }, { "epoch": 0.08039839203215936, "grad_norm": 0.4059835828636773, "learning_rate": 9.997293272635236e-06, "loss": 0.339, "step": 1005 }, { "epoch": 0.08047839043219136, "grad_norm": 0.27775640057768064, "learning_rate": 9.99727191980911e-06, "loss": 0.3442, "step": 1006 }, { "epoch": 0.08055838883222335, "grad_norm": 0.32869754471714496, "learning_rate": 9.997250483112483e-06, "loss": 0.3082, "step": 1007 }, { "epoch": 0.08063838723225536, "grad_norm": 0.32743063042423676, "learning_rate": 9.997228962545715e-06, "loss": 0.2681, "step": 1008 }, { "epoch": 0.08071838563228735, "grad_norm": 0.2551864563966594, "learning_rate": 9.997207358109166e-06, "loss": 0.3501, "step": 1009 }, { "epoch": 0.08079838403231936, "grad_norm": 0.3719108021606949, "learning_rate": 9.997185669803197e-06, "loss": 0.301, "step": 1010 }, { "epoch": 0.08087838243235135, "grad_norm": 0.3240776724396333, "learning_rate": 9.997163897628175e-06, "loss": 0.322, "step": 1011 }, { "epoch": 0.08095838083238335, "grad_norm": 0.30233982071991194, "learning_rate": 9.997142041584467e-06, "loss": 0.3085, "step": 1012 }, { "epoch": 0.08103837923241536, "grad_norm": 0.3442648379441969, "learning_rate": 9.997120101672434e-06, "loss": 0.2874, "step": 1013 }, { "epoch": 0.08111837763244735, "grad_norm": 0.36091755267418346, "learning_rate": 9.99709807789245e-06, "loss": 0.311, "step": 1014 }, { "epoch": 0.08119837603247936, "grad_norm": 0.34588376053132763, "learning_rate": 9.997075970244878e-06, "loss": 0.2886, "step": 1015 }, { "epoch": 0.08127837443251135, "grad_norm": 0.33104857713866476, "learning_rate": 9.997053778730095e-06, "loss": 0.2953, "step": 1016 }, { "epoch": 0.08135837283254335, "grad_norm": 0.3435035243083412, "learning_rate": 9.997031503348473e-06, "loss": 0.3189, "step": 1017 }, { "epoch": 0.08143837123257534, "grad_norm": 0.31372190170406855, "learning_rate": 9.997009144100383e-06, "loss": 0.3067, "step": 1018 }, { "epoch": 0.08151836963260735, "grad_norm": 0.32410564866357006, "learning_rate": 9.996986700986201e-06, "loss": 0.3234, "step": 1019 }, { "epoch": 0.08159836803263935, "grad_norm": 0.28854055757672614, "learning_rate": 9.996964174006304e-06, "loss": 0.3176, "step": 1020 }, { "epoch": 0.08167836643267135, "grad_norm": 0.293423545515452, "learning_rate": 9.996941563161071e-06, "loss": 0.314, "step": 1021 }, { "epoch": 0.08175836483270335, "grad_norm": 0.3463719825025683, "learning_rate": 9.996918868450882e-06, "loss": 0.2933, "step": 1022 }, { "epoch": 0.08183836323273534, "grad_norm": 0.4392795367408748, "learning_rate": 9.996896089876116e-06, "loss": 0.3108, "step": 1023 }, { "epoch": 0.08191836163276735, "grad_norm": 0.28900803444265566, "learning_rate": 9.996873227437156e-06, "loss": 0.3258, "step": 1024 }, { "epoch": 0.08199836003279934, "grad_norm": 0.30641802228300485, "learning_rate": 9.996850281134385e-06, "loss": 0.2722, "step": 1025 }, { "epoch": 0.08207835843283134, "grad_norm": 0.3722525131418996, "learning_rate": 9.99682725096819e-06, "loss": 0.3452, "step": 1026 }, { "epoch": 0.08215835683286335, "grad_norm": 0.30291628271957177, "learning_rate": 9.996804136938956e-06, "loss": 0.315, "step": 1027 }, { "epoch": 0.08223835523289534, "grad_norm": 0.33402926294676216, "learning_rate": 9.99678093904707e-06, "loss": 0.2876, "step": 1028 }, { "epoch": 0.08231835363292735, "grad_norm": 0.4079412386440936, "learning_rate": 9.996757657292923e-06, "loss": 0.3215, "step": 1029 }, { "epoch": 0.08239835203295934, "grad_norm": 0.32744933019605826, "learning_rate": 9.996734291676907e-06, "loss": 0.3225, "step": 1030 }, { "epoch": 0.08247835043299134, "grad_norm": 0.3230424291470875, "learning_rate": 9.996710842199412e-06, "loss": 0.3256, "step": 1031 }, { "epoch": 0.08255834883302333, "grad_norm": 0.25603658860631406, "learning_rate": 9.996687308860832e-06, "loss": 0.3371, "step": 1032 }, { "epoch": 0.08263834723305534, "grad_norm": 0.3872535367644594, "learning_rate": 9.99666369166156e-06, "loss": 0.2943, "step": 1033 }, { "epoch": 0.08271834563308735, "grad_norm": 0.26362564867370236, "learning_rate": 9.996639990601998e-06, "loss": 0.331, "step": 1034 }, { "epoch": 0.08279834403311934, "grad_norm": 0.3032944292791754, "learning_rate": 9.996616205682538e-06, "loss": 0.3254, "step": 1035 }, { "epoch": 0.08287834243315134, "grad_norm": 0.2558898622870006, "learning_rate": 9.99659233690358e-06, "loss": 0.3235, "step": 1036 }, { "epoch": 0.08295834083318333, "grad_norm": 0.27861790488206184, "learning_rate": 9.996568384265529e-06, "loss": 0.3053, "step": 1037 }, { "epoch": 0.08303833923321534, "grad_norm": 0.26966444516082927, "learning_rate": 9.996544347768782e-06, "loss": 0.3645, "step": 1038 }, { "epoch": 0.08311833763324733, "grad_norm": 0.2679090232752216, "learning_rate": 9.996520227413747e-06, "loss": 0.3417, "step": 1039 }, { "epoch": 0.08319833603327934, "grad_norm": 0.33835459429485487, "learning_rate": 9.996496023200823e-06, "loss": 0.3066, "step": 1040 }, { "epoch": 0.08327833443331134, "grad_norm": 0.3118287245188823, "learning_rate": 9.996471735130422e-06, "loss": 0.2878, "step": 1041 }, { "epoch": 0.08335833283334333, "grad_norm": 0.32156335759034027, "learning_rate": 9.996447363202947e-06, "loss": 0.2684, "step": 1042 }, { "epoch": 0.08343833123337534, "grad_norm": 0.5325381218929562, "learning_rate": 9.99642290741881e-06, "loss": 0.2872, "step": 1043 }, { "epoch": 0.08351832963340733, "grad_norm": 0.335879638456767, "learning_rate": 9.99639836777842e-06, "loss": 0.3004, "step": 1044 }, { "epoch": 0.08359832803343933, "grad_norm": 0.2736386369854071, "learning_rate": 9.99637374428219e-06, "loss": 0.3418, "step": 1045 }, { "epoch": 0.08367832643347133, "grad_norm": 0.2609782590444062, "learning_rate": 9.996349036930533e-06, "loss": 0.3431, "step": 1046 }, { "epoch": 0.08375832483350333, "grad_norm": 0.38467484126179985, "learning_rate": 9.996324245723863e-06, "loss": 0.2792, "step": 1047 }, { "epoch": 0.08383832323353534, "grad_norm": 0.39280502041402643, "learning_rate": 9.996299370662597e-06, "loss": 0.287, "step": 1048 }, { "epoch": 0.08391832163356733, "grad_norm": 0.3569748640965383, "learning_rate": 9.99627441174715e-06, "loss": 0.2861, "step": 1049 }, { "epoch": 0.08399832003359933, "grad_norm": 0.31244605421410254, "learning_rate": 9.996249368977945e-06, "loss": 0.3103, "step": 1050 }, { "epoch": 0.08407831843363132, "grad_norm": 0.35281129255342325, "learning_rate": 9.9962242423554e-06, "loss": 0.3156, "step": 1051 }, { "epoch": 0.08415831683366333, "grad_norm": 0.31764325727093257, "learning_rate": 9.996199031879935e-06, "loss": 0.3364, "step": 1052 }, { "epoch": 0.08423831523369532, "grad_norm": 0.4216973364516531, "learning_rate": 9.996173737551976e-06, "loss": 0.303, "step": 1053 }, { "epoch": 0.08431831363372733, "grad_norm": 0.27700852381156565, "learning_rate": 9.996148359371946e-06, "loss": 0.3418, "step": 1054 }, { "epoch": 0.08439831203375932, "grad_norm": 0.38200298126868704, "learning_rate": 9.996122897340273e-06, "loss": 0.2956, "step": 1055 }, { "epoch": 0.08447831043379132, "grad_norm": 0.28078025921946115, "learning_rate": 9.996097351457381e-06, "loss": 0.3735, "step": 1056 }, { "epoch": 0.08455830883382333, "grad_norm": 0.32466865172090137, "learning_rate": 9.9960717217237e-06, "loss": 0.3188, "step": 1057 }, { "epoch": 0.08463830723385532, "grad_norm": 0.23519534372554735, "learning_rate": 9.996046008139663e-06, "loss": 0.3741, "step": 1058 }, { "epoch": 0.08471830563388733, "grad_norm": 0.30483733855312045, "learning_rate": 9.996020210705697e-06, "loss": 0.3372, "step": 1059 }, { "epoch": 0.08479830403391932, "grad_norm": 0.3124553947291308, "learning_rate": 9.995994329422239e-06, "loss": 0.3349, "step": 1060 }, { "epoch": 0.08487830243395132, "grad_norm": 0.30392874542338777, "learning_rate": 9.995968364289719e-06, "loss": 0.3241, "step": 1061 }, { "epoch": 0.08495830083398331, "grad_norm": 0.32245049888837485, "learning_rate": 9.995942315308577e-06, "loss": 0.3178, "step": 1062 }, { "epoch": 0.08503829923401532, "grad_norm": 0.26471003103982005, "learning_rate": 9.995916182479248e-06, "loss": 0.3593, "step": 1063 }, { "epoch": 0.08511829763404732, "grad_norm": 0.30199577439109876, "learning_rate": 9.995889965802171e-06, "loss": 0.3182, "step": 1064 }, { "epoch": 0.08519829603407932, "grad_norm": 0.31274590719312867, "learning_rate": 9.995863665277787e-06, "loss": 0.2941, "step": 1065 }, { "epoch": 0.08527829443411132, "grad_norm": 0.342629350138759, "learning_rate": 9.995837280906535e-06, "loss": 0.2967, "step": 1066 }, { "epoch": 0.08535829283414331, "grad_norm": 0.34296909050739893, "learning_rate": 9.99581081268886e-06, "loss": 0.3181, "step": 1067 }, { "epoch": 0.08543829123417532, "grad_norm": 0.34849293164447015, "learning_rate": 9.995784260625205e-06, "loss": 0.2875, "step": 1068 }, { "epoch": 0.08551828963420731, "grad_norm": 0.29009933804078714, "learning_rate": 9.995757624716019e-06, "loss": 0.3161, "step": 1069 }, { "epoch": 0.08559828803423931, "grad_norm": 0.19585532204086822, "learning_rate": 9.995730904961743e-06, "loss": 0.3801, "step": 1070 }, { "epoch": 0.08567828643427132, "grad_norm": 0.2633059695105179, "learning_rate": 9.99570410136283e-06, "loss": 0.3482, "step": 1071 }, { "epoch": 0.08575828483430331, "grad_norm": 0.3052516543601372, "learning_rate": 9.995677213919726e-06, "loss": 0.314, "step": 1072 }, { "epoch": 0.08583828323433532, "grad_norm": 0.29045963629524285, "learning_rate": 9.995650242632887e-06, "loss": 0.3165, "step": 1073 }, { "epoch": 0.08591828163436731, "grad_norm": 0.3328544237953998, "learning_rate": 9.995623187502763e-06, "loss": 0.3368, "step": 1074 }, { "epoch": 0.08599828003439931, "grad_norm": 0.3053682523089739, "learning_rate": 9.99559604852981e-06, "loss": 0.2751, "step": 1075 }, { "epoch": 0.0860782784344313, "grad_norm": 0.2714134172409879, "learning_rate": 9.995568825714479e-06, "loss": 0.2974, "step": 1076 }, { "epoch": 0.08615827683446331, "grad_norm": 0.3438440508974596, "learning_rate": 9.995541519057231e-06, "loss": 0.2836, "step": 1077 }, { "epoch": 0.08623827523449532, "grad_norm": 0.310393650792298, "learning_rate": 9.995514128558523e-06, "loss": 0.3027, "step": 1078 }, { "epoch": 0.0863182736345273, "grad_norm": 0.33711186993885395, "learning_rate": 9.995486654218815e-06, "loss": 0.3142, "step": 1079 }, { "epoch": 0.08639827203455931, "grad_norm": 0.32224263976181167, "learning_rate": 9.995459096038568e-06, "loss": 0.3031, "step": 1080 }, { "epoch": 0.0864782704345913, "grad_norm": 0.3418395734553405, "learning_rate": 9.995431454018246e-06, "loss": 0.2704, "step": 1081 }, { "epoch": 0.08655826883462331, "grad_norm": 0.297789685680779, "learning_rate": 9.995403728158311e-06, "loss": 0.2975, "step": 1082 }, { "epoch": 0.0866382672346553, "grad_norm": 0.29348981360478565, "learning_rate": 9.995375918459227e-06, "loss": 0.3194, "step": 1083 }, { "epoch": 0.0867182656346873, "grad_norm": 0.4902866863571245, "learning_rate": 9.995348024921463e-06, "loss": 0.2855, "step": 1084 }, { "epoch": 0.08679826403471931, "grad_norm": 0.33330277755786136, "learning_rate": 9.995320047545488e-06, "loss": 0.3258, "step": 1085 }, { "epoch": 0.0868782624347513, "grad_norm": 0.2740249763002998, "learning_rate": 9.995291986331767e-06, "loss": 0.3325, "step": 1086 }, { "epoch": 0.08695826083478331, "grad_norm": 0.3408933373620087, "learning_rate": 9.995263841280776e-06, "loss": 0.2993, "step": 1087 }, { "epoch": 0.0870382592348153, "grad_norm": 0.3031063900963532, "learning_rate": 9.995235612392986e-06, "loss": 0.3222, "step": 1088 }, { "epoch": 0.0871182576348473, "grad_norm": 0.2502837778770749, "learning_rate": 9.99520729966887e-06, "loss": 0.3364, "step": 1089 }, { "epoch": 0.0871982560348793, "grad_norm": 0.35748485496614607, "learning_rate": 9.995178903108904e-06, "loss": 0.2942, "step": 1090 }, { "epoch": 0.0872782544349113, "grad_norm": 0.3555823361678222, "learning_rate": 9.995150422713561e-06, "loss": 0.3036, "step": 1091 }, { "epoch": 0.0873582528349433, "grad_norm": 0.3516767614264302, "learning_rate": 9.995121858483326e-06, "loss": 0.2775, "step": 1092 }, { "epoch": 0.0874382512349753, "grad_norm": 0.3342221376356437, "learning_rate": 9.995093210418672e-06, "loss": 0.2896, "step": 1093 }, { "epoch": 0.0875182496350073, "grad_norm": 0.38175787639915015, "learning_rate": 9.995064478520083e-06, "loss": 0.3225, "step": 1094 }, { "epoch": 0.0875982480350393, "grad_norm": 0.3291457453192748, "learning_rate": 9.995035662788039e-06, "loss": 0.2687, "step": 1095 }, { "epoch": 0.0876782464350713, "grad_norm": 0.3379320080823688, "learning_rate": 9.995006763223028e-06, "loss": 0.2913, "step": 1096 }, { "epoch": 0.08775824483510329, "grad_norm": 0.33494001523055583, "learning_rate": 9.99497777982553e-06, "loss": 0.2875, "step": 1097 }, { "epoch": 0.0878382432351353, "grad_norm": 0.28030322892451337, "learning_rate": 9.994948712596033e-06, "loss": 0.3402, "step": 1098 }, { "epoch": 0.0879182416351673, "grad_norm": 0.2547753550649061, "learning_rate": 9.994919561535026e-06, "loss": 0.3399, "step": 1099 }, { "epoch": 0.0879982400351993, "grad_norm": 0.2417990846917735, "learning_rate": 9.994890326642998e-06, "loss": 0.323, "step": 1100 }, { "epoch": 0.0880782384352313, "grad_norm": 0.40394887803809776, "learning_rate": 9.99486100792044e-06, "loss": 0.2921, "step": 1101 }, { "epoch": 0.08815823683526329, "grad_norm": 0.389572500490799, "learning_rate": 9.994831605367842e-06, "loss": 0.2709, "step": 1102 }, { "epoch": 0.0882382352352953, "grad_norm": 0.3693626832339756, "learning_rate": 9.9948021189857e-06, "loss": 0.2883, "step": 1103 }, { "epoch": 0.08831823363532729, "grad_norm": 0.5292367315887432, "learning_rate": 9.994772548774506e-06, "loss": 0.31, "step": 1104 }, { "epoch": 0.08839823203535929, "grad_norm": 0.3752113934340967, "learning_rate": 9.994742894734759e-06, "loss": 0.3006, "step": 1105 }, { "epoch": 0.0884782304353913, "grad_norm": 0.3264503928718569, "learning_rate": 9.994713156866956e-06, "loss": 0.2709, "step": 1106 }, { "epoch": 0.08855822883542329, "grad_norm": 0.3292072846562249, "learning_rate": 9.994683335171594e-06, "loss": 0.2846, "step": 1107 }, { "epoch": 0.0886382272354553, "grad_norm": 0.2944920506216355, "learning_rate": 9.994653429649178e-06, "loss": 0.3069, "step": 1108 }, { "epoch": 0.08871822563548729, "grad_norm": 0.33142288438776357, "learning_rate": 9.994623440300205e-06, "loss": 0.3041, "step": 1109 }, { "epoch": 0.08879822403551929, "grad_norm": 0.2695811068140248, "learning_rate": 9.99459336712518e-06, "loss": 0.316, "step": 1110 }, { "epoch": 0.08887822243555128, "grad_norm": 0.2716862425216491, "learning_rate": 9.99456321012461e-06, "loss": 0.3092, "step": 1111 }, { "epoch": 0.08895822083558329, "grad_norm": 0.309055003074196, "learning_rate": 9.994532969298999e-06, "loss": 0.2731, "step": 1112 }, { "epoch": 0.08903821923561529, "grad_norm": 0.28227866826893694, "learning_rate": 9.994502644648854e-06, "loss": 0.3395, "step": 1113 }, { "epoch": 0.08911821763564728, "grad_norm": 0.30176394973207316, "learning_rate": 9.994472236174686e-06, "loss": 0.3181, "step": 1114 }, { "epoch": 0.08919821603567929, "grad_norm": 0.31529772068413553, "learning_rate": 9.994441743877003e-06, "loss": 0.2785, "step": 1115 }, { "epoch": 0.08927821443571128, "grad_norm": 0.29342782703419024, "learning_rate": 9.994411167756319e-06, "loss": 0.3352, "step": 1116 }, { "epoch": 0.08935821283574329, "grad_norm": 0.35663002922122833, "learning_rate": 9.994380507813146e-06, "loss": 0.2602, "step": 1117 }, { "epoch": 0.08943821123577528, "grad_norm": 0.3090479557779549, "learning_rate": 9.994349764047999e-06, "loss": 0.3173, "step": 1118 }, { "epoch": 0.08951820963580728, "grad_norm": 0.26867713477482036, "learning_rate": 9.994318936461393e-06, "loss": 0.334, "step": 1119 }, { "epoch": 0.08959820803583929, "grad_norm": 0.2663020096977348, "learning_rate": 9.994288025053846e-06, "loss": 0.2921, "step": 1120 }, { "epoch": 0.08967820643587128, "grad_norm": 0.34647757135967905, "learning_rate": 9.994257029825876e-06, "loss": 0.3168, "step": 1121 }, { "epoch": 0.08975820483590329, "grad_norm": 0.3084340285336873, "learning_rate": 9.994225950778005e-06, "loss": 0.2928, "step": 1122 }, { "epoch": 0.08983820323593528, "grad_norm": 0.3350039376404601, "learning_rate": 9.994194787910754e-06, "loss": 0.2856, "step": 1123 }, { "epoch": 0.08991820163596728, "grad_norm": 0.27082485458385747, "learning_rate": 9.994163541224645e-06, "loss": 0.3464, "step": 1124 }, { "epoch": 0.08999820003599927, "grad_norm": 0.2630252580391773, "learning_rate": 9.994132210720204e-06, "loss": 0.3253, "step": 1125 }, { "epoch": 0.09007819843603128, "grad_norm": 0.2641326608970051, "learning_rate": 9.994100796397954e-06, "loss": 0.3379, "step": 1126 }, { "epoch": 0.09015819683606328, "grad_norm": 0.2650701043258218, "learning_rate": 9.994069298258427e-06, "loss": 0.3345, "step": 1127 }, { "epoch": 0.09023819523609528, "grad_norm": 0.3128592679954027, "learning_rate": 9.994037716302146e-06, "loss": 0.326, "step": 1128 }, { "epoch": 0.09031819363612728, "grad_norm": 0.296270761468446, "learning_rate": 9.994006050529645e-06, "loss": 0.3256, "step": 1129 }, { "epoch": 0.09039819203615927, "grad_norm": 0.2828148736583494, "learning_rate": 9.993974300941455e-06, "loss": 0.2952, "step": 1130 }, { "epoch": 0.09047819043619128, "grad_norm": 0.2796649650954519, "learning_rate": 9.993942467538107e-06, "loss": 0.3381, "step": 1131 }, { "epoch": 0.09055818883622327, "grad_norm": 0.3151368856315861, "learning_rate": 9.993910550320137e-06, "loss": 0.3371, "step": 1132 }, { "epoch": 0.09063818723625527, "grad_norm": 0.28245747975267826, "learning_rate": 9.99387854928808e-06, "loss": 0.3082, "step": 1133 }, { "epoch": 0.09071818563628728, "grad_norm": 0.32361889119371196, "learning_rate": 9.993846464442473e-06, "loss": 0.2995, "step": 1134 }, { "epoch": 0.09079818403631927, "grad_norm": 0.2813358383018441, "learning_rate": 9.993814295783855e-06, "loss": 0.3077, "step": 1135 }, { "epoch": 0.09087818243635128, "grad_norm": 0.3026912738633303, "learning_rate": 9.993782043312765e-06, "loss": 0.3102, "step": 1136 }, { "epoch": 0.09095818083638327, "grad_norm": 0.24039332572931846, "learning_rate": 9.993749707029746e-06, "loss": 0.3404, "step": 1137 }, { "epoch": 0.09103817923641527, "grad_norm": 0.3171604716413527, "learning_rate": 9.993717286935339e-06, "loss": 0.3046, "step": 1138 }, { "epoch": 0.09111817763644726, "grad_norm": 0.35600585478704544, "learning_rate": 9.99368478303009e-06, "loss": 0.3196, "step": 1139 }, { "epoch": 0.09119817603647927, "grad_norm": 0.3188123205527489, "learning_rate": 9.99365219531454e-06, "loss": 0.3164, "step": 1140 }, { "epoch": 0.09127817443651128, "grad_norm": 0.30752893837296913, "learning_rate": 9.993619523789241e-06, "loss": 0.3034, "step": 1141 }, { "epoch": 0.09135817283654327, "grad_norm": 0.3545662479986179, "learning_rate": 9.99358676845474e-06, "loss": 0.3311, "step": 1142 }, { "epoch": 0.09143817123657527, "grad_norm": 0.27140060442224245, "learning_rate": 9.993553929311587e-06, "loss": 0.3334, "step": 1143 }, { "epoch": 0.09151816963660726, "grad_norm": 0.3530343564342775, "learning_rate": 9.993521006360329e-06, "loss": 0.3019, "step": 1144 }, { "epoch": 0.09159816803663927, "grad_norm": 0.25501891872163307, "learning_rate": 9.993487999601522e-06, "loss": 0.3078, "step": 1145 }, { "epoch": 0.09167816643667126, "grad_norm": 0.24185740548790127, "learning_rate": 9.993454909035724e-06, "loss": 0.3441, "step": 1146 }, { "epoch": 0.09175816483670327, "grad_norm": 0.302461767338097, "learning_rate": 9.993421734663484e-06, "loss": 0.3076, "step": 1147 }, { "epoch": 0.09183816323673527, "grad_norm": 0.31997533045008764, "learning_rate": 9.993388476485361e-06, "loss": 0.2858, "step": 1148 }, { "epoch": 0.09191816163676726, "grad_norm": 0.33352700801025476, "learning_rate": 9.993355134501914e-06, "loss": 0.3036, "step": 1149 }, { "epoch": 0.09199816003679927, "grad_norm": 0.23477152550410624, "learning_rate": 9.9933217087137e-06, "loss": 0.3307, "step": 1150 }, { "epoch": 0.09207815843683126, "grad_norm": 0.3241193696016506, "learning_rate": 9.993288199121283e-06, "loss": 0.2866, "step": 1151 }, { "epoch": 0.09215815683686326, "grad_norm": 0.29862810839684006, "learning_rate": 9.993254605725225e-06, "loss": 0.3027, "step": 1152 }, { "epoch": 0.09223815523689526, "grad_norm": 0.31052349229189347, "learning_rate": 9.993220928526086e-06, "loss": 0.3052, "step": 1153 }, { "epoch": 0.09231815363692726, "grad_norm": 0.32688402190217625, "learning_rate": 9.993187167524437e-06, "loss": 0.2826, "step": 1154 }, { "epoch": 0.09239815203695927, "grad_norm": 0.43221238048685584, "learning_rate": 9.993153322720841e-06, "loss": 0.306, "step": 1155 }, { "epoch": 0.09247815043699126, "grad_norm": 0.31361974540620574, "learning_rate": 9.993119394115866e-06, "loss": 0.2957, "step": 1156 }, { "epoch": 0.09255814883702326, "grad_norm": 0.2372527917951173, "learning_rate": 9.993085381710083e-06, "loss": 0.3334, "step": 1157 }, { "epoch": 0.09263814723705525, "grad_norm": 0.3787659030960463, "learning_rate": 9.993051285504063e-06, "loss": 0.3053, "step": 1158 }, { "epoch": 0.09271814563708726, "grad_norm": 0.2514564379744673, "learning_rate": 9.993017105498378e-06, "loss": 0.333, "step": 1159 }, { "epoch": 0.09279814403711925, "grad_norm": 0.28744698194014523, "learning_rate": 9.992982841693599e-06, "loss": 0.3222, "step": 1160 }, { "epoch": 0.09287814243715126, "grad_norm": 0.20346121831885777, "learning_rate": 9.992948494090303e-06, "loss": 0.3555, "step": 1161 }, { "epoch": 0.09295814083718326, "grad_norm": 0.22740467560663488, "learning_rate": 9.992914062689068e-06, "loss": 0.377, "step": 1162 }, { "epoch": 0.09303813923721525, "grad_norm": 0.20088268737007853, "learning_rate": 9.992879547490469e-06, "loss": 0.3767, "step": 1163 }, { "epoch": 0.09311813763724726, "grad_norm": 0.3302587245313672, "learning_rate": 9.992844948495088e-06, "loss": 0.2773, "step": 1164 }, { "epoch": 0.09319813603727925, "grad_norm": 0.3190881697599193, "learning_rate": 9.992810265703503e-06, "loss": 0.3487, "step": 1165 }, { "epoch": 0.09327813443731126, "grad_norm": 0.2843003259587228, "learning_rate": 9.992775499116299e-06, "loss": 0.323, "step": 1166 }, { "epoch": 0.09335813283734325, "grad_norm": 0.28923702185263334, "learning_rate": 9.992740648734057e-06, "loss": 0.3134, "step": 1167 }, { "epoch": 0.09343813123737525, "grad_norm": 0.6165482291257397, "learning_rate": 9.992705714557362e-06, "loss": 0.2937, "step": 1168 }, { "epoch": 0.09351812963740726, "grad_norm": 0.362392729699564, "learning_rate": 9.992670696586802e-06, "loss": 0.2989, "step": 1169 }, { "epoch": 0.09359812803743925, "grad_norm": 0.32207836775413334, "learning_rate": 9.992635594822965e-06, "loss": 0.2955, "step": 1170 }, { "epoch": 0.09367812643747125, "grad_norm": 0.2647476605454954, "learning_rate": 9.992600409266437e-06, "loss": 0.3182, "step": 1171 }, { "epoch": 0.09375812483750325, "grad_norm": 0.2677071427789346, "learning_rate": 9.992565139917812e-06, "loss": 0.3568, "step": 1172 }, { "epoch": 0.09383812323753525, "grad_norm": 0.265695309870558, "learning_rate": 9.99252978677768e-06, "loss": 0.3582, "step": 1173 }, { "epoch": 0.09391812163756724, "grad_norm": 0.28933844261169883, "learning_rate": 9.992494349846635e-06, "loss": 0.3034, "step": 1174 }, { "epoch": 0.09399812003759925, "grad_norm": 0.24966436690867466, "learning_rate": 9.992458829125271e-06, "loss": 0.321, "step": 1175 }, { "epoch": 0.09407811843763125, "grad_norm": 0.2979458823286954, "learning_rate": 9.992423224614185e-06, "loss": 0.322, "step": 1176 }, { "epoch": 0.09415811683766324, "grad_norm": 0.37666835239644997, "learning_rate": 9.992387536313975e-06, "loss": 0.3137, "step": 1177 }, { "epoch": 0.09423811523769525, "grad_norm": 0.24910144848028087, "learning_rate": 9.992351764225238e-06, "loss": 0.3252, "step": 1178 }, { "epoch": 0.09431811363772724, "grad_norm": 0.45518140789557215, "learning_rate": 9.992315908348578e-06, "loss": 0.3145, "step": 1179 }, { "epoch": 0.09439811203775925, "grad_norm": 0.3574587389265085, "learning_rate": 9.992279968684592e-06, "loss": 0.3062, "step": 1180 }, { "epoch": 0.09447811043779124, "grad_norm": 0.31609703853016824, "learning_rate": 9.992243945233886e-06, "loss": 0.2903, "step": 1181 }, { "epoch": 0.09455810883782324, "grad_norm": 0.2698606232678801, "learning_rate": 9.992207837997064e-06, "loss": 0.3315, "step": 1182 }, { "epoch": 0.09463810723785525, "grad_norm": 0.25917329643829207, "learning_rate": 9.992171646974734e-06, "loss": 0.3661, "step": 1183 }, { "epoch": 0.09471810563788724, "grad_norm": 0.3263059841584088, "learning_rate": 9.9921353721675e-06, "loss": 0.3093, "step": 1184 }, { "epoch": 0.09479810403791925, "grad_norm": 0.2956940930046495, "learning_rate": 9.99209901357597e-06, "loss": 0.3079, "step": 1185 }, { "epoch": 0.09487810243795124, "grad_norm": 0.2685558090337002, "learning_rate": 9.99206257120076e-06, "loss": 0.3446, "step": 1186 }, { "epoch": 0.09495810083798324, "grad_norm": 0.3131040853626263, "learning_rate": 9.992026045042478e-06, "loss": 0.3114, "step": 1187 }, { "epoch": 0.09503809923801523, "grad_norm": 0.30931169369990325, "learning_rate": 9.991989435101736e-06, "loss": 0.3113, "step": 1188 }, { "epoch": 0.09511809763804724, "grad_norm": 0.3140938456843547, "learning_rate": 9.99195274137915e-06, "loss": 0.2967, "step": 1189 }, { "epoch": 0.09519809603807924, "grad_norm": 0.3021078159539041, "learning_rate": 9.991915963875336e-06, "loss": 0.3053, "step": 1190 }, { "epoch": 0.09527809443811124, "grad_norm": 0.35104514548535, "learning_rate": 9.991879102590912e-06, "loss": 0.2844, "step": 1191 }, { "epoch": 0.09535809283814324, "grad_norm": 0.28970511773908714, "learning_rate": 9.991842157526493e-06, "loss": 0.3172, "step": 1192 }, { "epoch": 0.09543809123817523, "grad_norm": 0.31035348927258927, "learning_rate": 9.9918051286827e-06, "loss": 0.3229, "step": 1193 }, { "epoch": 0.09551808963820724, "grad_norm": 0.3094756803925865, "learning_rate": 9.991768016060159e-06, "loss": 0.3181, "step": 1194 }, { "epoch": 0.09559808803823923, "grad_norm": 0.2945036503778562, "learning_rate": 9.99173081965949e-06, "loss": 0.3161, "step": 1195 }, { "epoch": 0.09567808643827123, "grad_norm": 0.3036940900954199, "learning_rate": 9.991693539481317e-06, "loss": 0.3022, "step": 1196 }, { "epoch": 0.09575808483830324, "grad_norm": 0.23338442756401814, "learning_rate": 9.991656175526264e-06, "loss": 0.3233, "step": 1197 }, { "epoch": 0.09583808323833523, "grad_norm": 0.3356766740445785, "learning_rate": 9.99161872779496e-06, "loss": 0.3084, "step": 1198 }, { "epoch": 0.09591808163836724, "grad_norm": 0.32150153531273873, "learning_rate": 9.991581196288035e-06, "loss": 0.2711, "step": 1199 }, { "epoch": 0.09599808003839923, "grad_norm": 0.3377743039809436, "learning_rate": 9.991543581006116e-06, "loss": 0.3043, "step": 1200 }, { "epoch": 0.09607807843843123, "grad_norm": 0.39570810617129387, "learning_rate": 9.991505881949837e-06, "loss": 0.286, "step": 1201 }, { "epoch": 0.09615807683846322, "grad_norm": 0.32451725203786386, "learning_rate": 9.991468099119828e-06, "loss": 0.2799, "step": 1202 }, { "epoch": 0.09623807523849523, "grad_norm": 0.3635693970246387, "learning_rate": 9.991430232516725e-06, "loss": 0.2935, "step": 1203 }, { "epoch": 0.09631807363852724, "grad_norm": 0.30611953569173006, "learning_rate": 9.991392282141161e-06, "loss": 0.3244, "step": 1204 }, { "epoch": 0.09639807203855923, "grad_norm": 0.34507660532781204, "learning_rate": 9.991354247993776e-06, "loss": 0.2896, "step": 1205 }, { "epoch": 0.09647807043859123, "grad_norm": 0.3074957507688657, "learning_rate": 9.991316130075208e-06, "loss": 0.303, "step": 1206 }, { "epoch": 0.09655806883862322, "grad_norm": 0.32739494400790675, "learning_rate": 9.991277928386095e-06, "loss": 0.3596, "step": 1207 }, { "epoch": 0.09663806723865523, "grad_norm": 0.2667528346494223, "learning_rate": 9.99123964292708e-06, "loss": 0.343, "step": 1208 }, { "epoch": 0.09671806563868722, "grad_norm": 0.2935436880471149, "learning_rate": 9.991201273698805e-06, "loss": 0.3225, "step": 1209 }, { "epoch": 0.09679806403871923, "grad_norm": 0.3462450731645947, "learning_rate": 9.991162820701911e-06, "loss": 0.3161, "step": 1210 }, { "epoch": 0.09687806243875123, "grad_norm": 0.31331579533043286, "learning_rate": 9.991124283937049e-06, "loss": 0.327, "step": 1211 }, { "epoch": 0.09695806083878322, "grad_norm": 0.2926329699694107, "learning_rate": 9.991085663404862e-06, "loss": 0.3073, "step": 1212 }, { "epoch": 0.09703805923881523, "grad_norm": 0.2549894897391792, "learning_rate": 9.991046959105998e-06, "loss": 0.3559, "step": 1213 }, { "epoch": 0.09711805763884722, "grad_norm": 0.3672521042995224, "learning_rate": 9.991008171041107e-06, "loss": 0.2749, "step": 1214 }, { "epoch": 0.09719805603887922, "grad_norm": 0.330753046382212, "learning_rate": 9.990969299210843e-06, "loss": 0.3052, "step": 1215 }, { "epoch": 0.09727805443891122, "grad_norm": 0.33852157550638545, "learning_rate": 9.990930343615854e-06, "loss": 0.2843, "step": 1216 }, { "epoch": 0.09735805283894322, "grad_norm": 0.3071865110051899, "learning_rate": 9.990891304256796e-06, "loss": 0.3034, "step": 1217 }, { "epoch": 0.09743805123897523, "grad_norm": 0.3543877929036405, "learning_rate": 9.990852181134323e-06, "loss": 0.3042, "step": 1218 }, { "epoch": 0.09751804963900722, "grad_norm": 0.3296751124860639, "learning_rate": 9.990812974249094e-06, "loss": 0.2936, "step": 1219 }, { "epoch": 0.09759804803903922, "grad_norm": 0.3319169403443758, "learning_rate": 9.990773683601764e-06, "loss": 0.281, "step": 1220 }, { "epoch": 0.09767804643907121, "grad_norm": 0.29435761612760664, "learning_rate": 9.990734309192995e-06, "loss": 0.3026, "step": 1221 }, { "epoch": 0.09775804483910322, "grad_norm": 0.3508696198780259, "learning_rate": 9.990694851023446e-06, "loss": 0.2991, "step": 1222 }, { "epoch": 0.09783804323913521, "grad_norm": 0.27849028589025154, "learning_rate": 9.99065530909378e-06, "loss": 0.3651, "step": 1223 }, { "epoch": 0.09791804163916722, "grad_norm": 0.24643777819752472, "learning_rate": 9.99061568340466e-06, "loss": 0.3446, "step": 1224 }, { "epoch": 0.09799804003919922, "grad_norm": 0.36466697956223487, "learning_rate": 9.990575973956754e-06, "loss": 0.2957, "step": 1225 }, { "epoch": 0.09807803843923121, "grad_norm": 0.28234719611897835, "learning_rate": 9.990536180750724e-06, "loss": 0.3128, "step": 1226 }, { "epoch": 0.09815803683926322, "grad_norm": 0.26571628888189464, "learning_rate": 9.990496303787243e-06, "loss": 0.3374, "step": 1227 }, { "epoch": 0.09823803523929521, "grad_norm": 0.3165685919933477, "learning_rate": 9.990456343066975e-06, "loss": 0.2846, "step": 1228 }, { "epoch": 0.09831803363932722, "grad_norm": 0.3037004282280437, "learning_rate": 9.990416298590593e-06, "loss": 0.297, "step": 1229 }, { "epoch": 0.0983980320393592, "grad_norm": 0.22884022513625024, "learning_rate": 9.990376170358769e-06, "loss": 0.3537, "step": 1230 }, { "epoch": 0.09847803043939121, "grad_norm": 0.3393243856563129, "learning_rate": 9.990335958372178e-06, "loss": 0.2936, "step": 1231 }, { "epoch": 0.09855802883942322, "grad_norm": 0.33740733112670124, "learning_rate": 9.99029566263149e-06, "loss": 0.3046, "step": 1232 }, { "epoch": 0.09863802723945521, "grad_norm": 0.39086042470060434, "learning_rate": 9.990255283137388e-06, "loss": 0.2935, "step": 1233 }, { "epoch": 0.09871802563948721, "grad_norm": 0.2825160437529858, "learning_rate": 9.990214819890545e-06, "loss": 0.3135, "step": 1234 }, { "epoch": 0.0987980240395192, "grad_norm": 0.3024014630499565, "learning_rate": 9.990174272891642e-06, "loss": 0.2969, "step": 1235 }, { "epoch": 0.09887802243955121, "grad_norm": 0.26399402582762344, "learning_rate": 9.990133642141359e-06, "loss": 0.3313, "step": 1236 }, { "epoch": 0.0989580208395832, "grad_norm": 0.2928909917195688, "learning_rate": 9.990092927640378e-06, "loss": 0.3032, "step": 1237 }, { "epoch": 0.09903801923961521, "grad_norm": 0.2875495531790397, "learning_rate": 9.99005212938938e-06, "loss": 0.3308, "step": 1238 }, { "epoch": 0.09911801763964721, "grad_norm": 0.3599689676409383, "learning_rate": 9.990011247389055e-06, "loss": 0.2725, "step": 1239 }, { "epoch": 0.0991980160396792, "grad_norm": 0.3040189589717307, "learning_rate": 9.989970281640085e-06, "loss": 0.2988, "step": 1240 }, { "epoch": 0.09927801443971121, "grad_norm": 0.3301827038386289, "learning_rate": 9.989929232143159e-06, "loss": 0.3195, "step": 1241 }, { "epoch": 0.0993580128397432, "grad_norm": 0.3317958817552823, "learning_rate": 9.989888098898965e-06, "loss": 0.2705, "step": 1242 }, { "epoch": 0.0994380112397752, "grad_norm": 0.293612853487226, "learning_rate": 9.989846881908194e-06, "loss": 0.2994, "step": 1243 }, { "epoch": 0.0995180096398072, "grad_norm": 0.31166233169067775, "learning_rate": 9.989805581171537e-06, "loss": 0.3215, "step": 1244 }, { "epoch": 0.0995980080398392, "grad_norm": 0.3352203043464999, "learning_rate": 9.98976419668969e-06, "loss": 0.3226, "step": 1245 }, { "epoch": 0.09967800643987121, "grad_norm": 0.313630090794967, "learning_rate": 9.989722728463345e-06, "loss": 0.3171, "step": 1246 }, { "epoch": 0.0997580048399032, "grad_norm": 0.2695007737475229, "learning_rate": 9.989681176493197e-06, "loss": 0.3466, "step": 1247 }, { "epoch": 0.0998380032399352, "grad_norm": 0.38569026874778967, "learning_rate": 9.989639540779945e-06, "loss": 0.2876, "step": 1248 }, { "epoch": 0.0999180016399672, "grad_norm": 0.35687023715211613, "learning_rate": 9.989597821324288e-06, "loss": 0.2975, "step": 1249 }, { "epoch": 0.0999980000399992, "grad_norm": 0.20107465899013952, "learning_rate": 9.989556018126925e-06, "loss": 0.3793, "step": 1250 }, { "epoch": 0.1000779984400312, "grad_norm": 0.3046596668292926, "learning_rate": 9.98951413118856e-06, "loss": 0.3289, "step": 1251 }, { "epoch": 0.1001579968400632, "grad_norm": 0.2611889258583932, "learning_rate": 9.989472160509892e-06, "loss": 0.3395, "step": 1252 }, { "epoch": 0.1002379952400952, "grad_norm": 0.3639602629968415, "learning_rate": 9.989430106091629e-06, "loss": 0.2988, "step": 1253 }, { "epoch": 0.1003179936401272, "grad_norm": 0.27403630401752826, "learning_rate": 9.989387967934477e-06, "loss": 0.305, "step": 1254 }, { "epoch": 0.1003979920401592, "grad_norm": 0.31924905918529256, "learning_rate": 9.989345746039138e-06, "loss": 0.3015, "step": 1255 }, { "epoch": 0.10047799044019119, "grad_norm": 0.32996067929703016, "learning_rate": 9.989303440406328e-06, "loss": 0.3147, "step": 1256 }, { "epoch": 0.1005579888402232, "grad_norm": 0.34996604942401544, "learning_rate": 9.989261051036752e-06, "loss": 0.3006, "step": 1257 }, { "epoch": 0.10063798724025519, "grad_norm": 0.2878509099106884, "learning_rate": 9.989218577931124e-06, "loss": 0.3324, "step": 1258 }, { "epoch": 0.1007179856402872, "grad_norm": 0.25956996621191714, "learning_rate": 9.989176021090155e-06, "loss": 0.3512, "step": 1259 }, { "epoch": 0.1007979840403192, "grad_norm": 0.5301139208275449, "learning_rate": 9.989133380514558e-06, "loss": 0.2907, "step": 1260 }, { "epoch": 0.10087798244035119, "grad_norm": 0.5293445331642519, "learning_rate": 9.989090656205052e-06, "loss": 0.281, "step": 1261 }, { "epoch": 0.1009579808403832, "grad_norm": 0.38544550742549455, "learning_rate": 9.989047848162353e-06, "loss": 0.3259, "step": 1262 }, { "epoch": 0.10103797924041519, "grad_norm": 0.2808493567214432, "learning_rate": 9.989004956387179e-06, "loss": 0.3577, "step": 1263 }, { "epoch": 0.10111797764044719, "grad_norm": 0.3167349638535576, "learning_rate": 9.98896198088025e-06, "loss": 0.3187, "step": 1264 }, { "epoch": 0.10119797604047918, "grad_norm": 0.20477675587648825, "learning_rate": 9.988918921642287e-06, "loss": 0.3642, "step": 1265 }, { "epoch": 0.10127797444051119, "grad_norm": 0.24129749910625356, "learning_rate": 9.988875778674014e-06, "loss": 0.3235, "step": 1266 }, { "epoch": 0.1013579728405432, "grad_norm": 0.3406247640971144, "learning_rate": 9.988832551976151e-06, "loss": 0.2857, "step": 1267 }, { "epoch": 0.10143797124057519, "grad_norm": 0.2583420632215183, "learning_rate": 9.988789241549429e-06, "loss": 0.3466, "step": 1268 }, { "epoch": 0.10151796964060719, "grad_norm": 0.39769910501089345, "learning_rate": 9.988745847394572e-06, "loss": 0.2969, "step": 1269 }, { "epoch": 0.10159796804063918, "grad_norm": 0.23638316246652688, "learning_rate": 9.98870236951231e-06, "loss": 0.3511, "step": 1270 }, { "epoch": 0.10167796644067119, "grad_norm": 0.2961384507963859, "learning_rate": 9.988658807903369e-06, "loss": 0.3383, "step": 1271 }, { "epoch": 0.10175796484070318, "grad_norm": 0.34816728036944666, "learning_rate": 9.988615162568483e-06, "loss": 0.3226, "step": 1272 }, { "epoch": 0.10183796324073519, "grad_norm": 0.6701901856513401, "learning_rate": 9.988571433508383e-06, "loss": 0.3058, "step": 1273 }, { "epoch": 0.10191796164076719, "grad_norm": 0.35191364392506774, "learning_rate": 9.988527620723804e-06, "loss": 0.3205, "step": 1274 }, { "epoch": 0.10199796004079918, "grad_norm": 0.4927961845676967, "learning_rate": 9.988483724215483e-06, "loss": 0.3149, "step": 1275 }, { "epoch": 0.10207795844083119, "grad_norm": 0.3857376980686955, "learning_rate": 9.988439743984155e-06, "loss": 0.2871, "step": 1276 }, { "epoch": 0.10215795684086318, "grad_norm": 0.30136918542905744, "learning_rate": 9.988395680030556e-06, "loss": 0.3063, "step": 1277 }, { "epoch": 0.10223795524089518, "grad_norm": 0.31694495102436554, "learning_rate": 9.988351532355428e-06, "loss": 0.3365, "step": 1278 }, { "epoch": 0.10231795364092718, "grad_norm": 0.3050659818145193, "learning_rate": 9.988307300959513e-06, "loss": 0.3163, "step": 1279 }, { "epoch": 0.10239795204095918, "grad_norm": 0.2868988167797236, "learning_rate": 9.988262985843551e-06, "loss": 0.312, "step": 1280 }, { "epoch": 0.10247795044099119, "grad_norm": 0.2936633728806369, "learning_rate": 9.988218587008287e-06, "loss": 0.3031, "step": 1281 }, { "epoch": 0.10255794884102318, "grad_norm": 0.30562312034708167, "learning_rate": 9.988174104454466e-06, "loss": 0.2953, "step": 1282 }, { "epoch": 0.10263794724105518, "grad_norm": 0.30098613252972745, "learning_rate": 9.988129538182833e-06, "loss": 0.3101, "step": 1283 }, { "epoch": 0.10271794564108717, "grad_norm": 0.3439752151035775, "learning_rate": 9.988084888194139e-06, "loss": 0.3316, "step": 1284 }, { "epoch": 0.10279794404111918, "grad_norm": 0.2290340858711675, "learning_rate": 9.98804015448913e-06, "loss": 0.3739, "step": 1285 }, { "epoch": 0.10287794244115117, "grad_norm": 0.30234721270267423, "learning_rate": 9.98799533706856e-06, "loss": 0.3108, "step": 1286 }, { "epoch": 0.10295794084118318, "grad_norm": 0.3167429964534761, "learning_rate": 9.987950435933179e-06, "loss": 0.3055, "step": 1287 }, { "epoch": 0.10303793924121518, "grad_norm": 0.35936923207854876, "learning_rate": 9.987905451083742e-06, "loss": 0.2913, "step": 1288 }, { "epoch": 0.10311793764124717, "grad_norm": 0.33185189738148346, "learning_rate": 9.987860382521003e-06, "loss": 0.2946, "step": 1289 }, { "epoch": 0.10319793604127918, "grad_norm": 0.317909989613041, "learning_rate": 9.987815230245717e-06, "loss": 0.3149, "step": 1290 }, { "epoch": 0.10327793444131117, "grad_norm": 0.3385313019442595, "learning_rate": 9.987769994258645e-06, "loss": 0.2729, "step": 1291 }, { "epoch": 0.10335793284134318, "grad_norm": 0.2854576039475464, "learning_rate": 9.987724674560544e-06, "loss": 0.3226, "step": 1292 }, { "epoch": 0.10343793124137517, "grad_norm": 0.3442677702237156, "learning_rate": 9.987679271152175e-06, "loss": 0.2935, "step": 1293 }, { "epoch": 0.10351792964140717, "grad_norm": 0.34197836726400965, "learning_rate": 9.9876337840343e-06, "loss": 0.3077, "step": 1294 }, { "epoch": 0.10359792804143918, "grad_norm": 0.29507960965647795, "learning_rate": 9.987588213207684e-06, "loss": 0.3307, "step": 1295 }, { "epoch": 0.10367792644147117, "grad_norm": 0.25240740382005794, "learning_rate": 9.98754255867309e-06, "loss": 0.3734, "step": 1296 }, { "epoch": 0.10375792484150317, "grad_norm": 0.3974352571139531, "learning_rate": 9.987496820431284e-06, "loss": 0.2913, "step": 1297 }, { "epoch": 0.10383792324153517, "grad_norm": 0.2937148592539245, "learning_rate": 9.987450998483035e-06, "loss": 0.3165, "step": 1298 }, { "epoch": 0.10391792164156717, "grad_norm": 0.35243267867548045, "learning_rate": 9.987405092829113e-06, "loss": 0.352, "step": 1299 }, { "epoch": 0.10399792004159916, "grad_norm": 0.3370132406639814, "learning_rate": 9.987359103470284e-06, "loss": 0.2804, "step": 1300 }, { "epoch": 0.10407791844163117, "grad_norm": 0.3370863623867093, "learning_rate": 9.987313030407325e-06, "loss": 0.315, "step": 1301 }, { "epoch": 0.10415791684166317, "grad_norm": 0.29846961511591147, "learning_rate": 9.987266873641005e-06, "loss": 0.3393, "step": 1302 }, { "epoch": 0.10423791524169516, "grad_norm": 0.2655617079971374, "learning_rate": 9.987220633172101e-06, "loss": 0.3483, "step": 1303 }, { "epoch": 0.10431791364172717, "grad_norm": 0.3039496905169994, "learning_rate": 9.987174309001389e-06, "loss": 0.3167, "step": 1304 }, { "epoch": 0.10439791204175916, "grad_norm": 0.8820324191915728, "learning_rate": 9.987127901129647e-06, "loss": 0.324, "step": 1305 }, { "epoch": 0.10447791044179117, "grad_norm": 0.2690644999703079, "learning_rate": 9.987081409557653e-06, "loss": 0.3436, "step": 1306 }, { "epoch": 0.10455790884182316, "grad_norm": 0.42625475439059285, "learning_rate": 9.987034834286186e-06, "loss": 0.2909, "step": 1307 }, { "epoch": 0.10463790724185516, "grad_norm": 0.30301684141354085, "learning_rate": 9.98698817531603e-06, "loss": 0.3169, "step": 1308 }, { "epoch": 0.10471790564188717, "grad_norm": 0.35746521313791585, "learning_rate": 9.986941432647968e-06, "loss": 0.353, "step": 1309 }, { "epoch": 0.10479790404191916, "grad_norm": 0.3444488962456452, "learning_rate": 9.986894606282781e-06, "loss": 0.3015, "step": 1310 }, { "epoch": 0.10487790244195117, "grad_norm": 0.2599942598559724, "learning_rate": 9.98684769622126e-06, "loss": 0.3412, "step": 1311 }, { "epoch": 0.10495790084198316, "grad_norm": 0.45244421192262807, "learning_rate": 9.986800702464188e-06, "loss": 0.3179, "step": 1312 }, { "epoch": 0.10503789924201516, "grad_norm": 0.28006695860917663, "learning_rate": 9.986753625012358e-06, "loss": 0.2973, "step": 1313 }, { "epoch": 0.10511789764204715, "grad_norm": 0.3071198583963096, "learning_rate": 9.986706463866555e-06, "loss": 0.2926, "step": 1314 }, { "epoch": 0.10519789604207916, "grad_norm": 0.3165090994527639, "learning_rate": 9.986659219027575e-06, "loss": 0.2932, "step": 1315 }, { "epoch": 0.10527789444211116, "grad_norm": 0.26672877344909224, "learning_rate": 9.986611890496207e-06, "loss": 0.3505, "step": 1316 }, { "epoch": 0.10535789284214316, "grad_norm": 0.2895156775431847, "learning_rate": 9.986564478273249e-06, "loss": 0.3298, "step": 1317 }, { "epoch": 0.10543789124217516, "grad_norm": 0.2799604794561796, "learning_rate": 9.986516982359495e-06, "loss": 0.3032, "step": 1318 }, { "epoch": 0.10551788964220715, "grad_norm": 0.37240292853299173, "learning_rate": 9.986469402755742e-06, "loss": 0.3039, "step": 1319 }, { "epoch": 0.10559788804223916, "grad_norm": 0.272665775076525, "learning_rate": 9.986421739462787e-06, "loss": 0.365, "step": 1320 }, { "epoch": 0.10567788644227115, "grad_norm": 0.29670359182274086, "learning_rate": 9.986373992481434e-06, "loss": 0.3136, "step": 1321 }, { "epoch": 0.10575788484230315, "grad_norm": 0.21156399629307512, "learning_rate": 9.986326161812482e-06, "loss": 0.364, "step": 1322 }, { "epoch": 0.10583788324233516, "grad_norm": 0.2635861252657959, "learning_rate": 9.986278247456735e-06, "loss": 0.319, "step": 1323 }, { "epoch": 0.10591788164236715, "grad_norm": 0.31265765705063625, "learning_rate": 9.986230249414994e-06, "loss": 0.3545, "step": 1324 }, { "epoch": 0.10599788004239916, "grad_norm": 0.25935565466178173, "learning_rate": 9.986182167688066e-06, "loss": 0.3392, "step": 1325 }, { "epoch": 0.10607787844243115, "grad_norm": 0.2652829491049044, "learning_rate": 9.98613400227676e-06, "loss": 0.3271, "step": 1326 }, { "epoch": 0.10615787684246315, "grad_norm": 0.42698749029613575, "learning_rate": 9.986085753181883e-06, "loss": 0.3037, "step": 1327 }, { "epoch": 0.10623787524249514, "grad_norm": 0.29384866440506857, "learning_rate": 9.986037420404244e-06, "loss": 0.3019, "step": 1328 }, { "epoch": 0.10631787364252715, "grad_norm": 0.5075400258226809, "learning_rate": 9.985989003944655e-06, "loss": 0.2607, "step": 1329 }, { "epoch": 0.10639787204255916, "grad_norm": 0.3039461273386364, "learning_rate": 9.985940503803928e-06, "loss": 0.3111, "step": 1330 }, { "epoch": 0.10647787044259115, "grad_norm": 0.33458028149626645, "learning_rate": 9.985891919982878e-06, "loss": 0.2939, "step": 1331 }, { "epoch": 0.10655786884262315, "grad_norm": 0.4022390686572386, "learning_rate": 9.98584325248232e-06, "loss": 0.2909, "step": 1332 }, { "epoch": 0.10663786724265514, "grad_norm": 0.2877003106087541, "learning_rate": 9.98579450130307e-06, "loss": 0.2898, "step": 1333 }, { "epoch": 0.10671786564268715, "grad_norm": 0.34474511312778466, "learning_rate": 9.985745666445948e-06, "loss": 0.2765, "step": 1334 }, { "epoch": 0.10679786404271914, "grad_norm": 0.32283272866703494, "learning_rate": 9.98569674791177e-06, "loss": 0.2855, "step": 1335 }, { "epoch": 0.10687786244275115, "grad_norm": 1.1229365030885068, "learning_rate": 9.985647745701362e-06, "loss": 0.3144, "step": 1336 }, { "epoch": 0.10695786084278315, "grad_norm": 0.3347897688339787, "learning_rate": 9.985598659815543e-06, "loss": 0.272, "step": 1337 }, { "epoch": 0.10703785924281514, "grad_norm": 0.3254276245874741, "learning_rate": 9.985549490255138e-06, "loss": 0.2892, "step": 1338 }, { "epoch": 0.10711785764284715, "grad_norm": 0.33438608112543416, "learning_rate": 9.985500237020972e-06, "loss": 0.2889, "step": 1339 }, { "epoch": 0.10719785604287914, "grad_norm": 0.2872012281666103, "learning_rate": 9.98545090011387e-06, "loss": 0.3263, "step": 1340 }, { "epoch": 0.10727785444291114, "grad_norm": 0.31516981903686203, "learning_rate": 9.985401479534664e-06, "loss": 0.3241, "step": 1341 }, { "epoch": 0.10735785284294314, "grad_norm": 0.3979255205966222, "learning_rate": 9.98535197528418e-06, "loss": 0.3172, "step": 1342 }, { "epoch": 0.10743785124297514, "grad_norm": 0.3311000065107302, "learning_rate": 9.985302387363249e-06, "loss": 0.2856, "step": 1343 }, { "epoch": 0.10751784964300715, "grad_norm": 0.33735574769190096, "learning_rate": 9.985252715772705e-06, "loss": 0.2927, "step": 1344 }, { "epoch": 0.10759784804303914, "grad_norm": 0.2836524534655475, "learning_rate": 9.985202960513381e-06, "loss": 0.3041, "step": 1345 }, { "epoch": 0.10767784644307114, "grad_norm": 0.418141294242237, "learning_rate": 9.985153121586111e-06, "loss": 0.2725, "step": 1346 }, { "epoch": 0.10775784484310313, "grad_norm": 0.4360924370905652, "learning_rate": 9.985103198991733e-06, "loss": 0.3061, "step": 1347 }, { "epoch": 0.10783784324313514, "grad_norm": 0.31317808664606367, "learning_rate": 9.985053192731085e-06, "loss": 0.3226, "step": 1348 }, { "epoch": 0.10791784164316713, "grad_norm": 0.35114821662992646, "learning_rate": 9.985003102805004e-06, "loss": 0.3003, "step": 1349 }, { "epoch": 0.10799784004319914, "grad_norm": 0.34346522816141983, "learning_rate": 9.98495292921433e-06, "loss": 0.2976, "step": 1350 }, { "epoch": 0.10807783844323114, "grad_norm": 0.2841215362063792, "learning_rate": 9.984902671959911e-06, "loss": 0.3243, "step": 1351 }, { "epoch": 0.10815783684326313, "grad_norm": 0.34871208125747283, "learning_rate": 9.984852331042585e-06, "loss": 0.2795, "step": 1352 }, { "epoch": 0.10823783524329514, "grad_norm": 0.3335832276308041, "learning_rate": 9.984801906463199e-06, "loss": 0.2786, "step": 1353 }, { "epoch": 0.10831783364332713, "grad_norm": 0.2263235448444128, "learning_rate": 9.984751398222598e-06, "loss": 0.369, "step": 1354 }, { "epoch": 0.10839783204335914, "grad_norm": 0.3579014521990015, "learning_rate": 9.984700806321631e-06, "loss": 0.3142, "step": 1355 }, { "epoch": 0.10847783044339113, "grad_norm": 0.3524676693619381, "learning_rate": 9.984650130761146e-06, "loss": 0.2998, "step": 1356 }, { "epoch": 0.10855782884342313, "grad_norm": 0.3354042591208965, "learning_rate": 9.984599371541995e-06, "loss": 0.294, "step": 1357 }, { "epoch": 0.10863782724345514, "grad_norm": 0.358227469252063, "learning_rate": 9.984548528665028e-06, "loss": 0.2949, "step": 1358 }, { "epoch": 0.10871782564348713, "grad_norm": 0.3380515846468206, "learning_rate": 9.984497602131101e-06, "loss": 0.2934, "step": 1359 }, { "epoch": 0.10879782404351913, "grad_norm": 0.3470165351129875, "learning_rate": 9.984446591941065e-06, "loss": 0.2825, "step": 1360 }, { "epoch": 0.10887782244355113, "grad_norm": 0.30430799180903056, "learning_rate": 9.98439549809578e-06, "loss": 0.3155, "step": 1361 }, { "epoch": 0.10895782084358313, "grad_norm": 0.2786739661600301, "learning_rate": 9.984344320596103e-06, "loss": 0.3539, "step": 1362 }, { "epoch": 0.10903781924361512, "grad_norm": 0.2798544229523909, "learning_rate": 9.984293059442888e-06, "loss": 0.3413, "step": 1363 }, { "epoch": 0.10911781764364713, "grad_norm": 0.3882108173734194, "learning_rate": 9.984241714636999e-06, "loss": 0.3238, "step": 1364 }, { "epoch": 0.10919781604367913, "grad_norm": 0.35321414537182727, "learning_rate": 9.9841902861793e-06, "loss": 0.2991, "step": 1365 }, { "epoch": 0.10927781444371112, "grad_norm": 0.2834593596321515, "learning_rate": 9.984138774070651e-06, "loss": 0.2955, "step": 1366 }, { "epoch": 0.10935781284374313, "grad_norm": 0.3071572663719183, "learning_rate": 9.984087178311917e-06, "loss": 0.314, "step": 1367 }, { "epoch": 0.10943781124377512, "grad_norm": 0.3451213092158988, "learning_rate": 9.984035498903965e-06, "loss": 0.2773, "step": 1368 }, { "epoch": 0.10951780964380713, "grad_norm": 0.3384903614165167, "learning_rate": 9.98398373584766e-06, "loss": 0.2929, "step": 1369 }, { "epoch": 0.10959780804383912, "grad_norm": 0.3389986485063915, "learning_rate": 9.983931889143874e-06, "loss": 0.28, "step": 1370 }, { "epoch": 0.10967780644387112, "grad_norm": 0.2926365930347929, "learning_rate": 9.983879958793476e-06, "loss": 0.3217, "step": 1371 }, { "epoch": 0.10975780484390313, "grad_norm": 0.3350498945875562, "learning_rate": 9.983827944797336e-06, "loss": 0.3051, "step": 1372 }, { "epoch": 0.10983780324393512, "grad_norm": 0.2577098138025434, "learning_rate": 9.983775847156327e-06, "loss": 0.3572, "step": 1373 }, { "epoch": 0.10991780164396713, "grad_norm": 0.30659277855840583, "learning_rate": 9.983723665871326e-06, "loss": 0.3033, "step": 1374 }, { "epoch": 0.10999780004399912, "grad_norm": 0.33947810651758475, "learning_rate": 9.983671400943206e-06, "loss": 0.2825, "step": 1375 }, { "epoch": 0.11007779844403112, "grad_norm": 0.2923985072662539, "learning_rate": 9.983619052372847e-06, "loss": 0.3018, "step": 1376 }, { "epoch": 0.11015779684406311, "grad_norm": 0.3760544257107967, "learning_rate": 9.983566620161126e-06, "loss": 0.2711, "step": 1377 }, { "epoch": 0.11023779524409512, "grad_norm": 0.30763794397167393, "learning_rate": 9.983514104308923e-06, "loss": 0.3189, "step": 1378 }, { "epoch": 0.11031779364412712, "grad_norm": 0.2945578848786041, "learning_rate": 9.983461504817119e-06, "loss": 0.3359, "step": 1379 }, { "epoch": 0.11039779204415912, "grad_norm": 0.34270587721575296, "learning_rate": 9.983408821686596e-06, "loss": 0.3107, "step": 1380 }, { "epoch": 0.11047779044419112, "grad_norm": 0.26706071138871557, "learning_rate": 9.983356054918238e-06, "loss": 0.35, "step": 1381 }, { "epoch": 0.11055778884422311, "grad_norm": 0.22567891958630917, "learning_rate": 9.983303204512935e-06, "loss": 0.3599, "step": 1382 }, { "epoch": 0.11063778724425512, "grad_norm": 0.33980870107461, "learning_rate": 9.983250270471569e-06, "loss": 0.2793, "step": 1383 }, { "epoch": 0.11071778564428711, "grad_norm": 0.2911898495649174, "learning_rate": 9.983197252795031e-06, "loss": 0.3477, "step": 1384 }, { "epoch": 0.11079778404431911, "grad_norm": 0.2972147659998989, "learning_rate": 9.98314415148421e-06, "loss": 0.3156, "step": 1385 }, { "epoch": 0.11087778244435112, "grad_norm": 0.33817898180635353, "learning_rate": 9.98309096654e-06, "loss": 0.2728, "step": 1386 }, { "epoch": 0.11095778084438311, "grad_norm": 0.4474019938067382, "learning_rate": 9.983037697963287e-06, "loss": 0.3085, "step": 1387 }, { "epoch": 0.11103777924441512, "grad_norm": 0.28548989785634743, "learning_rate": 9.982984345754972e-06, "loss": 0.3134, "step": 1388 }, { "epoch": 0.11111777764444711, "grad_norm": 0.3399075028099732, "learning_rate": 9.982930909915944e-06, "loss": 0.3351, "step": 1389 }, { "epoch": 0.11119777604447911, "grad_norm": 0.29761108577632617, "learning_rate": 9.982877390447106e-06, "loss": 0.2727, "step": 1390 }, { "epoch": 0.1112777744445111, "grad_norm": 0.26276528135687366, "learning_rate": 9.982823787349352e-06, "loss": 0.33, "step": 1391 }, { "epoch": 0.11135777284454311, "grad_norm": 0.279946862441137, "learning_rate": 9.982770100623584e-06, "loss": 0.3112, "step": 1392 }, { "epoch": 0.11143777124457512, "grad_norm": 0.2718546689068045, "learning_rate": 9.982716330270701e-06, "loss": 0.2948, "step": 1393 }, { "epoch": 0.1115177696446071, "grad_norm": 0.30181062053838537, "learning_rate": 9.98266247629161e-06, "loss": 0.2862, "step": 1394 }, { "epoch": 0.11159776804463911, "grad_norm": 0.24667720148422817, "learning_rate": 9.982608538687208e-06, "loss": 0.3263, "step": 1395 }, { "epoch": 0.1116777664446711, "grad_norm": 0.327123223374991, "learning_rate": 9.982554517458403e-06, "loss": 0.3201, "step": 1396 }, { "epoch": 0.11175776484470311, "grad_norm": 0.43777605250179946, "learning_rate": 9.982500412606105e-06, "loss": 0.2834, "step": 1397 }, { "epoch": 0.1118377632447351, "grad_norm": 0.26571596143566883, "learning_rate": 9.982446224131217e-06, "loss": 0.3378, "step": 1398 }, { "epoch": 0.1119177616447671, "grad_norm": 0.2568340781964374, "learning_rate": 9.982391952034653e-06, "loss": 0.3358, "step": 1399 }, { "epoch": 0.11199776004479911, "grad_norm": 0.24132138161118302, "learning_rate": 9.982337596317321e-06, "loss": 0.3411, "step": 1400 }, { "epoch": 0.1120777584448311, "grad_norm": 0.2643760362831179, "learning_rate": 9.982283156980133e-06, "loss": 0.3351, "step": 1401 }, { "epoch": 0.11215775684486311, "grad_norm": 0.26961492781031765, "learning_rate": 9.982228634024004e-06, "loss": 0.3578, "step": 1402 }, { "epoch": 0.1122377552448951, "grad_norm": 0.3113643028930828, "learning_rate": 9.982174027449849e-06, "loss": 0.2781, "step": 1403 }, { "epoch": 0.1123177536449271, "grad_norm": 0.3152292197825255, "learning_rate": 9.982119337258585e-06, "loss": 0.3184, "step": 1404 }, { "epoch": 0.1123977520449591, "grad_norm": 0.2578151671721826, "learning_rate": 9.982064563451128e-06, "loss": 0.3374, "step": 1405 }, { "epoch": 0.1124777504449911, "grad_norm": 0.33190529764117943, "learning_rate": 9.982009706028399e-06, "loss": 0.2964, "step": 1406 }, { "epoch": 0.1125577488450231, "grad_norm": 0.3266734397325813, "learning_rate": 9.981954764991318e-06, "loss": 0.2819, "step": 1407 }, { "epoch": 0.1126377472450551, "grad_norm": 0.2968811814987577, "learning_rate": 9.981899740340807e-06, "loss": 0.315, "step": 1408 }, { "epoch": 0.1127177456450871, "grad_norm": 0.31673607049427976, "learning_rate": 9.981844632077788e-06, "loss": 0.2664, "step": 1409 }, { "epoch": 0.1127977440451191, "grad_norm": 0.27705925859633873, "learning_rate": 9.98178944020319e-06, "loss": 0.3423, "step": 1410 }, { "epoch": 0.1128777424451511, "grad_norm": 0.32536013581500256, "learning_rate": 9.981734164717936e-06, "loss": 0.303, "step": 1411 }, { "epoch": 0.11295774084518309, "grad_norm": 0.2706952881579151, "learning_rate": 9.981678805622954e-06, "loss": 0.2986, "step": 1412 }, { "epoch": 0.1130377392452151, "grad_norm": 0.3247750360500546, "learning_rate": 9.981623362919173e-06, "loss": 0.3074, "step": 1413 }, { "epoch": 0.1131177376452471, "grad_norm": 0.3245698134320572, "learning_rate": 9.981567836607526e-06, "loss": 0.2818, "step": 1414 }, { "epoch": 0.1131977360452791, "grad_norm": 0.31380831107247337, "learning_rate": 9.981512226688943e-06, "loss": 0.2638, "step": 1415 }, { "epoch": 0.1132777344453111, "grad_norm": 0.3302184240510599, "learning_rate": 9.981456533164356e-06, "loss": 0.2896, "step": 1416 }, { "epoch": 0.11335773284534309, "grad_norm": 0.2703846217199556, "learning_rate": 9.981400756034701e-06, "loss": 0.3108, "step": 1417 }, { "epoch": 0.1134377312453751, "grad_norm": 0.29058278083226297, "learning_rate": 9.981344895300916e-06, "loss": 0.3085, "step": 1418 }, { "epoch": 0.11351772964540709, "grad_norm": 0.3204833693640965, "learning_rate": 9.981288950963935e-06, "loss": 0.2898, "step": 1419 }, { "epoch": 0.11359772804543909, "grad_norm": 0.29668528463685284, "learning_rate": 9.981232923024699e-06, "loss": 0.2979, "step": 1420 }, { "epoch": 0.1136777264454711, "grad_norm": 0.24285224028759425, "learning_rate": 9.981176811484148e-06, "loss": 0.3691, "step": 1421 }, { "epoch": 0.11375772484550309, "grad_norm": 0.3158911302976063, "learning_rate": 9.981120616343222e-06, "loss": 0.2951, "step": 1422 }, { "epoch": 0.1138377232455351, "grad_norm": 0.3637552199024051, "learning_rate": 9.981064337602869e-06, "loss": 0.2922, "step": 1423 }, { "epoch": 0.11391772164556709, "grad_norm": 0.32651378336051123, "learning_rate": 9.981007975264029e-06, "loss": 0.3062, "step": 1424 }, { "epoch": 0.11399772004559909, "grad_norm": 0.32003476616525206, "learning_rate": 9.980951529327649e-06, "loss": 0.2997, "step": 1425 }, { "epoch": 0.11407771844563108, "grad_norm": 0.29555774476124724, "learning_rate": 9.980894999794678e-06, "loss": 0.3205, "step": 1426 }, { "epoch": 0.11415771684566309, "grad_norm": 0.2890746054574132, "learning_rate": 9.980838386666063e-06, "loss": 0.2948, "step": 1427 }, { "epoch": 0.11423771524569509, "grad_norm": 0.3458946919176639, "learning_rate": 9.980781689942753e-06, "loss": 0.3197, "step": 1428 }, { "epoch": 0.11431771364572708, "grad_norm": 0.2990655353141574, "learning_rate": 9.980724909625704e-06, "loss": 0.2949, "step": 1429 }, { "epoch": 0.11439771204575909, "grad_norm": 0.3337600702981667, "learning_rate": 9.980668045715864e-06, "loss": 0.2937, "step": 1430 }, { "epoch": 0.11447771044579108, "grad_norm": 0.3486863418960651, "learning_rate": 9.98061109821419e-06, "loss": 0.2714, "step": 1431 }, { "epoch": 0.11455770884582309, "grad_norm": 0.3015139990742092, "learning_rate": 9.980554067121637e-06, "loss": 0.3126, "step": 1432 }, { "epoch": 0.11463770724585508, "grad_norm": 0.5036819319033887, "learning_rate": 9.980496952439162e-06, "loss": 0.3147, "step": 1433 }, { "epoch": 0.11471770564588708, "grad_norm": 0.2854939014908416, "learning_rate": 9.980439754167723e-06, "loss": 0.3046, "step": 1434 }, { "epoch": 0.11479770404591909, "grad_norm": 0.3386204937853727, "learning_rate": 9.980382472308283e-06, "loss": 0.2793, "step": 1435 }, { "epoch": 0.11487770244595108, "grad_norm": 0.45164165461291966, "learning_rate": 9.980325106861802e-06, "loss": 0.281, "step": 1436 }, { "epoch": 0.11495770084598309, "grad_norm": 0.3501993025413012, "learning_rate": 9.980267657829241e-06, "loss": 0.301, "step": 1437 }, { "epoch": 0.11503769924601508, "grad_norm": 0.3087839880243543, "learning_rate": 9.980210125211565e-06, "loss": 0.3071, "step": 1438 }, { "epoch": 0.11511769764604708, "grad_norm": 0.24451092987643727, "learning_rate": 9.98015250900974e-06, "loss": 0.3576, "step": 1439 }, { "epoch": 0.11519769604607907, "grad_norm": 0.33312836101539495, "learning_rate": 9.980094809224732e-06, "loss": 0.286, "step": 1440 }, { "epoch": 0.11527769444611108, "grad_norm": 0.2901214304728016, "learning_rate": 9.980037025857511e-06, "loss": 0.2946, "step": 1441 }, { "epoch": 0.11535769284614308, "grad_norm": 0.28985762073093546, "learning_rate": 9.979979158909046e-06, "loss": 0.3042, "step": 1442 }, { "epoch": 0.11543769124617508, "grad_norm": 0.36106296513149383, "learning_rate": 9.979921208380308e-06, "loss": 0.307, "step": 1443 }, { "epoch": 0.11551768964620708, "grad_norm": 0.28809511390562903, "learning_rate": 9.979863174272272e-06, "loss": 0.326, "step": 1444 }, { "epoch": 0.11559768804623907, "grad_norm": 0.32190326401354225, "learning_rate": 9.979805056585907e-06, "loss": 0.2967, "step": 1445 }, { "epoch": 0.11567768644627108, "grad_norm": 0.3008408052431682, "learning_rate": 9.979746855322192e-06, "loss": 0.3279, "step": 1446 }, { "epoch": 0.11575768484630307, "grad_norm": 0.2499216574047745, "learning_rate": 9.979688570482102e-06, "loss": 0.3503, "step": 1447 }, { "epoch": 0.11583768324633507, "grad_norm": 0.28669117049920684, "learning_rate": 9.979630202066619e-06, "loss": 0.3148, "step": 1448 }, { "epoch": 0.11591768164636708, "grad_norm": 0.4187697294160275, "learning_rate": 9.979571750076717e-06, "loss": 0.28, "step": 1449 }, { "epoch": 0.11599768004639907, "grad_norm": 0.3572209385508511, "learning_rate": 9.979513214513381e-06, "loss": 0.3283, "step": 1450 }, { "epoch": 0.11607767844643108, "grad_norm": 0.30147665738404217, "learning_rate": 9.979454595377594e-06, "loss": 0.3227, "step": 1451 }, { "epoch": 0.11615767684646307, "grad_norm": 0.36225540456146976, "learning_rate": 9.979395892670336e-06, "loss": 0.319, "step": 1452 }, { "epoch": 0.11623767524649507, "grad_norm": 0.3070999353893005, "learning_rate": 9.979337106392596e-06, "loss": 0.2935, "step": 1453 }, { "epoch": 0.11631767364652706, "grad_norm": 0.5279769431102683, "learning_rate": 9.979278236545356e-06, "loss": 0.317, "step": 1454 }, { "epoch": 0.11639767204655907, "grad_norm": 0.3450699672279452, "learning_rate": 9.97921928312961e-06, "loss": 0.2873, "step": 1455 }, { "epoch": 0.11647767044659108, "grad_norm": 0.34255939265265894, "learning_rate": 9.979160246146343e-06, "loss": 0.323, "step": 1456 }, { "epoch": 0.11655766884662307, "grad_norm": 0.2902131464997649, "learning_rate": 9.979101125596548e-06, "loss": 0.3152, "step": 1457 }, { "epoch": 0.11663766724665507, "grad_norm": 0.37785094994138085, "learning_rate": 9.979041921481217e-06, "loss": 0.3129, "step": 1458 }, { "epoch": 0.11671766564668706, "grad_norm": 0.3302957376558734, "learning_rate": 9.978982633801342e-06, "loss": 0.3085, "step": 1459 }, { "epoch": 0.11679766404671907, "grad_norm": 0.3112910081678639, "learning_rate": 9.978923262557918e-06, "loss": 0.3209, "step": 1460 }, { "epoch": 0.11687766244675106, "grad_norm": 0.34660039058612774, "learning_rate": 9.978863807751944e-06, "loss": 0.3399, "step": 1461 }, { "epoch": 0.11695766084678307, "grad_norm": 0.32229968929426706, "learning_rate": 9.978804269384417e-06, "loss": 0.3208, "step": 1462 }, { "epoch": 0.11703765924681507, "grad_norm": 0.34257960233020696, "learning_rate": 9.978744647456335e-06, "loss": 0.279, "step": 1463 }, { "epoch": 0.11711765764684706, "grad_norm": 0.34136657588100766, "learning_rate": 9.9786849419687e-06, "loss": 0.2811, "step": 1464 }, { "epoch": 0.11719765604687907, "grad_norm": 0.3211573022383478, "learning_rate": 9.978625152922511e-06, "loss": 0.2812, "step": 1465 }, { "epoch": 0.11727765444691106, "grad_norm": 0.3301161313973079, "learning_rate": 9.978565280318777e-06, "loss": 0.2868, "step": 1466 }, { "epoch": 0.11735765284694306, "grad_norm": 0.38159317546514854, "learning_rate": 9.978505324158499e-06, "loss": 0.2893, "step": 1467 }, { "epoch": 0.11743765124697506, "grad_norm": 0.30024634169715086, "learning_rate": 9.978445284442684e-06, "loss": 0.3197, "step": 1468 }, { "epoch": 0.11751764964700706, "grad_norm": 0.26133989016953835, "learning_rate": 9.97838516117234e-06, "loss": 0.3684, "step": 1469 }, { "epoch": 0.11759764804703907, "grad_norm": 0.35906397718844013, "learning_rate": 9.978324954348472e-06, "loss": 0.2825, "step": 1470 }, { "epoch": 0.11767764644707106, "grad_norm": 0.3286317533720119, "learning_rate": 9.978264663972099e-06, "loss": 0.2907, "step": 1471 }, { "epoch": 0.11775764484710306, "grad_norm": 0.26986448589140516, "learning_rate": 9.978204290044225e-06, "loss": 0.3237, "step": 1472 }, { "epoch": 0.11783764324713505, "grad_norm": 0.3232349921602911, "learning_rate": 9.978143832565868e-06, "loss": 0.295, "step": 1473 }, { "epoch": 0.11791764164716706, "grad_norm": 0.3657146918647761, "learning_rate": 9.978083291538041e-06, "loss": 0.2783, "step": 1474 }, { "epoch": 0.11799764004719905, "grad_norm": 0.2771884573992658, "learning_rate": 9.97802266696176e-06, "loss": 0.2884, "step": 1475 }, { "epoch": 0.11807763844723106, "grad_norm": 0.34310011874585267, "learning_rate": 9.97796195883804e-06, "loss": 0.2897, "step": 1476 }, { "epoch": 0.11815763684726305, "grad_norm": 0.2435888730935232, "learning_rate": 9.977901167167904e-06, "loss": 0.3544, "step": 1477 }, { "epoch": 0.11823763524729505, "grad_norm": 0.3103673532728682, "learning_rate": 9.977840291952373e-06, "loss": 0.3291, "step": 1478 }, { "epoch": 0.11831763364732706, "grad_norm": 0.317614922138713, "learning_rate": 9.977779333192464e-06, "loss": 0.311, "step": 1479 }, { "epoch": 0.11839763204735905, "grad_norm": 0.3271110334014837, "learning_rate": 9.977718290889202e-06, "loss": 0.2923, "step": 1480 }, { "epoch": 0.11847763044739106, "grad_norm": 0.31018444377527504, "learning_rate": 9.977657165043613e-06, "loss": 0.3029, "step": 1481 }, { "epoch": 0.11855762884742305, "grad_norm": 0.32276842556863816, "learning_rate": 9.977595955656722e-06, "loss": 0.2833, "step": 1482 }, { "epoch": 0.11863762724745505, "grad_norm": 0.2929831885525505, "learning_rate": 9.977534662729556e-06, "loss": 0.3103, "step": 1483 }, { "epoch": 0.11871762564748704, "grad_norm": 0.2793754796056116, "learning_rate": 9.977473286263145e-06, "loss": 0.2848, "step": 1484 }, { "epoch": 0.11879762404751905, "grad_norm": 0.28825136415120856, "learning_rate": 9.977411826258516e-06, "loss": 0.3111, "step": 1485 }, { "epoch": 0.11887762244755105, "grad_norm": 0.2800849463181504, "learning_rate": 9.977350282716703e-06, "loss": 0.317, "step": 1486 }, { "epoch": 0.11895762084758305, "grad_norm": 0.2854853040851749, "learning_rate": 9.977288655638737e-06, "loss": 0.311, "step": 1487 }, { "epoch": 0.11903761924761505, "grad_norm": 0.3188425101986074, "learning_rate": 9.977226945025655e-06, "loss": 0.3041, "step": 1488 }, { "epoch": 0.11911761764764704, "grad_norm": 0.2872613664932559, "learning_rate": 9.977165150878492e-06, "loss": 0.3128, "step": 1489 }, { "epoch": 0.11919761604767905, "grad_norm": 0.3240081271199343, "learning_rate": 9.977103273198285e-06, "loss": 0.321, "step": 1490 }, { "epoch": 0.11927761444771104, "grad_norm": 0.32873356293552747, "learning_rate": 9.977041311986072e-06, "loss": 0.3208, "step": 1491 }, { "epoch": 0.11935761284774304, "grad_norm": 0.3203868320710008, "learning_rate": 9.97697926724289e-06, "loss": 0.283, "step": 1492 }, { "epoch": 0.11943761124777505, "grad_norm": 0.3054043730128793, "learning_rate": 9.976917138969784e-06, "loss": 0.3523, "step": 1493 }, { "epoch": 0.11951760964780704, "grad_norm": 0.3674858121403118, "learning_rate": 9.976854927167799e-06, "loss": 0.2866, "step": 1494 }, { "epoch": 0.11959760804783905, "grad_norm": 0.32809462029957726, "learning_rate": 9.976792631837973e-06, "loss": 0.2959, "step": 1495 }, { "epoch": 0.11967760644787104, "grad_norm": 0.3021737904215764, "learning_rate": 9.976730252981354e-06, "loss": 0.307, "step": 1496 }, { "epoch": 0.11975760484790304, "grad_norm": 0.29884948751944573, "learning_rate": 9.976667790598991e-06, "loss": 0.3114, "step": 1497 }, { "epoch": 0.11983760324793503, "grad_norm": 0.323232091189394, "learning_rate": 9.97660524469193e-06, "loss": 0.2891, "step": 1498 }, { "epoch": 0.11991760164796704, "grad_norm": 0.31988370657863524, "learning_rate": 9.976542615261223e-06, "loss": 0.2866, "step": 1499 }, { "epoch": 0.11999760004799905, "grad_norm": 0.3492068403565844, "learning_rate": 9.976479902307918e-06, "loss": 0.2926, "step": 1500 }, { "epoch": 0.12007759844803104, "grad_norm": 0.31893173931742136, "learning_rate": 9.97641710583307e-06, "loss": 0.3222, "step": 1501 }, { "epoch": 0.12015759684806304, "grad_norm": 0.3080721393753546, "learning_rate": 9.976354225837733e-06, "loss": 0.3097, "step": 1502 }, { "epoch": 0.12023759524809503, "grad_norm": 0.2654106512289159, "learning_rate": 9.97629126232296e-06, "loss": 0.3371, "step": 1503 }, { "epoch": 0.12031759364812704, "grad_norm": 0.3175950676091756, "learning_rate": 9.97622821528981e-06, "loss": 0.3081, "step": 1504 }, { "epoch": 0.12039759204815903, "grad_norm": 0.27579554520545546, "learning_rate": 9.97616508473934e-06, "loss": 0.3414, "step": 1505 }, { "epoch": 0.12047759044819104, "grad_norm": 0.24773422292196634, "learning_rate": 9.97610187067261e-06, "loss": 0.3402, "step": 1506 }, { "epoch": 0.12055758884822304, "grad_norm": 0.3269540778817153, "learning_rate": 9.976038573090679e-06, "loss": 0.2687, "step": 1507 }, { "epoch": 0.12063758724825503, "grad_norm": 0.29435912748136317, "learning_rate": 9.975975191994614e-06, "loss": 0.3114, "step": 1508 }, { "epoch": 0.12071758564828704, "grad_norm": 0.31180608889896605, "learning_rate": 9.975911727385473e-06, "loss": 0.3141, "step": 1509 }, { "epoch": 0.12079758404831903, "grad_norm": 0.3741440907311983, "learning_rate": 9.975848179264325e-06, "loss": 0.2712, "step": 1510 }, { "epoch": 0.12087758244835103, "grad_norm": 0.3080836248256556, "learning_rate": 9.975784547632237e-06, "loss": 0.3299, "step": 1511 }, { "epoch": 0.12095758084838303, "grad_norm": 0.5226236387917746, "learning_rate": 9.975720832490274e-06, "loss": 0.2799, "step": 1512 }, { "epoch": 0.12103757924841503, "grad_norm": 0.4286751809891568, "learning_rate": 9.975657033839506e-06, "loss": 0.2932, "step": 1513 }, { "epoch": 0.12111757764844704, "grad_norm": 0.36005550131069336, "learning_rate": 9.975593151681006e-06, "loss": 0.2867, "step": 1514 }, { "epoch": 0.12119757604847903, "grad_norm": 0.400946489366523, "learning_rate": 9.975529186015844e-06, "loss": 0.2831, "step": 1515 }, { "epoch": 0.12127757444851103, "grad_norm": 0.33439622278390013, "learning_rate": 9.975465136845095e-06, "loss": 0.284, "step": 1516 }, { "epoch": 0.12135757284854302, "grad_norm": 0.28467649066049355, "learning_rate": 9.975401004169834e-06, "loss": 0.3313, "step": 1517 }, { "epoch": 0.12143757124857503, "grad_norm": 0.38430441032851415, "learning_rate": 9.975336787991135e-06, "loss": 0.3158, "step": 1518 }, { "epoch": 0.12151756964860702, "grad_norm": 0.3461471552793967, "learning_rate": 9.975272488310077e-06, "loss": 0.2703, "step": 1519 }, { "epoch": 0.12159756804863903, "grad_norm": 0.25635041378940554, "learning_rate": 9.97520810512774e-06, "loss": 0.3407, "step": 1520 }, { "epoch": 0.12167756644867103, "grad_norm": 0.36860805507721267, "learning_rate": 9.975143638445205e-06, "loss": 0.312, "step": 1521 }, { "epoch": 0.12175756484870302, "grad_norm": 0.33025431942268124, "learning_rate": 9.975079088263553e-06, "loss": 0.2711, "step": 1522 }, { "epoch": 0.12183756324873503, "grad_norm": 0.2685661278626135, "learning_rate": 9.975014454583867e-06, "loss": 0.3011, "step": 1523 }, { "epoch": 0.12191756164876702, "grad_norm": 0.27592491432763816, "learning_rate": 9.974949737407232e-06, "loss": 0.3361, "step": 1524 }, { "epoch": 0.12199756004879903, "grad_norm": 0.2810609031775785, "learning_rate": 9.974884936734734e-06, "loss": 0.303, "step": 1525 }, { "epoch": 0.12207755844883102, "grad_norm": 0.2959647019456412, "learning_rate": 9.97482005256746e-06, "loss": 0.3277, "step": 1526 }, { "epoch": 0.12215755684886302, "grad_norm": 0.31007940798028527, "learning_rate": 9.974755084906503e-06, "loss": 0.3325, "step": 1527 }, { "epoch": 0.12223755524889503, "grad_norm": 0.294643580488709, "learning_rate": 9.974690033752947e-06, "loss": 0.3513, "step": 1528 }, { "epoch": 0.12231755364892702, "grad_norm": 0.23361512903103868, "learning_rate": 9.974624899107887e-06, "loss": 0.3576, "step": 1529 }, { "epoch": 0.12239755204895902, "grad_norm": 0.3488065907248092, "learning_rate": 9.974559680972418e-06, "loss": 0.3302, "step": 1530 }, { "epoch": 0.12247755044899102, "grad_norm": 0.2759016892422713, "learning_rate": 9.974494379347632e-06, "loss": 0.356, "step": 1531 }, { "epoch": 0.12255754884902302, "grad_norm": 0.4088228894844026, "learning_rate": 9.974428994234626e-06, "loss": 0.3301, "step": 1532 }, { "epoch": 0.12263754724905501, "grad_norm": 0.306047966666119, "learning_rate": 9.974363525634496e-06, "loss": 0.3246, "step": 1533 }, { "epoch": 0.12271754564908702, "grad_norm": 0.2440230840992797, "learning_rate": 9.974297973548343e-06, "loss": 0.3225, "step": 1534 }, { "epoch": 0.12279754404911902, "grad_norm": 0.24050105891261542, "learning_rate": 9.974232337977265e-06, "loss": 0.336, "step": 1535 }, { "epoch": 0.12287754244915101, "grad_norm": 0.3342336181750534, "learning_rate": 9.974166618922365e-06, "loss": 0.2853, "step": 1536 }, { "epoch": 0.12295754084918302, "grad_norm": 0.3599751638622429, "learning_rate": 9.974100816384746e-06, "loss": 0.2842, "step": 1537 }, { "epoch": 0.12303753924921501, "grad_norm": 0.3255886392617455, "learning_rate": 9.974034930365513e-06, "loss": 0.3004, "step": 1538 }, { "epoch": 0.12311753764924702, "grad_norm": 0.35342236698654944, "learning_rate": 9.97396896086577e-06, "loss": 0.2981, "step": 1539 }, { "epoch": 0.12319753604927901, "grad_norm": 0.3356546294397884, "learning_rate": 9.973902907886623e-06, "loss": 0.2734, "step": 1540 }, { "epoch": 0.12327753444931101, "grad_norm": 0.3013855894876544, "learning_rate": 9.973836771429185e-06, "loss": 0.3076, "step": 1541 }, { "epoch": 0.12335753284934302, "grad_norm": 0.27718568684371786, "learning_rate": 9.973770551494562e-06, "loss": 0.3192, "step": 1542 }, { "epoch": 0.12343753124937501, "grad_norm": 0.36203360444852395, "learning_rate": 9.973704248083868e-06, "loss": 0.2889, "step": 1543 }, { "epoch": 0.12351752964940702, "grad_norm": 0.33284406458829935, "learning_rate": 9.973637861198213e-06, "loss": 0.2897, "step": 1544 }, { "epoch": 0.123597528049439, "grad_norm": 0.322474791867277, "learning_rate": 9.973571390838715e-06, "loss": 0.2821, "step": 1545 }, { "epoch": 0.12367752644947101, "grad_norm": 0.2586611371768474, "learning_rate": 9.973504837006487e-06, "loss": 0.3333, "step": 1546 }, { "epoch": 0.123757524849503, "grad_norm": 0.4434038262028073, "learning_rate": 9.973438199702645e-06, "loss": 0.3183, "step": 1547 }, { "epoch": 0.12383752324953501, "grad_norm": 0.30057080920877394, "learning_rate": 9.97337147892831e-06, "loss": 0.3156, "step": 1548 }, { "epoch": 0.12391752164956701, "grad_norm": 0.2817218688979036, "learning_rate": 9.9733046746846e-06, "loss": 0.3197, "step": 1549 }, { "epoch": 0.123997520049599, "grad_norm": 0.3364460012975601, "learning_rate": 9.973237786972637e-06, "loss": 0.2755, "step": 1550 }, { "epoch": 0.12407751844963101, "grad_norm": 0.25995755145271016, "learning_rate": 9.973170815793543e-06, "loss": 0.322, "step": 1551 }, { "epoch": 0.124157516849663, "grad_norm": 0.32065157844305897, "learning_rate": 9.973103761148444e-06, "loss": 0.3188, "step": 1552 }, { "epoch": 0.12423751524969501, "grad_norm": 0.3131628021305152, "learning_rate": 9.973036623038462e-06, "loss": 0.2938, "step": 1553 }, { "epoch": 0.124317513649727, "grad_norm": 0.33124132803205336, "learning_rate": 9.972969401464728e-06, "loss": 0.3336, "step": 1554 }, { "epoch": 0.124397512049759, "grad_norm": 0.347177685732667, "learning_rate": 9.972902096428365e-06, "loss": 0.2671, "step": 1555 }, { "epoch": 0.12447751044979101, "grad_norm": 0.2862238918086368, "learning_rate": 9.972834707930505e-06, "loss": 0.3058, "step": 1556 }, { "epoch": 0.124557508849823, "grad_norm": 0.2812246291021389, "learning_rate": 9.972767235972283e-06, "loss": 0.355, "step": 1557 }, { "epoch": 0.124637507249855, "grad_norm": 0.3508556622891445, "learning_rate": 9.972699680554824e-06, "loss": 0.2853, "step": 1558 }, { "epoch": 0.124717505649887, "grad_norm": 0.31934129236613185, "learning_rate": 9.972632041679268e-06, "loss": 0.3181, "step": 1559 }, { "epoch": 0.124797504049919, "grad_norm": 0.4008315118730672, "learning_rate": 9.972564319346747e-06, "loss": 0.267, "step": 1560 }, { "epoch": 0.124877502449951, "grad_norm": 0.3867394289324721, "learning_rate": 9.972496513558399e-06, "loss": 0.2942, "step": 1561 }, { "epoch": 0.124957500849983, "grad_norm": 0.2655012617834147, "learning_rate": 9.97242862431536e-06, "loss": 0.3325, "step": 1562 }, { "epoch": 0.125037499250015, "grad_norm": 0.30430963968798325, "learning_rate": 9.972360651618772e-06, "loss": 0.3227, "step": 1563 }, { "epoch": 0.125117497650047, "grad_norm": 0.32976349762845253, "learning_rate": 9.972292595469775e-06, "loss": 0.2949, "step": 1564 }, { "epoch": 0.125197496050079, "grad_norm": 0.2642429426489294, "learning_rate": 9.972224455869508e-06, "loss": 0.3433, "step": 1565 }, { "epoch": 0.125277494450111, "grad_norm": 0.34800812853193486, "learning_rate": 9.972156232819122e-06, "loss": 0.2636, "step": 1566 }, { "epoch": 0.125357492850143, "grad_norm": 0.24551195984941754, "learning_rate": 9.972087926319753e-06, "loss": 0.3203, "step": 1567 }, { "epoch": 0.125437491250175, "grad_norm": 0.3514690991332886, "learning_rate": 9.972019536372554e-06, "loss": 0.2853, "step": 1568 }, { "epoch": 0.125517489650207, "grad_norm": 0.3376762173730531, "learning_rate": 9.971951062978671e-06, "loss": 0.288, "step": 1569 }, { "epoch": 0.125597488050239, "grad_norm": 0.2743899761263726, "learning_rate": 9.971882506139251e-06, "loss": 0.316, "step": 1570 }, { "epoch": 0.125677486450271, "grad_norm": 0.3273720877629741, "learning_rate": 9.971813865855448e-06, "loss": 0.2802, "step": 1571 }, { "epoch": 0.125757484850303, "grad_norm": 0.27218182959151843, "learning_rate": 9.971745142128413e-06, "loss": 0.319, "step": 1572 }, { "epoch": 0.125837483250335, "grad_norm": 0.33635903810220347, "learning_rate": 9.971676334959297e-06, "loss": 0.2896, "step": 1573 }, { "epoch": 0.12591748165036698, "grad_norm": 0.36089753673816033, "learning_rate": 9.971607444349258e-06, "loss": 0.2737, "step": 1574 }, { "epoch": 0.12599748005039899, "grad_norm": 0.29738625056333357, "learning_rate": 9.971538470299452e-06, "loss": 0.3319, "step": 1575 }, { "epoch": 0.126077478450431, "grad_norm": 0.33751500201620127, "learning_rate": 9.971469412811032e-06, "loss": 0.2994, "step": 1576 }, { "epoch": 0.126157476850463, "grad_norm": 0.333507026377891, "learning_rate": 9.971400271885163e-06, "loss": 0.276, "step": 1577 }, { "epoch": 0.126237475250495, "grad_norm": 0.3417766187453086, "learning_rate": 9.971331047523002e-06, "loss": 0.2931, "step": 1578 }, { "epoch": 0.12631747365052698, "grad_norm": 0.20737597012634765, "learning_rate": 9.971261739725713e-06, "loss": 0.389, "step": 1579 }, { "epoch": 0.12639747205055898, "grad_norm": 0.19969070918404824, "learning_rate": 9.971192348494456e-06, "loss": 0.3746, "step": 1580 }, { "epoch": 0.126477470450591, "grad_norm": 0.343190067986367, "learning_rate": 9.971122873830398e-06, "loss": 0.2911, "step": 1581 }, { "epoch": 0.126557468850623, "grad_norm": 0.3241189976081756, "learning_rate": 9.971053315734706e-06, "loss": 0.2901, "step": 1582 }, { "epoch": 0.126637467250655, "grad_norm": 0.3651618005218322, "learning_rate": 9.970983674208546e-06, "loss": 0.2814, "step": 1583 }, { "epoch": 0.12671746565068698, "grad_norm": 0.3054901278587988, "learning_rate": 9.970913949253085e-06, "loss": 0.3148, "step": 1584 }, { "epoch": 0.12679746405071898, "grad_norm": 0.37956813633003, "learning_rate": 9.970844140869495e-06, "loss": 0.2912, "step": 1585 }, { "epoch": 0.126877462450751, "grad_norm": 0.3087361815326322, "learning_rate": 9.970774249058947e-06, "loss": 0.3268, "step": 1586 }, { "epoch": 0.126957460850783, "grad_norm": 0.2947287798107453, "learning_rate": 9.970704273822618e-06, "loss": 0.3157, "step": 1587 }, { "epoch": 0.12703745925081497, "grad_norm": 0.35760370725899226, "learning_rate": 9.970634215161677e-06, "loss": 0.3033, "step": 1588 }, { "epoch": 0.12711745765084698, "grad_norm": 0.2705000717013212, "learning_rate": 9.9705640730773e-06, "loss": 0.326, "step": 1589 }, { "epoch": 0.12719745605087898, "grad_norm": 0.28820740181724164, "learning_rate": 9.970493847570669e-06, "loss": 0.332, "step": 1590 }, { "epoch": 0.127277454450911, "grad_norm": 0.33870280790828833, "learning_rate": 9.970423538642959e-06, "loss": 0.2995, "step": 1591 }, { "epoch": 0.127357452850943, "grad_norm": 0.3073152895623792, "learning_rate": 9.97035314629535e-06, "loss": 0.3027, "step": 1592 }, { "epoch": 0.12743745125097497, "grad_norm": 0.24343305844285854, "learning_rate": 9.970282670529024e-06, "loss": 0.3209, "step": 1593 }, { "epoch": 0.12751744965100698, "grad_norm": 0.2817249692395849, "learning_rate": 9.970212111345164e-06, "loss": 0.2954, "step": 1594 }, { "epoch": 0.12759744805103898, "grad_norm": 0.19507604619833765, "learning_rate": 9.970141468744953e-06, "loss": 0.3399, "step": 1595 }, { "epoch": 0.12767744645107099, "grad_norm": 0.33644849661089504, "learning_rate": 9.97007074272958e-06, "loss": 0.3003, "step": 1596 }, { "epoch": 0.127757444851103, "grad_norm": 0.28551690329400675, "learning_rate": 9.969999933300229e-06, "loss": 0.2963, "step": 1597 }, { "epoch": 0.12783744325113497, "grad_norm": 0.24111935984584498, "learning_rate": 9.969929040458088e-06, "loss": 0.333, "step": 1598 }, { "epoch": 0.12791744165116697, "grad_norm": 0.2440483033012155, "learning_rate": 9.96985806420435e-06, "loss": 0.3258, "step": 1599 }, { "epoch": 0.12799744005119898, "grad_norm": 0.37418989712799433, "learning_rate": 9.969787004540202e-06, "loss": 0.3256, "step": 1600 }, { "epoch": 0.12807743845123098, "grad_norm": 0.30313124375864736, "learning_rate": 9.969715861466839e-06, "loss": 0.3146, "step": 1601 }, { "epoch": 0.12815743685126296, "grad_norm": 0.28303531578204344, "learning_rate": 9.969644634985456e-06, "loss": 0.2844, "step": 1602 }, { "epoch": 0.12823743525129497, "grad_norm": 0.31588089694356136, "learning_rate": 9.969573325097247e-06, "loss": 0.2564, "step": 1603 }, { "epoch": 0.12831743365132697, "grad_norm": 0.33485316524707903, "learning_rate": 9.96950193180341e-06, "loss": 0.2972, "step": 1604 }, { "epoch": 0.12839743205135898, "grad_norm": 0.27738539766765796, "learning_rate": 9.96943045510514e-06, "loss": 0.2948, "step": 1605 }, { "epoch": 0.12847743045139098, "grad_norm": 0.28739144946239215, "learning_rate": 9.96935889500364e-06, "loss": 0.3398, "step": 1606 }, { "epoch": 0.12855742885142296, "grad_norm": 0.3780784558149297, "learning_rate": 9.969287251500109e-06, "loss": 0.3029, "step": 1607 }, { "epoch": 0.12863742725145497, "grad_norm": 0.39436280254310757, "learning_rate": 9.969215524595751e-06, "loss": 0.2871, "step": 1608 }, { "epoch": 0.12871742565148697, "grad_norm": 0.2781032973117167, "learning_rate": 9.96914371429177e-06, "loss": 0.3417, "step": 1609 }, { "epoch": 0.12879742405151898, "grad_norm": 0.3091777596402673, "learning_rate": 9.96907182058937e-06, "loss": 0.3156, "step": 1610 }, { "epoch": 0.12887742245155098, "grad_norm": 0.34476330496554697, "learning_rate": 9.968999843489755e-06, "loss": 0.2797, "step": 1611 }, { "epoch": 0.12895742085158296, "grad_norm": 0.29137705953747706, "learning_rate": 9.968927782994139e-06, "loss": 0.3471, "step": 1612 }, { "epoch": 0.12903741925161497, "grad_norm": 0.22494499800610587, "learning_rate": 9.968855639103727e-06, "loss": 0.3429, "step": 1613 }, { "epoch": 0.12911741765164697, "grad_norm": 0.3384786965693821, "learning_rate": 9.968783411819732e-06, "loss": 0.3129, "step": 1614 }, { "epoch": 0.12919741605167898, "grad_norm": 0.37245070269968894, "learning_rate": 9.968711101143364e-06, "loss": 0.3024, "step": 1615 }, { "epoch": 0.12927741445171095, "grad_norm": 0.33261383673094785, "learning_rate": 9.968638707075839e-06, "loss": 0.2809, "step": 1616 }, { "epoch": 0.12935741285174296, "grad_norm": 0.3538913564212979, "learning_rate": 9.96856622961837e-06, "loss": 0.3058, "step": 1617 }, { "epoch": 0.12943741125177496, "grad_norm": 0.33543894188521006, "learning_rate": 9.968493668772177e-06, "loss": 0.2722, "step": 1618 }, { "epoch": 0.12951740965180697, "grad_norm": 0.3673480373341944, "learning_rate": 9.968421024538473e-06, "loss": 0.311, "step": 1619 }, { "epoch": 0.12959740805183897, "grad_norm": 0.32641897734890835, "learning_rate": 9.968348296918479e-06, "loss": 0.2983, "step": 1620 }, { "epoch": 0.12967740645187095, "grad_norm": 0.3145459493680963, "learning_rate": 9.968275485913417e-06, "loss": 0.3104, "step": 1621 }, { "epoch": 0.12975740485190296, "grad_norm": 0.34435664527218157, "learning_rate": 9.968202591524508e-06, "loss": 0.3085, "step": 1622 }, { "epoch": 0.12983740325193496, "grad_norm": 0.29708028988303964, "learning_rate": 9.968129613752975e-06, "loss": 0.3048, "step": 1623 }, { "epoch": 0.12991740165196697, "grad_norm": 0.3933300902453698, "learning_rate": 9.968056552600043e-06, "loss": 0.2908, "step": 1624 }, { "epoch": 0.12999740005199897, "grad_norm": 0.2657896128021916, "learning_rate": 9.967983408066939e-06, "loss": 0.3433, "step": 1625 }, { "epoch": 0.13007739845203095, "grad_norm": 0.2649772146562545, "learning_rate": 9.96791018015489e-06, "loss": 0.3518, "step": 1626 }, { "epoch": 0.13015739685206296, "grad_norm": 0.3384894253270735, "learning_rate": 9.967836868865125e-06, "loss": 0.2699, "step": 1627 }, { "epoch": 0.13023739525209496, "grad_norm": 0.3053194370367204, "learning_rate": 9.967763474198873e-06, "loss": 0.3118, "step": 1628 }, { "epoch": 0.13031739365212697, "grad_norm": 0.29400780305374524, "learning_rate": 9.967689996157368e-06, "loss": 0.3054, "step": 1629 }, { "epoch": 0.13039739205215894, "grad_norm": 0.2543343038972484, "learning_rate": 9.967616434741842e-06, "loss": 0.3273, "step": 1630 }, { "epoch": 0.13047739045219095, "grad_norm": 0.3950385774304625, "learning_rate": 9.967542789953532e-06, "loss": 0.2573, "step": 1631 }, { "epoch": 0.13055738885222296, "grad_norm": 0.3166082015848201, "learning_rate": 9.967469061793672e-06, "loss": 0.2924, "step": 1632 }, { "epoch": 0.13063738725225496, "grad_norm": 0.23891815135943617, "learning_rate": 9.967395250263496e-06, "loss": 0.3427, "step": 1633 }, { "epoch": 0.13071738565228697, "grad_norm": 0.2653217347899633, "learning_rate": 9.96732135536425e-06, "loss": 0.2931, "step": 1634 }, { "epoch": 0.13079738405231894, "grad_norm": 0.2660758979103026, "learning_rate": 9.967247377097168e-06, "loss": 0.3439, "step": 1635 }, { "epoch": 0.13087738245235095, "grad_norm": 0.31902449798630683, "learning_rate": 9.967173315463494e-06, "loss": 0.2798, "step": 1636 }, { "epoch": 0.13095738085238295, "grad_norm": 0.32309647957953375, "learning_rate": 9.967099170464473e-06, "loss": 0.2801, "step": 1637 }, { "epoch": 0.13103737925241496, "grad_norm": 0.3630500586472057, "learning_rate": 9.967024942101345e-06, "loss": 0.3, "step": 1638 }, { "epoch": 0.13111737765244696, "grad_norm": 0.453720988216507, "learning_rate": 9.966950630375361e-06, "loss": 0.2773, "step": 1639 }, { "epoch": 0.13119737605247894, "grad_norm": 0.30711283945170126, "learning_rate": 9.966876235287762e-06, "loss": 0.3018, "step": 1640 }, { "epoch": 0.13127737445251095, "grad_norm": 0.33838315886780596, "learning_rate": 9.966801756839802e-06, "loss": 0.2963, "step": 1641 }, { "epoch": 0.13135737285254295, "grad_norm": 0.39474138784436325, "learning_rate": 9.966727195032729e-06, "loss": 0.2941, "step": 1642 }, { "epoch": 0.13143737125257496, "grad_norm": 0.38753809544863926, "learning_rate": 9.966652549867795e-06, "loss": 0.2865, "step": 1643 }, { "epoch": 0.13151736965260694, "grad_norm": 0.5787026827731592, "learning_rate": 9.96657782134625e-06, "loss": 0.2697, "step": 1644 }, { "epoch": 0.13159736805263894, "grad_norm": 0.3197434565585619, "learning_rate": 9.966503009469352e-06, "loss": 0.3239, "step": 1645 }, { "epoch": 0.13167736645267095, "grad_norm": 0.3935251810016136, "learning_rate": 9.966428114238353e-06, "loss": 0.2811, "step": 1646 }, { "epoch": 0.13175736485270295, "grad_norm": 0.32167362638562774, "learning_rate": 9.966353135654513e-06, "loss": 0.3045, "step": 1647 }, { "epoch": 0.13183736325273496, "grad_norm": 0.22817981950295207, "learning_rate": 9.966278073719091e-06, "loss": 0.3479, "step": 1648 }, { "epoch": 0.13191736165276693, "grad_norm": 0.32914152365333305, "learning_rate": 9.966202928433344e-06, "loss": 0.287, "step": 1649 }, { "epoch": 0.13199736005279894, "grad_norm": 0.2565248530649696, "learning_rate": 9.966127699798533e-06, "loss": 0.3216, "step": 1650 }, { "epoch": 0.13207735845283095, "grad_norm": 0.2604344735331237, "learning_rate": 9.966052387815923e-06, "loss": 0.3377, "step": 1651 }, { "epoch": 0.13215735685286295, "grad_norm": 0.2657973200421256, "learning_rate": 9.965976992486777e-06, "loss": 0.337, "step": 1652 }, { "epoch": 0.13223735525289496, "grad_norm": 0.44261816520682506, "learning_rate": 9.96590151381236e-06, "loss": 0.2821, "step": 1653 }, { "epoch": 0.13231735365292693, "grad_norm": 0.3499848148337784, "learning_rate": 9.965825951793939e-06, "loss": 0.313, "step": 1654 }, { "epoch": 0.13239735205295894, "grad_norm": 0.3377789495785721, "learning_rate": 9.965750306432782e-06, "loss": 0.2759, "step": 1655 }, { "epoch": 0.13247735045299094, "grad_norm": 0.3084300248662882, "learning_rate": 9.965674577730157e-06, "loss": 0.3087, "step": 1656 }, { "epoch": 0.13255734885302295, "grad_norm": 0.32778165414405397, "learning_rate": 9.965598765687338e-06, "loss": 0.3075, "step": 1657 }, { "epoch": 0.13263734725305493, "grad_norm": 0.2905680233037324, "learning_rate": 9.965522870305598e-06, "loss": 0.3195, "step": 1658 }, { "epoch": 0.13271734565308693, "grad_norm": 0.3294272355686789, "learning_rate": 9.965446891586208e-06, "loss": 0.2893, "step": 1659 }, { "epoch": 0.13279734405311894, "grad_norm": 0.33005569419541053, "learning_rate": 9.965370829530444e-06, "loss": 0.308, "step": 1660 }, { "epoch": 0.13287734245315094, "grad_norm": 0.30360017842174963, "learning_rate": 9.96529468413958e-06, "loss": 0.3015, "step": 1661 }, { "epoch": 0.13295734085318295, "grad_norm": 0.3234667842233614, "learning_rate": 9.9652184554149e-06, "loss": 0.2988, "step": 1662 }, { "epoch": 0.13303733925321493, "grad_norm": 0.2837535920210326, "learning_rate": 9.965142143357677e-06, "loss": 0.3403, "step": 1663 }, { "epoch": 0.13311733765324693, "grad_norm": 0.26632172850544816, "learning_rate": 9.965065747969196e-06, "loss": 0.3682, "step": 1664 }, { "epoch": 0.13319733605327894, "grad_norm": 0.3041415048672628, "learning_rate": 9.964989269250737e-06, "loss": 0.3044, "step": 1665 }, { "epoch": 0.13327733445331094, "grad_norm": 0.32970071912713045, "learning_rate": 9.964912707203587e-06, "loss": 0.2879, "step": 1666 }, { "epoch": 0.13335733285334295, "grad_norm": 0.3187449002069163, "learning_rate": 9.964836061829026e-06, "loss": 0.283, "step": 1667 }, { "epoch": 0.13343733125337492, "grad_norm": 0.3381036568929224, "learning_rate": 9.964759333128344e-06, "loss": 0.3026, "step": 1668 }, { "epoch": 0.13351732965340693, "grad_norm": 0.34993053866836077, "learning_rate": 9.964682521102827e-06, "loss": 0.3305, "step": 1669 }, { "epoch": 0.13359732805343894, "grad_norm": 0.3869382941791446, "learning_rate": 9.964605625753763e-06, "loss": 0.2971, "step": 1670 }, { "epoch": 0.13367732645347094, "grad_norm": 0.31727011304715647, "learning_rate": 9.964528647082447e-06, "loss": 0.324, "step": 1671 }, { "epoch": 0.13375732485350292, "grad_norm": 0.27064077673825926, "learning_rate": 9.964451585090167e-06, "loss": 0.3373, "step": 1672 }, { "epoch": 0.13383732325353492, "grad_norm": 0.3010738763319749, "learning_rate": 9.964374439778217e-06, "loss": 0.3097, "step": 1673 }, { "epoch": 0.13391732165356693, "grad_norm": 0.3106080288510451, "learning_rate": 9.964297211147893e-06, "loss": 0.3044, "step": 1674 }, { "epoch": 0.13399732005359893, "grad_norm": 0.35253993125968475, "learning_rate": 9.964219899200489e-06, "loss": 0.3073, "step": 1675 }, { "epoch": 0.13407731845363094, "grad_norm": 0.26899506633024683, "learning_rate": 9.964142503937305e-06, "loss": 0.3529, "step": 1676 }, { "epoch": 0.13415731685366292, "grad_norm": 0.34071389061918994, "learning_rate": 9.964065025359639e-06, "loss": 0.2923, "step": 1677 }, { "epoch": 0.13423731525369492, "grad_norm": 0.3020345058078009, "learning_rate": 9.963987463468791e-06, "loss": 0.3235, "step": 1678 }, { "epoch": 0.13431731365372693, "grad_norm": 0.3291600823809039, "learning_rate": 9.963909818266063e-06, "loss": 0.294, "step": 1679 }, { "epoch": 0.13439731205375893, "grad_norm": 0.2917055224651526, "learning_rate": 9.963832089752758e-06, "loss": 0.3115, "step": 1680 }, { "epoch": 0.1344773104537909, "grad_norm": 0.27063817168409676, "learning_rate": 9.96375427793018e-06, "loss": 0.3429, "step": 1681 }, { "epoch": 0.13455730885382292, "grad_norm": 0.29569271171031547, "learning_rate": 9.963676382799637e-06, "loss": 0.3103, "step": 1682 }, { "epoch": 0.13463730725385492, "grad_norm": 0.3416539883702798, "learning_rate": 9.963598404362435e-06, "loss": 0.2869, "step": 1683 }, { "epoch": 0.13471730565388693, "grad_norm": 0.317345440697365, "learning_rate": 9.96352034261988e-06, "loss": 0.2802, "step": 1684 }, { "epoch": 0.13479730405391893, "grad_norm": 0.3798252914000913, "learning_rate": 9.963442197573288e-06, "loss": 0.3015, "step": 1685 }, { "epoch": 0.1348773024539509, "grad_norm": 0.38545882238974566, "learning_rate": 9.963363969223968e-06, "loss": 0.2691, "step": 1686 }, { "epoch": 0.13495730085398291, "grad_norm": 0.4489555407948747, "learning_rate": 9.96328565757323e-06, "loss": 0.3135, "step": 1687 }, { "epoch": 0.13503729925401492, "grad_norm": 0.30985761749304114, "learning_rate": 9.96320726262239e-06, "loss": 0.3127, "step": 1688 }, { "epoch": 0.13511729765404693, "grad_norm": 0.34437822862994905, "learning_rate": 9.963128784372765e-06, "loss": 0.2817, "step": 1689 }, { "epoch": 0.13519729605407893, "grad_norm": 0.32296207524824966, "learning_rate": 9.963050222825672e-06, "loss": 0.2981, "step": 1690 }, { "epoch": 0.1352772944541109, "grad_norm": 0.30673484253410654, "learning_rate": 9.962971577982428e-06, "loss": 0.3147, "step": 1691 }, { "epoch": 0.1353572928541429, "grad_norm": 0.2965287340778612, "learning_rate": 9.962892849844355e-06, "loss": 0.2996, "step": 1692 }, { "epoch": 0.13543729125417492, "grad_norm": 0.3686616630824169, "learning_rate": 9.962814038412772e-06, "loss": 0.3024, "step": 1693 }, { "epoch": 0.13551728965420692, "grad_norm": 0.36857623307427717, "learning_rate": 9.962735143689003e-06, "loss": 0.2994, "step": 1694 }, { "epoch": 0.1355972880542389, "grad_norm": 0.26964413945230015, "learning_rate": 9.96265616567437e-06, "loss": 0.3283, "step": 1695 }, { "epoch": 0.1356772864542709, "grad_norm": 0.30960092724603844, "learning_rate": 9.962577104370206e-06, "loss": 0.3208, "step": 1696 }, { "epoch": 0.1357572848543029, "grad_norm": 0.3930353754480697, "learning_rate": 9.962497959777828e-06, "loss": 0.2865, "step": 1697 }, { "epoch": 0.13583728325433492, "grad_norm": 0.32100007809044945, "learning_rate": 9.962418731898571e-06, "loss": 0.2746, "step": 1698 }, { "epoch": 0.13591728165436692, "grad_norm": 0.34289121610315537, "learning_rate": 9.96233942073376e-06, "loss": 0.296, "step": 1699 }, { "epoch": 0.1359972800543989, "grad_norm": 0.25172116748252965, "learning_rate": 9.96226002628473e-06, "loss": 0.3662, "step": 1700 }, { "epoch": 0.1360772784544309, "grad_norm": 0.32448629756567077, "learning_rate": 9.962180548552812e-06, "loss": 0.2831, "step": 1701 }, { "epoch": 0.1361572768544629, "grad_norm": 0.33516748900371574, "learning_rate": 9.96210098753934e-06, "loss": 0.2974, "step": 1702 }, { "epoch": 0.13623727525449492, "grad_norm": 1.0576067079658895, "learning_rate": 9.96202134324565e-06, "loss": 0.2794, "step": 1703 }, { "epoch": 0.13631727365452692, "grad_norm": 0.3391188007108373, "learning_rate": 9.961941615673075e-06, "loss": 0.2821, "step": 1704 }, { "epoch": 0.1363972720545589, "grad_norm": 0.3148933314226744, "learning_rate": 9.961861804822958e-06, "loss": 0.2716, "step": 1705 }, { "epoch": 0.1364772704545909, "grad_norm": 0.31931580022594086, "learning_rate": 9.961781910696636e-06, "loss": 0.3235, "step": 1706 }, { "epoch": 0.1365572688546229, "grad_norm": 0.29937630478445426, "learning_rate": 9.961701933295451e-06, "loss": 0.3282, "step": 1707 }, { "epoch": 0.13663726725465491, "grad_norm": 0.28782150017118074, "learning_rate": 9.961621872620744e-06, "loss": 0.2973, "step": 1708 }, { "epoch": 0.1367172656546869, "grad_norm": 0.2742400772084222, "learning_rate": 9.961541728673859e-06, "loss": 0.3294, "step": 1709 }, { "epoch": 0.1367972640547189, "grad_norm": 0.45880200381966524, "learning_rate": 9.961461501456142e-06, "loss": 0.2775, "step": 1710 }, { "epoch": 0.1368772624547509, "grad_norm": 0.31562064636694503, "learning_rate": 9.96138119096894e-06, "loss": 0.3602, "step": 1711 }, { "epoch": 0.1369572608547829, "grad_norm": 0.2903401516378683, "learning_rate": 9.961300797213597e-06, "loss": 0.3042, "step": 1712 }, { "epoch": 0.1370372592548149, "grad_norm": 0.3294868849179385, "learning_rate": 9.961220320191466e-06, "loss": 0.3185, "step": 1713 }, { "epoch": 0.1371172576548469, "grad_norm": 0.3224538803028663, "learning_rate": 9.961139759903898e-06, "loss": 0.2654, "step": 1714 }, { "epoch": 0.1371972560548789, "grad_norm": 0.32407631488149774, "learning_rate": 9.961059116352242e-06, "loss": 0.2885, "step": 1715 }, { "epoch": 0.1372772544549109, "grad_norm": 0.35838211973323353, "learning_rate": 9.960978389537853e-06, "loss": 0.2871, "step": 1716 }, { "epoch": 0.1373572528549429, "grad_norm": 0.27286313005129115, "learning_rate": 9.960897579462088e-06, "loss": 0.3507, "step": 1717 }, { "epoch": 0.1374372512549749, "grad_norm": 0.24887063569142082, "learning_rate": 9.9608166861263e-06, "loss": 0.335, "step": 1718 }, { "epoch": 0.1375172496550069, "grad_norm": 0.2755737102753344, "learning_rate": 9.960735709531848e-06, "loss": 0.3083, "step": 1719 }, { "epoch": 0.1375972480550389, "grad_norm": 0.2939927270526707, "learning_rate": 9.960654649680092e-06, "loss": 0.3053, "step": 1720 }, { "epoch": 0.1376772464550709, "grad_norm": 0.3114105543719996, "learning_rate": 9.960573506572391e-06, "loss": 0.285, "step": 1721 }, { "epoch": 0.1377572448551029, "grad_norm": 0.2611624811881945, "learning_rate": 9.960492280210105e-06, "loss": 0.3274, "step": 1722 }, { "epoch": 0.13783724325513488, "grad_norm": 0.2864737101510064, "learning_rate": 9.960410970594603e-06, "loss": 0.3145, "step": 1723 }, { "epoch": 0.1379172416551669, "grad_norm": 0.3063205908763903, "learning_rate": 9.960329577727244e-06, "loss": 0.301, "step": 1724 }, { "epoch": 0.1379972400551989, "grad_norm": 0.2791774800395622, "learning_rate": 9.960248101609396e-06, "loss": 0.3277, "step": 1725 }, { "epoch": 0.1380772384552309, "grad_norm": 0.4676364919085364, "learning_rate": 9.96016654224243e-06, "loss": 0.296, "step": 1726 }, { "epoch": 0.1381572368552629, "grad_norm": 0.31381224432593324, "learning_rate": 9.960084899627707e-06, "loss": 0.3222, "step": 1727 }, { "epoch": 0.13823723525529488, "grad_norm": 0.29165147182370316, "learning_rate": 9.960003173766603e-06, "loss": 0.3154, "step": 1728 }, { "epoch": 0.1383172336553269, "grad_norm": 0.5581375819852616, "learning_rate": 9.95992136466049e-06, "loss": 0.3212, "step": 1729 }, { "epoch": 0.1383972320553589, "grad_norm": 0.27612364071745177, "learning_rate": 9.959839472310737e-06, "loss": 0.3421, "step": 1730 }, { "epoch": 0.1384772304553909, "grad_norm": 0.3552131712701409, "learning_rate": 9.959757496718723e-06, "loss": 0.2728, "step": 1731 }, { "epoch": 0.1385572288554229, "grad_norm": 0.30504713248927423, "learning_rate": 9.95967543788582e-06, "loss": 0.2973, "step": 1732 }, { "epoch": 0.13863722725545488, "grad_norm": 0.3117372839299666, "learning_rate": 9.959593295813409e-06, "loss": 0.3144, "step": 1733 }, { "epoch": 0.1387172256554869, "grad_norm": 0.3661145015161783, "learning_rate": 9.959511070502864e-06, "loss": 0.2853, "step": 1734 }, { "epoch": 0.1387972240555189, "grad_norm": 0.2986015314313455, "learning_rate": 9.959428761955569e-06, "loss": 0.3089, "step": 1735 }, { "epoch": 0.1388772224555509, "grad_norm": 0.322042310593675, "learning_rate": 9.959346370172902e-06, "loss": 0.3096, "step": 1736 }, { "epoch": 0.13895722085558287, "grad_norm": 1.0597179866444584, "learning_rate": 9.95926389515625e-06, "loss": 0.3024, "step": 1737 }, { "epoch": 0.13903721925561488, "grad_norm": 0.3332901498427141, "learning_rate": 9.959181336906993e-06, "loss": 0.3218, "step": 1738 }, { "epoch": 0.13911721765564689, "grad_norm": 0.30180490543673855, "learning_rate": 9.959098695426518e-06, "loss": 0.3039, "step": 1739 }, { "epoch": 0.1391972160556789, "grad_norm": 0.2875304219816326, "learning_rate": 9.959015970716215e-06, "loss": 0.3123, "step": 1740 }, { "epoch": 0.1392772144557109, "grad_norm": 0.32021173107962153, "learning_rate": 9.958933162777468e-06, "loss": 0.2849, "step": 1741 }, { "epoch": 0.13935721285574287, "grad_norm": 0.27952604512851636, "learning_rate": 9.958850271611669e-06, "loss": 0.2933, "step": 1742 }, { "epoch": 0.13943721125577488, "grad_norm": 0.2586265294576407, "learning_rate": 9.958767297220209e-06, "loss": 0.3285, "step": 1743 }, { "epoch": 0.13951720965580688, "grad_norm": 0.28383422193166746, "learning_rate": 9.95868423960448e-06, "loss": 0.3074, "step": 1744 }, { "epoch": 0.1395972080558389, "grad_norm": 0.380703359676753, "learning_rate": 9.958601098765877e-06, "loss": 0.2644, "step": 1745 }, { "epoch": 0.1396772064558709, "grad_norm": 0.31990860162341744, "learning_rate": 9.958517874705793e-06, "loss": 0.3046, "step": 1746 }, { "epoch": 0.13975720485590287, "grad_norm": 0.3136246366776693, "learning_rate": 9.958434567425627e-06, "loss": 0.2742, "step": 1747 }, { "epoch": 0.13983720325593488, "grad_norm": 0.4744114241245162, "learning_rate": 9.958351176926779e-06, "loss": 0.2807, "step": 1748 }, { "epoch": 0.13991720165596688, "grad_norm": 0.35019852055583867, "learning_rate": 9.958267703210645e-06, "loss": 0.2684, "step": 1749 }, { "epoch": 0.1399972000559989, "grad_norm": 0.3219857074865576, "learning_rate": 9.958184146278626e-06, "loss": 0.2843, "step": 1750 }, { "epoch": 0.14007719845603087, "grad_norm": 0.2741862901592053, "learning_rate": 9.958100506132127e-06, "loss": 0.2995, "step": 1751 }, { "epoch": 0.14015719685606287, "grad_norm": 0.29855136100499374, "learning_rate": 9.958016782772548e-06, "loss": 0.3164, "step": 1752 }, { "epoch": 0.14023719525609488, "grad_norm": 0.31182241558337687, "learning_rate": 9.957932976201298e-06, "loss": 0.3218, "step": 1753 }, { "epoch": 0.14031719365612688, "grad_norm": 0.3017526580799603, "learning_rate": 9.957849086419784e-06, "loss": 0.2808, "step": 1754 }, { "epoch": 0.1403971920561589, "grad_norm": 0.29259984357276614, "learning_rate": 9.95776511342941e-06, "loss": 0.3188, "step": 1755 }, { "epoch": 0.14047719045619086, "grad_norm": 0.2586671324325039, "learning_rate": 9.957681057231586e-06, "loss": 0.3414, "step": 1756 }, { "epoch": 0.14055718885622287, "grad_norm": 0.3422309319001371, "learning_rate": 9.957596917827726e-06, "loss": 0.2914, "step": 1757 }, { "epoch": 0.14063718725625488, "grad_norm": 0.30411665437657764, "learning_rate": 9.957512695219237e-06, "loss": 0.3257, "step": 1758 }, { "epoch": 0.14071718565628688, "grad_norm": 0.3148256689329255, "learning_rate": 9.95742838940754e-06, "loss": 0.2595, "step": 1759 }, { "epoch": 0.14079718405631889, "grad_norm": 0.3235320161935557, "learning_rate": 9.957344000394044e-06, "loss": 0.2817, "step": 1760 }, { "epoch": 0.14087718245635086, "grad_norm": 0.2346349466963199, "learning_rate": 9.957259528180166e-06, "loss": 0.3598, "step": 1761 }, { "epoch": 0.14095718085638287, "grad_norm": 0.32775946235867615, "learning_rate": 9.957174972767325e-06, "loss": 0.2561, "step": 1762 }, { "epoch": 0.14103717925641487, "grad_norm": 0.29272448983335647, "learning_rate": 9.95709033415694e-06, "loss": 0.2984, "step": 1763 }, { "epoch": 0.14111717765644688, "grad_norm": 0.3381410463768757, "learning_rate": 9.957005612350433e-06, "loss": 0.2756, "step": 1764 }, { "epoch": 0.14119717605647886, "grad_norm": 0.37881724019186336, "learning_rate": 9.956920807349222e-06, "loss": 0.2972, "step": 1765 }, { "epoch": 0.14127717445651086, "grad_norm": 0.2615131462346856, "learning_rate": 9.956835919154733e-06, "loss": 0.3333, "step": 1766 }, { "epoch": 0.14135717285654287, "grad_norm": 0.25063255370545584, "learning_rate": 9.95675094776839e-06, "loss": 0.3151, "step": 1767 }, { "epoch": 0.14143717125657487, "grad_norm": 0.3013728292075032, "learning_rate": 9.95666589319162e-06, "loss": 0.3241, "step": 1768 }, { "epoch": 0.14151716965660688, "grad_norm": 0.3012317428421379, "learning_rate": 9.956580755425847e-06, "loss": 0.3201, "step": 1769 }, { "epoch": 0.14159716805663886, "grad_norm": 0.5038706896382061, "learning_rate": 9.956495534472506e-06, "loss": 0.2782, "step": 1770 }, { "epoch": 0.14167716645667086, "grad_norm": 0.24984419727694965, "learning_rate": 9.956410230333023e-06, "loss": 0.3574, "step": 1771 }, { "epoch": 0.14175716485670287, "grad_norm": 0.4299887767701957, "learning_rate": 9.95632484300883e-06, "loss": 0.3258, "step": 1772 }, { "epoch": 0.14183716325673487, "grad_norm": 0.3560279802668401, "learning_rate": 9.956239372501361e-06, "loss": 0.295, "step": 1773 }, { "epoch": 0.14191716165676688, "grad_norm": 0.24932007515283539, "learning_rate": 9.95615381881205e-06, "loss": 0.3191, "step": 1774 }, { "epoch": 0.14199716005679885, "grad_norm": 0.3028728743262735, "learning_rate": 9.956068181942333e-06, "loss": 0.311, "step": 1775 }, { "epoch": 0.14207715845683086, "grad_norm": 0.259154033880076, "learning_rate": 9.955982461893648e-06, "loss": 0.3354, "step": 1776 }, { "epoch": 0.14215715685686287, "grad_norm": 0.2897432537846509, "learning_rate": 9.955896658667433e-06, "loss": 0.3272, "step": 1777 }, { "epoch": 0.14223715525689487, "grad_norm": 0.33038176949553716, "learning_rate": 9.955810772265128e-06, "loss": 0.3015, "step": 1778 }, { "epoch": 0.14231715365692685, "grad_norm": 0.3516805659520599, "learning_rate": 9.955724802688173e-06, "loss": 0.2816, "step": 1779 }, { "epoch": 0.14239715205695885, "grad_norm": 0.2804767726724162, "learning_rate": 9.955638749938015e-06, "loss": 0.3108, "step": 1780 }, { "epoch": 0.14247715045699086, "grad_norm": 0.26314051644794173, "learning_rate": 9.955552614016093e-06, "loss": 0.3426, "step": 1781 }, { "epoch": 0.14255714885702286, "grad_norm": 0.2947321905415446, "learning_rate": 9.955466394923857e-06, "loss": 0.3145, "step": 1782 }, { "epoch": 0.14263714725705487, "grad_norm": 0.3241423092435149, "learning_rate": 9.955380092662751e-06, "loss": 0.3086, "step": 1783 }, { "epoch": 0.14271714565708685, "grad_norm": 0.32229750720492134, "learning_rate": 9.955293707234225e-06, "loss": 0.3166, "step": 1784 }, { "epoch": 0.14279714405711885, "grad_norm": 0.3235463306806733, "learning_rate": 9.955207238639729e-06, "loss": 0.2799, "step": 1785 }, { "epoch": 0.14287714245715086, "grad_norm": 0.28474506326534615, "learning_rate": 9.955120686880713e-06, "loss": 0.3157, "step": 1786 }, { "epoch": 0.14295714085718286, "grad_norm": 0.30609190171772627, "learning_rate": 9.955034051958632e-06, "loss": 0.3106, "step": 1787 }, { "epoch": 0.14303713925721487, "grad_norm": 0.3092621200982988, "learning_rate": 9.954947333874937e-06, "loss": 0.316, "step": 1788 }, { "epoch": 0.14311713765724685, "grad_norm": 0.31639271963250565, "learning_rate": 9.954860532631086e-06, "loss": 0.2757, "step": 1789 }, { "epoch": 0.14319713605727885, "grad_norm": 0.3204171762520193, "learning_rate": 9.954773648228532e-06, "loss": 0.2879, "step": 1790 }, { "epoch": 0.14327713445731086, "grad_norm": 0.31469817598687255, "learning_rate": 9.954686680668737e-06, "loss": 0.3093, "step": 1791 }, { "epoch": 0.14335713285734286, "grad_norm": 0.3113543190689672, "learning_rate": 9.954599629953162e-06, "loss": 0.3275, "step": 1792 }, { "epoch": 0.14343713125737484, "grad_norm": 0.29493445256883705, "learning_rate": 9.954512496083262e-06, "loss": 0.3189, "step": 1793 }, { "epoch": 0.14351712965740684, "grad_norm": 0.22707340882769847, "learning_rate": 9.954425279060504e-06, "loss": 0.317, "step": 1794 }, { "epoch": 0.14359712805743885, "grad_norm": 0.2946919884516097, "learning_rate": 9.95433797888635e-06, "loss": 0.272, "step": 1795 }, { "epoch": 0.14367712645747086, "grad_norm": 0.17511818261201095, "learning_rate": 9.954250595562267e-06, "loss": 0.394, "step": 1796 }, { "epoch": 0.14375712485750286, "grad_norm": 0.2707159674605878, "learning_rate": 9.95416312908972e-06, "loss": 0.2902, "step": 1797 }, { "epoch": 0.14383712325753484, "grad_norm": 0.20682608381513143, "learning_rate": 9.954075579470178e-06, "loss": 0.3629, "step": 1798 }, { "epoch": 0.14391712165756684, "grad_norm": 0.2890110393980868, "learning_rate": 9.953987946705108e-06, "loss": 0.2858, "step": 1799 }, { "epoch": 0.14399712005759885, "grad_norm": 0.30537360603386277, "learning_rate": 9.953900230795983e-06, "loss": 0.3256, "step": 1800 }, { "epoch": 0.14407711845763085, "grad_norm": 0.26136906917458186, "learning_rate": 9.953812431744274e-06, "loss": 0.3354, "step": 1801 }, { "epoch": 0.14415711685766286, "grad_norm": 0.32368559541867314, "learning_rate": 9.953724549551458e-06, "loss": 0.2883, "step": 1802 }, { "epoch": 0.14423711525769484, "grad_norm": 0.31932677549930444, "learning_rate": 9.953636584219004e-06, "loss": 0.2861, "step": 1803 }, { "epoch": 0.14431711365772684, "grad_norm": 0.38308612066912484, "learning_rate": 9.953548535748392e-06, "loss": 0.304, "step": 1804 }, { "epoch": 0.14439711205775885, "grad_norm": 0.24594211358594562, "learning_rate": 9.9534604041411e-06, "loss": 0.3508, "step": 1805 }, { "epoch": 0.14447711045779085, "grad_norm": 0.284554801293597, "learning_rate": 9.953372189398607e-06, "loss": 0.3167, "step": 1806 }, { "epoch": 0.14455710885782283, "grad_norm": 0.28293831246303786, "learning_rate": 9.953283891522393e-06, "loss": 0.3166, "step": 1807 }, { "epoch": 0.14463710725785484, "grad_norm": 0.3123143336106142, "learning_rate": 9.953195510513936e-06, "loss": 0.2617, "step": 1808 }, { "epoch": 0.14471710565788684, "grad_norm": 0.3444926159483871, "learning_rate": 9.953107046374726e-06, "loss": 0.2705, "step": 1809 }, { "epoch": 0.14479710405791885, "grad_norm": 0.2619360553088575, "learning_rate": 9.953018499106245e-06, "loss": 0.3351, "step": 1810 }, { "epoch": 0.14487710245795085, "grad_norm": 0.26946106436617817, "learning_rate": 9.95292986870998e-06, "loss": 0.3355, "step": 1811 }, { "epoch": 0.14495710085798283, "grad_norm": 0.3154463376474094, "learning_rate": 9.952841155187413e-06, "loss": 0.3016, "step": 1812 }, { "epoch": 0.14503709925801483, "grad_norm": 0.28944627024388114, "learning_rate": 9.95275235854004e-06, "loss": 0.3136, "step": 1813 }, { "epoch": 0.14511709765804684, "grad_norm": 0.30575120888023094, "learning_rate": 9.952663478769352e-06, "loss": 0.3015, "step": 1814 }, { "epoch": 0.14519709605807885, "grad_norm": 0.2744983369056166, "learning_rate": 9.952574515876833e-06, "loss": 0.2981, "step": 1815 }, { "epoch": 0.14527709445811085, "grad_norm": 0.25090335956737053, "learning_rate": 9.952485469863981e-06, "loss": 0.3315, "step": 1816 }, { "epoch": 0.14535709285814283, "grad_norm": 0.34582112367748274, "learning_rate": 9.952396340732292e-06, "loss": 0.3081, "step": 1817 }, { "epoch": 0.14543709125817483, "grad_norm": 0.2613232653280772, "learning_rate": 9.952307128483257e-06, "loss": 0.3307, "step": 1818 }, { "epoch": 0.14551708965820684, "grad_norm": 0.4210963978330547, "learning_rate": 9.952217833118377e-06, "loss": 0.2973, "step": 1819 }, { "epoch": 0.14559708805823884, "grad_norm": 0.3118596303508891, "learning_rate": 9.95212845463915e-06, "loss": 0.2901, "step": 1820 }, { "epoch": 0.14567708645827082, "grad_norm": 0.4399119912972604, "learning_rate": 9.952038993047076e-06, "loss": 0.2918, "step": 1821 }, { "epoch": 0.14575708485830283, "grad_norm": 0.287987278639579, "learning_rate": 9.951949448343656e-06, "loss": 0.2916, "step": 1822 }, { "epoch": 0.14583708325833483, "grad_norm": 0.564479166839949, "learning_rate": 9.951859820530394e-06, "loss": 0.3385, "step": 1823 }, { "epoch": 0.14591708165836684, "grad_norm": 0.3428624229558714, "learning_rate": 9.951770109608792e-06, "loss": 0.3038, "step": 1824 }, { "epoch": 0.14599708005839884, "grad_norm": 0.29459868837725184, "learning_rate": 9.951680315580356e-06, "loss": 0.2992, "step": 1825 }, { "epoch": 0.14607707845843082, "grad_norm": 0.2871018425543814, "learning_rate": 9.951590438446597e-06, "loss": 0.3225, "step": 1826 }, { "epoch": 0.14615707685846283, "grad_norm": 0.33733151647126747, "learning_rate": 9.951500478209018e-06, "loss": 0.2872, "step": 1827 }, { "epoch": 0.14623707525849483, "grad_norm": 0.27339140850401356, "learning_rate": 9.951410434869133e-06, "loss": 0.3075, "step": 1828 }, { "epoch": 0.14631707365852684, "grad_norm": 0.3335200226637057, "learning_rate": 9.951320308428449e-06, "loss": 0.282, "step": 1829 }, { "epoch": 0.14639707205855884, "grad_norm": 0.26738975982876784, "learning_rate": 9.951230098888484e-06, "loss": 0.3389, "step": 1830 }, { "epoch": 0.14647707045859082, "grad_norm": 0.3079110352221004, "learning_rate": 9.951139806250747e-06, "loss": 0.3202, "step": 1831 }, { "epoch": 0.14655706885862282, "grad_norm": 0.2702218338207828, "learning_rate": 9.951049430516758e-06, "loss": 0.3402, "step": 1832 }, { "epoch": 0.14663706725865483, "grad_norm": 0.27317461904279994, "learning_rate": 9.950958971688028e-06, "loss": 0.3096, "step": 1833 }, { "epoch": 0.14671706565868683, "grad_norm": 0.28137355012401005, "learning_rate": 9.95086842976608e-06, "loss": 0.2917, "step": 1834 }, { "epoch": 0.1467970640587188, "grad_norm": 0.28761896666717185, "learning_rate": 9.950777804752432e-06, "loss": 0.2997, "step": 1835 }, { "epoch": 0.14687706245875082, "grad_norm": 0.3415591719291004, "learning_rate": 9.950687096648606e-06, "loss": 0.3124, "step": 1836 }, { "epoch": 0.14695706085878282, "grad_norm": 0.2286831298212902, "learning_rate": 9.950596305456124e-06, "loss": 0.324, "step": 1837 }, { "epoch": 0.14703705925881483, "grad_norm": 0.32869636574507294, "learning_rate": 9.950505431176507e-06, "loss": 0.2921, "step": 1838 }, { "epoch": 0.14711705765884683, "grad_norm": 0.16772669505942764, "learning_rate": 9.950414473811283e-06, "loss": 0.3936, "step": 1839 }, { "epoch": 0.1471970560588788, "grad_norm": 0.28649100670019073, "learning_rate": 9.95032343336198e-06, "loss": 0.3099, "step": 1840 }, { "epoch": 0.14727705445891082, "grad_norm": 0.27133145364991984, "learning_rate": 9.950232309830121e-06, "loss": 0.2822, "step": 1841 }, { "epoch": 0.14735705285894282, "grad_norm": 0.355369594295927, "learning_rate": 9.95014110321724e-06, "loss": 0.3131, "step": 1842 }, { "epoch": 0.14743705125897483, "grad_norm": 0.3355537274064852, "learning_rate": 9.950049813524865e-06, "loss": 0.3021, "step": 1843 }, { "epoch": 0.14751704965900683, "grad_norm": 0.32254511745643205, "learning_rate": 9.94995844075453e-06, "loss": 0.2645, "step": 1844 }, { "epoch": 0.1475970480590388, "grad_norm": 0.3204618145122401, "learning_rate": 9.949866984907768e-06, "loss": 0.2793, "step": 1845 }, { "epoch": 0.14767704645907082, "grad_norm": 0.2628702424701707, "learning_rate": 9.949775445986112e-06, "loss": 0.3296, "step": 1846 }, { "epoch": 0.14775704485910282, "grad_norm": 0.28343470231694806, "learning_rate": 9.9496838239911e-06, "loss": 0.3012, "step": 1847 }, { "epoch": 0.14783704325913483, "grad_norm": 0.2037717652319075, "learning_rate": 9.949592118924271e-06, "loss": 0.3574, "step": 1848 }, { "epoch": 0.1479170416591668, "grad_norm": 0.42539111489672166, "learning_rate": 9.949500330787162e-06, "loss": 0.3811, "step": 1849 }, { "epoch": 0.1479970400591988, "grad_norm": 0.27700887234414806, "learning_rate": 9.949408459581316e-06, "loss": 0.3216, "step": 1850 }, { "epoch": 0.14807703845923081, "grad_norm": 0.2779837158618126, "learning_rate": 9.94931650530827e-06, "loss": 0.3433, "step": 1851 }, { "epoch": 0.14815703685926282, "grad_norm": 0.2899179613048597, "learning_rate": 9.949224467969574e-06, "loss": 0.3186, "step": 1852 }, { "epoch": 0.14823703525929482, "grad_norm": 0.5197571788806743, "learning_rate": 9.949132347566765e-06, "loss": 0.2746, "step": 1853 }, { "epoch": 0.1483170336593268, "grad_norm": 0.286463152698303, "learning_rate": 9.949040144101396e-06, "loss": 0.3122, "step": 1854 }, { "epoch": 0.1483970320593588, "grad_norm": 0.32277761143261857, "learning_rate": 9.948947857575012e-06, "loss": 0.2943, "step": 1855 }, { "epoch": 0.1484770304593908, "grad_norm": 0.31974179742061093, "learning_rate": 9.948855487989161e-06, "loss": 0.3237, "step": 1856 }, { "epoch": 0.14855702885942282, "grad_norm": 0.3466111990865634, "learning_rate": 9.948763035345393e-06, "loss": 0.2699, "step": 1857 }, { "epoch": 0.14863702725945482, "grad_norm": 0.3292729549858262, "learning_rate": 9.94867049964526e-06, "loss": 0.2734, "step": 1858 }, { "epoch": 0.1487170256594868, "grad_norm": 0.3306165631879451, "learning_rate": 9.948577880890318e-06, "loss": 0.2743, "step": 1859 }, { "epoch": 0.1487970240595188, "grad_norm": 0.45019858639406835, "learning_rate": 9.948485179082117e-06, "loss": 0.3, "step": 1860 }, { "epoch": 0.1488770224595508, "grad_norm": 0.2831601948633049, "learning_rate": 9.948392394222214e-06, "loss": 0.3263, "step": 1861 }, { "epoch": 0.14895702085958282, "grad_norm": 0.2884494626802798, "learning_rate": 9.94829952631217e-06, "loss": 0.3183, "step": 1862 }, { "epoch": 0.1490370192596148, "grad_norm": 0.3142569100184952, "learning_rate": 9.948206575353539e-06, "loss": 0.2871, "step": 1863 }, { "epoch": 0.1491170176596468, "grad_norm": 0.3493683457326879, "learning_rate": 9.948113541347882e-06, "loss": 0.3434, "step": 1864 }, { "epoch": 0.1491970160596788, "grad_norm": 0.3004402718031714, "learning_rate": 9.948020424296762e-06, "loss": 0.3347, "step": 1865 }, { "epoch": 0.1492770144597108, "grad_norm": 0.33573487157449655, "learning_rate": 9.947927224201741e-06, "loss": 0.2749, "step": 1866 }, { "epoch": 0.14935701285974282, "grad_norm": 0.29497337677065777, "learning_rate": 9.947833941064382e-06, "loss": 0.3171, "step": 1867 }, { "epoch": 0.1494370112597748, "grad_norm": 0.31216686853788317, "learning_rate": 9.947740574886253e-06, "loss": 0.2656, "step": 1868 }, { "epoch": 0.1495170096598068, "grad_norm": 0.299370916220887, "learning_rate": 9.94764712566892e-06, "loss": 0.2896, "step": 1869 }, { "epoch": 0.1495970080598388, "grad_norm": 0.35034504582011555, "learning_rate": 9.947553593413948e-06, "loss": 0.3045, "step": 1870 }, { "epoch": 0.1496770064598708, "grad_norm": 0.3915856739342979, "learning_rate": 9.947459978122912e-06, "loss": 0.2939, "step": 1871 }, { "epoch": 0.14975700485990281, "grad_norm": 0.27324450111796184, "learning_rate": 9.947366279797382e-06, "loss": 0.3196, "step": 1872 }, { "epoch": 0.1498370032599348, "grad_norm": 0.23974963319293438, "learning_rate": 9.947272498438929e-06, "loss": 0.3388, "step": 1873 }, { "epoch": 0.1499170016599668, "grad_norm": 0.32731745439760157, "learning_rate": 9.947178634049127e-06, "loss": 0.2865, "step": 1874 }, { "epoch": 0.1499970000599988, "grad_norm": 0.3132702973741263, "learning_rate": 9.947084686629552e-06, "loss": 0.2696, "step": 1875 }, { "epoch": 0.1500769984600308, "grad_norm": 0.30088133474177303, "learning_rate": 9.946990656181782e-06, "loss": 0.3076, "step": 1876 }, { "epoch": 0.15015699686006279, "grad_norm": 0.3420583506596322, "learning_rate": 9.946896542707391e-06, "loss": 0.2923, "step": 1877 }, { "epoch": 0.1502369952600948, "grad_norm": 0.46027287232057934, "learning_rate": 9.946802346207963e-06, "loss": 0.2765, "step": 1878 }, { "epoch": 0.1503169936601268, "grad_norm": 0.3151837933860946, "learning_rate": 9.946708066685077e-06, "loss": 0.2527, "step": 1879 }, { "epoch": 0.1503969920601588, "grad_norm": 0.28301358959449174, "learning_rate": 9.946613704140315e-06, "loss": 0.3462, "step": 1880 }, { "epoch": 0.1504769904601908, "grad_norm": 0.2611324915074516, "learning_rate": 9.946519258575263e-06, "loss": 0.3231, "step": 1881 }, { "epoch": 0.15055698886022278, "grad_norm": 0.2830739805365615, "learning_rate": 9.946424729991502e-06, "loss": 0.338, "step": 1882 }, { "epoch": 0.1506369872602548, "grad_norm": 0.3253565459614321, "learning_rate": 9.946330118390622e-06, "loss": 0.2956, "step": 1883 }, { "epoch": 0.1507169856602868, "grad_norm": 0.41220679415380274, "learning_rate": 9.946235423774211e-06, "loss": 0.2889, "step": 1884 }, { "epoch": 0.1507969840603188, "grad_norm": 0.3109660103563562, "learning_rate": 9.946140646143856e-06, "loss": 0.3315, "step": 1885 }, { "epoch": 0.1508769824603508, "grad_norm": 0.38596949211608217, "learning_rate": 9.946045785501148e-06, "loss": 0.3225, "step": 1886 }, { "epoch": 0.15095698086038278, "grad_norm": 0.3339679133978762, "learning_rate": 9.94595084184768e-06, "loss": 0.3095, "step": 1887 }, { "epoch": 0.1510369792604148, "grad_norm": 0.2894816453898282, "learning_rate": 9.945855815185046e-06, "loss": 0.298, "step": 1888 }, { "epoch": 0.1511169776604468, "grad_norm": 0.360803512625732, "learning_rate": 9.945760705514839e-06, "loss": 0.2944, "step": 1889 }, { "epoch": 0.1511969760604788, "grad_norm": 0.3628019256451538, "learning_rate": 9.945665512838657e-06, "loss": 0.2772, "step": 1890 }, { "epoch": 0.15127697446051078, "grad_norm": 0.20035706495116146, "learning_rate": 9.945570237158098e-06, "loss": 0.3463, "step": 1891 }, { "epoch": 0.15135697286054278, "grad_norm": 0.31489336439107407, "learning_rate": 9.945474878474758e-06, "loss": 0.31, "step": 1892 }, { "epoch": 0.1514369712605748, "grad_norm": 0.3371685502009679, "learning_rate": 9.94537943679024e-06, "loss": 0.2784, "step": 1893 }, { "epoch": 0.1515169696606068, "grad_norm": 0.3123657942543008, "learning_rate": 9.945283912106145e-06, "loss": 0.2899, "step": 1894 }, { "epoch": 0.1515969680606388, "grad_norm": 0.31830182623763087, "learning_rate": 9.945188304424078e-06, "loss": 0.2804, "step": 1895 }, { "epoch": 0.15167696646067078, "grad_norm": 0.35986647307557873, "learning_rate": 9.945092613745642e-06, "loss": 0.2773, "step": 1896 }, { "epoch": 0.15175696486070278, "grad_norm": 0.31601971500211273, "learning_rate": 9.944996840072442e-06, "loss": 0.2946, "step": 1897 }, { "epoch": 0.1518369632607348, "grad_norm": 0.3275030722606867, "learning_rate": 9.944900983406087e-06, "loss": 0.2762, "step": 1898 }, { "epoch": 0.1519169616607668, "grad_norm": 0.2883875538357041, "learning_rate": 9.944805043748185e-06, "loss": 0.2919, "step": 1899 }, { "epoch": 0.1519969600607988, "grad_norm": 0.31496523144795685, "learning_rate": 9.944709021100347e-06, "loss": 0.2816, "step": 1900 }, { "epoch": 0.15207695846083077, "grad_norm": 0.35493257580438625, "learning_rate": 9.944612915464183e-06, "loss": 0.2945, "step": 1901 }, { "epoch": 0.15215695686086278, "grad_norm": 0.31007967641599943, "learning_rate": 9.944516726841308e-06, "loss": 0.2771, "step": 1902 }, { "epoch": 0.15223695526089479, "grad_norm": 0.3407784653582902, "learning_rate": 9.944420455233335e-06, "loss": 0.3248, "step": 1903 }, { "epoch": 0.1523169536609268, "grad_norm": 0.3277126303965842, "learning_rate": 9.94432410064188e-06, "loss": 0.2749, "step": 1904 }, { "epoch": 0.15239695206095877, "grad_norm": 0.3243543805856334, "learning_rate": 9.94422766306856e-06, "loss": 0.2992, "step": 1905 }, { "epoch": 0.15247695046099077, "grad_norm": 0.26179412250906714, "learning_rate": 9.944131142514994e-06, "loss": 0.3056, "step": 1906 }, { "epoch": 0.15255694886102278, "grad_norm": 0.2785204697187977, "learning_rate": 9.944034538982804e-06, "loss": 0.3112, "step": 1907 }, { "epoch": 0.15263694726105478, "grad_norm": 0.316223583149207, "learning_rate": 9.943937852473605e-06, "loss": 0.3181, "step": 1908 }, { "epoch": 0.1527169456610868, "grad_norm": 0.2658340177863202, "learning_rate": 9.943841082989027e-06, "loss": 0.2993, "step": 1909 }, { "epoch": 0.15279694406111877, "grad_norm": 0.34191815866569764, "learning_rate": 9.943744230530689e-06, "loss": 0.2711, "step": 1910 }, { "epoch": 0.15287694246115077, "grad_norm": 0.32841962974101413, "learning_rate": 9.943647295100219e-06, "loss": 0.3133, "step": 1911 }, { "epoch": 0.15295694086118278, "grad_norm": 0.35883213639904044, "learning_rate": 9.943550276699244e-06, "loss": 0.3199, "step": 1912 }, { "epoch": 0.15303693926121478, "grad_norm": 0.30156919934819676, "learning_rate": 9.94345317532939e-06, "loss": 0.2832, "step": 1913 }, { "epoch": 0.1531169376612468, "grad_norm": 0.2868351210873058, "learning_rate": 9.943355990992289e-06, "loss": 0.3013, "step": 1914 }, { "epoch": 0.15319693606127877, "grad_norm": 0.2946587459800661, "learning_rate": 9.94325872368957e-06, "loss": 0.2973, "step": 1915 }, { "epoch": 0.15327693446131077, "grad_norm": 0.29831093465693553, "learning_rate": 9.943161373422869e-06, "loss": 0.3222, "step": 1916 }, { "epoch": 0.15335693286134278, "grad_norm": 0.3386638956830772, "learning_rate": 9.943063940193817e-06, "loss": 0.2833, "step": 1917 }, { "epoch": 0.15343693126137478, "grad_norm": 0.2868698428970782, "learning_rate": 9.94296642400405e-06, "loss": 0.2917, "step": 1918 }, { "epoch": 0.15351692966140676, "grad_norm": 0.3791255728042178, "learning_rate": 9.942868824855202e-06, "loss": 0.2764, "step": 1919 }, { "epoch": 0.15359692806143876, "grad_norm": 0.28572584237585447, "learning_rate": 9.942771142748917e-06, "loss": 0.3086, "step": 1920 }, { "epoch": 0.15367692646147077, "grad_norm": 0.2536290066873153, "learning_rate": 9.94267337768683e-06, "loss": 0.3354, "step": 1921 }, { "epoch": 0.15375692486150278, "grad_norm": 0.3231069430599438, "learning_rate": 9.942575529670582e-06, "loss": 0.2594, "step": 1922 }, { "epoch": 0.15383692326153478, "grad_norm": 0.3130327942294197, "learning_rate": 9.942477598701815e-06, "loss": 0.2779, "step": 1923 }, { "epoch": 0.15391692166156676, "grad_norm": 0.32538530277231054, "learning_rate": 9.942379584782176e-06, "loss": 0.3002, "step": 1924 }, { "epoch": 0.15399692006159876, "grad_norm": 0.2706298250783191, "learning_rate": 9.942281487913306e-06, "loss": 0.3416, "step": 1925 }, { "epoch": 0.15407691846163077, "grad_norm": 0.31959008849859943, "learning_rate": 9.942183308096853e-06, "loss": 0.2696, "step": 1926 }, { "epoch": 0.15415691686166277, "grad_norm": 0.26577896553433417, "learning_rate": 9.942085045334464e-06, "loss": 0.3213, "step": 1927 }, { "epoch": 0.15423691526169478, "grad_norm": 0.27254963288123485, "learning_rate": 9.94198669962779e-06, "loss": 0.3184, "step": 1928 }, { "epoch": 0.15431691366172676, "grad_norm": 0.3016045978165824, "learning_rate": 9.941888270978482e-06, "loss": 0.3046, "step": 1929 }, { "epoch": 0.15439691206175876, "grad_norm": 0.2874779313449009, "learning_rate": 9.941789759388187e-06, "loss": 0.3014, "step": 1930 }, { "epoch": 0.15447691046179077, "grad_norm": 0.36056563442619455, "learning_rate": 9.941691164858565e-06, "loss": 0.2631, "step": 1931 }, { "epoch": 0.15455690886182277, "grad_norm": 0.3927924331129919, "learning_rate": 9.941592487391265e-06, "loss": 0.2994, "step": 1932 }, { "epoch": 0.15463690726185475, "grad_norm": 0.3150792463892834, "learning_rate": 9.941493726987947e-06, "loss": 0.2685, "step": 1933 }, { "epoch": 0.15471690566188676, "grad_norm": 0.27724982112327506, "learning_rate": 9.941394883650266e-06, "loss": 0.2994, "step": 1934 }, { "epoch": 0.15479690406191876, "grad_norm": 0.26740898269289404, "learning_rate": 9.941295957379884e-06, "loss": 0.2849, "step": 1935 }, { "epoch": 0.15487690246195077, "grad_norm": 0.3234473105276574, "learning_rate": 9.941196948178457e-06, "loss": 0.282, "step": 1936 }, { "epoch": 0.15495690086198277, "grad_norm": 0.3014168749387208, "learning_rate": 9.941097856047652e-06, "loss": 0.2991, "step": 1937 }, { "epoch": 0.15503689926201475, "grad_norm": 0.2921167973124924, "learning_rate": 9.940998680989127e-06, "loss": 0.3008, "step": 1938 }, { "epoch": 0.15511689766204675, "grad_norm": 0.2994366993365175, "learning_rate": 9.940899423004548e-06, "loss": 0.3037, "step": 1939 }, { "epoch": 0.15519689606207876, "grad_norm": 0.3226771448289312, "learning_rate": 9.940800082095583e-06, "loss": 0.3053, "step": 1940 }, { "epoch": 0.15527689446211076, "grad_norm": 0.2434821420973053, "learning_rate": 9.940700658263897e-06, "loss": 0.3271, "step": 1941 }, { "epoch": 0.15535689286214277, "grad_norm": 0.3177729962718052, "learning_rate": 9.94060115151116e-06, "loss": 0.2605, "step": 1942 }, { "epoch": 0.15543689126217475, "grad_norm": 0.291119337301666, "learning_rate": 9.940501561839043e-06, "loss": 0.3179, "step": 1943 }, { "epoch": 0.15551688966220675, "grad_norm": 0.35606126665376925, "learning_rate": 9.940401889249213e-06, "loss": 0.304, "step": 1944 }, { "epoch": 0.15559688806223876, "grad_norm": 0.3384219456754074, "learning_rate": 9.940302133743347e-06, "loss": 0.2803, "step": 1945 }, { "epoch": 0.15567688646227076, "grad_norm": 0.8944823592957772, "learning_rate": 9.940202295323116e-06, "loss": 0.2958, "step": 1946 }, { "epoch": 0.15575688486230274, "grad_norm": 0.2866956059110046, "learning_rate": 9.940102373990202e-06, "loss": 0.3089, "step": 1947 }, { "epoch": 0.15583688326233475, "grad_norm": 0.31768839474460686, "learning_rate": 9.940002369746273e-06, "loss": 0.3028, "step": 1948 }, { "epoch": 0.15591688166236675, "grad_norm": 0.33033292503840583, "learning_rate": 9.939902282593015e-06, "loss": 0.2736, "step": 1949 }, { "epoch": 0.15599688006239876, "grad_norm": 2.123138540393024, "learning_rate": 9.939802112532103e-06, "loss": 0.2974, "step": 1950 }, { "epoch": 0.15607687846243076, "grad_norm": 0.28928403017680615, "learning_rate": 9.93970185956522e-06, "loss": 0.2997, "step": 1951 }, { "epoch": 0.15615687686246274, "grad_norm": 0.3331706244042608, "learning_rate": 9.93960152369405e-06, "loss": 0.2855, "step": 1952 }, { "epoch": 0.15623687526249475, "grad_norm": 0.26783340952157336, "learning_rate": 9.939501104920275e-06, "loss": 0.3432, "step": 1953 }, { "epoch": 0.15631687366252675, "grad_norm": 0.26650864377933553, "learning_rate": 9.939400603245581e-06, "loss": 0.318, "step": 1954 }, { "epoch": 0.15639687206255876, "grad_norm": 0.3629216180233225, "learning_rate": 9.939300018671654e-06, "loss": 0.2961, "step": 1955 }, { "epoch": 0.15647687046259076, "grad_norm": 0.5947336439646187, "learning_rate": 9.939199351200182e-06, "loss": 0.2979, "step": 1956 }, { "epoch": 0.15655686886262274, "grad_norm": 0.24725058002732525, "learning_rate": 9.939098600832857e-06, "loss": 0.3237, "step": 1957 }, { "epoch": 0.15663686726265474, "grad_norm": 0.353539628846901, "learning_rate": 9.938997767571366e-06, "loss": 0.3079, "step": 1958 }, { "epoch": 0.15671686566268675, "grad_norm": 0.256775710924978, "learning_rate": 9.938896851417406e-06, "loss": 0.3205, "step": 1959 }, { "epoch": 0.15679686406271875, "grad_norm": 0.3425475794246176, "learning_rate": 9.938795852372667e-06, "loss": 0.2812, "step": 1960 }, { "epoch": 0.15687686246275073, "grad_norm": 0.23593916471198498, "learning_rate": 9.938694770438843e-06, "loss": 0.3338, "step": 1961 }, { "epoch": 0.15695686086278274, "grad_norm": 0.20874566330252095, "learning_rate": 9.938593605617637e-06, "loss": 0.3488, "step": 1962 }, { "epoch": 0.15703685926281474, "grad_norm": 0.3210869159591659, "learning_rate": 9.93849235791074e-06, "loss": 0.2834, "step": 1963 }, { "epoch": 0.15711685766284675, "grad_norm": 0.32824481394583327, "learning_rate": 9.938391027319853e-06, "loss": 0.2964, "step": 1964 }, { "epoch": 0.15719685606287875, "grad_norm": 0.29161702233315906, "learning_rate": 9.93828961384668e-06, "loss": 0.3166, "step": 1965 }, { "epoch": 0.15727685446291073, "grad_norm": 0.2588679863957179, "learning_rate": 9.938188117492919e-06, "loss": 0.3197, "step": 1966 }, { "epoch": 0.15735685286294274, "grad_norm": 0.2991901550583303, "learning_rate": 9.938086538260277e-06, "loss": 0.3141, "step": 1967 }, { "epoch": 0.15743685126297474, "grad_norm": 0.2373571460420263, "learning_rate": 9.937984876150455e-06, "loss": 0.3403, "step": 1968 }, { "epoch": 0.15751684966300675, "grad_norm": 0.27797961478436123, "learning_rate": 9.937883131165163e-06, "loss": 0.2942, "step": 1969 }, { "epoch": 0.15759684806303875, "grad_norm": 0.313459888523142, "learning_rate": 9.937781303306105e-06, "loss": 0.2905, "step": 1970 }, { "epoch": 0.15767684646307073, "grad_norm": 0.18233826081065072, "learning_rate": 9.937679392574991e-06, "loss": 0.3995, "step": 1971 }, { "epoch": 0.15775684486310274, "grad_norm": 0.2956874431100799, "learning_rate": 9.937577398973533e-06, "loss": 0.3298, "step": 1972 }, { "epoch": 0.15783684326313474, "grad_norm": 0.3560142279117845, "learning_rate": 9.937475322503442e-06, "loss": 0.2801, "step": 1973 }, { "epoch": 0.15791684166316675, "grad_norm": 0.3102471995347657, "learning_rate": 9.93737316316643e-06, "loss": 0.3104, "step": 1974 }, { "epoch": 0.15799684006319872, "grad_norm": 0.2936088942445335, "learning_rate": 9.937270920964214e-06, "loss": 0.3097, "step": 1975 }, { "epoch": 0.15807683846323073, "grad_norm": 0.26910560048961457, "learning_rate": 9.93716859589851e-06, "loss": 0.3433, "step": 1976 }, { "epoch": 0.15815683686326273, "grad_norm": 0.2919644913402779, "learning_rate": 9.937066187971031e-06, "loss": 0.3404, "step": 1977 }, { "epoch": 0.15823683526329474, "grad_norm": 0.2719191882954073, "learning_rate": 9.9369636971835e-06, "loss": 0.332, "step": 1978 }, { "epoch": 0.15831683366332674, "grad_norm": 0.28901665532367127, "learning_rate": 9.936861123537636e-06, "loss": 0.3204, "step": 1979 }, { "epoch": 0.15839683206335872, "grad_norm": 0.32271995431330563, "learning_rate": 9.936758467035161e-06, "loss": 0.3525, "step": 1980 }, { "epoch": 0.15847683046339073, "grad_norm": 0.4475416968890067, "learning_rate": 9.936655727677795e-06, "loss": 0.3335, "step": 1981 }, { "epoch": 0.15855682886342273, "grad_norm": 0.34832908968617937, "learning_rate": 9.936552905467266e-06, "loss": 0.2595, "step": 1982 }, { "epoch": 0.15863682726345474, "grad_norm": 0.3371639518254655, "learning_rate": 9.936450000405297e-06, "loss": 0.2859, "step": 1983 }, { "epoch": 0.15871682566348674, "grad_norm": 0.32434960199141255, "learning_rate": 9.93634701249362e-06, "loss": 0.3092, "step": 1984 }, { "epoch": 0.15879682406351872, "grad_norm": 0.3108385525904896, "learning_rate": 9.936243941733956e-06, "loss": 0.2879, "step": 1985 }, { "epoch": 0.15887682246355073, "grad_norm": 0.29828337033894703, "learning_rate": 9.936140788128039e-06, "loss": 0.3099, "step": 1986 }, { "epoch": 0.15895682086358273, "grad_norm": 0.2905310288292043, "learning_rate": 9.9360375516776e-06, "loss": 0.3059, "step": 1987 }, { "epoch": 0.15903681926361474, "grad_norm": 0.2824472375123829, "learning_rate": 9.935934232384374e-06, "loss": 0.2969, "step": 1988 }, { "epoch": 0.15911681766364671, "grad_norm": 0.2855051453237634, "learning_rate": 9.93583083025009e-06, "loss": 0.3027, "step": 1989 }, { "epoch": 0.15919681606367872, "grad_norm": 1.2438407842811185, "learning_rate": 9.935727345276487e-06, "loss": 0.2918, "step": 1990 }, { "epoch": 0.15927681446371073, "grad_norm": 0.2883612405465744, "learning_rate": 9.9356237774653e-06, "loss": 0.3004, "step": 1991 }, { "epoch": 0.15935681286374273, "grad_norm": 0.28781977965373595, "learning_rate": 9.935520126818269e-06, "loss": 0.3096, "step": 1992 }, { "epoch": 0.15943681126377474, "grad_norm": 0.4968861461530532, "learning_rate": 9.935416393337132e-06, "loss": 0.3046, "step": 1993 }, { "epoch": 0.1595168096638067, "grad_norm": 0.35610699215577285, "learning_rate": 9.93531257702363e-06, "loss": 0.2914, "step": 1994 }, { "epoch": 0.15959680806383872, "grad_norm": 0.24555119841806142, "learning_rate": 9.935208677879508e-06, "loss": 0.3247, "step": 1995 }, { "epoch": 0.15967680646387072, "grad_norm": 0.3445156274523415, "learning_rate": 9.935104695906506e-06, "loss": 0.2972, "step": 1996 }, { "epoch": 0.15975680486390273, "grad_norm": 0.38663321560641845, "learning_rate": 9.935000631106372e-06, "loss": 0.2902, "step": 1997 }, { "epoch": 0.15983680326393473, "grad_norm": 0.2981423721350435, "learning_rate": 9.934896483480851e-06, "loss": 0.3021, "step": 1998 }, { "epoch": 0.1599168016639667, "grad_norm": 0.3223091364872938, "learning_rate": 9.93479225303169e-06, "loss": 0.2676, "step": 1999 }, { "epoch": 0.15999680006399872, "grad_norm": 0.3389354411333486, "learning_rate": 9.934687939760642e-06, "loss": 0.3015, "step": 2000 }, { "epoch": 0.16007679846403072, "grad_norm": 0.2968751407080333, "learning_rate": 9.934583543669454e-06, "loss": 0.3209, "step": 2001 }, { "epoch": 0.16015679686406273, "grad_norm": 0.36261609121872346, "learning_rate": 9.93447906475988e-06, "loss": 0.2895, "step": 2002 }, { "epoch": 0.1602367952640947, "grad_norm": 0.28266869991408483, "learning_rate": 9.934374503033672e-06, "loss": 0.3129, "step": 2003 }, { "epoch": 0.1603167936641267, "grad_norm": 0.328105884068455, "learning_rate": 9.934269858492587e-06, "loss": 0.2818, "step": 2004 }, { "epoch": 0.16039679206415872, "grad_norm": 0.4531626802863882, "learning_rate": 9.934165131138381e-06, "loss": 0.2631, "step": 2005 }, { "epoch": 0.16047679046419072, "grad_norm": 0.31175207748463274, "learning_rate": 9.934060320972811e-06, "loss": 0.3416, "step": 2006 }, { "epoch": 0.16055678886422273, "grad_norm": 0.27819934788155004, "learning_rate": 9.933955427997634e-06, "loss": 0.3179, "step": 2007 }, { "epoch": 0.1606367872642547, "grad_norm": 0.42046561569885477, "learning_rate": 9.933850452214612e-06, "loss": 0.2747, "step": 2008 }, { "epoch": 0.1607167856642867, "grad_norm": 0.3374576150570054, "learning_rate": 9.933745393625509e-06, "loss": 0.2969, "step": 2009 }, { "epoch": 0.16079678406431872, "grad_norm": 0.2785923874961948, "learning_rate": 9.933640252232087e-06, "loss": 0.2939, "step": 2010 }, { "epoch": 0.16087678246435072, "grad_norm": 0.2647996212822832, "learning_rate": 9.933535028036108e-06, "loss": 0.3029, "step": 2011 }, { "epoch": 0.16095678086438273, "grad_norm": 0.28150666968321303, "learning_rate": 9.93342972103934e-06, "loss": 0.3099, "step": 2012 }, { "epoch": 0.1610367792644147, "grad_norm": 0.3540097063560117, "learning_rate": 9.933324331243553e-06, "loss": 0.2804, "step": 2013 }, { "epoch": 0.1611167776644467, "grad_norm": 0.3226258433038972, "learning_rate": 9.93321885865051e-06, "loss": 0.3082, "step": 2014 }, { "epoch": 0.16119677606447871, "grad_norm": 0.3495634632864703, "learning_rate": 9.933113303261987e-06, "loss": 0.2927, "step": 2015 }, { "epoch": 0.16127677446451072, "grad_norm": 0.2924312313285416, "learning_rate": 9.933007665079752e-06, "loss": 0.3158, "step": 2016 }, { "epoch": 0.1613567728645427, "grad_norm": 0.29460187709681573, "learning_rate": 9.932901944105578e-06, "loss": 0.2937, "step": 2017 }, { "epoch": 0.1614367712645747, "grad_norm": 0.3145263865114884, "learning_rate": 9.932796140341242e-06, "loss": 0.2795, "step": 2018 }, { "epoch": 0.1615167696646067, "grad_norm": 0.3109208515777231, "learning_rate": 9.932690253788516e-06, "loss": 0.2728, "step": 2019 }, { "epoch": 0.1615967680646387, "grad_norm": 0.2827757943067093, "learning_rate": 9.93258428444918e-06, "loss": 0.3068, "step": 2020 }, { "epoch": 0.16167676646467072, "grad_norm": 0.29801789061824474, "learning_rate": 9.932478232325013e-06, "loss": 0.3131, "step": 2021 }, { "epoch": 0.1617567648647027, "grad_norm": 0.300066131654582, "learning_rate": 9.932372097417793e-06, "loss": 0.2897, "step": 2022 }, { "epoch": 0.1618367632647347, "grad_norm": 0.43030235696806723, "learning_rate": 9.9322658797293e-06, "loss": 0.2822, "step": 2023 }, { "epoch": 0.1619167616647667, "grad_norm": 0.2917046724238359, "learning_rate": 9.93215957926132e-06, "loss": 0.3382, "step": 2024 }, { "epoch": 0.1619967600647987, "grad_norm": 0.3103294520178998, "learning_rate": 9.932053196015634e-06, "loss": 0.282, "step": 2025 }, { "epoch": 0.16207675846483072, "grad_norm": 0.3227811910781441, "learning_rate": 9.93194672999403e-06, "loss": 0.285, "step": 2026 }, { "epoch": 0.1621567568648627, "grad_norm": 0.28412302400350264, "learning_rate": 9.931840181198296e-06, "loss": 0.3248, "step": 2027 }, { "epoch": 0.1622367552648947, "grad_norm": 0.29180415940981985, "learning_rate": 9.931733549630215e-06, "loss": 0.3129, "step": 2028 }, { "epoch": 0.1623167536649267, "grad_norm": 0.2772177411273436, "learning_rate": 9.931626835291581e-06, "loss": 0.3113, "step": 2029 }, { "epoch": 0.1623967520649587, "grad_norm": 0.3102598246651718, "learning_rate": 9.931520038184184e-06, "loss": 0.2698, "step": 2030 }, { "epoch": 0.1624767504649907, "grad_norm": 0.3749355037097101, "learning_rate": 9.931413158309816e-06, "loss": 0.2758, "step": 2031 }, { "epoch": 0.1625567488650227, "grad_norm": 0.2673871235658931, "learning_rate": 9.93130619567027e-06, "loss": 0.3305, "step": 2032 }, { "epoch": 0.1626367472650547, "grad_norm": 0.2993632822591623, "learning_rate": 9.931199150267343e-06, "loss": 0.3179, "step": 2033 }, { "epoch": 0.1627167456650867, "grad_norm": 0.46468678674961444, "learning_rate": 9.931092022102829e-06, "loss": 0.3463, "step": 2034 }, { "epoch": 0.1627967440651187, "grad_norm": 0.32546445539864916, "learning_rate": 9.93098481117853e-06, "loss": 0.2949, "step": 2035 }, { "epoch": 0.1628767424651507, "grad_norm": 0.24537549794232927, "learning_rate": 9.930877517496242e-06, "loss": 0.3388, "step": 2036 }, { "epoch": 0.1629567408651827, "grad_norm": 0.2896727046965226, "learning_rate": 9.930770141057767e-06, "loss": 0.3267, "step": 2037 }, { "epoch": 0.1630367392652147, "grad_norm": 0.3646729676007411, "learning_rate": 9.930662681864906e-06, "loss": 0.3145, "step": 2038 }, { "epoch": 0.1631167376652467, "grad_norm": 0.3246249494533878, "learning_rate": 9.930555139919465e-06, "loss": 0.2555, "step": 2039 }, { "epoch": 0.1631967360652787, "grad_norm": 0.31047771059898943, "learning_rate": 9.930447515223244e-06, "loss": 0.2823, "step": 2040 }, { "epoch": 0.16327673446531069, "grad_norm": 0.30489563137121306, "learning_rate": 9.930339807778056e-06, "loss": 0.3164, "step": 2041 }, { "epoch": 0.1633567328653427, "grad_norm": 0.2708505017555708, "learning_rate": 9.930232017585704e-06, "loss": 0.3409, "step": 2042 }, { "epoch": 0.1634367312653747, "grad_norm": 0.27613585284837033, "learning_rate": 9.930124144647998e-06, "loss": 0.3151, "step": 2043 }, { "epoch": 0.1635167296654067, "grad_norm": 0.2573171694407268, "learning_rate": 9.930016188966749e-06, "loss": 0.2984, "step": 2044 }, { "epoch": 0.16359672806543868, "grad_norm": 0.32483054169675796, "learning_rate": 9.929908150543769e-06, "loss": 0.301, "step": 2045 }, { "epoch": 0.16367672646547068, "grad_norm": 0.3282723065043363, "learning_rate": 9.92980002938087e-06, "loss": 0.3002, "step": 2046 }, { "epoch": 0.1637567248655027, "grad_norm": 0.2942875595467553, "learning_rate": 9.929691825479868e-06, "loss": 0.3104, "step": 2047 }, { "epoch": 0.1638367232655347, "grad_norm": 0.29648902607067845, "learning_rate": 9.92958353884258e-06, "loss": 0.3007, "step": 2048 }, { "epoch": 0.1639167216655667, "grad_norm": 0.3323779109255999, "learning_rate": 9.929475169470819e-06, "loss": 0.3191, "step": 2049 }, { "epoch": 0.16399672006559868, "grad_norm": 0.24900330227006712, "learning_rate": 9.929366717366408e-06, "loss": 0.3208, "step": 2050 }, { "epoch": 0.16407671846563068, "grad_norm": 0.2771006938988489, "learning_rate": 9.929258182531167e-06, "loss": 0.3414, "step": 2051 }, { "epoch": 0.1641567168656627, "grad_norm": 0.318919987120989, "learning_rate": 9.929149564966915e-06, "loss": 0.2812, "step": 2052 }, { "epoch": 0.1642367152656947, "grad_norm": 0.37899398351306995, "learning_rate": 9.929040864675477e-06, "loss": 0.3083, "step": 2053 }, { "epoch": 0.1643167136657267, "grad_norm": 0.27473698429141086, "learning_rate": 9.928932081658677e-06, "loss": 0.2927, "step": 2054 }, { "epoch": 0.16439671206575868, "grad_norm": 0.29387493528289566, "learning_rate": 9.92882321591834e-06, "loss": 0.3092, "step": 2055 }, { "epoch": 0.16447671046579068, "grad_norm": 0.25145998054783447, "learning_rate": 9.928714267456295e-06, "loss": 0.34, "step": 2056 }, { "epoch": 0.1645567088658227, "grad_norm": 0.29103391565460074, "learning_rate": 9.928605236274368e-06, "loss": 0.3198, "step": 2057 }, { "epoch": 0.1646367072658547, "grad_norm": 0.2948898569870395, "learning_rate": 9.928496122374388e-06, "loss": 0.3236, "step": 2058 }, { "epoch": 0.16471670566588667, "grad_norm": 0.22882413645760086, "learning_rate": 9.928386925758191e-06, "loss": 0.3508, "step": 2059 }, { "epoch": 0.16479670406591868, "grad_norm": 0.2870428290504838, "learning_rate": 9.928277646427605e-06, "loss": 0.3059, "step": 2060 }, { "epoch": 0.16487670246595068, "grad_norm": 0.30218266748330275, "learning_rate": 9.928168284384468e-06, "loss": 0.2944, "step": 2061 }, { "epoch": 0.16495670086598269, "grad_norm": 0.3237261709981678, "learning_rate": 9.928058839630612e-06, "loss": 0.292, "step": 2062 }, { "epoch": 0.1650366992660147, "grad_norm": 0.28878113343686035, "learning_rate": 9.927949312167876e-06, "loss": 0.2905, "step": 2063 }, { "epoch": 0.16511669766604667, "grad_norm": 0.27915257720569336, "learning_rate": 9.927839701998098e-06, "loss": 0.3126, "step": 2064 }, { "epoch": 0.16519669606607867, "grad_norm": 0.2443014418745416, "learning_rate": 9.927730009123116e-06, "loss": 0.326, "step": 2065 }, { "epoch": 0.16527669446611068, "grad_norm": 0.34505327992132423, "learning_rate": 9.927620233544772e-06, "loss": 0.2791, "step": 2066 }, { "epoch": 0.16535669286614268, "grad_norm": 0.29445914239095056, "learning_rate": 9.92751037526491e-06, "loss": 0.2935, "step": 2067 }, { "epoch": 0.1654366912661747, "grad_norm": 0.350564409055299, "learning_rate": 9.927400434285372e-06, "loss": 0.3205, "step": 2068 }, { "epoch": 0.16551668966620667, "grad_norm": 0.35258949036162895, "learning_rate": 9.927290410608003e-06, "loss": 0.2981, "step": 2069 }, { "epoch": 0.16559668806623867, "grad_norm": 0.28547370883708645, "learning_rate": 9.92718030423465e-06, "loss": 0.3202, "step": 2070 }, { "epoch": 0.16567668646627068, "grad_norm": 0.28679915795589933, "learning_rate": 9.927070115167161e-06, "loss": 0.296, "step": 2071 }, { "epoch": 0.16575668486630268, "grad_norm": 0.2820621298380895, "learning_rate": 9.926959843407387e-06, "loss": 0.3156, "step": 2072 }, { "epoch": 0.16583668326633466, "grad_norm": 0.2996160639064728, "learning_rate": 9.926849488957176e-06, "loss": 0.2934, "step": 2073 }, { "epoch": 0.16591668166636667, "grad_norm": 0.3050953030562137, "learning_rate": 9.92673905181838e-06, "loss": 0.3301, "step": 2074 }, { "epoch": 0.16599668006639867, "grad_norm": 0.42569906281833886, "learning_rate": 9.926628531992855e-06, "loss": 0.2854, "step": 2075 }, { "epoch": 0.16607667846643068, "grad_norm": 0.284517162889428, "learning_rate": 9.926517929482454e-06, "loss": 0.3087, "step": 2076 }, { "epoch": 0.16615667686646268, "grad_norm": 0.3188657173590444, "learning_rate": 9.926407244289033e-06, "loss": 0.2878, "step": 2077 }, { "epoch": 0.16623667526649466, "grad_norm": 0.2957576480672817, "learning_rate": 9.926296476414451e-06, "loss": 0.2865, "step": 2078 }, { "epoch": 0.16631667366652667, "grad_norm": 0.24215609358489673, "learning_rate": 9.926185625860567e-06, "loss": 0.3469, "step": 2079 }, { "epoch": 0.16639667206655867, "grad_norm": 0.24149424115276494, "learning_rate": 9.926074692629241e-06, "loss": 0.3212, "step": 2080 }, { "epoch": 0.16647667046659068, "grad_norm": 0.2864256359063075, "learning_rate": 9.925963676722335e-06, "loss": 0.2901, "step": 2081 }, { "epoch": 0.16655666886662268, "grad_norm": 0.2564242849503747, "learning_rate": 9.925852578141711e-06, "loss": 0.3534, "step": 2082 }, { "epoch": 0.16663666726665466, "grad_norm": 0.3133293218070104, "learning_rate": 9.925741396889235e-06, "loss": 0.2562, "step": 2083 }, { "epoch": 0.16671666566668666, "grad_norm": 0.32229732284041823, "learning_rate": 9.925630132966772e-06, "loss": 0.2729, "step": 2084 }, { "epoch": 0.16679666406671867, "grad_norm": 0.3300973761485826, "learning_rate": 9.925518786376192e-06, "loss": 0.2879, "step": 2085 }, { "epoch": 0.16687666246675067, "grad_norm": 0.32016089799442193, "learning_rate": 9.925407357119359e-06, "loss": 0.3115, "step": 2086 }, { "epoch": 0.16695666086678265, "grad_norm": 0.305268395533445, "learning_rate": 9.925295845198148e-06, "loss": 0.246, "step": 2087 }, { "epoch": 0.16703665926681466, "grad_norm": 0.3030992904594177, "learning_rate": 9.925184250614427e-06, "loss": 0.286, "step": 2088 }, { "epoch": 0.16711665766684666, "grad_norm": 0.30851975303268475, "learning_rate": 9.92507257337007e-06, "loss": 0.2833, "step": 2089 }, { "epoch": 0.16719665606687867, "grad_norm": 0.24864383713819052, "learning_rate": 9.924960813466952e-06, "loss": 0.3259, "step": 2090 }, { "epoch": 0.16727665446691067, "grad_norm": 0.2975092364269963, "learning_rate": 9.92484897090695e-06, "loss": 0.3067, "step": 2091 }, { "epoch": 0.16735665286694265, "grad_norm": 0.29192769337469865, "learning_rate": 9.924737045691938e-06, "loss": 0.2971, "step": 2092 }, { "epoch": 0.16743665126697466, "grad_norm": 0.2320288562788732, "learning_rate": 9.924625037823797e-06, "loss": 0.3326, "step": 2093 }, { "epoch": 0.16751664966700666, "grad_norm": 0.29136998768279093, "learning_rate": 9.924512947304403e-06, "loss": 0.3005, "step": 2094 }, { "epoch": 0.16759664806703867, "grad_norm": 0.2959481140531302, "learning_rate": 9.924400774135641e-06, "loss": 0.3094, "step": 2095 }, { "epoch": 0.16767664646707067, "grad_norm": 0.34360539502211024, "learning_rate": 9.924288518319394e-06, "loss": 0.2826, "step": 2096 }, { "epoch": 0.16775664486710265, "grad_norm": 0.29357643997439486, "learning_rate": 9.924176179857543e-06, "loss": 0.3075, "step": 2097 }, { "epoch": 0.16783664326713466, "grad_norm": 0.24684161805198337, "learning_rate": 9.924063758751976e-06, "loss": 0.3365, "step": 2098 }, { "epoch": 0.16791664166716666, "grad_norm": 0.2948633502995829, "learning_rate": 9.923951255004577e-06, "loss": 0.3083, "step": 2099 }, { "epoch": 0.16799664006719867, "grad_norm": 0.34358517584085074, "learning_rate": 9.923838668617238e-06, "loss": 0.2702, "step": 2100 }, { "epoch": 0.16807663846723064, "grad_norm": 0.33105465344399826, "learning_rate": 9.923725999591846e-06, "loss": 0.288, "step": 2101 }, { "epoch": 0.16815663686726265, "grad_norm": 0.3309024643305375, "learning_rate": 9.923613247930293e-06, "loss": 0.2726, "step": 2102 }, { "epoch": 0.16823663526729465, "grad_norm": 0.3249873474552501, "learning_rate": 9.92350041363447e-06, "loss": 0.3193, "step": 2103 }, { "epoch": 0.16831663366732666, "grad_norm": 0.2694635218051305, "learning_rate": 9.923387496706273e-06, "loss": 0.3221, "step": 2104 }, { "epoch": 0.16839663206735866, "grad_norm": 0.3185256235992889, "learning_rate": 9.923274497147595e-06, "loss": 0.2784, "step": 2105 }, { "epoch": 0.16847663046739064, "grad_norm": 0.34825592715875764, "learning_rate": 9.923161414960331e-06, "loss": 0.2789, "step": 2106 }, { "epoch": 0.16855662886742265, "grad_norm": 0.2930165114204921, "learning_rate": 9.923048250146383e-06, "loss": 0.3036, "step": 2107 }, { "epoch": 0.16863662726745465, "grad_norm": 0.25457050039476486, "learning_rate": 9.92293500270765e-06, "loss": 0.3279, "step": 2108 }, { "epoch": 0.16871662566748666, "grad_norm": 0.30317993746597344, "learning_rate": 9.922821672646028e-06, "loss": 0.2668, "step": 2109 }, { "epoch": 0.16879662406751864, "grad_norm": 0.29354054918647643, "learning_rate": 9.922708259963423e-06, "loss": 0.2891, "step": 2110 }, { "epoch": 0.16887662246755064, "grad_norm": 0.2307833362333732, "learning_rate": 9.922594764661737e-06, "loss": 0.3437, "step": 2111 }, { "epoch": 0.16895662086758265, "grad_norm": 0.2893513886646398, "learning_rate": 9.922481186742875e-06, "loss": 0.289, "step": 2112 }, { "epoch": 0.16903661926761465, "grad_norm": 0.31843158846122066, "learning_rate": 9.922367526208746e-06, "loss": 0.285, "step": 2113 }, { "epoch": 0.16911661766764666, "grad_norm": 0.36210982886282705, "learning_rate": 9.922253783061253e-06, "loss": 0.2893, "step": 2114 }, { "epoch": 0.16919661606767863, "grad_norm": 0.313219105341854, "learning_rate": 9.922139957302308e-06, "loss": 0.2751, "step": 2115 }, { "epoch": 0.16927661446771064, "grad_norm": 0.3003082046460499, "learning_rate": 9.922026048933819e-06, "loss": 0.2952, "step": 2116 }, { "epoch": 0.16935661286774265, "grad_norm": 0.2514870859616452, "learning_rate": 9.921912057957701e-06, "loss": 0.3307, "step": 2117 }, { "epoch": 0.16943661126777465, "grad_norm": 0.28624968869974593, "learning_rate": 9.921797984375866e-06, "loss": 0.3017, "step": 2118 }, { "epoch": 0.16951660966780666, "grad_norm": 0.29064446688182977, "learning_rate": 9.921683828190225e-06, "loss": 0.2957, "step": 2119 }, { "epoch": 0.16959660806783863, "grad_norm": 0.28869322741215836, "learning_rate": 9.9215695894027e-06, "loss": 0.3143, "step": 2120 }, { "epoch": 0.16967660646787064, "grad_norm": 0.2387779742533509, "learning_rate": 9.9214552680152e-06, "loss": 0.3612, "step": 2121 }, { "epoch": 0.16975660486790264, "grad_norm": 0.3197505704089971, "learning_rate": 9.921340864029653e-06, "loss": 0.2959, "step": 2122 }, { "epoch": 0.16983660326793465, "grad_norm": 0.36126638193109434, "learning_rate": 9.921226377447975e-06, "loss": 0.2942, "step": 2123 }, { "epoch": 0.16991660166796663, "grad_norm": 0.5321695368531306, "learning_rate": 9.921111808272087e-06, "loss": 0.2913, "step": 2124 }, { "epoch": 0.16999660006799863, "grad_norm": 0.3193250433671711, "learning_rate": 9.920997156503912e-06, "loss": 0.2823, "step": 2125 }, { "epoch": 0.17007659846803064, "grad_norm": 0.3023044382349287, "learning_rate": 9.920882422145372e-06, "loss": 0.3213, "step": 2126 }, { "epoch": 0.17015659686806264, "grad_norm": 0.3199438722910674, "learning_rate": 9.920767605198396e-06, "loss": 0.2755, "step": 2127 }, { "epoch": 0.17023659526809465, "grad_norm": 0.34971011123388573, "learning_rate": 9.920652705664912e-06, "loss": 0.2809, "step": 2128 }, { "epoch": 0.17031659366812663, "grad_norm": 0.2462277837906863, "learning_rate": 9.920537723546843e-06, "loss": 0.332, "step": 2129 }, { "epoch": 0.17039659206815863, "grad_norm": 0.3247660198411913, "learning_rate": 9.920422658846126e-06, "loss": 0.2644, "step": 2130 }, { "epoch": 0.17047659046819064, "grad_norm": 0.3044339050533969, "learning_rate": 9.920307511564686e-06, "loss": 0.3046, "step": 2131 }, { "epoch": 0.17055658886822264, "grad_norm": 0.26308903269882067, "learning_rate": 9.92019228170446e-06, "loss": 0.3528, "step": 2132 }, { "epoch": 0.17063658726825465, "grad_norm": 0.2771894539067521, "learning_rate": 9.920076969267375e-06, "loss": 0.2973, "step": 2133 }, { "epoch": 0.17071658566828662, "grad_norm": 0.6356136314750569, "learning_rate": 9.919961574255377e-06, "loss": 0.3051, "step": 2134 }, { "epoch": 0.17079658406831863, "grad_norm": 0.44399253202524924, "learning_rate": 9.919846096670393e-06, "loss": 0.3121, "step": 2135 }, { "epoch": 0.17087658246835064, "grad_norm": 0.30981264347813436, "learning_rate": 9.919730536514367e-06, "loss": 0.2629, "step": 2136 }, { "epoch": 0.17095658086838264, "grad_norm": 0.24137235564041146, "learning_rate": 9.919614893789234e-06, "loss": 0.3213, "step": 2137 }, { "epoch": 0.17103657926841462, "grad_norm": 0.25430767273304306, "learning_rate": 9.919499168496938e-06, "loss": 0.342, "step": 2138 }, { "epoch": 0.17111657766844662, "grad_norm": 0.27598393231255003, "learning_rate": 9.919383360639423e-06, "loss": 0.2938, "step": 2139 }, { "epoch": 0.17119657606847863, "grad_norm": 0.2756439053478478, "learning_rate": 9.919267470218628e-06, "loss": 0.3301, "step": 2140 }, { "epoch": 0.17127657446851063, "grad_norm": 0.23035634134485747, "learning_rate": 9.9191514972365e-06, "loss": 0.3553, "step": 2141 }, { "epoch": 0.17135657286854264, "grad_norm": 0.37963777035644597, "learning_rate": 9.919035441694985e-06, "loss": 0.2998, "step": 2142 }, { "epoch": 0.17143657126857462, "grad_norm": 0.28215279339902427, "learning_rate": 9.918919303596034e-06, "loss": 0.3223, "step": 2143 }, { "epoch": 0.17151656966860662, "grad_norm": 0.5246272151869219, "learning_rate": 9.91880308294159e-06, "loss": 0.3085, "step": 2144 }, { "epoch": 0.17159656806863863, "grad_norm": 0.3491492115950614, "learning_rate": 9.918686779733608e-06, "loss": 0.3138, "step": 2145 }, { "epoch": 0.17167656646867063, "grad_norm": 0.21142573446764898, "learning_rate": 9.918570393974041e-06, "loss": 0.3555, "step": 2146 }, { "epoch": 0.17175656486870264, "grad_norm": 0.3082936233998234, "learning_rate": 9.91845392566484e-06, "loss": 0.2988, "step": 2147 }, { "epoch": 0.17183656326873462, "grad_norm": 0.3405303974408421, "learning_rate": 9.918337374807958e-06, "loss": 0.2972, "step": 2148 }, { "epoch": 0.17191656166876662, "grad_norm": 0.3715830651706167, "learning_rate": 9.918220741405356e-06, "loss": 0.2823, "step": 2149 }, { "epoch": 0.17199656006879863, "grad_norm": 0.29513003022903345, "learning_rate": 9.918104025458985e-06, "loss": 0.2964, "step": 2150 }, { "epoch": 0.17207655846883063, "grad_norm": 0.41357158020023543, "learning_rate": 9.917987226970811e-06, "loss": 0.2993, "step": 2151 }, { "epoch": 0.1721565568688626, "grad_norm": 0.29572195974882093, "learning_rate": 9.917870345942789e-06, "loss": 0.3297, "step": 2152 }, { "epoch": 0.17223655526889461, "grad_norm": 0.34651737400198596, "learning_rate": 9.917753382376883e-06, "loss": 0.2663, "step": 2153 }, { "epoch": 0.17231655366892662, "grad_norm": 0.29111979679515226, "learning_rate": 9.917636336275055e-06, "loss": 0.2925, "step": 2154 }, { "epoch": 0.17239655206895863, "grad_norm": 0.32741385639702947, "learning_rate": 9.91751920763927e-06, "loss": 0.2921, "step": 2155 }, { "epoch": 0.17247655046899063, "grad_norm": 0.33394487197858524, "learning_rate": 9.917401996471494e-06, "loss": 0.2663, "step": 2156 }, { "epoch": 0.1725565488690226, "grad_norm": 0.2814006594577355, "learning_rate": 9.917284702773692e-06, "loss": 0.289, "step": 2157 }, { "epoch": 0.1726365472690546, "grad_norm": 0.2802053100578063, "learning_rate": 9.917167326547837e-06, "loss": 0.3108, "step": 2158 }, { "epoch": 0.17271654566908662, "grad_norm": 0.2714124651698787, "learning_rate": 9.917049867795896e-06, "loss": 0.2769, "step": 2159 }, { "epoch": 0.17279654406911862, "grad_norm": 1.5360669815385892, "learning_rate": 9.91693232651984e-06, "loss": 0.3267, "step": 2160 }, { "epoch": 0.17287654246915063, "grad_norm": 0.32758573693625803, "learning_rate": 9.916814702721641e-06, "loss": 0.2832, "step": 2161 }, { "epoch": 0.1729565408691826, "grad_norm": 0.28747517663749467, "learning_rate": 9.916696996403279e-06, "loss": 0.3042, "step": 2162 }, { "epoch": 0.1730365392692146, "grad_norm": 0.3223179020947917, "learning_rate": 9.916579207566721e-06, "loss": 0.2953, "step": 2163 }, { "epoch": 0.17311653766924662, "grad_norm": 0.3243979792635454, "learning_rate": 9.916461336213949e-06, "loss": 0.2689, "step": 2164 }, { "epoch": 0.17319653606927862, "grad_norm": 0.28971419130451553, "learning_rate": 9.916343382346942e-06, "loss": 0.3334, "step": 2165 }, { "epoch": 0.1732765344693106, "grad_norm": 0.25956534006906684, "learning_rate": 9.916225345967677e-06, "loss": 0.324, "step": 2166 }, { "epoch": 0.1733565328693426, "grad_norm": 0.2740661587980621, "learning_rate": 9.916107227078133e-06, "loss": 0.3338, "step": 2167 }, { "epoch": 0.1734365312693746, "grad_norm": 0.21451879926146697, "learning_rate": 9.915989025680299e-06, "loss": 0.3592, "step": 2168 }, { "epoch": 0.17351652966940662, "grad_norm": 0.31288754023635434, "learning_rate": 9.915870741776153e-06, "loss": 0.2874, "step": 2169 }, { "epoch": 0.17359652806943862, "grad_norm": 0.29586617821913547, "learning_rate": 9.915752375367681e-06, "loss": 0.3099, "step": 2170 }, { "epoch": 0.1736765264694706, "grad_norm": 0.3282065165377944, "learning_rate": 9.915633926456874e-06, "loss": 0.3029, "step": 2171 }, { "epoch": 0.1737565248695026, "grad_norm": 0.4684352255002045, "learning_rate": 9.915515395045715e-06, "loss": 0.2777, "step": 2172 }, { "epoch": 0.1738365232695346, "grad_norm": 0.32252223255515816, "learning_rate": 9.915396781136197e-06, "loss": 0.2889, "step": 2173 }, { "epoch": 0.17391652166956661, "grad_norm": 0.25721943591637264, "learning_rate": 9.91527808473031e-06, "loss": 0.3229, "step": 2174 }, { "epoch": 0.17399652006959862, "grad_norm": 0.32343137010678946, "learning_rate": 9.91515930583004e-06, "loss": 0.3073, "step": 2175 }, { "epoch": 0.1740765184696306, "grad_norm": 0.5722792524137117, "learning_rate": 9.91504044443739e-06, "loss": 0.279, "step": 2176 }, { "epoch": 0.1741565168696626, "grad_norm": 0.19970854531086682, "learning_rate": 9.914921500554347e-06, "loss": 0.3548, "step": 2177 }, { "epoch": 0.1742365152696946, "grad_norm": 0.3349355759935075, "learning_rate": 9.914802474182912e-06, "loss": 0.295, "step": 2178 }, { "epoch": 0.1743165136697266, "grad_norm": 0.24411384223195468, "learning_rate": 9.914683365325083e-06, "loss": 0.3389, "step": 2179 }, { "epoch": 0.1743965120697586, "grad_norm": 0.3269832203187113, "learning_rate": 9.914564173982856e-06, "loss": 0.2615, "step": 2180 }, { "epoch": 0.1744765104697906, "grad_norm": 0.3180765032347907, "learning_rate": 9.914444900158234e-06, "loss": 0.2748, "step": 2181 }, { "epoch": 0.1745565088698226, "grad_norm": 0.3444980005956865, "learning_rate": 9.914325543853216e-06, "loss": 0.2573, "step": 2182 }, { "epoch": 0.1746365072698546, "grad_norm": 0.3414980646435654, "learning_rate": 9.914206105069806e-06, "loss": 0.299, "step": 2183 }, { "epoch": 0.1747165056698866, "grad_norm": 0.2821911036885186, "learning_rate": 9.91408658381001e-06, "loss": 0.3067, "step": 2184 }, { "epoch": 0.1747965040699186, "grad_norm": 0.2773653704557142, "learning_rate": 9.913966980075834e-06, "loss": 0.3194, "step": 2185 }, { "epoch": 0.1748765024699506, "grad_norm": 0.2585870690064825, "learning_rate": 9.913847293869286e-06, "loss": 0.3259, "step": 2186 }, { "epoch": 0.1749565008699826, "grad_norm": 0.3259601159768096, "learning_rate": 9.91372752519237e-06, "loss": 0.3211, "step": 2187 }, { "epoch": 0.1750364992700146, "grad_norm": 0.34908121210768117, "learning_rate": 9.913607674047102e-06, "loss": 0.2756, "step": 2188 }, { "epoch": 0.1751164976700466, "grad_norm": 0.33014016279699515, "learning_rate": 9.91348774043549e-06, "loss": 0.287, "step": 2189 }, { "epoch": 0.1751964960700786, "grad_norm": 0.32849560365908476, "learning_rate": 9.913367724359548e-06, "loss": 0.2913, "step": 2190 }, { "epoch": 0.1752764944701106, "grad_norm": 0.3837399282874312, "learning_rate": 9.91324762582129e-06, "loss": 0.3003, "step": 2191 }, { "epoch": 0.1753564928701426, "grad_norm": 0.3108473247678813, "learning_rate": 9.913127444822732e-06, "loss": 0.2785, "step": 2192 }, { "epoch": 0.1754364912701746, "grad_norm": 0.30946056425886553, "learning_rate": 9.91300718136589e-06, "loss": 0.3177, "step": 2193 }, { "epoch": 0.17551648967020658, "grad_norm": 0.2122239140868899, "learning_rate": 9.912886835452783e-06, "loss": 0.3556, "step": 2194 }, { "epoch": 0.1755964880702386, "grad_norm": 0.3349836023569884, "learning_rate": 9.912766407085432e-06, "loss": 0.2818, "step": 2195 }, { "epoch": 0.1756764864702706, "grad_norm": 0.3308669166261636, "learning_rate": 9.912645896265858e-06, "loss": 0.2892, "step": 2196 }, { "epoch": 0.1757564848703026, "grad_norm": 0.2930554454074347, "learning_rate": 9.912525302996081e-06, "loss": 0.2817, "step": 2197 }, { "epoch": 0.1758364832703346, "grad_norm": 0.32796863130746, "learning_rate": 9.912404627278128e-06, "loss": 0.2665, "step": 2198 }, { "epoch": 0.17591648167036658, "grad_norm": 0.3623872542284336, "learning_rate": 9.91228386911402e-06, "loss": 0.2783, "step": 2199 }, { "epoch": 0.1759964800703986, "grad_norm": 0.28351562454059315, "learning_rate": 9.91216302850579e-06, "loss": 0.3174, "step": 2200 }, { "epoch": 0.1760764784704306, "grad_norm": 0.28471547802592956, "learning_rate": 9.912042105455462e-06, "loss": 0.3494, "step": 2201 }, { "epoch": 0.1761564768704626, "grad_norm": 0.3050931235469551, "learning_rate": 9.911921099965066e-06, "loss": 0.2967, "step": 2202 }, { "epoch": 0.1762364752704946, "grad_norm": 0.331954750263589, "learning_rate": 9.911800012036633e-06, "loss": 0.2696, "step": 2203 }, { "epoch": 0.17631647367052658, "grad_norm": 0.2633551953189909, "learning_rate": 9.911678841672196e-06, "loss": 0.3294, "step": 2204 }, { "epoch": 0.17639647207055859, "grad_norm": 0.3647429766984875, "learning_rate": 9.911557588873787e-06, "loss": 0.2876, "step": 2205 }, { "epoch": 0.1764764704705906, "grad_norm": 0.252049420944994, "learning_rate": 9.911436253643445e-06, "loss": 0.3428, "step": 2206 }, { "epoch": 0.1765564688706226, "grad_norm": 0.3112973492211981, "learning_rate": 9.911314835983202e-06, "loss": 0.2766, "step": 2207 }, { "epoch": 0.17663646727065457, "grad_norm": 0.2940265450023907, "learning_rate": 9.911193335895095e-06, "loss": 0.3167, "step": 2208 }, { "epoch": 0.17671646567068658, "grad_norm": 0.2620645115030399, "learning_rate": 9.911071753381168e-06, "loss": 0.3302, "step": 2209 }, { "epoch": 0.17679646407071858, "grad_norm": 0.3055631349597738, "learning_rate": 9.910950088443455e-06, "loss": 0.2985, "step": 2210 }, { "epoch": 0.1768764624707506, "grad_norm": 0.33723422007601406, "learning_rate": 9.910828341084006e-06, "loss": 0.2738, "step": 2211 }, { "epoch": 0.1769564608707826, "grad_norm": 0.255736892567754, "learning_rate": 9.91070651130486e-06, "loss": 0.351, "step": 2212 }, { "epoch": 0.17703645927081457, "grad_norm": 0.2823110740658937, "learning_rate": 9.91058459910806e-06, "loss": 0.3351, "step": 2213 }, { "epoch": 0.17711645767084658, "grad_norm": 0.3605496428016245, "learning_rate": 9.910462604495655e-06, "loss": 0.2656, "step": 2214 }, { "epoch": 0.17719645607087858, "grad_norm": 0.22269199338176454, "learning_rate": 9.910340527469692e-06, "loss": 0.3578, "step": 2215 }, { "epoch": 0.1772764544709106, "grad_norm": 0.36646975328893466, "learning_rate": 9.910218368032219e-06, "loss": 0.2822, "step": 2216 }, { "epoch": 0.1773564528709426, "grad_norm": 0.3654899343414024, "learning_rate": 9.910096126185286e-06, "loss": 0.3041, "step": 2217 }, { "epoch": 0.17743645127097457, "grad_norm": 0.30592231878782594, "learning_rate": 9.909973801930946e-06, "loss": 0.2759, "step": 2218 }, { "epoch": 0.17751644967100658, "grad_norm": 0.33404706369006754, "learning_rate": 9.90985139527125e-06, "loss": 0.3037, "step": 2219 }, { "epoch": 0.17759644807103858, "grad_norm": 0.32606663944582154, "learning_rate": 9.909728906208254e-06, "loss": 0.3073, "step": 2220 }, { "epoch": 0.1776764464710706, "grad_norm": 0.3051769727405732, "learning_rate": 9.909606334744013e-06, "loss": 0.2745, "step": 2221 }, { "epoch": 0.17775644487110256, "grad_norm": 0.24122210105408814, "learning_rate": 9.909483680880585e-06, "loss": 0.3296, "step": 2222 }, { "epoch": 0.17783644327113457, "grad_norm": 0.394209021182731, "learning_rate": 9.909360944620027e-06, "loss": 0.3157, "step": 2223 }, { "epoch": 0.17791644167116658, "grad_norm": 0.4478452126833092, "learning_rate": 9.909238125964403e-06, "loss": 0.2772, "step": 2224 }, { "epoch": 0.17799644007119858, "grad_norm": 0.24278381621843823, "learning_rate": 9.909115224915768e-06, "loss": 0.3057, "step": 2225 }, { "epoch": 0.17807643847123059, "grad_norm": 0.3201935677327144, "learning_rate": 9.908992241476189e-06, "loss": 0.2961, "step": 2226 }, { "epoch": 0.17815643687126256, "grad_norm": 0.2982477515771541, "learning_rate": 9.90886917564773e-06, "loss": 0.3123, "step": 2227 }, { "epoch": 0.17823643527129457, "grad_norm": 0.2977213542669658, "learning_rate": 9.908746027432453e-06, "loss": 0.2883, "step": 2228 }, { "epoch": 0.17831643367132657, "grad_norm": 0.33105000203470697, "learning_rate": 9.908622796832427e-06, "loss": 0.2722, "step": 2229 }, { "epoch": 0.17839643207135858, "grad_norm": 0.2887265699872081, "learning_rate": 9.908499483849723e-06, "loss": 0.3036, "step": 2230 }, { "epoch": 0.17847643047139058, "grad_norm": 0.31224244102902876, "learning_rate": 9.908376088486407e-06, "loss": 0.3219, "step": 2231 }, { "epoch": 0.17855642887142256, "grad_norm": 0.321798343767336, "learning_rate": 9.908252610744552e-06, "loss": 0.2768, "step": 2232 }, { "epoch": 0.17863642727145457, "grad_norm": 0.27482416388292685, "learning_rate": 9.908129050626228e-06, "loss": 0.3041, "step": 2233 }, { "epoch": 0.17871642567148657, "grad_norm": 0.2904326813932616, "learning_rate": 9.90800540813351e-06, "loss": 0.3088, "step": 2234 }, { "epoch": 0.17879642407151858, "grad_norm": 0.335847530744689, "learning_rate": 9.907881683268472e-06, "loss": 0.2777, "step": 2235 }, { "epoch": 0.17887642247155056, "grad_norm": 0.2821625580121489, "learning_rate": 9.907757876033193e-06, "loss": 0.3041, "step": 2236 }, { "epoch": 0.17895642087158256, "grad_norm": 0.2846221076025696, "learning_rate": 9.90763398642975e-06, "loss": 0.2897, "step": 2237 }, { "epoch": 0.17903641927161457, "grad_norm": 0.29549203407902536, "learning_rate": 9.907510014460222e-06, "loss": 0.3196, "step": 2238 }, { "epoch": 0.17911641767164657, "grad_norm": 0.27495954725428484, "learning_rate": 9.907385960126689e-06, "loss": 0.3042, "step": 2239 }, { "epoch": 0.17919641607167858, "grad_norm": 0.3105245894150985, "learning_rate": 9.907261823431236e-06, "loss": 0.2705, "step": 2240 }, { "epoch": 0.17927641447171055, "grad_norm": 0.3228019357195032, "learning_rate": 9.907137604375941e-06, "loss": 0.3013, "step": 2241 }, { "epoch": 0.17935641287174256, "grad_norm": 0.3006033171754618, "learning_rate": 9.907013302962893e-06, "loss": 0.312, "step": 2242 }, { "epoch": 0.17943641127177457, "grad_norm": 0.30520327028996524, "learning_rate": 9.906888919194178e-06, "loss": 0.3038, "step": 2243 }, { "epoch": 0.17951640967180657, "grad_norm": 0.3354768103624835, "learning_rate": 9.90676445307188e-06, "loss": 0.2937, "step": 2244 }, { "epoch": 0.17959640807183858, "grad_norm": 0.2764242095226642, "learning_rate": 9.906639904598092e-06, "loss": 0.3007, "step": 2245 }, { "epoch": 0.17967640647187055, "grad_norm": 0.40074569474046123, "learning_rate": 9.906515273774903e-06, "loss": 0.3466, "step": 2246 }, { "epoch": 0.17975640487190256, "grad_norm": 0.30842910390571976, "learning_rate": 9.906390560604404e-06, "loss": 0.277, "step": 2247 }, { "epoch": 0.17983640327193456, "grad_norm": 0.24426128626942076, "learning_rate": 9.906265765088689e-06, "loss": 0.3228, "step": 2248 }, { "epoch": 0.17991640167196657, "grad_norm": 0.28849825786981287, "learning_rate": 9.906140887229852e-06, "loss": 0.3513, "step": 2249 }, { "epoch": 0.17999640007199855, "grad_norm": 0.3029006063459155, "learning_rate": 9.906015927029989e-06, "loss": 0.3012, "step": 2250 }, { "epoch": 0.18007639847203055, "grad_norm": 0.2655859289907593, "learning_rate": 9.905890884491196e-06, "loss": 0.3121, "step": 2251 }, { "epoch": 0.18015639687206256, "grad_norm": 0.26261808571779227, "learning_rate": 9.905765759615573e-06, "loss": 0.3164, "step": 2252 }, { "epoch": 0.18023639527209456, "grad_norm": 0.28710893334250004, "learning_rate": 9.905640552405222e-06, "loss": 0.3226, "step": 2253 }, { "epoch": 0.18031639367212657, "grad_norm": 0.3055622488945309, "learning_rate": 9.90551526286224e-06, "loss": 0.3134, "step": 2254 }, { "epoch": 0.18039639207215855, "grad_norm": 0.38955677908043257, "learning_rate": 9.905389890988734e-06, "loss": 0.2805, "step": 2255 }, { "epoch": 0.18047639047219055, "grad_norm": 0.3218900597039972, "learning_rate": 9.905264436786805e-06, "loss": 0.3014, "step": 2256 }, { "epoch": 0.18055638887222256, "grad_norm": 0.3083594158007672, "learning_rate": 9.90513890025856e-06, "loss": 0.3019, "step": 2257 }, { "epoch": 0.18063638727225456, "grad_norm": 0.21367763820039123, "learning_rate": 9.905013281406103e-06, "loss": 0.3614, "step": 2258 }, { "epoch": 0.18071638567228657, "grad_norm": 0.294859138635244, "learning_rate": 9.904887580231548e-06, "loss": 0.3055, "step": 2259 }, { "epoch": 0.18079638407231854, "grad_norm": 0.30072604780617934, "learning_rate": 9.904761796737002e-06, "loss": 0.3064, "step": 2260 }, { "epoch": 0.18087638247235055, "grad_norm": 0.34484316610442517, "learning_rate": 9.904635930924573e-06, "loss": 0.2805, "step": 2261 }, { "epoch": 0.18095638087238256, "grad_norm": 0.31854529372785434, "learning_rate": 9.904509982796377e-06, "loss": 0.2563, "step": 2262 }, { "epoch": 0.18103637927241456, "grad_norm": 0.29687984292796754, "learning_rate": 9.904383952354528e-06, "loss": 0.2956, "step": 2263 }, { "epoch": 0.18111637767244654, "grad_norm": 0.3098839172490386, "learning_rate": 9.90425783960114e-06, "loss": 0.3195, "step": 2264 }, { "epoch": 0.18119637607247854, "grad_norm": 0.32092088354044235, "learning_rate": 9.904131644538327e-06, "loss": 0.3098, "step": 2265 }, { "epoch": 0.18127637447251055, "grad_norm": 0.3516159853967498, "learning_rate": 9.904005367168212e-06, "loss": 0.2808, "step": 2266 }, { "epoch": 0.18135637287254255, "grad_norm": 0.36404985927140604, "learning_rate": 9.903879007492912e-06, "loss": 0.2952, "step": 2267 }, { "epoch": 0.18143637127257456, "grad_norm": 0.37202064436635934, "learning_rate": 9.903752565514546e-06, "loss": 0.2979, "step": 2268 }, { "epoch": 0.18151636967260654, "grad_norm": 0.3206488979925511, "learning_rate": 9.90362604123524e-06, "loss": 0.2947, "step": 2269 }, { "epoch": 0.18159636807263854, "grad_norm": 0.3734315180807726, "learning_rate": 9.903499434657113e-06, "loss": 0.2745, "step": 2270 }, { "epoch": 0.18167636647267055, "grad_norm": 0.3032924354676774, "learning_rate": 9.903372745782294e-06, "loss": 0.3062, "step": 2271 }, { "epoch": 0.18175636487270255, "grad_norm": 0.29645708121221287, "learning_rate": 9.903245974612906e-06, "loss": 0.25, "step": 2272 }, { "epoch": 0.18183636327273456, "grad_norm": 0.28676001952958413, "learning_rate": 9.903119121151079e-06, "loss": 0.3331, "step": 2273 }, { "epoch": 0.18191636167276654, "grad_norm": 0.2536170025660417, "learning_rate": 9.90299218539894e-06, "loss": 0.3305, "step": 2274 }, { "epoch": 0.18199636007279854, "grad_norm": 0.3522000072916745, "learning_rate": 9.90286516735862e-06, "loss": 0.2591, "step": 2275 }, { "epoch": 0.18207635847283055, "grad_norm": 0.24633740218420444, "learning_rate": 9.902738067032254e-06, "loss": 0.3333, "step": 2276 }, { "epoch": 0.18215635687286255, "grad_norm": 0.30346312674543047, "learning_rate": 9.90261088442197e-06, "loss": 0.312, "step": 2277 }, { "epoch": 0.18223635527289453, "grad_norm": 0.3196980307588306, "learning_rate": 9.902483619529905e-06, "loss": 0.3131, "step": 2278 }, { "epoch": 0.18231635367292653, "grad_norm": 0.27860081939721126, "learning_rate": 9.902356272358196e-06, "loss": 0.3342, "step": 2279 }, { "epoch": 0.18239635207295854, "grad_norm": 0.3130070210387328, "learning_rate": 9.902228842908979e-06, "loss": 0.2538, "step": 2280 }, { "epoch": 0.18247635047299055, "grad_norm": 0.3347523996311443, "learning_rate": 9.902101331184391e-06, "loss": 0.2607, "step": 2281 }, { "epoch": 0.18255634887302255, "grad_norm": 0.27729371241521844, "learning_rate": 9.901973737186576e-06, "loss": 0.3363, "step": 2282 }, { "epoch": 0.18263634727305453, "grad_norm": 0.3325465729213715, "learning_rate": 9.901846060917673e-06, "loss": 0.3153, "step": 2283 }, { "epoch": 0.18271634567308653, "grad_norm": 0.3145602390673542, "learning_rate": 9.901718302379823e-06, "loss": 0.3193, "step": 2284 }, { "epoch": 0.18279634407311854, "grad_norm": 0.39470343625608306, "learning_rate": 9.901590461575175e-06, "loss": 0.2666, "step": 2285 }, { "epoch": 0.18287634247315054, "grad_norm": 0.29525973471194455, "learning_rate": 9.901462538505871e-06, "loss": 0.3126, "step": 2286 }, { "epoch": 0.18295634087318255, "grad_norm": 0.3072257293867776, "learning_rate": 9.901334533174058e-06, "loss": 0.2806, "step": 2287 }, { "epoch": 0.18303633927321453, "grad_norm": 0.2731369512213435, "learning_rate": 9.901206445581886e-06, "loss": 0.2875, "step": 2288 }, { "epoch": 0.18311633767324653, "grad_norm": 0.36400054882727756, "learning_rate": 9.901078275731504e-06, "loss": 0.274, "step": 2289 }, { "epoch": 0.18319633607327854, "grad_norm": 0.28652766435324106, "learning_rate": 9.900950023625064e-06, "loss": 0.3068, "step": 2290 }, { "epoch": 0.18327633447331054, "grad_norm": 0.34934296758984823, "learning_rate": 9.900821689264715e-06, "loss": 0.2897, "step": 2291 }, { "epoch": 0.18335633287334252, "grad_norm": 0.4846501409415694, "learning_rate": 9.900693272652617e-06, "loss": 0.2756, "step": 2292 }, { "epoch": 0.18343633127337453, "grad_norm": 0.32872762804784045, "learning_rate": 9.90056477379092e-06, "loss": 0.2766, "step": 2293 }, { "epoch": 0.18351632967340653, "grad_norm": 0.33164670451439615, "learning_rate": 9.900436192681782e-06, "loss": 0.3018, "step": 2294 }, { "epoch": 0.18359632807343854, "grad_norm": 0.273337330998633, "learning_rate": 9.90030752932736e-06, "loss": 0.2867, "step": 2295 }, { "epoch": 0.18367632647347054, "grad_norm": 0.3455921633556097, "learning_rate": 9.900178783729817e-06, "loss": 0.2884, "step": 2296 }, { "epoch": 0.18375632487350252, "grad_norm": 0.36310582356318455, "learning_rate": 9.90004995589131e-06, "loss": 0.2635, "step": 2297 }, { "epoch": 0.18383632327353452, "grad_norm": 0.2489700439381916, "learning_rate": 9.899921045814002e-06, "loss": 0.331, "step": 2298 }, { "epoch": 0.18391632167356653, "grad_norm": 0.30137752427878106, "learning_rate": 9.899792053500059e-06, "loss": 0.2877, "step": 2299 }, { "epoch": 0.18399632007359853, "grad_norm": 0.2933254651740342, "learning_rate": 9.899662978951643e-06, "loss": 0.2926, "step": 2300 }, { "epoch": 0.18407631847363054, "grad_norm": 0.34122590759341387, "learning_rate": 9.899533822170922e-06, "loss": 0.2709, "step": 2301 }, { "epoch": 0.18415631687366252, "grad_norm": 0.28604807662561615, "learning_rate": 9.899404583160064e-06, "loss": 0.3031, "step": 2302 }, { "epoch": 0.18423631527369452, "grad_norm": 0.2814917599428059, "learning_rate": 9.899275261921236e-06, "loss": 0.298, "step": 2303 }, { "epoch": 0.18431631367372653, "grad_norm": 0.29310475781460477, "learning_rate": 9.899145858456609e-06, "loss": 0.3053, "step": 2304 }, { "epoch": 0.18439631207375853, "grad_norm": 0.3081457235605028, "learning_rate": 9.899016372768355e-06, "loss": 0.2816, "step": 2305 }, { "epoch": 0.1844763104737905, "grad_norm": 0.3082793240269065, "learning_rate": 9.898886804858648e-06, "loss": 0.2713, "step": 2306 }, { "epoch": 0.18455630887382252, "grad_norm": 0.3321396278641944, "learning_rate": 9.898757154729663e-06, "loss": 0.2787, "step": 2307 }, { "epoch": 0.18463630727385452, "grad_norm": 0.3466185324405436, "learning_rate": 9.898627422383575e-06, "loss": 0.2975, "step": 2308 }, { "epoch": 0.18471630567388653, "grad_norm": 0.3422615722618602, "learning_rate": 9.898497607822561e-06, "loss": 0.281, "step": 2309 }, { "epoch": 0.18479630407391853, "grad_norm": 0.2614109809738245, "learning_rate": 9.8983677110488e-06, "loss": 0.3467, "step": 2310 }, { "epoch": 0.1848763024739505, "grad_norm": 0.3204206366719577, "learning_rate": 9.898237732064472e-06, "loss": 0.2933, "step": 2311 }, { "epoch": 0.18495630087398252, "grad_norm": 0.5003514894176285, "learning_rate": 9.898107670871756e-06, "loss": 0.2933, "step": 2312 }, { "epoch": 0.18503629927401452, "grad_norm": 0.2785755067958378, "learning_rate": 9.897977527472842e-06, "loss": 0.2984, "step": 2313 }, { "epoch": 0.18511629767404653, "grad_norm": 0.3972639137161272, "learning_rate": 9.897847301869907e-06, "loss": 0.2993, "step": 2314 }, { "epoch": 0.18519629607407853, "grad_norm": 0.2606780564175842, "learning_rate": 9.89771699406514e-06, "loss": 0.3337, "step": 2315 }, { "epoch": 0.1852762944741105, "grad_norm": 0.2498395521344486, "learning_rate": 9.89758660406073e-06, "loss": 0.3398, "step": 2316 }, { "epoch": 0.18535629287414251, "grad_norm": 0.2730345002102742, "learning_rate": 9.89745613185886e-06, "loss": 0.3155, "step": 2317 }, { "epoch": 0.18543629127417452, "grad_norm": 0.3744686115512236, "learning_rate": 9.897325577461721e-06, "loss": 0.2704, "step": 2318 }, { "epoch": 0.18551628967420652, "grad_norm": 0.2984615541065681, "learning_rate": 9.897194940871509e-06, "loss": 0.3177, "step": 2319 }, { "epoch": 0.1855962880742385, "grad_norm": 0.3209764472706581, "learning_rate": 9.897064222090411e-06, "loss": 0.2944, "step": 2320 }, { "epoch": 0.1856762864742705, "grad_norm": 0.2726405223376503, "learning_rate": 9.896933421120623e-06, "loss": 0.3089, "step": 2321 }, { "epoch": 0.1857562848743025, "grad_norm": 0.3121191102482848, "learning_rate": 9.89680253796434e-06, "loss": 0.2605, "step": 2322 }, { "epoch": 0.18583628327433452, "grad_norm": 0.25571568023466, "learning_rate": 9.89667157262376e-06, "loss": 0.3052, "step": 2323 }, { "epoch": 0.18591628167436652, "grad_norm": 0.25807789166266315, "learning_rate": 9.89654052510108e-06, "loss": 0.3427, "step": 2324 }, { "epoch": 0.1859962800743985, "grad_norm": 0.7118819727249713, "learning_rate": 9.896409395398499e-06, "loss": 0.3064, "step": 2325 }, { "epoch": 0.1860762784744305, "grad_norm": 0.3534721975326911, "learning_rate": 9.896278183518216e-06, "loss": 0.2977, "step": 2326 }, { "epoch": 0.1861562768744625, "grad_norm": 0.30787784279364555, "learning_rate": 9.896146889462438e-06, "loss": 0.2865, "step": 2327 }, { "epoch": 0.18623627527449452, "grad_norm": 0.33335676465309655, "learning_rate": 9.896015513233364e-06, "loss": 0.2865, "step": 2328 }, { "epoch": 0.18631627367452652, "grad_norm": 0.26473899251373073, "learning_rate": 9.895884054833202e-06, "loss": 0.2899, "step": 2329 }, { "epoch": 0.1863962720745585, "grad_norm": 0.25715972314468105, "learning_rate": 9.895752514264156e-06, "loss": 0.3261, "step": 2330 }, { "epoch": 0.1864762704745905, "grad_norm": 0.2672529629664939, "learning_rate": 9.895620891528437e-06, "loss": 0.3356, "step": 2331 }, { "epoch": 0.1865562688746225, "grad_norm": 0.3391599346840918, "learning_rate": 9.895489186628248e-06, "loss": 0.2819, "step": 2332 }, { "epoch": 0.18663626727465452, "grad_norm": 0.27913093583156906, "learning_rate": 9.895357399565806e-06, "loss": 0.3144, "step": 2333 }, { "epoch": 0.1867162656746865, "grad_norm": 0.2544007551203407, "learning_rate": 9.89522553034332e-06, "loss": 0.3348, "step": 2334 }, { "epoch": 0.1867962640747185, "grad_norm": 0.4697885513289864, "learning_rate": 9.895093578963002e-06, "loss": 0.289, "step": 2335 }, { "epoch": 0.1868762624747505, "grad_norm": 0.22722016232959383, "learning_rate": 9.894961545427069e-06, "loss": 0.362, "step": 2336 }, { "epoch": 0.1869562608747825, "grad_norm": 0.3023447793126911, "learning_rate": 9.894829429737734e-06, "loss": 0.2609, "step": 2337 }, { "epoch": 0.18703625927481451, "grad_norm": 0.29337839797537213, "learning_rate": 9.894697231897216e-06, "loss": 0.3222, "step": 2338 }, { "epoch": 0.1871162576748465, "grad_norm": 0.34616709657520256, "learning_rate": 9.894564951907737e-06, "loss": 0.2818, "step": 2339 }, { "epoch": 0.1871962560748785, "grad_norm": 0.35697138727337746, "learning_rate": 9.894432589771512e-06, "loss": 0.2914, "step": 2340 }, { "epoch": 0.1872762544749105, "grad_norm": 0.2999035465249017, "learning_rate": 9.894300145490763e-06, "loss": 0.3108, "step": 2341 }, { "epoch": 0.1873562528749425, "grad_norm": 0.30889385836091476, "learning_rate": 9.894167619067715e-06, "loss": 0.3056, "step": 2342 }, { "epoch": 0.1874362512749745, "grad_norm": 0.4227377197747017, "learning_rate": 9.894035010504592e-06, "loss": 0.2708, "step": 2343 }, { "epoch": 0.1875162496750065, "grad_norm": 0.3051002526055171, "learning_rate": 9.893902319803619e-06, "loss": 0.334, "step": 2344 }, { "epoch": 0.1875962480750385, "grad_norm": 0.3437984879597763, "learning_rate": 9.893769546967023e-06, "loss": 0.2677, "step": 2345 }, { "epoch": 0.1876762464750705, "grad_norm": 0.30737032156345057, "learning_rate": 9.89363669199703e-06, "loss": 0.2619, "step": 2346 }, { "epoch": 0.1877562448751025, "grad_norm": 0.29070417097587087, "learning_rate": 9.893503754895874e-06, "loss": 0.3198, "step": 2347 }, { "epoch": 0.18783624327513448, "grad_norm": 0.3386429972378958, "learning_rate": 9.893370735665784e-06, "loss": 0.2841, "step": 2348 }, { "epoch": 0.1879162416751665, "grad_norm": 0.323953695603506, "learning_rate": 9.893237634308995e-06, "loss": 0.2705, "step": 2349 }, { "epoch": 0.1879962400751985, "grad_norm": 0.30627914516461774, "learning_rate": 9.893104450827736e-06, "loss": 0.2691, "step": 2350 }, { "epoch": 0.1880762384752305, "grad_norm": 0.3227794432786858, "learning_rate": 9.892971185224244e-06, "loss": 0.2842, "step": 2351 }, { "epoch": 0.1881562368752625, "grad_norm": 0.3123943226079266, "learning_rate": 9.892837837500758e-06, "loss": 0.3249, "step": 2352 }, { "epoch": 0.18823623527529448, "grad_norm": 0.3730810029571364, "learning_rate": 9.892704407659514e-06, "loss": 0.3192, "step": 2353 }, { "epoch": 0.1883162336753265, "grad_norm": 0.29071588837833817, "learning_rate": 9.892570895702753e-06, "loss": 0.2947, "step": 2354 }, { "epoch": 0.1883962320753585, "grad_norm": 0.2526784826509826, "learning_rate": 9.892437301632713e-06, "loss": 0.3296, "step": 2355 }, { "epoch": 0.1884762304753905, "grad_norm": 0.2949362515746845, "learning_rate": 9.892303625451639e-06, "loss": 0.3101, "step": 2356 }, { "epoch": 0.1885562288754225, "grad_norm": 0.31716934229551896, "learning_rate": 9.892169867161774e-06, "loss": 0.3018, "step": 2357 }, { "epoch": 0.18863622727545448, "grad_norm": 0.3822951576004735, "learning_rate": 9.89203602676536e-06, "loss": 0.2916, "step": 2358 }, { "epoch": 0.1887162256754865, "grad_norm": 0.31989735533302277, "learning_rate": 9.891902104264646e-06, "loss": 0.2584, "step": 2359 }, { "epoch": 0.1887962240755185, "grad_norm": 0.3060474495814552, "learning_rate": 9.891768099661881e-06, "loss": 0.2645, "step": 2360 }, { "epoch": 0.1888762224755505, "grad_norm": 0.24913457952373075, "learning_rate": 9.891634012959311e-06, "loss": 0.3386, "step": 2361 }, { "epoch": 0.18895622087558248, "grad_norm": 0.27129711263923684, "learning_rate": 9.891499844159187e-06, "loss": 0.3079, "step": 2362 }, { "epoch": 0.18903621927561448, "grad_norm": 0.2905646614786227, "learning_rate": 9.891365593263761e-06, "loss": 0.3122, "step": 2363 }, { "epoch": 0.1891162176756465, "grad_norm": 0.2894199628349873, "learning_rate": 9.891231260275287e-06, "loss": 0.2942, "step": 2364 }, { "epoch": 0.1891962160756785, "grad_norm": 0.32209857413200826, "learning_rate": 9.891096845196019e-06, "loss": 0.3086, "step": 2365 }, { "epoch": 0.1892762144757105, "grad_norm": 0.32710686043861265, "learning_rate": 9.890962348028213e-06, "loss": 0.2875, "step": 2366 }, { "epoch": 0.18935621287574247, "grad_norm": 0.3645381502356643, "learning_rate": 9.890827768774127e-06, "loss": 0.2717, "step": 2367 }, { "epoch": 0.18943621127577448, "grad_norm": 0.2655446107158337, "learning_rate": 9.890693107436018e-06, "loss": 0.3437, "step": 2368 }, { "epoch": 0.18951620967580649, "grad_norm": 0.2859137374141619, "learning_rate": 9.890558364016148e-06, "loss": 0.309, "step": 2369 }, { "epoch": 0.1895962080758385, "grad_norm": 0.2762738840085754, "learning_rate": 9.890423538516777e-06, "loss": 0.3335, "step": 2370 }, { "epoch": 0.1896762064758705, "grad_norm": 0.29211964401080076, "learning_rate": 9.890288630940168e-06, "loss": 0.2923, "step": 2371 }, { "epoch": 0.18975620487590247, "grad_norm": 0.34957818682927105, "learning_rate": 9.890153641288587e-06, "loss": 0.2969, "step": 2372 }, { "epoch": 0.18983620327593448, "grad_norm": 0.33519457500326516, "learning_rate": 9.890018569564298e-06, "loss": 0.2675, "step": 2373 }, { "epoch": 0.18991620167596648, "grad_norm": 0.27867395211981894, "learning_rate": 9.889883415769566e-06, "loss": 0.3519, "step": 2374 }, { "epoch": 0.1899962000759985, "grad_norm": 0.23001185492002293, "learning_rate": 9.889748179906661e-06, "loss": 0.3623, "step": 2375 }, { "epoch": 0.19007619847603047, "grad_norm": 0.2895844829859647, "learning_rate": 9.889612861977855e-06, "loss": 0.3085, "step": 2376 }, { "epoch": 0.19015619687606247, "grad_norm": 0.31458321005565115, "learning_rate": 9.889477461985415e-06, "loss": 0.2803, "step": 2377 }, { "epoch": 0.19023619527609448, "grad_norm": 0.2879545089244242, "learning_rate": 9.889341979931616e-06, "loss": 0.2932, "step": 2378 }, { "epoch": 0.19031619367612648, "grad_norm": 0.36676278845548094, "learning_rate": 9.889206415818733e-06, "loss": 0.2885, "step": 2379 }, { "epoch": 0.1903961920761585, "grad_norm": 0.3879702752753962, "learning_rate": 9.889070769649038e-06, "loss": 0.2852, "step": 2380 }, { "epoch": 0.19047619047619047, "grad_norm": 0.3546940590573756, "learning_rate": 9.88893504142481e-06, "loss": 0.2782, "step": 2381 }, { "epoch": 0.19055618887622247, "grad_norm": 0.29038723636169717, "learning_rate": 9.888799231148325e-06, "loss": 0.3129, "step": 2382 }, { "epoch": 0.19063618727625448, "grad_norm": 0.2791022606069941, "learning_rate": 9.888663338821864e-06, "loss": 0.3053, "step": 2383 }, { "epoch": 0.19071618567628648, "grad_norm": 0.22099306627951712, "learning_rate": 9.888527364447709e-06, "loss": 0.3685, "step": 2384 }, { "epoch": 0.1907961840763185, "grad_norm": 0.3378135439204717, "learning_rate": 9.888391308028138e-06, "loss": 0.2916, "step": 2385 }, { "epoch": 0.19087618247635046, "grad_norm": 0.2769700503993682, "learning_rate": 9.888255169565437e-06, "loss": 0.3282, "step": 2386 }, { "epoch": 0.19095618087638247, "grad_norm": 0.25621880233623057, "learning_rate": 9.888118949061891e-06, "loss": 0.317, "step": 2387 }, { "epoch": 0.19103617927641448, "grad_norm": 0.2865056182900127, "learning_rate": 9.887982646519784e-06, "loss": 0.3013, "step": 2388 }, { "epoch": 0.19111617767644648, "grad_norm": 0.3292030614921192, "learning_rate": 9.887846261941408e-06, "loss": 0.2976, "step": 2389 }, { "epoch": 0.19119617607647846, "grad_norm": 0.3504298429537061, "learning_rate": 9.88770979532905e-06, "loss": 0.2674, "step": 2390 }, { "epoch": 0.19127617447651046, "grad_norm": 0.3238198466391161, "learning_rate": 9.887573246684998e-06, "loss": 0.2853, "step": 2391 }, { "epoch": 0.19135617287654247, "grad_norm": 0.2987093593053379, "learning_rate": 9.887436616011546e-06, "loss": 0.2836, "step": 2392 }, { "epoch": 0.19143617127657447, "grad_norm": 0.34897537691218683, "learning_rate": 9.887299903310985e-06, "loss": 0.2515, "step": 2393 }, { "epoch": 0.19151616967660648, "grad_norm": 0.2598213466687978, "learning_rate": 9.887163108585612e-06, "loss": 0.3266, "step": 2394 }, { "epoch": 0.19159616807663846, "grad_norm": 0.2940560350387327, "learning_rate": 9.887026231837722e-06, "loss": 0.324, "step": 2395 }, { "epoch": 0.19167616647667046, "grad_norm": 0.2699714343161921, "learning_rate": 9.886889273069614e-06, "loss": 0.3323, "step": 2396 }, { "epoch": 0.19175616487670247, "grad_norm": 0.25873492320067437, "learning_rate": 9.886752232283582e-06, "loss": 0.3419, "step": 2397 }, { "epoch": 0.19183616327673447, "grad_norm": 0.31991465440170747, "learning_rate": 9.88661510948193e-06, "loss": 0.2953, "step": 2398 }, { "epoch": 0.19191616167676648, "grad_norm": 0.3249208478104891, "learning_rate": 9.886477904666958e-06, "loss": 0.3157, "step": 2399 }, { "epoch": 0.19199616007679846, "grad_norm": 0.3162527981849793, "learning_rate": 9.886340617840968e-06, "loss": 0.299, "step": 2400 }, { "epoch": 0.19207615847683046, "grad_norm": 0.2925548542448652, "learning_rate": 9.886203249006265e-06, "loss": 0.3166, "step": 2401 }, { "epoch": 0.19215615687686247, "grad_norm": 0.3108995328204289, "learning_rate": 9.886065798165156e-06, "loss": 0.2566, "step": 2402 }, { "epoch": 0.19223615527689447, "grad_norm": 0.35593089009064205, "learning_rate": 9.885928265319946e-06, "loss": 0.2697, "step": 2403 }, { "epoch": 0.19231615367692645, "grad_norm": 0.343831927440169, "learning_rate": 9.885790650472943e-06, "loss": 0.2809, "step": 2404 }, { "epoch": 0.19239615207695845, "grad_norm": 0.35112878965920985, "learning_rate": 9.885652953626456e-06, "loss": 0.2873, "step": 2405 }, { "epoch": 0.19247615047699046, "grad_norm": 0.32342997325176764, "learning_rate": 9.8855151747828e-06, "loss": 0.2982, "step": 2406 }, { "epoch": 0.19255614887702246, "grad_norm": 0.2912375937111901, "learning_rate": 9.885377313944284e-06, "loss": 0.3128, "step": 2407 }, { "epoch": 0.19263614727705447, "grad_norm": 0.32923896214245174, "learning_rate": 9.88523937111322e-06, "loss": 0.2906, "step": 2408 }, { "epoch": 0.19271614567708645, "grad_norm": 0.24363921099042216, "learning_rate": 9.885101346291928e-06, "loss": 0.3215, "step": 2409 }, { "epoch": 0.19279614407711845, "grad_norm": 0.287582024156925, "learning_rate": 9.884963239482721e-06, "loss": 0.2866, "step": 2410 }, { "epoch": 0.19287614247715046, "grad_norm": 0.28753461949843867, "learning_rate": 9.884825050687918e-06, "loss": 0.3066, "step": 2411 }, { "epoch": 0.19295614087718246, "grad_norm": 0.30170143136504596, "learning_rate": 9.884686779909837e-06, "loss": 0.3175, "step": 2412 }, { "epoch": 0.19303613927721447, "grad_norm": 0.3230072366447336, "learning_rate": 9.884548427150802e-06, "loss": 0.2757, "step": 2413 }, { "epoch": 0.19311613767724645, "grad_norm": 0.28449466880156626, "learning_rate": 9.884409992413131e-06, "loss": 0.3079, "step": 2414 }, { "epoch": 0.19319613607727845, "grad_norm": 0.3575550479600838, "learning_rate": 9.88427147569915e-06, "loss": 0.3009, "step": 2415 }, { "epoch": 0.19327613447731046, "grad_norm": 0.29136119323630466, "learning_rate": 9.884132877011183e-06, "loss": 0.2901, "step": 2416 }, { "epoch": 0.19335613287734246, "grad_norm": 0.2614443579218222, "learning_rate": 9.883994196351555e-06, "loss": 0.3291, "step": 2417 }, { "epoch": 0.19343613127737444, "grad_norm": 0.31207099013636763, "learning_rate": 9.883855433722596e-06, "loss": 0.3323, "step": 2418 }, { "epoch": 0.19351612967740645, "grad_norm": 0.32276161020074934, "learning_rate": 9.883716589126633e-06, "loss": 0.2771, "step": 2419 }, { "epoch": 0.19359612807743845, "grad_norm": 0.380370496908728, "learning_rate": 9.883577662565997e-06, "loss": 0.321, "step": 2420 }, { "epoch": 0.19367612647747046, "grad_norm": 0.29253021048072253, "learning_rate": 9.883438654043019e-06, "loss": 0.3281, "step": 2421 }, { "epoch": 0.19375612487750246, "grad_norm": 0.42989168636421377, "learning_rate": 9.883299563560032e-06, "loss": 0.2878, "step": 2422 }, { "epoch": 0.19383612327753444, "grad_norm": 0.33322170148868346, "learning_rate": 9.88316039111937e-06, "loss": 0.2946, "step": 2423 }, { "epoch": 0.19391612167756644, "grad_norm": 0.2712831929606459, "learning_rate": 9.883021136723372e-06, "loss": 0.3382, "step": 2424 }, { "epoch": 0.19399612007759845, "grad_norm": 0.350749527602192, "learning_rate": 9.88288180037437e-06, "loss": 0.3, "step": 2425 }, { "epoch": 0.19407611847763045, "grad_norm": 0.2893780578063207, "learning_rate": 9.882742382074707e-06, "loss": 0.3217, "step": 2426 }, { "epoch": 0.19415611687766246, "grad_norm": 0.3125377321530427, "learning_rate": 9.882602881826721e-06, "loss": 0.2906, "step": 2427 }, { "epoch": 0.19423611527769444, "grad_norm": 0.3458472019489112, "learning_rate": 9.882463299632753e-06, "loss": 0.2768, "step": 2428 }, { "epoch": 0.19431611367772644, "grad_norm": 0.2860308493709997, "learning_rate": 9.882323635495145e-06, "loss": 0.321, "step": 2429 }, { "epoch": 0.19439611207775845, "grad_norm": 0.3274497822709407, "learning_rate": 9.882183889416243e-06, "loss": 0.2694, "step": 2430 }, { "epoch": 0.19447611047779045, "grad_norm": 0.2867208036091842, "learning_rate": 9.882044061398393e-06, "loss": 0.3238, "step": 2431 }, { "epoch": 0.19455610887782243, "grad_norm": 0.24925430957835096, "learning_rate": 9.881904151443938e-06, "loss": 0.3083, "step": 2432 }, { "epoch": 0.19463610727785444, "grad_norm": 0.295593610447912, "learning_rate": 9.88176415955523e-06, "loss": 0.3291, "step": 2433 }, { "epoch": 0.19471610567788644, "grad_norm": 0.27584048332662, "learning_rate": 9.881624085734614e-06, "loss": 0.3289, "step": 2434 }, { "epoch": 0.19479610407791845, "grad_norm": 0.39569936141902073, "learning_rate": 9.881483929984446e-06, "loss": 0.279, "step": 2435 }, { "epoch": 0.19487610247795045, "grad_norm": 0.3996419460572606, "learning_rate": 9.881343692307076e-06, "loss": 0.2771, "step": 2436 }, { "epoch": 0.19495610087798243, "grad_norm": 0.3519500919181083, "learning_rate": 9.881203372704857e-06, "loss": 0.2942, "step": 2437 }, { "epoch": 0.19503609927801444, "grad_norm": 0.2560451991341518, "learning_rate": 9.881062971180146e-06, "loss": 0.3403, "step": 2438 }, { "epoch": 0.19511609767804644, "grad_norm": 0.32695682401607506, "learning_rate": 9.880922487735295e-06, "loss": 0.2877, "step": 2439 }, { "epoch": 0.19519609607807845, "grad_norm": 0.33161277507138803, "learning_rate": 9.880781922372669e-06, "loss": 0.2735, "step": 2440 }, { "epoch": 0.19527609447811045, "grad_norm": 0.24206560368889032, "learning_rate": 9.88064127509462e-06, "loss": 0.3205, "step": 2441 }, { "epoch": 0.19535609287814243, "grad_norm": 0.2982483040386578, "learning_rate": 9.880500545903513e-06, "loss": 0.312, "step": 2442 }, { "epoch": 0.19543609127817443, "grad_norm": 0.2862445243547295, "learning_rate": 9.880359734801708e-06, "loss": 0.3135, "step": 2443 }, { "epoch": 0.19551608967820644, "grad_norm": 0.29525374985522085, "learning_rate": 9.880218841791568e-06, "loss": 0.2946, "step": 2444 }, { "epoch": 0.19559608807823844, "grad_norm": 0.2849388938750511, "learning_rate": 9.880077866875459e-06, "loss": 0.2966, "step": 2445 }, { "epoch": 0.19567608647827042, "grad_norm": 0.33532488534431437, "learning_rate": 9.879936810055746e-06, "loss": 0.2776, "step": 2446 }, { "epoch": 0.19575608487830243, "grad_norm": 0.3047728567057308, "learning_rate": 9.879795671334798e-06, "loss": 0.2652, "step": 2447 }, { "epoch": 0.19583608327833443, "grad_norm": 0.31141759485667686, "learning_rate": 9.87965445071498e-06, "loss": 0.2548, "step": 2448 }, { "epoch": 0.19591608167836644, "grad_norm": 0.31076067514272154, "learning_rate": 9.879513148198668e-06, "loss": 0.2816, "step": 2449 }, { "epoch": 0.19599608007839844, "grad_norm": 0.25037213905934613, "learning_rate": 9.879371763788228e-06, "loss": 0.3161, "step": 2450 }, { "epoch": 0.19607607847843042, "grad_norm": 0.24937138050144456, "learning_rate": 9.879230297486034e-06, "loss": 0.3222, "step": 2451 }, { "epoch": 0.19615607687846243, "grad_norm": 0.2802995243055443, "learning_rate": 9.879088749294463e-06, "loss": 0.2934, "step": 2452 }, { "epoch": 0.19623607527849443, "grad_norm": 0.3506298073580569, "learning_rate": 9.878947119215889e-06, "loss": 0.2867, "step": 2453 }, { "epoch": 0.19631607367852644, "grad_norm": 0.3255369697273748, "learning_rate": 9.878805407252689e-06, "loss": 0.2857, "step": 2454 }, { "epoch": 0.19639607207855844, "grad_norm": 0.2893414875906338, "learning_rate": 9.87866361340724e-06, "loss": 0.2947, "step": 2455 }, { "epoch": 0.19647607047859042, "grad_norm": 0.39805723841441526, "learning_rate": 9.878521737681925e-06, "loss": 0.2862, "step": 2456 }, { "epoch": 0.19655606887862243, "grad_norm": 0.41990772557645983, "learning_rate": 9.878379780079122e-06, "loss": 0.2775, "step": 2457 }, { "epoch": 0.19663606727865443, "grad_norm": 0.3020374725086929, "learning_rate": 9.878237740601215e-06, "loss": 0.2982, "step": 2458 }, { "epoch": 0.19671606567868644, "grad_norm": 0.3162887489576106, "learning_rate": 9.878095619250588e-06, "loss": 0.2849, "step": 2459 }, { "epoch": 0.1967960640787184, "grad_norm": 0.2780042268219262, "learning_rate": 9.877953416029625e-06, "loss": 0.3062, "step": 2460 }, { "epoch": 0.19687606247875042, "grad_norm": 0.2793847414690196, "learning_rate": 9.877811130940715e-06, "loss": 0.2988, "step": 2461 }, { "epoch": 0.19695606087878242, "grad_norm": 0.26876140322926706, "learning_rate": 9.877668763986243e-06, "loss": 0.2997, "step": 2462 }, { "epoch": 0.19703605927881443, "grad_norm": 0.24684775297924227, "learning_rate": 9.8775263151686e-06, "loss": 0.318, "step": 2463 }, { "epoch": 0.19711605767884643, "grad_norm": 0.2789719048627102, "learning_rate": 9.877383784490177e-06, "loss": 0.2848, "step": 2464 }, { "epoch": 0.1971960560788784, "grad_norm": 0.3209649709546764, "learning_rate": 9.877241171953367e-06, "loss": 0.2563, "step": 2465 }, { "epoch": 0.19727605447891042, "grad_norm": 0.27072595422558393, "learning_rate": 9.877098477560562e-06, "loss": 0.3263, "step": 2466 }, { "epoch": 0.19735605287894242, "grad_norm": 0.37282841305137376, "learning_rate": 9.876955701314157e-06, "loss": 0.2967, "step": 2467 }, { "epoch": 0.19743605127897443, "grad_norm": 0.3608877831699875, "learning_rate": 9.876812843216548e-06, "loss": 0.2648, "step": 2468 }, { "epoch": 0.19751604967900643, "grad_norm": 0.32434821277736336, "learning_rate": 9.876669903270133e-06, "loss": 0.2734, "step": 2469 }, { "epoch": 0.1975960480790384, "grad_norm": 0.43411774309647644, "learning_rate": 9.87652688147731e-06, "loss": 0.2882, "step": 2470 }, { "epoch": 0.19767604647907042, "grad_norm": 0.30999849698856097, "learning_rate": 9.876383777840484e-06, "loss": 0.3376, "step": 2471 }, { "epoch": 0.19775604487910242, "grad_norm": 0.2744638471044857, "learning_rate": 9.87624059236205e-06, "loss": 0.3308, "step": 2472 }, { "epoch": 0.19783604327913443, "grad_norm": 0.271948012042772, "learning_rate": 9.876097325044416e-06, "loss": 0.3316, "step": 2473 }, { "epoch": 0.1979160416791664, "grad_norm": 0.31925002149734755, "learning_rate": 9.875953975889984e-06, "loss": 0.2757, "step": 2474 }, { "epoch": 0.1979960400791984, "grad_norm": 0.2740866818677422, "learning_rate": 9.87581054490116e-06, "loss": 0.3358, "step": 2475 }, { "epoch": 0.19807603847923042, "grad_norm": 0.29377435594612916, "learning_rate": 9.875667032080354e-06, "loss": 0.3011, "step": 2476 }, { "epoch": 0.19815603687926242, "grad_norm": 0.31095485228516095, "learning_rate": 9.87552343742997e-06, "loss": 0.2858, "step": 2477 }, { "epoch": 0.19823603527929443, "grad_norm": 0.3111704161791113, "learning_rate": 9.875379760952422e-06, "loss": 0.269, "step": 2478 }, { "epoch": 0.1983160336793264, "grad_norm": 0.33746539806254855, "learning_rate": 9.875236002650119e-06, "loss": 0.2881, "step": 2479 }, { "epoch": 0.1983960320793584, "grad_norm": 0.35346244368460855, "learning_rate": 9.875092162525476e-06, "loss": 0.2817, "step": 2480 }, { "epoch": 0.19847603047939041, "grad_norm": 0.2915984188407071, "learning_rate": 9.874948240580903e-06, "loss": 0.3346, "step": 2481 }, { "epoch": 0.19855602887942242, "grad_norm": 0.30760062473496075, "learning_rate": 9.874804236818823e-06, "loss": 0.2676, "step": 2482 }, { "epoch": 0.19863602727945442, "grad_norm": 0.34001360797097285, "learning_rate": 9.874660151241644e-06, "loss": 0.3072, "step": 2483 }, { "epoch": 0.1987160256794864, "grad_norm": 0.4703492988539531, "learning_rate": 9.874515983851788e-06, "loss": 0.2802, "step": 2484 }, { "epoch": 0.1987960240795184, "grad_norm": 0.31191250767120043, "learning_rate": 9.874371734651678e-06, "loss": 0.3126, "step": 2485 }, { "epoch": 0.1988760224795504, "grad_norm": 0.33120765273751773, "learning_rate": 9.874227403643729e-06, "loss": 0.2631, "step": 2486 }, { "epoch": 0.19895602087958242, "grad_norm": 0.29535075578252534, "learning_rate": 9.874082990830366e-06, "loss": 0.3287, "step": 2487 }, { "epoch": 0.1990360192796144, "grad_norm": 0.2807656621855734, "learning_rate": 9.873938496214014e-06, "loss": 0.3153, "step": 2488 }, { "epoch": 0.1991160176796464, "grad_norm": 0.24900389638186068, "learning_rate": 9.873793919797099e-06, "loss": 0.3429, "step": 2489 }, { "epoch": 0.1991960160796784, "grad_norm": 0.3343860035900998, "learning_rate": 9.873649261582043e-06, "loss": 0.2902, "step": 2490 }, { "epoch": 0.1992760144797104, "grad_norm": 0.26278213219263286, "learning_rate": 9.873504521571278e-06, "loss": 0.3481, "step": 2491 }, { "epoch": 0.19935601287974242, "grad_norm": 0.3009982918031528, "learning_rate": 9.873359699767229e-06, "loss": 0.3101, "step": 2492 }, { "epoch": 0.1994360112797744, "grad_norm": 0.3233752344444209, "learning_rate": 9.87321479617233e-06, "loss": 0.2629, "step": 2493 }, { "epoch": 0.1995160096798064, "grad_norm": 0.2930155626945073, "learning_rate": 9.873069810789013e-06, "loss": 0.3096, "step": 2494 }, { "epoch": 0.1995960080798384, "grad_norm": 0.29402489128825876, "learning_rate": 9.87292474361971e-06, "loss": 0.2736, "step": 2495 }, { "epoch": 0.1996760064798704, "grad_norm": 0.28086199719298965, "learning_rate": 9.872779594666856e-06, "loss": 0.2896, "step": 2496 }, { "epoch": 0.19975600487990242, "grad_norm": 0.3720197991308306, "learning_rate": 9.872634363932887e-06, "loss": 0.2761, "step": 2497 }, { "epoch": 0.1998360032799344, "grad_norm": 0.3041056253799466, "learning_rate": 9.87248905142024e-06, "loss": 0.2884, "step": 2498 }, { "epoch": 0.1999160016799664, "grad_norm": 0.3147776737370676, "learning_rate": 9.872343657131355e-06, "loss": 0.2813, "step": 2499 }, { "epoch": 0.1999960000799984, "grad_norm": 0.36771054697241135, "learning_rate": 9.87219818106867e-06, "loss": 0.2809, "step": 2500 }, { "epoch": 0.2000759984800304, "grad_norm": 0.31214376381122216, "learning_rate": 9.872052623234632e-06, "loss": 0.2678, "step": 2501 }, { "epoch": 0.2001559968800624, "grad_norm": 0.3706037130845664, "learning_rate": 9.871906983631676e-06, "loss": 0.2901, "step": 2502 }, { "epoch": 0.2002359952800944, "grad_norm": 0.25635475077629827, "learning_rate": 9.871761262262252e-06, "loss": 0.3324, "step": 2503 }, { "epoch": 0.2003159936801264, "grad_norm": 0.36663918442864113, "learning_rate": 9.871615459128805e-06, "loss": 0.2691, "step": 2504 }, { "epoch": 0.2003959920801584, "grad_norm": 0.3880893797444578, "learning_rate": 9.871469574233781e-06, "loss": 0.2696, "step": 2505 }, { "epoch": 0.2004759904801904, "grad_norm": 0.3502549702036501, "learning_rate": 9.871323607579628e-06, "loss": 0.2682, "step": 2506 }, { "epoch": 0.20055598888022239, "grad_norm": 0.3035134431422913, "learning_rate": 9.871177559168795e-06, "loss": 0.322, "step": 2507 }, { "epoch": 0.2006359872802544, "grad_norm": 0.3000155201642723, "learning_rate": 9.871031429003735e-06, "loss": 0.2945, "step": 2508 }, { "epoch": 0.2007159856802864, "grad_norm": 0.3458682613510783, "learning_rate": 9.8708852170869e-06, "loss": 0.3107, "step": 2509 }, { "epoch": 0.2007959840803184, "grad_norm": 0.28556943385431643, "learning_rate": 9.870738923420746e-06, "loss": 0.3288, "step": 2510 }, { "epoch": 0.2008759824803504, "grad_norm": 0.30743684386888975, "learning_rate": 9.870592548007725e-06, "loss": 0.3107, "step": 2511 }, { "epoch": 0.20095598088038238, "grad_norm": 0.41174604803246856, "learning_rate": 9.870446090850295e-06, "loss": 0.2527, "step": 2512 }, { "epoch": 0.2010359792804144, "grad_norm": 0.2127036740695035, "learning_rate": 9.870299551950912e-06, "loss": 0.3448, "step": 2513 }, { "epoch": 0.2011159776804464, "grad_norm": 0.4114119181636995, "learning_rate": 9.87015293131204e-06, "loss": 0.2947, "step": 2514 }, { "epoch": 0.2011959760804784, "grad_norm": 0.336565649624392, "learning_rate": 9.870006228936135e-06, "loss": 0.3127, "step": 2515 }, { "epoch": 0.20127597448051038, "grad_norm": 0.27243877892233254, "learning_rate": 9.869859444825664e-06, "loss": 0.2959, "step": 2516 }, { "epoch": 0.20135597288054238, "grad_norm": 0.30387059662020083, "learning_rate": 9.869712578983085e-06, "loss": 0.2984, "step": 2517 }, { "epoch": 0.2014359712805744, "grad_norm": 0.3121277870256797, "learning_rate": 9.869565631410867e-06, "loss": 0.3122, "step": 2518 }, { "epoch": 0.2015159696806064, "grad_norm": 0.2953190454284815, "learning_rate": 9.869418602111475e-06, "loss": 0.2573, "step": 2519 }, { "epoch": 0.2015959680806384, "grad_norm": 0.29153610250715895, "learning_rate": 9.869271491087376e-06, "loss": 0.3092, "step": 2520 }, { "epoch": 0.20167596648067038, "grad_norm": 0.25673396585990116, "learning_rate": 9.869124298341039e-06, "loss": 0.3277, "step": 2521 }, { "epoch": 0.20175596488070238, "grad_norm": 0.38451946628164757, "learning_rate": 9.868977023874937e-06, "loss": 0.2614, "step": 2522 }, { "epoch": 0.2018359632807344, "grad_norm": 0.32469348468808834, "learning_rate": 9.868829667691538e-06, "loss": 0.3064, "step": 2523 }, { "epoch": 0.2019159616807664, "grad_norm": 0.3739748175455632, "learning_rate": 9.868682229793317e-06, "loss": 0.2727, "step": 2524 }, { "epoch": 0.20199596008079837, "grad_norm": 0.27917317953680815, "learning_rate": 9.868534710182747e-06, "loss": 0.2943, "step": 2525 }, { "epoch": 0.20207595848083038, "grad_norm": 0.34585163227207605, "learning_rate": 9.868387108862307e-06, "loss": 0.2578, "step": 2526 }, { "epoch": 0.20215595688086238, "grad_norm": 0.2494883622840502, "learning_rate": 9.868239425834472e-06, "loss": 0.3114, "step": 2527 }, { "epoch": 0.20223595528089439, "grad_norm": 0.3124776255954141, "learning_rate": 9.868091661101719e-06, "loss": 0.2362, "step": 2528 }, { "epoch": 0.2023159536809264, "grad_norm": 0.28844148601111425, "learning_rate": 9.867943814666533e-06, "loss": 0.3066, "step": 2529 }, { "epoch": 0.20239595208095837, "grad_norm": 0.32739632459084717, "learning_rate": 9.86779588653139e-06, "loss": 0.2906, "step": 2530 }, { "epoch": 0.20247595048099037, "grad_norm": 0.30203661640886614, "learning_rate": 9.867647876698776e-06, "loss": 0.318, "step": 2531 }, { "epoch": 0.20255594888102238, "grad_norm": 0.3005049714844136, "learning_rate": 9.867499785171174e-06, "loss": 0.3092, "step": 2532 }, { "epoch": 0.20263594728105438, "grad_norm": 0.33151061009463134, "learning_rate": 9.867351611951071e-06, "loss": 0.2592, "step": 2533 }, { "epoch": 0.2027159456810864, "grad_norm": 0.3463336664243194, "learning_rate": 9.86720335704095e-06, "loss": 0.277, "step": 2534 }, { "epoch": 0.20279594408111837, "grad_norm": 0.2482258481366194, "learning_rate": 9.867055020443302e-06, "loss": 0.3411, "step": 2535 }, { "epoch": 0.20287594248115037, "grad_norm": 0.36597878379474863, "learning_rate": 9.866906602160616e-06, "loss": 0.2718, "step": 2536 }, { "epoch": 0.20295594088118238, "grad_norm": 0.3147076332952683, "learning_rate": 9.866758102195384e-06, "loss": 0.2599, "step": 2537 }, { "epoch": 0.20303593928121438, "grad_norm": 0.301839912448097, "learning_rate": 9.866609520550097e-06, "loss": 0.2994, "step": 2538 }, { "epoch": 0.20311593768124636, "grad_norm": 0.31115526389640036, "learning_rate": 9.86646085722725e-06, "loss": 0.2958, "step": 2539 }, { "epoch": 0.20319593608127837, "grad_norm": 0.31253599647178126, "learning_rate": 9.866312112229335e-06, "loss": 0.2912, "step": 2540 }, { "epoch": 0.20327593448131037, "grad_norm": 0.2804238986866396, "learning_rate": 9.866163285558851e-06, "loss": 0.3055, "step": 2541 }, { "epoch": 0.20335593288134238, "grad_norm": 0.32638630035232546, "learning_rate": 9.866014377218297e-06, "loss": 0.2917, "step": 2542 }, { "epoch": 0.20343593128137438, "grad_norm": 0.28911941105504807, "learning_rate": 9.865865387210169e-06, "loss": 0.3104, "step": 2543 }, { "epoch": 0.20351592968140636, "grad_norm": 0.2954711260065343, "learning_rate": 9.86571631553697e-06, "loss": 0.3023, "step": 2544 }, { "epoch": 0.20359592808143837, "grad_norm": 0.27580838740040864, "learning_rate": 9.8655671622012e-06, "loss": 0.2992, "step": 2545 }, { "epoch": 0.20367592648147037, "grad_norm": 0.22830238930840105, "learning_rate": 9.865417927205363e-06, "loss": 0.3247, "step": 2546 }, { "epoch": 0.20375592488150238, "grad_norm": 0.3347282680138978, "learning_rate": 9.865268610551966e-06, "loss": 0.307, "step": 2547 }, { "epoch": 0.20383592328153438, "grad_norm": 0.33840686872862186, "learning_rate": 9.86511921224351e-06, "loss": 0.2502, "step": 2548 }, { "epoch": 0.20391592168156636, "grad_norm": 0.29619123584674023, "learning_rate": 9.864969732282507e-06, "loss": 0.2916, "step": 2549 }, { "epoch": 0.20399592008159836, "grad_norm": 0.31171500612831754, "learning_rate": 9.864820170671466e-06, "loss": 0.2785, "step": 2550 }, { "epoch": 0.20407591848163037, "grad_norm": 0.3270667075498327, "learning_rate": 9.864670527412891e-06, "loss": 0.269, "step": 2551 }, { "epoch": 0.20415591688166237, "grad_norm": 0.2496264533236665, "learning_rate": 9.8645208025093e-06, "loss": 0.3458, "step": 2552 }, { "epoch": 0.20423591528169435, "grad_norm": 0.28151360059638797, "learning_rate": 9.864370995963204e-06, "loss": 0.291, "step": 2553 }, { "epoch": 0.20431591368172636, "grad_norm": 0.2793813007934314, "learning_rate": 9.864221107777116e-06, "loss": 0.2938, "step": 2554 }, { "epoch": 0.20439591208175836, "grad_norm": 0.3013546586306375, "learning_rate": 9.864071137953552e-06, "loss": 0.3102, "step": 2555 }, { "epoch": 0.20447591048179037, "grad_norm": 0.3238735392023272, "learning_rate": 9.86392108649503e-06, "loss": 0.2733, "step": 2556 }, { "epoch": 0.20455590888182237, "grad_norm": 0.2528648475838113, "learning_rate": 9.863770953404068e-06, "loss": 0.3599, "step": 2557 }, { "epoch": 0.20463590728185435, "grad_norm": 0.2958940495704302, "learning_rate": 9.863620738683186e-06, "loss": 0.3012, "step": 2558 }, { "epoch": 0.20471590568188636, "grad_norm": 0.27027703977830325, "learning_rate": 9.863470442334904e-06, "loss": 0.289, "step": 2559 }, { "epoch": 0.20479590408191836, "grad_norm": 0.24131474615738915, "learning_rate": 9.863320064361743e-06, "loss": 0.3239, "step": 2560 }, { "epoch": 0.20487590248195037, "grad_norm": 0.3292687121316407, "learning_rate": 9.863169604766231e-06, "loss": 0.2658, "step": 2561 }, { "epoch": 0.20495590088198237, "grad_norm": 0.35704150610215796, "learning_rate": 9.863019063550892e-06, "loss": 0.2738, "step": 2562 }, { "epoch": 0.20503589928201435, "grad_norm": 0.27468754467581213, "learning_rate": 9.86286844071825e-06, "loss": 0.2987, "step": 2563 }, { "epoch": 0.20511589768204636, "grad_norm": 0.283725797229102, "learning_rate": 9.862717736270834e-06, "loss": 0.3267, "step": 2564 }, { "epoch": 0.20519589608207836, "grad_norm": 0.26799631941171415, "learning_rate": 9.862566950211175e-06, "loss": 0.3306, "step": 2565 }, { "epoch": 0.20527589448211037, "grad_norm": 0.3349905029087938, "learning_rate": 9.862416082541803e-06, "loss": 0.3778, "step": 2566 }, { "epoch": 0.20535589288214234, "grad_norm": 0.2651739749496281, "learning_rate": 9.862265133265248e-06, "loss": 0.3521, "step": 2567 }, { "epoch": 0.20543589128217435, "grad_norm": 0.33430554324665285, "learning_rate": 9.862114102384045e-06, "loss": 0.2847, "step": 2568 }, { "epoch": 0.20551588968220635, "grad_norm": 0.31793197732536266, "learning_rate": 9.861962989900732e-06, "loss": 0.285, "step": 2569 }, { "epoch": 0.20559588808223836, "grad_norm": 0.3048678447015555, "learning_rate": 9.86181179581784e-06, "loss": 0.3134, "step": 2570 }, { "epoch": 0.20567588648227036, "grad_norm": 0.25872361853646114, "learning_rate": 9.861660520137908e-06, "loss": 0.3295, "step": 2571 }, { "epoch": 0.20575588488230234, "grad_norm": 0.3152439438992059, "learning_rate": 9.861509162863475e-06, "loss": 0.2994, "step": 2572 }, { "epoch": 0.20583588328233435, "grad_norm": 0.3180730359281916, "learning_rate": 9.861357723997082e-06, "loss": 0.2696, "step": 2573 }, { "epoch": 0.20591588168236635, "grad_norm": 0.25642345422382495, "learning_rate": 9.861206203541271e-06, "loss": 0.3093, "step": 2574 }, { "epoch": 0.20599588008239836, "grad_norm": 0.3066590860851833, "learning_rate": 9.861054601498586e-06, "loss": 0.279, "step": 2575 }, { "epoch": 0.20607587848243036, "grad_norm": 0.2878753419464467, "learning_rate": 9.860902917871566e-06, "loss": 0.3188, "step": 2576 }, { "epoch": 0.20615587688246234, "grad_norm": 0.2593035731459659, "learning_rate": 9.860751152662762e-06, "loss": 0.2917, "step": 2577 }, { "epoch": 0.20623587528249435, "grad_norm": 0.3577923093024356, "learning_rate": 9.860599305874721e-06, "loss": 0.3451, "step": 2578 }, { "epoch": 0.20631587368252635, "grad_norm": 0.2283692151709258, "learning_rate": 9.860447377509989e-06, "loss": 0.3336, "step": 2579 }, { "epoch": 0.20639587208255836, "grad_norm": 0.2788654231907945, "learning_rate": 9.860295367571115e-06, "loss": 0.2968, "step": 2580 }, { "epoch": 0.20647587048259033, "grad_norm": 0.27789653311159646, "learning_rate": 9.860143276060655e-06, "loss": 0.3022, "step": 2581 }, { "epoch": 0.20655586888262234, "grad_norm": 0.2684105659566808, "learning_rate": 9.859991102981159e-06, "loss": 0.3194, "step": 2582 }, { "epoch": 0.20663586728265435, "grad_norm": 0.24175737729596883, "learning_rate": 9.859838848335178e-06, "loss": 0.3166, "step": 2583 }, { "epoch": 0.20671586568268635, "grad_norm": 0.22743378125783334, "learning_rate": 9.859686512125271e-06, "loss": 0.3576, "step": 2584 }, { "epoch": 0.20679586408271836, "grad_norm": 0.28867984116195733, "learning_rate": 9.859534094353994e-06, "loss": 0.3021, "step": 2585 }, { "epoch": 0.20687586248275033, "grad_norm": 0.310601753771648, "learning_rate": 9.859381595023905e-06, "loss": 0.2729, "step": 2586 }, { "epoch": 0.20695586088278234, "grad_norm": 0.307048033907565, "learning_rate": 9.859229014137564e-06, "loss": 0.2583, "step": 2587 }, { "epoch": 0.20703585928281434, "grad_norm": 0.4888119069224936, "learning_rate": 9.85907635169753e-06, "loss": 0.3714, "step": 2588 }, { "epoch": 0.20711585768284635, "grad_norm": 0.3153847734855788, "learning_rate": 9.858923607706366e-06, "loss": 0.2576, "step": 2589 }, { "epoch": 0.20719585608287835, "grad_norm": 0.2218734860720186, "learning_rate": 9.858770782166636e-06, "loss": 0.3715, "step": 2590 }, { "epoch": 0.20727585448291033, "grad_norm": 0.2834264116325163, "learning_rate": 9.858617875080904e-06, "loss": 0.317, "step": 2591 }, { "epoch": 0.20735585288294234, "grad_norm": 0.32784744837547514, "learning_rate": 9.858464886451737e-06, "loss": 0.2731, "step": 2592 }, { "epoch": 0.20743585128297434, "grad_norm": 0.35870952179236076, "learning_rate": 9.858311816281703e-06, "loss": 0.2497, "step": 2593 }, { "epoch": 0.20751584968300635, "grad_norm": 0.29527538196502084, "learning_rate": 9.85815866457337e-06, "loss": 0.2998, "step": 2594 }, { "epoch": 0.20759584808303833, "grad_norm": 0.297987564398225, "learning_rate": 9.858005431329309e-06, "loss": 0.2902, "step": 2595 }, { "epoch": 0.20767584648307033, "grad_norm": 0.32385108690487874, "learning_rate": 9.857852116552094e-06, "loss": 0.2652, "step": 2596 }, { "epoch": 0.20775584488310234, "grad_norm": 0.25062474573476295, "learning_rate": 9.857698720244294e-06, "loss": 0.3187, "step": 2597 }, { "epoch": 0.20783584328313434, "grad_norm": 0.31147193334369294, "learning_rate": 9.857545242408485e-06, "loss": 0.2814, "step": 2598 }, { "epoch": 0.20791584168316635, "grad_norm": 0.31595150721051446, "learning_rate": 9.857391683047244e-06, "loss": 0.2742, "step": 2599 }, { "epoch": 0.20799584008319832, "grad_norm": 0.43452394381523424, "learning_rate": 9.857238042163147e-06, "loss": 0.2829, "step": 2600 }, { "epoch": 0.20807583848323033, "grad_norm": 0.31876869018677917, "learning_rate": 9.857084319758772e-06, "loss": 0.2677, "step": 2601 }, { "epoch": 0.20815583688326234, "grad_norm": 0.3001456554338103, "learning_rate": 9.856930515836701e-06, "loss": 0.2962, "step": 2602 }, { "epoch": 0.20823583528329434, "grad_norm": 0.32170999788480525, "learning_rate": 9.856776630399514e-06, "loss": 0.2757, "step": 2603 }, { "epoch": 0.20831583368332635, "grad_norm": 0.33767545123662196, "learning_rate": 9.856622663449796e-06, "loss": 0.2782, "step": 2604 }, { "epoch": 0.20839583208335832, "grad_norm": 0.2866285089547352, "learning_rate": 9.856468614990127e-06, "loss": 0.2908, "step": 2605 }, { "epoch": 0.20847583048339033, "grad_norm": 0.3399330209114603, "learning_rate": 9.856314485023094e-06, "loss": 0.3188, "step": 2606 }, { "epoch": 0.20855582888342233, "grad_norm": 0.27410844614186114, "learning_rate": 9.856160273551285e-06, "loss": 0.3375, "step": 2607 }, { "epoch": 0.20863582728345434, "grad_norm": 0.4022746721880151, "learning_rate": 9.856005980577287e-06, "loss": 0.2875, "step": 2608 }, { "epoch": 0.20871582568348632, "grad_norm": 0.24003499723735117, "learning_rate": 9.855851606103691e-06, "loss": 0.3559, "step": 2609 }, { "epoch": 0.20879582408351832, "grad_norm": 0.253744530744458, "learning_rate": 9.855697150133086e-06, "loss": 0.3283, "step": 2610 }, { "epoch": 0.20887582248355033, "grad_norm": 0.3025758528064443, "learning_rate": 9.855542612668066e-06, "loss": 0.2692, "step": 2611 }, { "epoch": 0.20895582088358233, "grad_norm": 0.2837250700054285, "learning_rate": 9.855387993711224e-06, "loss": 0.3056, "step": 2612 }, { "epoch": 0.20903581928361434, "grad_norm": 0.318261309439039, "learning_rate": 9.855233293265153e-06, "loss": 0.2707, "step": 2613 }, { "epoch": 0.20911581768364632, "grad_norm": 0.3004698880274868, "learning_rate": 9.855078511332455e-06, "loss": 0.2937, "step": 2614 }, { "epoch": 0.20919581608367832, "grad_norm": 0.8035849371079382, "learning_rate": 9.85492364791572e-06, "loss": 0.2819, "step": 2615 }, { "epoch": 0.20927581448371033, "grad_norm": 0.5977262378878152, "learning_rate": 9.854768703017553e-06, "loss": 0.2764, "step": 2616 }, { "epoch": 0.20935581288374233, "grad_norm": 0.3787727099325343, "learning_rate": 9.854613676640551e-06, "loss": 0.3052, "step": 2617 }, { "epoch": 0.20943581128377434, "grad_norm": 0.2708423938342487, "learning_rate": 9.854458568787319e-06, "loss": 0.2947, "step": 2618 }, { "epoch": 0.20951580968380631, "grad_norm": 0.26247177006934125, "learning_rate": 9.854303379460458e-06, "loss": 0.3177, "step": 2619 }, { "epoch": 0.20959580808383832, "grad_norm": 0.3005402015428693, "learning_rate": 9.854148108662574e-06, "loss": 0.297, "step": 2620 }, { "epoch": 0.20967580648387033, "grad_norm": 0.31853259802685274, "learning_rate": 9.853992756396272e-06, "loss": 0.266, "step": 2621 }, { "epoch": 0.20975580488390233, "grad_norm": 0.2808401081517454, "learning_rate": 9.853837322664159e-06, "loss": 0.3335, "step": 2622 }, { "epoch": 0.2098358032839343, "grad_norm": 0.32120698607564574, "learning_rate": 9.853681807468845e-06, "loss": 0.3193, "step": 2623 }, { "epoch": 0.2099158016839663, "grad_norm": 0.2954754884644285, "learning_rate": 9.853526210812939e-06, "loss": 0.276, "step": 2624 }, { "epoch": 0.20999580008399832, "grad_norm": 0.23618435555819356, "learning_rate": 9.853370532699052e-06, "loss": 0.3529, "step": 2625 }, { "epoch": 0.21007579848403032, "grad_norm": 0.33736958083233115, "learning_rate": 9.853214773129796e-06, "loss": 0.3109, "step": 2626 }, { "epoch": 0.21015579688406233, "grad_norm": 0.3100424290521666, "learning_rate": 9.853058932107789e-06, "loss": 0.2984, "step": 2627 }, { "epoch": 0.2102357952840943, "grad_norm": 0.24733352365560216, "learning_rate": 9.852903009635642e-06, "loss": 0.3289, "step": 2628 }, { "epoch": 0.2103157936841263, "grad_norm": 0.36558602648613103, "learning_rate": 9.852747005715976e-06, "loss": 0.3159, "step": 2629 }, { "epoch": 0.21039579208415832, "grad_norm": 0.3392219252115747, "learning_rate": 9.852590920351406e-06, "loss": 0.3059, "step": 2630 }, { "epoch": 0.21047579048419032, "grad_norm": 0.2739304012487695, "learning_rate": 9.852434753544552e-06, "loss": 0.2976, "step": 2631 }, { "epoch": 0.21055578888422233, "grad_norm": 0.2661625745298221, "learning_rate": 9.852278505298039e-06, "loss": 0.3204, "step": 2632 }, { "epoch": 0.2106357872842543, "grad_norm": 0.28307330441230977, "learning_rate": 9.852122175614484e-06, "loss": 0.3194, "step": 2633 }, { "epoch": 0.2107157856842863, "grad_norm": 0.3325161548955037, "learning_rate": 9.851965764496513e-06, "loss": 0.2768, "step": 2634 }, { "epoch": 0.21079578408431832, "grad_norm": 0.2466102774768095, "learning_rate": 9.85180927194675e-06, "loss": 0.3232, "step": 2635 }, { "epoch": 0.21087578248435032, "grad_norm": 0.25269394461887223, "learning_rate": 9.851652697967825e-06, "loss": 0.3501, "step": 2636 }, { "epoch": 0.2109557808843823, "grad_norm": 0.38021228774154636, "learning_rate": 9.85149604256236e-06, "loss": 0.2765, "step": 2637 }, { "epoch": 0.2110357792844143, "grad_norm": 0.41340643597588905, "learning_rate": 9.85133930573299e-06, "loss": 0.3104, "step": 2638 }, { "epoch": 0.2111157776844463, "grad_norm": 0.35090828512811995, "learning_rate": 9.851182487482342e-06, "loss": 0.2985, "step": 2639 }, { "epoch": 0.21119577608447831, "grad_norm": 0.29919557305630723, "learning_rate": 9.85102558781305e-06, "loss": 0.314, "step": 2640 }, { "epoch": 0.21127577448451032, "grad_norm": 0.32767191415306157, "learning_rate": 9.850868606727745e-06, "loss": 0.283, "step": 2641 }, { "epoch": 0.2113557728845423, "grad_norm": 0.3609568296619404, "learning_rate": 9.850711544229064e-06, "loss": 0.3284, "step": 2642 }, { "epoch": 0.2114357712845743, "grad_norm": 0.2804793339197424, "learning_rate": 9.85055440031964e-06, "loss": 0.3137, "step": 2643 }, { "epoch": 0.2115157696846063, "grad_norm": 0.31692955604348066, "learning_rate": 9.850397175002114e-06, "loss": 0.2571, "step": 2644 }, { "epoch": 0.2115957680846383, "grad_norm": 0.31280534854586156, "learning_rate": 9.850239868279123e-06, "loss": 0.2978, "step": 2645 }, { "epoch": 0.21167576648467032, "grad_norm": 0.2461719714667789, "learning_rate": 9.850082480153306e-06, "loss": 0.3073, "step": 2646 }, { "epoch": 0.2117557648847023, "grad_norm": 0.2438606334097585, "learning_rate": 9.849925010627308e-06, "loss": 0.3389, "step": 2647 }, { "epoch": 0.2118357632847343, "grad_norm": 0.250981202875402, "learning_rate": 9.849767459703767e-06, "loss": 0.3223, "step": 2648 }, { "epoch": 0.2119157616847663, "grad_norm": 0.27319398114539656, "learning_rate": 9.84960982738533e-06, "loss": 0.2959, "step": 2649 }, { "epoch": 0.2119957600847983, "grad_norm": 0.2588723060701358, "learning_rate": 9.849452113674644e-06, "loss": 0.3395, "step": 2650 }, { "epoch": 0.2120757584848303, "grad_norm": 0.3113830424072568, "learning_rate": 9.849294318574353e-06, "loss": 0.2764, "step": 2651 }, { "epoch": 0.2121557568848623, "grad_norm": 0.3255334493629717, "learning_rate": 9.849136442087106e-06, "loss": 0.2656, "step": 2652 }, { "epoch": 0.2122357552848943, "grad_norm": 0.2672229038025903, "learning_rate": 9.848978484215554e-06, "loss": 0.2985, "step": 2653 }, { "epoch": 0.2123157536849263, "grad_norm": 0.3228294562911482, "learning_rate": 9.848820444962348e-06, "loss": 0.2608, "step": 2654 }, { "epoch": 0.2123957520849583, "grad_norm": 0.27731408432430354, "learning_rate": 9.848662324330139e-06, "loss": 0.2955, "step": 2655 }, { "epoch": 0.2124757504849903, "grad_norm": 0.2557945280358065, "learning_rate": 9.848504122321581e-06, "loss": 0.3484, "step": 2656 }, { "epoch": 0.2125557488850223, "grad_norm": 0.2996916312690073, "learning_rate": 9.848345838939329e-06, "loss": 0.2703, "step": 2657 }, { "epoch": 0.2126357472850543, "grad_norm": 0.309715122700965, "learning_rate": 9.848187474186042e-06, "loss": 0.2927, "step": 2658 }, { "epoch": 0.2127157456850863, "grad_norm": 0.2992297600551249, "learning_rate": 9.848029028064374e-06, "loss": 0.2612, "step": 2659 }, { "epoch": 0.2127957440851183, "grad_norm": 0.27926854080156416, "learning_rate": 9.847870500576987e-06, "loss": 0.2968, "step": 2660 }, { "epoch": 0.2128757424851503, "grad_norm": 0.3346592535363179, "learning_rate": 9.847711891726543e-06, "loss": 0.2819, "step": 2661 }, { "epoch": 0.2129557408851823, "grad_norm": 0.27941362800859104, "learning_rate": 9.847553201515701e-06, "loss": 0.2962, "step": 2662 }, { "epoch": 0.2130357392852143, "grad_norm": 0.32169481724417076, "learning_rate": 9.847394429947124e-06, "loss": 0.2822, "step": 2663 }, { "epoch": 0.2131157376852463, "grad_norm": 0.28639363064721884, "learning_rate": 9.847235577023477e-06, "loss": 0.3015, "step": 2664 }, { "epoch": 0.21319573608527828, "grad_norm": 0.39301790129794173, "learning_rate": 9.847076642747429e-06, "loss": 0.2971, "step": 2665 }, { "epoch": 0.2132757344853103, "grad_norm": 0.275527562346172, "learning_rate": 9.846917627121644e-06, "loss": 0.2941, "step": 2666 }, { "epoch": 0.2133557328853423, "grad_norm": 0.26514752441994255, "learning_rate": 9.846758530148793e-06, "loss": 0.3269, "step": 2667 }, { "epoch": 0.2134357312853743, "grad_norm": 0.27964971645020964, "learning_rate": 9.846599351831546e-06, "loss": 0.3041, "step": 2668 }, { "epoch": 0.2135157296854063, "grad_norm": 0.3391421443429832, "learning_rate": 9.84644009217257e-06, "loss": 0.3131, "step": 2669 }, { "epoch": 0.21359572808543828, "grad_norm": 0.3218926405260842, "learning_rate": 9.846280751174547e-06, "loss": 0.274, "step": 2670 }, { "epoch": 0.21367572648547029, "grad_norm": 0.3003566018120908, "learning_rate": 9.846121328840143e-06, "loss": 0.3007, "step": 2671 }, { "epoch": 0.2137557248855023, "grad_norm": 0.5875141239115877, "learning_rate": 9.845961825172038e-06, "loss": 0.2909, "step": 2672 }, { "epoch": 0.2138357232855343, "grad_norm": 0.32763270391057214, "learning_rate": 9.845802240172908e-06, "loss": 0.3179, "step": 2673 }, { "epoch": 0.2139157216855663, "grad_norm": 0.29805498012725107, "learning_rate": 9.845642573845429e-06, "loss": 0.2981, "step": 2674 }, { "epoch": 0.21399572008559828, "grad_norm": 0.2894932349216363, "learning_rate": 9.845482826192284e-06, "loss": 0.3048, "step": 2675 }, { "epoch": 0.21407571848563028, "grad_norm": 0.3526205180494495, "learning_rate": 9.845322997216153e-06, "loss": 0.2697, "step": 2676 }, { "epoch": 0.2141557168856623, "grad_norm": 0.32036767967792773, "learning_rate": 9.845163086919718e-06, "loss": 0.2841, "step": 2677 }, { "epoch": 0.2142357152856943, "grad_norm": 0.33478769480695575, "learning_rate": 9.845003095305663e-06, "loss": 0.2603, "step": 2678 }, { "epoch": 0.21431571368572627, "grad_norm": 0.31702206193184534, "learning_rate": 9.844843022376673e-06, "loss": 0.3114, "step": 2679 }, { "epoch": 0.21439571208575828, "grad_norm": 0.33810202678168527, "learning_rate": 9.844682868135436e-06, "loss": 0.2937, "step": 2680 }, { "epoch": 0.21447571048579028, "grad_norm": 0.30718656185501, "learning_rate": 9.844522632584636e-06, "loss": 0.3157, "step": 2681 }, { "epoch": 0.2145557088858223, "grad_norm": 0.23715494967858633, "learning_rate": 9.844362315726967e-06, "loss": 0.3422, "step": 2682 }, { "epoch": 0.2146357072858543, "grad_norm": 0.34566301881893324, "learning_rate": 9.844201917565119e-06, "loss": 0.2728, "step": 2683 }, { "epoch": 0.21471570568588627, "grad_norm": 0.3272919656944929, "learning_rate": 9.844041438101781e-06, "loss": 0.2648, "step": 2684 }, { "epoch": 0.21479570408591828, "grad_norm": 0.28607274994312515, "learning_rate": 9.843880877339648e-06, "loss": 0.2856, "step": 2685 }, { "epoch": 0.21487570248595028, "grad_norm": 0.31462507401100903, "learning_rate": 9.843720235281416e-06, "loss": 0.3092, "step": 2686 }, { "epoch": 0.2149557008859823, "grad_norm": 0.2823580456228662, "learning_rate": 9.843559511929777e-06, "loss": 0.3329, "step": 2687 }, { "epoch": 0.2150356992860143, "grad_norm": 0.26847056859120855, "learning_rate": 9.843398707287433e-06, "loss": 0.3682, "step": 2688 }, { "epoch": 0.21511569768604627, "grad_norm": 0.33347020968981606, "learning_rate": 9.843237821357082e-06, "loss": 0.3108, "step": 2689 }, { "epoch": 0.21519569608607828, "grad_norm": 0.33186024591765123, "learning_rate": 9.843076854141422e-06, "loss": 0.2978, "step": 2690 }, { "epoch": 0.21527569448611028, "grad_norm": 0.3208605690650275, "learning_rate": 9.842915805643156e-06, "loss": 0.2797, "step": 2691 }, { "epoch": 0.21535569288614229, "grad_norm": 0.29784265882873934, "learning_rate": 9.842754675864988e-06, "loss": 0.3138, "step": 2692 }, { "epoch": 0.21543569128617426, "grad_norm": 0.29555145652749976, "learning_rate": 9.84259346480962e-06, "loss": 0.2786, "step": 2693 }, { "epoch": 0.21551568968620627, "grad_norm": 0.27404166731387225, "learning_rate": 9.842432172479759e-06, "loss": 0.3245, "step": 2694 }, { "epoch": 0.21559568808623827, "grad_norm": 0.31743892318865347, "learning_rate": 9.842270798878111e-06, "loss": 0.2671, "step": 2695 }, { "epoch": 0.21567568648627028, "grad_norm": 0.2889267506297946, "learning_rate": 9.842109344007386e-06, "loss": 0.2865, "step": 2696 }, { "epoch": 0.21575568488630228, "grad_norm": 0.31324974216615253, "learning_rate": 9.841947807870293e-06, "loss": 0.2966, "step": 2697 }, { "epoch": 0.21583568328633426, "grad_norm": 0.32175102113058407, "learning_rate": 9.841786190469542e-06, "loss": 0.2806, "step": 2698 }, { "epoch": 0.21591568168636627, "grad_norm": 0.3383437745680364, "learning_rate": 9.841624491807846e-06, "loss": 0.2849, "step": 2699 }, { "epoch": 0.21599568008639827, "grad_norm": 0.3241243546944871, "learning_rate": 9.84146271188792e-06, "loss": 0.3025, "step": 2700 }, { "epoch": 0.21607567848643028, "grad_norm": 0.2968437116223096, "learning_rate": 9.841300850712479e-06, "loss": 0.2936, "step": 2701 }, { "epoch": 0.21615567688646228, "grad_norm": 0.2882845376907925, "learning_rate": 9.84113890828424e-06, "loss": 0.3052, "step": 2702 }, { "epoch": 0.21623567528649426, "grad_norm": 0.33500568623570515, "learning_rate": 9.840976884605916e-06, "loss": 0.2784, "step": 2703 }, { "epoch": 0.21631567368652627, "grad_norm": 0.3297713416465176, "learning_rate": 9.840814779680234e-06, "loss": 0.2649, "step": 2704 }, { "epoch": 0.21639567208655827, "grad_norm": 0.3010535321913041, "learning_rate": 9.840652593509909e-06, "loss": 0.2516, "step": 2705 }, { "epoch": 0.21647567048659028, "grad_norm": 0.24571714681986007, "learning_rate": 9.840490326097667e-06, "loss": 0.3246, "step": 2706 }, { "epoch": 0.21655566888662225, "grad_norm": 0.2709688932949307, "learning_rate": 9.840327977446226e-06, "loss": 0.3261, "step": 2707 }, { "epoch": 0.21663566728665426, "grad_norm": 0.36225667082066015, "learning_rate": 9.840165547558317e-06, "loss": 0.2843, "step": 2708 }, { "epoch": 0.21671566568668627, "grad_norm": 0.30258306213515673, "learning_rate": 9.840003036436661e-06, "loss": 0.268, "step": 2709 }, { "epoch": 0.21679566408671827, "grad_norm": 0.3250927940334589, "learning_rate": 9.839840444083988e-06, "loss": 0.2861, "step": 2710 }, { "epoch": 0.21687566248675028, "grad_norm": 0.30794826838587813, "learning_rate": 9.839677770503028e-06, "loss": 0.2927, "step": 2711 }, { "epoch": 0.21695566088678225, "grad_norm": 0.2920344666268955, "learning_rate": 9.839515015696509e-06, "loss": 0.2981, "step": 2712 }, { "epoch": 0.21703565928681426, "grad_norm": 0.3433353209551213, "learning_rate": 9.839352179667162e-06, "loss": 0.2781, "step": 2713 }, { "epoch": 0.21711565768684626, "grad_norm": 0.321036770205326, "learning_rate": 9.839189262417721e-06, "loss": 0.2556, "step": 2714 }, { "epoch": 0.21719565608687827, "grad_norm": 0.33210105088529196, "learning_rate": 9.83902626395092e-06, "loss": 0.2669, "step": 2715 }, { "epoch": 0.21727565448691027, "grad_norm": 0.3191826253855119, "learning_rate": 9.838863184269496e-06, "loss": 0.2693, "step": 2716 }, { "epoch": 0.21735565288694225, "grad_norm": 0.27652427372262117, "learning_rate": 9.838700023376184e-06, "loss": 0.2868, "step": 2717 }, { "epoch": 0.21743565128697426, "grad_norm": 0.31398565315749427, "learning_rate": 9.838536781273725e-06, "loss": 0.2857, "step": 2718 }, { "epoch": 0.21751564968700626, "grad_norm": 0.27710338916230115, "learning_rate": 9.838373457964856e-06, "loss": 0.3442, "step": 2719 }, { "epoch": 0.21759564808703827, "grad_norm": 0.26119808307859543, "learning_rate": 9.838210053452318e-06, "loss": 0.3374, "step": 2720 }, { "epoch": 0.21767564648707025, "grad_norm": 0.364142839383261, "learning_rate": 9.838046567738856e-06, "loss": 0.276, "step": 2721 }, { "epoch": 0.21775564488710225, "grad_norm": 0.23074399437991608, "learning_rate": 9.837883000827214e-06, "loss": 0.3596, "step": 2722 }, { "epoch": 0.21783564328713426, "grad_norm": 0.4792957180415395, "learning_rate": 9.837719352720133e-06, "loss": 0.2765, "step": 2723 }, { "epoch": 0.21791564168716626, "grad_norm": 0.30512474907492615, "learning_rate": 9.837555623420363e-06, "loss": 0.3101, "step": 2724 }, { "epoch": 0.21799564008719827, "grad_norm": 0.4083352573008931, "learning_rate": 9.83739181293065e-06, "loss": 0.2663, "step": 2725 }, { "epoch": 0.21807563848723024, "grad_norm": 0.347837399700398, "learning_rate": 9.837227921253747e-06, "loss": 0.2635, "step": 2726 }, { "epoch": 0.21815563688726225, "grad_norm": 0.28583372002693014, "learning_rate": 9.837063948392401e-06, "loss": 0.3367, "step": 2727 }, { "epoch": 0.21823563528729426, "grad_norm": 0.3239847965080378, "learning_rate": 9.836899894349364e-06, "loss": 0.2685, "step": 2728 }, { "epoch": 0.21831563368732626, "grad_norm": 0.2655627145338744, "learning_rate": 9.836735759127391e-06, "loss": 0.3102, "step": 2729 }, { "epoch": 0.21839563208735827, "grad_norm": 0.44753151826142484, "learning_rate": 9.836571542729236e-06, "loss": 0.269, "step": 2730 }, { "epoch": 0.21847563048739024, "grad_norm": 0.3328863091845624, "learning_rate": 9.836407245157656e-06, "loss": 0.2742, "step": 2731 }, { "epoch": 0.21855562888742225, "grad_norm": 0.3410880122098285, "learning_rate": 9.836242866415406e-06, "loss": 0.2599, "step": 2732 }, { "epoch": 0.21863562728745425, "grad_norm": 0.24883249994566176, "learning_rate": 9.836078406505249e-06, "loss": 0.3308, "step": 2733 }, { "epoch": 0.21871562568748626, "grad_norm": 0.2568036851508712, "learning_rate": 9.83591386542994e-06, "loss": 0.3312, "step": 2734 }, { "epoch": 0.21879562408751824, "grad_norm": 0.3549816513595237, "learning_rate": 9.835749243192245e-06, "loss": 0.2959, "step": 2735 }, { "epoch": 0.21887562248755024, "grad_norm": 0.20508016165095397, "learning_rate": 9.835584539794925e-06, "loss": 0.3618, "step": 2736 }, { "epoch": 0.21895562088758225, "grad_norm": 0.2947832887422123, "learning_rate": 9.835419755240743e-06, "loss": 0.2985, "step": 2737 }, { "epoch": 0.21903561928761425, "grad_norm": 0.23694130743523875, "learning_rate": 9.835254889532466e-06, "loss": 0.3296, "step": 2738 }, { "epoch": 0.21911561768764626, "grad_norm": 0.30613778259311786, "learning_rate": 9.835089942672862e-06, "loss": 0.2953, "step": 2739 }, { "epoch": 0.21919561608767824, "grad_norm": 0.2520373860967617, "learning_rate": 9.834924914664696e-06, "loss": 0.328, "step": 2740 }, { "epoch": 0.21927561448771024, "grad_norm": 0.26820830544684077, "learning_rate": 9.834759805510742e-06, "loss": 0.2981, "step": 2741 }, { "epoch": 0.21935561288774225, "grad_norm": 0.28310543265742477, "learning_rate": 9.83459461521377e-06, "loss": 0.3033, "step": 2742 }, { "epoch": 0.21943561128777425, "grad_norm": 0.46558995996700625, "learning_rate": 9.834429343776551e-06, "loss": 0.3073, "step": 2743 }, { "epoch": 0.21951560968780626, "grad_norm": 0.3419822007292441, "learning_rate": 9.834263991201857e-06, "loss": 0.277, "step": 2744 }, { "epoch": 0.21959560808783823, "grad_norm": 0.33671400275896723, "learning_rate": 9.834098557492467e-06, "loss": 0.2817, "step": 2745 }, { "epoch": 0.21967560648787024, "grad_norm": 0.3053576986719263, "learning_rate": 9.833933042651156e-06, "loss": 0.3021, "step": 2746 }, { "epoch": 0.21975560488790225, "grad_norm": 0.278869592231678, "learning_rate": 9.8337674466807e-06, "loss": 0.317, "step": 2747 }, { "epoch": 0.21983560328793425, "grad_norm": 0.2772025766159624, "learning_rate": 9.833601769583883e-06, "loss": 0.294, "step": 2748 }, { "epoch": 0.21991560168796623, "grad_norm": 0.2762096890157505, "learning_rate": 9.833436011363482e-06, "loss": 0.3102, "step": 2749 }, { "epoch": 0.21999560008799823, "grad_norm": 0.2962296325603438, "learning_rate": 9.833270172022277e-06, "loss": 0.2652, "step": 2750 }, { "epoch": 0.22007559848803024, "grad_norm": 0.27892391636371006, "learning_rate": 9.833104251563058e-06, "loss": 0.3093, "step": 2751 }, { "epoch": 0.22015559688806224, "grad_norm": 0.2826854057343321, "learning_rate": 9.832938249988602e-06, "loss": 0.2952, "step": 2752 }, { "epoch": 0.22023559528809425, "grad_norm": 0.28984837723954787, "learning_rate": 9.832772167301701e-06, "loss": 0.2906, "step": 2753 }, { "epoch": 0.22031559368812623, "grad_norm": 0.24474436996528706, "learning_rate": 9.832606003505139e-06, "loss": 0.3373, "step": 2754 }, { "epoch": 0.22039559208815823, "grad_norm": 0.28531884139798375, "learning_rate": 9.832439758601706e-06, "loss": 0.2875, "step": 2755 }, { "epoch": 0.22047559048819024, "grad_norm": 0.3889518278504408, "learning_rate": 9.832273432594192e-06, "loss": 0.2888, "step": 2756 }, { "epoch": 0.22055558888822224, "grad_norm": 0.31358457851726484, "learning_rate": 9.83210702548539e-06, "loss": 0.2613, "step": 2757 }, { "epoch": 0.22063558728825425, "grad_norm": 0.2818486488104054, "learning_rate": 9.831940537278088e-06, "loss": 0.2976, "step": 2758 }, { "epoch": 0.22071558568828623, "grad_norm": 0.3041743534993681, "learning_rate": 9.831773967975085e-06, "loss": 0.3385, "step": 2759 }, { "epoch": 0.22079558408831823, "grad_norm": 0.2657031713422216, "learning_rate": 9.831607317579178e-06, "loss": 0.2959, "step": 2760 }, { "epoch": 0.22087558248835024, "grad_norm": 0.35126019099305517, "learning_rate": 9.831440586093157e-06, "loss": 0.2856, "step": 2761 }, { "epoch": 0.22095558088838224, "grad_norm": 0.31539668613165267, "learning_rate": 9.831273773519826e-06, "loss": 0.2987, "step": 2762 }, { "epoch": 0.22103557928841422, "grad_norm": 0.19301876229647127, "learning_rate": 9.831106879861982e-06, "loss": 0.3533, "step": 2763 }, { "epoch": 0.22111557768844622, "grad_norm": 0.3597979432086649, "learning_rate": 9.830939905122429e-06, "loss": 0.2721, "step": 2764 }, { "epoch": 0.22119557608847823, "grad_norm": 0.27028818646640096, "learning_rate": 9.830772849303967e-06, "loss": 0.3375, "step": 2765 }, { "epoch": 0.22127557448851023, "grad_norm": 0.27609937566928244, "learning_rate": 9.830605712409399e-06, "loss": 0.2904, "step": 2766 }, { "epoch": 0.22135557288854224, "grad_norm": 0.2708883982284736, "learning_rate": 9.830438494441533e-06, "loss": 0.2907, "step": 2767 }, { "epoch": 0.22143557128857422, "grad_norm": 0.28664579065424456, "learning_rate": 9.830271195403171e-06, "loss": 0.2387, "step": 2768 }, { "epoch": 0.22151556968860622, "grad_norm": 0.24268426947492366, "learning_rate": 9.830103815297126e-06, "loss": 0.3444, "step": 2769 }, { "epoch": 0.22159556808863823, "grad_norm": 0.2584215309622741, "learning_rate": 9.829936354126202e-06, "loss": 0.3257, "step": 2770 }, { "epoch": 0.22167556648867023, "grad_norm": 0.42756734149467834, "learning_rate": 9.829768811893214e-06, "loss": 0.2464, "step": 2771 }, { "epoch": 0.22175556488870224, "grad_norm": 0.2956583654662576, "learning_rate": 9.829601188600972e-06, "loss": 0.2987, "step": 2772 }, { "epoch": 0.22183556328873422, "grad_norm": 0.41135328763894985, "learning_rate": 9.829433484252292e-06, "loss": 0.2689, "step": 2773 }, { "epoch": 0.22191556168876622, "grad_norm": 0.25479562674346473, "learning_rate": 9.829265698849983e-06, "loss": 0.3213, "step": 2774 }, { "epoch": 0.22199556008879823, "grad_norm": 0.2619112180027783, "learning_rate": 9.829097832396864e-06, "loss": 0.3338, "step": 2775 }, { "epoch": 0.22207555848883023, "grad_norm": 0.3181700521634343, "learning_rate": 9.828929884895753e-06, "loss": 0.2803, "step": 2776 }, { "epoch": 0.2221555568888622, "grad_norm": 0.2425961994417845, "learning_rate": 9.82876185634947e-06, "loss": 0.3154, "step": 2777 }, { "epoch": 0.22223555528889422, "grad_norm": 0.25666679399800707, "learning_rate": 9.82859374676083e-06, "loss": 0.3398, "step": 2778 }, { "epoch": 0.22231555368892622, "grad_norm": 0.310089123725397, "learning_rate": 9.828425556132659e-06, "loss": 0.2835, "step": 2779 }, { "epoch": 0.22239555208895823, "grad_norm": 0.23590066745262112, "learning_rate": 9.828257284467778e-06, "loss": 0.3345, "step": 2780 }, { "epoch": 0.22247555048899023, "grad_norm": 0.3218966870135753, "learning_rate": 9.828088931769012e-06, "loss": 0.3018, "step": 2781 }, { "epoch": 0.2225555488890222, "grad_norm": 0.3278771663091074, "learning_rate": 9.827920498039185e-06, "loss": 0.2778, "step": 2782 }, { "epoch": 0.22263554728905421, "grad_norm": 0.3259407018457449, "learning_rate": 9.827751983281126e-06, "loss": 0.3144, "step": 2783 }, { "epoch": 0.22271554568908622, "grad_norm": 0.21358686684524758, "learning_rate": 9.827583387497664e-06, "loss": 0.3469, "step": 2784 }, { "epoch": 0.22279554408911822, "grad_norm": 0.2969374554508598, "learning_rate": 9.827414710691624e-06, "loss": 0.2954, "step": 2785 }, { "epoch": 0.22287554248915023, "grad_norm": 0.2722321939915326, "learning_rate": 9.82724595286584e-06, "loss": 0.3288, "step": 2786 }, { "epoch": 0.2229555408891822, "grad_norm": 0.3307719806588941, "learning_rate": 9.827077114023145e-06, "loss": 0.2831, "step": 2787 }, { "epoch": 0.2230355392892142, "grad_norm": 0.32519824964350474, "learning_rate": 9.82690819416637e-06, "loss": 0.2936, "step": 2788 }, { "epoch": 0.22311553768924622, "grad_norm": 0.33243564456585944, "learning_rate": 9.826739193298353e-06, "loss": 0.2761, "step": 2789 }, { "epoch": 0.22319553608927822, "grad_norm": 0.3031233247548669, "learning_rate": 9.826570111421929e-06, "loss": 0.313, "step": 2790 }, { "epoch": 0.2232755344893102, "grad_norm": 0.32889574985144054, "learning_rate": 9.826400948539935e-06, "loss": 0.28, "step": 2791 }, { "epoch": 0.2233555328893422, "grad_norm": 0.29675962602250633, "learning_rate": 9.826231704655212e-06, "loss": 0.2894, "step": 2792 }, { "epoch": 0.2234355312893742, "grad_norm": 0.3264323016476866, "learning_rate": 9.826062379770598e-06, "loss": 0.2668, "step": 2793 }, { "epoch": 0.22351552968940622, "grad_norm": 0.3056468078206645, "learning_rate": 9.825892973888937e-06, "loss": 0.3313, "step": 2794 }, { "epoch": 0.22359552808943822, "grad_norm": 0.33479481297415675, "learning_rate": 9.82572348701307e-06, "loss": 0.2812, "step": 2795 }, { "epoch": 0.2236755264894702, "grad_norm": 0.24312384574232604, "learning_rate": 9.825553919145845e-06, "loss": 0.3256, "step": 2796 }, { "epoch": 0.2237555248895022, "grad_norm": 0.33455726808038416, "learning_rate": 9.825384270290104e-06, "loss": 0.2688, "step": 2797 }, { "epoch": 0.2238355232895342, "grad_norm": 0.20786618013182903, "learning_rate": 9.825214540448698e-06, "loss": 0.336, "step": 2798 }, { "epoch": 0.22391552168956622, "grad_norm": 0.3902924261106954, "learning_rate": 9.825044729624472e-06, "loss": 0.2951, "step": 2799 }, { "epoch": 0.22399552008959822, "grad_norm": 0.36141011328120415, "learning_rate": 9.824874837820278e-06, "loss": 0.2645, "step": 2800 }, { "epoch": 0.2240755184896302, "grad_norm": 0.29397943433504675, "learning_rate": 9.824704865038967e-06, "loss": 0.3014, "step": 2801 }, { "epoch": 0.2241555168896622, "grad_norm": 0.3088152417119995, "learning_rate": 9.824534811283393e-06, "loss": 0.2766, "step": 2802 }, { "epoch": 0.2242355152896942, "grad_norm": 0.32536508113358453, "learning_rate": 9.824364676556406e-06, "loss": 0.264, "step": 2803 }, { "epoch": 0.22431551368972621, "grad_norm": 0.3222333487637704, "learning_rate": 9.824194460860867e-06, "loss": 0.2622, "step": 2804 }, { "epoch": 0.2243955120897582, "grad_norm": 0.28243423747395074, "learning_rate": 9.824024164199627e-06, "loss": 0.2957, "step": 2805 }, { "epoch": 0.2244755104897902, "grad_norm": 0.3430101187383174, "learning_rate": 9.82385378657555e-06, "loss": 0.2794, "step": 2806 }, { "epoch": 0.2245555088898222, "grad_norm": 0.30170529761601594, "learning_rate": 9.823683327991492e-06, "loss": 0.2998, "step": 2807 }, { "epoch": 0.2246355072898542, "grad_norm": 0.2792551926949058, "learning_rate": 9.823512788450313e-06, "loss": 0.2887, "step": 2808 }, { "epoch": 0.2247155056898862, "grad_norm": 0.3565739231763268, "learning_rate": 9.82334216795488e-06, "loss": 0.2981, "step": 2809 }, { "epoch": 0.2247955040899182, "grad_norm": 0.29247348419244473, "learning_rate": 9.82317146650805e-06, "loss": 0.3111, "step": 2810 }, { "epoch": 0.2248755024899502, "grad_norm": 0.35156310888539855, "learning_rate": 9.823000684112691e-06, "loss": 0.2757, "step": 2811 }, { "epoch": 0.2249555008899822, "grad_norm": 0.6380107811587993, "learning_rate": 9.822829820771671e-06, "loss": 0.2677, "step": 2812 }, { "epoch": 0.2250354992900142, "grad_norm": 0.302627695018539, "learning_rate": 9.822658876487854e-06, "loss": 0.3083, "step": 2813 }, { "epoch": 0.2251154976900462, "grad_norm": 0.3031087832758983, "learning_rate": 9.822487851264113e-06, "loss": 0.3096, "step": 2814 }, { "epoch": 0.2251954960900782, "grad_norm": 0.34693579892961685, "learning_rate": 9.822316745103316e-06, "loss": 0.2824, "step": 2815 }, { "epoch": 0.2252754944901102, "grad_norm": 0.3302604864919298, "learning_rate": 9.822145558008333e-06, "loss": 0.2822, "step": 2816 }, { "epoch": 0.2253554928901422, "grad_norm": 0.393393172020002, "learning_rate": 9.821974289982042e-06, "loss": 0.2569, "step": 2817 }, { "epoch": 0.2254354912901742, "grad_norm": 0.28973125401530564, "learning_rate": 9.821802941027314e-06, "loss": 0.2965, "step": 2818 }, { "epoch": 0.22551548969020618, "grad_norm": 0.36986866250518635, "learning_rate": 9.821631511147025e-06, "loss": 0.3046, "step": 2819 }, { "epoch": 0.2255954880902382, "grad_norm": 0.31753481111825277, "learning_rate": 9.821460000344053e-06, "loss": 0.2985, "step": 2820 }, { "epoch": 0.2256754864902702, "grad_norm": 0.29486798651425994, "learning_rate": 9.821288408621276e-06, "loss": 0.2836, "step": 2821 }, { "epoch": 0.2257554848903022, "grad_norm": 0.3394435778688585, "learning_rate": 9.821116735981573e-06, "loss": 0.284, "step": 2822 }, { "epoch": 0.2258354832903342, "grad_norm": 0.38080315536991344, "learning_rate": 9.820944982427826e-06, "loss": 0.3433, "step": 2823 }, { "epoch": 0.22591548169036618, "grad_norm": 0.5712058756570679, "learning_rate": 9.820773147962919e-06, "loss": 0.2803, "step": 2824 }, { "epoch": 0.2259954800903982, "grad_norm": 0.2730113783413178, "learning_rate": 9.820601232589735e-06, "loss": 0.3187, "step": 2825 }, { "epoch": 0.2260754784904302, "grad_norm": 0.3066507803765661, "learning_rate": 9.820429236311158e-06, "loss": 0.2674, "step": 2826 }, { "epoch": 0.2261554768904622, "grad_norm": 0.32097813625075844, "learning_rate": 9.820257159130076e-06, "loss": 0.303, "step": 2827 }, { "epoch": 0.2262354752904942, "grad_norm": 0.25517862246193185, "learning_rate": 9.820085001049377e-06, "loss": 0.3375, "step": 2828 }, { "epoch": 0.22631547369052618, "grad_norm": 0.33969974736772013, "learning_rate": 9.81991276207195e-06, "loss": 0.2863, "step": 2829 }, { "epoch": 0.2263954720905582, "grad_norm": 0.3449054338402782, "learning_rate": 9.819740442200685e-06, "loss": 0.2887, "step": 2830 }, { "epoch": 0.2264754704905902, "grad_norm": 0.26241390360242645, "learning_rate": 9.819568041438477e-06, "loss": 0.3215, "step": 2831 }, { "epoch": 0.2265554688906222, "grad_norm": 0.28837138122269645, "learning_rate": 9.819395559788216e-06, "loss": 0.3085, "step": 2832 }, { "epoch": 0.22663546729065417, "grad_norm": 0.24610936941864156, "learning_rate": 9.819222997252798e-06, "loss": 0.3277, "step": 2833 }, { "epoch": 0.22671546569068618, "grad_norm": 0.2895707100625207, "learning_rate": 9.819050353835117e-06, "loss": 0.3119, "step": 2834 }, { "epoch": 0.22679546409071819, "grad_norm": 0.2981039787348253, "learning_rate": 9.818877629538077e-06, "loss": 0.3098, "step": 2835 }, { "epoch": 0.2268754624907502, "grad_norm": 0.34896706608987865, "learning_rate": 9.818704824364571e-06, "loss": 0.287, "step": 2836 }, { "epoch": 0.2269554608907822, "grad_norm": 0.30931190767962996, "learning_rate": 9.818531938317499e-06, "loss": 0.2552, "step": 2837 }, { "epoch": 0.22703545929081417, "grad_norm": 0.35493825927360395, "learning_rate": 9.818358971399767e-06, "loss": 0.2686, "step": 2838 }, { "epoch": 0.22711545769084618, "grad_norm": 0.28195964122994477, "learning_rate": 9.818185923614274e-06, "loss": 0.2896, "step": 2839 }, { "epoch": 0.22719545609087818, "grad_norm": 0.31771860145746555, "learning_rate": 9.818012794963927e-06, "loss": 0.2785, "step": 2840 }, { "epoch": 0.2272754544909102, "grad_norm": 0.2379791786726089, "learning_rate": 9.817839585451629e-06, "loss": 0.3305, "step": 2841 }, { "epoch": 0.2273554528909422, "grad_norm": 0.21446388280018136, "learning_rate": 9.81766629508029e-06, "loss": 0.3605, "step": 2842 }, { "epoch": 0.22743545129097417, "grad_norm": 0.33536153488175674, "learning_rate": 9.817492923852817e-06, "loss": 0.2795, "step": 2843 }, { "epoch": 0.22751544969100618, "grad_norm": 0.32965559388052224, "learning_rate": 9.817319471772117e-06, "loss": 0.3097, "step": 2844 }, { "epoch": 0.22759544809103818, "grad_norm": 0.27789479640478076, "learning_rate": 9.817145938841106e-06, "loss": 0.3017, "step": 2845 }, { "epoch": 0.2276754464910702, "grad_norm": 0.29116083155246303, "learning_rate": 9.816972325062694e-06, "loss": 0.2942, "step": 2846 }, { "epoch": 0.22775544489110217, "grad_norm": 0.2852941848512344, "learning_rate": 9.816798630439794e-06, "loss": 0.2826, "step": 2847 }, { "epoch": 0.22783544329113417, "grad_norm": 0.3390132545668271, "learning_rate": 9.816624854975324e-06, "loss": 0.2584, "step": 2848 }, { "epoch": 0.22791544169116618, "grad_norm": 4.552299893741447, "learning_rate": 9.816450998672195e-06, "loss": 0.2518, "step": 2849 }, { "epoch": 0.22799544009119818, "grad_norm": 0.5354968133124064, "learning_rate": 9.816277061533332e-06, "loss": 0.3024, "step": 2850 }, { "epoch": 0.2280754384912302, "grad_norm": 0.2379432136536726, "learning_rate": 9.816103043561648e-06, "loss": 0.3383, "step": 2851 }, { "epoch": 0.22815543689126216, "grad_norm": 0.2841401912723477, "learning_rate": 9.815928944760068e-06, "loss": 0.2888, "step": 2852 }, { "epoch": 0.22823543529129417, "grad_norm": 0.350176396847407, "learning_rate": 9.815754765131511e-06, "loss": 0.3164, "step": 2853 }, { "epoch": 0.22831543369132618, "grad_norm": 0.32359760961803513, "learning_rate": 9.815580504678903e-06, "loss": 0.2856, "step": 2854 }, { "epoch": 0.22839543209135818, "grad_norm": 0.36137550041740946, "learning_rate": 9.815406163405165e-06, "loss": 0.2441, "step": 2855 }, { "epoch": 0.22847543049139019, "grad_norm": 0.44823054821236286, "learning_rate": 9.815231741313227e-06, "loss": 0.3009, "step": 2856 }, { "epoch": 0.22855542889142216, "grad_norm": 0.30634504078237407, "learning_rate": 9.815057238406015e-06, "loss": 0.2918, "step": 2857 }, { "epoch": 0.22863542729145417, "grad_norm": 0.32786766115995913, "learning_rate": 9.814882654686456e-06, "loss": 0.3012, "step": 2858 }, { "epoch": 0.22871542569148617, "grad_norm": 0.3548557466306779, "learning_rate": 9.814707990157482e-06, "loss": 0.3124, "step": 2859 }, { "epoch": 0.22879542409151818, "grad_norm": 0.34656071151942575, "learning_rate": 9.814533244822025e-06, "loss": 0.2826, "step": 2860 }, { "epoch": 0.22887542249155016, "grad_norm": 0.3028679335179603, "learning_rate": 9.814358418683014e-06, "loss": 0.2901, "step": 2861 }, { "epoch": 0.22895542089158216, "grad_norm": 0.3006034706355257, "learning_rate": 9.814183511743387e-06, "loss": 0.2968, "step": 2862 }, { "epoch": 0.22903541929161417, "grad_norm": 0.28232449889379063, "learning_rate": 9.814008524006077e-06, "loss": 0.3064, "step": 2863 }, { "epoch": 0.22911541769164617, "grad_norm": 0.3575040397575339, "learning_rate": 9.813833455474025e-06, "loss": 0.2946, "step": 2864 }, { "epoch": 0.22919541609167818, "grad_norm": 0.30024328142386997, "learning_rate": 9.813658306150164e-06, "loss": 0.3045, "step": 2865 }, { "epoch": 0.22927541449171016, "grad_norm": 0.2678906401170984, "learning_rate": 9.813483076037438e-06, "loss": 0.3547, "step": 2866 }, { "epoch": 0.22935541289174216, "grad_norm": 0.350971199324955, "learning_rate": 9.813307765138784e-06, "loss": 0.2745, "step": 2867 }, { "epoch": 0.22943541129177417, "grad_norm": 0.27679514295409147, "learning_rate": 9.813132373457147e-06, "loss": 0.3323, "step": 2868 }, { "epoch": 0.22951540969180617, "grad_norm": 0.4476153605934737, "learning_rate": 9.81295690099547e-06, "loss": 0.2921, "step": 2869 }, { "epoch": 0.22959540809183818, "grad_norm": 0.32952290189488787, "learning_rate": 9.812781347756697e-06, "loss": 0.2911, "step": 2870 }, { "epoch": 0.22967540649187015, "grad_norm": 0.37902207056552817, "learning_rate": 9.812605713743775e-06, "loss": 0.281, "step": 2871 }, { "epoch": 0.22975540489190216, "grad_norm": 0.3627156164743356, "learning_rate": 9.812429998959652e-06, "loss": 0.2654, "step": 2872 }, { "epoch": 0.22983540329193416, "grad_norm": 0.2777324422142774, "learning_rate": 9.812254203407278e-06, "loss": 0.3219, "step": 2873 }, { "epoch": 0.22991540169196617, "grad_norm": 0.33421424571686753, "learning_rate": 9.8120783270896e-06, "loss": 0.291, "step": 2874 }, { "epoch": 0.22999540009199815, "grad_norm": 0.25237272088786455, "learning_rate": 9.811902370009576e-06, "loss": 0.337, "step": 2875 }, { "epoch": 0.23007539849203015, "grad_norm": 0.34807911249487616, "learning_rate": 9.811726332170153e-06, "loss": 0.2677, "step": 2876 }, { "epoch": 0.23015539689206216, "grad_norm": 0.3813077855344696, "learning_rate": 9.811550213574287e-06, "loss": 0.2788, "step": 2877 }, { "epoch": 0.23023539529209416, "grad_norm": 0.3430368653491586, "learning_rate": 9.811374014224935e-06, "loss": 0.2698, "step": 2878 }, { "epoch": 0.23031539369212617, "grad_norm": 0.28040863769742647, "learning_rate": 9.811197734125055e-06, "loss": 0.326, "step": 2879 }, { "epoch": 0.23039539209215815, "grad_norm": 0.2893105404958831, "learning_rate": 9.811021373277603e-06, "loss": 0.3094, "step": 2880 }, { "epoch": 0.23047539049219015, "grad_norm": 0.3016796605357451, "learning_rate": 9.810844931685542e-06, "loss": 0.299, "step": 2881 }, { "epoch": 0.23055538889222216, "grad_norm": 0.28737541412543294, "learning_rate": 9.81066840935183e-06, "loss": 0.3388, "step": 2882 }, { "epoch": 0.23063538729225416, "grad_norm": 0.3213815460091929, "learning_rate": 9.810491806279432e-06, "loss": 0.2631, "step": 2883 }, { "epoch": 0.23071538569228617, "grad_norm": 0.3202434828369608, "learning_rate": 9.810315122471309e-06, "loss": 0.296, "step": 2884 }, { "epoch": 0.23079538409231815, "grad_norm": 0.2878981459651451, "learning_rate": 9.81013835793043e-06, "loss": 0.3132, "step": 2885 }, { "epoch": 0.23087538249235015, "grad_norm": 0.2942354562993355, "learning_rate": 9.80996151265976e-06, "loss": 0.2945, "step": 2886 }, { "epoch": 0.23095538089238216, "grad_norm": 0.3161415658395395, "learning_rate": 9.809784586662268e-06, "loss": 0.2702, "step": 2887 }, { "epoch": 0.23103537929241416, "grad_norm": 0.33082684171376475, "learning_rate": 9.809607579940922e-06, "loss": 0.3352, "step": 2888 }, { "epoch": 0.23111537769244614, "grad_norm": 0.3431429291501672, "learning_rate": 9.809430492498693e-06, "loss": 0.2654, "step": 2889 }, { "epoch": 0.23119537609247814, "grad_norm": 0.30387802690260884, "learning_rate": 9.809253324338554e-06, "loss": 0.3197, "step": 2890 }, { "epoch": 0.23127537449251015, "grad_norm": 0.28981388177797623, "learning_rate": 9.809076075463476e-06, "loss": 0.3071, "step": 2891 }, { "epoch": 0.23135537289254215, "grad_norm": 0.3477562458310773, "learning_rate": 9.808898745876439e-06, "loss": 0.2761, "step": 2892 }, { "epoch": 0.23143537129257416, "grad_norm": 0.3633355817237512, "learning_rate": 9.808721335580414e-06, "loss": 0.2866, "step": 2893 }, { "epoch": 0.23151536969260614, "grad_norm": 0.32200710071978406, "learning_rate": 9.80854384457838e-06, "loss": 0.2797, "step": 2894 }, { "epoch": 0.23159536809263814, "grad_norm": 0.2957026891113077, "learning_rate": 9.808366272873317e-06, "loss": 0.2943, "step": 2895 }, { "epoch": 0.23167536649267015, "grad_norm": 0.1975566005310841, "learning_rate": 9.808188620468204e-06, "loss": 0.386, "step": 2896 }, { "epoch": 0.23175536489270215, "grad_norm": 0.33904846839535463, "learning_rate": 9.808010887366024e-06, "loss": 0.2773, "step": 2897 }, { "epoch": 0.23183536329273416, "grad_norm": 0.3248290532592434, "learning_rate": 9.807833073569758e-06, "loss": 0.2765, "step": 2898 }, { "epoch": 0.23191536169276614, "grad_norm": 0.30368960953369173, "learning_rate": 9.807655179082392e-06, "loss": 0.2841, "step": 2899 }, { "epoch": 0.23199536009279814, "grad_norm": 0.3201841405243278, "learning_rate": 9.80747720390691e-06, "loss": 0.2795, "step": 2900 }, { "epoch": 0.23207535849283015, "grad_norm": 0.29593085068935, "learning_rate": 9.807299148046301e-06, "loss": 0.2971, "step": 2901 }, { "epoch": 0.23215535689286215, "grad_norm": 0.33258594183800666, "learning_rate": 9.807121011503552e-06, "loss": 0.3167, "step": 2902 }, { "epoch": 0.23223535529289413, "grad_norm": 0.2951744922122382, "learning_rate": 9.806942794281654e-06, "loss": 0.2816, "step": 2903 }, { "epoch": 0.23231535369292614, "grad_norm": 0.2822058872214629, "learning_rate": 9.806764496383595e-06, "loss": 0.2869, "step": 2904 }, { "epoch": 0.23239535209295814, "grad_norm": 0.30245379605556677, "learning_rate": 9.80658611781237e-06, "loss": 0.303, "step": 2905 }, { "epoch": 0.23247535049299015, "grad_norm": 0.2613250120667393, "learning_rate": 9.806407658570973e-06, "loss": 0.3321, "step": 2906 }, { "epoch": 0.23255534889302215, "grad_norm": 0.3833071678108477, "learning_rate": 9.806229118662398e-06, "loss": 0.259, "step": 2907 }, { "epoch": 0.23263534729305413, "grad_norm": 0.3256194876302207, "learning_rate": 9.806050498089643e-06, "loss": 0.2662, "step": 2908 }, { "epoch": 0.23271534569308613, "grad_norm": 0.2544751485067872, "learning_rate": 9.805871796855704e-06, "loss": 0.335, "step": 2909 }, { "epoch": 0.23279534409311814, "grad_norm": 0.31840997600911863, "learning_rate": 9.80569301496358e-06, "loss": 0.2838, "step": 2910 }, { "epoch": 0.23287534249315014, "grad_norm": 0.2839815610398486, "learning_rate": 9.805514152416274e-06, "loss": 0.2934, "step": 2911 }, { "epoch": 0.23295534089318215, "grad_norm": 0.29603915239964707, "learning_rate": 9.805335209216787e-06, "loss": 0.2947, "step": 2912 }, { "epoch": 0.23303533929321413, "grad_norm": 0.30802267809517675, "learning_rate": 9.80515618536812e-06, "loss": 0.3018, "step": 2913 }, { "epoch": 0.23311533769324613, "grad_norm": 0.2875083640744617, "learning_rate": 9.80497708087328e-06, "loss": 0.2879, "step": 2914 }, { "epoch": 0.23319533609327814, "grad_norm": 0.286485238851757, "learning_rate": 9.80479789573527e-06, "loss": 0.2981, "step": 2915 }, { "epoch": 0.23327533449331014, "grad_norm": 0.31931103348272644, "learning_rate": 9.8046186299571e-06, "loss": 0.2853, "step": 2916 }, { "epoch": 0.23335533289334212, "grad_norm": 0.2809465059431971, "learning_rate": 9.804439283541781e-06, "loss": 0.2938, "step": 2917 }, { "epoch": 0.23343533129337413, "grad_norm": 0.371183069899213, "learning_rate": 9.804259856492318e-06, "loss": 0.3192, "step": 2918 }, { "epoch": 0.23351532969340613, "grad_norm": 0.3095768129349247, "learning_rate": 9.804080348811725e-06, "loss": 0.2683, "step": 2919 }, { "epoch": 0.23359532809343814, "grad_norm": 0.2884060080492279, "learning_rate": 9.803900760503015e-06, "loss": 0.3113, "step": 2920 }, { "epoch": 0.23367532649347014, "grad_norm": 0.18420338116983706, "learning_rate": 9.803721091569201e-06, "loss": 0.3569, "step": 2921 }, { "epoch": 0.23375532489350212, "grad_norm": 0.3866165227233019, "learning_rate": 9.803541342013299e-06, "loss": 0.2789, "step": 2922 }, { "epoch": 0.23383532329353413, "grad_norm": 0.3136911516691616, "learning_rate": 9.803361511838324e-06, "loss": 0.292, "step": 2923 }, { "epoch": 0.23391532169356613, "grad_norm": 0.3139841046105295, "learning_rate": 9.803181601047296e-06, "loss": 0.2864, "step": 2924 }, { "epoch": 0.23399532009359814, "grad_norm": 0.7156320508377609, "learning_rate": 9.803001609643234e-06, "loss": 0.2872, "step": 2925 }, { "epoch": 0.23407531849363014, "grad_norm": 0.3093972189497827, "learning_rate": 9.802821537629162e-06, "loss": 0.2689, "step": 2926 }, { "epoch": 0.23415531689366212, "grad_norm": 0.34576995308752667, "learning_rate": 9.802641385008096e-06, "loss": 0.2718, "step": 2927 }, { "epoch": 0.23423531529369412, "grad_norm": 0.2669261479024423, "learning_rate": 9.802461151783064e-06, "loss": 0.2953, "step": 2928 }, { "epoch": 0.23431531369372613, "grad_norm": 0.2778605474588341, "learning_rate": 9.80228083795709e-06, "loss": 0.2938, "step": 2929 }, { "epoch": 0.23439531209375813, "grad_norm": 0.3153459998291426, "learning_rate": 9.8021004435332e-06, "loss": 0.274, "step": 2930 }, { "epoch": 0.2344753104937901, "grad_norm": 0.3042643397962928, "learning_rate": 9.80191996851442e-06, "loss": 0.3049, "step": 2931 }, { "epoch": 0.23455530889382212, "grad_norm": 0.2810640375570699, "learning_rate": 9.80173941290378e-06, "loss": 0.2752, "step": 2932 }, { "epoch": 0.23463530729385412, "grad_norm": 0.35029874005477063, "learning_rate": 9.801558776704315e-06, "loss": 0.2891, "step": 2933 }, { "epoch": 0.23471530569388613, "grad_norm": 0.3302681592387155, "learning_rate": 9.801378059919049e-06, "loss": 0.3143, "step": 2934 }, { "epoch": 0.23479530409391813, "grad_norm": 0.32902754013508334, "learning_rate": 9.801197262551019e-06, "loss": 0.2649, "step": 2935 }, { "epoch": 0.2348753024939501, "grad_norm": 0.30143525366193513, "learning_rate": 9.801016384603259e-06, "loss": 0.3076, "step": 2936 }, { "epoch": 0.23495530089398212, "grad_norm": 0.2950286435659639, "learning_rate": 9.800835426078804e-06, "loss": 0.2864, "step": 2937 }, { "epoch": 0.23503529929401412, "grad_norm": 0.2886197604190246, "learning_rate": 9.800654386980692e-06, "loss": 0.3057, "step": 2938 }, { "epoch": 0.23511529769404613, "grad_norm": 0.3349307097080484, "learning_rate": 9.800473267311962e-06, "loss": 0.3086, "step": 2939 }, { "epoch": 0.23519529609407813, "grad_norm": 0.30458485290954795, "learning_rate": 9.800292067075651e-06, "loss": 0.2875, "step": 2940 }, { "epoch": 0.2352752944941101, "grad_norm": 0.2657456449217982, "learning_rate": 9.800110786274803e-06, "loss": 0.2761, "step": 2941 }, { "epoch": 0.23535529289414212, "grad_norm": 0.2844425767048757, "learning_rate": 9.79992942491246e-06, "loss": 0.297, "step": 2942 }, { "epoch": 0.23543529129417412, "grad_norm": 0.29706764419014725, "learning_rate": 9.799747982991665e-06, "loss": 0.2966, "step": 2943 }, { "epoch": 0.23551528969420613, "grad_norm": 0.2981523676796156, "learning_rate": 9.799566460515464e-06, "loss": 0.294, "step": 2944 }, { "epoch": 0.2355952880942381, "grad_norm": 0.31984706794791723, "learning_rate": 9.799384857486902e-06, "loss": 0.2666, "step": 2945 }, { "epoch": 0.2356752864942701, "grad_norm": 0.28757138061950765, "learning_rate": 9.799203173909028e-06, "loss": 0.3494, "step": 2946 }, { "epoch": 0.23575528489430211, "grad_norm": 0.3463662811425225, "learning_rate": 9.799021409784892e-06, "loss": 0.2529, "step": 2947 }, { "epoch": 0.23583528329433412, "grad_norm": 0.30171384676569757, "learning_rate": 9.798839565117541e-06, "loss": 0.3091, "step": 2948 }, { "epoch": 0.23591528169436612, "grad_norm": 0.2497302586569846, "learning_rate": 9.798657639910033e-06, "loss": 0.3062, "step": 2949 }, { "epoch": 0.2359952800943981, "grad_norm": 0.3388699331241011, "learning_rate": 9.798475634165417e-06, "loss": 0.2662, "step": 2950 }, { "epoch": 0.2360752784944301, "grad_norm": 0.35006879274503844, "learning_rate": 9.798293547886748e-06, "loss": 0.2703, "step": 2951 }, { "epoch": 0.2361552768944621, "grad_norm": 0.22004554728366202, "learning_rate": 9.798111381077082e-06, "loss": 0.3565, "step": 2952 }, { "epoch": 0.23623527529449412, "grad_norm": 0.3110676567945162, "learning_rate": 9.79792913373948e-06, "loss": 0.2705, "step": 2953 }, { "epoch": 0.2363152736945261, "grad_norm": 0.30986703687485045, "learning_rate": 9.797746805876996e-06, "loss": 0.2872, "step": 2954 }, { "epoch": 0.2363952720945581, "grad_norm": 1.1966868998010027, "learning_rate": 9.79756439749269e-06, "loss": 0.2791, "step": 2955 }, { "epoch": 0.2364752704945901, "grad_norm": 0.30969822109107553, "learning_rate": 9.797381908589627e-06, "loss": 0.2872, "step": 2956 }, { "epoch": 0.2365552688946221, "grad_norm": 0.25544114438781207, "learning_rate": 9.797199339170866e-06, "loss": 0.285, "step": 2957 }, { "epoch": 0.23663526729465412, "grad_norm": 0.3038233583629293, "learning_rate": 9.797016689239476e-06, "loss": 0.2793, "step": 2958 }, { "epoch": 0.2367152656946861, "grad_norm": 0.25458814791769385, "learning_rate": 9.796833958798517e-06, "loss": 0.3116, "step": 2959 }, { "epoch": 0.2367952640947181, "grad_norm": 0.35899247760306424, "learning_rate": 9.79665114785106e-06, "loss": 0.2738, "step": 2960 }, { "epoch": 0.2368752624947501, "grad_norm": 0.28606400914018576, "learning_rate": 9.79646825640017e-06, "loss": 0.2915, "step": 2961 }, { "epoch": 0.2369552608947821, "grad_norm": 0.31552405571307507, "learning_rate": 9.796285284448919e-06, "loss": 0.2617, "step": 2962 }, { "epoch": 0.23703525929481412, "grad_norm": 0.32663266804221264, "learning_rate": 9.796102232000378e-06, "loss": 0.2849, "step": 2963 }, { "epoch": 0.2371152576948461, "grad_norm": 0.21896067443946426, "learning_rate": 9.795919099057616e-06, "loss": 0.3361, "step": 2964 }, { "epoch": 0.2371952560948781, "grad_norm": 0.3017545556149471, "learning_rate": 9.795735885623708e-06, "loss": 0.2697, "step": 2965 }, { "epoch": 0.2372752544949101, "grad_norm": 0.3049294047843202, "learning_rate": 9.795552591701732e-06, "loss": 0.3069, "step": 2966 }, { "epoch": 0.2373552528949421, "grad_norm": 0.31199509519568996, "learning_rate": 9.795369217294759e-06, "loss": 0.2783, "step": 2967 }, { "epoch": 0.2374352512949741, "grad_norm": 0.31991986375481757, "learning_rate": 9.795185762405872e-06, "loss": 0.2711, "step": 2968 }, { "epoch": 0.2375152496950061, "grad_norm": 0.2548193904334765, "learning_rate": 9.795002227038146e-06, "loss": 0.3432, "step": 2969 }, { "epoch": 0.2375952480950381, "grad_norm": 0.31783220837296405, "learning_rate": 9.794818611194662e-06, "loss": 0.2831, "step": 2970 }, { "epoch": 0.2376752464950701, "grad_norm": 0.30272512027014703, "learning_rate": 9.794634914878505e-06, "loss": 0.2848, "step": 2971 }, { "epoch": 0.2377552448951021, "grad_norm": 0.30395645540534616, "learning_rate": 9.794451138092754e-06, "loss": 0.3036, "step": 2972 }, { "epoch": 0.23783524329513409, "grad_norm": 0.23673877372734747, "learning_rate": 9.794267280840494e-06, "loss": 0.3336, "step": 2973 }, { "epoch": 0.2379152416951661, "grad_norm": 0.2708686071736194, "learning_rate": 9.794083343124812e-06, "loss": 0.2826, "step": 2974 }, { "epoch": 0.2379952400951981, "grad_norm": 0.31852771532366536, "learning_rate": 9.793899324948795e-06, "loss": 0.2695, "step": 2975 }, { "epoch": 0.2380752384952301, "grad_norm": 0.3140648590913954, "learning_rate": 9.79371522631553e-06, "loss": 0.2666, "step": 2976 }, { "epoch": 0.2381552368952621, "grad_norm": 0.29611521793134804, "learning_rate": 9.79353104722811e-06, "loss": 0.304, "step": 2977 }, { "epoch": 0.23823523529529408, "grad_norm": 0.24080792863610254, "learning_rate": 9.793346787689622e-06, "loss": 0.3249, "step": 2978 }, { "epoch": 0.2383152336953261, "grad_norm": 0.33059480454093304, "learning_rate": 9.793162447703161e-06, "loss": 0.2835, "step": 2979 }, { "epoch": 0.2383952320953581, "grad_norm": 0.3051598330739392, "learning_rate": 9.79297802727182e-06, "loss": 0.2961, "step": 2980 }, { "epoch": 0.2384752304953901, "grad_norm": 0.2836506287854809, "learning_rate": 9.792793526398694e-06, "loss": 0.3039, "step": 2981 }, { "epoch": 0.23855522889542208, "grad_norm": 0.3107079789153107, "learning_rate": 9.79260894508688e-06, "loss": 0.2543, "step": 2982 }, { "epoch": 0.23863522729545408, "grad_norm": 0.20184273802156774, "learning_rate": 9.792424283339477e-06, "loss": 0.4019, "step": 2983 }, { "epoch": 0.2387152256954861, "grad_norm": 0.28231648378272167, "learning_rate": 9.792239541159581e-06, "loss": 0.2985, "step": 2984 }, { "epoch": 0.2387952240955181, "grad_norm": 0.2307841533136294, "learning_rate": 9.792054718550297e-06, "loss": 0.3023, "step": 2985 }, { "epoch": 0.2388752224955501, "grad_norm": 0.3357385871036078, "learning_rate": 9.791869815514723e-06, "loss": 0.2655, "step": 2986 }, { "epoch": 0.23895522089558208, "grad_norm": 0.3064048846323717, "learning_rate": 9.791684832055962e-06, "loss": 0.2584, "step": 2987 }, { "epoch": 0.23903521929561408, "grad_norm": 0.32205256850572594, "learning_rate": 9.791499768177124e-06, "loss": 0.2625, "step": 2988 }, { "epoch": 0.2391152176956461, "grad_norm": 0.33410193594859283, "learning_rate": 9.79131462388131e-06, "loss": 0.271, "step": 2989 }, { "epoch": 0.2391952160956781, "grad_norm": 0.3341786081080264, "learning_rate": 9.791129399171628e-06, "loss": 0.2865, "step": 2990 }, { "epoch": 0.2392752144957101, "grad_norm": 0.2749110056215002, "learning_rate": 9.790944094051188e-06, "loss": 0.3228, "step": 2991 }, { "epoch": 0.23935521289574208, "grad_norm": 0.2951214848887231, "learning_rate": 9.790758708523099e-06, "loss": 0.2837, "step": 2992 }, { "epoch": 0.23943521129577408, "grad_norm": 0.3018272512432192, "learning_rate": 9.790573242590473e-06, "loss": 0.2504, "step": 2993 }, { "epoch": 0.23951520969580609, "grad_norm": 0.32430565204467254, "learning_rate": 9.790387696256422e-06, "loss": 0.2769, "step": 2994 }, { "epoch": 0.2395952080958381, "grad_norm": 0.2864657586127316, "learning_rate": 9.790202069524061e-06, "loss": 0.2542, "step": 2995 }, { "epoch": 0.23967520649587007, "grad_norm": 0.34528215843029125, "learning_rate": 9.790016362396506e-06, "loss": 0.2747, "step": 2996 }, { "epoch": 0.23975520489590207, "grad_norm": 0.25159441423707923, "learning_rate": 9.789830574876873e-06, "loss": 0.325, "step": 2997 }, { "epoch": 0.23983520329593408, "grad_norm": 0.3315768244227891, "learning_rate": 9.789644706968278e-06, "loss": 0.3028, "step": 2998 }, { "epoch": 0.23991520169596608, "grad_norm": 0.24223165137641967, "learning_rate": 9.789458758673843e-06, "loss": 0.3371, "step": 2999 }, { "epoch": 0.2399952000959981, "grad_norm": 0.3172154648504064, "learning_rate": 9.789272729996689e-06, "loss": 0.2871, "step": 3000 }, { "epoch": 0.24007519849603007, "grad_norm": 0.3403037358604376, "learning_rate": 9.789086620939936e-06, "loss": 0.2482, "step": 3001 }, { "epoch": 0.24015519689606207, "grad_norm": 0.30351299694534395, "learning_rate": 9.788900431506709e-06, "loss": 0.3034, "step": 3002 }, { "epoch": 0.24023519529609408, "grad_norm": 0.3458478226329017, "learning_rate": 9.788714161700135e-06, "loss": 0.2635, "step": 3003 }, { "epoch": 0.24031519369612608, "grad_norm": 0.2923109492954135, "learning_rate": 9.788527811523336e-06, "loss": 0.3079, "step": 3004 }, { "epoch": 0.2403951920961581, "grad_norm": 0.27359555796958407, "learning_rate": 9.78834138097944e-06, "loss": 0.3044, "step": 3005 }, { "epoch": 0.24047519049619007, "grad_norm": 0.2690690877949422, "learning_rate": 9.78815487007158e-06, "loss": 0.2843, "step": 3006 }, { "epoch": 0.24055518889622207, "grad_norm": 0.33983420453492813, "learning_rate": 9.787968278802883e-06, "loss": 0.2785, "step": 3007 }, { "epoch": 0.24063518729625408, "grad_norm": 0.3268249113703334, "learning_rate": 9.78778160717648e-06, "loss": 0.266, "step": 3008 }, { "epoch": 0.24071518569628608, "grad_norm": 0.2693428819884109, "learning_rate": 9.787594855195509e-06, "loss": 0.2864, "step": 3009 }, { "epoch": 0.24079518409631806, "grad_norm": 0.26663551116209877, "learning_rate": 9.787408022863097e-06, "loss": 0.3052, "step": 3010 }, { "epoch": 0.24087518249635007, "grad_norm": 0.266101868160398, "learning_rate": 9.787221110182384e-06, "loss": 0.3409, "step": 3011 }, { "epoch": 0.24095518089638207, "grad_norm": 0.3181706829361852, "learning_rate": 9.787034117156506e-06, "loss": 0.2856, "step": 3012 }, { "epoch": 0.24103517929641408, "grad_norm": 0.499112122181375, "learning_rate": 9.786847043788601e-06, "loss": 0.2661, "step": 3013 }, { "epoch": 0.24111517769644608, "grad_norm": 0.29313108650088443, "learning_rate": 9.786659890081811e-06, "loss": 0.2891, "step": 3014 }, { "epoch": 0.24119517609647806, "grad_norm": 0.31731566370819686, "learning_rate": 9.786472656039275e-06, "loss": 0.2645, "step": 3015 }, { "epoch": 0.24127517449651006, "grad_norm": 0.2880820944356039, "learning_rate": 9.786285341664135e-06, "loss": 0.295, "step": 3016 }, { "epoch": 0.24135517289654207, "grad_norm": 0.3167003258936168, "learning_rate": 9.786097946959534e-06, "loss": 0.2834, "step": 3017 }, { "epoch": 0.24143517129657407, "grad_norm": 0.2528755654853122, "learning_rate": 9.785910471928621e-06, "loss": 0.3217, "step": 3018 }, { "epoch": 0.24151516969660608, "grad_norm": 0.3332560152387603, "learning_rate": 9.785722916574539e-06, "loss": 0.2646, "step": 3019 }, { "epoch": 0.24159516809663806, "grad_norm": 0.25156173656243175, "learning_rate": 9.785535280900437e-06, "loss": 0.296, "step": 3020 }, { "epoch": 0.24167516649667006, "grad_norm": 0.31428653694755915, "learning_rate": 9.785347564909464e-06, "loss": 0.2635, "step": 3021 }, { "epoch": 0.24175516489670207, "grad_norm": 0.4887764792099205, "learning_rate": 9.78515976860477e-06, "loss": 0.2902, "step": 3022 }, { "epoch": 0.24183516329673407, "grad_norm": 0.2825174452197186, "learning_rate": 9.784971891989508e-06, "loss": 0.3103, "step": 3023 }, { "epoch": 0.24191516169676605, "grad_norm": 0.23495952913436058, "learning_rate": 9.784783935066828e-06, "loss": 0.316, "step": 3024 }, { "epoch": 0.24199516009679806, "grad_norm": 0.3302554543255552, "learning_rate": 9.78459589783989e-06, "loss": 0.2579, "step": 3025 }, { "epoch": 0.24207515849683006, "grad_norm": 0.36530593477874856, "learning_rate": 9.784407780311845e-06, "loss": 0.286, "step": 3026 }, { "epoch": 0.24215515689686207, "grad_norm": 0.31424262672399134, "learning_rate": 9.784219582485853e-06, "loss": 0.3096, "step": 3027 }, { "epoch": 0.24223515529689407, "grad_norm": 0.30604335877420313, "learning_rate": 9.784031304365072e-06, "loss": 0.2733, "step": 3028 }, { "epoch": 0.24231515369692605, "grad_norm": 0.29165470388813486, "learning_rate": 9.78384294595266e-06, "loss": 0.311, "step": 3029 }, { "epoch": 0.24239515209695806, "grad_norm": 0.3141428916212566, "learning_rate": 9.78365450725178e-06, "loss": 0.2791, "step": 3030 }, { "epoch": 0.24247515049699006, "grad_norm": 0.3513446508226203, "learning_rate": 9.783465988265594e-06, "loss": 0.305, "step": 3031 }, { "epoch": 0.24255514889702207, "grad_norm": 0.3128965968597101, "learning_rate": 9.78327738899727e-06, "loss": 0.2785, "step": 3032 }, { "epoch": 0.24263514729705407, "grad_norm": 0.32659013609709037, "learning_rate": 9.783088709449967e-06, "loss": 0.2581, "step": 3033 }, { "epoch": 0.24271514569708605, "grad_norm": 0.29780028953274873, "learning_rate": 9.782899949626853e-06, "loss": 0.2735, "step": 3034 }, { "epoch": 0.24279514409711805, "grad_norm": 0.32911214260937544, "learning_rate": 9.7827111095311e-06, "loss": 0.2645, "step": 3035 }, { "epoch": 0.24287514249715006, "grad_norm": 0.3058683177461703, "learning_rate": 9.782522189165873e-06, "loss": 0.2599, "step": 3036 }, { "epoch": 0.24295514089718206, "grad_norm": 0.3018559923800518, "learning_rate": 9.782333188534345e-06, "loss": 0.3163, "step": 3037 }, { "epoch": 0.24303513929721404, "grad_norm": 0.35130398851790273, "learning_rate": 9.782144107639686e-06, "loss": 0.275, "step": 3038 }, { "epoch": 0.24311513769724605, "grad_norm": 0.2753139169580173, "learning_rate": 9.781954946485072e-06, "loss": 0.3022, "step": 3039 }, { "epoch": 0.24319513609727805, "grad_norm": 0.28902516909497084, "learning_rate": 9.781765705073679e-06, "loss": 0.3018, "step": 3040 }, { "epoch": 0.24327513449731006, "grad_norm": 0.2702473997635275, "learning_rate": 9.781576383408678e-06, "loss": 0.3008, "step": 3041 }, { "epoch": 0.24335513289734206, "grad_norm": 0.31976478220407334, "learning_rate": 9.781386981493249e-06, "loss": 0.263, "step": 3042 }, { "epoch": 0.24343513129737404, "grad_norm": 0.2918703750343495, "learning_rate": 9.781197499330572e-06, "loss": 0.2855, "step": 3043 }, { "epoch": 0.24351512969740605, "grad_norm": 0.31182659920799544, "learning_rate": 9.781007936923825e-06, "loss": 0.2518, "step": 3044 }, { "epoch": 0.24359512809743805, "grad_norm": 0.30301870677231585, "learning_rate": 9.78081829427619e-06, "loss": 0.2803, "step": 3045 }, { "epoch": 0.24367512649747006, "grad_norm": 0.3206395881246978, "learning_rate": 9.780628571390853e-06, "loss": 0.2854, "step": 3046 }, { "epoch": 0.24375512489750206, "grad_norm": 0.23894183492628182, "learning_rate": 9.780438768270992e-06, "loss": 0.3213, "step": 3047 }, { "epoch": 0.24383512329753404, "grad_norm": 0.31123147441317217, "learning_rate": 9.780248884919799e-06, "loss": 0.2691, "step": 3048 }, { "epoch": 0.24391512169756605, "grad_norm": 0.3258184100803723, "learning_rate": 9.780058921340456e-06, "loss": 0.266, "step": 3049 }, { "epoch": 0.24399512009759805, "grad_norm": 0.2490481856550722, "learning_rate": 9.779868877536154e-06, "loss": 0.3271, "step": 3050 }, { "epoch": 0.24407511849763006, "grad_norm": 0.29501399334033884, "learning_rate": 9.779678753510082e-06, "loss": 0.2978, "step": 3051 }, { "epoch": 0.24415511689766203, "grad_norm": 0.3293627338212833, "learning_rate": 9.779488549265429e-06, "loss": 0.2536, "step": 3052 }, { "epoch": 0.24423511529769404, "grad_norm": 0.3163076790876164, "learning_rate": 9.77929826480539e-06, "loss": 0.2977, "step": 3053 }, { "epoch": 0.24431511369772604, "grad_norm": 0.5043741272360033, "learning_rate": 9.779107900133157e-06, "loss": 0.2996, "step": 3054 }, { "epoch": 0.24439511209775805, "grad_norm": 0.3679864982254292, "learning_rate": 9.778917455251924e-06, "loss": 0.2782, "step": 3055 }, { "epoch": 0.24447511049779005, "grad_norm": 0.28931736767332183, "learning_rate": 9.77872693016489e-06, "loss": 0.3284, "step": 3056 }, { "epoch": 0.24455510889782203, "grad_norm": 0.23200463723555667, "learning_rate": 9.778536324875252e-06, "loss": 0.2971, "step": 3057 }, { "epoch": 0.24463510729785404, "grad_norm": 0.2729819359106997, "learning_rate": 9.778345639386206e-06, "loss": 0.3195, "step": 3058 }, { "epoch": 0.24471510569788604, "grad_norm": 0.33404891863436337, "learning_rate": 9.778154873700956e-06, "loss": 0.2621, "step": 3059 }, { "epoch": 0.24479510409791805, "grad_norm": 0.32870271104589577, "learning_rate": 9.777964027822701e-06, "loss": 0.3116, "step": 3060 }, { "epoch": 0.24487510249795005, "grad_norm": 0.22519185142925494, "learning_rate": 9.777773101754648e-06, "loss": 0.3703, "step": 3061 }, { "epoch": 0.24495510089798203, "grad_norm": 0.3365647932265385, "learning_rate": 9.777582095499995e-06, "loss": 0.2793, "step": 3062 }, { "epoch": 0.24503509929801404, "grad_norm": 0.30441162904786606, "learning_rate": 9.777391009061954e-06, "loss": 0.2828, "step": 3063 }, { "epoch": 0.24511509769804604, "grad_norm": 0.28644365705133507, "learning_rate": 9.777199842443729e-06, "loss": 0.2879, "step": 3064 }, { "epoch": 0.24519509609807805, "grad_norm": 0.2966228841810259, "learning_rate": 9.777008595648527e-06, "loss": 0.3145, "step": 3065 }, { "epoch": 0.24527509449811002, "grad_norm": 0.27027226974388907, "learning_rate": 9.776817268679562e-06, "loss": 0.335, "step": 3066 }, { "epoch": 0.24535509289814203, "grad_norm": 0.2296889240172845, "learning_rate": 9.77662586154004e-06, "loss": 0.3139, "step": 3067 }, { "epoch": 0.24543509129817404, "grad_norm": 0.255096222810185, "learning_rate": 9.776434374233178e-06, "loss": 0.331, "step": 3068 }, { "epoch": 0.24551508969820604, "grad_norm": 0.4319542245186691, "learning_rate": 9.776242806762187e-06, "loss": 0.3438, "step": 3069 }, { "epoch": 0.24559508809823805, "grad_norm": 0.3413211685378461, "learning_rate": 9.776051159130283e-06, "loss": 0.2741, "step": 3070 }, { "epoch": 0.24567508649827002, "grad_norm": 0.27080491474753127, "learning_rate": 9.775859431340681e-06, "loss": 0.293, "step": 3071 }, { "epoch": 0.24575508489830203, "grad_norm": 0.33970203133520444, "learning_rate": 9.775667623396601e-06, "loss": 0.2721, "step": 3072 }, { "epoch": 0.24583508329833403, "grad_norm": 0.2724470607668596, "learning_rate": 9.775475735301261e-06, "loss": 0.3463, "step": 3073 }, { "epoch": 0.24591508169836604, "grad_norm": 0.30747640971273876, "learning_rate": 9.775283767057883e-06, "loss": 0.2898, "step": 3074 }, { "epoch": 0.24599508009839804, "grad_norm": 0.29194169430773564, "learning_rate": 9.775091718669688e-06, "loss": 0.2555, "step": 3075 }, { "epoch": 0.24607507849843002, "grad_norm": 0.316860011792518, "learning_rate": 9.774899590139897e-06, "loss": 0.2879, "step": 3076 }, { "epoch": 0.24615507689846203, "grad_norm": 0.2995507920827911, "learning_rate": 9.774707381471737e-06, "loss": 0.2816, "step": 3077 }, { "epoch": 0.24623507529849403, "grad_norm": 0.25499531005472453, "learning_rate": 9.774515092668435e-06, "loss": 0.3451, "step": 3078 }, { "epoch": 0.24631507369852604, "grad_norm": 0.29942815615249313, "learning_rate": 9.774322723733216e-06, "loss": 0.2998, "step": 3079 }, { "epoch": 0.24639507209855802, "grad_norm": 0.27342280409800107, "learning_rate": 9.774130274669309e-06, "loss": 0.3112, "step": 3080 }, { "epoch": 0.24647507049859002, "grad_norm": 0.31731505339244853, "learning_rate": 9.773937745479942e-06, "loss": 0.2805, "step": 3081 }, { "epoch": 0.24655506889862203, "grad_norm": 0.3123969939483814, "learning_rate": 9.773745136168352e-06, "loss": 0.3003, "step": 3082 }, { "epoch": 0.24663506729865403, "grad_norm": 0.26952883835143066, "learning_rate": 9.773552446737764e-06, "loss": 0.2724, "step": 3083 }, { "epoch": 0.24671506569868604, "grad_norm": 0.26326345472443974, "learning_rate": 9.773359677191418e-06, "loss": 0.3075, "step": 3084 }, { "epoch": 0.24679506409871801, "grad_norm": 0.34024679826568205, "learning_rate": 9.773166827532548e-06, "loss": 0.308, "step": 3085 }, { "epoch": 0.24687506249875002, "grad_norm": 0.32871923333498027, "learning_rate": 9.772973897764389e-06, "loss": 0.2628, "step": 3086 }, { "epoch": 0.24695506089878203, "grad_norm": 0.26587052116698695, "learning_rate": 9.77278088789018e-06, "loss": 0.3303, "step": 3087 }, { "epoch": 0.24703505929881403, "grad_norm": 0.3144201460443083, "learning_rate": 9.77258779791316e-06, "loss": 0.2939, "step": 3088 }, { "epoch": 0.24711505769884604, "grad_norm": 0.23891789720261453, "learning_rate": 9.77239462783657e-06, "loss": 0.3162, "step": 3089 }, { "epoch": 0.247195056098878, "grad_norm": 0.34417679590389627, "learning_rate": 9.77220137766365e-06, "loss": 0.2922, "step": 3090 }, { "epoch": 0.24727505449891002, "grad_norm": 0.29328897436891405, "learning_rate": 9.772008047397647e-06, "loss": 0.3097, "step": 3091 }, { "epoch": 0.24735505289894202, "grad_norm": 0.2446005038709564, "learning_rate": 9.771814637041806e-06, "loss": 0.3411, "step": 3092 }, { "epoch": 0.24743505129897403, "grad_norm": 0.3517788268670302, "learning_rate": 9.771621146599369e-06, "loss": 0.2545, "step": 3093 }, { "epoch": 0.247515049699006, "grad_norm": 0.31954605551797144, "learning_rate": 9.771427576073584e-06, "loss": 0.2497, "step": 3094 }, { "epoch": 0.247595048099038, "grad_norm": 0.38167982554778185, "learning_rate": 9.771233925467703e-06, "loss": 0.2772, "step": 3095 }, { "epoch": 0.24767504649907002, "grad_norm": 0.3255399169571891, "learning_rate": 9.771040194784973e-06, "loss": 0.2562, "step": 3096 }, { "epoch": 0.24775504489910202, "grad_norm": 0.28286986479732285, "learning_rate": 9.770846384028647e-06, "loss": 0.3102, "step": 3097 }, { "epoch": 0.24783504329913403, "grad_norm": 0.3612863164605747, "learning_rate": 9.770652493201977e-06, "loss": 0.2943, "step": 3098 }, { "epoch": 0.247915041699166, "grad_norm": 0.3168649848502705, "learning_rate": 9.77045852230822e-06, "loss": 0.2698, "step": 3099 }, { "epoch": 0.247995040099198, "grad_norm": 0.32849858658128883, "learning_rate": 9.770264471350628e-06, "loss": 0.2953, "step": 3100 }, { "epoch": 0.24807503849923002, "grad_norm": 0.31385057119707854, "learning_rate": 9.770070340332457e-06, "loss": 0.298, "step": 3101 }, { "epoch": 0.24815503689926202, "grad_norm": 0.34516009215340687, "learning_rate": 9.769876129256969e-06, "loss": 0.263, "step": 3102 }, { "epoch": 0.24823503529929403, "grad_norm": 0.44322728229939024, "learning_rate": 9.769681838127421e-06, "loss": 0.2522, "step": 3103 }, { "epoch": 0.248315033699326, "grad_norm": 0.334640302637303, "learning_rate": 9.769487466947075e-06, "loss": 0.2378, "step": 3104 }, { "epoch": 0.248395032099358, "grad_norm": 0.2375613673412739, "learning_rate": 9.76929301571919e-06, "loss": 0.3302, "step": 3105 }, { "epoch": 0.24847503049939001, "grad_norm": 0.3218906643486621, "learning_rate": 9.769098484447034e-06, "loss": 0.3145, "step": 3106 }, { "epoch": 0.24855502889942202, "grad_norm": 0.23786766950075333, "learning_rate": 9.76890387313387e-06, "loss": 0.3452, "step": 3107 }, { "epoch": 0.248635027299454, "grad_norm": 0.24578248063222669, "learning_rate": 9.768709181782962e-06, "loss": 0.3301, "step": 3108 }, { "epoch": 0.248715025699486, "grad_norm": 0.3247073109242354, "learning_rate": 9.768514410397583e-06, "loss": 0.276, "step": 3109 }, { "epoch": 0.248795024099518, "grad_norm": 0.3091310869026541, "learning_rate": 9.768319558980997e-06, "loss": 0.2529, "step": 3110 }, { "epoch": 0.24887502249955, "grad_norm": 0.7231704557010377, "learning_rate": 9.768124627536474e-06, "loss": 0.2712, "step": 3111 }, { "epoch": 0.24895502089958202, "grad_norm": 0.2757231559350152, "learning_rate": 9.767929616067289e-06, "loss": 0.2942, "step": 3112 }, { "epoch": 0.249035019299614, "grad_norm": 0.2502254882635002, "learning_rate": 9.767734524576714e-06, "loss": 0.322, "step": 3113 }, { "epoch": 0.249115017699646, "grad_norm": 0.30291739176643107, "learning_rate": 9.767539353068021e-06, "loss": 0.2513, "step": 3114 }, { "epoch": 0.249195016099678, "grad_norm": 0.32701353695295515, "learning_rate": 9.767344101544489e-06, "loss": 0.2941, "step": 3115 }, { "epoch": 0.24927501449971, "grad_norm": 0.2791713759134054, "learning_rate": 9.767148770009393e-06, "loss": 0.2808, "step": 3116 }, { "epoch": 0.24935501289974202, "grad_norm": 0.5971855118611495, "learning_rate": 9.76695335846601e-06, "loss": 0.2939, "step": 3117 }, { "epoch": 0.249435011299774, "grad_norm": 0.2754846734676285, "learning_rate": 9.766757866917622e-06, "loss": 0.3219, "step": 3118 }, { "epoch": 0.249515009699806, "grad_norm": 0.29331806050667314, "learning_rate": 9.76656229536751e-06, "loss": 0.2953, "step": 3119 }, { "epoch": 0.249595008099838, "grad_norm": 0.26598381966268686, "learning_rate": 9.766366643818954e-06, "loss": 0.3029, "step": 3120 }, { "epoch": 0.24967500649987, "grad_norm": 0.2325438730593487, "learning_rate": 9.76617091227524e-06, "loss": 0.3282, "step": 3121 }, { "epoch": 0.249755004899902, "grad_norm": 0.3201023287744438, "learning_rate": 9.76597510073965e-06, "loss": 0.2714, "step": 3122 }, { "epoch": 0.249835003299934, "grad_norm": 0.31257830277634696, "learning_rate": 9.765779209215474e-06, "loss": 0.2732, "step": 3123 }, { "epoch": 0.249915001699966, "grad_norm": 0.48566379791154374, "learning_rate": 9.765583237705999e-06, "loss": 0.2648, "step": 3124 }, { "epoch": 0.249995000099998, "grad_norm": 0.32847257438351457, "learning_rate": 9.765387186214512e-06, "loss": 0.3163, "step": 3125 }, { "epoch": 0.25007499850003, "grad_norm": 0.35912689079795534, "learning_rate": 9.765191054744305e-06, "loss": 0.3195, "step": 3126 }, { "epoch": 0.250154996900062, "grad_norm": 0.30305233523238356, "learning_rate": 9.76499484329867e-06, "loss": 0.3107, "step": 3127 }, { "epoch": 0.250234995300094, "grad_norm": 0.29303062304337557, "learning_rate": 9.764798551880898e-06, "loss": 0.2854, "step": 3128 }, { "epoch": 0.250314993700126, "grad_norm": 0.2791212878065816, "learning_rate": 9.764602180494285e-06, "loss": 0.3055, "step": 3129 }, { "epoch": 0.250394992100158, "grad_norm": 0.2685813521171975, "learning_rate": 9.764405729142129e-06, "loss": 0.3099, "step": 3130 }, { "epoch": 0.25047499050019, "grad_norm": 0.31286079404996653, "learning_rate": 9.764209197827721e-06, "loss": 0.2606, "step": 3131 }, { "epoch": 0.250554988900222, "grad_norm": 0.2919168005108195, "learning_rate": 9.764012586554364e-06, "loss": 0.2934, "step": 3132 }, { "epoch": 0.250634987300254, "grad_norm": 0.31864235475792807, "learning_rate": 9.763815895325357e-06, "loss": 0.2723, "step": 3133 }, { "epoch": 0.250714985700286, "grad_norm": 0.251421311433494, "learning_rate": 9.763619124144003e-06, "loss": 0.3258, "step": 3134 }, { "epoch": 0.250794984100318, "grad_norm": 0.3651064910573383, "learning_rate": 9.7634222730136e-06, "loss": 0.2575, "step": 3135 }, { "epoch": 0.25087498250035, "grad_norm": 0.25729249414434596, "learning_rate": 9.763225341937455e-06, "loss": 0.3264, "step": 3136 }, { "epoch": 0.250954980900382, "grad_norm": 0.30144316623451045, "learning_rate": 9.763028330918874e-06, "loss": 0.2436, "step": 3137 }, { "epoch": 0.251034979300414, "grad_norm": 0.32593775625968424, "learning_rate": 9.76283123996116e-06, "loss": 0.266, "step": 3138 }, { "epoch": 0.251114977700446, "grad_norm": 0.2938718733586929, "learning_rate": 9.762634069067622e-06, "loss": 0.3136, "step": 3139 }, { "epoch": 0.251194976100478, "grad_norm": 0.31234139817401346, "learning_rate": 9.76243681824157e-06, "loss": 0.274, "step": 3140 }, { "epoch": 0.25127497450051, "grad_norm": 0.323618036090237, "learning_rate": 9.762239487486316e-06, "loss": 0.2798, "step": 3141 }, { "epoch": 0.251354972900542, "grad_norm": 0.23281698233229056, "learning_rate": 9.762042076805169e-06, "loss": 0.3586, "step": 3142 }, { "epoch": 0.25143497130057396, "grad_norm": 0.2875508610252089, "learning_rate": 9.761844586201444e-06, "loss": 0.303, "step": 3143 }, { "epoch": 0.251514969700606, "grad_norm": 0.32343200371070885, "learning_rate": 9.761647015678455e-06, "loss": 0.2952, "step": 3144 }, { "epoch": 0.251594968100638, "grad_norm": 0.3070580428762798, "learning_rate": 9.761449365239518e-06, "loss": 0.2932, "step": 3145 }, { "epoch": 0.25167496650067, "grad_norm": 0.3188035438070415, "learning_rate": 9.761251634887949e-06, "loss": 0.2941, "step": 3146 }, { "epoch": 0.251754964900702, "grad_norm": 0.30375534864295345, "learning_rate": 9.761053824627068e-06, "loss": 0.3033, "step": 3147 }, { "epoch": 0.25183496330073396, "grad_norm": 0.32955671063662617, "learning_rate": 9.760855934460193e-06, "loss": 0.2585, "step": 3148 }, { "epoch": 0.251914961700766, "grad_norm": 0.2321404191599689, "learning_rate": 9.76065796439065e-06, "loss": 0.3327, "step": 3149 }, { "epoch": 0.25199496010079797, "grad_norm": 0.29660382532939567, "learning_rate": 9.760459914421756e-06, "loss": 0.2441, "step": 3150 }, { "epoch": 0.25207495850083, "grad_norm": 0.2989727475467132, "learning_rate": 9.76026178455684e-06, "loss": 0.297, "step": 3151 }, { "epoch": 0.252154956900862, "grad_norm": 0.3331313336036164, "learning_rate": 9.760063574799221e-06, "loss": 0.2783, "step": 3152 }, { "epoch": 0.25223495530089396, "grad_norm": 0.3543229019862621, "learning_rate": 9.759865285152231e-06, "loss": 0.2771, "step": 3153 }, { "epoch": 0.252314953700926, "grad_norm": 0.2862505184288917, "learning_rate": 9.759666915619195e-06, "loss": 0.2839, "step": 3154 }, { "epoch": 0.25239495210095797, "grad_norm": 0.3137654879459138, "learning_rate": 9.759468466203444e-06, "loss": 0.3133, "step": 3155 }, { "epoch": 0.25247495050099, "grad_norm": 0.27556620221835976, "learning_rate": 9.759269936908308e-06, "loss": 0.2817, "step": 3156 }, { "epoch": 0.252554948901022, "grad_norm": 0.2882192169145278, "learning_rate": 9.75907132773712e-06, "loss": 0.3015, "step": 3157 }, { "epoch": 0.25263494730105396, "grad_norm": 0.32162320697352065, "learning_rate": 9.75887263869321e-06, "loss": 0.2822, "step": 3158 }, { "epoch": 0.252714945701086, "grad_norm": 0.2931425106674063, "learning_rate": 9.758673869779915e-06, "loss": 0.3031, "step": 3159 }, { "epoch": 0.25279494410111797, "grad_norm": 0.31050957134911716, "learning_rate": 9.758475021000572e-06, "loss": 0.2668, "step": 3160 }, { "epoch": 0.25287494250115, "grad_norm": 0.3468256199857036, "learning_rate": 9.758276092358518e-06, "loss": 0.2716, "step": 3161 }, { "epoch": 0.252954940901182, "grad_norm": 0.22159482162827437, "learning_rate": 9.758077083857091e-06, "loss": 0.3687, "step": 3162 }, { "epoch": 0.25303493930121396, "grad_norm": 0.19353265301436734, "learning_rate": 9.75787799549963e-06, "loss": 0.3327, "step": 3163 }, { "epoch": 0.253114937701246, "grad_norm": 0.3287110143423559, "learning_rate": 9.757678827289476e-06, "loss": 0.2856, "step": 3164 }, { "epoch": 0.25319493610127797, "grad_norm": 0.37082405106310107, "learning_rate": 9.757479579229974e-06, "loss": 0.2537, "step": 3165 }, { "epoch": 0.25327493450131, "grad_norm": 0.3073932256828583, "learning_rate": 9.757280251324468e-06, "loss": 0.2532, "step": 3166 }, { "epoch": 0.253354932901342, "grad_norm": 0.2959037483714209, "learning_rate": 9.757080843576301e-06, "loss": 0.2572, "step": 3167 }, { "epoch": 0.25343493130137396, "grad_norm": 0.35160350708915283, "learning_rate": 9.756881355988823e-06, "loss": 0.2728, "step": 3168 }, { "epoch": 0.253514929701406, "grad_norm": 0.3016602684599165, "learning_rate": 9.756681788565379e-06, "loss": 0.2867, "step": 3169 }, { "epoch": 0.25359492810143797, "grad_norm": 0.2884132604018904, "learning_rate": 9.756482141309319e-06, "loss": 0.2867, "step": 3170 }, { "epoch": 0.25367492650146994, "grad_norm": 0.27790658732169743, "learning_rate": 9.756282414223995e-06, "loss": 0.3007, "step": 3171 }, { "epoch": 0.253754924901502, "grad_norm": 0.3320424066135094, "learning_rate": 9.756082607312756e-06, "loss": 0.2822, "step": 3172 }, { "epoch": 0.25383492330153395, "grad_norm": 0.28418536740923783, "learning_rate": 9.75588272057896e-06, "loss": 0.2905, "step": 3173 }, { "epoch": 0.253914921701566, "grad_norm": 0.4074420406588612, "learning_rate": 9.75568275402596e-06, "loss": 0.2992, "step": 3174 }, { "epoch": 0.25399492010159797, "grad_norm": 0.3231161694624928, "learning_rate": 9.755482707657109e-06, "loss": 0.2618, "step": 3175 }, { "epoch": 0.25407491850162994, "grad_norm": 0.27744247795812155, "learning_rate": 9.755282581475769e-06, "loss": 0.2821, "step": 3176 }, { "epoch": 0.254154916901662, "grad_norm": 0.33259579697216696, "learning_rate": 9.755082375485296e-06, "loss": 0.2837, "step": 3177 }, { "epoch": 0.25423491530169395, "grad_norm": 0.3084921934587112, "learning_rate": 9.75488208968905e-06, "loss": 0.2683, "step": 3178 }, { "epoch": 0.254314913701726, "grad_norm": 0.32846868113232724, "learning_rate": 9.754681724090396e-06, "loss": 0.2514, "step": 3179 }, { "epoch": 0.25439491210175796, "grad_norm": 0.28290460456135014, "learning_rate": 9.75448127869269e-06, "loss": 0.3106, "step": 3180 }, { "epoch": 0.25447491050178994, "grad_norm": 0.27222462079886184, "learning_rate": 9.754280753499306e-06, "loss": 0.3334, "step": 3181 }, { "epoch": 0.254554908901822, "grad_norm": 0.2910273087368221, "learning_rate": 9.7540801485136e-06, "loss": 0.2987, "step": 3182 }, { "epoch": 0.25463490730185395, "grad_norm": 0.28369410318268795, "learning_rate": 9.753879463738942e-06, "loss": 0.2972, "step": 3183 }, { "epoch": 0.254714905701886, "grad_norm": 0.301861897644405, "learning_rate": 9.753678699178702e-06, "loss": 0.2635, "step": 3184 }, { "epoch": 0.25479490410191796, "grad_norm": 0.3084697951275763, "learning_rate": 9.753477854836248e-06, "loss": 0.2574, "step": 3185 }, { "epoch": 0.25487490250194994, "grad_norm": 0.2873646860488063, "learning_rate": 9.75327693071495e-06, "loss": 0.2897, "step": 3186 }, { "epoch": 0.254954900901982, "grad_norm": 0.2729266506001137, "learning_rate": 9.753075926818182e-06, "loss": 0.2986, "step": 3187 }, { "epoch": 0.25503489930201395, "grad_norm": 0.34359885026253295, "learning_rate": 9.752874843149316e-06, "loss": 0.2704, "step": 3188 }, { "epoch": 0.255114897702046, "grad_norm": 0.29824452263794987, "learning_rate": 9.752673679711728e-06, "loss": 0.3029, "step": 3189 }, { "epoch": 0.25519489610207796, "grad_norm": 0.28658816453112573, "learning_rate": 9.752472436508794e-06, "loss": 0.3078, "step": 3190 }, { "epoch": 0.25527489450210994, "grad_norm": 0.2932443757340073, "learning_rate": 9.75227111354389e-06, "loss": 0.293, "step": 3191 }, { "epoch": 0.25535489290214197, "grad_norm": 0.26676321530325825, "learning_rate": 9.752069710820398e-06, "loss": 0.3405, "step": 3192 }, { "epoch": 0.25543489130217395, "grad_norm": 0.30716866592177, "learning_rate": 9.751868228341695e-06, "loss": 0.2971, "step": 3193 }, { "epoch": 0.255514889702206, "grad_norm": 0.2667353096275026, "learning_rate": 9.751666666111162e-06, "loss": 0.3178, "step": 3194 }, { "epoch": 0.25559488810223796, "grad_norm": 0.24691711895703883, "learning_rate": 9.751465024132184e-06, "loss": 0.322, "step": 3195 }, { "epoch": 0.25567488650226994, "grad_norm": 0.3277837754072894, "learning_rate": 9.751263302408146e-06, "loss": 0.2694, "step": 3196 }, { "epoch": 0.25575488490230197, "grad_norm": 0.28454348245061956, "learning_rate": 9.751061500942434e-06, "loss": 0.323, "step": 3197 }, { "epoch": 0.25583488330233395, "grad_norm": 0.28680987886515613, "learning_rate": 9.75085961973843e-06, "loss": 0.2886, "step": 3198 }, { "epoch": 0.2559148817023659, "grad_norm": 0.3176873727220522, "learning_rate": 9.750657658799528e-06, "loss": 0.2793, "step": 3199 }, { "epoch": 0.25599488010239796, "grad_norm": 0.30954563210196295, "learning_rate": 9.750455618129115e-06, "loss": 0.2824, "step": 3200 }, { "epoch": 0.25607487850242994, "grad_norm": 0.3185704757330435, "learning_rate": 9.75025349773058e-06, "loss": 0.2666, "step": 3201 }, { "epoch": 0.25615487690246197, "grad_norm": 0.308420890862119, "learning_rate": 9.750051297607317e-06, "loss": 0.265, "step": 3202 }, { "epoch": 0.25623487530249395, "grad_norm": 0.367922928666844, "learning_rate": 9.749849017762723e-06, "loss": 0.2583, "step": 3203 }, { "epoch": 0.2563148737025259, "grad_norm": 0.3619544134758433, "learning_rate": 9.749646658200187e-06, "loss": 0.2704, "step": 3204 }, { "epoch": 0.25639487210255796, "grad_norm": 0.29600735509099035, "learning_rate": 9.749444218923108e-06, "loss": 0.3101, "step": 3205 }, { "epoch": 0.25647487050258994, "grad_norm": 0.23244678078243874, "learning_rate": 9.749241699934883e-06, "loss": 0.3079, "step": 3206 }, { "epoch": 0.25655486890262197, "grad_norm": 0.2331057033800822, "learning_rate": 9.749039101238914e-06, "loss": 0.3264, "step": 3207 }, { "epoch": 0.25663486730265395, "grad_norm": 0.2663008358159601, "learning_rate": 9.748836422838597e-06, "loss": 0.3294, "step": 3208 }, { "epoch": 0.2567148657026859, "grad_norm": 0.27988414306715015, "learning_rate": 9.748633664737334e-06, "loss": 0.2956, "step": 3209 }, { "epoch": 0.25679486410271796, "grad_norm": 0.4228685523315738, "learning_rate": 9.74843082693853e-06, "loss": 0.2795, "step": 3210 }, { "epoch": 0.25687486250274993, "grad_norm": 0.34759790146086567, "learning_rate": 9.74822790944559e-06, "loss": 0.2736, "step": 3211 }, { "epoch": 0.25695486090278197, "grad_norm": 0.2464487087484887, "learning_rate": 9.748024912261917e-06, "loss": 0.3283, "step": 3212 }, { "epoch": 0.25703485930281395, "grad_norm": 0.28081185935381325, "learning_rate": 9.74782183539092e-06, "loss": 0.2894, "step": 3213 }, { "epoch": 0.2571148577028459, "grad_norm": 0.30395882478133074, "learning_rate": 9.747618678836006e-06, "loss": 0.2646, "step": 3214 }, { "epoch": 0.25719485610287796, "grad_norm": 0.31355911123205715, "learning_rate": 9.747415442600585e-06, "loss": 0.2707, "step": 3215 }, { "epoch": 0.25727485450290993, "grad_norm": 0.2918532539368113, "learning_rate": 9.747212126688067e-06, "loss": 0.3107, "step": 3216 }, { "epoch": 0.25735485290294197, "grad_norm": 0.28087800356704284, "learning_rate": 9.747008731101865e-06, "loss": 0.3038, "step": 3217 }, { "epoch": 0.25743485130297394, "grad_norm": 0.28380640312623645, "learning_rate": 9.746805255845395e-06, "loss": 0.2921, "step": 3218 }, { "epoch": 0.2575148497030059, "grad_norm": 0.3140372124811116, "learning_rate": 9.74660170092207e-06, "loss": 0.2707, "step": 3219 }, { "epoch": 0.25759484810303795, "grad_norm": 0.27691319017455657, "learning_rate": 9.746398066335304e-06, "loss": 0.3023, "step": 3220 }, { "epoch": 0.25767484650306993, "grad_norm": 0.3603708862689575, "learning_rate": 9.746194352088518e-06, "loss": 0.3079, "step": 3221 }, { "epoch": 0.25775484490310197, "grad_norm": 0.21567589896663422, "learning_rate": 9.74599055818513e-06, "loss": 0.3618, "step": 3222 }, { "epoch": 0.25783484330313394, "grad_norm": 0.34908780889054475, "learning_rate": 9.74578668462856e-06, "loss": 0.2725, "step": 3223 }, { "epoch": 0.2579148417031659, "grad_norm": 0.30762254538976935, "learning_rate": 9.74558273142223e-06, "loss": 0.312, "step": 3224 }, { "epoch": 0.25799484010319795, "grad_norm": 0.28903001684581536, "learning_rate": 9.745378698569562e-06, "loss": 0.3015, "step": 3225 }, { "epoch": 0.25807483850322993, "grad_norm": 0.32875194600273516, "learning_rate": 9.745174586073982e-06, "loss": 0.2676, "step": 3226 }, { "epoch": 0.2581548369032619, "grad_norm": 0.29455268459193984, "learning_rate": 9.744970393938915e-06, "loss": 0.3179, "step": 3227 }, { "epoch": 0.25823483530329394, "grad_norm": 0.30325676975412563, "learning_rate": 9.744766122167786e-06, "loss": 0.2761, "step": 3228 }, { "epoch": 0.2583148337033259, "grad_norm": 0.2948242738377378, "learning_rate": 9.744561770764027e-06, "loss": 0.2992, "step": 3229 }, { "epoch": 0.25839483210335795, "grad_norm": 0.27051516574762985, "learning_rate": 9.744357339731065e-06, "loss": 0.265, "step": 3230 }, { "epoch": 0.25847483050338993, "grad_norm": 0.2621034616887968, "learning_rate": 9.744152829072333e-06, "loss": 0.3029, "step": 3231 }, { "epoch": 0.2585548289034219, "grad_norm": 0.2910382533903895, "learning_rate": 9.743948238791262e-06, "loss": 0.2938, "step": 3232 }, { "epoch": 0.25863482730345394, "grad_norm": 0.39652395647044114, "learning_rate": 9.743743568891287e-06, "loss": 0.287, "step": 3233 }, { "epoch": 0.2587148257034859, "grad_norm": 0.29744086540449843, "learning_rate": 9.743538819375839e-06, "loss": 0.2595, "step": 3234 }, { "epoch": 0.25879482410351795, "grad_norm": 0.3508410950734714, "learning_rate": 9.743333990248359e-06, "loss": 0.2828, "step": 3235 }, { "epoch": 0.25887482250354993, "grad_norm": 0.2920741821379883, "learning_rate": 9.743129081512284e-06, "loss": 0.291, "step": 3236 }, { "epoch": 0.2589548209035819, "grad_norm": 0.2735367332359823, "learning_rate": 9.742924093171051e-06, "loss": 0.3086, "step": 3237 }, { "epoch": 0.25903481930361394, "grad_norm": 0.3114733871551781, "learning_rate": 9.742719025228102e-06, "loss": 0.2513, "step": 3238 }, { "epoch": 0.2591148177036459, "grad_norm": 0.31151903013245424, "learning_rate": 9.742513877686877e-06, "loss": 0.2538, "step": 3239 }, { "epoch": 0.25919481610367795, "grad_norm": 0.3405091854579703, "learning_rate": 9.742308650550821e-06, "loss": 0.2658, "step": 3240 }, { "epoch": 0.2592748145037099, "grad_norm": 0.2648100413471504, "learning_rate": 9.742103343823376e-06, "loss": 0.3161, "step": 3241 }, { "epoch": 0.2593548129037419, "grad_norm": 0.33849745698521233, "learning_rate": 9.741897957507993e-06, "loss": 0.2983, "step": 3242 }, { "epoch": 0.25943481130377394, "grad_norm": 0.3024443579502387, "learning_rate": 9.741692491608112e-06, "loss": 0.2665, "step": 3243 }, { "epoch": 0.2595148097038059, "grad_norm": 0.2672136156028046, "learning_rate": 9.741486946127186e-06, "loss": 0.3054, "step": 3244 }, { "epoch": 0.25959480810383795, "grad_norm": 0.2670619187863482, "learning_rate": 9.741281321068663e-06, "loss": 0.2994, "step": 3245 }, { "epoch": 0.2596748065038699, "grad_norm": 0.24086180092342724, "learning_rate": 9.741075616435995e-06, "loss": 0.3194, "step": 3246 }, { "epoch": 0.2597548049039019, "grad_norm": 0.31699460478142966, "learning_rate": 9.740869832232634e-06, "loss": 0.3209, "step": 3247 }, { "epoch": 0.25983480330393394, "grad_norm": 0.26048216398761814, "learning_rate": 9.740663968462034e-06, "loss": 0.3144, "step": 3248 }, { "epoch": 0.2599148017039659, "grad_norm": 0.27773802069981673, "learning_rate": 9.740458025127649e-06, "loss": 0.3074, "step": 3249 }, { "epoch": 0.25999480010399795, "grad_norm": 0.3552467262478054, "learning_rate": 9.740252002232936e-06, "loss": 0.2972, "step": 3250 }, { "epoch": 0.2600747985040299, "grad_norm": 0.3045304557309494, "learning_rate": 9.740045899781353e-06, "loss": 0.2403, "step": 3251 }, { "epoch": 0.2601547969040619, "grad_norm": 0.27604615190718945, "learning_rate": 9.73983971777636e-06, "loss": 0.3216, "step": 3252 }, { "epoch": 0.26023479530409394, "grad_norm": 0.32356406830005247, "learning_rate": 9.739633456221415e-06, "loss": 0.2939, "step": 3253 }, { "epoch": 0.2603147937041259, "grad_norm": 1.487630158580523, "learning_rate": 9.739427115119981e-06, "loss": 0.2982, "step": 3254 }, { "epoch": 0.2603947921041579, "grad_norm": 0.2962492920471454, "learning_rate": 9.739220694475522e-06, "loss": 0.2591, "step": 3255 }, { "epoch": 0.2604747905041899, "grad_norm": 0.27195383092622316, "learning_rate": 9.7390141942915e-06, "loss": 0.2951, "step": 3256 }, { "epoch": 0.2605547889042219, "grad_norm": 0.32019956156401685, "learning_rate": 9.738807614571384e-06, "loss": 0.2961, "step": 3257 }, { "epoch": 0.26063478730425393, "grad_norm": 0.29118752375192614, "learning_rate": 9.738600955318637e-06, "loss": 0.2946, "step": 3258 }, { "epoch": 0.2607147857042859, "grad_norm": 0.231787575535212, "learning_rate": 9.738394216536733e-06, "loss": 0.3174, "step": 3259 }, { "epoch": 0.2607947841043179, "grad_norm": 0.2836800528797434, "learning_rate": 9.738187398229137e-06, "loss": 0.2879, "step": 3260 }, { "epoch": 0.2608747825043499, "grad_norm": 0.3182819570698695, "learning_rate": 9.737980500399322e-06, "loss": 0.2618, "step": 3261 }, { "epoch": 0.2609547809043819, "grad_norm": 0.2864719108999295, "learning_rate": 9.73777352305076e-06, "loss": 0.2906, "step": 3262 }, { "epoch": 0.26103477930441393, "grad_norm": 0.3500651926917599, "learning_rate": 9.737566466186922e-06, "loss": 0.2821, "step": 3263 }, { "epoch": 0.2611147777044459, "grad_norm": 0.2793646048901002, "learning_rate": 9.73735932981129e-06, "loss": 0.2991, "step": 3264 }, { "epoch": 0.2611947761044779, "grad_norm": 0.2931298048840424, "learning_rate": 9.737152113927335e-06, "loss": 0.295, "step": 3265 }, { "epoch": 0.2612747745045099, "grad_norm": 0.3073007769412102, "learning_rate": 9.736944818538536e-06, "loss": 0.2789, "step": 3266 }, { "epoch": 0.2613547729045419, "grad_norm": 0.3039940650108085, "learning_rate": 9.736737443648372e-06, "loss": 0.2951, "step": 3267 }, { "epoch": 0.26143477130457393, "grad_norm": 0.31881615996196216, "learning_rate": 9.736529989260323e-06, "loss": 0.269, "step": 3268 }, { "epoch": 0.2615147697046059, "grad_norm": 0.21991980248983584, "learning_rate": 9.73632245537787e-06, "loss": 0.3677, "step": 3269 }, { "epoch": 0.2615947681046379, "grad_norm": 0.3240602565368453, "learning_rate": 9.7361148420045e-06, "loss": 0.2777, "step": 3270 }, { "epoch": 0.2616747665046699, "grad_norm": 0.31730705942922377, "learning_rate": 9.735907149143695e-06, "loss": 0.2688, "step": 3271 }, { "epoch": 0.2617547649047019, "grad_norm": 0.2941215203614757, "learning_rate": 9.73569937679894e-06, "loss": 0.3002, "step": 3272 }, { "epoch": 0.26183476330473393, "grad_norm": 0.31247364950108886, "learning_rate": 9.735491524973723e-06, "loss": 0.2729, "step": 3273 }, { "epoch": 0.2619147617047659, "grad_norm": 0.2862502786293765, "learning_rate": 9.73528359367153e-06, "loss": 0.298, "step": 3274 }, { "epoch": 0.2619947601047979, "grad_norm": 0.29113964497483635, "learning_rate": 9.735075582895856e-06, "loss": 0.3006, "step": 3275 }, { "epoch": 0.2620747585048299, "grad_norm": 0.3021742418006258, "learning_rate": 9.734867492650187e-06, "loss": 0.2571, "step": 3276 }, { "epoch": 0.2621547569048619, "grad_norm": 0.3244099191976601, "learning_rate": 9.734659322938018e-06, "loss": 0.3033, "step": 3277 }, { "epoch": 0.26223475530489393, "grad_norm": 0.2394255828914602, "learning_rate": 9.734451073762843e-06, "loss": 0.3787, "step": 3278 }, { "epoch": 0.2623147537049259, "grad_norm": 0.3038020855181389, "learning_rate": 9.734242745128156e-06, "loss": 0.2949, "step": 3279 }, { "epoch": 0.2623947521049579, "grad_norm": 0.25954194895489024, "learning_rate": 9.734034337037452e-06, "loss": 0.3314, "step": 3280 }, { "epoch": 0.2624747505049899, "grad_norm": 0.26239388442958606, "learning_rate": 9.733825849494232e-06, "loss": 0.3346, "step": 3281 }, { "epoch": 0.2625547489050219, "grad_norm": 0.3672997475674531, "learning_rate": 9.733617282501994e-06, "loss": 0.2895, "step": 3282 }, { "epoch": 0.2626347473050539, "grad_norm": 0.3249400349568183, "learning_rate": 9.733408636064236e-06, "loss": 0.2603, "step": 3283 }, { "epoch": 0.2627147457050859, "grad_norm": 0.3275285268875061, "learning_rate": 9.733199910184464e-06, "loss": 0.2615, "step": 3284 }, { "epoch": 0.2627947441051179, "grad_norm": 0.35728076425360167, "learning_rate": 9.732991104866179e-06, "loss": 0.2599, "step": 3285 }, { "epoch": 0.2628747425051499, "grad_norm": 0.3243932245599651, "learning_rate": 9.732782220112884e-06, "loss": 0.3011, "step": 3286 }, { "epoch": 0.2629547409051819, "grad_norm": 0.25779972397750855, "learning_rate": 9.732573255928086e-06, "loss": 0.3318, "step": 3287 }, { "epoch": 0.26303473930521387, "grad_norm": 0.39904677150893636, "learning_rate": 9.732364212315293e-06, "loss": 0.2593, "step": 3288 }, { "epoch": 0.2631147377052459, "grad_norm": 0.2852369885953849, "learning_rate": 9.732155089278013e-06, "loss": 0.3126, "step": 3289 }, { "epoch": 0.2631947361052779, "grad_norm": 0.31633298247727804, "learning_rate": 9.731945886819756e-06, "loss": 0.2659, "step": 3290 }, { "epoch": 0.2632747345053099, "grad_norm": 0.32495253968216375, "learning_rate": 9.731736604944031e-06, "loss": 0.2863, "step": 3291 }, { "epoch": 0.2633547329053419, "grad_norm": 0.2887882074764957, "learning_rate": 9.731527243654352e-06, "loss": 0.2771, "step": 3292 }, { "epoch": 0.26343473130537387, "grad_norm": 0.2987400485853798, "learning_rate": 9.731317802954233e-06, "loss": 0.29, "step": 3293 }, { "epoch": 0.2635147297054059, "grad_norm": 0.19954308000688015, "learning_rate": 9.731108282847189e-06, "loss": 0.3381, "step": 3294 }, { "epoch": 0.2635947281054379, "grad_norm": 0.2874608593695783, "learning_rate": 9.730898683336735e-06, "loss": 0.2999, "step": 3295 }, { "epoch": 0.2636747265054699, "grad_norm": 0.3511455185501061, "learning_rate": 9.730689004426392e-06, "loss": 0.249, "step": 3296 }, { "epoch": 0.2637547249055019, "grad_norm": 0.29820350159924747, "learning_rate": 9.730479246119677e-06, "loss": 0.2955, "step": 3297 }, { "epoch": 0.26383472330553387, "grad_norm": 0.3158625618793012, "learning_rate": 9.73026940842011e-06, "loss": 0.2737, "step": 3298 }, { "epoch": 0.2639147217055659, "grad_norm": 0.37134953695820383, "learning_rate": 9.730059491331214e-06, "loss": 0.2896, "step": 3299 }, { "epoch": 0.2639947201055979, "grad_norm": 0.3056288199349463, "learning_rate": 9.72984949485651e-06, "loss": 0.2691, "step": 3300 }, { "epoch": 0.2640747185056299, "grad_norm": 0.27795776233097025, "learning_rate": 9.729639418999524e-06, "loss": 0.3042, "step": 3301 }, { "epoch": 0.2641547169056619, "grad_norm": 0.3495656334955742, "learning_rate": 9.729429263763781e-06, "loss": 0.2512, "step": 3302 }, { "epoch": 0.26423471530569387, "grad_norm": 0.3145251999706438, "learning_rate": 9.72921902915281e-06, "loss": 0.2573, "step": 3303 }, { "epoch": 0.2643147137057259, "grad_norm": 0.23899468241463667, "learning_rate": 9.729008715170137e-06, "loss": 0.3523, "step": 3304 }, { "epoch": 0.2643947121057579, "grad_norm": 0.3450568362481362, "learning_rate": 9.728798321819294e-06, "loss": 0.2934, "step": 3305 }, { "epoch": 0.2644747105057899, "grad_norm": 0.32223327205109004, "learning_rate": 9.72858784910381e-06, "loss": 0.2886, "step": 3306 }, { "epoch": 0.2645547089058219, "grad_norm": 0.3223086249263875, "learning_rate": 9.728377297027218e-06, "loss": 0.2516, "step": 3307 }, { "epoch": 0.26463470730585387, "grad_norm": 0.2866324183886891, "learning_rate": 9.728166665593053e-06, "loss": 0.3006, "step": 3308 }, { "epoch": 0.2647147057058859, "grad_norm": 0.24527495178358014, "learning_rate": 9.727955954804848e-06, "loss": 0.3525, "step": 3309 }, { "epoch": 0.2647947041059179, "grad_norm": 0.20786093537727424, "learning_rate": 9.727745164666142e-06, "loss": 0.3362, "step": 3310 }, { "epoch": 0.26487470250594985, "grad_norm": 0.3472425808348966, "learning_rate": 9.727534295180471e-06, "loss": 0.2604, "step": 3311 }, { "epoch": 0.2649547009059819, "grad_norm": 0.3270131390312293, "learning_rate": 9.727323346351374e-06, "loss": 0.2616, "step": 3312 }, { "epoch": 0.26503469930601387, "grad_norm": 0.2725110248153651, "learning_rate": 9.727112318182392e-06, "loss": 0.3448, "step": 3313 }, { "epoch": 0.2651146977060459, "grad_norm": 0.4115174831327253, "learning_rate": 9.726901210677066e-06, "loss": 0.2647, "step": 3314 }, { "epoch": 0.2651946961060779, "grad_norm": 0.2826592282726971, "learning_rate": 9.72669002383894e-06, "loss": 0.3028, "step": 3315 }, { "epoch": 0.26527469450610985, "grad_norm": 0.3365942294289492, "learning_rate": 9.72647875767156e-06, "loss": 0.2884, "step": 3316 }, { "epoch": 0.2653546929061419, "grad_norm": 0.28974386117515627, "learning_rate": 9.726267412178467e-06, "loss": 0.2839, "step": 3317 }, { "epoch": 0.26543469130617386, "grad_norm": 0.2969030564574937, "learning_rate": 9.726055987363212e-06, "loss": 0.2713, "step": 3318 }, { "epoch": 0.2655146897062059, "grad_norm": 0.31580232381748147, "learning_rate": 9.725844483229342e-06, "loss": 0.2748, "step": 3319 }, { "epoch": 0.2655946881062379, "grad_norm": 0.28374228199959434, "learning_rate": 9.725632899780406e-06, "loss": 0.2951, "step": 3320 }, { "epoch": 0.26567468650626985, "grad_norm": 0.32877307915009857, "learning_rate": 9.725421237019957e-06, "loss": 0.3004, "step": 3321 }, { "epoch": 0.2657546849063019, "grad_norm": 0.25142999068398847, "learning_rate": 9.725209494951547e-06, "loss": 0.3092, "step": 3322 }, { "epoch": 0.26583468330633386, "grad_norm": 0.3464101290305094, "learning_rate": 9.724997673578727e-06, "loss": 0.2976, "step": 3323 }, { "epoch": 0.2659146817063659, "grad_norm": 0.24850765856760965, "learning_rate": 9.724785772905057e-06, "loss": 0.3281, "step": 3324 }, { "epoch": 0.2659946801063979, "grad_norm": 0.2970398200846788, "learning_rate": 9.724573792934089e-06, "loss": 0.2882, "step": 3325 }, { "epoch": 0.26607467850642985, "grad_norm": 0.21853837980171653, "learning_rate": 9.724361733669383e-06, "loss": 0.357, "step": 3326 }, { "epoch": 0.2661546769064619, "grad_norm": 0.44497950736512093, "learning_rate": 9.724149595114496e-06, "loss": 0.249, "step": 3327 }, { "epoch": 0.26623467530649386, "grad_norm": 0.27895155376721537, "learning_rate": 9.723937377272989e-06, "loss": 0.3032, "step": 3328 }, { "epoch": 0.2663146737065259, "grad_norm": 0.2739129974646797, "learning_rate": 9.723725080148426e-06, "loss": 0.2957, "step": 3329 }, { "epoch": 0.2663946721065579, "grad_norm": 0.297392650414487, "learning_rate": 9.723512703744369e-06, "loss": 0.2627, "step": 3330 }, { "epoch": 0.26647467050658985, "grad_norm": 0.41813483304230814, "learning_rate": 9.72330024806438e-06, "loss": 0.2982, "step": 3331 }, { "epoch": 0.2665546689066219, "grad_norm": 0.28240711448204286, "learning_rate": 9.723087713112027e-06, "loss": 0.3125, "step": 3332 }, { "epoch": 0.26663466730665386, "grad_norm": 0.278401573935145, "learning_rate": 9.722875098890878e-06, "loss": 0.3001, "step": 3333 }, { "epoch": 0.2667146657066859, "grad_norm": 0.30625688373713295, "learning_rate": 9.722662405404499e-06, "loss": 0.2993, "step": 3334 }, { "epoch": 0.26679466410671787, "grad_norm": 0.29653441867906993, "learning_rate": 9.72244963265646e-06, "loss": 0.276, "step": 3335 }, { "epoch": 0.26687466250674985, "grad_norm": 0.30711738345171774, "learning_rate": 9.722236780650333e-06, "loss": 0.2703, "step": 3336 }, { "epoch": 0.2669546609067819, "grad_norm": 0.3051518138513531, "learning_rate": 9.72202384938969e-06, "loss": 0.2599, "step": 3337 }, { "epoch": 0.26703465930681386, "grad_norm": 0.26865016629155475, "learning_rate": 9.721810838878105e-06, "loss": 0.288, "step": 3338 }, { "epoch": 0.26711465770684584, "grad_norm": 0.32100437801558435, "learning_rate": 9.721597749119151e-06, "loss": 0.2618, "step": 3339 }, { "epoch": 0.26719465610687787, "grad_norm": 0.2742990030426426, "learning_rate": 9.721384580116409e-06, "loss": 0.3327, "step": 3340 }, { "epoch": 0.26727465450690985, "grad_norm": 0.250498245779253, "learning_rate": 9.721171331873452e-06, "loss": 0.3222, "step": 3341 }, { "epoch": 0.2673546529069419, "grad_norm": 0.290789182304481, "learning_rate": 9.720958004393859e-06, "loss": 0.3059, "step": 3342 }, { "epoch": 0.26743465130697386, "grad_norm": 0.25820050014333923, "learning_rate": 9.720744597681213e-06, "loss": 0.2884, "step": 3343 }, { "epoch": 0.26751464970700584, "grad_norm": 0.33825639371484695, "learning_rate": 9.720531111739095e-06, "loss": 0.2566, "step": 3344 }, { "epoch": 0.26759464810703787, "grad_norm": 1.2888987200921869, "learning_rate": 9.720317546571088e-06, "loss": 0.25, "step": 3345 }, { "epoch": 0.26767464650706985, "grad_norm": 0.28241611393234, "learning_rate": 9.720103902180776e-06, "loss": 0.3097, "step": 3346 }, { "epoch": 0.2677546449071019, "grad_norm": 0.2824841646686313, "learning_rate": 9.719890178571744e-06, "loss": 0.3105, "step": 3347 }, { "epoch": 0.26783464330713386, "grad_norm": 0.29996986732106473, "learning_rate": 9.71967637574758e-06, "loss": 0.2492, "step": 3348 }, { "epoch": 0.26791464170716583, "grad_norm": 0.3177365184043487, "learning_rate": 9.719462493711873e-06, "loss": 0.2867, "step": 3349 }, { "epoch": 0.26799464010719787, "grad_norm": 0.24767417615348608, "learning_rate": 9.719248532468209e-06, "loss": 0.313, "step": 3350 }, { "epoch": 0.26807463850722985, "grad_norm": 0.3259646254900526, "learning_rate": 9.719034492020183e-06, "loss": 0.2646, "step": 3351 }, { "epoch": 0.2681546369072619, "grad_norm": 0.2727055541190775, "learning_rate": 9.718820372371385e-06, "loss": 0.3003, "step": 3352 }, { "epoch": 0.26823463530729386, "grad_norm": 0.3111205801927099, "learning_rate": 9.718606173525411e-06, "loss": 0.2741, "step": 3353 }, { "epoch": 0.26831463370732583, "grad_norm": 0.3043032031903069, "learning_rate": 9.718391895485853e-06, "loss": 0.2728, "step": 3354 }, { "epoch": 0.26839463210735787, "grad_norm": 0.2486021372953984, "learning_rate": 9.718177538256309e-06, "loss": 0.3073, "step": 3355 }, { "epoch": 0.26847463050738984, "grad_norm": 0.24096639141712556, "learning_rate": 9.717963101840375e-06, "loss": 0.3225, "step": 3356 }, { "epoch": 0.2685546289074219, "grad_norm": 0.3120002122875939, "learning_rate": 9.717748586241653e-06, "loss": 0.2758, "step": 3357 }, { "epoch": 0.26863462730745385, "grad_norm": 0.28756894606994393, "learning_rate": 9.717533991463742e-06, "loss": 0.3028, "step": 3358 }, { "epoch": 0.26871462570748583, "grad_norm": 0.32677744244938034, "learning_rate": 9.71731931751024e-06, "loss": 0.2708, "step": 3359 }, { "epoch": 0.26879462410751787, "grad_norm": 0.2940324280410806, "learning_rate": 9.717104564384756e-06, "loss": 0.2724, "step": 3360 }, { "epoch": 0.26887462250754984, "grad_norm": 0.3815781685218439, "learning_rate": 9.71688973209089e-06, "loss": 0.2579, "step": 3361 }, { "epoch": 0.2689546209075818, "grad_norm": 0.3052222603650581, "learning_rate": 9.716674820632248e-06, "loss": 0.2879, "step": 3362 }, { "epoch": 0.26903461930761385, "grad_norm": 0.2701807211767095, "learning_rate": 9.716459830012439e-06, "loss": 0.2969, "step": 3363 }, { "epoch": 0.26911461770764583, "grad_norm": 0.2934183183848906, "learning_rate": 9.71624476023507e-06, "loss": 0.3023, "step": 3364 }, { "epoch": 0.26919461610767786, "grad_norm": 0.3043936713373222, "learning_rate": 9.71602961130375e-06, "loss": 0.2504, "step": 3365 }, { "epoch": 0.26927461450770984, "grad_norm": 0.2836284709611108, "learning_rate": 9.71581438322209e-06, "loss": 0.2908, "step": 3366 }, { "epoch": 0.2693546129077418, "grad_norm": 0.29046907736885885, "learning_rate": 9.715599075993705e-06, "loss": 0.285, "step": 3367 }, { "epoch": 0.26943461130777385, "grad_norm": 0.3377959964629928, "learning_rate": 9.715383689622205e-06, "loss": 0.2779, "step": 3368 }, { "epoch": 0.26951460970780583, "grad_norm": 0.3293983847601966, "learning_rate": 9.715168224111205e-06, "loss": 0.274, "step": 3369 }, { "epoch": 0.26959460810783786, "grad_norm": 0.28717511577037863, "learning_rate": 9.714952679464324e-06, "loss": 0.2818, "step": 3370 }, { "epoch": 0.26967460650786984, "grad_norm": 0.27764524530597867, "learning_rate": 9.714737055685176e-06, "loss": 0.292, "step": 3371 }, { "epoch": 0.2697546049079018, "grad_norm": 0.3114071421627263, "learning_rate": 9.714521352777383e-06, "loss": 0.2627, "step": 3372 }, { "epoch": 0.26983460330793385, "grad_norm": 0.2723969094675828, "learning_rate": 9.714305570744564e-06, "loss": 0.2963, "step": 3373 }, { "epoch": 0.26991460170796583, "grad_norm": 0.31989046544946853, "learning_rate": 9.71408970959034e-06, "loss": 0.2511, "step": 3374 }, { "epoch": 0.26999460010799786, "grad_norm": 0.1954346841357914, "learning_rate": 9.713873769318333e-06, "loss": 0.3505, "step": 3375 }, { "epoch": 0.27007459850802984, "grad_norm": 0.2672554019239106, "learning_rate": 9.713657749932172e-06, "loss": 0.3058, "step": 3376 }, { "epoch": 0.2701545969080618, "grad_norm": 0.263092771566001, "learning_rate": 9.713441651435477e-06, "loss": 0.3082, "step": 3377 }, { "epoch": 0.27023459530809385, "grad_norm": 0.2806704501647788, "learning_rate": 9.713225473831878e-06, "loss": 0.2928, "step": 3378 }, { "epoch": 0.2703145937081258, "grad_norm": 0.33940064480119825, "learning_rate": 9.713009217125e-06, "loss": 0.3054, "step": 3379 }, { "epoch": 0.27039459210815786, "grad_norm": 0.2856751494725099, "learning_rate": 9.712792881318475e-06, "loss": 0.3352, "step": 3380 }, { "epoch": 0.27047459050818984, "grad_norm": 0.24291882245532978, "learning_rate": 9.712576466415935e-06, "loss": 0.369, "step": 3381 }, { "epoch": 0.2705545889082218, "grad_norm": 0.31407908794926154, "learning_rate": 9.712359972421008e-06, "loss": 0.3012, "step": 3382 }, { "epoch": 0.27063458730825385, "grad_norm": 0.3380004327703376, "learning_rate": 9.712143399337333e-06, "loss": 0.2626, "step": 3383 }, { "epoch": 0.2707145857082858, "grad_norm": 0.3013170073120938, "learning_rate": 9.711926747168539e-06, "loss": 0.2743, "step": 3384 }, { "epoch": 0.27079458410831786, "grad_norm": 0.3300293994939693, "learning_rate": 9.711710015918266e-06, "loss": 0.2603, "step": 3385 }, { "epoch": 0.27087458250834984, "grad_norm": 0.3258324076830785, "learning_rate": 9.71149320559015e-06, "loss": 0.2594, "step": 3386 }, { "epoch": 0.2709545809083818, "grad_norm": 0.32567848907553143, "learning_rate": 9.71127631618783e-06, "loss": 0.2612, "step": 3387 }, { "epoch": 0.27103457930841385, "grad_norm": 0.3040966604903849, "learning_rate": 9.711059347714947e-06, "loss": 0.274, "step": 3388 }, { "epoch": 0.2711145777084458, "grad_norm": 0.3542670554711023, "learning_rate": 9.710842300175141e-06, "loss": 0.276, "step": 3389 }, { "epoch": 0.2711945761084778, "grad_norm": 0.29618113692179054, "learning_rate": 9.710625173572057e-06, "loss": 0.2925, "step": 3390 }, { "epoch": 0.27127457450850984, "grad_norm": 0.2795797016294275, "learning_rate": 9.710407967909336e-06, "loss": 0.2974, "step": 3391 }, { "epoch": 0.2713545729085418, "grad_norm": 0.2865469162148655, "learning_rate": 9.710190683190626e-06, "loss": 0.286, "step": 3392 }, { "epoch": 0.27143457130857385, "grad_norm": 0.3150370508779848, "learning_rate": 9.709973319419572e-06, "loss": 0.2857, "step": 3393 }, { "epoch": 0.2715145697086058, "grad_norm": 0.3224244835097185, "learning_rate": 9.709755876599822e-06, "loss": 0.2887, "step": 3394 }, { "epoch": 0.2715945681086378, "grad_norm": 0.2931722640292034, "learning_rate": 9.709538354735026e-06, "loss": 0.2779, "step": 3395 }, { "epoch": 0.27167456650866983, "grad_norm": 0.3091348559939679, "learning_rate": 9.709320753828837e-06, "loss": 0.2879, "step": 3396 }, { "epoch": 0.2717545649087018, "grad_norm": 0.33430000686222283, "learning_rate": 9.709103073884905e-06, "loss": 0.2657, "step": 3397 }, { "epoch": 0.27183456330873385, "grad_norm": 0.26577951180156834, "learning_rate": 9.708885314906882e-06, "loss": 0.3432, "step": 3398 }, { "epoch": 0.2719145617087658, "grad_norm": 0.3290026370917659, "learning_rate": 9.708667476898423e-06, "loss": 0.2842, "step": 3399 }, { "epoch": 0.2719945601087978, "grad_norm": 0.2644554855859131, "learning_rate": 9.708449559863187e-06, "loss": 0.3232, "step": 3400 }, { "epoch": 0.27207455850882983, "grad_norm": 0.27777301703914026, "learning_rate": 9.708231563804828e-06, "loss": 0.3071, "step": 3401 }, { "epoch": 0.2721545569088618, "grad_norm": 0.3547900861605703, "learning_rate": 9.708013488727006e-06, "loss": 0.2784, "step": 3402 }, { "epoch": 0.27223455530889384, "grad_norm": 0.2618491764283487, "learning_rate": 9.707795334633383e-06, "loss": 0.2938, "step": 3403 }, { "epoch": 0.2723145537089258, "grad_norm": 0.3366615249883749, "learning_rate": 9.707577101527616e-06, "loss": 0.2787, "step": 3404 }, { "epoch": 0.2723945521089578, "grad_norm": 0.32633388071026886, "learning_rate": 9.707358789413373e-06, "loss": 0.2755, "step": 3405 }, { "epoch": 0.27247455050898983, "grad_norm": 0.2714848638949016, "learning_rate": 9.707140398294313e-06, "loss": 0.2816, "step": 3406 }, { "epoch": 0.2725545489090218, "grad_norm": 0.2762041495719878, "learning_rate": 9.706921928174105e-06, "loss": 0.2721, "step": 3407 }, { "epoch": 0.27263454730905384, "grad_norm": 0.26482309361463785, "learning_rate": 9.706703379056412e-06, "loss": 0.3166, "step": 3408 }, { "epoch": 0.2727145457090858, "grad_norm": 0.32858530451554446, "learning_rate": 9.706484750944905e-06, "loss": 0.2631, "step": 3409 }, { "epoch": 0.2727945441091178, "grad_norm": 0.31745761189708305, "learning_rate": 9.706266043843253e-06, "loss": 0.2863, "step": 3410 }, { "epoch": 0.27287454250914983, "grad_norm": 0.30593428584074167, "learning_rate": 9.706047257755124e-06, "loss": 0.2885, "step": 3411 }, { "epoch": 0.2729545409091818, "grad_norm": 0.2735380321845854, "learning_rate": 9.705828392684194e-06, "loss": 0.3356, "step": 3412 }, { "epoch": 0.27303453930921384, "grad_norm": 0.2615612160715219, "learning_rate": 9.705609448634133e-06, "loss": 0.323, "step": 3413 }, { "epoch": 0.2731145377092458, "grad_norm": 0.2982344390512166, "learning_rate": 9.705390425608617e-06, "loss": 0.2881, "step": 3414 }, { "epoch": 0.2731945361092778, "grad_norm": 0.3071638640440055, "learning_rate": 9.705171323611322e-06, "loss": 0.2992, "step": 3415 }, { "epoch": 0.27327453450930983, "grad_norm": 0.3447159883085105, "learning_rate": 9.704952142645925e-06, "loss": 0.309, "step": 3416 }, { "epoch": 0.2733545329093418, "grad_norm": 0.2774190670158889, "learning_rate": 9.704732882716104e-06, "loss": 0.3167, "step": 3417 }, { "epoch": 0.2734345313093738, "grad_norm": 0.3022004072712756, "learning_rate": 9.70451354382554e-06, "loss": 0.2895, "step": 3418 }, { "epoch": 0.2735145297094058, "grad_norm": 0.27718848587746664, "learning_rate": 9.704294125977912e-06, "loss": 0.3116, "step": 3419 }, { "epoch": 0.2735945281094378, "grad_norm": 0.2865912302171865, "learning_rate": 9.704074629176905e-06, "loss": 0.3117, "step": 3420 }, { "epoch": 0.27367452650946983, "grad_norm": 0.28617222537538883, "learning_rate": 9.703855053426202e-06, "loss": 0.3015, "step": 3421 }, { "epoch": 0.2737545249095018, "grad_norm": 0.2477933144543708, "learning_rate": 9.70363539872949e-06, "loss": 0.3238, "step": 3422 }, { "epoch": 0.2738345233095338, "grad_norm": 0.2652507433983257, "learning_rate": 9.703415665090452e-06, "loss": 0.3303, "step": 3423 }, { "epoch": 0.2739145217095658, "grad_norm": 0.2037894228535896, "learning_rate": 9.703195852512776e-06, "loss": 0.3504, "step": 3424 }, { "epoch": 0.2739945201095978, "grad_norm": 0.30790354644645107, "learning_rate": 9.702975961000155e-06, "loss": 0.2921, "step": 3425 }, { "epoch": 0.2740745185096298, "grad_norm": 0.3165170959258859, "learning_rate": 9.702755990556277e-06, "loss": 0.2727, "step": 3426 }, { "epoch": 0.2741545169096618, "grad_norm": 0.2970940593119044, "learning_rate": 9.702535941184833e-06, "loss": 0.3008, "step": 3427 }, { "epoch": 0.2742345153096938, "grad_norm": 0.30707267989733694, "learning_rate": 9.702315812889518e-06, "loss": 0.266, "step": 3428 }, { "epoch": 0.2743145137097258, "grad_norm": 0.32388109721849806, "learning_rate": 9.702095605674027e-06, "loss": 0.2575, "step": 3429 }, { "epoch": 0.2743945121097578, "grad_norm": 0.25905050208092584, "learning_rate": 9.701875319542052e-06, "loss": 0.3192, "step": 3430 }, { "epoch": 0.2744745105097898, "grad_norm": 0.31540322714569075, "learning_rate": 9.701654954497294e-06, "loss": 0.2647, "step": 3431 }, { "epoch": 0.2745545089098218, "grad_norm": 0.26679652232924855, "learning_rate": 9.70143451054345e-06, "loss": 0.3259, "step": 3432 }, { "epoch": 0.2746345073098538, "grad_norm": 0.31837083310889264, "learning_rate": 9.70121398768422e-06, "loss": 0.294, "step": 3433 }, { "epoch": 0.2747145057098858, "grad_norm": 0.3538398625734197, "learning_rate": 9.700993385923303e-06, "loss": 0.2652, "step": 3434 }, { "epoch": 0.2747945041099178, "grad_norm": 0.32443731258814784, "learning_rate": 9.700772705264405e-06, "loss": 0.2759, "step": 3435 }, { "epoch": 0.2748745025099498, "grad_norm": 0.3274663079846949, "learning_rate": 9.700551945711228e-06, "loss": 0.2682, "step": 3436 }, { "epoch": 0.2749545009099818, "grad_norm": 0.3332624411008144, "learning_rate": 9.700331107267477e-06, "loss": 0.2738, "step": 3437 }, { "epoch": 0.2750344993100138, "grad_norm": 0.3469697981360618, "learning_rate": 9.700110189936858e-06, "loss": 0.2525, "step": 3438 }, { "epoch": 0.2751144977100458, "grad_norm": 0.3185481723066586, "learning_rate": 9.69988919372308e-06, "loss": 0.2637, "step": 3439 }, { "epoch": 0.2751944961100778, "grad_norm": 0.3255951733404259, "learning_rate": 9.69966811862985e-06, "loss": 0.2465, "step": 3440 }, { "epoch": 0.2752744945101098, "grad_norm": 0.3139263980423136, "learning_rate": 9.699446964660882e-06, "loss": 0.2565, "step": 3441 }, { "epoch": 0.2753544929101418, "grad_norm": 0.2916452937014567, "learning_rate": 9.699225731819884e-06, "loss": 0.3051, "step": 3442 }, { "epoch": 0.2754344913101738, "grad_norm": 0.2924444536331352, "learning_rate": 9.69900442011057e-06, "loss": 0.3008, "step": 3443 }, { "epoch": 0.2755144897102058, "grad_norm": 0.2970619984898718, "learning_rate": 9.698783029536653e-06, "loss": 0.2891, "step": 3444 }, { "epoch": 0.2755944881102378, "grad_norm": 0.25925161108317984, "learning_rate": 9.698561560101853e-06, "loss": 0.29, "step": 3445 }, { "epoch": 0.27567448651026977, "grad_norm": 0.3522522008529718, "learning_rate": 9.698340011809883e-06, "loss": 0.2787, "step": 3446 }, { "epoch": 0.2757544849103018, "grad_norm": 0.25942200608700616, "learning_rate": 9.698118384664464e-06, "loss": 0.3372, "step": 3447 }, { "epoch": 0.2758344833103338, "grad_norm": 0.3236105055403877, "learning_rate": 9.697896678669313e-06, "loss": 0.2847, "step": 3448 }, { "epoch": 0.2759144817103658, "grad_norm": 0.28259322706556816, "learning_rate": 9.69767489382815e-06, "loss": 0.3081, "step": 3449 }, { "epoch": 0.2759944801103978, "grad_norm": 0.320447191303649, "learning_rate": 9.697453030144703e-06, "loss": 0.2741, "step": 3450 }, { "epoch": 0.27607447851042977, "grad_norm": 0.31942562013903714, "learning_rate": 9.697231087622691e-06, "loss": 0.2648, "step": 3451 }, { "epoch": 0.2761544769104618, "grad_norm": 0.3069206549259995, "learning_rate": 9.697009066265839e-06, "loss": 0.258, "step": 3452 }, { "epoch": 0.2762344753104938, "grad_norm": 0.25827490507190504, "learning_rate": 9.696786966077875e-06, "loss": 0.3135, "step": 3453 }, { "epoch": 0.2763144737105258, "grad_norm": 0.3388246551684192, "learning_rate": 9.696564787062526e-06, "loss": 0.2701, "step": 3454 }, { "epoch": 0.2763944721105578, "grad_norm": 0.3485375777713787, "learning_rate": 9.69634252922352e-06, "loss": 0.2746, "step": 3455 }, { "epoch": 0.27647447051058976, "grad_norm": 0.3038528240215262, "learning_rate": 9.696120192564587e-06, "loss": 0.281, "step": 3456 }, { "epoch": 0.2765544689106218, "grad_norm": 0.4416445832446767, "learning_rate": 9.69589777708946e-06, "loss": 0.3302, "step": 3457 }, { "epoch": 0.2766344673106538, "grad_norm": 0.32307712399478633, "learning_rate": 9.695675282801873e-06, "loss": 0.3014, "step": 3458 }, { "epoch": 0.2767144657106858, "grad_norm": 0.27799900034853686, "learning_rate": 9.695452709705555e-06, "loss": 0.2947, "step": 3459 }, { "epoch": 0.2767944641107178, "grad_norm": 0.35006191671828635, "learning_rate": 9.695230057804248e-06, "loss": 0.2663, "step": 3460 }, { "epoch": 0.27687446251074976, "grad_norm": 0.3115638032397091, "learning_rate": 9.695007327101685e-06, "loss": 0.2789, "step": 3461 }, { "epoch": 0.2769544609107818, "grad_norm": 0.29179097980121665, "learning_rate": 9.694784517601604e-06, "loss": 0.2689, "step": 3462 }, { "epoch": 0.2770344593108138, "grad_norm": 0.2953682128747273, "learning_rate": 9.694561629307745e-06, "loss": 0.2828, "step": 3463 }, { "epoch": 0.2771144577108458, "grad_norm": 0.282683785376063, "learning_rate": 9.69433866222385e-06, "loss": 0.2923, "step": 3464 }, { "epoch": 0.2771944561108778, "grad_norm": 0.30253245521849464, "learning_rate": 9.694115616353662e-06, "loss": 0.3065, "step": 3465 }, { "epoch": 0.27727445451090976, "grad_norm": 0.2842160775515158, "learning_rate": 9.693892491700919e-06, "loss": 0.3005, "step": 3466 }, { "epoch": 0.2773544529109418, "grad_norm": 0.31437851175070225, "learning_rate": 9.693669288269371e-06, "loss": 0.2484, "step": 3467 }, { "epoch": 0.2774344513109738, "grad_norm": 0.23104963252187732, "learning_rate": 9.693446006062764e-06, "loss": 0.3149, "step": 3468 }, { "epoch": 0.2775144497110058, "grad_norm": 0.31049297068487686, "learning_rate": 9.69322264508484e-06, "loss": 0.2766, "step": 3469 }, { "epoch": 0.2775944481110378, "grad_norm": 0.2690023524368739, "learning_rate": 9.692999205339356e-06, "loss": 0.2919, "step": 3470 }, { "epoch": 0.27767444651106976, "grad_norm": 0.3029907628210159, "learning_rate": 9.692775686830057e-06, "loss": 0.2853, "step": 3471 }, { "epoch": 0.2777544449111018, "grad_norm": 0.5684404932906133, "learning_rate": 9.692552089560695e-06, "loss": 0.2563, "step": 3472 }, { "epoch": 0.27783444331113377, "grad_norm": 0.3215899309324039, "learning_rate": 9.69232841353502e-06, "loss": 0.2471, "step": 3473 }, { "epoch": 0.27791444171116575, "grad_norm": 0.30719762658838706, "learning_rate": 9.69210465875679e-06, "loss": 0.2554, "step": 3474 }, { "epoch": 0.2779944401111978, "grad_norm": 0.34701367591986115, "learning_rate": 9.69188082522976e-06, "loss": 0.2797, "step": 3475 }, { "epoch": 0.27807443851122976, "grad_norm": 0.3094979214597019, "learning_rate": 9.691656912957686e-06, "loss": 0.2661, "step": 3476 }, { "epoch": 0.2781544369112618, "grad_norm": 0.32863185105908344, "learning_rate": 9.691432921944325e-06, "loss": 0.2876, "step": 3477 }, { "epoch": 0.27823443531129377, "grad_norm": 0.2918550380099812, "learning_rate": 9.691208852193438e-06, "loss": 0.2952, "step": 3478 }, { "epoch": 0.27831443371132575, "grad_norm": 0.28682632021408966, "learning_rate": 9.690984703708783e-06, "loss": 0.3103, "step": 3479 }, { "epoch": 0.2783944321113578, "grad_norm": 0.2876496445255653, "learning_rate": 9.690760476494125e-06, "loss": 0.2878, "step": 3480 }, { "epoch": 0.27847443051138976, "grad_norm": 0.3135698285542876, "learning_rate": 9.690536170553226e-06, "loss": 0.2735, "step": 3481 }, { "epoch": 0.2785544289114218, "grad_norm": 0.2818132190689427, "learning_rate": 9.69031178588985e-06, "loss": 0.2998, "step": 3482 }, { "epoch": 0.27863442731145377, "grad_norm": 0.3712455840706995, "learning_rate": 9.690087322507763e-06, "loss": 0.2601, "step": 3483 }, { "epoch": 0.27871442571148575, "grad_norm": 0.31238838748874187, "learning_rate": 9.689862780410732e-06, "loss": 0.2653, "step": 3484 }, { "epoch": 0.2787944241115178, "grad_norm": 0.38769530488490955, "learning_rate": 9.689638159602527e-06, "loss": 0.2687, "step": 3485 }, { "epoch": 0.27887442251154976, "grad_norm": 0.29039237619758346, "learning_rate": 9.689413460086917e-06, "loss": 0.3193, "step": 3486 }, { "epoch": 0.2789544209115818, "grad_norm": 0.25073839412898447, "learning_rate": 9.689188681867675e-06, "loss": 0.3188, "step": 3487 }, { "epoch": 0.27903441931161377, "grad_norm": 0.335596696242481, "learning_rate": 9.68896382494857e-06, "loss": 0.2699, "step": 3488 }, { "epoch": 0.27911441771164575, "grad_norm": 0.25055872779079974, "learning_rate": 9.688738889333376e-06, "loss": 0.3463, "step": 3489 }, { "epoch": 0.2791944161116778, "grad_norm": 0.3127114679308454, "learning_rate": 9.688513875025871e-06, "loss": 0.2711, "step": 3490 }, { "epoch": 0.27927441451170976, "grad_norm": 0.2880476741957524, "learning_rate": 9.68828878202983e-06, "loss": 0.2955, "step": 3491 }, { "epoch": 0.2793544129117418, "grad_norm": 0.30691679753455314, "learning_rate": 9.688063610349033e-06, "loss": 0.31, "step": 3492 }, { "epoch": 0.27943441131177377, "grad_norm": 0.32814671488186115, "learning_rate": 9.687838359987254e-06, "loss": 0.2386, "step": 3493 }, { "epoch": 0.27951440971180574, "grad_norm": 0.3155816575952546, "learning_rate": 9.687613030948277e-06, "loss": 0.2594, "step": 3494 }, { "epoch": 0.2795944081118378, "grad_norm": 0.32365509684370347, "learning_rate": 9.687387623235885e-06, "loss": 0.2897, "step": 3495 }, { "epoch": 0.27967440651186976, "grad_norm": 0.2854343279497684, "learning_rate": 9.687162136853858e-06, "loss": 0.2776, "step": 3496 }, { "epoch": 0.2797544049119018, "grad_norm": 0.3192117243365602, "learning_rate": 9.686936571805982e-06, "loss": 0.2715, "step": 3497 }, { "epoch": 0.27983440331193377, "grad_norm": 0.3090694960718188, "learning_rate": 9.68671092809604e-06, "loss": 0.2648, "step": 3498 }, { "epoch": 0.27991440171196574, "grad_norm": 0.29365525155090805, "learning_rate": 9.686485205727827e-06, "loss": 0.3172, "step": 3499 }, { "epoch": 0.2799944001119978, "grad_norm": 0.24379337465862244, "learning_rate": 9.686259404705122e-06, "loss": 0.3136, "step": 3500 }, { "epoch": 0.28007439851202975, "grad_norm": 0.26877434204596284, "learning_rate": 9.68603352503172e-06, "loss": 0.3034, "step": 3501 }, { "epoch": 0.28015439691206173, "grad_norm": 0.2727502723213497, "learning_rate": 9.685807566711409e-06, "loss": 0.2848, "step": 3502 }, { "epoch": 0.28023439531209376, "grad_norm": 0.2701070517944016, "learning_rate": 9.685581529747982e-06, "loss": 0.3111, "step": 3503 }, { "epoch": 0.28031439371212574, "grad_norm": 0.37025514028448614, "learning_rate": 9.685355414145237e-06, "loss": 0.2724, "step": 3504 }, { "epoch": 0.2803943921121578, "grad_norm": 0.303195262015166, "learning_rate": 9.685129219906964e-06, "loss": 0.3027, "step": 3505 }, { "epoch": 0.28047439051218975, "grad_norm": 0.28952981722471194, "learning_rate": 9.684902947036959e-06, "loss": 0.2783, "step": 3506 }, { "epoch": 0.28055438891222173, "grad_norm": 0.2885010529664571, "learning_rate": 9.684676595539023e-06, "loss": 0.2969, "step": 3507 }, { "epoch": 0.28063438731225376, "grad_norm": 0.3128208179187677, "learning_rate": 9.684450165416953e-06, "loss": 0.2836, "step": 3508 }, { "epoch": 0.28071438571228574, "grad_norm": 0.27154605730218084, "learning_rate": 9.684223656674548e-06, "loss": 0.2945, "step": 3509 }, { "epoch": 0.2807943841123178, "grad_norm": 0.2615764046320604, "learning_rate": 9.683997069315612e-06, "loss": 0.2998, "step": 3510 }, { "epoch": 0.28087438251234975, "grad_norm": 0.3156322050827389, "learning_rate": 9.683770403343947e-06, "loss": 0.2587, "step": 3511 }, { "epoch": 0.28095438091238173, "grad_norm": 0.3148725487243505, "learning_rate": 9.683543658763357e-06, "loss": 0.2829, "step": 3512 }, { "epoch": 0.28103437931241376, "grad_norm": 0.24727285575114463, "learning_rate": 9.683316835577648e-06, "loss": 0.3314, "step": 3513 }, { "epoch": 0.28111437771244574, "grad_norm": 0.24754777489049318, "learning_rate": 9.683089933790626e-06, "loss": 0.2999, "step": 3514 }, { "epoch": 0.2811943761124778, "grad_norm": 0.24461125398433178, "learning_rate": 9.6828629534061e-06, "loss": 0.3122, "step": 3515 }, { "epoch": 0.28127437451250975, "grad_norm": 0.2716940807741596, "learning_rate": 9.682635894427878e-06, "loss": 0.2864, "step": 3516 }, { "epoch": 0.28135437291254173, "grad_norm": 0.22107850125775544, "learning_rate": 9.682408756859772e-06, "loss": 0.3047, "step": 3517 }, { "epoch": 0.28143437131257376, "grad_norm": 0.2720880829421066, "learning_rate": 9.682181540705596e-06, "loss": 0.2953, "step": 3518 }, { "epoch": 0.28151436971260574, "grad_norm": 0.28302441241508147, "learning_rate": 9.681954245969158e-06, "loss": 0.2865, "step": 3519 }, { "epoch": 0.28159436811263777, "grad_norm": 0.31699394921124563, "learning_rate": 9.681726872654278e-06, "loss": 0.2793, "step": 3520 }, { "epoch": 0.28167436651266975, "grad_norm": 0.311326859843477, "learning_rate": 9.681499420764771e-06, "loss": 0.2939, "step": 3521 }, { "epoch": 0.2817543649127017, "grad_norm": 0.3126531677953092, "learning_rate": 9.681271890304451e-06, "loss": 0.2696, "step": 3522 }, { "epoch": 0.28183436331273376, "grad_norm": 0.28627298391850425, "learning_rate": 9.681044281277141e-06, "loss": 0.3104, "step": 3523 }, { "epoch": 0.28191436171276574, "grad_norm": 0.2869154481880232, "learning_rate": 9.68081659368666e-06, "loss": 0.3117, "step": 3524 }, { "epoch": 0.28199436011279777, "grad_norm": 0.3224264963164287, "learning_rate": 9.680588827536828e-06, "loss": 0.2584, "step": 3525 }, { "epoch": 0.28207435851282975, "grad_norm": 0.32270759340202915, "learning_rate": 9.680360982831467e-06, "loss": 0.2619, "step": 3526 }, { "epoch": 0.2821543569128617, "grad_norm": 0.24948287534558383, "learning_rate": 9.680133059574403e-06, "loss": 0.3379, "step": 3527 }, { "epoch": 0.28223435531289376, "grad_norm": 0.3097735572798732, "learning_rate": 9.67990505776946e-06, "loss": 0.2765, "step": 3528 }, { "epoch": 0.28231435371292574, "grad_norm": 0.28730750520419135, "learning_rate": 9.679676977420467e-06, "loss": 0.3304, "step": 3529 }, { "epoch": 0.2823943521129577, "grad_norm": 0.33353603419496103, "learning_rate": 9.679448818531248e-06, "loss": 0.2599, "step": 3530 }, { "epoch": 0.28247435051298975, "grad_norm": 0.3351463484192178, "learning_rate": 9.679220581105636e-06, "loss": 0.2768, "step": 3531 }, { "epoch": 0.2825543489130217, "grad_norm": 0.26887352570595346, "learning_rate": 9.678992265147458e-06, "loss": 0.2887, "step": 3532 }, { "epoch": 0.28263434731305376, "grad_norm": 0.28827037007858636, "learning_rate": 9.67876387066055e-06, "loss": 0.3126, "step": 3533 }, { "epoch": 0.28271434571308574, "grad_norm": 0.31643090337016383, "learning_rate": 9.678535397648741e-06, "loss": 0.255, "step": 3534 }, { "epoch": 0.2827943441131177, "grad_norm": 0.24295894839082222, "learning_rate": 9.67830684611587e-06, "loss": 0.3208, "step": 3535 }, { "epoch": 0.28287434251314975, "grad_norm": 0.2630339611720179, "learning_rate": 9.678078216065766e-06, "loss": 0.3218, "step": 3536 }, { "epoch": 0.2829543409131817, "grad_norm": 0.24506561646479622, "learning_rate": 9.677849507502275e-06, "loss": 0.3143, "step": 3537 }, { "epoch": 0.28303433931321376, "grad_norm": 0.28856380505982, "learning_rate": 9.67762072042923e-06, "loss": 0.307, "step": 3538 }, { "epoch": 0.28311433771324573, "grad_norm": 0.25562677580581983, "learning_rate": 9.67739185485047e-06, "loss": 0.291, "step": 3539 }, { "epoch": 0.2831943361132777, "grad_norm": 0.26830088274597796, "learning_rate": 9.67716291076984e-06, "loss": 0.294, "step": 3540 }, { "epoch": 0.28327433451330974, "grad_norm": 0.24875984164012233, "learning_rate": 9.676933888191178e-06, "loss": 0.3325, "step": 3541 }, { "epoch": 0.2833543329133417, "grad_norm": 0.3443339217621843, "learning_rate": 9.676704787118332e-06, "loss": 0.2966, "step": 3542 }, { "epoch": 0.28343433131337376, "grad_norm": 0.31932565675285657, "learning_rate": 9.676475607555145e-06, "loss": 0.2684, "step": 3543 }, { "epoch": 0.28351432971340573, "grad_norm": 0.31487996172819716, "learning_rate": 9.676246349505462e-06, "loss": 0.2553, "step": 3544 }, { "epoch": 0.2835943281134377, "grad_norm": 0.2842247572322971, "learning_rate": 9.676017012973133e-06, "loss": 0.2895, "step": 3545 }, { "epoch": 0.28367432651346974, "grad_norm": 0.22752549253876772, "learning_rate": 9.675787597962007e-06, "loss": 0.3043, "step": 3546 }, { "epoch": 0.2837543249135017, "grad_norm": 0.22142071462468546, "learning_rate": 9.675558104475933e-06, "loss": 0.3076, "step": 3547 }, { "epoch": 0.28383432331353375, "grad_norm": 0.26692368007808837, "learning_rate": 9.675328532518762e-06, "loss": 0.2825, "step": 3548 }, { "epoch": 0.28391432171356573, "grad_norm": 0.27711547576078527, "learning_rate": 9.67509888209435e-06, "loss": 0.3059, "step": 3549 }, { "epoch": 0.2839943201135977, "grad_norm": 0.3127072257642832, "learning_rate": 9.674869153206547e-06, "loss": 0.2873, "step": 3550 }, { "epoch": 0.28407431851362974, "grad_norm": 0.25228187746615616, "learning_rate": 9.674639345859213e-06, "loss": 0.3232, "step": 3551 }, { "epoch": 0.2841543169136617, "grad_norm": 0.37585151941760914, "learning_rate": 9.674409460056204e-06, "loss": 0.2865, "step": 3552 }, { "epoch": 0.28423431531369375, "grad_norm": 0.27624787579470245, "learning_rate": 9.674179495801375e-06, "loss": 0.3154, "step": 3553 }, { "epoch": 0.28431431371372573, "grad_norm": 0.3891993012699676, "learning_rate": 9.673949453098587e-06, "loss": 0.256, "step": 3554 }, { "epoch": 0.2843943121137577, "grad_norm": 0.30425647695869706, "learning_rate": 9.673719331951706e-06, "loss": 0.2573, "step": 3555 }, { "epoch": 0.28447431051378974, "grad_norm": 0.6599545792969306, "learning_rate": 9.673489132364586e-06, "loss": 0.3142, "step": 3556 }, { "epoch": 0.2845543089138217, "grad_norm": 0.2843458805046887, "learning_rate": 9.673258854341094e-06, "loss": 0.3049, "step": 3557 }, { "epoch": 0.2846343073138537, "grad_norm": 0.3040213883530118, "learning_rate": 9.673028497885098e-06, "loss": 0.2661, "step": 3558 }, { "epoch": 0.28471430571388573, "grad_norm": 0.23763033673227574, "learning_rate": 9.672798063000458e-06, "loss": 0.3155, "step": 3559 }, { "epoch": 0.2847943041139177, "grad_norm": 0.278766534900064, "learning_rate": 9.672567549691046e-06, "loss": 0.2967, "step": 3560 }, { "epoch": 0.28487430251394974, "grad_norm": 0.32771764937400233, "learning_rate": 9.67233695796073e-06, "loss": 0.267, "step": 3561 }, { "epoch": 0.2849543009139817, "grad_norm": 0.23054792919045614, "learning_rate": 9.67210628781338e-06, "loss": 0.3232, "step": 3562 }, { "epoch": 0.2850342993140137, "grad_norm": 0.2189192766962145, "learning_rate": 9.671875539252865e-06, "loss": 0.3567, "step": 3563 }, { "epoch": 0.28511429771404573, "grad_norm": 0.28407415174754974, "learning_rate": 9.671644712283061e-06, "loss": 0.2898, "step": 3564 }, { "epoch": 0.2851942961140777, "grad_norm": 0.31229030550406567, "learning_rate": 9.67141380690784e-06, "loss": 0.3007, "step": 3565 }, { "epoch": 0.28527429451410974, "grad_norm": 0.2962903372689868, "learning_rate": 9.671182823131079e-06, "loss": 0.2663, "step": 3566 }, { "epoch": 0.2853542929141417, "grad_norm": 0.3134596181137637, "learning_rate": 9.670951760956653e-06, "loss": 0.2969, "step": 3567 }, { "epoch": 0.2854342913141737, "grad_norm": 0.23928713432477816, "learning_rate": 9.67072062038844e-06, "loss": 0.3334, "step": 3568 }, { "epoch": 0.2855142897142057, "grad_norm": 1.252922272598145, "learning_rate": 9.670489401430322e-06, "loss": 0.2838, "step": 3569 }, { "epoch": 0.2855942881142377, "grad_norm": 0.332364862607897, "learning_rate": 9.670258104086175e-06, "loss": 0.2872, "step": 3570 }, { "epoch": 0.28567428651426974, "grad_norm": 0.29779300854561425, "learning_rate": 9.670026728359884e-06, "loss": 0.2423, "step": 3571 }, { "epoch": 0.2857542849143017, "grad_norm": 0.33394153390700454, "learning_rate": 9.669795274255334e-06, "loss": 0.2568, "step": 3572 }, { "epoch": 0.2858342833143337, "grad_norm": 0.27766390163841304, "learning_rate": 9.669563741776405e-06, "loss": 0.3134, "step": 3573 }, { "epoch": 0.2859142817143657, "grad_norm": 0.40186118190687187, "learning_rate": 9.669332130926985e-06, "loss": 0.2887, "step": 3574 }, { "epoch": 0.2859942801143977, "grad_norm": 0.30936017999011756, "learning_rate": 9.669100441710962e-06, "loss": 0.2738, "step": 3575 }, { "epoch": 0.28607427851442974, "grad_norm": 0.26752992823786176, "learning_rate": 9.668868674132224e-06, "loss": 0.2763, "step": 3576 }, { "epoch": 0.2861542769144617, "grad_norm": 0.2571555542828409, "learning_rate": 9.66863682819466e-06, "loss": 0.3362, "step": 3577 }, { "epoch": 0.2862342753144937, "grad_norm": 0.32160783997009484, "learning_rate": 9.668404903902161e-06, "loss": 0.26, "step": 3578 }, { "epoch": 0.2863142737145257, "grad_norm": 0.32752325193888654, "learning_rate": 9.668172901258623e-06, "loss": 0.2695, "step": 3579 }, { "epoch": 0.2863942721145577, "grad_norm": 0.2777100720642354, "learning_rate": 9.667940820267935e-06, "loss": 0.3039, "step": 3580 }, { "epoch": 0.28647427051458974, "grad_norm": 0.29056125100124036, "learning_rate": 9.667708660933994e-06, "loss": 0.2915, "step": 3581 }, { "epoch": 0.2865542689146217, "grad_norm": 0.29013375715373413, "learning_rate": 9.667476423260696e-06, "loss": 0.2933, "step": 3582 }, { "epoch": 0.2866342673146537, "grad_norm": 0.2755326473346131, "learning_rate": 9.66724410725194e-06, "loss": 0.2812, "step": 3583 }, { "epoch": 0.2867142657146857, "grad_norm": 0.30203473419123206, "learning_rate": 9.667011712911625e-06, "loss": 0.3021, "step": 3584 }, { "epoch": 0.2867942641147177, "grad_norm": 0.264519233834704, "learning_rate": 9.66677924024365e-06, "loss": 0.3277, "step": 3585 }, { "epoch": 0.2868742625147497, "grad_norm": 0.2524821117628419, "learning_rate": 9.666546689251916e-06, "loss": 0.3091, "step": 3586 }, { "epoch": 0.2869542609147817, "grad_norm": 0.26361569298958415, "learning_rate": 9.666314059940326e-06, "loss": 0.2921, "step": 3587 }, { "epoch": 0.2870342593148137, "grad_norm": 0.2709067721793426, "learning_rate": 9.666081352312789e-06, "loss": 0.3068, "step": 3588 }, { "epoch": 0.2871142577148457, "grad_norm": 0.2079724231244705, "learning_rate": 9.665848566373204e-06, "loss": 0.349, "step": 3589 }, { "epoch": 0.2871942561148777, "grad_norm": 0.29602360914847914, "learning_rate": 9.665615702125482e-06, "loss": 0.2944, "step": 3590 }, { "epoch": 0.2872742545149097, "grad_norm": 0.31818458419094364, "learning_rate": 9.665382759573529e-06, "loss": 0.2531, "step": 3591 }, { "epoch": 0.2873542529149417, "grad_norm": 0.31006998694065263, "learning_rate": 9.665149738721258e-06, "loss": 0.3182, "step": 3592 }, { "epoch": 0.2874342513149737, "grad_norm": 0.32636730740493136, "learning_rate": 9.664916639572574e-06, "loss": 0.2575, "step": 3593 }, { "epoch": 0.2875142497150057, "grad_norm": 0.2920081928252954, "learning_rate": 9.664683462131397e-06, "loss": 0.3063, "step": 3594 }, { "epoch": 0.2875942481150377, "grad_norm": 0.2410949850330817, "learning_rate": 9.664450206401633e-06, "loss": 0.3257, "step": 3595 }, { "epoch": 0.2876742465150697, "grad_norm": 0.3174193011491744, "learning_rate": 9.664216872387202e-06, "loss": 0.2481, "step": 3596 }, { "epoch": 0.2877542449151017, "grad_norm": 0.31090327268265827, "learning_rate": 9.663983460092015e-06, "loss": 0.2735, "step": 3597 }, { "epoch": 0.2878342433151337, "grad_norm": 0.2439558762274201, "learning_rate": 9.663749969519994e-06, "loss": 0.3264, "step": 3598 }, { "epoch": 0.2879142417151657, "grad_norm": 0.2514240283033212, "learning_rate": 9.663516400675057e-06, "loss": 0.3212, "step": 3599 }, { "epoch": 0.2879942401151977, "grad_norm": 0.2835896842328783, "learning_rate": 9.663282753561124e-06, "loss": 0.2974, "step": 3600 }, { "epoch": 0.2880742385152297, "grad_norm": 0.33940816915779104, "learning_rate": 9.663049028182112e-06, "loss": 0.2654, "step": 3601 }, { "epoch": 0.2881542369152617, "grad_norm": 0.20482419056241102, "learning_rate": 9.662815224541949e-06, "loss": 0.3449, "step": 3602 }, { "epoch": 0.2882342353152937, "grad_norm": 0.30337850902951247, "learning_rate": 9.662581342644557e-06, "loss": 0.2593, "step": 3603 }, { "epoch": 0.2883142337153257, "grad_norm": 0.20427567936580654, "learning_rate": 9.662347382493863e-06, "loss": 0.3571, "step": 3604 }, { "epoch": 0.2883942321153577, "grad_norm": 0.24484493972005475, "learning_rate": 9.662113344093791e-06, "loss": 0.3632, "step": 3605 }, { "epoch": 0.2884742305153897, "grad_norm": 0.326462097588612, "learning_rate": 9.66187922744827e-06, "loss": 0.3186, "step": 3606 }, { "epoch": 0.2885542289154217, "grad_norm": 0.30081808241951774, "learning_rate": 9.66164503256123e-06, "loss": 0.2966, "step": 3607 }, { "epoch": 0.2886342273154537, "grad_norm": 0.621189559489383, "learning_rate": 9.6614107594366e-06, "loss": 0.3041, "step": 3608 }, { "epoch": 0.2887142257154857, "grad_norm": 0.31585116718843326, "learning_rate": 9.661176408078315e-06, "loss": 0.2667, "step": 3609 }, { "epoch": 0.2887942241155177, "grad_norm": 0.2680006914897545, "learning_rate": 9.660941978490302e-06, "loss": 0.2991, "step": 3610 }, { "epoch": 0.2888742225155497, "grad_norm": 0.36748299932967704, "learning_rate": 9.660707470676503e-06, "loss": 0.2821, "step": 3611 }, { "epoch": 0.2889542209155817, "grad_norm": 0.28997781194531913, "learning_rate": 9.660472884640848e-06, "loss": 0.3005, "step": 3612 }, { "epoch": 0.2890342193156137, "grad_norm": 0.2873148591015218, "learning_rate": 9.660238220387277e-06, "loss": 0.295, "step": 3613 }, { "epoch": 0.28911421771564566, "grad_norm": 0.28616598892234374, "learning_rate": 9.660003477919727e-06, "loss": 0.3054, "step": 3614 }, { "epoch": 0.2891942161156777, "grad_norm": 0.21877521125323413, "learning_rate": 9.659768657242138e-06, "loss": 0.3195, "step": 3615 }, { "epoch": 0.28927421451570967, "grad_norm": 0.31213642762197963, "learning_rate": 9.659533758358455e-06, "loss": 0.2652, "step": 3616 }, { "epoch": 0.2893542129157417, "grad_norm": 0.2527272388147323, "learning_rate": 9.659298781272615e-06, "loss": 0.3004, "step": 3617 }, { "epoch": 0.2894342113157737, "grad_norm": 0.2814768470351549, "learning_rate": 9.659063725988562e-06, "loss": 0.285, "step": 3618 }, { "epoch": 0.28951420971580566, "grad_norm": 0.2251439533656403, "learning_rate": 9.658828592510243e-06, "loss": 0.3665, "step": 3619 }, { "epoch": 0.2895942081158377, "grad_norm": 0.2505596122145966, "learning_rate": 9.658593380841605e-06, "loss": 0.3168, "step": 3620 }, { "epoch": 0.28967420651586967, "grad_norm": 0.25385916125848074, "learning_rate": 9.658358090986594e-06, "loss": 0.3149, "step": 3621 }, { "epoch": 0.2897542049159017, "grad_norm": 0.3014876867758723, "learning_rate": 9.658122722949161e-06, "loss": 0.2886, "step": 3622 }, { "epoch": 0.2898342033159337, "grad_norm": 0.3070731922875036, "learning_rate": 9.657887276733254e-06, "loss": 0.2604, "step": 3623 }, { "epoch": 0.28991420171596566, "grad_norm": 0.26123416921451614, "learning_rate": 9.657651752342824e-06, "loss": 0.3166, "step": 3624 }, { "epoch": 0.2899942001159977, "grad_norm": 0.26274565773351494, "learning_rate": 9.657416149781826e-06, "loss": 0.3297, "step": 3625 }, { "epoch": 0.29007419851602967, "grad_norm": 0.28286791612407075, "learning_rate": 9.657180469054213e-06, "loss": 0.3042, "step": 3626 }, { "epoch": 0.2901541969160617, "grad_norm": 0.2887230967073155, "learning_rate": 9.65694471016394e-06, "loss": 0.2975, "step": 3627 }, { "epoch": 0.2902341953160937, "grad_norm": 0.30982160227213973, "learning_rate": 9.656708873114966e-06, "loss": 0.2551, "step": 3628 }, { "epoch": 0.29031419371612566, "grad_norm": 0.27219807766382315, "learning_rate": 9.656472957911247e-06, "loss": 0.2931, "step": 3629 }, { "epoch": 0.2903941921161577, "grad_norm": 0.27086082481773255, "learning_rate": 9.656236964556742e-06, "loss": 0.3023, "step": 3630 }, { "epoch": 0.29047419051618967, "grad_norm": 0.2618741052120626, "learning_rate": 9.656000893055416e-06, "loss": 0.3144, "step": 3631 }, { "epoch": 0.2905541889162217, "grad_norm": 0.3053854648384801, "learning_rate": 9.655764743411224e-06, "loss": 0.2463, "step": 3632 }, { "epoch": 0.2906341873162537, "grad_norm": 0.2880725367070506, "learning_rate": 9.655528515628136e-06, "loss": 0.3144, "step": 3633 }, { "epoch": 0.29071418571628566, "grad_norm": 0.31600063153583674, "learning_rate": 9.655292209710111e-06, "loss": 0.238, "step": 3634 }, { "epoch": 0.2907941841163177, "grad_norm": 0.3208901107665132, "learning_rate": 9.655055825661122e-06, "loss": 0.2851, "step": 3635 }, { "epoch": 0.29087418251634967, "grad_norm": 0.2997957886432289, "learning_rate": 9.65481936348513e-06, "loss": 0.2484, "step": 3636 }, { "epoch": 0.2909541809163817, "grad_norm": 0.40280525710843185, "learning_rate": 9.654582823186107e-06, "loss": 0.2942, "step": 3637 }, { "epoch": 0.2910341793164137, "grad_norm": 0.2598381322881362, "learning_rate": 9.654346204768019e-06, "loss": 0.3249, "step": 3638 }, { "epoch": 0.29111417771644565, "grad_norm": 0.28273157136828847, "learning_rate": 9.654109508234843e-06, "loss": 0.3037, "step": 3639 }, { "epoch": 0.2911941761164777, "grad_norm": 0.24193152083222313, "learning_rate": 9.653872733590547e-06, "loss": 0.3275, "step": 3640 }, { "epoch": 0.29127417451650967, "grad_norm": 0.2785288485829046, "learning_rate": 9.653635880839107e-06, "loss": 0.2933, "step": 3641 }, { "epoch": 0.29135417291654164, "grad_norm": 0.26234933785429176, "learning_rate": 9.653398949984497e-06, "loss": 0.3037, "step": 3642 }, { "epoch": 0.2914341713165737, "grad_norm": 0.2409530985579634, "learning_rate": 9.653161941030695e-06, "loss": 0.3253, "step": 3643 }, { "epoch": 0.29151416971660565, "grad_norm": 0.3130850214610556, "learning_rate": 9.652924853981676e-06, "loss": 0.2739, "step": 3644 }, { "epoch": 0.2915941681166377, "grad_norm": 0.28051620437122887, "learning_rate": 9.652687688841422e-06, "loss": 0.3178, "step": 3645 }, { "epoch": 0.29167416651666966, "grad_norm": 0.3221847246746687, "learning_rate": 9.652450445613913e-06, "loss": 0.2544, "step": 3646 }, { "epoch": 0.29175416491670164, "grad_norm": 0.3076556519718147, "learning_rate": 9.652213124303126e-06, "loss": 0.2693, "step": 3647 }, { "epoch": 0.2918341633167337, "grad_norm": 0.29864406862355425, "learning_rate": 9.651975724913051e-06, "loss": 0.2941, "step": 3648 }, { "epoch": 0.29191416171676565, "grad_norm": 0.30740397326881647, "learning_rate": 9.65173824744767e-06, "loss": 0.2609, "step": 3649 }, { "epoch": 0.2919941601167977, "grad_norm": 0.2848300563932702, "learning_rate": 9.65150069191097e-06, "loss": 0.2872, "step": 3650 }, { "epoch": 0.29207415851682966, "grad_norm": 0.2909451909650978, "learning_rate": 9.651263058306932e-06, "loss": 0.2878, "step": 3651 }, { "epoch": 0.29215415691686164, "grad_norm": 0.28404880480673894, "learning_rate": 9.65102534663955e-06, "loss": 0.2977, "step": 3652 }, { "epoch": 0.2922341553168937, "grad_norm": 0.2545819949316656, "learning_rate": 9.650787556912811e-06, "loss": 0.3249, "step": 3653 }, { "epoch": 0.29231415371692565, "grad_norm": 0.34201736190712306, "learning_rate": 9.650549689130706e-06, "loss": 0.2861, "step": 3654 }, { "epoch": 0.2923941521169577, "grad_norm": 0.27140677441089944, "learning_rate": 9.650311743297229e-06, "loss": 0.3028, "step": 3655 }, { "epoch": 0.29247415051698966, "grad_norm": 0.31290119711359193, "learning_rate": 9.650073719416374e-06, "loss": 0.2451, "step": 3656 }, { "epoch": 0.29255414891702164, "grad_norm": 0.28758686258621446, "learning_rate": 9.64983561749213e-06, "loss": 0.2871, "step": 3657 }, { "epoch": 0.29263414731705367, "grad_norm": 0.3263971096916948, "learning_rate": 9.6495974375285e-06, "loss": 0.2724, "step": 3658 }, { "epoch": 0.29271414571708565, "grad_norm": 0.31800109214298666, "learning_rate": 9.649359179529477e-06, "loss": 0.256, "step": 3659 }, { "epoch": 0.2927941441171177, "grad_norm": 0.31727300964075655, "learning_rate": 9.649120843499065e-06, "loss": 0.2603, "step": 3660 }, { "epoch": 0.29287414251714966, "grad_norm": 0.3475772808426779, "learning_rate": 9.648882429441258e-06, "loss": 0.2765, "step": 3661 }, { "epoch": 0.29295414091718164, "grad_norm": 0.3154059517955945, "learning_rate": 9.64864393736006e-06, "loss": 0.2514, "step": 3662 }, { "epoch": 0.29303413931721367, "grad_norm": 0.27476871996524005, "learning_rate": 9.648405367259475e-06, "loss": 0.3094, "step": 3663 }, { "epoch": 0.29311413771724565, "grad_norm": 0.328053225591602, "learning_rate": 9.648166719143504e-06, "loss": 0.2433, "step": 3664 }, { "epoch": 0.2931941361172777, "grad_norm": 0.2772424880219433, "learning_rate": 9.647927993016154e-06, "loss": 0.3289, "step": 3665 }, { "epoch": 0.29327413451730966, "grad_norm": 0.2860111505037963, "learning_rate": 9.647689188881431e-06, "loss": 0.2871, "step": 3666 }, { "epoch": 0.29335413291734164, "grad_norm": 0.23119815102629518, "learning_rate": 9.647450306743345e-06, "loss": 0.316, "step": 3667 }, { "epoch": 0.29343413131737367, "grad_norm": 0.25103273019291483, "learning_rate": 9.647211346605902e-06, "loss": 0.3095, "step": 3668 }, { "epoch": 0.29351412971740565, "grad_norm": 0.27787133399955677, "learning_rate": 9.646972308473115e-06, "loss": 0.2917, "step": 3669 }, { "epoch": 0.2935941281174376, "grad_norm": 0.3204560639969028, "learning_rate": 9.646733192348996e-06, "loss": 0.25, "step": 3670 }, { "epoch": 0.29367412651746966, "grad_norm": 0.2891861766751723, "learning_rate": 9.646493998237557e-06, "loss": 0.2865, "step": 3671 }, { "epoch": 0.29375412491750164, "grad_norm": 0.23214187634365005, "learning_rate": 9.64625472614281e-06, "loss": 0.3255, "step": 3672 }, { "epoch": 0.29383412331753367, "grad_norm": 0.24368769748316976, "learning_rate": 9.646015376068776e-06, "loss": 0.312, "step": 3673 }, { "epoch": 0.29391412171756565, "grad_norm": 0.31150511655916724, "learning_rate": 9.645775948019466e-06, "loss": 0.2851, "step": 3674 }, { "epoch": 0.2939941201175976, "grad_norm": 0.2705216689498565, "learning_rate": 9.645536441998907e-06, "loss": 0.2709, "step": 3675 }, { "epoch": 0.29407411851762966, "grad_norm": 0.32738656870048694, "learning_rate": 9.645296858011109e-06, "loss": 0.265, "step": 3676 }, { "epoch": 0.29415411691766163, "grad_norm": 0.2743633712930602, "learning_rate": 9.6450571960601e-06, "loss": 0.3053, "step": 3677 }, { "epoch": 0.29423411531769367, "grad_norm": 0.24948198917658698, "learning_rate": 9.644817456149898e-06, "loss": 0.2878, "step": 3678 }, { "epoch": 0.29431411371772565, "grad_norm": 0.21507038159488337, "learning_rate": 9.64457763828453e-06, "loss": 0.345, "step": 3679 }, { "epoch": 0.2943941121177576, "grad_norm": 0.4280201945340013, "learning_rate": 9.644337742468017e-06, "loss": 0.2747, "step": 3680 }, { "epoch": 0.29447411051778966, "grad_norm": 0.27187052490272967, "learning_rate": 9.64409776870439e-06, "loss": 0.3133, "step": 3681 }, { "epoch": 0.29455410891782163, "grad_norm": 0.29810668702114657, "learning_rate": 9.643857716997674e-06, "loss": 0.318, "step": 3682 }, { "epoch": 0.29463410731785367, "grad_norm": 0.2396709265443274, "learning_rate": 9.643617587351897e-06, "loss": 0.3183, "step": 3683 }, { "epoch": 0.29471410571788564, "grad_norm": 0.2504255980305616, "learning_rate": 9.64337737977109e-06, "loss": 0.3188, "step": 3684 }, { "epoch": 0.2947941041179176, "grad_norm": 0.25570491083131913, "learning_rate": 9.643137094259285e-06, "loss": 0.3132, "step": 3685 }, { "epoch": 0.29487410251794965, "grad_norm": 0.27973499448631417, "learning_rate": 9.642896730820514e-06, "loss": 0.2907, "step": 3686 }, { "epoch": 0.29495410091798163, "grad_norm": 0.19842977661186897, "learning_rate": 9.642656289458812e-06, "loss": 0.3608, "step": 3687 }, { "epoch": 0.29503409931801367, "grad_norm": 0.27323309896896375, "learning_rate": 9.642415770178213e-06, "loss": 0.3149, "step": 3688 }, { "epoch": 0.29511409771804564, "grad_norm": 0.38977248536656156, "learning_rate": 9.642175172982755e-06, "loss": 0.294, "step": 3689 }, { "epoch": 0.2951940961180776, "grad_norm": 0.26292251450084825, "learning_rate": 9.641934497876476e-06, "loss": 0.304, "step": 3690 }, { "epoch": 0.29527409451810965, "grad_norm": 0.34320477281012707, "learning_rate": 9.641693744863413e-06, "loss": 0.2819, "step": 3691 }, { "epoch": 0.29535409291814163, "grad_norm": 0.3222693957793193, "learning_rate": 9.641452913947611e-06, "loss": 0.2835, "step": 3692 }, { "epoch": 0.29543409131817366, "grad_norm": 0.3008470277823178, "learning_rate": 9.641212005133107e-06, "loss": 0.2646, "step": 3693 }, { "epoch": 0.29551408971820564, "grad_norm": 0.24515306598083095, "learning_rate": 9.64097101842395e-06, "loss": 0.3197, "step": 3694 }, { "epoch": 0.2955940881182376, "grad_norm": 0.38829884202271975, "learning_rate": 9.640729953824178e-06, "loss": 0.3236, "step": 3695 }, { "epoch": 0.29567408651826965, "grad_norm": 0.29560961059019253, "learning_rate": 9.64048881133784e-06, "loss": 0.2889, "step": 3696 }, { "epoch": 0.29575408491830163, "grad_norm": 0.2532809208065725, "learning_rate": 9.640247590968985e-06, "loss": 0.3081, "step": 3697 }, { "epoch": 0.2958340833183336, "grad_norm": 0.2864046071706207, "learning_rate": 9.64000629272166e-06, "loss": 0.2834, "step": 3698 }, { "epoch": 0.29591408171836564, "grad_norm": 0.2928897796205158, "learning_rate": 9.639764916599913e-06, "loss": 0.2786, "step": 3699 }, { "epoch": 0.2959940801183976, "grad_norm": 0.27145428331465776, "learning_rate": 9.639523462607796e-06, "loss": 0.3217, "step": 3700 }, { "epoch": 0.29607407851842965, "grad_norm": 0.28536457158623474, "learning_rate": 9.639281930749363e-06, "loss": 0.2921, "step": 3701 }, { "epoch": 0.29615407691846163, "grad_norm": 0.2748436166073774, "learning_rate": 9.639040321028665e-06, "loss": 0.2955, "step": 3702 }, { "epoch": 0.2962340753184936, "grad_norm": 0.7792886666737343, "learning_rate": 9.63879863344976e-06, "loss": 0.2798, "step": 3703 }, { "epoch": 0.29631407371852564, "grad_norm": 0.319197613409756, "learning_rate": 9.638556868016704e-06, "loss": 0.3174, "step": 3704 }, { "epoch": 0.2963940721185576, "grad_norm": 0.3516996553239381, "learning_rate": 9.638315024733552e-06, "loss": 0.3018, "step": 3705 }, { "epoch": 0.29647407051858965, "grad_norm": 0.3012396617555724, "learning_rate": 9.638073103604364e-06, "loss": 0.273, "step": 3706 }, { "epoch": 0.2965540689186216, "grad_norm": 0.30481295833388966, "learning_rate": 9.6378311046332e-06, "loss": 0.2808, "step": 3707 }, { "epoch": 0.2966340673186536, "grad_norm": 0.31370002630011007, "learning_rate": 9.637589027824123e-06, "loss": 0.2639, "step": 3708 }, { "epoch": 0.29671406571868564, "grad_norm": 0.27322122593818465, "learning_rate": 9.637346873181194e-06, "loss": 0.258, "step": 3709 }, { "epoch": 0.2967940641187176, "grad_norm": 0.27250613028642545, "learning_rate": 9.637104640708482e-06, "loss": 0.3341, "step": 3710 }, { "epoch": 0.29687406251874965, "grad_norm": 0.3150893975119145, "learning_rate": 9.636862330410043e-06, "loss": 0.2692, "step": 3711 }, { "epoch": 0.2969540609187816, "grad_norm": 0.31024142453826253, "learning_rate": 9.636619942289953e-06, "loss": 0.2827, "step": 3712 }, { "epoch": 0.2970340593188136, "grad_norm": 0.2950546739832863, "learning_rate": 9.636377476352277e-06, "loss": 0.2457, "step": 3713 }, { "epoch": 0.29711405771884564, "grad_norm": 0.3596318405935052, "learning_rate": 9.636134932601082e-06, "loss": 0.2705, "step": 3714 }, { "epoch": 0.2971940561188776, "grad_norm": 0.3217345323118577, "learning_rate": 9.63589231104044e-06, "loss": 0.2809, "step": 3715 }, { "epoch": 0.29727405451890965, "grad_norm": 0.23982361333263838, "learning_rate": 9.635649611674425e-06, "loss": 0.3222, "step": 3716 }, { "epoch": 0.2973540529189416, "grad_norm": 0.39246221825566546, "learning_rate": 9.635406834507108e-06, "loss": 0.2791, "step": 3717 }, { "epoch": 0.2974340513189736, "grad_norm": 0.2919341227049066, "learning_rate": 9.635163979542564e-06, "loss": 0.2918, "step": 3718 }, { "epoch": 0.29751404971900564, "grad_norm": 0.2210234633242241, "learning_rate": 9.63492104678487e-06, "loss": 0.3532, "step": 3719 }, { "epoch": 0.2975940481190376, "grad_norm": 0.3522288793019361, "learning_rate": 9.634678036238102e-06, "loss": 0.3004, "step": 3720 }, { "epoch": 0.29767404651906965, "grad_norm": 0.2814137892248608, "learning_rate": 9.634434947906337e-06, "loss": 0.2881, "step": 3721 }, { "epoch": 0.2977540449191016, "grad_norm": 0.31428072205296853, "learning_rate": 9.634191781793659e-06, "loss": 0.2693, "step": 3722 }, { "epoch": 0.2978340433191336, "grad_norm": 0.268707894804558, "learning_rate": 9.633948537904145e-06, "loss": 0.269, "step": 3723 }, { "epoch": 0.29791404171916563, "grad_norm": 0.3703272103027423, "learning_rate": 9.633705216241881e-06, "loss": 0.2482, "step": 3724 }, { "epoch": 0.2979940401191976, "grad_norm": 0.2799431544590881, "learning_rate": 9.633461816810949e-06, "loss": 0.3093, "step": 3725 }, { "epoch": 0.2980740385192296, "grad_norm": 0.29215297072072555, "learning_rate": 9.633218339615433e-06, "loss": 0.3111, "step": 3726 }, { "epoch": 0.2981540369192616, "grad_norm": 0.28201874271282845, "learning_rate": 9.632974784659421e-06, "loss": 0.2873, "step": 3727 }, { "epoch": 0.2982340353192936, "grad_norm": 0.2789291061611354, "learning_rate": 9.632731151946999e-06, "loss": 0.2683, "step": 3728 }, { "epoch": 0.29831403371932563, "grad_norm": 0.3382906313118446, "learning_rate": 9.632487441482258e-06, "loss": 0.2624, "step": 3729 }, { "epoch": 0.2983940321193576, "grad_norm": 0.24943013622185053, "learning_rate": 9.632243653269287e-06, "loss": 0.3272, "step": 3730 }, { "epoch": 0.2984740305193896, "grad_norm": 0.255167024913473, "learning_rate": 9.631999787312179e-06, "loss": 0.3321, "step": 3731 }, { "epoch": 0.2985540289194216, "grad_norm": 0.29736051306144934, "learning_rate": 9.631755843615024e-06, "loss": 0.2855, "step": 3732 }, { "epoch": 0.2986340273194536, "grad_norm": 0.2786282049815428, "learning_rate": 9.631511822181918e-06, "loss": 0.2836, "step": 3733 }, { "epoch": 0.29871402571948563, "grad_norm": 0.2529832848333478, "learning_rate": 9.631267723016956e-06, "loss": 0.3141, "step": 3734 }, { "epoch": 0.2987940241195176, "grad_norm": 0.6475168815239636, "learning_rate": 9.631023546124236e-06, "loss": 0.2877, "step": 3735 }, { "epoch": 0.2988740225195496, "grad_norm": 0.32859645512767577, "learning_rate": 9.630779291507854e-06, "loss": 0.2645, "step": 3736 }, { "epoch": 0.2989540209195816, "grad_norm": 0.28785597464177126, "learning_rate": 9.630534959171912e-06, "loss": 0.2897, "step": 3737 }, { "epoch": 0.2990340193196136, "grad_norm": 0.3181455689232381, "learning_rate": 9.630290549120508e-06, "loss": 0.2866, "step": 3738 }, { "epoch": 0.29911401771964563, "grad_norm": 0.2425466268635054, "learning_rate": 9.630046061357745e-06, "loss": 0.3304, "step": 3739 }, { "epoch": 0.2991940161196776, "grad_norm": 0.25700648831491657, "learning_rate": 9.62980149588773e-06, "loss": 0.3197, "step": 3740 }, { "epoch": 0.2992740145197096, "grad_norm": 0.35519267931075654, "learning_rate": 9.62955685271456e-06, "loss": 0.2966, "step": 3741 }, { "epoch": 0.2993540129197416, "grad_norm": 0.24091482272976955, "learning_rate": 9.629312131842346e-06, "loss": 0.3269, "step": 3742 }, { "epoch": 0.2994340113197736, "grad_norm": 0.3656330195048561, "learning_rate": 9.629067333275195e-06, "loss": 0.2554, "step": 3743 }, { "epoch": 0.29951400971980563, "grad_norm": 0.2852262908138894, "learning_rate": 9.628822457017215e-06, "loss": 0.2608, "step": 3744 }, { "epoch": 0.2995940081198376, "grad_norm": 0.2965723208472894, "learning_rate": 9.628577503072513e-06, "loss": 0.2918, "step": 3745 }, { "epoch": 0.2996740065198696, "grad_norm": 0.3193581453606131, "learning_rate": 9.628332471445206e-06, "loss": 0.2432, "step": 3746 }, { "epoch": 0.2997540049199016, "grad_norm": 0.3383474939622251, "learning_rate": 9.628087362139402e-06, "loss": 0.2877, "step": 3747 }, { "epoch": 0.2998340033199336, "grad_norm": 0.3128038202416646, "learning_rate": 9.627842175159217e-06, "loss": 0.2921, "step": 3748 }, { "epoch": 0.29991400171996563, "grad_norm": 0.3458796821020789, "learning_rate": 9.627596910508763e-06, "loss": 0.3278, "step": 3749 }, { "epoch": 0.2999940001199976, "grad_norm": 0.6562247094933588, "learning_rate": 9.627351568192159e-06, "loss": 0.2622, "step": 3750 }, { "epoch": 0.3000739985200296, "grad_norm": 0.3345316510721369, "learning_rate": 9.627106148213521e-06, "loss": 0.2441, "step": 3751 }, { "epoch": 0.3001539969200616, "grad_norm": 0.39309551517960806, "learning_rate": 9.62686065057697e-06, "loss": 0.2771, "step": 3752 }, { "epoch": 0.3002339953200936, "grad_norm": 0.308974290541272, "learning_rate": 9.626615075286626e-06, "loss": 0.2526, "step": 3753 }, { "epoch": 0.30031399372012557, "grad_norm": 0.33054394464959064, "learning_rate": 9.62636942234661e-06, "loss": 0.2507, "step": 3754 }, { "epoch": 0.3003939921201576, "grad_norm": 0.29144442848464214, "learning_rate": 9.62612369176104e-06, "loss": 0.2787, "step": 3755 }, { "epoch": 0.3004739905201896, "grad_norm": 0.2879023395833271, "learning_rate": 9.625877883534047e-06, "loss": 0.2977, "step": 3756 }, { "epoch": 0.3005539889202216, "grad_norm": 0.3157858332161786, "learning_rate": 9.625631997669757e-06, "loss": 0.274, "step": 3757 }, { "epoch": 0.3006339873202536, "grad_norm": 0.3152540208439719, "learning_rate": 9.62538603417229e-06, "loss": 0.2591, "step": 3758 }, { "epoch": 0.30071398572028557, "grad_norm": 0.28168736361190055, "learning_rate": 9.62513999304578e-06, "loss": 0.3034, "step": 3759 }, { "epoch": 0.3007939841203176, "grad_norm": 0.2869715337774613, "learning_rate": 9.624893874294355e-06, "loss": 0.3126, "step": 3760 }, { "epoch": 0.3008739825203496, "grad_norm": 0.31983042898724656, "learning_rate": 9.624647677922143e-06, "loss": 0.2569, "step": 3761 }, { "epoch": 0.3009539809203816, "grad_norm": 0.36315234007826497, "learning_rate": 9.624401403933279e-06, "loss": 0.2564, "step": 3762 }, { "epoch": 0.3010339793204136, "grad_norm": 0.3159159509095322, "learning_rate": 9.624155052331896e-06, "loss": 0.2604, "step": 3763 }, { "epoch": 0.30111397772044557, "grad_norm": 0.25063653941001807, "learning_rate": 9.623908623122127e-06, "loss": 0.3002, "step": 3764 }, { "epoch": 0.3011939761204776, "grad_norm": 0.2746297749014126, "learning_rate": 9.623662116308108e-06, "loss": 0.2911, "step": 3765 }, { "epoch": 0.3012739745205096, "grad_norm": 0.25971659915489653, "learning_rate": 9.623415531893978e-06, "loss": 0.3258, "step": 3766 }, { "epoch": 0.3013539729205416, "grad_norm": 0.22848702320610767, "learning_rate": 9.623168869883874e-06, "loss": 0.3485, "step": 3767 }, { "epoch": 0.3014339713205736, "grad_norm": 0.3276802975195463, "learning_rate": 9.622922130281937e-06, "loss": 0.2719, "step": 3768 }, { "epoch": 0.30151396972060557, "grad_norm": 0.3147418831064497, "learning_rate": 9.622675313092307e-06, "loss": 0.2681, "step": 3769 }, { "epoch": 0.3015939681206376, "grad_norm": 0.27471357000394214, "learning_rate": 9.622428418319126e-06, "loss": 0.2903, "step": 3770 }, { "epoch": 0.3016739665206696, "grad_norm": 0.2960204708052019, "learning_rate": 9.622181445966539e-06, "loss": 0.2891, "step": 3771 }, { "epoch": 0.3017539649207016, "grad_norm": 0.3081278432304237, "learning_rate": 9.62193439603869e-06, "loss": 0.2623, "step": 3772 }, { "epoch": 0.3018339633207336, "grad_norm": 0.29959819998855064, "learning_rate": 9.621687268539725e-06, "loss": 0.3144, "step": 3773 }, { "epoch": 0.30191396172076557, "grad_norm": 0.2822632179700892, "learning_rate": 9.621440063473795e-06, "loss": 0.2721, "step": 3774 }, { "epoch": 0.3019939601207976, "grad_norm": 0.21427632045311756, "learning_rate": 9.621192780845044e-06, "loss": 0.3472, "step": 3775 }, { "epoch": 0.3020739585208296, "grad_norm": 0.2662552990541531, "learning_rate": 9.620945420657625e-06, "loss": 0.2842, "step": 3776 }, { "epoch": 0.3021539569208616, "grad_norm": 0.29116465745618164, "learning_rate": 9.620697982915688e-06, "loss": 0.2992, "step": 3777 }, { "epoch": 0.3022339553208936, "grad_norm": 0.3348695161420912, "learning_rate": 9.620450467623387e-06, "loss": 0.2465, "step": 3778 }, { "epoch": 0.30231395372092557, "grad_norm": 0.2358412188553948, "learning_rate": 9.620202874784878e-06, "loss": 0.3083, "step": 3779 }, { "epoch": 0.3023939521209576, "grad_norm": 0.2771555959617794, "learning_rate": 9.619955204404312e-06, "loss": 0.3242, "step": 3780 }, { "epoch": 0.3024739505209896, "grad_norm": 0.20610996669403264, "learning_rate": 9.619707456485848e-06, "loss": 0.361, "step": 3781 }, { "epoch": 0.30255394892102155, "grad_norm": 0.24100582838326473, "learning_rate": 9.619459631033645e-06, "loss": 0.3117, "step": 3782 }, { "epoch": 0.3026339473210536, "grad_norm": 0.3317388276072845, "learning_rate": 9.61921172805186e-06, "loss": 0.2673, "step": 3783 }, { "epoch": 0.30271394572108556, "grad_norm": 0.3107686184126036, "learning_rate": 9.618963747544656e-06, "loss": 0.2453, "step": 3784 }, { "epoch": 0.3027939441211176, "grad_norm": 0.29591603280460227, "learning_rate": 9.618715689516194e-06, "loss": 0.2467, "step": 3785 }, { "epoch": 0.3028739425211496, "grad_norm": 0.5291170084959257, "learning_rate": 9.618467553970636e-06, "loss": 0.2752, "step": 3786 }, { "epoch": 0.30295394092118155, "grad_norm": 0.3390655942547282, "learning_rate": 9.61821934091215e-06, "loss": 0.2856, "step": 3787 }, { "epoch": 0.3030339393212136, "grad_norm": 0.30433072623351004, "learning_rate": 9.617971050344896e-06, "loss": 0.3276, "step": 3788 }, { "epoch": 0.30311393772124556, "grad_norm": 0.32704553463861796, "learning_rate": 9.617722682273048e-06, "loss": 0.2422, "step": 3789 }, { "epoch": 0.3031939361212776, "grad_norm": 0.2926224734726999, "learning_rate": 9.61747423670077e-06, "loss": 0.2892, "step": 3790 }, { "epoch": 0.3032739345213096, "grad_norm": 0.28390835478640686, "learning_rate": 9.61722571363223e-06, "loss": 0.3149, "step": 3791 }, { "epoch": 0.30335393292134155, "grad_norm": 0.27449134217943877, "learning_rate": 9.616977113071604e-06, "loss": 0.2846, "step": 3792 }, { "epoch": 0.3034339313213736, "grad_norm": 0.28896656002923693, "learning_rate": 9.616728435023061e-06, "loss": 0.2837, "step": 3793 }, { "epoch": 0.30351392972140556, "grad_norm": 0.35680666227656566, "learning_rate": 9.616479679490778e-06, "loss": 0.2817, "step": 3794 }, { "epoch": 0.3035939281214376, "grad_norm": 0.29092818640389756, "learning_rate": 9.616230846478925e-06, "loss": 0.3038, "step": 3795 }, { "epoch": 0.3036739265214696, "grad_norm": 0.3809271499375508, "learning_rate": 9.615981935991683e-06, "loss": 0.2574, "step": 3796 }, { "epoch": 0.30375392492150155, "grad_norm": 0.2742500134416911, "learning_rate": 9.615732948033225e-06, "loss": 0.2906, "step": 3797 }, { "epoch": 0.3038339233215336, "grad_norm": 0.7198177136901439, "learning_rate": 9.615483882607735e-06, "loss": 0.3125, "step": 3798 }, { "epoch": 0.30391392172156556, "grad_norm": 0.3322955738597489, "learning_rate": 9.615234739719387e-06, "loss": 0.2604, "step": 3799 }, { "epoch": 0.3039939201215976, "grad_norm": 0.23480709401223898, "learning_rate": 9.61498551937237e-06, "loss": 0.3291, "step": 3800 }, { "epoch": 0.30407391852162957, "grad_norm": 0.3289741368819474, "learning_rate": 9.61473622157086e-06, "loss": 0.2534, "step": 3801 }, { "epoch": 0.30415391692166155, "grad_norm": 0.2441793423429152, "learning_rate": 9.614486846319042e-06, "loss": 0.3085, "step": 3802 }, { "epoch": 0.3042339153216936, "grad_norm": 0.268559343801519, "learning_rate": 9.614237393621104e-06, "loss": 0.2991, "step": 3803 }, { "epoch": 0.30431391372172556, "grad_norm": 0.31691605607341283, "learning_rate": 9.613987863481232e-06, "loss": 0.284, "step": 3804 }, { "epoch": 0.30439391212175754, "grad_norm": 0.34193819608321324, "learning_rate": 9.613738255903613e-06, "loss": 0.2819, "step": 3805 }, { "epoch": 0.30447391052178957, "grad_norm": 0.29450632015261274, "learning_rate": 9.613488570892437e-06, "loss": 0.2981, "step": 3806 }, { "epoch": 0.30455390892182155, "grad_norm": 0.2821893541055286, "learning_rate": 9.613238808451894e-06, "loss": 0.2814, "step": 3807 }, { "epoch": 0.3046339073218536, "grad_norm": 0.2973239793777625, "learning_rate": 9.612988968586176e-06, "loss": 0.3008, "step": 3808 }, { "epoch": 0.30471390572188556, "grad_norm": 0.32501317446584055, "learning_rate": 9.612739051299477e-06, "loss": 0.2647, "step": 3809 }, { "epoch": 0.30479390412191754, "grad_norm": 0.30708287310983845, "learning_rate": 9.61248905659599e-06, "loss": 0.2494, "step": 3810 }, { "epoch": 0.30487390252194957, "grad_norm": 0.3665100338740675, "learning_rate": 9.61223898447991e-06, "loss": 0.268, "step": 3811 }, { "epoch": 0.30495390092198155, "grad_norm": 0.3177027509716792, "learning_rate": 9.611988834955437e-06, "loss": 0.2501, "step": 3812 }, { "epoch": 0.3050338993220136, "grad_norm": 0.26673527287010673, "learning_rate": 9.611738608026765e-06, "loss": 0.3025, "step": 3813 }, { "epoch": 0.30511389772204556, "grad_norm": 0.34420837537026416, "learning_rate": 9.6114883036981e-06, "loss": 0.2492, "step": 3814 }, { "epoch": 0.30519389612207753, "grad_norm": 0.28538947787050495, "learning_rate": 9.611237921973637e-06, "loss": 0.2961, "step": 3815 }, { "epoch": 0.30527389452210957, "grad_norm": 0.23390040594024505, "learning_rate": 9.61098746285758e-06, "loss": 0.3395, "step": 3816 }, { "epoch": 0.30535389292214155, "grad_norm": 0.2777698528163031, "learning_rate": 9.610736926354133e-06, "loss": 0.3041, "step": 3817 }, { "epoch": 0.3054338913221736, "grad_norm": 0.31299250673242407, "learning_rate": 9.610486312467502e-06, "loss": 0.2714, "step": 3818 }, { "epoch": 0.30551388972220556, "grad_norm": 0.33049703076523484, "learning_rate": 9.61023562120189e-06, "loss": 0.239, "step": 3819 }, { "epoch": 0.30559388812223753, "grad_norm": 0.2779233114273755, "learning_rate": 9.609984852561508e-06, "loss": 0.2915, "step": 3820 }, { "epoch": 0.30567388652226957, "grad_norm": 0.3960042605009564, "learning_rate": 9.609734006550562e-06, "loss": 0.3103, "step": 3821 }, { "epoch": 0.30575388492230154, "grad_norm": 0.2933981078228004, "learning_rate": 9.609483083173264e-06, "loss": 0.314, "step": 3822 }, { "epoch": 0.3058338833223336, "grad_norm": 0.23975670407659508, "learning_rate": 9.609232082433824e-06, "loss": 0.3117, "step": 3823 }, { "epoch": 0.30591388172236555, "grad_norm": 0.33693168443581706, "learning_rate": 9.608981004336453e-06, "loss": 0.2575, "step": 3824 }, { "epoch": 0.30599388012239753, "grad_norm": 0.2584737640054285, "learning_rate": 9.608729848885369e-06, "loss": 0.2945, "step": 3825 }, { "epoch": 0.30607387852242957, "grad_norm": 0.3427539956567722, "learning_rate": 9.608478616084784e-06, "loss": 0.262, "step": 3826 }, { "epoch": 0.30615387692246154, "grad_norm": 0.25580698875503427, "learning_rate": 9.608227305938915e-06, "loss": 0.3093, "step": 3827 }, { "epoch": 0.3062338753224936, "grad_norm": 0.4642928980650298, "learning_rate": 9.607975918451982e-06, "loss": 0.266, "step": 3828 }, { "epoch": 0.30631387372252555, "grad_norm": 0.27491582515521595, "learning_rate": 9.6077244536282e-06, "loss": 0.2894, "step": 3829 }, { "epoch": 0.30639387212255753, "grad_norm": 0.3048648575886629, "learning_rate": 9.607472911471794e-06, "loss": 0.2922, "step": 3830 }, { "epoch": 0.30647387052258956, "grad_norm": 0.22465292368897324, "learning_rate": 9.607221291986983e-06, "loss": 0.3476, "step": 3831 }, { "epoch": 0.30655386892262154, "grad_norm": 0.30550927674254147, "learning_rate": 9.60696959517799e-06, "loss": 0.2768, "step": 3832 }, { "epoch": 0.3066338673226535, "grad_norm": 0.3116252639666084, "learning_rate": 9.606717821049042e-06, "loss": 0.2726, "step": 3833 }, { "epoch": 0.30671386572268555, "grad_norm": 0.2867152731669325, "learning_rate": 9.606465969604359e-06, "loss": 0.2802, "step": 3834 }, { "epoch": 0.30679386412271753, "grad_norm": 0.27970437215769656, "learning_rate": 9.606214040848174e-06, "loss": 0.304, "step": 3835 }, { "epoch": 0.30687386252274956, "grad_norm": 0.32136627180269983, "learning_rate": 9.605962034784711e-06, "loss": 0.3007, "step": 3836 }, { "epoch": 0.30695386092278154, "grad_norm": 0.3137107034506244, "learning_rate": 9.605709951418201e-06, "loss": 0.2473, "step": 3837 }, { "epoch": 0.3070338593228135, "grad_norm": 0.32728744709941154, "learning_rate": 9.605457790752875e-06, "loss": 0.2938, "step": 3838 }, { "epoch": 0.30711385772284555, "grad_norm": 0.3236730307097795, "learning_rate": 9.605205552792964e-06, "loss": 0.2583, "step": 3839 }, { "epoch": 0.30719385612287753, "grad_norm": 0.26884877121274947, "learning_rate": 9.604953237542703e-06, "loss": 0.2915, "step": 3840 }, { "epoch": 0.30727385452290956, "grad_norm": 0.2765280117174748, "learning_rate": 9.604700845006326e-06, "loss": 0.2876, "step": 3841 }, { "epoch": 0.30735385292294154, "grad_norm": 0.2686954180081356, "learning_rate": 9.604448375188069e-06, "loss": 0.2638, "step": 3842 }, { "epoch": 0.3074338513229735, "grad_norm": 0.4993603649475995, "learning_rate": 9.604195828092169e-06, "loss": 0.2676, "step": 3843 }, { "epoch": 0.30751384972300555, "grad_norm": 0.2874197221424311, "learning_rate": 9.603943203722863e-06, "loss": 0.282, "step": 3844 }, { "epoch": 0.3075938481230375, "grad_norm": 0.7773256645907821, "learning_rate": 9.603690502084396e-06, "loss": 0.3196, "step": 3845 }, { "epoch": 0.30767384652306956, "grad_norm": 0.3238968881475202, "learning_rate": 9.603437723181002e-06, "loss": 0.2654, "step": 3846 }, { "epoch": 0.30775384492310154, "grad_norm": 0.2585152884011422, "learning_rate": 9.603184867016929e-06, "loss": 0.2903, "step": 3847 }, { "epoch": 0.3078338433231335, "grad_norm": 0.27691320438776407, "learning_rate": 9.602931933596418e-06, "loss": 0.2857, "step": 3848 }, { "epoch": 0.30791384172316555, "grad_norm": 0.35775798367894546, "learning_rate": 9.602678922923716e-06, "loss": 0.2723, "step": 3849 }, { "epoch": 0.3079938401231975, "grad_norm": 1.2863576070193736, "learning_rate": 9.602425835003067e-06, "loss": 0.2891, "step": 3850 }, { "epoch": 0.30807383852322956, "grad_norm": 0.54277316129236, "learning_rate": 9.602172669838721e-06, "loss": 0.2418, "step": 3851 }, { "epoch": 0.30815383692326154, "grad_norm": 0.2711723891039418, "learning_rate": 9.601919427434925e-06, "loss": 0.3267, "step": 3852 }, { "epoch": 0.3082338353232935, "grad_norm": 0.27337109813122557, "learning_rate": 9.60166610779593e-06, "loss": 0.3048, "step": 3853 }, { "epoch": 0.30831383372332555, "grad_norm": 0.3268866093802923, "learning_rate": 9.60141271092599e-06, "loss": 0.258, "step": 3854 }, { "epoch": 0.3083938321233575, "grad_norm": 0.29545279189992013, "learning_rate": 9.601159236829353e-06, "loss": 0.2537, "step": 3855 }, { "epoch": 0.30847383052338956, "grad_norm": 0.28963730549731564, "learning_rate": 9.600905685510276e-06, "loss": 0.2837, "step": 3856 }, { "epoch": 0.30855382892342154, "grad_norm": 0.2763666959912324, "learning_rate": 9.600652056973013e-06, "loss": 0.3039, "step": 3857 }, { "epoch": 0.3086338273234535, "grad_norm": 0.3437588748629757, "learning_rate": 9.600398351221824e-06, "loss": 0.2965, "step": 3858 }, { "epoch": 0.30871382572348555, "grad_norm": 0.28654084811095687, "learning_rate": 9.600144568260962e-06, "loss": 0.3021, "step": 3859 }, { "epoch": 0.3087938241235175, "grad_norm": 0.4350037837738027, "learning_rate": 9.59989070809469e-06, "loss": 0.2472, "step": 3860 }, { "epoch": 0.3088738225235495, "grad_norm": 0.2946843504154889, "learning_rate": 9.59963677072727e-06, "loss": 0.2956, "step": 3861 }, { "epoch": 0.30895382092358153, "grad_norm": 0.31017296850475085, "learning_rate": 9.599382756162959e-06, "loss": 0.2619, "step": 3862 }, { "epoch": 0.3090338193236135, "grad_norm": 0.3214027070700401, "learning_rate": 9.599128664406023e-06, "loss": 0.2741, "step": 3863 }, { "epoch": 0.30911381772364555, "grad_norm": 0.30879458191331927, "learning_rate": 9.598874495460726e-06, "loss": 0.3043, "step": 3864 }, { "epoch": 0.3091938161236775, "grad_norm": 0.35835214310783076, "learning_rate": 9.598620249331334e-06, "loss": 0.2661, "step": 3865 }, { "epoch": 0.3092738145237095, "grad_norm": 0.26802161431422383, "learning_rate": 9.598365926022116e-06, "loss": 0.2984, "step": 3866 }, { "epoch": 0.30935381292374153, "grad_norm": 0.33221556146718784, "learning_rate": 9.598111525537336e-06, "loss": 0.2698, "step": 3867 }, { "epoch": 0.3094338113237735, "grad_norm": 0.30883473634301856, "learning_rate": 9.597857047881266e-06, "loss": 0.3095, "step": 3868 }, { "epoch": 0.30951380972380554, "grad_norm": 0.2627994989928834, "learning_rate": 9.597602493058178e-06, "loss": 0.3251, "step": 3869 }, { "epoch": 0.3095938081238375, "grad_norm": 0.3063941750846015, "learning_rate": 9.597347861072343e-06, "loss": 0.288, "step": 3870 }, { "epoch": 0.3096738065238695, "grad_norm": 0.3110071516095005, "learning_rate": 9.597093151928035e-06, "loss": 0.26, "step": 3871 }, { "epoch": 0.30975380492390153, "grad_norm": 0.2758509596028826, "learning_rate": 9.596838365629529e-06, "loss": 0.2954, "step": 3872 }, { "epoch": 0.3098338033239335, "grad_norm": 0.23917969159544608, "learning_rate": 9.5965835021811e-06, "loss": 0.3194, "step": 3873 }, { "epoch": 0.30991380172396554, "grad_norm": 0.32816977876996856, "learning_rate": 9.596328561587027e-06, "loss": 0.2916, "step": 3874 }, { "epoch": 0.3099938001239975, "grad_norm": 0.4589889577704082, "learning_rate": 9.596073543851587e-06, "loss": 0.2476, "step": 3875 }, { "epoch": 0.3100737985240295, "grad_norm": 0.33961492288498996, "learning_rate": 9.595818448979061e-06, "loss": 0.2923, "step": 3876 }, { "epoch": 0.31015379692406153, "grad_norm": 0.27888592423637126, "learning_rate": 9.595563276973732e-06, "loss": 0.2826, "step": 3877 }, { "epoch": 0.3102337953240935, "grad_norm": 0.2973125113353187, "learning_rate": 9.59530802783988e-06, "loss": 0.281, "step": 3878 }, { "epoch": 0.31031379372412554, "grad_norm": 0.5074649206210077, "learning_rate": 9.59505270158179e-06, "loss": 0.262, "step": 3879 }, { "epoch": 0.3103937921241575, "grad_norm": 0.2646974829774116, "learning_rate": 9.594797298203748e-06, "loss": 0.276, "step": 3880 }, { "epoch": 0.3104737905241895, "grad_norm": 0.3177078713149871, "learning_rate": 9.594541817710039e-06, "loss": 0.2816, "step": 3881 }, { "epoch": 0.31055378892422153, "grad_norm": 0.24065522476019588, "learning_rate": 9.59428626010495e-06, "loss": 0.3231, "step": 3882 }, { "epoch": 0.3106337873242535, "grad_norm": 0.27672865647737155, "learning_rate": 9.594030625392772e-06, "loss": 0.2938, "step": 3883 }, { "epoch": 0.31071378572428554, "grad_norm": 0.31334329137748806, "learning_rate": 9.593774913577795e-06, "loss": 0.2926, "step": 3884 }, { "epoch": 0.3107937841243175, "grad_norm": 0.37626839744819696, "learning_rate": 9.593519124664313e-06, "loss": 0.2593, "step": 3885 }, { "epoch": 0.3108737825243495, "grad_norm": 0.3019103638508728, "learning_rate": 9.593263258656614e-06, "loss": 0.2515, "step": 3886 }, { "epoch": 0.31095378092438153, "grad_norm": 0.31050540398668647, "learning_rate": 9.593007315558996e-06, "loss": 0.2989, "step": 3887 }, { "epoch": 0.3110337793244135, "grad_norm": 0.3307990733125002, "learning_rate": 9.59275129537575e-06, "loss": 0.2716, "step": 3888 }, { "epoch": 0.3111137777244455, "grad_norm": 0.30914697713595596, "learning_rate": 9.59249519811118e-06, "loss": 0.2681, "step": 3889 }, { "epoch": 0.3111937761244775, "grad_norm": 0.2869537809107287, "learning_rate": 9.59223902376958e-06, "loss": 0.2586, "step": 3890 }, { "epoch": 0.3112737745245095, "grad_norm": 0.30720149721069334, "learning_rate": 9.591982772355248e-06, "loss": 0.2933, "step": 3891 }, { "epoch": 0.3113537729245415, "grad_norm": 0.29997372052972104, "learning_rate": 9.591726443872487e-06, "loss": 0.2962, "step": 3892 }, { "epoch": 0.3114337713245735, "grad_norm": 0.27278348171440836, "learning_rate": 9.591470038325599e-06, "loss": 0.3052, "step": 3893 }, { "epoch": 0.3115137697246055, "grad_norm": 0.3094339715810796, "learning_rate": 9.591213555718889e-06, "loss": 0.2674, "step": 3894 }, { "epoch": 0.3115937681246375, "grad_norm": 0.28486644794215404, "learning_rate": 9.590956996056656e-06, "loss": 0.287, "step": 3895 }, { "epoch": 0.3116737665246695, "grad_norm": 0.31743902430208826, "learning_rate": 9.59070035934321e-06, "loss": 0.2551, "step": 3896 }, { "epoch": 0.3117537649247015, "grad_norm": 0.32340802124708706, "learning_rate": 9.590443645582859e-06, "loss": 0.2599, "step": 3897 }, { "epoch": 0.3118337633247335, "grad_norm": 0.26358580265571574, "learning_rate": 9.590186854779909e-06, "loss": 0.2823, "step": 3898 }, { "epoch": 0.3119137617247655, "grad_norm": 0.33439674203012215, "learning_rate": 9.58992998693867e-06, "loss": 0.322, "step": 3899 }, { "epoch": 0.3119937601247975, "grad_norm": 0.2831863941837388, "learning_rate": 9.589673042063456e-06, "loss": 0.3015, "step": 3900 }, { "epoch": 0.3120737585248295, "grad_norm": 0.28417321523487726, "learning_rate": 9.589416020158577e-06, "loss": 0.2911, "step": 3901 }, { "epoch": 0.3121537569248615, "grad_norm": 0.29292873122271634, "learning_rate": 9.589158921228346e-06, "loss": 0.298, "step": 3902 }, { "epoch": 0.3122337553248935, "grad_norm": 0.2750471891616348, "learning_rate": 9.58890174527708e-06, "loss": 0.2899, "step": 3903 }, { "epoch": 0.3123137537249255, "grad_norm": 0.2894306006665051, "learning_rate": 9.588644492309093e-06, "loss": 0.2735, "step": 3904 }, { "epoch": 0.3123937521249575, "grad_norm": 0.2753159652953076, "learning_rate": 9.588387162328705e-06, "loss": 0.2735, "step": 3905 }, { "epoch": 0.3124737505249895, "grad_norm": 0.27729447813458397, "learning_rate": 9.588129755340231e-06, "loss": 0.2838, "step": 3906 }, { "epoch": 0.3125537489250215, "grad_norm": 0.24987290914441593, "learning_rate": 9.587872271347996e-06, "loss": 0.3347, "step": 3907 }, { "epoch": 0.3126337473250535, "grad_norm": 0.25821226362023975, "learning_rate": 9.587614710356318e-06, "loss": 0.2846, "step": 3908 }, { "epoch": 0.3127137457250855, "grad_norm": 0.33149551831859797, "learning_rate": 9.587357072369522e-06, "loss": 0.2747, "step": 3909 }, { "epoch": 0.3127937441251175, "grad_norm": 0.3659492192793033, "learning_rate": 9.58709935739193e-06, "loss": 0.2731, "step": 3910 }, { "epoch": 0.3128737425251495, "grad_norm": 0.2720949021289437, "learning_rate": 9.586841565427869e-06, "loss": 0.295, "step": 3911 }, { "epoch": 0.3129537409251815, "grad_norm": 0.318861928957452, "learning_rate": 9.586583696481664e-06, "loss": 0.3334, "step": 3912 }, { "epoch": 0.3130337393252135, "grad_norm": 0.25467458864021475, "learning_rate": 9.586325750557643e-06, "loss": 0.3091, "step": 3913 }, { "epoch": 0.3131137377252455, "grad_norm": 0.2877708954949333, "learning_rate": 9.586067727660138e-06, "loss": 0.2818, "step": 3914 }, { "epoch": 0.3131937361252775, "grad_norm": 0.3102993471880673, "learning_rate": 9.585809627793475e-06, "loss": 0.2695, "step": 3915 }, { "epoch": 0.3132737345253095, "grad_norm": 0.27001233567184524, "learning_rate": 9.585551450961989e-06, "loss": 0.265, "step": 3916 }, { "epoch": 0.31335373292534147, "grad_norm": 1.4311188802097372, "learning_rate": 9.58529319717001e-06, "loss": 0.2364, "step": 3917 }, { "epoch": 0.3134337313253735, "grad_norm": 0.2978597808997237, "learning_rate": 9.585034866421877e-06, "loss": 0.2705, "step": 3918 }, { "epoch": 0.3135137297254055, "grad_norm": 0.3247709718007176, "learning_rate": 9.584776458721922e-06, "loss": 0.2652, "step": 3919 }, { "epoch": 0.3135937281254375, "grad_norm": 0.3996160135425736, "learning_rate": 9.584517974074483e-06, "loss": 0.3403, "step": 3920 }, { "epoch": 0.3136737265254695, "grad_norm": 0.25965274545549266, "learning_rate": 9.584259412483899e-06, "loss": 0.2866, "step": 3921 }, { "epoch": 0.31375372492550146, "grad_norm": 0.3503608886302911, "learning_rate": 9.584000773954507e-06, "loss": 0.2495, "step": 3922 }, { "epoch": 0.3138337233255335, "grad_norm": 0.24925995168079726, "learning_rate": 9.58374205849065e-06, "loss": 0.324, "step": 3923 }, { "epoch": 0.3139137217255655, "grad_norm": 0.29258512902412276, "learning_rate": 9.58348326609667e-06, "loss": 0.2751, "step": 3924 }, { "epoch": 0.3139937201255975, "grad_norm": 0.4528150694793641, "learning_rate": 9.58322439677691e-06, "loss": 0.3089, "step": 3925 }, { "epoch": 0.3140737185256295, "grad_norm": 0.22970094107638747, "learning_rate": 9.582965450535716e-06, "loss": 0.342, "step": 3926 }, { "epoch": 0.31415371692566146, "grad_norm": 0.28154352716643033, "learning_rate": 9.58270642737743e-06, "loss": 0.2764, "step": 3927 }, { "epoch": 0.3142337153256935, "grad_norm": 0.28228610686934, "learning_rate": 9.5824473273064e-06, "loss": 0.2824, "step": 3928 }, { "epoch": 0.3143137137257255, "grad_norm": 0.2781717330394963, "learning_rate": 9.582188150326981e-06, "loss": 0.3191, "step": 3929 }, { "epoch": 0.3143937121257575, "grad_norm": 0.32532496465351285, "learning_rate": 9.581928896443517e-06, "loss": 0.2909, "step": 3930 }, { "epoch": 0.3144737105257895, "grad_norm": 0.27785996581839767, "learning_rate": 9.58166956566036e-06, "loss": 0.3263, "step": 3931 }, { "epoch": 0.31455370892582146, "grad_norm": 0.4067201266905533, "learning_rate": 9.58141015798186e-06, "loss": 0.2775, "step": 3932 }, { "epoch": 0.3146337073258535, "grad_norm": 0.31383028910350796, "learning_rate": 9.581150673412376e-06, "loss": 0.2958, "step": 3933 }, { "epoch": 0.3147137057258855, "grad_norm": 0.34201621859043685, "learning_rate": 9.58089111195626e-06, "loss": 0.2625, "step": 3934 }, { "epoch": 0.3147937041259175, "grad_norm": 0.35328825049775214, "learning_rate": 9.58063147361787e-06, "loss": 0.2761, "step": 3935 }, { "epoch": 0.3148737025259495, "grad_norm": 0.29986101037815416, "learning_rate": 9.58037175840156e-06, "loss": 0.2935, "step": 3936 }, { "epoch": 0.31495370092598146, "grad_norm": 0.30730592029876386, "learning_rate": 9.580111966311692e-06, "loss": 0.2689, "step": 3937 }, { "epoch": 0.3150336993260135, "grad_norm": 0.316582518561074, "learning_rate": 9.579852097352625e-06, "loss": 0.3097, "step": 3938 }, { "epoch": 0.31511369772604547, "grad_norm": 0.33691115655362097, "learning_rate": 9.579592151528721e-06, "loss": 0.2633, "step": 3939 }, { "epoch": 0.3151936961260775, "grad_norm": 0.2965737922613462, "learning_rate": 9.579332128844342e-06, "loss": 0.3, "step": 3940 }, { "epoch": 0.3152736945261095, "grad_norm": 0.248686376204246, "learning_rate": 9.579072029303855e-06, "loss": 0.3372, "step": 3941 }, { "epoch": 0.31535369292614146, "grad_norm": 0.25850999340759045, "learning_rate": 9.57881185291162e-06, "loss": 0.3062, "step": 3942 }, { "epoch": 0.3154336913261735, "grad_norm": 0.3367296420055412, "learning_rate": 9.578551599672008e-06, "loss": 0.2826, "step": 3943 }, { "epoch": 0.31551368972620547, "grad_norm": 0.2807093376051179, "learning_rate": 9.578291269589384e-06, "loss": 0.3022, "step": 3944 }, { "epoch": 0.31559368812623745, "grad_norm": 0.33089238719470726, "learning_rate": 9.57803086266812e-06, "loss": 0.2829, "step": 3945 }, { "epoch": 0.3156736865262695, "grad_norm": 0.30323563722632235, "learning_rate": 9.577770378912584e-06, "loss": 0.2933, "step": 3946 }, { "epoch": 0.31575368492630146, "grad_norm": 0.32487367631960307, "learning_rate": 9.57750981832715e-06, "loss": 0.2614, "step": 3947 }, { "epoch": 0.3158336833263335, "grad_norm": 0.29811040897680635, "learning_rate": 9.577249180916188e-06, "loss": 0.2764, "step": 3948 }, { "epoch": 0.31591368172636547, "grad_norm": 0.3235825274760498, "learning_rate": 9.576988466684077e-06, "loss": 0.2815, "step": 3949 }, { "epoch": 0.31599368012639745, "grad_norm": 0.33389418759453765, "learning_rate": 9.576727675635186e-06, "loss": 0.2555, "step": 3950 }, { "epoch": 0.3160736785264295, "grad_norm": 0.3301049227912578, "learning_rate": 9.5764668077739e-06, "loss": 0.2592, "step": 3951 }, { "epoch": 0.31615367692646146, "grad_norm": 0.28729958209903517, "learning_rate": 9.576205863104588e-06, "loss": 0.2718, "step": 3952 }, { "epoch": 0.3162336753264935, "grad_norm": 0.30330438684038347, "learning_rate": 9.575944841631636e-06, "loss": 0.2804, "step": 3953 }, { "epoch": 0.31631367372652547, "grad_norm": 0.31763471828758494, "learning_rate": 9.575683743359425e-06, "loss": 0.2919, "step": 3954 }, { "epoch": 0.31639367212655745, "grad_norm": 0.3133490432540109, "learning_rate": 9.575422568292336e-06, "loss": 0.266, "step": 3955 }, { "epoch": 0.3164736705265895, "grad_norm": 0.28413874320701693, "learning_rate": 9.575161316434749e-06, "loss": 0.2896, "step": 3956 }, { "epoch": 0.31655366892662146, "grad_norm": 0.31282787605427237, "learning_rate": 9.574899987791054e-06, "loss": 0.3269, "step": 3957 }, { "epoch": 0.3166336673266535, "grad_norm": 0.33526664499800835, "learning_rate": 9.574638582365631e-06, "loss": 0.3121, "step": 3958 }, { "epoch": 0.31671366572668547, "grad_norm": 0.29833517183570424, "learning_rate": 9.574377100162874e-06, "loss": 0.3313, "step": 3959 }, { "epoch": 0.31679366412671744, "grad_norm": 0.2841155937846932, "learning_rate": 9.574115541187166e-06, "loss": 0.2975, "step": 3960 }, { "epoch": 0.3168736625267495, "grad_norm": 0.3006996187651303, "learning_rate": 9.573853905442899e-06, "loss": 0.2867, "step": 3961 }, { "epoch": 0.31695366092678146, "grad_norm": 0.364433452757692, "learning_rate": 9.573592192934465e-06, "loss": 0.2778, "step": 3962 }, { "epoch": 0.3170336593268135, "grad_norm": 0.2834108108811054, "learning_rate": 9.573330403666254e-06, "loss": 0.3072, "step": 3963 }, { "epoch": 0.31711365772684547, "grad_norm": 0.2874029200475847, "learning_rate": 9.573068537642663e-06, "loss": 0.3009, "step": 3964 }, { "epoch": 0.31719365612687744, "grad_norm": 0.2861945034143621, "learning_rate": 9.572806594868082e-06, "loss": 0.3409, "step": 3965 }, { "epoch": 0.3172736545269095, "grad_norm": 0.3693568298861793, "learning_rate": 9.572544575346912e-06, "loss": 0.2641, "step": 3966 }, { "epoch": 0.31735365292694145, "grad_norm": 0.318077472910601, "learning_rate": 9.572282479083548e-06, "loss": 0.3056, "step": 3967 }, { "epoch": 0.3174336513269735, "grad_norm": 0.3404735319657658, "learning_rate": 9.57202030608239e-06, "loss": 0.2746, "step": 3968 }, { "epoch": 0.31751364972700546, "grad_norm": 0.32492418596836403, "learning_rate": 9.571758056347839e-06, "loss": 0.2999, "step": 3969 }, { "epoch": 0.31759364812703744, "grad_norm": 0.30668738174884774, "learning_rate": 9.571495729884294e-06, "loss": 0.301, "step": 3970 }, { "epoch": 0.3176736465270695, "grad_norm": 0.28502096572478286, "learning_rate": 9.571233326696159e-06, "loss": 0.3051, "step": 3971 }, { "epoch": 0.31775364492710145, "grad_norm": 0.2275487732423886, "learning_rate": 9.570970846787838e-06, "loss": 0.3181, "step": 3972 }, { "epoch": 0.31783364332713343, "grad_norm": 0.25242228439122755, "learning_rate": 9.570708290163735e-06, "loss": 0.3196, "step": 3973 }, { "epoch": 0.31791364172716546, "grad_norm": 0.27244667542189976, "learning_rate": 9.570445656828257e-06, "loss": 0.3248, "step": 3974 }, { "epoch": 0.31799364012719744, "grad_norm": 0.2976689546729177, "learning_rate": 9.570182946785816e-06, "loss": 0.2956, "step": 3975 }, { "epoch": 0.3180736385272295, "grad_norm": 0.2982129500863135, "learning_rate": 9.569920160040815e-06, "loss": 0.2614, "step": 3976 }, { "epoch": 0.31815363692726145, "grad_norm": 0.32407168727586233, "learning_rate": 9.569657296597668e-06, "loss": 0.295, "step": 3977 }, { "epoch": 0.31823363532729343, "grad_norm": 0.2919827562703247, "learning_rate": 9.569394356460784e-06, "loss": 0.2863, "step": 3978 }, { "epoch": 0.31831363372732546, "grad_norm": 0.2865151336624842, "learning_rate": 9.569131339634578e-06, "loss": 0.3268, "step": 3979 }, { "epoch": 0.31839363212735744, "grad_norm": 0.31558925569229895, "learning_rate": 9.568868246123466e-06, "loss": 0.2664, "step": 3980 }, { "epoch": 0.3184736305273895, "grad_norm": 2.0248862754846995, "learning_rate": 9.56860507593186e-06, "loss": 0.2821, "step": 3981 }, { "epoch": 0.31855362892742145, "grad_norm": 0.8687422236056815, "learning_rate": 9.56834182906418e-06, "loss": 0.3073, "step": 3982 }, { "epoch": 0.31863362732745343, "grad_norm": 0.2842177648517353, "learning_rate": 9.56807850552484e-06, "loss": 0.282, "step": 3983 }, { "epoch": 0.31871362572748546, "grad_norm": 0.31636699140195035, "learning_rate": 9.567815105318263e-06, "loss": 0.2593, "step": 3984 }, { "epoch": 0.31879362412751744, "grad_norm": 0.2467758243710989, "learning_rate": 9.56755162844887e-06, "loss": 0.3194, "step": 3985 }, { "epoch": 0.31887362252754947, "grad_norm": 0.341488002663993, "learning_rate": 9.567288074921082e-06, "loss": 0.2929, "step": 3986 }, { "epoch": 0.31895362092758145, "grad_norm": 0.2876899845169144, "learning_rate": 9.567024444739319e-06, "loss": 0.2863, "step": 3987 }, { "epoch": 0.3190336193276134, "grad_norm": 0.2859067348665681, "learning_rate": 9.56676073790801e-06, "loss": 0.3072, "step": 3988 }, { "epoch": 0.31911361772764546, "grad_norm": 0.35174386983488926, "learning_rate": 9.566496954431581e-06, "loss": 0.3362, "step": 3989 }, { "epoch": 0.31919361612767744, "grad_norm": 0.26984492023546414, "learning_rate": 9.566233094314456e-06, "loss": 0.2718, "step": 3990 }, { "epoch": 0.31927361452770947, "grad_norm": 0.2704083748539496, "learning_rate": 9.565969157561066e-06, "loss": 0.2921, "step": 3991 }, { "epoch": 0.31935361292774145, "grad_norm": 0.3364570928005177, "learning_rate": 9.565705144175839e-06, "loss": 0.2656, "step": 3992 }, { "epoch": 0.3194336113277734, "grad_norm": 0.26851676493540766, "learning_rate": 9.565441054163205e-06, "loss": 0.2948, "step": 3993 }, { "epoch": 0.31951360972780546, "grad_norm": 0.1697263810751049, "learning_rate": 9.5651768875276e-06, "loss": 0.3719, "step": 3994 }, { "epoch": 0.31959360812783744, "grad_norm": 0.3212125770010283, "learning_rate": 9.564912644273456e-06, "loss": 0.2851, "step": 3995 }, { "epoch": 0.31967360652786947, "grad_norm": 0.3324070803908523, "learning_rate": 9.564648324405206e-06, "loss": 0.2575, "step": 3996 }, { "epoch": 0.31975360492790145, "grad_norm": 0.24694722371162814, "learning_rate": 9.564383927927289e-06, "loss": 0.3078, "step": 3997 }, { "epoch": 0.3198336033279334, "grad_norm": 0.29554648044051524, "learning_rate": 9.56411945484414e-06, "loss": 0.3057, "step": 3998 }, { "epoch": 0.31991360172796546, "grad_norm": 0.31988315139831774, "learning_rate": 9.5638549051602e-06, "loss": 0.2778, "step": 3999 }, { "epoch": 0.31999360012799744, "grad_norm": 0.32641897086496025, "learning_rate": 9.563590278879906e-06, "loss": 0.2608, "step": 4000 }, { "epoch": 0.3200735985280294, "grad_norm": 0.5075193181280644, "learning_rate": 9.563325576007702e-06, "loss": 0.2611, "step": 4001 }, { "epoch": 0.32015359692806145, "grad_norm": 0.2896470124600797, "learning_rate": 9.563060796548029e-06, "loss": 0.2715, "step": 4002 }, { "epoch": 0.3202335953280934, "grad_norm": 0.2712153722754573, "learning_rate": 9.562795940505332e-06, "loss": 0.3055, "step": 4003 }, { "epoch": 0.32031359372812546, "grad_norm": 0.4166443406309326, "learning_rate": 9.562531007884056e-06, "loss": 0.2645, "step": 4004 }, { "epoch": 0.32039359212815743, "grad_norm": 0.3062796891372946, "learning_rate": 9.562265998688648e-06, "loss": 0.2577, "step": 4005 }, { "epoch": 0.3204735905281894, "grad_norm": 0.46964732719571156, "learning_rate": 9.562000912923551e-06, "loss": 0.3008, "step": 4006 }, { "epoch": 0.32055358892822144, "grad_norm": 0.3100781206552371, "learning_rate": 9.561735750593221e-06, "loss": 0.3066, "step": 4007 }, { "epoch": 0.3206335873282534, "grad_norm": 0.296049214094488, "learning_rate": 9.561470511702105e-06, "loss": 0.2972, "step": 4008 }, { "epoch": 0.32071358572828546, "grad_norm": 0.301133650172872, "learning_rate": 9.561205196254652e-06, "loss": 0.2894, "step": 4009 }, { "epoch": 0.32079358412831743, "grad_norm": 0.25227236249208285, "learning_rate": 9.56093980425532e-06, "loss": 0.318, "step": 4010 }, { "epoch": 0.3208735825283494, "grad_norm": 0.24041520518074755, "learning_rate": 9.56067433570856e-06, "loss": 0.3223, "step": 4011 }, { "epoch": 0.32095358092838144, "grad_norm": 0.36256696568392754, "learning_rate": 9.560408790618828e-06, "loss": 0.2879, "step": 4012 }, { "epoch": 0.3210335793284134, "grad_norm": 0.3693646197492591, "learning_rate": 9.56014316899058e-06, "loss": 0.2813, "step": 4013 }, { "epoch": 0.32111357772844545, "grad_norm": 0.2791271227882677, "learning_rate": 9.559877470828277e-06, "loss": 0.2982, "step": 4014 }, { "epoch": 0.32119357612847743, "grad_norm": 0.29407645959942963, "learning_rate": 9.559611696136375e-06, "loss": 0.2719, "step": 4015 }, { "epoch": 0.3212735745285094, "grad_norm": 0.3475739604922169, "learning_rate": 9.559345844919334e-06, "loss": 0.254, "step": 4016 }, { "epoch": 0.32135357292854144, "grad_norm": 0.29903318309828236, "learning_rate": 9.55907991718162e-06, "loss": 0.3073, "step": 4017 }, { "epoch": 0.3214335713285734, "grad_norm": 0.32768885424418126, "learning_rate": 9.558813912927692e-06, "loss": 0.2688, "step": 4018 }, { "epoch": 0.32151356972860545, "grad_norm": 0.34219063500364366, "learning_rate": 9.558547832162017e-06, "loss": 0.2499, "step": 4019 }, { "epoch": 0.32159356812863743, "grad_norm": 0.28114818437122263, "learning_rate": 9.558281674889058e-06, "loss": 0.2848, "step": 4020 }, { "epoch": 0.3216735665286694, "grad_norm": 0.2765459146319245, "learning_rate": 9.558015441113285e-06, "loss": 0.3288, "step": 4021 }, { "epoch": 0.32175356492870144, "grad_norm": 0.3698007664091243, "learning_rate": 9.557749130839164e-06, "loss": 0.2984, "step": 4022 }, { "epoch": 0.3218335633287334, "grad_norm": 0.32412915405056036, "learning_rate": 9.557482744071166e-06, "loss": 0.2531, "step": 4023 }, { "epoch": 0.32191356172876545, "grad_norm": 0.29606563834974864, "learning_rate": 9.557216280813764e-06, "loss": 0.3128, "step": 4024 }, { "epoch": 0.32199356012879743, "grad_norm": 0.3054948339671617, "learning_rate": 9.556949741071423e-06, "loss": 0.2959, "step": 4025 }, { "epoch": 0.3220735585288294, "grad_norm": 0.2553380885639412, "learning_rate": 9.556683124848624e-06, "loss": 0.3212, "step": 4026 }, { "epoch": 0.32215355692886144, "grad_norm": 0.264916518683339, "learning_rate": 9.556416432149838e-06, "loss": 0.3202, "step": 4027 }, { "epoch": 0.3222335553288934, "grad_norm": 0.2557577568551388, "learning_rate": 9.556149662979541e-06, "loss": 0.3353, "step": 4028 }, { "epoch": 0.3223135537289254, "grad_norm": 0.3052677610730034, "learning_rate": 9.555882817342212e-06, "loss": 0.2578, "step": 4029 }, { "epoch": 0.32239355212895743, "grad_norm": 0.32713931659198986, "learning_rate": 9.555615895242327e-06, "loss": 0.2799, "step": 4030 }, { "epoch": 0.3224735505289894, "grad_norm": 0.2900546404195957, "learning_rate": 9.555348896684366e-06, "loss": 0.2914, "step": 4031 }, { "epoch": 0.32255354892902144, "grad_norm": 0.3197630355105231, "learning_rate": 9.555081821672814e-06, "loss": 0.2684, "step": 4032 }, { "epoch": 0.3226335473290534, "grad_norm": 0.33345881717726855, "learning_rate": 9.55481467021215e-06, "loss": 0.3117, "step": 4033 }, { "epoch": 0.3227135457290854, "grad_norm": 0.3384081464782839, "learning_rate": 9.554547442306858e-06, "loss": 0.2861, "step": 4034 }, { "epoch": 0.3227935441291174, "grad_norm": 0.25492748306734875, "learning_rate": 9.554280137961423e-06, "loss": 0.324, "step": 4035 }, { "epoch": 0.3228735425291494, "grad_norm": 0.29424950055754046, "learning_rate": 9.55401275718033e-06, "loss": 0.2669, "step": 4036 }, { "epoch": 0.32295354092918144, "grad_norm": 0.2971300509386836, "learning_rate": 9.553745299968071e-06, "loss": 0.3262, "step": 4037 }, { "epoch": 0.3230335393292134, "grad_norm": 0.2861000213198556, "learning_rate": 9.553477766329132e-06, "loss": 0.2954, "step": 4038 }, { "epoch": 0.3231135377292454, "grad_norm": 0.268185361380556, "learning_rate": 9.553210156268e-06, "loss": 0.2684, "step": 4039 }, { "epoch": 0.3231935361292774, "grad_norm": 0.31192653631494927, "learning_rate": 9.552942469789172e-06, "loss": 0.2562, "step": 4040 }, { "epoch": 0.3232735345293094, "grad_norm": 0.30551885330142187, "learning_rate": 9.552674706897136e-06, "loss": 0.3008, "step": 4041 }, { "epoch": 0.32335353292934144, "grad_norm": 0.23476156166918968, "learning_rate": 9.552406867596388e-06, "loss": 0.296, "step": 4042 }, { "epoch": 0.3234335313293734, "grad_norm": 0.2775178012807703, "learning_rate": 9.552138951891425e-06, "loss": 0.2799, "step": 4043 }, { "epoch": 0.3235135297294054, "grad_norm": 0.33839652542386145, "learning_rate": 9.55187095978674e-06, "loss": 0.2684, "step": 4044 }, { "epoch": 0.3235935281294374, "grad_norm": 0.30491407323811603, "learning_rate": 9.551602891286833e-06, "loss": 0.2693, "step": 4045 }, { "epoch": 0.3236735265294694, "grad_norm": 0.3055082154736578, "learning_rate": 9.551334746396203e-06, "loss": 0.2857, "step": 4046 }, { "epoch": 0.32375352492950144, "grad_norm": 0.2710121364574758, "learning_rate": 9.551066525119349e-06, "loss": 0.2888, "step": 4047 }, { "epoch": 0.3238335233295334, "grad_norm": 0.2757055268244437, "learning_rate": 9.550798227460774e-06, "loss": 0.2923, "step": 4048 }, { "epoch": 0.3239135217295654, "grad_norm": 0.2542502174010691, "learning_rate": 9.550529853424979e-06, "loss": 0.2837, "step": 4049 }, { "epoch": 0.3239935201295974, "grad_norm": 0.31078202408045963, "learning_rate": 9.55026140301647e-06, "loss": 0.2565, "step": 4050 }, { "epoch": 0.3240735185296294, "grad_norm": 0.30190144243575673, "learning_rate": 9.549992876239753e-06, "loss": 0.2638, "step": 4051 }, { "epoch": 0.32415351692966143, "grad_norm": 0.3538883893493791, "learning_rate": 9.549724273099333e-06, "loss": 0.2682, "step": 4052 }, { "epoch": 0.3242335153296934, "grad_norm": 1.2109482459833467, "learning_rate": 9.54945559359972e-06, "loss": 0.2558, "step": 4053 }, { "epoch": 0.3243135137297254, "grad_norm": 0.2912686806043731, "learning_rate": 9.54918683774542e-06, "loss": 0.2554, "step": 4054 }, { "epoch": 0.3243935121297574, "grad_norm": 0.3146192167625131, "learning_rate": 9.548918005540948e-06, "loss": 0.2487, "step": 4055 }, { "epoch": 0.3244735105297894, "grad_norm": 0.21844768163006562, "learning_rate": 9.548649096990811e-06, "loss": 0.3398, "step": 4056 }, { "epoch": 0.3245535089298214, "grad_norm": 0.6801558014844721, "learning_rate": 9.548380112099527e-06, "loss": 0.2867, "step": 4057 }, { "epoch": 0.3246335073298534, "grad_norm": 0.8054304579422645, "learning_rate": 9.548111050871607e-06, "loss": 0.2575, "step": 4058 }, { "epoch": 0.3247135057298854, "grad_norm": 0.33813185412173413, "learning_rate": 9.547841913311567e-06, "loss": 0.2887, "step": 4059 }, { "epoch": 0.3247935041299174, "grad_norm": 0.2836560635347328, "learning_rate": 9.547572699423927e-06, "loss": 0.29, "step": 4060 }, { "epoch": 0.3248735025299494, "grad_norm": 0.2884220151799123, "learning_rate": 9.547303409213202e-06, "loss": 0.3039, "step": 4061 }, { "epoch": 0.3249535009299814, "grad_norm": 0.2698166056217857, "learning_rate": 9.547034042683913e-06, "loss": 0.283, "step": 4062 }, { "epoch": 0.3250334993300134, "grad_norm": 0.2960669444474324, "learning_rate": 9.546764599840581e-06, "loss": 0.2904, "step": 4063 }, { "epoch": 0.3251134977300454, "grad_norm": 0.3221641557980718, "learning_rate": 9.546495080687727e-06, "loss": 0.2635, "step": 4064 }, { "epoch": 0.3251934961300774, "grad_norm": 0.3134634361148778, "learning_rate": 9.546225485229876e-06, "loss": 0.2549, "step": 4065 }, { "epoch": 0.3252734945301094, "grad_norm": 0.24875528951106704, "learning_rate": 9.545955813471552e-06, "loss": 0.34, "step": 4066 }, { "epoch": 0.3253534929301414, "grad_norm": 0.3319478340152881, "learning_rate": 9.545686065417279e-06, "loss": 0.2738, "step": 4067 }, { "epoch": 0.3254334913301734, "grad_norm": 0.2507483284470259, "learning_rate": 9.545416241071588e-06, "loss": 0.3166, "step": 4068 }, { "epoch": 0.3255134897302054, "grad_norm": 0.301063661839558, "learning_rate": 9.545146340439005e-06, "loss": 0.2766, "step": 4069 }, { "epoch": 0.3255934881302374, "grad_norm": 0.3200789615898504, "learning_rate": 9.54487636352406e-06, "loss": 0.3085, "step": 4070 }, { "epoch": 0.3256734865302694, "grad_norm": 0.3594942449972947, "learning_rate": 9.544606310331284e-06, "loss": 0.2719, "step": 4071 }, { "epoch": 0.3257534849303014, "grad_norm": 0.2881992736432892, "learning_rate": 9.544336180865212e-06, "loss": 0.279, "step": 4072 }, { "epoch": 0.3258334833303334, "grad_norm": 0.26913004486202013, "learning_rate": 9.544065975130375e-06, "loss": 0.3312, "step": 4073 }, { "epoch": 0.3259134817303654, "grad_norm": 0.3441375528930245, "learning_rate": 9.543795693131306e-06, "loss": 0.2635, "step": 4074 }, { "epoch": 0.3259934801303974, "grad_norm": 0.32673878317588556, "learning_rate": 9.543525334872546e-06, "loss": 0.2624, "step": 4075 }, { "epoch": 0.3260734785304294, "grad_norm": 0.24107340829890755, "learning_rate": 9.54325490035863e-06, "loss": 0.339, "step": 4076 }, { "epoch": 0.3261534769304614, "grad_norm": 0.2923915734715394, "learning_rate": 9.542984389594096e-06, "loss": 0.2954, "step": 4077 }, { "epoch": 0.3262334753304934, "grad_norm": 0.3001303169206607, "learning_rate": 9.542713802583485e-06, "loss": 0.2827, "step": 4078 }, { "epoch": 0.3263134737305254, "grad_norm": 0.27984853705181556, "learning_rate": 9.54244313933134e-06, "loss": 0.3207, "step": 4079 }, { "epoch": 0.3263934721305574, "grad_norm": 0.28644662960675643, "learning_rate": 9.5421723998422e-06, "loss": 0.29, "step": 4080 }, { "epoch": 0.3264734705305894, "grad_norm": 0.35188282002655014, "learning_rate": 9.541901584120612e-06, "loss": 0.2607, "step": 4081 }, { "epoch": 0.32655346893062137, "grad_norm": 0.31466364220696796, "learning_rate": 9.541630692171119e-06, "loss": 0.2543, "step": 4082 }, { "epoch": 0.3266334673306534, "grad_norm": 0.3156907364146312, "learning_rate": 9.541359723998268e-06, "loss": 0.306, "step": 4083 }, { "epoch": 0.3267134657306854, "grad_norm": 0.26175058440313936, "learning_rate": 9.541088679606609e-06, "loss": 0.3197, "step": 4084 }, { "epoch": 0.32679346413071736, "grad_norm": 0.3371694255451264, "learning_rate": 9.540817559000688e-06, "loss": 0.2635, "step": 4085 }, { "epoch": 0.3268734625307494, "grad_norm": 0.3332781085233276, "learning_rate": 9.540546362185056e-06, "loss": 0.2957, "step": 4086 }, { "epoch": 0.32695346093078137, "grad_norm": 0.2315063819203408, "learning_rate": 9.540275089164266e-06, "loss": 0.3227, "step": 4087 }, { "epoch": 0.3270334593308134, "grad_norm": 0.2521063676269978, "learning_rate": 9.54000373994287e-06, "loss": 0.3165, "step": 4088 }, { "epoch": 0.3271134577308454, "grad_norm": 0.42509382236989884, "learning_rate": 9.539732314525421e-06, "loss": 0.2803, "step": 4089 }, { "epoch": 0.32719345613087736, "grad_norm": 0.2743381982377584, "learning_rate": 9.539460812916477e-06, "loss": 0.279, "step": 4090 }, { "epoch": 0.3272734545309094, "grad_norm": 0.2764559811333477, "learning_rate": 9.539189235120591e-06, "loss": 0.2919, "step": 4091 }, { "epoch": 0.32735345293094137, "grad_norm": 0.31707134596405084, "learning_rate": 9.538917581142325e-06, "loss": 0.2854, "step": 4092 }, { "epoch": 0.3274334513309734, "grad_norm": 0.23850551013460794, "learning_rate": 9.538645850986235e-06, "loss": 0.3225, "step": 4093 }, { "epoch": 0.3275134497310054, "grad_norm": 0.31008110368471076, "learning_rate": 9.538374044656884e-06, "loss": 0.2935, "step": 4094 }, { "epoch": 0.32759344813103736, "grad_norm": 0.18460543601194526, "learning_rate": 9.538102162158832e-06, "loss": 0.3357, "step": 4095 }, { "epoch": 0.3276734465310694, "grad_norm": 0.2833355482826337, "learning_rate": 9.537830203496642e-06, "loss": 0.2967, "step": 4096 }, { "epoch": 0.32775344493110137, "grad_norm": 0.20479504316967445, "learning_rate": 9.53755816867488e-06, "loss": 0.3417, "step": 4097 }, { "epoch": 0.3278334433311334, "grad_norm": 0.2290054765598029, "learning_rate": 9.53728605769811e-06, "loss": 0.3108, "step": 4098 }, { "epoch": 0.3279134417311654, "grad_norm": 0.36893586727068906, "learning_rate": 9.5370138705709e-06, "loss": 0.3165, "step": 4099 }, { "epoch": 0.32799344013119736, "grad_norm": 0.3168477715819342, "learning_rate": 9.536741607297817e-06, "loss": 0.2582, "step": 4100 }, { "epoch": 0.3280734385312294, "grad_norm": 0.31669431405959253, "learning_rate": 9.536469267883432e-06, "loss": 0.275, "step": 4101 }, { "epoch": 0.32815343693126137, "grad_norm": 0.31764342164195486, "learning_rate": 9.536196852332315e-06, "loss": 0.2634, "step": 4102 }, { "epoch": 0.3282334353312934, "grad_norm": 0.6425278933222287, "learning_rate": 9.535924360649038e-06, "loss": 0.2527, "step": 4103 }, { "epoch": 0.3283134337313254, "grad_norm": 0.29581621755885257, "learning_rate": 9.535651792838173e-06, "loss": 0.2591, "step": 4104 }, { "epoch": 0.32839343213135735, "grad_norm": 0.30118869282274835, "learning_rate": 9.535379148904297e-06, "loss": 0.3032, "step": 4105 }, { "epoch": 0.3284734305313894, "grad_norm": 0.2830920529137497, "learning_rate": 9.535106428851984e-06, "loss": 0.2854, "step": 4106 }, { "epoch": 0.32855342893142137, "grad_norm": 0.2996699154339364, "learning_rate": 9.534833632685813e-06, "loss": 0.2743, "step": 4107 }, { "epoch": 0.3286334273314534, "grad_norm": 0.270431222823684, "learning_rate": 9.534560760410361e-06, "loss": 0.3319, "step": 4108 }, { "epoch": 0.3287134257314854, "grad_norm": 0.3224130952289341, "learning_rate": 9.534287812030207e-06, "loss": 0.2665, "step": 4109 }, { "epoch": 0.32879342413151735, "grad_norm": 0.335115943467358, "learning_rate": 9.534014787549932e-06, "loss": 0.2678, "step": 4110 }, { "epoch": 0.3288734225315494, "grad_norm": 0.2500551084012514, "learning_rate": 9.533741686974122e-06, "loss": 0.3124, "step": 4111 }, { "epoch": 0.32895342093158136, "grad_norm": 0.32179509747836377, "learning_rate": 9.533468510307356e-06, "loss": 0.2517, "step": 4112 }, { "epoch": 0.32903341933161334, "grad_norm": 0.25092734120252885, "learning_rate": 9.53319525755422e-06, "loss": 0.3217, "step": 4113 }, { "epoch": 0.3291134177316454, "grad_norm": 0.3137999338440026, "learning_rate": 9.532921928719301e-06, "loss": 0.2451, "step": 4114 }, { "epoch": 0.32919341613167735, "grad_norm": 0.30176603738657304, "learning_rate": 9.532648523807186e-06, "loss": 0.2831, "step": 4115 }, { "epoch": 0.3292734145317094, "grad_norm": 0.27007076204482405, "learning_rate": 9.532375042822464e-06, "loss": 0.2957, "step": 4116 }, { "epoch": 0.32935341293174136, "grad_norm": 0.33451915805712173, "learning_rate": 9.532101485769723e-06, "loss": 0.2637, "step": 4117 }, { "epoch": 0.32943341133177334, "grad_norm": 0.31077902981085714, "learning_rate": 9.531827852653556e-06, "loss": 0.296, "step": 4118 }, { "epoch": 0.3295134097318054, "grad_norm": 0.32491363700130044, "learning_rate": 9.531554143478556e-06, "loss": 0.3041, "step": 4119 }, { "epoch": 0.32959340813183735, "grad_norm": 0.28536976266890873, "learning_rate": 9.531280358249315e-06, "loss": 0.2966, "step": 4120 }, { "epoch": 0.3296734065318694, "grad_norm": 0.32700786541367605, "learning_rate": 9.53100649697043e-06, "loss": 0.2725, "step": 4121 }, { "epoch": 0.32975340493190136, "grad_norm": 0.25124382151309893, "learning_rate": 9.530732559646494e-06, "loss": 0.3409, "step": 4122 }, { "epoch": 0.32983340333193334, "grad_norm": 0.32287087468751224, "learning_rate": 9.530458546282108e-06, "loss": 0.2676, "step": 4123 }, { "epoch": 0.32991340173196537, "grad_norm": 0.2778814747653328, "learning_rate": 9.530184456881869e-06, "loss": 0.3063, "step": 4124 }, { "epoch": 0.32999340013199735, "grad_norm": 0.32167136941954166, "learning_rate": 9.529910291450377e-06, "loss": 0.3212, "step": 4125 }, { "epoch": 0.3300733985320294, "grad_norm": 0.2794985077192064, "learning_rate": 9.529636049992235e-06, "loss": 0.3329, "step": 4126 }, { "epoch": 0.33015339693206136, "grad_norm": 0.28120960024259806, "learning_rate": 9.529361732512044e-06, "loss": 0.2799, "step": 4127 }, { "epoch": 0.33023339533209334, "grad_norm": 0.363779403437616, "learning_rate": 9.52908733901441e-06, "loss": 0.2652, "step": 4128 }, { "epoch": 0.33031339373212537, "grad_norm": 0.3151173668877767, "learning_rate": 9.528812869503934e-06, "loss": 0.2662, "step": 4129 }, { "epoch": 0.33039339213215735, "grad_norm": 0.2827964309593129, "learning_rate": 9.528538323985228e-06, "loss": 0.2933, "step": 4130 }, { "epoch": 0.3304733905321894, "grad_norm": 0.2799196849385178, "learning_rate": 9.528263702462894e-06, "loss": 0.2994, "step": 4131 }, { "epoch": 0.33055338893222136, "grad_norm": 0.31865835119181024, "learning_rate": 9.527989004941547e-06, "loss": 0.2612, "step": 4132 }, { "epoch": 0.33063338733225334, "grad_norm": 0.3236072857927237, "learning_rate": 9.527714231425793e-06, "loss": 0.2374, "step": 4133 }, { "epoch": 0.33071338573228537, "grad_norm": 0.326599885644297, "learning_rate": 9.527439381920245e-06, "loss": 0.2634, "step": 4134 }, { "epoch": 0.33079338413231735, "grad_norm": 0.33752876679304256, "learning_rate": 9.527164456429517e-06, "loss": 0.2763, "step": 4135 }, { "epoch": 0.3308733825323494, "grad_norm": 0.3840046524221778, "learning_rate": 9.52688945495822e-06, "loss": 0.2552, "step": 4136 }, { "epoch": 0.33095338093238136, "grad_norm": 0.3205375258323118, "learning_rate": 9.526614377510971e-06, "loss": 0.2438, "step": 4137 }, { "epoch": 0.33103337933241334, "grad_norm": 0.3047620324022224, "learning_rate": 9.526339224092389e-06, "loss": 0.2609, "step": 4138 }, { "epoch": 0.33111337773244537, "grad_norm": 0.27598933560230243, "learning_rate": 9.526063994707091e-06, "loss": 0.2764, "step": 4139 }, { "epoch": 0.33119337613247735, "grad_norm": 0.28627632148415, "learning_rate": 9.525788689359694e-06, "loss": 0.2779, "step": 4140 }, { "epoch": 0.3312733745325093, "grad_norm": 0.34462279105239907, "learning_rate": 9.525513308054818e-06, "loss": 0.2699, "step": 4141 }, { "epoch": 0.33135337293254136, "grad_norm": 0.23289918828079942, "learning_rate": 9.525237850797088e-06, "loss": 0.3186, "step": 4142 }, { "epoch": 0.33143337133257333, "grad_norm": 0.22948182726099486, "learning_rate": 9.524962317591128e-06, "loss": 0.3098, "step": 4143 }, { "epoch": 0.33151336973260537, "grad_norm": 0.34862687906530354, "learning_rate": 9.524686708441558e-06, "loss": 0.2758, "step": 4144 }, { "epoch": 0.33159336813263735, "grad_norm": 0.3352335778407664, "learning_rate": 9.524411023353007e-06, "loss": 0.2572, "step": 4145 }, { "epoch": 0.3316733665326693, "grad_norm": 0.24622626533941536, "learning_rate": 9.524135262330098e-06, "loss": 0.299, "step": 4146 }, { "epoch": 0.33175336493270136, "grad_norm": 0.35289577127501226, "learning_rate": 9.523859425377464e-06, "loss": 0.2965, "step": 4147 }, { "epoch": 0.33183336333273333, "grad_norm": 0.3315888168707551, "learning_rate": 9.523583512499731e-06, "loss": 0.249, "step": 4148 }, { "epoch": 0.33191336173276537, "grad_norm": 0.26650730921260624, "learning_rate": 9.523307523701532e-06, "loss": 0.2717, "step": 4149 }, { "epoch": 0.33199336013279734, "grad_norm": 0.44258672156889756, "learning_rate": 9.523031458987498e-06, "loss": 0.2636, "step": 4150 }, { "epoch": 0.3320733585328293, "grad_norm": 0.27286703746738733, "learning_rate": 9.52275531836226e-06, "loss": 0.2829, "step": 4151 }, { "epoch": 0.33215335693286135, "grad_norm": 0.2936453029520197, "learning_rate": 9.522479101830457e-06, "loss": 0.271, "step": 4152 }, { "epoch": 0.33223335533289333, "grad_norm": 0.2987200485579244, "learning_rate": 9.522202809396721e-06, "loss": 0.2928, "step": 4153 }, { "epoch": 0.33231335373292537, "grad_norm": 0.26363995176236205, "learning_rate": 9.52192644106569e-06, "loss": 0.3121, "step": 4154 }, { "epoch": 0.33239335213295734, "grad_norm": 0.2737718752816055, "learning_rate": 9.521649996842006e-06, "loss": 0.2779, "step": 4155 }, { "epoch": 0.3324733505329893, "grad_norm": 0.3025786287236667, "learning_rate": 9.521373476730303e-06, "loss": 0.2857, "step": 4156 }, { "epoch": 0.33255334893302135, "grad_norm": 0.2498331208313307, "learning_rate": 9.521096880735226e-06, "loss": 0.3341, "step": 4157 }, { "epoch": 0.33263334733305333, "grad_norm": 0.3096483913169192, "learning_rate": 9.520820208861415e-06, "loss": 0.2429, "step": 4158 }, { "epoch": 0.33271334573308536, "grad_norm": 0.34253369294869657, "learning_rate": 9.520543461113514e-06, "loss": 0.2591, "step": 4159 }, { "epoch": 0.33279334413311734, "grad_norm": 0.30211663780516795, "learning_rate": 9.520266637496167e-06, "loss": 0.2826, "step": 4160 }, { "epoch": 0.3328733425331493, "grad_norm": 0.2851274268699115, "learning_rate": 9.519989738014022e-06, "loss": 0.2868, "step": 4161 }, { "epoch": 0.33295334093318135, "grad_norm": 0.26199754508458256, "learning_rate": 9.519712762671724e-06, "loss": 0.3134, "step": 4162 }, { "epoch": 0.33303333933321333, "grad_norm": 0.32014163321540284, "learning_rate": 9.519435711473926e-06, "loss": 0.2939, "step": 4163 }, { "epoch": 0.33311333773324536, "grad_norm": 0.2751728163060749, "learning_rate": 9.519158584425271e-06, "loss": 0.2912, "step": 4164 }, { "epoch": 0.33319333613327734, "grad_norm": 0.2766209626063788, "learning_rate": 9.518881381530415e-06, "loss": 0.2821, "step": 4165 }, { "epoch": 0.3332733345333093, "grad_norm": 0.33615046433294693, "learning_rate": 9.51860410279401e-06, "loss": 0.2642, "step": 4166 }, { "epoch": 0.33335333293334135, "grad_norm": 0.38223601416577, "learning_rate": 9.518326748220707e-06, "loss": 0.3143, "step": 4167 }, { "epoch": 0.33343333133337333, "grad_norm": 0.22769130761737674, "learning_rate": 9.518049317815163e-06, "loss": 0.3347, "step": 4168 }, { "epoch": 0.3335133297334053, "grad_norm": 0.30242582541347013, "learning_rate": 9.517771811582033e-06, "loss": 0.2326, "step": 4169 }, { "epoch": 0.33359332813343734, "grad_norm": 0.2828283456380088, "learning_rate": 9.517494229525976e-06, "loss": 0.2953, "step": 4170 }, { "epoch": 0.3336733265334693, "grad_norm": 0.29973868634483586, "learning_rate": 9.51721657165165e-06, "loss": 0.2398, "step": 4171 }, { "epoch": 0.33375332493350135, "grad_norm": 0.29402402286140816, "learning_rate": 9.516938837963714e-06, "loss": 0.2885, "step": 4172 }, { "epoch": 0.3338333233335333, "grad_norm": 0.30846077669433797, "learning_rate": 9.51666102846683e-06, "loss": 0.2959, "step": 4173 }, { "epoch": 0.3339133217335653, "grad_norm": 0.32748474666219946, "learning_rate": 9.516383143165662e-06, "loss": 0.2507, "step": 4174 }, { "epoch": 0.33399332013359734, "grad_norm": 0.32312020400649577, "learning_rate": 9.516105182064872e-06, "loss": 0.2617, "step": 4175 }, { "epoch": 0.3340733185336293, "grad_norm": 0.3067508970492394, "learning_rate": 9.515827145169128e-06, "loss": 0.2549, "step": 4176 }, { "epoch": 0.33415331693366135, "grad_norm": 0.3599965355577617, "learning_rate": 9.515549032483091e-06, "loss": 0.2371, "step": 4177 }, { "epoch": 0.3342333153336933, "grad_norm": 0.34105268263584393, "learning_rate": 9.515270844011433e-06, "loss": 0.2482, "step": 4178 }, { "epoch": 0.3343133137337253, "grad_norm": 0.22538237070756553, "learning_rate": 9.51499257975882e-06, "loss": 0.3588, "step": 4179 }, { "epoch": 0.33439331213375734, "grad_norm": 0.2735725628756655, "learning_rate": 9.514714239729925e-06, "loss": 0.2804, "step": 4180 }, { "epoch": 0.3344733105337893, "grad_norm": 0.31888926636045983, "learning_rate": 9.514435823929418e-06, "loss": 0.2822, "step": 4181 }, { "epoch": 0.33455330893382135, "grad_norm": 0.31874364811327177, "learning_rate": 9.514157332361971e-06, "loss": 0.2595, "step": 4182 }, { "epoch": 0.3346333073338533, "grad_norm": 0.29127867918140804, "learning_rate": 9.51387876503226e-06, "loss": 0.2949, "step": 4183 }, { "epoch": 0.3347133057338853, "grad_norm": 0.25089558064270673, "learning_rate": 9.51360012194496e-06, "loss": 0.3177, "step": 4184 }, { "epoch": 0.33479330413391734, "grad_norm": 0.49268450682797754, "learning_rate": 9.513321403104745e-06, "loss": 0.3152, "step": 4185 }, { "epoch": 0.3348733025339493, "grad_norm": 0.24502973088370322, "learning_rate": 9.513042608516296e-06, "loss": 0.3036, "step": 4186 }, { "epoch": 0.33495330093398135, "grad_norm": 0.7535335180494861, "learning_rate": 9.512763738184289e-06, "loss": 0.2516, "step": 4187 }, { "epoch": 0.3350332993340133, "grad_norm": 0.2661974220009502, "learning_rate": 9.512484792113408e-06, "loss": 0.2823, "step": 4188 }, { "epoch": 0.3351132977340453, "grad_norm": 0.36540229322224166, "learning_rate": 9.51220577030833e-06, "loss": 0.2721, "step": 4189 }, { "epoch": 0.33519329613407733, "grad_norm": 0.28545236376950817, "learning_rate": 9.511926672773742e-06, "loss": 0.2977, "step": 4190 }, { "epoch": 0.3352732945341093, "grad_norm": 0.278134527846739, "learning_rate": 9.511647499514327e-06, "loss": 0.3206, "step": 4191 }, { "epoch": 0.33535329293414134, "grad_norm": 0.27688185887636607, "learning_rate": 9.511368250534769e-06, "loss": 0.3029, "step": 4192 }, { "epoch": 0.3354332913341733, "grad_norm": 0.2769087920514703, "learning_rate": 9.511088925839758e-06, "loss": 0.2971, "step": 4193 }, { "epoch": 0.3355132897342053, "grad_norm": 0.35035219106707854, "learning_rate": 9.510809525433977e-06, "loss": 0.2861, "step": 4194 }, { "epoch": 0.33559328813423733, "grad_norm": 0.2969801694667227, "learning_rate": 9.51053004932212e-06, "loss": 0.255, "step": 4195 }, { "epoch": 0.3356732865342693, "grad_norm": 0.2948339751533439, "learning_rate": 9.510250497508873e-06, "loss": 0.2501, "step": 4196 }, { "epoch": 0.3357532849343013, "grad_norm": 0.29281113827003824, "learning_rate": 9.509970869998933e-06, "loss": 0.2566, "step": 4197 }, { "epoch": 0.3358332833343333, "grad_norm": 0.2465854749404492, "learning_rate": 9.50969116679699e-06, "loss": 0.3562, "step": 4198 }, { "epoch": 0.3359132817343653, "grad_norm": 0.26811917766098803, "learning_rate": 9.509411387907738e-06, "loss": 0.3219, "step": 4199 }, { "epoch": 0.33599328013439733, "grad_norm": 0.2905191712206117, "learning_rate": 9.509131533335874e-06, "loss": 0.271, "step": 4200 }, { "epoch": 0.3360732785344293, "grad_norm": 0.2787838427873101, "learning_rate": 9.508851603086094e-06, "loss": 0.304, "step": 4201 }, { "epoch": 0.3361532769344613, "grad_norm": 0.42892097190236117, "learning_rate": 9.508571597163095e-06, "loss": 0.2926, "step": 4202 }, { "epoch": 0.3362332753344933, "grad_norm": 0.30485760616483865, "learning_rate": 9.50829151557158e-06, "loss": 0.2813, "step": 4203 }, { "epoch": 0.3363132737345253, "grad_norm": 0.3181552363455147, "learning_rate": 9.508011358316244e-06, "loss": 0.2841, "step": 4204 }, { "epoch": 0.33639327213455733, "grad_norm": 0.27926069763881844, "learning_rate": 9.507731125401795e-06, "loss": 0.2732, "step": 4205 }, { "epoch": 0.3364732705345893, "grad_norm": 0.26992277033561796, "learning_rate": 9.507450816832934e-06, "loss": 0.2834, "step": 4206 }, { "epoch": 0.3365532689346213, "grad_norm": 0.3651261141929914, "learning_rate": 9.507170432614364e-06, "loss": 0.2931, "step": 4207 }, { "epoch": 0.3366332673346533, "grad_norm": 0.3161952853209081, "learning_rate": 9.506889972750792e-06, "loss": 0.2935, "step": 4208 }, { "epoch": 0.3367132657346853, "grad_norm": 0.25868257943383854, "learning_rate": 9.506609437246924e-06, "loss": 0.263, "step": 4209 }, { "epoch": 0.33679326413471733, "grad_norm": 0.29382827351934276, "learning_rate": 9.506328826107472e-06, "loss": 0.3044, "step": 4210 }, { "epoch": 0.3368732625347493, "grad_norm": 0.29236024368946345, "learning_rate": 9.506048139337142e-06, "loss": 0.2643, "step": 4211 }, { "epoch": 0.3369532609347813, "grad_norm": 0.2730083195624845, "learning_rate": 9.505767376940642e-06, "loss": 0.2759, "step": 4212 }, { "epoch": 0.3370332593348133, "grad_norm": 0.27804821736529406, "learning_rate": 9.50548653892269e-06, "loss": 0.3055, "step": 4213 }, { "epoch": 0.3371132577348453, "grad_norm": 0.5524963149748693, "learning_rate": 9.505205625288e-06, "loss": 0.2849, "step": 4214 }, { "epoch": 0.33719325613487733, "grad_norm": 0.32688167113078737, "learning_rate": 9.504924636041279e-06, "loss": 0.261, "step": 4215 }, { "epoch": 0.3372732545349093, "grad_norm": 0.3011645915518335, "learning_rate": 9.50464357118725e-06, "loss": 0.2342, "step": 4216 }, { "epoch": 0.3373532529349413, "grad_norm": 0.32563717515424556, "learning_rate": 9.504362430730627e-06, "loss": 0.2729, "step": 4217 }, { "epoch": 0.3374332513349733, "grad_norm": 0.3172746333595425, "learning_rate": 9.50408121467613e-06, "loss": 0.2612, "step": 4218 }, { "epoch": 0.3375132497350053, "grad_norm": 0.2458830190890694, "learning_rate": 9.503799923028478e-06, "loss": 0.3062, "step": 4219 }, { "epoch": 0.33759324813503727, "grad_norm": 0.26610409494043075, "learning_rate": 9.503518555792392e-06, "loss": 0.2856, "step": 4220 }, { "epoch": 0.3376732465350693, "grad_norm": 0.2649896720313164, "learning_rate": 9.503237112972594e-06, "loss": 0.3057, "step": 4221 }, { "epoch": 0.3377532449351013, "grad_norm": 0.30134189687860263, "learning_rate": 9.502955594573807e-06, "loss": 0.2603, "step": 4222 }, { "epoch": 0.3378332433351333, "grad_norm": 0.26326425511418927, "learning_rate": 9.502674000600758e-06, "loss": 0.29, "step": 4223 }, { "epoch": 0.3379132417351653, "grad_norm": 0.35501648588678963, "learning_rate": 9.50239233105817e-06, "loss": 0.2467, "step": 4224 }, { "epoch": 0.33799324013519727, "grad_norm": 0.2760707274939826, "learning_rate": 9.502110585950773e-06, "loss": 0.2915, "step": 4225 }, { "epoch": 0.3380732385352293, "grad_norm": 0.29424043954422985, "learning_rate": 9.501828765283295e-06, "loss": 0.285, "step": 4226 }, { "epoch": 0.3381532369352613, "grad_norm": 0.278064592520865, "learning_rate": 9.501546869060466e-06, "loss": 0.3084, "step": 4227 }, { "epoch": 0.3382332353352933, "grad_norm": 0.2843033673256028, "learning_rate": 9.501264897287015e-06, "loss": 0.3127, "step": 4228 }, { "epoch": 0.3383132337353253, "grad_norm": 0.3052274360189814, "learning_rate": 9.500982849967674e-06, "loss": 0.3, "step": 4229 }, { "epoch": 0.33839323213535727, "grad_norm": 0.3107926570627238, "learning_rate": 9.500700727107182e-06, "loss": 0.2561, "step": 4230 }, { "epoch": 0.3384732305353893, "grad_norm": 0.3512836376530983, "learning_rate": 9.50041852871027e-06, "loss": 0.2458, "step": 4231 }, { "epoch": 0.3385532289354213, "grad_norm": 0.2686511713811813, "learning_rate": 9.500136254781674e-06, "loss": 0.2898, "step": 4232 }, { "epoch": 0.3386332273354533, "grad_norm": 0.2832750130606172, "learning_rate": 9.499853905326133e-06, "loss": 0.3042, "step": 4233 }, { "epoch": 0.3387132257354853, "grad_norm": 0.36969808215827404, "learning_rate": 9.499571480348385e-06, "loss": 0.2454, "step": 4234 }, { "epoch": 0.33879322413551727, "grad_norm": 0.33417453707713296, "learning_rate": 9.49928897985317e-06, "loss": 0.2564, "step": 4235 }, { "epoch": 0.3388732225355493, "grad_norm": 0.300208253199366, "learning_rate": 9.49900640384523e-06, "loss": 0.2672, "step": 4236 }, { "epoch": 0.3389532209355813, "grad_norm": 0.3058876665008324, "learning_rate": 9.498723752329304e-06, "loss": 0.259, "step": 4237 }, { "epoch": 0.3390332193356133, "grad_norm": 0.29026773853039556, "learning_rate": 9.498441025310142e-06, "loss": 0.2791, "step": 4238 }, { "epoch": 0.3391132177356453, "grad_norm": 0.3000904461508221, "learning_rate": 9.498158222792484e-06, "loss": 0.2962, "step": 4239 }, { "epoch": 0.33919321613567727, "grad_norm": 0.34956776599279343, "learning_rate": 9.497875344781078e-06, "loss": 0.2625, "step": 4240 }, { "epoch": 0.3392732145357093, "grad_norm": 0.3048818566172097, "learning_rate": 9.497592391280672e-06, "loss": 0.2476, "step": 4241 }, { "epoch": 0.3393532129357413, "grad_norm": 5.469832629541838, "learning_rate": 9.497309362296015e-06, "loss": 0.29, "step": 4242 }, { "epoch": 0.3394332113357733, "grad_norm": 0.43755541579944734, "learning_rate": 9.497026257831856e-06, "loss": 0.305, "step": 4243 }, { "epoch": 0.3395132097358053, "grad_norm": 0.25907917934101155, "learning_rate": 9.496743077892948e-06, "loss": 0.2812, "step": 4244 }, { "epoch": 0.33959320813583727, "grad_norm": 0.31474947307406675, "learning_rate": 9.496459822484043e-06, "loss": 0.2529, "step": 4245 }, { "epoch": 0.3396732065358693, "grad_norm": 0.18692514924378942, "learning_rate": 9.496176491609893e-06, "loss": 0.3496, "step": 4246 }, { "epoch": 0.3397532049359013, "grad_norm": 0.29102379236580905, "learning_rate": 9.495893085275256e-06, "loss": 0.2947, "step": 4247 }, { "epoch": 0.33983320333593325, "grad_norm": 0.3039095028742169, "learning_rate": 9.495609603484887e-06, "loss": 0.2901, "step": 4248 }, { "epoch": 0.3399132017359653, "grad_norm": 0.31852993243479677, "learning_rate": 9.495326046243547e-06, "loss": 0.2634, "step": 4249 }, { "epoch": 0.33999320013599726, "grad_norm": 0.3305617694724181, "learning_rate": 9.495042413555988e-06, "loss": 0.2705, "step": 4250 }, { "epoch": 0.3400731985360293, "grad_norm": 0.26614369319100134, "learning_rate": 9.494758705426978e-06, "loss": 0.3235, "step": 4251 }, { "epoch": 0.3401531969360613, "grad_norm": 0.35185872689168907, "learning_rate": 9.494474921861274e-06, "loss": 0.2433, "step": 4252 }, { "epoch": 0.34023319533609325, "grad_norm": 0.29908461661269914, "learning_rate": 9.494191062863638e-06, "loss": 0.3151, "step": 4253 }, { "epoch": 0.3403131937361253, "grad_norm": 0.33742140893680217, "learning_rate": 9.493907128438838e-06, "loss": 0.2801, "step": 4254 }, { "epoch": 0.34039319213615726, "grad_norm": 0.2950131610756389, "learning_rate": 9.493623118591638e-06, "loss": 0.2831, "step": 4255 }, { "epoch": 0.3404731905361893, "grad_norm": 0.33164400544718636, "learning_rate": 9.493339033326802e-06, "loss": 0.2784, "step": 4256 }, { "epoch": 0.3405531889362213, "grad_norm": 0.29714970690394904, "learning_rate": 9.4930548726491e-06, "loss": 0.2593, "step": 4257 }, { "epoch": 0.34063318733625325, "grad_norm": 0.3158193348514893, "learning_rate": 9.492770636563303e-06, "loss": 0.2496, "step": 4258 }, { "epoch": 0.3407131857362853, "grad_norm": 0.333498635453097, "learning_rate": 9.492486325074177e-06, "loss": 0.2493, "step": 4259 }, { "epoch": 0.34079318413631726, "grad_norm": 0.28780795148769905, "learning_rate": 9.492201938186496e-06, "loss": 0.2911, "step": 4260 }, { "epoch": 0.3408731825363493, "grad_norm": 0.3405918675704944, "learning_rate": 9.491917475905034e-06, "loss": 0.262, "step": 4261 }, { "epoch": 0.3409531809363813, "grad_norm": 0.24380301349587288, "learning_rate": 9.491632938234563e-06, "loss": 0.3427, "step": 4262 }, { "epoch": 0.34103317933641325, "grad_norm": 0.35064649592418756, "learning_rate": 9.491348325179861e-06, "loss": 0.2698, "step": 4263 }, { "epoch": 0.3411131777364453, "grad_norm": 0.2855980789980333, "learning_rate": 9.491063636745704e-06, "loss": 0.2836, "step": 4264 }, { "epoch": 0.34119317613647726, "grad_norm": 0.3542564638270448, "learning_rate": 9.490778872936867e-06, "loss": 0.2281, "step": 4265 }, { "epoch": 0.3412731745365093, "grad_norm": 0.287689247054273, "learning_rate": 9.490494033758132e-06, "loss": 0.2922, "step": 4266 }, { "epoch": 0.34135317293654127, "grad_norm": 0.3530141448578932, "learning_rate": 9.490209119214282e-06, "loss": 0.2512, "step": 4267 }, { "epoch": 0.34143317133657325, "grad_norm": 0.3087465322483459, "learning_rate": 9.489924129310094e-06, "loss": 0.2511, "step": 4268 }, { "epoch": 0.3415131697366053, "grad_norm": 0.25855183961396433, "learning_rate": 9.489639064050353e-06, "loss": 0.3258, "step": 4269 }, { "epoch": 0.34159316813663726, "grad_norm": 0.2919818702032761, "learning_rate": 9.489353923439843e-06, "loss": 0.2988, "step": 4270 }, { "epoch": 0.3416731665366693, "grad_norm": 0.290290736446847, "learning_rate": 9.48906870748335e-06, "loss": 0.3047, "step": 4271 }, { "epoch": 0.34175316493670127, "grad_norm": 0.4815677362309613, "learning_rate": 9.48878341618566e-06, "loss": 0.2404, "step": 4272 }, { "epoch": 0.34183316333673325, "grad_norm": 0.2856137811314598, "learning_rate": 9.488498049551563e-06, "loss": 0.2878, "step": 4273 }, { "epoch": 0.3419131617367653, "grad_norm": 0.30138821353249046, "learning_rate": 9.488212607585847e-06, "loss": 0.2926, "step": 4274 }, { "epoch": 0.34199316013679726, "grad_norm": 0.31495904239448713, "learning_rate": 9.487927090293302e-06, "loss": 0.2538, "step": 4275 }, { "epoch": 0.34207315853682924, "grad_norm": 0.34267501041614296, "learning_rate": 9.487641497678724e-06, "loss": 0.2479, "step": 4276 }, { "epoch": 0.34215315693686127, "grad_norm": 0.3490547029686455, "learning_rate": 9.4873558297469e-06, "loss": 0.2785, "step": 4277 }, { "epoch": 0.34223315533689325, "grad_norm": 0.3197164288875752, "learning_rate": 9.487070086502627e-06, "loss": 0.2519, "step": 4278 }, { "epoch": 0.3423131537369253, "grad_norm": 0.2946325375889917, "learning_rate": 9.486784267950704e-06, "loss": 0.2941, "step": 4279 }, { "epoch": 0.34239315213695726, "grad_norm": 0.32373204956468515, "learning_rate": 9.486498374095922e-06, "loss": 0.2749, "step": 4280 }, { "epoch": 0.34247315053698923, "grad_norm": 0.33761168843910017, "learning_rate": 9.486212404943084e-06, "loss": 0.241, "step": 4281 }, { "epoch": 0.34255314893702127, "grad_norm": 0.33387600386572547, "learning_rate": 9.485926360496988e-06, "loss": 0.273, "step": 4282 }, { "epoch": 0.34263314733705325, "grad_norm": 0.1984838660640497, "learning_rate": 9.485640240762434e-06, "loss": 0.3429, "step": 4283 }, { "epoch": 0.3427131457370853, "grad_norm": 0.2987928069194664, "learning_rate": 9.485354045744226e-06, "loss": 0.2963, "step": 4284 }, { "epoch": 0.34279314413711726, "grad_norm": 0.28167266109497824, "learning_rate": 9.485067775447164e-06, "loss": 0.2793, "step": 4285 }, { "epoch": 0.34287314253714923, "grad_norm": 0.3031111122991727, "learning_rate": 9.484781429876055e-06, "loss": 0.237, "step": 4286 }, { "epoch": 0.34295314093718127, "grad_norm": 0.25940845328958534, "learning_rate": 9.484495009035705e-06, "loss": 0.2803, "step": 4287 }, { "epoch": 0.34303313933721324, "grad_norm": 0.27757859822975783, "learning_rate": 9.48420851293092e-06, "loss": 0.3137, "step": 4288 }, { "epoch": 0.3431131377372453, "grad_norm": 0.30693997414111424, "learning_rate": 9.483921941566508e-06, "loss": 0.2383, "step": 4289 }, { "epoch": 0.34319313613727725, "grad_norm": 0.29798973746055113, "learning_rate": 9.48363529494728e-06, "loss": 0.34, "step": 4290 }, { "epoch": 0.34327313453730923, "grad_norm": 0.2754974623863915, "learning_rate": 9.483348573078046e-06, "loss": 0.3082, "step": 4291 }, { "epoch": 0.34335313293734127, "grad_norm": 0.3553606773548199, "learning_rate": 9.483061775963618e-06, "loss": 0.2677, "step": 4292 }, { "epoch": 0.34343313133737324, "grad_norm": 0.39162978462433223, "learning_rate": 9.48277490360881e-06, "loss": 0.2695, "step": 4293 }, { "epoch": 0.3435131297374053, "grad_norm": 0.22946185310349518, "learning_rate": 9.482487956018436e-06, "loss": 0.3035, "step": 4294 }, { "epoch": 0.34359312813743725, "grad_norm": 0.25586267455064826, "learning_rate": 9.482200933197312e-06, "loss": 0.3323, "step": 4295 }, { "epoch": 0.34367312653746923, "grad_norm": 0.33422549808169644, "learning_rate": 9.481913835150256e-06, "loss": 0.2369, "step": 4296 }, { "epoch": 0.34375312493750126, "grad_norm": 0.28977691416052875, "learning_rate": 9.481626661882084e-06, "loss": 0.269, "step": 4297 }, { "epoch": 0.34383312333753324, "grad_norm": 0.2861293041064024, "learning_rate": 9.48133941339762e-06, "loss": 0.2703, "step": 4298 }, { "epoch": 0.3439131217375653, "grad_norm": 0.314922485077229, "learning_rate": 9.48105208970168e-06, "loss": 0.2665, "step": 4299 }, { "epoch": 0.34399312013759725, "grad_norm": 0.30315282636052626, "learning_rate": 9.480764690799091e-06, "loss": 0.2652, "step": 4300 }, { "epoch": 0.34407311853762923, "grad_norm": 0.2602876280060404, "learning_rate": 9.480477216694674e-06, "loss": 0.2677, "step": 4301 }, { "epoch": 0.34415311693766126, "grad_norm": 0.2948333413950905, "learning_rate": 9.480189667393254e-06, "loss": 0.2896, "step": 4302 }, { "epoch": 0.34423311533769324, "grad_norm": 0.2963929806677376, "learning_rate": 9.479902042899655e-06, "loss": 0.2517, "step": 4303 }, { "epoch": 0.3443131137377252, "grad_norm": 0.3009082100603607, "learning_rate": 9.479614343218709e-06, "loss": 0.2773, "step": 4304 }, { "epoch": 0.34439311213775725, "grad_norm": 0.33546121869581863, "learning_rate": 9.47932656835524e-06, "loss": 0.2717, "step": 4305 }, { "epoch": 0.34447311053778923, "grad_norm": 0.24583208448507946, "learning_rate": 9.47903871831408e-06, "loss": 0.306, "step": 4306 }, { "epoch": 0.34455310893782126, "grad_norm": 0.30823771357260016, "learning_rate": 9.47875079310006e-06, "loss": 0.2604, "step": 4307 }, { "epoch": 0.34463310733785324, "grad_norm": 0.9034362380820966, "learning_rate": 9.47846279271801e-06, "loss": 0.2681, "step": 4308 }, { "epoch": 0.3447131057378852, "grad_norm": 0.34366504153780064, "learning_rate": 9.478174717172768e-06, "loss": 0.2415, "step": 4309 }, { "epoch": 0.34479310413791725, "grad_norm": 0.3184543911648916, "learning_rate": 9.477886566469165e-06, "loss": 0.2698, "step": 4310 }, { "epoch": 0.3448731025379492, "grad_norm": 0.29891552512302727, "learning_rate": 9.47759834061204e-06, "loss": 0.2465, "step": 4311 }, { "epoch": 0.34495310093798126, "grad_norm": 0.2736964547255473, "learning_rate": 9.477310039606228e-06, "loss": 0.2657, "step": 4312 }, { "epoch": 0.34503309933801324, "grad_norm": 0.26370295659622983, "learning_rate": 9.477021663456569e-06, "loss": 0.2864, "step": 4313 }, { "epoch": 0.3451130977380452, "grad_norm": 0.25652185113185444, "learning_rate": 9.476733212167901e-06, "loss": 0.3393, "step": 4314 }, { "epoch": 0.34519309613807725, "grad_norm": 0.30406997333538116, "learning_rate": 9.476444685745067e-06, "loss": 0.2891, "step": 4315 }, { "epoch": 0.3452730945381092, "grad_norm": 0.30455027541346624, "learning_rate": 9.47615608419291e-06, "loss": 0.2524, "step": 4316 }, { "epoch": 0.34535309293814126, "grad_norm": 0.3161252203212158, "learning_rate": 9.475867407516272e-06, "loss": 0.2639, "step": 4317 }, { "epoch": 0.34543309133817324, "grad_norm": 0.2506104985563852, "learning_rate": 9.47557865572e-06, "loss": 0.3075, "step": 4318 }, { "epoch": 0.3455130897382052, "grad_norm": 0.2999786871994182, "learning_rate": 9.475289828808937e-06, "loss": 0.263, "step": 4319 }, { "epoch": 0.34559308813823725, "grad_norm": 0.43822098377154356, "learning_rate": 9.475000926787931e-06, "loss": 0.2666, "step": 4320 }, { "epoch": 0.3456730865382692, "grad_norm": 0.33065879934591713, "learning_rate": 9.474711949661835e-06, "loss": 0.2693, "step": 4321 }, { "epoch": 0.34575308493830126, "grad_norm": 0.5013963560284516, "learning_rate": 9.474422897435496e-06, "loss": 0.277, "step": 4322 }, { "epoch": 0.34583308333833324, "grad_norm": 0.27212794145613994, "learning_rate": 9.474133770113763e-06, "loss": 0.2946, "step": 4323 }, { "epoch": 0.3459130817383652, "grad_norm": 0.2454876242765903, "learning_rate": 9.473844567701491e-06, "loss": 0.2979, "step": 4324 }, { "epoch": 0.34599308013839725, "grad_norm": 0.3152062613802087, "learning_rate": 9.473555290203534e-06, "loss": 0.2878, "step": 4325 }, { "epoch": 0.3460730785384292, "grad_norm": 0.27202476234076606, "learning_rate": 9.473265937624748e-06, "loss": 0.2795, "step": 4326 }, { "epoch": 0.34615307693846126, "grad_norm": 0.2326493819412296, "learning_rate": 9.472976509969984e-06, "loss": 0.3164, "step": 4327 }, { "epoch": 0.34623307533849323, "grad_norm": 0.3625500642682127, "learning_rate": 9.472687007244106e-06, "loss": 0.3042, "step": 4328 }, { "epoch": 0.3463130737385252, "grad_norm": 0.31876132348401975, "learning_rate": 9.47239742945197e-06, "loss": 0.2399, "step": 4329 }, { "epoch": 0.34639307213855725, "grad_norm": 0.3122754610054898, "learning_rate": 9.472107776598435e-06, "loss": 0.2693, "step": 4330 }, { "epoch": 0.3464730705385892, "grad_norm": 0.3308103457778237, "learning_rate": 9.471818048688364e-06, "loss": 0.2542, "step": 4331 }, { "epoch": 0.3465530689386212, "grad_norm": 0.33380310292061505, "learning_rate": 9.471528245726618e-06, "loss": 0.2559, "step": 4332 }, { "epoch": 0.34663306733865323, "grad_norm": 0.2768525847622787, "learning_rate": 9.471238367718064e-06, "loss": 0.2954, "step": 4333 }, { "epoch": 0.3467130657386852, "grad_norm": 0.31696348642123356, "learning_rate": 9.470948414667562e-06, "loss": 0.2425, "step": 4334 }, { "epoch": 0.34679306413871724, "grad_norm": 0.28561634923240226, "learning_rate": 9.470658386579983e-06, "loss": 0.2933, "step": 4335 }, { "epoch": 0.3468730625387492, "grad_norm": 0.3302115402124963, "learning_rate": 9.470368283460193e-06, "loss": 0.2625, "step": 4336 }, { "epoch": 0.3469530609387812, "grad_norm": 0.27056622924408813, "learning_rate": 9.470078105313062e-06, "loss": 0.3167, "step": 4337 }, { "epoch": 0.34703305933881323, "grad_norm": 0.2507326906138062, "learning_rate": 9.469787852143458e-06, "loss": 0.3226, "step": 4338 }, { "epoch": 0.3471130577388452, "grad_norm": 0.2985939255239289, "learning_rate": 9.469497523956253e-06, "loss": 0.2875, "step": 4339 }, { "epoch": 0.34719305613887724, "grad_norm": 0.2762241211467178, "learning_rate": 9.46920712075632e-06, "loss": 0.3181, "step": 4340 }, { "epoch": 0.3472730545389092, "grad_norm": 0.28967330201731833, "learning_rate": 9.468916642548534e-06, "loss": 0.239, "step": 4341 }, { "epoch": 0.3473530529389412, "grad_norm": 0.38097360145867176, "learning_rate": 9.468626089337767e-06, "loss": 0.2573, "step": 4342 }, { "epoch": 0.34743305133897323, "grad_norm": 0.2910523714060384, "learning_rate": 9.468335461128898e-06, "loss": 0.269, "step": 4343 }, { "epoch": 0.3475130497390052, "grad_norm": 0.3187817349021624, "learning_rate": 9.468044757926804e-06, "loss": 0.2587, "step": 4344 }, { "epoch": 0.34759304813903724, "grad_norm": 0.267475803607043, "learning_rate": 9.467753979736365e-06, "loss": 0.3244, "step": 4345 }, { "epoch": 0.3476730465390692, "grad_norm": 0.32592848055603196, "learning_rate": 9.467463126562461e-06, "loss": 0.2634, "step": 4346 }, { "epoch": 0.3477530449391012, "grad_norm": 0.3019395619439381, "learning_rate": 9.467172198409971e-06, "loss": 0.2719, "step": 4347 }, { "epoch": 0.34783304333913323, "grad_norm": 0.26687158148998863, "learning_rate": 9.466881195283782e-06, "loss": 0.2845, "step": 4348 }, { "epoch": 0.3479130417391652, "grad_norm": 0.2864044030207187, "learning_rate": 9.466590117188773e-06, "loss": 0.2918, "step": 4349 }, { "epoch": 0.34799304013919724, "grad_norm": 0.20132515209834007, "learning_rate": 9.466298964129832e-06, "loss": 0.3669, "step": 4350 }, { "epoch": 0.3480730385392292, "grad_norm": 0.29399505437396484, "learning_rate": 9.466007736111846e-06, "loss": 0.2914, "step": 4351 }, { "epoch": 0.3481530369392612, "grad_norm": 0.3197337323327319, "learning_rate": 9.465716433139702e-06, "loss": 0.2461, "step": 4352 }, { "epoch": 0.34823303533929323, "grad_norm": 0.30604925978737085, "learning_rate": 9.465425055218289e-06, "loss": 0.2511, "step": 4353 }, { "epoch": 0.3483130337393252, "grad_norm": 0.31402366617290123, "learning_rate": 9.465133602352497e-06, "loss": 0.3246, "step": 4354 }, { "epoch": 0.34839303213935724, "grad_norm": 0.27231286699225876, "learning_rate": 9.464842074547218e-06, "loss": 0.2834, "step": 4355 }, { "epoch": 0.3484730305393892, "grad_norm": 0.3565449921353315, "learning_rate": 9.464550471807346e-06, "loss": 0.2548, "step": 4356 }, { "epoch": 0.3485530289394212, "grad_norm": 0.28094402877701946, "learning_rate": 9.464258794137771e-06, "loss": 0.2872, "step": 4357 }, { "epoch": 0.3486330273394532, "grad_norm": 0.2905392282394296, "learning_rate": 9.46396704154339e-06, "loss": 0.2863, "step": 4358 }, { "epoch": 0.3487130257394852, "grad_norm": 0.3929194060155688, "learning_rate": 9.463675214029104e-06, "loss": 0.2753, "step": 4359 }, { "epoch": 0.3487930241395172, "grad_norm": 0.31526305450255054, "learning_rate": 9.463383311599806e-06, "loss": 0.27, "step": 4360 }, { "epoch": 0.3488730225395492, "grad_norm": 0.2699418931353672, "learning_rate": 9.463091334260397e-06, "loss": 0.3142, "step": 4361 }, { "epoch": 0.3489530209395812, "grad_norm": 0.3339972773028592, "learning_rate": 9.462799282015775e-06, "loss": 0.295, "step": 4362 }, { "epoch": 0.3490330193396132, "grad_norm": 0.3442178117022778, "learning_rate": 9.462507154870846e-06, "loss": 0.2554, "step": 4363 }, { "epoch": 0.3491130177396452, "grad_norm": 0.2501113741724323, "learning_rate": 9.462214952830507e-06, "loss": 0.3223, "step": 4364 }, { "epoch": 0.3491930161396772, "grad_norm": 0.2915605271924354, "learning_rate": 9.461922675899668e-06, "loss": 0.3114, "step": 4365 }, { "epoch": 0.3492730145397092, "grad_norm": 0.3243893896502367, "learning_rate": 9.461630324083228e-06, "loss": 0.2645, "step": 4366 }, { "epoch": 0.3493530129397412, "grad_norm": 0.2775562163203753, "learning_rate": 9.4613378973861e-06, "loss": 0.3197, "step": 4367 }, { "epoch": 0.3494330113397732, "grad_norm": 0.30941489796468646, "learning_rate": 9.461045395813188e-06, "loss": 0.2636, "step": 4368 }, { "epoch": 0.3495130097398052, "grad_norm": 0.2722517855704053, "learning_rate": 9.460752819369405e-06, "loss": 0.2917, "step": 4369 }, { "epoch": 0.3495930081398372, "grad_norm": 0.3110908886141127, "learning_rate": 9.460460168059655e-06, "loss": 0.2773, "step": 4370 }, { "epoch": 0.3496730065398692, "grad_norm": 0.3619610026116026, "learning_rate": 9.460167441888855e-06, "loss": 0.2633, "step": 4371 }, { "epoch": 0.3497530049399012, "grad_norm": 0.321834807621303, "learning_rate": 9.459874640861917e-06, "loss": 0.2514, "step": 4372 }, { "epoch": 0.3498330033399332, "grad_norm": 0.2877897690292016, "learning_rate": 9.459581764983752e-06, "loss": 0.3004, "step": 4373 }, { "epoch": 0.3499130017399652, "grad_norm": 0.35739789819881634, "learning_rate": 9.459288814259279e-06, "loss": 0.2651, "step": 4374 }, { "epoch": 0.3499930001399972, "grad_norm": 0.3155776403518716, "learning_rate": 9.458995788693414e-06, "loss": 0.2817, "step": 4375 }, { "epoch": 0.3500729985400292, "grad_norm": 0.3410392656074438, "learning_rate": 9.458702688291072e-06, "loss": 0.3102, "step": 4376 }, { "epoch": 0.3501529969400612, "grad_norm": 0.3107243714986064, "learning_rate": 9.458409513057176e-06, "loss": 0.2884, "step": 4377 }, { "epoch": 0.3502329953400932, "grad_norm": 0.3289579373250139, "learning_rate": 9.458116262996646e-06, "loss": 0.2686, "step": 4378 }, { "epoch": 0.3503129937401252, "grad_norm": 0.3362178198620883, "learning_rate": 9.457822938114401e-06, "loss": 0.2733, "step": 4379 }, { "epoch": 0.3503929921401572, "grad_norm": 0.30852884699834876, "learning_rate": 9.457529538415366e-06, "loss": 0.252, "step": 4380 }, { "epoch": 0.3504729905401892, "grad_norm": 0.37718518755574243, "learning_rate": 9.457236063904465e-06, "loss": 0.2872, "step": 4381 }, { "epoch": 0.3505529889402212, "grad_norm": 0.31345469018931643, "learning_rate": 9.456942514586623e-06, "loss": 0.2547, "step": 4382 }, { "epoch": 0.3506329873402532, "grad_norm": 0.3017408692498249, "learning_rate": 9.456648890466767e-06, "loss": 0.2679, "step": 4383 }, { "epoch": 0.3507129857402852, "grad_norm": 0.3081577963014683, "learning_rate": 9.456355191549826e-06, "loss": 0.2648, "step": 4384 }, { "epoch": 0.3507929841403172, "grad_norm": 0.3159367390089897, "learning_rate": 9.456061417840727e-06, "loss": 0.2676, "step": 4385 }, { "epoch": 0.3508729825403492, "grad_norm": 0.32642381202243553, "learning_rate": 9.4557675693444e-06, "loss": 0.264, "step": 4386 }, { "epoch": 0.3509529809403812, "grad_norm": 0.28334396989278965, "learning_rate": 9.455473646065782e-06, "loss": 0.2927, "step": 4387 }, { "epoch": 0.35103297934041316, "grad_norm": 0.2502521089542464, "learning_rate": 9.4551796480098e-06, "loss": 0.2817, "step": 4388 }, { "epoch": 0.3511129777404452, "grad_norm": 0.3064201932697271, "learning_rate": 9.454885575181391e-06, "loss": 0.3039, "step": 4389 }, { "epoch": 0.3511929761404772, "grad_norm": 0.2243859729067831, "learning_rate": 9.454591427585489e-06, "loss": 0.3467, "step": 4390 }, { "epoch": 0.3512729745405092, "grad_norm": 0.31457188423253873, "learning_rate": 9.454297205227034e-06, "loss": 0.2389, "step": 4391 }, { "epoch": 0.3513529729405412, "grad_norm": 0.2740108651853356, "learning_rate": 9.45400290811096e-06, "loss": 0.2692, "step": 4392 }, { "epoch": 0.35143297134057316, "grad_norm": 0.28215901269006904, "learning_rate": 9.45370853624221e-06, "loss": 0.29, "step": 4393 }, { "epoch": 0.3515129697406052, "grad_norm": 0.2868176853006843, "learning_rate": 9.453414089625722e-06, "loss": 0.3175, "step": 4394 }, { "epoch": 0.3515929681406372, "grad_norm": 0.5443891302331229, "learning_rate": 9.453119568266435e-06, "loss": 0.2914, "step": 4395 }, { "epoch": 0.3516729665406692, "grad_norm": 0.3088524527530198, "learning_rate": 9.452824972169298e-06, "loss": 0.247, "step": 4396 }, { "epoch": 0.3517529649407012, "grad_norm": 0.23854697460608962, "learning_rate": 9.452530301339254e-06, "loss": 0.3062, "step": 4397 }, { "epoch": 0.35183296334073316, "grad_norm": 0.27294430814377757, "learning_rate": 9.452235555781246e-06, "loss": 0.2736, "step": 4398 }, { "epoch": 0.3519129617407652, "grad_norm": 0.2906641160778461, "learning_rate": 9.451940735500222e-06, "loss": 0.2789, "step": 4399 }, { "epoch": 0.3519929601407972, "grad_norm": 0.28468058217749204, "learning_rate": 9.451645840501131e-06, "loss": 0.2889, "step": 4400 }, { "epoch": 0.3520729585408292, "grad_norm": 0.35288303542469, "learning_rate": 9.451350870788922e-06, "loss": 0.2879, "step": 4401 }, { "epoch": 0.3521529569408612, "grad_norm": 0.29927280273963813, "learning_rate": 9.451055826368542e-06, "loss": 0.2524, "step": 4402 }, { "epoch": 0.35223295534089316, "grad_norm": 0.2661774931387885, "learning_rate": 9.450760707244948e-06, "loss": 0.2898, "step": 4403 }, { "epoch": 0.3523129537409252, "grad_norm": 0.3123631500564031, "learning_rate": 9.45046551342309e-06, "loss": 0.2758, "step": 4404 }, { "epoch": 0.35239295214095717, "grad_norm": 0.23429733781655696, "learning_rate": 9.450170244907924e-06, "loss": 0.3317, "step": 4405 }, { "epoch": 0.3524729505409892, "grad_norm": 0.22396360431766393, "learning_rate": 9.449874901704404e-06, "loss": 0.3228, "step": 4406 }, { "epoch": 0.3525529489410212, "grad_norm": 0.3211433935513282, "learning_rate": 9.449579483817487e-06, "loss": 0.2655, "step": 4407 }, { "epoch": 0.35263294734105316, "grad_norm": 0.25443751643639356, "learning_rate": 9.449283991252132e-06, "loss": 0.2863, "step": 4408 }, { "epoch": 0.3527129457410852, "grad_norm": 0.2869633988283425, "learning_rate": 9.448988424013298e-06, "loss": 0.3119, "step": 4409 }, { "epoch": 0.35279294414111717, "grad_norm": 0.3584701119246657, "learning_rate": 9.448692782105945e-06, "loss": 0.2582, "step": 4410 }, { "epoch": 0.3528729425411492, "grad_norm": 0.3024010724888866, "learning_rate": 9.448397065535037e-06, "loss": 0.2743, "step": 4411 }, { "epoch": 0.3529529409411812, "grad_norm": 0.38965927812243917, "learning_rate": 9.448101274305533e-06, "loss": 0.2832, "step": 4412 }, { "epoch": 0.35303293934121316, "grad_norm": 0.2606347963680028, "learning_rate": 9.4478054084224e-06, "loss": 0.2953, "step": 4413 }, { "epoch": 0.3531129377412452, "grad_norm": 0.30094340745243525, "learning_rate": 9.447509467890605e-06, "loss": 0.2878, "step": 4414 }, { "epoch": 0.35319293614127717, "grad_norm": 0.2709932647076854, "learning_rate": 9.447213452715114e-06, "loss": 0.3005, "step": 4415 }, { "epoch": 0.35327293454130915, "grad_norm": 0.24338161459454632, "learning_rate": 9.446917362900891e-06, "loss": 0.3201, "step": 4416 }, { "epoch": 0.3533529329413412, "grad_norm": 0.2716207941014577, "learning_rate": 9.446621198452912e-06, "loss": 0.2866, "step": 4417 }, { "epoch": 0.35343293134137316, "grad_norm": 0.3151838389922976, "learning_rate": 9.446324959376142e-06, "loss": 0.2566, "step": 4418 }, { "epoch": 0.3535129297414052, "grad_norm": 0.31889775897723543, "learning_rate": 9.446028645675556e-06, "loss": 0.26, "step": 4419 }, { "epoch": 0.35359292814143717, "grad_norm": 0.2942341704670075, "learning_rate": 9.445732257356125e-06, "loss": 0.2594, "step": 4420 }, { "epoch": 0.35367292654146915, "grad_norm": 0.23628129050290167, "learning_rate": 9.445435794422826e-06, "loss": 0.3173, "step": 4421 }, { "epoch": 0.3537529249415012, "grad_norm": 0.2922182737120497, "learning_rate": 9.445139256880633e-06, "loss": 0.2529, "step": 4422 }, { "epoch": 0.35383292334153316, "grad_norm": 0.31180473630202155, "learning_rate": 9.444842644734521e-06, "loss": 0.2696, "step": 4423 }, { "epoch": 0.3539129217415652, "grad_norm": 0.2664640219560601, "learning_rate": 9.444545957989473e-06, "loss": 0.297, "step": 4424 }, { "epoch": 0.35399292014159717, "grad_norm": 0.23936614827356684, "learning_rate": 9.444249196650465e-06, "loss": 0.3205, "step": 4425 }, { "epoch": 0.35407291854162914, "grad_norm": 0.31225580769074285, "learning_rate": 9.443952360722477e-06, "loss": 0.2897, "step": 4426 }, { "epoch": 0.3541529169416612, "grad_norm": 0.2869200166494667, "learning_rate": 9.443655450210494e-06, "loss": 0.2934, "step": 4427 }, { "epoch": 0.35423291534169316, "grad_norm": 0.20620858285576718, "learning_rate": 9.443358465119495e-06, "loss": 0.3449, "step": 4428 }, { "epoch": 0.3543129137417252, "grad_norm": 0.3178511369967502, "learning_rate": 9.443061405454468e-06, "loss": 0.2609, "step": 4429 }, { "epoch": 0.35439291214175717, "grad_norm": 0.3012788290057523, "learning_rate": 9.442764271220396e-06, "loss": 0.2955, "step": 4430 }, { "epoch": 0.35447291054178914, "grad_norm": 0.2943264347935936, "learning_rate": 9.442467062422267e-06, "loss": 0.2814, "step": 4431 }, { "epoch": 0.3545529089418212, "grad_norm": 0.32795716531023017, "learning_rate": 9.44216977906507e-06, "loss": 0.2624, "step": 4432 }, { "epoch": 0.35463290734185315, "grad_norm": 0.26294382512046843, "learning_rate": 9.441872421153792e-06, "loss": 0.3171, "step": 4433 }, { "epoch": 0.3547129057418852, "grad_norm": 0.23215261991553796, "learning_rate": 9.441574988693428e-06, "loss": 0.299, "step": 4434 }, { "epoch": 0.35479290414191716, "grad_norm": 0.24753659625125082, "learning_rate": 9.441277481688964e-06, "loss": 0.2679, "step": 4435 }, { "epoch": 0.35487290254194914, "grad_norm": 0.341472585630691, "learning_rate": 9.4409799001454e-06, "loss": 0.2477, "step": 4436 }, { "epoch": 0.3549529009419812, "grad_norm": 0.2852737516161907, "learning_rate": 9.440682244067724e-06, "loss": 0.294, "step": 4437 }, { "epoch": 0.35503289934201315, "grad_norm": 0.35447279907086576, "learning_rate": 9.440384513460934e-06, "loss": 0.2857, "step": 4438 }, { "epoch": 0.3551128977420452, "grad_norm": 0.19525603906640657, "learning_rate": 9.440086708330028e-06, "loss": 0.3522, "step": 4439 }, { "epoch": 0.35519289614207716, "grad_norm": 0.2956597073282292, "learning_rate": 9.439788828680003e-06, "loss": 0.3018, "step": 4440 }, { "epoch": 0.35527289454210914, "grad_norm": 0.23992580976088543, "learning_rate": 9.439490874515859e-06, "loss": 0.3063, "step": 4441 }, { "epoch": 0.3553528929421412, "grad_norm": 0.24484804798281992, "learning_rate": 9.439192845842596e-06, "loss": 0.3225, "step": 4442 }, { "epoch": 0.35543289134217315, "grad_norm": 0.3215229986035043, "learning_rate": 9.438894742665217e-06, "loss": 0.2528, "step": 4443 }, { "epoch": 0.35551288974220513, "grad_norm": 0.2631620165223143, "learning_rate": 9.438596564988722e-06, "loss": 0.2877, "step": 4444 }, { "epoch": 0.35559288814223716, "grad_norm": 0.28650928547132853, "learning_rate": 9.43829831281812e-06, "loss": 0.2585, "step": 4445 }, { "epoch": 0.35567288654226914, "grad_norm": 0.33739668125319294, "learning_rate": 9.437999986158413e-06, "loss": 0.2783, "step": 4446 }, { "epoch": 0.3557528849423012, "grad_norm": 0.31955748708451004, "learning_rate": 9.437701585014608e-06, "loss": 0.2561, "step": 4447 }, { "epoch": 0.35583288334233315, "grad_norm": 0.3090232694550856, "learning_rate": 9.437403109391719e-06, "loss": 0.2344, "step": 4448 }, { "epoch": 0.35591288174236513, "grad_norm": 0.2828346847764824, "learning_rate": 9.437104559294748e-06, "loss": 0.2857, "step": 4449 }, { "epoch": 0.35599288014239716, "grad_norm": 0.30289337480785694, "learning_rate": 9.436805934728707e-06, "loss": 0.2753, "step": 4450 }, { "epoch": 0.35607287854242914, "grad_norm": 0.333190583700772, "learning_rate": 9.436507235698613e-06, "loss": 0.2634, "step": 4451 }, { "epoch": 0.35615287694246117, "grad_norm": 0.31440311666104254, "learning_rate": 9.436208462209474e-06, "loss": 0.2709, "step": 4452 }, { "epoch": 0.35623287534249315, "grad_norm": 0.23052313868837052, "learning_rate": 9.435909614266303e-06, "loss": 0.323, "step": 4453 }, { "epoch": 0.3563128737425251, "grad_norm": 0.3033913263148454, "learning_rate": 9.435610691874122e-06, "loss": 0.2558, "step": 4454 }, { "epoch": 0.35639287214255716, "grad_norm": 0.32675912202492474, "learning_rate": 9.435311695037943e-06, "loss": 0.2606, "step": 4455 }, { "epoch": 0.35647287054258914, "grad_norm": 0.45341736283275125, "learning_rate": 9.435012623762785e-06, "loss": 0.2665, "step": 4456 }, { "epoch": 0.35655286894262117, "grad_norm": 0.3270318327387806, "learning_rate": 9.43471347805367e-06, "loss": 0.281, "step": 4457 }, { "epoch": 0.35663286734265315, "grad_norm": 0.2600079143244019, "learning_rate": 9.434414257915614e-06, "loss": 0.2922, "step": 4458 }, { "epoch": 0.3567128657426851, "grad_norm": 0.2586940486877129, "learning_rate": 9.434114963353644e-06, "loss": 0.2807, "step": 4459 }, { "epoch": 0.35679286414271716, "grad_norm": 0.3084230383712127, "learning_rate": 9.433815594372779e-06, "loss": 0.2801, "step": 4460 }, { "epoch": 0.35687286254274914, "grad_norm": 0.2852648501726477, "learning_rate": 9.433516150978045e-06, "loss": 0.2204, "step": 4461 }, { "epoch": 0.35695286094278117, "grad_norm": 0.3019917422443777, "learning_rate": 9.433216633174469e-06, "loss": 0.2573, "step": 4462 }, { "epoch": 0.35703285934281315, "grad_norm": 0.2547033262592317, "learning_rate": 9.432917040967074e-06, "loss": 0.337, "step": 4463 }, { "epoch": 0.3571128577428451, "grad_norm": 0.29461529433087563, "learning_rate": 9.432617374360893e-06, "loss": 0.2485, "step": 4464 }, { "epoch": 0.35719285614287716, "grad_norm": 0.2983154924622732, "learning_rate": 9.432317633360952e-06, "loss": 0.2496, "step": 4465 }, { "epoch": 0.35727285454290914, "grad_norm": 0.30651751004856975, "learning_rate": 9.432017817972282e-06, "loss": 0.3037, "step": 4466 }, { "epoch": 0.35735285294294117, "grad_norm": 0.6587789290331908, "learning_rate": 9.431717928199916e-06, "loss": 0.2777, "step": 4467 }, { "epoch": 0.35743285134297315, "grad_norm": 0.20217020049614517, "learning_rate": 9.431417964048888e-06, "loss": 0.3593, "step": 4468 }, { "epoch": 0.3575128497430051, "grad_norm": 0.2883552702985981, "learning_rate": 9.43111792552423e-06, "loss": 0.2976, "step": 4469 }, { "epoch": 0.35759284814303716, "grad_norm": 0.2844555145949519, "learning_rate": 9.43081781263098e-06, "loss": 0.281, "step": 4470 }, { "epoch": 0.35767284654306913, "grad_norm": 0.31130871097456314, "learning_rate": 9.430517625374171e-06, "loss": 0.2879, "step": 4471 }, { "epoch": 0.3577528449431011, "grad_norm": 0.2724069759505394, "learning_rate": 9.430217363758844e-06, "loss": 0.2945, "step": 4472 }, { "epoch": 0.35783284334313314, "grad_norm": 0.3102826197917452, "learning_rate": 9.42991702779004e-06, "loss": 0.2674, "step": 4473 }, { "epoch": 0.3579128417431651, "grad_norm": 0.3993731542439318, "learning_rate": 9.429616617472796e-06, "loss": 0.272, "step": 4474 }, { "epoch": 0.35799284014319716, "grad_norm": 0.45669771274050597, "learning_rate": 9.429316132812156e-06, "loss": 0.2846, "step": 4475 }, { "epoch": 0.35807283854322913, "grad_norm": 0.29587062075013965, "learning_rate": 9.429015573813163e-06, "loss": 0.2937, "step": 4476 }, { "epoch": 0.3581528369432611, "grad_norm": 0.3071939435957104, "learning_rate": 9.428714940480861e-06, "loss": 0.2597, "step": 4477 }, { "epoch": 0.35823283534329314, "grad_norm": 0.2594193840488259, "learning_rate": 9.428414232820295e-06, "loss": 0.3425, "step": 4478 }, { "epoch": 0.3583128337433251, "grad_norm": 0.19993419728368275, "learning_rate": 9.428113450836514e-06, "loss": 0.3671, "step": 4479 }, { "epoch": 0.35839283214335715, "grad_norm": 0.3150661416341772, "learning_rate": 9.427812594534563e-06, "loss": 0.2637, "step": 4480 }, { "epoch": 0.35847283054338913, "grad_norm": 0.3144894066949601, "learning_rate": 9.427511663919492e-06, "loss": 0.2592, "step": 4481 }, { "epoch": 0.3585528289434211, "grad_norm": 0.27538584832362045, "learning_rate": 9.427210658996353e-06, "loss": 0.2719, "step": 4482 }, { "epoch": 0.35863282734345314, "grad_norm": 0.2531354479254568, "learning_rate": 9.426909579770197e-06, "loss": 0.3137, "step": 4483 }, { "epoch": 0.3587128257434851, "grad_norm": 0.4618133265380159, "learning_rate": 9.42660842624608e-06, "loss": 0.3099, "step": 4484 }, { "epoch": 0.35879282414351715, "grad_norm": 0.21860173599346713, "learning_rate": 9.426307198429053e-06, "loss": 0.347, "step": 4485 }, { "epoch": 0.35887282254354913, "grad_norm": 0.20101630454402167, "learning_rate": 9.42600589632417e-06, "loss": 0.3333, "step": 4486 }, { "epoch": 0.3589528209435811, "grad_norm": 0.2670409930918773, "learning_rate": 9.425704519936492e-06, "loss": 0.3177, "step": 4487 }, { "epoch": 0.35903281934361314, "grad_norm": 0.33940161501015775, "learning_rate": 9.425403069271076e-06, "loss": 0.2541, "step": 4488 }, { "epoch": 0.3591128177436451, "grad_norm": 0.2708896913299644, "learning_rate": 9.425101544332979e-06, "loss": 0.3299, "step": 4489 }, { "epoch": 0.35919281614367715, "grad_norm": 0.2810315733789894, "learning_rate": 9.424799945127263e-06, "loss": 0.3058, "step": 4490 }, { "epoch": 0.35927281454370913, "grad_norm": 0.36450701616855763, "learning_rate": 9.424498271658991e-06, "loss": 0.2752, "step": 4491 }, { "epoch": 0.3593528129437411, "grad_norm": 0.33697939841434216, "learning_rate": 9.424196523933225e-06, "loss": 0.2806, "step": 4492 }, { "epoch": 0.35943281134377314, "grad_norm": 0.31157900290943724, "learning_rate": 9.423894701955028e-06, "loss": 0.2694, "step": 4493 }, { "epoch": 0.3595128097438051, "grad_norm": 0.3132242687415547, "learning_rate": 9.423592805729466e-06, "loss": 0.2606, "step": 4494 }, { "epoch": 0.35959280814383715, "grad_norm": 0.35824916270380586, "learning_rate": 9.42329083526161e-06, "loss": 0.2596, "step": 4495 }, { "epoch": 0.35967280654386913, "grad_norm": 0.3132700667570474, "learning_rate": 9.422988790556524e-06, "loss": 0.2958, "step": 4496 }, { "epoch": 0.3597528049439011, "grad_norm": 0.2842753839713741, "learning_rate": 9.422686671619277e-06, "loss": 0.3272, "step": 4497 }, { "epoch": 0.35983280334393314, "grad_norm": 0.35236423590464855, "learning_rate": 9.42238447845494e-06, "loss": 0.2668, "step": 4498 }, { "epoch": 0.3599128017439651, "grad_norm": 0.3371229421962793, "learning_rate": 9.422082211068586e-06, "loss": 0.2743, "step": 4499 }, { "epoch": 0.3599928001439971, "grad_norm": 0.30034875165333313, "learning_rate": 9.421779869465288e-06, "loss": 0.259, "step": 4500 }, { "epoch": 0.3600727985440291, "grad_norm": 0.4286225481189003, "learning_rate": 9.421477453650118e-06, "loss": 0.2463, "step": 4501 }, { "epoch": 0.3601527969440611, "grad_norm": 0.30272599198383504, "learning_rate": 9.421174963628155e-06, "loss": 0.2491, "step": 4502 }, { "epoch": 0.36023279534409314, "grad_norm": 0.25768740278243524, "learning_rate": 9.420872399404473e-06, "loss": 0.3353, "step": 4503 }, { "epoch": 0.3603127937441251, "grad_norm": 0.27321943339742494, "learning_rate": 9.420569760984152e-06, "loss": 0.2799, "step": 4504 }, { "epoch": 0.3603927921441571, "grad_norm": 0.29506843650648334, "learning_rate": 9.42026704837227e-06, "loss": 0.2649, "step": 4505 }, { "epoch": 0.3604727905441891, "grad_norm": 0.2733325479115751, "learning_rate": 9.419964261573906e-06, "loss": 0.2819, "step": 4506 }, { "epoch": 0.3605527889442211, "grad_norm": 0.35840935516388817, "learning_rate": 9.419661400594145e-06, "loss": 0.2677, "step": 4507 }, { "epoch": 0.36063278734425314, "grad_norm": 0.27087630156695036, "learning_rate": 9.419358465438069e-06, "loss": 0.3158, "step": 4508 }, { "epoch": 0.3607127857442851, "grad_norm": 0.30604821293880397, "learning_rate": 9.41905545611076e-06, "loss": 0.29, "step": 4509 }, { "epoch": 0.3607927841443171, "grad_norm": 0.29834984212125365, "learning_rate": 9.418752372617306e-06, "loss": 0.2724, "step": 4510 }, { "epoch": 0.3608727825443491, "grad_norm": 0.3116680806420882, "learning_rate": 9.418449214962793e-06, "loss": 0.2661, "step": 4511 }, { "epoch": 0.3609527809443811, "grad_norm": 0.20932305616553637, "learning_rate": 9.41814598315231e-06, "loss": 0.3535, "step": 4512 }, { "epoch": 0.36103277934441314, "grad_norm": 0.25947676782948126, "learning_rate": 9.417842677190944e-06, "loss": 0.3026, "step": 4513 }, { "epoch": 0.3611127777444451, "grad_norm": 0.43281973963755344, "learning_rate": 9.417539297083787e-06, "loss": 0.2813, "step": 4514 }, { "epoch": 0.3611927761444771, "grad_norm": 0.26585731791138606, "learning_rate": 9.417235842835929e-06, "loss": 0.3186, "step": 4515 }, { "epoch": 0.3612727745445091, "grad_norm": 0.3071571122215886, "learning_rate": 9.416932314452464e-06, "loss": 0.2814, "step": 4516 }, { "epoch": 0.3613527729445411, "grad_norm": 0.29442914566337125, "learning_rate": 9.416628711938489e-06, "loss": 0.2667, "step": 4517 }, { "epoch": 0.36143277134457313, "grad_norm": 0.2861889292642095, "learning_rate": 9.416325035299094e-06, "loss": 0.2914, "step": 4518 }, { "epoch": 0.3615127697446051, "grad_norm": 0.2355753718154151, "learning_rate": 9.41602128453938e-06, "loss": 0.321, "step": 4519 }, { "epoch": 0.3615927681446371, "grad_norm": 0.2651155794025791, "learning_rate": 9.415717459664443e-06, "loss": 0.2837, "step": 4520 }, { "epoch": 0.3616727665446691, "grad_norm": 0.2514851115644703, "learning_rate": 9.415413560679385e-06, "loss": 0.3071, "step": 4521 }, { "epoch": 0.3617527649447011, "grad_norm": 0.2940407645015243, "learning_rate": 9.415109587589302e-06, "loss": 0.2563, "step": 4522 }, { "epoch": 0.36183276334473313, "grad_norm": 0.28114527438991405, "learning_rate": 9.414805540399298e-06, "loss": 0.294, "step": 4523 }, { "epoch": 0.3619127617447651, "grad_norm": 0.2260838976330332, "learning_rate": 9.414501419114474e-06, "loss": 0.3201, "step": 4524 }, { "epoch": 0.3619927601447971, "grad_norm": 0.59151798067773, "learning_rate": 9.414197223739939e-06, "loss": 0.3096, "step": 4525 }, { "epoch": 0.3620727585448291, "grad_norm": 0.330730994837611, "learning_rate": 9.413892954280793e-06, "loss": 0.3479, "step": 4526 }, { "epoch": 0.3621527569448611, "grad_norm": 0.3860622165168661, "learning_rate": 9.413588610742146e-06, "loss": 0.3143, "step": 4527 }, { "epoch": 0.3622327553448931, "grad_norm": 0.25913422369943356, "learning_rate": 9.413284193129104e-06, "loss": 0.2835, "step": 4528 }, { "epoch": 0.3623127537449251, "grad_norm": 0.36210213359391125, "learning_rate": 9.412979701446776e-06, "loss": 0.2827, "step": 4529 }, { "epoch": 0.3623927521449571, "grad_norm": 0.2898838771167482, "learning_rate": 9.412675135700272e-06, "loss": 0.2877, "step": 4530 }, { "epoch": 0.3624727505449891, "grad_norm": 0.2513596210310494, "learning_rate": 9.412370495894708e-06, "loss": 0.3024, "step": 4531 }, { "epoch": 0.3625527489450211, "grad_norm": 0.3011111371280085, "learning_rate": 9.412065782035193e-06, "loss": 0.2518, "step": 4532 }, { "epoch": 0.3626327473450531, "grad_norm": 0.29918973244841746, "learning_rate": 9.41176099412684e-06, "loss": 0.282, "step": 4533 }, { "epoch": 0.3627127457450851, "grad_norm": 0.28883602464741204, "learning_rate": 9.411456132174768e-06, "loss": 0.3085, "step": 4534 }, { "epoch": 0.3627927441451171, "grad_norm": 0.21863851386648173, "learning_rate": 9.41115119618409e-06, "loss": 0.3481, "step": 4535 }, { "epoch": 0.3628727425451491, "grad_norm": 0.2822141222519014, "learning_rate": 9.410846186159926e-06, "loss": 0.2924, "step": 4536 }, { "epoch": 0.3629527409451811, "grad_norm": 0.28235884195825345, "learning_rate": 9.410541102107394e-06, "loss": 0.2966, "step": 4537 }, { "epoch": 0.3630327393452131, "grad_norm": 0.22863196301218053, "learning_rate": 9.410235944031616e-06, "loss": 0.3269, "step": 4538 }, { "epoch": 0.3631127377452451, "grad_norm": 0.29079668195959973, "learning_rate": 9.40993071193771e-06, "loss": 0.2675, "step": 4539 }, { "epoch": 0.3631927361452771, "grad_norm": 0.2524075925425891, "learning_rate": 9.409625405830804e-06, "loss": 0.3335, "step": 4540 }, { "epoch": 0.3632727345453091, "grad_norm": 0.29576485047062606, "learning_rate": 9.409320025716018e-06, "loss": 0.3, "step": 4541 }, { "epoch": 0.3633527329453411, "grad_norm": 0.26063967091046764, "learning_rate": 9.409014571598478e-06, "loss": 0.3039, "step": 4542 }, { "epoch": 0.3634327313453731, "grad_norm": 0.23136406035153806, "learning_rate": 9.40870904348331e-06, "loss": 0.3416, "step": 4543 }, { "epoch": 0.3635127297454051, "grad_norm": 0.23712501546434936, "learning_rate": 9.408403441375644e-06, "loss": 0.3068, "step": 4544 }, { "epoch": 0.3635927281454371, "grad_norm": 0.30387628213477447, "learning_rate": 9.408097765280608e-06, "loss": 0.2657, "step": 4545 }, { "epoch": 0.3636727265454691, "grad_norm": 0.28223824842109635, "learning_rate": 9.407792015203331e-06, "loss": 0.2734, "step": 4546 }, { "epoch": 0.3637527249455011, "grad_norm": 0.207894299809762, "learning_rate": 9.407486191148947e-06, "loss": 0.3499, "step": 4547 }, { "epoch": 0.36383272334553307, "grad_norm": 0.31848990341620637, "learning_rate": 9.407180293122586e-06, "loss": 0.2722, "step": 4548 }, { "epoch": 0.3639127217455651, "grad_norm": 0.27432527947466057, "learning_rate": 9.406874321129384e-06, "loss": 0.2756, "step": 4549 }, { "epoch": 0.3639927201455971, "grad_norm": 0.2508750191121811, "learning_rate": 9.406568275174475e-06, "loss": 0.2922, "step": 4550 }, { "epoch": 0.3640727185456291, "grad_norm": 0.32027744875107417, "learning_rate": 9.406262155262995e-06, "loss": 0.2654, "step": 4551 }, { "epoch": 0.3641527169456611, "grad_norm": 0.3031426095609422, "learning_rate": 9.405955961400083e-06, "loss": 0.2399, "step": 4552 }, { "epoch": 0.36423271534569307, "grad_norm": 0.3403121690187805, "learning_rate": 9.405649693590877e-06, "loss": 0.2958, "step": 4553 }, { "epoch": 0.3643127137457251, "grad_norm": 0.3078850273436866, "learning_rate": 9.405343351840517e-06, "loss": 0.2715, "step": 4554 }, { "epoch": 0.3643927121457571, "grad_norm": 0.25218323571025897, "learning_rate": 9.405036936154146e-06, "loss": 0.3137, "step": 4555 }, { "epoch": 0.36447271054578906, "grad_norm": 0.3568854564180672, "learning_rate": 9.404730446536905e-06, "loss": 0.3096, "step": 4556 }, { "epoch": 0.3645527089458211, "grad_norm": 0.31619564823892804, "learning_rate": 9.40442388299394e-06, "loss": 0.2496, "step": 4557 }, { "epoch": 0.36463270734585307, "grad_norm": 0.3010449890763942, "learning_rate": 9.404117245530393e-06, "loss": 0.2653, "step": 4558 }, { "epoch": 0.3647127057458851, "grad_norm": 0.31741889594952866, "learning_rate": 9.403810534151411e-06, "loss": 0.2772, "step": 4559 }, { "epoch": 0.3647927041459171, "grad_norm": 0.29957728347144463, "learning_rate": 9.403503748862146e-06, "loss": 0.2574, "step": 4560 }, { "epoch": 0.36487270254594906, "grad_norm": 0.30048095987999945, "learning_rate": 9.403196889667742e-06, "loss": 0.2501, "step": 4561 }, { "epoch": 0.3649527009459811, "grad_norm": 0.23580487420531285, "learning_rate": 9.40288995657335e-06, "loss": 0.3333, "step": 4562 }, { "epoch": 0.36503269934601307, "grad_norm": 0.24036459139326682, "learning_rate": 9.402582949584122e-06, "loss": 0.3071, "step": 4563 }, { "epoch": 0.3651126977460451, "grad_norm": 0.2834537840227493, "learning_rate": 9.40227586870521e-06, "loss": 0.2914, "step": 4564 }, { "epoch": 0.3651926961460771, "grad_norm": 0.29027014858303357, "learning_rate": 9.40196871394177e-06, "loss": 0.2916, "step": 4565 }, { "epoch": 0.36527269454610906, "grad_norm": 0.31099566410256013, "learning_rate": 9.401661485298954e-06, "loss": 0.2968, "step": 4566 }, { "epoch": 0.3653526929461411, "grad_norm": 1.1118475152143055, "learning_rate": 9.401354182781921e-06, "loss": 0.287, "step": 4567 }, { "epoch": 0.36543269134617307, "grad_norm": 0.2953870570671785, "learning_rate": 9.401046806395826e-06, "loss": 0.2497, "step": 4568 }, { "epoch": 0.3655126897462051, "grad_norm": 0.35156429373183057, "learning_rate": 9.400739356145829e-06, "loss": 0.2973, "step": 4569 }, { "epoch": 0.3655926881462371, "grad_norm": 0.26587914941390806, "learning_rate": 9.400431832037092e-06, "loss": 0.2926, "step": 4570 }, { "epoch": 0.36567268654626905, "grad_norm": 0.328839933375101, "learning_rate": 9.400124234074772e-06, "loss": 0.2712, "step": 4571 }, { "epoch": 0.3657526849463011, "grad_norm": 0.2725587201403524, "learning_rate": 9.399816562264034e-06, "loss": 0.2986, "step": 4572 }, { "epoch": 0.36583268334633307, "grad_norm": 0.3207195480820183, "learning_rate": 9.399508816610042e-06, "loss": 0.2612, "step": 4573 }, { "epoch": 0.3659126817463651, "grad_norm": 0.27002140934801894, "learning_rate": 9.399200997117961e-06, "loss": 0.2892, "step": 4574 }, { "epoch": 0.3659926801463971, "grad_norm": 0.27648152829037176, "learning_rate": 9.398893103792956e-06, "loss": 0.3107, "step": 4575 }, { "epoch": 0.36607267854642905, "grad_norm": 0.3205082329959992, "learning_rate": 9.398585136640195e-06, "loss": 0.2802, "step": 4576 }, { "epoch": 0.3661526769464611, "grad_norm": 0.25280020449268564, "learning_rate": 9.398277095664848e-06, "loss": 0.308, "step": 4577 }, { "epoch": 0.36623267534649306, "grad_norm": 0.5656160672890643, "learning_rate": 9.397968980872082e-06, "loss": 0.2627, "step": 4578 }, { "epoch": 0.3663126737465251, "grad_norm": 0.2994260119529522, "learning_rate": 9.397660792267072e-06, "loss": 0.3002, "step": 4579 }, { "epoch": 0.3663926721465571, "grad_norm": 0.2949646105190192, "learning_rate": 9.397352529854987e-06, "loss": 0.2621, "step": 4580 }, { "epoch": 0.36647267054658905, "grad_norm": 0.2913781511531592, "learning_rate": 9.397044193641e-06, "loss": 0.2657, "step": 4581 }, { "epoch": 0.3665526689466211, "grad_norm": 0.2866843539078499, "learning_rate": 9.39673578363029e-06, "loss": 0.2901, "step": 4582 }, { "epoch": 0.36663266734665306, "grad_norm": 0.26608195849976635, "learning_rate": 9.396427299828033e-06, "loss": 0.2932, "step": 4583 }, { "epoch": 0.36671266574668504, "grad_norm": 0.30598439570764546, "learning_rate": 9.396118742239402e-06, "loss": 0.2559, "step": 4584 }, { "epoch": 0.3667926641467171, "grad_norm": 0.2269731161375952, "learning_rate": 9.395810110869579e-06, "loss": 0.3301, "step": 4585 }, { "epoch": 0.36687266254674905, "grad_norm": 0.29815261575523905, "learning_rate": 9.395501405723741e-06, "loss": 0.2811, "step": 4586 }, { "epoch": 0.3669526609467811, "grad_norm": 0.32769361339555636, "learning_rate": 9.395192626807072e-06, "loss": 0.2867, "step": 4587 }, { "epoch": 0.36703265934681306, "grad_norm": 0.281090671303096, "learning_rate": 9.394883774124755e-06, "loss": 0.2941, "step": 4588 }, { "epoch": 0.36711265774684504, "grad_norm": 0.31503907542869636, "learning_rate": 9.39457484768197e-06, "loss": 0.262, "step": 4589 }, { "epoch": 0.36719265614687707, "grad_norm": 0.23001304698108144, "learning_rate": 9.394265847483903e-06, "loss": 0.3111, "step": 4590 }, { "epoch": 0.36727265454690905, "grad_norm": 0.266132330707941, "learning_rate": 9.393956773535742e-06, "loss": 0.3088, "step": 4591 }, { "epoch": 0.3673526529469411, "grad_norm": 0.3010013911452164, "learning_rate": 9.393647625842671e-06, "loss": 0.2875, "step": 4592 }, { "epoch": 0.36743265134697306, "grad_norm": 0.26990632336033993, "learning_rate": 9.393338404409881e-06, "loss": 0.2904, "step": 4593 }, { "epoch": 0.36751264974700504, "grad_norm": 0.2812717508214428, "learning_rate": 9.393029109242562e-06, "loss": 0.307, "step": 4594 }, { "epoch": 0.36759264814703707, "grad_norm": 0.27825922686465965, "learning_rate": 9.392719740345904e-06, "loss": 0.2909, "step": 4595 }, { "epoch": 0.36767264654706905, "grad_norm": 0.25781478149280096, "learning_rate": 9.392410297725099e-06, "loss": 0.2827, "step": 4596 }, { "epoch": 0.3677526449471011, "grad_norm": 0.2722726354463833, "learning_rate": 9.39210078138534e-06, "loss": 0.2637, "step": 4597 }, { "epoch": 0.36783264334713306, "grad_norm": 0.3038452940855688, "learning_rate": 9.391791191331823e-06, "loss": 0.2786, "step": 4598 }, { "epoch": 0.36791264174716504, "grad_norm": 0.33953679487083954, "learning_rate": 9.391481527569744e-06, "loss": 0.2673, "step": 4599 }, { "epoch": 0.36799264014719707, "grad_norm": 0.32923474393822805, "learning_rate": 9.391171790104298e-06, "loss": 0.2721, "step": 4600 }, { "epoch": 0.36807263854722905, "grad_norm": 0.24579890453095807, "learning_rate": 9.390861978940687e-06, "loss": 0.3033, "step": 4601 }, { "epoch": 0.3681526369472611, "grad_norm": 0.2965533628193658, "learning_rate": 9.390552094084107e-06, "loss": 0.2508, "step": 4602 }, { "epoch": 0.36823263534729306, "grad_norm": 0.2858351805187073, "learning_rate": 9.390242135539761e-06, "loss": 0.2756, "step": 4603 }, { "epoch": 0.36831263374732504, "grad_norm": 0.29294292261091875, "learning_rate": 9.389932103312851e-06, "loss": 0.2842, "step": 4604 }, { "epoch": 0.36839263214735707, "grad_norm": 0.2958023675705473, "learning_rate": 9.38962199740858e-06, "loss": 0.2725, "step": 4605 }, { "epoch": 0.36847263054738905, "grad_norm": 0.34806272913676906, "learning_rate": 9.389311817832152e-06, "loss": 0.2734, "step": 4606 }, { "epoch": 0.3685526289474211, "grad_norm": 0.30086264500418197, "learning_rate": 9.389001564588773e-06, "loss": 0.2521, "step": 4607 }, { "epoch": 0.36863262734745306, "grad_norm": 0.2983952640539182, "learning_rate": 9.38869123768365e-06, "loss": 0.3084, "step": 4608 }, { "epoch": 0.36871262574748503, "grad_norm": 0.33458720362553473, "learning_rate": 9.388380837121993e-06, "loss": 0.2553, "step": 4609 }, { "epoch": 0.36879262414751707, "grad_norm": 0.28435329925715064, "learning_rate": 9.38807036290901e-06, "loss": 0.2631, "step": 4610 }, { "epoch": 0.36887262254754905, "grad_norm": 0.3053038317674436, "learning_rate": 9.387759815049911e-06, "loss": 0.2878, "step": 4611 }, { "epoch": 0.368952620947581, "grad_norm": 0.31656285022079794, "learning_rate": 9.38744919354991e-06, "loss": 0.2767, "step": 4612 }, { "epoch": 0.36903261934761306, "grad_norm": 0.24045599706988152, "learning_rate": 9.38713849841422e-06, "loss": 0.3293, "step": 4613 }, { "epoch": 0.36911261774764503, "grad_norm": 0.23785717164441986, "learning_rate": 9.386827729648052e-06, "loss": 0.3306, "step": 4614 }, { "epoch": 0.36919261614767707, "grad_norm": 0.4180508952900747, "learning_rate": 9.386516887256627e-06, "loss": 0.2738, "step": 4615 }, { "epoch": 0.36927261454770904, "grad_norm": 0.28791609003000845, "learning_rate": 9.386205971245157e-06, "loss": 0.2713, "step": 4616 }, { "epoch": 0.369352612947741, "grad_norm": 0.23270118379546492, "learning_rate": 9.385894981618866e-06, "loss": 0.3144, "step": 4617 }, { "epoch": 0.36943261134777305, "grad_norm": 0.2871485491329268, "learning_rate": 9.385583918382966e-06, "loss": 0.2516, "step": 4618 }, { "epoch": 0.36951260974780503, "grad_norm": 0.31839088302898083, "learning_rate": 9.385272781542686e-06, "loss": 0.2677, "step": 4619 }, { "epoch": 0.36959260814783707, "grad_norm": 0.26656999981867596, "learning_rate": 9.38496157110324e-06, "loss": 0.2923, "step": 4620 }, { "epoch": 0.36967260654786904, "grad_norm": 0.3536963266953804, "learning_rate": 9.384650287069856e-06, "loss": 0.2877, "step": 4621 }, { "epoch": 0.369752604947901, "grad_norm": 0.3280761720243504, "learning_rate": 9.384338929447755e-06, "loss": 0.2517, "step": 4622 }, { "epoch": 0.36983260334793305, "grad_norm": 0.3088768793405493, "learning_rate": 9.384027498242168e-06, "loss": 0.2774, "step": 4623 }, { "epoch": 0.36991260174796503, "grad_norm": 0.2673392939817972, "learning_rate": 9.383715993458315e-06, "loss": 0.3176, "step": 4624 }, { "epoch": 0.36999260014799706, "grad_norm": 0.32159499563859906, "learning_rate": 9.38340441510143e-06, "loss": 0.3043, "step": 4625 }, { "epoch": 0.37007259854802904, "grad_norm": 0.27489130810490914, "learning_rate": 9.38309276317674e-06, "loss": 0.3059, "step": 4626 }, { "epoch": 0.370152596948061, "grad_norm": 0.26908686544227184, "learning_rate": 9.382781037689475e-06, "loss": 0.3332, "step": 4627 }, { "epoch": 0.37023259534809305, "grad_norm": 0.25768581013228414, "learning_rate": 9.382469238644864e-06, "loss": 0.3133, "step": 4628 }, { "epoch": 0.37031259374812503, "grad_norm": 0.6438835085732397, "learning_rate": 9.382157366048146e-06, "loss": 0.2875, "step": 4629 }, { "epoch": 0.37039259214815706, "grad_norm": 0.26815661234512994, "learning_rate": 9.381845419904554e-06, "loss": 0.2851, "step": 4630 }, { "epoch": 0.37047259054818904, "grad_norm": 0.3022504065570971, "learning_rate": 9.381533400219319e-06, "loss": 0.2688, "step": 4631 }, { "epoch": 0.370552588948221, "grad_norm": 0.2814119878840409, "learning_rate": 9.381221306997681e-06, "loss": 0.3021, "step": 4632 }, { "epoch": 0.37063258734825305, "grad_norm": 0.31391623319388456, "learning_rate": 9.380909140244878e-06, "loss": 0.2896, "step": 4633 }, { "epoch": 0.37071258574828503, "grad_norm": 0.30656268979820234, "learning_rate": 9.380596899966147e-06, "loss": 0.2909, "step": 4634 }, { "epoch": 0.370792584148317, "grad_norm": 0.30795398449937783, "learning_rate": 9.380284586166732e-06, "loss": 0.2588, "step": 4635 }, { "epoch": 0.37087258254834904, "grad_norm": 0.2849359022624659, "learning_rate": 9.379972198851874e-06, "loss": 0.2951, "step": 4636 }, { "epoch": 0.370952580948381, "grad_norm": 0.30166104848763414, "learning_rate": 9.379659738026812e-06, "loss": 0.2399, "step": 4637 }, { "epoch": 0.37103257934841305, "grad_norm": 0.28771611518273776, "learning_rate": 9.379347203696794e-06, "loss": 0.3038, "step": 4638 }, { "epoch": 0.371112577748445, "grad_norm": 0.2763800709296993, "learning_rate": 9.379034595867062e-06, "loss": 0.2854, "step": 4639 }, { "epoch": 0.371192576148477, "grad_norm": 0.3378184812728501, "learning_rate": 9.378721914542867e-06, "loss": 0.3205, "step": 4640 }, { "epoch": 0.37127257454850904, "grad_norm": 0.2722964242389891, "learning_rate": 9.378409159729454e-06, "loss": 0.2933, "step": 4641 }, { "epoch": 0.371352572948541, "grad_norm": 0.2568437551492094, "learning_rate": 9.378096331432071e-06, "loss": 0.2871, "step": 4642 }, { "epoch": 0.37143257134857305, "grad_norm": 0.31141779802266084, "learning_rate": 9.37778342965597e-06, "loss": 0.2472, "step": 4643 }, { "epoch": 0.371512569748605, "grad_norm": 0.30164303557939237, "learning_rate": 9.377470454406404e-06, "loss": 0.2997, "step": 4644 }, { "epoch": 0.371592568148637, "grad_norm": 0.25955987991021784, "learning_rate": 9.377157405688622e-06, "loss": 0.2978, "step": 4645 }, { "epoch": 0.37167256654866904, "grad_norm": 0.2859636698434837, "learning_rate": 9.37684428350788e-06, "loss": 0.2986, "step": 4646 }, { "epoch": 0.371752564948701, "grad_norm": 0.31251807093040984, "learning_rate": 9.376531087869435e-06, "loss": 0.2628, "step": 4647 }, { "epoch": 0.37183256334873305, "grad_norm": 0.27210950476140455, "learning_rate": 9.37621781877854e-06, "loss": 0.2627, "step": 4648 }, { "epoch": 0.371912561748765, "grad_norm": 0.25472684066751045, "learning_rate": 9.375904476240457e-06, "loss": 0.3423, "step": 4649 }, { "epoch": 0.371992560148797, "grad_norm": 0.3815297294720448, "learning_rate": 9.375591060260439e-06, "loss": 0.279, "step": 4650 }, { "epoch": 0.37207255854882904, "grad_norm": 0.26981390166274777, "learning_rate": 9.37527757084375e-06, "loss": 0.2955, "step": 4651 }, { "epoch": 0.372152556948861, "grad_norm": 0.2917857347964334, "learning_rate": 9.374964007995651e-06, "loss": 0.3084, "step": 4652 }, { "epoch": 0.37223255534889305, "grad_norm": 0.3353738638730792, "learning_rate": 9.374650371721405e-06, "loss": 0.248, "step": 4653 }, { "epoch": 0.372312553748925, "grad_norm": 0.37725142463909683, "learning_rate": 9.374336662026274e-06, "loss": 0.2767, "step": 4654 }, { "epoch": 0.372392552148957, "grad_norm": 0.2800840724068355, "learning_rate": 9.374022878915525e-06, "loss": 0.2966, "step": 4655 }, { "epoch": 0.37247255054898903, "grad_norm": 0.3134351148114533, "learning_rate": 9.373709022394424e-06, "loss": 0.2775, "step": 4656 }, { "epoch": 0.372552548949021, "grad_norm": 0.2625896847014437, "learning_rate": 9.373395092468238e-06, "loss": 0.3059, "step": 4657 }, { "epoch": 0.37263254734905304, "grad_norm": 0.28676629484884947, "learning_rate": 9.373081089142235e-06, "loss": 0.2651, "step": 4658 }, { "epoch": 0.372712545749085, "grad_norm": 0.2782892596161019, "learning_rate": 9.372767012421687e-06, "loss": 0.2933, "step": 4659 }, { "epoch": 0.372792544149117, "grad_norm": 0.28562559892708006, "learning_rate": 9.372452862311862e-06, "loss": 0.2725, "step": 4660 }, { "epoch": 0.37287254254914903, "grad_norm": 0.26748997608943115, "learning_rate": 9.372138638818036e-06, "loss": 0.2871, "step": 4661 }, { "epoch": 0.372952540949181, "grad_norm": 0.2796022726078067, "learning_rate": 9.371824341945481e-06, "loss": 0.3017, "step": 4662 }, { "epoch": 0.373032539349213, "grad_norm": 0.29243598004845744, "learning_rate": 9.371509971699471e-06, "loss": 0.2608, "step": 4663 }, { "epoch": 0.373112537749245, "grad_norm": 0.30200382632076933, "learning_rate": 9.371195528085287e-06, "loss": 0.2804, "step": 4664 }, { "epoch": 0.373192536149277, "grad_norm": 0.28357839833790005, "learning_rate": 9.370881011108198e-06, "loss": 0.271, "step": 4665 }, { "epoch": 0.37327253454930903, "grad_norm": 0.28153314449032313, "learning_rate": 9.37056642077349e-06, "loss": 0.281, "step": 4666 }, { "epoch": 0.373352532949341, "grad_norm": 0.33634541104891535, "learning_rate": 9.370251757086439e-06, "loss": 0.2849, "step": 4667 }, { "epoch": 0.373432531349373, "grad_norm": 0.3838292730602674, "learning_rate": 9.369937020052329e-06, "loss": 0.2898, "step": 4668 }, { "epoch": 0.373512529749405, "grad_norm": 0.4765959671214792, "learning_rate": 9.36962220967644e-06, "loss": 0.2576, "step": 4669 }, { "epoch": 0.373592528149437, "grad_norm": 0.30263956808660836, "learning_rate": 9.369307325964054e-06, "loss": 0.2495, "step": 4670 }, { "epoch": 0.37367252654946903, "grad_norm": 0.3102388571932994, "learning_rate": 9.36899236892046e-06, "loss": 0.3129, "step": 4671 }, { "epoch": 0.373752524949501, "grad_norm": 0.3326410603297145, "learning_rate": 9.368677338550942e-06, "loss": 0.2641, "step": 4672 }, { "epoch": 0.373832523349533, "grad_norm": 0.23756099615171702, "learning_rate": 9.368362234860785e-06, "loss": 0.3134, "step": 4673 }, { "epoch": 0.373912521749565, "grad_norm": 0.29886437655868503, "learning_rate": 9.368047057855282e-06, "loss": 0.2643, "step": 4674 }, { "epoch": 0.373992520149597, "grad_norm": 0.3008738515193368, "learning_rate": 9.36773180753972e-06, "loss": 0.2577, "step": 4675 }, { "epoch": 0.37407251854962903, "grad_norm": 0.2817795060185692, "learning_rate": 9.367416483919387e-06, "loss": 0.2957, "step": 4676 }, { "epoch": 0.374152516949661, "grad_norm": 0.2834223276286296, "learning_rate": 9.367101086999582e-06, "loss": 0.2871, "step": 4677 }, { "epoch": 0.374232515349693, "grad_norm": 0.2774136949114465, "learning_rate": 9.366785616785594e-06, "loss": 0.2872, "step": 4678 }, { "epoch": 0.374312513749725, "grad_norm": 0.24100072942389075, "learning_rate": 9.366470073282718e-06, "loss": 0.3182, "step": 4679 }, { "epoch": 0.374392512149757, "grad_norm": 0.26400384282979905, "learning_rate": 9.36615445649625e-06, "loss": 0.2941, "step": 4680 }, { "epoch": 0.37447251054978903, "grad_norm": 0.29118389309736004, "learning_rate": 9.365838766431487e-06, "loss": 0.3117, "step": 4681 }, { "epoch": 0.374552508949821, "grad_norm": 0.3264776656173028, "learning_rate": 9.365523003093728e-06, "loss": 0.3163, "step": 4682 }, { "epoch": 0.374632507349853, "grad_norm": 0.32265954145840947, "learning_rate": 9.365207166488273e-06, "loss": 0.2599, "step": 4683 }, { "epoch": 0.374712505749885, "grad_norm": 0.7269795164637523, "learning_rate": 9.364891256620422e-06, "loss": 0.2645, "step": 4684 }, { "epoch": 0.374792504149917, "grad_norm": 0.258749341074798, "learning_rate": 9.364575273495475e-06, "loss": 0.2845, "step": 4685 }, { "epoch": 0.374872502549949, "grad_norm": 0.22761247632207296, "learning_rate": 9.364259217118738e-06, "loss": 0.3267, "step": 4686 }, { "epoch": 0.374952500949981, "grad_norm": 0.30801844316727534, "learning_rate": 9.363943087495515e-06, "loss": 0.2845, "step": 4687 }, { "epoch": 0.375032499350013, "grad_norm": 0.277996690762652, "learning_rate": 9.36362688463111e-06, "loss": 0.2961, "step": 4688 }, { "epoch": 0.375112497750045, "grad_norm": 0.2527342044511156, "learning_rate": 9.363310608530834e-06, "loss": 0.32, "step": 4689 }, { "epoch": 0.375192496150077, "grad_norm": 0.33240444190574925, "learning_rate": 9.362994259199988e-06, "loss": 0.3094, "step": 4690 }, { "epoch": 0.37527249455010897, "grad_norm": 0.2740657164953733, "learning_rate": 9.36267783664389e-06, "loss": 0.3145, "step": 4691 }, { "epoch": 0.375352492950141, "grad_norm": 0.35567076802791736, "learning_rate": 9.362361340867846e-06, "loss": 0.2681, "step": 4692 }, { "epoch": 0.375432491350173, "grad_norm": 0.24535327942781943, "learning_rate": 9.362044771877164e-06, "loss": 0.3252, "step": 4693 }, { "epoch": 0.375512489750205, "grad_norm": 0.35322095468792813, "learning_rate": 9.361728129677165e-06, "loss": 0.2763, "step": 4694 }, { "epoch": 0.375592488150237, "grad_norm": 0.24316242408252822, "learning_rate": 9.361411414273159e-06, "loss": 0.3189, "step": 4695 }, { "epoch": 0.37567248655026897, "grad_norm": 0.26768463347269406, "learning_rate": 9.36109462567046e-06, "loss": 0.296, "step": 4696 }, { "epoch": 0.375752484950301, "grad_norm": 0.31278961177299586, "learning_rate": 9.360777763874389e-06, "loss": 0.2562, "step": 4697 }, { "epoch": 0.375832483350333, "grad_norm": 0.2817563764111623, "learning_rate": 9.36046082889026e-06, "loss": 0.2828, "step": 4698 }, { "epoch": 0.375912481750365, "grad_norm": 0.263186110066437, "learning_rate": 9.360143820723395e-06, "loss": 0.3275, "step": 4699 }, { "epoch": 0.375992480150397, "grad_norm": 0.2815778830397835, "learning_rate": 9.359826739379113e-06, "loss": 0.2858, "step": 4700 }, { "epoch": 0.37607247855042897, "grad_norm": 0.2790346637194123, "learning_rate": 9.359509584862735e-06, "loss": 0.298, "step": 4701 }, { "epoch": 0.376152476950461, "grad_norm": 0.32545118933124173, "learning_rate": 9.359192357179587e-06, "loss": 0.2691, "step": 4702 }, { "epoch": 0.376232475350493, "grad_norm": 0.2637625876023099, "learning_rate": 9.35887505633499e-06, "loss": 0.2905, "step": 4703 }, { "epoch": 0.376312473750525, "grad_norm": 0.2958015198850977, "learning_rate": 9.35855768233427e-06, "loss": 0.2717, "step": 4704 }, { "epoch": 0.376392472150557, "grad_norm": 0.2905761974540743, "learning_rate": 9.358240235182754e-06, "loss": 0.2457, "step": 4705 }, { "epoch": 0.37647247055058897, "grad_norm": 0.231397850701339, "learning_rate": 9.35792271488577e-06, "loss": 0.3329, "step": 4706 }, { "epoch": 0.376552468950621, "grad_norm": 0.3078815372030194, "learning_rate": 9.357605121448648e-06, "loss": 0.2699, "step": 4707 }, { "epoch": 0.376632467350653, "grad_norm": 0.3495728004871776, "learning_rate": 9.357287454876715e-06, "loss": 0.2713, "step": 4708 }, { "epoch": 0.376712465750685, "grad_norm": 0.22970366538633463, "learning_rate": 9.356969715175305e-06, "loss": 0.3048, "step": 4709 }, { "epoch": 0.376792464150717, "grad_norm": 0.27149828614080357, "learning_rate": 9.35665190234975e-06, "loss": 0.3096, "step": 4710 }, { "epoch": 0.37687246255074897, "grad_norm": 0.2620521134406785, "learning_rate": 9.356334016405383e-06, "loss": 0.271, "step": 4711 }, { "epoch": 0.376952460950781, "grad_norm": 0.29672659479994967, "learning_rate": 9.356016057347543e-06, "loss": 0.2541, "step": 4712 }, { "epoch": 0.377032459350813, "grad_norm": 0.2845048269291872, "learning_rate": 9.355698025181561e-06, "loss": 0.2902, "step": 4713 }, { "epoch": 0.377112457750845, "grad_norm": 0.22057444746997362, "learning_rate": 9.35537991991278e-06, "loss": 0.3522, "step": 4714 }, { "epoch": 0.377192456150877, "grad_norm": 0.22117835595012575, "learning_rate": 9.355061741546533e-06, "loss": 0.3646, "step": 4715 }, { "epoch": 0.37727245455090896, "grad_norm": 0.3257388823338327, "learning_rate": 9.354743490088166e-06, "loss": 0.2428, "step": 4716 }, { "epoch": 0.377352452950941, "grad_norm": 0.3880830331739957, "learning_rate": 9.354425165543018e-06, "loss": 0.2936, "step": 4717 }, { "epoch": 0.377432451350973, "grad_norm": 0.32389843297190934, "learning_rate": 9.354106767916428e-06, "loss": 0.2703, "step": 4718 }, { "epoch": 0.37751244975100495, "grad_norm": 0.2698153318110809, "learning_rate": 9.353788297213743e-06, "loss": 0.3179, "step": 4719 }, { "epoch": 0.377592448151037, "grad_norm": 0.21754816056131854, "learning_rate": 9.353469753440309e-06, "loss": 0.3271, "step": 4720 }, { "epoch": 0.37767244655106896, "grad_norm": 0.31543811059093235, "learning_rate": 9.35315113660147e-06, "loss": 0.2854, "step": 4721 }, { "epoch": 0.377752444951101, "grad_norm": 0.25162407199770276, "learning_rate": 9.352832446702578e-06, "loss": 0.2778, "step": 4722 }, { "epoch": 0.377832443351133, "grad_norm": 0.29843650840868496, "learning_rate": 9.352513683748974e-06, "loss": 0.2866, "step": 4723 }, { "epoch": 0.37791244175116495, "grad_norm": 0.596364596970844, "learning_rate": 9.352194847746014e-06, "loss": 0.2687, "step": 4724 }, { "epoch": 0.377992440151197, "grad_norm": 0.25959831343278256, "learning_rate": 9.351875938699045e-06, "loss": 0.309, "step": 4725 }, { "epoch": 0.37807243855122896, "grad_norm": 0.3029693451069674, "learning_rate": 9.351556956613423e-06, "loss": 0.2676, "step": 4726 }, { "epoch": 0.378152436951261, "grad_norm": 0.25403835411972375, "learning_rate": 9.351237901494498e-06, "loss": 0.293, "step": 4727 }, { "epoch": 0.378232435351293, "grad_norm": 0.26665258234736167, "learning_rate": 9.35091877334763e-06, "loss": 0.2974, "step": 4728 }, { "epoch": 0.37831243375132495, "grad_norm": 0.41882408645407493, "learning_rate": 9.35059957217817e-06, "loss": 0.2567, "step": 4729 }, { "epoch": 0.378392432151357, "grad_norm": 0.29538992489034543, "learning_rate": 9.350280297991476e-06, "loss": 0.2687, "step": 4730 }, { "epoch": 0.37847243055138896, "grad_norm": 0.2546291437813796, "learning_rate": 9.349960950792907e-06, "loss": 0.2814, "step": 4731 }, { "epoch": 0.378552428951421, "grad_norm": 0.25337431419742307, "learning_rate": 9.349641530587825e-06, "loss": 0.2892, "step": 4732 }, { "epoch": 0.37863242735145297, "grad_norm": 0.30055212740347037, "learning_rate": 9.349322037381587e-06, "loss": 0.2719, "step": 4733 }, { "epoch": 0.37871242575148495, "grad_norm": 0.4418382151695005, "learning_rate": 9.349002471179558e-06, "loss": 0.2846, "step": 4734 }, { "epoch": 0.378792424151517, "grad_norm": 0.2603518449678616, "learning_rate": 9.348682831987101e-06, "loss": 0.2988, "step": 4735 }, { "epoch": 0.37887242255154896, "grad_norm": 0.2824622437105871, "learning_rate": 9.34836311980958e-06, "loss": 0.3015, "step": 4736 }, { "epoch": 0.378952420951581, "grad_norm": 0.300313889548431, "learning_rate": 9.348043334652362e-06, "loss": 0.2897, "step": 4737 }, { "epoch": 0.37903241935161297, "grad_norm": 0.27703618777885997, "learning_rate": 9.34772347652081e-06, "loss": 0.2478, "step": 4738 }, { "epoch": 0.37911241775164495, "grad_norm": 0.28473989189781346, "learning_rate": 9.347403545420298e-06, "loss": 0.2892, "step": 4739 }, { "epoch": 0.379192416151677, "grad_norm": 0.269067828157466, "learning_rate": 9.34708354135619e-06, "loss": 0.2774, "step": 4740 }, { "epoch": 0.37927241455170896, "grad_norm": 0.28976003123172994, "learning_rate": 9.346763464333862e-06, "loss": 0.3331, "step": 4741 }, { "epoch": 0.379352412951741, "grad_norm": 0.25898029910543874, "learning_rate": 9.346443314358682e-06, "loss": 0.2785, "step": 4742 }, { "epoch": 0.37943241135177297, "grad_norm": 0.3052295156775338, "learning_rate": 9.346123091436024e-06, "loss": 0.2651, "step": 4743 }, { "epoch": 0.37951240975180495, "grad_norm": 0.2660681865859992, "learning_rate": 9.345802795571262e-06, "loss": 0.3058, "step": 4744 }, { "epoch": 0.379592408151837, "grad_norm": 0.27871173540684635, "learning_rate": 9.345482426769774e-06, "loss": 0.2864, "step": 4745 }, { "epoch": 0.37967240655186896, "grad_norm": 0.27726904412541825, "learning_rate": 9.345161985036937e-06, "loss": 0.3096, "step": 4746 }, { "epoch": 0.37975240495190093, "grad_norm": 0.27014684741515416, "learning_rate": 9.344841470378125e-06, "loss": 0.301, "step": 4747 }, { "epoch": 0.37983240335193297, "grad_norm": 0.25929576152687656, "learning_rate": 9.34452088279872e-06, "loss": 0.2981, "step": 4748 }, { "epoch": 0.37991240175196495, "grad_norm": 0.2899825999096309, "learning_rate": 9.344200222304103e-06, "loss": 0.259, "step": 4749 }, { "epoch": 0.379992400151997, "grad_norm": 0.3284273014537563, "learning_rate": 9.343879488899653e-06, "loss": 0.2802, "step": 4750 }, { "epoch": 0.38007239855202896, "grad_norm": 0.2919617350679286, "learning_rate": 9.343558682590757e-06, "loss": 0.2661, "step": 4751 }, { "epoch": 0.38015239695206093, "grad_norm": 0.34368748087266526, "learning_rate": 9.343237803382793e-06, "loss": 0.3162, "step": 4752 }, { "epoch": 0.38023239535209297, "grad_norm": 0.2508286002948896, "learning_rate": 9.342916851281155e-06, "loss": 0.3335, "step": 4753 }, { "epoch": 0.38031239375212494, "grad_norm": 0.2997957584435135, "learning_rate": 9.342595826291224e-06, "loss": 0.2657, "step": 4754 }, { "epoch": 0.380392392152157, "grad_norm": 0.26815781727109483, "learning_rate": 9.342274728418388e-06, "loss": 0.2999, "step": 4755 }, { "epoch": 0.38047239055218895, "grad_norm": 0.34466872943359234, "learning_rate": 9.341953557668037e-06, "loss": 0.3076, "step": 4756 }, { "epoch": 0.38055238895222093, "grad_norm": 0.24191439992024608, "learning_rate": 9.34163231404556e-06, "loss": 0.3125, "step": 4757 }, { "epoch": 0.38063238735225297, "grad_norm": 0.31746459168290464, "learning_rate": 9.341310997556352e-06, "loss": 0.2435, "step": 4758 }, { "epoch": 0.38071238575228494, "grad_norm": 0.31418512955237227, "learning_rate": 9.340989608205803e-06, "loss": 0.2568, "step": 4759 }, { "epoch": 0.380792384152317, "grad_norm": 0.31047703324609527, "learning_rate": 9.340668145999308e-06, "loss": 0.2545, "step": 4760 }, { "epoch": 0.38087238255234895, "grad_norm": 0.24510803859178262, "learning_rate": 9.34034661094226e-06, "loss": 0.3151, "step": 4761 }, { "epoch": 0.38095238095238093, "grad_norm": 0.3251268238716554, "learning_rate": 9.340025003040056e-06, "loss": 0.2543, "step": 4762 }, { "epoch": 0.38103237935241296, "grad_norm": 0.2731619054969784, "learning_rate": 9.339703322298098e-06, "loss": 0.3051, "step": 4763 }, { "epoch": 0.38111237775244494, "grad_norm": 0.2745346752430434, "learning_rate": 9.339381568721779e-06, "loss": 0.2787, "step": 4764 }, { "epoch": 0.381192376152477, "grad_norm": 0.2863443760905175, "learning_rate": 9.339059742316501e-06, "loss": 0.2981, "step": 4765 }, { "epoch": 0.38127237455250895, "grad_norm": 0.28992790021659914, "learning_rate": 9.338737843087668e-06, "loss": 0.2973, "step": 4766 }, { "epoch": 0.38135237295254093, "grad_norm": 0.2609971415471059, "learning_rate": 9.33841587104068e-06, "loss": 0.3037, "step": 4767 }, { "epoch": 0.38143237135257296, "grad_norm": 0.31522470261860935, "learning_rate": 9.338093826180941e-06, "loss": 0.2665, "step": 4768 }, { "epoch": 0.38151236975260494, "grad_norm": 0.2380875622384156, "learning_rate": 9.337771708513854e-06, "loss": 0.3214, "step": 4769 }, { "epoch": 0.381592368152637, "grad_norm": 0.28630067401138737, "learning_rate": 9.33744951804483e-06, "loss": 0.2597, "step": 4770 }, { "epoch": 0.38167236655266895, "grad_norm": 0.3406852723299316, "learning_rate": 9.337127254779272e-06, "loss": 0.2905, "step": 4771 }, { "epoch": 0.38175236495270093, "grad_norm": 0.3384121103458429, "learning_rate": 9.336804918722591e-06, "loss": 0.2524, "step": 4772 }, { "epoch": 0.38183236335273296, "grad_norm": 0.3470554452790307, "learning_rate": 9.336482509880195e-06, "loss": 0.2645, "step": 4773 }, { "epoch": 0.38191236175276494, "grad_norm": 0.3155539209132383, "learning_rate": 9.3361600282575e-06, "loss": 0.265, "step": 4774 }, { "epoch": 0.3819923601527969, "grad_norm": 0.23843930232112603, "learning_rate": 9.33583747385991e-06, "loss": 0.3386, "step": 4775 }, { "epoch": 0.38207235855282895, "grad_norm": 0.3075765198465915, "learning_rate": 9.335514846692846e-06, "loss": 0.2898, "step": 4776 }, { "epoch": 0.3821523569528609, "grad_norm": 0.29309647006356254, "learning_rate": 9.335192146761717e-06, "loss": 0.3079, "step": 4777 }, { "epoch": 0.38223235535289296, "grad_norm": 0.270291133434768, "learning_rate": 9.334869374071945e-06, "loss": 0.2936, "step": 4778 }, { "epoch": 0.38231235375292494, "grad_norm": 0.3058053264694791, "learning_rate": 9.334546528628942e-06, "loss": 0.266, "step": 4779 }, { "epoch": 0.3823923521529569, "grad_norm": 0.3229559882313754, "learning_rate": 9.334223610438128e-06, "loss": 0.303, "step": 4780 }, { "epoch": 0.38247235055298895, "grad_norm": 0.29967475524978815, "learning_rate": 9.333900619504923e-06, "loss": 0.2605, "step": 4781 }, { "epoch": 0.3825523489530209, "grad_norm": 0.3083874559674714, "learning_rate": 9.333577555834748e-06, "loss": 0.251, "step": 4782 }, { "epoch": 0.38263234735305296, "grad_norm": 0.21337789665920695, "learning_rate": 9.333254419433026e-06, "loss": 0.3514, "step": 4783 }, { "epoch": 0.38271234575308494, "grad_norm": 0.30435586005980725, "learning_rate": 9.332931210305179e-06, "loss": 0.304, "step": 4784 }, { "epoch": 0.3827923441531169, "grad_norm": 0.2800480942725592, "learning_rate": 9.332607928456629e-06, "loss": 0.2981, "step": 4785 }, { "epoch": 0.38287234255314895, "grad_norm": 0.30226079268804074, "learning_rate": 9.332284573892808e-06, "loss": 0.2709, "step": 4786 }, { "epoch": 0.3829523409531809, "grad_norm": 0.2576230028155306, "learning_rate": 9.331961146619135e-06, "loss": 0.3389, "step": 4787 }, { "epoch": 0.38303233935321296, "grad_norm": 0.30086300414631717, "learning_rate": 9.331637646641046e-06, "loss": 0.2564, "step": 4788 }, { "epoch": 0.38311233775324494, "grad_norm": 0.37654417396874174, "learning_rate": 9.331314073963964e-06, "loss": 0.3092, "step": 4789 }, { "epoch": 0.3831923361532769, "grad_norm": 0.2877783763365303, "learning_rate": 9.330990428593325e-06, "loss": 0.2921, "step": 4790 }, { "epoch": 0.38327233455330895, "grad_norm": 0.3045063611953543, "learning_rate": 9.330666710534556e-06, "loss": 0.2637, "step": 4791 }, { "epoch": 0.3833523329533409, "grad_norm": 0.2995294704173348, "learning_rate": 9.330342919793094e-06, "loss": 0.2926, "step": 4792 }, { "epoch": 0.38343233135337296, "grad_norm": 0.29421488279132874, "learning_rate": 9.33001905637437e-06, "loss": 0.2806, "step": 4793 }, { "epoch": 0.38351232975340493, "grad_norm": 0.28791206120151136, "learning_rate": 9.329695120283823e-06, "loss": 0.2886, "step": 4794 }, { "epoch": 0.3835923281534369, "grad_norm": 0.3192830024315258, "learning_rate": 9.329371111526887e-06, "loss": 0.2452, "step": 4795 }, { "epoch": 0.38367232655346895, "grad_norm": 0.2848780734565408, "learning_rate": 9.329047030109e-06, "loss": 0.3035, "step": 4796 }, { "epoch": 0.3837523249535009, "grad_norm": 0.2534543278102248, "learning_rate": 9.3287228760356e-06, "loss": 0.3271, "step": 4797 }, { "epoch": 0.38383232335353296, "grad_norm": 0.25729605805652633, "learning_rate": 9.328398649312133e-06, "loss": 0.3236, "step": 4798 }, { "epoch": 0.38391232175356493, "grad_norm": 0.26936544930140294, "learning_rate": 9.328074349944034e-06, "loss": 0.3158, "step": 4799 }, { "epoch": 0.3839923201535969, "grad_norm": 0.3200630203115815, "learning_rate": 9.32774997793675e-06, "loss": 0.2673, "step": 4800 }, { "epoch": 0.38407231855362894, "grad_norm": 0.3333604174838229, "learning_rate": 9.327425533295725e-06, "loss": 0.2784, "step": 4801 }, { "epoch": 0.3841523169536609, "grad_norm": 0.2988090436097877, "learning_rate": 9.327101016026399e-06, "loss": 0.2914, "step": 4802 }, { "epoch": 0.3842323153536929, "grad_norm": 0.3118017503599925, "learning_rate": 9.326776426134223e-06, "loss": 0.2906, "step": 4803 }, { "epoch": 0.38431231375372493, "grad_norm": 0.25897457823542364, "learning_rate": 9.326451763624646e-06, "loss": 0.3193, "step": 4804 }, { "epoch": 0.3843923121537569, "grad_norm": 0.29288286463856394, "learning_rate": 9.326127028503114e-06, "loss": 0.2742, "step": 4805 }, { "epoch": 0.38447231055378894, "grad_norm": 0.27391998850846144, "learning_rate": 9.325802220775077e-06, "loss": 0.2315, "step": 4806 }, { "epoch": 0.3845523089538209, "grad_norm": 0.30615986242623516, "learning_rate": 9.325477340445989e-06, "loss": 0.265, "step": 4807 }, { "epoch": 0.3846323073538529, "grad_norm": 0.28735743237106043, "learning_rate": 9.325152387521299e-06, "loss": 0.2947, "step": 4808 }, { "epoch": 0.38471230575388493, "grad_norm": 0.2947071120597362, "learning_rate": 9.324827362006464e-06, "loss": 0.3043, "step": 4809 }, { "epoch": 0.3847923041539169, "grad_norm": 0.27303823592032184, "learning_rate": 9.324502263906937e-06, "loss": 0.2983, "step": 4810 }, { "epoch": 0.38487230255394894, "grad_norm": 0.3600443270894972, "learning_rate": 9.324177093228175e-06, "loss": 0.2928, "step": 4811 }, { "epoch": 0.3849523009539809, "grad_norm": 0.6103823939963011, "learning_rate": 9.323851849975634e-06, "loss": 0.2615, "step": 4812 }, { "epoch": 0.3850322993540129, "grad_norm": 0.3176669946313247, "learning_rate": 9.323526534154775e-06, "loss": 0.2535, "step": 4813 }, { "epoch": 0.38511229775404493, "grad_norm": 0.27867126449888346, "learning_rate": 9.323201145771057e-06, "loss": 0.2875, "step": 4814 }, { "epoch": 0.3851922961540769, "grad_norm": 0.2930399366937355, "learning_rate": 9.32287568482994e-06, "loss": 0.2871, "step": 4815 }, { "epoch": 0.38527229455410894, "grad_norm": 0.26532325107896376, "learning_rate": 9.322550151336887e-06, "loss": 0.3029, "step": 4816 }, { "epoch": 0.3853522929541409, "grad_norm": 0.30378630507854376, "learning_rate": 9.322224545297363e-06, "loss": 0.2593, "step": 4817 }, { "epoch": 0.3854322913541729, "grad_norm": 0.35984160955497496, "learning_rate": 9.32189886671683e-06, "loss": 0.2667, "step": 4818 }, { "epoch": 0.38551228975420493, "grad_norm": 0.27065720026061196, "learning_rate": 9.321573115600755e-06, "loss": 0.2823, "step": 4819 }, { "epoch": 0.3855922881542369, "grad_norm": 0.29763623061560496, "learning_rate": 9.321247291954606e-06, "loss": 0.2514, "step": 4820 }, { "epoch": 0.38567228655426894, "grad_norm": 0.27741306543951094, "learning_rate": 9.32092139578385e-06, "loss": 0.2846, "step": 4821 }, { "epoch": 0.3857522849543009, "grad_norm": 0.2972512531668461, "learning_rate": 9.32059542709396e-06, "loss": 0.2672, "step": 4822 }, { "epoch": 0.3858322833543329, "grad_norm": 0.3130207178114284, "learning_rate": 9.3202693858904e-06, "loss": 0.2742, "step": 4823 }, { "epoch": 0.3859122817543649, "grad_norm": 0.3023261789103395, "learning_rate": 9.319943272178648e-06, "loss": 0.2625, "step": 4824 }, { "epoch": 0.3859922801543969, "grad_norm": 0.2843282128055772, "learning_rate": 9.319617085964177e-06, "loss": 0.3175, "step": 4825 }, { "epoch": 0.38607227855442894, "grad_norm": 0.8494740364278741, "learning_rate": 9.31929082725246e-06, "loss": 0.2673, "step": 4826 }, { "epoch": 0.3861522769544609, "grad_norm": 0.30711964879198966, "learning_rate": 9.31896449604897e-06, "loss": 0.2746, "step": 4827 }, { "epoch": 0.3862322753544929, "grad_norm": 0.273812140557507, "learning_rate": 9.318638092359188e-06, "loss": 0.3034, "step": 4828 }, { "epoch": 0.3863122737545249, "grad_norm": 0.3394264031895906, "learning_rate": 9.318311616188592e-06, "loss": 0.283, "step": 4829 }, { "epoch": 0.3863922721545569, "grad_norm": 0.3170360725935444, "learning_rate": 9.317985067542658e-06, "loss": 0.2969, "step": 4830 }, { "epoch": 0.3864722705545889, "grad_norm": 0.32317898874187156, "learning_rate": 9.31765844642687e-06, "loss": 0.2656, "step": 4831 }, { "epoch": 0.3865522689546209, "grad_norm": 0.2732705025172585, "learning_rate": 9.317331752846708e-06, "loss": 0.2729, "step": 4832 }, { "epoch": 0.3866322673546529, "grad_norm": 0.2583479740476746, "learning_rate": 9.317004986807656e-06, "loss": 0.3221, "step": 4833 }, { "epoch": 0.3867122657546849, "grad_norm": 0.2901595131570063, "learning_rate": 9.316678148315196e-06, "loss": 0.2803, "step": 4834 }, { "epoch": 0.3867922641547169, "grad_norm": 0.23921873893848905, "learning_rate": 9.316351237374816e-06, "loss": 0.3305, "step": 4835 }, { "epoch": 0.3868722625547489, "grad_norm": 0.2446780521797944, "learning_rate": 9.316024253992e-06, "loss": 0.3134, "step": 4836 }, { "epoch": 0.3869522609547809, "grad_norm": 0.2382191770944095, "learning_rate": 9.315697198172238e-06, "loss": 0.3633, "step": 4837 }, { "epoch": 0.3870322593548129, "grad_norm": 0.30551837299635143, "learning_rate": 9.31537006992102e-06, "loss": 0.2596, "step": 4838 }, { "epoch": 0.3871122577548449, "grad_norm": 0.23338363387067115, "learning_rate": 9.315042869243833e-06, "loss": 0.3137, "step": 4839 }, { "epoch": 0.3871922561548769, "grad_norm": 0.28340900564158766, "learning_rate": 9.314715596146171e-06, "loss": 0.2958, "step": 4840 }, { "epoch": 0.3872722545549089, "grad_norm": 0.27844845401447654, "learning_rate": 9.314388250633526e-06, "loss": 0.2799, "step": 4841 }, { "epoch": 0.3873522529549409, "grad_norm": 0.3095038597494353, "learning_rate": 9.31406083271139e-06, "loss": 0.269, "step": 4842 }, { "epoch": 0.3874322513549729, "grad_norm": 0.3067432222748822, "learning_rate": 9.313733342385263e-06, "loss": 0.2772, "step": 4843 }, { "epoch": 0.3875122497550049, "grad_norm": 0.2895046446944875, "learning_rate": 9.313405779660638e-06, "loss": 0.305, "step": 4844 }, { "epoch": 0.3875922481550369, "grad_norm": 0.23593236008752308, "learning_rate": 9.313078144543012e-06, "loss": 0.3258, "step": 4845 }, { "epoch": 0.3876722465550689, "grad_norm": 0.3088175389983806, "learning_rate": 9.312750437037886e-06, "loss": 0.2446, "step": 4846 }, { "epoch": 0.3877522449551009, "grad_norm": 0.3136528412794775, "learning_rate": 9.312422657150755e-06, "loss": 0.2455, "step": 4847 }, { "epoch": 0.3878322433551329, "grad_norm": 0.29577318289844057, "learning_rate": 9.312094804887126e-06, "loss": 0.2634, "step": 4848 }, { "epoch": 0.3879122417551649, "grad_norm": 0.5062111558308404, "learning_rate": 9.3117668802525e-06, "loss": 0.2534, "step": 4849 }, { "epoch": 0.3879922401551969, "grad_norm": 0.26534665828161746, "learning_rate": 9.31143888325238e-06, "loss": 0.2759, "step": 4850 }, { "epoch": 0.3880722385552289, "grad_norm": 0.3174845989977014, "learning_rate": 9.31111081389227e-06, "loss": 0.2428, "step": 4851 }, { "epoch": 0.3881522369552609, "grad_norm": 0.31290690083661776, "learning_rate": 9.310782672177678e-06, "loss": 0.2485, "step": 4852 }, { "epoch": 0.3882322353552929, "grad_norm": 0.29791880078532285, "learning_rate": 9.31045445811411e-06, "loss": 0.3058, "step": 4853 }, { "epoch": 0.3883122337553249, "grad_norm": 0.2912453067191081, "learning_rate": 9.310126171707075e-06, "loss": 0.2789, "step": 4854 }, { "epoch": 0.3883922321553569, "grad_norm": 0.2989256247091368, "learning_rate": 9.30979781296208e-06, "loss": 0.2468, "step": 4855 }, { "epoch": 0.3884722305553889, "grad_norm": 0.3038506475186876, "learning_rate": 9.309469381884641e-06, "loss": 0.2804, "step": 4856 }, { "epoch": 0.3885522289554209, "grad_norm": 0.26373862741738086, "learning_rate": 9.309140878480267e-06, "loss": 0.3167, "step": 4857 }, { "epoch": 0.3886322273554529, "grad_norm": 0.27605479311799785, "learning_rate": 9.30881230275447e-06, "loss": 0.2884, "step": 4858 }, { "epoch": 0.38871222575548486, "grad_norm": 0.2971406902988554, "learning_rate": 9.308483654712769e-06, "loss": 0.2933, "step": 4859 }, { "epoch": 0.3887922241555169, "grad_norm": 0.33034469346797674, "learning_rate": 9.308154934360676e-06, "loss": 0.2479, "step": 4860 }, { "epoch": 0.3888722225555489, "grad_norm": 0.2430513829801032, "learning_rate": 9.30782614170371e-06, "loss": 0.3139, "step": 4861 }, { "epoch": 0.3889522209555809, "grad_norm": 0.2797729810835908, "learning_rate": 9.307497276747389e-06, "loss": 0.2909, "step": 4862 }, { "epoch": 0.3890322193556129, "grad_norm": 0.2889856263949378, "learning_rate": 9.30716833949723e-06, "loss": 0.2775, "step": 4863 }, { "epoch": 0.38911221775564486, "grad_norm": 0.2670152119338059, "learning_rate": 9.306839329958754e-06, "loss": 0.2767, "step": 4864 }, { "epoch": 0.3891922161556769, "grad_norm": 0.2933177046303458, "learning_rate": 9.306510248137488e-06, "loss": 0.3028, "step": 4865 }, { "epoch": 0.3892722145557089, "grad_norm": 0.2858990208010733, "learning_rate": 9.30618109403895e-06, "loss": 0.3164, "step": 4866 }, { "epoch": 0.3893522129557409, "grad_norm": 0.28461954063000644, "learning_rate": 9.305851867668665e-06, "loss": 0.2854, "step": 4867 }, { "epoch": 0.3894322113557729, "grad_norm": 0.2813236239302838, "learning_rate": 9.30552256903216e-06, "loss": 0.3074, "step": 4868 }, { "epoch": 0.38951220975580486, "grad_norm": 0.27473870450145954, "learning_rate": 9.305193198134962e-06, "loss": 0.305, "step": 4869 }, { "epoch": 0.3895922081558369, "grad_norm": 0.5526305731148495, "learning_rate": 9.304863754982596e-06, "loss": 0.2508, "step": 4870 }, { "epoch": 0.38967220655586887, "grad_norm": 0.2737252159714737, "learning_rate": 9.304534239580591e-06, "loss": 0.2793, "step": 4871 }, { "epoch": 0.3897522049559009, "grad_norm": 0.334680525627947, "learning_rate": 9.304204651934484e-06, "loss": 0.2774, "step": 4872 }, { "epoch": 0.3898322033559329, "grad_norm": 0.2781667571653855, "learning_rate": 9.303874992049797e-06, "loss": 0.2975, "step": 4873 }, { "epoch": 0.38991220175596486, "grad_norm": 0.27324606288748143, "learning_rate": 9.303545259932072e-06, "loss": 0.2866, "step": 4874 }, { "epoch": 0.3899922001559969, "grad_norm": 0.5961633731840792, "learning_rate": 9.303215455586835e-06, "loss": 0.27, "step": 4875 }, { "epoch": 0.39007219855602887, "grad_norm": 0.32070531476653424, "learning_rate": 9.302885579019626e-06, "loss": 0.2509, "step": 4876 }, { "epoch": 0.3901521969560609, "grad_norm": 0.285952337217137, "learning_rate": 9.30255563023598e-06, "loss": 0.2634, "step": 4877 }, { "epoch": 0.3902321953560929, "grad_norm": 0.2778755048420881, "learning_rate": 9.302225609241436e-06, "loss": 0.3037, "step": 4878 }, { "epoch": 0.39031219375612486, "grad_norm": 0.3479077575717999, "learning_rate": 9.301895516041531e-06, "loss": 0.2645, "step": 4879 }, { "epoch": 0.3903921921561569, "grad_norm": 0.3165529235609275, "learning_rate": 9.301565350641806e-06, "loss": 0.2523, "step": 4880 }, { "epoch": 0.39047219055618887, "grad_norm": 0.2967099086265595, "learning_rate": 9.301235113047801e-06, "loss": 0.2866, "step": 4881 }, { "epoch": 0.3905521889562209, "grad_norm": 0.2836419264167962, "learning_rate": 9.300904803265061e-06, "loss": 0.2824, "step": 4882 }, { "epoch": 0.3906321873562529, "grad_norm": 0.3009136868594308, "learning_rate": 9.300574421299127e-06, "loss": 0.2729, "step": 4883 }, { "epoch": 0.39071218575628486, "grad_norm": 0.24900676681618725, "learning_rate": 9.300243967155545e-06, "loss": 0.3212, "step": 4884 }, { "epoch": 0.3907921841563169, "grad_norm": 0.28562486784397295, "learning_rate": 9.299913440839859e-06, "loss": 0.2687, "step": 4885 }, { "epoch": 0.39087218255634887, "grad_norm": 0.23408404401543617, "learning_rate": 9.299582842357619e-06, "loss": 0.327, "step": 4886 }, { "epoch": 0.39095218095638085, "grad_norm": 0.34196709134874403, "learning_rate": 9.299252171714374e-06, "loss": 0.2349, "step": 4887 }, { "epoch": 0.3910321793564129, "grad_norm": 0.21750651196099097, "learning_rate": 9.298921428915674e-06, "loss": 0.3517, "step": 4888 }, { "epoch": 0.39111217775644486, "grad_norm": 0.3290300799951341, "learning_rate": 9.298590613967067e-06, "loss": 0.2926, "step": 4889 }, { "epoch": 0.3911921761564769, "grad_norm": 0.30926738760470074, "learning_rate": 9.298259726874105e-06, "loss": 0.2467, "step": 4890 }, { "epoch": 0.39127217455650887, "grad_norm": 0.29235639767949917, "learning_rate": 9.297928767642346e-06, "loss": 0.2529, "step": 4891 }, { "epoch": 0.39135217295654084, "grad_norm": 0.21947920904478438, "learning_rate": 9.297597736277339e-06, "loss": 0.3621, "step": 4892 }, { "epoch": 0.3914321713565729, "grad_norm": 0.27020595724711527, "learning_rate": 9.297266632784646e-06, "loss": 0.2811, "step": 4893 }, { "epoch": 0.39151216975660486, "grad_norm": 0.26218227191969007, "learning_rate": 9.296935457169817e-06, "loss": 0.3147, "step": 4894 }, { "epoch": 0.3915921681566369, "grad_norm": 0.29558364252541164, "learning_rate": 9.296604209438414e-06, "loss": 0.2671, "step": 4895 }, { "epoch": 0.39167216655666887, "grad_norm": 0.3353527865799121, "learning_rate": 9.296272889595997e-06, "loss": 0.2717, "step": 4896 }, { "epoch": 0.39175216495670084, "grad_norm": 0.2907789445184273, "learning_rate": 9.295941497648125e-06, "loss": 0.2457, "step": 4897 }, { "epoch": 0.3918321633567329, "grad_norm": 0.40840004195439866, "learning_rate": 9.295610033600362e-06, "loss": 0.2691, "step": 4898 }, { "epoch": 0.39191216175676485, "grad_norm": 0.3000432329400431, "learning_rate": 9.295278497458266e-06, "loss": 0.2679, "step": 4899 }, { "epoch": 0.3919921601567969, "grad_norm": 0.28720880591716935, "learning_rate": 9.294946889227408e-06, "loss": 0.2802, "step": 4900 }, { "epoch": 0.39207215855682886, "grad_norm": 0.3257263473546301, "learning_rate": 9.29461520891335e-06, "loss": 0.286, "step": 4901 }, { "epoch": 0.39215215695686084, "grad_norm": 0.3046700558668821, "learning_rate": 9.294283456521658e-06, "loss": 0.3002, "step": 4902 }, { "epoch": 0.3922321553568929, "grad_norm": 0.27670311340069864, "learning_rate": 9.2939516320579e-06, "loss": 0.2859, "step": 4903 }, { "epoch": 0.39231215375692485, "grad_norm": 0.2551949968873366, "learning_rate": 9.293619735527649e-06, "loss": 0.3101, "step": 4904 }, { "epoch": 0.3923921521569569, "grad_norm": 0.2939482951074272, "learning_rate": 9.293287766936469e-06, "loss": 0.3045, "step": 4905 }, { "epoch": 0.39247215055698886, "grad_norm": 0.28532471828073486, "learning_rate": 9.292955726289935e-06, "loss": 0.3059, "step": 4906 }, { "epoch": 0.39255214895702084, "grad_norm": 0.2549460574195678, "learning_rate": 9.29262361359362e-06, "loss": 0.321, "step": 4907 }, { "epoch": 0.3926321473570529, "grad_norm": 0.265134346748614, "learning_rate": 9.292291428853097e-06, "loss": 0.3168, "step": 4908 }, { "epoch": 0.39271214575708485, "grad_norm": 0.30934495830095277, "learning_rate": 9.291959172073943e-06, "loss": 0.2781, "step": 4909 }, { "epoch": 0.3927921441571169, "grad_norm": 0.25704991891225626, "learning_rate": 9.291626843261732e-06, "loss": 0.2828, "step": 4910 }, { "epoch": 0.39287214255714886, "grad_norm": 0.26879678043745536, "learning_rate": 9.291294442422043e-06, "loss": 0.2995, "step": 4911 }, { "epoch": 0.39295214095718084, "grad_norm": 0.26598922910372863, "learning_rate": 9.290961969560452e-06, "loss": 0.2839, "step": 4912 }, { "epoch": 0.3930321393572129, "grad_norm": 0.29662800310511234, "learning_rate": 9.290629424682543e-06, "loss": 0.291, "step": 4913 }, { "epoch": 0.39311213775724485, "grad_norm": 0.23815048215809567, "learning_rate": 9.290296807793894e-06, "loss": 0.3158, "step": 4914 }, { "epoch": 0.39319213615727683, "grad_norm": 0.2637924687959926, "learning_rate": 9.289964118900092e-06, "loss": 0.3122, "step": 4915 }, { "epoch": 0.39327213455730886, "grad_norm": 0.256407275662942, "learning_rate": 9.289631358006715e-06, "loss": 0.305, "step": 4916 }, { "epoch": 0.39335213295734084, "grad_norm": 0.30605527614189065, "learning_rate": 9.28929852511935e-06, "loss": 0.2404, "step": 4917 }, { "epoch": 0.39343213135737287, "grad_norm": 0.296514600494335, "learning_rate": 9.288965620243584e-06, "loss": 0.2787, "step": 4918 }, { "epoch": 0.39351212975740485, "grad_norm": 0.3098202512502373, "learning_rate": 9.288632643385002e-06, "loss": 0.2627, "step": 4919 }, { "epoch": 0.3935921281574368, "grad_norm": 0.3163530054582972, "learning_rate": 9.288299594549195e-06, "loss": 0.2331, "step": 4920 }, { "epoch": 0.39367212655746886, "grad_norm": 0.29614248193823495, "learning_rate": 9.287966473741752e-06, "loss": 0.2664, "step": 4921 }, { "epoch": 0.39375212495750084, "grad_norm": 0.2711493578891143, "learning_rate": 9.287633280968263e-06, "loss": 0.293, "step": 4922 }, { "epoch": 0.39383212335753287, "grad_norm": 0.31219455842559224, "learning_rate": 9.28730001623432e-06, "loss": 0.277, "step": 4923 }, { "epoch": 0.39391212175756485, "grad_norm": 0.28966369424049765, "learning_rate": 9.286966679545516e-06, "loss": 0.2966, "step": 4924 }, { "epoch": 0.3939921201575968, "grad_norm": 0.31777465376102604, "learning_rate": 9.286633270907448e-06, "loss": 0.2592, "step": 4925 }, { "epoch": 0.39407211855762886, "grad_norm": 0.28584211046025093, "learning_rate": 9.286299790325708e-06, "loss": 0.2637, "step": 4926 }, { "epoch": 0.39415211695766084, "grad_norm": 0.20535500953986718, "learning_rate": 9.285966237805895e-06, "loss": 0.3447, "step": 4927 }, { "epoch": 0.39423211535769287, "grad_norm": 0.28768616416881776, "learning_rate": 9.285632613353607e-06, "loss": 0.283, "step": 4928 }, { "epoch": 0.39431211375772485, "grad_norm": 0.2952998277188881, "learning_rate": 9.285298916974443e-06, "loss": 0.2786, "step": 4929 }, { "epoch": 0.3943921121577568, "grad_norm": 0.26674459512069204, "learning_rate": 9.284965148674004e-06, "loss": 0.2865, "step": 4930 }, { "epoch": 0.39447211055778886, "grad_norm": 0.2713987160330422, "learning_rate": 9.284631308457892e-06, "loss": 0.2823, "step": 4931 }, { "epoch": 0.39455210895782084, "grad_norm": 0.27189608905177337, "learning_rate": 9.284297396331709e-06, "loss": 0.3061, "step": 4932 }, { "epoch": 0.39463210735785287, "grad_norm": 0.28139145402442345, "learning_rate": 9.283963412301058e-06, "loss": 0.284, "step": 4933 }, { "epoch": 0.39471210575788485, "grad_norm": 0.200147738129542, "learning_rate": 9.283629356371548e-06, "loss": 0.3352, "step": 4934 }, { "epoch": 0.3947921041579168, "grad_norm": 0.2637830719066077, "learning_rate": 9.28329522854878e-06, "loss": 0.2798, "step": 4935 }, { "epoch": 0.39487210255794886, "grad_norm": 0.24386134132415022, "learning_rate": 9.282961028838369e-06, "loss": 0.3116, "step": 4936 }, { "epoch": 0.39495210095798083, "grad_norm": 0.2916238769998534, "learning_rate": 9.282626757245918e-06, "loss": 0.2865, "step": 4937 }, { "epoch": 0.39503209935801287, "grad_norm": 0.310428523305582, "learning_rate": 9.282292413777039e-06, "loss": 0.2448, "step": 4938 }, { "epoch": 0.39511209775804484, "grad_norm": 0.4393450085560604, "learning_rate": 9.281957998437345e-06, "loss": 0.2845, "step": 4939 }, { "epoch": 0.3951920961580768, "grad_norm": 0.2288654384369801, "learning_rate": 9.281623511232445e-06, "loss": 0.3203, "step": 4940 }, { "epoch": 0.39527209455810886, "grad_norm": 0.24376670709846104, "learning_rate": 9.281288952167957e-06, "loss": 0.3249, "step": 4941 }, { "epoch": 0.39535209295814083, "grad_norm": 0.2743079915780085, "learning_rate": 9.280954321249492e-06, "loss": 0.2703, "step": 4942 }, { "epoch": 0.3954320913581728, "grad_norm": 0.6256807932792317, "learning_rate": 9.28061961848267e-06, "loss": 0.2629, "step": 4943 }, { "epoch": 0.39551208975820484, "grad_norm": 0.27249929039297366, "learning_rate": 9.280284843873104e-06, "loss": 0.3014, "step": 4944 }, { "epoch": 0.3955920881582368, "grad_norm": 0.3027042764144979, "learning_rate": 9.279949997426417e-06, "loss": 0.2877, "step": 4945 }, { "epoch": 0.39567208655826885, "grad_norm": 0.32336793433404165, "learning_rate": 9.279615079148227e-06, "loss": 0.2904, "step": 4946 }, { "epoch": 0.39575208495830083, "grad_norm": 0.34576818216412203, "learning_rate": 9.279280089044155e-06, "loss": 0.2423, "step": 4947 }, { "epoch": 0.3958320833583328, "grad_norm": 0.30287458691336006, "learning_rate": 9.278945027119821e-06, "loss": 0.3037, "step": 4948 }, { "epoch": 0.39591208175836484, "grad_norm": 0.24735420503803085, "learning_rate": 9.278609893380855e-06, "loss": 0.3226, "step": 4949 }, { "epoch": 0.3959920801583968, "grad_norm": 0.2616705547544419, "learning_rate": 9.278274687832874e-06, "loss": 0.2844, "step": 4950 }, { "epoch": 0.39607207855842885, "grad_norm": 0.2550626812227144, "learning_rate": 9.277939410481507e-06, "loss": 0.3267, "step": 4951 }, { "epoch": 0.39615207695846083, "grad_norm": 0.24097297595164116, "learning_rate": 9.277604061332382e-06, "loss": 0.2639, "step": 4952 }, { "epoch": 0.3962320753584928, "grad_norm": 0.30141052522388384, "learning_rate": 9.277268640391126e-06, "loss": 0.2595, "step": 4953 }, { "epoch": 0.39631207375852484, "grad_norm": 0.3266268961564636, "learning_rate": 9.27693314766337e-06, "loss": 0.292, "step": 4954 }, { "epoch": 0.3963920721585568, "grad_norm": 0.33201987292222385, "learning_rate": 9.27659758315474e-06, "loss": 0.2418, "step": 4955 }, { "epoch": 0.39647207055858885, "grad_norm": 0.32265864992593174, "learning_rate": 9.276261946870875e-06, "loss": 0.256, "step": 4956 }, { "epoch": 0.39655206895862083, "grad_norm": 0.3364486534565527, "learning_rate": 9.275926238817404e-06, "loss": 0.2974, "step": 4957 }, { "epoch": 0.3966320673586528, "grad_norm": 0.31837491247763366, "learning_rate": 9.275590458999962e-06, "loss": 0.3333, "step": 4958 }, { "epoch": 0.39671206575868484, "grad_norm": 0.2953471841244303, "learning_rate": 9.275254607424182e-06, "loss": 0.2355, "step": 4959 }, { "epoch": 0.3967920641587168, "grad_norm": 0.27829245783281253, "learning_rate": 9.274918684095703e-06, "loss": 0.2755, "step": 4960 }, { "epoch": 0.39687206255874885, "grad_norm": 0.20255866690888125, "learning_rate": 9.274582689020164e-06, "loss": 0.3361, "step": 4961 }, { "epoch": 0.39695206095878083, "grad_norm": 0.3661372714960386, "learning_rate": 9.274246622203204e-06, "loss": 0.3228, "step": 4962 }, { "epoch": 0.3970320593588128, "grad_norm": 0.3807652867315839, "learning_rate": 9.273910483650461e-06, "loss": 0.3066, "step": 4963 }, { "epoch": 0.39711205775884484, "grad_norm": 0.26442186656373057, "learning_rate": 9.273574273367577e-06, "loss": 0.2921, "step": 4964 }, { "epoch": 0.3971920561588768, "grad_norm": 0.3356222186116281, "learning_rate": 9.273237991360196e-06, "loss": 0.2812, "step": 4965 }, { "epoch": 0.39727205455890885, "grad_norm": 0.2951746129500423, "learning_rate": 9.272901637633961e-06, "loss": 0.2677, "step": 4966 }, { "epoch": 0.3973520529589408, "grad_norm": 0.26310663834970627, "learning_rate": 9.272565212194517e-06, "loss": 0.3, "step": 4967 }, { "epoch": 0.3974320513589728, "grad_norm": 0.3389389095705509, "learning_rate": 9.272228715047512e-06, "loss": 0.2681, "step": 4968 }, { "epoch": 0.39751204975900484, "grad_norm": 0.29524617299712363, "learning_rate": 9.271892146198591e-06, "loss": 0.2625, "step": 4969 }, { "epoch": 0.3975920481590368, "grad_norm": 0.30988479736107793, "learning_rate": 9.271555505653404e-06, "loss": 0.2645, "step": 4970 }, { "epoch": 0.3976720465590688, "grad_norm": 0.23920328035494015, "learning_rate": 9.271218793417601e-06, "loss": 0.3122, "step": 4971 }, { "epoch": 0.3977520449591008, "grad_norm": 0.25015261401070144, "learning_rate": 9.270882009496834e-06, "loss": 0.3049, "step": 4972 }, { "epoch": 0.3978320433591328, "grad_norm": 0.23649340434488053, "learning_rate": 9.270545153896752e-06, "loss": 0.3305, "step": 4973 }, { "epoch": 0.39791204175916484, "grad_norm": 0.24473822786817326, "learning_rate": 9.270208226623013e-06, "loss": 0.3134, "step": 4974 }, { "epoch": 0.3979920401591968, "grad_norm": 0.3004297306808186, "learning_rate": 9.269871227681268e-06, "loss": 0.2633, "step": 4975 }, { "epoch": 0.3980720385592288, "grad_norm": 0.2675037887902771, "learning_rate": 9.269534157077177e-06, "loss": 0.2917, "step": 4976 }, { "epoch": 0.3981520369592608, "grad_norm": 0.27351496789423035, "learning_rate": 9.269197014816393e-06, "loss": 0.2952, "step": 4977 }, { "epoch": 0.3982320353592928, "grad_norm": 0.30134302204225966, "learning_rate": 9.268859800904575e-06, "loss": 0.3044, "step": 4978 }, { "epoch": 0.39831203375932484, "grad_norm": 0.317782581541904, "learning_rate": 9.268522515347384e-06, "loss": 0.2931, "step": 4979 }, { "epoch": 0.3983920321593568, "grad_norm": 0.3144302736878511, "learning_rate": 9.268185158150482e-06, "loss": 0.2664, "step": 4980 }, { "epoch": 0.3984720305593888, "grad_norm": 0.29890159728713495, "learning_rate": 9.267847729319528e-06, "loss": 0.2964, "step": 4981 }, { "epoch": 0.3985520289594208, "grad_norm": 0.30612932473175336, "learning_rate": 9.267510228860187e-06, "loss": 0.2842, "step": 4982 }, { "epoch": 0.3986320273594528, "grad_norm": 0.29716721633879417, "learning_rate": 9.267172656778122e-06, "loss": 0.2945, "step": 4983 }, { "epoch": 0.39871202575948483, "grad_norm": 0.2783425684967864, "learning_rate": 9.266835013079e-06, "loss": 0.3015, "step": 4984 }, { "epoch": 0.3987920241595168, "grad_norm": 0.23434914632947976, "learning_rate": 9.266497297768487e-06, "loss": 0.3171, "step": 4985 }, { "epoch": 0.3988720225595488, "grad_norm": 0.23066008964775217, "learning_rate": 9.266159510852251e-06, "loss": 0.3205, "step": 4986 }, { "epoch": 0.3989520209595808, "grad_norm": 0.3191400251677262, "learning_rate": 9.265821652335961e-06, "loss": 0.287, "step": 4987 }, { "epoch": 0.3990320193596128, "grad_norm": 0.28569400921712595, "learning_rate": 9.265483722225288e-06, "loss": 0.2945, "step": 4988 }, { "epoch": 0.39911201775964483, "grad_norm": 0.266379394590889, "learning_rate": 9.265145720525902e-06, "loss": 0.3067, "step": 4989 }, { "epoch": 0.3991920161596768, "grad_norm": 0.30091685367434845, "learning_rate": 9.26480764724348e-06, "loss": 0.2496, "step": 4990 }, { "epoch": 0.3992720145597088, "grad_norm": 0.3096213478614501, "learning_rate": 9.264469502383689e-06, "loss": 0.2611, "step": 4991 }, { "epoch": 0.3993520129597408, "grad_norm": 0.3684877319274733, "learning_rate": 9.264131285952209e-06, "loss": 0.2777, "step": 4992 }, { "epoch": 0.3994320113597728, "grad_norm": 0.2993231439950321, "learning_rate": 9.263792997954717e-06, "loss": 0.2742, "step": 4993 }, { "epoch": 0.39951200975980483, "grad_norm": 0.21364371777268776, "learning_rate": 9.263454638396889e-06, "loss": 0.3264, "step": 4994 }, { "epoch": 0.3995920081598368, "grad_norm": 0.31960204440287454, "learning_rate": 9.263116207284402e-06, "loss": 0.2634, "step": 4995 }, { "epoch": 0.3996720065598688, "grad_norm": 0.3389121352967545, "learning_rate": 9.262777704622939e-06, "loss": 0.3062, "step": 4996 }, { "epoch": 0.3997520049599008, "grad_norm": 0.3109395643649128, "learning_rate": 9.26243913041818e-06, "loss": 0.2727, "step": 4997 }, { "epoch": 0.3998320033599328, "grad_norm": 0.29331121253792536, "learning_rate": 9.262100484675807e-06, "loss": 0.2893, "step": 4998 }, { "epoch": 0.3999120017599648, "grad_norm": 0.2691149018747211, "learning_rate": 9.261761767401503e-06, "loss": 0.2711, "step": 4999 }, { "epoch": 0.3999920001599968, "grad_norm": 0.296576034418958, "learning_rate": 9.261422978600955e-06, "loss": 0.2854, "step": 5000 }, { "epoch": 0.4000719985600288, "grad_norm": 0.3531510028917982, "learning_rate": 9.261084118279846e-06, "loss": 0.2652, "step": 5001 }, { "epoch": 0.4001519969600608, "grad_norm": 0.3580791990637992, "learning_rate": 9.260745186443867e-06, "loss": 0.2969, "step": 5002 }, { "epoch": 0.4002319953600928, "grad_norm": 0.2732021189957499, "learning_rate": 9.260406183098704e-06, "loss": 0.3137, "step": 5003 }, { "epoch": 0.4003119937601248, "grad_norm": 0.3651616943857174, "learning_rate": 9.260067108250046e-06, "loss": 0.246, "step": 5004 }, { "epoch": 0.4003919921601568, "grad_norm": 0.2875771658907377, "learning_rate": 9.259727961903584e-06, "loss": 0.315, "step": 5005 }, { "epoch": 0.4004719905601888, "grad_norm": 0.3118289710743714, "learning_rate": 9.259388744065012e-06, "loss": 0.2967, "step": 5006 }, { "epoch": 0.4005519889602208, "grad_norm": 0.35068534037207505, "learning_rate": 9.25904945474002e-06, "loss": 0.2638, "step": 5007 }, { "epoch": 0.4006319873602528, "grad_norm": 0.2899637522019673, "learning_rate": 9.258710093934306e-06, "loss": 0.2682, "step": 5008 }, { "epoch": 0.4007119857602848, "grad_norm": 0.2906777883736232, "learning_rate": 9.258370661653563e-06, "loss": 0.3059, "step": 5009 }, { "epoch": 0.4007919841603168, "grad_norm": 0.3041562548172392, "learning_rate": 9.258031157903489e-06, "loss": 0.2908, "step": 5010 }, { "epoch": 0.4008719825603488, "grad_norm": 0.2640874593079455, "learning_rate": 9.25769158268978e-06, "loss": 0.3103, "step": 5011 }, { "epoch": 0.4009519809603808, "grad_norm": 0.314693387470461, "learning_rate": 9.257351936018137e-06, "loss": 0.2792, "step": 5012 }, { "epoch": 0.4010319793604128, "grad_norm": 1.6442972826920612, "learning_rate": 9.257012217894261e-06, "loss": 0.3086, "step": 5013 }, { "epoch": 0.40111197776044477, "grad_norm": 0.33033963481614415, "learning_rate": 9.256672428323852e-06, "loss": 0.3042, "step": 5014 }, { "epoch": 0.4011919761604768, "grad_norm": 1.094982486110196, "learning_rate": 9.256332567312614e-06, "loss": 0.2829, "step": 5015 }, { "epoch": 0.4012719745605088, "grad_norm": 0.2880930155399843, "learning_rate": 9.255992634866248e-06, "loss": 0.3016, "step": 5016 }, { "epoch": 0.4013519729605408, "grad_norm": 0.29015800984498824, "learning_rate": 9.255652630990464e-06, "loss": 0.3137, "step": 5017 }, { "epoch": 0.4014319713605728, "grad_norm": 0.26500750844840604, "learning_rate": 9.255312555690965e-06, "loss": 0.2694, "step": 5018 }, { "epoch": 0.40151196976060477, "grad_norm": 0.28241871317948614, "learning_rate": 9.25497240897346e-06, "loss": 0.3122, "step": 5019 }, { "epoch": 0.4015919681606368, "grad_norm": 0.2885469735871929, "learning_rate": 9.254632190843657e-06, "loss": 0.3167, "step": 5020 }, { "epoch": 0.4016719665606688, "grad_norm": 0.3234172073724753, "learning_rate": 9.254291901307267e-06, "loss": 0.2997, "step": 5021 }, { "epoch": 0.4017519649607008, "grad_norm": 0.40371240460558705, "learning_rate": 9.253951540369999e-06, "loss": 0.2537, "step": 5022 }, { "epoch": 0.4018319633607328, "grad_norm": 0.32854774756768734, "learning_rate": 9.253611108037566e-06, "loss": 0.2606, "step": 5023 }, { "epoch": 0.40191196176076477, "grad_norm": 0.43486019387190494, "learning_rate": 9.253270604315685e-06, "loss": 0.2665, "step": 5024 }, { "epoch": 0.4019919601607968, "grad_norm": 0.27070070193384177, "learning_rate": 9.252930029210066e-06, "loss": 0.3188, "step": 5025 }, { "epoch": 0.4020719585608288, "grad_norm": 0.309841357449696, "learning_rate": 9.252589382726426e-06, "loss": 0.2546, "step": 5026 }, { "epoch": 0.40215195696086076, "grad_norm": 0.32501942556157376, "learning_rate": 9.252248664870486e-06, "loss": 0.2851, "step": 5027 }, { "epoch": 0.4022319553608928, "grad_norm": 0.32224442920577284, "learning_rate": 9.251907875647961e-06, "loss": 0.3012, "step": 5028 }, { "epoch": 0.40231195376092477, "grad_norm": 0.33129666928433, "learning_rate": 9.25156701506457e-06, "loss": 0.2975, "step": 5029 }, { "epoch": 0.4023919521609568, "grad_norm": 0.2848291646035959, "learning_rate": 9.251226083126035e-06, "loss": 0.2818, "step": 5030 }, { "epoch": 0.4024719505609888, "grad_norm": 0.23517434792560457, "learning_rate": 9.250885079838079e-06, "loss": 0.3389, "step": 5031 }, { "epoch": 0.40255194896102076, "grad_norm": 0.47806638346222996, "learning_rate": 9.250544005206421e-06, "loss": 0.2754, "step": 5032 }, { "epoch": 0.4026319473610528, "grad_norm": 0.37640281800543207, "learning_rate": 9.250202859236792e-06, "loss": 0.2892, "step": 5033 }, { "epoch": 0.40271194576108477, "grad_norm": 0.2564643697842794, "learning_rate": 9.249861641934911e-06, "loss": 0.2878, "step": 5034 }, { "epoch": 0.4027919441611168, "grad_norm": 0.3430377354177823, "learning_rate": 9.249520353306509e-06, "loss": 0.2731, "step": 5035 }, { "epoch": 0.4028719425611488, "grad_norm": 0.2683295531141864, "learning_rate": 9.249178993357312e-06, "loss": 0.2882, "step": 5036 }, { "epoch": 0.40295194096118075, "grad_norm": 0.2933632400625278, "learning_rate": 9.248837562093049e-06, "loss": 0.2759, "step": 5037 }, { "epoch": 0.4030319393612128, "grad_norm": 0.2622192298906411, "learning_rate": 9.248496059519452e-06, "loss": 0.3238, "step": 5038 }, { "epoch": 0.40311193776124477, "grad_norm": 0.3428165002251814, "learning_rate": 9.24815448564225e-06, "loss": 0.2624, "step": 5039 }, { "epoch": 0.4031919361612768, "grad_norm": 0.30873633820468505, "learning_rate": 9.247812840467179e-06, "loss": 0.2877, "step": 5040 }, { "epoch": 0.4032719345613088, "grad_norm": 0.25738574384172697, "learning_rate": 9.24747112399997e-06, "loss": 0.29, "step": 5041 }, { "epoch": 0.40335193296134075, "grad_norm": 0.25150575581941115, "learning_rate": 9.24712933624636e-06, "loss": 0.3067, "step": 5042 }, { "epoch": 0.4034319313613728, "grad_norm": 0.3288895564004573, "learning_rate": 9.246787477212085e-06, "loss": 0.2632, "step": 5043 }, { "epoch": 0.40351192976140476, "grad_norm": 0.3032031788991777, "learning_rate": 9.24644554690288e-06, "loss": 0.2519, "step": 5044 }, { "epoch": 0.4035919281614368, "grad_norm": 0.2893762348697433, "learning_rate": 9.246103545324488e-06, "loss": 0.2927, "step": 5045 }, { "epoch": 0.4036719265614688, "grad_norm": 0.3343603013673226, "learning_rate": 9.245761472482646e-06, "loss": 0.2618, "step": 5046 }, { "epoch": 0.40375192496150075, "grad_norm": 0.3021940748433022, "learning_rate": 9.245419328383095e-06, "loss": 0.2753, "step": 5047 }, { "epoch": 0.4038319233615328, "grad_norm": 0.3263627013624696, "learning_rate": 9.245077113031577e-06, "loss": 0.2546, "step": 5048 }, { "epoch": 0.40391192176156476, "grad_norm": 0.2677818052763272, "learning_rate": 9.244734826433839e-06, "loss": 0.284, "step": 5049 }, { "epoch": 0.40399192016159674, "grad_norm": 0.24340605910982918, "learning_rate": 9.244392468595622e-06, "loss": 0.3349, "step": 5050 }, { "epoch": 0.4040719185616288, "grad_norm": 0.33123884193897074, "learning_rate": 9.244050039522673e-06, "loss": 0.2569, "step": 5051 }, { "epoch": 0.40415191696166075, "grad_norm": 0.3058730002708665, "learning_rate": 9.243707539220739e-06, "loss": 0.3145, "step": 5052 }, { "epoch": 0.4042319153616928, "grad_norm": 0.290328994784023, "learning_rate": 9.243364967695569e-06, "loss": 0.2809, "step": 5053 }, { "epoch": 0.40431191376172476, "grad_norm": 0.23150043859870223, "learning_rate": 9.243022324952912e-06, "loss": 0.3066, "step": 5054 }, { "epoch": 0.40439191216175674, "grad_norm": 0.3139180638078602, "learning_rate": 9.242679610998519e-06, "loss": 0.265, "step": 5055 }, { "epoch": 0.40447191056178877, "grad_norm": 0.27440633355491206, "learning_rate": 9.242336825838141e-06, "loss": 0.2771, "step": 5056 }, { "epoch": 0.40455190896182075, "grad_norm": 0.284952591423798, "learning_rate": 9.241993969477531e-06, "loss": 0.2954, "step": 5057 }, { "epoch": 0.4046319073618528, "grad_norm": 0.2229839786110436, "learning_rate": 9.241651041922443e-06, "loss": 0.3165, "step": 5058 }, { "epoch": 0.40471190576188476, "grad_norm": 0.19684151283244064, "learning_rate": 9.241308043178635e-06, "loss": 0.3547, "step": 5059 }, { "epoch": 0.40479190416191674, "grad_norm": 0.2885099048285632, "learning_rate": 9.240964973251861e-06, "loss": 0.2911, "step": 5060 }, { "epoch": 0.40487190256194877, "grad_norm": 0.2642295130386296, "learning_rate": 9.24062183214788e-06, "loss": 0.3111, "step": 5061 }, { "epoch": 0.40495190096198075, "grad_norm": 0.2556353383347269, "learning_rate": 9.24027861987245e-06, "loss": 0.2887, "step": 5062 }, { "epoch": 0.4050318993620128, "grad_norm": 0.2587158139104808, "learning_rate": 9.239935336431331e-06, "loss": 0.3196, "step": 5063 }, { "epoch": 0.40511189776204476, "grad_norm": 0.3063833374611063, "learning_rate": 9.239591981830286e-06, "loss": 0.2663, "step": 5064 }, { "epoch": 0.40519189616207674, "grad_norm": 0.30496393118043685, "learning_rate": 9.23924855607508e-06, "loss": 0.263, "step": 5065 }, { "epoch": 0.40527189456210877, "grad_norm": 0.2937037553971845, "learning_rate": 9.23890505917147e-06, "loss": 0.2516, "step": 5066 }, { "epoch": 0.40535189296214075, "grad_norm": 0.2887407581849976, "learning_rate": 9.238561491125225e-06, "loss": 0.2606, "step": 5067 }, { "epoch": 0.4054318913621728, "grad_norm": 0.2926303705389336, "learning_rate": 9.238217851942112e-06, "loss": 0.291, "step": 5068 }, { "epoch": 0.40551188976220476, "grad_norm": 0.3281407837920735, "learning_rate": 9.237874141627896e-06, "loss": 0.2839, "step": 5069 }, { "epoch": 0.40559188816223674, "grad_norm": 0.33451802883583126, "learning_rate": 9.23753036018835e-06, "loss": 0.2615, "step": 5070 }, { "epoch": 0.40567188656226877, "grad_norm": 0.21113763120602924, "learning_rate": 9.237186507629236e-06, "loss": 0.3359, "step": 5071 }, { "epoch": 0.40575188496230075, "grad_norm": 0.27247347659026977, "learning_rate": 9.236842583956334e-06, "loss": 0.2968, "step": 5072 }, { "epoch": 0.4058318833623328, "grad_norm": 0.8649339486613512, "learning_rate": 9.236498589175408e-06, "loss": 0.2563, "step": 5073 }, { "epoch": 0.40591188176236476, "grad_norm": 0.2661454606332336, "learning_rate": 9.236154523292237e-06, "loss": 0.2756, "step": 5074 }, { "epoch": 0.40599188016239673, "grad_norm": 0.26571725251363654, "learning_rate": 9.235810386312594e-06, "loss": 0.2963, "step": 5075 }, { "epoch": 0.40607187856242877, "grad_norm": 0.33408613662105385, "learning_rate": 9.235466178242255e-06, "loss": 0.2958, "step": 5076 }, { "epoch": 0.40615187696246075, "grad_norm": 0.3285741726513298, "learning_rate": 9.235121899086994e-06, "loss": 0.2597, "step": 5077 }, { "epoch": 0.4062318753624927, "grad_norm": 0.3873371718337856, "learning_rate": 9.234777548852593e-06, "loss": 0.2793, "step": 5078 }, { "epoch": 0.40631187376252476, "grad_norm": 0.2887667478203784, "learning_rate": 9.23443312754483e-06, "loss": 0.2507, "step": 5079 }, { "epoch": 0.40639187216255673, "grad_norm": 0.31670435599624613, "learning_rate": 9.234088635169484e-06, "loss": 0.2538, "step": 5080 }, { "epoch": 0.40647187056258877, "grad_norm": 0.2279322113398891, "learning_rate": 9.23374407173234e-06, "loss": 0.3336, "step": 5081 }, { "epoch": 0.40655186896262074, "grad_norm": 0.31415458187132445, "learning_rate": 9.233399437239177e-06, "loss": 0.2602, "step": 5082 }, { "epoch": 0.4066318673626527, "grad_norm": 0.3254314407867844, "learning_rate": 9.233054731695782e-06, "loss": 0.2966, "step": 5083 }, { "epoch": 0.40671186576268475, "grad_norm": 0.29610324548549055, "learning_rate": 9.23270995510794e-06, "loss": 0.2888, "step": 5084 }, { "epoch": 0.40679186416271673, "grad_norm": 0.2343661445058714, "learning_rate": 9.232365107481433e-06, "loss": 0.3489, "step": 5085 }, { "epoch": 0.40687186256274877, "grad_norm": 0.4390519830109507, "learning_rate": 9.232020188822055e-06, "loss": 0.2648, "step": 5086 }, { "epoch": 0.40695186096278074, "grad_norm": 0.33079487875766916, "learning_rate": 9.231675199135593e-06, "loss": 0.253, "step": 5087 }, { "epoch": 0.4070318593628127, "grad_norm": 0.2780373302361011, "learning_rate": 9.231330138427836e-06, "loss": 0.2926, "step": 5088 }, { "epoch": 0.40711185776284475, "grad_norm": 0.2955049763325041, "learning_rate": 9.230985006704575e-06, "loss": 0.2747, "step": 5089 }, { "epoch": 0.40719185616287673, "grad_norm": 0.26656259266039384, "learning_rate": 9.230639803971603e-06, "loss": 0.2873, "step": 5090 }, { "epoch": 0.40727185456290876, "grad_norm": 0.6105416018094711, "learning_rate": 9.230294530234714e-06, "loss": 0.2524, "step": 5091 }, { "epoch": 0.40735185296294074, "grad_norm": 0.29108484932788886, "learning_rate": 9.2299491854997e-06, "loss": 0.2949, "step": 5092 }, { "epoch": 0.4074318513629727, "grad_norm": 0.2708798756674137, "learning_rate": 9.229603769772364e-06, "loss": 0.286, "step": 5093 }, { "epoch": 0.40751184976300475, "grad_norm": 0.2752122282132114, "learning_rate": 9.229258283058496e-06, "loss": 0.2978, "step": 5094 }, { "epoch": 0.40759184816303673, "grad_norm": 0.31232880287125725, "learning_rate": 9.228912725363897e-06, "loss": 0.2472, "step": 5095 }, { "epoch": 0.40767184656306876, "grad_norm": 0.24846062515251233, "learning_rate": 9.228567096694366e-06, "loss": 0.3284, "step": 5096 }, { "epoch": 0.40775184496310074, "grad_norm": 0.35248465135271806, "learning_rate": 9.228221397055705e-06, "loss": 0.2807, "step": 5097 }, { "epoch": 0.4078318433631327, "grad_norm": 0.29705505187076364, "learning_rate": 9.227875626453718e-06, "loss": 0.315, "step": 5098 }, { "epoch": 0.40791184176316475, "grad_norm": 0.29119153022585825, "learning_rate": 9.2275297848942e-06, "loss": 0.2888, "step": 5099 }, { "epoch": 0.40799184016319673, "grad_norm": 0.2845988395098363, "learning_rate": 9.227183872382965e-06, "loss": 0.3018, "step": 5100 }, { "epoch": 0.40807183856322876, "grad_norm": 0.2169737723265164, "learning_rate": 9.226837888925813e-06, "loss": 0.3425, "step": 5101 }, { "epoch": 0.40815183696326074, "grad_norm": 0.33806147505314693, "learning_rate": 9.226491834528553e-06, "loss": 0.3223, "step": 5102 }, { "epoch": 0.4082318353632927, "grad_norm": 0.25515128511837964, "learning_rate": 9.226145709196991e-06, "loss": 0.2928, "step": 5103 }, { "epoch": 0.40831183376332475, "grad_norm": 0.2943256295930612, "learning_rate": 9.225799512936938e-06, "loss": 0.2982, "step": 5104 }, { "epoch": 0.4083918321633567, "grad_norm": 0.2754034913082835, "learning_rate": 9.225453245754204e-06, "loss": 0.2864, "step": 5105 }, { "epoch": 0.4084718305633887, "grad_norm": 0.3080616989550143, "learning_rate": 9.2251069076546e-06, "loss": 0.3273, "step": 5106 }, { "epoch": 0.40855182896342074, "grad_norm": 0.29406993101235734, "learning_rate": 9.224760498643936e-06, "loss": 0.2872, "step": 5107 }, { "epoch": 0.4086318273634527, "grad_norm": 0.33215953460884384, "learning_rate": 9.22441401872803e-06, "loss": 0.2568, "step": 5108 }, { "epoch": 0.40871182576348475, "grad_norm": 0.27812625207001357, "learning_rate": 9.224067467912696e-06, "loss": 0.2926, "step": 5109 }, { "epoch": 0.4087918241635167, "grad_norm": 0.26621281573365096, "learning_rate": 9.22372084620375e-06, "loss": 0.3116, "step": 5110 }, { "epoch": 0.4088718225635487, "grad_norm": 0.31073427995343855, "learning_rate": 9.22337415360701e-06, "loss": 0.3114, "step": 5111 }, { "epoch": 0.40895182096358074, "grad_norm": 0.4029026120248433, "learning_rate": 9.223027390128292e-06, "loss": 0.2631, "step": 5112 }, { "epoch": 0.4090318193636127, "grad_norm": 0.21258601598254037, "learning_rate": 9.222680555773417e-06, "loss": 0.3426, "step": 5113 }, { "epoch": 0.40911181776364475, "grad_norm": 0.4391077492364632, "learning_rate": 9.22233365054821e-06, "loss": 0.2763, "step": 5114 }, { "epoch": 0.4091918161636767, "grad_norm": 0.25896563266095185, "learning_rate": 9.221986674458488e-06, "loss": 0.2734, "step": 5115 }, { "epoch": 0.4092718145637087, "grad_norm": 0.33086304884652123, "learning_rate": 9.221639627510076e-06, "loss": 0.2499, "step": 5116 }, { "epoch": 0.40935181296374074, "grad_norm": 0.30054583459854994, "learning_rate": 9.221292509708799e-06, "loss": 0.2809, "step": 5117 }, { "epoch": 0.4094318113637727, "grad_norm": 0.2657949215120971, "learning_rate": 9.220945321060485e-06, "loss": 0.3232, "step": 5118 }, { "epoch": 0.40951180976380475, "grad_norm": 0.28552281075403557, "learning_rate": 9.220598061570956e-06, "loss": 0.2902, "step": 5119 }, { "epoch": 0.4095918081638367, "grad_norm": 0.7497670639096392, "learning_rate": 9.220250731246042e-06, "loss": 0.2694, "step": 5120 }, { "epoch": 0.4096718065638687, "grad_norm": 0.30911911357796185, "learning_rate": 9.219903330091575e-06, "loss": 0.2749, "step": 5121 }, { "epoch": 0.40975180496390073, "grad_norm": 0.33332049506059624, "learning_rate": 9.219555858113383e-06, "loss": 0.2714, "step": 5122 }, { "epoch": 0.4098318033639327, "grad_norm": 0.34156497444688066, "learning_rate": 9.219208315317298e-06, "loss": 0.2627, "step": 5123 }, { "epoch": 0.40991180176396474, "grad_norm": 0.2944717076069869, "learning_rate": 9.218860701709154e-06, "loss": 0.2839, "step": 5124 }, { "epoch": 0.4099918001639967, "grad_norm": 0.30347883791488056, "learning_rate": 9.218513017294784e-06, "loss": 0.2662, "step": 5125 }, { "epoch": 0.4100717985640287, "grad_norm": 0.2653290454319142, "learning_rate": 9.218165262080024e-06, "loss": 0.3112, "step": 5126 }, { "epoch": 0.41015179696406073, "grad_norm": 0.28810169013437004, "learning_rate": 9.217817436070708e-06, "loss": 0.2911, "step": 5127 }, { "epoch": 0.4102317953640927, "grad_norm": 0.2892865111041984, "learning_rate": 9.217469539272679e-06, "loss": 0.2917, "step": 5128 }, { "epoch": 0.41031179376412474, "grad_norm": 0.20711187665121045, "learning_rate": 9.21712157169177e-06, "loss": 0.3279, "step": 5129 }, { "epoch": 0.4103917921641567, "grad_norm": 0.24396732886107703, "learning_rate": 9.216773533333825e-06, "loss": 0.3248, "step": 5130 }, { "epoch": 0.4104717905641887, "grad_norm": 0.2063294020052452, "learning_rate": 9.216425424204683e-06, "loss": 0.34, "step": 5131 }, { "epoch": 0.41055178896422073, "grad_norm": 0.20436774684527628, "learning_rate": 9.216077244310187e-06, "loss": 0.3527, "step": 5132 }, { "epoch": 0.4106317873642527, "grad_norm": 0.18043212793630797, "learning_rate": 9.21572899365618e-06, "loss": 0.3446, "step": 5133 }, { "epoch": 0.4107117857642847, "grad_norm": 0.3148796817690092, "learning_rate": 9.21538067224851e-06, "loss": 0.2733, "step": 5134 }, { "epoch": 0.4107917841643167, "grad_norm": 0.2954961298562056, "learning_rate": 9.21503228009302e-06, "loss": 0.2591, "step": 5135 }, { "epoch": 0.4108717825643487, "grad_norm": 0.26542330131988123, "learning_rate": 9.214683817195558e-06, "loss": 0.2963, "step": 5136 }, { "epoch": 0.41095178096438073, "grad_norm": 0.3387772669648989, "learning_rate": 9.21433528356197e-06, "loss": 0.2619, "step": 5137 }, { "epoch": 0.4110317793644127, "grad_norm": 0.2734606970327009, "learning_rate": 9.213986679198107e-06, "loss": 0.2471, "step": 5138 }, { "epoch": 0.4111117777644447, "grad_norm": 0.3102462458135226, "learning_rate": 9.213638004109824e-06, "loss": 0.2798, "step": 5139 }, { "epoch": 0.4111917761644767, "grad_norm": 0.28240935705461273, "learning_rate": 9.213289258302967e-06, "loss": 0.2926, "step": 5140 }, { "epoch": 0.4112717745645087, "grad_norm": 0.33231753727160324, "learning_rate": 9.212940441783392e-06, "loss": 0.2744, "step": 5141 }, { "epoch": 0.41135177296454073, "grad_norm": 0.35571882380393344, "learning_rate": 9.21259155455695e-06, "loss": 0.2997, "step": 5142 }, { "epoch": 0.4114317713645727, "grad_norm": 0.27852116049278425, "learning_rate": 9.212242596629504e-06, "loss": 0.302, "step": 5143 }, { "epoch": 0.4115117697646047, "grad_norm": 0.33236755714087285, "learning_rate": 9.211893568006903e-06, "loss": 0.2672, "step": 5144 }, { "epoch": 0.4115917681646367, "grad_norm": 0.43552530304557246, "learning_rate": 9.211544468695006e-06, "loss": 0.2853, "step": 5145 }, { "epoch": 0.4116717665646687, "grad_norm": 0.31638432904638675, "learning_rate": 9.211195298699674e-06, "loss": 0.2646, "step": 5146 }, { "epoch": 0.41175176496470073, "grad_norm": 0.3095783844627131, "learning_rate": 9.210846058026767e-06, "loss": 0.2532, "step": 5147 }, { "epoch": 0.4118317633647327, "grad_norm": 0.3221378378969911, "learning_rate": 9.210496746682147e-06, "loss": 0.2807, "step": 5148 }, { "epoch": 0.4119117617647647, "grad_norm": 0.256840253566607, "learning_rate": 9.210147364671677e-06, "loss": 0.3304, "step": 5149 }, { "epoch": 0.4119917601647967, "grad_norm": 0.2644823654447232, "learning_rate": 9.209797912001218e-06, "loss": 0.3179, "step": 5150 }, { "epoch": 0.4120717585648287, "grad_norm": 0.3359436338492232, "learning_rate": 9.209448388676636e-06, "loss": 0.2454, "step": 5151 }, { "epoch": 0.4121517569648607, "grad_norm": 0.2607042957976346, "learning_rate": 9.209098794703796e-06, "loss": 0.3082, "step": 5152 }, { "epoch": 0.4122317553648927, "grad_norm": 0.2733418116361817, "learning_rate": 9.20874913008857e-06, "loss": 0.2807, "step": 5153 }, { "epoch": 0.4123117537649247, "grad_norm": 0.2761646471051783, "learning_rate": 9.208399394836823e-06, "loss": 0.2936, "step": 5154 }, { "epoch": 0.4123917521649567, "grad_norm": 0.30935343639940405, "learning_rate": 9.208049588954424e-06, "loss": 0.2609, "step": 5155 }, { "epoch": 0.4124717505649887, "grad_norm": 0.2069829173232812, "learning_rate": 9.207699712447246e-06, "loss": 0.3509, "step": 5156 }, { "epoch": 0.4125517489650207, "grad_norm": 0.31585244730559664, "learning_rate": 9.20734976532116e-06, "loss": 0.2531, "step": 5157 }, { "epoch": 0.4126317473650527, "grad_norm": 0.3063345741017045, "learning_rate": 9.206999747582039e-06, "loss": 0.2828, "step": 5158 }, { "epoch": 0.4127117457650847, "grad_norm": 0.24950146103579104, "learning_rate": 9.206649659235756e-06, "loss": 0.271, "step": 5159 }, { "epoch": 0.4127917441651167, "grad_norm": 0.2294088381327079, "learning_rate": 9.20629950028819e-06, "loss": 0.3092, "step": 5160 }, { "epoch": 0.4128717425651487, "grad_norm": 0.284899742187152, "learning_rate": 9.205949270745217e-06, "loss": 0.3051, "step": 5161 }, { "epoch": 0.41295174096518067, "grad_norm": 0.30785774988989656, "learning_rate": 9.205598970612713e-06, "loss": 0.262, "step": 5162 }, { "epoch": 0.4130317393652127, "grad_norm": 0.3136404434456607, "learning_rate": 9.20524859989656e-06, "loss": 0.2475, "step": 5163 }, { "epoch": 0.4131117377652447, "grad_norm": 0.31898727827148876, "learning_rate": 9.204898158602633e-06, "loss": 0.2587, "step": 5164 }, { "epoch": 0.4131917361652767, "grad_norm": 0.26542806851169926, "learning_rate": 9.204547646736821e-06, "loss": 0.2929, "step": 5165 }, { "epoch": 0.4132717345653087, "grad_norm": 0.26897606795733425, "learning_rate": 9.204197064305002e-06, "loss": 0.3663, "step": 5166 }, { "epoch": 0.41335173296534067, "grad_norm": 0.3234404814518937, "learning_rate": 9.20384641131306e-06, "loss": 0.2459, "step": 5167 }, { "epoch": 0.4134317313653727, "grad_norm": 0.21860367568617967, "learning_rate": 9.203495687766883e-06, "loss": 0.3486, "step": 5168 }, { "epoch": 0.4135117297654047, "grad_norm": 0.2954374144046898, "learning_rate": 9.203144893672354e-06, "loss": 0.3013, "step": 5169 }, { "epoch": 0.4135917281654367, "grad_norm": 0.26815101832419375, "learning_rate": 9.20279402903536e-06, "loss": 0.2851, "step": 5170 }, { "epoch": 0.4136717265654687, "grad_norm": 0.24869166232650186, "learning_rate": 9.202443093861796e-06, "loss": 0.3078, "step": 5171 }, { "epoch": 0.41375172496550067, "grad_norm": 0.2943948549152174, "learning_rate": 9.202092088157542e-06, "loss": 0.2834, "step": 5172 }, { "epoch": 0.4138317233655327, "grad_norm": 0.2797817038495954, "learning_rate": 9.201741011928498e-06, "loss": 0.2806, "step": 5173 }, { "epoch": 0.4139117217655647, "grad_norm": 0.35045531128736945, "learning_rate": 9.20138986518055e-06, "loss": 0.2612, "step": 5174 }, { "epoch": 0.4139917201655967, "grad_norm": 0.299978119866561, "learning_rate": 9.201038647919595e-06, "loss": 0.2766, "step": 5175 }, { "epoch": 0.4140717185656287, "grad_norm": 0.2986304503766352, "learning_rate": 9.200687360151527e-06, "loss": 0.3059, "step": 5176 }, { "epoch": 0.41415171696566067, "grad_norm": 0.24788436750353124, "learning_rate": 9.20033600188224e-06, "loss": 0.3213, "step": 5177 }, { "epoch": 0.4142317153656927, "grad_norm": 0.4028020609627026, "learning_rate": 9.199984573117633e-06, "loss": 0.2887, "step": 5178 }, { "epoch": 0.4143117137657247, "grad_norm": 0.29490270603628127, "learning_rate": 9.199633073863603e-06, "loss": 0.2588, "step": 5179 }, { "epoch": 0.4143917121657567, "grad_norm": 0.27603424299719537, "learning_rate": 9.19928150412605e-06, "loss": 0.2891, "step": 5180 }, { "epoch": 0.4144717105657887, "grad_norm": 0.3089826628831555, "learning_rate": 9.198929863910874e-06, "loss": 0.265, "step": 5181 }, { "epoch": 0.41455170896582066, "grad_norm": 0.3364696696791171, "learning_rate": 9.198578153223976e-06, "loss": 0.2906, "step": 5182 }, { "epoch": 0.4146317073658527, "grad_norm": 0.26033512707356476, "learning_rate": 9.198226372071259e-06, "loss": 0.3109, "step": 5183 }, { "epoch": 0.4147117057658847, "grad_norm": 0.2975368377645226, "learning_rate": 9.19787452045863e-06, "loss": 0.265, "step": 5184 }, { "epoch": 0.4147917041659167, "grad_norm": 0.25753871905202474, "learning_rate": 9.19752259839199e-06, "loss": 0.2723, "step": 5185 }, { "epoch": 0.4148717025659487, "grad_norm": 0.31398608661861965, "learning_rate": 9.197170605877248e-06, "loss": 0.2772, "step": 5186 }, { "epoch": 0.41495170096598066, "grad_norm": 0.2503903623610147, "learning_rate": 9.19681854292031e-06, "loss": 0.3191, "step": 5187 }, { "epoch": 0.4150316993660127, "grad_norm": 0.2921910583877275, "learning_rate": 9.196466409527085e-06, "loss": 0.263, "step": 5188 }, { "epoch": 0.4151116977660447, "grad_norm": 0.3084137953617749, "learning_rate": 9.196114205703484e-06, "loss": 0.2589, "step": 5189 }, { "epoch": 0.41519169616607665, "grad_norm": 0.22663415563494682, "learning_rate": 9.195761931455418e-06, "loss": 0.3015, "step": 5190 }, { "epoch": 0.4152716945661087, "grad_norm": 0.32978353164604135, "learning_rate": 9.1954095867888e-06, "loss": 0.2544, "step": 5191 }, { "epoch": 0.41535169296614066, "grad_norm": 0.2281996158079269, "learning_rate": 9.19505717170954e-06, "loss": 0.313, "step": 5192 }, { "epoch": 0.4154316913661727, "grad_norm": 0.24192166812926552, "learning_rate": 9.194704686223557e-06, "loss": 0.3194, "step": 5193 }, { "epoch": 0.4155116897662047, "grad_norm": 0.30577745117581545, "learning_rate": 9.194352130336764e-06, "loss": 0.2513, "step": 5194 }, { "epoch": 0.41559168816623665, "grad_norm": 0.2897562570127388, "learning_rate": 9.19399950405508e-06, "loss": 0.2408, "step": 5195 }, { "epoch": 0.4156716865662687, "grad_norm": 0.41816273186182856, "learning_rate": 9.193646807384421e-06, "loss": 0.3211, "step": 5196 }, { "epoch": 0.41575168496630066, "grad_norm": 0.2650843484348309, "learning_rate": 9.193294040330709e-06, "loss": 0.2729, "step": 5197 }, { "epoch": 0.4158316833663327, "grad_norm": 0.2869447618554817, "learning_rate": 9.192941202899861e-06, "loss": 0.2894, "step": 5198 }, { "epoch": 0.41591168176636467, "grad_norm": 0.29373937245755993, "learning_rate": 9.192588295097801e-06, "loss": 0.2413, "step": 5199 }, { "epoch": 0.41599168016639665, "grad_norm": 0.272057923651325, "learning_rate": 9.192235316930454e-06, "loss": 0.2982, "step": 5200 }, { "epoch": 0.4160716785664287, "grad_norm": 0.34316178138284675, "learning_rate": 9.191882268403743e-06, "loss": 0.2361, "step": 5201 }, { "epoch": 0.41615167696646066, "grad_norm": 0.3331277987001649, "learning_rate": 9.191529149523588e-06, "loss": 0.2574, "step": 5202 }, { "epoch": 0.4162316753664927, "grad_norm": 0.3111070685580852, "learning_rate": 9.191175960295924e-06, "loss": 0.2541, "step": 5203 }, { "epoch": 0.41631167376652467, "grad_norm": 0.24262366178607908, "learning_rate": 9.190822700726671e-06, "loss": 0.334, "step": 5204 }, { "epoch": 0.41639167216655665, "grad_norm": 0.29257639429490034, "learning_rate": 9.190469370821764e-06, "loss": 0.2706, "step": 5205 }, { "epoch": 0.4164716705665887, "grad_norm": 0.21763815602611417, "learning_rate": 9.190115970587129e-06, "loss": 0.3132, "step": 5206 }, { "epoch": 0.41655166896662066, "grad_norm": 0.24968064126960107, "learning_rate": 9.189762500028698e-06, "loss": 0.3311, "step": 5207 }, { "epoch": 0.4166316673666527, "grad_norm": 0.23902617205940302, "learning_rate": 9.189408959152405e-06, "loss": 0.3173, "step": 5208 }, { "epoch": 0.41671166576668467, "grad_norm": 0.30836418658037196, "learning_rate": 9.189055347964182e-06, "loss": 0.2681, "step": 5209 }, { "epoch": 0.41679166416671665, "grad_norm": 0.2999580615531441, "learning_rate": 9.188701666469965e-06, "loss": 0.269, "step": 5210 }, { "epoch": 0.4168716625667487, "grad_norm": 0.3029525933345695, "learning_rate": 9.188347914675689e-06, "loss": 0.2715, "step": 5211 }, { "epoch": 0.41695166096678066, "grad_norm": 0.27874212707814683, "learning_rate": 9.18799409258729e-06, "loss": 0.2754, "step": 5212 }, { "epoch": 0.4170316593668127, "grad_norm": 0.2600266357156443, "learning_rate": 9.18764020021071e-06, "loss": 0.2831, "step": 5213 }, { "epoch": 0.41711165776684467, "grad_norm": 0.2965364834774281, "learning_rate": 9.187286237551885e-06, "loss": 0.2492, "step": 5214 }, { "epoch": 0.41719165616687665, "grad_norm": 0.29956627287901055, "learning_rate": 9.186932204616756e-06, "loss": 0.2595, "step": 5215 }, { "epoch": 0.4172716545669087, "grad_norm": 0.2668196454869672, "learning_rate": 9.186578101411266e-06, "loss": 0.2666, "step": 5216 }, { "epoch": 0.41735165296694066, "grad_norm": 0.2438224529828869, "learning_rate": 9.186223927941356e-06, "loss": 0.3302, "step": 5217 }, { "epoch": 0.41743165136697263, "grad_norm": 0.2938911067670263, "learning_rate": 9.185869684212974e-06, "loss": 0.2757, "step": 5218 }, { "epoch": 0.41751164976700467, "grad_norm": 0.33162557376991736, "learning_rate": 9.185515370232062e-06, "loss": 0.2592, "step": 5219 }, { "epoch": 0.41759164816703664, "grad_norm": 0.305928534297296, "learning_rate": 9.185160986004566e-06, "loss": 0.263, "step": 5220 }, { "epoch": 0.4176716465670687, "grad_norm": 0.2769072028946352, "learning_rate": 9.184806531536438e-06, "loss": 0.2987, "step": 5221 }, { "epoch": 0.41775164496710065, "grad_norm": 0.27513542857080053, "learning_rate": 9.184452006833623e-06, "loss": 0.2762, "step": 5222 }, { "epoch": 0.41783164336713263, "grad_norm": 0.2890867584634828, "learning_rate": 9.184097411902072e-06, "loss": 0.2555, "step": 5223 }, { "epoch": 0.41791164176716467, "grad_norm": 0.22557238944886635, "learning_rate": 9.183742746747737e-06, "loss": 0.3063, "step": 5224 }, { "epoch": 0.41799164016719664, "grad_norm": 0.33861889666703715, "learning_rate": 9.18338801137657e-06, "loss": 0.2519, "step": 5225 }, { "epoch": 0.4180716385672287, "grad_norm": 0.23919587310154336, "learning_rate": 9.183033205794525e-06, "loss": 0.338, "step": 5226 }, { "epoch": 0.41815163696726065, "grad_norm": 0.312113249158113, "learning_rate": 9.182678330007556e-06, "loss": 0.2922, "step": 5227 }, { "epoch": 0.41823163536729263, "grad_norm": 0.2325558703465777, "learning_rate": 9.182323384021619e-06, "loss": 0.3121, "step": 5228 }, { "epoch": 0.41831163376732466, "grad_norm": 0.27912906922177216, "learning_rate": 9.181968367842674e-06, "loss": 0.3214, "step": 5229 }, { "epoch": 0.41839163216735664, "grad_norm": 0.7267050551840867, "learning_rate": 9.181613281476674e-06, "loss": 0.2928, "step": 5230 }, { "epoch": 0.4184716305673887, "grad_norm": 0.3927267879288216, "learning_rate": 9.181258124929582e-06, "loss": 0.2822, "step": 5231 }, { "epoch": 0.41855162896742065, "grad_norm": 0.42614000468655916, "learning_rate": 9.180902898207359e-06, "loss": 0.2934, "step": 5232 }, { "epoch": 0.41863162736745263, "grad_norm": 0.31114967860920073, "learning_rate": 9.180547601315963e-06, "loss": 0.3104, "step": 5233 }, { "epoch": 0.41871162576748466, "grad_norm": 0.2809300293608719, "learning_rate": 9.180192234261363e-06, "loss": 0.3086, "step": 5234 }, { "epoch": 0.41879162416751664, "grad_norm": 0.31476296330044196, "learning_rate": 9.17983679704952e-06, "loss": 0.25, "step": 5235 }, { "epoch": 0.4188716225675487, "grad_norm": 0.3035885835425518, "learning_rate": 9.179481289686398e-06, "loss": 0.2995, "step": 5236 }, { "epoch": 0.41895162096758065, "grad_norm": 0.29950245530443553, "learning_rate": 9.179125712177965e-06, "loss": 0.3142, "step": 5237 }, { "epoch": 0.41903161936761263, "grad_norm": 0.22365127860565798, "learning_rate": 9.178770064530191e-06, "loss": 0.3101, "step": 5238 }, { "epoch": 0.41911161776764466, "grad_norm": 0.26127833219000807, "learning_rate": 9.17841434674904e-06, "loss": 0.2779, "step": 5239 }, { "epoch": 0.41919161616767664, "grad_norm": 0.28374757978865806, "learning_rate": 9.178058558840488e-06, "loss": 0.2744, "step": 5240 }, { "epoch": 0.4192716145677087, "grad_norm": 0.2893333515800285, "learning_rate": 9.1777027008105e-06, "loss": 0.2449, "step": 5241 }, { "epoch": 0.41935161296774065, "grad_norm": 0.2033571467457253, "learning_rate": 9.177346772665054e-06, "loss": 0.3432, "step": 5242 }, { "epoch": 0.4194316113677726, "grad_norm": 0.2553674067238468, "learning_rate": 9.17699077441012e-06, "loss": 0.2848, "step": 5243 }, { "epoch": 0.41951160976780466, "grad_norm": 0.27193427963304284, "learning_rate": 9.176634706051676e-06, "loss": 0.2701, "step": 5244 }, { "epoch": 0.41959160816783664, "grad_norm": 0.3149712650959617, "learning_rate": 9.176278567595696e-06, "loss": 0.2528, "step": 5245 }, { "epoch": 0.4196716065678686, "grad_norm": 0.32663286761371185, "learning_rate": 9.175922359048156e-06, "loss": 0.2697, "step": 5246 }, { "epoch": 0.41975160496790065, "grad_norm": 0.3146877974573286, "learning_rate": 9.175566080415036e-06, "loss": 0.2679, "step": 5247 }, { "epoch": 0.4198316033679326, "grad_norm": 0.3343882668757924, "learning_rate": 9.175209731702313e-06, "loss": 0.2706, "step": 5248 }, { "epoch": 0.41991160176796466, "grad_norm": 0.2844214952317712, "learning_rate": 9.174853312915972e-06, "loss": 0.3069, "step": 5249 }, { "epoch": 0.41999160016799664, "grad_norm": 0.26474572835037347, "learning_rate": 9.174496824061992e-06, "loss": 0.2961, "step": 5250 }, { "epoch": 0.4200715985680286, "grad_norm": 0.2601711454875526, "learning_rate": 9.174140265146356e-06, "loss": 0.2641, "step": 5251 }, { "epoch": 0.42015159696806065, "grad_norm": 0.3628405695202224, "learning_rate": 9.173783636175051e-06, "loss": 0.2821, "step": 5252 }, { "epoch": 0.4202315953680926, "grad_norm": 0.25865390270665845, "learning_rate": 9.173426937154058e-06, "loss": 0.3224, "step": 5253 }, { "epoch": 0.42031159376812466, "grad_norm": 0.31091930774921067, "learning_rate": 9.173070168089367e-06, "loss": 0.2612, "step": 5254 }, { "epoch": 0.42039159216815664, "grad_norm": 0.32783997617721866, "learning_rate": 9.172713328986965e-06, "loss": 0.2669, "step": 5255 }, { "epoch": 0.4204715905681886, "grad_norm": 0.27634542486630254, "learning_rate": 9.172356419852841e-06, "loss": 0.3086, "step": 5256 }, { "epoch": 0.42055158896822065, "grad_norm": 0.24106327598852467, "learning_rate": 9.171999440692982e-06, "loss": 0.3347, "step": 5257 }, { "epoch": 0.4206315873682526, "grad_norm": 0.3227879519005372, "learning_rate": 9.171642391513384e-06, "loss": 0.2704, "step": 5258 }, { "epoch": 0.42071158576828466, "grad_norm": 2.6687118274508905, "learning_rate": 9.17128527232004e-06, "loss": 0.2641, "step": 5259 }, { "epoch": 0.42079158416831663, "grad_norm": 0.26193567428660103, "learning_rate": 9.170928083118937e-06, "loss": 0.317, "step": 5260 }, { "epoch": 0.4208715825683486, "grad_norm": 0.28812959166307556, "learning_rate": 9.170570823916074e-06, "loss": 0.2858, "step": 5261 }, { "epoch": 0.42095158096838065, "grad_norm": 0.38820447629464544, "learning_rate": 9.170213494717448e-06, "loss": 0.2951, "step": 5262 }, { "epoch": 0.4210315793684126, "grad_norm": 0.4206183538045783, "learning_rate": 9.169856095529055e-06, "loss": 0.2385, "step": 5263 }, { "epoch": 0.42111157776844466, "grad_norm": 0.3300137230347034, "learning_rate": 9.169498626356892e-06, "loss": 0.3307, "step": 5264 }, { "epoch": 0.42119157616847663, "grad_norm": 0.26450499737859773, "learning_rate": 9.16914108720696e-06, "loss": 0.3127, "step": 5265 }, { "epoch": 0.4212715745685086, "grad_norm": 0.19051765385229086, "learning_rate": 9.168783478085261e-06, "loss": 0.3458, "step": 5266 }, { "epoch": 0.42135157296854064, "grad_norm": 0.30551027963355376, "learning_rate": 9.168425798997794e-06, "loss": 0.2512, "step": 5267 }, { "epoch": 0.4214315713685726, "grad_norm": 0.28042328376019976, "learning_rate": 9.168068049950563e-06, "loss": 0.304, "step": 5268 }, { "epoch": 0.42151156976860465, "grad_norm": 0.2728669457583841, "learning_rate": 9.167710230949573e-06, "loss": 0.2779, "step": 5269 }, { "epoch": 0.42159156816863663, "grad_norm": 0.3062326715161398, "learning_rate": 9.16735234200083e-06, "loss": 0.2662, "step": 5270 }, { "epoch": 0.4216715665686686, "grad_norm": 0.2847921223795915, "learning_rate": 9.166994383110338e-06, "loss": 0.3047, "step": 5271 }, { "epoch": 0.42175156496870064, "grad_norm": 0.21151185431073452, "learning_rate": 9.166636354284107e-06, "loss": 0.3362, "step": 5272 }, { "epoch": 0.4218315633687326, "grad_norm": 0.2385011506609248, "learning_rate": 9.166278255528143e-06, "loss": 0.3246, "step": 5273 }, { "epoch": 0.4219115617687646, "grad_norm": 0.2427294855982681, "learning_rate": 9.165920086848461e-06, "loss": 0.2966, "step": 5274 }, { "epoch": 0.42199156016879663, "grad_norm": 0.29486245365104086, "learning_rate": 9.165561848251066e-06, "loss": 0.3163, "step": 5275 }, { "epoch": 0.4220715585688286, "grad_norm": 0.27956780620980404, "learning_rate": 9.165203539741976e-06, "loss": 0.2749, "step": 5276 }, { "epoch": 0.42215155696886064, "grad_norm": 0.3155036892323425, "learning_rate": 9.164845161327203e-06, "loss": 0.2482, "step": 5277 }, { "epoch": 0.4222315553688926, "grad_norm": 0.31750105867283485, "learning_rate": 9.164486713012759e-06, "loss": 0.3014, "step": 5278 }, { "epoch": 0.4223115537689246, "grad_norm": 0.32472418717527135, "learning_rate": 9.164128194804663e-06, "loss": 0.2489, "step": 5279 }, { "epoch": 0.42239155216895663, "grad_norm": 0.2893463872434011, "learning_rate": 9.16376960670893e-06, "loss": 0.2916, "step": 5280 }, { "epoch": 0.4224715505689886, "grad_norm": 0.32193818821457015, "learning_rate": 9.16341094873158e-06, "loss": 0.2604, "step": 5281 }, { "epoch": 0.42255154896902064, "grad_norm": 0.29681431771585615, "learning_rate": 9.163052220878633e-06, "loss": 0.291, "step": 5282 }, { "epoch": 0.4226315473690526, "grad_norm": 0.27848858788440195, "learning_rate": 9.162693423156106e-06, "loss": 0.3004, "step": 5283 }, { "epoch": 0.4227115457690846, "grad_norm": 0.34259184711845087, "learning_rate": 9.162334555570025e-06, "loss": 0.3025, "step": 5284 }, { "epoch": 0.42279154416911663, "grad_norm": 0.29961308736486125, "learning_rate": 9.161975618126411e-06, "loss": 0.2627, "step": 5285 }, { "epoch": 0.4228715425691486, "grad_norm": 0.3346393116663815, "learning_rate": 9.161616610831287e-06, "loss": 0.2746, "step": 5286 }, { "epoch": 0.42295154096918064, "grad_norm": 0.2691011270747603, "learning_rate": 9.161257533690682e-06, "loss": 0.3044, "step": 5287 }, { "epoch": 0.4230315393692126, "grad_norm": 0.29726820511238716, "learning_rate": 9.160898386710619e-06, "loss": 0.2902, "step": 5288 }, { "epoch": 0.4231115377692446, "grad_norm": 0.3399868764615308, "learning_rate": 9.160539169897126e-06, "loss": 0.2736, "step": 5289 }, { "epoch": 0.4231915361692766, "grad_norm": 0.27436839261924617, "learning_rate": 9.160179883256233e-06, "loss": 0.3123, "step": 5290 }, { "epoch": 0.4232715345693086, "grad_norm": 0.9101902372283796, "learning_rate": 9.15982052679397e-06, "loss": 0.2602, "step": 5291 }, { "epoch": 0.42335153296934064, "grad_norm": 0.33587211563704966, "learning_rate": 9.159461100516367e-06, "loss": 0.2401, "step": 5292 }, { "epoch": 0.4234315313693726, "grad_norm": 0.3159690901194471, "learning_rate": 9.15910160442946e-06, "loss": 0.278, "step": 5293 }, { "epoch": 0.4235115297694046, "grad_norm": 0.34387493962237814, "learning_rate": 9.158742038539275e-06, "loss": 0.2708, "step": 5294 }, { "epoch": 0.4235915281694366, "grad_norm": 0.31554018735901446, "learning_rate": 9.158382402851854e-06, "loss": 0.2576, "step": 5295 }, { "epoch": 0.4236715265694686, "grad_norm": 0.3359679760212402, "learning_rate": 9.15802269737323e-06, "loss": 0.2702, "step": 5296 }, { "epoch": 0.42375152496950064, "grad_norm": 0.2717092147292755, "learning_rate": 9.15766292210944e-06, "loss": 0.2787, "step": 5297 }, { "epoch": 0.4238315233695326, "grad_norm": 0.3656748160507865, "learning_rate": 9.157303077066523e-06, "loss": 0.2895, "step": 5298 }, { "epoch": 0.4239115217695646, "grad_norm": 0.3373646002280782, "learning_rate": 9.156943162250516e-06, "loss": 0.2729, "step": 5299 }, { "epoch": 0.4239915201695966, "grad_norm": 0.3012919391379303, "learning_rate": 9.156583177667464e-06, "loss": 0.2723, "step": 5300 }, { "epoch": 0.4240715185696286, "grad_norm": 0.25234126586826544, "learning_rate": 9.156223123323405e-06, "loss": 0.282, "step": 5301 }, { "epoch": 0.4241515169696606, "grad_norm": 0.2577952385033093, "learning_rate": 9.15586299922438e-06, "loss": 0.3347, "step": 5302 }, { "epoch": 0.4242315153696926, "grad_norm": 0.37470584741579593, "learning_rate": 9.155502805376439e-06, "loss": 0.2638, "step": 5303 }, { "epoch": 0.4243115137697246, "grad_norm": 0.16892465842934803, "learning_rate": 9.155142541785624e-06, "loss": 0.3695, "step": 5304 }, { "epoch": 0.4243915121697566, "grad_norm": 0.26403641191913835, "learning_rate": 9.154782208457981e-06, "loss": 0.3054, "step": 5305 }, { "epoch": 0.4244715105697886, "grad_norm": 0.2468957529614833, "learning_rate": 9.154421805399561e-06, "loss": 0.3351, "step": 5306 }, { "epoch": 0.4245515089698206, "grad_norm": 0.35693101654377846, "learning_rate": 9.154061332616407e-06, "loss": 0.2625, "step": 5307 }, { "epoch": 0.4246315073698526, "grad_norm": 0.3050144741955356, "learning_rate": 9.153700790114573e-06, "loss": 0.2388, "step": 5308 }, { "epoch": 0.4247115057698846, "grad_norm": 0.3101898650006513, "learning_rate": 9.153340177900108e-06, "loss": 0.3181, "step": 5309 }, { "epoch": 0.4247915041699166, "grad_norm": 0.3111720657209628, "learning_rate": 9.152979495979064e-06, "loss": 0.2494, "step": 5310 }, { "epoch": 0.4248715025699486, "grad_norm": 0.28244221197244207, "learning_rate": 9.152618744357498e-06, "loss": 0.2908, "step": 5311 }, { "epoch": 0.4249515009699806, "grad_norm": 0.3051308415489462, "learning_rate": 9.15225792304146e-06, "loss": 0.2561, "step": 5312 }, { "epoch": 0.4250314993700126, "grad_norm": 0.27985499237892836, "learning_rate": 9.15189703203701e-06, "loss": 0.2839, "step": 5313 }, { "epoch": 0.4251114977700446, "grad_norm": 0.3093332654169806, "learning_rate": 9.1515360713502e-06, "loss": 0.2527, "step": 5314 }, { "epoch": 0.4251914961700766, "grad_norm": 0.2025541211873972, "learning_rate": 9.151175040987094e-06, "loss": 0.3633, "step": 5315 }, { "epoch": 0.4252714945701086, "grad_norm": 0.3040913820126117, "learning_rate": 9.150813940953747e-06, "loss": 0.3197, "step": 5316 }, { "epoch": 0.4253514929701406, "grad_norm": 0.2702027936223868, "learning_rate": 9.15045277125622e-06, "loss": 0.3164, "step": 5317 }, { "epoch": 0.4254314913701726, "grad_norm": 0.23403963520163276, "learning_rate": 9.150091531900576e-06, "loss": 0.3212, "step": 5318 }, { "epoch": 0.4255114897702046, "grad_norm": 0.2527400693331135, "learning_rate": 9.149730222892876e-06, "loss": 0.2905, "step": 5319 }, { "epoch": 0.4255914881702366, "grad_norm": 0.2875071502672263, "learning_rate": 9.149368844239185e-06, "loss": 0.2735, "step": 5320 }, { "epoch": 0.4256714865702686, "grad_norm": 0.2655333240204377, "learning_rate": 9.149007395945569e-06, "loss": 0.2849, "step": 5321 }, { "epoch": 0.4257514849703006, "grad_norm": 0.32093676377340474, "learning_rate": 9.148645878018092e-06, "loss": 0.2797, "step": 5322 }, { "epoch": 0.4258314833703326, "grad_norm": 0.35001812965126505, "learning_rate": 9.148284290462825e-06, "loss": 0.2927, "step": 5323 }, { "epoch": 0.4259114817703646, "grad_norm": 0.28671191560457954, "learning_rate": 9.147922633285832e-06, "loss": 0.2888, "step": 5324 }, { "epoch": 0.4259914801703966, "grad_norm": 0.32575341804565866, "learning_rate": 9.147560906493189e-06, "loss": 0.2614, "step": 5325 }, { "epoch": 0.4260714785704286, "grad_norm": 0.3110247774504736, "learning_rate": 9.14719911009096e-06, "loss": 0.2849, "step": 5326 }, { "epoch": 0.4261514769704606, "grad_norm": 0.4078771336313896, "learning_rate": 9.14683724408522e-06, "loss": 0.2829, "step": 5327 }, { "epoch": 0.4262314753704926, "grad_norm": 0.32315447542311293, "learning_rate": 9.146475308482043e-06, "loss": 0.2848, "step": 5328 }, { "epoch": 0.4263114737705246, "grad_norm": 0.2836332579485243, "learning_rate": 9.146113303287503e-06, "loss": 0.298, "step": 5329 }, { "epoch": 0.42639147217055656, "grad_norm": 0.32001439444728585, "learning_rate": 9.145751228507677e-06, "loss": 0.2417, "step": 5330 }, { "epoch": 0.4264714705705886, "grad_norm": 0.23781742672835954, "learning_rate": 9.14538908414864e-06, "loss": 0.326, "step": 5331 }, { "epoch": 0.4265514689706206, "grad_norm": 0.28253292809151725, "learning_rate": 9.145026870216469e-06, "loss": 0.2649, "step": 5332 }, { "epoch": 0.4266314673706526, "grad_norm": 0.4590546957217334, "learning_rate": 9.144664586717246e-06, "loss": 0.2387, "step": 5333 }, { "epoch": 0.4267114657706846, "grad_norm": 0.27281238004416963, "learning_rate": 9.14430223365705e-06, "loss": 0.3024, "step": 5334 }, { "epoch": 0.42679146417071656, "grad_norm": 0.2657652222632267, "learning_rate": 9.14393981104196e-06, "loss": 0.2658, "step": 5335 }, { "epoch": 0.4268714625707486, "grad_norm": 0.25597808752190165, "learning_rate": 9.143577318878062e-06, "loss": 0.2843, "step": 5336 }, { "epoch": 0.42695146097078057, "grad_norm": 0.30115816032943915, "learning_rate": 9.14321475717144e-06, "loss": 0.2711, "step": 5337 }, { "epoch": 0.4270314593708126, "grad_norm": 0.24670215973009077, "learning_rate": 9.142852125928177e-06, "loss": 0.3179, "step": 5338 }, { "epoch": 0.4271114577708446, "grad_norm": 0.4722351211278366, "learning_rate": 9.14248942515436e-06, "loss": 0.2657, "step": 5339 }, { "epoch": 0.42719145617087656, "grad_norm": 0.2721677351306439, "learning_rate": 9.142126654856075e-06, "loss": 0.3002, "step": 5340 }, { "epoch": 0.4272714545709086, "grad_norm": 0.2859034552913096, "learning_rate": 9.141763815039413e-06, "loss": 0.2668, "step": 5341 }, { "epoch": 0.42735145297094057, "grad_norm": 0.27799070924290403, "learning_rate": 9.141400905710462e-06, "loss": 0.2717, "step": 5342 }, { "epoch": 0.4274314513709726, "grad_norm": 0.33913875873941696, "learning_rate": 9.141037926875312e-06, "loss": 0.2616, "step": 5343 }, { "epoch": 0.4275114497710046, "grad_norm": 0.2305010779992306, "learning_rate": 9.140674878540056e-06, "loss": 0.3057, "step": 5344 }, { "epoch": 0.42759144817103656, "grad_norm": 0.27543315966859977, "learning_rate": 9.140311760710788e-06, "loss": 0.271, "step": 5345 }, { "epoch": 0.4276714465710686, "grad_norm": 0.33658752780313894, "learning_rate": 9.139948573393602e-06, "loss": 0.2534, "step": 5346 }, { "epoch": 0.42775144497110057, "grad_norm": 0.30381141325256994, "learning_rate": 9.139585316594592e-06, "loss": 0.2638, "step": 5347 }, { "epoch": 0.4278314433711326, "grad_norm": 0.3013122533527082, "learning_rate": 9.139221990319856e-06, "loss": 0.2819, "step": 5348 }, { "epoch": 0.4279114417711646, "grad_norm": 0.25248263189537096, "learning_rate": 9.13885859457549e-06, "loss": 0.315, "step": 5349 }, { "epoch": 0.42799144017119656, "grad_norm": 0.272330768502305, "learning_rate": 9.138495129367595e-06, "loss": 0.3022, "step": 5350 }, { "epoch": 0.4280714385712286, "grad_norm": 0.35587849253476533, "learning_rate": 9.13813159470227e-06, "loss": 0.2586, "step": 5351 }, { "epoch": 0.42815143697126057, "grad_norm": 0.32759523312285027, "learning_rate": 9.137767990585618e-06, "loss": 0.2603, "step": 5352 }, { "epoch": 0.4282314353712926, "grad_norm": 0.21961314895895181, "learning_rate": 9.137404317023738e-06, "loss": 0.3434, "step": 5353 }, { "epoch": 0.4283114337713246, "grad_norm": 0.36331792561609977, "learning_rate": 9.13704057402274e-06, "loss": 0.2847, "step": 5354 }, { "epoch": 0.42839143217135656, "grad_norm": 0.48818216252520774, "learning_rate": 9.13667676158872e-06, "loss": 0.2597, "step": 5355 }, { "epoch": 0.4284714305713886, "grad_norm": 0.2814787227297044, "learning_rate": 9.136312879727791e-06, "loss": 0.3299, "step": 5356 }, { "epoch": 0.42855142897142057, "grad_norm": 0.2751027231612159, "learning_rate": 9.135948928446057e-06, "loss": 0.2842, "step": 5357 }, { "epoch": 0.42863142737145254, "grad_norm": 0.430150543863991, "learning_rate": 9.135584907749627e-06, "loss": 0.2784, "step": 5358 }, { "epoch": 0.4287114257714846, "grad_norm": 0.29316039071201766, "learning_rate": 9.13522081764461e-06, "loss": 0.2353, "step": 5359 }, { "epoch": 0.42879142417151656, "grad_norm": 0.3212379296611071, "learning_rate": 9.134856658137118e-06, "loss": 0.2881, "step": 5360 }, { "epoch": 0.4288714225715486, "grad_norm": 0.2476914429326272, "learning_rate": 9.134492429233262e-06, "loss": 0.3061, "step": 5361 }, { "epoch": 0.42895142097158057, "grad_norm": 0.2934879083485212, "learning_rate": 9.134128130939153e-06, "loss": 0.2837, "step": 5362 }, { "epoch": 0.42903141937161254, "grad_norm": 0.34030904430854375, "learning_rate": 9.133763763260907e-06, "loss": 0.257, "step": 5363 }, { "epoch": 0.4291114177716446, "grad_norm": 0.24010735103412736, "learning_rate": 9.13339932620464e-06, "loss": 0.342, "step": 5364 }, { "epoch": 0.42919141617167655, "grad_norm": 0.26883467146039147, "learning_rate": 9.133034819776469e-06, "loss": 0.2885, "step": 5365 }, { "epoch": 0.4292714145717086, "grad_norm": 0.3028333320526651, "learning_rate": 9.132670243982509e-06, "loss": 0.2951, "step": 5366 }, { "epoch": 0.42935141297174056, "grad_norm": 0.2815714700966718, "learning_rate": 9.13230559882888e-06, "loss": 0.2778, "step": 5367 }, { "epoch": 0.42943141137177254, "grad_norm": 0.2873876960054808, "learning_rate": 9.131940884321702e-06, "loss": 0.2619, "step": 5368 }, { "epoch": 0.4295114097718046, "grad_norm": 0.3695986160635828, "learning_rate": 9.131576100467095e-06, "loss": 0.2628, "step": 5369 }, { "epoch": 0.42959140817183655, "grad_norm": 0.23247133757044536, "learning_rate": 9.131211247271184e-06, "loss": 0.3166, "step": 5370 }, { "epoch": 0.4296714065718686, "grad_norm": 0.27384089889368796, "learning_rate": 9.130846324740087e-06, "loss": 0.2858, "step": 5371 }, { "epoch": 0.42975140497190056, "grad_norm": 0.31825890381581784, "learning_rate": 9.130481332879936e-06, "loss": 0.2642, "step": 5372 }, { "epoch": 0.42983140337193254, "grad_norm": 0.3048222134609976, "learning_rate": 9.130116271696851e-06, "loss": 0.2445, "step": 5373 }, { "epoch": 0.4299114017719646, "grad_norm": 0.3424479237750182, "learning_rate": 9.129751141196963e-06, "loss": 0.2744, "step": 5374 }, { "epoch": 0.42999140017199655, "grad_norm": 0.2655349312138144, "learning_rate": 9.129385941386397e-06, "loss": 0.3161, "step": 5375 }, { "epoch": 0.4300713985720286, "grad_norm": 0.3455002075239909, "learning_rate": 9.129020672271283e-06, "loss": 0.2685, "step": 5376 }, { "epoch": 0.43015139697206056, "grad_norm": 0.3726921797393575, "learning_rate": 9.128655333857751e-06, "loss": 0.2787, "step": 5377 }, { "epoch": 0.43023139537209254, "grad_norm": 0.2507191399964659, "learning_rate": 9.128289926151935e-06, "loss": 0.273, "step": 5378 }, { "epoch": 0.4303113937721246, "grad_norm": 0.2938005621743509, "learning_rate": 9.127924449159966e-06, "loss": 0.2512, "step": 5379 }, { "epoch": 0.43039139217215655, "grad_norm": 0.3182788486296455, "learning_rate": 9.127558902887976e-06, "loss": 0.2828, "step": 5380 }, { "epoch": 0.4304713905721886, "grad_norm": 0.2808318401685348, "learning_rate": 9.127193287342103e-06, "loss": 0.2803, "step": 5381 }, { "epoch": 0.43055138897222056, "grad_norm": 0.3024134817342552, "learning_rate": 9.126827602528482e-06, "loss": 0.2559, "step": 5382 }, { "epoch": 0.43063138737225254, "grad_norm": 0.3258480355031198, "learning_rate": 9.12646184845325e-06, "loss": 0.2546, "step": 5383 }, { "epoch": 0.43071138577228457, "grad_norm": 0.25189855527432087, "learning_rate": 9.126096025122548e-06, "loss": 0.3065, "step": 5384 }, { "epoch": 0.43079138417231655, "grad_norm": 0.23514439414586483, "learning_rate": 9.125730132542511e-06, "loss": 0.3104, "step": 5385 }, { "epoch": 0.4308713825723485, "grad_norm": 0.2702422154633485, "learning_rate": 9.125364170719284e-06, "loss": 0.3144, "step": 5386 }, { "epoch": 0.43095138097238056, "grad_norm": 0.266598087977465, "learning_rate": 9.124998139659009e-06, "loss": 0.2828, "step": 5387 }, { "epoch": 0.43103137937241254, "grad_norm": 0.2953915500486957, "learning_rate": 9.124632039367826e-06, "loss": 0.2562, "step": 5388 }, { "epoch": 0.43111137777244457, "grad_norm": 0.30378048699631005, "learning_rate": 9.124265869851882e-06, "loss": 0.2622, "step": 5389 }, { "epoch": 0.43119137617247655, "grad_norm": 0.26777484068919577, "learning_rate": 9.123899631117322e-06, "loss": 0.3386, "step": 5390 }, { "epoch": 0.4312713745725085, "grad_norm": 0.26831250017963326, "learning_rate": 9.12353332317029e-06, "loss": 0.2788, "step": 5391 }, { "epoch": 0.43135137297254056, "grad_norm": 0.19623543668312327, "learning_rate": 9.12316694601694e-06, "loss": 0.3341, "step": 5392 }, { "epoch": 0.43143137137257254, "grad_norm": 0.2612742401543717, "learning_rate": 9.122800499663414e-06, "loss": 0.2928, "step": 5393 }, { "epoch": 0.43151136977260457, "grad_norm": 0.3020648450479614, "learning_rate": 9.122433984115868e-06, "loss": 0.2618, "step": 5394 }, { "epoch": 0.43159136817263655, "grad_norm": 0.3680965398742099, "learning_rate": 9.12206739938045e-06, "loss": 0.2601, "step": 5395 }, { "epoch": 0.4316713665726685, "grad_norm": 0.2768480409270497, "learning_rate": 9.121700745463312e-06, "loss": 0.2964, "step": 5396 }, { "epoch": 0.43175136497270056, "grad_norm": 0.2963873015123927, "learning_rate": 9.12133402237061e-06, "loss": 0.2678, "step": 5397 }, { "epoch": 0.43183136337273254, "grad_norm": 0.2726660713855438, "learning_rate": 9.120967230108497e-06, "loss": 0.2933, "step": 5398 }, { "epoch": 0.43191136177276457, "grad_norm": 0.2312359857809251, "learning_rate": 9.12060036868313e-06, "loss": 0.3143, "step": 5399 }, { "epoch": 0.43199136017279655, "grad_norm": 0.2673499587560985, "learning_rate": 9.120233438100665e-06, "loss": 0.2794, "step": 5400 }, { "epoch": 0.4320713585728285, "grad_norm": 0.2712611159090724, "learning_rate": 9.119866438367263e-06, "loss": 0.2822, "step": 5401 }, { "epoch": 0.43215135697286056, "grad_norm": 0.30234296272936134, "learning_rate": 9.11949936948908e-06, "loss": 0.2975, "step": 5402 }, { "epoch": 0.43223135537289253, "grad_norm": 0.24034325263906536, "learning_rate": 9.119132231472278e-06, "loss": 0.3397, "step": 5403 }, { "epoch": 0.43231135377292457, "grad_norm": 0.3027450115663048, "learning_rate": 9.118765024323021e-06, "loss": 0.2933, "step": 5404 }, { "epoch": 0.43239135217295654, "grad_norm": 0.2549203469124061, "learning_rate": 9.118397748047467e-06, "loss": 0.3243, "step": 5405 }, { "epoch": 0.4324713505729885, "grad_norm": 0.21570434977751268, "learning_rate": 9.118030402651786e-06, "loss": 0.3266, "step": 5406 }, { "epoch": 0.43255134897302056, "grad_norm": 0.29268030067956136, "learning_rate": 9.117662988142138e-06, "loss": 0.2609, "step": 5407 }, { "epoch": 0.43263134737305253, "grad_norm": 0.32298425445107, "learning_rate": 9.117295504524692e-06, "loss": 0.2513, "step": 5408 }, { "epoch": 0.43271134577308457, "grad_norm": 0.25877497877995925, "learning_rate": 9.116927951805615e-06, "loss": 0.2925, "step": 5409 }, { "epoch": 0.43279134417311654, "grad_norm": 0.3065392139581013, "learning_rate": 9.116560329991077e-06, "loss": 0.2436, "step": 5410 }, { "epoch": 0.4328713425731485, "grad_norm": 0.24664053903373473, "learning_rate": 9.116192639087245e-06, "loss": 0.3231, "step": 5411 }, { "epoch": 0.43295134097318055, "grad_norm": 0.32261343504936224, "learning_rate": 9.115824879100294e-06, "loss": 0.2695, "step": 5412 }, { "epoch": 0.43303133937321253, "grad_norm": 0.2909319003310747, "learning_rate": 9.115457050036393e-06, "loss": 0.2798, "step": 5413 }, { "epoch": 0.4331113377732445, "grad_norm": 0.3057968980457586, "learning_rate": 9.115089151901715e-06, "loss": 0.2854, "step": 5414 }, { "epoch": 0.43319133617327654, "grad_norm": 0.2213846996831468, "learning_rate": 9.11472118470244e-06, "loss": 0.301, "step": 5415 }, { "epoch": 0.4332713345733085, "grad_norm": 0.32188464289406854, "learning_rate": 9.114353148444735e-06, "loss": 0.2649, "step": 5416 }, { "epoch": 0.43335133297334055, "grad_norm": 0.3066453535439091, "learning_rate": 9.113985043134784e-06, "loss": 0.3019, "step": 5417 }, { "epoch": 0.43343133137337253, "grad_norm": 0.30687210335486925, "learning_rate": 9.113616868778762e-06, "loss": 0.2613, "step": 5418 }, { "epoch": 0.4335113297734045, "grad_norm": 0.31677425915505125, "learning_rate": 9.113248625382849e-06, "loss": 0.2914, "step": 5419 }, { "epoch": 0.43359132817343654, "grad_norm": 0.27827872711784385, "learning_rate": 9.112880312953225e-06, "loss": 0.2957, "step": 5420 }, { "epoch": 0.4336713265734685, "grad_norm": 0.3176231716308831, "learning_rate": 9.11251193149607e-06, "loss": 0.2651, "step": 5421 }, { "epoch": 0.43375132497350055, "grad_norm": 0.24210515622266685, "learning_rate": 9.112143481017571e-06, "loss": 0.3104, "step": 5422 }, { "epoch": 0.43383132337353253, "grad_norm": 0.2753839470802295, "learning_rate": 9.111774961523906e-06, "loss": 0.2993, "step": 5423 }, { "epoch": 0.4339113217735645, "grad_norm": 0.3248365130189086, "learning_rate": 9.111406373021264e-06, "loss": 0.2472, "step": 5424 }, { "epoch": 0.43399132017359654, "grad_norm": 0.2847559163264054, "learning_rate": 9.11103771551583e-06, "loss": 0.2853, "step": 5425 }, { "epoch": 0.4340713185736285, "grad_norm": 0.28817518942233566, "learning_rate": 9.11066898901379e-06, "loss": 0.2956, "step": 5426 }, { "epoch": 0.43415131697366055, "grad_norm": 0.3169411756066201, "learning_rate": 9.110300193521336e-06, "loss": 0.3388, "step": 5427 }, { "epoch": 0.43423131537369253, "grad_norm": 0.2911984102301359, "learning_rate": 9.109931329044655e-06, "loss": 0.294, "step": 5428 }, { "epoch": 0.4343113137737245, "grad_norm": 0.307404375575855, "learning_rate": 9.109562395589937e-06, "loss": 0.2513, "step": 5429 }, { "epoch": 0.43439131217375654, "grad_norm": 0.20205107830849464, "learning_rate": 9.109193393163377e-06, "loss": 0.3667, "step": 5430 }, { "epoch": 0.4344713105737885, "grad_norm": 0.34867706845160945, "learning_rate": 9.108824321771163e-06, "loss": 0.2711, "step": 5431 }, { "epoch": 0.43455130897382055, "grad_norm": 0.2751137364604493, "learning_rate": 9.108455181419493e-06, "loss": 0.2983, "step": 5432 }, { "epoch": 0.4346313073738525, "grad_norm": 0.23793040387150152, "learning_rate": 9.108085972114563e-06, "loss": 0.3105, "step": 5433 }, { "epoch": 0.4347113057738845, "grad_norm": 0.25447894869290316, "learning_rate": 9.107716693862566e-06, "loss": 0.2879, "step": 5434 }, { "epoch": 0.43479130417391654, "grad_norm": 0.2894078075106074, "learning_rate": 9.107347346669705e-06, "loss": 0.2989, "step": 5435 }, { "epoch": 0.4348713025739485, "grad_norm": 0.2477062605366648, "learning_rate": 9.106977930542171e-06, "loss": 0.3391, "step": 5436 }, { "epoch": 0.43495130097398055, "grad_norm": 0.34301639426146047, "learning_rate": 9.106608445486171e-06, "loss": 0.3217, "step": 5437 }, { "epoch": 0.4350312993740125, "grad_norm": 0.23364256104013012, "learning_rate": 9.106238891507906e-06, "loss": 0.3091, "step": 5438 }, { "epoch": 0.4351112977740445, "grad_norm": 1.631504738519471, "learning_rate": 9.105869268613574e-06, "loss": 0.2439, "step": 5439 }, { "epoch": 0.43519129617407654, "grad_norm": 0.2945974063359576, "learning_rate": 9.10549957680938e-06, "loss": 0.234, "step": 5440 }, { "epoch": 0.4352712945741085, "grad_norm": 0.30557953524840054, "learning_rate": 9.105129816101531e-06, "loss": 0.2615, "step": 5441 }, { "epoch": 0.4353512929741405, "grad_norm": 0.32975962712134294, "learning_rate": 9.10475998649623e-06, "loss": 0.2752, "step": 5442 }, { "epoch": 0.4354312913741725, "grad_norm": 0.2920265303935656, "learning_rate": 9.104390087999686e-06, "loss": 0.3136, "step": 5443 }, { "epoch": 0.4355112897742045, "grad_norm": 0.2961220762770864, "learning_rate": 9.104020120618104e-06, "loss": 0.2751, "step": 5444 }, { "epoch": 0.43559128817423654, "grad_norm": 0.30111999838139236, "learning_rate": 9.103650084357697e-06, "loss": 0.2673, "step": 5445 }, { "epoch": 0.4356712865742685, "grad_norm": 0.3337355756468021, "learning_rate": 9.103279979224676e-06, "loss": 0.2756, "step": 5446 }, { "epoch": 0.4357512849743005, "grad_norm": 0.2960697191209259, "learning_rate": 9.102909805225246e-06, "loss": 0.2699, "step": 5447 }, { "epoch": 0.4358312833743325, "grad_norm": 0.25538927646094045, "learning_rate": 9.102539562365626e-06, "loss": 0.3185, "step": 5448 }, { "epoch": 0.4359112817743645, "grad_norm": 0.3067771826715026, "learning_rate": 9.102169250652029e-06, "loss": 0.2631, "step": 5449 }, { "epoch": 0.43599128017439653, "grad_norm": 0.34250525373737395, "learning_rate": 9.101798870090667e-06, "loss": 0.2883, "step": 5450 }, { "epoch": 0.4360712785744285, "grad_norm": 0.29995039635118365, "learning_rate": 9.101428420687759e-06, "loss": 0.2667, "step": 5451 }, { "epoch": 0.4361512769744605, "grad_norm": 0.32816371536701494, "learning_rate": 9.101057902449523e-06, "loss": 0.2412, "step": 5452 }, { "epoch": 0.4362312753744925, "grad_norm": 0.33348193996199105, "learning_rate": 9.100687315382174e-06, "loss": 0.2963, "step": 5453 }, { "epoch": 0.4363112737745245, "grad_norm": 0.29124816195568953, "learning_rate": 9.100316659491935e-06, "loss": 0.3201, "step": 5454 }, { "epoch": 0.43639127217455653, "grad_norm": 0.28815725984958035, "learning_rate": 9.099945934785026e-06, "loss": 0.3166, "step": 5455 }, { "epoch": 0.4364712705745885, "grad_norm": 0.29327413954104725, "learning_rate": 9.099575141267667e-06, "loss": 0.2473, "step": 5456 }, { "epoch": 0.4365512689746205, "grad_norm": 0.3364885479265867, "learning_rate": 9.099204278946083e-06, "loss": 0.248, "step": 5457 }, { "epoch": 0.4366312673746525, "grad_norm": 0.29817036655446233, "learning_rate": 9.098833347826497e-06, "loss": 0.2566, "step": 5458 }, { "epoch": 0.4367112657746845, "grad_norm": 0.3058383803043822, "learning_rate": 9.098462347915136e-06, "loss": 0.2764, "step": 5459 }, { "epoch": 0.43679126417471653, "grad_norm": 0.22133487702698182, "learning_rate": 9.098091279218227e-06, "loss": 0.305, "step": 5460 }, { "epoch": 0.4368712625747485, "grad_norm": 0.23328280040374874, "learning_rate": 9.097720141741994e-06, "loss": 0.3428, "step": 5461 }, { "epoch": 0.4369512609747805, "grad_norm": 0.3785764928316921, "learning_rate": 9.097348935492672e-06, "loss": 0.2679, "step": 5462 }, { "epoch": 0.4370312593748125, "grad_norm": 0.2542034275789129, "learning_rate": 9.096977660476485e-06, "loss": 0.3213, "step": 5463 }, { "epoch": 0.4371112577748445, "grad_norm": 0.3080388803389258, "learning_rate": 9.096606316699668e-06, "loss": 0.2837, "step": 5464 }, { "epoch": 0.4371912561748765, "grad_norm": 0.27497843638813996, "learning_rate": 9.096234904168451e-06, "loss": 0.251, "step": 5465 }, { "epoch": 0.4372712545749085, "grad_norm": 0.27779179506706336, "learning_rate": 9.09586342288907e-06, "loss": 0.299, "step": 5466 }, { "epoch": 0.4373512529749405, "grad_norm": 0.30373462882419044, "learning_rate": 9.095491872867757e-06, "loss": 0.2484, "step": 5467 }, { "epoch": 0.4374312513749725, "grad_norm": 0.28118529768435446, "learning_rate": 9.09512025411075e-06, "loss": 0.2984, "step": 5468 }, { "epoch": 0.4375112497750045, "grad_norm": 0.26226498576800605, "learning_rate": 9.094748566624285e-06, "loss": 0.2875, "step": 5469 }, { "epoch": 0.4375912481750365, "grad_norm": 0.29822436571078753, "learning_rate": 9.0943768104146e-06, "loss": 0.2768, "step": 5470 }, { "epoch": 0.4376712465750685, "grad_norm": 0.302418451220066, "learning_rate": 9.094004985487935e-06, "loss": 0.2549, "step": 5471 }, { "epoch": 0.4377512449751005, "grad_norm": 0.27054080486850507, "learning_rate": 9.09363309185053e-06, "loss": 0.2766, "step": 5472 }, { "epoch": 0.4378312433751325, "grad_norm": 0.2865973039695305, "learning_rate": 9.093261129508625e-06, "loss": 0.2994, "step": 5473 }, { "epoch": 0.4379112417751645, "grad_norm": 0.2858088220550337, "learning_rate": 9.092889098468467e-06, "loss": 0.2739, "step": 5474 }, { "epoch": 0.4379912401751965, "grad_norm": 0.33741181188868535, "learning_rate": 9.092516998736296e-06, "loss": 0.2599, "step": 5475 }, { "epoch": 0.4380712385752285, "grad_norm": 0.3233741086896138, "learning_rate": 9.092144830318357e-06, "loss": 0.3152, "step": 5476 }, { "epoch": 0.4381512369752605, "grad_norm": 0.27549994394537264, "learning_rate": 9.0917725932209e-06, "loss": 0.3089, "step": 5477 }, { "epoch": 0.4382312353752925, "grad_norm": 0.232507032761216, "learning_rate": 9.091400287450167e-06, "loss": 0.3152, "step": 5478 }, { "epoch": 0.4383112337753245, "grad_norm": 0.3335390863454883, "learning_rate": 9.091027913012411e-06, "loss": 0.2396, "step": 5479 }, { "epoch": 0.43839123217535647, "grad_norm": 0.30545173646632695, "learning_rate": 9.09065546991388e-06, "loss": 0.2622, "step": 5480 }, { "epoch": 0.4384712305753885, "grad_norm": 0.272215119987308, "learning_rate": 9.090282958160823e-06, "loss": 0.2861, "step": 5481 }, { "epoch": 0.4385512289754205, "grad_norm": 0.2754642449551888, "learning_rate": 9.089910377759494e-06, "loss": 0.3163, "step": 5482 }, { "epoch": 0.4386312273754525, "grad_norm": 0.29689513007836654, "learning_rate": 9.089537728716147e-06, "loss": 0.2845, "step": 5483 }, { "epoch": 0.4387112257754845, "grad_norm": 0.3039318864007909, "learning_rate": 9.089165011037036e-06, "loss": 0.2566, "step": 5484 }, { "epoch": 0.43879122417551647, "grad_norm": 0.29171957970357143, "learning_rate": 9.088792224728413e-06, "loss": 0.3131, "step": 5485 }, { "epoch": 0.4388712225755485, "grad_norm": 0.22508671554069734, "learning_rate": 9.088419369796539e-06, "loss": 0.3577, "step": 5486 }, { "epoch": 0.4389512209755805, "grad_norm": 0.2959611048981429, "learning_rate": 9.08804644624767e-06, "loss": 0.2882, "step": 5487 }, { "epoch": 0.4390312193756125, "grad_norm": 0.38072388353906433, "learning_rate": 9.087673454088063e-06, "loss": 0.2682, "step": 5488 }, { "epoch": 0.4391112177756445, "grad_norm": 0.31602366480939253, "learning_rate": 9.08730039332398e-06, "loss": 0.322, "step": 5489 }, { "epoch": 0.43919121617567647, "grad_norm": 0.2642153685219187, "learning_rate": 9.086927263961682e-06, "loss": 0.298, "step": 5490 }, { "epoch": 0.4392712145757085, "grad_norm": 0.2878770300066201, "learning_rate": 9.08655406600743e-06, "loss": 0.2881, "step": 5491 }, { "epoch": 0.4393512129757405, "grad_norm": 0.3126396548096743, "learning_rate": 9.086180799467492e-06, "loss": 0.2482, "step": 5492 }, { "epoch": 0.43943121137577246, "grad_norm": 0.26486259266923773, "learning_rate": 9.085807464348127e-06, "loss": 0.2987, "step": 5493 }, { "epoch": 0.4395112097758045, "grad_norm": 0.2988306457756422, "learning_rate": 9.085434060655603e-06, "loss": 0.2977, "step": 5494 }, { "epoch": 0.43959120817583647, "grad_norm": 0.3540172944750142, "learning_rate": 9.085060588396188e-06, "loss": 0.2429, "step": 5495 }, { "epoch": 0.4396712065758685, "grad_norm": 0.270444004448149, "learning_rate": 9.084687047576149e-06, "loss": 0.2716, "step": 5496 }, { "epoch": 0.4397512049759005, "grad_norm": 0.27306499461729083, "learning_rate": 9.084313438201754e-06, "loss": 0.2757, "step": 5497 }, { "epoch": 0.43983120337593246, "grad_norm": 0.3179994182485001, "learning_rate": 9.083939760279275e-06, "loss": 0.2568, "step": 5498 }, { "epoch": 0.4399112017759645, "grad_norm": 0.2970879780847911, "learning_rate": 9.083566013814985e-06, "loss": 0.2947, "step": 5499 }, { "epoch": 0.43999120017599647, "grad_norm": 0.21804543829080744, "learning_rate": 9.083192198815154e-06, "loss": 0.35, "step": 5500 }, { "epoch": 0.4400711985760285, "grad_norm": 0.34285352397650765, "learning_rate": 9.082818315286054e-06, "loss": 0.2604, "step": 5501 }, { "epoch": 0.4401511969760605, "grad_norm": 0.2401506634097383, "learning_rate": 9.082444363233967e-06, "loss": 0.3068, "step": 5502 }, { "epoch": 0.44023119537609245, "grad_norm": 0.2572650195998039, "learning_rate": 9.082070342665163e-06, "loss": 0.2935, "step": 5503 }, { "epoch": 0.4403111937761245, "grad_norm": 0.252048732971219, "learning_rate": 9.08169625358592e-06, "loss": 0.2693, "step": 5504 }, { "epoch": 0.44039119217615647, "grad_norm": 0.2591584221631981, "learning_rate": 9.08132209600252e-06, "loss": 0.3298, "step": 5505 }, { "epoch": 0.4404711905761885, "grad_norm": 0.27080821908467057, "learning_rate": 9.080947869921238e-06, "loss": 0.2779, "step": 5506 }, { "epoch": 0.4405511889762205, "grad_norm": 0.3074359965828897, "learning_rate": 9.080573575348358e-06, "loss": 0.2657, "step": 5507 }, { "epoch": 0.44063118737625245, "grad_norm": 0.2925259111943576, "learning_rate": 9.08019921229016e-06, "loss": 0.2719, "step": 5508 }, { "epoch": 0.4407111857762845, "grad_norm": 0.3623279402143395, "learning_rate": 9.079824780752929e-06, "loss": 0.271, "step": 5509 }, { "epoch": 0.44079118417631646, "grad_norm": 0.30132961149451437, "learning_rate": 9.079450280742948e-06, "loss": 0.2569, "step": 5510 }, { "epoch": 0.4408711825763485, "grad_norm": 0.30944446287213856, "learning_rate": 9.079075712266501e-06, "loss": 0.3091, "step": 5511 }, { "epoch": 0.4409511809763805, "grad_norm": 0.31076879062229956, "learning_rate": 9.078701075329875e-06, "loss": 0.2661, "step": 5512 }, { "epoch": 0.44103117937641245, "grad_norm": 0.30565880401873535, "learning_rate": 9.078326369939361e-06, "loss": 0.2874, "step": 5513 }, { "epoch": 0.4411111777764445, "grad_norm": 0.2842776076681883, "learning_rate": 9.077951596101244e-06, "loss": 0.2993, "step": 5514 }, { "epoch": 0.44119117617647646, "grad_norm": 0.4484252539311488, "learning_rate": 9.077576753821815e-06, "loss": 0.2534, "step": 5515 }, { "epoch": 0.4412711745765085, "grad_norm": 0.3237263552254394, "learning_rate": 9.077201843107366e-06, "loss": 0.2406, "step": 5516 }, { "epoch": 0.4413511729765405, "grad_norm": 0.29212045970859557, "learning_rate": 9.076826863964188e-06, "loss": 0.309, "step": 5517 }, { "epoch": 0.44143117137657245, "grad_norm": 0.3090907303482042, "learning_rate": 9.076451816398574e-06, "loss": 0.2948, "step": 5518 }, { "epoch": 0.4415111697766045, "grad_norm": 0.29409861796757963, "learning_rate": 9.07607670041682e-06, "loss": 0.2781, "step": 5519 }, { "epoch": 0.44159116817663646, "grad_norm": 0.3320203282513383, "learning_rate": 9.075701516025219e-06, "loss": 0.2666, "step": 5520 }, { "epoch": 0.44167116657666844, "grad_norm": 0.3023415606817555, "learning_rate": 9.075326263230073e-06, "loss": 0.2676, "step": 5521 }, { "epoch": 0.44175116497670047, "grad_norm": 0.3036445613512114, "learning_rate": 9.074950942037674e-06, "loss": 0.2752, "step": 5522 }, { "epoch": 0.44183116337673245, "grad_norm": 0.48889357604376044, "learning_rate": 9.074575552454325e-06, "loss": 0.2835, "step": 5523 }, { "epoch": 0.4419111617767645, "grad_norm": 0.3114596395402801, "learning_rate": 9.074200094486327e-06, "loss": 0.2445, "step": 5524 }, { "epoch": 0.44199116017679646, "grad_norm": 0.2845483961412628, "learning_rate": 9.073824568139979e-06, "loss": 0.2735, "step": 5525 }, { "epoch": 0.44207115857682844, "grad_norm": 0.3256403819121603, "learning_rate": 9.073448973421581e-06, "loss": 0.2889, "step": 5526 }, { "epoch": 0.44215115697686047, "grad_norm": 0.3291355906724503, "learning_rate": 9.073073310337443e-06, "loss": 0.2575, "step": 5527 }, { "epoch": 0.44223115537689245, "grad_norm": 0.27322637684631557, "learning_rate": 9.072697578893865e-06, "loss": 0.306, "step": 5528 }, { "epoch": 0.4423111537769245, "grad_norm": 0.29862018104240856, "learning_rate": 9.072321779097155e-06, "loss": 0.2521, "step": 5529 }, { "epoch": 0.44239115217695646, "grad_norm": 0.2663899555691935, "learning_rate": 9.07194591095362e-06, "loss": 0.2788, "step": 5530 }, { "epoch": 0.44247115057698844, "grad_norm": 0.29449568704359064, "learning_rate": 9.071569974469569e-06, "loss": 0.2958, "step": 5531 }, { "epoch": 0.44255114897702047, "grad_norm": 0.31496612545393604, "learning_rate": 9.071193969651308e-06, "loss": 0.2616, "step": 5532 }, { "epoch": 0.44263114737705245, "grad_norm": 0.5337093576316178, "learning_rate": 9.070817896505153e-06, "loss": 0.2508, "step": 5533 }, { "epoch": 0.4427111457770845, "grad_norm": 0.2545939015832631, "learning_rate": 9.070441755037411e-06, "loss": 0.3202, "step": 5534 }, { "epoch": 0.44279114417711646, "grad_norm": 0.3555568993738711, "learning_rate": 9.0700655452544e-06, "loss": 0.2538, "step": 5535 }, { "epoch": 0.44287114257714844, "grad_norm": 0.32798846812092775, "learning_rate": 9.069689267162425e-06, "loss": 0.2793, "step": 5536 }, { "epoch": 0.44295114097718047, "grad_norm": 0.3025105824838569, "learning_rate": 9.06931292076781e-06, "loss": 0.2962, "step": 5537 }, { "epoch": 0.44303113937721245, "grad_norm": 0.3220798367232042, "learning_rate": 9.068936506076869e-06, "loss": 0.2579, "step": 5538 }, { "epoch": 0.4431111377772445, "grad_norm": 0.2544258968850388, "learning_rate": 9.068560023095917e-06, "loss": 0.3207, "step": 5539 }, { "epoch": 0.44319113617727646, "grad_norm": 0.2786166745225385, "learning_rate": 9.068183471831276e-06, "loss": 0.2806, "step": 5540 }, { "epoch": 0.44327113457730843, "grad_norm": 0.2894796650763557, "learning_rate": 9.067806852289262e-06, "loss": 0.241, "step": 5541 }, { "epoch": 0.44335113297734047, "grad_norm": 0.20226464380511605, "learning_rate": 9.067430164476201e-06, "loss": 0.3527, "step": 5542 }, { "epoch": 0.44343113137737245, "grad_norm": 0.32119690595369493, "learning_rate": 9.067053408398409e-06, "loss": 0.2555, "step": 5543 }, { "epoch": 0.4435111297774045, "grad_norm": 0.44914315103030594, "learning_rate": 9.066676584062214e-06, "loss": 0.2658, "step": 5544 }, { "epoch": 0.44359112817743646, "grad_norm": 0.33500301745631045, "learning_rate": 9.06629969147394e-06, "loss": 0.2793, "step": 5545 }, { "epoch": 0.44367112657746843, "grad_norm": 0.2707431956379634, "learning_rate": 9.065922730639906e-06, "loss": 0.2747, "step": 5546 }, { "epoch": 0.44375112497750047, "grad_norm": 0.3664944037464088, "learning_rate": 9.065545701566448e-06, "loss": 0.3078, "step": 5547 }, { "epoch": 0.44383112337753244, "grad_norm": 0.3131343809017703, "learning_rate": 9.06516860425989e-06, "loss": 0.2996, "step": 5548 }, { "epoch": 0.4439111217775644, "grad_norm": 0.33206726437611067, "learning_rate": 9.064791438726557e-06, "loss": 0.2546, "step": 5549 }, { "epoch": 0.44399112017759645, "grad_norm": 0.30318433147221946, "learning_rate": 9.064414204972784e-06, "loss": 0.2438, "step": 5550 }, { "epoch": 0.44407111857762843, "grad_norm": 0.3439310249667007, "learning_rate": 9.0640369030049e-06, "loss": 0.2604, "step": 5551 }, { "epoch": 0.44415111697766047, "grad_norm": 0.31798152020472314, "learning_rate": 9.063659532829239e-06, "loss": 0.2629, "step": 5552 }, { "epoch": 0.44423111537769244, "grad_norm": 0.2916073236850165, "learning_rate": 9.063282094452133e-06, "loss": 0.2758, "step": 5553 }, { "epoch": 0.4443111137777244, "grad_norm": 0.28494244531663493, "learning_rate": 9.062904587879917e-06, "loss": 0.2924, "step": 5554 }, { "epoch": 0.44439111217775645, "grad_norm": 0.30645940106419894, "learning_rate": 9.062527013118926e-06, "loss": 0.2552, "step": 5555 }, { "epoch": 0.44447111057778843, "grad_norm": 0.3519526889375159, "learning_rate": 9.062149370175502e-06, "loss": 0.2483, "step": 5556 }, { "epoch": 0.44455110897782046, "grad_norm": 0.3633300490177178, "learning_rate": 9.061771659055974e-06, "loss": 0.2641, "step": 5557 }, { "epoch": 0.44463110737785244, "grad_norm": 0.29064249432426426, "learning_rate": 9.061393879766688e-06, "loss": 0.2368, "step": 5558 }, { "epoch": 0.4447111057778844, "grad_norm": 0.2369138208912775, "learning_rate": 9.061016032313984e-06, "loss": 0.331, "step": 5559 }, { "epoch": 0.44479110417791645, "grad_norm": 0.30358583170583575, "learning_rate": 9.060638116704201e-06, "loss": 0.2952, "step": 5560 }, { "epoch": 0.44487110257794843, "grad_norm": 0.31303486117091034, "learning_rate": 9.060260132943682e-06, "loss": 0.2504, "step": 5561 }, { "epoch": 0.44495110097798046, "grad_norm": 0.29103839590360725, "learning_rate": 9.059882081038773e-06, "loss": 0.286, "step": 5562 }, { "epoch": 0.44503109937801244, "grad_norm": 0.22349322831935822, "learning_rate": 9.059503960995816e-06, "loss": 0.3602, "step": 5563 }, { "epoch": 0.4451110977780444, "grad_norm": 0.3293902448108499, "learning_rate": 9.059125772821158e-06, "loss": 0.2605, "step": 5564 }, { "epoch": 0.44519109617807645, "grad_norm": 0.4345774723158368, "learning_rate": 9.058747516521149e-06, "loss": 0.2438, "step": 5565 }, { "epoch": 0.44527109457810843, "grad_norm": 0.3150458258751355, "learning_rate": 9.058369192102134e-06, "loss": 0.3195, "step": 5566 }, { "epoch": 0.44535109297814046, "grad_norm": 0.25570843852966946, "learning_rate": 9.057990799570464e-06, "loss": 0.2816, "step": 5567 }, { "epoch": 0.44543109137817244, "grad_norm": 0.2783916287739102, "learning_rate": 9.05761233893249e-06, "loss": 0.2898, "step": 5568 }, { "epoch": 0.4455110897782044, "grad_norm": 0.30273883791637235, "learning_rate": 9.05723381019456e-06, "loss": 0.2406, "step": 5569 }, { "epoch": 0.44559108817823645, "grad_norm": 0.23948383113919694, "learning_rate": 9.056855213363032e-06, "loss": 0.3295, "step": 5570 }, { "epoch": 0.4456710865782684, "grad_norm": 0.2646450057218176, "learning_rate": 9.056476548444258e-06, "loss": 0.3045, "step": 5571 }, { "epoch": 0.44575108497830046, "grad_norm": 0.2693345918562001, "learning_rate": 9.056097815444593e-06, "loss": 0.3165, "step": 5572 }, { "epoch": 0.44583108337833244, "grad_norm": 0.3227580408894766, "learning_rate": 9.055719014370396e-06, "loss": 0.3002, "step": 5573 }, { "epoch": 0.4459110817783644, "grad_norm": 0.28757082386956906, "learning_rate": 9.05534014522802e-06, "loss": 0.259, "step": 5574 }, { "epoch": 0.44599108017839645, "grad_norm": 0.2300400703266702, "learning_rate": 9.054961208023827e-06, "loss": 0.2989, "step": 5575 }, { "epoch": 0.4460710785784284, "grad_norm": 0.27937683778934963, "learning_rate": 9.054582202764175e-06, "loss": 0.3353, "step": 5576 }, { "epoch": 0.4461510769784604, "grad_norm": 0.205044693315194, "learning_rate": 9.054203129455425e-06, "loss": 0.3488, "step": 5577 }, { "epoch": 0.44623107537849244, "grad_norm": 0.3239849069221907, "learning_rate": 9.053823988103943e-06, "loss": 0.2783, "step": 5578 }, { "epoch": 0.4463110737785244, "grad_norm": 0.252336336580127, "learning_rate": 9.053444778716085e-06, "loss": 0.3219, "step": 5579 }, { "epoch": 0.44639107217855645, "grad_norm": 0.3078969168285223, "learning_rate": 9.053065501298222e-06, "loss": 0.2399, "step": 5580 }, { "epoch": 0.4464710705785884, "grad_norm": 0.2687630987547738, "learning_rate": 9.052686155856716e-06, "loss": 0.2831, "step": 5581 }, { "epoch": 0.4465510689786204, "grad_norm": 0.37006433520735704, "learning_rate": 9.052306742397933e-06, "loss": 0.2637, "step": 5582 }, { "epoch": 0.44663106737865244, "grad_norm": 0.2628079482409702, "learning_rate": 9.051927260928243e-06, "loss": 0.2776, "step": 5583 }, { "epoch": 0.4467110657786844, "grad_norm": 0.29440717479146344, "learning_rate": 9.051547711454016e-06, "loss": 0.3144, "step": 5584 }, { "epoch": 0.44679106417871645, "grad_norm": 0.3300556101413221, "learning_rate": 9.051168093981619e-06, "loss": 0.2606, "step": 5585 }, { "epoch": 0.4468710625787484, "grad_norm": 0.3332302910193934, "learning_rate": 9.050788408517426e-06, "loss": 0.2585, "step": 5586 }, { "epoch": 0.4469510609787804, "grad_norm": 0.4744145968107132, "learning_rate": 9.050408655067806e-06, "loss": 0.2967, "step": 5587 }, { "epoch": 0.44703105937881243, "grad_norm": 0.2971934536629232, "learning_rate": 9.050028833639135e-06, "loss": 0.3283, "step": 5588 }, { "epoch": 0.4471110577788444, "grad_norm": 0.8355916304823727, "learning_rate": 9.049648944237788e-06, "loss": 0.2759, "step": 5589 }, { "epoch": 0.44719105617887644, "grad_norm": 0.4343972901271774, "learning_rate": 9.049268986870139e-06, "loss": 0.2748, "step": 5590 }, { "epoch": 0.4472710545789084, "grad_norm": 0.2674819603457788, "learning_rate": 9.048888961542565e-06, "loss": 0.2781, "step": 5591 }, { "epoch": 0.4473510529789404, "grad_norm": 0.38285013281028885, "learning_rate": 9.048508868261446e-06, "loss": 0.2597, "step": 5592 }, { "epoch": 0.44743105137897243, "grad_norm": 0.33413971273208976, "learning_rate": 9.048128707033159e-06, "loss": 0.2611, "step": 5593 }, { "epoch": 0.4475110497790044, "grad_norm": 0.2435886384062945, "learning_rate": 9.047748477864087e-06, "loss": 0.2789, "step": 5594 }, { "epoch": 0.44759104817903644, "grad_norm": 0.31104568939276717, "learning_rate": 9.04736818076061e-06, "loss": 0.2385, "step": 5595 }, { "epoch": 0.4476710465790684, "grad_norm": 0.3288918532548245, "learning_rate": 9.046987815729108e-06, "loss": 0.2917, "step": 5596 }, { "epoch": 0.4477510449791004, "grad_norm": 0.4474752320977135, "learning_rate": 9.04660738277597e-06, "loss": 0.2448, "step": 5597 }, { "epoch": 0.44783104337913243, "grad_norm": 0.31423169424439035, "learning_rate": 9.046226881907575e-06, "loss": 0.2916, "step": 5598 }, { "epoch": 0.4479110417791644, "grad_norm": 0.284311799031118, "learning_rate": 9.045846313130313e-06, "loss": 0.2903, "step": 5599 }, { "epoch": 0.44799104017919644, "grad_norm": 0.32099135577091886, "learning_rate": 9.045465676450572e-06, "loss": 0.2868, "step": 5600 }, { "epoch": 0.4480710385792284, "grad_norm": 0.26307008946547045, "learning_rate": 9.045084971874738e-06, "loss": 0.3011, "step": 5601 }, { "epoch": 0.4481510369792604, "grad_norm": 0.24024802222938746, "learning_rate": 9.0447041994092e-06, "loss": 0.306, "step": 5602 }, { "epoch": 0.44823103537929243, "grad_norm": 0.2737629597786706, "learning_rate": 9.044323359060352e-06, "loss": 0.2805, "step": 5603 }, { "epoch": 0.4483110337793244, "grad_norm": 0.3429648158531512, "learning_rate": 9.043942450834582e-06, "loss": 0.2673, "step": 5604 }, { "epoch": 0.4483910321793564, "grad_norm": 0.25793553174001876, "learning_rate": 9.043561474738285e-06, "loss": 0.3112, "step": 5605 }, { "epoch": 0.4484710305793884, "grad_norm": 0.289542396743031, "learning_rate": 9.043180430777854e-06, "loss": 0.272, "step": 5606 }, { "epoch": 0.4485510289794204, "grad_norm": 0.3400782640274442, "learning_rate": 9.042799318959684e-06, "loss": 0.2531, "step": 5607 }, { "epoch": 0.44863102737945243, "grad_norm": 0.29912716934376893, "learning_rate": 9.042418139290173e-06, "loss": 0.2862, "step": 5608 }, { "epoch": 0.4487110257794844, "grad_norm": 0.26575761068988984, "learning_rate": 9.042036891775715e-06, "loss": 0.2988, "step": 5609 }, { "epoch": 0.4487910241795164, "grad_norm": 0.29738317284111315, "learning_rate": 9.041655576422713e-06, "loss": 0.2869, "step": 5610 }, { "epoch": 0.4488710225795484, "grad_norm": 0.3223219323130359, "learning_rate": 9.041274193237565e-06, "loss": 0.235, "step": 5611 }, { "epoch": 0.4489510209795804, "grad_norm": 0.24892052165565023, "learning_rate": 9.04089274222667e-06, "loss": 0.3164, "step": 5612 }, { "epoch": 0.44903101937961243, "grad_norm": 0.24974714072125548, "learning_rate": 9.040511223396432e-06, "loss": 0.3315, "step": 5613 }, { "epoch": 0.4491110177796444, "grad_norm": 0.34450259211205, "learning_rate": 9.040129636753253e-06, "loss": 0.2836, "step": 5614 }, { "epoch": 0.4491910161796764, "grad_norm": 0.31941063345613213, "learning_rate": 9.039747982303539e-06, "loss": 0.2714, "step": 5615 }, { "epoch": 0.4492710145797084, "grad_norm": 0.3287485874172965, "learning_rate": 9.039366260053693e-06, "loss": 0.2724, "step": 5616 }, { "epoch": 0.4493510129797404, "grad_norm": 0.3086087293584916, "learning_rate": 9.038984470010123e-06, "loss": 0.3125, "step": 5617 }, { "epoch": 0.4494310113797724, "grad_norm": 0.24890016446126073, "learning_rate": 9.038602612179236e-06, "loss": 0.3078, "step": 5618 }, { "epoch": 0.4495110097798044, "grad_norm": 0.2884142537736799, "learning_rate": 9.038220686567443e-06, "loss": 0.2991, "step": 5619 }, { "epoch": 0.4495910081798364, "grad_norm": 0.32664211971791374, "learning_rate": 9.037838693181151e-06, "loss": 0.2502, "step": 5620 }, { "epoch": 0.4496710065798684, "grad_norm": 0.30543754011929564, "learning_rate": 9.037456632026774e-06, "loss": 0.2608, "step": 5621 }, { "epoch": 0.4497510049799004, "grad_norm": 0.2722700118972102, "learning_rate": 9.037074503110719e-06, "loss": 0.2935, "step": 5622 }, { "epoch": 0.4498310033799324, "grad_norm": 0.2814185484101297, "learning_rate": 9.036692306439406e-06, "loss": 0.276, "step": 5623 }, { "epoch": 0.4499110017799644, "grad_norm": 0.33917382616876846, "learning_rate": 9.036310042019245e-06, "loss": 0.2629, "step": 5624 }, { "epoch": 0.4499910001799964, "grad_norm": 0.2644404209193627, "learning_rate": 9.035927709856654e-06, "loss": 0.3064, "step": 5625 }, { "epoch": 0.4500709985800284, "grad_norm": 0.31998057162255167, "learning_rate": 9.035545309958048e-06, "loss": 0.2791, "step": 5626 }, { "epoch": 0.4501509969800604, "grad_norm": 0.3144383580175707, "learning_rate": 9.035162842329845e-06, "loss": 0.2527, "step": 5627 }, { "epoch": 0.4502309953800924, "grad_norm": 0.2475654942027119, "learning_rate": 9.034780306978466e-06, "loss": 0.2811, "step": 5628 }, { "epoch": 0.4503109937801244, "grad_norm": 0.33728880170131154, "learning_rate": 9.034397703910328e-06, "loss": 0.2435, "step": 5629 }, { "epoch": 0.4503909921801564, "grad_norm": 0.3028517258463103, "learning_rate": 9.034015033131858e-06, "loss": 0.2624, "step": 5630 }, { "epoch": 0.4504709905801884, "grad_norm": 0.19293587862662287, "learning_rate": 9.033632294649473e-06, "loss": 0.3324, "step": 5631 }, { "epoch": 0.4505509889802204, "grad_norm": 0.3338684285346212, "learning_rate": 9.033249488469597e-06, "loss": 0.2884, "step": 5632 }, { "epoch": 0.45063098738025237, "grad_norm": 0.41663317342503803, "learning_rate": 9.032866614598658e-06, "loss": 0.2524, "step": 5633 }, { "epoch": 0.4507109857802844, "grad_norm": 0.44225916933109277, "learning_rate": 9.03248367304308e-06, "loss": 0.2996, "step": 5634 }, { "epoch": 0.4507909841803164, "grad_norm": 0.2628770511632072, "learning_rate": 9.032100663809288e-06, "loss": 0.2999, "step": 5635 }, { "epoch": 0.4508709825803484, "grad_norm": 0.25813889565506465, "learning_rate": 9.031717586903715e-06, "loss": 0.2887, "step": 5636 }, { "epoch": 0.4509509809803804, "grad_norm": 0.26502412157002514, "learning_rate": 9.031334442332784e-06, "loss": 0.3216, "step": 5637 }, { "epoch": 0.45103097938041237, "grad_norm": 0.2982665930974367, "learning_rate": 9.03095123010293e-06, "loss": 0.2378, "step": 5638 }, { "epoch": 0.4511109777804444, "grad_norm": 0.29201081711920746, "learning_rate": 9.030567950220586e-06, "loss": 0.2615, "step": 5639 }, { "epoch": 0.4511909761804764, "grad_norm": 0.2998080542854087, "learning_rate": 9.030184602692179e-06, "loss": 0.2491, "step": 5640 }, { "epoch": 0.4512709745805084, "grad_norm": 0.2986194988912872, "learning_rate": 9.029801187524147e-06, "loss": 0.2597, "step": 5641 }, { "epoch": 0.4513509729805404, "grad_norm": 0.2883281759915458, "learning_rate": 9.029417704722925e-06, "loss": 0.2938, "step": 5642 }, { "epoch": 0.45143097138057237, "grad_norm": 0.2430385808531927, "learning_rate": 9.029034154294945e-06, "loss": 0.3295, "step": 5643 }, { "epoch": 0.4515109697806044, "grad_norm": 0.30187154664375243, "learning_rate": 9.02865053624665e-06, "loss": 0.3231, "step": 5644 }, { "epoch": 0.4515909681806364, "grad_norm": 0.31179860386625846, "learning_rate": 9.028266850584473e-06, "loss": 0.2566, "step": 5645 }, { "epoch": 0.4516709665806684, "grad_norm": 0.28223329319727625, "learning_rate": 9.027883097314859e-06, "loss": 0.2834, "step": 5646 }, { "epoch": 0.4517509649807004, "grad_norm": 0.30544234956852817, "learning_rate": 9.027499276444242e-06, "loss": 0.2467, "step": 5647 }, { "epoch": 0.45183096338073236, "grad_norm": 0.2597709208639203, "learning_rate": 9.02711538797907e-06, "loss": 0.3074, "step": 5648 }, { "epoch": 0.4519109617807644, "grad_norm": 0.30266098700005173, "learning_rate": 9.026731431925784e-06, "loss": 0.2654, "step": 5649 }, { "epoch": 0.4519909601807964, "grad_norm": 0.24729322250465036, "learning_rate": 9.026347408290825e-06, "loss": 0.3259, "step": 5650 }, { "epoch": 0.4520709585808284, "grad_norm": 0.26801114860203573, "learning_rate": 9.025963317080641e-06, "loss": 0.2769, "step": 5651 }, { "epoch": 0.4521509569808604, "grad_norm": 0.33734427242948156, "learning_rate": 9.025579158301679e-06, "loss": 0.2734, "step": 5652 }, { "epoch": 0.45223095538089236, "grad_norm": 0.22481383076956757, "learning_rate": 9.025194931960385e-06, "loss": 0.3218, "step": 5653 }, { "epoch": 0.4523109537809244, "grad_norm": 0.2998795172784253, "learning_rate": 9.024810638063207e-06, "loss": 0.2588, "step": 5654 }, { "epoch": 0.4523909521809564, "grad_norm": 0.3139141850903015, "learning_rate": 9.024426276616595e-06, "loss": 0.2483, "step": 5655 }, { "epoch": 0.4524709505809884, "grad_norm": 0.31033240773630466, "learning_rate": 9.024041847627003e-06, "loss": 0.2571, "step": 5656 }, { "epoch": 0.4525509489810204, "grad_norm": 0.29880279724779935, "learning_rate": 9.023657351100878e-06, "loss": 0.2918, "step": 5657 }, { "epoch": 0.45263094738105236, "grad_norm": 0.31054421730758835, "learning_rate": 9.023272787044677e-06, "loss": 0.2506, "step": 5658 }, { "epoch": 0.4527109457810844, "grad_norm": 0.28541812610873313, "learning_rate": 9.02288815546485e-06, "loss": 0.2711, "step": 5659 }, { "epoch": 0.4527909441811164, "grad_norm": 0.3021087151657876, "learning_rate": 9.022503456367857e-06, "loss": 0.2596, "step": 5660 }, { "epoch": 0.45287094258114835, "grad_norm": 0.23484861913491784, "learning_rate": 9.022118689760153e-06, "loss": 0.2962, "step": 5661 }, { "epoch": 0.4529509409811804, "grad_norm": 0.2881987201842057, "learning_rate": 9.021733855648195e-06, "loss": 0.2737, "step": 5662 }, { "epoch": 0.45303093938121236, "grad_norm": 0.30361817825960186, "learning_rate": 9.02134895403844e-06, "loss": 0.2768, "step": 5663 }, { "epoch": 0.4531109377812444, "grad_norm": 0.22661207778574827, "learning_rate": 9.020963984937352e-06, "loss": 0.3583, "step": 5664 }, { "epoch": 0.45319093618127637, "grad_norm": 0.2670716373803451, "learning_rate": 9.020578948351389e-06, "loss": 0.2939, "step": 5665 }, { "epoch": 0.45327093458130835, "grad_norm": 0.34748727809787566, "learning_rate": 9.020193844287013e-06, "loss": 0.2661, "step": 5666 }, { "epoch": 0.4533509329813404, "grad_norm": 0.5706291529292663, "learning_rate": 9.01980867275069e-06, "loss": 0.2456, "step": 5667 }, { "epoch": 0.45343093138137236, "grad_norm": 0.29968714206184144, "learning_rate": 9.019423433748882e-06, "loss": 0.2587, "step": 5668 }, { "epoch": 0.4535109297814044, "grad_norm": 0.2705649589283515, "learning_rate": 9.019038127288056e-06, "loss": 0.3035, "step": 5669 }, { "epoch": 0.45359092818143637, "grad_norm": 0.28762733187630907, "learning_rate": 9.018652753374677e-06, "loss": 0.2582, "step": 5670 }, { "epoch": 0.45367092658146835, "grad_norm": 0.23547754523875356, "learning_rate": 9.018267312015214e-06, "loss": 0.3341, "step": 5671 }, { "epoch": 0.4537509249815004, "grad_norm": 0.2701532552630431, "learning_rate": 9.017881803216138e-06, "loss": 0.3042, "step": 5672 }, { "epoch": 0.45383092338153236, "grad_norm": 0.3500251342970308, "learning_rate": 9.017496226983915e-06, "loss": 0.2817, "step": 5673 }, { "epoch": 0.4539109217815644, "grad_norm": 0.34459706625376774, "learning_rate": 9.017110583325017e-06, "loss": 0.2532, "step": 5674 }, { "epoch": 0.45399092018159637, "grad_norm": 0.22861990433302665, "learning_rate": 9.01672487224592e-06, "loss": 0.3137, "step": 5675 }, { "epoch": 0.45407091858162835, "grad_norm": 0.2062557821894587, "learning_rate": 9.016339093753093e-06, "loss": 0.3557, "step": 5676 }, { "epoch": 0.4541509169816604, "grad_norm": 0.2498386261950635, "learning_rate": 9.015953247853014e-06, "loss": 0.324, "step": 5677 }, { "epoch": 0.45423091538169236, "grad_norm": 0.2775962049889726, "learning_rate": 9.015567334552158e-06, "loss": 0.2948, "step": 5678 }, { "epoch": 0.4543109137817244, "grad_norm": 0.2896902225691437, "learning_rate": 9.015181353856998e-06, "loss": 0.2687, "step": 5679 }, { "epoch": 0.45439091218175637, "grad_norm": 0.30501492797322455, "learning_rate": 9.014795305774019e-06, "loss": 0.2698, "step": 5680 }, { "epoch": 0.45447091058178835, "grad_norm": 0.28561895341133053, "learning_rate": 9.014409190309695e-06, "loss": 0.3026, "step": 5681 }, { "epoch": 0.4545509089818204, "grad_norm": 0.30837919402429925, "learning_rate": 9.014023007470507e-06, "loss": 0.2508, "step": 5682 }, { "epoch": 0.45463090738185236, "grad_norm": 0.2977031939966874, "learning_rate": 9.013636757262938e-06, "loss": 0.2432, "step": 5683 }, { "epoch": 0.4547109057818844, "grad_norm": 0.25754415871267855, "learning_rate": 9.013250439693468e-06, "loss": 0.3002, "step": 5684 }, { "epoch": 0.45479090418191637, "grad_norm": 0.27349375734525255, "learning_rate": 9.012864054768584e-06, "loss": 0.2712, "step": 5685 }, { "epoch": 0.45487090258194834, "grad_norm": 0.26811872260216413, "learning_rate": 9.012477602494768e-06, "loss": 0.2816, "step": 5686 }, { "epoch": 0.4549509009819804, "grad_norm": 0.259485351355254, "learning_rate": 9.01209108287851e-06, "loss": 0.3002, "step": 5687 }, { "epoch": 0.45503089938201235, "grad_norm": 0.29037885670128794, "learning_rate": 9.01170449592629e-06, "loss": 0.271, "step": 5688 }, { "epoch": 0.45511089778204433, "grad_norm": 0.3370454315786992, "learning_rate": 9.011317841644602e-06, "loss": 0.2484, "step": 5689 }, { "epoch": 0.45519089618207637, "grad_norm": 0.30406566790697725, "learning_rate": 9.010931120039934e-06, "loss": 0.3035, "step": 5690 }, { "epoch": 0.45527089458210834, "grad_norm": 0.30286979605168124, "learning_rate": 9.010544331118776e-06, "loss": 0.2899, "step": 5691 }, { "epoch": 0.4553508929821404, "grad_norm": 0.3268703307425401, "learning_rate": 9.01015747488762e-06, "loss": 0.2614, "step": 5692 }, { "epoch": 0.45543089138217235, "grad_norm": 0.2021326362922993, "learning_rate": 9.009770551352957e-06, "loss": 0.368, "step": 5693 }, { "epoch": 0.45551088978220433, "grad_norm": 0.30653252424072697, "learning_rate": 9.009383560521284e-06, "loss": 0.2755, "step": 5694 }, { "epoch": 0.45559088818223636, "grad_norm": 0.24797127795035148, "learning_rate": 9.008996502399092e-06, "loss": 0.2807, "step": 5695 }, { "epoch": 0.45567088658226834, "grad_norm": 0.24017968391194194, "learning_rate": 9.008609376992879e-06, "loss": 0.3119, "step": 5696 }, { "epoch": 0.4557508849823004, "grad_norm": 0.23524960158742408, "learning_rate": 9.008222184309145e-06, "loss": 0.3277, "step": 5697 }, { "epoch": 0.45583088338233235, "grad_norm": 0.25241637842412046, "learning_rate": 9.007834924354384e-06, "loss": 0.2896, "step": 5698 }, { "epoch": 0.45591088178236433, "grad_norm": 0.27192507701001134, "learning_rate": 9.007447597135097e-06, "loss": 0.2881, "step": 5699 }, { "epoch": 0.45599088018239636, "grad_norm": 0.3307123795920149, "learning_rate": 9.007060202657785e-06, "loss": 0.2559, "step": 5700 }, { "epoch": 0.45607087858242834, "grad_norm": 0.23930411036443908, "learning_rate": 9.006672740928952e-06, "loss": 0.3139, "step": 5701 }, { "epoch": 0.4561508769824604, "grad_norm": 0.2574565588745887, "learning_rate": 9.006285211955095e-06, "loss": 0.2927, "step": 5702 }, { "epoch": 0.45623087538249235, "grad_norm": 0.30533934059239326, "learning_rate": 9.005897615742723e-06, "loss": 0.2869, "step": 5703 }, { "epoch": 0.45631087378252433, "grad_norm": 0.24824860185365413, "learning_rate": 9.005509952298341e-06, "loss": 0.3307, "step": 5704 }, { "epoch": 0.45639087218255636, "grad_norm": 0.27551065378772505, "learning_rate": 9.005122221628452e-06, "loss": 0.2834, "step": 5705 }, { "epoch": 0.45647087058258834, "grad_norm": 0.31840337583536443, "learning_rate": 9.004734423739567e-06, "loss": 0.2419, "step": 5706 }, { "epoch": 0.4565508689826204, "grad_norm": 0.29808064580035876, "learning_rate": 9.00434655863819e-06, "loss": 0.2803, "step": 5707 }, { "epoch": 0.45663086738265235, "grad_norm": 0.29771552682217306, "learning_rate": 9.003958626330836e-06, "loss": 0.288, "step": 5708 }, { "epoch": 0.4567108657826843, "grad_norm": 0.3012665850981478, "learning_rate": 9.003570626824013e-06, "loss": 0.2613, "step": 5709 }, { "epoch": 0.45679086418271636, "grad_norm": 0.30670784685946273, "learning_rate": 9.003182560124233e-06, "loss": 0.2518, "step": 5710 }, { "epoch": 0.45687086258274834, "grad_norm": 0.247326398357017, "learning_rate": 9.002794426238009e-06, "loss": 0.3034, "step": 5711 }, { "epoch": 0.45695086098278037, "grad_norm": 0.26153281510187193, "learning_rate": 9.002406225171854e-06, "loss": 0.3137, "step": 5712 }, { "epoch": 0.45703085938281235, "grad_norm": 0.33003122461644413, "learning_rate": 9.002017956932285e-06, "loss": 0.238, "step": 5713 }, { "epoch": 0.4571108577828443, "grad_norm": 0.33957556940596817, "learning_rate": 9.001629621525819e-06, "loss": 0.3039, "step": 5714 }, { "epoch": 0.45719085618287636, "grad_norm": 0.25844997754384297, "learning_rate": 9.001241218958972e-06, "loss": 0.2798, "step": 5715 }, { "epoch": 0.45727085458290834, "grad_norm": 0.25058774269315715, "learning_rate": 9.000852749238263e-06, "loss": 0.275, "step": 5716 }, { "epoch": 0.4573508529829403, "grad_norm": 0.2564060105360013, "learning_rate": 9.00046421237021e-06, "loss": 0.2913, "step": 5717 }, { "epoch": 0.45743085138297235, "grad_norm": 0.2619532662604188, "learning_rate": 9.000075608361338e-06, "loss": 0.2939, "step": 5718 }, { "epoch": 0.4575108497830043, "grad_norm": 0.2797279891150205, "learning_rate": 8.999686937218168e-06, "loss": 0.289, "step": 5719 }, { "epoch": 0.45759084818303636, "grad_norm": 0.27504308412081513, "learning_rate": 8.999298198947219e-06, "loss": 0.3024, "step": 5720 }, { "epoch": 0.45767084658306834, "grad_norm": 0.2930297939351951, "learning_rate": 8.998909393555022e-06, "loss": 0.2489, "step": 5721 }, { "epoch": 0.4577508449831003, "grad_norm": 0.28397552985575436, "learning_rate": 8.998520521048096e-06, "loss": 0.2885, "step": 5722 }, { "epoch": 0.45783084338313235, "grad_norm": 0.18929032703718748, "learning_rate": 8.998131581432972e-06, "loss": 0.3288, "step": 5723 }, { "epoch": 0.4579108417831643, "grad_norm": 0.2811289826699942, "learning_rate": 8.997742574716175e-06, "loss": 0.2719, "step": 5724 }, { "epoch": 0.45799084018319636, "grad_norm": 0.29873891016683424, "learning_rate": 8.997353500904234e-06, "loss": 0.2513, "step": 5725 }, { "epoch": 0.45807083858322833, "grad_norm": 0.25813044240358757, "learning_rate": 8.99696436000368e-06, "loss": 0.2659, "step": 5726 }, { "epoch": 0.4581508369832603, "grad_norm": 0.29203999347527887, "learning_rate": 8.996575152021045e-06, "loss": 0.257, "step": 5727 }, { "epoch": 0.45823083538329235, "grad_norm": 0.2810570026486525, "learning_rate": 8.996185876962859e-06, "loss": 0.2293, "step": 5728 }, { "epoch": 0.4583108337833243, "grad_norm": 0.3019512217265175, "learning_rate": 8.995796534835656e-06, "loss": 0.2765, "step": 5729 }, { "epoch": 0.45839083218335636, "grad_norm": 0.316066082426242, "learning_rate": 8.995407125645972e-06, "loss": 0.2521, "step": 5730 }, { "epoch": 0.45847083058338833, "grad_norm": 0.4163112045672262, "learning_rate": 8.995017649400341e-06, "loss": 0.239, "step": 5731 }, { "epoch": 0.4585508289834203, "grad_norm": 0.23341158775925291, "learning_rate": 8.994628106105298e-06, "loss": 0.3198, "step": 5732 }, { "epoch": 0.45863082738345234, "grad_norm": 0.7548677407165855, "learning_rate": 8.994238495767385e-06, "loss": 0.2576, "step": 5733 }, { "epoch": 0.4587108257834843, "grad_norm": 0.34504067280488815, "learning_rate": 8.993848818393139e-06, "loss": 0.2389, "step": 5734 }, { "epoch": 0.45879082418351635, "grad_norm": 0.2588751462417314, "learning_rate": 8.993459073989098e-06, "loss": 0.3128, "step": 5735 }, { "epoch": 0.45887082258354833, "grad_norm": 0.3127232439908338, "learning_rate": 8.993069262561805e-06, "loss": 0.3028, "step": 5736 }, { "epoch": 0.4589508209835803, "grad_norm": 0.3024930462453285, "learning_rate": 8.992679384117802e-06, "loss": 0.2773, "step": 5737 }, { "epoch": 0.45903081938361234, "grad_norm": 0.26151220410359416, "learning_rate": 8.992289438663632e-06, "loss": 0.2956, "step": 5738 }, { "epoch": 0.4591108177836443, "grad_norm": 0.3343245354080847, "learning_rate": 8.991899426205844e-06, "loss": 0.2762, "step": 5739 }, { "epoch": 0.45919081618367635, "grad_norm": 0.3405573810839733, "learning_rate": 8.991509346750975e-06, "loss": 0.2824, "step": 5740 }, { "epoch": 0.45927081458370833, "grad_norm": 0.3048668397576305, "learning_rate": 8.99111920030558e-06, "loss": 0.2926, "step": 5741 }, { "epoch": 0.4593508129837403, "grad_norm": 0.3053999848651645, "learning_rate": 8.990728986876203e-06, "loss": 0.2801, "step": 5742 }, { "epoch": 0.45943081138377234, "grad_norm": 0.2933816480427734, "learning_rate": 8.990338706469393e-06, "loss": 0.2599, "step": 5743 }, { "epoch": 0.4595108097838043, "grad_norm": 0.2665839337944934, "learning_rate": 8.9899483590917e-06, "loss": 0.2774, "step": 5744 }, { "epoch": 0.4595908081838363, "grad_norm": 0.34123110035725057, "learning_rate": 8.989557944749677e-06, "loss": 0.2562, "step": 5745 }, { "epoch": 0.45967080658386833, "grad_norm": 0.25257115847420963, "learning_rate": 8.989167463449874e-06, "loss": 0.3117, "step": 5746 }, { "epoch": 0.4597508049839003, "grad_norm": 0.2677806148407579, "learning_rate": 8.988776915198849e-06, "loss": 0.29, "step": 5747 }, { "epoch": 0.45983080338393234, "grad_norm": 0.3173726398976456, "learning_rate": 8.988386300003153e-06, "loss": 0.2582, "step": 5748 }, { "epoch": 0.4599108017839643, "grad_norm": 0.2776671944309633, "learning_rate": 8.987995617869341e-06, "loss": 0.2829, "step": 5749 }, { "epoch": 0.4599908001839963, "grad_norm": 0.26687416860938623, "learning_rate": 8.987604868803972e-06, "loss": 0.2632, "step": 5750 }, { "epoch": 0.46007079858402833, "grad_norm": 0.3208482103308363, "learning_rate": 8.987214052813605e-06, "loss": 0.225, "step": 5751 }, { "epoch": 0.4601507969840603, "grad_norm": 0.26742010100475155, "learning_rate": 8.986823169904797e-06, "loss": 0.3164, "step": 5752 }, { "epoch": 0.46023079538409234, "grad_norm": 0.269844582091821, "learning_rate": 8.986432220084108e-06, "loss": 0.2761, "step": 5753 }, { "epoch": 0.4603107937841243, "grad_norm": 0.22389317613720738, "learning_rate": 8.986041203358101e-06, "loss": 0.3215, "step": 5754 }, { "epoch": 0.4603907921841563, "grad_norm": 0.291198802529429, "learning_rate": 8.985650119733338e-06, "loss": 0.257, "step": 5755 }, { "epoch": 0.4604707905841883, "grad_norm": 0.2957773042894445, "learning_rate": 8.985258969216383e-06, "loss": 0.2808, "step": 5756 }, { "epoch": 0.4605507889842203, "grad_norm": 0.2950509523276521, "learning_rate": 8.9848677518138e-06, "loss": 0.2974, "step": 5757 }, { "epoch": 0.46063078738425234, "grad_norm": 0.28130603845741026, "learning_rate": 8.984476467532156e-06, "loss": 0.3018, "step": 5758 }, { "epoch": 0.4607107857842843, "grad_norm": 0.2654643009382576, "learning_rate": 8.984085116378015e-06, "loss": 0.3048, "step": 5759 }, { "epoch": 0.4607907841843163, "grad_norm": 0.3108520601587131, "learning_rate": 8.98369369835795e-06, "loss": 0.2498, "step": 5760 }, { "epoch": 0.4608707825843483, "grad_norm": 0.3563608829858653, "learning_rate": 8.983302213478525e-06, "loss": 0.2573, "step": 5761 }, { "epoch": 0.4609507809843803, "grad_norm": 0.31070741242122324, "learning_rate": 8.982910661746315e-06, "loss": 0.2908, "step": 5762 }, { "epoch": 0.46103077938441234, "grad_norm": 0.30846740530982875, "learning_rate": 8.982519043167888e-06, "loss": 0.2651, "step": 5763 }, { "epoch": 0.4611107777844443, "grad_norm": 0.24598659564488626, "learning_rate": 8.98212735774982e-06, "loss": 0.3232, "step": 5764 }, { "epoch": 0.4611907761844763, "grad_norm": 0.27321142676964094, "learning_rate": 8.981735605498683e-06, "loss": 0.2856, "step": 5765 }, { "epoch": 0.4612707745845083, "grad_norm": 0.28610065071086943, "learning_rate": 8.981343786421051e-06, "loss": 0.2784, "step": 5766 }, { "epoch": 0.4613507729845403, "grad_norm": 0.36010062803700194, "learning_rate": 8.9809519005235e-06, "loss": 0.278, "step": 5767 }, { "epoch": 0.46143077138457234, "grad_norm": 0.3192937977714049, "learning_rate": 8.98055994781261e-06, "loss": 0.2984, "step": 5768 }, { "epoch": 0.4615107697846043, "grad_norm": 0.23240969099305078, "learning_rate": 8.980167928294956e-06, "loss": 0.3017, "step": 5769 }, { "epoch": 0.4615907681846363, "grad_norm": 0.24092273770462636, "learning_rate": 8.97977584197712e-06, "loss": 0.3332, "step": 5770 }, { "epoch": 0.4616707665846683, "grad_norm": 0.27069702263300327, "learning_rate": 8.97938368886568e-06, "loss": 0.2924, "step": 5771 }, { "epoch": 0.4617507649847003, "grad_norm": 0.24862989850632858, "learning_rate": 8.978991468967218e-06, "loss": 0.2726, "step": 5772 }, { "epoch": 0.4618307633847323, "grad_norm": 0.21831012753687268, "learning_rate": 8.978599182288319e-06, "loss": 0.3187, "step": 5773 }, { "epoch": 0.4619107617847643, "grad_norm": 0.29600031757909434, "learning_rate": 8.978206828835564e-06, "loss": 0.2867, "step": 5774 }, { "epoch": 0.4619907601847963, "grad_norm": 0.23250020902349608, "learning_rate": 8.97781440861554e-06, "loss": 0.3125, "step": 5775 }, { "epoch": 0.4620707585848283, "grad_norm": 0.3355016215639556, "learning_rate": 8.977421921634833e-06, "loss": 0.2406, "step": 5776 }, { "epoch": 0.4621507569848603, "grad_norm": 0.2665202993886267, "learning_rate": 8.977029367900028e-06, "loss": 0.2803, "step": 5777 }, { "epoch": 0.4622307553848923, "grad_norm": 0.3061210664835922, "learning_rate": 8.976636747417715e-06, "loss": 0.2876, "step": 5778 }, { "epoch": 0.4623107537849243, "grad_norm": 0.24807078690939627, "learning_rate": 8.976244060194484e-06, "loss": 0.2819, "step": 5779 }, { "epoch": 0.4623907521849563, "grad_norm": 0.31685086451058636, "learning_rate": 8.975851306236925e-06, "loss": 0.283, "step": 5780 }, { "epoch": 0.4624707505849883, "grad_norm": 0.3082104061076356, "learning_rate": 8.97545848555163e-06, "loss": 0.2366, "step": 5781 }, { "epoch": 0.4625507489850203, "grad_norm": 0.2686830051682658, "learning_rate": 8.97506559814519e-06, "loss": 0.255, "step": 5782 }, { "epoch": 0.4626307473850523, "grad_norm": 0.2658598030042549, "learning_rate": 8.9746726440242e-06, "loss": 0.3367, "step": 5783 }, { "epoch": 0.4627107457850843, "grad_norm": 0.29630629414157716, "learning_rate": 8.974279623195257e-06, "loss": 0.2648, "step": 5784 }, { "epoch": 0.4627907441851163, "grad_norm": 0.28807600672163663, "learning_rate": 8.973886535664954e-06, "loss": 0.2477, "step": 5785 }, { "epoch": 0.4628707425851483, "grad_norm": 0.2497658765717286, "learning_rate": 8.973493381439892e-06, "loss": 0.2919, "step": 5786 }, { "epoch": 0.4629507409851803, "grad_norm": 0.2698661346263241, "learning_rate": 8.973100160526666e-06, "loss": 0.2924, "step": 5787 }, { "epoch": 0.4630307393852123, "grad_norm": 0.3283141863721741, "learning_rate": 8.972706872931877e-06, "loss": 0.2618, "step": 5788 }, { "epoch": 0.4631107377852443, "grad_norm": 0.23351849884289716, "learning_rate": 8.972313518662125e-06, "loss": 0.3145, "step": 5789 }, { "epoch": 0.4631907361852763, "grad_norm": 0.5213563944960924, "learning_rate": 8.971920097724012e-06, "loss": 0.2872, "step": 5790 }, { "epoch": 0.4632707345853083, "grad_norm": 0.2753298222630535, "learning_rate": 8.971526610124142e-06, "loss": 0.3464, "step": 5791 }, { "epoch": 0.4633507329853403, "grad_norm": 0.32971697956594914, "learning_rate": 8.971133055869117e-06, "loss": 0.2665, "step": 5792 }, { "epoch": 0.4634307313853723, "grad_norm": 0.2504774800329252, "learning_rate": 8.970739434965544e-06, "loss": 0.2987, "step": 5793 }, { "epoch": 0.4635107297854043, "grad_norm": 0.26933383989691456, "learning_rate": 8.970345747420029e-06, "loss": 0.2843, "step": 5794 }, { "epoch": 0.4635907281854363, "grad_norm": 0.25718202378259497, "learning_rate": 8.969951993239177e-06, "loss": 0.287, "step": 5795 }, { "epoch": 0.4636707265854683, "grad_norm": 0.3069182338044001, "learning_rate": 8.9695581724296e-06, "loss": 0.2505, "step": 5796 }, { "epoch": 0.4637507249855003, "grad_norm": 0.26707907635924255, "learning_rate": 8.969164284997905e-06, "loss": 0.2842, "step": 5797 }, { "epoch": 0.4638307233855323, "grad_norm": 0.27193971181991994, "learning_rate": 8.968770330950703e-06, "loss": 0.301, "step": 5798 }, { "epoch": 0.4639107217855643, "grad_norm": 0.2580996315642149, "learning_rate": 8.968376310294608e-06, "loss": 0.2702, "step": 5799 }, { "epoch": 0.4639907201855963, "grad_norm": 0.2689842735500525, "learning_rate": 8.96798222303623e-06, "loss": 0.2686, "step": 5800 }, { "epoch": 0.46407071858562826, "grad_norm": 0.2719310774258587, "learning_rate": 8.967588069182184e-06, "loss": 0.292, "step": 5801 }, { "epoch": 0.4641507169856603, "grad_norm": 0.3158973157639977, "learning_rate": 8.967193848739087e-06, "loss": 0.2918, "step": 5802 }, { "epoch": 0.46423071538569227, "grad_norm": 0.18594801823625473, "learning_rate": 8.966799561713556e-06, "loss": 0.3329, "step": 5803 }, { "epoch": 0.4643107137857243, "grad_norm": 0.2776274425328694, "learning_rate": 8.966405208112202e-06, "loss": 0.2714, "step": 5804 }, { "epoch": 0.4643907121857563, "grad_norm": 0.23801349093970028, "learning_rate": 8.96601078794165e-06, "loss": 0.3211, "step": 5805 }, { "epoch": 0.46447071058578826, "grad_norm": 0.3229035737062405, "learning_rate": 8.965616301208517e-06, "loss": 0.2467, "step": 5806 }, { "epoch": 0.4645507089858203, "grad_norm": 0.3159661051094077, "learning_rate": 8.965221747919424e-06, "loss": 0.2537, "step": 5807 }, { "epoch": 0.46463070738585227, "grad_norm": 0.34638462706942025, "learning_rate": 8.964827128080995e-06, "loss": 0.2497, "step": 5808 }, { "epoch": 0.4647107057858843, "grad_norm": 0.38670104685900775, "learning_rate": 8.964432441699848e-06, "loss": 0.2759, "step": 5809 }, { "epoch": 0.4647907041859163, "grad_norm": 0.2982318404788443, "learning_rate": 8.964037688782614e-06, "loss": 0.3114, "step": 5810 }, { "epoch": 0.46487070258594826, "grad_norm": 0.30879349143825763, "learning_rate": 8.963642869335913e-06, "loss": 0.308, "step": 5811 }, { "epoch": 0.4649507009859803, "grad_norm": 0.28064384084310307, "learning_rate": 8.963247983366372e-06, "loss": 0.2526, "step": 5812 }, { "epoch": 0.46503069938601227, "grad_norm": 0.6168050452553306, "learning_rate": 8.96285303088062e-06, "loss": 0.2807, "step": 5813 }, { "epoch": 0.4651106977860443, "grad_norm": 0.3218248903171723, "learning_rate": 8.962458011885286e-06, "loss": 0.2566, "step": 5814 }, { "epoch": 0.4651906961860763, "grad_norm": 0.2895488715182279, "learning_rate": 8.962062926386998e-06, "loss": 0.2478, "step": 5815 }, { "epoch": 0.46527069458610826, "grad_norm": 0.31511710745757054, "learning_rate": 8.961667774392387e-06, "loss": 0.2832, "step": 5816 }, { "epoch": 0.4653506929861403, "grad_norm": 0.3118931716744917, "learning_rate": 8.961272555908084e-06, "loss": 0.2677, "step": 5817 }, { "epoch": 0.46543069138617227, "grad_norm": 0.31530004663403877, "learning_rate": 8.960877270940727e-06, "loss": 0.2948, "step": 5818 }, { "epoch": 0.4655106897862043, "grad_norm": 0.3134026917592955, "learning_rate": 8.960481919496944e-06, "loss": 0.2987, "step": 5819 }, { "epoch": 0.4655906881862363, "grad_norm": 0.31406935245783446, "learning_rate": 8.960086501583376e-06, "loss": 0.2583, "step": 5820 }, { "epoch": 0.46567068658626826, "grad_norm": 0.29388817366868586, "learning_rate": 8.959691017206653e-06, "loss": 0.2515, "step": 5821 }, { "epoch": 0.4657506849863003, "grad_norm": 0.3107970543538932, "learning_rate": 8.959295466373417e-06, "loss": 0.273, "step": 5822 }, { "epoch": 0.46583068338633227, "grad_norm": 0.22435008673002646, "learning_rate": 8.958899849090306e-06, "loss": 0.2939, "step": 5823 }, { "epoch": 0.4659106817863643, "grad_norm": 0.3241206416335767, "learning_rate": 8.958504165363959e-06, "loss": 0.2827, "step": 5824 }, { "epoch": 0.4659906801863963, "grad_norm": 0.27711610635765205, "learning_rate": 8.958108415201017e-06, "loss": 0.2755, "step": 5825 }, { "epoch": 0.46607067858642826, "grad_norm": 0.2873209588122253, "learning_rate": 8.957712598608123e-06, "loss": 0.2899, "step": 5826 }, { "epoch": 0.4661506769864603, "grad_norm": 0.31933428095131366, "learning_rate": 8.957316715591918e-06, "loss": 0.281, "step": 5827 }, { "epoch": 0.46623067538649227, "grad_norm": 0.7691796598485064, "learning_rate": 8.956920766159048e-06, "loss": 0.2498, "step": 5828 }, { "epoch": 0.46631067378652424, "grad_norm": 0.2983793390487636, "learning_rate": 8.956524750316158e-06, "loss": 0.2464, "step": 5829 }, { "epoch": 0.4663906721865563, "grad_norm": 0.34662663938500254, "learning_rate": 8.956128668069894e-06, "loss": 0.2535, "step": 5830 }, { "epoch": 0.46647067058658825, "grad_norm": 0.28012057632041476, "learning_rate": 8.955732519426902e-06, "loss": 0.3079, "step": 5831 }, { "epoch": 0.4665506689866203, "grad_norm": 0.30403948765802663, "learning_rate": 8.955336304393833e-06, "loss": 0.2668, "step": 5832 }, { "epoch": 0.46663066738665226, "grad_norm": 0.31213347266694547, "learning_rate": 8.954940022977338e-06, "loss": 0.2613, "step": 5833 }, { "epoch": 0.46671066578668424, "grad_norm": 0.32791326303815566, "learning_rate": 8.954543675184065e-06, "loss": 0.2546, "step": 5834 }, { "epoch": 0.4667906641867163, "grad_norm": 0.2433565302470024, "learning_rate": 8.954147261020667e-06, "loss": 0.3305, "step": 5835 }, { "epoch": 0.46687066258674825, "grad_norm": 0.34837732794149967, "learning_rate": 8.953750780493797e-06, "loss": 0.2709, "step": 5836 }, { "epoch": 0.4669506609867803, "grad_norm": 0.3198878433794871, "learning_rate": 8.95335423361011e-06, "loss": 0.2568, "step": 5837 }, { "epoch": 0.46703065938681226, "grad_norm": 0.3213000047800948, "learning_rate": 8.952957620376261e-06, "loss": 0.2903, "step": 5838 }, { "epoch": 0.46711065778684424, "grad_norm": 0.2934820533427515, "learning_rate": 8.952560940798905e-06, "loss": 0.2605, "step": 5839 }, { "epoch": 0.4671906561868763, "grad_norm": 0.2893104564107298, "learning_rate": 8.9521641948847e-06, "loss": 0.2491, "step": 5840 }, { "epoch": 0.46727065458690825, "grad_norm": 0.292775622086539, "learning_rate": 8.951767382640308e-06, "loss": 0.2549, "step": 5841 }, { "epoch": 0.4673506529869403, "grad_norm": 0.2922770887675927, "learning_rate": 8.951370504072385e-06, "loss": 0.2438, "step": 5842 }, { "epoch": 0.46743065138697226, "grad_norm": 0.32067563627927154, "learning_rate": 8.950973559187593e-06, "loss": 0.2623, "step": 5843 }, { "epoch": 0.46751064978700424, "grad_norm": 0.27563222348123945, "learning_rate": 8.950576547992593e-06, "loss": 0.2856, "step": 5844 }, { "epoch": 0.4675906481870363, "grad_norm": 0.3026488372225054, "learning_rate": 8.950179470494051e-06, "loss": 0.2571, "step": 5845 }, { "epoch": 0.46767064658706825, "grad_norm": 0.26563865369927925, "learning_rate": 8.94978232669863e-06, "loss": 0.2875, "step": 5846 }, { "epoch": 0.4677506449871003, "grad_norm": 0.24855700776861267, "learning_rate": 8.949385116612994e-06, "loss": 0.3227, "step": 5847 }, { "epoch": 0.46783064338713226, "grad_norm": 0.28460494082076854, "learning_rate": 8.948987840243812e-06, "loss": 0.3032, "step": 5848 }, { "epoch": 0.46791064178716424, "grad_norm": 0.2878708965975192, "learning_rate": 8.948590497597749e-06, "loss": 0.2405, "step": 5849 }, { "epoch": 0.46799064018719627, "grad_norm": 0.38550085029146125, "learning_rate": 8.948193088681474e-06, "loss": 0.2548, "step": 5850 }, { "epoch": 0.46807063858722825, "grad_norm": 0.3016023784671988, "learning_rate": 8.947795613501658e-06, "loss": 0.3357, "step": 5851 }, { "epoch": 0.4681506369872603, "grad_norm": 0.3121298955966143, "learning_rate": 8.947398072064972e-06, "loss": 0.2689, "step": 5852 }, { "epoch": 0.46823063538729226, "grad_norm": 0.40709280607609527, "learning_rate": 8.947000464378088e-06, "loss": 0.2758, "step": 5853 }, { "epoch": 0.46831063378732424, "grad_norm": 0.31176806236948573, "learning_rate": 8.946602790447677e-06, "loss": 0.2788, "step": 5854 }, { "epoch": 0.46839063218735627, "grad_norm": 0.29491363686560407, "learning_rate": 8.946205050280417e-06, "loss": 0.2844, "step": 5855 }, { "epoch": 0.46847063058738825, "grad_norm": 0.26843393871554133, "learning_rate": 8.94580724388298e-06, "loss": 0.2782, "step": 5856 }, { "epoch": 0.4685506289874202, "grad_norm": 0.28596350262461795, "learning_rate": 8.945409371262044e-06, "loss": 0.2716, "step": 5857 }, { "epoch": 0.46863062738745226, "grad_norm": 0.4192351977298747, "learning_rate": 8.945011432424287e-06, "loss": 0.2585, "step": 5858 }, { "epoch": 0.46871062578748424, "grad_norm": 0.23287590744755174, "learning_rate": 8.944613427376385e-06, "loss": 0.3497, "step": 5859 }, { "epoch": 0.46879062418751627, "grad_norm": 0.335048066872026, "learning_rate": 8.944215356125022e-06, "loss": 0.2648, "step": 5860 }, { "epoch": 0.46887062258754825, "grad_norm": 0.2858325760188751, "learning_rate": 8.943817218676877e-06, "loss": 0.2953, "step": 5861 }, { "epoch": 0.4689506209875802, "grad_norm": 1.2012702829785136, "learning_rate": 8.943419015038631e-06, "loss": 0.2517, "step": 5862 }, { "epoch": 0.46903061938761226, "grad_norm": 0.44540565641616214, "learning_rate": 8.943020745216968e-06, "loss": 0.3118, "step": 5863 }, { "epoch": 0.46911061778764424, "grad_norm": 0.26463789242067015, "learning_rate": 8.942622409218573e-06, "loss": 0.2689, "step": 5864 }, { "epoch": 0.46919061618767627, "grad_norm": 0.2727117036155042, "learning_rate": 8.942224007050131e-06, "loss": 0.2858, "step": 5865 }, { "epoch": 0.46927061458770825, "grad_norm": 0.23522948077420866, "learning_rate": 8.941825538718328e-06, "loss": 0.3198, "step": 5866 }, { "epoch": 0.4693506129877402, "grad_norm": 0.25995979455885676, "learning_rate": 8.941427004229851e-06, "loss": 0.2831, "step": 5867 }, { "epoch": 0.46943061138777226, "grad_norm": 0.36442325635105177, "learning_rate": 8.94102840359139e-06, "loss": 0.2932, "step": 5868 }, { "epoch": 0.46951060978780423, "grad_norm": 0.3361864503048774, "learning_rate": 8.940629736809635e-06, "loss": 0.2656, "step": 5869 }, { "epoch": 0.46959060818783627, "grad_norm": 0.24719226764272864, "learning_rate": 8.940231003891275e-06, "loss": 0.316, "step": 5870 }, { "epoch": 0.46967060658786824, "grad_norm": 0.2791908379821809, "learning_rate": 8.939832204843003e-06, "loss": 0.2745, "step": 5871 }, { "epoch": 0.4697506049879002, "grad_norm": 0.28890249708525007, "learning_rate": 8.939433339671514e-06, "loss": 0.2854, "step": 5872 }, { "epoch": 0.46983060338793226, "grad_norm": 0.2013530635557675, "learning_rate": 8.939034408383502e-06, "loss": 0.3461, "step": 5873 }, { "epoch": 0.46991060178796423, "grad_norm": 0.3059123301646322, "learning_rate": 8.938635410985658e-06, "loss": 0.2695, "step": 5874 }, { "epoch": 0.46999060018799627, "grad_norm": 0.34695752833308763, "learning_rate": 8.938236347484684e-06, "loss": 0.2968, "step": 5875 }, { "epoch": 0.47007059858802824, "grad_norm": 0.2918977147130378, "learning_rate": 8.937837217887273e-06, "loss": 0.286, "step": 5876 }, { "epoch": 0.4701505969880602, "grad_norm": 0.3053461446902127, "learning_rate": 8.937438022200126e-06, "loss": 0.289, "step": 5877 }, { "epoch": 0.47023059538809225, "grad_norm": 0.2591159932211754, "learning_rate": 8.937038760429944e-06, "loss": 0.3265, "step": 5878 }, { "epoch": 0.47031059378812423, "grad_norm": 0.29559144009668364, "learning_rate": 8.936639432583424e-06, "loss": 0.2901, "step": 5879 }, { "epoch": 0.47039059218815626, "grad_norm": 0.2772054703273193, "learning_rate": 8.936240038667275e-06, "loss": 0.272, "step": 5880 }, { "epoch": 0.47047059058818824, "grad_norm": 0.33263450760740615, "learning_rate": 8.935840578688191e-06, "loss": 0.2536, "step": 5881 }, { "epoch": 0.4705505889882202, "grad_norm": 0.27008538275523497, "learning_rate": 8.935441052652884e-06, "loss": 0.295, "step": 5882 }, { "epoch": 0.47063058738825225, "grad_norm": 0.2827234167160174, "learning_rate": 8.935041460568055e-06, "loss": 0.2443, "step": 5883 }, { "epoch": 0.47071058578828423, "grad_norm": 0.2763055106015007, "learning_rate": 8.934641802440411e-06, "loss": 0.283, "step": 5884 }, { "epoch": 0.4707905841883162, "grad_norm": 0.33224486110303325, "learning_rate": 8.934242078276662e-06, "loss": 0.2587, "step": 5885 }, { "epoch": 0.47087058258834824, "grad_norm": 0.35265997002484256, "learning_rate": 8.933842288083514e-06, "loss": 0.2514, "step": 5886 }, { "epoch": 0.4709505809883802, "grad_norm": 0.24058093082503645, "learning_rate": 8.933442431867678e-06, "loss": 0.316, "step": 5887 }, { "epoch": 0.47103057938841225, "grad_norm": 0.28129880717369393, "learning_rate": 8.933042509635866e-06, "loss": 0.2768, "step": 5888 }, { "epoch": 0.47111057778844423, "grad_norm": 0.30184007697442156, "learning_rate": 8.932642521394786e-06, "loss": 0.2534, "step": 5889 }, { "epoch": 0.4711905761884762, "grad_norm": 0.27743091175409795, "learning_rate": 8.932242467151156e-06, "loss": 0.2852, "step": 5890 }, { "epoch": 0.47127057458850824, "grad_norm": 0.31035886113688516, "learning_rate": 8.931842346911688e-06, "loss": 0.2348, "step": 5891 }, { "epoch": 0.4713505729885402, "grad_norm": 0.3330061584474159, "learning_rate": 8.931442160683094e-06, "loss": 0.2825, "step": 5892 }, { "epoch": 0.47143057138857225, "grad_norm": 0.2801275634358817, "learning_rate": 8.931041908472098e-06, "loss": 0.2938, "step": 5893 }, { "epoch": 0.47151056978860423, "grad_norm": 0.3143147904339116, "learning_rate": 8.930641590285412e-06, "loss": 0.2986, "step": 5894 }, { "epoch": 0.4715905681886362, "grad_norm": 0.3368672331390658, "learning_rate": 8.930241206129754e-06, "loss": 0.2438, "step": 5895 }, { "epoch": 0.47167056658866824, "grad_norm": 0.2630154179612777, "learning_rate": 8.929840756011847e-06, "loss": 0.2618, "step": 5896 }, { "epoch": 0.4717505649887002, "grad_norm": 0.3245431725744292, "learning_rate": 8.929440239938409e-06, "loss": 0.2482, "step": 5897 }, { "epoch": 0.47183056338873225, "grad_norm": 0.30803423862029006, "learning_rate": 8.929039657916166e-06, "loss": 0.2729, "step": 5898 }, { "epoch": 0.4719105617887642, "grad_norm": 0.31570771303494316, "learning_rate": 8.928639009951837e-06, "loss": 0.2331, "step": 5899 }, { "epoch": 0.4719905601887962, "grad_norm": 0.5190775275367271, "learning_rate": 8.928238296052148e-06, "loss": 0.2811, "step": 5900 }, { "epoch": 0.47207055858882824, "grad_norm": 0.24493905939111194, "learning_rate": 8.927837516223824e-06, "loss": 0.3136, "step": 5901 }, { "epoch": 0.4721505569888602, "grad_norm": 0.2658373338819349, "learning_rate": 8.927436670473592e-06, "loss": 0.2994, "step": 5902 }, { "epoch": 0.47223055538889225, "grad_norm": 0.27232455458262067, "learning_rate": 8.927035758808178e-06, "loss": 0.3053, "step": 5903 }, { "epoch": 0.4723105537889242, "grad_norm": 0.24495463669629086, "learning_rate": 8.926634781234311e-06, "loss": 0.317, "step": 5904 }, { "epoch": 0.4723905521889562, "grad_norm": 0.2904562816811924, "learning_rate": 8.926233737758722e-06, "loss": 0.2953, "step": 5905 }, { "epoch": 0.47247055058898824, "grad_norm": 0.24446139877730716, "learning_rate": 8.925832628388142e-06, "loss": 0.3115, "step": 5906 }, { "epoch": 0.4725505489890202, "grad_norm": 0.28410498109980487, "learning_rate": 8.9254314531293e-06, "loss": 0.3022, "step": 5907 }, { "epoch": 0.4726305473890522, "grad_norm": 0.3444122776357474, "learning_rate": 8.925030211988932e-06, "loss": 0.2529, "step": 5908 }, { "epoch": 0.4727105457890842, "grad_norm": 0.21459535293641052, "learning_rate": 8.924628904973771e-06, "loss": 0.3369, "step": 5909 }, { "epoch": 0.4727905441891162, "grad_norm": 0.27714872896057946, "learning_rate": 8.924227532090553e-06, "loss": 0.2909, "step": 5910 }, { "epoch": 0.47287054258914824, "grad_norm": 0.31695133765143274, "learning_rate": 8.923826093346013e-06, "loss": 0.2435, "step": 5911 }, { "epoch": 0.4729505409891802, "grad_norm": 0.3970991386733703, "learning_rate": 8.92342458874689e-06, "loss": 0.2586, "step": 5912 }, { "epoch": 0.4730305393892122, "grad_norm": 0.2774232628524979, "learning_rate": 8.92302301829992e-06, "loss": 0.2889, "step": 5913 }, { "epoch": 0.4731105377892442, "grad_norm": 0.2826792846487184, "learning_rate": 8.922621382011845e-06, "loss": 0.2876, "step": 5914 }, { "epoch": 0.4731905361892762, "grad_norm": 0.3245687342727812, "learning_rate": 8.922219679889406e-06, "loss": 0.295, "step": 5915 }, { "epoch": 0.47327053458930823, "grad_norm": 0.3084131998490697, "learning_rate": 8.921817911939344e-06, "loss": 0.2617, "step": 5916 }, { "epoch": 0.4733505329893402, "grad_norm": 0.49203562354859276, "learning_rate": 8.9214160781684e-06, "loss": 0.292, "step": 5917 }, { "epoch": 0.4734305313893722, "grad_norm": 0.2520513268813035, "learning_rate": 8.92101417858332e-06, "loss": 0.2835, "step": 5918 }, { "epoch": 0.4735105297894042, "grad_norm": 0.27615554711002055, "learning_rate": 8.92061221319085e-06, "loss": 0.291, "step": 5919 }, { "epoch": 0.4735905281894362, "grad_norm": 0.27559113956556025, "learning_rate": 8.920210181997736e-06, "loss": 0.2954, "step": 5920 }, { "epoch": 0.47367052658946823, "grad_norm": 0.2540698389484329, "learning_rate": 8.919808085010726e-06, "loss": 0.289, "step": 5921 }, { "epoch": 0.4737505249895002, "grad_norm": 0.27367972626124093, "learning_rate": 8.919405922236568e-06, "loss": 0.2905, "step": 5922 }, { "epoch": 0.4738305233895322, "grad_norm": 0.26152788105970687, "learning_rate": 8.919003693682008e-06, "loss": 0.3007, "step": 5923 }, { "epoch": 0.4739105217895642, "grad_norm": 0.286758959816618, "learning_rate": 8.9186013993538e-06, "loss": 0.2414, "step": 5924 }, { "epoch": 0.4739905201895962, "grad_norm": 0.2773187069773488, "learning_rate": 8.918199039258697e-06, "loss": 0.2888, "step": 5925 }, { "epoch": 0.47407051858962823, "grad_norm": 0.25236753926421013, "learning_rate": 8.917796613403451e-06, "loss": 0.2951, "step": 5926 }, { "epoch": 0.4741505169896602, "grad_norm": 0.2852609713877524, "learning_rate": 8.917394121794814e-06, "loss": 0.3132, "step": 5927 }, { "epoch": 0.4742305153896922, "grad_norm": 0.27743512886149024, "learning_rate": 8.916991564439544e-06, "loss": 0.2651, "step": 5928 }, { "epoch": 0.4743105137897242, "grad_norm": 0.21887002940546096, "learning_rate": 8.916588941344393e-06, "loss": 0.322, "step": 5929 }, { "epoch": 0.4743905121897562, "grad_norm": 0.31514393176654626, "learning_rate": 8.916186252516123e-06, "loss": 0.2362, "step": 5930 }, { "epoch": 0.47447051058978823, "grad_norm": 0.2951223519193214, "learning_rate": 8.915783497961492e-06, "loss": 0.3161, "step": 5931 }, { "epoch": 0.4745505089898202, "grad_norm": 0.2950177995826164, "learning_rate": 8.915380677687256e-06, "loss": 0.2534, "step": 5932 }, { "epoch": 0.4746305073898522, "grad_norm": 0.2825358411757625, "learning_rate": 8.914977791700178e-06, "loss": 0.2738, "step": 5933 }, { "epoch": 0.4747105057898842, "grad_norm": 0.3518925372883448, "learning_rate": 8.91457484000702e-06, "loss": 0.2438, "step": 5934 }, { "epoch": 0.4747905041899162, "grad_norm": 0.27292179135300654, "learning_rate": 8.914171822614543e-06, "loss": 0.2745, "step": 5935 }, { "epoch": 0.4748705025899482, "grad_norm": 0.3603713974375238, "learning_rate": 8.913768739529513e-06, "loss": 0.2614, "step": 5936 }, { "epoch": 0.4749505009899802, "grad_norm": 0.2977064200850592, "learning_rate": 8.913365590758695e-06, "loss": 0.2636, "step": 5937 }, { "epoch": 0.4750304993900122, "grad_norm": 0.24917559348916168, "learning_rate": 8.912962376308854e-06, "loss": 0.2985, "step": 5938 }, { "epoch": 0.4751104977900442, "grad_norm": 0.27049291311525436, "learning_rate": 8.912559096186759e-06, "loss": 0.293, "step": 5939 }, { "epoch": 0.4751904961900762, "grad_norm": 0.31101898024916386, "learning_rate": 8.912155750399176e-06, "loss": 0.2828, "step": 5940 }, { "epoch": 0.4752704945901082, "grad_norm": 0.26660813734316846, "learning_rate": 8.911752338952875e-06, "loss": 0.3172, "step": 5941 }, { "epoch": 0.4753504929901402, "grad_norm": 1.5421789426134587, "learning_rate": 8.911348861854628e-06, "loss": 0.2766, "step": 5942 }, { "epoch": 0.4754304913901722, "grad_norm": 0.2984340593968737, "learning_rate": 8.910945319111204e-06, "loss": 0.2667, "step": 5943 }, { "epoch": 0.4755104897902042, "grad_norm": 0.2668255260411109, "learning_rate": 8.910541710729379e-06, "loss": 0.2849, "step": 5944 }, { "epoch": 0.4755904881902362, "grad_norm": 0.3020466084076634, "learning_rate": 8.910138036715924e-06, "loss": 0.2539, "step": 5945 }, { "epoch": 0.47567048659026817, "grad_norm": 0.45672845821740365, "learning_rate": 8.909734297077618e-06, "loss": 0.2856, "step": 5946 }, { "epoch": 0.4757504849903002, "grad_norm": 0.3176164393174906, "learning_rate": 8.90933049182123e-06, "loss": 0.2692, "step": 5947 }, { "epoch": 0.4758304833903322, "grad_norm": 0.31465453433525264, "learning_rate": 8.908926620953545e-06, "loss": 0.2511, "step": 5948 }, { "epoch": 0.4759104817903642, "grad_norm": 0.32015544314426303, "learning_rate": 8.908522684481336e-06, "loss": 0.2739, "step": 5949 }, { "epoch": 0.4759904801903962, "grad_norm": 0.3237046384666863, "learning_rate": 8.908118682411387e-06, "loss": 0.2584, "step": 5950 }, { "epoch": 0.47607047859042817, "grad_norm": 0.30825170026179494, "learning_rate": 8.907714614750473e-06, "loss": 0.2586, "step": 5951 }, { "epoch": 0.4761504769904602, "grad_norm": 0.2730951174691001, "learning_rate": 8.907310481505378e-06, "loss": 0.3066, "step": 5952 }, { "epoch": 0.4762304753904922, "grad_norm": 0.22981698141948778, "learning_rate": 8.906906282682886e-06, "loss": 0.3541, "step": 5953 }, { "epoch": 0.4763104737905242, "grad_norm": 0.2843489355211349, "learning_rate": 8.90650201828978e-06, "loss": 0.2853, "step": 5954 }, { "epoch": 0.4763904721905562, "grad_norm": 0.31260470933364426, "learning_rate": 8.906097688332844e-06, "loss": 0.2585, "step": 5955 }, { "epoch": 0.47647047059058817, "grad_norm": 0.297678785048006, "learning_rate": 8.905693292818864e-06, "loss": 0.2436, "step": 5956 }, { "epoch": 0.4765504689906202, "grad_norm": 0.31529151103253006, "learning_rate": 8.905288831754628e-06, "loss": 0.2887, "step": 5957 }, { "epoch": 0.4766304673906522, "grad_norm": 0.3710037981386871, "learning_rate": 8.904884305146924e-06, "loss": 0.2511, "step": 5958 }, { "epoch": 0.4767104657906842, "grad_norm": 0.28976739023098963, "learning_rate": 8.904479713002542e-06, "loss": 0.2766, "step": 5959 }, { "epoch": 0.4767904641907162, "grad_norm": 0.30270950744911435, "learning_rate": 8.90407505532827e-06, "loss": 0.2434, "step": 5960 }, { "epoch": 0.47687046259074817, "grad_norm": 0.2905647218959929, "learning_rate": 8.9036703321309e-06, "loss": 0.3111, "step": 5961 }, { "epoch": 0.4769504609907802, "grad_norm": 0.3111951423804636, "learning_rate": 8.903265543417227e-06, "loss": 0.2919, "step": 5962 }, { "epoch": 0.4770304593908122, "grad_norm": 0.2869641496652721, "learning_rate": 8.902860689194044e-06, "loss": 0.2798, "step": 5963 }, { "epoch": 0.47711045779084416, "grad_norm": 0.24255111454943826, "learning_rate": 8.902455769468143e-06, "loss": 0.331, "step": 5964 }, { "epoch": 0.4771904561908762, "grad_norm": 0.31540067310751363, "learning_rate": 8.902050784246324e-06, "loss": 0.3031, "step": 5965 }, { "epoch": 0.47727045459090817, "grad_norm": 0.3052807396173634, "learning_rate": 8.90164573353538e-06, "loss": 0.3041, "step": 5966 }, { "epoch": 0.4773504529909402, "grad_norm": 0.3849216381300884, "learning_rate": 8.901240617342111e-06, "loss": 0.279, "step": 5967 }, { "epoch": 0.4774304513909722, "grad_norm": 0.28365711101001334, "learning_rate": 8.900835435673316e-06, "loss": 0.2768, "step": 5968 }, { "epoch": 0.47751044979100415, "grad_norm": 0.3193785217918695, "learning_rate": 8.900430188535796e-06, "loss": 0.2719, "step": 5969 }, { "epoch": 0.4775904481910362, "grad_norm": 0.2940398697004442, "learning_rate": 8.900024875936351e-06, "loss": 0.312, "step": 5970 }, { "epoch": 0.47767044659106817, "grad_norm": 0.290426297398639, "learning_rate": 8.899619497881784e-06, "loss": 0.2937, "step": 5971 }, { "epoch": 0.4777504449911002, "grad_norm": 0.2910230791645577, "learning_rate": 8.899214054378898e-06, "loss": 0.3221, "step": 5972 }, { "epoch": 0.4778304433911322, "grad_norm": 0.26701746118922837, "learning_rate": 8.8988085454345e-06, "loss": 0.2906, "step": 5973 }, { "epoch": 0.47791044179116415, "grad_norm": 0.3481157102399472, "learning_rate": 8.898402971055393e-06, "loss": 0.2618, "step": 5974 }, { "epoch": 0.4779904401911962, "grad_norm": 0.21062696141137857, "learning_rate": 8.897997331248384e-06, "loss": 0.3406, "step": 5975 }, { "epoch": 0.47807043859122816, "grad_norm": 0.3349618195279979, "learning_rate": 8.897591626020284e-06, "loss": 0.2792, "step": 5976 }, { "epoch": 0.4781504369912602, "grad_norm": 0.2428784430014319, "learning_rate": 8.8971858553779e-06, "loss": 0.3071, "step": 5977 }, { "epoch": 0.4782304353912922, "grad_norm": 0.4267800827640234, "learning_rate": 8.89678001932804e-06, "loss": 0.2471, "step": 5978 }, { "epoch": 0.47831043379132415, "grad_norm": 0.3839809694214864, "learning_rate": 8.896374117877519e-06, "loss": 0.259, "step": 5979 }, { "epoch": 0.4783904321913562, "grad_norm": 0.45701899799500406, "learning_rate": 8.895968151033147e-06, "loss": 0.282, "step": 5980 }, { "epoch": 0.47847043059138816, "grad_norm": 0.30808801033910155, "learning_rate": 8.895562118801739e-06, "loss": 0.2558, "step": 5981 }, { "epoch": 0.4785504289914202, "grad_norm": 0.2451417995689836, "learning_rate": 8.895156021190109e-06, "loss": 0.3095, "step": 5982 }, { "epoch": 0.4786304273914522, "grad_norm": 0.26306478982211084, "learning_rate": 8.89474985820507e-06, "loss": 0.2873, "step": 5983 }, { "epoch": 0.47871042579148415, "grad_norm": 0.43681134069748473, "learning_rate": 8.894343629853442e-06, "loss": 0.2921, "step": 5984 }, { "epoch": 0.4787904241915162, "grad_norm": 0.30489621652928284, "learning_rate": 8.893937336142043e-06, "loss": 0.3209, "step": 5985 }, { "epoch": 0.47887042259154816, "grad_norm": 0.28936053098899633, "learning_rate": 8.89353097707769e-06, "loss": 0.2656, "step": 5986 }, { "epoch": 0.4789504209915802, "grad_norm": 0.3465262010692459, "learning_rate": 8.893124552667203e-06, "loss": 0.252, "step": 5987 }, { "epoch": 0.47903041939161217, "grad_norm": 0.3040600554500932, "learning_rate": 8.892718062917405e-06, "loss": 0.2975, "step": 5988 }, { "epoch": 0.47911041779164415, "grad_norm": 0.27135774535392615, "learning_rate": 8.892311507835118e-06, "loss": 0.2958, "step": 5989 }, { "epoch": 0.4791904161916762, "grad_norm": 0.3502376409873895, "learning_rate": 8.891904887427164e-06, "loss": 0.2986, "step": 5990 }, { "epoch": 0.47927041459170816, "grad_norm": 0.26545625360116043, "learning_rate": 8.891498201700368e-06, "loss": 0.3075, "step": 5991 }, { "epoch": 0.47935041299174014, "grad_norm": 0.27679202445555856, "learning_rate": 8.891091450661556e-06, "loss": 0.2945, "step": 5992 }, { "epoch": 0.47943041139177217, "grad_norm": 0.259828046196747, "learning_rate": 8.890684634317552e-06, "loss": 0.2808, "step": 5993 }, { "epoch": 0.47951040979180415, "grad_norm": 0.29423025654605167, "learning_rate": 8.890277752675187e-06, "loss": 0.3091, "step": 5994 }, { "epoch": 0.4795904081918362, "grad_norm": 0.2597333117078829, "learning_rate": 8.889870805741288e-06, "loss": 0.2838, "step": 5995 }, { "epoch": 0.47967040659186816, "grad_norm": 0.26047642178345937, "learning_rate": 8.889463793522687e-06, "loss": 0.2662, "step": 5996 }, { "epoch": 0.47975040499190014, "grad_norm": 0.223619804677584, "learning_rate": 8.889056716026213e-06, "loss": 0.3065, "step": 5997 }, { "epoch": 0.47983040339193217, "grad_norm": 0.27823752785917666, "learning_rate": 8.888649573258697e-06, "loss": 0.2816, "step": 5998 }, { "epoch": 0.47991040179196415, "grad_norm": 0.3208355594101808, "learning_rate": 8.888242365226975e-06, "loss": 0.2617, "step": 5999 }, { "epoch": 0.4799904001919962, "grad_norm": 0.3559079743089482, "learning_rate": 8.88783509193788e-06, "loss": 0.309, "step": 6000 }, { "epoch": 0.48007039859202816, "grad_norm": 0.26953091566283877, "learning_rate": 8.887427753398249e-06, "loss": 0.2739, "step": 6001 }, { "epoch": 0.48015039699206014, "grad_norm": 0.27038877757890944, "learning_rate": 8.887020349614914e-06, "loss": 0.2899, "step": 6002 }, { "epoch": 0.48023039539209217, "grad_norm": 0.37312978896567034, "learning_rate": 8.886612880594715e-06, "loss": 0.3019, "step": 6003 }, { "epoch": 0.48031039379212415, "grad_norm": 0.2411088059741634, "learning_rate": 8.886205346344495e-06, "loss": 0.3417, "step": 6004 }, { "epoch": 0.4803903921921562, "grad_norm": 0.2559689392391588, "learning_rate": 8.885797746871085e-06, "loss": 0.2756, "step": 6005 }, { "epoch": 0.48047039059218816, "grad_norm": 0.39328241276357806, "learning_rate": 8.885390082181333e-06, "loss": 0.2873, "step": 6006 }, { "epoch": 0.48055038899222013, "grad_norm": 0.2728652678708431, "learning_rate": 8.884982352282078e-06, "loss": 0.3265, "step": 6007 }, { "epoch": 0.48063038739225217, "grad_norm": 0.272205818960488, "learning_rate": 8.884574557180165e-06, "loss": 0.2922, "step": 6008 }, { "epoch": 0.48071038579228415, "grad_norm": 0.264583990166896, "learning_rate": 8.884166696882436e-06, "loss": 0.3014, "step": 6009 }, { "epoch": 0.4807903841923162, "grad_norm": 0.26922172038974934, "learning_rate": 8.883758771395739e-06, "loss": 0.2748, "step": 6010 }, { "epoch": 0.48087038259234816, "grad_norm": 0.27759006054417085, "learning_rate": 8.883350780726915e-06, "loss": 0.2926, "step": 6011 }, { "epoch": 0.48095038099238013, "grad_norm": 0.26346029869252574, "learning_rate": 8.882942724882816e-06, "loss": 0.2856, "step": 6012 }, { "epoch": 0.48103037939241217, "grad_norm": 0.23300387129809144, "learning_rate": 8.88253460387029e-06, "loss": 0.3233, "step": 6013 }, { "epoch": 0.48111037779244414, "grad_norm": 0.2448451361469002, "learning_rate": 8.882126417696185e-06, "loss": 0.3187, "step": 6014 }, { "epoch": 0.4811903761924762, "grad_norm": 0.3308517955591014, "learning_rate": 8.881718166367353e-06, "loss": 0.2451, "step": 6015 }, { "epoch": 0.48127037459250815, "grad_norm": 0.2653559160003545, "learning_rate": 8.881309849890645e-06, "loss": 0.3221, "step": 6016 }, { "epoch": 0.48135037299254013, "grad_norm": 0.3075622635619129, "learning_rate": 8.880901468272913e-06, "loss": 0.2648, "step": 6017 }, { "epoch": 0.48143037139257217, "grad_norm": 0.3168801818528636, "learning_rate": 8.880493021521013e-06, "loss": 0.2649, "step": 6018 }, { "epoch": 0.48151036979260414, "grad_norm": 0.2975093586817609, "learning_rate": 8.8800845096418e-06, "loss": 0.259, "step": 6019 }, { "epoch": 0.4815903681926361, "grad_norm": 0.30287101021594365, "learning_rate": 8.879675932642129e-06, "loss": 0.2486, "step": 6020 }, { "epoch": 0.48167036659266815, "grad_norm": 0.3238328331113838, "learning_rate": 8.87926729052886e-06, "loss": 0.2841, "step": 6021 }, { "epoch": 0.48175036499270013, "grad_norm": 0.2101944924046625, "learning_rate": 8.878858583308845e-06, "loss": 0.3393, "step": 6022 }, { "epoch": 0.48183036339273216, "grad_norm": 0.4306022384961016, "learning_rate": 8.87844981098895e-06, "loss": 0.2689, "step": 6023 }, { "epoch": 0.48191036179276414, "grad_norm": 0.38028574127863807, "learning_rate": 8.87804097357603e-06, "loss": 0.2385, "step": 6024 }, { "epoch": 0.4819903601927961, "grad_norm": 0.2871861482154307, "learning_rate": 8.877632071076952e-06, "loss": 0.2982, "step": 6025 }, { "epoch": 0.48207035859282815, "grad_norm": 0.2757773633451801, "learning_rate": 8.877223103498576e-06, "loss": 0.2783, "step": 6026 }, { "epoch": 0.48215035699286013, "grad_norm": 0.27341700723425694, "learning_rate": 8.876814070847766e-06, "loss": 0.2676, "step": 6027 }, { "epoch": 0.48223035539289216, "grad_norm": 0.25073205520297837, "learning_rate": 8.876404973131387e-06, "loss": 0.3184, "step": 6028 }, { "epoch": 0.48231035379292414, "grad_norm": 0.38097853285361427, "learning_rate": 8.875995810356306e-06, "loss": 0.3019, "step": 6029 }, { "epoch": 0.4823903521929561, "grad_norm": 0.3250014171644075, "learning_rate": 8.875586582529388e-06, "loss": 0.2463, "step": 6030 }, { "epoch": 0.48247035059298815, "grad_norm": 0.28581214791011883, "learning_rate": 8.875177289657502e-06, "loss": 0.289, "step": 6031 }, { "epoch": 0.48255034899302013, "grad_norm": 0.26504143160053084, "learning_rate": 8.87476793174752e-06, "loss": 0.2986, "step": 6032 }, { "epoch": 0.48263034739305216, "grad_norm": 0.2711934730699754, "learning_rate": 8.874358508806306e-06, "loss": 0.3062, "step": 6033 }, { "epoch": 0.48271034579308414, "grad_norm": 0.23616451497331176, "learning_rate": 8.873949020840738e-06, "loss": 0.3114, "step": 6034 }, { "epoch": 0.4827903441931161, "grad_norm": 0.2863916929837123, "learning_rate": 8.873539467857683e-06, "loss": 0.2434, "step": 6035 }, { "epoch": 0.48287034259314815, "grad_norm": 0.28409951930720534, "learning_rate": 8.87312984986402e-06, "loss": 0.251, "step": 6036 }, { "epoch": 0.4829503409931801, "grad_norm": 0.25662827129188315, "learning_rate": 8.872720166866623e-06, "loss": 0.3126, "step": 6037 }, { "epoch": 0.48303033939321216, "grad_norm": 0.27234315195949665, "learning_rate": 8.872310418872364e-06, "loss": 0.2678, "step": 6038 }, { "epoch": 0.48311033779324414, "grad_norm": 0.3796631107619307, "learning_rate": 8.871900605888121e-06, "loss": 0.2589, "step": 6039 }, { "epoch": 0.4831903361932761, "grad_norm": 0.289288538321304, "learning_rate": 8.871490727920773e-06, "loss": 0.2734, "step": 6040 }, { "epoch": 0.48327033459330815, "grad_norm": 0.2537414314242945, "learning_rate": 8.8710807849772e-06, "loss": 0.3174, "step": 6041 }, { "epoch": 0.4833503329933401, "grad_norm": 0.2977227889456845, "learning_rate": 8.870670777064281e-06, "loss": 0.2915, "step": 6042 }, { "epoch": 0.48343033139337216, "grad_norm": 0.5136933141458111, "learning_rate": 8.870260704188897e-06, "loss": 0.2704, "step": 6043 }, { "epoch": 0.48351032979340414, "grad_norm": 0.2748376330613757, "learning_rate": 8.86985056635793e-06, "loss": 0.2768, "step": 6044 }, { "epoch": 0.4835903281934361, "grad_norm": 0.25301100524974496, "learning_rate": 8.869440363578267e-06, "loss": 0.3159, "step": 6045 }, { "epoch": 0.48367032659346815, "grad_norm": 0.19634041250045706, "learning_rate": 8.86903009585679e-06, "loss": 0.3384, "step": 6046 }, { "epoch": 0.4837503249935001, "grad_norm": 0.3273255750032447, "learning_rate": 8.868619763200384e-06, "loss": 0.27, "step": 6047 }, { "epoch": 0.4838303233935321, "grad_norm": 0.3094620768881021, "learning_rate": 8.868209365615934e-06, "loss": 0.3044, "step": 6048 }, { "epoch": 0.48391032179356414, "grad_norm": 0.38877805330353715, "learning_rate": 8.867798903110331e-06, "loss": 0.2794, "step": 6049 }, { "epoch": 0.4839903201935961, "grad_norm": 0.3458700294414693, "learning_rate": 8.867388375690464e-06, "loss": 0.2755, "step": 6050 }, { "epoch": 0.48407031859362815, "grad_norm": 0.3160104872424893, "learning_rate": 8.866977783363219e-06, "loss": 0.2681, "step": 6051 }, { "epoch": 0.4841503169936601, "grad_norm": 0.2729571125003863, "learning_rate": 8.866567126135493e-06, "loss": 0.2869, "step": 6052 }, { "epoch": 0.4842303153936921, "grad_norm": 0.3650429043703798, "learning_rate": 8.866156404014175e-06, "loss": 0.2936, "step": 6053 }, { "epoch": 0.48431031379372413, "grad_norm": 0.3366046848790977, "learning_rate": 8.865745617006157e-06, "loss": 0.2524, "step": 6054 }, { "epoch": 0.4843903121937561, "grad_norm": 0.27294679689350243, "learning_rate": 8.865334765118335e-06, "loss": 0.2838, "step": 6055 }, { "epoch": 0.48447031059378814, "grad_norm": 0.27147187919255483, "learning_rate": 8.864923848357605e-06, "loss": 0.2783, "step": 6056 }, { "epoch": 0.4845503089938201, "grad_norm": 0.2594736040800616, "learning_rate": 8.864512866730862e-06, "loss": 0.2934, "step": 6057 }, { "epoch": 0.4846303073938521, "grad_norm": 0.3036612213967791, "learning_rate": 8.864101820245003e-06, "loss": 0.2331, "step": 6058 }, { "epoch": 0.48471030579388413, "grad_norm": 0.28294286151990794, "learning_rate": 8.863690708906931e-06, "loss": 0.2905, "step": 6059 }, { "epoch": 0.4847903041939161, "grad_norm": 0.27179675468251785, "learning_rate": 8.86327953272354e-06, "loss": 0.309, "step": 6060 }, { "epoch": 0.48487030259394814, "grad_norm": 0.2851163954720835, "learning_rate": 8.862868291701735e-06, "loss": 0.2762, "step": 6061 }, { "epoch": 0.4849503009939801, "grad_norm": 0.5114124861768686, "learning_rate": 8.862456985848417e-06, "loss": 0.2733, "step": 6062 }, { "epoch": 0.4850302993940121, "grad_norm": 0.28527109179894633, "learning_rate": 8.862045615170487e-06, "loss": 0.2988, "step": 6063 }, { "epoch": 0.48511029779404413, "grad_norm": 0.24748707626915728, "learning_rate": 8.861634179674851e-06, "loss": 0.2772, "step": 6064 }, { "epoch": 0.4851902961940761, "grad_norm": 0.3585957527880161, "learning_rate": 8.861222679368416e-06, "loss": 0.2698, "step": 6065 }, { "epoch": 0.48527029459410814, "grad_norm": 0.3103252461130635, "learning_rate": 8.860811114258085e-06, "loss": 0.2758, "step": 6066 }, { "epoch": 0.4853502929941401, "grad_norm": 0.2850462172928093, "learning_rate": 8.860399484350768e-06, "loss": 0.3357, "step": 6067 }, { "epoch": 0.4854302913941721, "grad_norm": 0.29143916843725176, "learning_rate": 8.859987789653371e-06, "loss": 0.2742, "step": 6068 }, { "epoch": 0.48551028979420413, "grad_norm": 0.28664496910778864, "learning_rate": 8.859576030172804e-06, "loss": 0.2628, "step": 6069 }, { "epoch": 0.4855902881942361, "grad_norm": 0.25841190178424, "learning_rate": 8.85916420591598e-06, "loss": 0.2634, "step": 6070 }, { "epoch": 0.48567028659426814, "grad_norm": 0.3813984994640962, "learning_rate": 8.858752316889809e-06, "loss": 0.2714, "step": 6071 }, { "epoch": 0.4857502849943001, "grad_norm": 0.28858054546509043, "learning_rate": 8.858340363101204e-06, "loss": 0.2324, "step": 6072 }, { "epoch": 0.4858302833943321, "grad_norm": 0.28364559729022565, "learning_rate": 8.857928344557079e-06, "loss": 0.2456, "step": 6073 }, { "epoch": 0.48591028179436413, "grad_norm": 0.28076197813166387, "learning_rate": 8.85751626126435e-06, "loss": 0.2964, "step": 6074 }, { "epoch": 0.4859902801943961, "grad_norm": 0.257441639544771, "learning_rate": 8.857104113229929e-06, "loss": 0.2809, "step": 6075 }, { "epoch": 0.4860702785944281, "grad_norm": 0.3285007928553187, "learning_rate": 8.85669190046074e-06, "loss": 0.2575, "step": 6076 }, { "epoch": 0.4861502769944601, "grad_norm": 0.35666355546693773, "learning_rate": 8.856279622963694e-06, "loss": 0.2555, "step": 6077 }, { "epoch": 0.4862302753944921, "grad_norm": 0.3229680715740346, "learning_rate": 8.855867280745717e-06, "loss": 0.2483, "step": 6078 }, { "epoch": 0.48631027379452413, "grad_norm": 0.3155712952956712, "learning_rate": 8.855454873813724e-06, "loss": 0.2471, "step": 6079 }, { "epoch": 0.4863902721945561, "grad_norm": 0.2542018498374534, "learning_rate": 8.85504240217464e-06, "loss": 0.3252, "step": 6080 }, { "epoch": 0.4864702705945881, "grad_norm": 0.5113692309886261, "learning_rate": 8.854629865835387e-06, "loss": 0.2661, "step": 6081 }, { "epoch": 0.4865502689946201, "grad_norm": 0.29414563543630257, "learning_rate": 8.85421726480289e-06, "loss": 0.2445, "step": 6082 }, { "epoch": 0.4866302673946521, "grad_norm": 0.26241372436923976, "learning_rate": 8.853804599084068e-06, "loss": 0.2848, "step": 6083 }, { "epoch": 0.4867102657946841, "grad_norm": 0.3082505933386186, "learning_rate": 8.853391868685853e-06, "loss": 0.216, "step": 6084 }, { "epoch": 0.4867902641947161, "grad_norm": 0.2892346121353118, "learning_rate": 8.852979073615172e-06, "loss": 0.246, "step": 6085 }, { "epoch": 0.4868702625947481, "grad_norm": 0.28124505557993795, "learning_rate": 8.852566213878947e-06, "loss": 0.2743, "step": 6086 }, { "epoch": 0.4869502609947801, "grad_norm": 0.2561583334831174, "learning_rate": 8.852153289484114e-06, "loss": 0.3234, "step": 6087 }, { "epoch": 0.4870302593948121, "grad_norm": 0.2595761475915162, "learning_rate": 8.851740300437597e-06, "loss": 0.2898, "step": 6088 }, { "epoch": 0.4871102577948441, "grad_norm": 0.26554621854777943, "learning_rate": 8.851327246746334e-06, "loss": 0.2927, "step": 6089 }, { "epoch": 0.4871902561948761, "grad_norm": 0.289659492372637, "learning_rate": 8.850914128417252e-06, "loss": 0.2419, "step": 6090 }, { "epoch": 0.4872702545949081, "grad_norm": 0.267276134388402, "learning_rate": 8.850500945457286e-06, "loss": 0.2782, "step": 6091 }, { "epoch": 0.4873502529949401, "grad_norm": 0.3038839274424505, "learning_rate": 8.850087697873372e-06, "loss": 0.2672, "step": 6092 }, { "epoch": 0.4874302513949721, "grad_norm": 0.9867457409084249, "learning_rate": 8.849674385672444e-06, "loss": 0.3049, "step": 6093 }, { "epoch": 0.4875102497950041, "grad_norm": 0.27991568562509167, "learning_rate": 8.84926100886144e-06, "loss": 0.2862, "step": 6094 }, { "epoch": 0.4875902481950361, "grad_norm": 0.31782705558349744, "learning_rate": 8.848847567447298e-06, "loss": 0.3062, "step": 6095 }, { "epoch": 0.4876702465950681, "grad_norm": 0.2995384082613687, "learning_rate": 8.848434061436954e-06, "loss": 0.2935, "step": 6096 }, { "epoch": 0.4877502449951001, "grad_norm": 0.3275342457376998, "learning_rate": 8.848020490837352e-06, "loss": 0.2531, "step": 6097 }, { "epoch": 0.4878302433951321, "grad_norm": 0.2682795915746839, "learning_rate": 8.847606855655429e-06, "loss": 0.2975, "step": 6098 }, { "epoch": 0.4879102417951641, "grad_norm": 0.29402823178675375, "learning_rate": 8.84719315589813e-06, "loss": 0.2725, "step": 6099 }, { "epoch": 0.4879902401951961, "grad_norm": 0.2802527364503724, "learning_rate": 8.846779391572399e-06, "loss": 0.287, "step": 6100 }, { "epoch": 0.4880702385952281, "grad_norm": 0.3367822979842637, "learning_rate": 8.846365562685178e-06, "loss": 0.2598, "step": 6101 }, { "epoch": 0.4881502369952601, "grad_norm": 0.23450974261413876, "learning_rate": 8.84595166924341e-06, "loss": 0.3497, "step": 6102 }, { "epoch": 0.4882302353952921, "grad_norm": 0.23553521079397546, "learning_rate": 8.845537711254048e-06, "loss": 0.3144, "step": 6103 }, { "epoch": 0.48831023379532407, "grad_norm": 0.2940793778242793, "learning_rate": 8.845123688724037e-06, "loss": 0.2683, "step": 6104 }, { "epoch": 0.4883902321953561, "grad_norm": 0.2759623421620397, "learning_rate": 8.844709601660323e-06, "loss": 0.3192, "step": 6105 }, { "epoch": 0.4884702305953881, "grad_norm": 0.26951289079493057, "learning_rate": 8.844295450069858e-06, "loss": 0.2846, "step": 6106 }, { "epoch": 0.4885502289954201, "grad_norm": 0.31785066916016436, "learning_rate": 8.843881233959592e-06, "loss": 0.2761, "step": 6107 }, { "epoch": 0.4886302273954521, "grad_norm": 0.2605319093679355, "learning_rate": 8.843466953336478e-06, "loss": 0.2851, "step": 6108 }, { "epoch": 0.48871022579548407, "grad_norm": 0.3107064313221954, "learning_rate": 8.843052608207468e-06, "loss": 0.2678, "step": 6109 }, { "epoch": 0.4887902241955161, "grad_norm": 0.302151218080538, "learning_rate": 8.842638198579517e-06, "loss": 0.266, "step": 6110 }, { "epoch": 0.4888702225955481, "grad_norm": 0.32769684719788494, "learning_rate": 8.842223724459578e-06, "loss": 0.2519, "step": 6111 }, { "epoch": 0.4889502209955801, "grad_norm": 0.26206886432306425, "learning_rate": 8.84180918585461e-06, "loss": 0.2927, "step": 6112 }, { "epoch": 0.4890302193956121, "grad_norm": 0.3258798357815694, "learning_rate": 8.841394582771568e-06, "loss": 0.2946, "step": 6113 }, { "epoch": 0.48911021779564406, "grad_norm": 0.31797944972437214, "learning_rate": 8.840979915217412e-06, "loss": 0.2568, "step": 6114 }, { "epoch": 0.4891902161956761, "grad_norm": 0.31214940591085477, "learning_rate": 8.840565183199102e-06, "loss": 0.2588, "step": 6115 }, { "epoch": 0.4892702145957081, "grad_norm": 0.329987287238085, "learning_rate": 8.840150386723596e-06, "loss": 0.2907, "step": 6116 }, { "epoch": 0.4893502129957401, "grad_norm": 0.3441850285226875, "learning_rate": 8.839735525797857e-06, "loss": 0.2487, "step": 6117 }, { "epoch": 0.4894302113957721, "grad_norm": 0.25565075419674743, "learning_rate": 8.839320600428847e-06, "loss": 0.3169, "step": 6118 }, { "epoch": 0.48951020979580406, "grad_norm": 0.43858916606906057, "learning_rate": 8.838905610623532e-06, "loss": 0.2572, "step": 6119 }, { "epoch": 0.4895902081958361, "grad_norm": 0.2987897965406687, "learning_rate": 8.838490556388875e-06, "loss": 0.236, "step": 6120 }, { "epoch": 0.4896702065958681, "grad_norm": 0.29688482580580317, "learning_rate": 8.838075437731844e-06, "loss": 0.2802, "step": 6121 }, { "epoch": 0.4897502049959001, "grad_norm": 0.2591323042020038, "learning_rate": 8.837660254659401e-06, "loss": 0.3215, "step": 6122 }, { "epoch": 0.4898302033959321, "grad_norm": 0.26407900962002884, "learning_rate": 8.837245007178522e-06, "loss": 0.2856, "step": 6123 }, { "epoch": 0.48991020179596406, "grad_norm": 0.33745564817412593, "learning_rate": 8.836829695296167e-06, "loss": 0.2577, "step": 6124 }, { "epoch": 0.4899902001959961, "grad_norm": 0.2703920533925351, "learning_rate": 8.836414319019314e-06, "loss": 0.2801, "step": 6125 }, { "epoch": 0.4900701985960281, "grad_norm": 0.5976531652704125, "learning_rate": 8.83599887835493e-06, "loss": 0.3097, "step": 6126 }, { "epoch": 0.4901501969960601, "grad_norm": 0.30068138951012646, "learning_rate": 8.83558337330999e-06, "loss": 0.3097, "step": 6127 }, { "epoch": 0.4902301953960921, "grad_norm": 0.2504469450390486, "learning_rate": 8.835167803891467e-06, "loss": 0.2779, "step": 6128 }, { "epoch": 0.49031019379612406, "grad_norm": 0.28252254631474455, "learning_rate": 8.834752170106334e-06, "loss": 0.2824, "step": 6129 }, { "epoch": 0.4903901921961561, "grad_norm": 0.28600491679106227, "learning_rate": 8.834336471961569e-06, "loss": 0.2535, "step": 6130 }, { "epoch": 0.49047019059618807, "grad_norm": 0.31840214702062336, "learning_rate": 8.833920709464146e-06, "loss": 0.2887, "step": 6131 }, { "epoch": 0.49055018899622005, "grad_norm": 0.30268468891513156, "learning_rate": 8.833504882621045e-06, "loss": 0.2536, "step": 6132 }, { "epoch": 0.4906301873962521, "grad_norm": 0.2987049884739924, "learning_rate": 8.833088991439245e-06, "loss": 0.2497, "step": 6133 }, { "epoch": 0.49071018579628406, "grad_norm": 0.32457899218494535, "learning_rate": 8.832673035925724e-06, "loss": 0.2396, "step": 6134 }, { "epoch": 0.4907901841963161, "grad_norm": 0.3283392672319787, "learning_rate": 8.832257016087464e-06, "loss": 0.2966, "step": 6135 }, { "epoch": 0.49087018259634807, "grad_norm": 0.3307696933621787, "learning_rate": 8.831840931931448e-06, "loss": 0.228, "step": 6136 }, { "epoch": 0.49095018099638005, "grad_norm": 0.2554593438951163, "learning_rate": 8.83142478346466e-06, "loss": 0.2781, "step": 6137 }, { "epoch": 0.4910301793964121, "grad_norm": 0.2953200240660326, "learning_rate": 8.831008570694082e-06, "loss": 0.2918, "step": 6138 }, { "epoch": 0.49111017779644406, "grad_norm": 0.2820461701366511, "learning_rate": 8.830592293626702e-06, "loss": 0.2942, "step": 6139 }, { "epoch": 0.4911901761964761, "grad_norm": 0.3003647812027781, "learning_rate": 8.830175952269502e-06, "loss": 0.2867, "step": 6140 }, { "epoch": 0.49127017459650807, "grad_norm": 0.25929235632416175, "learning_rate": 8.829759546629474e-06, "loss": 0.2681, "step": 6141 }, { "epoch": 0.49135017299654005, "grad_norm": 0.2720004783664826, "learning_rate": 8.829343076713607e-06, "loss": 0.2906, "step": 6142 }, { "epoch": 0.4914301713965721, "grad_norm": 0.2814995606411108, "learning_rate": 8.828926542528888e-06, "loss": 0.2862, "step": 6143 }, { "epoch": 0.49151016979660406, "grad_norm": 0.2775399193462214, "learning_rate": 8.828509944082308e-06, "loss": 0.3152, "step": 6144 }, { "epoch": 0.4915901681966361, "grad_norm": 0.23441422011686913, "learning_rate": 8.828093281380859e-06, "loss": 0.3271, "step": 6145 }, { "epoch": 0.49167016659666807, "grad_norm": 0.31252199328148905, "learning_rate": 8.827676554431534e-06, "loss": 0.2888, "step": 6146 }, { "epoch": 0.49175016499670005, "grad_norm": 0.3062310465492467, "learning_rate": 8.82725976324133e-06, "loss": 0.2515, "step": 6147 }, { "epoch": 0.4918301633967321, "grad_norm": 0.4664191791414356, "learning_rate": 8.82684290781724e-06, "loss": 0.2818, "step": 6148 }, { "epoch": 0.49191016179676406, "grad_norm": 0.32797269820090874, "learning_rate": 8.826425988166259e-06, "loss": 0.2457, "step": 6149 }, { "epoch": 0.4919901601967961, "grad_norm": 0.2504772065394578, "learning_rate": 8.826009004295383e-06, "loss": 0.2818, "step": 6150 }, { "epoch": 0.49207015859682807, "grad_norm": 0.32903107691927563, "learning_rate": 8.825591956211614e-06, "loss": 0.2387, "step": 6151 }, { "epoch": 0.49215015699686004, "grad_norm": 0.2772987061434365, "learning_rate": 8.825174843921951e-06, "loss": 0.2852, "step": 6152 }, { "epoch": 0.4922301553968921, "grad_norm": 0.32511580835134446, "learning_rate": 8.824757667433392e-06, "loss": 0.2568, "step": 6153 }, { "epoch": 0.49231015379692405, "grad_norm": 0.2889389992525257, "learning_rate": 8.824340426752941e-06, "loss": 0.2513, "step": 6154 }, { "epoch": 0.4923901521969561, "grad_norm": 0.2819002758092301, "learning_rate": 8.8239231218876e-06, "loss": 0.2796, "step": 6155 }, { "epoch": 0.49247015059698807, "grad_norm": 0.24224042004922952, "learning_rate": 8.823505752844372e-06, "loss": 0.3267, "step": 6156 }, { "epoch": 0.49255014899702004, "grad_norm": 0.2927185254652474, "learning_rate": 8.823088319630262e-06, "loss": 0.2893, "step": 6157 }, { "epoch": 0.4926301473970521, "grad_norm": 0.24280504408562034, "learning_rate": 8.822670822252277e-06, "loss": 0.3061, "step": 6158 }, { "epoch": 0.49271014579708405, "grad_norm": 0.2772632867948452, "learning_rate": 8.822253260717422e-06, "loss": 0.3053, "step": 6159 }, { "epoch": 0.49279014419711603, "grad_norm": 0.2816771355687215, "learning_rate": 8.821835635032708e-06, "loss": 0.2744, "step": 6160 }, { "epoch": 0.49287014259714806, "grad_norm": 0.31194003166338646, "learning_rate": 8.82141794520514e-06, "loss": 0.2583, "step": 6161 }, { "epoch": 0.49295014099718004, "grad_norm": 0.29170121201844873, "learning_rate": 8.82100019124173e-06, "loss": 0.2882, "step": 6162 }, { "epoch": 0.4930301393972121, "grad_norm": 0.2880961067970108, "learning_rate": 8.820582373149491e-06, "loss": 0.2923, "step": 6163 }, { "epoch": 0.49311013779724405, "grad_norm": 0.24713540721807353, "learning_rate": 8.820164490935435e-06, "loss": 0.32, "step": 6164 }, { "epoch": 0.49319013619727603, "grad_norm": 0.2917153683615355, "learning_rate": 8.819746544606573e-06, "loss": 0.2934, "step": 6165 }, { "epoch": 0.49327013459730806, "grad_norm": 0.5481727615817722, "learning_rate": 8.819328534169922e-06, "loss": 0.2726, "step": 6166 }, { "epoch": 0.49335013299734004, "grad_norm": 0.3126742227168862, "learning_rate": 8.818910459632495e-06, "loss": 0.255, "step": 6167 }, { "epoch": 0.4934301313973721, "grad_norm": 0.39325891364849425, "learning_rate": 8.818492321001311e-06, "loss": 0.2564, "step": 6168 }, { "epoch": 0.49351012979740405, "grad_norm": 0.29462075143685634, "learning_rate": 8.818074118283389e-06, "loss": 0.2293, "step": 6169 }, { "epoch": 0.49359012819743603, "grad_norm": 0.28612136843199354, "learning_rate": 8.817655851485744e-06, "loss": 0.2674, "step": 6170 }, { "epoch": 0.49367012659746806, "grad_norm": 0.23975356612050455, "learning_rate": 8.817237520615398e-06, "loss": 0.3414, "step": 6171 }, { "epoch": 0.49375012499750004, "grad_norm": 0.18624517951886338, "learning_rate": 8.81681912567937e-06, "loss": 0.3656, "step": 6172 }, { "epoch": 0.4938301233975321, "grad_norm": 0.27388646938430544, "learning_rate": 8.816400666684685e-06, "loss": 0.2659, "step": 6173 }, { "epoch": 0.49391012179756405, "grad_norm": 0.26404587869789004, "learning_rate": 8.815982143638366e-06, "loss": 0.2886, "step": 6174 }, { "epoch": 0.493990120197596, "grad_norm": 0.28463380500914875, "learning_rate": 8.815563556547434e-06, "loss": 0.2931, "step": 6175 }, { "epoch": 0.49407011859762806, "grad_norm": 0.2746904308973596, "learning_rate": 8.815144905418918e-06, "loss": 0.2608, "step": 6176 }, { "epoch": 0.49415011699766004, "grad_norm": 0.2947171386997634, "learning_rate": 8.81472619025984e-06, "loss": 0.3141, "step": 6177 }, { "epoch": 0.49423011539769207, "grad_norm": 0.3081920645616456, "learning_rate": 8.814307411077233e-06, "loss": 0.2512, "step": 6178 }, { "epoch": 0.49431011379772405, "grad_norm": 0.2531982549809297, "learning_rate": 8.81388856787812e-06, "loss": 0.279, "step": 6179 }, { "epoch": 0.494390112197756, "grad_norm": 0.3617405574036201, "learning_rate": 8.813469660669532e-06, "loss": 0.2775, "step": 6180 }, { "epoch": 0.49447011059778806, "grad_norm": 0.22876192521100797, "learning_rate": 8.813050689458502e-06, "loss": 0.3301, "step": 6181 }, { "epoch": 0.49455010899782004, "grad_norm": 0.3380061021276065, "learning_rate": 8.812631654252061e-06, "loss": 0.2382, "step": 6182 }, { "epoch": 0.49463010739785207, "grad_norm": 0.28000678359119424, "learning_rate": 8.81221255505724e-06, "loss": 0.247, "step": 6183 }, { "epoch": 0.49471010579788405, "grad_norm": 0.30962445175473036, "learning_rate": 8.811793391881074e-06, "loss": 0.2628, "step": 6184 }, { "epoch": 0.494790104197916, "grad_norm": 0.2527738952014621, "learning_rate": 8.811374164730599e-06, "loss": 0.2857, "step": 6185 }, { "epoch": 0.49487010259794806, "grad_norm": 0.2812747743444406, "learning_rate": 8.81095487361285e-06, "loss": 0.2976, "step": 6186 }, { "epoch": 0.49495010099798004, "grad_norm": 0.370419002579203, "learning_rate": 8.810535518534862e-06, "loss": 0.2736, "step": 6187 }, { "epoch": 0.495030099398012, "grad_norm": 0.22383486255057167, "learning_rate": 8.810116099503675e-06, "loss": 0.345, "step": 6188 }, { "epoch": 0.49511009779804405, "grad_norm": 0.31843054127070447, "learning_rate": 8.80969661652633e-06, "loss": 0.2492, "step": 6189 }, { "epoch": 0.495190096198076, "grad_norm": 0.31398463982348573, "learning_rate": 8.809277069609863e-06, "loss": 0.3059, "step": 6190 }, { "epoch": 0.49527009459810806, "grad_norm": 0.33107502038416686, "learning_rate": 8.80885745876132e-06, "loss": 0.2305, "step": 6191 }, { "epoch": 0.49535009299814003, "grad_norm": 0.2965927361348403, "learning_rate": 8.80843778398774e-06, "loss": 0.3142, "step": 6192 }, { "epoch": 0.495430091398172, "grad_norm": 0.29777299696063586, "learning_rate": 8.80801804529617e-06, "loss": 0.2884, "step": 6193 }, { "epoch": 0.49551008979820405, "grad_norm": 0.3284321667257422, "learning_rate": 8.80759824269365e-06, "loss": 0.2507, "step": 6194 }, { "epoch": 0.495590088198236, "grad_norm": 0.30694150498535294, "learning_rate": 8.80717837618723e-06, "loss": 0.246, "step": 6195 }, { "epoch": 0.49567008659826806, "grad_norm": 0.30662303846561084, "learning_rate": 8.806758445783954e-06, "loss": 0.2573, "step": 6196 }, { "epoch": 0.49575008499830003, "grad_norm": 0.22145930868483196, "learning_rate": 8.80633845149087e-06, "loss": 0.3458, "step": 6197 }, { "epoch": 0.495830083398332, "grad_norm": 0.2844871237289375, "learning_rate": 8.805918393315028e-06, "loss": 0.2827, "step": 6198 }, { "epoch": 0.49591008179836404, "grad_norm": 0.33653259836990795, "learning_rate": 8.805498271263477e-06, "loss": 0.2598, "step": 6199 }, { "epoch": 0.495990080198396, "grad_norm": 0.2334144710958175, "learning_rate": 8.80507808534327e-06, "loss": 0.3023, "step": 6200 }, { "epoch": 0.49607007859842805, "grad_norm": 0.28469530950879157, "learning_rate": 8.804657835561456e-06, "loss": 0.2834, "step": 6201 }, { "epoch": 0.49615007699846003, "grad_norm": 0.24360761785309337, "learning_rate": 8.804237521925089e-06, "loss": 0.3314, "step": 6202 }, { "epoch": 0.496230075398492, "grad_norm": 0.33624240112833514, "learning_rate": 8.803817144441227e-06, "loss": 0.2513, "step": 6203 }, { "epoch": 0.49631007379852404, "grad_norm": 0.35956965641806815, "learning_rate": 8.80339670311692e-06, "loss": 0.2936, "step": 6204 }, { "epoch": 0.496390072198556, "grad_norm": 0.32813913917164916, "learning_rate": 8.802976197959228e-06, "loss": 0.2802, "step": 6205 }, { "epoch": 0.49647007059858805, "grad_norm": 0.3001599528905617, "learning_rate": 8.802555628975204e-06, "loss": 0.261, "step": 6206 }, { "epoch": 0.49655006899862003, "grad_norm": 0.3103453053778995, "learning_rate": 8.802134996171913e-06, "loss": 0.2649, "step": 6207 }, { "epoch": 0.496630067398652, "grad_norm": 0.29820463246610635, "learning_rate": 8.80171429955641e-06, "loss": 0.2421, "step": 6208 }, { "epoch": 0.49671006579868404, "grad_norm": 0.3017429531885275, "learning_rate": 8.801293539135755e-06, "loss": 0.2738, "step": 6209 }, { "epoch": 0.496790064198716, "grad_norm": 0.24891872653789646, "learning_rate": 8.800872714917013e-06, "loss": 0.2964, "step": 6210 }, { "epoch": 0.49687006259874805, "grad_norm": 0.30615152686626124, "learning_rate": 8.800451826907245e-06, "loss": 0.2487, "step": 6211 }, { "epoch": 0.49695006099878003, "grad_norm": 0.28482174296561136, "learning_rate": 8.800030875113517e-06, "loss": 0.2805, "step": 6212 }, { "epoch": 0.497030059398812, "grad_norm": 0.304139376832062, "learning_rate": 8.79960985954289e-06, "loss": 0.258, "step": 6213 }, { "epoch": 0.49711005779884404, "grad_norm": 0.2347774628157303, "learning_rate": 8.799188780202435e-06, "loss": 0.3135, "step": 6214 }, { "epoch": 0.497190056198876, "grad_norm": 0.27149790363672666, "learning_rate": 8.798767637099212e-06, "loss": 0.2923, "step": 6215 }, { "epoch": 0.497270054598908, "grad_norm": 0.27737415917511216, "learning_rate": 8.798346430240297e-06, "loss": 0.3247, "step": 6216 }, { "epoch": 0.49735005299894003, "grad_norm": 0.2981947915674584, "learning_rate": 8.797925159632753e-06, "loss": 0.2488, "step": 6217 }, { "epoch": 0.497430051398972, "grad_norm": 0.3518703439828616, "learning_rate": 8.797503825283655e-06, "loss": 0.2611, "step": 6218 }, { "epoch": 0.49751004979900404, "grad_norm": 0.41758276842174935, "learning_rate": 8.79708242720007e-06, "loss": 0.2536, "step": 6219 }, { "epoch": 0.497590048199036, "grad_norm": 0.2766698561242508, "learning_rate": 8.796660965389075e-06, "loss": 0.2673, "step": 6220 }, { "epoch": 0.497670046599068, "grad_norm": 0.26236207042969006, "learning_rate": 8.79623943985774e-06, "loss": 0.2844, "step": 6221 }, { "epoch": 0.4977500449991, "grad_norm": 0.2422867368158345, "learning_rate": 8.795817850613142e-06, "loss": 0.3148, "step": 6222 }, { "epoch": 0.497830043399132, "grad_norm": 0.2919719376708548, "learning_rate": 8.795396197662355e-06, "loss": 0.2849, "step": 6223 }, { "epoch": 0.49791004179916404, "grad_norm": 0.34671038065839765, "learning_rate": 8.794974481012455e-06, "loss": 0.2272, "step": 6224 }, { "epoch": 0.497990040199196, "grad_norm": 0.2714354108365635, "learning_rate": 8.794552700670522e-06, "loss": 0.2755, "step": 6225 }, { "epoch": 0.498070038599228, "grad_norm": 0.3023993856322621, "learning_rate": 8.794130856643635e-06, "loss": 0.2634, "step": 6226 }, { "epoch": 0.49815003699926, "grad_norm": 0.3121652909443062, "learning_rate": 8.79370894893887e-06, "loss": 0.2732, "step": 6227 }, { "epoch": 0.498230035399292, "grad_norm": 0.3110639672821751, "learning_rate": 8.79328697756331e-06, "loss": 0.2489, "step": 6228 }, { "epoch": 0.49831003379932404, "grad_norm": 0.3300889620086029, "learning_rate": 8.792864942524042e-06, "loss": 0.2534, "step": 6229 }, { "epoch": 0.498390032199356, "grad_norm": 0.2511934582809575, "learning_rate": 8.792442843828141e-06, "loss": 0.3058, "step": 6230 }, { "epoch": 0.498470030599388, "grad_norm": 0.24390011678622378, "learning_rate": 8.792020681482698e-06, "loss": 0.2723, "step": 6231 }, { "epoch": 0.49855002899942, "grad_norm": 0.2936919726913615, "learning_rate": 8.791598455494793e-06, "loss": 0.2684, "step": 6232 }, { "epoch": 0.498630027399452, "grad_norm": 0.2876063518337499, "learning_rate": 8.791176165871515e-06, "loss": 0.2516, "step": 6233 }, { "epoch": 0.49871002579948404, "grad_norm": 0.2996283959206275, "learning_rate": 8.790753812619952e-06, "loss": 0.2887, "step": 6234 }, { "epoch": 0.498790024199516, "grad_norm": 0.22899200505736134, "learning_rate": 8.79033139574719e-06, "loss": 0.3093, "step": 6235 }, { "epoch": 0.498870022599548, "grad_norm": 0.25789596587768754, "learning_rate": 8.789908915260322e-06, "loss": 0.3186, "step": 6236 }, { "epoch": 0.49895002099958, "grad_norm": 0.2937590842027953, "learning_rate": 8.789486371166435e-06, "loss": 0.2696, "step": 6237 }, { "epoch": 0.499030019399612, "grad_norm": 0.32960698858358717, "learning_rate": 8.789063763472624e-06, "loss": 0.2764, "step": 6238 }, { "epoch": 0.49911001779964403, "grad_norm": 0.6339535374463624, "learning_rate": 8.788641092185978e-06, "loss": 0.2355, "step": 6239 }, { "epoch": 0.499190016199676, "grad_norm": 0.3113731057890879, "learning_rate": 8.788218357313594e-06, "loss": 0.2546, "step": 6240 }, { "epoch": 0.499270014599708, "grad_norm": 0.21442528912868053, "learning_rate": 8.787795558862566e-06, "loss": 0.3426, "step": 6241 }, { "epoch": 0.49935001299974, "grad_norm": 0.2642253887847804, "learning_rate": 8.787372696839989e-06, "loss": 0.283, "step": 6242 }, { "epoch": 0.499430011399772, "grad_norm": 0.20123927417956905, "learning_rate": 8.786949771252961e-06, "loss": 0.3342, "step": 6243 }, { "epoch": 0.499510009799804, "grad_norm": 0.31353754515587184, "learning_rate": 8.786526782108579e-06, "loss": 0.2464, "step": 6244 }, { "epoch": 0.499590008199836, "grad_norm": 0.2751998754609181, "learning_rate": 8.786103729413944e-06, "loss": 0.2832, "step": 6245 }, { "epoch": 0.499670006599868, "grad_norm": 0.3033487943481697, "learning_rate": 8.785680613176153e-06, "loss": 0.2926, "step": 6246 }, { "epoch": 0.4997500049999, "grad_norm": 0.2975187575131753, "learning_rate": 8.785257433402311e-06, "loss": 0.2524, "step": 6247 }, { "epoch": 0.499830003399932, "grad_norm": 0.3018289434187931, "learning_rate": 8.784834190099519e-06, "loss": 0.2439, "step": 6248 }, { "epoch": 0.499910001799964, "grad_norm": 0.31375128156228366, "learning_rate": 8.784410883274879e-06, "loss": 0.2632, "step": 6249 }, { "epoch": 0.499990000199996, "grad_norm": 0.2611977037643278, "learning_rate": 8.783987512935498e-06, "loss": 0.2814, "step": 6250 }, { "epoch": 0.500069998600028, "grad_norm": 0.304541362898982, "learning_rate": 8.783564079088478e-06, "loss": 0.2527, "step": 6251 }, { "epoch": 0.50014999700006, "grad_norm": 0.3302825008821275, "learning_rate": 8.783140581740927e-06, "loss": 0.2937, "step": 6252 }, { "epoch": 0.500229995400092, "grad_norm": 0.27699535533304726, "learning_rate": 8.782717020899957e-06, "loss": 0.2735, "step": 6253 }, { "epoch": 0.500309993800124, "grad_norm": 0.2689907287314529, "learning_rate": 8.782293396572669e-06, "loss": 0.293, "step": 6254 }, { "epoch": 0.500389992200156, "grad_norm": 0.3012722891711402, "learning_rate": 8.781869708766179e-06, "loss": 0.2908, "step": 6255 }, { "epoch": 0.500469990600188, "grad_norm": 0.2967077943637965, "learning_rate": 8.781445957487595e-06, "loss": 0.2406, "step": 6256 }, { "epoch": 0.50054998900022, "grad_norm": 0.2980459589836454, "learning_rate": 8.781022142744028e-06, "loss": 0.2368, "step": 6257 }, { "epoch": 0.500629987400252, "grad_norm": 0.33408051679032774, "learning_rate": 8.780598264542597e-06, "loss": 0.2591, "step": 6258 }, { "epoch": 0.500709985800284, "grad_norm": 0.27978288410319896, "learning_rate": 8.78017432289041e-06, "loss": 0.2547, "step": 6259 }, { "epoch": 0.500789984200316, "grad_norm": 0.3137039696457966, "learning_rate": 8.779750317794582e-06, "loss": 0.2499, "step": 6260 }, { "epoch": 0.500869982600348, "grad_norm": 0.284273439729684, "learning_rate": 8.779326249262232e-06, "loss": 0.3136, "step": 6261 }, { "epoch": 0.50094998100038, "grad_norm": 0.33446665193229613, "learning_rate": 8.778902117300475e-06, "loss": 0.2715, "step": 6262 }, { "epoch": 0.5010299794004119, "grad_norm": 0.2570259779119952, "learning_rate": 8.778477921916431e-06, "loss": 0.276, "step": 6263 }, { "epoch": 0.501109977800444, "grad_norm": 0.3860663646413159, "learning_rate": 8.77805366311722e-06, "loss": 0.2771, "step": 6264 }, { "epoch": 0.501189976200476, "grad_norm": 0.2734503582315964, "learning_rate": 8.777629340909963e-06, "loss": 0.2966, "step": 6265 }, { "epoch": 0.501269974600508, "grad_norm": 0.2812462874939072, "learning_rate": 8.777204955301777e-06, "loss": 0.2844, "step": 6266 }, { "epoch": 0.50134997300054, "grad_norm": 0.2653240088021343, "learning_rate": 8.77678050629979e-06, "loss": 0.285, "step": 6267 }, { "epoch": 0.501429971400572, "grad_norm": 0.23312541407220877, "learning_rate": 8.77635599391112e-06, "loss": 0.3174, "step": 6268 }, { "epoch": 0.501509969800604, "grad_norm": 0.21975551238120355, "learning_rate": 8.775931418142895e-06, "loss": 0.3032, "step": 6269 }, { "epoch": 0.501589968200636, "grad_norm": 0.31042804252077155, "learning_rate": 8.775506779002243e-06, "loss": 0.2507, "step": 6270 }, { "epoch": 0.501669966600668, "grad_norm": 0.30009975331281935, "learning_rate": 8.775082076496287e-06, "loss": 0.2812, "step": 6271 }, { "epoch": 0.5017499650007, "grad_norm": 0.29293481140760097, "learning_rate": 8.774657310632157e-06, "loss": 0.2389, "step": 6272 }, { "epoch": 0.5018299634007319, "grad_norm": 0.32609770531104576, "learning_rate": 8.77423248141698e-06, "loss": 0.2545, "step": 6273 }, { "epoch": 0.501909961800764, "grad_norm": 0.2802559829283081, "learning_rate": 8.773807588857887e-06, "loss": 0.2893, "step": 6274 }, { "epoch": 0.501989960200796, "grad_norm": 0.2720664207118118, "learning_rate": 8.773382632962011e-06, "loss": 0.2862, "step": 6275 }, { "epoch": 0.502069958600828, "grad_norm": 0.2883361909694832, "learning_rate": 8.772957613736483e-06, "loss": 0.2898, "step": 6276 }, { "epoch": 0.50214995700086, "grad_norm": 0.26123621604761577, "learning_rate": 8.772532531188434e-06, "loss": 0.2749, "step": 6277 }, { "epoch": 0.502229955400892, "grad_norm": 0.2279925274090334, "learning_rate": 8.772107385325e-06, "loss": 0.3048, "step": 6278 }, { "epoch": 0.502309953800924, "grad_norm": 0.24105549729433387, "learning_rate": 8.771682176153317e-06, "loss": 0.2968, "step": 6279 }, { "epoch": 0.502389952200956, "grad_norm": 0.2788393350324983, "learning_rate": 8.77125690368052e-06, "loss": 0.3018, "step": 6280 }, { "epoch": 0.502469950600988, "grad_norm": 0.2298527450957488, "learning_rate": 8.770831567913747e-06, "loss": 0.3308, "step": 6281 }, { "epoch": 0.50254994900102, "grad_norm": 0.31295840330347935, "learning_rate": 8.770406168860138e-06, "loss": 0.2763, "step": 6282 }, { "epoch": 0.5026299474010519, "grad_norm": 0.23316640379813178, "learning_rate": 8.76998070652683e-06, "loss": 0.3102, "step": 6283 }, { "epoch": 0.502709945801084, "grad_norm": 0.3189578063543914, "learning_rate": 8.769555180920966e-06, "loss": 0.2611, "step": 6284 }, { "epoch": 0.502789944201116, "grad_norm": 0.24612766296188715, "learning_rate": 8.769129592049685e-06, "loss": 0.2964, "step": 6285 }, { "epoch": 0.5028699426011479, "grad_norm": 0.42071400468838627, "learning_rate": 8.768703939920133e-06, "loss": 0.2561, "step": 6286 }, { "epoch": 0.50294994100118, "grad_norm": 0.2713208010665168, "learning_rate": 8.768278224539451e-06, "loss": 0.2741, "step": 6287 }, { "epoch": 0.503029939401212, "grad_norm": 0.5094972792558844, "learning_rate": 8.767852445914784e-06, "loss": 0.2686, "step": 6288 }, { "epoch": 0.503109937801244, "grad_norm": 0.406249848697086, "learning_rate": 8.767426604053282e-06, "loss": 0.2609, "step": 6289 }, { "epoch": 0.503189936201276, "grad_norm": 0.23187734044257552, "learning_rate": 8.767000698962087e-06, "loss": 0.2976, "step": 6290 }, { "epoch": 0.503269934601308, "grad_norm": 0.351227817640469, "learning_rate": 8.76657473064835e-06, "loss": 0.251, "step": 6291 }, { "epoch": 0.50334993300134, "grad_norm": 0.3049630067062447, "learning_rate": 8.76614869911922e-06, "loss": 0.2538, "step": 6292 }, { "epoch": 0.5034299314013719, "grad_norm": 0.24265407229996766, "learning_rate": 8.765722604381843e-06, "loss": 0.3577, "step": 6293 }, { "epoch": 0.503509929801404, "grad_norm": 0.21995827556366285, "learning_rate": 8.765296446443377e-06, "loss": 0.3164, "step": 6294 }, { "epoch": 0.503589928201436, "grad_norm": 0.25699180894119267, "learning_rate": 8.76487022531097e-06, "loss": 0.3128, "step": 6295 }, { "epoch": 0.5036699266014679, "grad_norm": 0.3344515679998034, "learning_rate": 8.764443940991776e-06, "loss": 0.2475, "step": 6296 }, { "epoch": 0.5037499250015, "grad_norm": 0.32749041126611467, "learning_rate": 8.764017593492951e-06, "loss": 0.2675, "step": 6297 }, { "epoch": 0.503829923401532, "grad_norm": 0.33041714098430913, "learning_rate": 8.763591182821647e-06, "loss": 0.2598, "step": 6298 }, { "epoch": 0.503909921801564, "grad_norm": 0.298064282351983, "learning_rate": 8.763164708985026e-06, "loss": 0.3051, "step": 6299 }, { "epoch": 0.5039899202015959, "grad_norm": 0.2570912606503043, "learning_rate": 8.762738171990242e-06, "loss": 0.275, "step": 6300 }, { "epoch": 0.504069918601628, "grad_norm": 0.26716381265564565, "learning_rate": 8.762311571844453e-06, "loss": 0.2871, "step": 6301 }, { "epoch": 0.50414991700166, "grad_norm": 0.2424059267181268, "learning_rate": 8.761884908554818e-06, "loss": 0.3185, "step": 6302 }, { "epoch": 0.5042299154016919, "grad_norm": 0.3425031202563655, "learning_rate": 8.761458182128503e-06, "loss": 0.2905, "step": 6303 }, { "epoch": 0.504309913801724, "grad_norm": 0.27336414093640604, "learning_rate": 8.761031392572665e-06, "loss": 0.2728, "step": 6304 }, { "epoch": 0.504389912201756, "grad_norm": 0.30014573247936854, "learning_rate": 8.76060453989447e-06, "loss": 0.2803, "step": 6305 }, { "epoch": 0.5044699106017879, "grad_norm": 0.282378096069049, "learning_rate": 8.760177624101079e-06, "loss": 0.3105, "step": 6306 }, { "epoch": 0.50454990900182, "grad_norm": 0.3163778280576005, "learning_rate": 8.75975064519966e-06, "loss": 0.2923, "step": 6307 }, { "epoch": 0.504629907401852, "grad_norm": 0.2892165726596172, "learning_rate": 8.759323603197377e-06, "loss": 0.2559, "step": 6308 }, { "epoch": 0.5047099058018839, "grad_norm": 0.28984176988978866, "learning_rate": 8.758896498101397e-06, "loss": 0.2785, "step": 6309 }, { "epoch": 0.5047899042019159, "grad_norm": 0.27680320592234975, "learning_rate": 8.75846932991889e-06, "loss": 0.2852, "step": 6310 }, { "epoch": 0.504869902601948, "grad_norm": 0.31099022653542613, "learning_rate": 8.758042098657022e-06, "loss": 0.2772, "step": 6311 }, { "epoch": 0.50494990100198, "grad_norm": 0.31234868951044725, "learning_rate": 8.757614804322968e-06, "loss": 0.2601, "step": 6312 }, { "epoch": 0.5050298994020119, "grad_norm": 0.25319215452677285, "learning_rate": 8.757187446923896e-06, "loss": 0.3305, "step": 6313 }, { "epoch": 0.505109897802044, "grad_norm": 0.37316326721559234, "learning_rate": 8.75676002646698e-06, "loss": 0.2472, "step": 6314 }, { "epoch": 0.505189896202076, "grad_norm": 0.30582011068902065, "learning_rate": 8.756332542959394e-06, "loss": 0.261, "step": 6315 }, { "epoch": 0.5052698946021079, "grad_norm": 0.2212791814643995, "learning_rate": 8.75590499640831e-06, "loss": 0.3172, "step": 6316 }, { "epoch": 0.50534989300214, "grad_norm": 0.19928828324980044, "learning_rate": 8.755477386820906e-06, "loss": 0.3476, "step": 6317 }, { "epoch": 0.505429891402172, "grad_norm": 0.29311239948504214, "learning_rate": 8.755049714204357e-06, "loss": 0.252, "step": 6318 }, { "epoch": 0.5055098898022039, "grad_norm": 0.260109042448725, "learning_rate": 8.75462197856584e-06, "loss": 0.2653, "step": 6319 }, { "epoch": 0.5055898882022359, "grad_norm": 0.37857891369433805, "learning_rate": 8.75419417991254e-06, "loss": 0.3007, "step": 6320 }, { "epoch": 0.505669886602268, "grad_norm": 0.39538881799127834, "learning_rate": 8.753766318251628e-06, "loss": 0.2507, "step": 6321 }, { "epoch": 0.5057498850023, "grad_norm": 0.2642534638613745, "learning_rate": 8.753338393590292e-06, "loss": 0.2949, "step": 6322 }, { "epoch": 0.5058298834023319, "grad_norm": 0.3286064258181822, "learning_rate": 8.752910405935708e-06, "loss": 0.2302, "step": 6323 }, { "epoch": 0.505909881802364, "grad_norm": 0.2624475579172414, "learning_rate": 8.752482355295065e-06, "loss": 0.2767, "step": 6324 }, { "epoch": 0.505989880202396, "grad_norm": 0.2718872355779903, "learning_rate": 8.752054241675543e-06, "loss": 0.2483, "step": 6325 }, { "epoch": 0.5060698786024279, "grad_norm": 0.26746631566265905, "learning_rate": 8.751626065084328e-06, "loss": 0.3011, "step": 6326 }, { "epoch": 0.50614987700246, "grad_norm": 0.26966741493678964, "learning_rate": 8.751197825528607e-06, "loss": 0.2728, "step": 6327 }, { "epoch": 0.506229875402492, "grad_norm": 0.2607201478904174, "learning_rate": 8.750769523015568e-06, "loss": 0.2611, "step": 6328 }, { "epoch": 0.5063098738025239, "grad_norm": 0.45693938198765477, "learning_rate": 8.750341157552396e-06, "loss": 0.2551, "step": 6329 }, { "epoch": 0.5063898722025559, "grad_norm": 0.233540134825675, "learning_rate": 8.749912729146283e-06, "loss": 0.3078, "step": 6330 }, { "epoch": 0.506469870602588, "grad_norm": 0.2672135626925953, "learning_rate": 8.74948423780442e-06, "loss": 0.2744, "step": 6331 }, { "epoch": 0.50654986900262, "grad_norm": 0.2869379882783276, "learning_rate": 8.749055683533995e-06, "loss": 0.2685, "step": 6332 }, { "epoch": 0.5066298674026519, "grad_norm": 0.3018553641769092, "learning_rate": 8.748627066342206e-06, "loss": 0.2963, "step": 6333 }, { "epoch": 0.506709865802684, "grad_norm": 0.2617675858803325, "learning_rate": 8.748198386236241e-06, "loss": 0.2967, "step": 6334 }, { "epoch": 0.506789864202716, "grad_norm": 0.3360853030310783, "learning_rate": 8.7477696432233e-06, "loss": 0.2933, "step": 6335 }, { "epoch": 0.5068698626027479, "grad_norm": 0.26300366264943653, "learning_rate": 8.747340837310574e-06, "loss": 0.2813, "step": 6336 }, { "epoch": 0.50694986100278, "grad_norm": 0.2778915232778241, "learning_rate": 8.746911968505262e-06, "loss": 0.2954, "step": 6337 }, { "epoch": 0.507029859402812, "grad_norm": 0.27309891370722833, "learning_rate": 8.746483036814561e-06, "loss": 0.3116, "step": 6338 }, { "epoch": 0.5071098578028439, "grad_norm": 0.35358892697470456, "learning_rate": 8.74605404224567e-06, "loss": 0.2679, "step": 6339 }, { "epoch": 0.5071898562028759, "grad_norm": 0.26546425661701656, "learning_rate": 8.74562498480579e-06, "loss": 0.318, "step": 6340 }, { "epoch": 0.507269854602908, "grad_norm": 0.2680335957799276, "learning_rate": 8.745195864502121e-06, "loss": 0.279, "step": 6341 }, { "epoch": 0.5073498530029399, "grad_norm": 0.30886579836433153, "learning_rate": 8.744766681341866e-06, "loss": 0.2508, "step": 6342 }, { "epoch": 0.5074298514029719, "grad_norm": 0.2830385418970947, "learning_rate": 8.744337435332226e-06, "loss": 0.278, "step": 6343 }, { "epoch": 0.507509849803004, "grad_norm": 0.30528688834790846, "learning_rate": 8.743908126480408e-06, "loss": 0.2475, "step": 6344 }, { "epoch": 0.507589848203036, "grad_norm": 0.27153869652124374, "learning_rate": 8.743478754793616e-06, "loss": 0.2947, "step": 6345 }, { "epoch": 0.5076698466030679, "grad_norm": 0.27529965487455693, "learning_rate": 8.743049320279053e-06, "loss": 0.2808, "step": 6346 }, { "epoch": 0.5077498450030999, "grad_norm": 0.4460240629763492, "learning_rate": 8.742619822943932e-06, "loss": 0.2693, "step": 6347 }, { "epoch": 0.507829843403132, "grad_norm": 0.2585966620013788, "learning_rate": 8.742190262795458e-06, "loss": 0.2744, "step": 6348 }, { "epoch": 0.5079098418031639, "grad_norm": 0.3109977970819965, "learning_rate": 8.74176063984084e-06, "loss": 0.2556, "step": 6349 }, { "epoch": 0.5079898402031959, "grad_norm": 0.3108898260252627, "learning_rate": 8.741330954087291e-06, "loss": 0.2574, "step": 6350 }, { "epoch": 0.508069838603228, "grad_norm": 0.30929210561246107, "learning_rate": 8.74090120554202e-06, "loss": 0.2566, "step": 6351 }, { "epoch": 0.5081498370032599, "grad_norm": 0.29013371787319897, "learning_rate": 8.740471394212242e-06, "loss": 0.2832, "step": 6352 }, { "epoch": 0.5082298354032919, "grad_norm": 0.2816329317577217, "learning_rate": 8.740041520105168e-06, "loss": 0.305, "step": 6353 }, { "epoch": 0.508309833803324, "grad_norm": 0.28996547189623834, "learning_rate": 8.739611583228014e-06, "loss": 0.256, "step": 6354 }, { "epoch": 0.508389832203356, "grad_norm": 0.2788915549803398, "learning_rate": 8.739181583587997e-06, "loss": 0.2918, "step": 6355 }, { "epoch": 0.5084698306033879, "grad_norm": 0.3199198931418646, "learning_rate": 8.73875152119233e-06, "loss": 0.2558, "step": 6356 }, { "epoch": 0.5085498290034199, "grad_norm": 0.4434178824740245, "learning_rate": 8.738321396048235e-06, "loss": 0.2594, "step": 6357 }, { "epoch": 0.508629827403452, "grad_norm": 0.26352315719387487, "learning_rate": 8.73789120816293e-06, "loss": 0.3191, "step": 6358 }, { "epoch": 0.5087098258034839, "grad_norm": 0.32987147710074316, "learning_rate": 8.737460957543633e-06, "loss": 0.2824, "step": 6359 }, { "epoch": 0.5087898242035159, "grad_norm": 0.31741415715751325, "learning_rate": 8.737030644197566e-06, "loss": 0.271, "step": 6360 }, { "epoch": 0.508869822603548, "grad_norm": 0.28540390718501785, "learning_rate": 8.736600268131953e-06, "loss": 0.2542, "step": 6361 }, { "epoch": 0.5089498210035799, "grad_norm": 0.30048979440893203, "learning_rate": 8.736169829354012e-06, "loss": 0.2555, "step": 6362 }, { "epoch": 0.5090298194036119, "grad_norm": 0.2957560502764145, "learning_rate": 8.735739327870974e-06, "loss": 0.2497, "step": 6363 }, { "epoch": 0.509109817803644, "grad_norm": 0.2756206478946818, "learning_rate": 8.73530876369006e-06, "loss": 0.2749, "step": 6364 }, { "epoch": 0.5091898162036759, "grad_norm": 0.3023930067587958, "learning_rate": 8.734878136818496e-06, "loss": 0.2797, "step": 6365 }, { "epoch": 0.5092698146037079, "grad_norm": 0.29283047878326296, "learning_rate": 8.73444744726351e-06, "loss": 0.2476, "step": 6366 }, { "epoch": 0.5093498130037399, "grad_norm": 0.25960329251610215, "learning_rate": 8.734016695032333e-06, "loss": 0.2734, "step": 6367 }, { "epoch": 0.509429811403772, "grad_norm": 0.26594726513257555, "learning_rate": 8.733585880132189e-06, "loss": 0.2742, "step": 6368 }, { "epoch": 0.5095098098038039, "grad_norm": 0.40830614133430737, "learning_rate": 8.733155002570315e-06, "loss": 0.2667, "step": 6369 }, { "epoch": 0.5095898082038359, "grad_norm": 0.27062026907067, "learning_rate": 8.732724062353937e-06, "loss": 0.2755, "step": 6370 }, { "epoch": 0.509669806603868, "grad_norm": 0.23148898842123292, "learning_rate": 8.73229305949029e-06, "loss": 0.3359, "step": 6371 }, { "epoch": 0.5097498050038999, "grad_norm": 0.26829143346797524, "learning_rate": 8.731861993986608e-06, "loss": 0.2733, "step": 6372 }, { "epoch": 0.5098298034039319, "grad_norm": 0.27321636666393245, "learning_rate": 8.731430865850124e-06, "loss": 0.2871, "step": 6373 }, { "epoch": 0.509909801803964, "grad_norm": 0.2992400669628326, "learning_rate": 8.730999675088075e-06, "loss": 0.2471, "step": 6374 }, { "epoch": 0.5099898002039959, "grad_norm": 0.3173399260580309, "learning_rate": 8.730568421707699e-06, "loss": 0.2584, "step": 6375 }, { "epoch": 0.5100697986040279, "grad_norm": 0.39942238959930576, "learning_rate": 8.730137105716231e-06, "loss": 0.2671, "step": 6376 }, { "epoch": 0.5101497970040599, "grad_norm": 0.33097521252812084, "learning_rate": 8.729705727120911e-06, "loss": 0.2788, "step": 6377 }, { "epoch": 0.510229795404092, "grad_norm": 0.27291127013991473, "learning_rate": 8.72927428592898e-06, "loss": 0.2749, "step": 6378 }, { "epoch": 0.5103097938041239, "grad_norm": 0.30109509811447827, "learning_rate": 8.728842782147679e-06, "loss": 0.2302, "step": 6379 }, { "epoch": 0.5103897922041559, "grad_norm": 0.2916838161999928, "learning_rate": 8.728411215784246e-06, "loss": 0.239, "step": 6380 }, { "epoch": 0.510469790604188, "grad_norm": 0.32067224862764493, "learning_rate": 8.727979586845931e-06, "loss": 0.2578, "step": 6381 }, { "epoch": 0.5105497890042199, "grad_norm": 0.3206381047068147, "learning_rate": 8.727547895339974e-06, "loss": 0.2626, "step": 6382 }, { "epoch": 0.5106297874042519, "grad_norm": 0.3082124764253943, "learning_rate": 8.727116141273619e-06, "loss": 0.2369, "step": 6383 }, { "epoch": 0.5107097858042839, "grad_norm": 0.2741492227582812, "learning_rate": 8.726684324654115e-06, "loss": 0.3057, "step": 6384 }, { "epoch": 0.5107897842043159, "grad_norm": 0.339283108525902, "learning_rate": 8.726252445488708e-06, "loss": 0.246, "step": 6385 }, { "epoch": 0.5108697826043479, "grad_norm": 0.8038592105658828, "learning_rate": 8.725820503784648e-06, "loss": 0.2774, "step": 6386 }, { "epoch": 0.5109497810043799, "grad_norm": 0.28954994439665704, "learning_rate": 8.725388499549182e-06, "loss": 0.2768, "step": 6387 }, { "epoch": 0.511029779404412, "grad_norm": 0.20660916849362523, "learning_rate": 8.72495643278956e-06, "loss": 0.3587, "step": 6388 }, { "epoch": 0.5111097778044439, "grad_norm": 0.23459056698151448, "learning_rate": 8.724524303513035e-06, "loss": 0.3121, "step": 6389 }, { "epoch": 0.5111897762044759, "grad_norm": 0.29070581253921846, "learning_rate": 8.724092111726861e-06, "loss": 0.284, "step": 6390 }, { "epoch": 0.511269774604508, "grad_norm": 0.32627902159843886, "learning_rate": 8.72365985743829e-06, "loss": 0.26, "step": 6391 }, { "epoch": 0.5113497730045399, "grad_norm": 0.3105413123160655, "learning_rate": 8.723227540654574e-06, "loss": 0.2855, "step": 6392 }, { "epoch": 0.5114297714045719, "grad_norm": 0.2606233427496336, "learning_rate": 8.722795161382974e-06, "loss": 0.2891, "step": 6393 }, { "epoch": 0.5115097698046039, "grad_norm": 0.3167862793327284, "learning_rate": 8.722362719630741e-06, "loss": 0.2495, "step": 6394 }, { "epoch": 0.5115897682046359, "grad_norm": 0.26133039747930814, "learning_rate": 8.72193021540514e-06, "loss": 0.2818, "step": 6395 }, { "epoch": 0.5116697666046679, "grad_norm": 0.24349370532312845, "learning_rate": 8.721497648713423e-06, "loss": 0.2863, "step": 6396 }, { "epoch": 0.5117497650046999, "grad_norm": 0.2927263678640821, "learning_rate": 8.721065019562854e-06, "loss": 0.263, "step": 6397 }, { "epoch": 0.5118297634047319, "grad_norm": 0.3175143480093446, "learning_rate": 8.72063232796069e-06, "loss": 0.2461, "step": 6398 }, { "epoch": 0.5119097618047639, "grad_norm": 0.326270044178002, "learning_rate": 8.720199573914196e-06, "loss": 0.2845, "step": 6399 }, { "epoch": 0.5119897602047959, "grad_norm": 0.29604109702657116, "learning_rate": 8.719766757430637e-06, "loss": 0.2636, "step": 6400 } ], "logging_steps": 1.0, "max_steps": 25000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1977941640701542e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }