{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9988974641675856, "eval_steps": 100.0, "global_step": 1020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 19.301212901214466, "kl": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 0.6049, "step": 1, "step_loss": 0.6052899360656738 }, { "epoch": 0.01, "grad_norm": 6.455205328029959, "kl": 0.3062567710876465, "learning_rate": 2.438044511330269e-06, "loss": 0.4588, "step": 2, "step_loss": 0.46981990337371826 }, { "epoch": 0.01, "grad_norm": 4.434829230563478, "kl": 0.23490308225154877, "learning_rate": 3.5717278751869343e-06, "loss": 0.4361, "step": 3, "step_loss": 0.4431145191192627 }, { "epoch": 0.01, "grad_norm": 3.1316724496056834, "kl": 0.21129530668258667, "learning_rate": 4.376089022660538e-06, "loss": 0.4118, "step": 4, "step_loss": 0.371662974357605 }, { "epoch": 0.01, "grad_norm": 2.988460560324433, "kl": 0.22684630751609802, "learning_rate": 5e-06, "loss": 0.3726, "step": 5, "step_loss": 0.36066734790802 }, { "epoch": 0.02, "grad_norm": 2.378051002744742, "kl": 0.3077385723590851, "learning_rate": 4.9999961353271305e-06, "loss": 0.3356, "step": 6, "step_loss": 0.36983931064605713 }, { "epoch": 0.02, "grad_norm": 2.1071117126920313, "kl": 0.28660184144973755, "learning_rate": 4.9999845413217956e-06, "loss": 0.3443, "step": 7, "step_loss": 0.33001354336738586 }, { "epoch": 0.02, "grad_norm": 2.126342237334389, "kl": 0.2792012393474579, "learning_rate": 4.999965218023826e-06, "loss": 0.3704, "step": 8, "step_loss": 0.3890674114227295 }, { "epoch": 0.03, "grad_norm": 1.805551016834456, "kl": 0.25425001978874207, "learning_rate": 4.999938165499602e-06, "loss": 0.3601, "step": 9, "step_loss": 0.3878689408302307 }, { "epoch": 0.03, "grad_norm": 2.115675523568805, "kl": 0.286655992269516, "learning_rate": 4.999903383842054e-06, "loss": 0.3476, "step": 10, "step_loss": 0.3587522804737091 }, { "epoch": 0.03, "grad_norm": 1.944073523799986, "kl": 0.2786046862602234, "learning_rate": 4.9998608731706695e-06, "loss": 0.361, "step": 11, "step_loss": 0.37417733669281006 }, { "epoch": 0.04, "grad_norm": 1.900652962403358, "kl": 0.3140088617801666, "learning_rate": 4.999810633631482e-06, "loss": 0.333, "step": 12, "step_loss": 0.3321115970611572 }, { "epoch": 0.04, "grad_norm": 1.7772964219719185, "kl": 0.27589982748031616, "learning_rate": 4.999752665397077e-06, "loss": 0.332, "step": 13, "step_loss": 0.2766149938106537 }, { "epoch": 0.04, "grad_norm": 1.784571061327797, "kl": 0.30012714862823486, "learning_rate": 4.999686968666592e-06, "loss": 0.3256, "step": 14, "step_loss": 0.2897532284259796 }, { "epoch": 0.04, "grad_norm": 1.7357127297345116, "kl": 0.27369552850723267, "learning_rate": 4.999613543665713e-06, "loss": 0.3343, "step": 15, "step_loss": 0.3049730062484741 }, { "epoch": 0.05, "grad_norm": 1.9257598473402229, "kl": 0.36154788732528687, "learning_rate": 4.999532390646673e-06, "loss": 0.3378, "step": 16, "step_loss": 0.3547108769416809 }, { "epoch": 0.05, "grad_norm": 1.4906210741355268, "kl": 0.2691032588481903, "learning_rate": 4.999443509888254e-06, "loss": 0.3175, "step": 17, "step_loss": 0.29635873436927795 }, { "epoch": 0.05, "grad_norm": 1.6048699907577806, "kl": 0.34884747862815857, "learning_rate": 4.999346901695787e-06, "loss": 0.3237, "step": 18, "step_loss": 0.3572104871273041 }, { "epoch": 0.06, "grad_norm": 1.5670463795130773, "kl": 0.2717525362968445, "learning_rate": 4.999242566401145e-06, "loss": 0.3306, "step": 19, "step_loss": 0.31295859813690186 }, { "epoch": 0.06, "grad_norm": 1.682513968323289, "kl": 0.2477482557296753, "learning_rate": 4.999130504362748e-06, "loss": 0.3383, "step": 20, "step_loss": 0.3229523301124573 }, { "epoch": 0.06, "grad_norm": 1.6243623252629549, "kl": 0.348207950592041, "learning_rate": 4.9990107159655565e-06, "loss": 0.3172, "step": 21, "step_loss": 0.4179743528366089 }, { "epoch": 0.06, "grad_norm": 1.6607049641076566, "kl": 0.3793472945690155, "learning_rate": 4.998883201621079e-06, "loss": 0.3397, "step": 22, "step_loss": 0.39602553844451904 }, { "epoch": 0.07, "grad_norm": 1.5170388362033584, "kl": 0.3244696259498596, "learning_rate": 4.998747961767359e-06, "loss": 0.3197, "step": 23, "step_loss": 0.3359769582748413 }, { "epoch": 0.07, "grad_norm": 1.6128730811774565, "kl": 0.28219160437583923, "learning_rate": 4.998604996868982e-06, "loss": 0.3442, "step": 24, "step_loss": 0.3883013129234314 }, { "epoch": 0.07, "grad_norm": 1.4539005711940118, "kl": 0.23452766239643097, "learning_rate": 4.998454307417071e-06, "loss": 0.3093, "step": 25, "step_loss": 0.3024548590183258 }, { "epoch": 0.08, "grad_norm": 1.5624222068494429, "kl": 0.3776319622993469, "learning_rate": 4.998295893929281e-06, "loss": 0.3279, "step": 26, "step_loss": 0.3177269399166107 }, { "epoch": 0.08, "grad_norm": 1.5954783642036205, "kl": 0.27478423714637756, "learning_rate": 4.998129756949807e-06, "loss": 0.3057, "step": 27, "step_loss": 0.2898721396923065 }, { "epoch": 0.08, "grad_norm": 1.541698773367659, "kl": 0.31704697012901306, "learning_rate": 4.997955897049373e-06, "loss": 0.3468, "step": 28, "step_loss": 0.3161465525627136 }, { "epoch": 0.09, "grad_norm": 1.464173735929187, "kl": 0.2408154010772705, "learning_rate": 4.997774314825233e-06, "loss": 0.3114, "step": 29, "step_loss": 0.2493884265422821 }, { "epoch": 0.09, "grad_norm": 1.5382515760575184, "kl": 0.2777164876461029, "learning_rate": 4.997585010901172e-06, "loss": 0.3129, "step": 30, "step_loss": 0.2662698030471802 }, { "epoch": 0.09, "grad_norm": 1.456667059415303, "kl": 0.2807072401046753, "learning_rate": 4.9973879859274966e-06, "loss": 0.3041, "step": 31, "step_loss": 0.2914745509624481 }, { "epoch": 0.09, "grad_norm": 1.5999842892560905, "kl": 0.2716163098812103, "learning_rate": 4.997183240581041e-06, "loss": 0.328, "step": 32, "step_loss": 0.2906668484210968 }, { "epoch": 0.1, "grad_norm": 1.4212712274071648, "kl": 0.29081669449806213, "learning_rate": 4.996970775565161e-06, "loss": 0.3008, "step": 33, "step_loss": 0.30645477771759033 }, { "epoch": 0.1, "grad_norm": 1.5615238192082477, "kl": 0.2825222611427307, "learning_rate": 4.996750591609727e-06, "loss": 0.3209, "step": 34, "step_loss": 0.3189748525619507 }, { "epoch": 0.1, "grad_norm": 1.3807731720214431, "kl": 0.3141458034515381, "learning_rate": 4.9965226894711316e-06, "loss": 0.3363, "step": 35, "step_loss": 0.3879620432853699 }, { "epoch": 0.11, "grad_norm": 1.4557787239042146, "kl": 0.34700241684913635, "learning_rate": 4.996287069932278e-06, "loss": 0.3064, "step": 36, "step_loss": 0.39095190167427063 }, { "epoch": 0.11, "grad_norm": 1.5774051610848923, "kl": 0.3121364116668701, "learning_rate": 4.996043733802583e-06, "loss": 0.3243, "step": 37, "step_loss": 0.30228227376937866 }, { "epoch": 0.11, "grad_norm": 1.4430150717016268, "kl": 0.2903325855731964, "learning_rate": 4.995792681917968e-06, "loss": 0.2911, "step": 38, "step_loss": 0.2797442078590393 }, { "epoch": 0.11, "grad_norm": 1.3795537860052318, "kl": 0.328932523727417, "learning_rate": 4.995533915140866e-06, "loss": 0.2866, "step": 39, "step_loss": 0.2982497215270996 }, { "epoch": 0.12, "grad_norm": 1.3084724825566474, "kl": 0.26037484407424927, "learning_rate": 4.995267434360207e-06, "loss": 0.3049, "step": 40, "step_loss": 0.2707076370716095 }, { "epoch": 0.12, "grad_norm": 1.546767703101615, "kl": 0.25849828124046326, "learning_rate": 4.9949932404914245e-06, "loss": 0.2885, "step": 41, "step_loss": 0.2949169874191284 }, { "epoch": 0.12, "grad_norm": 1.4065895730214004, "kl": 0.3979896306991577, "learning_rate": 4.9947113344764455e-06, "loss": 0.3045, "step": 42, "step_loss": 0.32115134596824646 }, { "epoch": 0.13, "grad_norm": 1.5259965481994058, "kl": 0.3463974595069885, "learning_rate": 4.994421717283693e-06, "loss": 0.304, "step": 43, "step_loss": 0.3217414617538452 }, { "epoch": 0.13, "grad_norm": 1.2978285994841212, "kl": 0.36674511432647705, "learning_rate": 4.994124389908078e-06, "loss": 0.2864, "step": 44, "step_loss": 0.31553012132644653 }, { "epoch": 0.13, "grad_norm": 1.5564349432288815, "kl": 0.35473960638046265, "learning_rate": 4.993819353370999e-06, "loss": 0.3335, "step": 45, "step_loss": 0.3283192217350006 }, { "epoch": 0.14, "grad_norm": 1.3575598996472378, "kl": 0.2651940584182739, "learning_rate": 4.993506608720339e-06, "loss": 0.301, "step": 46, "step_loss": 0.2609683871269226 }, { "epoch": 0.14, "grad_norm": 1.3229608497099903, "kl": 0.2759368121623993, "learning_rate": 4.9931861570304555e-06, "loss": 0.2925, "step": 47, "step_loss": 0.3200822174549103 }, { "epoch": 0.14, "grad_norm": 1.368724787057477, "kl": 0.3092648684978485, "learning_rate": 4.992857999402187e-06, "loss": 0.2718, "step": 48, "step_loss": 0.2981921136379242 }, { "epoch": 0.14, "grad_norm": 1.3728861165253508, "kl": 0.3472633361816406, "learning_rate": 4.992522136962841e-06, "loss": 0.3126, "step": 49, "step_loss": 0.31947529315948486 }, { "epoch": 0.15, "grad_norm": 1.4242257817482427, "kl": 0.28193366527557373, "learning_rate": 4.992178570866195e-06, "loss": 0.2964, "step": 50, "step_loss": 0.2877271771430969 }, { "epoch": 0.15, "grad_norm": 1.521655434234139, "kl": 0.2764663100242615, "learning_rate": 4.9918273022924885e-06, "loss": 0.3052, "step": 51, "step_loss": 0.2541694641113281 }, { "epoch": 0.15, "grad_norm": 1.5242796013210014, "kl": 0.2966528832912445, "learning_rate": 4.991468332448422e-06, "loss": 0.3304, "step": 52, "step_loss": 0.30177876353263855 }, { "epoch": 0.16, "grad_norm": 1.4177402595981858, "kl": 0.2607875168323517, "learning_rate": 4.991101662567153e-06, "loss": 0.3214, "step": 53, "step_loss": 0.27815118432044983 }, { "epoch": 0.16, "grad_norm": 1.4497114613800353, "kl": 0.36675119400024414, "learning_rate": 4.990727293908288e-06, "loss": 0.3141, "step": 54, "step_loss": 0.34744372963905334 }, { "epoch": 0.16, "grad_norm": 1.476617226772246, "kl": 0.2716200351715088, "learning_rate": 4.990345227757884e-06, "loss": 0.298, "step": 55, "step_loss": 0.25762778520584106 }, { "epoch": 0.16, "grad_norm": 1.3365233291298146, "kl": 0.29236650466918945, "learning_rate": 4.989955465428438e-06, "loss": 0.2763, "step": 56, "step_loss": 0.27397677302360535 }, { "epoch": 0.17, "grad_norm": 1.4599412693497766, "kl": 0.27795305848121643, "learning_rate": 4.989558008258888e-06, "loss": 0.3043, "step": 57, "step_loss": 0.24359291791915894 }, { "epoch": 0.17, "grad_norm": 1.5617623113651034, "kl": 0.35907837748527527, "learning_rate": 4.9891528576146046e-06, "loss": 0.325, "step": 58, "step_loss": 0.32902687788009644 }, { "epoch": 0.17, "grad_norm": 1.4059355225202563, "kl": 0.307645320892334, "learning_rate": 4.988740014887386e-06, "loss": 0.3028, "step": 59, "step_loss": 0.3135125935077667 }, { "epoch": 0.18, "grad_norm": 1.4501862291250671, "kl": 0.32680854201316833, "learning_rate": 4.9883194814954575e-06, "loss": 0.3073, "step": 60, "step_loss": 0.34192976355552673 }, { "epoch": 0.18, "grad_norm": 1.3852935344310822, "kl": 0.33248478174209595, "learning_rate": 4.987891258883463e-06, "loss": 0.3086, "step": 61, "step_loss": 0.3534170389175415 }, { "epoch": 0.18, "grad_norm": 1.4199406597673683, "kl": 0.28986871242523193, "learning_rate": 4.98745534852246e-06, "loss": 0.3038, "step": 62, "step_loss": 0.3005980849266052 }, { "epoch": 0.19, "grad_norm": 1.5113547919536734, "kl": 0.30484679341316223, "learning_rate": 4.987011751909917e-06, "loss": 0.3044, "step": 63, "step_loss": 0.23517432808876038 }, { "epoch": 0.19, "grad_norm": 1.3355819496710448, "kl": 0.3742017447948456, "learning_rate": 4.986560470569704e-06, "loss": 0.3017, "step": 64, "step_loss": 0.3898337483406067 }, { "epoch": 0.19, "grad_norm": 1.4221577520408863, "kl": 0.3570794463157654, "learning_rate": 4.986101506052093e-06, "loss": 0.3022, "step": 65, "step_loss": 0.30060654878616333 }, { "epoch": 0.19, "grad_norm": 1.6366629670681416, "kl": 0.32745978236198425, "learning_rate": 4.9856348599337485e-06, "loss": 0.3361, "step": 66, "step_loss": 0.31522005796432495 }, { "epoch": 0.2, "grad_norm": 1.3857470049246778, "kl": 0.284152626991272, "learning_rate": 4.985160533817723e-06, "loss": 0.2951, "step": 67, "step_loss": 0.27435213327407837 }, { "epoch": 0.2, "grad_norm": 1.3848855487240277, "kl": 0.3354739546775818, "learning_rate": 4.984678529333453e-06, "loss": 0.2879, "step": 68, "step_loss": 0.3034001886844635 }, { "epoch": 0.2, "grad_norm": 1.4341508578031887, "kl": 0.25649285316467285, "learning_rate": 4.984188848136751e-06, "loss": 0.3164, "step": 69, "step_loss": 0.30513542890548706 }, { "epoch": 0.21, "grad_norm": 1.3467125947401988, "kl": 0.2996070384979248, "learning_rate": 4.983691491909802e-06, "loss": 0.3019, "step": 70, "step_loss": 0.36908990144729614 }, { "epoch": 0.21, "grad_norm": 1.3474866013931235, "kl": 0.3084847927093506, "learning_rate": 4.9831864623611564e-06, "loss": 0.3008, "step": 71, "step_loss": 0.26990407705307007 }, { "epoch": 0.21, "grad_norm": 1.4862590377731744, "kl": 0.31708824634552, "learning_rate": 4.982673761225724e-06, "loss": 0.314, "step": 72, "step_loss": 0.2802667021751404 }, { "epoch": 0.21, "grad_norm": 1.3719248630090615, "kl": 0.2884005010128021, "learning_rate": 4.982153390264769e-06, "loss": 0.2902, "step": 73, "step_loss": 0.3215486705303192 }, { "epoch": 0.22, "grad_norm": 1.6257809266397576, "kl": 0.4290885329246521, "learning_rate": 4.981625351265903e-06, "loss": 0.3466, "step": 74, "step_loss": 0.38507044315338135 }, { "epoch": 0.22, "grad_norm": 1.4642429493015807, "kl": 0.3809298872947693, "learning_rate": 4.9810896460430805e-06, "loss": 0.3213, "step": 75, "step_loss": 0.43292292952537537 }, { "epoch": 0.22, "grad_norm": 1.2483432519354851, "kl": 0.3046913146972656, "learning_rate": 4.980546276436591e-06, "loss": 0.2913, "step": 76, "step_loss": 0.29639115929603577 }, { "epoch": 0.23, "grad_norm": 1.380629384097335, "kl": 0.3005834221839905, "learning_rate": 4.979995244313052e-06, "loss": 0.3037, "step": 77, "step_loss": 0.2509528696537018 }, { "epoch": 0.23, "grad_norm": 1.4850279125005055, "kl": 0.3294805884361267, "learning_rate": 4.979436551565407e-06, "loss": 0.3246, "step": 78, "step_loss": 0.2669539451599121 }, { "epoch": 0.23, "grad_norm": 1.4283473816232348, "kl": 0.29898595809936523, "learning_rate": 4.9788702001129105e-06, "loss": 0.3092, "step": 79, "step_loss": 0.35289207100868225 }, { "epoch": 0.24, "grad_norm": 1.4847956412758032, "kl": 0.244553804397583, "learning_rate": 4.97829619190113e-06, "loss": 0.3042, "step": 80, "step_loss": 0.23004528880119324 }, { "epoch": 0.24, "grad_norm": 1.2924538131518895, "kl": 0.2728404104709625, "learning_rate": 4.977714528901938e-06, "loss": 0.2793, "step": 81, "step_loss": 0.2652290165424347 }, { "epoch": 0.24, "grad_norm": 1.2587616697753488, "kl": 0.2822519540786743, "learning_rate": 4.9771252131135e-06, "loss": 0.2952, "step": 82, "step_loss": 0.2914755642414093 }, { "epoch": 0.24, "grad_norm": 1.3835708779976017, "kl": 0.3696300983428955, "learning_rate": 4.976528246560269e-06, "loss": 0.3029, "step": 83, "step_loss": 0.36016547679901123 }, { "epoch": 0.25, "grad_norm": 1.2530361733858713, "kl": 0.31230488419532776, "learning_rate": 4.975923631292988e-06, "loss": 0.2898, "step": 84, "step_loss": 0.33970096707344055 }, { "epoch": 0.25, "grad_norm": 1.4116112976895483, "kl": 0.312380850315094, "learning_rate": 4.975311369388667e-06, "loss": 0.2915, "step": 85, "step_loss": 0.3011205196380615 }, { "epoch": 0.25, "grad_norm": 1.3375751652497407, "kl": 0.3674446940422058, "learning_rate": 4.974691462950589e-06, "loss": 0.3105, "step": 86, "step_loss": 0.3819746971130371 }, { "epoch": 0.26, "grad_norm": 1.3695626037107396, "kl": 0.3539569675922394, "learning_rate": 4.974063914108297e-06, "loss": 0.3069, "step": 87, "step_loss": 0.36265525221824646 }, { "epoch": 0.26, "grad_norm": 1.4398198275304508, "kl": 0.30112671852111816, "learning_rate": 4.9734287250175865e-06, "loss": 0.3125, "step": 88, "step_loss": 0.2848939299583435 }, { "epoch": 0.26, "grad_norm": 1.3830922269493662, "kl": 0.33735549449920654, "learning_rate": 4.9727858978605e-06, "loss": 0.3147, "step": 89, "step_loss": 0.3306404650211334 }, { "epoch": 0.26, "grad_norm": 1.2728858115467476, "kl": 0.28279662132263184, "learning_rate": 4.97213543484532e-06, "loss": 0.3045, "step": 90, "step_loss": 0.27660509943962097 }, { "epoch": 0.27, "grad_norm": 1.5403404739139803, "kl": 0.27295035123825073, "learning_rate": 4.97147733820656e-06, "loss": 0.2961, "step": 91, "step_loss": 0.26971620321273804 }, { "epoch": 0.27, "grad_norm": 1.411561429621046, "kl": 0.37973371148109436, "learning_rate": 4.970811610204954e-06, "loss": 0.299, "step": 92, "step_loss": 0.326732337474823 }, { "epoch": 0.27, "grad_norm": 1.4805034744095673, "kl": 0.3346588611602783, "learning_rate": 4.970138253127456e-06, "loss": 0.3116, "step": 93, "step_loss": 0.32256820797920227 }, { "epoch": 0.28, "grad_norm": 1.2537072876512962, "kl": 0.31613579392433167, "learning_rate": 4.969457269287224e-06, "loss": 0.2909, "step": 94, "step_loss": 0.3002708852291107 }, { "epoch": 0.28, "grad_norm": 1.3783445970477886, "kl": 0.2733086943626404, "learning_rate": 4.968768661023619e-06, "loss": 0.3092, "step": 95, "step_loss": 0.3080819547176361 }, { "epoch": 0.28, "grad_norm": 1.2682216286353625, "kl": 0.2610551714897156, "learning_rate": 4.968072430702193e-06, "loss": 0.2839, "step": 96, "step_loss": 0.25847068428993225 }, { "epoch": 0.29, "grad_norm": 1.2077676939950335, "kl": 0.298378050327301, "learning_rate": 4.967368580714681e-06, "loss": 0.2803, "step": 97, "step_loss": 0.2736283242702484 }, { "epoch": 0.29, "grad_norm": 1.2148373717113006, "kl": 0.27147936820983887, "learning_rate": 4.966657113478992e-06, "loss": 0.2765, "step": 98, "step_loss": 0.30714210867881775 }, { "epoch": 0.29, "grad_norm": 1.4139521118408638, "kl": 0.32129478454589844, "learning_rate": 4.9659380314392075e-06, "loss": 0.3138, "step": 99, "step_loss": 0.334412157535553 }, { "epoch": 0.29, "grad_norm": 1.3294266549115017, "kl": 0.35117053985595703, "learning_rate": 4.965211337065563e-06, "loss": 0.3057, "step": 100, "step_loss": 0.30289411544799805 }, { "epoch": 0.3, "grad_norm": 1.424742291386975, "kl": 0.30909568071365356, "learning_rate": 4.964477032854448e-06, "loss": 0.3141, "step": 101, "step_loss": 0.3024054765701294 }, { "epoch": 0.3, "grad_norm": 1.4494693467464015, "kl": 0.3519325256347656, "learning_rate": 4.963735121328389e-06, "loss": 0.3074, "step": 102, "step_loss": 0.29212692379951477 }, { "epoch": 0.3, "grad_norm": 1.4137157099247653, "kl": 0.32887011766433716, "learning_rate": 4.9629856050360505e-06, "loss": 0.3085, "step": 103, "step_loss": 0.2845655679702759 }, { "epoch": 0.31, "grad_norm": 1.3535308616848474, "kl": 0.3814646005630493, "learning_rate": 4.962228486552219e-06, "loss": 0.3086, "step": 104, "step_loss": 0.4175484776496887 }, { "epoch": 0.31, "grad_norm": 1.264600598229456, "kl": 0.32557374238967896, "learning_rate": 4.961463768477797e-06, "loss": 0.3065, "step": 105, "step_loss": 0.3005172908306122 }, { "epoch": 0.31, "grad_norm": 1.3367387253981593, "kl": 0.33622100949287415, "learning_rate": 4.960691453439793e-06, "loss": 0.3221, "step": 106, "step_loss": 0.32829591631889343 }, { "epoch": 0.31, "grad_norm": 1.285114925381023, "kl": 0.415163516998291, "learning_rate": 4.9599115440913145e-06, "loss": 0.3033, "step": 107, "step_loss": 0.3966817557811737 }, { "epoch": 0.32, "grad_norm": 1.2697189074985138, "kl": 0.31419163942337036, "learning_rate": 4.9591240431115565e-06, "loss": 0.2899, "step": 108, "step_loss": 0.28133562207221985 }, { "epoch": 0.32, "grad_norm": 1.2984645133760384, "kl": 0.2992507219314575, "learning_rate": 4.9583289532057925e-06, "loss": 0.2957, "step": 109, "step_loss": 0.3047301173210144 }, { "epoch": 0.32, "grad_norm": 1.2959997601773783, "kl": 0.3358232080936432, "learning_rate": 4.9575262771053666e-06, "loss": 0.2977, "step": 110, "step_loss": 0.3054252564907074 }, { "epoch": 0.33, "grad_norm": 1.3678091978992384, "kl": 0.337202250957489, "learning_rate": 4.956716017567685e-06, "loss": 0.3189, "step": 111, "step_loss": 0.3637933135032654 }, { "epoch": 0.33, "grad_norm": 1.3266191585020017, "kl": 0.2727779150009155, "learning_rate": 4.955898177376204e-06, "loss": 0.2895, "step": 112, "step_loss": 0.27312493324279785 }, { "epoch": 0.33, "grad_norm": 1.3644833661607003, "kl": 0.27666690945625305, "learning_rate": 4.95507275934042e-06, "loss": 0.314, "step": 113, "step_loss": 0.251804381608963 }, { "epoch": 0.34, "grad_norm": 1.2828415661770218, "kl": 0.28860220313072205, "learning_rate": 4.954239766295862e-06, "loss": 0.2829, "step": 114, "step_loss": 0.2733534574508667 }, { "epoch": 0.34, "grad_norm": 1.225743500205446, "kl": 0.2747955322265625, "learning_rate": 4.953399201104084e-06, "loss": 0.2794, "step": 115, "step_loss": 0.23778927326202393 }, { "epoch": 0.34, "grad_norm": 1.229862820827967, "kl": 0.35592517256736755, "learning_rate": 4.952551066652648e-06, "loss": 0.2758, "step": 116, "step_loss": 0.3347897529602051 }, { "epoch": 0.34, "grad_norm": 1.4102429181039295, "kl": 0.3126868009567261, "learning_rate": 4.951695365855122e-06, "loss": 0.2897, "step": 117, "step_loss": 0.3053089380264282 }, { "epoch": 0.35, "grad_norm": 1.332632214515851, "kl": 0.341084748506546, "learning_rate": 4.950832101651063e-06, "loss": 0.2992, "step": 118, "step_loss": 0.3318370282649994 }, { "epoch": 0.35, "grad_norm": 1.2233149853258531, "kl": 0.29855814576148987, "learning_rate": 4.949961277006013e-06, "loss": 0.289, "step": 119, "step_loss": 0.27190065383911133 }, { "epoch": 0.35, "grad_norm": 1.2346324893530762, "kl": 0.28264421224594116, "learning_rate": 4.949082894911485e-06, "loss": 0.2996, "step": 120, "step_loss": 0.28687310218811035 }, { "epoch": 0.36, "grad_norm": 1.2793881099869688, "kl": 0.27299419045448303, "learning_rate": 4.948196958384955e-06, "loss": 0.3025, "step": 121, "step_loss": 0.23233090341091156 }, { "epoch": 0.36, "grad_norm": 1.2998410971433687, "kl": 0.29575178027153015, "learning_rate": 4.9473034704698485e-06, "loss": 0.2962, "step": 122, "step_loss": 0.2823527753353119 }, { "epoch": 0.36, "grad_norm": 1.2555805745200317, "kl": 0.28942999243736267, "learning_rate": 4.9464024342355335e-06, "loss": 0.2914, "step": 123, "step_loss": 0.2781384587287903 }, { "epoch": 0.36, "grad_norm": 1.2708803374034965, "kl": 0.3180427551269531, "learning_rate": 4.945493852777307e-06, "loss": 0.2944, "step": 124, "step_loss": 0.31637904047966003 }, { "epoch": 0.37, "grad_norm": 1.3848259809116499, "kl": 0.33197423815727234, "learning_rate": 4.944577729216388e-06, "loss": 0.3152, "step": 125, "step_loss": 0.3225075900554657 }, { "epoch": 0.37, "grad_norm": 1.4354123007048643, "kl": 0.2686159908771515, "learning_rate": 4.943654066699904e-06, "loss": 0.3118, "step": 126, "step_loss": 0.29845237731933594 }, { "epoch": 0.37, "grad_norm": 1.2899412223451328, "kl": 0.3330647349357605, "learning_rate": 4.942722868400879e-06, "loss": 0.3322, "step": 127, "step_loss": 0.3406273424625397 }, { "epoch": 0.38, "grad_norm": 1.2878537840081934, "kl": 0.31060031056404114, "learning_rate": 4.941784137518227e-06, "loss": 0.2967, "step": 128, "step_loss": 0.28862464427948 }, { "epoch": 0.38, "grad_norm": 1.31852289386673, "kl": 0.3059355914592743, "learning_rate": 4.940837877276735e-06, "loss": 0.2919, "step": 129, "step_loss": 0.30133622884750366 }, { "epoch": 0.38, "grad_norm": 1.243803418751941, "kl": 0.32812631130218506, "learning_rate": 4.93988409092706e-06, "loss": 0.2982, "step": 130, "step_loss": 0.2712858319282532 }, { "epoch": 0.39, "grad_norm": 1.206601958490433, "kl": 0.2982441484928131, "learning_rate": 4.93892278174571e-06, "loss": 0.2717, "step": 131, "step_loss": 0.2625717520713806 }, { "epoch": 0.39, "grad_norm": 1.2840226109970796, "kl": 0.3162402808666229, "learning_rate": 4.937953953035035e-06, "loss": 0.2973, "step": 132, "step_loss": 0.3028516471385956 }, { "epoch": 0.39, "grad_norm": 1.219921942425963, "kl": 0.3007054030895233, "learning_rate": 4.93697760812322e-06, "loss": 0.2999, "step": 133, "step_loss": 0.29070332646369934 }, { "epoch": 0.39, "grad_norm": 1.3505734810630552, "kl": 0.3097812533378601, "learning_rate": 4.935993750364267e-06, "loss": 0.3213, "step": 134, "step_loss": 0.32063156366348267 }, { "epoch": 0.4, "grad_norm": 1.2960801503834385, "kl": 0.3124713599681854, "learning_rate": 4.9350023831379885e-06, "loss": 0.292, "step": 135, "step_loss": 0.2716798782348633 }, { "epoch": 0.4, "grad_norm": 1.3346284118716634, "kl": 0.299164354801178, "learning_rate": 4.934003509849993e-06, "loss": 0.292, "step": 136, "step_loss": 0.29498571157455444 }, { "epoch": 0.4, "grad_norm": 1.338862702232099, "kl": 0.3523540198802948, "learning_rate": 4.932997133931676e-06, "loss": 0.2999, "step": 137, "step_loss": 0.2898944914340973 }, { "epoch": 0.41, "grad_norm": 1.299533142110482, "kl": 0.3428666591644287, "learning_rate": 4.931983258840204e-06, "loss": 0.2808, "step": 138, "step_loss": 0.3303877115249634 }, { "epoch": 0.41, "grad_norm": 1.2814240918861108, "kl": 0.26252108812332153, "learning_rate": 4.930961888058506e-06, "loss": 0.2909, "step": 139, "step_loss": 0.23940859735012054 }, { "epoch": 0.41, "grad_norm": 1.2407806935373367, "kl": 0.2580229341983795, "learning_rate": 4.929933025095262e-06, "loss": 0.2906, "step": 140, "step_loss": 0.2781831920146942 }, { "epoch": 0.41, "grad_norm": 1.315404278595456, "kl": 0.3604077398777008, "learning_rate": 4.928896673484888e-06, "loss": 0.3072, "step": 141, "step_loss": 0.3967340588569641 }, { "epoch": 0.42, "grad_norm": 1.286676458510305, "kl": 0.32844799757003784, "learning_rate": 4.9278528367875275e-06, "loss": 0.2836, "step": 142, "step_loss": 0.2578602731227875 }, { "epoch": 0.42, "grad_norm": 1.2526938379221886, "kl": 0.32336488366127014, "learning_rate": 4.926801518589035e-06, "loss": 0.2991, "step": 143, "step_loss": 0.2511914372444153 }, { "epoch": 0.42, "grad_norm": 1.196299407291449, "kl": 0.239657461643219, "learning_rate": 4.9257427225009665e-06, "loss": 0.2827, "step": 144, "step_loss": 0.23767231404781342 }, { "epoch": 0.43, "grad_norm": 1.253688615112022, "kl": 0.36846810579299927, "learning_rate": 4.924676452160568e-06, "loss": 0.2971, "step": 145, "step_loss": 0.32303857803344727 }, { "epoch": 0.43, "grad_norm": 1.2337718709400438, "kl": 0.292164146900177, "learning_rate": 4.92360271123076e-06, "loss": 0.2999, "step": 146, "step_loss": 0.3450307250022888 }, { "epoch": 0.43, "grad_norm": 1.4234900890899038, "kl": 0.3000570237636566, "learning_rate": 4.922521503400125e-06, "loss": 0.297, "step": 147, "step_loss": 0.2996768355369568 }, { "epoch": 0.44, "grad_norm": 1.2489090450298066, "kl": 0.2863319516181946, "learning_rate": 4.921432832382901e-06, "loss": 0.2896, "step": 148, "step_loss": 0.28483325242996216 }, { "epoch": 0.44, "grad_norm": 1.230750460392728, "kl": 0.30758440494537354, "learning_rate": 4.92033670191896e-06, "loss": 0.2884, "step": 149, "step_loss": 0.2748796045780182 }, { "epoch": 0.44, "grad_norm": 1.3306176467968267, "kl": 0.26451659202575684, "learning_rate": 4.9192331157738e-06, "loss": 0.29, "step": 150, "step_loss": 0.2788347601890564 }, { "epoch": 0.44, "grad_norm": 1.332331055940497, "kl": 0.2376236617565155, "learning_rate": 4.918122077738533e-06, "loss": 0.2961, "step": 151, "step_loss": 0.24186082184314728 }, { "epoch": 0.45, "grad_norm": 1.332600066594834, "kl": 0.3817494511604309, "learning_rate": 4.917003591629867e-06, "loss": 0.307, "step": 152, "step_loss": 0.33062270283699036 }, { "epoch": 0.45, "grad_norm": 1.2536110337354542, "kl": 0.2526598870754242, "learning_rate": 4.915877661290099e-06, "loss": 0.291, "step": 153, "step_loss": 0.24536053836345673 }, { "epoch": 0.45, "grad_norm": 1.2674122835595976, "kl": 0.37763291597366333, "learning_rate": 4.914744290587096e-06, "loss": 0.2976, "step": 154, "step_loss": 0.3559175431728363 }, { "epoch": 0.46, "grad_norm": 1.144770437772112, "kl": 0.28675389289855957, "learning_rate": 4.913603483414291e-06, "loss": 0.2843, "step": 155, "step_loss": 0.26313164830207825 }, { "epoch": 0.46, "grad_norm": 1.2867088389290005, "kl": 0.30408233404159546, "learning_rate": 4.912455243690654e-06, "loss": 0.2895, "step": 156, "step_loss": 0.305271178483963 }, { "epoch": 0.46, "grad_norm": 1.3337139732461034, "kl": 0.33538201451301575, "learning_rate": 4.911299575360694e-06, "loss": 0.2829, "step": 157, "step_loss": 0.2805282771587372 }, { "epoch": 0.46, "grad_norm": 1.2603558835697868, "kl": 0.2967783808708191, "learning_rate": 4.910136482394439e-06, "loss": 0.2987, "step": 158, "step_loss": 0.266804963350296 }, { "epoch": 0.47, "grad_norm": 1.1861792045426607, "kl": 0.32278546690940857, "learning_rate": 4.90896596878742e-06, "loss": 0.2737, "step": 159, "step_loss": 0.2891842722892761 }, { "epoch": 0.47, "grad_norm": 1.168008822771122, "kl": 0.3604351878166199, "learning_rate": 4.907788038560661e-06, "loss": 0.2703, "step": 160, "step_loss": 0.3211236596107483 }, { "epoch": 0.47, "grad_norm": 1.2273672100425606, "kl": 0.3063991069793701, "learning_rate": 4.906602695760665e-06, "loss": 0.2919, "step": 161, "step_loss": 0.25892752408981323 }, { "epoch": 0.48, "grad_norm": 1.4264755544249268, "kl": 0.2818457782268524, "learning_rate": 4.905409944459397e-06, "loss": 0.2934, "step": 162, "step_loss": 0.2786937654018402 }, { "epoch": 0.48, "grad_norm": 1.314284320892201, "kl": 0.32207655906677246, "learning_rate": 4.904209788754275e-06, "loss": 0.2931, "step": 163, "step_loss": 0.23789873719215393 }, { "epoch": 0.48, "grad_norm": 1.1804456232659608, "kl": 0.33073878288269043, "learning_rate": 4.903002232768151e-06, "loss": 0.2776, "step": 164, "step_loss": 0.2962447702884674 }, { "epoch": 0.49, "grad_norm": 1.2497392620900494, "kl": 0.2782425284385681, "learning_rate": 4.9017872806493e-06, "loss": 0.2918, "step": 165, "step_loss": 0.2765083611011505 }, { "epoch": 0.49, "grad_norm": 1.2704086792818736, "kl": 0.3183959722518921, "learning_rate": 4.900564936571404e-06, "loss": 0.2873, "step": 166, "step_loss": 0.2902598977088928 }, { "epoch": 0.49, "grad_norm": 1.2954983816000556, "kl": 0.29798054695129395, "learning_rate": 4.899335204733538e-06, "loss": 0.3052, "step": 167, "step_loss": 0.2802087068557739 }, { "epoch": 0.49, "grad_norm": 1.2070703839621497, "kl": 0.35096466541290283, "learning_rate": 4.8980980893601575e-06, "loss": 0.2918, "step": 168, "step_loss": 0.3088727593421936 }, { "epoch": 0.5, "grad_norm": 1.3158993296815968, "kl": 0.27438434958457947, "learning_rate": 4.8968535947010795e-06, "loss": 0.2913, "step": 169, "step_loss": 0.2575715184211731 }, { "epoch": 0.5, "grad_norm": 1.2643025871656506, "kl": 0.27822235226631165, "learning_rate": 4.895601725031475e-06, "loss": 0.2934, "step": 170, "step_loss": 0.2611542344093323 }, { "epoch": 0.5, "grad_norm": 1.152592001860982, "kl": 0.3118104934692383, "learning_rate": 4.894342484651846e-06, "loss": 0.2776, "step": 171, "step_loss": 0.29376712441444397 }, { "epoch": 0.51, "grad_norm": 1.2558097414261615, "kl": 0.31661081314086914, "learning_rate": 4.893075877888018e-06, "loss": 0.3067, "step": 172, "step_loss": 0.302161306142807 }, { "epoch": 0.51, "grad_norm": 1.4838421517721772, "kl": 0.33207058906555176, "learning_rate": 4.891801909091119e-06, "loss": 0.3207, "step": 173, "step_loss": 0.3354288935661316 }, { "epoch": 0.51, "grad_norm": 1.270609611872136, "kl": 0.3163268566131592, "learning_rate": 4.8905205826375705e-06, "loss": 0.3031, "step": 174, "step_loss": 0.285269558429718 }, { "epoch": 0.51, "grad_norm": 1.278119700217976, "kl": 0.29490146040916443, "learning_rate": 4.8892319029290685e-06, "loss": 0.2906, "step": 175, "step_loss": 0.3601941168308258 }, { "epoch": 0.52, "grad_norm": 1.3328158565268786, "kl": 0.287243127822876, "learning_rate": 4.887935874392567e-06, "loss": 0.3141, "step": 176, "step_loss": 0.27393481135368347 }, { "epoch": 0.52, "grad_norm": 1.146954506657001, "kl": 0.3274082541465759, "learning_rate": 4.886632501480269e-06, "loss": 0.2816, "step": 177, "step_loss": 0.3594622015953064 }, { "epoch": 0.52, "grad_norm": 1.2568621036203667, "kl": 0.32911595702171326, "learning_rate": 4.885321788669604e-06, "loss": 0.3038, "step": 178, "step_loss": 0.2939574420452118 }, { "epoch": 0.53, "grad_norm": 1.215908552897293, "kl": 0.2502468228340149, "learning_rate": 4.884003740463219e-06, "loss": 0.2902, "step": 179, "step_loss": 0.24465596675872803 }, { "epoch": 0.53, "grad_norm": 1.1611108031885955, "kl": 0.30213692784309387, "learning_rate": 4.882678361388958e-06, "loss": 0.274, "step": 180, "step_loss": 0.2538335919380188 }, { "epoch": 0.53, "grad_norm": 1.301100728893655, "kl": 0.3141769766807556, "learning_rate": 4.88134565599985e-06, "loss": 0.3048, "step": 181, "step_loss": 0.25083863735198975 }, { "epoch": 0.54, "grad_norm": 1.1533263462306118, "kl": 0.31978100538253784, "learning_rate": 4.880005628874088e-06, "loss": 0.2979, "step": 182, "step_loss": 0.2340894490480423 }, { "epoch": 0.54, "grad_norm": 1.2517414717516113, "kl": 0.2578886151313782, "learning_rate": 4.878658284615023e-06, "loss": 0.2888, "step": 183, "step_loss": 0.25022444128990173 }, { "epoch": 0.54, "grad_norm": 1.2245537412134813, "kl": 0.3525405824184418, "learning_rate": 4.877303627851138e-06, "loss": 0.2856, "step": 184, "step_loss": 0.30141592025756836 }, { "epoch": 0.54, "grad_norm": 1.191367958579159, "kl": 0.2949683368206024, "learning_rate": 4.875941663236039e-06, "loss": 0.2811, "step": 185, "step_loss": 0.27863818407058716 }, { "epoch": 0.55, "grad_norm": 1.2248110810400246, "kl": 0.31630921363830566, "learning_rate": 4.874572395448432e-06, "loss": 0.288, "step": 186, "step_loss": 0.26466599106788635 }, { "epoch": 0.55, "grad_norm": 1.1677774508685308, "kl": 0.3383273482322693, "learning_rate": 4.8731958291921174e-06, "loss": 0.2646, "step": 187, "step_loss": 0.23358532786369324 }, { "epoch": 0.55, "grad_norm": 1.2534617173778235, "kl": 0.3180442452430725, "learning_rate": 4.871811969195963e-06, "loss": 0.2795, "step": 188, "step_loss": 0.2650742828845978 }, { "epoch": 0.56, "grad_norm": 1.3623516662591668, "kl": 0.36272916197776794, "learning_rate": 4.870420820213896e-06, "loss": 0.3179, "step": 189, "step_loss": 0.34722331166267395 }, { "epoch": 0.56, "grad_norm": 1.139199587731826, "kl": 0.31969937682151794, "learning_rate": 4.869022387024879e-06, "loss": 0.2761, "step": 190, "step_loss": 0.26098594069480896 }, { "epoch": 0.56, "grad_norm": 1.2194709601641671, "kl": 0.34485194087028503, "learning_rate": 4.867616674432903e-06, "loss": 0.3146, "step": 191, "step_loss": 0.3215685486793518 }, { "epoch": 0.56, "grad_norm": 1.2744845374487466, "kl": 0.3000357747077942, "learning_rate": 4.8662036872669615e-06, "loss": 0.2898, "step": 192, "step_loss": 0.3152067959308624 }, { "epoch": 0.57, "grad_norm": 1.2219115844478998, "kl": 0.26532527804374695, "learning_rate": 4.864783430381039e-06, "loss": 0.2993, "step": 193, "step_loss": 0.2950400710105896 }, { "epoch": 0.57, "grad_norm": 1.3029827184325222, "kl": 0.35824403166770935, "learning_rate": 4.863355908654095e-06, "loss": 0.2951, "step": 194, "step_loss": 0.3127448856830597 }, { "epoch": 0.57, "grad_norm": 1.3611804602947277, "kl": 0.3048401474952698, "learning_rate": 4.861921126990045e-06, "loss": 0.3019, "step": 195, "step_loss": 0.3276352882385254 }, { "epoch": 0.58, "grad_norm": 1.2537615861956364, "kl": 0.32806965708732605, "learning_rate": 4.860479090317742e-06, "loss": 0.3018, "step": 196, "step_loss": 0.2849041223526001 }, { "epoch": 0.58, "grad_norm": 1.305638823054846, "kl": 0.3609757423400879, "learning_rate": 4.859029803590966e-06, "loss": 0.3146, "step": 197, "step_loss": 0.304736852645874 }, { "epoch": 0.58, "grad_norm": 1.180396492221588, "kl": 0.31104931235313416, "learning_rate": 4.8575732717884e-06, "loss": 0.307, "step": 198, "step_loss": 0.32223203778266907 }, { "epoch": 0.59, "grad_norm": 1.3152524549772768, "kl": 0.30966895818710327, "learning_rate": 4.856109499913615e-06, "loss": 0.2935, "step": 199, "step_loss": 0.29456788301467896 }, { "epoch": 0.59, "grad_norm": 1.214126043731043, "kl": 0.3455277681350708, "learning_rate": 4.854638492995056e-06, "loss": 0.2749, "step": 200, "step_loss": 0.362245112657547 }, { "epoch": 0.59, "grad_norm": 1.2555037212901567, "kl": 0.23420512676239014, "learning_rate": 4.853160256086021e-06, "loss": 0.2803, "step": 201, "step_loss": 0.25304749608039856 }, { "epoch": 0.59, "grad_norm": 1.2278744065777365, "kl": 0.3848443627357483, "learning_rate": 4.8516747942646465e-06, "loss": 0.3211, "step": 202, "step_loss": 0.382394403219223 }, { "epoch": 0.6, "grad_norm": 1.1839836158359853, "kl": 0.3287525475025177, "learning_rate": 4.850182112633885e-06, "loss": 0.2913, "step": 203, "step_loss": 0.29712358117103577 }, { "epoch": 0.6, "grad_norm": 1.1211578271078935, "kl": 0.33271652460098267, "learning_rate": 4.8486822163214944e-06, "loss": 0.2738, "step": 204, "step_loss": 0.28494173288345337 }, { "epoch": 0.6, "grad_norm": 1.2261387888906659, "kl": 0.304698646068573, "learning_rate": 4.847175110480015e-06, "loss": 0.2984, "step": 205, "step_loss": 0.28125837445259094 }, { "epoch": 0.61, "grad_norm": 1.3388127733966415, "kl": 0.30615222454071045, "learning_rate": 4.8456608002867555e-06, "loss": 0.2859, "step": 206, "step_loss": 0.2698904275894165 }, { "epoch": 0.61, "grad_norm": 1.179491051996919, "kl": 0.31592032313346863, "learning_rate": 4.844139290943771e-06, "loss": 0.2909, "step": 207, "step_loss": 0.2879568934440613 }, { "epoch": 0.61, "grad_norm": 1.1817492222340549, "kl": 0.3141896724700928, "learning_rate": 4.84261058767785e-06, "loss": 0.2832, "step": 208, "step_loss": 0.24105487763881683 }, { "epoch": 0.61, "grad_norm": 1.275156633488967, "kl": 0.4037202000617981, "learning_rate": 4.841074695740493e-06, "loss": 0.2875, "step": 209, "step_loss": 0.3415408432483673 }, { "epoch": 0.62, "grad_norm": 1.188350536024408, "kl": 0.32159894704818726, "learning_rate": 4.839531620407895e-06, "loss": 0.3031, "step": 210, "step_loss": 0.27604830265045166 }, { "epoch": 0.62, "grad_norm": 1.1429229933157632, "kl": 0.32619747519493103, "learning_rate": 4.837981366980928e-06, "loss": 0.2812, "step": 211, "step_loss": 0.27371087670326233 }, { "epoch": 0.62, "grad_norm": 1.2193809489970755, "kl": 0.33697012066841125, "learning_rate": 4.836423940785124e-06, "loss": 0.2775, "step": 212, "step_loss": 0.27547112107276917 }, { "epoch": 0.63, "grad_norm": 1.2348631847135687, "kl": 0.28872644901275635, "learning_rate": 4.834859347170654e-06, "loss": 0.2913, "step": 213, "step_loss": 0.2788584232330322 }, { "epoch": 0.63, "grad_norm": 1.2474162160473052, "kl": 0.3392971456050873, "learning_rate": 4.8332875915123105e-06, "loss": 0.2998, "step": 214, "step_loss": 0.34434232115745544 }, { "epoch": 0.63, "grad_norm": 1.323303913774139, "kl": 0.3054981827735901, "learning_rate": 4.831708679209491e-06, "loss": 0.3121, "step": 215, "step_loss": 0.28162604570388794 }, { "epoch": 0.64, "grad_norm": 1.1929513261742986, "kl": 0.3404708206653595, "learning_rate": 4.830122615686177e-06, "loss": 0.3063, "step": 216, "step_loss": 0.29514598846435547 }, { "epoch": 0.64, "grad_norm": 1.1911094796199106, "kl": 0.33788585662841797, "learning_rate": 4.828529406390917e-06, "loss": 0.2854, "step": 217, "step_loss": 0.31073516607284546 }, { "epoch": 0.64, "grad_norm": 1.2665507498308344, "kl": 0.36928457021713257, "learning_rate": 4.826929056796807e-06, "loss": 0.2902, "step": 218, "step_loss": 0.35527801513671875 }, { "epoch": 0.64, "grad_norm": 1.2680076553195618, "kl": 0.39868634939193726, "learning_rate": 4.8253215724014725e-06, "loss": 0.2981, "step": 219, "step_loss": 0.27999529242515564 }, { "epoch": 0.65, "grad_norm": 1.2775727396335816, "kl": 0.24477747082710266, "learning_rate": 4.823706958727045e-06, "loss": 0.3168, "step": 220, "step_loss": 0.2902810871601105 }, { "epoch": 0.65, "grad_norm": 1.1655079488814604, "kl": 0.3722260594367981, "learning_rate": 4.822085221320152e-06, "loss": 0.2937, "step": 221, "step_loss": 0.30477461218833923 }, { "epoch": 0.65, "grad_norm": 1.1528789635120378, "kl": 0.3633630871772766, "learning_rate": 4.820456365751891e-06, "loss": 0.2733, "step": 222, "step_loss": 0.2682224214076996 }, { "epoch": 0.66, "grad_norm": 1.2833936250857951, "kl": 0.4017341732978821, "learning_rate": 4.818820397617811e-06, "loss": 0.3113, "step": 223, "step_loss": 0.36901068687438965 }, { "epoch": 0.66, "grad_norm": 1.2106488345236557, "kl": 0.2505451738834381, "learning_rate": 4.817177322537898e-06, "loss": 0.3016, "step": 224, "step_loss": 0.2669680416584015 }, { "epoch": 0.66, "grad_norm": 1.2935886740881541, "kl": 0.35546159744262695, "learning_rate": 4.815527146156548e-06, "loss": 0.3008, "step": 225, "step_loss": 0.30147963762283325 }, { "epoch": 0.66, "grad_norm": 1.2631269633217965, "kl": 0.39503854513168335, "learning_rate": 4.8138698741425535e-06, "loss": 0.2868, "step": 226, "step_loss": 0.2804335057735443 }, { "epoch": 0.67, "grad_norm": 1.2621588928369527, "kl": 0.3259708285331726, "learning_rate": 4.812205512189083e-06, "loss": 0.2943, "step": 227, "step_loss": 0.30732688307762146 }, { "epoch": 0.67, "grad_norm": 1.252311030511955, "kl": 0.3171556293964386, "learning_rate": 4.8105340660136614e-06, "loss": 0.2912, "step": 228, "step_loss": 0.3541552722454071 }, { "epoch": 0.67, "grad_norm": 1.1582157784039353, "kl": 0.33646219968795776, "learning_rate": 4.8088555413581495e-06, "loss": 0.2862, "step": 229, "step_loss": 0.3080594539642334 }, { "epoch": 0.68, "grad_norm": 1.1909622553262202, "kl": 0.3843502402305603, "learning_rate": 4.8071699439887215e-06, "loss": 0.3056, "step": 230, "step_loss": 0.3147525489330292 }, { "epoch": 0.68, "grad_norm": 1.2594429963263456, "kl": 0.26849618554115295, "learning_rate": 4.805477279695852e-06, "loss": 0.298, "step": 231, "step_loss": 0.2553267776966095 }, { "epoch": 0.68, "grad_norm": 1.1702087636485823, "kl": 0.40651583671569824, "learning_rate": 4.8037775542942925e-06, "loss": 0.2827, "step": 232, "step_loss": 0.2711586356163025 }, { "epoch": 0.69, "grad_norm": 1.1234639927336083, "kl": 0.32860204577445984, "learning_rate": 4.802070773623047e-06, "loss": 0.2687, "step": 233, "step_loss": 0.24301442503929138 }, { "epoch": 0.69, "grad_norm": 1.232534630116778, "kl": 0.392067551612854, "learning_rate": 4.80035694354536e-06, "loss": 0.2943, "step": 234, "step_loss": 0.3448218107223511 }, { "epoch": 0.69, "grad_norm": 1.1963956326096639, "kl": 0.3147091567516327, "learning_rate": 4.798636069948692e-06, "loss": 0.2856, "step": 235, "step_loss": 0.30948758125305176 }, { "epoch": 0.69, "grad_norm": 1.2201677781179745, "kl": 0.26646876335144043, "learning_rate": 4.7969081587446994e-06, "loss": 0.2745, "step": 236, "step_loss": 0.23111504316329956 }, { "epoch": 0.7, "grad_norm": 1.0792412024548768, "kl": 0.2925267517566681, "learning_rate": 4.795173215869214e-06, "loss": 0.2691, "step": 237, "step_loss": 0.2031397670507431 }, { "epoch": 0.7, "grad_norm": 1.3799205324021417, "kl": 0.3084944486618042, "learning_rate": 4.7934312472822255e-06, "loss": 0.3316, "step": 238, "step_loss": 0.2557719945907593 }, { "epoch": 0.7, "grad_norm": 1.156691853407518, "kl": 0.318729043006897, "learning_rate": 4.791682258967856e-06, "loss": 0.2844, "step": 239, "step_loss": 0.23593303561210632 }, { "epoch": 0.71, "grad_norm": 1.2618289049089157, "kl": 0.3763369619846344, "learning_rate": 4.789926256934344e-06, "loss": 0.2928, "step": 240, "step_loss": 0.2985472083091736 }, { "epoch": 0.71, "grad_norm": 1.182146333032642, "kl": 0.33732131123542786, "learning_rate": 4.788163247214021e-06, "loss": 0.2874, "step": 241, "step_loss": 0.2921789586544037 }, { "epoch": 0.71, "grad_norm": 1.2611562106682654, "kl": 0.26897132396698, "learning_rate": 4.786393235863292e-06, "loss": 0.2863, "step": 242, "step_loss": 0.24371370673179626 }, { "epoch": 0.71, "grad_norm": 1.1667739600277132, "kl": 0.3665682077407837, "learning_rate": 4.7846162289626156e-06, "loss": 0.2726, "step": 243, "step_loss": 0.33350181579589844 }, { "epoch": 0.72, "grad_norm": 1.159930396653472, "kl": 0.294426292181015, "learning_rate": 4.78283223261648e-06, "loss": 0.2855, "step": 244, "step_loss": 0.34132063388824463 }, { "epoch": 0.72, "grad_norm": 1.251219143005887, "kl": 0.3149415850639343, "learning_rate": 4.781041252953385e-06, "loss": 0.2863, "step": 245, "step_loss": 0.31999310851097107 }, { "epoch": 0.72, "grad_norm": 1.3041399931947542, "kl": 0.34632056951522827, "learning_rate": 4.779243296125821e-06, "loss": 0.3093, "step": 246, "step_loss": 0.2359839826822281 }, { "epoch": 0.73, "grad_norm": 1.1281402721586493, "kl": 0.3858156204223633, "learning_rate": 4.777438368310246e-06, "loss": 0.275, "step": 247, "step_loss": 0.3059740364551544 }, { "epoch": 0.73, "grad_norm": 1.182215527114777, "kl": 0.3195875883102417, "learning_rate": 4.775626475707062e-06, "loss": 0.2887, "step": 248, "step_loss": 0.2916240692138672 }, { "epoch": 0.73, "grad_norm": 1.192317476599235, "kl": 0.3068830370903015, "learning_rate": 4.773807624540603e-06, "loss": 0.2722, "step": 249, "step_loss": 0.23078405857086182 }, { "epoch": 0.74, "grad_norm": 1.1808958496558022, "kl": 0.32459574937820435, "learning_rate": 4.771981821059103e-06, "loss": 0.2646, "step": 250, "step_loss": 0.250786155462265 }, { "epoch": 0.74, "grad_norm": 1.2752507061465592, "kl": 0.3301911950111389, "learning_rate": 4.770149071534681e-06, "loss": 0.2828, "step": 251, "step_loss": 0.30227547883987427 }, { "epoch": 0.74, "grad_norm": 1.3749281257570973, "kl": 0.3217305839061737, "learning_rate": 4.7683093822633155e-06, "loss": 0.2692, "step": 252, "step_loss": 0.27843326330184937 }, { "epoch": 0.74, "grad_norm": 1.2384381600974261, "kl": 0.3240608274936676, "learning_rate": 4.766462759564828e-06, "loss": 0.3051, "step": 253, "step_loss": 0.2549932599067688 }, { "epoch": 0.75, "grad_norm": 1.1891702525457484, "kl": 0.3199639320373535, "learning_rate": 4.764609209782856e-06, "loss": 0.3027, "step": 254, "step_loss": 0.30427688360214233 }, { "epoch": 0.75, "grad_norm": 1.2920719505595144, "kl": 0.2981690764427185, "learning_rate": 4.762748739284832e-06, "loss": 0.2795, "step": 255, "step_loss": 0.25225040316581726 }, { "epoch": 0.75, "grad_norm": 1.3848628129737062, "kl": 0.38679319620132446, "learning_rate": 4.760881354461966e-06, "loss": 0.3075, "step": 256, "step_loss": 0.3289315700531006 }, { "epoch": 0.76, "grad_norm": 1.2842520225661949, "kl": 0.29211699962615967, "learning_rate": 4.7590070617292214e-06, "loss": 0.2903, "step": 257, "step_loss": 0.2732706367969513 }, { "epoch": 0.76, "grad_norm": 1.303090202772597, "kl": 0.2750818729400635, "learning_rate": 4.757125867525287e-06, "loss": 0.2855, "step": 258, "step_loss": 0.24878114461898804 }, { "epoch": 0.76, "grad_norm": 1.2016632749663505, "kl": 0.31452032923698425, "learning_rate": 4.755237778312564e-06, "loss": 0.2779, "step": 259, "step_loss": 0.27004343271255493 }, { "epoch": 0.76, "grad_norm": 1.2724450520997204, "kl": 0.33003339171409607, "learning_rate": 4.753342800577139e-06, "loss": 0.2929, "step": 260, "step_loss": 0.2996165454387665 }, { "epoch": 0.77, "grad_norm": 1.1644344183728652, "kl": 0.31851598620414734, "learning_rate": 4.751440940828761e-06, "loss": 0.2968, "step": 261, "step_loss": 0.31100180745124817 }, { "epoch": 0.77, "grad_norm": 1.1708153702363653, "kl": 0.2956541180610657, "learning_rate": 4.749532205600825e-06, "loss": 0.2749, "step": 262, "step_loss": 0.26357224583625793 }, { "epoch": 0.77, "grad_norm": 1.2113982585346696, "kl": 0.29637524485588074, "learning_rate": 4.747616601450337e-06, "loss": 0.2976, "step": 263, "step_loss": 0.2821243405342102 }, { "epoch": 0.78, "grad_norm": 1.2136481249314546, "kl": 0.32876235246658325, "learning_rate": 4.74569413495791e-06, "loss": 0.2818, "step": 264, "step_loss": 0.2800009548664093 }, { "epoch": 0.78, "grad_norm": 1.2613044034795275, "kl": 0.38935160636901855, "learning_rate": 4.743764812727722e-06, "loss": 0.3063, "step": 265, "step_loss": 0.3427608907222748 }, { "epoch": 0.78, "grad_norm": 1.0797444840559929, "kl": 0.3119351267814636, "learning_rate": 4.741828641387505e-06, "loss": 0.2676, "step": 266, "step_loss": 0.2635708153247833 }, { "epoch": 0.79, "grad_norm": 1.152621213192496, "kl": 0.36095455288887024, "learning_rate": 4.739885627588522e-06, "loss": 0.2825, "step": 267, "step_loss": 0.2881103754043579 }, { "epoch": 0.79, "grad_norm": 1.2119555039739955, "kl": 0.36039823293685913, "learning_rate": 4.73793577800554e-06, "loss": 0.3022, "step": 268, "step_loss": 0.29259294271469116 }, { "epoch": 0.79, "grad_norm": 1.1482140459926162, "kl": 0.2709002196788788, "learning_rate": 4.735979099336807e-06, "loss": 0.2862, "step": 269, "step_loss": 0.23157480359077454 }, { "epoch": 0.79, "grad_norm": 1.1692731329110126, "kl": 0.3389941453933716, "learning_rate": 4.734015598304034e-06, "loss": 0.2711, "step": 270, "step_loss": 0.2778770327568054 }, { "epoch": 0.8, "grad_norm": 1.2258612213402873, "kl": 0.35436713695526123, "learning_rate": 4.732045281652366e-06, "loss": 0.3091, "step": 271, "step_loss": 0.3284461200237274 }, { "epoch": 0.8, "grad_norm": 1.133815899776919, "kl": 0.31201237440109253, "learning_rate": 4.730068156150363e-06, "loss": 0.2656, "step": 272, "step_loss": 0.3073486387729645 }, { "epoch": 0.8, "grad_norm": 1.2017193050777555, "kl": 0.29038453102111816, "learning_rate": 4.728084228589973e-06, "loss": 0.2846, "step": 273, "step_loss": 0.2407042682170868 }, { "epoch": 0.81, "grad_norm": 1.2777490002160443, "kl": 0.2809942364692688, "learning_rate": 4.726093505786515e-06, "loss": 0.281, "step": 274, "step_loss": 0.2713775932788849 }, { "epoch": 0.81, "grad_norm": 1.1867307379834706, "kl": 0.39103490114212036, "learning_rate": 4.724095994578648e-06, "loss": 0.3022, "step": 275, "step_loss": 0.38512590527534485 }, { "epoch": 0.81, "grad_norm": 1.1650998402655737, "kl": 0.36318308115005493, "learning_rate": 4.72209170182835e-06, "loss": 0.2815, "step": 276, "step_loss": 0.3397858142852783 }, { "epoch": 0.81, "grad_norm": 1.0795928320116555, "kl": 0.36297377943992615, "learning_rate": 4.720080634420899e-06, "loss": 0.2818, "step": 277, "step_loss": 0.3083583116531372 }, { "epoch": 0.82, "grad_norm": 1.1780687430025762, "kl": 0.3422907590866089, "learning_rate": 4.7180627992648435e-06, "loss": 0.2789, "step": 278, "step_loss": 0.27315691113471985 }, { "epoch": 0.82, "grad_norm": 1.2103849934862, "kl": 0.29663997888565063, "learning_rate": 4.7160382032919824e-06, "loss": 0.2971, "step": 279, "step_loss": 0.27249854803085327 }, { "epoch": 0.82, "grad_norm": 1.3222585308836912, "kl": 0.3770950436592102, "learning_rate": 4.714006853457339e-06, "loss": 0.2972, "step": 280, "step_loss": 0.325216680765152 }, { "epoch": 0.83, "grad_norm": 1.162522628779284, "kl": 0.313273549079895, "learning_rate": 4.711968756739136e-06, "loss": 0.289, "step": 281, "step_loss": 0.30059880018234253 }, { "epoch": 0.83, "grad_norm": 1.2370630041790278, "kl": 0.3747837245464325, "learning_rate": 4.709923920138776e-06, "loss": 0.3016, "step": 282, "step_loss": 0.28908976912498474 }, { "epoch": 0.83, "grad_norm": 1.140130660961975, "kl": 0.3109118342399597, "learning_rate": 4.707872350680816e-06, "loss": 0.2839, "step": 283, "step_loss": 0.276600182056427 }, { "epoch": 0.83, "grad_norm": 1.082299712166179, "kl": 0.27061912417411804, "learning_rate": 4.705814055412938e-06, "loss": 0.266, "step": 284, "step_loss": 0.2920036315917969 }, { "epoch": 0.84, "grad_norm": 1.21073161805326, "kl": 0.318640798330307, "learning_rate": 4.703749041405932e-06, "loss": 0.2725, "step": 285, "step_loss": 0.2592398524284363 }, { "epoch": 0.84, "grad_norm": 1.219448433556087, "kl": 0.35519281029701233, "learning_rate": 4.701677315753668e-06, "loss": 0.2874, "step": 286, "step_loss": 0.3630865514278412 }, { "epoch": 0.84, "grad_norm": 1.2097930175912097, "kl": 0.41031721234321594, "learning_rate": 4.699598885573072e-06, "loss": 0.3032, "step": 287, "step_loss": 0.3249673843383789 }, { "epoch": 0.85, "grad_norm": 1.1521100017307662, "kl": 0.3674570620059967, "learning_rate": 4.697513758004102e-06, "loss": 0.2849, "step": 288, "step_loss": 0.30098646879196167 }, { "epoch": 0.85, "grad_norm": 1.1074540740753043, "kl": 0.32119742035865784, "learning_rate": 4.695421940209723e-06, "loss": 0.2832, "step": 289, "step_loss": 0.2897493541240692 }, { "epoch": 0.85, "grad_norm": 1.2293554204206822, "kl": 0.3377346098423004, "learning_rate": 4.693323439375885e-06, "loss": 0.293, "step": 290, "step_loss": 0.2627705931663513 }, { "epoch": 0.86, "grad_norm": 1.2069388480758467, "kl": 0.3485328257083893, "learning_rate": 4.691218262711491e-06, "loss": 0.2867, "step": 291, "step_loss": 0.38160112500190735 }, { "epoch": 0.86, "grad_norm": 1.1419057253236133, "kl": 0.29920992255210876, "learning_rate": 4.6891064174483816e-06, "loss": 0.2793, "step": 292, "step_loss": 0.27294811606407166 }, { "epoch": 0.86, "grad_norm": 1.1050106622647566, "kl": 0.31865233182907104, "learning_rate": 4.686987910841304e-06, "loss": 0.2834, "step": 293, "step_loss": 0.24811354279518127 }, { "epoch": 0.86, "grad_norm": 1.2316064086381218, "kl": 0.36698243021965027, "learning_rate": 4.684862750167891e-06, "loss": 0.288, "step": 294, "step_loss": 0.3681890070438385 }, { "epoch": 0.87, "grad_norm": 1.0625124958649887, "kl": 0.31484031677246094, "learning_rate": 4.6827309427286295e-06, "loss": 0.2633, "step": 295, "step_loss": 0.24335134029388428 }, { "epoch": 0.87, "grad_norm": 1.0972510299173677, "kl": 0.37381210923194885, "learning_rate": 4.680592495846845e-06, "loss": 0.2779, "step": 296, "step_loss": 0.34890639781951904 }, { "epoch": 0.87, "grad_norm": 1.2397594353527759, "kl": 0.3720863163471222, "learning_rate": 4.678447416868667e-06, "loss": 0.2813, "step": 297, "step_loss": 0.335337370634079 }, { "epoch": 0.88, "grad_norm": 1.085078132679382, "kl": 0.37017738819122314, "learning_rate": 4.67629571316301e-06, "loss": 0.2752, "step": 298, "step_loss": 0.29323574900627136 }, { "epoch": 0.88, "grad_norm": 1.1972337254915437, "kl": 0.3234942853450775, "learning_rate": 4.6741373921215475e-06, "loss": 0.293, "step": 299, "step_loss": 0.2743943929672241 }, { "epoch": 0.88, "grad_norm": 1.354145558510618, "kl": 0.408852756023407, "learning_rate": 4.671972461158682e-06, "loss": 0.3091, "step": 300, "step_loss": 0.3570805788040161 }, { "epoch": 0.88, "grad_norm": 1.308516554192417, "kl": 0.33082523941993713, "learning_rate": 4.669800927711525e-06, "loss": 0.2957, "step": 301, "step_loss": 0.3042528033256531 }, { "epoch": 0.89, "grad_norm": 1.0883509759412517, "kl": 0.36321133375167847, "learning_rate": 4.667622799239869e-06, "loss": 0.2822, "step": 302, "step_loss": 0.3216032385826111 }, { "epoch": 0.89, "grad_norm": 1.263711518929017, "kl": 0.2793586850166321, "learning_rate": 4.665438083226163e-06, "loss": 0.2997, "step": 303, "step_loss": 0.2776317000389099 }, { "epoch": 0.89, "grad_norm": 1.1674457067837019, "kl": 0.33832457661628723, "learning_rate": 4.663246787175483e-06, "loss": 0.3, "step": 304, "step_loss": 0.31598201394081116 }, { "epoch": 0.9, "grad_norm": 1.1988014340031905, "kl": 0.36085131764411926, "learning_rate": 4.661048918615513e-06, "loss": 0.2791, "step": 305, "step_loss": 0.302070677280426 }, { "epoch": 0.9, "grad_norm": 1.1315893146442402, "kl": 0.33112025260925293, "learning_rate": 4.658844485096512e-06, "loss": 0.2691, "step": 306, "step_loss": 0.29177939891815186 }, { "epoch": 0.9, "grad_norm": 1.2072921211772827, "kl": 0.3359392583370209, "learning_rate": 4.656633494191293e-06, "loss": 0.2941, "step": 307, "step_loss": 0.3084091246128082 }, { "epoch": 0.91, "grad_norm": 1.2218266192172693, "kl": 0.34884458780288696, "learning_rate": 4.654415953495196e-06, "loss": 0.2954, "step": 308, "step_loss": 0.28813090920448303 }, { "epoch": 0.91, "grad_norm": 1.27050923164869, "kl": 0.38197797536849976, "learning_rate": 4.65219187062606e-06, "loss": 0.3116, "step": 309, "step_loss": 0.29677310585975647 }, { "epoch": 0.91, "grad_norm": 1.1599676961552565, "kl": 0.3238053321838379, "learning_rate": 4.649961253224198e-06, "loss": 0.2855, "step": 310, "step_loss": 0.31362178921699524 }, { "epoch": 0.91, "grad_norm": 1.1310288626730198, "kl": 0.3672914505004883, "learning_rate": 4.647724108952373e-06, "loss": 0.273, "step": 311, "step_loss": 0.2773699462413788 }, { "epoch": 0.92, "grad_norm": 1.2079649280038118, "kl": 0.29229724407196045, "learning_rate": 4.645480445495767e-06, "loss": 0.3097, "step": 312, "step_loss": 0.3600565195083618 }, { "epoch": 0.92, "grad_norm": 1.1441975650704101, "kl": 0.33793753385543823, "learning_rate": 4.643230270561956e-06, "loss": 0.2872, "step": 313, "step_loss": 0.2990878224372864 }, { "epoch": 0.92, "grad_norm": 1.1546332487233124, "kl": 0.2881549596786499, "learning_rate": 4.640973591880889e-06, "loss": 0.2827, "step": 314, "step_loss": 0.2339860498905182 }, { "epoch": 0.93, "grad_norm": 1.2470856906835874, "kl": 0.319610059261322, "learning_rate": 4.638710417204855e-06, "loss": 0.2845, "step": 315, "step_loss": 0.327451229095459 }, { "epoch": 0.93, "grad_norm": 1.0504828245555484, "kl": 0.25614118576049805, "learning_rate": 4.636440754308458e-06, "loss": 0.2556, "step": 316, "step_loss": 0.21319641172885895 }, { "epoch": 0.93, "grad_norm": 1.11856029436489, "kl": 0.3035759925842285, "learning_rate": 4.63416461098859e-06, "loss": 0.2848, "step": 317, "step_loss": 0.2784833014011383 }, { "epoch": 0.93, "grad_norm": 1.0668814023255409, "kl": 0.29398462176322937, "learning_rate": 4.631881995064406e-06, "loss": 0.2744, "step": 318, "step_loss": 0.26396968960762024 }, { "epoch": 0.94, "grad_norm": 1.0712893824198688, "kl": 0.32271263003349304, "learning_rate": 4.629592914377298e-06, "loss": 0.2668, "step": 319, "step_loss": 0.273529052734375 }, { "epoch": 0.94, "grad_norm": 1.2737157435766238, "kl": 0.344595730304718, "learning_rate": 4.6272973767908615e-06, "loss": 0.3066, "step": 320, "step_loss": 0.3095177710056305 }, { "epoch": 0.94, "grad_norm": 1.0251746467924616, "kl": 0.28812479972839355, "learning_rate": 4.624995390190878e-06, "loss": 0.2804, "step": 321, "step_loss": 0.24252820014953613 }, { "epoch": 0.95, "grad_norm": 1.1186582847380844, "kl": 0.40985843539237976, "learning_rate": 4.62268696248528e-06, "loss": 0.2782, "step": 322, "step_loss": 0.27699118852615356 }, { "epoch": 0.95, "grad_norm": 1.181794615240478, "kl": 0.2965797185897827, "learning_rate": 4.620372101604128e-06, "loss": 0.294, "step": 323, "step_loss": 0.25799939036369324 }, { "epoch": 0.95, "grad_norm": 1.2137603344462602, "kl": 0.3655190169811249, "learning_rate": 4.618050815499582e-06, "loss": 0.2857, "step": 324, "step_loss": 0.30608299374580383 }, { "epoch": 0.96, "grad_norm": 1.181964659595807, "kl": 0.3023182451725006, "learning_rate": 4.615723112145875e-06, "loss": 0.2832, "step": 325, "step_loss": 0.2613093852996826 }, { "epoch": 0.96, "grad_norm": 1.228179157800439, "kl": 0.3194785714149475, "learning_rate": 4.613388999539283e-06, "loss": 0.2976, "step": 326, "step_loss": 0.3578476905822754 }, { "epoch": 0.96, "grad_norm": 1.1277959636999064, "kl": 0.3428605794906616, "learning_rate": 4.6110484856981025e-06, "loss": 0.2771, "step": 327, "step_loss": 0.2329404354095459 }, { "epoch": 0.96, "grad_norm": 1.2069845653505489, "kl": 0.425138384103775, "learning_rate": 4.608701578662618e-06, "loss": 0.2977, "step": 328, "step_loss": 0.3749895393848419 }, { "epoch": 0.97, "grad_norm": 1.1076692442818552, "kl": 0.2908833622932434, "learning_rate": 4.606348286495074e-06, "loss": 0.2861, "step": 329, "step_loss": 0.28232869505882263 }, { "epoch": 0.97, "grad_norm": 1.1600108842297994, "kl": 0.3712472915649414, "learning_rate": 4.6039886172796555e-06, "loss": 0.2825, "step": 330, "step_loss": 0.29888463020324707 }, { "epoch": 0.97, "grad_norm": 1.0826571682025825, "kl": 0.3151686489582062, "learning_rate": 4.6016225791224504e-06, "loss": 0.2826, "step": 331, "step_loss": 0.25253331661224365 }, { "epoch": 0.98, "grad_norm": 1.0721510598381356, "kl": 0.30766239762306213, "learning_rate": 4.5992501801514235e-06, "loss": 0.2802, "step": 332, "step_loss": 0.25241219997406006 }, { "epoch": 0.98, "grad_norm": 1.149128958736038, "kl": 0.31056687235832214, "learning_rate": 4.596871428516397e-06, "loss": 0.2778, "step": 333, "step_loss": 0.2913603186607361 }, { "epoch": 0.98, "grad_norm": 1.0863838444877447, "kl": 0.37738847732543945, "learning_rate": 4.594486332389011e-06, "loss": 0.2736, "step": 334, "step_loss": 0.3190591335296631 }, { "epoch": 0.98, "grad_norm": 1.2058838263807778, "kl": 0.3031269609928131, "learning_rate": 4.592094899962702e-06, "loss": 0.2911, "step": 335, "step_loss": 0.23730549216270447 }, { "epoch": 0.99, "grad_norm": 1.1861398398638574, "kl": 0.3154381513595581, "learning_rate": 4.589697139452673e-06, "loss": 0.295, "step": 336, "step_loss": 0.30010032653808594 }, { "epoch": 0.99, "grad_norm": 1.1654254307836165, "kl": 0.3234802186489105, "learning_rate": 4.5872930590958685e-06, "loss": 0.2821, "step": 337, "step_loss": 0.3050956428050995 }, { "epoch": 0.99, "grad_norm": 1.0142973438347946, "kl": 0.3032388389110565, "learning_rate": 4.584882667150939e-06, "loss": 0.2637, "step": 338, "step_loss": 0.25487464666366577 }, { "epoch": 1.0, "grad_norm": 1.1306032144314904, "kl": 0.37623462080955505, "learning_rate": 4.582465971898219e-06, "loss": 0.289, "step": 339, "step_loss": 0.3044344484806061 }, { "epoch": 1.0, "grad_norm": 1.1724697129866262, "kl": 0.3717675507068634, "learning_rate": 4.580042981639699e-06, "loss": 0.2842, "step": 340, "step_loss": 0.30331289768218994 }, { "epoch": 1.0, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_loss": 1.5836049318313599, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_runtime": 14.7278, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_samples_per_second": 6.79, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_steps_per_second": 0.883, "step": 340 }, { "epoch": 1.0, "grad_norm": 0.9909952614947144, "kl": 0.3604995012283325, "learning_rate": 4.577613704698989e-06, "loss": 0.2173, "step": 341, "step_loss": 1.2625408172607422 }, { "epoch": 1.01, "grad_norm": 1.122598641549428, "kl": 0.37703055143356323, "learning_rate": 4.5751781494213e-06, "loss": 0.2233, "step": 342, "step_loss": 0.24784518778324127 }, { "epoch": 1.01, "grad_norm": 0.9555222272528335, "kl": 0.3994651734828949, "learning_rate": 4.572736324173409e-06, "loss": 0.2154, "step": 343, "step_loss": 0.24239662289619446 }, { "epoch": 1.01, "grad_norm": 1.0100151801384887, "kl": 0.40937453508377075, "learning_rate": 4.570288237343632e-06, "loss": 0.2098, "step": 344, "step_loss": 0.25453901290893555 }, { "epoch": 1.01, "grad_norm": 0.9870747676771315, "kl": 0.3054189085960388, "learning_rate": 4.567833897341797e-06, "loss": 0.2083, "step": 345, "step_loss": 0.19117772579193115 }, { "epoch": 1.02, "grad_norm": 1.1061807368303616, "kl": 0.41628479957580566, "learning_rate": 4.565373312599209e-06, "loss": 0.2152, "step": 346, "step_loss": 0.22663772106170654 }, { "epoch": 1.02, "grad_norm": 1.0384373325803509, "kl": 0.3613884449005127, "learning_rate": 4.56290649156863e-06, "loss": 0.2156, "step": 347, "step_loss": 0.18198919296264648 }, { "epoch": 1.02, "grad_norm": 1.0658063843250276, "kl": 0.3354627192020416, "learning_rate": 4.560433442724243e-06, "loss": 0.2078, "step": 348, "step_loss": 0.16816985607147217 }, { "epoch": 1.03, "grad_norm": 1.1109132713111654, "kl": 0.4467100501060486, "learning_rate": 4.5579541745616254e-06, "loss": 0.2177, "step": 349, "step_loss": 0.2320152074098587 }, { "epoch": 1.03, "grad_norm": 1.1929174001218243, "kl": 0.456093430519104, "learning_rate": 4.555468695597721e-06, "loss": 0.2167, "step": 350, "step_loss": 0.22836454212665558 }, { "epoch": 1.03, "grad_norm": 1.183660347956496, "kl": 0.39071205258369446, "learning_rate": 4.552977014370807e-06, "loss": 0.2235, "step": 351, "step_loss": 0.2216327041387558 }, { "epoch": 1.03, "grad_norm": 1.113360044133729, "kl": 0.40837928652763367, "learning_rate": 4.550479139440466e-06, "loss": 0.2133, "step": 352, "step_loss": 0.20445051789283752 }, { "epoch": 1.04, "grad_norm": 1.1459353950591424, "kl": 0.355142205953598, "learning_rate": 4.547975079387564e-06, "loss": 0.2272, "step": 353, "step_loss": 0.22001862525939941 }, { "epoch": 1.04, "grad_norm": 1.1749772462772283, "kl": 0.3855653405189514, "learning_rate": 4.545464842814208e-06, "loss": 0.2217, "step": 354, "step_loss": 0.22728465497493744 }, { "epoch": 1.04, "grad_norm": 1.015463016503802, "kl": 0.3446847200393677, "learning_rate": 4.542948438343726e-06, "loss": 0.2058, "step": 355, "step_loss": 0.18022528290748596 }, { "epoch": 1.05, "grad_norm": 1.0669292034040894, "kl": 0.36371520161628723, "learning_rate": 4.5404258746206314e-06, "loss": 0.21, "step": 356, "step_loss": 0.2281932234764099 }, { "epoch": 1.05, "grad_norm": 1.020434757519867, "kl": 0.40964460372924805, "learning_rate": 4.537897160310602e-06, "loss": 0.2063, "step": 357, "step_loss": 0.20418326556682587 }, { "epoch": 1.05, "grad_norm": 1.0651999836970036, "kl": 0.328068345785141, "learning_rate": 4.535362304100439e-06, "loss": 0.2153, "step": 358, "step_loss": 0.18197788298130035 }, { "epoch": 1.06, "grad_norm": 1.0161322060783642, "kl": 0.3565504848957062, "learning_rate": 4.532821314698044e-06, "loss": 0.2113, "step": 359, "step_loss": 0.22694353759288788 }, { "epoch": 1.06, "grad_norm": 1.0601947614383682, "kl": 0.34940865635871887, "learning_rate": 4.5302742008323895e-06, "loss": 0.2244, "step": 360, "step_loss": 0.18668481707572937 }, { "epoch": 1.06, "grad_norm": 1.0190544548173317, "kl": 0.30953383445739746, "learning_rate": 4.527720971253486e-06, "loss": 0.2146, "step": 361, "step_loss": 0.2063293755054474 }, { "epoch": 1.06, "grad_norm": 1.0542367113097721, "kl": 0.4005735516548157, "learning_rate": 4.525161634732354e-06, "loss": 0.2043, "step": 362, "step_loss": 0.20447780191898346 }, { "epoch": 1.07, "grad_norm": 1.240603468633649, "kl": 0.40291449427604675, "learning_rate": 4.52259620006099e-06, "loss": 0.2211, "step": 363, "step_loss": 0.19387193024158478 }, { "epoch": 1.07, "grad_norm": 1.1166355530237915, "kl": 0.409071147441864, "learning_rate": 4.520024676052342e-06, "loss": 0.2141, "step": 364, "step_loss": 0.2120700180530548 }, { "epoch": 1.07, "grad_norm": 1.1835634208796884, "kl": 0.42897555232048035, "learning_rate": 4.517447071540277e-06, "loss": 0.2129, "step": 365, "step_loss": 0.24478043615818024 }, { "epoch": 1.08, "grad_norm": 1.10221612084664, "kl": 0.4204740822315216, "learning_rate": 4.514863395379548e-06, "loss": 0.2127, "step": 366, "step_loss": 0.2228332757949829 }, { "epoch": 1.08, "grad_norm": 1.114889221761073, "kl": 0.3709118664264679, "learning_rate": 4.512273656445767e-06, "loss": 0.2216, "step": 367, "step_loss": 0.204238623380661 }, { "epoch": 1.08, "grad_norm": 1.1697263984215087, "kl": 0.36464443802833557, "learning_rate": 4.509677863635373e-06, "loss": 0.2152, "step": 368, "step_loss": 0.21155373752117157 }, { "epoch": 1.08, "grad_norm": 1.1455769887916711, "kl": 0.33827337622642517, "learning_rate": 4.507076025865602e-06, "loss": 0.2025, "step": 369, "step_loss": 0.15999384224414825 }, { "epoch": 1.09, "grad_norm": 1.18888099578414, "kl": 0.38023340702056885, "learning_rate": 4.504468152074454e-06, "loss": 0.2221, "step": 370, "step_loss": 0.22181765735149384 }, { "epoch": 1.09, "grad_norm": 1.1432354202041541, "kl": 0.4160056710243225, "learning_rate": 4.501854251220667e-06, "loss": 0.2184, "step": 371, "step_loss": 0.2242666780948639 }, { "epoch": 1.09, "grad_norm": 1.05396341263562, "kl": 0.324006050825119, "learning_rate": 4.499234332283683e-06, "loss": 0.2067, "step": 372, "step_loss": 0.23381561040878296 }, { "epoch": 1.1, "grad_norm": 1.1234179605156285, "kl": 0.4134640395641327, "learning_rate": 4.496608404263617e-06, "loss": 0.2267, "step": 373, "step_loss": 0.2217472344636917 }, { "epoch": 1.1, "grad_norm": 1.0823406851201798, "kl": 0.2976873219013214, "learning_rate": 4.493976476181227e-06, "loss": 0.2146, "step": 374, "step_loss": 0.19449269771575928 }, { "epoch": 1.1, "grad_norm": 1.0386992586850838, "kl": 0.40376949310302734, "learning_rate": 4.491338557077883e-06, "loss": 0.2124, "step": 375, "step_loss": 0.2595367729663849 }, { "epoch": 1.11, "grad_norm": 1.1201199545655665, "kl": 0.46266913414001465, "learning_rate": 4.488694656015535e-06, "loss": 0.2176, "step": 376, "step_loss": 0.22357900440692902 }, { "epoch": 1.11, "grad_norm": 1.0754291235160311, "kl": 0.33384445309638977, "learning_rate": 4.486044782076683e-06, "loss": 0.2244, "step": 377, "step_loss": 0.16794537007808685 }, { "epoch": 1.11, "grad_norm": 1.0073994521995542, "kl": 0.27469688653945923, "learning_rate": 4.483388944364345e-06, "loss": 0.2203, "step": 378, "step_loss": 0.14068152010440826 }, { "epoch": 1.11, "grad_norm": 1.081773692852582, "kl": 0.4029933512210846, "learning_rate": 4.480727152002029e-06, "loss": 0.2315, "step": 379, "step_loss": 0.2484932690858841 }, { "epoch": 1.12, "grad_norm": 0.9668680107071213, "kl": 0.41677016019821167, "learning_rate": 4.478059414133695e-06, "loss": 0.2004, "step": 380, "step_loss": 0.19721892476081848 }, { "epoch": 1.12, "grad_norm": 1.001005650539627, "kl": 0.31512561440467834, "learning_rate": 4.47538573992373e-06, "loss": 0.205, "step": 381, "step_loss": 0.20380663871765137 }, { "epoch": 1.12, "grad_norm": 1.0947450062868074, "kl": 0.357501357793808, "learning_rate": 4.472706138556911e-06, "loss": 0.2164, "step": 382, "step_loss": 0.18332478404045105 }, { "epoch": 1.13, "grad_norm": 1.1706570208600346, "kl": 0.3620475232601166, "learning_rate": 4.4700206192383796e-06, "loss": 0.2252, "step": 383, "step_loss": 0.19933828711509705 }, { "epoch": 1.13, "grad_norm": 1.226064605249827, "kl": 0.2993457019329071, "learning_rate": 4.4673291911936064e-06, "loss": 0.2265, "step": 384, "step_loss": 0.17195503413677216 }, { "epoch": 1.13, "grad_norm": 1.1576130346407478, "kl": 0.35498231649398804, "learning_rate": 4.464631863668357e-06, "loss": 0.2108, "step": 385, "step_loss": 0.1912364363670349 }, { "epoch": 1.13, "grad_norm": 1.0802215632686387, "kl": 0.3443387448787689, "learning_rate": 4.461928645928667e-06, "loss": 0.2105, "step": 386, "step_loss": 0.27471888065338135 }, { "epoch": 1.14, "grad_norm": 0.9802948466940768, "kl": 0.3694034218788147, "learning_rate": 4.459219547260805e-06, "loss": 0.1983, "step": 387, "step_loss": 0.21388615667819977 }, { "epoch": 1.14, "grad_norm": 1.0063078315772995, "kl": 0.2925432026386261, "learning_rate": 4.456504576971243e-06, "loss": 0.2123, "step": 388, "step_loss": 0.20165732502937317 }, { "epoch": 1.14, "grad_norm": 1.0786559345589601, "kl": 0.29720914363861084, "learning_rate": 4.453783744386621e-06, "loss": 0.2076, "step": 389, "step_loss": 0.22997143864631653 }, { "epoch": 1.15, "grad_norm": 1.0602165185359143, "kl": 0.31571799516677856, "learning_rate": 4.451057058853721e-06, "loss": 0.2134, "step": 390, "step_loss": 0.17782820761203766 }, { "epoch": 1.15, "grad_norm": 1.0133394273161123, "kl": 0.30432724952697754, "learning_rate": 4.448324529739429e-06, "loss": 0.1898, "step": 391, "step_loss": 0.15238864719867706 }, { "epoch": 1.15, "grad_norm": 0.9804056306864453, "kl": 0.3994058072566986, "learning_rate": 4.445586166430706e-06, "loss": 0.1903, "step": 392, "step_loss": 0.2173841893672943 }, { "epoch": 1.16, "grad_norm": 1.123296175942593, "kl": 0.30720776319503784, "learning_rate": 4.442841978334556e-06, "loss": 0.2216, "step": 393, "step_loss": 0.19546273350715637 }, { "epoch": 1.16, "grad_norm": 1.0359920222691423, "kl": 0.35486575961112976, "learning_rate": 4.440091974877989e-06, "loss": 0.2129, "step": 394, "step_loss": 0.20697163045406342 }, { "epoch": 1.16, "grad_norm": 1.0849629839063242, "kl": 0.33868688344955444, "learning_rate": 4.437336165507998e-06, "loss": 0.2166, "step": 395, "step_loss": 0.21048006415367126 }, { "epoch": 1.16, "grad_norm": 1.0951854958685978, "kl": 0.4097781181335449, "learning_rate": 4.4345745596915164e-06, "loss": 0.2149, "step": 396, "step_loss": 0.22375498712062836 }, { "epoch": 1.17, "grad_norm": 1.077971005153329, "kl": 0.3999599516391754, "learning_rate": 4.431807166915393e-06, "loss": 0.1962, "step": 397, "step_loss": 0.20573902130126953 }, { "epoch": 1.17, "grad_norm": 1.1986240067763814, "kl": 0.33355939388275146, "learning_rate": 4.429033996686352e-06, "loss": 0.2158, "step": 398, "step_loss": 0.20132023096084595 }, { "epoch": 1.17, "grad_norm": 1.0260499777997851, "kl": 0.435636967420578, "learning_rate": 4.426255058530969e-06, "loss": 0.2093, "step": 399, "step_loss": 0.2543203830718994 }, { "epoch": 1.18, "grad_norm": 1.1062815074420902, "kl": 0.313290536403656, "learning_rate": 4.423470361995632e-06, "loss": 0.2169, "step": 400, "step_loss": 0.1880669891834259 }, { "epoch": 1.18, "grad_norm": 1.1099408278287306, "kl": 0.384952574968338, "learning_rate": 4.420679916646512e-06, "loss": 0.2167, "step": 401, "step_loss": 0.21090340614318848 }, { "epoch": 1.18, "grad_norm": 1.072595417050862, "kl": 0.32443416118621826, "learning_rate": 4.417883732069525e-06, "loss": 0.2069, "step": 402, "step_loss": 0.2197069674730301 }, { "epoch": 1.18, "grad_norm": 1.1520310317038895, "kl": 0.4313489496707916, "learning_rate": 4.4150818178703084e-06, "loss": 0.2164, "step": 403, "step_loss": 0.23380175232887268 }, { "epoch": 1.19, "grad_norm": 1.0783958233120303, "kl": 0.4069855809211731, "learning_rate": 4.412274183674177e-06, "loss": 0.2254, "step": 404, "step_loss": 0.22361119091510773 }, { "epoch": 1.19, "grad_norm": 1.063343526682731, "kl": 0.32530197501182556, "learning_rate": 4.409460839126099e-06, "loss": 0.1957, "step": 405, "step_loss": 0.20308908820152283 }, { "epoch": 1.19, "grad_norm": 1.1176352237327234, "kl": 0.44647660851478577, "learning_rate": 4.406641793890658e-06, "loss": 0.2153, "step": 406, "step_loss": 0.2192254364490509 }, { "epoch": 1.2, "grad_norm": 1.0704852612143154, "kl": 0.3989701271057129, "learning_rate": 4.40381705765202e-06, "loss": 0.2141, "step": 407, "step_loss": 0.2276991605758667 }, { "epoch": 1.2, "grad_norm": 1.1043825778150693, "kl": 0.4035341739654541, "learning_rate": 4.400986640113903e-06, "loss": 0.2179, "step": 408, "step_loss": 0.24566595256328583 }, { "epoch": 1.2, "grad_norm": 1.0492597860234878, "kl": 0.34541037678718567, "learning_rate": 4.398150550999538e-06, "loss": 0.2097, "step": 409, "step_loss": 0.22366544604301453 }, { "epoch": 1.21, "grad_norm": 1.104701648821187, "kl": 0.47003430128097534, "learning_rate": 4.395308800051645e-06, "loss": 0.2152, "step": 410, "step_loss": 0.2739095389842987 }, { "epoch": 1.21, "grad_norm": 1.1432191403701684, "kl": 0.3003239333629608, "learning_rate": 4.392461397032388e-06, "loss": 0.2209, "step": 411, "step_loss": 0.20217368006706238 }, { "epoch": 1.21, "grad_norm": 1.1006520749844693, "kl": 0.34780094027519226, "learning_rate": 4.389608351723354e-06, "loss": 0.2222, "step": 412, "step_loss": 0.18477079272270203 }, { "epoch": 1.21, "grad_norm": 1.1433499590320158, "kl": 0.29936301708221436, "learning_rate": 4.386749673925507e-06, "loss": 0.2136, "step": 413, "step_loss": 0.1923401653766632 }, { "epoch": 1.22, "grad_norm": 1.0604657445965895, "kl": 0.3893413245677948, "learning_rate": 4.383885373459162e-06, "loss": 0.2077, "step": 414, "step_loss": 0.22892388701438904 }, { "epoch": 1.22, "grad_norm": 1.1153994570005237, "kl": 0.4249955713748932, "learning_rate": 4.381015460163949e-06, "loss": 0.213, "step": 415, "step_loss": 0.22074082493782043 }, { "epoch": 1.22, "grad_norm": 1.0044730994428235, "kl": 0.42945361137390137, "learning_rate": 4.378139943898782e-06, "loss": 0.201, "step": 416, "step_loss": 0.2300502359867096 }, { "epoch": 1.23, "grad_norm": 1.1201498198141744, "kl": 0.31198564171791077, "learning_rate": 4.375258834541819e-06, "loss": 0.2335, "step": 417, "step_loss": 0.18508249521255493 }, { "epoch": 1.23, "grad_norm": 1.1207096425422391, "kl": 0.3583974540233612, "learning_rate": 4.372372141990433e-06, "loss": 0.2065, "step": 418, "step_loss": 0.1857309639453888 }, { "epoch": 1.23, "grad_norm": 1.1496661225032172, "kl": 0.3583613336086273, "learning_rate": 4.369479876161179e-06, "loss": 0.2177, "step": 419, "step_loss": 0.224288210272789 }, { "epoch": 1.23, "grad_norm": 1.0784304365856952, "kl": 0.3118525445461273, "learning_rate": 4.366582046989756e-06, "loss": 0.2067, "step": 420, "step_loss": 0.1751691997051239 }, { "epoch": 1.24, "grad_norm": 1.1886899720468163, "kl": 0.4487501382827759, "learning_rate": 4.363678664430972e-06, "loss": 0.2237, "step": 421, "step_loss": 0.25023704767227173 }, { "epoch": 1.24, "grad_norm": 1.1745967665142434, "kl": 0.33707478642463684, "learning_rate": 4.360769738458717e-06, "loss": 0.2135, "step": 422, "step_loss": 0.18086591362953186 }, { "epoch": 1.24, "grad_norm": 1.092498622016125, "kl": 0.3724205791950226, "learning_rate": 4.3578552790659215e-06, "loss": 0.2313, "step": 423, "step_loss": 0.22714251279830933 }, { "epoch": 1.25, "grad_norm": 1.1190105848672534, "kl": 0.2946886420249939, "learning_rate": 4.3549352962645256e-06, "loss": 0.2036, "step": 424, "step_loss": 0.20033493638038635 }, { "epoch": 1.25, "grad_norm": 1.1415511904160704, "kl": 0.2983555495738983, "learning_rate": 4.352009800085442e-06, "loss": 0.2136, "step": 425, "step_loss": 0.16325783729553223 }, { "epoch": 1.25, "grad_norm": 1.12564799402316, "kl": 0.40603286027908325, "learning_rate": 4.349078800578527e-06, "loss": 0.2225, "step": 426, "step_loss": 0.24971996247768402 }, { "epoch": 1.26, "grad_norm": 1.0847789979247071, "kl": 0.2904649078845978, "learning_rate": 4.346142307812537e-06, "loss": 0.2134, "step": 427, "step_loss": 0.21226723492145538 }, { "epoch": 1.26, "grad_norm": 1.1535493008057434, "kl": 0.4135288596153259, "learning_rate": 4.3432003318751034e-06, "loss": 0.218, "step": 428, "step_loss": 0.24029088020324707 }, { "epoch": 1.26, "grad_norm": 1.1822688678890323, "kl": 0.3823299705982208, "learning_rate": 4.340252882872693e-06, "loss": 0.2093, "step": 429, "step_loss": 0.23636040091514587 }, { "epoch": 1.26, "grad_norm": 1.218665217849662, "kl": 0.34758618474006653, "learning_rate": 4.337299970930573e-06, "loss": 0.2031, "step": 430, "step_loss": 0.1947343796491623 }, { "epoch": 1.27, "grad_norm": 1.12952236007926, "kl": 0.3034479022026062, "learning_rate": 4.3343416061927784e-06, "loss": 0.2125, "step": 431, "step_loss": 0.21196305751800537 }, { "epoch": 1.27, "grad_norm": 1.1167424522607692, "kl": 0.3147704005241394, "learning_rate": 4.331377798822074e-06, "loss": 0.2131, "step": 432, "step_loss": 0.22102078795433044 }, { "epoch": 1.27, "grad_norm": 1.0776199042214483, "kl": 0.38971424102783203, "learning_rate": 4.328408558999926e-06, "loss": 0.2129, "step": 433, "step_loss": 0.2175690084695816 }, { "epoch": 1.28, "grad_norm": 1.247807487250352, "kl": 0.37261104583740234, "learning_rate": 4.325433896926455e-06, "loss": 0.233, "step": 434, "step_loss": 0.17759710550308228 }, { "epoch": 1.28, "grad_norm": 1.0828914253017792, "kl": 0.4615696668624878, "learning_rate": 4.322453822820416e-06, "loss": 0.2068, "step": 435, "step_loss": 0.2164827585220337 }, { "epoch": 1.28, "grad_norm": 1.1538866091782574, "kl": 0.34330347180366516, "learning_rate": 4.319468346919151e-06, "loss": 0.2141, "step": 436, "step_loss": 0.17444977164268494 }, { "epoch": 1.28, "grad_norm": 1.0771764576951577, "kl": 0.3454141616821289, "learning_rate": 4.316477479478562e-06, "loss": 0.2071, "step": 437, "step_loss": 0.1811198741197586 }, { "epoch": 1.29, "grad_norm": 1.0306242648319286, "kl": 0.33969008922576904, "learning_rate": 4.3134812307730685e-06, "loss": 0.2226, "step": 438, "step_loss": 0.20342102646827698 }, { "epoch": 1.29, "grad_norm": 1.204874661424694, "kl": 0.35218775272369385, "learning_rate": 4.310479611095579e-06, "loss": 0.2297, "step": 439, "step_loss": 0.2256166785955429 }, { "epoch": 1.29, "grad_norm": 1.1018103259900038, "kl": 0.3603362441062927, "learning_rate": 4.3074726307574515e-06, "loss": 0.2154, "step": 440, "step_loss": 0.1923496127128601 }, { "epoch": 1.3, "grad_norm": 1.1380166709119617, "kl": 0.3439081907272339, "learning_rate": 4.304460300088461e-06, "loss": 0.2132, "step": 441, "step_loss": 0.1734137088060379 }, { "epoch": 1.3, "grad_norm": 1.2016707710129002, "kl": 0.5097121000289917, "learning_rate": 4.3014426294367585e-06, "loss": 0.2059, "step": 442, "step_loss": 0.24441494047641754 }, { "epoch": 1.3, "grad_norm": 1.095054101053829, "kl": 0.37071746587753296, "learning_rate": 4.298419629168844e-06, "loss": 0.2075, "step": 443, "step_loss": 0.22421976923942566 }, { "epoch": 1.31, "grad_norm": 1.1424892814851308, "kl": 0.3091402053833008, "learning_rate": 4.295391309669523e-06, "loss": 0.2133, "step": 444, "step_loss": 0.19774490594863892 }, { "epoch": 1.31, "grad_norm": 1.0489332618798233, "kl": 0.3951050341129303, "learning_rate": 4.292357681341875e-06, "loss": 0.2173, "step": 445, "step_loss": 0.23351700603961945 }, { "epoch": 1.31, "grad_norm": 1.1201440862803222, "kl": 0.31920719146728516, "learning_rate": 4.289318754607216e-06, "loss": 0.2119, "step": 446, "step_loss": 0.19813159108161926 }, { "epoch": 1.31, "grad_norm": 1.109579067534783, "kl": 0.36979711055755615, "learning_rate": 4.286274539905064e-06, "loss": 0.2214, "step": 447, "step_loss": 0.2170843780040741 }, { "epoch": 1.32, "grad_norm": 1.1248189065136833, "kl": 0.4319002628326416, "learning_rate": 4.283225047693102e-06, "loss": 0.2216, "step": 448, "step_loss": 0.23914441466331482 }, { "epoch": 1.32, "grad_norm": 1.0143097210255512, "kl": 0.44951027631759644, "learning_rate": 4.280170288447145e-06, "loss": 0.19, "step": 449, "step_loss": 0.23710179328918457 }, { "epoch": 1.32, "grad_norm": 1.1324153993387862, "kl": 0.38769853115081787, "learning_rate": 4.277110272661098e-06, "loss": 0.2155, "step": 450, "step_loss": 0.201776921749115 }, { "epoch": 1.33, "grad_norm": 1.1440937390786563, "kl": 0.4149802625179291, "learning_rate": 4.2740450108469276e-06, "loss": 0.2167, "step": 451, "step_loss": 0.22473248839378357 }, { "epoch": 1.33, "grad_norm": 1.1169649315923986, "kl": 0.4232461452484131, "learning_rate": 4.270974513534617e-06, "loss": 0.2186, "step": 452, "step_loss": 0.22173169255256653 }, { "epoch": 1.33, "grad_norm": 1.0911796681416763, "kl": 0.36605626344680786, "learning_rate": 4.26789879127214e-06, "loss": 0.2013, "step": 453, "step_loss": 0.24023717641830444 }, { "epoch": 1.33, "grad_norm": 1.1226223217769014, "kl": 0.363466739654541, "learning_rate": 4.2648178546254135e-06, "loss": 0.2128, "step": 454, "step_loss": 0.19646257162094116 }, { "epoch": 1.34, "grad_norm": 1.0952440956076734, "kl": 0.4751397669315338, "learning_rate": 4.261731714178274e-06, "loss": 0.2107, "step": 455, "step_loss": 0.27217623591423035 }, { "epoch": 1.34, "grad_norm": 1.140170398680307, "kl": 0.3529389500617981, "learning_rate": 4.25864038053243e-06, "loss": 0.2175, "step": 456, "step_loss": 0.22076928615570068 }, { "epoch": 1.34, "grad_norm": 1.0815717361792092, "kl": 0.3519299626350403, "learning_rate": 4.2555438643074315e-06, "loss": 0.2019, "step": 457, "step_loss": 0.19926907122135162 }, { "epoch": 1.35, "grad_norm": 1.1185893886506182, "kl": 0.36596280336380005, "learning_rate": 4.252442176140631e-06, "loss": 0.2036, "step": 458, "step_loss": 0.18698200583457947 }, { "epoch": 1.35, "grad_norm": 1.1963180102524402, "kl": 0.41686299443244934, "learning_rate": 4.249335326687148e-06, "loss": 0.2236, "step": 459, "step_loss": 0.23538297414779663 }, { "epoch": 1.35, "grad_norm": 1.1749480454138903, "kl": 0.3308974504470825, "learning_rate": 4.2462233266198335e-06, "loss": 0.2188, "step": 460, "step_loss": 0.19405069947242737 }, { "epoch": 1.36, "grad_norm": 1.227170089100812, "kl": 0.3617514967918396, "learning_rate": 4.243106186629233e-06, "loss": 0.2232, "step": 461, "step_loss": 0.1792970597743988 }, { "epoch": 1.36, "grad_norm": 1.0671020515492777, "kl": 0.34086763858795166, "learning_rate": 4.2399839174235445e-06, "loss": 0.2123, "step": 462, "step_loss": 0.22149503231048584 }, { "epoch": 1.36, "grad_norm": 1.0270636289046342, "kl": 0.3730754852294922, "learning_rate": 4.236856529728593e-06, "loss": 0.2081, "step": 463, "step_loss": 0.2611701488494873 }, { "epoch": 1.36, "grad_norm": 1.0352246387850932, "kl": 0.31691187620162964, "learning_rate": 4.233724034287782e-06, "loss": 0.2062, "step": 464, "step_loss": 0.18789568543434143 }, { "epoch": 1.37, "grad_norm": 1.1036670142111487, "kl": 0.3653966188430786, "learning_rate": 4.230586441862063e-06, "loss": 0.2102, "step": 465, "step_loss": 0.25932013988494873 }, { "epoch": 1.37, "grad_norm": 1.141576174412114, "kl": 0.3486475348472595, "learning_rate": 4.227443763229895e-06, "loss": 0.2143, "step": 466, "step_loss": 0.22772841155529022 }, { "epoch": 1.37, "grad_norm": 1.08652115649653, "kl": 0.38706696033477783, "learning_rate": 4.224296009187212e-06, "loss": 0.2092, "step": 467, "step_loss": 0.20331993699073792 }, { "epoch": 1.38, "grad_norm": 1.1132102876767822, "kl": 0.4534846544265747, "learning_rate": 4.221143190547384e-06, "loss": 0.2069, "step": 468, "step_loss": 0.19342438876628876 }, { "epoch": 1.38, "grad_norm": 1.2487774294245415, "kl": 0.34831732511520386, "learning_rate": 4.217985318141177e-06, "loss": 0.23, "step": 469, "step_loss": 0.23435795307159424 }, { "epoch": 1.38, "grad_norm": 1.1596986401165996, "kl": 0.3419073224067688, "learning_rate": 4.214822402816718e-06, "loss": 0.2087, "step": 470, "step_loss": 0.17857272922992706 }, { "epoch": 1.38, "grad_norm": 1.0228762507304563, "kl": 0.3407072126865387, "learning_rate": 4.21165445543946e-06, "loss": 0.2022, "step": 471, "step_loss": 0.18100890517234802 }, { "epoch": 1.39, "grad_norm": 1.1834791650481271, "kl": 0.3761002719402313, "learning_rate": 4.20848148689214e-06, "loss": 0.2114, "step": 472, "step_loss": 0.20595240592956543 }, { "epoch": 1.39, "grad_norm": 1.0537720415925667, "kl": 0.32567542791366577, "learning_rate": 4.205303508074745e-06, "loss": 0.2135, "step": 473, "step_loss": 0.23016318678855896 }, { "epoch": 1.39, "grad_norm": 1.08626518519842, "kl": 0.39517271518707275, "learning_rate": 4.202120529904474e-06, "loss": 0.2076, "step": 474, "step_loss": 0.22103264927864075 }, { "epoch": 1.4, "grad_norm": 1.0419674113366015, "kl": 0.34639662504196167, "learning_rate": 4.1989325633157e-06, "loss": 0.2224, "step": 475, "step_loss": 0.2245238721370697 }, { "epoch": 1.4, "grad_norm": 1.0933257642610934, "kl": 0.3724210858345032, "learning_rate": 4.195739619259933e-06, "loss": 0.2081, "step": 476, "step_loss": 0.2216363102197647 }, { "epoch": 1.4, "grad_norm": 1.206514414530843, "kl": 0.33405083417892456, "learning_rate": 4.19254170870578e-06, "loss": 0.2272, "step": 477, "step_loss": 0.19681967794895172 }, { "epoch": 1.41, "grad_norm": 1.127545668719584, "kl": 0.36871930956840515, "learning_rate": 4.18933884263891e-06, "loss": 0.2207, "step": 478, "step_loss": 0.18126648664474487 }, { "epoch": 1.41, "grad_norm": 1.1433635783272997, "kl": 0.3497537076473236, "learning_rate": 4.186131032062018e-06, "loss": 0.2206, "step": 479, "step_loss": 0.20069673657417297 }, { "epoch": 1.41, "grad_norm": 1.0952740745497684, "kl": 0.39063096046447754, "learning_rate": 4.182918287994781e-06, "loss": 0.2209, "step": 480, "step_loss": 0.18552149832248688 }, { "epoch": 1.41, "grad_norm": 1.098642354287991, "kl": 0.33700186014175415, "learning_rate": 4.1797006214738264e-06, "loss": 0.2026, "step": 481, "step_loss": 0.18201524019241333 }, { "epoch": 1.42, "grad_norm": 1.045712202429604, "kl": 0.35738605260849, "learning_rate": 4.17647804355269e-06, "loss": 0.2026, "step": 482, "step_loss": 0.18481150269508362 }, { "epoch": 1.42, "grad_norm": 1.1438459095582905, "kl": 0.3997901678085327, "learning_rate": 4.1732505653017805e-06, "loss": 0.2192, "step": 483, "step_loss": 0.23264771699905396 }, { "epoch": 1.42, "grad_norm": 1.1805883340710588, "kl": 0.342068612575531, "learning_rate": 4.17001819780834e-06, "loss": 0.2131, "step": 484, "step_loss": 0.18483933806419373 }, { "epoch": 1.43, "grad_norm": 1.1642629591622176, "kl": 0.3150475323200226, "learning_rate": 4.1667809521764065e-06, "loss": 0.2144, "step": 485, "step_loss": 0.2131662219762802 }, { "epoch": 1.43, "grad_norm": 1.0776552130456476, "kl": 0.3208051025867462, "learning_rate": 4.163538839526777e-06, "loss": 0.2029, "step": 486, "step_loss": 0.1959662288427353 }, { "epoch": 1.43, "grad_norm": 1.0456011694309113, "kl": 0.4914534091949463, "learning_rate": 4.160291870996966e-06, "loss": 0.2082, "step": 487, "step_loss": 0.21434994041919708 }, { "epoch": 1.43, "grad_norm": 1.0702724310064322, "kl": 0.39285576343536377, "learning_rate": 4.157040057741171e-06, "loss": 0.2118, "step": 488, "step_loss": 0.20233172178268433 }, { "epoch": 1.44, "grad_norm": 1.022356313983593, "kl": 0.29209986329078674, "learning_rate": 4.15378341093023e-06, "loss": 0.2109, "step": 489, "step_loss": 0.19211889803409576 }, { "epoch": 1.44, "grad_norm": 1.1060860463863098, "kl": 0.31146690249443054, "learning_rate": 4.150521941751589e-06, "loss": 0.2284, "step": 490, "step_loss": 0.22323687374591827 }, { "epoch": 1.44, "grad_norm": 1.086327297590339, "kl": 0.3390502333641052, "learning_rate": 4.147255661409255e-06, "loss": 0.2112, "step": 491, "step_loss": 0.19411392509937286 }, { "epoch": 1.45, "grad_norm": 1.0776562984953817, "kl": 0.3938494026660919, "learning_rate": 4.14398458112377e-06, "loss": 0.2128, "step": 492, "step_loss": 0.22699597477912903 }, { "epoch": 1.45, "grad_norm": 1.0858023168277036, "kl": 0.3118027448654175, "learning_rate": 4.140708712132157e-06, "loss": 0.2156, "step": 493, "step_loss": 0.21079112589359283 }, { "epoch": 1.45, "grad_norm": 1.1315907434090295, "kl": 0.37194758653640747, "learning_rate": 4.137428065687896e-06, "loss": 0.2121, "step": 494, "step_loss": 0.2287684679031372 }, { "epoch": 1.46, "grad_norm": 1.1331173453727341, "kl": 0.3639126121997833, "learning_rate": 4.134142653060875e-06, "loss": 0.2186, "step": 495, "step_loss": 0.2055366486310959 }, { "epoch": 1.46, "grad_norm": 1.1630295644530058, "kl": 0.4070471525192261, "learning_rate": 4.130852485537357e-06, "loss": 0.2321, "step": 496, "step_loss": 0.2210061103105545 }, { "epoch": 1.46, "grad_norm": 1.1594682830751208, "kl": 0.40022650361061096, "learning_rate": 4.127557574419938e-06, "loss": 0.2245, "step": 497, "step_loss": 0.2172410786151886 }, { "epoch": 1.46, "grad_norm": 1.0825009135777626, "kl": 0.3248887062072754, "learning_rate": 4.12425793102751e-06, "loss": 0.2112, "step": 498, "step_loss": 0.18749205768108368 }, { "epoch": 1.47, "grad_norm": 1.159823925574004, "kl": 0.4212324321269989, "learning_rate": 4.120953566695222e-06, "loss": 0.2334, "step": 499, "step_loss": 0.2644669711589813 }, { "epoch": 1.47, "grad_norm": 1.0191841777064992, "kl": 0.39752498269081116, "learning_rate": 4.117644492774441e-06, "loss": 0.2073, "step": 500, "step_loss": 0.1976650357246399 }, { "epoch": 1.47, "grad_norm": 1.1191918326441155, "kl": 0.3732473850250244, "learning_rate": 4.11433072063271e-06, "loss": 0.2132, "step": 501, "step_loss": 0.18028053641319275 }, { "epoch": 1.48, "grad_norm": 1.1922599610472286, "kl": 0.3869403004646301, "learning_rate": 4.111012261653716e-06, "loss": 0.2293, "step": 502, "step_loss": 0.2117547243833542 }, { "epoch": 1.48, "grad_norm": 1.1390828459169302, "kl": 0.3269495368003845, "learning_rate": 4.1076891272372435e-06, "loss": 0.2253, "step": 503, "step_loss": 0.20671634376049042 }, { "epoch": 1.48, "grad_norm": 1.1027235919075196, "kl": 0.3744144141674042, "learning_rate": 4.104361328799139e-06, "loss": 0.2237, "step": 504, "step_loss": 0.20059865713119507 }, { "epoch": 1.48, "grad_norm": 1.0613715757167175, "kl": 0.3339211940765381, "learning_rate": 4.101028877771271e-06, "loss": 0.2047, "step": 505, "step_loss": 0.17482547461986542 }, { "epoch": 1.49, "grad_norm": 1.09162612153003, "kl": 0.3029939830303192, "learning_rate": 4.09769178560149e-06, "loss": 0.2138, "step": 506, "step_loss": 0.21953245997428894 }, { "epoch": 1.49, "grad_norm": 1.2415791947427925, "kl": 0.3678430914878845, "learning_rate": 4.094350063753594e-06, "loss": 0.2275, "step": 507, "step_loss": 0.20582033693790436 }, { "epoch": 1.49, "grad_norm": 1.1048759043495957, "kl": 0.35326552391052246, "learning_rate": 4.0910037237072805e-06, "loss": 0.215, "step": 508, "step_loss": 0.20594316720962524 }, { "epoch": 1.5, "grad_norm": 1.0129951540806574, "kl": 0.3663599491119385, "learning_rate": 4.087652776958113e-06, "loss": 0.2142, "step": 509, "step_loss": 0.20410504937171936 }, { "epoch": 1.5, "grad_norm": 1.046767706141149, "kl": 0.2914074659347534, "learning_rate": 4.084297235017482e-06, "loss": 0.2192, "step": 510, "step_loss": 0.1773955076932907 }, { "epoch": 1.5, "grad_norm": 1.0601343367693614, "kl": 0.32609421014785767, "learning_rate": 4.0809371094125635e-06, "loss": 0.2082, "step": 511, "step_loss": 0.1615651398897171 }, { "epoch": 1.51, "grad_norm": 1.1354172849507647, "kl": 0.3583594858646393, "learning_rate": 4.077572411686277e-06, "loss": 0.2119, "step": 512, "step_loss": 0.21853014826774597 }, { "epoch": 1.51, "grad_norm": 1.1763065280060416, "kl": 0.34837618470191956, "learning_rate": 4.07420315339725e-06, "loss": 0.2182, "step": 513, "step_loss": 0.23535805940628052 }, { "epoch": 1.51, "grad_norm": 1.1126977629565107, "kl": 0.43193358182907104, "learning_rate": 4.070829346119778e-06, "loss": 0.2082, "step": 514, "step_loss": 0.2645440101623535 }, { "epoch": 1.51, "grad_norm": 1.0882216657003507, "kl": 0.29184776544570923, "learning_rate": 4.06745100144378e-06, "loss": 0.21, "step": 515, "step_loss": 0.18505370616912842 }, { "epoch": 1.52, "grad_norm": 1.0968084081449223, "kl": 0.32751548290252686, "learning_rate": 4.064068130974767e-06, "loss": 0.2079, "step": 516, "step_loss": 0.20232903957366943 }, { "epoch": 1.52, "grad_norm": 1.099644580772103, "kl": 0.37286919355392456, "learning_rate": 4.060680746333793e-06, "loss": 0.2085, "step": 517, "step_loss": 0.23085977137088776 }, { "epoch": 1.52, "grad_norm": 1.1121872730260256, "kl": 0.25646811723709106, "learning_rate": 4.057288859157423e-06, "loss": 0.2174, "step": 518, "step_loss": 0.16534452140331268 }, { "epoch": 1.53, "grad_norm": 1.081954078814375, "kl": 0.332968145608902, "learning_rate": 4.053892481097686e-06, "loss": 0.2119, "step": 519, "step_loss": 0.22676271200180054 }, { "epoch": 1.53, "grad_norm": 1.0690195878534157, "kl": 0.38490551710128784, "learning_rate": 4.050491623822041e-06, "loss": 0.2086, "step": 520, "step_loss": 0.2052135318517685 }, { "epoch": 1.53, "grad_norm": 1.135011971238312, "kl": 0.44730523228645325, "learning_rate": 4.047086299013332e-06, "loss": 0.21, "step": 521, "step_loss": 0.22579625248908997 }, { "epoch": 1.53, "grad_norm": 1.1803940053183595, "kl": 0.3478749096393585, "learning_rate": 4.0436765183697516e-06, "loss": 0.2289, "step": 522, "step_loss": 0.20054206252098083 }, { "epoch": 1.54, "grad_norm": 1.06138099620151, "kl": 0.35531559586524963, "learning_rate": 4.040262293604799e-06, "loss": 0.2117, "step": 523, "step_loss": 0.23341700434684753 }, { "epoch": 1.54, "grad_norm": 1.131403790342062, "kl": 0.36995428800582886, "learning_rate": 4.036843636447242e-06, "loss": 0.2148, "step": 524, "step_loss": 0.23521539568901062 }, { "epoch": 1.54, "grad_norm": 1.0809651297230498, "kl": 0.33512529730796814, "learning_rate": 4.0334205586410706e-06, "loss": 0.2239, "step": 525, "step_loss": 0.23108862340450287 }, { "epoch": 1.55, "grad_norm": 0.9644089850533444, "kl": 0.3100147843360901, "learning_rate": 4.029993071945465e-06, "loss": 0.2021, "step": 526, "step_loss": 0.16294045746326447 }, { "epoch": 1.55, "grad_norm": 1.14530814951946, "kl": 0.4870828688144684, "learning_rate": 4.026561188134749e-06, "loss": 0.2214, "step": 527, "step_loss": 0.23596243560314178 }, { "epoch": 1.55, "grad_norm": 1.1472820386014453, "kl": 0.4136597216129303, "learning_rate": 4.023124918998353e-06, "loss": 0.2209, "step": 528, "step_loss": 0.2301231026649475 }, { "epoch": 1.56, "grad_norm": 1.123885543146704, "kl": 0.3524470925331116, "learning_rate": 4.01968427634077e-06, "loss": 0.2253, "step": 529, "step_loss": 0.21960632503032684 }, { "epoch": 1.56, "grad_norm": 0.9967913335410991, "kl": 0.28382453322410583, "learning_rate": 4.016239271981519e-06, "loss": 0.1989, "step": 530, "step_loss": 0.1486484706401825 }, { "epoch": 1.56, "grad_norm": 1.0902910989337662, "kl": 0.38825753331184387, "learning_rate": 4.012789917755102e-06, "loss": 0.2212, "step": 531, "step_loss": 0.19914306700229645 }, { "epoch": 1.56, "grad_norm": 1.0173576760514342, "kl": 0.3644031584262848, "learning_rate": 4.0093362255109645e-06, "loss": 0.2037, "step": 532, "step_loss": 0.2210751622915268 }, { "epoch": 1.57, "grad_norm": 1.1991664117084275, "kl": 0.38465866446495056, "learning_rate": 4.0058782071134544e-06, "loss": 0.2196, "step": 533, "step_loss": 0.20659992098808289 }, { "epoch": 1.57, "grad_norm": 1.1662242070255937, "kl": 0.4157981276512146, "learning_rate": 4.002415874441778e-06, "loss": 0.2163, "step": 534, "step_loss": 0.23980651795864105 }, { "epoch": 1.57, "grad_norm": 1.0904814655761281, "kl": 0.36056768894195557, "learning_rate": 3.998949239389968e-06, "loss": 0.2117, "step": 535, "step_loss": 0.21642203629016876 }, { "epoch": 1.58, "grad_norm": 1.0852927276607216, "kl": 0.3154537081718445, "learning_rate": 3.995478313866832e-06, "loss": 0.2147, "step": 536, "step_loss": 0.2088639885187149 }, { "epoch": 1.58, "grad_norm": 1.0681140345975404, "kl": 0.423949658870697, "learning_rate": 3.992003109795918e-06, "loss": 0.231, "step": 537, "step_loss": 0.21801121532917023 }, { "epoch": 1.58, "grad_norm": 1.076997180877474, "kl": 0.39057889580726624, "learning_rate": 3.9885236391154725e-06, "loss": 0.2214, "step": 538, "step_loss": 0.22681710124015808 }, { "epoch": 1.58, "grad_norm": 1.098273331215662, "kl": 0.4600808620452881, "learning_rate": 3.985039913778398e-06, "loss": 0.2204, "step": 539, "step_loss": 0.27202731370925903 }, { "epoch": 1.59, "grad_norm": 1.1677412111257455, "kl": 0.38847512006759644, "learning_rate": 3.981551945752214e-06, "loss": 0.2294, "step": 540, "step_loss": 0.22447596490383148 }, { "epoch": 1.59, "grad_norm": 1.014321414944766, "kl": 0.37826013565063477, "learning_rate": 3.978059747019014e-06, "loss": 0.2029, "step": 541, "step_loss": 0.24410393834114075 }, { "epoch": 1.59, "grad_norm": 1.0948405619058077, "kl": 0.3952583074569702, "learning_rate": 3.974563329575426e-06, "loss": 0.2234, "step": 542, "step_loss": 0.23489505052566528 }, { "epoch": 1.6, "grad_norm": 1.1174386949172748, "kl": 0.38856184482574463, "learning_rate": 3.971062705432569e-06, "loss": 0.2169, "step": 543, "step_loss": 0.23416107892990112 }, { "epoch": 1.6, "grad_norm": 1.181312376726803, "kl": 0.42837756872177124, "learning_rate": 3.967557886616014e-06, "loss": 0.2253, "step": 544, "step_loss": 0.24485422670841217 }, { "epoch": 1.6, "grad_norm": 1.0976730155343217, "kl": 0.3416699767112732, "learning_rate": 3.964048885165741e-06, "loss": 0.2289, "step": 545, "step_loss": 0.2253103256225586 }, { "epoch": 1.61, "grad_norm": 1.1975345651778746, "kl": 0.3401637673377991, "learning_rate": 3.9605357131360994e-06, "loss": 0.2249, "step": 546, "step_loss": 0.24569852650165558 }, { "epoch": 1.61, "grad_norm": 1.0984507770197158, "kl": 0.33863207697868347, "learning_rate": 3.957018382595765e-06, "loss": 0.2156, "step": 547, "step_loss": 0.24580451846122742 }, { "epoch": 1.61, "grad_norm": 1.1716298673466186, "kl": 0.33466053009033203, "learning_rate": 3.953496905627702e-06, "loss": 0.2147, "step": 548, "step_loss": 0.18085306882858276 }, { "epoch": 1.61, "grad_norm": 1.1734884852928138, "kl": 0.34325727820396423, "learning_rate": 3.949971294329112e-06, "loss": 0.2378, "step": 549, "step_loss": 0.23332083225250244 }, { "epoch": 1.62, "grad_norm": 1.136385000201455, "kl": 0.36762735247612, "learning_rate": 3.946441560811407e-06, "loss": 0.2235, "step": 550, "step_loss": 0.22139766812324524 }, { "epoch": 1.62, "grad_norm": 1.0264760838893863, "kl": 0.3464069366455078, "learning_rate": 3.942907717200154e-06, "loss": 0.216, "step": 551, "step_loss": 0.19486932456493378 }, { "epoch": 1.62, "grad_norm": 1.058251659960203, "kl": 0.352877140045166, "learning_rate": 3.939369775635042e-06, "loss": 0.2112, "step": 552, "step_loss": 0.2148822396993637 }, { "epoch": 1.63, "grad_norm": 1.072714998063151, "kl": 0.33858194947242737, "learning_rate": 3.935827748269837e-06, "loss": 0.2112, "step": 553, "step_loss": 0.20188114047050476 }, { "epoch": 1.63, "grad_norm": 1.0835941815255097, "kl": 0.3523325026035309, "learning_rate": 3.932281647272341e-06, "loss": 0.2125, "step": 554, "step_loss": 0.19613249599933624 }, { "epoch": 1.63, "grad_norm": 1.1022095639794895, "kl": 0.41155239939689636, "learning_rate": 3.9287314848243516e-06, "loss": 0.2171, "step": 555, "step_loss": 0.2243964970111847 }, { "epoch": 1.63, "grad_norm": 1.0886715974181185, "kl": 0.32835161685943604, "learning_rate": 3.925177273121613e-06, "loss": 0.2093, "step": 556, "step_loss": 0.19310572743415833 }, { "epoch": 1.64, "grad_norm": 1.0796872927034789, "kl": 0.3089352548122406, "learning_rate": 3.921619024373787e-06, "loss": 0.2044, "step": 557, "step_loss": 0.19184894859790802 }, { "epoch": 1.64, "grad_norm": 1.0718596059733634, "kl": 0.35348087549209595, "learning_rate": 3.918056750804397e-06, "loss": 0.2102, "step": 558, "step_loss": 0.1941283494234085 }, { "epoch": 1.64, "grad_norm": 1.146403256199632, "kl": 0.339926540851593, "learning_rate": 3.914490464650798e-06, "loss": 0.214, "step": 559, "step_loss": 0.19512970745563507 }, { "epoch": 1.65, "grad_norm": 1.104285223312089, "kl": 0.42092275619506836, "learning_rate": 3.910920178164127e-06, "loss": 0.2238, "step": 560, "step_loss": 0.2336612045764923 }, { "epoch": 1.65, "grad_norm": 1.0272793405162493, "kl": 0.3618415892124176, "learning_rate": 3.907345903609264e-06, "loss": 0.2193, "step": 561, "step_loss": 0.2407916635274887 }, { "epoch": 1.65, "grad_norm": 1.105176543312791, "kl": 0.3805115818977356, "learning_rate": 3.903767653264787e-06, "loss": 0.2085, "step": 562, "step_loss": 0.24206788837909698 }, { "epoch": 1.66, "grad_norm": 1.0939508934811706, "kl": 0.36903828382492065, "learning_rate": 3.900185439422934e-06, "loss": 0.2092, "step": 563, "step_loss": 0.16102567315101624 }, { "epoch": 1.66, "grad_norm": 1.0506794536305846, "kl": 0.3899340033531189, "learning_rate": 3.896599274389558e-06, "loss": 0.208, "step": 564, "step_loss": 0.22886879742145538 }, { "epoch": 1.66, "grad_norm": 1.0301032554040463, "kl": 0.348848819732666, "learning_rate": 3.893009170484086e-06, "loss": 0.2182, "step": 565, "step_loss": 0.23902392387390137 }, { "epoch": 1.66, "grad_norm": 1.0779620821495426, "kl": 0.34697240591049194, "learning_rate": 3.889415140039473e-06, "loss": 0.2148, "step": 566, "step_loss": 0.1859664022922516 }, { "epoch": 1.67, "grad_norm": 1.018795902879667, "kl": 0.38165339827537537, "learning_rate": 3.8858171954021695e-06, "loss": 0.2135, "step": 567, "step_loss": 0.2154882550239563 }, { "epoch": 1.67, "grad_norm": 1.0495585663185147, "kl": 0.389653742313385, "learning_rate": 3.882215348932065e-06, "loss": 0.2083, "step": 568, "step_loss": 0.24652042984962463 }, { "epoch": 1.67, "grad_norm": 1.1620507618203504, "kl": 0.28063511848449707, "learning_rate": 3.878609613002456e-06, "loss": 0.2309, "step": 569, "step_loss": 0.21258799731731415 }, { "epoch": 1.68, "grad_norm": 1.0370207351765348, "kl": 0.478799045085907, "learning_rate": 3.875e-06, "loss": 0.2206, "step": 570, "step_loss": 0.2444104254245758 }, { "epoch": 1.68, "grad_norm": 1.1178474477754934, "kl": 0.33863985538482666, "learning_rate": 3.8713865223246744e-06, "loss": 0.2015, "step": 571, "step_loss": 0.18036487698554993 }, { "epoch": 1.68, "grad_norm": 1.1609234312290915, "kl": 0.3761056661605835, "learning_rate": 3.867769192389731e-06, "loss": 0.2247, "step": 572, "step_loss": 0.22269845008850098 }, { "epoch": 1.68, "grad_norm": 1.032185592487916, "kl": 0.3428119719028473, "learning_rate": 3.864148022621657e-06, "loss": 0.2071, "step": 573, "step_loss": 0.18176212906837463 }, { "epoch": 1.69, "grad_norm": 1.0666730498083177, "kl": 0.389988511800766, "learning_rate": 3.8605230254601275e-06, "loss": 0.2223, "step": 574, "step_loss": 0.2073187381029129 }, { "epoch": 1.69, "grad_norm": 1.0611933920672172, "kl": 0.4241553246974945, "learning_rate": 3.856894213357969e-06, "loss": 0.2166, "step": 575, "step_loss": 0.21341772377490997 }, { "epoch": 1.69, "grad_norm": 1.0463821902335075, "kl": 0.3435147702693939, "learning_rate": 3.853261598781112e-06, "loss": 0.209, "step": 576, "step_loss": 0.2048221081495285 }, { "epoch": 1.7, "grad_norm": 1.1170695382643887, "kl": 0.3356623649597168, "learning_rate": 3.849625194208548e-06, "loss": 0.2221, "step": 577, "step_loss": 0.19540490210056305 }, { "epoch": 1.7, "grad_norm": 0.9978130835660762, "kl": 0.3102739751338959, "learning_rate": 3.845985012132291e-06, "loss": 0.2093, "step": 578, "step_loss": 0.1982828974723816 }, { "epoch": 1.7, "grad_norm": 1.1198188896714405, "kl": 0.36010634899139404, "learning_rate": 3.842341065057329e-06, "loss": 0.2115, "step": 579, "step_loss": 0.19751714169979095 }, { "epoch": 1.71, "grad_norm": 1.1041808967153213, "kl": 0.40271279215812683, "learning_rate": 3.8386933655015855e-06, "loss": 0.2121, "step": 580, "step_loss": 0.20048068463802338 }, { "epoch": 1.71, "grad_norm": 1.0991371658621079, "kl": 0.3609469532966614, "learning_rate": 3.8350419259958745e-06, "loss": 0.2153, "step": 581, "step_loss": 0.2352994829416275 }, { "epoch": 1.71, "grad_norm": 1.0699438170139548, "kl": 0.3842519223690033, "learning_rate": 3.831386759083857e-06, "loss": 0.2134, "step": 582, "step_loss": 0.22360186278820038 }, { "epoch": 1.71, "grad_norm": 1.0996292681613908, "kl": 0.3337691128253937, "learning_rate": 3.827727877322001e-06, "loss": 0.2195, "step": 583, "step_loss": 0.1927204579114914 }, { "epoch": 1.72, "grad_norm": 1.184402994917098, "kl": 0.33971020579338074, "learning_rate": 3.824065293279532e-06, "loss": 0.223, "step": 584, "step_loss": 0.18949981033802032 }, { "epoch": 1.72, "grad_norm": 1.0618979276884035, "kl": 0.44578462839126587, "learning_rate": 3.820399019538397e-06, "loss": 0.2167, "step": 585, "step_loss": 0.2560417354106903 }, { "epoch": 1.72, "grad_norm": 1.0548994769662545, "kl": 0.30985498428344727, "learning_rate": 3.816729068693215e-06, "loss": 0.2138, "step": 586, "step_loss": 0.17075133323669434 }, { "epoch": 1.73, "grad_norm": 1.1046250699328048, "kl": 0.40717557072639465, "learning_rate": 3.813055453351242e-06, "loss": 0.2094, "step": 587, "step_loss": 0.2145349383354187 }, { "epoch": 1.73, "grad_norm": 1.0398697406320772, "kl": 0.39057114720344543, "learning_rate": 3.809378186132318e-06, "loss": 0.2124, "step": 588, "step_loss": 0.22147606313228607 }, { "epoch": 1.73, "grad_norm": 1.0908270949218593, "kl": 0.37297523021698, "learning_rate": 3.805697279668829e-06, "loss": 0.2194, "step": 589, "step_loss": 0.25735077261924744 }, { "epoch": 1.73, "grad_norm": 1.0414554175112045, "kl": 0.3597789704799652, "learning_rate": 3.8020127466056638e-06, "loss": 0.2035, "step": 590, "step_loss": 0.19476523995399475 }, { "epoch": 1.74, "grad_norm": 1.042370270883592, "kl": 0.3284936547279358, "learning_rate": 3.7983245996001695e-06, "loss": 0.2099, "step": 591, "step_loss": 0.17381024360656738 }, { "epoch": 1.74, "grad_norm": 1.1212450649744, "kl": 0.29087406396865845, "learning_rate": 3.7946328513221058e-06, "loss": 0.225, "step": 592, "step_loss": 0.2309185266494751 }, { "epoch": 1.74, "grad_norm": 1.1047849123898952, "kl": 0.3819228410720825, "learning_rate": 3.7909375144536077e-06, "loss": 0.2209, "step": 593, "step_loss": 0.2588649094104767 }, { "epoch": 1.75, "grad_norm": 1.0515086411036276, "kl": 0.3881183862686157, "learning_rate": 3.7872386016891342e-06, "loss": 0.2054, "step": 594, "step_loss": 0.1839597374200821 }, { "epoch": 1.75, "grad_norm": 1.0871033893493067, "kl": 0.38226088881492615, "learning_rate": 3.783536125735431e-06, "loss": 0.2089, "step": 595, "step_loss": 0.20572155714035034 }, { "epoch": 1.75, "grad_norm": 1.1196016623588247, "kl": 0.34919315576553345, "learning_rate": 3.7798300993114835e-06, "loss": 0.213, "step": 596, "step_loss": 0.19187521934509277 }, { "epoch": 1.76, "grad_norm": 1.039644651891598, "kl": 0.2882639169692993, "learning_rate": 3.7761205351484732e-06, "loss": 0.212, "step": 597, "step_loss": 0.1760086715221405 }, { "epoch": 1.76, "grad_norm": 1.1245600714414823, "kl": 0.37673628330230713, "learning_rate": 3.7724074459897346e-06, "loss": 0.22, "step": 598, "step_loss": 0.1975017637014389 }, { "epoch": 1.76, "grad_norm": 1.1311382944584354, "kl": 0.38365456461906433, "learning_rate": 3.7686908445907126e-06, "loss": 0.219, "step": 599, "step_loss": 0.21963948011398315 }, { "epoch": 1.76, "grad_norm": 1.078085850267425, "kl": 0.382407009601593, "learning_rate": 3.7649707437189178e-06, "loss": 0.2195, "step": 600, "step_loss": 0.23141171038150787 }, { "epoch": 1.77, "grad_norm": 1.0427847639241463, "kl": 0.37707236409187317, "learning_rate": 3.761247156153881e-06, "loss": 0.2043, "step": 601, "step_loss": 0.21038171648979187 }, { "epoch": 1.77, "grad_norm": 1.0896289777889905, "kl": 0.36756861209869385, "learning_rate": 3.7575200946871104e-06, "loss": 0.2137, "step": 602, "step_loss": 0.23161782324314117 }, { "epoch": 1.77, "grad_norm": 1.0273854062297945, "kl": 0.27731993794441223, "learning_rate": 3.7537895721220513e-06, "loss": 0.2046, "step": 603, "step_loss": 0.23568624258041382 }, { "epoch": 1.78, "grad_norm": 1.0737448045607119, "kl": 0.3347272574901581, "learning_rate": 3.7500556012740343e-06, "loss": 0.2212, "step": 604, "step_loss": 0.1964089274406433 }, { "epoch": 1.78, "grad_norm": 1.0946594680520594, "kl": 0.3800688683986664, "learning_rate": 3.746318194970239e-06, "loss": 0.2176, "step": 605, "step_loss": 0.22273258864879608 }, { "epoch": 1.78, "grad_norm": 1.1129855917797444, "kl": 0.3622528314590454, "learning_rate": 3.7425773660496453e-06, "loss": 0.2186, "step": 606, "step_loss": 0.2413870245218277 }, { "epoch": 1.78, "grad_norm": 1.145527181762428, "kl": 0.3814672827720642, "learning_rate": 3.7388331273629914e-06, "loss": 0.2337, "step": 607, "step_loss": 0.23212337493896484 }, { "epoch": 1.79, "grad_norm": 1.0984629724406025, "kl": 0.2834460735321045, "learning_rate": 3.7350854917727287e-06, "loss": 0.2235, "step": 608, "step_loss": 0.17759786546230316 }, { "epoch": 1.79, "grad_norm": 1.1001711709509128, "kl": 0.3364371359348297, "learning_rate": 3.7313344721529765e-06, "loss": 0.2146, "step": 609, "step_loss": 0.22853884100914001 }, { "epoch": 1.79, "grad_norm": 1.068141085066643, "kl": 0.3433828353881836, "learning_rate": 3.727580081389481e-06, "loss": 0.2145, "step": 610, "step_loss": 0.21322953701019287 }, { "epoch": 1.8, "grad_norm": 1.0787297225275188, "kl": 0.42644378542900085, "learning_rate": 3.72382233237957e-06, "loss": 0.2158, "step": 611, "step_loss": 0.2728964388370514 }, { "epoch": 1.8, "grad_norm": 1.0596709735361247, "kl": 0.36292925477027893, "learning_rate": 3.7200612380321034e-06, "loss": 0.2096, "step": 612, "step_loss": 0.196391299366951 }, { "epoch": 1.8, "grad_norm": 0.9958580635836058, "kl": 0.3467836081981659, "learning_rate": 3.7162968112674387e-06, "loss": 0.2074, "step": 613, "step_loss": 0.21914325654506683 }, { "epoch": 1.81, "grad_norm": 1.1091567079931492, "kl": 0.39706793427467346, "learning_rate": 3.7125290650173768e-06, "loss": 0.2148, "step": 614, "step_loss": 0.23869748413562775 }, { "epoch": 1.81, "grad_norm": 1.0845278159536325, "kl": 0.42876726388931274, "learning_rate": 3.708758012225125e-06, "loss": 0.217, "step": 615, "step_loss": 0.24647286534309387 }, { "epoch": 1.81, "grad_norm": 1.1069509053065003, "kl": 0.31808775663375854, "learning_rate": 3.7049836658452474e-06, "loss": 0.2241, "step": 616, "step_loss": 0.21536415815353394 }, { "epoch": 1.81, "grad_norm": 1.0997318179324385, "kl": 0.45750346779823303, "learning_rate": 3.701206038843623e-06, "loss": 0.2182, "step": 617, "step_loss": 0.23618176579475403 }, { "epoch": 1.82, "grad_norm": 1.098642449230739, "kl": 0.3418111801147461, "learning_rate": 3.697425144197402e-06, "loss": 0.2181, "step": 618, "step_loss": 0.21152858436107635 }, { "epoch": 1.82, "grad_norm": 1.105833997472311, "kl": 0.35120660066604614, "learning_rate": 3.6936409948949563e-06, "loss": 0.2235, "step": 619, "step_loss": 0.25562742352485657 }, { "epoch": 1.82, "grad_norm": 1.0909382099858478, "kl": 0.38127419352531433, "learning_rate": 3.689853603935843e-06, "loss": 0.2023, "step": 620, "step_loss": 0.21061675250530243 }, { "epoch": 1.83, "grad_norm": 1.04203667585934, "kl": 0.3696785569190979, "learning_rate": 3.686062984330752e-06, "loss": 0.2084, "step": 621, "step_loss": 0.2408429980278015 }, { "epoch": 1.83, "grad_norm": 1.0740199095756, "kl": 0.35119834542274475, "learning_rate": 3.682269149101465e-06, "loss": 0.2249, "step": 622, "step_loss": 0.2259160876274109 }, { "epoch": 1.83, "grad_norm": 1.1244116476489243, "kl": 0.39911478757858276, "learning_rate": 3.6784721112808107e-06, "loss": 0.2076, "step": 623, "step_loss": 0.2444825917482376 }, { "epoch": 1.83, "grad_norm": 1.084381841698426, "kl": 0.364761620759964, "learning_rate": 3.6746718839126195e-06, "loss": 0.2136, "step": 624, "step_loss": 0.21551458537578583 }, { "epoch": 1.84, "grad_norm": 1.1078473708417569, "kl": 0.44796106219291687, "learning_rate": 3.6708684800516786e-06, "loss": 0.2175, "step": 625, "step_loss": 0.219948410987854 }, { "epoch": 1.84, "grad_norm": 1.0214975768306558, "kl": 0.2986973226070404, "learning_rate": 3.6670619127636865e-06, "loss": 0.2043, "step": 626, "step_loss": 0.16981080174446106 }, { "epoch": 1.84, "grad_norm": 1.0733872403257325, "kl": 0.37517714500427246, "learning_rate": 3.663252195125211e-06, "loss": 0.2169, "step": 627, "step_loss": 0.21208983659744263 }, { "epoch": 1.85, "grad_norm": 1.1239709783885117, "kl": 0.38882941007614136, "learning_rate": 3.6594393402236405e-06, "loss": 0.2128, "step": 628, "step_loss": 0.2071652114391327 }, { "epoch": 1.85, "grad_norm": 1.0181078127118095, "kl": 0.35665225982666016, "learning_rate": 3.655623361157141e-06, "loss": 0.2065, "step": 629, "step_loss": 0.21186676621437073 }, { "epoch": 1.85, "grad_norm": 1.0980774285222674, "kl": 0.33140304684638977, "learning_rate": 3.65180427103461e-06, "loss": 0.216, "step": 630, "step_loss": 0.23446890711784363 }, { "epoch": 1.86, "grad_norm": 1.1533939812862526, "kl": 0.3695037364959717, "learning_rate": 3.647982082975635e-06, "loss": 0.2227, "step": 631, "step_loss": 0.2064458131790161 }, { "epoch": 1.86, "grad_norm": 1.0607104236817797, "kl": 0.3754327893257141, "learning_rate": 3.6441568101104434e-06, "loss": 0.2074, "step": 632, "step_loss": 0.21298718452453613 }, { "epoch": 1.86, "grad_norm": 1.1080406855173834, "kl": 0.372994601726532, "learning_rate": 3.64032846557986e-06, "loss": 0.2371, "step": 633, "step_loss": 0.22076019644737244 }, { "epoch": 1.86, "grad_norm": 1.061998718883708, "kl": 0.36003583669662476, "learning_rate": 3.6364970625352613e-06, "loss": 0.2106, "step": 634, "step_loss": 0.236919105052948 }, { "epoch": 1.87, "grad_norm": 1.1629557628795668, "kl": 0.40046006441116333, "learning_rate": 3.6326626141385323e-06, "loss": 0.2194, "step": 635, "step_loss": 0.21533732116222382 }, { "epoch": 1.87, "grad_norm": 1.1308113048397246, "kl": 0.35290196537971497, "learning_rate": 3.6288251335620185e-06, "loss": 0.2356, "step": 636, "step_loss": 0.25752317905426025 }, { "epoch": 1.87, "grad_norm": 1.024178113211246, "kl": 0.367868036031723, "learning_rate": 3.6249846339884807e-06, "loss": 0.2132, "step": 637, "step_loss": 0.2091902643442154 }, { "epoch": 1.88, "grad_norm": 1.0233015596218147, "kl": 0.36171606183052063, "learning_rate": 3.621141128611053e-06, "loss": 0.211, "step": 638, "step_loss": 0.1969766914844513 }, { "epoch": 1.88, "grad_norm": 1.0521376056017973, "kl": 0.3328798711299896, "learning_rate": 3.617294630633193e-06, "loss": 0.2148, "step": 639, "step_loss": 0.20545676350593567 }, { "epoch": 1.88, "grad_norm": 1.1495041143489746, "kl": 0.31849485635757446, "learning_rate": 3.613445153268641e-06, "loss": 0.2273, "step": 640, "step_loss": 0.19370122253894806 }, { "epoch": 1.88, "grad_norm": 1.107261874903731, "kl": 0.35327592492103577, "learning_rate": 3.6095927097413697e-06, "loss": 0.2289, "step": 641, "step_loss": 0.19487443566322327 }, { "epoch": 1.89, "grad_norm": 1.087476278188833, "kl": 0.37533411383628845, "learning_rate": 3.6057373132855426e-06, "loss": 0.212, "step": 642, "step_loss": 0.20453642308712006 }, { "epoch": 1.89, "grad_norm": 1.0777004622363646, "kl": 0.3517782390117645, "learning_rate": 3.6018789771454686e-06, "loss": 0.2215, "step": 643, "step_loss": 0.2225874960422516 }, { "epoch": 1.89, "grad_norm": 1.1041421512665277, "kl": 0.3960123658180237, "learning_rate": 3.5980177145755527e-06, "loss": 0.2156, "step": 644, "step_loss": 0.19644665718078613 }, { "epoch": 1.9, "grad_norm": 1.107531345345793, "kl": 0.3443949818611145, "learning_rate": 3.5941535388402555e-06, "loss": 0.218, "step": 645, "step_loss": 0.21444953978061676 }, { "epoch": 1.9, "grad_norm": 1.0633655556716426, "kl": 0.32992398738861084, "learning_rate": 3.5902864632140417e-06, "loss": 0.1945, "step": 646, "step_loss": 0.17697668075561523 }, { "epoch": 1.9, "grad_norm": 1.0660551981501736, "kl": 0.3524007201194763, "learning_rate": 3.5864165009813417e-06, "loss": 0.2062, "step": 647, "step_loss": 0.2261100560426712 }, { "epoch": 1.91, "grad_norm": 1.0902351373924726, "kl": 0.3847709894180298, "learning_rate": 3.5825436654365005e-06, "loss": 0.1977, "step": 648, "step_loss": 0.21421456336975098 }, { "epoch": 1.91, "grad_norm": 1.0921552134859336, "kl": 0.3233397603034973, "learning_rate": 3.578667969883733e-06, "loss": 0.2088, "step": 649, "step_loss": 0.1856887936592102 }, { "epoch": 1.91, "grad_norm": 1.067332679290162, "kl": 0.35494789481163025, "learning_rate": 3.5747894276370792e-06, "loss": 0.2174, "step": 650, "step_loss": 0.2479093074798584 }, { "epoch": 1.91, "grad_norm": 1.1216410520483295, "kl": 0.3918030261993408, "learning_rate": 3.5709080520203593e-06, "loss": 0.2317, "step": 651, "step_loss": 0.21458828449249268 }, { "epoch": 1.92, "grad_norm": 1.0495414510313243, "kl": 0.383411705493927, "learning_rate": 3.5670238563671257e-06, "loss": 0.2046, "step": 652, "step_loss": 0.22937092185020447 }, { "epoch": 1.92, "grad_norm": 1.137161738802843, "kl": 0.39319974184036255, "learning_rate": 3.563136854020621e-06, "loss": 0.2304, "step": 653, "step_loss": 0.2653224468231201 }, { "epoch": 1.92, "grad_norm": 1.1155913828745578, "kl": 0.4275739789009094, "learning_rate": 3.5592470583337233e-06, "loss": 0.2204, "step": 654, "step_loss": 0.2726008892059326 }, { "epoch": 1.93, "grad_norm": 1.0500891579513196, "kl": 0.3193601071834564, "learning_rate": 3.5553544826689148e-06, "loss": 0.2075, "step": 655, "step_loss": 0.18977577984333038 }, { "epoch": 1.93, "grad_norm": 1.0438682449198995, "kl": 0.3925110697746277, "learning_rate": 3.551459140398221e-06, "loss": 0.2184, "step": 656, "step_loss": 0.24175623059272766 }, { "epoch": 1.93, "grad_norm": 1.1175988513740611, "kl": 0.3625691533088684, "learning_rate": 3.547561044903175e-06, "loss": 0.2138, "step": 657, "step_loss": 0.21793964505195618 }, { "epoch": 1.93, "grad_norm": 1.0707034837846736, "kl": 0.3854144215583801, "learning_rate": 3.5436602095747665e-06, "loss": 0.2168, "step": 658, "step_loss": 0.2395915985107422 }, { "epoch": 1.94, "grad_norm": 1.1404652940954114, "kl": 0.3602861166000366, "learning_rate": 3.539756647813398e-06, "loss": 0.2356, "step": 659, "step_loss": 0.21894899010658264 }, { "epoch": 1.94, "grad_norm": 0.9838424861989383, "kl": 0.36652958393096924, "learning_rate": 3.535850373028839e-06, "loss": 0.1979, "step": 660, "step_loss": 0.20045427978038788 }, { "epoch": 1.94, "grad_norm": 1.095329737833727, "kl": 0.3199812173843384, "learning_rate": 3.5319413986401753e-06, "loss": 0.2183, "step": 661, "step_loss": 0.26433855295181274 }, { "epoch": 1.95, "grad_norm": 1.053846419483373, "kl": 0.4188472032546997, "learning_rate": 3.5280297380757692e-06, "loss": 0.2146, "step": 662, "step_loss": 0.2612619400024414 }, { "epoch": 1.95, "grad_norm": 1.0686913624789676, "kl": 0.3134489357471466, "learning_rate": 3.524115404773213e-06, "loss": 0.2118, "step": 663, "step_loss": 0.18129369616508484 }, { "epoch": 1.95, "grad_norm": 1.0608198050048145, "kl": 0.34840127825737, "learning_rate": 3.5201984121792753e-06, "loss": 0.2121, "step": 664, "step_loss": 0.18985848128795624 }, { "epoch": 1.96, "grad_norm": 1.0981997640992105, "kl": 0.3330709934234619, "learning_rate": 3.516278773749863e-06, "loss": 0.2064, "step": 665, "step_loss": 0.2144535481929779 }, { "epoch": 1.96, "grad_norm": 1.1140559930597989, "kl": 0.3608890771865845, "learning_rate": 3.512356502949973e-06, "loss": 0.2141, "step": 666, "step_loss": 0.24026130139827728 }, { "epoch": 1.96, "grad_norm": 1.0633605809716729, "kl": 0.32477813959121704, "learning_rate": 3.508431613253644e-06, "loss": 0.2104, "step": 667, "step_loss": 0.19436398148536682 }, { "epoch": 1.96, "grad_norm": 0.9553573137927444, "kl": 0.33438920974731445, "learning_rate": 3.5045041181439117e-06, "loss": 0.1949, "step": 668, "step_loss": 0.17747747898101807 }, { "epoch": 1.97, "grad_norm": 1.1177038853329584, "kl": 0.3582827150821686, "learning_rate": 3.500574031112759e-06, "loss": 0.223, "step": 669, "step_loss": 0.21867407858371735 }, { "epoch": 1.97, "grad_norm": 1.1246746241729981, "kl": 0.3887527585029602, "learning_rate": 3.496641365661079e-06, "loss": 0.2136, "step": 670, "step_loss": 0.20844541490077972 }, { "epoch": 1.97, "grad_norm": 1.1013777287048612, "kl": 0.36839255690574646, "learning_rate": 3.4927061352986163e-06, "loss": 0.2264, "step": 671, "step_loss": 0.23754476010799408 }, { "epoch": 1.98, "grad_norm": 1.0736030062333497, "kl": 0.37584322690963745, "learning_rate": 3.4887683535439305e-06, "loss": 0.2149, "step": 672, "step_loss": 0.22229911386966705 }, { "epoch": 1.98, "grad_norm": 1.1162319765419337, "kl": 0.38228553533554077, "learning_rate": 3.484828033924343e-06, "loss": 0.2289, "step": 673, "step_loss": 0.24069103598594666 }, { "epoch": 1.98, "grad_norm": 1.0677391357940962, "kl": 0.37927836179733276, "learning_rate": 3.4808851899758967e-06, "loss": 0.2147, "step": 674, "step_loss": 0.19201472401618958 }, { "epoch": 1.98, "grad_norm": 1.0515478096285862, "kl": 0.3583109974861145, "learning_rate": 3.476939835243304e-06, "loss": 0.2151, "step": 675, "step_loss": 0.22411265969276428 }, { "epoch": 1.99, "grad_norm": 1.0225232678263274, "kl": 0.285269558429718, "learning_rate": 3.4729919832799036e-06, "loss": 0.199, "step": 676, "step_loss": 0.20125800371170044 }, { "epoch": 1.99, "grad_norm": 1.0899879606264502, "kl": 0.34659573435783386, "learning_rate": 3.46904164764761e-06, "loss": 0.2159, "step": 677, "step_loss": 0.21278782188892365 }, { "epoch": 1.99, "grad_norm": 1.0775849606901395, "kl": 0.3956890106201172, "learning_rate": 3.4650888419168748e-06, "loss": 0.2092, "step": 678, "step_loss": 0.2175438106060028 }, { "epoch": 2.0, "grad_norm": 0.9947578897117778, "kl": 0.30330708622932434, "learning_rate": 3.4611335796666307e-06, "loss": 0.2031, "step": 679, "step_loss": 0.19578225910663605 }, { "epoch": 2.0, "grad_norm": 1.0764381231649376, "kl": 0.35529449582099915, "learning_rate": 3.457175874484251e-06, "loss": 0.2005, "step": 680, "step_loss": 0.19128616154193878 }, { "epoch": 2.0, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_loss": 1.6508517265319824, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_runtime": 14.6015, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_samples_per_second": 6.849, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_steps_per_second": 0.89, "step": 680 }, { "epoch": 2.0, "grad_norm": 0.925978313670487, "kl": 0.4176456928253174, "learning_rate": 3.4532157399655014e-06, "loss": 0.1679, "step": 681, "step_loss": 1.3116686344146729 }, { "epoch": 2.01, "grad_norm": 0.8940649274695096, "kl": 0.33792364597320557, "learning_rate": 3.4492531897144923e-06, "loss": 0.162, "step": 682, "step_loss": 0.14112232625484467 }, { "epoch": 2.01, "grad_norm": 0.8396441200218108, "kl": 0.3699738383293152, "learning_rate": 3.445288237343632e-06, "loss": 0.144, "step": 683, "step_loss": 0.1407199501991272 }, { "epoch": 2.01, "grad_norm": 0.9100745741579815, "kl": 0.39846813678741455, "learning_rate": 3.441320896473583e-06, "loss": 0.1509, "step": 684, "step_loss": 0.16023500263690948 }, { "epoch": 2.01, "grad_norm": 0.9183176716722681, "kl": 0.4535992741584778, "learning_rate": 3.4373511807332115e-06, "loss": 0.1555, "step": 685, "step_loss": 0.15503008663654327 }, { "epoch": 2.02, "grad_norm": 0.8961989752558022, "kl": 0.45110467076301575, "learning_rate": 3.433379103759542e-06, "loss": 0.1564, "step": 686, "step_loss": 0.16036511957645416 }, { "epoch": 2.02, "grad_norm": 0.8795040894782503, "kl": 0.3965161442756653, "learning_rate": 3.4294046791977096e-06, "loss": 0.146, "step": 687, "step_loss": 0.13043992221355438 }, { "epoch": 2.02, "grad_norm": 0.9844997417794465, "kl": 0.4604860246181488, "learning_rate": 3.4254279207009163e-06, "loss": 0.147, "step": 688, "step_loss": 0.15021522343158722 }, { "epoch": 2.03, "grad_norm": 0.9757415983330471, "kl": 0.4833226501941681, "learning_rate": 3.4214488419303806e-06, "loss": 0.1415, "step": 689, "step_loss": 0.14150793850421906 }, { "epoch": 2.03, "grad_norm": 1.0650937173916857, "kl": 0.4186987280845642, "learning_rate": 3.4174674565552902e-06, "loss": 0.1507, "step": 690, "step_loss": 0.1668976992368698 }, { "epoch": 2.03, "grad_norm": 1.1276622372563851, "kl": 0.45650917291641235, "learning_rate": 3.413483778252759e-06, "loss": 0.1605, "step": 691, "step_loss": 0.16845574975013733 }, { "epoch": 2.03, "grad_norm": 1.1937296491156466, "kl": 0.4285459518432617, "learning_rate": 3.4094978207077768e-06, "loss": 0.1487, "step": 692, "step_loss": 0.1462787389755249 }, { "epoch": 2.04, "grad_norm": 1.0212719844193419, "kl": 0.5114130973815918, "learning_rate": 3.405509597613163e-06, "loss": 0.145, "step": 693, "step_loss": 0.1645100861787796 }, { "epoch": 2.04, "grad_norm": 1.1028532757980718, "kl": 0.520036518573761, "learning_rate": 3.4015191226695203e-06, "loss": 0.1545, "step": 694, "step_loss": 0.17864225804805756 }, { "epoch": 2.04, "grad_norm": 0.9733155555331748, "kl": 0.4656696915626526, "learning_rate": 3.397526409585185e-06, "loss": 0.1407, "step": 695, "step_loss": 0.12441332638263702 }, { "epoch": 2.05, "grad_norm": 1.0128686328034844, "kl": 0.4196836054325104, "learning_rate": 3.3935314720761864e-06, "loss": 0.1494, "step": 696, "step_loss": 0.12643490731716156 }, { "epoch": 2.05, "grad_norm": 0.9201530198545629, "kl": 0.46877506375312805, "learning_rate": 3.389534323866191e-06, "loss": 0.1399, "step": 697, "step_loss": 0.15753169357776642 }, { "epoch": 2.05, "grad_norm": 0.9894624669733002, "kl": 0.4130990505218506, "learning_rate": 3.385534978686461e-06, "loss": 0.1546, "step": 698, "step_loss": 0.17007581889629364 }, { "epoch": 2.06, "grad_norm": 1.0155107140429007, "kl": 0.4775574505329132, "learning_rate": 3.3815334502758055e-06, "loss": 0.1489, "step": 699, "step_loss": 0.12813322246074677 }, { "epoch": 2.06, "grad_norm": 0.939102091406889, "kl": 0.41837960481643677, "learning_rate": 3.3775297523805365e-06, "loss": 0.1457, "step": 700, "step_loss": 0.15080194175243378 }, { "epoch": 2.06, "grad_norm": 0.9499828261780652, "kl": 0.46936333179473877, "learning_rate": 3.3735238987544146e-06, "loss": 0.152, "step": 701, "step_loss": 0.13999901711940765 }, { "epoch": 2.06, "grad_norm": 0.9757544264149329, "kl": 0.39698389172554016, "learning_rate": 3.369515903158607e-06, "loss": 0.1507, "step": 702, "step_loss": 0.14616172015666962 }, { "epoch": 2.07, "grad_norm": 0.9785785921285085, "kl": 0.5002555847167969, "learning_rate": 3.365505779361642e-06, "loss": 0.1487, "step": 703, "step_loss": 0.16984041035175323 }, { "epoch": 2.07, "grad_norm": 0.9897490897977824, "kl": 0.4246920049190521, "learning_rate": 3.3614935411393544e-06, "loss": 0.1589, "step": 704, "step_loss": 0.1668768674135208 }, { "epoch": 2.07, "grad_norm": 0.9578147172513255, "kl": 0.3801954388618469, "learning_rate": 3.3574792022748463e-06, "loss": 0.1543, "step": 705, "step_loss": 0.13766665756702423 }, { "epoch": 2.08, "grad_norm": 0.9662098961881012, "kl": 0.43419426679611206, "learning_rate": 3.3534627765584334e-06, "loss": 0.1512, "step": 706, "step_loss": 0.15339550375938416 }, { "epoch": 2.08, "grad_norm": 0.9114560763013234, "kl": 0.5094923377037048, "learning_rate": 3.3494442777876022e-06, "loss": 0.1439, "step": 707, "step_loss": 0.1555686891078949 }, { "epoch": 2.08, "grad_norm": 1.0718532109617787, "kl": 0.4465751051902771, "learning_rate": 3.3454237197669607e-06, "loss": 0.158, "step": 708, "step_loss": 0.14179621636867523 }, { "epoch": 2.08, "grad_norm": 1.0391746116330387, "kl": 0.426788866519928, "learning_rate": 3.341401116308189e-06, "loss": 0.1514, "step": 709, "step_loss": 0.16065070033073425 }, { "epoch": 2.09, "grad_norm": 0.9617041964014664, "kl": 0.39666134119033813, "learning_rate": 3.3373764812299954e-06, "loss": 0.1477, "step": 710, "step_loss": 0.13998152315616608 }, { "epoch": 2.09, "grad_norm": 1.0146003249127458, "kl": 0.3973727226257324, "learning_rate": 3.333349828358067e-06, "loss": 0.1507, "step": 711, "step_loss": 0.15805211663246155 }, { "epoch": 2.09, "grad_norm": 0.9869352681538952, "kl": 0.42524510622024536, "learning_rate": 3.3293211715250222e-06, "loss": 0.1457, "step": 712, "step_loss": 0.1241888552904129 }, { "epoch": 2.1, "grad_norm": 0.9948029176298825, "kl": 0.40643396973609924, "learning_rate": 3.325290524570365e-06, "loss": 0.151, "step": 713, "step_loss": 0.16702188551425934 }, { "epoch": 2.1, "grad_norm": 1.0201927757414222, "kl": 0.48036760091781616, "learning_rate": 3.321257901340434e-06, "loss": 0.148, "step": 714, "step_loss": 0.1528395265340805 }, { "epoch": 2.1, "grad_norm": 0.9203151178241796, "kl": 0.5000730156898499, "learning_rate": 3.317223315688358e-06, "loss": 0.1463, "step": 715, "step_loss": 0.16571396589279175 }, { "epoch": 2.11, "grad_norm": 0.9937513297449968, "kl": 0.3717888593673706, "learning_rate": 3.313186781474008e-06, "loss": 0.1627, "step": 716, "step_loss": 0.17453354597091675 }, { "epoch": 2.11, "grad_norm": 1.0398940867357407, "kl": 0.5787868499755859, "learning_rate": 3.309148312563945e-06, "loss": 0.1602, "step": 717, "step_loss": 0.2011195421218872 }, { "epoch": 2.11, "grad_norm": 0.9694706558977573, "kl": 0.4446108639240265, "learning_rate": 3.3051079228313815e-06, "loss": 0.1426, "step": 718, "step_loss": 0.1449287086725235 }, { "epoch": 2.11, "grad_norm": 0.9431356634721532, "kl": 0.4195602834224701, "learning_rate": 3.301065626156125e-06, "loss": 0.143, "step": 719, "step_loss": 0.1555873304605484 }, { "epoch": 2.12, "grad_norm": 0.8839846147038787, "kl": 0.46536436676979065, "learning_rate": 3.2970214364245352e-06, "loss": 0.1425, "step": 720, "step_loss": 0.1379953920841217 }, { "epoch": 2.12, "grad_norm": 0.963039812210809, "kl": 0.4512026011943817, "learning_rate": 3.2929753675294745e-06, "loss": 0.1529, "step": 721, "step_loss": 0.1425732672214508 }, { "epoch": 2.12, "grad_norm": 1.0145646795924912, "kl": 0.48650485277175903, "learning_rate": 3.2889274333702612e-06, "loss": 0.1592, "step": 722, "step_loss": 0.14481569826602936 }, { "epoch": 2.13, "grad_norm": 0.933756411120608, "kl": 0.45475882291793823, "learning_rate": 3.284877647852621e-06, "loss": 0.1427, "step": 723, "step_loss": 0.1405959278345108 }, { "epoch": 2.13, "grad_norm": 0.992827171719177, "kl": 0.4193406403064728, "learning_rate": 3.2808260248886375e-06, "loss": 0.1552, "step": 724, "step_loss": 0.14025843143463135 }, { "epoch": 2.13, "grad_norm": 1.0734808867236152, "kl": 0.4013231694698334, "learning_rate": 3.2767725783967112e-06, "loss": 0.1584, "step": 725, "step_loss": 0.16459117829799652 }, { "epoch": 2.13, "grad_norm": 0.9858158556441436, "kl": 0.38677310943603516, "learning_rate": 3.272717322301503e-06, "loss": 0.1495, "step": 726, "step_loss": 0.13374319672584534 }, { "epoch": 2.14, "grad_norm": 0.9541473377690992, "kl": 0.4354119598865509, "learning_rate": 3.2686602705338906e-06, "loss": 0.15, "step": 727, "step_loss": 0.15379023551940918 }, { "epoch": 2.14, "grad_norm": 0.9634481367476163, "kl": 0.4873085021972656, "learning_rate": 3.26460143703092e-06, "loss": 0.1493, "step": 728, "step_loss": 0.1327579915523529 }, { "epoch": 2.14, "grad_norm": 0.9953389168771356, "kl": 0.3850947916507721, "learning_rate": 3.2605408357357624e-06, "loss": 0.1521, "step": 729, "step_loss": 0.15176990628242493 }, { "epoch": 2.15, "grad_norm": 0.9844939750343663, "kl": 0.44254547357559204, "learning_rate": 3.2564784805976562e-06, "loss": 0.1555, "step": 730, "step_loss": 0.13903678953647614 }, { "epoch": 2.15, "grad_norm": 0.9886634600223577, "kl": 0.43694841861724854, "learning_rate": 3.2524143855718658e-06, "loss": 0.1565, "step": 731, "step_loss": 0.16289514303207397 }, { "epoch": 2.15, "grad_norm": 0.9484758806082827, "kl": 0.3681836426258087, "learning_rate": 3.2483485646196362e-06, "loss": 0.149, "step": 732, "step_loss": 0.12337259948253632 }, { "epoch": 2.16, "grad_norm": 0.9484232485107367, "kl": 0.4986894428730011, "learning_rate": 3.2442810317081377e-06, "loss": 0.1431, "step": 733, "step_loss": 0.13536569476127625 }, { "epoch": 2.16, "grad_norm": 0.9693721649945023, "kl": 0.3540458083152771, "learning_rate": 3.240211800810422e-06, "loss": 0.145, "step": 734, "step_loss": 0.11736252903938293 }, { "epoch": 2.16, "grad_norm": 1.0585370770025628, "kl": 0.43972048163414, "learning_rate": 3.2361408859053755e-06, "loss": 0.1481, "step": 735, "step_loss": 0.15756061673164368 }, { "epoch": 2.16, "grad_norm": 0.9537128136761093, "kl": 0.3989626467227936, "learning_rate": 3.2320683009776693e-06, "loss": 0.1536, "step": 736, "step_loss": 0.13479158282279968 }, { "epoch": 2.17, "grad_norm": 0.9815792466817577, "kl": 0.41348952054977417, "learning_rate": 3.2279940600177097e-06, "loss": 0.151, "step": 737, "step_loss": 0.12909382581710815 }, { "epoch": 2.17, "grad_norm": 1.04854074018167, "kl": 0.4909619688987732, "learning_rate": 3.223918177021594e-06, "loss": 0.1475, "step": 738, "step_loss": 0.1474575400352478 }, { "epoch": 2.17, "grad_norm": 1.0715274180501446, "kl": 0.45988544821739197, "learning_rate": 3.2198406659910596e-06, "loss": 0.1634, "step": 739, "step_loss": 0.16125904023647308 }, { "epoch": 2.18, "grad_norm": 0.9890646713016052, "kl": 0.42592141032218933, "learning_rate": 3.2157615409334363e-06, "loss": 0.1499, "step": 740, "step_loss": 0.128481924533844 }, { "epoch": 2.18, "grad_norm": 0.9970158964695769, "kl": 0.397694855928421, "learning_rate": 3.2116808158615986e-06, "loss": 0.1577, "step": 741, "step_loss": 0.1626586616039276 }, { "epoch": 2.18, "grad_norm": 1.0147077673417295, "kl": 0.365612268447876, "learning_rate": 3.207598504793917e-06, "loss": 0.1517, "step": 742, "step_loss": 0.17115183174610138 }, { "epoch": 2.18, "grad_norm": 0.9665733618485168, "kl": 0.41971349716186523, "learning_rate": 3.2035146217542116e-06, "loss": 0.1443, "step": 743, "step_loss": 0.14287295937538147 }, { "epoch": 2.19, "grad_norm": 1.0369419314745194, "kl": 0.46320030093193054, "learning_rate": 3.1994291807717027e-06, "loss": 0.1556, "step": 744, "step_loss": 0.16582506895065308 }, { "epoch": 2.19, "grad_norm": 0.9781857084745722, "kl": 0.369863361120224, "learning_rate": 3.19534219588096e-06, "loss": 0.1513, "step": 745, "step_loss": 0.1406002938747406 }, { "epoch": 2.19, "grad_norm": 0.9404615672126064, "kl": 0.44528642296791077, "learning_rate": 3.19125368112186e-06, "loss": 0.1519, "step": 746, "step_loss": 0.13117581605911255 }, { "epoch": 2.2, "grad_norm": 1.0121457881406244, "kl": 0.39943063259124756, "learning_rate": 3.187163650539533e-06, "loss": 0.155, "step": 747, "step_loss": 0.1424383670091629 }, { "epoch": 2.2, "grad_norm": 1.1106950133092461, "kl": 0.5139979124069214, "learning_rate": 3.1830721181843177e-06, "loss": 0.1448, "step": 748, "step_loss": 0.13854211568832397 }, { "epoch": 2.2, "grad_norm": 0.9928293479952365, "kl": 0.4104097783565521, "learning_rate": 3.1789790981117103e-06, "loss": 0.1496, "step": 749, "step_loss": 0.13803105056285858 }, { "epoch": 2.21, "grad_norm": 0.9864891298529466, "kl": 0.5884207487106323, "learning_rate": 3.174884604382317e-06, "loss": 0.1457, "step": 750, "step_loss": 0.1734772026538849 }, { "epoch": 2.21, "grad_norm": 1.019026327979771, "kl": 0.39733612537384033, "learning_rate": 3.170788651061811e-06, "loss": 0.1548, "step": 751, "step_loss": 0.15517111122608185 }, { "epoch": 2.21, "grad_norm": 0.9759482931761134, "kl": 0.39855048060417175, "learning_rate": 3.1666912522208754e-06, "loss": 0.143, "step": 752, "step_loss": 0.1241132915019989 }, { "epoch": 2.21, "grad_norm": 0.9819776619042838, "kl": 0.4785412549972534, "learning_rate": 3.162592421935158e-06, "loss": 0.1529, "step": 753, "step_loss": 0.1676996350288391 }, { "epoch": 2.22, "grad_norm": 0.9567580475400209, "kl": 0.4445911943912506, "learning_rate": 3.158492174285229e-06, "loss": 0.1488, "step": 754, "step_loss": 0.16597110033035278 }, { "epoch": 2.22, "grad_norm": 0.9727635152108765, "kl": 0.420547217130661, "learning_rate": 3.1543905233565232e-06, "loss": 0.1464, "step": 755, "step_loss": 0.12980793416500092 }, { "epoch": 2.22, "grad_norm": 0.9831060677637677, "kl": 0.43531182408332825, "learning_rate": 3.1502874832392984e-06, "loss": 0.1502, "step": 756, "step_loss": 0.19235534965991974 }, { "epoch": 2.23, "grad_norm": 0.9741457522529087, "kl": 0.4326942563056946, "learning_rate": 3.146183068028582e-06, "loss": 0.1521, "step": 757, "step_loss": 0.17118844389915466 }, { "epoch": 2.23, "grad_norm": 1.0158603395549721, "kl": 0.4532112777233124, "learning_rate": 3.1420772918241286e-06, "loss": 0.1416, "step": 758, "step_loss": 0.1491362452507019 }, { "epoch": 2.23, "grad_norm": 0.9702749401691976, "kl": 0.41253310441970825, "learning_rate": 3.1379701687303665e-06, "loss": 0.152, "step": 759, "step_loss": 0.16260167956352234 }, { "epoch": 2.23, "grad_norm": 0.9835143944298296, "kl": 0.48813024163246155, "learning_rate": 3.1338617128563505e-06, "loss": 0.1483, "step": 760, "step_loss": 0.14093339443206787 }, { "epoch": 2.24, "grad_norm": 0.9673002377624046, "kl": 0.45352044701576233, "learning_rate": 3.1297519383157138e-06, "loss": 0.1444, "step": 761, "step_loss": 0.14167660474777222 }, { "epoch": 2.24, "grad_norm": 1.0177495156776364, "kl": 0.3557469844818115, "learning_rate": 3.1256408592266214e-06, "loss": 0.1545, "step": 762, "step_loss": 0.1343638151884079 }, { "epoch": 2.24, "grad_norm": 0.9023137851380141, "kl": 0.4272156357765198, "learning_rate": 3.121528489711718e-06, "loss": 0.1473, "step": 763, "step_loss": 0.1451863944530487 }, { "epoch": 2.25, "grad_norm": 1.0441318127695194, "kl": 0.4729064106941223, "learning_rate": 3.1174148438980804e-06, "loss": 0.151, "step": 764, "step_loss": 0.16162380576133728 }, { "epoch": 2.25, "grad_norm": 0.9950818625517187, "kl": 0.4472399353981018, "learning_rate": 3.113299935917174e-06, "loss": 0.1539, "step": 765, "step_loss": 0.14452748000621796 }, { "epoch": 2.25, "grad_norm": 1.0179996166687564, "kl": 0.5420696139335632, "learning_rate": 3.1091837799047946e-06, "loss": 0.1475, "step": 766, "step_loss": 0.18763327598571777 }, { "epoch": 2.26, "grad_norm": 1.0010025393208966, "kl": 0.3447565734386444, "learning_rate": 3.1050663900010295e-06, "loss": 0.1485, "step": 767, "step_loss": 0.13680729269981384 }, { "epoch": 2.26, "grad_norm": 1.0142744474701502, "kl": 0.3538120687007904, "learning_rate": 3.1009477803502015e-06, "loss": 0.1541, "step": 768, "step_loss": 0.16569288074970245 }, { "epoch": 2.26, "grad_norm": 1.036047382769812, "kl": 0.4295748174190521, "learning_rate": 3.0968279651008273e-06, "loss": 0.1613, "step": 769, "step_loss": 0.14074234664440155 }, { "epoch": 2.26, "grad_norm": 0.9244937635890171, "kl": 0.5195684432983398, "learning_rate": 3.092706958405561e-06, "loss": 0.1472, "step": 770, "step_loss": 0.15720242261886597 }, { "epoch": 2.27, "grad_norm": 0.9621433507609762, "kl": 0.4947783052921295, "learning_rate": 3.088584774421155e-06, "loss": 0.1552, "step": 771, "step_loss": 0.16154304146766663 }, { "epoch": 2.27, "grad_norm": 1.0088544087696063, "kl": 0.45518720149993896, "learning_rate": 3.0844614273083986e-06, "loss": 0.1537, "step": 772, "step_loss": 0.16250544786453247 }, { "epoch": 2.27, "grad_norm": 1.02735358403942, "kl": 0.3680950999259949, "learning_rate": 3.0803369312320834e-06, "loss": 0.1595, "step": 773, "step_loss": 0.13430428504943848 }, { "epoch": 2.28, "grad_norm": 1.0229473541898282, "kl": 0.45938223600387573, "learning_rate": 3.0762113003609464e-06, "loss": 0.1521, "step": 774, "step_loss": 0.16164535284042358 }, { "epoch": 2.28, "grad_norm": 0.9762252225185958, "kl": 0.4909830391407013, "learning_rate": 3.072084548867619e-06, "loss": 0.1487, "step": 775, "step_loss": 0.16784648597240448 }, { "epoch": 2.28, "grad_norm": 1.0525530374080225, "kl": 0.4151424765586853, "learning_rate": 3.0679566909285865e-06, "loss": 0.1644, "step": 776, "step_loss": 0.15828314423561096 }, { "epoch": 2.28, "grad_norm": 1.0300851369596706, "kl": 0.3816262185573578, "learning_rate": 3.0638277407241353e-06, "loss": 0.1552, "step": 777, "step_loss": 0.1416245996952057 }, { "epoch": 2.29, "grad_norm": 0.9094117275175454, "kl": 0.47151878476142883, "learning_rate": 3.059697712438301e-06, "loss": 0.1429, "step": 778, "step_loss": 0.1599782109260559 }, { "epoch": 2.29, "grad_norm": 0.943123316008099, "kl": 0.4516427218914032, "learning_rate": 3.0555666202588237e-06, "loss": 0.1525, "step": 779, "step_loss": 0.17189931869506836 }, { "epoch": 2.29, "grad_norm": 0.9766034537311132, "kl": 0.4513046443462372, "learning_rate": 3.0514344783771017e-06, "loss": 0.1516, "step": 780, "step_loss": 0.14572221040725708 }, { "epoch": 2.3, "grad_norm": 0.9983107634149363, "kl": 0.36767828464508057, "learning_rate": 3.0473013009881343e-06, "loss": 0.1561, "step": 781, "step_loss": 0.16547314822673798 }, { "epoch": 2.3, "grad_norm": 1.0270127608148132, "kl": 0.45325592160224915, "learning_rate": 3.04316710229048e-06, "loss": 0.1583, "step": 782, "step_loss": 0.14895084500312805 }, { "epoch": 2.3, "grad_norm": 0.9148345849077315, "kl": 0.5390376448631287, "learning_rate": 3.0390318964862064e-06, "loss": 0.1519, "step": 783, "step_loss": 0.1619083732366562 }, { "epoch": 2.31, "grad_norm": 0.8725682510499037, "kl": 0.34793299436569214, "learning_rate": 3.03489569778084e-06, "loss": 0.1452, "step": 784, "step_loss": 0.1361267864704132 }, { "epoch": 2.31, "grad_norm": 0.9441065859863195, "kl": 0.4481748640537262, "learning_rate": 3.0307585203833203e-06, "loss": 0.1426, "step": 785, "step_loss": 0.15584218502044678 }, { "epoch": 2.31, "grad_norm": 1.0085881671115713, "kl": 0.43633711338043213, "learning_rate": 3.0266203785059438e-06, "loss": 0.1528, "step": 786, "step_loss": 0.1516050398349762 }, { "epoch": 2.31, "grad_norm": 0.9369144359378735, "kl": 0.4298512935638428, "learning_rate": 3.0224812863643266e-06, "loss": 0.1451, "step": 787, "step_loss": 0.12825755774974823 }, { "epoch": 2.32, "grad_norm": 1.065211790409866, "kl": 0.43545064330101013, "learning_rate": 3.0183412581773453e-06, "loss": 0.1619, "step": 788, "step_loss": 0.16257864236831665 }, { "epoch": 2.32, "grad_norm": 1.0775993281913878, "kl": 0.40961089730262756, "learning_rate": 3.0142003081670922e-06, "loss": 0.1628, "step": 789, "step_loss": 0.1613461673259735 }, { "epoch": 2.32, "grad_norm": 1.0237299523119836, "kl": 0.3749983608722687, "learning_rate": 3.010058450558827e-06, "loss": 0.1504, "step": 790, "step_loss": 0.13308608531951904 }, { "epoch": 2.33, "grad_norm": 1.0661845972455573, "kl": 0.472342312335968, "learning_rate": 3.005915699580928e-06, "loss": 0.1486, "step": 791, "step_loss": 0.1562490463256836 }, { "epoch": 2.33, "grad_norm": 0.9764395369363379, "kl": 0.36287054419517517, "learning_rate": 3.0017720694648407e-06, "loss": 0.1472, "step": 792, "step_loss": 0.14360609650611877 }, { "epoch": 2.33, "grad_norm": 0.9561934092861629, "kl": 0.41280660033226013, "learning_rate": 2.997627574445032e-06, "loss": 0.1425, "step": 793, "step_loss": 0.1299421787261963 }, { "epoch": 2.33, "grad_norm": 1.016934035404032, "kl": 0.44996654987335205, "learning_rate": 2.9934822287589404e-06, "loss": 0.1517, "step": 794, "step_loss": 0.13960210978984833 }, { "epoch": 2.34, "grad_norm": 1.007459978061249, "kl": 0.4418295919895172, "learning_rate": 2.9893360466469257e-06, "loss": 0.1586, "step": 795, "step_loss": 0.1698797643184662 }, { "epoch": 2.34, "grad_norm": 0.9777845289814279, "kl": 0.4033919870853424, "learning_rate": 2.9851890423522214e-06, "loss": 0.1534, "step": 796, "step_loss": 0.148381307721138 }, { "epoch": 2.34, "grad_norm": 1.0038670483961691, "kl": 0.41636258363723755, "learning_rate": 2.9810412301208837e-06, "loss": 0.1605, "step": 797, "step_loss": 0.15567950904369354 }, { "epoch": 2.35, "grad_norm": 0.9931491186767503, "kl": 0.536481499671936, "learning_rate": 2.976892624201747e-06, "loss": 0.1515, "step": 798, "step_loss": 0.1677020788192749 }, { "epoch": 2.35, "grad_norm": 0.9976100824318828, "kl": 0.3861311674118042, "learning_rate": 2.9727432388463713e-06, "loss": 0.1498, "step": 799, "step_loss": 0.14130038022994995 }, { "epoch": 2.35, "grad_norm": 0.9818984015412048, "kl": 0.38746803998947144, "learning_rate": 2.9685930883089936e-06, "loss": 0.1524, "step": 800, "step_loss": 0.1593620926141739 }, { "epoch": 2.36, "grad_norm": 0.9288991903769893, "kl": 0.3206455707550049, "learning_rate": 2.9644421868464797e-06, "loss": 0.1394, "step": 801, "step_loss": 0.10261634737253189 }, { "epoch": 2.36, "grad_norm": 0.9983514817365607, "kl": 0.41597017645835876, "learning_rate": 2.9602905487182758e-06, "loss": 0.1451, "step": 802, "step_loss": 0.12765845656394958 }, { "epoch": 2.36, "grad_norm": 0.8902637518688588, "kl": 0.35546138882637024, "learning_rate": 2.9561381881863583e-06, "loss": 0.1358, "step": 803, "step_loss": 0.11750486493110657 }, { "epoch": 2.36, "grad_norm": 1.1226095260209488, "kl": 0.41053929924964905, "learning_rate": 2.9519851195151834e-06, "loss": 0.1441, "step": 804, "step_loss": 0.15928710997104645 }, { "epoch": 2.37, "grad_norm": 1.0162148889995983, "kl": 0.40562787652015686, "learning_rate": 2.9478313569716427e-06, "loss": 0.1497, "step": 805, "step_loss": 0.1482928991317749 }, { "epoch": 2.37, "grad_norm": 0.9605017503685718, "kl": 0.37824496626853943, "learning_rate": 2.9436769148250107e-06, "loss": 0.1502, "step": 806, "step_loss": 0.1375085562467575 }, { "epoch": 2.37, "grad_norm": 1.0439813106314617, "kl": 0.4013465642929077, "learning_rate": 2.939521807346896e-06, "loss": 0.1515, "step": 807, "step_loss": 0.1387888491153717 }, { "epoch": 2.38, "grad_norm": 0.9915447105353192, "kl": 0.4745499789714813, "learning_rate": 2.935366048811192e-06, "loss": 0.1519, "step": 808, "step_loss": 0.1591644287109375 }, { "epoch": 2.38, "grad_norm": 0.9295061078745588, "kl": 0.39526990056037903, "learning_rate": 2.9312096534940304e-06, "loss": 0.1403, "step": 809, "step_loss": 0.14475908875465393 }, { "epoch": 2.38, "grad_norm": 0.9874188124202254, "kl": 0.4027431309223175, "learning_rate": 2.9270526356737306e-06, "loss": 0.1551, "step": 810, "step_loss": 0.17166703939437866 }, { "epoch": 2.38, "grad_norm": 1.0386355533970422, "kl": 0.332511842250824, "learning_rate": 2.9228950096307477e-06, "loss": 0.1543, "step": 811, "step_loss": 0.14212793111801147 }, { "epoch": 2.39, "grad_norm": 0.9426077934872951, "kl": 0.5144191980361938, "learning_rate": 2.9187367896476287e-06, "loss": 0.1467, "step": 812, "step_loss": 0.17707863450050354 }, { "epoch": 2.39, "grad_norm": 1.033937968101504, "kl": 0.458046019077301, "learning_rate": 2.9145779900089603e-06, "loss": 0.1568, "step": 813, "step_loss": 0.14064949750900269 }, { "epoch": 2.39, "grad_norm": 0.9527788628095325, "kl": 0.46262165904045105, "learning_rate": 2.91041862500132e-06, "loss": 0.1517, "step": 814, "step_loss": 0.13187597692012787 }, { "epoch": 2.4, "grad_norm": 1.0259502663865276, "kl": 0.5149811506271362, "learning_rate": 2.9062587089132287e-06, "loss": 0.1507, "step": 815, "step_loss": 0.14037488400936127 }, { "epoch": 2.4, "grad_norm": 0.9142149529111745, "kl": 0.5691028237342834, "learning_rate": 2.9020982560350987e-06, "loss": 0.1424, "step": 816, "step_loss": 0.15255063772201538 }, { "epoch": 2.4, "grad_norm": 0.9834074836322464, "kl": 0.5331578254699707, "learning_rate": 2.897937280659188e-06, "loss": 0.1522, "step": 817, "step_loss": 0.19720472395420074 }, { "epoch": 2.4, "grad_norm": 1.0291303032977386, "kl": 0.42640256881713867, "learning_rate": 2.893775797079548e-06, "loss": 0.1587, "step": 818, "step_loss": 0.15943148732185364 }, { "epoch": 2.41, "grad_norm": 1.023854381889724, "kl": 0.3928847908973694, "learning_rate": 2.8896138195919774e-06, "loss": 0.1478, "step": 819, "step_loss": 0.14019568264484406 }, { "epoch": 2.41, "grad_norm": 1.0242535198952862, "kl": 0.48428666591644287, "learning_rate": 2.885451362493971e-06, "loss": 0.1529, "step": 820, "step_loss": 0.19270402193069458 }, { "epoch": 2.41, "grad_norm": 1.0748533004644767, "kl": 0.5833289623260498, "learning_rate": 2.8812884400846697e-06, "loss": 0.1585, "step": 821, "step_loss": 0.15655606985092163 }, { "epoch": 2.42, "grad_norm": 1.0464967072640574, "kl": 0.46840161085128784, "learning_rate": 2.8771250666648154e-06, "loss": 0.1546, "step": 822, "step_loss": 0.15261268615722656 }, { "epoch": 2.42, "grad_norm": 0.9831924601580038, "kl": 0.46287956833839417, "learning_rate": 2.872961256536697e-06, "loss": 0.1586, "step": 823, "step_loss": 0.17225544154644012 }, { "epoch": 2.42, "grad_norm": 1.065657859586474, "kl": 0.3879581093788147, "learning_rate": 2.868797024004106e-06, "loss": 0.1595, "step": 824, "step_loss": 0.1531311571598053 }, { "epoch": 2.43, "grad_norm": 1.044424305750142, "kl": 0.43647971749305725, "learning_rate": 2.864632383372284e-06, "loss": 0.156, "step": 825, "step_loss": 0.13304683566093445 }, { "epoch": 2.43, "grad_norm": 0.9736991867352337, "kl": 0.5177615880966187, "learning_rate": 2.8604673489478736e-06, "loss": 0.1507, "step": 826, "step_loss": 0.1456117331981659 }, { "epoch": 2.43, "grad_norm": 0.935779184315902, "kl": 0.4033496379852295, "learning_rate": 2.8563019350388682e-06, "loss": 0.1441, "step": 827, "step_loss": 0.1455593705177307 }, { "epoch": 2.43, "grad_norm": 0.9724703844802562, "kl": 0.44370484352111816, "learning_rate": 2.852136155954573e-06, "loss": 0.1551, "step": 828, "step_loss": 0.15278911590576172 }, { "epoch": 2.44, "grad_norm": 1.0389405085720838, "kl": 0.3740912079811096, "learning_rate": 2.8479700260055375e-06, "loss": 0.1464, "step": 829, "step_loss": 0.14697124063968658 }, { "epoch": 2.44, "grad_norm": 1.045910704297703, "kl": 0.46834734082221985, "learning_rate": 2.8438035595035235e-06, "loss": 0.1616, "step": 830, "step_loss": 0.16846756637096405 }, { "epoch": 2.44, "grad_norm": 1.142264846499886, "kl": 0.47931694984436035, "learning_rate": 2.8396367707614454e-06, "loss": 0.1669, "step": 831, "step_loss": 0.18373435735702515 }, { "epoch": 2.45, "grad_norm": 1.0527179270858962, "kl": 0.527995228767395, "learning_rate": 2.835469674093326e-06, "loss": 0.1575, "step": 832, "step_loss": 0.16337227821350098 }, { "epoch": 2.45, "grad_norm": 1.0387618411476036, "kl": 0.42796212434768677, "learning_rate": 2.8313022838142475e-06, "loss": 0.1595, "step": 833, "step_loss": 0.16812928020954132 }, { "epoch": 2.45, "grad_norm": 0.9471143464094294, "kl": 0.4106406271457672, "learning_rate": 2.827134614240296e-06, "loss": 0.1499, "step": 834, "step_loss": 0.14076904952526093 }, { "epoch": 2.45, "grad_norm": 0.9917655257761571, "kl": 0.45805299282073975, "learning_rate": 2.8229666796885224e-06, "loss": 0.1557, "step": 835, "step_loss": 0.15434692800045013 }, { "epoch": 2.46, "grad_norm": 1.0110321513562135, "kl": 0.40475213527679443, "learning_rate": 2.818798494476884e-06, "loss": 0.1465, "step": 836, "step_loss": 0.1469970941543579 }, { "epoch": 2.46, "grad_norm": 0.9764345161496416, "kl": 0.49001234769821167, "learning_rate": 2.814630072924201e-06, "loss": 0.1487, "step": 837, "step_loss": 0.1866789162158966 }, { "epoch": 2.46, "grad_norm": 1.0794414278495323, "kl": 0.4759081304073334, "learning_rate": 2.8104614293501047e-06, "loss": 0.166, "step": 838, "step_loss": 0.15782764554023743 }, { "epoch": 2.47, "grad_norm": 0.9236052649575086, "kl": 0.5032958984375, "learning_rate": 2.8062925780749913e-06, "loss": 0.141, "step": 839, "step_loss": 0.1591145396232605 }, { "epoch": 2.47, "grad_norm": 0.9482136338056982, "kl": 0.4338464140892029, "learning_rate": 2.802123533419966e-06, "loss": 0.1433, "step": 840, "step_loss": 0.16640107333660126 }, { "epoch": 2.47, "grad_norm": 1.0595480920949152, "kl": 0.526473343372345, "learning_rate": 2.7979543097068023e-06, "loss": 0.1574, "step": 841, "step_loss": 0.1585194319486618 }, { "epoch": 2.48, "grad_norm": 0.9792300907182565, "kl": 0.43886512517929077, "learning_rate": 2.793784921257889e-06, "loss": 0.1488, "step": 842, "step_loss": 0.1458326131105423 }, { "epoch": 2.48, "grad_norm": 0.9630706804467645, "kl": 0.4345511794090271, "learning_rate": 2.789615382396178e-06, "loss": 0.1489, "step": 843, "step_loss": 0.15134494006633759 }, { "epoch": 2.48, "grad_norm": 1.0446667083350927, "kl": 0.4915614724159241, "learning_rate": 2.785445707445139e-06, "loss": 0.1563, "step": 844, "step_loss": 0.1671372950077057 }, { "epoch": 2.48, "grad_norm": 1.0372490510437422, "kl": 0.46963563561439514, "learning_rate": 2.7812759107287092e-06, "loss": 0.1577, "step": 845, "step_loss": 0.17008760571479797 }, { "epoch": 2.49, "grad_norm": 1.0428903957466424, "kl": 0.4324113130569458, "learning_rate": 2.777106006571246e-06, "loss": 0.1637, "step": 846, "step_loss": 0.1375209391117096 }, { "epoch": 2.49, "grad_norm": 1.1117206141871079, "kl": 0.49308332800865173, "learning_rate": 2.7729360092974727e-06, "loss": 0.1573, "step": 847, "step_loss": 0.1321687251329422 }, { "epoch": 2.49, "grad_norm": 1.0017834941794728, "kl": 0.4275958836078644, "learning_rate": 2.7687659332324348e-06, "loss": 0.1522, "step": 848, "step_loss": 0.1515330672264099 }, { "epoch": 2.5, "grad_norm": 0.9791789290830443, "kl": 0.3971335291862488, "learning_rate": 2.7645957927014476e-06, "loss": 0.1472, "step": 849, "step_loss": 0.1782829761505127 }, { "epoch": 2.5, "grad_norm": 0.9777418720782441, "kl": 0.44619691371917725, "learning_rate": 2.7604256020300474e-06, "loss": 0.1504, "step": 850, "step_loss": 0.15114323794841766 }, { "epoch": 2.5, "grad_norm": 1.0171915736900594, "kl": 0.43852925300598145, "learning_rate": 2.7562553755439453e-06, "loss": 0.1536, "step": 851, "step_loss": 0.1527268886566162 }, { "epoch": 2.5, "grad_norm": 0.9392503452143012, "kl": 0.48514020442962646, "learning_rate": 2.7520851275689705e-06, "loss": 0.1434, "step": 852, "step_loss": 0.15535013377666473 }, { "epoch": 2.51, "grad_norm": 1.0357489994061733, "kl": 0.40949738025665283, "learning_rate": 2.7479148724310306e-06, "loss": 0.1554, "step": 853, "step_loss": 0.15922455489635468 }, { "epoch": 2.51, "grad_norm": 0.9421029421438589, "kl": 0.4990323483943939, "learning_rate": 2.7437446244560563e-06, "loss": 0.1501, "step": 854, "step_loss": 0.14648234844207764 }, { "epoch": 2.51, "grad_norm": 0.9961498319508436, "kl": 0.4119671583175659, "learning_rate": 2.739574397969953e-06, "loss": 0.1457, "step": 855, "step_loss": 0.14378459751605988 }, { "epoch": 2.52, "grad_norm": 1.018662122435029, "kl": 0.4358648359775543, "learning_rate": 2.7354042072985527e-06, "loss": 0.1596, "step": 856, "step_loss": 0.1559343934059143 }, { "epoch": 2.52, "grad_norm": 0.9455489895306939, "kl": 0.5033860206604004, "learning_rate": 2.731234066767566e-06, "loss": 0.148, "step": 857, "step_loss": 0.14744150638580322 }, { "epoch": 2.52, "grad_norm": 1.046978432968794, "kl": 0.5012477040290833, "learning_rate": 2.727063990702528e-06, "loss": 0.1554, "step": 858, "step_loss": 0.1712190806865692 }, { "epoch": 2.53, "grad_norm": 0.9584546228377427, "kl": 0.4685800075531006, "learning_rate": 2.7228939934287545e-06, "loss": 0.1518, "step": 859, "step_loss": 0.1738032102584839 }, { "epoch": 2.53, "grad_norm": 1.0138203947600486, "kl": 0.4495634138584137, "learning_rate": 2.7187240892712915e-06, "loss": 0.1542, "step": 860, "step_loss": 0.1489475667476654 }, { "epoch": 2.53, "grad_norm": 0.9471813046395302, "kl": 0.506722092628479, "learning_rate": 2.7145542925548625e-06, "loss": 0.1497, "step": 861, "step_loss": 0.17352920770645142 }, { "epoch": 2.53, "grad_norm": 0.930769811268272, "kl": 0.45764830708503723, "learning_rate": 2.7103846176038234e-06, "loss": 0.1463, "step": 862, "step_loss": 0.14699025452136993 }, { "epoch": 2.54, "grad_norm": 1.0613625131595172, "kl": 0.5440824627876282, "learning_rate": 2.7062150787421117e-06, "loss": 0.1586, "step": 863, "step_loss": 0.15115922689437866 }, { "epoch": 2.54, "grad_norm": 1.0090354921253601, "kl": 0.4678072929382324, "learning_rate": 2.702045690293198e-06, "loss": 0.1577, "step": 864, "step_loss": 0.19815057516098022 }, { "epoch": 2.54, "grad_norm": 0.9879524528369488, "kl": 0.41325706243515015, "learning_rate": 2.697876466580035e-06, "loss": 0.1587, "step": 865, "step_loss": 0.1458713412284851 }, { "epoch": 2.55, "grad_norm": 0.9336052578651772, "kl": 0.35849088430404663, "learning_rate": 2.693707421925011e-06, "loss": 0.1427, "step": 866, "step_loss": 0.1263750195503235 }, { "epoch": 2.55, "grad_norm": 0.962448080322168, "kl": 0.4207032024860382, "learning_rate": 2.689538570649896e-06, "loss": 0.1501, "step": 867, "step_loss": 0.15147185325622559 }, { "epoch": 2.55, "grad_norm": 0.9755617125351154, "kl": 0.35634732246398926, "learning_rate": 2.6853699270758006e-06, "loss": 0.1444, "step": 868, "step_loss": 0.13335993885993958 }, { "epoch": 2.55, "grad_norm": 1.0071987678638463, "kl": 0.40423381328582764, "learning_rate": 2.681201505523117e-06, "loss": 0.1497, "step": 869, "step_loss": 0.15910114347934723 }, { "epoch": 2.56, "grad_norm": 0.9679292924796197, "kl": 0.4693619906902313, "learning_rate": 2.6770333203114783e-06, "loss": 0.1527, "step": 870, "step_loss": 0.16329102218151093 }, { "epoch": 2.56, "grad_norm": 0.9628687579474976, "kl": 0.4137752056121826, "learning_rate": 2.6728653857597042e-06, "loss": 0.1462, "step": 871, "step_loss": 0.12889014184474945 }, { "epoch": 2.56, "grad_norm": 1.0253647283281182, "kl": 0.31751748919487, "learning_rate": 2.6686977161857536e-06, "loss": 0.1509, "step": 872, "step_loss": 0.13793063163757324 }, { "epoch": 2.57, "grad_norm": 0.9933774525703443, "kl": 0.4467350244522095, "learning_rate": 2.664530325906674e-06, "loss": 0.1555, "step": 873, "step_loss": 0.15573230385780334 }, { "epoch": 2.57, "grad_norm": 1.0177636297789605, "kl": 0.4431452751159668, "learning_rate": 2.660363229238555e-06, "loss": 0.159, "step": 874, "step_loss": 0.12957452237606049 }, { "epoch": 2.57, "grad_norm": 0.9976856899473273, "kl": 0.415743350982666, "learning_rate": 2.6561964404964772e-06, "loss": 0.1414, "step": 875, "step_loss": 0.14293581247329712 }, { "epoch": 2.58, "grad_norm": 1.0243005389649271, "kl": 0.47209423780441284, "learning_rate": 2.6520299739944632e-06, "loss": 0.1577, "step": 876, "step_loss": 0.15875697135925293 }, { "epoch": 2.58, "grad_norm": 1.0132980539403538, "kl": 0.4555986821651459, "learning_rate": 2.6478638440454287e-06, "loss": 0.155, "step": 877, "step_loss": 0.14882808923721313 }, { "epoch": 2.58, "grad_norm": 0.9968564423415248, "kl": 0.4383925199508667, "learning_rate": 2.6436980649611316e-06, "loss": 0.1527, "step": 878, "step_loss": 0.1468876153230667 }, { "epoch": 2.58, "grad_norm": 0.9946869883294787, "kl": 0.4805772602558136, "learning_rate": 2.6395326510521284e-06, "loss": 0.1468, "step": 879, "step_loss": 0.1445452868938446 }, { "epoch": 2.59, "grad_norm": 1.0065650168443376, "kl": 0.4132371246814728, "learning_rate": 2.635367616627717e-06, "loss": 0.1504, "step": 880, "step_loss": 0.1394580602645874 }, { "epoch": 2.59, "grad_norm": 1.0307017371698117, "kl": 0.3783648908138275, "learning_rate": 2.631202975995894e-06, "loss": 0.1592, "step": 881, "step_loss": 0.15418383479118347 }, { "epoch": 2.59, "grad_norm": 1.001319848718613, "kl": 0.5001019835472107, "learning_rate": 2.6270387434633033e-06, "loss": 0.1473, "step": 882, "step_loss": 0.14509941637516022 }, { "epoch": 2.6, "grad_norm": 0.9875134436983942, "kl": 0.41825470328330994, "learning_rate": 2.622874933335186e-06, "loss": 0.1537, "step": 883, "step_loss": 0.12782949209213257 }, { "epoch": 2.6, "grad_norm": 1.0572877830596905, "kl": 0.48397839069366455, "learning_rate": 2.618711559915332e-06, "loss": 0.1532, "step": 884, "step_loss": 0.15913690626621246 }, { "epoch": 2.6, "grad_norm": 0.9751094536600117, "kl": 0.4292382597923279, "learning_rate": 2.6145486375060305e-06, "loss": 0.1522, "step": 885, "step_loss": 0.14782339334487915 }, { "epoch": 2.6, "grad_norm": 1.0673776750362411, "kl": 0.5227698087692261, "learning_rate": 2.610386180408023e-06, "loss": 0.1631, "step": 886, "step_loss": 0.17023181915283203 }, { "epoch": 2.61, "grad_norm": 0.9733633359085521, "kl": 0.35292860865592957, "learning_rate": 2.6062242029204525e-06, "loss": 0.1471, "step": 887, "step_loss": 0.16706503927707672 }, { "epoch": 2.61, "grad_norm": 1.0817929222314064, "kl": 0.4509636163711548, "learning_rate": 2.6020627193408126e-06, "loss": 0.159, "step": 888, "step_loss": 0.14339911937713623 }, { "epoch": 2.61, "grad_norm": 1.0112704753099575, "kl": 0.5275288820266724, "learning_rate": 2.5979017439649016e-06, "loss": 0.1578, "step": 889, "step_loss": 0.1634017825126648 }, { "epoch": 2.62, "grad_norm": 1.065748885440266, "kl": 0.4825303554534912, "learning_rate": 2.593741291086772e-06, "loss": 0.1647, "step": 890, "step_loss": 0.15958373248577118 }, { "epoch": 2.62, "grad_norm": 1.0145482738058882, "kl": 0.4341558814048767, "learning_rate": 2.589581374998681e-06, "loss": 0.1535, "step": 891, "step_loss": 0.15015427768230438 }, { "epoch": 2.62, "grad_norm": 0.965175247388308, "kl": 0.3313429355621338, "learning_rate": 2.5854220099910404e-06, "loss": 0.1421, "step": 892, "step_loss": 0.1318102329969406 }, { "epoch": 2.63, "grad_norm": 0.9691866844145771, "kl": 0.3674515187740326, "learning_rate": 2.581263210352372e-06, "loss": 0.1465, "step": 893, "step_loss": 0.12013500183820724 }, { "epoch": 2.63, "grad_norm": 1.0326376883901633, "kl": 0.4059482514858246, "learning_rate": 2.5771049903692534e-06, "loss": 0.1576, "step": 894, "step_loss": 0.1739095002412796 }, { "epoch": 2.63, "grad_norm": 1.025107600798099, "kl": 0.4659903943538666, "learning_rate": 2.572947364326271e-06, "loss": 0.1657, "step": 895, "step_loss": 0.1959068477153778 }, { "epoch": 2.63, "grad_norm": 1.0072382984139703, "kl": 0.4354158639907837, "learning_rate": 2.5687903465059694e-06, "loss": 0.1544, "step": 896, "step_loss": 0.1440533995628357 }, { "epoch": 2.64, "grad_norm": 0.9879824672979248, "kl": 0.42458993196487427, "learning_rate": 2.5646339511888087e-06, "loss": 0.1515, "step": 897, "step_loss": 0.16383150219917297 }, { "epoch": 2.64, "grad_norm": 0.9776984512612195, "kl": 0.5129539966583252, "learning_rate": 2.560478192653106e-06, "loss": 0.145, "step": 898, "step_loss": 0.1519792526960373 }, { "epoch": 2.64, "grad_norm": 1.0045857782912961, "kl": 0.46529197692871094, "learning_rate": 2.5563230851749904e-06, "loss": 0.1554, "step": 899, "step_loss": 0.14696285128593445 }, { "epoch": 2.65, "grad_norm": 1.0698829958572684, "kl": 0.38536253571510315, "learning_rate": 2.5521686430283584e-06, "loss": 0.1562, "step": 900, "step_loss": 0.1435265839099884 }, { "epoch": 2.65, "grad_norm": 0.9972674616711951, "kl": 0.5101684927940369, "learning_rate": 2.5480148804848177e-06, "loss": 0.1518, "step": 901, "step_loss": 0.17259491980075836 }, { "epoch": 2.65, "grad_norm": 0.9978533741995581, "kl": 0.4602809548377991, "learning_rate": 2.5438618118136433e-06, "loss": 0.1524, "step": 902, "step_loss": 0.16445577144622803 }, { "epoch": 2.65, "grad_norm": 0.9469645542814312, "kl": 0.42083609104156494, "learning_rate": 2.539709451281725e-06, "loss": 0.1485, "step": 903, "step_loss": 0.12865757942199707 }, { "epoch": 2.66, "grad_norm": 1.095494010676487, "kl": 0.522094190120697, "learning_rate": 2.5355578131535206e-06, "loss": 0.1574, "step": 904, "step_loss": 0.1739048808813095 }, { "epoch": 2.66, "grad_norm": 0.9767740096877633, "kl": 0.4193570613861084, "learning_rate": 2.531406911691007e-06, "loss": 0.1573, "step": 905, "step_loss": 0.1455826461315155 }, { "epoch": 2.66, "grad_norm": 1.017293366696319, "kl": 0.4341852068901062, "learning_rate": 2.5272567611536303e-06, "loss": 0.1526, "step": 906, "step_loss": 0.15153871476650238 }, { "epoch": 2.67, "grad_norm": 0.9952002463525593, "kl": 0.42138275504112244, "learning_rate": 2.523107375798254e-06, "loss": 0.1538, "step": 907, "step_loss": 0.1505734622478485 }, { "epoch": 2.67, "grad_norm": 0.9990848287474213, "kl": 0.4363083839416504, "learning_rate": 2.5189587698791175e-06, "loss": 0.154, "step": 908, "step_loss": 0.14343413710594177 }, { "epoch": 2.67, "grad_norm": 0.9723709331417107, "kl": 0.42384618520736694, "learning_rate": 2.51481095764778e-06, "loss": 0.1496, "step": 909, "step_loss": 0.1434541940689087 }, { "epoch": 2.68, "grad_norm": 0.953374267251741, "kl": 0.5374601483345032, "learning_rate": 2.510663953353075e-06, "loss": 0.1485, "step": 910, "step_loss": 0.1526307612657547 }, { "epoch": 2.68, "grad_norm": 1.020275779519007, "kl": 0.5095154047012329, "learning_rate": 2.50651777124106e-06, "loss": 0.1547, "step": 911, "step_loss": 0.15365783870220184 }, { "epoch": 2.68, "grad_norm": 1.0064179434331657, "kl": 0.5424807667732239, "learning_rate": 2.502372425554968e-06, "loss": 0.1491, "step": 912, "step_loss": 0.1669929325580597 }, { "epoch": 2.68, "grad_norm": 0.9590723032371087, "kl": 0.3669721484184265, "learning_rate": 2.4982279305351605e-06, "loss": 0.149, "step": 913, "step_loss": 0.14819172024726868 }, { "epoch": 2.69, "grad_norm": 1.0423700278607653, "kl": 0.40059924125671387, "learning_rate": 2.4940843004190727e-06, "loss": 0.1528, "step": 914, "step_loss": 0.14792829751968384 }, { "epoch": 2.69, "grad_norm": 1.059798761869846, "kl": 0.39160391688346863, "learning_rate": 2.4899415494411736e-06, "loss": 0.1494, "step": 915, "step_loss": 0.14644666016101837 }, { "epoch": 2.69, "grad_norm": 0.994269671804963, "kl": 0.4729336202144623, "learning_rate": 2.4857996918329093e-06, "loss": 0.1548, "step": 916, "step_loss": 0.14490240812301636 }, { "epoch": 2.7, "grad_norm": 1.0274421441712134, "kl": 0.4249011278152466, "learning_rate": 2.481658741822656e-06, "loss": 0.1528, "step": 917, "step_loss": 0.1569293737411499 }, { "epoch": 2.7, "grad_norm": 1.1032766707978614, "kl": 0.44397133588790894, "learning_rate": 2.4775187136356732e-06, "loss": 0.1509, "step": 918, "step_loss": 0.13812614977359772 }, { "epoch": 2.7, "grad_norm": 1.0393047337226677, "kl": 0.40771737694740295, "learning_rate": 2.4733796214940565e-06, "loss": 0.1559, "step": 919, "step_loss": 0.1609930545091629 }, { "epoch": 2.7, "grad_norm": 1.0454766342784834, "kl": 0.40909823775291443, "learning_rate": 2.469241479616681e-06, "loss": 0.1562, "step": 920, "step_loss": 0.15960478782653809 }, { "epoch": 2.71, "grad_norm": 0.9710746223118797, "kl": 0.357599139213562, "learning_rate": 2.4651043022191605e-06, "loss": 0.1409, "step": 921, "step_loss": 0.12360851466655731 }, { "epoch": 2.71, "grad_norm": 1.0366911538769703, "kl": 0.44713571667671204, "learning_rate": 2.4609681035137944e-06, "loss": 0.1515, "step": 922, "step_loss": 0.13877364993095398 }, { "epoch": 2.71, "grad_norm": 0.9935669508473964, "kl": 0.47438859939575195, "learning_rate": 2.456832897709521e-06, "loss": 0.1502, "step": 923, "step_loss": 0.17184007167816162 }, { "epoch": 2.72, "grad_norm": 1.0048074202805686, "kl": 0.3735441565513611, "learning_rate": 2.4526986990118672e-06, "loss": 0.1583, "step": 924, "step_loss": 0.14378073811531067 }, { "epoch": 2.72, "grad_norm": 0.9409483433063506, "kl": 0.3914346694946289, "learning_rate": 2.4485655216228986e-06, "loss": 0.1476, "step": 925, "step_loss": 0.15655651688575745 }, { "epoch": 2.72, "grad_norm": 1.0223306684022924, "kl": 0.424472451210022, "learning_rate": 2.444433379741176e-06, "loss": 0.1541, "step": 926, "step_loss": 0.15379807353019714 }, { "epoch": 2.73, "grad_norm": 0.9893200206120092, "kl": 0.47619765996932983, "learning_rate": 2.4403022875617e-06, "loss": 0.1467, "step": 927, "step_loss": 0.17208687961101532 }, { "epoch": 2.73, "grad_norm": 1.0262447178852225, "kl": 0.47813111543655396, "learning_rate": 2.436172259275866e-06, "loss": 0.1623, "step": 928, "step_loss": 0.13537657260894775 }, { "epoch": 2.73, "grad_norm": 0.9861270661846792, "kl": 0.38649582862854004, "learning_rate": 2.4320433090714134e-06, "loss": 0.1476, "step": 929, "step_loss": 0.1501408964395523 }, { "epoch": 2.73, "grad_norm": 0.9853040761251798, "kl": 0.4063931107521057, "learning_rate": 2.4279154511323823e-06, "loss": 0.1615, "step": 930, "step_loss": 0.15353356301784515 }, { "epoch": 2.74, "grad_norm": 0.9205228458296352, "kl": 0.3394644260406494, "learning_rate": 2.4237886996390556e-06, "loss": 0.1427, "step": 931, "step_loss": 0.1389724165201187 }, { "epoch": 2.74, "grad_norm": 0.9932025613140306, "kl": 0.3473202884197235, "learning_rate": 2.4196630687679173e-06, "loss": 0.1505, "step": 932, "step_loss": 0.1683613657951355 }, { "epoch": 2.74, "grad_norm": 0.984733712974178, "kl": 0.40478530526161194, "learning_rate": 2.415538572691602e-06, "loss": 0.1463, "step": 933, "step_loss": 0.13838434219360352 }, { "epoch": 2.75, "grad_norm": 0.9806916184951824, "kl": 0.4532083570957184, "learning_rate": 2.4114152255788466e-06, "loss": 0.1518, "step": 934, "step_loss": 0.13839900493621826 }, { "epoch": 2.75, "grad_norm": 0.9979938112250495, "kl": 0.389826238155365, "learning_rate": 2.407293041594439e-06, "loss": 0.156, "step": 935, "step_loss": 0.1437515914440155 }, { "epoch": 2.75, "grad_norm": 0.978297759451275, "kl": 0.43591850996017456, "learning_rate": 2.4031720348991734e-06, "loss": 0.1506, "step": 936, "step_loss": 0.1359221488237381 }, { "epoch": 2.75, "grad_norm": 0.9961809157585862, "kl": 0.33996838331222534, "learning_rate": 2.399052219649799e-06, "loss": 0.145, "step": 937, "step_loss": 0.14791721105575562 }, { "epoch": 2.76, "grad_norm": 0.9450751235156168, "kl": 0.42210879921913147, "learning_rate": 2.3949336099989724e-06, "loss": 0.1503, "step": 938, "step_loss": 0.16048789024353027 }, { "epoch": 2.76, "grad_norm": 0.9925709982011522, "kl": 0.4403047561645508, "learning_rate": 2.390816220095207e-06, "loss": 0.1551, "step": 939, "step_loss": 0.16639472544193268 }, { "epoch": 2.76, "grad_norm": 0.9326817674895876, "kl": 0.4106891453266144, "learning_rate": 2.386700064082827e-06, "loss": 0.1437, "step": 940, "step_loss": 0.13783779740333557 }, { "epoch": 2.77, "grad_norm": 0.9993440741834876, "kl": 0.5168544054031372, "learning_rate": 2.38258515610192e-06, "loss": 0.1586, "step": 941, "step_loss": 0.19751232862472534 }, { "epoch": 2.77, "grad_norm": 0.951222600504691, "kl": 0.39257577061653137, "learning_rate": 2.3784715102882834e-06, "loss": 0.1512, "step": 942, "step_loss": 0.1248694509267807 }, { "epoch": 2.77, "grad_norm": 0.9983634073528408, "kl": 0.34117716550827026, "learning_rate": 2.3743591407733797e-06, "loss": 0.1574, "step": 943, "step_loss": 0.15840350091457367 }, { "epoch": 2.78, "grad_norm": 1.0089856202771001, "kl": 0.5630459785461426, "learning_rate": 2.3702480616842865e-06, "loss": 0.1612, "step": 944, "step_loss": 0.17266206443309784 }, { "epoch": 2.78, "grad_norm": 1.0153619278805137, "kl": 0.518592894077301, "learning_rate": 2.36613828714365e-06, "loss": 0.1595, "step": 945, "step_loss": 0.18611447513103485 }, { "epoch": 2.78, "grad_norm": 0.9672443779904987, "kl": 0.4448012113571167, "learning_rate": 2.362029831269634e-06, "loss": 0.1558, "step": 946, "step_loss": 0.14080186188220978 }, { "epoch": 2.78, "grad_norm": 0.9611084638649775, "kl": 0.38354170322418213, "learning_rate": 2.357922708175872e-06, "loss": 0.1469, "step": 947, "step_loss": 0.15614628791809082 }, { "epoch": 2.79, "grad_norm": 1.0312340475062312, "kl": 0.42923545837402344, "learning_rate": 2.353816931971419e-06, "loss": 0.1509, "step": 948, "step_loss": 0.15058480203151703 }, { "epoch": 2.79, "grad_norm": 1.0325988848719911, "kl": 0.37678343057632446, "learning_rate": 2.3497125167607027e-06, "loss": 0.153, "step": 949, "step_loss": 0.14883080124855042 }, { "epoch": 2.79, "grad_norm": 0.9819062626097106, "kl": 0.5016992688179016, "learning_rate": 2.345609476643477e-06, "loss": 0.1428, "step": 950, "step_loss": 0.15665948390960693 }, { "epoch": 2.8, "grad_norm": 1.0064440018956071, "kl": 0.4498019814491272, "learning_rate": 2.341507825714771e-06, "loss": 0.1593, "step": 951, "step_loss": 0.1523018330335617 }, { "epoch": 2.8, "grad_norm": 0.942831756703654, "kl": 0.508718729019165, "learning_rate": 2.337407578064842e-06, "loss": 0.1473, "step": 952, "step_loss": 0.16131407022476196 }, { "epoch": 2.8, "grad_norm": 1.0213628080249857, "kl": 0.47834068536758423, "learning_rate": 2.3333087477791257e-06, "loss": 0.1581, "step": 953, "step_loss": 0.16310566663742065 }, { "epoch": 2.8, "grad_norm": 0.9611082313028335, "kl": 0.3989601135253906, "learning_rate": 2.3292113489381895e-06, "loss": 0.152, "step": 954, "step_loss": 0.13949620723724365 }, { "epoch": 2.81, "grad_norm": 0.9853678646194656, "kl": 0.3734014332294464, "learning_rate": 2.325115395617683e-06, "loss": 0.1535, "step": 955, "step_loss": 0.14914605021476746 }, { "epoch": 2.81, "grad_norm": 0.9778583931081463, "kl": 0.45069605112075806, "learning_rate": 2.3210209018882913e-06, "loss": 0.1524, "step": 956, "step_loss": 0.15262170135974884 }, { "epoch": 2.81, "grad_norm": 1.014172101673639, "kl": 0.5114811062812805, "learning_rate": 2.316927881815683e-06, "loss": 0.1451, "step": 957, "step_loss": 0.16915518045425415 }, { "epoch": 2.82, "grad_norm": 0.9415660896574684, "kl": 0.39761587977409363, "learning_rate": 2.312836349460467e-06, "loss": 0.153, "step": 958, "step_loss": 0.1431863158941269 }, { "epoch": 2.82, "grad_norm": 1.0285671477663938, "kl": 0.2906627655029297, "learning_rate": 2.3087463188781408e-06, "loss": 0.1559, "step": 959, "step_loss": 0.12965397536754608 }, { "epoch": 2.82, "grad_norm": 0.9929048693237944, "kl": 0.4464694559574127, "learning_rate": 2.3046578041190403e-06, "loss": 0.1486, "step": 960, "step_loss": 0.15477749705314636 }, { "epoch": 2.83, "grad_norm": 1.0822745094493649, "kl": 0.4634704291820526, "learning_rate": 2.3005708192282984e-06, "loss": 0.1604, "step": 961, "step_loss": 0.17266973853111267 }, { "epoch": 2.83, "grad_norm": 1.0941722735914765, "kl": 0.47234082221984863, "learning_rate": 2.2964853782457887e-06, "loss": 0.149, "step": 962, "step_loss": 0.14461389183998108 }, { "epoch": 2.83, "grad_norm": 1.0574454523000933, "kl": 0.39766812324523926, "learning_rate": 2.2924014952060843e-06, "loss": 0.1603, "step": 963, "step_loss": 0.14928704500198364 }, { "epoch": 2.83, "grad_norm": 0.9067579687361036, "kl": 0.4438409209251404, "learning_rate": 2.288319184138403e-06, "loss": 0.1408, "step": 964, "step_loss": 0.14300301671028137 }, { "epoch": 2.84, "grad_norm": 1.0342452737185248, "kl": 0.3799823820590973, "learning_rate": 2.2842384590665644e-06, "loss": 0.155, "step": 965, "step_loss": 0.15365462005138397 }, { "epoch": 2.84, "grad_norm": 1.0444298271978016, "kl": 0.4924103617668152, "learning_rate": 2.280159334008941e-06, "loss": 0.1507, "step": 966, "step_loss": 0.15880931913852692 }, { "epoch": 2.84, "grad_norm": 0.9599603942062377, "kl": 0.41578635573387146, "learning_rate": 2.2760818229784065e-06, "loss": 0.1504, "step": 967, "step_loss": 0.1449252814054489 }, { "epoch": 2.85, "grad_norm": 1.0134939283764037, "kl": 0.46528518199920654, "learning_rate": 2.2720059399822906e-06, "loss": 0.1545, "step": 968, "step_loss": 0.1586332619190216 }, { "epoch": 2.85, "grad_norm": 1.0347690361891235, "kl": 0.41343602538108826, "learning_rate": 2.2679316990223314e-06, "loss": 0.1578, "step": 969, "step_loss": 0.17001797258853912 }, { "epoch": 2.85, "grad_norm": 1.0055593394957056, "kl": 0.45835837721824646, "learning_rate": 2.263859114094625e-06, "loss": 0.1463, "step": 970, "step_loss": 0.13045182824134827 }, { "epoch": 2.85, "grad_norm": 0.9567730457998137, "kl": 0.5038785934448242, "learning_rate": 2.259788199189579e-06, "loss": 0.1515, "step": 971, "step_loss": 0.17427542805671692 }, { "epoch": 2.86, "grad_norm": 1.0010068414341897, "kl": 0.4056503474712372, "learning_rate": 2.255718968291864e-06, "loss": 0.157, "step": 972, "step_loss": 0.15841376781463623 }, { "epoch": 2.86, "grad_norm": 1.0107117284340932, "kl": 0.45705166459083557, "learning_rate": 2.251651435380364e-06, "loss": 0.1571, "step": 973, "step_loss": 0.14534001052379608 }, { "epoch": 2.86, "grad_norm": 0.990000349150702, "kl": 0.5228754878044128, "learning_rate": 2.2475856144281345e-06, "loss": 0.1379, "step": 974, "step_loss": 0.16290828585624695 }, { "epoch": 2.87, "grad_norm": 1.0008871196700513, "kl": 0.3806186020374298, "learning_rate": 2.2435215194023453e-06, "loss": 0.1525, "step": 975, "step_loss": 0.1635514348745346 }, { "epoch": 2.87, "grad_norm": 1.0960477399243882, "kl": 0.45375317335128784, "learning_rate": 2.239459164264238e-06, "loss": 0.1535, "step": 976, "step_loss": 0.18511676788330078 }, { "epoch": 2.87, "grad_norm": 1.0218003843637917, "kl": 0.37781763076782227, "learning_rate": 2.2353985629690793e-06, "loss": 0.1475, "step": 977, "step_loss": 0.12731696665287018 }, { "epoch": 2.88, "grad_norm": 0.9017717274035716, "kl": 0.3964046239852905, "learning_rate": 2.231339729466111e-06, "loss": 0.143, "step": 978, "step_loss": 0.13375751674175262 }, { "epoch": 2.88, "grad_norm": 0.9619019793221093, "kl": 0.36923855543136597, "learning_rate": 2.2272826776984985e-06, "loss": 0.1486, "step": 979, "step_loss": 0.15310907363891602 }, { "epoch": 2.88, "grad_norm": 1.0027381543220892, "kl": 0.45206311345100403, "learning_rate": 2.223227421603289e-06, "loss": 0.1508, "step": 980, "step_loss": 0.17637795209884644 }, { "epoch": 2.88, "grad_norm": 1.047855188766988, "kl": 0.44840162992477417, "learning_rate": 2.2191739751113624e-06, "loss": 0.1611, "step": 981, "step_loss": 0.1478584259748459 }, { "epoch": 2.89, "grad_norm": 1.0127988521429687, "kl": 0.5084017515182495, "learning_rate": 2.2151223521473803e-06, "loss": 0.1474, "step": 982, "step_loss": 0.14254876971244812 }, { "epoch": 2.89, "grad_norm": 0.9277534723825935, "kl": 0.4267195463180542, "learning_rate": 2.2110725666297395e-06, "loss": 0.1441, "step": 983, "step_loss": 0.13901741802692413 }, { "epoch": 2.89, "grad_norm": 1.0036892192485454, "kl": 0.3984602987766266, "learning_rate": 2.2070246324705253e-06, "loss": 0.143, "step": 984, "step_loss": 0.1377902328968048 }, { "epoch": 2.9, "grad_norm": 0.9646755179948415, "kl": 0.6093090772628784, "learning_rate": 2.2029785635754646e-06, "loss": 0.1476, "step": 985, "step_loss": 0.17706118524074554 }, { "epoch": 2.9, "grad_norm": 0.9905850844659927, "kl": 0.4276701807975769, "learning_rate": 2.1989343738438755e-06, "loss": 0.1525, "step": 986, "step_loss": 0.17314979434013367 }, { "epoch": 2.9, "grad_norm": 1.1708291027675957, "kl": 0.40725067257881165, "learning_rate": 2.1948920771686196e-06, "loss": 0.1567, "step": 987, "step_loss": 0.16095474362373352 }, { "epoch": 2.9, "grad_norm": 1.0739387979713282, "kl": 0.39202889800071716, "learning_rate": 2.1908516874360558e-06, "loss": 0.1485, "step": 988, "step_loss": 0.13703203201293945 }, { "epoch": 2.91, "grad_norm": 0.9536023429872225, "kl": 0.4466555714607239, "learning_rate": 2.1868132185259933e-06, "loss": 0.148, "step": 989, "step_loss": 0.16553649306297302 }, { "epoch": 2.91, "grad_norm": 0.962874004228303, "kl": 0.36752018332481384, "learning_rate": 2.1827766843116427e-06, "loss": 0.1486, "step": 990, "step_loss": 0.1441442370414734 }, { "epoch": 2.91, "grad_norm": 1.0075217426405563, "kl": 0.4437835216522217, "learning_rate": 2.1787420986595664e-06, "loss": 0.1485, "step": 991, "step_loss": 0.15905022621154785 }, { "epoch": 2.92, "grad_norm": 1.016123385289782, "kl": 0.4468748867511749, "learning_rate": 2.1747094754296353e-06, "loss": 0.1452, "step": 992, "step_loss": 0.12368491291999817 }, { "epoch": 2.92, "grad_norm": 0.9569243857996387, "kl": 0.4261481463909149, "learning_rate": 2.170678828474978e-06, "loss": 0.1471, "step": 993, "step_loss": 0.14896030724048615 }, { "epoch": 2.92, "grad_norm": 1.0626260878976141, "kl": 0.38104549050331116, "learning_rate": 2.1666501716419342e-06, "loss": 0.1563, "step": 994, "step_loss": 0.15966196358203888 }, { "epoch": 2.93, "grad_norm": 0.9361293787090408, "kl": 0.4901023805141449, "learning_rate": 2.1626235187700057e-06, "loss": 0.1462, "step": 995, "step_loss": 0.18366771936416626 }, { "epoch": 2.93, "grad_norm": 0.950338934995153, "kl": 0.4179726541042328, "learning_rate": 2.158598883691812e-06, "loss": 0.1494, "step": 996, "step_loss": 0.138540118932724 }, { "epoch": 2.93, "grad_norm": 1.0489853611443787, "kl": 0.506919801235199, "learning_rate": 2.15457628023304e-06, "loss": 0.1668, "step": 997, "step_loss": 0.21282246708869934 }, { "epoch": 2.93, "grad_norm": 1.04818521184514, "kl": 0.5396957993507385, "learning_rate": 2.1505557222123972e-06, "loss": 0.1539, "step": 998, "step_loss": 0.1591482013463974 }, { "epoch": 2.94, "grad_norm": 1.0616666386652713, "kl": 0.4631801247596741, "learning_rate": 2.1465372234415673e-06, "loss": 0.1555, "step": 999, "step_loss": 0.14960813522338867 }, { "epoch": 2.94, "grad_norm": 1.035119268368553, "kl": 0.45855218172073364, "learning_rate": 2.1425207977251544e-06, "loss": 0.1521, "step": 1000, "step_loss": 0.1509908139705658 }, { "epoch": 2.94, "grad_norm": 0.996467609929929, "kl": 0.3887898921966553, "learning_rate": 2.1385064588606463e-06, "loss": 0.1549, "step": 1001, "step_loss": 0.13089656829833984 }, { "epoch": 2.95, "grad_norm": 0.9742558665726649, "kl": 0.40123283863067627, "learning_rate": 2.1344942206383586e-06, "loss": 0.1459, "step": 1002, "step_loss": 0.13350878655910492 }, { "epoch": 2.95, "grad_norm": 0.9488404858578288, "kl": 0.382538765668869, "learning_rate": 2.130484096841393e-06, "loss": 0.1439, "step": 1003, "step_loss": 0.14286507666110992 }, { "epoch": 2.95, "grad_norm": 0.9513353524533048, "kl": 0.42083343863487244, "learning_rate": 2.1264761012455865e-06, "loss": 0.142, "step": 1004, "step_loss": 0.14722788333892822 }, { "epoch": 2.95, "grad_norm": 1.0141753634991808, "kl": 0.4633311629295349, "learning_rate": 2.122470247619464e-06, "loss": 0.1623, "step": 1005, "step_loss": 0.1737322211265564 }, { "epoch": 2.96, "grad_norm": 1.028458994383184, "kl": 0.5300709009170532, "learning_rate": 2.118466549724194e-06, "loss": 0.1536, "step": 1006, "step_loss": 0.16598369181156158 }, { "epoch": 2.96, "grad_norm": 1.029391738602643, "kl": 0.43173086643218994, "learning_rate": 2.1144650213135397e-06, "loss": 0.1624, "step": 1007, "step_loss": 0.14931721985340118 }, { "epoch": 2.96, "grad_norm": 0.9545265742649662, "kl": 0.4387211501598358, "learning_rate": 2.11046567613381e-06, "loss": 0.1437, "step": 1008, "step_loss": 0.16143842041492462 }, { "epoch": 2.97, "grad_norm": 1.0152085404083042, "kl": 0.46170535683631897, "learning_rate": 2.106468527923814e-06, "loss": 0.1575, "step": 1009, "step_loss": 0.14393383264541626 }, { "epoch": 2.97, "grad_norm": 0.9507669139022603, "kl": 0.450935959815979, "learning_rate": 2.1024735904148152e-06, "loss": 0.1556, "step": 1010, "step_loss": 0.14951980113983154 }, { "epoch": 2.97, "grad_norm": 0.9775946084017444, "kl": 0.4422980546951294, "learning_rate": 2.098480877330481e-06, "loss": 0.1544, "step": 1011, "step_loss": 0.14084143936634064 }, { "epoch": 2.98, "grad_norm": 0.9641237236379501, "kl": 0.4498288035392761, "learning_rate": 2.094490402386838e-06, "loss": 0.1566, "step": 1012, "step_loss": 0.13751116394996643 }, { "epoch": 2.98, "grad_norm": 1.0126537288710316, "kl": 0.39250999689102173, "learning_rate": 2.0905021792922235e-06, "loss": 0.1559, "step": 1013, "step_loss": 0.14081251621246338 }, { "epoch": 2.98, "grad_norm": 1.0449214471833967, "kl": 0.5089797377586365, "learning_rate": 2.0865162217472416e-06, "loss": 0.1596, "step": 1014, "step_loss": 0.16164351999759674 }, { "epoch": 2.98, "grad_norm": 0.9832128900915236, "kl": 0.39397111535072327, "learning_rate": 2.08253254344471e-06, "loss": 0.1489, "step": 1015, "step_loss": 0.13195285201072693 }, { "epoch": 2.99, "grad_norm": 1.081208132886941, "kl": 0.38181591033935547, "learning_rate": 2.0785511580696206e-06, "loss": 0.163, "step": 1016, "step_loss": 0.14687636494636536 }, { "epoch": 2.99, "grad_norm": 0.9836684370582969, "kl": 0.5204348564147949, "learning_rate": 2.0745720792990836e-06, "loss": 0.1498, "step": 1017, "step_loss": 0.15303507447242737 }, { "epoch": 2.99, "grad_norm": 1.0707243384591267, "kl": 0.37323451042175293, "learning_rate": 2.070595320802291e-06, "loss": 0.1536, "step": 1018, "step_loss": 0.1562386006116867 }, { "epoch": 3.0, "grad_norm": 0.9179992183742228, "kl": 0.4068221151828766, "learning_rate": 2.0666208962404593e-06, "loss": 0.1383, "step": 1019, "step_loss": 0.1486799120903015 }, { "epoch": 3.0, "grad_norm": 0.9834517584761968, "kl": 0.4073179364204407, "learning_rate": 2.062648819266789e-06, "loss": 0.1498, "step": 1020, "step_loss": 0.15172114968299866 }, { "epoch": 3.0, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_loss": 1.7941198348999023, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_runtime": 14.5917, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_samples_per_second": 6.853, "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_steps_per_second": 0.891, "step": 1020 } ], "logging_steps": 1.0, "max_steps": 1700, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100.0, "total_flos": 87053826223104.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }