{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9988974641675856,
  "eval_steps": 100.0,
  "global_step": 1020,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 19.301212901214466,
      "kl": 0.0,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.6049,
      "step": 1,
      "step_loss": 0.6052899360656738
    },
    {
      "epoch": 0.01,
      "grad_norm": 6.455205328029959,
      "kl": 0.3062567710876465,
      "learning_rate": 2.438044511330269e-06,
      "loss": 0.4588,
      "step": 2,
      "step_loss": 0.46981990337371826
    },
    {
      "epoch": 0.01,
      "grad_norm": 4.434829230563478,
      "kl": 0.23490308225154877,
      "learning_rate": 3.5717278751869343e-06,
      "loss": 0.4361,
      "step": 3,
      "step_loss": 0.4431145191192627
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.1316724496056834,
      "kl": 0.21129530668258667,
      "learning_rate": 4.376089022660538e-06,
      "loss": 0.4118,
      "step": 4,
      "step_loss": 0.371662974357605
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.988460560324433,
      "kl": 0.22684630751609802,
      "learning_rate": 5e-06,
      "loss": 0.3726,
      "step": 5,
      "step_loss": 0.36066734790802
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.378051002744742,
      "kl": 0.3077385723590851,
      "learning_rate": 4.9999961353271305e-06,
      "loss": 0.3356,
      "step": 6,
      "step_loss": 0.36983931064605713
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1071117126920313,
      "kl": 0.28660184144973755,
      "learning_rate": 4.9999845413217956e-06,
      "loss": 0.3443,
      "step": 7,
      "step_loss": 0.33001354336738586
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.126342237334389,
      "kl": 0.2792012393474579,
      "learning_rate": 4.999965218023826e-06,
      "loss": 0.3704,
      "step": 8,
      "step_loss": 0.3890674114227295
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.805551016834456,
      "kl": 0.25425001978874207,
      "learning_rate": 4.999938165499602e-06,
      "loss": 0.3601,
      "step": 9,
      "step_loss": 0.3878689408302307
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.115675523568805,
      "kl": 0.286655992269516,
      "learning_rate": 4.999903383842054e-06,
      "loss": 0.3476,
      "step": 10,
      "step_loss": 0.3587522804737091
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.944073523799986,
      "kl": 0.2786046862602234,
      "learning_rate": 4.9998608731706695e-06,
      "loss": 0.361,
      "step": 11,
      "step_loss": 0.37417733669281006
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.900652962403358,
      "kl": 0.3140088617801666,
      "learning_rate": 4.999810633631482e-06,
      "loss": 0.333,
      "step": 12,
      "step_loss": 0.3321115970611572
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7772964219719185,
      "kl": 0.27589982748031616,
      "learning_rate": 4.999752665397077e-06,
      "loss": 0.332,
      "step": 13,
      "step_loss": 0.2766149938106537
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.784571061327797,
      "kl": 0.30012714862823486,
      "learning_rate": 4.999686968666592e-06,
      "loss": 0.3256,
      "step": 14,
      "step_loss": 0.2897532284259796
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7357127297345116,
      "kl": 0.27369552850723267,
      "learning_rate": 4.999613543665713e-06,
      "loss": 0.3343,
      "step": 15,
      "step_loss": 0.3049730062484741
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9257598473402229,
      "kl": 0.36154788732528687,
      "learning_rate": 4.999532390646673e-06,
      "loss": 0.3378,
      "step": 16,
      "step_loss": 0.3547108769416809
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4906210741355268,
      "kl": 0.2691032588481903,
      "learning_rate": 4.999443509888254e-06,
      "loss": 0.3175,
      "step": 17,
      "step_loss": 0.29635873436927795
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6048699907577806,
      "kl": 0.34884747862815857,
      "learning_rate": 4.999346901695787e-06,
      "loss": 0.3237,
      "step": 18,
      "step_loss": 0.3572104871273041
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5670463795130773,
      "kl": 0.2717525362968445,
      "learning_rate": 4.999242566401145e-06,
      "loss": 0.3306,
      "step": 19,
      "step_loss": 0.31295859813690186
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.682513968323289,
      "kl": 0.2477482557296753,
      "learning_rate": 4.999130504362748e-06,
      "loss": 0.3383,
      "step": 20,
      "step_loss": 0.3229523301124573
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6243623252629549,
      "kl": 0.348207950592041,
      "learning_rate": 4.9990107159655565e-06,
      "loss": 0.3172,
      "step": 21,
      "step_loss": 0.4179743528366089
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6607049641076566,
      "kl": 0.3793472945690155,
      "learning_rate": 4.998883201621079e-06,
      "loss": 0.3397,
      "step": 22,
      "step_loss": 0.39602553844451904
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5170388362033584,
      "kl": 0.3244696259498596,
      "learning_rate": 4.998747961767359e-06,
      "loss": 0.3197,
      "step": 23,
      "step_loss": 0.3359769582748413
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6128730811774565,
      "kl": 0.28219160437583923,
      "learning_rate": 4.998604996868982e-06,
      "loss": 0.3442,
      "step": 24,
      "step_loss": 0.3883013129234314
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4539005711940118,
      "kl": 0.23452766239643097,
      "learning_rate": 4.998454307417071e-06,
      "loss": 0.3093,
      "step": 25,
      "step_loss": 0.3024548590183258
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5624222068494429,
      "kl": 0.3776319622993469,
      "learning_rate": 4.998295893929281e-06,
      "loss": 0.3279,
      "step": 26,
      "step_loss": 0.3177269399166107
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5954783642036205,
      "kl": 0.27478423714637756,
      "learning_rate": 4.998129756949807e-06,
      "loss": 0.3057,
      "step": 27,
      "step_loss": 0.2898721396923065
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.541698773367659,
      "kl": 0.31704697012901306,
      "learning_rate": 4.997955897049373e-06,
      "loss": 0.3468,
      "step": 28,
      "step_loss": 0.3161465525627136
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.464173735929187,
      "kl": 0.2408154010772705,
      "learning_rate": 4.997774314825233e-06,
      "loss": 0.3114,
      "step": 29,
      "step_loss": 0.2493884265422821
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5382515760575184,
      "kl": 0.2777164876461029,
      "learning_rate": 4.997585010901172e-06,
      "loss": 0.3129,
      "step": 30,
      "step_loss": 0.2662698030471802
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.456667059415303,
      "kl": 0.2807072401046753,
      "learning_rate": 4.9973879859274966e-06,
      "loss": 0.3041,
      "step": 31,
      "step_loss": 0.2914745509624481
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5999842892560905,
      "kl": 0.2716163098812103,
      "learning_rate": 4.997183240581041e-06,
      "loss": 0.328,
      "step": 32,
      "step_loss": 0.2906668484210968
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4212712274071648,
      "kl": 0.29081669449806213,
      "learning_rate": 4.996970775565161e-06,
      "loss": 0.3008,
      "step": 33,
      "step_loss": 0.30645477771759033
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5615238192082477,
      "kl": 0.2825222611427307,
      "learning_rate": 4.996750591609727e-06,
      "loss": 0.3209,
      "step": 34,
      "step_loss": 0.3189748525619507
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3807731720214431,
      "kl": 0.3141458034515381,
      "learning_rate": 4.9965226894711316e-06,
      "loss": 0.3363,
      "step": 35,
      "step_loss": 0.3879620432853699
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4557787239042146,
      "kl": 0.34700241684913635,
      "learning_rate": 4.996287069932278e-06,
      "loss": 0.3064,
      "step": 36,
      "step_loss": 0.39095190167427063
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5774051610848923,
      "kl": 0.3121364116668701,
      "learning_rate": 4.996043733802583e-06,
      "loss": 0.3243,
      "step": 37,
      "step_loss": 0.30228227376937866
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4430150717016268,
      "kl": 0.2903325855731964,
      "learning_rate": 4.995792681917968e-06,
      "loss": 0.2911,
      "step": 38,
      "step_loss": 0.2797442078590393
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3795537860052318,
      "kl": 0.328932523727417,
      "learning_rate": 4.995533915140866e-06,
      "loss": 0.2866,
      "step": 39,
      "step_loss": 0.2982497215270996
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3084724825566474,
      "kl": 0.26037484407424927,
      "learning_rate": 4.995267434360207e-06,
      "loss": 0.3049,
      "step": 40,
      "step_loss": 0.2707076370716095
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.546767703101615,
      "kl": 0.25849828124046326,
      "learning_rate": 4.9949932404914245e-06,
      "loss": 0.2885,
      "step": 41,
      "step_loss": 0.2949169874191284
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4065895730214004,
      "kl": 0.3979896306991577,
      "learning_rate": 4.9947113344764455e-06,
      "loss": 0.3045,
      "step": 42,
      "step_loss": 0.32115134596824646
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5259965481994058,
      "kl": 0.3463974595069885,
      "learning_rate": 4.994421717283693e-06,
      "loss": 0.304,
      "step": 43,
      "step_loss": 0.3217414617538452
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2978285994841212,
      "kl": 0.36674511432647705,
      "learning_rate": 4.994124389908078e-06,
      "loss": 0.2864,
      "step": 44,
      "step_loss": 0.31553012132644653
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5564349432288815,
      "kl": 0.35473960638046265,
      "learning_rate": 4.993819353370999e-06,
      "loss": 0.3335,
      "step": 45,
      "step_loss": 0.3283192217350006
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3575598996472378,
      "kl": 0.2651940584182739,
      "learning_rate": 4.993506608720339e-06,
      "loss": 0.301,
      "step": 46,
      "step_loss": 0.2609683871269226
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3229608497099903,
      "kl": 0.2759368121623993,
      "learning_rate": 4.9931861570304555e-06,
      "loss": 0.2925,
      "step": 47,
      "step_loss": 0.3200822174549103
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.368724787057477,
      "kl": 0.3092648684978485,
      "learning_rate": 4.992857999402187e-06,
      "loss": 0.2718,
      "step": 48,
      "step_loss": 0.2981921136379242
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3728861165253508,
      "kl": 0.3472633361816406,
      "learning_rate": 4.992522136962841e-06,
      "loss": 0.3126,
      "step": 49,
      "step_loss": 0.31947529315948486
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4242257817482427,
      "kl": 0.28193366527557373,
      "learning_rate": 4.992178570866195e-06,
      "loss": 0.2964,
      "step": 50,
      "step_loss": 0.2877271771430969
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.521655434234139,
      "kl": 0.2764663100242615,
      "learning_rate": 4.9918273022924885e-06,
      "loss": 0.3052,
      "step": 51,
      "step_loss": 0.2541694641113281
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5242796013210014,
      "kl": 0.2966528832912445,
      "learning_rate": 4.991468332448422e-06,
      "loss": 0.3304,
      "step": 52,
      "step_loss": 0.30177876353263855
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4177402595981858,
      "kl": 0.2607875168323517,
      "learning_rate": 4.991101662567153e-06,
      "loss": 0.3214,
      "step": 53,
      "step_loss": 0.27815118432044983
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4497114613800353,
      "kl": 0.36675119400024414,
      "learning_rate": 4.990727293908288e-06,
      "loss": 0.3141,
      "step": 54,
      "step_loss": 0.34744372963905334
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.476617226772246,
      "kl": 0.2716200351715088,
      "learning_rate": 4.990345227757884e-06,
      "loss": 0.298,
      "step": 55,
      "step_loss": 0.25762778520584106
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3365233291298146,
      "kl": 0.29236650466918945,
      "learning_rate": 4.989955465428438e-06,
      "loss": 0.2763,
      "step": 56,
      "step_loss": 0.27397677302360535
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4599412693497766,
      "kl": 0.27795305848121643,
      "learning_rate": 4.989558008258888e-06,
      "loss": 0.3043,
      "step": 57,
      "step_loss": 0.24359291791915894
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5617623113651034,
      "kl": 0.35907837748527527,
      "learning_rate": 4.9891528576146046e-06,
      "loss": 0.325,
      "step": 58,
      "step_loss": 0.32902687788009644
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4059355225202563,
      "kl": 0.307645320892334,
      "learning_rate": 4.988740014887386e-06,
      "loss": 0.3028,
      "step": 59,
      "step_loss": 0.3135125935077667
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4501862291250671,
      "kl": 0.32680854201316833,
      "learning_rate": 4.9883194814954575e-06,
      "loss": 0.3073,
      "step": 60,
      "step_loss": 0.34192976355552673
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3852935344310822,
      "kl": 0.33248478174209595,
      "learning_rate": 4.987891258883463e-06,
      "loss": 0.3086,
      "step": 61,
      "step_loss": 0.3534170389175415
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4199406597673683,
      "kl": 0.28986871242523193,
      "learning_rate": 4.98745534852246e-06,
      "loss": 0.3038,
      "step": 62,
      "step_loss": 0.3005980849266052
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5113547919536734,
      "kl": 0.30484679341316223,
      "learning_rate": 4.987011751909917e-06,
      "loss": 0.3044,
      "step": 63,
      "step_loss": 0.23517432808876038
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3355819496710448,
      "kl": 0.3742017447948456,
      "learning_rate": 4.986560470569704e-06,
      "loss": 0.3017,
      "step": 64,
      "step_loss": 0.3898337483406067
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4221577520408863,
      "kl": 0.3570794463157654,
      "learning_rate": 4.986101506052093e-06,
      "loss": 0.3022,
      "step": 65,
      "step_loss": 0.30060654878616333
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6366629670681416,
      "kl": 0.32745978236198425,
      "learning_rate": 4.9856348599337485e-06,
      "loss": 0.3361,
      "step": 66,
      "step_loss": 0.31522005796432495
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3857470049246778,
      "kl": 0.284152626991272,
      "learning_rate": 4.985160533817723e-06,
      "loss": 0.2951,
      "step": 67,
      "step_loss": 0.27435213327407837
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3848855487240277,
      "kl": 0.3354739546775818,
      "learning_rate": 4.984678529333453e-06,
      "loss": 0.2879,
      "step": 68,
      "step_loss": 0.3034001886844635
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4341508578031887,
      "kl": 0.25649285316467285,
      "learning_rate": 4.984188848136751e-06,
      "loss": 0.3164,
      "step": 69,
      "step_loss": 0.30513542890548706
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3467125947401988,
      "kl": 0.2996070384979248,
      "learning_rate": 4.983691491909802e-06,
      "loss": 0.3019,
      "step": 70,
      "step_loss": 0.36908990144729614
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3474866013931235,
      "kl": 0.3084847927093506,
      "learning_rate": 4.9831864623611564e-06,
      "loss": 0.3008,
      "step": 71,
      "step_loss": 0.26990407705307007
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4862590377731744,
      "kl": 0.31708824634552,
      "learning_rate": 4.982673761225724e-06,
      "loss": 0.314,
      "step": 72,
      "step_loss": 0.2802667021751404
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3719248630090615,
      "kl": 0.2884005010128021,
      "learning_rate": 4.982153390264769e-06,
      "loss": 0.2902,
      "step": 73,
      "step_loss": 0.3215486705303192
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6257809266397576,
      "kl": 0.4290885329246521,
      "learning_rate": 4.981625351265903e-06,
      "loss": 0.3466,
      "step": 74,
      "step_loss": 0.38507044315338135
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4642429493015807,
      "kl": 0.3809298872947693,
      "learning_rate": 4.9810896460430805e-06,
      "loss": 0.3213,
      "step": 75,
      "step_loss": 0.43292292952537537
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2483432519354851,
      "kl": 0.3046913146972656,
      "learning_rate": 4.980546276436591e-06,
      "loss": 0.2913,
      "step": 76,
      "step_loss": 0.29639115929603577
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.380629384097335,
      "kl": 0.3005834221839905,
      "learning_rate": 4.979995244313052e-06,
      "loss": 0.3037,
      "step": 77,
      "step_loss": 0.2509528696537018
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4850279125005055,
      "kl": 0.3294805884361267,
      "learning_rate": 4.979436551565407e-06,
      "loss": 0.3246,
      "step": 78,
      "step_loss": 0.2669539451599121
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4283473816232348,
      "kl": 0.29898595809936523,
      "learning_rate": 4.9788702001129105e-06,
      "loss": 0.3092,
      "step": 79,
      "step_loss": 0.35289207100868225
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4847956412758032,
      "kl": 0.244553804397583,
      "learning_rate": 4.97829619190113e-06,
      "loss": 0.3042,
      "step": 80,
      "step_loss": 0.23004528880119324
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2924538131518895,
      "kl": 0.2728404104709625,
      "learning_rate": 4.977714528901938e-06,
      "loss": 0.2793,
      "step": 81,
      "step_loss": 0.2652290165424347
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2587616697753488,
      "kl": 0.2822519540786743,
      "learning_rate": 4.9771252131135e-06,
      "loss": 0.2952,
      "step": 82,
      "step_loss": 0.2914755642414093
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3835708779976017,
      "kl": 0.3696300983428955,
      "learning_rate": 4.976528246560269e-06,
      "loss": 0.3029,
      "step": 83,
      "step_loss": 0.36016547679901123
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2530361733858713,
      "kl": 0.31230488419532776,
      "learning_rate": 4.975923631292988e-06,
      "loss": 0.2898,
      "step": 84,
      "step_loss": 0.33970096707344055
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4116112976895483,
      "kl": 0.312380850315094,
      "learning_rate": 4.975311369388667e-06,
      "loss": 0.2915,
      "step": 85,
      "step_loss": 0.3011205196380615
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3375751652497407,
      "kl": 0.3674446940422058,
      "learning_rate": 4.974691462950589e-06,
      "loss": 0.3105,
      "step": 86,
      "step_loss": 0.3819746971130371
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3695626037107396,
      "kl": 0.3539569675922394,
      "learning_rate": 4.974063914108297e-06,
      "loss": 0.3069,
      "step": 87,
      "step_loss": 0.36265525221824646
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4398198275304508,
      "kl": 0.30112671852111816,
      "learning_rate": 4.9734287250175865e-06,
      "loss": 0.3125,
      "step": 88,
      "step_loss": 0.2848939299583435
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3830922269493662,
      "kl": 0.33735549449920654,
      "learning_rate": 4.9727858978605e-06,
      "loss": 0.3147,
      "step": 89,
      "step_loss": 0.3306404650211334
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2728858115467476,
      "kl": 0.28279662132263184,
      "learning_rate": 4.97213543484532e-06,
      "loss": 0.3045,
      "step": 90,
      "step_loss": 0.27660509943962097
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5403404739139803,
      "kl": 0.27295035123825073,
      "learning_rate": 4.97147733820656e-06,
      "loss": 0.2961,
      "step": 91,
      "step_loss": 0.26971620321273804
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.411561429621046,
      "kl": 0.37973371148109436,
      "learning_rate": 4.970811610204954e-06,
      "loss": 0.299,
      "step": 92,
      "step_loss": 0.326732337474823
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4805034744095673,
      "kl": 0.3346588611602783,
      "learning_rate": 4.970138253127456e-06,
      "loss": 0.3116,
      "step": 93,
      "step_loss": 0.32256820797920227
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2537072876512962,
      "kl": 0.31613579392433167,
      "learning_rate": 4.969457269287224e-06,
      "loss": 0.2909,
      "step": 94,
      "step_loss": 0.3002708852291107
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3783445970477886,
      "kl": 0.2733086943626404,
      "learning_rate": 4.968768661023619e-06,
      "loss": 0.3092,
      "step": 95,
      "step_loss": 0.3080819547176361
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2682216286353625,
      "kl": 0.2610551714897156,
      "learning_rate": 4.968072430702193e-06,
      "loss": 0.2839,
      "step": 96,
      "step_loss": 0.25847068428993225
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2077676939950335,
      "kl": 0.298378050327301,
      "learning_rate": 4.967368580714681e-06,
      "loss": 0.2803,
      "step": 97,
      "step_loss": 0.2736283242702484
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2148373717113006,
      "kl": 0.27147936820983887,
      "learning_rate": 4.966657113478992e-06,
      "loss": 0.2765,
      "step": 98,
      "step_loss": 0.30714210867881775
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4139521118408638,
      "kl": 0.32129478454589844,
      "learning_rate": 4.9659380314392075e-06,
      "loss": 0.3138,
      "step": 99,
      "step_loss": 0.334412157535553
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3294266549115017,
      "kl": 0.35117053985595703,
      "learning_rate": 4.965211337065563e-06,
      "loss": 0.3057,
      "step": 100,
      "step_loss": 0.30289411544799805
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.424742291386975,
      "kl": 0.30909568071365356,
      "learning_rate": 4.964477032854448e-06,
      "loss": 0.3141,
      "step": 101,
      "step_loss": 0.3024054765701294
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4494693467464015,
      "kl": 0.3519325256347656,
      "learning_rate": 4.963735121328389e-06,
      "loss": 0.3074,
      "step": 102,
      "step_loss": 0.29212692379951477
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4137157099247653,
      "kl": 0.32887011766433716,
      "learning_rate": 4.9629856050360505e-06,
      "loss": 0.3085,
      "step": 103,
      "step_loss": 0.2845655679702759
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3535308616848474,
      "kl": 0.3814646005630493,
      "learning_rate": 4.962228486552219e-06,
      "loss": 0.3086,
      "step": 104,
      "step_loss": 0.4175484776496887
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.264600598229456,
      "kl": 0.32557374238967896,
      "learning_rate": 4.961463768477797e-06,
      "loss": 0.3065,
      "step": 105,
      "step_loss": 0.3005172908306122
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3367387253981593,
      "kl": 0.33622100949287415,
      "learning_rate": 4.960691453439793e-06,
      "loss": 0.3221,
      "step": 106,
      "step_loss": 0.32829591631889343
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.285114925381023,
      "kl": 0.415163516998291,
      "learning_rate": 4.9599115440913145e-06,
      "loss": 0.3033,
      "step": 107,
      "step_loss": 0.3966817557811737
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2697189074985138,
      "kl": 0.31419163942337036,
      "learning_rate": 4.9591240431115565e-06,
      "loss": 0.2899,
      "step": 108,
      "step_loss": 0.28133562207221985
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2984645133760384,
      "kl": 0.2992507219314575,
      "learning_rate": 4.9583289532057925e-06,
      "loss": 0.2957,
      "step": 109,
      "step_loss": 0.3047301173210144
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2959997601773783,
      "kl": 0.3358232080936432,
      "learning_rate": 4.9575262771053666e-06,
      "loss": 0.2977,
      "step": 110,
      "step_loss": 0.3054252564907074
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3678091978992384,
      "kl": 0.337202250957489,
      "learning_rate": 4.956716017567685e-06,
      "loss": 0.3189,
      "step": 111,
      "step_loss": 0.3637933135032654
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3266191585020017,
      "kl": 0.2727779150009155,
      "learning_rate": 4.955898177376204e-06,
      "loss": 0.2895,
      "step": 112,
      "step_loss": 0.27312493324279785
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3644833661607003,
      "kl": 0.27666690945625305,
      "learning_rate": 4.95507275934042e-06,
      "loss": 0.314,
      "step": 113,
      "step_loss": 0.251804381608963
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2828415661770218,
      "kl": 0.28860220313072205,
      "learning_rate": 4.954239766295862e-06,
      "loss": 0.2829,
      "step": 114,
      "step_loss": 0.2733534574508667
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.225743500205446,
      "kl": 0.2747955322265625,
      "learning_rate": 4.953399201104084e-06,
      "loss": 0.2794,
      "step": 115,
      "step_loss": 0.23778927326202393
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.229862820827967,
      "kl": 0.35592517256736755,
      "learning_rate": 4.952551066652648e-06,
      "loss": 0.2758,
      "step": 116,
      "step_loss": 0.3347897529602051
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4102429181039295,
      "kl": 0.3126868009567261,
      "learning_rate": 4.951695365855122e-06,
      "loss": 0.2897,
      "step": 117,
      "step_loss": 0.3053089380264282
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.332632214515851,
      "kl": 0.341084748506546,
      "learning_rate": 4.950832101651063e-06,
      "loss": 0.2992,
      "step": 118,
      "step_loss": 0.3318370282649994
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2233149853258531,
      "kl": 0.29855814576148987,
      "learning_rate": 4.949961277006013e-06,
      "loss": 0.289,
      "step": 119,
      "step_loss": 0.27190065383911133
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2346324893530762,
      "kl": 0.28264421224594116,
      "learning_rate": 4.949082894911485e-06,
      "loss": 0.2996,
      "step": 120,
      "step_loss": 0.28687310218811035
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2793881099869688,
      "kl": 0.27299419045448303,
      "learning_rate": 4.948196958384955e-06,
      "loss": 0.3025,
      "step": 121,
      "step_loss": 0.23233090341091156
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2998410971433687,
      "kl": 0.29575178027153015,
      "learning_rate": 4.9473034704698485e-06,
      "loss": 0.2962,
      "step": 122,
      "step_loss": 0.2823527753353119
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2555805745200317,
      "kl": 0.28942999243736267,
      "learning_rate": 4.9464024342355335e-06,
      "loss": 0.2914,
      "step": 123,
      "step_loss": 0.2781384587287903
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2708803374034965,
      "kl": 0.3180427551269531,
      "learning_rate": 4.945493852777307e-06,
      "loss": 0.2944,
      "step": 124,
      "step_loss": 0.31637904047966003
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3848259809116499,
      "kl": 0.33197423815727234,
      "learning_rate": 4.944577729216388e-06,
      "loss": 0.3152,
      "step": 125,
      "step_loss": 0.3225075900554657
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4354123007048643,
      "kl": 0.2686159908771515,
      "learning_rate": 4.943654066699904e-06,
      "loss": 0.3118,
      "step": 126,
      "step_loss": 0.29845237731933594
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2899412223451328,
      "kl": 0.3330647349357605,
      "learning_rate": 4.942722868400879e-06,
      "loss": 0.3322,
      "step": 127,
      "step_loss": 0.3406273424625397
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2878537840081934,
      "kl": 0.31060031056404114,
      "learning_rate": 4.941784137518227e-06,
      "loss": 0.2967,
      "step": 128,
      "step_loss": 0.28862464427948
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.31852289386673,
      "kl": 0.3059355914592743,
      "learning_rate": 4.940837877276735e-06,
      "loss": 0.2919,
      "step": 129,
      "step_loss": 0.30133622884750366
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.243803418751941,
      "kl": 0.32812631130218506,
      "learning_rate": 4.93988409092706e-06,
      "loss": 0.2982,
      "step": 130,
      "step_loss": 0.2712858319282532
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.206601958490433,
      "kl": 0.2982441484928131,
      "learning_rate": 4.93892278174571e-06,
      "loss": 0.2717,
      "step": 131,
      "step_loss": 0.2625717520713806
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2840226109970796,
      "kl": 0.3162402808666229,
      "learning_rate": 4.937953953035035e-06,
      "loss": 0.2973,
      "step": 132,
      "step_loss": 0.3028516471385956
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.219921942425963,
      "kl": 0.3007054030895233,
      "learning_rate": 4.93697760812322e-06,
      "loss": 0.2999,
      "step": 133,
      "step_loss": 0.29070332646369934
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3505734810630552,
      "kl": 0.3097812533378601,
      "learning_rate": 4.935993750364267e-06,
      "loss": 0.3213,
      "step": 134,
      "step_loss": 0.32063156366348267
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2960801503834385,
      "kl": 0.3124713599681854,
      "learning_rate": 4.9350023831379885e-06,
      "loss": 0.292,
      "step": 135,
      "step_loss": 0.2716798782348633
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3346284118716634,
      "kl": 0.299164354801178,
      "learning_rate": 4.934003509849993e-06,
      "loss": 0.292,
      "step": 136,
      "step_loss": 0.29498571157455444
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.338862702232099,
      "kl": 0.3523540198802948,
      "learning_rate": 4.932997133931676e-06,
      "loss": 0.2999,
      "step": 137,
      "step_loss": 0.2898944914340973
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.299533142110482,
      "kl": 0.3428666591644287,
      "learning_rate": 4.931983258840204e-06,
      "loss": 0.2808,
      "step": 138,
      "step_loss": 0.3303877115249634
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2814240918861108,
      "kl": 0.26252108812332153,
      "learning_rate": 4.930961888058506e-06,
      "loss": 0.2909,
      "step": 139,
      "step_loss": 0.23940859735012054
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2407806935373367,
      "kl": 0.2580229341983795,
      "learning_rate": 4.929933025095262e-06,
      "loss": 0.2906,
      "step": 140,
      "step_loss": 0.2781831920146942
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.315404278595456,
      "kl": 0.3604077398777008,
      "learning_rate": 4.928896673484888e-06,
      "loss": 0.3072,
      "step": 141,
      "step_loss": 0.3967340588569641
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.286676458510305,
      "kl": 0.32844799757003784,
      "learning_rate": 4.9278528367875275e-06,
      "loss": 0.2836,
      "step": 142,
      "step_loss": 0.2578602731227875
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2526938379221886,
      "kl": 0.32336488366127014,
      "learning_rate": 4.926801518589035e-06,
      "loss": 0.2991,
      "step": 143,
      "step_loss": 0.2511914372444153
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.196299407291449,
      "kl": 0.239657461643219,
      "learning_rate": 4.9257427225009665e-06,
      "loss": 0.2827,
      "step": 144,
      "step_loss": 0.23767231404781342
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.253688615112022,
      "kl": 0.36846810579299927,
      "learning_rate": 4.924676452160568e-06,
      "loss": 0.2971,
      "step": 145,
      "step_loss": 0.32303857803344727
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2337718709400438,
      "kl": 0.292164146900177,
      "learning_rate": 4.92360271123076e-06,
      "loss": 0.2999,
      "step": 146,
      "step_loss": 0.3450307250022888
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4234900890899038,
      "kl": 0.3000570237636566,
      "learning_rate": 4.922521503400125e-06,
      "loss": 0.297,
      "step": 147,
      "step_loss": 0.2996768355369568
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2489090450298066,
      "kl": 0.2863319516181946,
      "learning_rate": 4.921432832382901e-06,
      "loss": 0.2896,
      "step": 148,
      "step_loss": 0.28483325242996216
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.230750460392728,
      "kl": 0.30758440494537354,
      "learning_rate": 4.92033670191896e-06,
      "loss": 0.2884,
      "step": 149,
      "step_loss": 0.2748796045780182
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3306176467968267,
      "kl": 0.26451659202575684,
      "learning_rate": 4.9192331157738e-06,
      "loss": 0.29,
      "step": 150,
      "step_loss": 0.2788347601890564
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.332331055940497,
      "kl": 0.2376236617565155,
      "learning_rate": 4.918122077738533e-06,
      "loss": 0.2961,
      "step": 151,
      "step_loss": 0.24186082184314728
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.332600066594834,
      "kl": 0.3817494511604309,
      "learning_rate": 4.917003591629867e-06,
      "loss": 0.307,
      "step": 152,
      "step_loss": 0.33062270283699036
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2536110337354542,
      "kl": 0.2526598870754242,
      "learning_rate": 4.915877661290099e-06,
      "loss": 0.291,
      "step": 153,
      "step_loss": 0.24536053836345673
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2674122835595976,
      "kl": 0.37763291597366333,
      "learning_rate": 4.914744290587096e-06,
      "loss": 0.2976,
      "step": 154,
      "step_loss": 0.3559175431728363
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.144770437772112,
      "kl": 0.28675389289855957,
      "learning_rate": 4.913603483414291e-06,
      "loss": 0.2843,
      "step": 155,
      "step_loss": 0.26313164830207825
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2867088389290005,
      "kl": 0.30408233404159546,
      "learning_rate": 4.912455243690654e-06,
      "loss": 0.2895,
      "step": 156,
      "step_loss": 0.305271178483963
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3337139732461034,
      "kl": 0.33538201451301575,
      "learning_rate": 4.911299575360694e-06,
      "loss": 0.2829,
      "step": 157,
      "step_loss": 0.2805282771587372
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2603558835697868,
      "kl": 0.2967783808708191,
      "learning_rate": 4.910136482394439e-06,
      "loss": 0.2987,
      "step": 158,
      "step_loss": 0.266804963350296
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1861792045426607,
      "kl": 0.32278546690940857,
      "learning_rate": 4.90896596878742e-06,
      "loss": 0.2737,
      "step": 159,
      "step_loss": 0.2891842722892761
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.168008822771122,
      "kl": 0.3604351878166199,
      "learning_rate": 4.907788038560661e-06,
      "loss": 0.2703,
      "step": 160,
      "step_loss": 0.3211236596107483
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2273672100425606,
      "kl": 0.3063991069793701,
      "learning_rate": 4.906602695760665e-06,
      "loss": 0.2919,
      "step": 161,
      "step_loss": 0.25892752408981323
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4264755544249268,
      "kl": 0.2818457782268524,
      "learning_rate": 4.905409944459397e-06,
      "loss": 0.2934,
      "step": 162,
      "step_loss": 0.2786937654018402
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.314284320892201,
      "kl": 0.32207655906677246,
      "learning_rate": 4.904209788754275e-06,
      "loss": 0.2931,
      "step": 163,
      "step_loss": 0.23789873719215393
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1804456232659608,
      "kl": 0.33073878288269043,
      "learning_rate": 4.903002232768151e-06,
      "loss": 0.2776,
      "step": 164,
      "step_loss": 0.2962447702884674
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2497392620900494,
      "kl": 0.2782425284385681,
      "learning_rate": 4.9017872806493e-06,
      "loss": 0.2918,
      "step": 165,
      "step_loss": 0.2765083611011505
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2704086792818736,
      "kl": 0.3183959722518921,
      "learning_rate": 4.900564936571404e-06,
      "loss": 0.2873,
      "step": 166,
      "step_loss": 0.2902598977088928
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2954983816000556,
      "kl": 0.29798054695129395,
      "learning_rate": 4.899335204733538e-06,
      "loss": 0.3052,
      "step": 167,
      "step_loss": 0.2802087068557739
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2070703839621497,
      "kl": 0.35096466541290283,
      "learning_rate": 4.8980980893601575e-06,
      "loss": 0.2918,
      "step": 168,
      "step_loss": 0.3088727593421936
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3158993296815968,
      "kl": 0.27438434958457947,
      "learning_rate": 4.8968535947010795e-06,
      "loss": 0.2913,
      "step": 169,
      "step_loss": 0.2575715184211731
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2643025871656506,
      "kl": 0.27822235226631165,
      "learning_rate": 4.895601725031475e-06,
      "loss": 0.2934,
      "step": 170,
      "step_loss": 0.2611542344093323
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.152592001860982,
      "kl": 0.3118104934692383,
      "learning_rate": 4.894342484651846e-06,
      "loss": 0.2776,
      "step": 171,
      "step_loss": 0.29376712441444397
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2558097414261615,
      "kl": 0.31661081314086914,
      "learning_rate": 4.893075877888018e-06,
      "loss": 0.3067,
      "step": 172,
      "step_loss": 0.302161306142807
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4838421517721772,
      "kl": 0.33207058906555176,
      "learning_rate": 4.891801909091119e-06,
      "loss": 0.3207,
      "step": 173,
      "step_loss": 0.3354288935661316
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.270609611872136,
      "kl": 0.3163268566131592,
      "learning_rate": 4.8905205826375705e-06,
      "loss": 0.3031,
      "step": 174,
      "step_loss": 0.285269558429718
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.278119700217976,
      "kl": 0.29490146040916443,
      "learning_rate": 4.8892319029290685e-06,
      "loss": 0.2906,
      "step": 175,
      "step_loss": 0.3601941168308258
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3328158565268786,
      "kl": 0.287243127822876,
      "learning_rate": 4.887935874392567e-06,
      "loss": 0.3141,
      "step": 176,
      "step_loss": 0.27393481135368347
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.146954506657001,
      "kl": 0.3274082541465759,
      "learning_rate": 4.886632501480269e-06,
      "loss": 0.2816,
      "step": 177,
      "step_loss": 0.3594622015953064
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2568621036203667,
      "kl": 0.32911595702171326,
      "learning_rate": 4.885321788669604e-06,
      "loss": 0.3038,
      "step": 178,
      "step_loss": 0.2939574420452118
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.215908552897293,
      "kl": 0.2502468228340149,
      "learning_rate": 4.884003740463219e-06,
      "loss": 0.2902,
      "step": 179,
      "step_loss": 0.24465596675872803
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1611108031885955,
      "kl": 0.30213692784309387,
      "learning_rate": 4.882678361388958e-06,
      "loss": 0.274,
      "step": 180,
      "step_loss": 0.2538335919380188
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.301100728893655,
      "kl": 0.3141769766807556,
      "learning_rate": 4.88134565599985e-06,
      "loss": 0.3048,
      "step": 181,
      "step_loss": 0.25083863735198975
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1533263462306118,
      "kl": 0.31978100538253784,
      "learning_rate": 4.880005628874088e-06,
      "loss": 0.2979,
      "step": 182,
      "step_loss": 0.2340894490480423
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2517414717516113,
      "kl": 0.2578886151313782,
      "learning_rate": 4.878658284615023e-06,
      "loss": 0.2888,
      "step": 183,
      "step_loss": 0.25022444128990173
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2245537412134813,
      "kl": 0.3525405824184418,
      "learning_rate": 4.877303627851138e-06,
      "loss": 0.2856,
      "step": 184,
      "step_loss": 0.30141592025756836
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.191367958579159,
      "kl": 0.2949683368206024,
      "learning_rate": 4.875941663236039e-06,
      "loss": 0.2811,
      "step": 185,
      "step_loss": 0.27863818407058716
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2248110810400246,
      "kl": 0.31630921363830566,
      "learning_rate": 4.874572395448432e-06,
      "loss": 0.288,
      "step": 186,
      "step_loss": 0.26466599106788635
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.1677774508685308,
      "kl": 0.3383273482322693,
      "learning_rate": 4.8731958291921174e-06,
      "loss": 0.2646,
      "step": 187,
      "step_loss": 0.23358532786369324
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2534617173778235,
      "kl": 0.3180442452430725,
      "learning_rate": 4.871811969195963e-06,
      "loss": 0.2795,
      "step": 188,
      "step_loss": 0.2650742828845978
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3623516662591668,
      "kl": 0.36272916197776794,
      "learning_rate": 4.870420820213896e-06,
      "loss": 0.3179,
      "step": 189,
      "step_loss": 0.34722331166267395
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.139199587731826,
      "kl": 0.31969937682151794,
      "learning_rate": 4.869022387024879e-06,
      "loss": 0.2761,
      "step": 190,
      "step_loss": 0.26098594069480896
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2194709601641671,
      "kl": 0.34485194087028503,
      "learning_rate": 4.867616674432903e-06,
      "loss": 0.3146,
      "step": 191,
      "step_loss": 0.3215685486793518
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2744845374487466,
      "kl": 0.3000357747077942,
      "learning_rate": 4.8662036872669615e-06,
      "loss": 0.2898,
      "step": 192,
      "step_loss": 0.3152067959308624
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2219115844478998,
      "kl": 0.26532527804374695,
      "learning_rate": 4.864783430381039e-06,
      "loss": 0.2993,
      "step": 193,
      "step_loss": 0.2950400710105896
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3029827184325222,
      "kl": 0.35824403166770935,
      "learning_rate": 4.863355908654095e-06,
      "loss": 0.2951,
      "step": 194,
      "step_loss": 0.3127448856830597
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3611804602947277,
      "kl": 0.3048401474952698,
      "learning_rate": 4.861921126990045e-06,
      "loss": 0.3019,
      "step": 195,
      "step_loss": 0.3276352882385254
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2537615861956364,
      "kl": 0.32806965708732605,
      "learning_rate": 4.860479090317742e-06,
      "loss": 0.3018,
      "step": 196,
      "step_loss": 0.2849041223526001
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.305638823054846,
      "kl": 0.3609757423400879,
      "learning_rate": 4.859029803590966e-06,
      "loss": 0.3146,
      "step": 197,
      "step_loss": 0.304736852645874
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.180396492221588,
      "kl": 0.31104931235313416,
      "learning_rate": 4.8575732717884e-06,
      "loss": 0.307,
      "step": 198,
      "step_loss": 0.32223203778266907
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3152524549772768,
      "kl": 0.30966895818710327,
      "learning_rate": 4.856109499913615e-06,
      "loss": 0.2935,
      "step": 199,
      "step_loss": 0.29456788301467896
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.214126043731043,
      "kl": 0.3455277681350708,
      "learning_rate": 4.854638492995056e-06,
      "loss": 0.2749,
      "step": 200,
      "step_loss": 0.362245112657547
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2555037212901567,
      "kl": 0.23420512676239014,
      "learning_rate": 4.853160256086021e-06,
      "loss": 0.2803,
      "step": 201,
      "step_loss": 0.25304749608039856
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2278744065777365,
      "kl": 0.3848443627357483,
      "learning_rate": 4.8516747942646465e-06,
      "loss": 0.3211,
      "step": 202,
      "step_loss": 0.382394403219223
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1839836158359853,
      "kl": 0.3287525475025177,
      "learning_rate": 4.850182112633885e-06,
      "loss": 0.2913,
      "step": 203,
      "step_loss": 0.29712358117103577
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1211578271078935,
      "kl": 0.33271652460098267,
      "learning_rate": 4.8486822163214944e-06,
      "loss": 0.2738,
      "step": 204,
      "step_loss": 0.28494173288345337
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2261387888906659,
      "kl": 0.304698646068573,
      "learning_rate": 4.847175110480015e-06,
      "loss": 0.2984,
      "step": 205,
      "step_loss": 0.28125837445259094
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3388127733966415,
      "kl": 0.30615222454071045,
      "learning_rate": 4.8456608002867555e-06,
      "loss": 0.2859,
      "step": 206,
      "step_loss": 0.2698904275894165
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.179491051996919,
      "kl": 0.31592032313346863,
      "learning_rate": 4.844139290943771e-06,
      "loss": 0.2909,
      "step": 207,
      "step_loss": 0.2879568934440613
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.1817492222340549,
      "kl": 0.3141896724700928,
      "learning_rate": 4.84261058767785e-06,
      "loss": 0.2832,
      "step": 208,
      "step_loss": 0.24105487763881683
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.275156633488967,
      "kl": 0.4037202000617981,
      "learning_rate": 4.841074695740493e-06,
      "loss": 0.2875,
      "step": 209,
      "step_loss": 0.3415408432483673
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.188350536024408,
      "kl": 0.32159894704818726,
      "learning_rate": 4.839531620407895e-06,
      "loss": 0.3031,
      "step": 210,
      "step_loss": 0.27604830265045166
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1429229933157632,
      "kl": 0.32619747519493103,
      "learning_rate": 4.837981366980928e-06,
      "loss": 0.2812,
      "step": 211,
      "step_loss": 0.27371087670326233
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2193809489970755,
      "kl": 0.33697012066841125,
      "learning_rate": 4.836423940785124e-06,
      "loss": 0.2775,
      "step": 212,
      "step_loss": 0.27547112107276917
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2348631847135687,
      "kl": 0.28872644901275635,
      "learning_rate": 4.834859347170654e-06,
      "loss": 0.2913,
      "step": 213,
      "step_loss": 0.2788584232330322
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2474162160473052,
      "kl": 0.3392971456050873,
      "learning_rate": 4.8332875915123105e-06,
      "loss": 0.2998,
      "step": 214,
      "step_loss": 0.34434232115745544
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.323303913774139,
      "kl": 0.3054981827735901,
      "learning_rate": 4.831708679209491e-06,
      "loss": 0.3121,
      "step": 215,
      "step_loss": 0.28162604570388794
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1929513261742986,
      "kl": 0.3404708206653595,
      "learning_rate": 4.830122615686177e-06,
      "loss": 0.3063,
      "step": 216,
      "step_loss": 0.29514598846435547
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1911094796199106,
      "kl": 0.33788585662841797,
      "learning_rate": 4.828529406390917e-06,
      "loss": 0.2854,
      "step": 217,
      "step_loss": 0.31073516607284546
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2665507498308344,
      "kl": 0.36928457021713257,
      "learning_rate": 4.826929056796807e-06,
      "loss": 0.2902,
      "step": 218,
      "step_loss": 0.35527801513671875
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2680076553195618,
      "kl": 0.39868634939193726,
      "learning_rate": 4.8253215724014725e-06,
      "loss": 0.2981,
      "step": 219,
      "step_loss": 0.27999529242515564
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2775727396335816,
      "kl": 0.24477747082710266,
      "learning_rate": 4.823706958727045e-06,
      "loss": 0.3168,
      "step": 220,
      "step_loss": 0.2902810871601105
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1655079488814604,
      "kl": 0.3722260594367981,
      "learning_rate": 4.822085221320152e-06,
      "loss": 0.2937,
      "step": 221,
      "step_loss": 0.30477461218833923
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1528789635120378,
      "kl": 0.3633630871772766,
      "learning_rate": 4.820456365751891e-06,
      "loss": 0.2733,
      "step": 222,
      "step_loss": 0.2682224214076996
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2833936250857951,
      "kl": 0.4017341732978821,
      "learning_rate": 4.818820397617811e-06,
      "loss": 0.3113,
      "step": 223,
      "step_loss": 0.36901068687438965
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2106488345236557,
      "kl": 0.2505451738834381,
      "learning_rate": 4.817177322537898e-06,
      "loss": 0.3016,
      "step": 224,
      "step_loss": 0.2669680416584015
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2935886740881541,
      "kl": 0.35546159744262695,
      "learning_rate": 4.815527146156548e-06,
      "loss": 0.3008,
      "step": 225,
      "step_loss": 0.30147963762283325
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2631269633217965,
      "kl": 0.39503854513168335,
      "learning_rate": 4.8138698741425535e-06,
      "loss": 0.2868,
      "step": 226,
      "step_loss": 0.2804335057735443
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2621588928369527,
      "kl": 0.3259708285331726,
      "learning_rate": 4.812205512189083e-06,
      "loss": 0.2943,
      "step": 227,
      "step_loss": 0.30732688307762146
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.252311030511955,
      "kl": 0.3171556293964386,
      "learning_rate": 4.8105340660136614e-06,
      "loss": 0.2912,
      "step": 228,
      "step_loss": 0.3541552722454071
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.1582157784039353,
      "kl": 0.33646219968795776,
      "learning_rate": 4.8088555413581495e-06,
      "loss": 0.2862,
      "step": 229,
      "step_loss": 0.3080594539642334
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1909622553262202,
      "kl": 0.3843502402305603,
      "learning_rate": 4.8071699439887215e-06,
      "loss": 0.3056,
      "step": 230,
      "step_loss": 0.3147525489330292
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2594429963263456,
      "kl": 0.26849618554115295,
      "learning_rate": 4.805477279695852e-06,
      "loss": 0.298,
      "step": 231,
      "step_loss": 0.2553267776966095
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.1702087636485823,
      "kl": 0.40651583671569824,
      "learning_rate": 4.8037775542942925e-06,
      "loss": 0.2827,
      "step": 232,
      "step_loss": 0.2711586356163025
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1234639927336083,
      "kl": 0.32860204577445984,
      "learning_rate": 4.802070773623047e-06,
      "loss": 0.2687,
      "step": 233,
      "step_loss": 0.24301442503929138
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.232534630116778,
      "kl": 0.392067551612854,
      "learning_rate": 4.80035694354536e-06,
      "loss": 0.2943,
      "step": 234,
      "step_loss": 0.3448218107223511
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1963956326096639,
      "kl": 0.3147091567516327,
      "learning_rate": 4.798636069948692e-06,
      "loss": 0.2856,
      "step": 235,
      "step_loss": 0.30948758125305176
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2201677781179745,
      "kl": 0.26646876335144043,
      "learning_rate": 4.7969081587446994e-06,
      "loss": 0.2745,
      "step": 236,
      "step_loss": 0.23111504316329956
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.0792412024548768,
      "kl": 0.2925267517566681,
      "learning_rate": 4.795173215869214e-06,
      "loss": 0.2691,
      "step": 237,
      "step_loss": 0.2031397670507431
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3799205324021417,
      "kl": 0.3084944486618042,
      "learning_rate": 4.7934312472822255e-06,
      "loss": 0.3316,
      "step": 238,
      "step_loss": 0.2557719945907593
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.156691853407518,
      "kl": 0.318729043006897,
      "learning_rate": 4.791682258967856e-06,
      "loss": 0.2844,
      "step": 239,
      "step_loss": 0.23593303561210632
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2618289049089157,
      "kl": 0.3763369619846344,
      "learning_rate": 4.789926256934344e-06,
      "loss": 0.2928,
      "step": 240,
      "step_loss": 0.2985472083091736
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.182146333032642,
      "kl": 0.33732131123542786,
      "learning_rate": 4.788163247214021e-06,
      "loss": 0.2874,
      "step": 241,
      "step_loss": 0.2921789586544037
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2611562106682654,
      "kl": 0.26897132396698,
      "learning_rate": 4.786393235863292e-06,
      "loss": 0.2863,
      "step": 242,
      "step_loss": 0.24371370673179626
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1667739600277132,
      "kl": 0.3665682077407837,
      "learning_rate": 4.7846162289626156e-06,
      "loss": 0.2726,
      "step": 243,
      "step_loss": 0.33350181579589844
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.159930396653472,
      "kl": 0.294426292181015,
      "learning_rate": 4.78283223261648e-06,
      "loss": 0.2855,
      "step": 244,
      "step_loss": 0.34132063388824463
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.251219143005887,
      "kl": 0.3149415850639343,
      "learning_rate": 4.781041252953385e-06,
      "loss": 0.2863,
      "step": 245,
      "step_loss": 0.31999310851097107
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3041399931947542,
      "kl": 0.34632056951522827,
      "learning_rate": 4.779243296125821e-06,
      "loss": 0.3093,
      "step": 246,
      "step_loss": 0.2359839826822281
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1281402721586493,
      "kl": 0.3858156204223633,
      "learning_rate": 4.777438368310246e-06,
      "loss": 0.275,
      "step": 247,
      "step_loss": 0.3059740364551544
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.182215527114777,
      "kl": 0.3195875883102417,
      "learning_rate": 4.775626475707062e-06,
      "loss": 0.2887,
      "step": 248,
      "step_loss": 0.2916240692138672
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.192317476599235,
      "kl": 0.3068830370903015,
      "learning_rate": 4.773807624540603e-06,
      "loss": 0.2722,
      "step": 249,
      "step_loss": 0.23078405857086182
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.1808958496558022,
      "kl": 0.32459574937820435,
      "learning_rate": 4.771981821059103e-06,
      "loss": 0.2646,
      "step": 250,
      "step_loss": 0.250786155462265
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2752507061465592,
      "kl": 0.3301911950111389,
      "learning_rate": 4.770149071534681e-06,
      "loss": 0.2828,
      "step": 251,
      "step_loss": 0.30227547883987427
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3749281257570973,
      "kl": 0.3217305839061737,
      "learning_rate": 4.7683093822633155e-06,
      "loss": 0.2692,
      "step": 252,
      "step_loss": 0.27843326330184937
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2384381600974261,
      "kl": 0.3240608274936676,
      "learning_rate": 4.766462759564828e-06,
      "loss": 0.3051,
      "step": 253,
      "step_loss": 0.2549932599067688
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1891702525457484,
      "kl": 0.3199639320373535,
      "learning_rate": 4.764609209782856e-06,
      "loss": 0.3027,
      "step": 254,
      "step_loss": 0.30427688360214233
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2920719505595144,
      "kl": 0.2981690764427185,
      "learning_rate": 4.762748739284832e-06,
      "loss": 0.2795,
      "step": 255,
      "step_loss": 0.25225040316581726
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3848628129737062,
      "kl": 0.38679319620132446,
      "learning_rate": 4.760881354461966e-06,
      "loss": 0.3075,
      "step": 256,
      "step_loss": 0.3289315700531006
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2842520225661949,
      "kl": 0.29211699962615967,
      "learning_rate": 4.7590070617292214e-06,
      "loss": 0.2903,
      "step": 257,
      "step_loss": 0.2732706367969513
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.303090202772597,
| "kl": 0.2750818729400635, | |
| "learning_rate": 4.757125867525287e-06, | |
| "loss": 0.2855, | |
| "step": 258, | |
| "step_loss": 0.24878114461898804 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.2016632749663505, | |
| "kl": 0.31452032923698425, | |
| "learning_rate": 4.755237778312564e-06, | |
| "loss": 0.2779, | |
| "step": 259, | |
| "step_loss": 0.27004343271255493 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.2724450520997204, | |
| "kl": 0.33003339171409607, | |
| "learning_rate": 4.753342800577139e-06, | |
| "loss": 0.2929, | |
| "step": 260, | |
| "step_loss": 0.2996165454387665 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.1644344183728652, | |
| "kl": 0.31851598620414734, | |
| "learning_rate": 4.751440940828761e-06, | |
| "loss": 0.2968, | |
| "step": 261, | |
| "step_loss": 0.31100180745124817 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.1708153702363653, | |
| "kl": 0.2956541180610657, | |
| "learning_rate": 4.749532205600825e-06, | |
| "loss": 0.2749, | |
| "step": 262, | |
| "step_loss": 0.26357224583625793 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.2113982585346696, | |
| "kl": 0.29637524485588074, | |
| "learning_rate": 4.747616601450337e-06, | |
| "loss": 0.2976, | |
| "step": 263, | |
| "step_loss": 0.2821243405342102 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.2136481249314546, | |
| "kl": 0.32876235246658325, | |
| "learning_rate": 4.74569413495791e-06, | |
| "loss": 0.2818, | |
| "step": 264, | |
| "step_loss": 0.2800009548664093 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.2613044034795275, | |
| "kl": 0.38935160636901855, | |
| "learning_rate": 4.743764812727722e-06, | |
| "loss": 0.3063, | |
| "step": 265, | |
| "step_loss": 0.3427608907222748 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.0797444840559929, | |
| "kl": 0.3119351267814636, | |
| "learning_rate": 4.741828641387505e-06, | |
| "loss": 0.2676, | |
| "step": 266, | |
| "step_loss": 0.2635708153247833 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.152621213192496, | |
| "kl": 0.36095455288887024, | |
| "learning_rate": 4.739885627588522e-06, | |
| "loss": 0.2825, | |
| "step": 267, | |
| "step_loss": 0.2881103754043579 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.2119555039739955, | |
| "kl": 0.36039823293685913, | |
| "learning_rate": 4.73793577800554e-06, | |
| "loss": 0.3022, | |
| "step": 268, | |
| "step_loss": 0.29259294271469116 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.1482140459926162, | |
| "kl": 0.2709002196788788, | |
| "learning_rate": 4.735979099336807e-06, | |
| "loss": 0.2862, | |
| "step": 269, | |
| "step_loss": 0.23157480359077454 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.1692731329110126, | |
| "kl": 0.3389941453933716, | |
| "learning_rate": 4.734015598304034e-06, | |
| "loss": 0.2711, | |
| "step": 270, | |
| "step_loss": 0.2778770327568054 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.2258612213402873, | |
| "kl": 0.35436713695526123, | |
| "learning_rate": 4.732045281652366e-06, | |
| "loss": 0.3091, | |
| "step": 271, | |
| "step_loss": 0.3284461200237274 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.133815899776919, | |
| "kl": 0.31201237440109253, | |
| "learning_rate": 4.730068156150363e-06, | |
| "loss": 0.2656, | |
| "step": 272, | |
| "step_loss": 0.3073486387729645 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.2017193050777555, | |
| "kl": 0.29038453102111816, | |
| "learning_rate": 4.728084228589973e-06, | |
| "loss": 0.2846, | |
| "step": 273, | |
| "step_loss": 0.2407042682170868 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.2777490002160443, | |
| "kl": 0.2809942364692688, | |
| "learning_rate": 4.726093505786515e-06, | |
| "loss": 0.281, | |
| "step": 274, | |
| "step_loss": 0.2713775932788849 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.1867307379834706, | |
| "kl": 0.39103490114212036, | |
| "learning_rate": 4.724095994578648e-06, | |
| "loss": 0.3022, | |
| "step": 275, | |
| "step_loss": 0.38512590527534485 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.1650998402655737, | |
| "kl": 0.36318308115005493, | |
| "learning_rate": 4.72209170182835e-06, | |
| "loss": 0.2815, | |
| "step": 276, | |
| "step_loss": 0.3397858142852783 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.0795928320116555, | |
| "kl": 0.36297377943992615, | |
| "learning_rate": 4.720080634420899e-06, | |
| "loss": 0.2818, | |
| "step": 277, | |
| "step_loss": 0.3083583116531372 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.1780687430025762, | |
| "kl": 0.3422907590866089, | |
| "learning_rate": 4.7180627992648435e-06, | |
| "loss": 0.2789, | |
| "step": 278, | |
| "step_loss": 0.27315691113471985 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.2103849934862, | |
| "kl": 0.29663997888565063, | |
| "learning_rate": 4.7160382032919824e-06, | |
| "loss": 0.2971, | |
| "step": 279, | |
| "step_loss": 0.27249854803085327 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.3222585308836912, | |
| "kl": 0.3770950436592102, | |
| "learning_rate": 4.714006853457339e-06, | |
| "loss": 0.2972, | |
| "step": 280, | |
| "step_loss": 0.325216680765152 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.162522628779284, | |
| "kl": 0.313273549079895, | |
| "learning_rate": 4.711968756739136e-06, | |
| "loss": 0.289, | |
| "step": 281, | |
| "step_loss": 0.30059880018234253 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.2370630041790278, | |
| "kl": 0.3747837245464325, | |
| "learning_rate": 4.709923920138776e-06, | |
| "loss": 0.3016, | |
| "step": 282, | |
| "step_loss": 0.28908976912498474 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.140130660961975, | |
| "kl": 0.3109118342399597, | |
| "learning_rate": 4.707872350680816e-06, | |
| "loss": 0.2839, | |
| "step": 283, | |
| "step_loss": 0.276600182056427 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.082299712166179, | |
| "kl": 0.27061912417411804, | |
| "learning_rate": 4.705814055412938e-06, | |
| "loss": 0.266, | |
| "step": 284, | |
| "step_loss": 0.2920036315917969 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.21073161805326, | |
| "kl": 0.318640798330307, | |
| "learning_rate": 4.703749041405932e-06, | |
| "loss": 0.2725, | |
| "step": 285, | |
| "step_loss": 0.2592398524284363 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.219448433556087, | |
| "kl": 0.35519281029701233, | |
| "learning_rate": 4.701677315753668e-06, | |
| "loss": 0.2874, | |
| "step": 286, | |
| "step_loss": 0.3630865514278412 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.2097930175912097, | |
| "kl": 0.41031721234321594, | |
| "learning_rate": 4.699598885573072e-06, | |
| "loss": 0.3032, | |
| "step": 287, | |
| "step_loss": 0.3249673843383789 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.1521100017307662, | |
| "kl": 0.3674570620059967, | |
| "learning_rate": 4.697513758004102e-06, | |
| "loss": 0.2849, | |
| "step": 288, | |
| "step_loss": 0.30098646879196167 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.1074540740753043, | |
| "kl": 0.32119742035865784, | |
| "learning_rate": 4.695421940209723e-06, | |
| "loss": 0.2832, | |
| "step": 289, | |
| "step_loss": 0.2897493541240692 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.2293554204206822, | |
| "kl": 0.3377346098423004, | |
| "learning_rate": 4.693323439375885e-06, | |
| "loss": 0.293, | |
| "step": 290, | |
| "step_loss": 0.2627705931663513 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.2069388480758467, | |
| "kl": 0.3485328257083893, | |
| "learning_rate": 4.691218262711491e-06, | |
| "loss": 0.2867, | |
| "step": 291, | |
| "step_loss": 0.38160112500190735 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.1419057253236133, | |
| "kl": 0.29920992255210876, | |
| "learning_rate": 4.6891064174483816e-06, | |
| "loss": 0.2793, | |
| "step": 292, | |
| "step_loss": 0.27294811606407166 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.1050106622647566, | |
| "kl": 0.31865233182907104, | |
| "learning_rate": 4.686987910841304e-06, | |
| "loss": 0.2834, | |
| "step": 293, | |
| "step_loss": 0.24811354279518127 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.2316064086381218, | |
| "kl": 0.36698243021965027, | |
| "learning_rate": 4.684862750167891e-06, | |
| "loss": 0.288, | |
| "step": 294, | |
| "step_loss": 0.3681890070438385 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.0625124958649887, | |
| "kl": 0.31484031677246094, | |
| "learning_rate": 4.6827309427286295e-06, | |
| "loss": 0.2633, | |
| "step": 295, | |
| "step_loss": 0.24335134029388428 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.0972510299173677, | |
| "kl": 0.37381210923194885, | |
| "learning_rate": 4.680592495846845e-06, | |
| "loss": 0.2779, | |
| "step": 296, | |
| "step_loss": 0.34890639781951904 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.2397594353527759, | |
| "kl": 0.3720863163471222, | |
| "learning_rate": 4.678447416868667e-06, | |
| "loss": 0.2813, | |
| "step": 297, | |
| "step_loss": 0.335337370634079 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.085078132679382, | |
| "kl": 0.37017738819122314, | |
| "learning_rate": 4.67629571316301e-06, | |
| "loss": 0.2752, | |
| "step": 298, | |
| "step_loss": 0.29323574900627136 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.1972337254915437, | |
| "kl": 0.3234942853450775, | |
| "learning_rate": 4.6741373921215475e-06, | |
| "loss": 0.293, | |
| "step": 299, | |
| "step_loss": 0.2743943929672241 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.354145558510618, | |
| "kl": 0.408852756023407, | |
| "learning_rate": 4.671972461158682e-06, | |
| "loss": 0.3091, | |
| "step": 300, | |
| "step_loss": 0.3570805788040161 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.308516554192417, | |
| "kl": 0.33082523941993713, | |
| "learning_rate": 4.669800927711525e-06, | |
| "loss": 0.2957, | |
| "step": 301, | |
| "step_loss": 0.3042528033256531 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.0883509759412517, | |
| "kl": 0.36321133375167847, | |
| "learning_rate": 4.667622799239869e-06, | |
| "loss": 0.2822, | |
| "step": 302, | |
| "step_loss": 0.3216032385826111 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.263711518929017, | |
| "kl": 0.2793586850166321, | |
| "learning_rate": 4.665438083226163e-06, | |
| "loss": 0.2997, | |
| "step": 303, | |
| "step_loss": 0.2776317000389099 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.1674457067837019, | |
| "kl": 0.33832457661628723, | |
| "learning_rate": 4.663246787175483e-06, | |
| "loss": 0.3, | |
| "step": 304, | |
| "step_loss": 0.31598201394081116 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.1988014340031905, | |
| "kl": 0.36085131764411926, | |
| "learning_rate": 4.661048918615513e-06, | |
| "loss": 0.2791, | |
| "step": 305, | |
| "step_loss": 0.302070677280426 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.1315893146442402, | |
| "kl": 0.33112025260925293, | |
| "learning_rate": 4.658844485096512e-06, | |
| "loss": 0.2691, | |
| "step": 306, | |
| "step_loss": 0.29177939891815186 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.2072921211772827, | |
| "kl": 0.3359392583370209, | |
| "learning_rate": 4.656633494191293e-06, | |
| "loss": 0.2941, | |
| "step": 307, | |
| "step_loss": 0.3084091246128082 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.2218266192172693, | |
| "kl": 0.34884458780288696, | |
| "learning_rate": 4.654415953495196e-06, | |
| "loss": 0.2954, | |
| "step": 308, | |
| "step_loss": 0.28813090920448303 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.27050923164869, | |
| "kl": 0.38197797536849976, | |
| "learning_rate": 4.65219187062606e-06, | |
| "loss": 0.3116, | |
| "step": 309, | |
| "step_loss": 0.29677310585975647 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.1599676961552565, | |
| "kl": 0.3238053321838379, | |
| "learning_rate": 4.649961253224198e-06, | |
| "loss": 0.2855, | |
| "step": 310, | |
| "step_loss": 0.31362178921699524 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.1310288626730198, | |
| "kl": 0.3672914505004883, | |
| "learning_rate": 4.647724108952373e-06, | |
| "loss": 0.273, | |
| "step": 311, | |
| "step_loss": 0.2773699462413788 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.2079649280038118, | |
| "kl": 0.29229724407196045, | |
| "learning_rate": 4.645480445495767e-06, | |
| "loss": 0.3097, | |
| "step": 312, | |
| "step_loss": 0.3600565195083618 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.1441975650704101, | |
| "kl": 0.33793753385543823, | |
| "learning_rate": 4.643230270561956e-06, | |
| "loss": 0.2872, | |
| "step": 313, | |
| "step_loss": 0.2990878224372864 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.1546332487233124, | |
| "kl": 0.2881549596786499, | |
| "learning_rate": 4.640973591880889e-06, | |
| "loss": 0.2827, | |
| "step": 314, | |
| "step_loss": 0.2339860498905182 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.2470856906835874, | |
| "kl": 0.319610059261322, | |
| "learning_rate": 4.638710417204855e-06, | |
| "loss": 0.2845, | |
| "step": 315, | |
| "step_loss": 0.327451229095459 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.0504828245555484, | |
| "kl": 0.25614118576049805, | |
| "learning_rate": 4.636440754308458e-06, | |
| "loss": 0.2556, | |
| "step": 316, | |
| "step_loss": 0.21319641172885895 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.11856029436489, | |
| "kl": 0.3035759925842285, | |
| "learning_rate": 4.63416461098859e-06, | |
| "loss": 0.2848, | |
| "step": 317, | |
| "step_loss": 0.2784833014011383 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.0668814023255409, | |
| "kl": 0.29398462176322937, | |
| "learning_rate": 4.631881995064406e-06, | |
| "loss": 0.2744, | |
| "step": 318, | |
| "step_loss": 0.26396968960762024 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.0712893824198688, | |
| "kl": 0.32271263003349304, | |
| "learning_rate": 4.629592914377298e-06, | |
| "loss": 0.2668, | |
| "step": 319, | |
| "step_loss": 0.273529052734375 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.2737157435766238, | |
| "kl": 0.344595730304718, | |
| "learning_rate": 4.6272973767908615e-06, | |
| "loss": 0.3066, | |
| "step": 320, | |
| "step_loss": 0.3095177710056305 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.0251746467924616, | |
| "kl": 0.28812479972839355, | |
| "learning_rate": 4.624995390190878e-06, | |
| "loss": 0.2804, | |
| "step": 321, | |
| "step_loss": 0.24252820014953613 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.1186582847380844, | |
| "kl": 0.40985843539237976, | |
| "learning_rate": 4.62268696248528e-06, | |
| "loss": 0.2782, | |
| "step": 322, | |
| "step_loss": 0.27699118852615356 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.181794615240478, | |
| "kl": 0.2965797185897827, | |
| "learning_rate": 4.620372101604128e-06, | |
| "loss": 0.294, | |
| "step": 323, | |
| "step_loss": 0.25799939036369324 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.2137603344462602, | |
| "kl": 0.3655190169811249, | |
| "learning_rate": 4.618050815499582e-06, | |
| "loss": 0.2857, | |
| "step": 324, | |
| "step_loss": 0.30608299374580383 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.181964659595807, | |
| "kl": 0.3023182451725006, | |
| "learning_rate": 4.615723112145875e-06, | |
| "loss": 0.2832, | |
| "step": 325, | |
| "step_loss": 0.2613093852996826 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.228179157800439, | |
| "kl": 0.3194785714149475, | |
| "learning_rate": 4.613388999539283e-06, | |
| "loss": 0.2976, | |
| "step": 326, | |
| "step_loss": 0.3578476905822754 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.1277959636999064, | |
| "kl": 0.3428605794906616, | |
| "learning_rate": 4.6110484856981025e-06, | |
| "loss": 0.2771, | |
| "step": 327, | |
| "step_loss": 0.2329404354095459 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.2069845653505489, | |
| "kl": 0.425138384103775, | |
| "learning_rate": 4.608701578662618e-06, | |
| "loss": 0.2977, | |
| "step": 328, | |
| "step_loss": 0.3749895393848419 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.1076692442818552, | |
| "kl": 0.2908833622932434, | |
| "learning_rate": 4.606348286495074e-06, | |
| "loss": 0.2861, | |
| "step": 329, | |
| "step_loss": 0.28232869505882263 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.1600108842297994, | |
| "kl": 0.3712472915649414, | |
| "learning_rate": 4.6039886172796555e-06, | |
| "loss": 0.2825, | |
| "step": 330, | |
| "step_loss": 0.29888463020324707 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.0826571682025825, | |
| "kl": 0.3151686489582062, | |
| "learning_rate": 4.6016225791224504e-06, | |
| "loss": 0.2826, | |
| "step": 331, | |
| "step_loss": 0.25253331661224365 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.0721510598381356, | |
| "kl": 0.30766239762306213, | |
| "learning_rate": 4.5992501801514235e-06, | |
| "loss": 0.2802, | |
| "step": 332, | |
| "step_loss": 0.25241219997406006 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.149128958736038, | |
| "kl": 0.31056687235832214, | |
| "learning_rate": 4.596871428516397e-06, | |
| "loss": 0.2778, | |
| "step": 333, | |
| "step_loss": 0.2913603186607361 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.0863838444877447, | |
| "kl": 0.37738847732543945, | |
| "learning_rate": 4.594486332389011e-06, | |
| "loss": 0.2736, | |
| "step": 334, | |
| "step_loss": 0.3190591335296631 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.2058838263807778, | |
| "kl": 0.3031269609928131, | |
| "learning_rate": 4.592094899962702e-06, | |
| "loss": 0.2911, | |
| "step": 335, | |
| "step_loss": 0.23730549216270447 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.1861398398638574, | |
| "kl": 0.3154381513595581, | |
| "learning_rate": 4.589697139452673e-06, | |
| "loss": 0.295, | |
| "step": 336, | |
| "step_loss": 0.30010032653808594 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.1654254307836165, | |
| "kl": 0.3234802186489105, | |
| "learning_rate": 4.5872930590958685e-06, | |
| "loss": 0.2821, | |
| "step": 337, | |
| "step_loss": 0.3050956428050995 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.0142973438347946, | |
| "kl": 0.3032388389110565, | |
| "learning_rate": 4.584882667150939e-06, | |
| "loss": 0.2637, | |
| "step": 338, | |
| "step_loss": 0.25487464666366577 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.1306032144314904, | |
| "kl": 0.37623462080955505, | |
| "learning_rate": 4.582465971898219e-06, | |
| "loss": 0.289, | |
| "step": 339, | |
| "step_loss": 0.3044344484806061 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.1724697129866262, | |
| "kl": 0.3717675507068634, | |
| "learning_rate": 4.580042981639699e-06, | |
| "loss": 0.2842, | |
| "step": 340, | |
| "step_loss": 0.30331289768218994 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_loss": 1.5836049318313599, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_runtime": 14.7278, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_samples_per_second": 6.79, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_steps_per_second": 0.883, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.9909952614947144, | |
| "kl": 0.3604995012283325, | |
| "learning_rate": 4.577613704698989e-06, | |
| "loss": 0.2173, | |
| "step": 341, | |
| "step_loss": 1.2625408172607422 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 1.122598641549428, | |
| "kl": 0.37703055143356323, | |
| "learning_rate": 4.5751781494213e-06, | |
| "loss": 0.2233, | |
| "step": 342, | |
| "step_loss": 0.24784518778324127 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.9555222272528335, | |
| "kl": 0.3994651734828949, | |
| "learning_rate": 4.572736324173409e-06, | |
| "loss": 0.2154, | |
| "step": 343, | |
| "step_loss": 0.24239662289619446 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 1.0100151801384887, | |
| "kl": 0.40937453508377075, | |
| "learning_rate": 4.570288237343632e-06, | |
| "loss": 0.2098, | |
| "step": 344, | |
| "step_loss": 0.25453901290893555 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.9870747676771315, | |
| "kl": 0.3054189085960388, | |
| "learning_rate": 4.567833897341797e-06, | |
| "loss": 0.2083, | |
| "step": 345, | |
| "step_loss": 0.19117772579193115 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 1.1061807368303616, | |
| "kl": 0.41628479957580566, | |
| "learning_rate": 4.565373312599209e-06, | |
| "loss": 0.2152, | |
| "step": 346, | |
| "step_loss": 0.22663772106170654 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 1.0384373325803509, | |
| "kl": 0.3613884449005127, | |
| "learning_rate": 4.56290649156863e-06, | |
| "loss": 0.2156, | |
| "step": 347, | |
| "step_loss": 0.18198919296264648 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 1.0658063843250276, | |
| "kl": 0.3354627192020416, | |
| "learning_rate": 4.560433442724243e-06, | |
| "loss": 0.2078, | |
| "step": 348, | |
| "step_loss": 0.16816985607147217 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.1109132713111654, | |
| "kl": 0.4467100501060486, | |
| "learning_rate": 4.5579541745616254e-06, | |
| "loss": 0.2177, | |
| "step": 349, | |
| "step_loss": 0.2320152074098587 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.1929174001218243, | |
| "kl": 0.456093430519104, | |
| "learning_rate": 4.555468695597721e-06, | |
| "loss": 0.2167, | |
| "step": 350, | |
| "step_loss": 0.22836454212665558 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.183660347956496, | |
| "kl": 0.39071205258369446, | |
| "learning_rate": 4.552977014370807e-06, | |
| "loss": 0.2235, | |
| "step": 351, | |
| "step_loss": 0.2216327041387558 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.113360044133729, | |
| "kl": 0.40837928652763367, | |
| "learning_rate": 4.550479139440466e-06, | |
| "loss": 0.2133, | |
| "step": 352, | |
| "step_loss": 0.20445051789283752 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 1.1459353950591424, | |
| "kl": 0.355142205953598, | |
| "learning_rate": 4.547975079387564e-06, | |
| "loss": 0.2272, | |
| "step": 353, | |
| "step_loss": 0.22001862525939941 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 1.1749772462772283, | |
| "kl": 0.3855653405189514, | |
| "learning_rate": 4.545464842814208e-06, | |
| "loss": 0.2217, | |
| "step": 354, | |
| "step_loss": 0.22728465497493744 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 1.015463016503802, | |
| "kl": 0.3446847200393677, | |
| "learning_rate": 4.542948438343726e-06, | |
| "loss": 0.2058, | |
| "step": 355, | |
| "step_loss": 0.18022528290748596 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 1.0669292034040894, | |
| "kl": 0.36371520161628723, | |
| "learning_rate": 4.5404258746206314e-06, | |
| "loss": 0.21, | |
| "step": 356, | |
| "step_loss": 0.2281932234764099 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 1.020434757519867, | |
| "kl": 0.40964460372924805, | |
| "learning_rate": 4.537897160310602e-06, | |
| "loss": 0.2063, | |
| "step": 357, | |
| "step_loss": 0.20418326556682587 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 1.0651999836970036, | |
| "kl": 0.328068345785141, | |
| "learning_rate": 4.535362304100439e-06, | |
| "loss": 0.2153, | |
| "step": 358, | |
| "step_loss": 0.18197788298130035 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 1.0161322060783642, | |
| "kl": 0.3565504848957062, | |
| "learning_rate": 4.532821314698044e-06, | |
| "loss": 0.2113, | |
| "step": 359, | |
| "step_loss": 0.22694353759288788 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 1.0601947614383682, | |
| "kl": 0.34940865635871887, | |
| "learning_rate": 4.5302742008323895e-06, | |
| "loss": 0.2244, | |
| "step": 360, | |
| "step_loss": 0.18668481707572937 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 1.0190544548173317, | |
| "kl": 0.30953383445739746, | |
| "learning_rate": 4.527720971253486e-06, | |
| "loss": 0.2146, | |
| "step": 361, | |
| "step_loss": 0.2063293755054474 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 1.0542367113097721, | |
| "kl": 0.4005735516548157, | |
| "learning_rate": 4.525161634732354e-06, | |
| "loss": 0.2043, | |
| "step": 362, | |
| "step_loss": 0.20447780191898346 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 1.240603468633649, | |
| "kl": 0.40291449427604675, | |
| "learning_rate": 4.52259620006099e-06, | |
| "loss": 0.2211, | |
| "step": 363, | |
| "step_loss": 0.19387193024158478 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 1.1166355530237915, | |
| "kl": 0.409071147441864, | |
| "learning_rate": 4.520024676052342e-06, | |
| "loss": 0.2141, | |
| "step": 364, | |
| "step_loss": 0.2120700180530548 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 1.1835634208796884, | |
| "kl": 0.42897555232048035, | |
| "learning_rate": 4.517447071540277e-06, | |
| "loss": 0.2129, | |
| "step": 365, | |
| "step_loss": 0.24478043615818024 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.10221612084664, | |
| "kl": 0.4204740822315216, | |
| "learning_rate": 4.514863395379548e-06, | |
| "loss": 0.2127, | |
| "step": 366, | |
| "step_loss": 0.2228332757949829 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.114889221761073, | |
| "kl": 0.3709118664264679, | |
| "learning_rate": 4.512273656445767e-06, | |
| "loss": 0.2216, | |
| "step": 367, | |
| "step_loss": 0.204238623380661 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.1697263984215087, | |
| "kl": 0.36464443802833557, | |
| "learning_rate": 4.509677863635373e-06, | |
| "loss": 0.2152, | |
| "step": 368, | |
| "step_loss": 0.21155373752117157 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.1455769887916711, | |
| "kl": 0.33827337622642517, | |
| "learning_rate": 4.507076025865602e-06, | |
| "loss": 0.2025, | |
| "step": 369, | |
| "step_loss": 0.15999384224414825 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 1.18888099578414, | |
| "kl": 0.38023340702056885, | |
| "learning_rate": 4.504468152074454e-06, | |
| "loss": 0.2221, | |
| "step": 370, | |
| "step_loss": 0.22181765735149384 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 1.1432354202041541, | |
| "kl": 0.4160056710243225, | |
| "learning_rate": 4.501854251220667e-06, | |
| "loss": 0.2184, | |
| "step": 371, | |
| "step_loss": 0.2242666780948639 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 1.05396341263562, | |
| "kl": 0.324006050825119, | |
| "learning_rate": 4.499234332283683e-06, | |
| "loss": 0.2067, | |
| "step": 372, | |
| "step_loss": 0.23381561040878296 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 1.1234179605156285, | |
| "kl": 0.4134640395641327, | |
| "learning_rate": 4.496608404263617e-06, | |
| "loss": 0.2267, | |
| "step": 373, | |
| "step_loss": 0.2217472344636917 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 1.0823406851201798, | |
| "kl": 0.2976873219013214, | |
| "learning_rate": 4.493976476181227e-06, | |
| "loss": 0.2146, | |
| "step": 374, | |
| "step_loss": 0.19449269771575928 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 1.0386992586850838, | |
| "kl": 0.40376949310302734, | |
| "learning_rate": 4.491338557077883e-06, | |
| "loss": 0.2124, | |
| "step": 375, | |
| "step_loss": 0.2595367729663849 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 1.1201199545655665, | |
| "kl": 0.46266913414001465, | |
| "learning_rate": 4.488694656015535e-06, | |
| "loss": 0.2176, | |
| "step": 376, | |
| "step_loss": 0.22357900440692902 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 1.0754291235160311, | |
| "kl": 0.33384445309638977, | |
| "learning_rate": 4.486044782076683e-06, | |
| "loss": 0.2244, | |
| "step": 377, | |
| "step_loss": 0.16794537007808685 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 1.0073994521995542, | |
| "kl": 0.27469688653945923, | |
| "learning_rate": 4.483388944364345e-06, | |
| "loss": 0.2203, | |
| "step": 378, | |
| "step_loss": 0.14068152010440826 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 1.081773692852582, | |
| "kl": 0.4029933512210846, | |
| "learning_rate": 4.480727152002029e-06, | |
| "loss": 0.2315, | |
| "step": 379, | |
| "step_loss": 0.2484932690858841 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.9668680107071213, | |
| "kl": 0.41677016019821167, | |
| "learning_rate": 4.478059414133695e-06, | |
| "loss": 0.2004, | |
| "step": 380, | |
| "step_loss": 0.19721892476081848 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 1.001005650539627, | |
| "kl": 0.31512561440467834, | |
| "learning_rate": 4.47538573992373e-06, | |
| "loss": 0.205, | |
| "step": 381, | |
| "step_loss": 0.20380663871765137 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 1.0947450062868074, | |
| "kl": 0.357501357793808, | |
| "learning_rate": 4.472706138556911e-06, | |
| "loss": 0.2164, | |
| "step": 382, | |
| "step_loss": 0.18332478404045105 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 1.1706570208600346, | |
| "kl": 0.3620475232601166, | |
| "learning_rate": 4.4700206192383796e-06, | |
| "loss": 0.2252, | |
| "step": 383, | |
| "step_loss": 0.19933828711509705 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 1.226064605249827, | |
| "kl": 0.2993457019329071, | |
| "learning_rate": 4.4673291911936064e-06, | |
| "loss": 0.2265, | |
| "step": 384, | |
| "step_loss": 0.17195503413677216 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 1.1576130346407478, | |
| "kl": 0.35498231649398804, | |
| "learning_rate": 4.464631863668357e-06, | |
| "loss": 0.2108, | |
| "step": 385, | |
| "step_loss": 0.1912364363670349 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 1.0802215632686387, | |
| "kl": 0.3443387448787689, | |
| "learning_rate": 4.461928645928667e-06, | |
| "loss": 0.2105, | |
| "step": 386, | |
| "step_loss": 0.27471888065338135 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 0.9802948466940768, | |
| "kl": 0.3694034218788147, | |
| "learning_rate": 4.459219547260805e-06, | |
| "loss": 0.1983, | |
| "step": 387, | |
| "step_loss": 0.21388615667819977 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 1.0063078315772995, | |
| "kl": 0.2925432026386261, | |
| "learning_rate": 4.456504576971243e-06, | |
| "loss": 0.2123, | |
| "step": 388, | |
| "step_loss": 0.20165732502937317 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 1.0786559345589601, | |
| "kl": 0.29720914363861084, | |
| "learning_rate": 4.453783744386621e-06, | |
| "loss": 0.2076, | |
| "step": 389, | |
| "step_loss": 0.22997143864631653 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.0602165185359143, | |
| "kl": 0.31571799516677856, | |
| "learning_rate": 4.451057058853721e-06, | |
| "loss": 0.2134, | |
| "step": 390, | |
| "step_loss": 0.17782820761203766 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.0133394273161123, | |
| "kl": 0.30432724952697754, | |
| "learning_rate": 4.448324529739429e-06, | |
| "loss": 0.1898, | |
| "step": 391, | |
| "step_loss": 0.15238864719867706 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 0.9804056306864453, | |
| "kl": 0.3994058072566986, | |
| "learning_rate": 4.445586166430706e-06, | |
| "loss": 0.1903, | |
| "step": 392, | |
| "step_loss": 0.2173841893672943 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 1.123296175942593, | |
| "kl": 0.30720776319503784, | |
| "learning_rate": 4.442841978334556e-06, | |
| "loss": 0.2216, | |
| "step": 393, | |
| "step_loss": 0.19546273350715637 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 1.0359920222691423, | |
| "kl": 0.35486575961112976, | |
| "learning_rate": 4.440091974877989e-06, | |
| "loss": 0.2129, | |
| "step": 394, | |
| "step_loss": 0.20697163045406342 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 1.0849629839063242, | |
| "kl": 0.33868688344955444, | |
| "learning_rate": 4.437336165507998e-06, | |
| "loss": 0.2166, | |
| "step": 395, | |
| "step_loss": 0.21048006415367126 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 1.0951854958685978, | |
| "kl": 0.4097781181335449, | |
| "learning_rate": 4.4345745596915164e-06, | |
| "loss": 0.2149, | |
| "step": 396, | |
| "step_loss": 0.22375498712062836 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 1.077971005153329, | |
| "kl": 0.3999599516391754, | |
| "learning_rate": 4.431807166915393e-06, | |
| "loss": 0.1962, | |
| "step": 397, | |
| "step_loss": 0.20573902130126953 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 1.1986240067763814, | |
| "kl": 0.33355939388275146, | |
| "learning_rate": 4.429033996686352e-06, | |
| "loss": 0.2158, | |
| "step": 398, | |
| "step_loss": 0.20132023096084595 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 1.0260499777997851, | |
| "kl": 0.435636967420578, | |
| "learning_rate": 4.426255058530969e-06, | |
| "loss": 0.2093, | |
| "step": 399, | |
| "step_loss": 0.2543203830718994 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.1062815074420902, | |
| "kl": 0.313290536403656, | |
| "learning_rate": 4.423470361995632e-06, | |
| "loss": 0.2169, | |
| "step": 400, | |
| "step_loss": 0.1880669891834259 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.1099408278287306, | |
| "kl": 0.384952574968338, | |
| "learning_rate": 4.420679916646512e-06, | |
| "loss": 0.2167, | |
| "step": 401, | |
| "step_loss": 0.21090340614318848 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.072595417050862, | |
| "kl": 0.32443416118621826, | |
| "learning_rate": 4.417883732069525e-06, | |
| "loss": 0.2069, | |
| "step": 402, | |
| "step_loss": 0.2197069674730301 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.1520310317038895, | |
| "kl": 0.4313489496707916, | |
| "learning_rate": 4.4150818178703084e-06, | |
| "loss": 0.2164, | |
| "step": 403, | |
| "step_loss": 0.23380175232887268 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 1.0783958233120303, | |
| "kl": 0.4069855809211731, | |
| "learning_rate": 4.412274183674177e-06, | |
| "loss": 0.2254, | |
| "step": 404, | |
| "step_loss": 0.22361119091510773 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 1.063343526682731, | |
| "kl": 0.32530197501182556, | |
| "learning_rate": 4.409460839126099e-06, | |
| "loss": 0.1957, | |
| "step": 405, | |
| "step_loss": 0.20308908820152283 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 1.1176352237327234, | |
| "kl": 0.44647660851478577, | |
| "learning_rate": 4.406641793890658e-06, | |
| "loss": 0.2153, | |
| "step": 406, | |
| "step_loss": 0.2192254364490509 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 1.0704852612143154, | |
| "kl": 0.3989701271057129, | |
| "learning_rate": 4.40381705765202e-06, | |
| "loss": 0.2141, | |
| "step": 407, | |
| "step_loss": 0.2276991605758667 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 1.1043825778150693, | |
| "kl": 0.4035341739654541, | |
| "learning_rate": 4.400986640113903e-06, | |
| "loss": 0.2179, | |
| "step": 408, | |
| "step_loss": 0.24566595256328583 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 1.0492597860234878, | |
| "kl": 0.34541037678718567, | |
| "learning_rate": 4.398150550999538e-06, | |
| "loss": 0.2097, | |
| "step": 409, | |
| "step_loss": 0.22366544604301453 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 1.104701648821187, | |
| "kl": 0.47003430128097534, | |
| "learning_rate": 4.395308800051645e-06, | |
| "loss": 0.2152, | |
| "step": 410, | |
| "step_loss": 0.2739095389842987 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 1.1432191403701684, | |
| "kl": 0.3003239333629608, | |
| "learning_rate": 4.392461397032388e-06, | |
| "loss": 0.2209, | |
| "step": 411, | |
| "step_loss": 0.20217368006706238 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 1.1006520749844693, | |
| "kl": 0.34780094027519226, | |
| "learning_rate": 4.389608351723354e-06, | |
| "loss": 0.2222, | |
| "step": 412, | |
| "step_loss": 0.18477079272270203 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 1.1433499590320158, | |
| "kl": 0.29936301708221436, | |
| "learning_rate": 4.386749673925507e-06, | |
| "loss": 0.2136, | |
| "step": 413, | |
| "step_loss": 0.1923401653766632 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 1.0604657445965895, | |
| "kl": 0.3893413245677948, | |
| "learning_rate": 4.383885373459162e-06, | |
| "loss": 0.2077, | |
| "step": 414, | |
| "step_loss": 0.22892388701438904 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 1.1153994570005237, | |
| "kl": 0.4249955713748932, | |
| "learning_rate": 4.381015460163949e-06, | |
| "loss": 0.213, | |
| "step": 415, | |
| "step_loss": 0.22074082493782043 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 1.0044730994428235, | |
| "kl": 0.42945361137390137, | |
| "learning_rate": 4.378139943898782e-06, | |
| "loss": 0.201, | |
| "step": 416, | |
| "step_loss": 0.2300502359867096 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.1201498198141744, | |
| "kl": 0.31198564171791077, | |
| "learning_rate": 4.375258834541819e-06, | |
| "loss": 0.2335, | |
| "step": 417, | |
| "step_loss": 0.18508249521255493 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.1207096425422391, | |
| "kl": 0.3583974540233612, | |
| "learning_rate": 4.372372141990433e-06, | |
| "loss": 0.2065, | |
| "step": 418, | |
| "step_loss": 0.1857309639453888 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.1496661225032172, | |
| "kl": 0.3583613336086273, | |
| "learning_rate": 4.369479876161179e-06, | |
| "loss": 0.2177, | |
| "step": 419, | |
| "step_loss": 0.224288210272789 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.0784304365856952, | |
| "kl": 0.3118525445461273, | |
| "learning_rate": 4.366582046989756e-06, | |
| "loss": 0.2067, | |
| "step": 420, | |
| "step_loss": 0.1751691997051239 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 1.1886899720468163, | |
| "kl": 0.4487501382827759, | |
| "learning_rate": 4.363678664430972e-06, | |
| "loss": 0.2237, | |
| "step": 421, | |
| "step_loss": 0.25023704767227173 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 1.1745967665142434, | |
| "kl": 0.33707478642463684, | |
| "learning_rate": 4.360769738458717e-06, | |
| "loss": 0.2135, | |
| "step": 422, | |
| "step_loss": 0.18086591362953186 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 1.092498622016125, | |
| "kl": 0.3724205791950226, | |
| "learning_rate": 4.3578552790659215e-06, | |
| "loss": 0.2313, | |
| "step": 423, | |
| "step_loss": 0.22714251279830933 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.1190105848672534, | |
| "kl": 0.2946886420249939, | |
| "learning_rate": 4.3549352962645256e-06, | |
| "loss": 0.2036, | |
| "step": 424, | |
| "step_loss": 0.20033493638038635 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.1415511904160704, | |
| "kl": 0.2983555495738983, | |
| "learning_rate": 4.352009800085442e-06, | |
| "loss": 0.2136, | |
| "step": 425, | |
| "step_loss": 0.16325783729553223 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.12564799402316, | |
| "kl": 0.40603286027908325, | |
| "learning_rate": 4.349078800578527e-06, | |
| "loss": 0.2225, | |
| "step": 426, | |
| "step_loss": 0.24971996247768402 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 1.0847789979247071, | |
| "kl": 0.2904649078845978, | |
| "learning_rate": 4.346142307812537e-06, | |
| "loss": 0.2134, | |
| "step": 427, | |
| "step_loss": 0.21226723492145538 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 1.1535493008057434, | |
| "kl": 0.4135288596153259, | |
| "learning_rate": 4.3432003318751034e-06, | |
| "loss": 0.218, | |
| "step": 428, | |
| "step_loss": 0.24029088020324707 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 1.1822688678890323, | |
| "kl": 0.3823299705982208, | |
| "learning_rate": 4.340252882872693e-06, | |
| "loss": 0.2093, | |
| "step": 429, | |
| "step_loss": 0.23636040091514587 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 1.218665217849662, | |
| "kl": 0.34758618474006653, | |
| "learning_rate": 4.337299970930573e-06, | |
| "loss": 0.2031, | |
| "step": 430, | |
| "step_loss": 0.1947343796491623 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.12952236007926, | |
| "kl": 0.3034479022026062, | |
| "learning_rate": 4.3343416061927784e-06, | |
| "loss": 0.2125, | |
| "step": 431, | |
| "step_loss": 0.21196305751800537 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.1167424522607692, | |
| "kl": 0.3147704005241394, | |
| "learning_rate": 4.331377798822074e-06, | |
| "loss": 0.2131, | |
| "step": 432, | |
| "step_loss": 0.22102078795433044 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.0776199042214483, | |
| "kl": 0.38971424102783203, | |
| "learning_rate": 4.328408558999926e-06, | |
| "loss": 0.2129, | |
| "step": 433, | |
| "step_loss": 0.2175690084695816 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 1.247807487250352, | |
| "kl": 0.37261104583740234, | |
| "learning_rate": 4.325433896926455e-06, | |
| "loss": 0.233, | |
| "step": 434, | |
| "step_loss": 0.17759710550308228 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 1.0828914253017792, | |
| "kl": 0.4615696668624878, | |
| "learning_rate": 4.322453822820416e-06, | |
| "loss": 0.2068, | |
| "step": 435, | |
| "step_loss": 0.2164827585220337 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 1.1538866091782574, | |
| "kl": 0.34330347180366516, | |
| "learning_rate": 4.319468346919151e-06, | |
| "loss": 0.2141, | |
| "step": 436, | |
| "step_loss": 0.17444977164268494 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 1.0771764576951577, | |
| "kl": 0.3454141616821289, | |
| "learning_rate": 4.316477479478562e-06, | |
| "loss": 0.2071, | |
| "step": 437, | |
| "step_loss": 0.1811198741197586 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 1.0306242648319286, | |
| "kl": 0.33969008922576904, | |
| "learning_rate": 4.3134812307730685e-06, | |
| "loss": 0.2226, | |
| "step": 438, | |
| "step_loss": 0.20342102646827698 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 1.204874661424694, | |
| "kl": 0.35218775272369385, | |
| "learning_rate": 4.310479611095579e-06, | |
| "loss": 0.2297, | |
| "step": 439, | |
| "step_loss": 0.2256166785955429 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 1.1018103259900038, | |
| "kl": 0.3603362441062927, | |
| "learning_rate": 4.3074726307574515e-06, | |
| "loss": 0.2154, | |
| "step": 440, | |
| "step_loss": 0.1923496127128601 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 1.1380166709119617, | |
| "kl": 0.3439081907272339, | |
| "learning_rate": 4.304460300088461e-06, | |
| "loss": 0.2132, | |
| "step": 441, | |
| "step_loss": 0.1734137088060379 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 1.2016707710129002, | |
| "kl": 0.5097121000289917, | |
| "learning_rate": 4.3014426294367585e-06, | |
| "loss": 0.2059, | |
| "step": 442, | |
| "step_loss": 0.24441494047641754 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 1.095054101053829, | |
| "kl": 0.37071746587753296, | |
| "learning_rate": 4.298419629168844e-06, | |
| "loss": 0.2075, | |
| "step": 443, | |
| "step_loss": 0.22421976923942566 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 1.1424892814851308, | |
| "kl": 0.3091402053833008, | |
| "learning_rate": 4.295391309669523e-06, | |
| "loss": 0.2133, | |
| "step": 444, | |
| "step_loss": 0.19774490594863892 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 1.0489332618798233, | |
| "kl": 0.3951050341129303, | |
| "learning_rate": 4.292357681341875e-06, | |
| "loss": 0.2173, | |
| "step": 445, | |
| "step_loss": 0.23351700603961945 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 1.1201440862803222, | |
| "kl": 0.31920719146728516, | |
| "learning_rate": 4.289318754607216e-06, | |
| "loss": 0.2119, | |
| "step": 446, | |
| "step_loss": 0.19813159108161926 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 1.109579067534783, | |
| "kl": 0.36979711055755615, | |
| "learning_rate": 4.286274539905064e-06, | |
| "loss": 0.2214, | |
| "step": 447, | |
| "step_loss": 0.2170843780040741 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 1.1248189065136833, | |
| "kl": 0.4319002628326416, | |
| "learning_rate": 4.283225047693102e-06, | |
| "loss": 0.2216, | |
| "step": 448, | |
| "step_loss": 0.23914441466331482 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 1.0143097210255512, | |
| "kl": 0.44951027631759644, | |
| "learning_rate": 4.280170288447145e-06, | |
| "loss": 0.19, | |
| "step": 449, | |
| "step_loss": 0.23710179328918457 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 1.1324153993387862, | |
| "kl": 0.38769853115081787, | |
| "learning_rate": 4.277110272661098e-06, | |
| "loss": 0.2155, | |
| "step": 450, | |
| "step_loss": 0.201776921749115 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 1.1440937390786563, | |
| "kl": 0.4149802625179291, | |
| "learning_rate": 4.2740450108469276e-06, | |
| "loss": 0.2167, | |
| "step": 451, | |
| "step_loss": 0.22473248839378357 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 1.1169649315923986, | |
| "kl": 0.4232461452484131, | |
| "learning_rate": 4.270974513534617e-06, | |
| "loss": 0.2186, | |
| "step": 452, | |
| "step_loss": 0.22173169255256653 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 1.0911796681416763, | |
| "kl": 0.36605626344680786, | |
| "learning_rate": 4.26789879127214e-06, | |
| "loss": 0.2013, | |
| "step": 453, | |
| "step_loss": 0.24023717641830444 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 1.1226223217769014, | |
| "kl": 0.363466739654541, | |
| "learning_rate": 4.2648178546254135e-06, | |
| "loss": 0.2128, | |
| "step": 454, | |
| "step_loss": 0.19646257162094116 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 1.0952440956076734, | |
| "kl": 0.4751397669315338, | |
| "learning_rate": 4.261731714178274e-06, | |
| "loss": 0.2107, | |
| "step": 455, | |
| "step_loss": 0.27217623591423035 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 1.140170398680307, | |
| "kl": 0.3529389500617981, | |
| "learning_rate": 4.25864038053243e-06, | |
| "loss": 0.2175, | |
| "step": 456, | |
| "step_loss": 0.22076928615570068 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 1.0815717361792092, | |
| "kl": 0.3519299626350403, | |
| "learning_rate": 4.2555438643074315e-06, | |
| "loss": 0.2019, | |
| "step": 457, | |
| "step_loss": 0.19926907122135162 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 1.1185893886506182, | |
| "kl": 0.36596280336380005, | |
| "learning_rate": 4.252442176140631e-06, | |
| "loss": 0.2036, | |
| "step": 458, | |
| "step_loss": 0.18698200583457947 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 1.1963180102524402, | |
| "kl": 0.41686299443244934, | |
| "learning_rate": 4.249335326687148e-06, | |
| "loss": 0.2236, | |
| "step": 459, | |
| "step_loss": 0.23538297414779663 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 1.1749480454138903, | |
| "kl": 0.3308974504470825, | |
| "learning_rate": 4.2462233266198335e-06, | |
| "loss": 0.2188, | |
| "step": 460, | |
| "step_loss": 0.19405069947242737 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 1.227170089100812, | |
| "kl": 0.3617514967918396, | |
| "learning_rate": 4.243106186629233e-06, | |
| "loss": 0.2232, | |
| "step": 461, | |
| "step_loss": 0.1792970597743988 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 1.0671020515492777, | |
| "kl": 0.34086763858795166, | |
| "learning_rate": 4.2399839174235445e-06, | |
| "loss": 0.2123, | |
| "step": 462, | |
| "step_loss": 0.22149503231048584 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 1.0270636289046342, | |
| "kl": 0.3730754852294922, | |
| "learning_rate": 4.236856529728593e-06, | |
| "loss": 0.2081, | |
| "step": 463, | |
| "step_loss": 0.2611701488494873 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 1.0352246387850932, | |
| "kl": 0.31691187620162964, | |
| "learning_rate": 4.233724034287782e-06, | |
| "loss": 0.2062, | |
| "step": 464, | |
| "step_loss": 0.18789568543434143 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 1.1036670142111487, | |
| "kl": 0.3653966188430786, | |
| "learning_rate": 4.230586441862063e-06, | |
| "loss": 0.2102, | |
| "step": 465, | |
| "step_loss": 0.25932013988494873 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 1.141576174412114, | |
| "kl": 0.3486475348472595, | |
| "learning_rate": 4.227443763229895e-06, | |
| "loss": 0.2143, | |
| "step": 466, | |
| "step_loss": 0.22772841155529022 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 1.08652115649653, | |
| "kl": 0.38706696033477783, | |
| "learning_rate": 4.224296009187212e-06, | |
| "loss": 0.2092, | |
| "step": 467, | |
| "step_loss": 0.20331993699073792 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.1132102876767822, | |
| "kl": 0.4534846544265747, | |
| "learning_rate": 4.221143190547384e-06, | |
| "loss": 0.2069, | |
| "step": 468, | |
| "step_loss": 0.19342438876628876 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.2487774294245415, | |
| "kl": 0.34831732511520386, | |
| "learning_rate": 4.217985318141177e-06, | |
| "loss": 0.23, | |
| "step": 469, | |
| "step_loss": 0.23435795307159424 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.1596986401165996, | |
| "kl": 0.3419073224067688, | |
| "learning_rate": 4.214822402816718e-06, | |
| "loss": 0.2087, | |
| "step": 470, | |
| "step_loss": 0.17857272922992706 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.0228762507304563, | |
| "kl": 0.3407072126865387, | |
| "learning_rate": 4.21165445543946e-06, | |
| "loss": 0.2022, | |
| "step": 471, | |
| "step_loss": 0.18100890517234802 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 1.1834791650481271, | |
| "kl": 0.3761002719402313, | |
| "learning_rate": 4.20848148689214e-06, | |
| "loss": 0.2114, | |
| "step": 472, | |
| "step_loss": 0.20595240592956543 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 1.0537720415925667, | |
| "kl": 0.32567542791366577, | |
| "learning_rate": 4.205303508074745e-06, | |
| "loss": 0.2135, | |
| "step": 473, | |
| "step_loss": 0.23016318678855896 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 1.08626518519842, | |
| "kl": 0.39517271518707275, | |
| "learning_rate": 4.202120529904474e-06, | |
| "loss": 0.2076, | |
| "step": 474, | |
| "step_loss": 0.22103264927864075 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 1.0419674113366015, | |
| "kl": 0.34639662504196167, | |
| "learning_rate": 4.1989325633157e-06, | |
| "loss": 0.2224, | |
| "step": 475, | |
| "step_loss": 0.2245238721370697 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 1.0933257642610934, | |
| "kl": 0.3724210858345032, | |
| "learning_rate": 4.195739619259933e-06, | |
| "loss": 0.2081, | |
| "step": 476, | |
| "step_loss": 0.2216363102197647 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 1.206514414530843, | |
| "kl": 0.33405083417892456, | |
| "learning_rate": 4.19254170870578e-06, | |
| "loss": 0.2272, | |
| "step": 477, | |
| "step_loss": 0.19681967794895172 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.127545668719584, | |
| "kl": 0.36871930956840515, | |
| "learning_rate": 4.18933884263891e-06, | |
| "loss": 0.2207, | |
| "step": 478, | |
| "step_loss": 0.18126648664474487 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.1433635783272997, | |
| "kl": 0.3497537076473236, | |
| "learning_rate": 4.186131032062018e-06, | |
| "loss": 0.2206, | |
| "step": 479, | |
| "step_loss": 0.20069673657417297 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.0952740745497684, | |
| "kl": 0.39063096046447754, | |
| "learning_rate": 4.182918287994781e-06, | |
| "loss": 0.2209, | |
| "step": 480, | |
| "step_loss": 0.18552149832248688 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.098642354287991, | |
| "kl": 0.33700186014175415, | |
| "learning_rate": 4.1797006214738264e-06, | |
| "loss": 0.2026, | |
| "step": 481, | |
| "step_loss": 0.18201524019241333 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 1.045712202429604, | |
| "kl": 0.35738605260849, | |
| "learning_rate": 4.17647804355269e-06, | |
| "loss": 0.2026, | |
| "step": 482, | |
| "step_loss": 0.18481150269508362 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 1.1438459095582905, | |
| "kl": 0.3997901678085327, | |
| "learning_rate": 4.1732505653017805e-06, | |
| "loss": 0.2192, | |
| "step": 483, | |
| "step_loss": 0.23264771699905396 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 1.1805883340710588, | |
| "kl": 0.342068612575531, | |
| "learning_rate": 4.17001819780834e-06, | |
| "loss": 0.2131, | |
| "step": 484, | |
| "step_loss": 0.18483933806419373 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 1.1642629591622176, | |
| "kl": 0.3150475323200226, | |
| "learning_rate": 4.1667809521764065e-06, | |
| "loss": 0.2144, | |
| "step": 485, | |
| "step_loss": 0.2131662219762802 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 1.0776552130456476, | |
| "kl": 0.3208051025867462, | |
| "learning_rate": 4.163538839526777e-06, | |
| "loss": 0.2029, | |
| "step": 486, | |
| "step_loss": 0.1959662288427353 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 1.0456011694309113, | |
| "kl": 0.4914534091949463, | |
| "learning_rate": 4.160291870996966e-06, | |
| "loss": 0.2082, | |
| "step": 487, | |
| "step_loss": 0.21434994041919708 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 1.0702724310064322, | |
| "kl": 0.39285576343536377, | |
| "learning_rate": 4.157040057741171e-06, | |
| "loss": 0.2118, | |
| "step": 488, | |
| "step_loss": 0.20233172178268433 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 1.022356313983593, | |
| "kl": 0.29209986329078674, | |
| "learning_rate": 4.15378341093023e-06, | |
| "loss": 0.2109, | |
| "step": 489, | |
| "step_loss": 0.19211889803409576 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 1.1060860463863098, | |
| "kl": 0.31146690249443054, | |
| "learning_rate": 4.150521941751589e-06, | |
| "loss": 0.2284, | |
| "step": 490, | |
| "step_loss": 0.22323687374591827 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 1.086327297590339, | |
| "kl": 0.3390502333641052, | |
| "learning_rate": 4.147255661409255e-06, | |
| "loss": 0.2112, | |
| "step": 491, | |
| "step_loss": 0.19411392509937286 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 1.0776562984953817, | |
| "kl": 0.3938494026660919, | |
| "learning_rate": 4.14398458112377e-06, | |
| "loss": 0.2128, | |
| "step": 492, | |
| "step_loss": 0.22699597477912903 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 1.0858023168277036, | |
| "kl": 0.3118027448654175, | |
| "learning_rate": 4.140708712132157e-06, | |
| "loss": 0.2156, | |
| "step": 493, | |
| "step_loss": 0.21079112589359283 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 1.1315907434090295, | |
| "kl": 0.37194758653640747, | |
| "learning_rate": 4.137428065687896e-06, | |
| "loss": 0.2121, | |
| "step": 494, | |
| "step_loss": 0.2287684679031372 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.1331173453727341, | |
| "kl": 0.3639126121997833, | |
| "learning_rate": 4.134142653060875e-06, | |
| "loss": 0.2186, | |
| "step": 495, | |
| "step_loss": 0.2055366486310959 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.1630295644530058, | |
| "kl": 0.4070471525192261, | |
| "learning_rate": 4.130852485537357e-06, | |
| "loss": 0.2321, | |
| "step": 496, | |
| "step_loss": 0.2210061103105545 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.1594682830751208, | |
| "kl": 0.40022650361061096, | |
| "learning_rate": 4.127557574419938e-06, | |
| "loss": 0.2245, | |
| "step": 497, | |
| "step_loss": 0.2172410786151886 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.0825009135777626, | |
| "kl": 0.3248887062072754, | |
| "learning_rate": 4.12425793102751e-06, | |
| "loss": 0.2112, | |
| "step": 498, | |
| "step_loss": 0.18749205768108368 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 1.159823925574004, | |
| "kl": 0.4212324321269989, | |
| "learning_rate": 4.120953566695222e-06, | |
| "loss": 0.2334, | |
| "step": 499, | |
| "step_loss": 0.2644669711589813 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 1.0191841777064992, | |
| "kl": 0.39752498269081116, | |
| "learning_rate": 4.117644492774441e-06, | |
| "loss": 0.2073, | |
| "step": 500, | |
| "step_loss": 0.1976650357246399 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 1.1191918326441155, | |
| "kl": 0.3732473850250244, | |
| "learning_rate": 4.11433072063271e-06, | |
| "loss": 0.2132, | |
| "step": 501, | |
| "step_loss": 0.18028053641319275 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 1.1922599610472286, | |
| "kl": 0.3869403004646301, | |
| "learning_rate": 4.111012261653716e-06, | |
| "loss": 0.2293, | |
| "step": 502, | |
| "step_loss": 0.2117547243833542 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 1.1390828459169302, | |
| "kl": 0.3269495368003845, | |
| "learning_rate": 4.1076891272372435e-06, | |
| "loss": 0.2253, | |
| "step": 503, | |
| "step_loss": 0.20671634376049042 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 1.1027235919075196, | |
| "kl": 0.3744144141674042, | |
| "learning_rate": 4.104361328799139e-06, | |
| "loss": 0.2237, | |
| "step": 504, | |
| "step_loss": 0.20059865713119507 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 1.0613715757167175, | |
| "kl": 0.3339211940765381, | |
| "learning_rate": 4.101028877771271e-06, | |
| "loss": 0.2047, | |
| "step": 505, | |
| "step_loss": 0.17482547461986542 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 1.09162612153003, | |
| "kl": 0.3029939830303192, | |
| "learning_rate": 4.09769178560149e-06, | |
| "loss": 0.2138, | |
| "step": 506, | |
| "step_loss": 0.21953245997428894 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 1.2415791947427925, | |
| "kl": 0.3678430914878845, | |
| "learning_rate": 4.094350063753594e-06, | |
| "loss": 0.2275, | |
| "step": 507, | |
| "step_loss": 0.20582033693790436 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 1.1048759043495957, | |
| "kl": 0.35326552391052246, | |
| "learning_rate": 4.0910037237072805e-06, | |
| "loss": 0.215, | |
| "step": 508, | |
| "step_loss": 0.20594316720962524 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.0129951540806574, | |
| "kl": 0.3663599491119385, | |
| "learning_rate": 4.087652776958113e-06, | |
| "loss": 0.2142, | |
| "step": 509, | |
| "step_loss": 0.20410504937171936 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.046767706141149, | |
| "kl": 0.2914074659347534, | |
| "learning_rate": 4.084297235017482e-06, | |
| "loss": 0.2192, | |
| "step": 510, | |
| "step_loss": 0.1773955076932907 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.0601343367693614, | |
| "kl": 0.32609421014785767, | |
| "learning_rate": 4.0809371094125635e-06, | |
| "loss": 0.2082, | |
| "step": 511, | |
| "step_loss": 0.1615651398897171 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 1.1354172849507647, | |
| "kl": 0.3583594858646393, | |
| "learning_rate": 4.077572411686277e-06, | |
| "loss": 0.2119, | |
| "step": 512, | |
| "step_loss": 0.21853014826774597 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 1.1763065280060416, | |
| "kl": 0.34837618470191956, | |
| "learning_rate": 4.07420315339725e-06, | |
| "loss": 0.2182, | |
| "step": 513, | |
| "step_loss": 0.23535805940628052 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 1.1126977629565107, | |
| "kl": 0.43193358182907104, | |
| "learning_rate": 4.070829346119778e-06, | |
| "loss": 0.2082, | |
| "step": 514, | |
| "step_loss": 0.2645440101623535 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 1.0882216657003507, | |
| "kl": 0.29184776544570923, | |
| "learning_rate": 4.06745100144378e-06, | |
| "loss": 0.21, | |
| "step": 515, | |
| "step_loss": 0.18505370616912842 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 1.0968084081449223, | |
| "kl": 0.32751548290252686, | |
| "learning_rate": 4.064068130974767e-06, | |
| "loss": 0.2079, | |
| "step": 516, | |
| "step_loss": 0.20232903957366943 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 1.099644580772103, | |
| "kl": 0.37286919355392456, | |
| "learning_rate": 4.060680746333793e-06, | |
| "loss": 0.2085, | |
| "step": 517, | |
| "step_loss": 0.23085977137088776 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 1.1121872730260256, | |
| "kl": 0.25646811723709106, | |
| "learning_rate": 4.057288859157423e-06, | |
| "loss": 0.2174, | |
| "step": 518, | |
| "step_loss": 0.16534452140331268 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.081954078814375, | |
| "kl": 0.332968145608902, | |
| "learning_rate": 4.053892481097686e-06, | |
| "loss": 0.2119, | |
| "step": 519, | |
| "step_loss": 0.22676271200180054 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.0690195878534157, | |
| "kl": 0.38490551710128784, | |
| "learning_rate": 4.050491623822041e-06, | |
| "loss": 0.2086, | |
| "step": 520, | |
| "step_loss": 0.2052135318517685 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.135011971238312, | |
| "kl": 0.44730523228645325, | |
| "learning_rate": 4.047086299013332e-06, | |
| "loss": 0.21, | |
| "step": 521, | |
| "step_loss": 0.22579625248908997 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.1803940053183595, | |
| "kl": 0.3478749096393585, | |
| "learning_rate": 4.0436765183697516e-06, | |
| "loss": 0.2289, | |
| "step": 522, | |
| "step_loss": 0.20054206252098083 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 1.06138099620151, | |
| "kl": 0.35531559586524963, | |
| "learning_rate": 4.040262293604799e-06, | |
| "loss": 0.2117, | |
| "step": 523, | |
| "step_loss": 0.23341700434684753 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 1.131403790342062, | |
| "kl": 0.36995428800582886, | |
| "learning_rate": 4.036843636447242e-06, | |
| "loss": 0.2148, | |
| "step": 524, | |
| "step_loss": 0.23521539568901062 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 1.0809651297230498, | |
| "kl": 0.33512529730796814, | |
| "learning_rate": 4.0334205586410706e-06, | |
| "loss": 0.2239, | |
| "step": 525, | |
| "step_loss": 0.23108862340450287 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 0.9644089850533444, | |
| "kl": 0.3100147843360901, | |
| "learning_rate": 4.029993071945465e-06, | |
| "loss": 0.2021, | |
| "step": 526, | |
| "step_loss": 0.16294045746326447 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 1.14530814951946, | |
| "kl": 0.4870828688144684, | |
| "learning_rate": 4.026561188134749e-06, | |
| "loss": 0.2214, | |
| "step": 527, | |
| "step_loss": 0.23596243560314178 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 1.1472820386014453, | |
| "kl": 0.4136597216129303, | |
| "learning_rate": 4.023124918998353e-06, | |
| "loss": 0.2209, | |
| "step": 528, | |
| "step_loss": 0.2301231026649475 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 1.123885543146704, | |
| "kl": 0.3524470925331116, | |
| "learning_rate": 4.01968427634077e-06, | |
| "loss": 0.2253, | |
| "step": 529, | |
| "step_loss": 0.21960632503032684 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.9967913335410991, | |
| "kl": 0.28382453322410583, | |
| "learning_rate": 4.016239271981519e-06, | |
| "loss": 0.1989, | |
| "step": 530, | |
| "step_loss": 0.1486484706401825 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 1.0902910989337662, | |
| "kl": 0.38825753331184387, | |
| "learning_rate": 4.012789917755102e-06, | |
| "loss": 0.2212, | |
| "step": 531, | |
| "step_loss": 0.19914306700229645 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 1.0173576760514342, | |
| "kl": 0.3644031584262848, | |
| "learning_rate": 4.0093362255109645e-06, | |
| "loss": 0.2037, | |
| "step": 532, | |
| "step_loss": 0.2210751622915268 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 1.1991664117084275, | |
| "kl": 0.38465866446495056, | |
| "learning_rate": 4.0058782071134544e-06, | |
| "loss": 0.2196, | |
| "step": 533, | |
| "step_loss": 0.20659992098808289 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 1.1662242070255937, | |
| "kl": 0.4157981276512146, | |
| "learning_rate": 4.002415874441778e-06, | |
| "loss": 0.2163, | |
| "step": 534, | |
| "step_loss": 0.23980651795864105 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 1.0904814655761281, | |
| "kl": 0.36056768894195557, | |
| "learning_rate": 3.998949239389968e-06, | |
| "loss": 0.2117, | |
| "step": 535, | |
| "step_loss": 0.21642203629016876 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 1.0852927276607216, | |
| "kl": 0.3154537081718445, | |
| "learning_rate": 3.995478313866832e-06, | |
| "loss": 0.2147, | |
| "step": 536, | |
| "step_loss": 0.2088639885187149 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 1.0681140345975404, | |
| "kl": 0.423949658870697, | |
| "learning_rate": 3.992003109795918e-06, | |
| "loss": 0.231, | |
| "step": 537, | |
| "step_loss": 0.21801121532917023 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 1.076997180877474, | |
| "kl": 0.39057889580726624, | |
| "learning_rate": 3.9885236391154725e-06, | |
| "loss": 0.2214, | |
| "step": 538, | |
| "step_loss": 0.22681710124015808 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 1.098273331215662, | |
| "kl": 0.4600808620452881, | |
| "learning_rate": 3.985039913778398e-06, | |
| "loss": 0.2204, | |
| "step": 539, | |
| "step_loss": 0.27202731370925903 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 1.1677412111257455, | |
| "kl": 0.38847512006759644, | |
| "learning_rate": 3.981551945752214e-06, | |
| "loss": 0.2294, | |
| "step": 540, | |
| "step_loss": 0.22447596490383148 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 1.014321414944766, | |
| "kl": 0.37826013565063477, | |
| "learning_rate": 3.978059747019014e-06, | |
| "loss": 0.2029, | |
| "step": 541, | |
| "step_loss": 0.24410393834114075 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 1.0948405619058077, | |
| "kl": 0.3952583074569702, | |
| "learning_rate": 3.974563329575426e-06, | |
| "loss": 0.2234, | |
| "step": 542, | |
| "step_loss": 0.23489505052566528 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.1174386949172748, | |
| "kl": 0.38856184482574463, | |
| "learning_rate": 3.971062705432569e-06, | |
| "loss": 0.2169, | |
| "step": 543, | |
| "step_loss": 0.23416107892990112 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.181312376726803, | |
| "kl": 0.42837756872177124, | |
| "learning_rate": 3.967557886616014e-06, | |
| "loss": 0.2253, | |
| "step": 544, | |
| "step_loss": 0.24485422670841217 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.0976730155343217, | |
| "kl": 0.3416699767112732, | |
| "learning_rate": 3.964048885165741e-06, | |
| "loss": 0.2289, | |
| "step": 545, | |
| "step_loss": 0.2253103256225586 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 1.1975345651778746, | |
| "kl": 0.3401637673377991, | |
| "learning_rate": 3.9605357131360994e-06, | |
| "loss": 0.2249, | |
| "step": 546, | |
| "step_loss": 0.24569852650165558 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 1.0984507770197158, | |
| "kl": 0.33863207697868347, | |
| "learning_rate": 3.957018382595765e-06, | |
| "loss": 0.2156, | |
| "step": 547, | |
| "step_loss": 0.24580451846122742 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 1.1716298673466186, | |
| "kl": 0.33466053009033203, | |
| "learning_rate": 3.953496905627702e-06, | |
| "loss": 0.2147, | |
| "step": 548, | |
| "step_loss": 0.18085306882858276 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 1.1734884852928138, | |
| "kl": 0.34325727820396423, | |
| "learning_rate": 3.949971294329112e-06, | |
| "loss": 0.2378, | |
| "step": 549, | |
| "step_loss": 0.23332083225250244 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 1.136385000201455, | |
| "kl": 0.36762735247612, | |
| "learning_rate": 3.946441560811407e-06, | |
| "loss": 0.2235, | |
| "step": 550, | |
| "step_loss": 0.22139766812324524 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 1.0264760838893863, | |
| "kl": 0.3464069366455078, | |
| "learning_rate": 3.942907717200154e-06, | |
| "loss": 0.216, | |
| "step": 551, | |
| "step_loss": 0.19486932456493378 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 1.058251659960203, | |
| "kl": 0.352877140045166, | |
| "learning_rate": 3.939369775635042e-06, | |
| "loss": 0.2112, | |
| "step": 552, | |
| "step_loss": 0.2148822396993637 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 1.072714998063151, | |
| "kl": 0.33858194947242737, | |
| "learning_rate": 3.935827748269837e-06, | |
| "loss": 0.2112, | |
| "step": 553, | |
| "step_loss": 0.20188114047050476 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 1.0835941815255097, | |
| "kl": 0.3523325026035309, | |
| "learning_rate": 3.932281647272341e-06, | |
| "loss": 0.2125, | |
| "step": 554, | |
| "step_loss": 0.19613249599933624 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 1.1022095639794895, | |
| "kl": 0.41155239939689636, | |
| "learning_rate": 3.9287314848243516e-06, | |
| "loss": 0.2171, | |
| "step": 555, | |
| "step_loss": 0.2243964970111847 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 1.0886715974181185, | |
| "kl": 0.32835161685943604, | |
| "learning_rate": 3.925177273121613e-06, | |
| "loss": 0.2093, | |
| "step": 556, | |
| "step_loss": 0.19310572743415833 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.0796872927034789, | |
| "kl": 0.3089352548122406, | |
| "learning_rate": 3.921619024373787e-06, | |
| "loss": 0.2044, | |
| "step": 557, | |
| "step_loss": 0.19184894859790802 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.0718596059733634, | |
| "kl": 0.35348087549209595, | |
| "learning_rate": 3.918056750804397e-06, | |
| "loss": 0.2102, | |
| "step": 558, | |
| "step_loss": 0.1941283494234085 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.146403256199632, | |
| "kl": 0.339926540851593, | |
| "learning_rate": 3.914490464650798e-06, | |
| "loss": 0.214, | |
| "step": 559, | |
| "step_loss": 0.19512970745563507 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 1.104285223312089, | |
| "kl": 0.42092275619506836, | |
| "learning_rate": 3.910920178164127e-06, | |
| "loss": 0.2238, | |
| "step": 560, | |
| "step_loss": 0.2336612045764923 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 1.0272793405162493, | |
| "kl": 0.3618415892124176, | |
| "learning_rate": 3.907345903609264e-06, | |
| "loss": 0.2193, | |
| "step": 561, | |
| "step_loss": 0.2407916635274887 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 1.105176543312791, | |
| "kl": 0.3805115818977356, | |
| "learning_rate": 3.903767653264787e-06, | |
| "loss": 0.2085, | |
| "step": 562, | |
| "step_loss": 0.24206788837909698 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 1.0939508934811706, | |
| "kl": 0.36903828382492065, | |
| "learning_rate": 3.900185439422934e-06, | |
| "loss": 0.2092, | |
| "step": 563, | |
| "step_loss": 0.16102567315101624 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 1.0506794536305846, | |
| "kl": 0.3899340033531189, | |
| "learning_rate": 3.896599274389558e-06, | |
| "loss": 0.208, | |
| "step": 564, | |
| "step_loss": 0.22886879742145538 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 1.0301032554040463, | |
| "kl": 0.348848819732666, | |
| "learning_rate": 3.893009170484086e-06, | |
| "loss": 0.2182, | |
| "step": 565, | |
| "step_loss": 0.23902392387390137 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 1.0779620821495426, | |
| "kl": 0.34697240591049194, | |
| "learning_rate": 3.889415140039473e-06, | |
| "loss": 0.2148, | |
| "step": 566, | |
| "step_loss": 0.1859664022922516 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 1.018795902879667, | |
| "kl": 0.38165339827537537, | |
| "learning_rate": 3.8858171954021695e-06, | |
| "loss": 0.2135, | |
| "step": 567, | |
| "step_loss": 0.2154882550239563 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 1.0495585663185147, | |
| "kl": 0.389653742313385, | |
| "learning_rate": 3.882215348932065e-06, | |
| "loss": 0.2083, | |
| "step": 568, | |
| "step_loss": 0.24652042984962463 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 1.1620507618203504, | |
| "kl": 0.28063511848449707, | |
| "learning_rate": 3.878609613002456e-06, | |
| "loss": 0.2309, | |
| "step": 569, | |
| "step_loss": 0.21258799731731415 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 1.0370207351765348, | |
| "kl": 0.478799045085907, | |
| "learning_rate": 3.875e-06, | |
| "loss": 0.2206, | |
| "step": 570, | |
| "step_loss": 0.2444104254245758 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 1.1178474477754934, | |
| "kl": 0.33863985538482666, | |
| "learning_rate": 3.8713865223246744e-06, | |
| "loss": 0.2015, | |
| "step": 571, | |
| "step_loss": 0.18036487698554993 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 1.1609234312290915, | |
| "kl": 0.3761056661605835, | |
| "learning_rate": 3.867769192389731e-06, | |
| "loss": 0.2247, | |
| "step": 572, | |
| "step_loss": 0.22269845008850098 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 1.032185592487916, | |
| "kl": 0.3428119719028473, | |
| "learning_rate": 3.864148022621657e-06, | |
| "loss": 0.2071, | |
| "step": 573, | |
| "step_loss": 0.18176212906837463 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 1.0666730498083177, | |
| "kl": 0.389988511800766, | |
| "learning_rate": 3.8605230254601275e-06, | |
| "loss": 0.2223, | |
| "step": 574, | |
| "step_loss": 0.2073187381029129 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 1.0611933920672172, | |
| "kl": 0.4241553246974945, | |
| "learning_rate": 3.856894213357969e-06, | |
| "loss": 0.2166, | |
| "step": 575, | |
| "step_loss": 0.21341772377490997 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 1.0463821902335075, | |
| "kl": 0.3435147702693939, | |
| "learning_rate": 3.853261598781112e-06, | |
| "loss": 0.209, | |
| "step": 576, | |
| "step_loss": 0.2048221081495285 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 1.1170695382643887, | |
| "kl": 0.3356623649597168, | |
| "learning_rate": 3.849625194208548e-06, | |
| "loss": 0.2221, | |
| "step": 577, | |
| "step_loss": 0.19540490210056305 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 0.9978130835660762, | |
| "kl": 0.3102739751338959, | |
| "learning_rate": 3.845985012132291e-06, | |
| "loss": 0.2093, | |
| "step": 578, | |
| "step_loss": 0.1982828974723816 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 1.1198188896714405, | |
| "kl": 0.36010634899139404, | |
| "learning_rate": 3.842341065057329e-06, | |
| "loss": 0.2115, | |
| "step": 579, | |
| "step_loss": 0.19751714169979095 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 1.1041808967153213, | |
| "kl": 0.40271279215812683, | |
| "learning_rate": 3.8386933655015855e-06, | |
| "loss": 0.2121, | |
| "step": 580, | |
| "step_loss": 0.20048068463802338 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 1.0991371658621079, | |
| "kl": 0.3609469532966614, | |
| "learning_rate": 3.8350419259958745e-06, | |
| "loss": 0.2153, | |
| "step": 581, | |
| "step_loss": 0.2352994829416275 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 1.0699438170139548, | |
| "kl": 0.3842519223690033, | |
| "learning_rate": 3.831386759083857e-06, | |
| "loss": 0.2134, | |
| "step": 582, | |
| "step_loss": 0.22360186278820038 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 1.0996292681613908, | |
| "kl": 0.3337691128253937, | |
| "learning_rate": 3.827727877322001e-06, | |
| "loss": 0.2195, | |
| "step": 583, | |
| "step_loss": 0.1927204579114914 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 1.184402994917098, | |
| "kl": 0.33971020579338074, | |
| "learning_rate": 3.824065293279532e-06, | |
| "loss": 0.223, | |
| "step": 584, | |
| "step_loss": 0.18949981033802032 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 1.0618979276884035, | |
| "kl": 0.44578462839126587, | |
| "learning_rate": 3.820399019538397e-06, | |
| "loss": 0.2167, | |
| "step": 585, | |
| "step_loss": 0.2560417354106903 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 1.0548994769662545, | |
| "kl": 0.30985498428344727, | |
| "learning_rate": 3.816729068693215e-06, | |
| "loss": 0.2138, | |
| "step": 586, | |
| "step_loss": 0.17075133323669434 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 1.1046250699328048, | |
| "kl": 0.40717557072639465, | |
| "learning_rate": 3.813055453351242e-06, | |
| "loss": 0.2094, | |
| "step": 587, | |
| "step_loss": 0.2145349383354187 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 1.0398697406320772, | |
| "kl": 0.39057114720344543, | |
| "learning_rate": 3.809378186132318e-06, | |
| "loss": 0.2124, | |
| "step": 588, | |
| "step_loss": 0.22147606313228607 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 1.0908270949218593, | |
| "kl": 0.37297523021698, | |
| "learning_rate": 3.805697279668829e-06, | |
| "loss": 0.2194, | |
| "step": 589, | |
| "step_loss": 0.25735077261924744 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 1.0414554175112045, | |
| "kl": 0.3597789704799652, | |
| "learning_rate": 3.8020127466056638e-06, | |
| "loss": 0.2035, | |
| "step": 590, | |
| "step_loss": 0.19476523995399475 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 1.042370270883592, | |
| "kl": 0.3284936547279358, | |
| "learning_rate": 3.7983245996001695e-06, | |
| "loss": 0.2099, | |
| "step": 591, | |
| "step_loss": 0.17381024360656738 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 1.1212450649744, | |
| "kl": 0.29087406396865845, | |
| "learning_rate": 3.7946328513221058e-06, | |
| "loss": 0.225, | |
| "step": 592, | |
| "step_loss": 0.2309185266494751 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 1.1047849123898952, | |
| "kl": 0.3819228410720825, | |
| "learning_rate": 3.7909375144536077e-06, | |
| "loss": 0.2209, | |
| "step": 593, | |
| "step_loss": 0.2588649094104767 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 1.0515086411036276, | |
| "kl": 0.3881183862686157, | |
| "learning_rate": 3.7872386016891342e-06, | |
| "loss": 0.2054, | |
| "step": 594, | |
| "step_loss": 0.1839597374200821 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 1.0871033893493067, | |
| "kl": 0.38226088881492615, | |
| "learning_rate": 3.783536125735431e-06, | |
| "loss": 0.2089, | |
| "step": 595, | |
| "step_loss": 0.20572155714035034 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 1.1196016623588247, | |
| "kl": 0.34919315576553345, | |
| "learning_rate": 3.7798300993114835e-06, | |
| "loss": 0.213, | |
| "step": 596, | |
| "step_loss": 0.19187521934509277 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.039644651891598, | |
| "kl": 0.2882639169692993, | |
| "learning_rate": 3.7761205351484732e-06, | |
| "loss": 0.212, | |
| "step": 597, | |
| "step_loss": 0.1760086715221405 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.1245600714414823, | |
| "kl": 0.37673628330230713, | |
| "learning_rate": 3.7724074459897346e-06, | |
| "loss": 0.22, | |
| "step": 598, | |
| "step_loss": 0.1975017637014389 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.1311382944584354, | |
| "kl": 0.38365456461906433, | |
| "learning_rate": 3.7686908445907126e-06, | |
| "loss": 0.219, | |
| "step": 599, | |
| "step_loss": 0.21963948011398315 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.078085850267425, | |
| "kl": 0.382407009601593, | |
| "learning_rate": 3.7649707437189178e-06, | |
| "loss": 0.2195, | |
| "step": 600, | |
| "step_loss": 0.23141171038150787 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 1.0427847639241463, | |
| "kl": 0.37707236409187317, | |
| "learning_rate": 3.761247156153881e-06, | |
| "loss": 0.2043, | |
| "step": 601, | |
| "step_loss": 0.21038171648979187 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 1.0896289777889905, | |
| "kl": 0.36756861209869385, | |
| "learning_rate": 3.7575200946871104e-06, | |
| "loss": 0.2137, | |
| "step": 602, | |
| "step_loss": 0.23161782324314117 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 1.0273854062297945, | |
| "kl": 0.27731993794441223, | |
| "learning_rate": 3.7537895721220513e-06, | |
| "loss": 0.2046, | |
| "step": 603, | |
| "step_loss": 0.23568624258041382 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 1.0737448045607119, | |
| "kl": 0.3347272574901581, | |
| "learning_rate": 3.7500556012740343e-06, | |
| "loss": 0.2212, | |
| "step": 604, | |
| "step_loss": 0.1964089274406433 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 1.0946594680520594, | |
| "kl": 0.3800688683986664, | |
| "learning_rate": 3.746318194970239e-06, | |
| "loss": 0.2176, | |
| "step": 605, | |
| "step_loss": 0.22273258864879608 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 1.1129855917797444, | |
| "kl": 0.3622528314590454, | |
| "learning_rate": 3.7425773660496453e-06, | |
| "loss": 0.2186, | |
| "step": 606, | |
| "step_loss": 0.2413870245218277 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 1.145527181762428, | |
| "kl": 0.3814672827720642, | |
| "learning_rate": 3.7388331273629914e-06, | |
| "loss": 0.2337, | |
| "step": 607, | |
| "step_loss": 0.23212337493896484 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.0984629724406025, | |
| "kl": 0.2834460735321045, | |
| "learning_rate": 3.7350854917727287e-06, | |
| "loss": 0.2235, | |
| "step": 608, | |
| "step_loss": 0.17759786546230316 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.1001711709509128, | |
| "kl": 0.3364371359348297, | |
| "learning_rate": 3.7313344721529765e-06, | |
| "loss": 0.2146, | |
| "step": 609, | |
| "step_loss": 0.22853884100914001 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.068141085066643, | |
| "kl": 0.3433828353881836, | |
| "learning_rate": 3.727580081389481e-06, | |
| "loss": 0.2145, | |
| "step": 610, | |
| "step_loss": 0.21322953701019287 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 1.0787297225275188, | |
| "kl": 0.42644378542900085, | |
| "learning_rate": 3.72382233237957e-06, | |
| "loss": 0.2158, | |
| "step": 611, | |
| "step_loss": 0.2728964388370514 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 1.0596709735361247, | |
| "kl": 0.36292925477027893, | |
| "learning_rate": 3.7200612380321034e-06, | |
| "loss": 0.2096, | |
| "step": 612, | |
| "step_loss": 0.196391299366951 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.9958580635836058, | |
| "kl": 0.3467836081981659, | |
| "learning_rate": 3.7162968112674387e-06, | |
| "loss": 0.2074, | |
| "step": 613, | |
| "step_loss": 0.21914325654506683 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.1091567079931492, | |
| "kl": 0.39706793427467346, | |
| "learning_rate": 3.7125290650173768e-06, | |
| "loss": 0.2148, | |
| "step": 614, | |
| "step_loss": 0.23869748413562775 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.0845278159536325, | |
| "kl": 0.42876726388931274, | |
| "learning_rate": 3.708758012225125e-06, | |
| "loss": 0.217, | |
| "step": 615, | |
| "step_loss": 0.24647286534309387 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.1069509053065003, | |
| "kl": 0.31808775663375854, | |
| "learning_rate": 3.7049836658452474e-06, | |
| "loss": 0.2241, | |
| "step": 616, | |
| "step_loss": 0.21536415815353394 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.0997318179324385, | |
| "kl": 0.45750346779823303, | |
| "learning_rate": 3.701206038843623e-06, | |
| "loss": 0.2182, | |
| "step": 617, | |
| "step_loss": 0.23618176579475403 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.098642449230739, | |
| "kl": 0.3418111801147461, | |
| "learning_rate": 3.697425144197402e-06, | |
| "loss": 0.2181, | |
| "step": 618, | |
| "step_loss": 0.21152858436107635 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.105833997472311, | |
| "kl": 0.35120660066604614, | |
| "learning_rate": 3.6936409948949563e-06, | |
| "loss": 0.2235, | |
| "step": 619, | |
| "step_loss": 0.25562742352485657 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.0909382099858478, | |
| "kl": 0.38127419352531433, | |
| "learning_rate": 3.689853603935843e-06, | |
| "loss": 0.2023, | |
| "step": 620, | |
| "step_loss": 0.21061675250530243 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 1.04203667585934, | |
| "kl": 0.3696785569190979, | |
| "learning_rate": 3.686062984330752e-06, | |
| "loss": 0.2084, | |
| "step": 621, | |
| "step_loss": 0.2408429980278015 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 1.0740199095756, | |
| "kl": 0.35119834542274475, | |
| "learning_rate": 3.682269149101465e-06, | |
| "loss": 0.2249, | |
| "step": 622, | |
| "step_loss": 0.2259160876274109 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 1.1244116476489243, | |
| "kl": 0.39911478757858276, | |
| "learning_rate": 3.6784721112808107e-06, | |
| "loss": 0.2076, | |
| "step": 623, | |
| "step_loss": 0.2444825917482376 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 1.084381841698426, | |
| "kl": 0.364761620759964, | |
| "learning_rate": 3.6746718839126195e-06, | |
| "loss": 0.2136, | |
| "step": 624, | |
| "step_loss": 0.21551458537578583 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 1.1078473708417569, | |
| "kl": 0.44796106219291687, | |
| "learning_rate": 3.6708684800516786e-06, | |
| "loss": 0.2175, | |
| "step": 625, | |
| "step_loss": 0.219948410987854 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 1.0214975768306558, | |
| "kl": 0.2986973226070404, | |
| "learning_rate": 3.6670619127636865e-06, | |
| "loss": 0.2043, | |
| "step": 626, | |
| "step_loss": 0.16981080174446106 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 1.0733872403257325, | |
| "kl": 0.37517714500427246, | |
| "learning_rate": 3.663252195125211e-06, | |
| "loss": 0.2169, | |
| "step": 627, | |
| "step_loss": 0.21208983659744263 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 1.1239709783885117, | |
| "kl": 0.38882941007614136, | |
| "learning_rate": 3.6594393402236405e-06, | |
| "loss": 0.2128, | |
| "step": 628, | |
| "step_loss": 0.2071652114391327 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 1.0181078127118095, | |
| "kl": 0.35665225982666016, | |
| "learning_rate": 3.655623361157141e-06, | |
| "loss": 0.2065, | |
| "step": 629, | |
| "step_loss": 0.21186676621437073 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 1.0980774285222674, | |
| "kl": 0.33140304684638977, | |
| "learning_rate": 3.65180427103461e-06, | |
| "loss": 0.216, | |
| "step": 630, | |
| "step_loss": 0.23446890711784363 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 1.1533939812862526, | |
| "kl": 0.3695037364959717, | |
| "learning_rate": 3.647982082975635e-06, | |
| "loss": 0.2227, | |
| "step": 631, | |
| "step_loss": 0.2064458131790161 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 1.0607104236817797, | |
| "kl": 0.3754327893257141, | |
| "learning_rate": 3.6441568101104434e-06, | |
| "loss": 0.2074, | |
| "step": 632, | |
| "step_loss": 0.21298718452453613 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 1.1080406855173834, | |
| "kl": 0.372994601726532, | |
| "learning_rate": 3.64032846557986e-06, | |
| "loss": 0.2371, | |
| "step": 633, | |
| "step_loss": 0.22076019644737244 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 1.061998718883708, | |
| "kl": 0.36003583669662476, | |
| "learning_rate": 3.6364970625352613e-06, | |
| "loss": 0.2106, | |
| "step": 634, | |
| "step_loss": 0.236919105052948 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 1.1629557628795668, | |
| "kl": 0.40046006441116333, | |
| "learning_rate": 3.6326626141385323e-06, | |
| "loss": 0.2194, | |
| "step": 635, | |
| "step_loss": 0.21533732116222382 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 1.1308113048397246, | |
| "kl": 0.35290196537971497, | |
| "learning_rate": 3.6288251335620185e-06, | |
| "loss": 0.2356, | |
| "step": 636, | |
| "step_loss": 0.25752317905426025 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 1.024178113211246, | |
| "kl": 0.367868036031723, | |
| "learning_rate": 3.6249846339884807e-06, | |
| "loss": 0.2132, | |
| "step": 637, | |
| "step_loss": 0.2091902643442154 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 1.0233015596218147, | |
| "kl": 0.36171606183052063, | |
| "learning_rate": 3.621141128611053e-06, | |
| "loss": 0.211, | |
| "step": 638, | |
| "step_loss": 0.1969766914844513 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 1.0521376056017973, | |
| "kl": 0.3328798711299896, | |
| "learning_rate": 3.617294630633193e-06, | |
| "loss": 0.2148, | |
| "step": 639, | |
| "step_loss": 0.20545676350593567 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 1.1495041143489746, | |
| "kl": 0.31849485635757446, | |
| "learning_rate": 3.613445153268641e-06, | |
| "loss": 0.2273, | |
| "step": 640, | |
| "step_loss": 0.19370122253894806 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 1.107261874903731, | |
| "kl": 0.35327592492103577, | |
| "learning_rate": 3.6095927097413697e-06, | |
| "loss": 0.2289, | |
| "step": 641, | |
| "step_loss": 0.19487443566322327 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 1.087476278188833, | |
| "kl": 0.37533411383628845, | |
| "learning_rate": 3.6057373132855426e-06, | |
| "loss": 0.212, | |
| "step": 642, | |
| "step_loss": 0.20453642308712006 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 1.0777004622363646, | |
| "kl": 0.3517782390117645, | |
| "learning_rate": 3.6018789771454686e-06, | |
| "loss": 0.2215, | |
| "step": 643, | |
| "step_loss": 0.2225874960422516 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 1.1041421512665277, | |
| "kl": 0.3960123658180237, | |
| "learning_rate": 3.5980177145755527e-06, | |
| "loss": 0.2156, | |
| "step": 644, | |
| "step_loss": 0.19644665718078613 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 1.107531345345793, | |
| "kl": 0.3443949818611145, | |
| "learning_rate": 3.5941535388402555e-06, | |
| "loss": 0.218, | |
| "step": 645, | |
| "step_loss": 0.21444953978061676 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 1.0633655556716426, | |
| "kl": 0.32992398738861084, | |
| "learning_rate": 3.5902864632140417e-06, | |
| "loss": 0.1945, | |
| "step": 646, | |
| "step_loss": 0.17697668075561523 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 1.0660551981501736, | |
| "kl": 0.3524007201194763, | |
| "learning_rate": 3.5864165009813417e-06, | |
| "loss": 0.2062, | |
| "step": 647, | |
| "step_loss": 0.2261100560426712 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 1.0902351373924726, | |
| "kl": 0.3847709894180298, | |
| "learning_rate": 3.5825436654365005e-06, | |
| "loss": 0.1977, | |
| "step": 648, | |
| "step_loss": 0.21421456336975098 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 1.0921552134859336, | |
| "kl": 0.3233397603034973, | |
| "learning_rate": 3.578667969883733e-06, | |
| "loss": 0.2088, | |
| "step": 649, | |
| "step_loss": 0.1856887936592102 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 1.067332679290162, | |
| "kl": 0.35494789481163025, | |
| "learning_rate": 3.5747894276370792e-06, | |
| "loss": 0.2174, | |
| "step": 650, | |
| "step_loss": 0.2479093074798584 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 1.1216410520483295, | |
| "kl": 0.3918030261993408, | |
| "learning_rate": 3.5709080520203593e-06, | |
| "loss": 0.2317, | |
| "step": 651, | |
| "step_loss": 0.21458828449249268 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 1.0495414510313243, | |
| "kl": 0.383411705493927, | |
| "learning_rate": 3.5670238563671257e-06, | |
| "loss": 0.2046, | |
| "step": 652, | |
| "step_loss": 0.22937092185020447 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 1.137161738802843, | |
| "kl": 0.39319974184036255, | |
| "learning_rate": 3.563136854020621e-06, | |
| "loss": 0.2304, | |
| "step": 653, | |
| "step_loss": 0.2653224468231201 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 1.1155913828745578, | |
| "kl": 0.4275739789009094, | |
| "learning_rate": 3.5592470583337233e-06, | |
| "loss": 0.2204, | |
| "step": 654, | |
| "step_loss": 0.2726008892059326 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 1.0500891579513196, | |
| "kl": 0.3193601071834564, | |
| "learning_rate": 3.5553544826689148e-06, | |
| "loss": 0.2075, | |
| "step": 655, | |
| "step_loss": 0.18977577984333038 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 1.0438682449198995, | |
| "kl": 0.3925110697746277, | |
| "learning_rate": 3.551459140398221e-06, | |
| "loss": 0.2184, | |
| "step": 656, | |
| "step_loss": 0.24175623059272766 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 1.1175988513740611, | |
| "kl": 0.3625691533088684, | |
| "learning_rate": 3.547561044903175e-06, | |
| "loss": 0.2138, | |
| "step": 657, | |
| "step_loss": 0.21793964505195618 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 1.0707034837846736, | |
| "kl": 0.3854144215583801, | |
| "learning_rate": 3.5436602095747665e-06, | |
| "loss": 0.2168, | |
| "step": 658, | |
| "step_loss": 0.2395915985107422 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.1404652940954114, | |
| "kl": 0.3602861166000366, | |
| "learning_rate": 3.539756647813398e-06, | |
| "loss": 0.2356, | |
| "step": 659, | |
| "step_loss": 0.21894899010658264 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 0.9838424861989383, | |
| "kl": 0.36652958393096924, | |
| "learning_rate": 3.535850373028839e-06, | |
| "loss": 0.1979, | |
| "step": 660, | |
| "step_loss": 0.20045427978038788 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.095329737833727, | |
| "kl": 0.3199812173843384, | |
| "learning_rate": 3.5319413986401753e-06, | |
| "loss": 0.2183, | |
| "step": 661, | |
| "step_loss": 0.26433855295181274 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 1.053846419483373, | |
| "kl": 0.4188472032546997, | |
| "learning_rate": 3.5280297380757692e-06, | |
| "loss": 0.2146, | |
| "step": 662, | |
| "step_loss": 0.2612619400024414 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 1.0686913624789676, | |
| "kl": 0.3134489357471466, | |
| "learning_rate": 3.524115404773213e-06, | |
| "loss": 0.2118, | |
| "step": 663, | |
| "step_loss": 0.18129369616508484 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 1.0608198050048145, | |
| "kl": 0.34840127825737, | |
| "learning_rate": 3.5201984121792753e-06, | |
| "loss": 0.2121, | |
| "step": 664, | |
| "step_loss": 0.18985848128795624 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 1.0981997640992105, | |
| "kl": 0.3330709934234619, | |
| "learning_rate": 3.516278773749863e-06, | |
| "loss": 0.2064, | |
| "step": 665, | |
| "step_loss": 0.2144535481929779 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 1.1140559930597989, | |
| "kl": 0.3608890771865845, | |
| "learning_rate": 3.512356502949973e-06, | |
| "loss": 0.2141, | |
| "step": 666, | |
| "step_loss": 0.24026130139827728 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 1.0633605809716729, | |
| "kl": 0.32477813959121704, | |
| "learning_rate": 3.508431613253644e-06, | |
| "loss": 0.2104, | |
| "step": 667, | |
| "step_loss": 0.19436398148536682 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.9553573137927444, | |
| "kl": 0.33438920974731445, | |
| "learning_rate": 3.5045041181439117e-06, | |
| "loss": 0.1949, | |
| "step": 668, | |
| "step_loss": 0.17747747898101807 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 1.1177038853329584, | |
| "kl": 0.3582827150821686, | |
| "learning_rate": 3.500574031112759e-06, | |
| "loss": 0.223, | |
| "step": 669, | |
| "step_loss": 0.21867407858371735 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 1.1246746241729981, | |
| "kl": 0.3887527585029602, | |
| "learning_rate": 3.496641365661079e-06, | |
| "loss": 0.2136, | |
| "step": 670, | |
| "step_loss": 0.20844541490077972 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 1.1013777287048612, | |
| "kl": 0.36839255690574646, | |
| "learning_rate": 3.4927061352986163e-06, | |
| "loss": 0.2264, | |
| "step": 671, | |
| "step_loss": 0.23754476010799408 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 1.0736030062333497, | |
| "kl": 0.37584322690963745, | |
| "learning_rate": 3.4887683535439305e-06, | |
| "loss": 0.2149, | |
| "step": 672, | |
| "step_loss": 0.22229911386966705 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 1.1162319765419337, | |
| "kl": 0.38228553533554077, | |
| "learning_rate": 3.484828033924343e-06, | |
| "loss": 0.2289, | |
| "step": 673, | |
| "step_loss": 0.24069103598594666 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 1.0677391357940962, | |
| "kl": 0.37927836179733276, | |
| "learning_rate": 3.4808851899758967e-06, | |
| "loss": 0.2147, | |
| "step": 674, | |
| "step_loss": 0.19201472401618958 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 1.0515478096285862, | |
| "kl": 0.3583109974861145, | |
| "learning_rate": 3.476939835243304e-06, | |
| "loss": 0.2151, | |
| "step": 675, | |
| "step_loss": 0.22411265969276428 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 1.0225232678263274, | |
| "kl": 0.285269558429718, | |
| "learning_rate": 3.4729919832799036e-06, | |
| "loss": 0.199, | |
| "step": 676, | |
| "step_loss": 0.20125800371170044 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 1.0899879606264502, | |
| "kl": 0.34659573435783386, | |
| "learning_rate": 3.46904164764761e-06, | |
| "loss": 0.2159, | |
| "step": 677, | |
| "step_loss": 0.21278782188892365 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 1.0775849606901395, | |
| "kl": 0.3956890106201172, | |
| "learning_rate": 3.4650888419168748e-06, | |
| "loss": 0.2092, | |
| "step": 678, | |
| "step_loss": 0.2175438106060028 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.9947578897117778, | |
| "kl": 0.30330708622932434, | |
| "learning_rate": 3.4611335796666307e-06, | |
| "loss": 0.2031, | |
| "step": 679, | |
| "step_loss": 0.19578225910663605 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.0764381231649376, | |
| "kl": 0.35529449582099915, | |
| "learning_rate": 3.457175874484251e-06, | |
| "loss": 0.2005, | |
| "step": 680, | |
| "step_loss": 0.19128616154193878 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_loss": 1.6508517265319824, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_runtime": 14.6015, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_samples_per_second": 6.849, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_steps_per_second": 0.89, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.925978313670487, | |
| "kl": 0.4176456928253174, | |
| "learning_rate": 3.4532157399655014e-06, | |
| "loss": 0.1679, | |
| "step": 681, | |
| "step_loss": 1.3116686344146729 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.8940649274695096, | |
| "kl": 0.33792364597320557, | |
| "learning_rate": 3.4492531897144923e-06, | |
| "loss": 0.162, | |
| "step": 682, | |
| "step_loss": 0.14112232625484467 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.8396441200218108, | |
| "kl": 0.3699738383293152, | |
| "learning_rate": 3.445288237343632e-06, | |
| "loss": 0.144, | |
| "step": 683, | |
| "step_loss": 0.1407199501991272 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.9100745741579815, | |
| "kl": 0.39846813678741455, | |
| "learning_rate": 3.441320896473583e-06, | |
| "loss": 0.1509, | |
| "step": 684, | |
| "step_loss": 0.16023500263690948 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.9183176716722681, | |
| "kl": 0.4535992741584778, | |
| "learning_rate": 3.4373511807332115e-06, | |
| "loss": 0.1555, | |
| "step": 685, | |
| "step_loss": 0.15503008663654327 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 0.8961989752558022, | |
| "kl": 0.45110467076301575, | |
| "learning_rate": 3.433379103759542e-06, | |
| "loss": 0.1564, | |
| "step": 686, | |
| "step_loss": 0.16036511957645416 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 0.8795040894782503, | |
| "kl": 0.3965161442756653, | |
| "learning_rate": 3.4294046791977096e-06, | |
| "loss": 0.146, | |
| "step": 687, | |
| "step_loss": 0.13043992221355438 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 0.9844997417794465, | |
| "kl": 0.4604860246181488, | |
| "learning_rate": 3.4254279207009163e-06, | |
| "loss": 0.147, | |
| "step": 688, | |
| "step_loss": 0.15021522343158722 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 0.9757415983330471, | |
| "kl": 0.4833226501941681, | |
| "learning_rate": 3.4214488419303806e-06, | |
| "loss": 0.1415, | |
| "step": 689, | |
| "step_loss": 0.14150793850421906 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 1.0650937173916857, | |
| "kl": 0.4186987280845642, | |
| "learning_rate": 3.4174674565552902e-06, | |
| "loss": 0.1507, | |
| "step": 690, | |
| "step_loss": 0.1668976992368698 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 1.1276622372563851, | |
| "kl": 0.45650917291641235, | |
| "learning_rate": 3.413483778252759e-06, | |
| "loss": 0.1605, | |
| "step": 691, | |
| "step_loss": 0.16845574975013733 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 1.1937296491156466, | |
| "kl": 0.4285459518432617, | |
| "learning_rate": 3.4094978207077768e-06, | |
| "loss": 0.1487, | |
| "step": 692, | |
| "step_loss": 0.1462787389755249 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 1.0212719844193419, | |
| "kl": 0.5114130973815918, | |
| "learning_rate": 3.405509597613163e-06, | |
| "loss": 0.145, | |
| "step": 693, | |
| "step_loss": 0.1645100861787796 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 1.1028532757980718, | |
| "kl": 0.520036518573761, | |
| "learning_rate": 3.4015191226695203e-06, | |
| "loss": 0.1545, | |
| "step": 694, | |
| "step_loss": 0.17864225804805756 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.9733155555331748, | |
| "kl": 0.4656696915626526, | |
| "learning_rate": 3.397526409585185e-06, | |
| "loss": 0.1407, | |
| "step": 695, | |
| "step_loss": 0.12441332638263702 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 1.0128686328034844, | |
| "kl": 0.4196836054325104, | |
| "learning_rate": 3.3935314720761864e-06, | |
| "loss": 0.1494, | |
| "step": 696, | |
| "step_loss": 0.12643490731716156 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.9201530198545629, | |
| "kl": 0.46877506375312805, | |
| "learning_rate": 3.389534323866191e-06, | |
| "loss": 0.1399, | |
| "step": 697, | |
| "step_loss": 0.15753169357776642 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.9894624669733002, | |
| "kl": 0.4130990505218506, | |
| "learning_rate": 3.385534978686461e-06, | |
| "loss": 0.1546, | |
| "step": 698, | |
| "step_loss": 0.17007581889629364 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 1.0155107140429007, | |
| "kl": 0.4775574505329132, | |
| "learning_rate": 3.3815334502758055e-06, | |
| "loss": 0.1489, | |
| "step": 699, | |
| "step_loss": 0.12813322246074677 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 0.939102091406889, | |
| "kl": 0.41837960481643677, | |
| "learning_rate": 3.3775297523805365e-06, | |
| "loss": 0.1457, | |
| "step": 700, | |
| "step_loss": 0.15080194175243378 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 0.9499828261780652, | |
| "kl": 0.46936333179473877, | |
| "learning_rate": 3.3735238987544146e-06, | |
| "loss": 0.152, | |
| "step": 701, | |
| "step_loss": 0.13999901711940765 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 0.9757544264149329, | |
| "kl": 0.39698389172554016, | |
| "learning_rate": 3.369515903158607e-06, | |
| "loss": 0.1507, | |
| "step": 702, | |
| "step_loss": 0.14616172015666962 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 0.9785785921285085, | |
| "kl": 0.5002555847167969, | |
| "learning_rate": 3.365505779361642e-06, | |
| "loss": 0.1487, | |
| "step": 703, | |
| "step_loss": 0.16984041035175323 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 0.9897490897977824, | |
| "kl": 0.4246920049190521, | |
| "learning_rate": 3.3614935411393544e-06, | |
| "loss": 0.1589, | |
| "step": 704, | |
| "step_loss": 0.1668768674135208 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 0.9578147172513255, | |
| "kl": 0.3801954388618469, | |
| "learning_rate": 3.3574792022748463e-06, | |
| "loss": 0.1543, | |
| "step": 705, | |
| "step_loss": 0.13766665756702423 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.9662098961881012, | |
| "kl": 0.43419426679611206, | |
| "learning_rate": 3.3534627765584334e-06, | |
| "loss": 0.1512, | |
| "step": 706, | |
| "step_loss": 0.15339550375938416 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.9114560763013234, | |
| "kl": 0.5094923377037048, | |
| "learning_rate": 3.3494442777876022e-06, | |
| "loss": 0.1439, | |
| "step": 707, | |
| "step_loss": 0.1555686891078949 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 1.0718532109617787, | |
| "kl": 0.4465751051902771, | |
| "learning_rate": 3.3454237197669607e-06, | |
| "loss": 0.158, | |
| "step": 708, | |
| "step_loss": 0.14179621636867523 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 1.0391746116330387, | |
| "kl": 0.426788866519928, | |
| "learning_rate": 3.341401116308189e-06, | |
| "loss": 0.1514, | |
| "step": 709, | |
| "step_loss": 0.16065070033073425 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 0.9617041964014664, | |
| "kl": 0.39666134119033813, | |
| "learning_rate": 3.3373764812299954e-06, | |
| "loss": 0.1477, | |
| "step": 710, | |
| "step_loss": 0.13998152315616608 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 1.0146003249127458, | |
| "kl": 0.3973727226257324, | |
| "learning_rate": 3.333349828358067e-06, | |
| "loss": 0.1507, | |
| "step": 711, | |
| "step_loss": 0.15805211663246155 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 0.9869352681538952, | |
| "kl": 0.42524510622024536, | |
| "learning_rate": 3.3293211715250222e-06, | |
| "loss": 0.1457, | |
| "step": 712, | |
| "step_loss": 0.1241888552904129 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 0.9948029176298825, | |
| "kl": 0.40643396973609924, | |
| "learning_rate": 3.325290524570365e-06, | |
| "loss": 0.151, | |
| "step": 713, | |
| "step_loss": 0.16702188551425934 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 1.0201927757414222, | |
| "kl": 0.48036760091781616, | |
| "learning_rate": 3.321257901340434e-06, | |
| "loss": 0.148, | |
| "step": 714, | |
| "step_loss": 0.1528395265340805 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 0.9203151178241796, | |
| "kl": 0.5000730156898499, | |
| "learning_rate": 3.317223315688358e-06, | |
| "loss": 0.1463, | |
| "step": 715, | |
| "step_loss": 0.16571396589279175 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 0.9937513297449968, | |
| "kl": 0.3717888593673706, | |
| "learning_rate": 3.313186781474008e-06, | |
| "loss": 0.1627, | |
| "step": 716, | |
| "step_loss": 0.17453354597091675 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 1.0398940867357407, | |
| "kl": 0.5787868499755859, | |
| "learning_rate": 3.309148312563945e-06, | |
| "loss": 0.1602, | |
| "step": 717, | |
| "step_loss": 0.2011195421218872 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 0.9694706558977573, | |
| "kl": 0.4446108639240265, | |
| "learning_rate": 3.3051079228313815e-06, | |
| "loss": 0.1426, | |
| "step": 718, | |
| "step_loss": 0.1449287086725235 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 0.9431356634721532, | |
| "kl": 0.4195602834224701, | |
| "learning_rate": 3.301065626156125e-06, | |
| "loss": 0.143, | |
| "step": 719, | |
| "step_loss": 0.1555873304605484 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.8839846147038787, | |
| "kl": 0.46536436676979065, | |
| "learning_rate": 3.2970214364245352e-06, | |
| "loss": 0.1425, | |
| "step": 720, | |
| "step_loss": 0.1379953920841217 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.963039812210809, | |
| "kl": 0.4512026011943817, | |
| "learning_rate": 3.2929753675294745e-06, | |
| "loss": 0.1529, | |
| "step": 721, | |
| "step_loss": 0.1425732672214508 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 1.0145646795924912, | |
| "kl": 0.48650485277175903, | |
| "learning_rate": 3.2889274333702612e-06, | |
| "loss": 0.1592, | |
| "step": 722, | |
| "step_loss": 0.14481569826602936 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 0.933756411120608, | |
| "kl": 0.45475882291793823, | |
| "learning_rate": 3.284877647852621e-06, | |
| "loss": 0.1427, | |
| "step": 723, | |
| "step_loss": 0.1405959278345108 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 0.992827171719177, | |
| "kl": 0.4193406403064728, | |
| "learning_rate": 3.2808260248886375e-06, | |
| "loss": 0.1552, | |
| "step": 724, | |
| "step_loss": 0.14025843143463135 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 1.0734808867236152, | |
| "kl": 0.4013231694698334, | |
| "learning_rate": 3.2767725783967112e-06, | |
| "loss": 0.1584, | |
| "step": 725, | |
| "step_loss": 0.16459117829799652 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 0.9858158556441436, | |
| "kl": 0.38677310943603516, | |
| "learning_rate": 3.272717322301503e-06, | |
| "loss": 0.1495, | |
| "step": 726, | |
| "step_loss": 0.13374319672584534 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 0.9541473377690992, | |
| "kl": 0.4354119598865509, | |
| "learning_rate": 3.2686602705338906e-06, | |
| "loss": 0.15, | |
| "step": 727, | |
| "step_loss": 0.15379023551940918 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 0.9634481367476163, | |
| "kl": 0.4873085021972656, | |
| "learning_rate": 3.26460143703092e-06, | |
| "loss": 0.1493, | |
| "step": 728, | |
| "step_loss": 0.1327579915523529 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 0.9953389168771356, | |
| "kl": 0.3850947916507721, | |
| "learning_rate": 3.2605408357357624e-06, | |
| "loss": 0.1521, | |
| "step": 729, | |
| "step_loss": 0.15176990628242493 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 0.9844939750343663, | |
| "kl": 0.44254547357559204, | |
| "learning_rate": 3.2564784805976562e-06, | |
| "loss": 0.1555, | |
| "step": 730, | |
| "step_loss": 0.13903678953647614 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 0.9886634600223577, | |
| "kl": 0.43694841861724854, | |
| "learning_rate": 3.2524143855718658e-06, | |
| "loss": 0.1565, | |
| "step": 731, | |
| "step_loss": 0.16289514303207397 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 0.9484758806082827, | |
| "kl": 0.3681836426258087, | |
| "learning_rate": 3.2483485646196362e-06, | |
| "loss": 0.149, | |
| "step": 732, | |
| "step_loss": 0.12337259948253632 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.9484232485107367, | |
| "kl": 0.4986894428730011, | |
| "learning_rate": 3.2442810317081377e-06, | |
| "loss": 0.1431, | |
| "step": 733, | |
| "step_loss": 0.13536569476127625 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.9693721649945023, | |
| "kl": 0.3540458083152771, | |
| "learning_rate": 3.240211800810422e-06, | |
| "loss": 0.145, | |
| "step": 734, | |
| "step_loss": 0.11736252903938293 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 1.0585370770025628, | |
| "kl": 0.43972048163414, | |
| "learning_rate": 3.2361408859053755e-06, | |
| "loss": 0.1481, | |
| "step": 735, | |
| "step_loss": 0.15756061673164368 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.9537128136761093, | |
| "kl": 0.3989626467227936, | |
| "learning_rate": 3.2320683009776693e-06, | |
| "loss": 0.1536, | |
| "step": 736, | |
| "step_loss": 0.13479158282279968 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 0.9815792466817577, | |
| "kl": 0.41348952054977417, | |
| "learning_rate": 3.2279940600177097e-06, | |
| "loss": 0.151, | |
| "step": 737, | |
| "step_loss": 0.12909382581710815 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 1.04854074018167, | |
| "kl": 0.4909619688987732, | |
| "learning_rate": 3.223918177021594e-06, | |
| "loss": 0.1475, | |
| "step": 738, | |
| "step_loss": 0.1474575400352478 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 1.0715274180501446, | |
| "kl": 0.45988544821739197, | |
| "learning_rate": 3.2198406659910596e-06, | |
| "loss": 0.1634, | |
| "step": 739, | |
| "step_loss": 0.16125904023647308 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 0.9890646713016052, | |
| "kl": 0.42592141032218933, | |
| "learning_rate": 3.2157615409334363e-06, | |
| "loss": 0.1499, | |
| "step": 740, | |
| "step_loss": 0.128481924533844 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 0.9970158964695769, | |
| "kl": 0.397694855928421, | |
| "learning_rate": 3.2116808158615986e-06, | |
| "loss": 0.1577, | |
| "step": 741, | |
| "step_loss": 0.1626586616039276 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 1.0147077673417295, | |
| "kl": 0.365612268447876, | |
| "learning_rate": 3.207598504793917e-06, | |
| "loss": 0.1517, | |
| "step": 742, | |
| "step_loss": 0.17115183174610138 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 0.9665733618485168, | |
| "kl": 0.41971349716186523, | |
| "learning_rate": 3.2035146217542116e-06, | |
| "loss": 0.1443, | |
| "step": 743, | |
| "step_loss": 0.14287295937538147 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.0369419314745194, | |
| "kl": 0.46320030093193054, | |
| "learning_rate": 3.1994291807717027e-06, | |
| "loss": 0.1556, | |
| "step": 744, | |
| "step_loss": 0.16582506895065308 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 0.9781857084745722, | |
| "kl": 0.369863361120224, | |
| "learning_rate": 3.19534219588096e-06, | |
| "loss": 0.1513, | |
| "step": 745, | |
| "step_loss": 0.1406002938747406 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 0.9404615672126064, | |
| "kl": 0.44528642296791077, | |
| "learning_rate": 3.19125368112186e-06, | |
| "loss": 0.1519, | |
| "step": 746, | |
| "step_loss": 0.13117581605911255 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 1.0121457881406244, | |
| "kl": 0.39943063259124756, | |
| "learning_rate": 3.187163650539533e-06, | |
| "loss": 0.155, | |
| "step": 747, | |
| "step_loss": 0.1424383670091629 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 1.1106950133092461, | |
| "kl": 0.5139979124069214, | |
| "learning_rate": 3.1830721181843177e-06, | |
| "loss": 0.1448, | |
| "step": 748, | |
| "step_loss": 0.13854211568832397 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.9928293479952365, | |
| "kl": 0.4104097783565521, | |
| "learning_rate": 3.1789790981117103e-06, | |
| "loss": 0.1496, | |
| "step": 749, | |
| "step_loss": 0.13803105056285858 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 0.9864891298529466, | |
| "kl": 0.5884207487106323, | |
| "learning_rate": 3.174884604382317e-06, | |
| "loss": 0.1457, | |
| "step": 750, | |
| "step_loss": 0.1734772026538849 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 1.019026327979771, | |
| "kl": 0.39733612537384033, | |
| "learning_rate": 3.170788651061811e-06, | |
| "loss": 0.1548, | |
| "step": 751, | |
| "step_loss": 0.15517111122608185 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 0.9759482931761134, | |
| "kl": 0.39855048060417175, | |
| "learning_rate": 3.1666912522208754e-06, | |
| "loss": 0.143, | |
| "step": 752, | |
| "step_loss": 0.1241132915019989 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 0.9819776619042838, | |
| "kl": 0.4785412549972534, | |
| "learning_rate": 3.162592421935158e-06, | |
| "loss": 0.1529, | |
| "step": 753, | |
| "step_loss": 0.1676996350288391 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 0.9567580475400209, | |
| "kl": 0.4445911943912506, | |
| "learning_rate": 3.158492174285229e-06, | |
| "loss": 0.1488, | |
| "step": 754, | |
| "step_loss": 0.16597110033035278 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 0.9727635152108765, | |
| "kl": 0.420547217130661, | |
| "learning_rate": 3.1543905233565232e-06, | |
| "loss": 0.1464, | |
| "step": 755, | |
| "step_loss": 0.12980793416500092 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 0.9831060677637677, | |
| "kl": 0.43531182408332825, | |
| "learning_rate": 3.1502874832392984e-06, | |
| "loss": 0.1502, | |
| "step": 756, | |
| "step_loss": 0.19235534965991974 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 0.9741457522529087, | |
| "kl": 0.4326942563056946, | |
| "learning_rate": 3.146183068028582e-06, | |
| "loss": 0.1521, | |
| "step": 757, | |
| "step_loss": 0.17118844389915466 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 1.0158603395549721, | |
| "kl": 0.4532112777233124, | |
| "learning_rate": 3.1420772918241286e-06, | |
| "loss": 0.1416, | |
| "step": 758, | |
| "step_loss": 0.1491362452507019 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 0.9702749401691976, | |
| "kl": 0.41253310441970825, | |
| "learning_rate": 3.1379701687303665e-06, | |
| "loss": 0.152, | |
| "step": 759, | |
| "step_loss": 0.16260167956352234 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 0.9835143944298296, | |
| "kl": 0.48813024163246155, | |
| "learning_rate": 3.1338617128563505e-06, | |
| "loss": 0.1483, | |
| "step": 760, | |
| "step_loss": 0.14093339443206787 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.9673002377624046, | |
| "kl": 0.45352044701576233, | |
| "learning_rate": 3.1297519383157138e-06, | |
| "loss": 0.1444, | |
| "step": 761, | |
| "step_loss": 0.14167660474777222 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 1.0177495156776364, | |
| "kl": 0.3557469844818115, | |
| "learning_rate": 3.1256408592266214e-06, | |
| "loss": 0.1545, | |
| "step": 762, | |
| "step_loss": 0.1343638151884079 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.9023137851380141, | |
| "kl": 0.4272156357765198, | |
| "learning_rate": 3.121528489711718e-06, | |
| "loss": 0.1473, | |
| "step": 763, | |
| "step_loss": 0.1451863944530487 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 1.0441318127695194, | |
| "kl": 0.4729064106941223, | |
| "learning_rate": 3.1174148438980804e-06, | |
| "loss": 0.151, | |
| "step": 764, | |
| "step_loss": 0.16162380576133728 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.9950818625517187, | |
| "kl": 0.4472399353981018, | |
| "learning_rate": 3.113299935917174e-06, | |
| "loss": 0.1539, | |
| "step": 765, | |
| "step_loss": 0.14452748000621796 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 1.0179996166687564, | |
| "kl": 0.5420696139335632, | |
| "learning_rate": 3.1091837799047946e-06, | |
| "loss": 0.1475, | |
| "step": 766, | |
| "step_loss": 0.18763327598571777 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 1.0010025393208966, | |
| "kl": 0.3447565734386444, | |
| "learning_rate": 3.1050663900010295e-06, | |
| "loss": 0.1485, | |
| "step": 767, | |
| "step_loss": 0.13680729269981384 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 1.0142744474701502, | |
| "kl": 0.3538120687007904, | |
| "learning_rate": 3.1009477803502015e-06, | |
| "loss": 0.1541, | |
| "step": 768, | |
| "step_loss": 0.16569288074970245 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 1.036047382769812, | |
| "kl": 0.4295748174190521, | |
| "learning_rate": 3.0968279651008273e-06, | |
| "loss": 0.1613, | |
| "step": 769, | |
| "step_loss": 0.14074234664440155 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 0.9244937635890171, | |
| "kl": 0.5195684432983398, | |
| "learning_rate": 3.092706958405561e-06, | |
| "loss": 0.1472, | |
| "step": 770, | |
| "step_loss": 0.15720242261886597 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 0.9621433507609762, | |
| "kl": 0.4947783052921295, | |
| "learning_rate": 3.088584774421155e-06, | |
| "loss": 0.1552, | |
| "step": 771, | |
| "step_loss": 0.16154304146766663 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 1.0088544087696063, | |
| "kl": 0.45518720149993896, | |
| "learning_rate": 3.0844614273083986e-06, | |
| "loss": 0.1537, | |
| "step": 772, | |
| "step_loss": 0.16250544786453247 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 1.02735358403942, | |
| "kl": 0.3680950999259949, | |
| "learning_rate": 3.0803369312320834e-06, | |
| "loss": 0.1595, | |
| "step": 773, | |
| "step_loss": 0.13430428504943848 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 1.0229473541898282, | |
| "kl": 0.45938223600387573, | |
| "learning_rate": 3.0762113003609464e-06, | |
| "loss": 0.1521, | |
| "step": 774, | |
| "step_loss": 0.16164535284042358 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 0.9762252225185958, | |
| "kl": 0.4909830391407013, | |
| "learning_rate": 3.072084548867619e-06, | |
| "loss": 0.1487, | |
| "step": 775, | |
| "step_loss": 0.16784648597240448 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 1.0525530374080225, | |
| "kl": 0.4151424765586853, | |
| "learning_rate": 3.0679566909285865e-06, | |
| "loss": 0.1644, | |
| "step": 776, | |
| "step_loss": 0.15828314423561096 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 1.0300851369596706, | |
| "kl": 0.3816262185573578, | |
| "learning_rate": 3.0638277407241353e-06, | |
| "loss": 0.1552, | |
| "step": 777, | |
| "step_loss": 0.1416245996952057 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 0.9094117275175454, | |
| "kl": 0.47151878476142883, | |
| "learning_rate": 3.059697712438301e-06, | |
| "loss": 0.1429, | |
| "step": 778, | |
| "step_loss": 0.1599782109260559 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 0.943123316008099, | |
| "kl": 0.4516427218914032, | |
| "learning_rate": 3.0555666202588237e-06, | |
| "loss": 0.1525, | |
| "step": 779, | |
| "step_loss": 0.17189931869506836 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 0.9766034537311132, | |
| "kl": 0.4513046443462372, | |
| "learning_rate": 3.0514344783771017e-06, | |
| "loss": 0.1516, | |
| "step": 780, | |
| "step_loss": 0.14572221040725708 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 0.9983107634149363, | |
| "kl": 0.36767828464508057, | |
| "learning_rate": 3.0473013009881343e-06, | |
| "loss": 0.1561, | |
| "step": 781, | |
| "step_loss": 0.16547314822673798 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 1.0270127608148132, | |
| "kl": 0.45325592160224915, | |
| "learning_rate": 3.04316710229048e-06, | |
| "loss": 0.1583, | |
| "step": 782, | |
| "step_loss": 0.14895084500312805 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 0.9148345849077315, | |
| "kl": 0.5390376448631287, | |
| "learning_rate": 3.0390318964862064e-06, | |
| "loss": 0.1519, | |
| "step": 783, | |
| "step_loss": 0.1619083732366562 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 0.8725682510499037, | |
| "kl": 0.34793299436569214, | |
| "learning_rate": 3.03489569778084e-06, | |
| "loss": 0.1452, | |
| "step": 784, | |
| "step_loss": 0.1361267864704132 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 0.9441065859863195, | |
| "kl": 0.4481748640537262, | |
| "learning_rate": 3.0307585203833203e-06, | |
| "loss": 0.1426, | |
| "step": 785, | |
| "step_loss": 0.15584218502044678 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 1.0085881671115713, | |
| "kl": 0.43633711338043213, | |
| "learning_rate": 3.0266203785059438e-06, | |
| "loss": 0.1528, | |
| "step": 786, | |
| "step_loss": 0.1516050398349762 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 0.9369144359378735, | |
| "kl": 0.4298512935638428, | |
| "learning_rate": 3.0224812863643266e-06, | |
| "loss": 0.1451, | |
| "step": 787, | |
| "step_loss": 0.12825755774974823 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.065211790409866, | |
| "kl": 0.43545064330101013, | |
| "learning_rate": 3.0183412581773453e-06, | |
| "loss": 0.1619, | |
| "step": 788, | |
| "step_loss": 0.16257864236831665 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.0775993281913878, | |
| "kl": 0.40961089730262756, | |
| "learning_rate": 3.0142003081670922e-06, | |
| "loss": 0.1628, | |
| "step": 789, | |
| "step_loss": 0.1613461673259735 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.0237299523119836, | |
| "kl": 0.3749983608722687, | |
| "learning_rate": 3.010058450558827e-06, | |
| "loss": 0.1504, | |
| "step": 790, | |
| "step_loss": 0.13308608531951904 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 1.0661845972455573, | |
| "kl": 0.472342312335968, | |
| "learning_rate": 3.005915699580928e-06, | |
| "loss": 0.1486, | |
| "step": 791, | |
| "step_loss": 0.1562490463256836 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 0.9764395369363379, | |
| "kl": 0.36287054419517517, | |
| "learning_rate": 3.0017720694648407e-06, | |
| "loss": 0.1472, | |
| "step": 792, | |
| "step_loss": 0.14360609650611877 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 0.9561934092861629, | |
| "kl": 0.41280660033226013, | |
| "learning_rate": 2.997627574445032e-06, | |
| "loss": 0.1425, | |
| "step": 793, | |
| "step_loss": 0.1299421787261963 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 1.016934035404032, | |
| "kl": 0.44996654987335205, | |
| "learning_rate": 2.9934822287589404e-06, | |
| "loss": 0.1517, | |
| "step": 794, | |
| "step_loss": 0.13960210978984833 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 1.007459978061249, | |
| "kl": 0.4418295919895172, | |
| "learning_rate": 2.9893360466469257e-06, | |
| "loss": 0.1586, | |
| "step": 795, | |
| "step_loss": 0.1698797643184662 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 0.9777845289814279, | |
| "kl": 0.4033919870853424, | |
| "learning_rate": 2.9851890423522214e-06, | |
| "loss": 0.1534, | |
| "step": 796, | |
| "step_loss": 0.148381307721138 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 1.0038670483961691, | |
| "kl": 0.41636258363723755, | |
| "learning_rate": 2.9810412301208837e-06, | |
| "loss": 0.1605, | |
| "step": 797, | |
| "step_loss": 0.15567950904369354 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 0.9931491186767503, | |
| "kl": 0.536481499671936, | |
| "learning_rate": 2.976892624201747e-06, | |
| "loss": 0.1515, | |
| "step": 798, | |
| "step_loss": 0.1677020788192749 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 0.9976100824318828, | |
| "kl": 0.3861311674118042, | |
| "learning_rate": 2.9727432388463713e-06, | |
| "loss": 0.1498, | |
| "step": 799, | |
| "step_loss": 0.14130038022994995 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 0.9818984015412048, | |
| "kl": 0.38746803998947144, | |
| "learning_rate": 2.9685930883089936e-06, | |
| "loss": 0.1524, | |
| "step": 800, | |
| "step_loss": 0.1593620926141739 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.9288991903769893, | |
| "kl": 0.3206455707550049, | |
| "learning_rate": 2.9644421868464797e-06, | |
| "loss": 0.1394, | |
| "step": 801, | |
| "step_loss": 0.10261634737253189 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.9983514817365607, | |
| "kl": 0.41597017645835876, | |
| "learning_rate": 2.9602905487182758e-06, | |
| "loss": 0.1451, | |
| "step": 802, | |
| "step_loss": 0.12765845656394958 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.8902637518688588, | |
| "kl": 0.35546138882637024, | |
| "learning_rate": 2.9561381881863583e-06, | |
| "loss": 0.1358, | |
| "step": 803, | |
| "step_loss": 0.11750486493110657 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.1226095260209488, | |
| "kl": 0.41053929924964905, | |
| "learning_rate": 2.9519851195151834e-06, | |
| "loss": 0.1441, | |
| "step": 804, | |
| "step_loss": 0.15928710997104645 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 1.0162148889995983, | |
| "kl": 0.40562787652015686, | |
| "learning_rate": 2.9478313569716427e-06, | |
| "loss": 0.1497, | |
| "step": 805, | |
| "step_loss": 0.1482928991317749 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 0.9605017503685718, | |
| "kl": 0.37824496626853943, | |
| "learning_rate": 2.9436769148250107e-06, | |
| "loss": 0.1502, | |
| "step": 806, | |
| "step_loss": 0.1375085562467575 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 1.0439813106314617, | |
| "kl": 0.4013465642929077, | |
| "learning_rate": 2.939521807346896e-06, | |
| "loss": 0.1515, | |
| "step": 807, | |
| "step_loss": 0.1387888491153717 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 0.9915447105353192, | |
| "kl": 0.4745499789714813, | |
| "learning_rate": 2.935366048811192e-06, | |
| "loss": 0.1519, | |
| "step": 808, | |
| "step_loss": 0.1591644287109375 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 0.9295061078745588, | |
| "kl": 0.39526990056037903, | |
| "learning_rate": 2.9312096534940304e-06, | |
| "loss": 0.1403, | |
| "step": 809, | |
| "step_loss": 0.14475908875465393 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 0.9874188124202254, | |
| "kl": 0.4027431309223175, | |
| "learning_rate": 2.9270526356737306e-06, | |
| "loss": 0.1551, | |
| "step": 810, | |
| "step_loss": 0.17166703939437866 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 1.0386355533970422, | |
| "kl": 0.332511842250824, | |
| "learning_rate": 2.9228950096307477e-06, | |
| "loss": 0.1543, | |
| "step": 811, | |
| "step_loss": 0.14212793111801147 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 0.9426077934872951, | |
| "kl": 0.5144191980361938, | |
| "learning_rate": 2.9187367896476287e-06, | |
| "loss": 0.1467, | |
| "step": 812, | |
| "step_loss": 0.17707863450050354 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 1.033937968101504, | |
| "kl": 0.458046019077301, | |
| "learning_rate": 2.9145779900089603e-06, | |
| "loss": 0.1568, | |
| "step": 813, | |
| "step_loss": 0.14064949750900269 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 0.9527788628095325, | |
| "kl": 0.46262165904045105, | |
| "learning_rate": 2.91041862500132e-06, | |
| "loss": 0.1517, | |
| "step": 814, | |
| "step_loss": 0.13187597692012787 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 1.0259502663865276, | |
| "kl": 0.5149811506271362, | |
| "learning_rate": 2.9062587089132287e-06, | |
| "loss": 0.1507, | |
| "step": 815, | |
| "step_loss": 0.14037488400936127 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.9142149529111745, | |
| "kl": 0.5691028237342834, | |
| "learning_rate": 2.9020982560350987e-06, | |
| "loss": 0.1424, | |
| "step": 816, | |
| "step_loss": 0.15255063772201538 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.9834074836322464, | |
| "kl": 0.5331578254699707, | |
| "learning_rate": 2.897937280659188e-06, | |
| "loss": 0.1522, | |
| "step": 817, | |
| "step_loss": 0.19720472395420074 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 1.0291303032977386, | |
| "kl": 0.42640256881713867, | |
| "learning_rate": 2.893775797079548e-06, | |
| "loss": 0.1587, | |
| "step": 818, | |
| "step_loss": 0.15943148732185364 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 1.023854381889724, | |
| "kl": 0.3928847908973694, | |
| "learning_rate": 2.8896138195919774e-06, | |
| "loss": 0.1478, | |
| "step": 819, | |
| "step_loss": 0.14019568264484406 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 1.0242535198952862, | |
| "kl": 0.48428666591644287, | |
| "learning_rate": 2.885451362493971e-06, | |
| "loss": 0.1529, | |
| "step": 820, | |
| "step_loss": 0.19270402193069458 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 1.0748533004644767, | |
| "kl": 0.5833289623260498, | |
| "learning_rate": 2.8812884400846697e-06, | |
| "loss": 0.1585, | |
| "step": 821, | |
| "step_loss": 0.15655606985092163 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 1.0464967072640574, | |
| "kl": 0.46840161085128784, | |
| "learning_rate": 2.8771250666648154e-06, | |
| "loss": 0.1546, | |
| "step": 822, | |
| "step_loss": 0.15261268615722656 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 0.9831924601580038, | |
| "kl": 0.46287956833839417, | |
| "learning_rate": 2.872961256536697e-06, | |
| "loss": 0.1586, | |
| "step": 823, | |
| "step_loss": 0.17225544154644012 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 1.065657859586474, | |
| "kl": 0.3879581093788147, | |
| "learning_rate": 2.868797024004106e-06, | |
| "loss": 0.1595, | |
| "step": 824, | |
| "step_loss": 0.1531311571598053 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 1.044424305750142, | |
| "kl": 0.43647971749305725, | |
| "learning_rate": 2.864632383372284e-06, | |
| "loss": 0.156, | |
| "step": 825, | |
| "step_loss": 0.13304683566093445 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 0.9736991867352337, | |
| "kl": 0.5177615880966187, | |
| "learning_rate": 2.8604673489478736e-06, | |
| "loss": 0.1507, | |
| "step": 826, | |
| "step_loss": 0.1456117331981659 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 0.935779184315902, | |
| "kl": 0.4033496379852295, | |
| "learning_rate": 2.8563019350388682e-06, | |
| "loss": 0.1441, | |
| "step": 827, | |
| "step_loss": 0.1455593705177307 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 0.9724703844802562, | |
| "kl": 0.44370484352111816, | |
| "learning_rate": 2.852136155954573e-06, | |
| "loss": 0.1551, | |
| "step": 828, | |
| "step_loss": 0.15278911590576172 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 1.0389405085720838, | |
| "kl": 0.3740912079811096, | |
| "learning_rate": 2.8479700260055375e-06, | |
| "loss": 0.1464, | |
| "step": 829, | |
| "step_loss": 0.14697124063968658 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 1.045910704297703, | |
| "kl": 0.46834734082221985, | |
| "learning_rate": 2.8438035595035235e-06, | |
| "loss": 0.1616, | |
| "step": 830, | |
| "step_loss": 0.16846756637096405 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 1.142264846499886, | |
| "kl": 0.47931694984436035, | |
| "learning_rate": 2.8396367707614454e-06, | |
| "loss": 0.1669, | |
| "step": 831, | |
| "step_loss": 0.18373435735702515 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.0527179270858962, | |
| "kl": 0.527995228767395, | |
| "learning_rate": 2.835469674093326e-06, | |
| "loss": 0.1575, | |
| "step": 832, | |
| "step_loss": 0.16337227821350098 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.0387618411476036, | |
| "kl": 0.42796212434768677, | |
| "learning_rate": 2.8313022838142475e-06, | |
| "loss": 0.1595, | |
| "step": 833, | |
| "step_loss": 0.16812928020954132 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 0.9471143464094294, | |
| "kl": 0.4106406271457672, | |
| "learning_rate": 2.827134614240296e-06, | |
| "loss": 0.1499, | |
| "step": 834, | |
| "step_loss": 0.14076904952526093 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 0.9917655257761571, | |
| "kl": 0.45805299282073975, | |
| "learning_rate": 2.8229666796885224e-06, | |
| "loss": 0.1557, | |
| "step": 835, | |
| "step_loss": 0.15434692800045013 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 1.0110321513562135, | |
| "kl": 0.40475213527679443, | |
| "learning_rate": 2.818798494476884e-06, | |
| "loss": 0.1465, | |
| "step": 836, | |
| "step_loss": 0.1469970941543579 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 0.9764345161496416, | |
| "kl": 0.49001234769821167, | |
| "learning_rate": 2.814630072924201e-06, | |
| "loss": 0.1487, | |
| "step": 837, | |
| "step_loss": 0.1866789162158966 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 1.0794414278495323, | |
| "kl": 0.4759081304073334, | |
| "learning_rate": 2.8104614293501047e-06, | |
| "loss": 0.166, | |
| "step": 838, | |
| "step_loss": 0.15782764554023743 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 0.9236052649575086, | |
| "kl": 0.5032958984375, | |
| "learning_rate": 2.8062925780749913e-06, | |
| "loss": 0.141, | |
| "step": 839, | |
| "step_loss": 0.1591145396232605 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 0.9482136338056982, | |
| "kl": 0.4338464140892029, | |
| "learning_rate": 2.802123533419966e-06, | |
| "loss": 0.1433, | |
| "step": 840, | |
| "step_loss": 0.16640107333660126 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 1.0595480920949152, | |
| "kl": 0.526473343372345, | |
| "learning_rate": 2.7979543097068023e-06, | |
| "loss": 0.1574, | |
| "step": 841, | |
| "step_loss": 0.1585194319486618 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.9792300907182565, | |
| "kl": 0.43886512517929077, | |
| "learning_rate": 2.793784921257889e-06, | |
| "loss": 0.1488, | |
| "step": 842, | |
| "step_loss": 0.1458326131105423 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.9630706804467645, | |
| "kl": 0.4345511794090271, | |
| "learning_rate": 2.789615382396178e-06, | |
| "loss": 0.1489, | |
| "step": 843, | |
| "step_loss": 0.15134494006633759 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 1.0446667083350927, | |
| "kl": 0.4915614724159241, | |
| "learning_rate": 2.785445707445139e-06, | |
| "loss": 0.1563, | |
| "step": 844, | |
| "step_loss": 0.1671372950077057 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 1.0372490510437422, | |
| "kl": 0.46963563561439514, | |
| "learning_rate": 2.7812759107287092e-06, | |
| "loss": 0.1577, | |
| "step": 845, | |
| "step_loss": 0.17008760571479797 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 1.0428903957466424, | |
| "kl": 0.4324113130569458, | |
| "learning_rate": 2.777106006571246e-06, | |
| "loss": 0.1637, | |
| "step": 846, | |
| "step_loss": 0.1375209391117096 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 1.1117206141871079, | |
| "kl": 0.49308332800865173, | |
| "learning_rate": 2.7729360092974727e-06, | |
| "loss": 0.1573, | |
| "step": 847, | |
| "step_loss": 0.1321687251329422 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 1.0017834941794728, | |
| "kl": 0.4275958836078644, | |
| "learning_rate": 2.7687659332324348e-06, | |
| "loss": 0.1522, | |
| "step": 848, | |
| "step_loss": 0.1515330672264099 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.9791789290830443, | |
| "kl": 0.3971335291862488, | |
| "learning_rate": 2.7645957927014476e-06, | |
| "loss": 0.1472, | |
| "step": 849, | |
| "step_loss": 0.1782829761505127 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.9777418720782441, | |
| "kl": 0.44619691371917725, | |
| "learning_rate": 2.7604256020300474e-06, | |
| "loss": 0.1504, | |
| "step": 850, | |
| "step_loss": 0.15114323794841766 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 1.0171915736900594, | |
| "kl": 0.43852925300598145, | |
| "learning_rate": 2.7562553755439453e-06, | |
| "loss": 0.1536, | |
| "step": 851, | |
| "step_loss": 0.1527268886566162 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.9392503452143012, | |
| "kl": 0.48514020442962646, | |
| "learning_rate": 2.7520851275689705e-06, | |
| "loss": 0.1434, | |
| "step": 852, | |
| "step_loss": 0.15535013377666473 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 1.0357489994061733, | |
| "kl": 0.40949738025665283, | |
| "learning_rate": 2.7479148724310306e-06, | |
| "loss": 0.1554, | |
| "step": 853, | |
| "step_loss": 0.15922455489635468 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 0.9421029421438589, | |
| "kl": 0.4990323483943939, | |
| "learning_rate": 2.7437446244560563e-06, | |
| "loss": 0.1501, | |
| "step": 854, | |
| "step_loss": 0.14648234844207764 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 0.9961498319508436, | |
| "kl": 0.4119671583175659, | |
| "learning_rate": 2.739574397969953e-06, | |
| "loss": 0.1457, | |
| "step": 855, | |
| "step_loss": 0.14378459751605988 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 1.018662122435029, | |
| "kl": 0.4358648359775543, | |
| "learning_rate": 2.7354042072985527e-06, | |
| "loss": 0.1596, | |
| "step": 856, | |
| "step_loss": 0.1559343934059143 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.9455489895306939, | |
| "kl": 0.5033860206604004, | |
| "learning_rate": 2.731234066767566e-06, | |
| "loss": 0.148, | |
| "step": 857, | |
| "step_loss": 0.14744150638580322 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 1.046978432968794, | |
| "kl": 0.5012477040290833, | |
| "learning_rate": 2.727063990702528e-06, | |
| "loss": 0.1554, | |
| "step": 858, | |
| "step_loss": 0.1712190806865692 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 0.9584546228377427, | |
| "kl": 0.4685800075531006, | |
| "learning_rate": 2.7228939934287545e-06, | |
| "loss": 0.1518, | |
| "step": 859, | |
| "step_loss": 0.1738032102584839 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 1.0138203947600486, | |
| "kl": 0.4495634138584137, | |
| "learning_rate": 2.7187240892712915e-06, | |
| "loss": 0.1542, | |
| "step": 860, | |
| "step_loss": 0.1489475667476654 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 0.9471813046395302, | |
| "kl": 0.506722092628479, | |
| "learning_rate": 2.7145542925548625e-06, | |
| "loss": 0.1497, | |
| "step": 861, | |
| "step_loss": 0.17352920770645142 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 0.930769811268272, | |
| "kl": 0.45764830708503723, | |
| "learning_rate": 2.7103846176038234e-06, | |
| "loss": 0.1463, | |
| "step": 862, | |
| "step_loss": 0.14699025452136993 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 1.0613625131595172, | |
| "kl": 0.5440824627876282, | |
| "learning_rate": 2.7062150787421117e-06, | |
| "loss": 0.1586, | |
| "step": 863, | |
| "step_loss": 0.15115922689437866 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 1.0090354921253601, | |
| "kl": 0.4678072929382324, | |
| "learning_rate": 2.702045690293198e-06, | |
| "loss": 0.1577, | |
| "step": 864, | |
| "step_loss": 0.19815057516098022 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 0.9879524528369488, | |
| "kl": 0.41325706243515015, | |
| "learning_rate": 2.697876466580035e-06, | |
| "loss": 0.1587, | |
| "step": 865, | |
| "step_loss": 0.1458713412284851 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 0.9336052578651772, | |
| "kl": 0.35849088430404663, | |
| "learning_rate": 2.693707421925011e-06, | |
| "loss": 0.1427, | |
| "step": 866, | |
| "step_loss": 0.1263750195503235 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 0.962448080322168, | |
| "kl": 0.4207032024860382, | |
| "learning_rate": 2.689538570649896e-06, | |
| "loss": 0.1501, | |
| "step": 867, | |
| "step_loss": 0.15147185325622559 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 0.9755617125351154, | |
| "kl": 0.35634732246398926, | |
| "learning_rate": 2.6853699270758006e-06, | |
| "loss": 0.1444, | |
| "step": 868, | |
| "step_loss": 0.13335993885993958 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 1.0071987678638463, | |
| "kl": 0.40423381328582764, | |
| "learning_rate": 2.681201505523117e-06, | |
| "loss": 0.1497, | |
| "step": 869, | |
| "step_loss": 0.15910114347934723 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.9679292924796197, | |
| "kl": 0.4693619906902313, | |
| "learning_rate": 2.6770333203114783e-06, | |
| "loss": 0.1527, | |
| "step": 870, | |
| "step_loss": 0.16329102218151093 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.9628687579474976, | |
| "kl": 0.4137752056121826, | |
| "learning_rate": 2.6728653857597042e-06, | |
| "loss": 0.1462, | |
| "step": 871, | |
| "step_loss": 0.12889014184474945 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 1.0253647283281182, | |
| "kl": 0.31751748919487, | |
| "learning_rate": 2.6686977161857536e-06, | |
| "loss": 0.1509, | |
| "step": 872, | |
| "step_loss": 0.13793063163757324 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 0.9933774525703443, | |
| "kl": 0.4467350244522095, | |
| "learning_rate": 2.664530325906674e-06, | |
| "loss": 0.1555, | |
| "step": 873, | |
| "step_loss": 0.15573230385780334 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 1.0177636297789605, | |
| "kl": 0.4431452751159668, | |
| "learning_rate": 2.660363229238555e-06, | |
| "loss": 0.159, | |
| "step": 874, | |
| "step_loss": 0.12957452237606049 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 0.9976856899473273, | |
| "kl": 0.415743350982666, | |
| "learning_rate": 2.6561964404964772e-06, | |
| "loss": 0.1414, | |
| "step": 875, | |
| "step_loss": 0.14293581247329712 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 1.0243005389649271, | |
| "kl": 0.47209423780441284, | |
| "learning_rate": 2.6520299739944632e-06, | |
| "loss": 0.1577, | |
| "step": 876, | |
| "step_loss": 0.15875697135925293 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 1.0132980539403538, | |
| "kl": 0.4555986821651459, | |
| "learning_rate": 2.6478638440454287e-06, | |
| "loss": 0.155, | |
| "step": 877, | |
| "step_loss": 0.14882808923721313 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 0.9968564423415248, | |
| "kl": 0.4383925199508667, | |
| "learning_rate": 2.6436980649611316e-06, | |
| "loss": 0.1527, | |
| "step": 878, | |
| "step_loss": 0.1468876153230667 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 0.9946869883294787, | |
| "kl": 0.4805772602558136, | |
| "learning_rate": 2.6395326510521284e-06, | |
| "loss": 0.1468, | |
| "step": 879, | |
| "step_loss": 0.1445452868938446 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.0065650168443376, | |
| "kl": 0.4132371246814728, | |
| "learning_rate": 2.635367616627717e-06, | |
| "loss": 0.1504, | |
| "step": 880, | |
| "step_loss": 0.1394580602645874 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.0307017371698117, | |
| "kl": 0.3783648908138275, | |
| "learning_rate": 2.631202975995894e-06, | |
| "loss": 0.1592, | |
| "step": 881, | |
| "step_loss": 0.15418383479118347 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.001319848718613, | |
| "kl": 0.5001019835472107, | |
| "learning_rate": 2.6270387434633033e-06, | |
| "loss": 0.1473, | |
| "step": 882, | |
| "step_loss": 0.14509941637516022 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.9875134436983942, | |
| "kl": 0.41825470328330994, | |
| "learning_rate": 2.622874933335186e-06, | |
| "loss": 0.1537, | |
| "step": 883, | |
| "step_loss": 0.12782949209213257 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.0572877830596905, | |
| "kl": 0.48397839069366455, | |
| "learning_rate": 2.618711559915332e-06, | |
| "loss": 0.1532, | |
| "step": 884, | |
| "step_loss": 0.15913690626621246 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.9751094536600117, | |
| "kl": 0.4292382597923279, | |
| "learning_rate": 2.6145486375060305e-06, | |
| "loss": 0.1522, | |
| "step": 885, | |
| "step_loss": 0.14782339334487915 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.0673776750362411, | |
| "kl": 0.5227698087692261, | |
| "learning_rate": 2.610386180408023e-06, | |
| "loss": 0.1631, | |
| "step": 886, | |
| "step_loss": 0.17023181915283203 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 0.9733633359085521, | |
| "kl": 0.35292860865592957, | |
| "learning_rate": 2.6062242029204525e-06, | |
| "loss": 0.1471, | |
| "step": 887, | |
| "step_loss": 0.16706503927707672 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 1.0817929222314064, | |
| "kl": 0.4509636163711548, | |
| "learning_rate": 2.6020627193408126e-06, | |
| "loss": 0.159, | |
| "step": 888, | |
| "step_loss": 0.14339911937713623 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 1.0112704753099575, | |
| "kl": 0.5275288820266724, | |
| "learning_rate": 2.5979017439649016e-06, | |
| "loss": 0.1578, | |
| "step": 889, | |
| "step_loss": 0.1634017825126648 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 1.065748885440266, | |
| "kl": 0.4825303554534912, | |
| "learning_rate": 2.593741291086772e-06, | |
| "loss": 0.1647, | |
| "step": 890, | |
| "step_loss": 0.15958373248577118 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 1.0145482738058882, | |
| "kl": 0.4341558814048767, | |
| "learning_rate": 2.589581374998681e-06, | |
| "loss": 0.1535, | |
| "step": 891, | |
| "step_loss": 0.15015427768230438 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 0.965175247388308, | |
| "kl": 0.3313429355621338, | |
| "learning_rate": 2.5854220099910404e-06, | |
| "loss": 0.1421, | |
| "step": 892, | |
| "step_loss": 0.1318102329969406 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 0.9691866844145771, | |
| "kl": 0.3674515187740326, | |
| "learning_rate": 2.581263210352372e-06, | |
| "loss": 0.1465, | |
| "step": 893, | |
| "step_loss": 0.12013500183820724 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 1.0326376883901633, | |
| "kl": 0.4059482514858246, | |
| "learning_rate": 2.5771049903692534e-06, | |
| "loss": 0.1576, | |
| "step": 894, | |
| "step_loss": 0.1739095002412796 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 1.025107600798099, | |
| "kl": 0.4659903943538666, | |
| "learning_rate": 2.572947364326271e-06, | |
| "loss": 0.1657, | |
| "step": 895, | |
| "step_loss": 0.1959068477153778 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 1.0072382984139703, | |
| "kl": 0.4354158639907837, | |
| "learning_rate": 2.5687903465059694e-06, | |
| "loss": 0.1544, | |
| "step": 896, | |
| "step_loss": 0.1440533995628357 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.9879824672979248, | |
| "kl": 0.42458993196487427, | |
| "learning_rate": 2.5646339511888087e-06, | |
| "loss": 0.1515, | |
| "step": 897, | |
| "step_loss": 0.16383150219917297 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.9776984512612195, | |
| "kl": 0.5129539966583252, | |
| "learning_rate": 2.560478192653106e-06, | |
| "loss": 0.145, | |
| "step": 898, | |
| "step_loss": 0.1519792526960373 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 1.0045857782912961, | |
| "kl": 0.46529197692871094, | |
| "learning_rate": 2.5563230851749904e-06, | |
| "loss": 0.1554, | |
| "step": 899, | |
| "step_loss": 0.14696285128593445 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 1.0698829958572684, | |
| "kl": 0.38536253571510315, | |
| "learning_rate": 2.5521686430283584e-06, | |
| "loss": 0.1562, | |
| "step": 900, | |
| "step_loss": 0.1435265839099884 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.9972674616711951, | |
| "kl": 0.5101684927940369, | |
| "learning_rate": 2.5480148804848177e-06, | |
| "loss": 0.1518, | |
| "step": 901, | |
| "step_loss": 0.17259491980075836 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.9978533741995581, | |
| "kl": 0.4602809548377991, | |
| "learning_rate": 2.5438618118136433e-06, | |
| "loss": 0.1524, | |
| "step": 902, | |
| "step_loss": 0.16445577144622803 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.9469645542814312, | |
| "kl": 0.42083609104156494, | |
| "learning_rate": 2.539709451281725e-06, | |
| "loss": 0.1485, | |
| "step": 903, | |
| "step_loss": 0.12865757942199707 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 1.095494010676487, | |
| "kl": 0.522094190120697, | |
| "learning_rate": 2.5355578131535206e-06, | |
| "loss": 0.1574, | |
| "step": 904, | |
| "step_loss": 0.1739048808813095 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 0.9767740096877633, | |
| "kl": 0.4193570613861084, | |
| "learning_rate": 2.531406911691007e-06, | |
| "loss": 0.1573, | |
| "step": 905, | |
| "step_loss": 0.1455826461315155 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 1.017293366696319, | |
| "kl": 0.4341852068901062, | |
| "learning_rate": 2.5272567611536303e-06, | |
| "loss": 0.1526, | |
| "step": 906, | |
| "step_loss": 0.15153871476650238 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 0.9952002463525593, | |
| "kl": 0.42138275504112244, | |
| "learning_rate": 2.523107375798254e-06, | |
| "loss": 0.1538, | |
| "step": 907, | |
| "step_loss": 0.1505734622478485 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 0.9990848287474213, | |
| "kl": 0.4363083839416504, | |
| "learning_rate": 2.5189587698791175e-06, | |
| "loss": 0.154, | |
| "step": 908, | |
| "step_loss": 0.14343413710594177 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 0.9723709331417107, | |
| "kl": 0.42384618520736694, | |
| "learning_rate": 2.51481095764778e-06, | |
| "loss": 0.1496, | |
| "step": 909, | |
| "step_loss": 0.1434541940689087 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.953374267251741, | |
| "kl": 0.5374601483345032, | |
| "learning_rate": 2.510663953353075e-06, | |
| "loss": 0.1485, | |
| "step": 910, | |
| "step_loss": 0.1526307612657547 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 1.020275779519007, | |
| "kl": 0.5095154047012329, | |
| "learning_rate": 2.50651777124106e-06, | |
| "loss": 0.1547, | |
| "step": 911, | |
| "step_loss": 0.15365783870220184 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 1.0064179434331657, | |
| "kl": 0.5424807667732239, | |
| "learning_rate": 2.502372425554968e-06, | |
| "loss": 0.1491, | |
| "step": 912, | |
| "step_loss": 0.1669929325580597 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.9590723032371087, | |
| "kl": 0.3669721484184265, | |
| "learning_rate": 2.4982279305351605e-06, | |
| "loss": 0.149, | |
| "step": 913, | |
| "step_loss": 0.14819172024726868 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 1.0423700278607653, | |
| "kl": 0.40059924125671387, | |
| "learning_rate": 2.4940843004190727e-06, | |
| "loss": 0.1528, | |
| "step": 914, | |
| "step_loss": 0.14792829751968384 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 1.059798761869846, | |
| "kl": 0.39160391688346863, | |
| "learning_rate": 2.4899415494411736e-06, | |
| "loss": 0.1494, | |
| "step": 915, | |
| "step_loss": 0.14644666016101837 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 0.994269671804963, | |
| "kl": 0.4729336202144623, | |
| "learning_rate": 2.4857996918329093e-06, | |
| "loss": 0.1548, | |
| "step": 916, | |
| "step_loss": 0.14490240812301636 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.0274421441712134, | |
| "kl": 0.4249011278152466, | |
| "learning_rate": 2.481658741822656e-06, | |
| "loss": 0.1528, | |
| "step": 917, | |
| "step_loss": 0.1569293737411499 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.1032766707978614, | |
| "kl": 0.44397133588790894, | |
| "learning_rate": 2.4775187136356732e-06, | |
| "loss": 0.1509, | |
| "step": 918, | |
| "step_loss": 0.13812614977359772 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.0393047337226677, | |
| "kl": 0.40771737694740295, | |
| "learning_rate": 2.4733796214940565e-06, | |
| "loss": 0.1559, | |
| "step": 919, | |
| "step_loss": 0.1609930545091629 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.0454766342784834, | |
| "kl": 0.40909823775291443, | |
| "learning_rate": 2.469241479616681e-06, | |
| "loss": 0.1562, | |
| "step": 920, | |
| "step_loss": 0.15960478782653809 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 0.9710746223118797, | |
| "kl": 0.357599139213562, | |
| "learning_rate": 2.4651043022191605e-06, | |
| "loss": 0.1409, | |
| "step": 921, | |
| "step_loss": 0.12360851466655731 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 1.0366911538769703, | |
| "kl": 0.44713571667671204, | |
| "learning_rate": 2.4609681035137944e-06, | |
| "loss": 0.1515, | |
| "step": 922, | |
| "step_loss": 0.13877364993095398 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 0.9935669508473964, | |
| "kl": 0.47438859939575195, | |
| "learning_rate": 2.456832897709521e-06, | |
| "loss": 0.1502, | |
| "step": 923, | |
| "step_loss": 0.17184007167816162 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 1.0048074202805686, | |
| "kl": 0.3735441565513611, | |
| "learning_rate": 2.4526986990118672e-06, | |
| "loss": 0.1583, | |
| "step": 924, | |
| "step_loss": 0.14378073811531067 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 0.9409483433063506, | |
| "kl": 0.3914346694946289, | |
| "learning_rate": 2.4485655216228986e-06, | |
| "loss": 0.1476, | |
| "step": 925, | |
| "step_loss": 0.15655651688575745 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 1.0223306684022924, | |
| "kl": 0.424472451210022, | |
| "learning_rate": 2.444433379741176e-06, | |
| "loss": 0.1541, | |
| "step": 926, | |
| "step_loss": 0.15379807353019714 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 0.9893200206120092, | |
| "kl": 0.47619765996932983, | |
| "learning_rate": 2.4403022875617e-06, | |
| "loss": 0.1467, | |
| "step": 927, | |
| "step_loss": 0.17208687961101532 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 1.0262447178852225, | |
| "kl": 0.47813111543655396, | |
| "learning_rate": 2.436172259275866e-06, | |
| "loss": 0.1623, | |
| "step": 928, | |
| "step_loss": 0.13537657260894775 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 0.9861270661846792, | |
| "kl": 0.38649582862854004, | |
| "learning_rate": 2.4320433090714134e-06, | |
| "loss": 0.1476, | |
| "step": 929, | |
| "step_loss": 0.1501408964395523 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 0.9853040761251798, | |
| "kl": 0.4063931107521057, | |
| "learning_rate": 2.4279154511323823e-06, | |
| "loss": 0.1615, | |
| "step": 930, | |
| "step_loss": 0.15353356301784515 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 0.9205228458296352, | |
| "kl": 0.3394644260406494, | |
| "learning_rate": 2.4237886996390556e-06, | |
| "loss": 0.1427, | |
| "step": 931, | |
| "step_loss": 0.1389724165201187 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 0.9932025613140306, | |
| "kl": 0.3473202884197235, | |
| "learning_rate": 2.4196630687679173e-06, | |
| "loss": 0.1505, | |
| "step": 932, | |
| "step_loss": 0.1683613657951355 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 0.984733712974178, | |
| "kl": 0.40478530526161194, | |
| "learning_rate": 2.415538572691602e-06, | |
| "loss": 0.1463, | |
| "step": 933, | |
| "step_loss": 0.13838434219360352 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.9806916184951824, | |
| "kl": 0.4532083570957184, | |
| "learning_rate": 2.4114152255788466e-06, | |
| "loss": 0.1518, | |
| "step": 934, | |
| "step_loss": 0.13839900493621826 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.9979938112250495, | |
| "kl": 0.389826238155365, | |
| "learning_rate": 2.407293041594439e-06, | |
| "loss": 0.156, | |
| "step": 935, | |
| "step_loss": 0.1437515914440155 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.978297759451275, | |
| "kl": 0.43591850996017456, | |
| "learning_rate": 2.4031720348991734e-06, | |
| "loss": 0.1506, | |
| "step": 936, | |
| "step_loss": 0.1359221488237381 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.9961809157585862, | |
| "kl": 0.33996838331222534, | |
| "learning_rate": 2.399052219649799e-06, | |
| "loss": 0.145, | |
| "step": 937, | |
| "step_loss": 0.14791721105575562 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.9450751235156168, | |
| "kl": 0.42210879921913147, | |
| "learning_rate": 2.3949336099989724e-06, | |
| "loss": 0.1503, | |
| "step": 938, | |
| "step_loss": 0.16048789024353027 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.9925709982011522, | |
| "kl": 0.4403047561645508, | |
| "learning_rate": 2.390816220095207e-06, | |
| "loss": 0.1551, | |
| "step": 939, | |
| "step_loss": 0.16639472544193268 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.9326817674895876, | |
| "kl": 0.4106891453266144, | |
| "learning_rate": 2.386700064082827e-06, | |
| "loss": 0.1437, | |
| "step": 940, | |
| "step_loss": 0.13783779740333557 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 0.9993440741834876, | |
| "kl": 0.5168544054031372, | |
| "learning_rate": 2.38258515610192e-06, | |
| "loss": 0.1586, | |
| "step": 941, | |
| "step_loss": 0.19751232862472534 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 0.951222600504691, | |
| "kl": 0.39257577061653137, | |
| "learning_rate": 2.3784715102882834e-06, | |
| "loss": 0.1512, | |
| "step": 942, | |
| "step_loss": 0.1248694509267807 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 0.9983634073528408, | |
| "kl": 0.34117716550827026, | |
| "learning_rate": 2.3743591407733797e-06, | |
| "loss": 0.1574, | |
| "step": 943, | |
| "step_loss": 0.15840350091457367 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.0089856202771001, | |
| "kl": 0.5630459785461426, | |
| "learning_rate": 2.3702480616842865e-06, | |
| "loss": 0.1612, | |
| "step": 944, | |
| "step_loss": 0.17266206443309784 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.0153619278805137, | |
| "kl": 0.518592894077301, | |
| "learning_rate": 2.36613828714365e-06, | |
| "loss": 0.1595, | |
| "step": 945, | |
| "step_loss": 0.18611447513103485 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 0.9672443779904987, | |
| "kl": 0.4448012113571167, | |
| "learning_rate": 2.362029831269634e-06, | |
| "loss": 0.1558, | |
| "step": 946, | |
| "step_loss": 0.14080186188220978 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 0.9611084638649775, | |
| "kl": 0.38354170322418213, | |
| "learning_rate": 2.357922708175872e-06, | |
| "loss": 0.1469, | |
| "step": 947, | |
| "step_loss": 0.15614628791809082 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 1.0312340475062312, | |
| "kl": 0.42923545837402344, | |
| "learning_rate": 2.353816931971419e-06, | |
| "loss": 0.1509, | |
| "step": 948, | |
| "step_loss": 0.15058480203151703 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 1.0325988848719911, | |
| "kl": 0.37678343057632446, | |
| "learning_rate": 2.3497125167607027e-06, | |
| "loss": 0.153, | |
| "step": 949, | |
| "step_loss": 0.14883080124855042 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 0.9819062626097106, | |
| "kl": 0.5016992688179016, | |
| "learning_rate": 2.345609476643477e-06, | |
| "loss": 0.1428, | |
| "step": 950, | |
| "step_loss": 0.15665948390960693 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 1.0064440018956071, | |
| "kl": 0.4498019814491272, | |
| "learning_rate": 2.341507825714771e-06, | |
| "loss": 0.1593, | |
| "step": 951, | |
| "step_loss": 0.1523018330335617 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.942831756703654, | |
| "kl": 0.508718729019165, | |
| "learning_rate": 2.337407578064842e-06, | |
| "loss": 0.1473, | |
| "step": 952, | |
| "step_loss": 0.16131407022476196 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 1.0213628080249857, | |
| "kl": 0.47834068536758423, | |
| "learning_rate": 2.3333087477791257e-06, | |
| "loss": 0.1581, | |
| "step": 953, | |
| "step_loss": 0.16310566663742065 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.9611082313028335, | |
| "kl": 0.3989601135253906, | |
| "learning_rate": 2.3292113489381895e-06, | |
| "loss": 0.152, | |
| "step": 954, | |
| "step_loss": 0.13949620723724365 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 0.9853678646194656, | |
| "kl": 0.3734014332294464, | |
| "learning_rate": 2.325115395617683e-06, | |
| "loss": 0.1535, | |
| "step": 955, | |
| "step_loss": 0.14914605021476746 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 0.9778583931081463, | |
| "kl": 0.45069605112075806, | |
| "learning_rate": 2.3210209018882913e-06, | |
| "loss": 0.1524, | |
| "step": 956, | |
| "step_loss": 0.15262170135974884 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 1.014172101673639, | |
| "kl": 0.5114811062812805, | |
| "learning_rate": 2.316927881815683e-06, | |
| "loss": 0.1451, | |
| "step": 957, | |
| "step_loss": 0.16915518045425415 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 0.9415660896574684, | |
| "kl": 0.39761587977409363, | |
| "learning_rate": 2.312836349460467e-06, | |
| "loss": 0.153, | |
| "step": 958, | |
| "step_loss": 0.1431863158941269 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 1.0285671477663938, | |
| "kl": 0.2906627655029297, | |
| "learning_rate": 2.3087463188781408e-06, | |
| "loss": 0.1559, | |
| "step": 959, | |
| "step_loss": 0.12965397536754608 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 0.9929048693237944, | |
| "kl": 0.4464694559574127, | |
| "learning_rate": 2.3046578041190403e-06, | |
| "loss": 0.1486, | |
| "step": 960, | |
| "step_loss": 0.15477749705314636 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 1.0822745094493649, | |
| "kl": 0.4634704291820526, | |
| "learning_rate": 2.3005708192282984e-06, | |
| "loss": 0.1604, | |
| "step": 961, | |
| "step_loss": 0.17266973853111267 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 1.0941722735914765, | |
| "kl": 0.47234082221984863, | |
| "learning_rate": 2.2964853782457887e-06, | |
| "loss": 0.149, | |
| "step": 962, | |
| "step_loss": 0.14461389183998108 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 1.0574454523000933, | |
| "kl": 0.39766812324523926, | |
| "learning_rate": 2.2924014952060843e-06, | |
| "loss": 0.1603, | |
| "step": 963, | |
| "step_loss": 0.14928704500198364 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 0.9067579687361036, | |
| "kl": 0.4438409209251404, | |
| "learning_rate": 2.288319184138403e-06, | |
| "loss": 0.1408, | |
| "step": 964, | |
| "step_loss": 0.14300301671028137 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 1.0342452737185248, | |
| "kl": 0.3799823820590973, | |
| "learning_rate": 2.2842384590665644e-06, | |
| "loss": 0.155, | |
| "step": 965, | |
| "step_loss": 0.15365462005138397 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 1.0444298271978016, | |
| "kl": 0.4924103617668152, | |
| "learning_rate": 2.280159334008941e-06, | |
| "loss": 0.1507, | |
| "step": 966, | |
| "step_loss": 0.15880931913852692 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.9599603942062377, | |
| "kl": 0.41578635573387146, | |
| "learning_rate": 2.2760818229784065e-06, | |
| "loss": 0.1504, | |
| "step": 967, | |
| "step_loss": 0.1449252814054489 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.0134939283764037, | |
| "kl": 0.46528518199920654, | |
| "learning_rate": 2.2720059399822906e-06, | |
| "loss": 0.1545, | |
| "step": 968, | |
| "step_loss": 0.1586332619190216 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.0347690361891235, | |
| "kl": 0.41343602538108826, | |
| "learning_rate": 2.2679316990223314e-06, | |
| "loss": 0.1578, | |
| "step": 969, | |
| "step_loss": 0.17001797258853912 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.0055593394957056, | |
| "kl": 0.45835837721824646, | |
| "learning_rate": 2.263859114094625e-06, | |
| "loss": 0.1463, | |
| "step": 970, | |
| "step_loss": 0.13045182824134827 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 0.9567730457998137, | |
| "kl": 0.5038785934448242, | |
| "learning_rate": 2.259788199189579e-06, | |
| "loss": 0.1515, | |
| "step": 971, | |
| "step_loss": 0.17427542805671692 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 1.0010068414341897, | |
| "kl": 0.4056503474712372, | |
| "learning_rate": 2.255718968291864e-06, | |
| "loss": 0.157, | |
| "step": 972, | |
| "step_loss": 0.15841376781463623 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 1.0107117284340932, | |
| "kl": 0.45705166459083557, | |
| "learning_rate": 2.251651435380364e-06, | |
| "loss": 0.1571, | |
| "step": 973, | |
| "step_loss": 0.14534001052379608 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 0.990000349150702, | |
| "kl": 0.5228754878044128, | |
| "learning_rate": 2.2475856144281345e-06, | |
| "loss": 0.1379, | |
| "step": 974, | |
| "step_loss": 0.16290828585624695 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 1.0008871196700513, | |
| "kl": 0.3806186020374298, | |
| "learning_rate": 2.2435215194023453e-06, | |
| "loss": 0.1525, | |
| "step": 975, | |
| "step_loss": 0.1635514348745346 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 1.0960477399243882, | |
| "kl": 0.45375317335128784, | |
| "learning_rate": 2.239459164264238e-06, | |
| "loss": 0.1535, | |
| "step": 976, | |
| "step_loss": 0.18511676788330078 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 1.0218003843637917, | |
| "kl": 0.37781763076782227, | |
| "learning_rate": 2.2353985629690793e-06, | |
| "loss": 0.1475, | |
| "step": 977, | |
| "step_loss": 0.12731696665287018 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.9017717274035716, | |
| "kl": 0.3964046239852905, | |
| "learning_rate": 2.231339729466111e-06, | |
| "loss": 0.143, | |
| "step": 978, | |
| "step_loss": 0.13375751674175262 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.9619019793221093, | |
| "kl": 0.36923855543136597, | |
| "learning_rate": 2.2272826776984985e-06, | |
| "loss": 0.1486, | |
| "step": 979, | |
| "step_loss": 0.15310907363891602 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 1.0027381543220892, | |
| "kl": 0.45206311345100403, | |
| "learning_rate": 2.223227421603289e-06, | |
| "loss": 0.1508, | |
| "step": 980, | |
| "step_loss": 0.17637795209884644 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 1.047855188766988, | |
| "kl": 0.44840162992477417, | |
| "learning_rate": 2.2191739751113624e-06, | |
| "loss": 0.1611, | |
| "step": 981, | |
| "step_loss": 0.1478584259748459 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 1.0127988521429687, | |
| "kl": 0.5084017515182495, | |
| "learning_rate": 2.2151223521473803e-06, | |
| "loss": 0.1474, | |
| "step": 982, | |
| "step_loss": 0.14254876971244812 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 0.9277534723825935, | |
| "kl": 0.4267195463180542, | |
| "learning_rate": 2.2110725666297395e-06, | |
| "loss": 0.1441, | |
| "step": 983, | |
| "step_loss": 0.13901741802692413 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 1.0036892192485454, | |
| "kl": 0.3984602987766266, | |
| "learning_rate": 2.2070246324705253e-06, | |
| "loss": 0.143, | |
| "step": 984, | |
| "step_loss": 0.1377902328968048 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 0.9646755179948415, | |
| "kl": 0.6093090772628784, | |
| "learning_rate": 2.2029785635754646e-06, | |
| "loss": 0.1476, | |
| "step": 985, | |
| "step_loss": 0.17706118524074554 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 0.9905850844659927, | |
| "kl": 0.4276701807975769, | |
| "learning_rate": 2.1989343738438755e-06, | |
| "loss": 0.1525, | |
| "step": 986, | |
| "step_loss": 0.17314979434013367 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.1708291027675957, | |
| "kl": 0.40725067257881165, | |
| "learning_rate": 2.1948920771686196e-06, | |
| "loss": 0.1567, | |
| "step": 987, | |
| "step_loss": 0.16095474362373352 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.0739387979713282, | |
| "kl": 0.39202889800071716, | |
| "learning_rate": 2.1908516874360558e-06, | |
| "loss": 0.1485, | |
| "step": 988, | |
| "step_loss": 0.13703203201293945 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 0.9536023429872225, | |
| "kl": 0.4466555714607239, | |
| "learning_rate": 2.1868132185259933e-06, | |
| "loss": 0.148, | |
| "step": 989, | |
| "step_loss": 0.16553649306297302 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 0.962874004228303, | |
| "kl": 0.36752018332481384, | |
| "learning_rate": 2.1827766843116427e-06, | |
| "loss": 0.1486, | |
| "step": 990, | |
| "step_loss": 0.1441442370414734 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 1.0075217426405563, | |
| "kl": 0.4437835216522217, | |
| "learning_rate": 2.1787420986595664e-06, | |
| "loss": 0.1485, | |
| "step": 991, | |
| "step_loss": 0.15905022621154785 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 1.016123385289782, | |
| "kl": 0.4468748867511749, | |
| "learning_rate": 2.1747094754296353e-06, | |
| "loss": 0.1452, | |
| "step": 992, | |
| "step_loss": 0.12368491291999817 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.9569243857996387, | |
| "kl": 0.4261481463909149, | |
| "learning_rate": 2.170678828474978e-06, | |
| "loss": 0.1471, | |
| "step": 993, | |
| "step_loss": 0.14896030724048615 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 1.0626260878976141, | |
| "kl": 0.38104549050331116, | |
| "learning_rate": 2.1666501716419342e-06, | |
| "loss": 0.1563, | |
| "step": 994, | |
| "step_loss": 0.15966196358203888 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 0.9361293787090408, | |
| "kl": 0.4901023805141449, | |
| "learning_rate": 2.1626235187700057e-06, | |
| "loss": 0.1462, | |
| "step": 995, | |
| "step_loss": 0.18366771936416626 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 0.950338934995153, | |
| "kl": 0.4179726541042328, | |
| "learning_rate": 2.158598883691812e-06, | |
| "loss": 0.1494, | |
| "step": 996, | |
| "step_loss": 0.138540118932724 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 1.0489853611443787, | |
| "kl": 0.506919801235199, | |
| "learning_rate": 2.15457628023304e-06, | |
| "loss": 0.1668, | |
| "step": 997, | |
| "step_loss": 0.21282246708869934 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 1.04818521184514, | |
| "kl": 0.5396957993507385, | |
| "learning_rate": 2.1505557222123972e-06, | |
| "loss": 0.1539, | |
| "step": 998, | |
| "step_loss": 0.1591482013463974 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 1.0616666386652713, | |
| "kl": 0.4631801247596741, | |
| "learning_rate": 2.1465372234415673e-06, | |
| "loss": 0.1555, | |
| "step": 999, | |
| "step_loss": 0.14960813522338867 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 1.035119268368553, | |
| "kl": 0.45855218172073364, | |
| "learning_rate": 2.1425207977251544e-06, | |
| "loss": 0.1521, | |
| "step": 1000, | |
| "step_loss": 0.1509908139705658 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 0.996467609929929, | |
| "kl": 0.3887898921966553, | |
| "learning_rate": 2.1385064588606463e-06, | |
| "loss": 0.1549, | |
| "step": 1001, | |
| "step_loss": 0.13089656829833984 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 0.9742558665726649, | |
| "kl": 0.40123283863067627, | |
| "learning_rate": 2.1344942206383586e-06, | |
| "loss": 0.1459, | |
| "step": 1002, | |
| "step_loss": 0.13350878655910492 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 0.9488404858578288, | |
| "kl": 0.382538765668869, | |
| "learning_rate": 2.130484096841393e-06, | |
| "loss": 0.1439, | |
| "step": 1003, | |
| "step_loss": 0.14286507666110992 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 0.9513353524533048, | |
| "kl": 0.42083343863487244, | |
| "learning_rate": 2.1264761012455865e-06, | |
| "loss": 0.142, | |
| "step": 1004, | |
| "step_loss": 0.14722788333892822 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 1.0141753634991808, | |
| "kl": 0.4633311629295349, | |
| "learning_rate": 2.122470247619464e-06, | |
| "loss": 0.1623, | |
| "step": 1005, | |
| "step_loss": 0.1737322211265564 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 1.028458994383184, | |
| "kl": 0.5300709009170532, | |
| "learning_rate": 2.118466549724194e-06, | |
| "loss": 0.1536, | |
| "step": 1006, | |
| "step_loss": 0.16598369181156158 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 1.029391738602643, | |
| "kl": 0.43173086643218994, | |
| "learning_rate": 2.1144650213135397e-06, | |
| "loss": 0.1624, | |
| "step": 1007, | |
| "step_loss": 0.14931721985340118 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.9545265742649662, | |
| "kl": 0.4387211501598358, | |
| "learning_rate": 2.11046567613381e-06, | |
| "loss": 0.1437, | |
| "step": 1008, | |
| "step_loss": 0.16143842041492462 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 1.0152085404083042, | |
| "kl": 0.46170535683631897, | |
| "learning_rate": 2.106468527923814e-06, | |
| "loss": 0.1575, | |
| "step": 1009, | |
| "step_loss": 0.14393383264541626 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 0.9507669139022603, | |
| "kl": 0.450935959815979, | |
| "learning_rate": 2.1024735904148152e-06, | |
| "loss": 0.1556, | |
| "step": 1010, | |
| "step_loss": 0.14951980113983154 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 0.9775946084017444, | |
| "kl": 0.4422980546951294, | |
| "learning_rate": 2.098480877330481e-06, | |
| "loss": 0.1544, | |
| "step": 1011, | |
| "step_loss": 0.14084143936634064 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 0.9641237236379501, | |
| "kl": 0.4498288035392761, | |
| "learning_rate": 2.094490402386838e-06, | |
| "loss": 0.1566, | |
| "step": 1012, | |
| "step_loss": 0.13751116394996643 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 1.0126537288710316, | |
| "kl": 0.39250999689102173, | |
| "learning_rate": 2.0905021792922235e-06, | |
| "loss": 0.1559, | |
| "step": 1013, | |
| "step_loss": 0.14081251621246338 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 1.0449214471833967, | |
| "kl": 0.5089797377586365, | |
| "learning_rate": 2.0865162217472416e-06, | |
| "loss": 0.1596, | |
| "step": 1014, | |
| "step_loss": 0.16164351999759674 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 0.9832128900915236, | |
| "kl": 0.39397111535072327, | |
| "learning_rate": 2.08253254344471e-06, | |
| "loss": 0.1489, | |
| "step": 1015, | |
| "step_loss": 0.13195285201072693 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 1.081208132886941, | |
| "kl": 0.38181591033935547, | |
| "learning_rate": 2.0785511580696206e-06, | |
| "loss": 0.163, | |
| "step": 1016, | |
| "step_loss": 0.14687636494636536 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 0.9836684370582969, | |
| "kl": 0.5204348564147949, | |
| "learning_rate": 2.0745720792990836e-06, | |
| "loss": 0.1498, | |
| "step": 1017, | |
| "step_loss": 0.15303507447242737 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 1.0707243384591267, | |
| "kl": 0.37323451042175293, | |
| "learning_rate": 2.070595320802291e-06, | |
| "loss": 0.1536, | |
| "step": 1018, | |
| "step_loss": 0.1562386006116867 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.9179992183742228, | |
| "kl": 0.4068221151828766, | |
| "learning_rate": 2.0666208962404593e-06, | |
| "loss": 0.1383, | |
| "step": 1019, | |
| "step_loss": 0.1486799120903015 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.9834517584761968, | |
| "kl": 0.4073179364204407, | |
| "learning_rate": 2.062648819266789e-06, | |
| "loss": 0.1498, | |
| "step": 1020, | |
| "step_loss": 0.15172114968299866 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_loss": 1.7941198348999023, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_runtime": 14.5917, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_samples_per_second": 6.853, | |
| "eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_steps_per_second": 0.891, | |
| "step": 1020 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1700, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 100.0, | |
| "total_flos": 87053826223104.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |