SPC-Critic-0 / trainer_state.json
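The JSON below is the trainer state logged during training; its log_history array holds one entry per logging step, with keys such as "step", "loss", "kl", "grad_norm", and "learning_rate". A minimal sketch of how the file could be inspected, assuming it has been downloaded locally as trainer_state.json and that matplotlib is available (both are assumptions, not part of this repository):

import json
import matplotlib.pyplot as plt

# Load the trainer state and pull out the per-step training log.
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]  # one dict per logged step

# Keep only entries that carry a training loss (other entry types may not).
steps = [entry["step"] for entry in history if "loss" in entry]
losses = [entry["loss"] for entry in history if "loss" in entry]

plt.plot(steps, losses)
plt.xlabel("global step")
plt.ylabel("loss")
plt.title("SPC-Critic-0 training loss")
plt.show()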
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9988974641675856,
"eval_steps": 100.0,
"global_step": 1020,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 19.301212901214466,
"kl": 0.0,
"learning_rate": 5.000000000000001e-07,
"loss": 0.6049,
"step": 1,
"step_loss": 0.6052899360656738
},
{
"epoch": 0.01,
"grad_norm": 6.455205328029959,
"kl": 0.3062567710876465,
"learning_rate": 2.438044511330269e-06,
"loss": 0.4588,
"step": 2,
"step_loss": 0.46981990337371826
},
{
"epoch": 0.01,
"grad_norm": 4.434829230563478,
"kl": 0.23490308225154877,
"learning_rate": 3.5717278751869343e-06,
"loss": 0.4361,
"step": 3,
"step_loss": 0.4431145191192627
},
{
"epoch": 0.01,
"grad_norm": 3.1316724496056834,
"kl": 0.21129530668258667,
"learning_rate": 4.376089022660538e-06,
"loss": 0.4118,
"step": 4,
"step_loss": 0.371662974357605
},
{
"epoch": 0.01,
"grad_norm": 2.988460560324433,
"kl": 0.22684630751609802,
"learning_rate": 5e-06,
"loss": 0.3726,
"step": 5,
"step_loss": 0.36066734790802
},
{
"epoch": 0.02,
"grad_norm": 2.378051002744742,
"kl": 0.3077385723590851,
"learning_rate": 4.9999961353271305e-06,
"loss": 0.3356,
"step": 6,
"step_loss": 0.36983931064605713
},
{
"epoch": 0.02,
"grad_norm": 2.1071117126920313,
"kl": 0.28660184144973755,
"learning_rate": 4.9999845413217956e-06,
"loss": 0.3443,
"step": 7,
"step_loss": 0.33001354336738586
},
{
"epoch": 0.02,
"grad_norm": 2.126342237334389,
"kl": 0.2792012393474579,
"learning_rate": 4.999965218023826e-06,
"loss": 0.3704,
"step": 8,
"step_loss": 0.3890674114227295
},
{
"epoch": 0.03,
"grad_norm": 1.805551016834456,
"kl": 0.25425001978874207,
"learning_rate": 4.999938165499602e-06,
"loss": 0.3601,
"step": 9,
"step_loss": 0.3878689408302307
},
{
"epoch": 0.03,
"grad_norm": 2.115675523568805,
"kl": 0.286655992269516,
"learning_rate": 4.999903383842054e-06,
"loss": 0.3476,
"step": 10,
"step_loss": 0.3587522804737091
},
{
"epoch": 0.03,
"grad_norm": 1.944073523799986,
"kl": 0.2786046862602234,
"learning_rate": 4.9998608731706695e-06,
"loss": 0.361,
"step": 11,
"step_loss": 0.37417733669281006
},
{
"epoch": 0.04,
"grad_norm": 1.900652962403358,
"kl": 0.3140088617801666,
"learning_rate": 4.999810633631482e-06,
"loss": 0.333,
"step": 12,
"step_loss": 0.3321115970611572
},
{
"epoch": 0.04,
"grad_norm": 1.7772964219719185,
"kl": 0.27589982748031616,
"learning_rate": 4.999752665397077e-06,
"loss": 0.332,
"step": 13,
"step_loss": 0.2766149938106537
},
{
"epoch": 0.04,
"grad_norm": 1.784571061327797,
"kl": 0.30012714862823486,
"learning_rate": 4.999686968666592e-06,
"loss": 0.3256,
"step": 14,
"step_loss": 0.2897532284259796
},
{
"epoch": 0.04,
"grad_norm": 1.7357127297345116,
"kl": 0.27369552850723267,
"learning_rate": 4.999613543665713e-06,
"loss": 0.3343,
"step": 15,
"step_loss": 0.3049730062484741
},
{
"epoch": 0.05,
"grad_norm": 1.9257598473402229,
"kl": 0.36154788732528687,
"learning_rate": 4.999532390646673e-06,
"loss": 0.3378,
"step": 16,
"step_loss": 0.3547108769416809
},
{
"epoch": 0.05,
"grad_norm": 1.4906210741355268,
"kl": 0.2691032588481903,
"learning_rate": 4.999443509888254e-06,
"loss": 0.3175,
"step": 17,
"step_loss": 0.29635873436927795
},
{
"epoch": 0.05,
"grad_norm": 1.6048699907577806,
"kl": 0.34884747862815857,
"learning_rate": 4.999346901695787e-06,
"loss": 0.3237,
"step": 18,
"step_loss": 0.3572104871273041
},
{
"epoch": 0.06,
"grad_norm": 1.5670463795130773,
"kl": 0.2717525362968445,
"learning_rate": 4.999242566401145e-06,
"loss": 0.3306,
"step": 19,
"step_loss": 0.31295859813690186
},
{
"epoch": 0.06,
"grad_norm": 1.682513968323289,
"kl": 0.2477482557296753,
"learning_rate": 4.999130504362748e-06,
"loss": 0.3383,
"step": 20,
"step_loss": 0.3229523301124573
},
{
"epoch": 0.06,
"grad_norm": 1.6243623252629549,
"kl": 0.348207950592041,
"learning_rate": 4.9990107159655565e-06,
"loss": 0.3172,
"step": 21,
"step_loss": 0.4179743528366089
},
{
"epoch": 0.06,
"grad_norm": 1.6607049641076566,
"kl": 0.3793472945690155,
"learning_rate": 4.998883201621079e-06,
"loss": 0.3397,
"step": 22,
"step_loss": 0.39602553844451904
},
{
"epoch": 0.07,
"grad_norm": 1.5170388362033584,
"kl": 0.3244696259498596,
"learning_rate": 4.998747961767359e-06,
"loss": 0.3197,
"step": 23,
"step_loss": 0.3359769582748413
},
{
"epoch": 0.07,
"grad_norm": 1.6128730811774565,
"kl": 0.28219160437583923,
"learning_rate": 4.998604996868982e-06,
"loss": 0.3442,
"step": 24,
"step_loss": 0.3883013129234314
},
{
"epoch": 0.07,
"grad_norm": 1.4539005711940118,
"kl": 0.23452766239643097,
"learning_rate": 4.998454307417071e-06,
"loss": 0.3093,
"step": 25,
"step_loss": 0.3024548590183258
},
{
"epoch": 0.08,
"grad_norm": 1.5624222068494429,
"kl": 0.3776319622993469,
"learning_rate": 4.998295893929281e-06,
"loss": 0.3279,
"step": 26,
"step_loss": 0.3177269399166107
},
{
"epoch": 0.08,
"grad_norm": 1.5954783642036205,
"kl": 0.27478423714637756,
"learning_rate": 4.998129756949807e-06,
"loss": 0.3057,
"step": 27,
"step_loss": 0.2898721396923065
},
{
"epoch": 0.08,
"grad_norm": 1.541698773367659,
"kl": 0.31704697012901306,
"learning_rate": 4.997955897049373e-06,
"loss": 0.3468,
"step": 28,
"step_loss": 0.3161465525627136
},
{
"epoch": 0.09,
"grad_norm": 1.464173735929187,
"kl": 0.2408154010772705,
"learning_rate": 4.997774314825233e-06,
"loss": 0.3114,
"step": 29,
"step_loss": 0.2493884265422821
},
{
"epoch": 0.09,
"grad_norm": 1.5382515760575184,
"kl": 0.2777164876461029,
"learning_rate": 4.997585010901172e-06,
"loss": 0.3129,
"step": 30,
"step_loss": 0.2662698030471802
},
{
"epoch": 0.09,
"grad_norm": 1.456667059415303,
"kl": 0.2807072401046753,
"learning_rate": 4.9973879859274966e-06,
"loss": 0.3041,
"step": 31,
"step_loss": 0.2914745509624481
},
{
"epoch": 0.09,
"grad_norm": 1.5999842892560905,
"kl": 0.2716163098812103,
"learning_rate": 4.997183240581041e-06,
"loss": 0.328,
"step": 32,
"step_loss": 0.2906668484210968
},
{
"epoch": 0.1,
"grad_norm": 1.4212712274071648,
"kl": 0.29081669449806213,
"learning_rate": 4.996970775565161e-06,
"loss": 0.3008,
"step": 33,
"step_loss": 0.30645477771759033
},
{
"epoch": 0.1,
"grad_norm": 1.5615238192082477,
"kl": 0.2825222611427307,
"learning_rate": 4.996750591609727e-06,
"loss": 0.3209,
"step": 34,
"step_loss": 0.3189748525619507
},
{
"epoch": 0.1,
"grad_norm": 1.3807731720214431,
"kl": 0.3141458034515381,
"learning_rate": 4.9965226894711316e-06,
"loss": 0.3363,
"step": 35,
"step_loss": 0.3879620432853699
},
{
"epoch": 0.11,
"grad_norm": 1.4557787239042146,
"kl": 0.34700241684913635,
"learning_rate": 4.996287069932278e-06,
"loss": 0.3064,
"step": 36,
"step_loss": 0.39095190167427063
},
{
"epoch": 0.11,
"grad_norm": 1.5774051610848923,
"kl": 0.3121364116668701,
"learning_rate": 4.996043733802583e-06,
"loss": 0.3243,
"step": 37,
"step_loss": 0.30228227376937866
},
{
"epoch": 0.11,
"grad_norm": 1.4430150717016268,
"kl": 0.2903325855731964,
"learning_rate": 4.995792681917968e-06,
"loss": 0.2911,
"step": 38,
"step_loss": 0.2797442078590393
},
{
"epoch": 0.11,
"grad_norm": 1.3795537860052318,
"kl": 0.328932523727417,
"learning_rate": 4.995533915140866e-06,
"loss": 0.2866,
"step": 39,
"step_loss": 0.2982497215270996
},
{
"epoch": 0.12,
"grad_norm": 1.3084724825566474,
"kl": 0.26037484407424927,
"learning_rate": 4.995267434360207e-06,
"loss": 0.3049,
"step": 40,
"step_loss": 0.2707076370716095
},
{
"epoch": 0.12,
"grad_norm": 1.546767703101615,
"kl": 0.25849828124046326,
"learning_rate": 4.9949932404914245e-06,
"loss": 0.2885,
"step": 41,
"step_loss": 0.2949169874191284
},
{
"epoch": 0.12,
"grad_norm": 1.4065895730214004,
"kl": 0.3979896306991577,
"learning_rate": 4.9947113344764455e-06,
"loss": 0.3045,
"step": 42,
"step_loss": 0.32115134596824646
},
{
"epoch": 0.13,
"grad_norm": 1.5259965481994058,
"kl": 0.3463974595069885,
"learning_rate": 4.994421717283693e-06,
"loss": 0.304,
"step": 43,
"step_loss": 0.3217414617538452
},
{
"epoch": 0.13,
"grad_norm": 1.2978285994841212,
"kl": 0.36674511432647705,
"learning_rate": 4.994124389908078e-06,
"loss": 0.2864,
"step": 44,
"step_loss": 0.31553012132644653
},
{
"epoch": 0.13,
"grad_norm": 1.5564349432288815,
"kl": 0.35473960638046265,
"learning_rate": 4.993819353370999e-06,
"loss": 0.3335,
"step": 45,
"step_loss": 0.3283192217350006
},
{
"epoch": 0.14,
"grad_norm": 1.3575598996472378,
"kl": 0.2651940584182739,
"learning_rate": 4.993506608720339e-06,
"loss": 0.301,
"step": 46,
"step_loss": 0.2609683871269226
},
{
"epoch": 0.14,
"grad_norm": 1.3229608497099903,
"kl": 0.2759368121623993,
"learning_rate": 4.9931861570304555e-06,
"loss": 0.2925,
"step": 47,
"step_loss": 0.3200822174549103
},
{
"epoch": 0.14,
"grad_norm": 1.368724787057477,
"kl": 0.3092648684978485,
"learning_rate": 4.992857999402187e-06,
"loss": 0.2718,
"step": 48,
"step_loss": 0.2981921136379242
},
{
"epoch": 0.14,
"grad_norm": 1.3728861165253508,
"kl": 0.3472633361816406,
"learning_rate": 4.992522136962841e-06,
"loss": 0.3126,
"step": 49,
"step_loss": 0.31947529315948486
},
{
"epoch": 0.15,
"grad_norm": 1.4242257817482427,
"kl": 0.28193366527557373,
"learning_rate": 4.992178570866195e-06,
"loss": 0.2964,
"step": 50,
"step_loss": 0.2877271771430969
},
{
"epoch": 0.15,
"grad_norm": 1.521655434234139,
"kl": 0.2764663100242615,
"learning_rate": 4.9918273022924885e-06,
"loss": 0.3052,
"step": 51,
"step_loss": 0.2541694641113281
},
{
"epoch": 0.15,
"grad_norm": 1.5242796013210014,
"kl": 0.2966528832912445,
"learning_rate": 4.991468332448422e-06,
"loss": 0.3304,
"step": 52,
"step_loss": 0.30177876353263855
},
{
"epoch": 0.16,
"grad_norm": 1.4177402595981858,
"kl": 0.2607875168323517,
"learning_rate": 4.991101662567153e-06,
"loss": 0.3214,
"step": 53,
"step_loss": 0.27815118432044983
},
{
"epoch": 0.16,
"grad_norm": 1.4497114613800353,
"kl": 0.36675119400024414,
"learning_rate": 4.990727293908288e-06,
"loss": 0.3141,
"step": 54,
"step_loss": 0.34744372963905334
},
{
"epoch": 0.16,
"grad_norm": 1.476617226772246,
"kl": 0.2716200351715088,
"learning_rate": 4.990345227757884e-06,
"loss": 0.298,
"step": 55,
"step_loss": 0.25762778520584106
},
{
"epoch": 0.16,
"grad_norm": 1.3365233291298146,
"kl": 0.29236650466918945,
"learning_rate": 4.989955465428438e-06,
"loss": 0.2763,
"step": 56,
"step_loss": 0.27397677302360535
},
{
"epoch": 0.17,
"grad_norm": 1.4599412693497766,
"kl": 0.27795305848121643,
"learning_rate": 4.989558008258888e-06,
"loss": 0.3043,
"step": 57,
"step_loss": 0.24359291791915894
},
{
"epoch": 0.17,
"grad_norm": 1.5617623113651034,
"kl": 0.35907837748527527,
"learning_rate": 4.9891528576146046e-06,
"loss": 0.325,
"step": 58,
"step_loss": 0.32902687788009644
},
{
"epoch": 0.17,
"grad_norm": 1.4059355225202563,
"kl": 0.307645320892334,
"learning_rate": 4.988740014887386e-06,
"loss": 0.3028,
"step": 59,
"step_loss": 0.3135125935077667
},
{
"epoch": 0.18,
"grad_norm": 1.4501862291250671,
"kl": 0.32680854201316833,
"learning_rate": 4.9883194814954575e-06,
"loss": 0.3073,
"step": 60,
"step_loss": 0.34192976355552673
},
{
"epoch": 0.18,
"grad_norm": 1.3852935344310822,
"kl": 0.33248478174209595,
"learning_rate": 4.987891258883463e-06,
"loss": 0.3086,
"step": 61,
"step_loss": 0.3534170389175415
},
{
"epoch": 0.18,
"grad_norm": 1.4199406597673683,
"kl": 0.28986871242523193,
"learning_rate": 4.98745534852246e-06,
"loss": 0.3038,
"step": 62,
"step_loss": 0.3005980849266052
},
{
"epoch": 0.19,
"grad_norm": 1.5113547919536734,
"kl": 0.30484679341316223,
"learning_rate": 4.987011751909917e-06,
"loss": 0.3044,
"step": 63,
"step_loss": 0.23517432808876038
},
{
"epoch": 0.19,
"grad_norm": 1.3355819496710448,
"kl": 0.3742017447948456,
"learning_rate": 4.986560470569704e-06,
"loss": 0.3017,
"step": 64,
"step_loss": 0.3898337483406067
},
{
"epoch": 0.19,
"grad_norm": 1.4221577520408863,
"kl": 0.3570794463157654,
"learning_rate": 4.986101506052093e-06,
"loss": 0.3022,
"step": 65,
"step_loss": 0.30060654878616333
},
{
"epoch": 0.19,
"grad_norm": 1.6366629670681416,
"kl": 0.32745978236198425,
"learning_rate": 4.9856348599337485e-06,
"loss": 0.3361,
"step": 66,
"step_loss": 0.31522005796432495
},
{
"epoch": 0.2,
"grad_norm": 1.3857470049246778,
"kl": 0.284152626991272,
"learning_rate": 4.985160533817723e-06,
"loss": 0.2951,
"step": 67,
"step_loss": 0.27435213327407837
},
{
"epoch": 0.2,
"grad_norm": 1.3848855487240277,
"kl": 0.3354739546775818,
"learning_rate": 4.984678529333453e-06,
"loss": 0.2879,
"step": 68,
"step_loss": 0.3034001886844635
},
{
"epoch": 0.2,
"grad_norm": 1.4341508578031887,
"kl": 0.25649285316467285,
"learning_rate": 4.984188848136751e-06,
"loss": 0.3164,
"step": 69,
"step_loss": 0.30513542890548706
},
{
"epoch": 0.21,
"grad_norm": 1.3467125947401988,
"kl": 0.2996070384979248,
"learning_rate": 4.983691491909802e-06,
"loss": 0.3019,
"step": 70,
"step_loss": 0.36908990144729614
},
{
"epoch": 0.21,
"grad_norm": 1.3474866013931235,
"kl": 0.3084847927093506,
"learning_rate": 4.9831864623611564e-06,
"loss": 0.3008,
"step": 71,
"step_loss": 0.26990407705307007
},
{
"epoch": 0.21,
"grad_norm": 1.4862590377731744,
"kl": 0.31708824634552,
"learning_rate": 4.982673761225724e-06,
"loss": 0.314,
"step": 72,
"step_loss": 0.2802667021751404
},
{
"epoch": 0.21,
"grad_norm": 1.3719248630090615,
"kl": 0.2884005010128021,
"learning_rate": 4.982153390264769e-06,
"loss": 0.2902,
"step": 73,
"step_loss": 0.3215486705303192
},
{
"epoch": 0.22,
"grad_norm": 1.6257809266397576,
"kl": 0.4290885329246521,
"learning_rate": 4.981625351265903e-06,
"loss": 0.3466,
"step": 74,
"step_loss": 0.38507044315338135
},
{
"epoch": 0.22,
"grad_norm": 1.4642429493015807,
"kl": 0.3809298872947693,
"learning_rate": 4.9810896460430805e-06,
"loss": 0.3213,
"step": 75,
"step_loss": 0.43292292952537537
},
{
"epoch": 0.22,
"grad_norm": 1.2483432519354851,
"kl": 0.3046913146972656,
"learning_rate": 4.980546276436591e-06,
"loss": 0.2913,
"step": 76,
"step_loss": 0.29639115929603577
},
{
"epoch": 0.23,
"grad_norm": 1.380629384097335,
"kl": 0.3005834221839905,
"learning_rate": 4.979995244313052e-06,
"loss": 0.3037,
"step": 77,
"step_loss": 0.2509528696537018
},
{
"epoch": 0.23,
"grad_norm": 1.4850279125005055,
"kl": 0.3294805884361267,
"learning_rate": 4.979436551565407e-06,
"loss": 0.3246,
"step": 78,
"step_loss": 0.2669539451599121
},
{
"epoch": 0.23,
"grad_norm": 1.4283473816232348,
"kl": 0.29898595809936523,
"learning_rate": 4.9788702001129105e-06,
"loss": 0.3092,
"step": 79,
"step_loss": 0.35289207100868225
},
{
"epoch": 0.24,
"grad_norm": 1.4847956412758032,
"kl": 0.244553804397583,
"learning_rate": 4.97829619190113e-06,
"loss": 0.3042,
"step": 80,
"step_loss": 0.23004528880119324
},
{
"epoch": 0.24,
"grad_norm": 1.2924538131518895,
"kl": 0.2728404104709625,
"learning_rate": 4.977714528901938e-06,
"loss": 0.2793,
"step": 81,
"step_loss": 0.2652290165424347
},
{
"epoch": 0.24,
"grad_norm": 1.2587616697753488,
"kl": 0.2822519540786743,
"learning_rate": 4.9771252131135e-06,
"loss": 0.2952,
"step": 82,
"step_loss": 0.2914755642414093
},
{
"epoch": 0.24,
"grad_norm": 1.3835708779976017,
"kl": 0.3696300983428955,
"learning_rate": 4.976528246560269e-06,
"loss": 0.3029,
"step": 83,
"step_loss": 0.36016547679901123
},
{
"epoch": 0.25,
"grad_norm": 1.2530361733858713,
"kl": 0.31230488419532776,
"learning_rate": 4.975923631292988e-06,
"loss": 0.2898,
"step": 84,
"step_loss": 0.33970096707344055
},
{
"epoch": 0.25,
"grad_norm": 1.4116112976895483,
"kl": 0.312380850315094,
"learning_rate": 4.975311369388667e-06,
"loss": 0.2915,
"step": 85,
"step_loss": 0.3011205196380615
},
{
"epoch": 0.25,
"grad_norm": 1.3375751652497407,
"kl": 0.3674446940422058,
"learning_rate": 4.974691462950589e-06,
"loss": 0.3105,
"step": 86,
"step_loss": 0.3819746971130371
},
{
"epoch": 0.26,
"grad_norm": 1.3695626037107396,
"kl": 0.3539569675922394,
"learning_rate": 4.974063914108297e-06,
"loss": 0.3069,
"step": 87,
"step_loss": 0.36265525221824646
},
{
"epoch": 0.26,
"grad_norm": 1.4398198275304508,
"kl": 0.30112671852111816,
"learning_rate": 4.9734287250175865e-06,
"loss": 0.3125,
"step": 88,
"step_loss": 0.2848939299583435
},
{
"epoch": 0.26,
"grad_norm": 1.3830922269493662,
"kl": 0.33735549449920654,
"learning_rate": 4.9727858978605e-06,
"loss": 0.3147,
"step": 89,
"step_loss": 0.3306404650211334
},
{
"epoch": 0.26,
"grad_norm": 1.2728858115467476,
"kl": 0.28279662132263184,
"learning_rate": 4.97213543484532e-06,
"loss": 0.3045,
"step": 90,
"step_loss": 0.27660509943962097
},
{
"epoch": 0.27,
"grad_norm": 1.5403404739139803,
"kl": 0.27295035123825073,
"learning_rate": 4.97147733820656e-06,
"loss": 0.2961,
"step": 91,
"step_loss": 0.26971620321273804
},
{
"epoch": 0.27,
"grad_norm": 1.411561429621046,
"kl": 0.37973371148109436,
"learning_rate": 4.970811610204954e-06,
"loss": 0.299,
"step": 92,
"step_loss": 0.326732337474823
},
{
"epoch": 0.27,
"grad_norm": 1.4805034744095673,
"kl": 0.3346588611602783,
"learning_rate": 4.970138253127456e-06,
"loss": 0.3116,
"step": 93,
"step_loss": 0.32256820797920227
},
{
"epoch": 0.28,
"grad_norm": 1.2537072876512962,
"kl": 0.31613579392433167,
"learning_rate": 4.969457269287224e-06,
"loss": 0.2909,
"step": 94,
"step_loss": 0.3002708852291107
},
{
"epoch": 0.28,
"grad_norm": 1.3783445970477886,
"kl": 0.2733086943626404,
"learning_rate": 4.968768661023619e-06,
"loss": 0.3092,
"step": 95,
"step_loss": 0.3080819547176361
},
{
"epoch": 0.28,
"grad_norm": 1.2682216286353625,
"kl": 0.2610551714897156,
"learning_rate": 4.968072430702193e-06,
"loss": 0.2839,
"step": 96,
"step_loss": 0.25847068428993225
},
{
"epoch": 0.29,
"grad_norm": 1.2077676939950335,
"kl": 0.298378050327301,
"learning_rate": 4.967368580714681e-06,
"loss": 0.2803,
"step": 97,
"step_loss": 0.2736283242702484
},
{
"epoch": 0.29,
"grad_norm": 1.2148373717113006,
"kl": 0.27147936820983887,
"learning_rate": 4.966657113478992e-06,
"loss": 0.2765,
"step": 98,
"step_loss": 0.30714210867881775
},
{
"epoch": 0.29,
"grad_norm": 1.4139521118408638,
"kl": 0.32129478454589844,
"learning_rate": 4.9659380314392075e-06,
"loss": 0.3138,
"step": 99,
"step_loss": 0.334412157535553
},
{
"epoch": 0.29,
"grad_norm": 1.3294266549115017,
"kl": 0.35117053985595703,
"learning_rate": 4.965211337065563e-06,
"loss": 0.3057,
"step": 100,
"step_loss": 0.30289411544799805
},
{
"epoch": 0.3,
"grad_norm": 1.424742291386975,
"kl": 0.30909568071365356,
"learning_rate": 4.964477032854448e-06,
"loss": 0.3141,
"step": 101,
"step_loss": 0.3024054765701294
},
{
"epoch": 0.3,
"grad_norm": 1.4494693467464015,
"kl": 0.3519325256347656,
"learning_rate": 4.963735121328389e-06,
"loss": 0.3074,
"step": 102,
"step_loss": 0.29212692379951477
},
{
"epoch": 0.3,
"grad_norm": 1.4137157099247653,
"kl": 0.32887011766433716,
"learning_rate": 4.9629856050360505e-06,
"loss": 0.3085,
"step": 103,
"step_loss": 0.2845655679702759
},
{
"epoch": 0.31,
"grad_norm": 1.3535308616848474,
"kl": 0.3814646005630493,
"learning_rate": 4.962228486552219e-06,
"loss": 0.3086,
"step": 104,
"step_loss": 0.4175484776496887
},
{
"epoch": 0.31,
"grad_norm": 1.264600598229456,
"kl": 0.32557374238967896,
"learning_rate": 4.961463768477797e-06,
"loss": 0.3065,
"step": 105,
"step_loss": 0.3005172908306122
},
{
"epoch": 0.31,
"grad_norm": 1.3367387253981593,
"kl": 0.33622100949287415,
"learning_rate": 4.960691453439793e-06,
"loss": 0.3221,
"step": 106,
"step_loss": 0.32829591631889343
},
{
"epoch": 0.31,
"grad_norm": 1.285114925381023,
"kl": 0.415163516998291,
"learning_rate": 4.9599115440913145e-06,
"loss": 0.3033,
"step": 107,
"step_loss": 0.3966817557811737
},
{
"epoch": 0.32,
"grad_norm": 1.2697189074985138,
"kl": 0.31419163942337036,
"learning_rate": 4.9591240431115565e-06,
"loss": 0.2899,
"step": 108,
"step_loss": 0.28133562207221985
},
{
"epoch": 0.32,
"grad_norm": 1.2984645133760384,
"kl": 0.2992507219314575,
"learning_rate": 4.9583289532057925e-06,
"loss": 0.2957,
"step": 109,
"step_loss": 0.3047301173210144
},
{
"epoch": 0.32,
"grad_norm": 1.2959997601773783,
"kl": 0.3358232080936432,
"learning_rate": 4.9575262771053666e-06,
"loss": 0.2977,
"step": 110,
"step_loss": 0.3054252564907074
},
{
"epoch": 0.33,
"grad_norm": 1.3678091978992384,
"kl": 0.337202250957489,
"learning_rate": 4.956716017567685e-06,
"loss": 0.3189,
"step": 111,
"step_loss": 0.3637933135032654
},
{
"epoch": 0.33,
"grad_norm": 1.3266191585020017,
"kl": 0.2727779150009155,
"learning_rate": 4.955898177376204e-06,
"loss": 0.2895,
"step": 112,
"step_loss": 0.27312493324279785
},
{
"epoch": 0.33,
"grad_norm": 1.3644833661607003,
"kl": 0.27666690945625305,
"learning_rate": 4.95507275934042e-06,
"loss": 0.314,
"step": 113,
"step_loss": 0.251804381608963
},
{
"epoch": 0.34,
"grad_norm": 1.2828415661770218,
"kl": 0.28860220313072205,
"learning_rate": 4.954239766295862e-06,
"loss": 0.2829,
"step": 114,
"step_loss": 0.2733534574508667
},
{
"epoch": 0.34,
"grad_norm": 1.225743500205446,
"kl": 0.2747955322265625,
"learning_rate": 4.953399201104084e-06,
"loss": 0.2794,
"step": 115,
"step_loss": 0.23778927326202393
},
{
"epoch": 0.34,
"grad_norm": 1.229862820827967,
"kl": 0.35592517256736755,
"learning_rate": 4.952551066652648e-06,
"loss": 0.2758,
"step": 116,
"step_loss": 0.3347897529602051
},
{
"epoch": 0.34,
"grad_norm": 1.4102429181039295,
"kl": 0.3126868009567261,
"learning_rate": 4.951695365855122e-06,
"loss": 0.2897,
"step": 117,
"step_loss": 0.3053089380264282
},
{
"epoch": 0.35,
"grad_norm": 1.332632214515851,
"kl": 0.341084748506546,
"learning_rate": 4.950832101651063e-06,
"loss": 0.2992,
"step": 118,
"step_loss": 0.3318370282649994
},
{
"epoch": 0.35,
"grad_norm": 1.2233149853258531,
"kl": 0.29855814576148987,
"learning_rate": 4.949961277006013e-06,
"loss": 0.289,
"step": 119,
"step_loss": 0.27190065383911133
},
{
"epoch": 0.35,
"grad_norm": 1.2346324893530762,
"kl": 0.28264421224594116,
"learning_rate": 4.949082894911485e-06,
"loss": 0.2996,
"step": 120,
"step_loss": 0.28687310218811035
},
{
"epoch": 0.36,
"grad_norm": 1.2793881099869688,
"kl": 0.27299419045448303,
"learning_rate": 4.948196958384955e-06,
"loss": 0.3025,
"step": 121,
"step_loss": 0.23233090341091156
},
{
"epoch": 0.36,
"grad_norm": 1.2998410971433687,
"kl": 0.29575178027153015,
"learning_rate": 4.9473034704698485e-06,
"loss": 0.2962,
"step": 122,
"step_loss": 0.2823527753353119
},
{
"epoch": 0.36,
"grad_norm": 1.2555805745200317,
"kl": 0.28942999243736267,
"learning_rate": 4.9464024342355335e-06,
"loss": 0.2914,
"step": 123,
"step_loss": 0.2781384587287903
},
{
"epoch": 0.36,
"grad_norm": 1.2708803374034965,
"kl": 0.3180427551269531,
"learning_rate": 4.945493852777307e-06,
"loss": 0.2944,
"step": 124,
"step_loss": 0.31637904047966003
},
{
"epoch": 0.37,
"grad_norm": 1.3848259809116499,
"kl": 0.33197423815727234,
"learning_rate": 4.944577729216388e-06,
"loss": 0.3152,
"step": 125,
"step_loss": 0.3225075900554657
},
{
"epoch": 0.37,
"grad_norm": 1.4354123007048643,
"kl": 0.2686159908771515,
"learning_rate": 4.943654066699904e-06,
"loss": 0.3118,
"step": 126,
"step_loss": 0.29845237731933594
},
{
"epoch": 0.37,
"grad_norm": 1.2899412223451328,
"kl": 0.3330647349357605,
"learning_rate": 4.942722868400879e-06,
"loss": 0.3322,
"step": 127,
"step_loss": 0.3406273424625397
},
{
"epoch": 0.38,
"grad_norm": 1.2878537840081934,
"kl": 0.31060031056404114,
"learning_rate": 4.941784137518227e-06,
"loss": 0.2967,
"step": 128,
"step_loss": 0.28862464427948
},
{
"epoch": 0.38,
"grad_norm": 1.31852289386673,
"kl": 0.3059355914592743,
"learning_rate": 4.940837877276735e-06,
"loss": 0.2919,
"step": 129,
"step_loss": 0.30133622884750366
},
{
"epoch": 0.38,
"grad_norm": 1.243803418751941,
"kl": 0.32812631130218506,
"learning_rate": 4.93988409092706e-06,
"loss": 0.2982,
"step": 130,
"step_loss": 0.2712858319282532
},
{
"epoch": 0.39,
"grad_norm": 1.206601958490433,
"kl": 0.2982441484928131,
"learning_rate": 4.93892278174571e-06,
"loss": 0.2717,
"step": 131,
"step_loss": 0.2625717520713806
},
{
"epoch": 0.39,
"grad_norm": 1.2840226109970796,
"kl": 0.3162402808666229,
"learning_rate": 4.937953953035035e-06,
"loss": 0.2973,
"step": 132,
"step_loss": 0.3028516471385956
},
{
"epoch": 0.39,
"grad_norm": 1.219921942425963,
"kl": 0.3007054030895233,
"learning_rate": 4.93697760812322e-06,
"loss": 0.2999,
"step": 133,
"step_loss": 0.29070332646369934
},
{
"epoch": 0.39,
"grad_norm": 1.3505734810630552,
"kl": 0.3097812533378601,
"learning_rate": 4.935993750364267e-06,
"loss": 0.3213,
"step": 134,
"step_loss": 0.32063156366348267
},
{
"epoch": 0.4,
"grad_norm": 1.2960801503834385,
"kl": 0.3124713599681854,
"learning_rate": 4.9350023831379885e-06,
"loss": 0.292,
"step": 135,
"step_loss": 0.2716798782348633
},
{
"epoch": 0.4,
"grad_norm": 1.3346284118716634,
"kl": 0.299164354801178,
"learning_rate": 4.934003509849993e-06,
"loss": 0.292,
"step": 136,
"step_loss": 0.29498571157455444
},
{
"epoch": 0.4,
"grad_norm": 1.338862702232099,
"kl": 0.3523540198802948,
"learning_rate": 4.932997133931676e-06,
"loss": 0.2999,
"step": 137,
"step_loss": 0.2898944914340973
},
{
"epoch": 0.41,
"grad_norm": 1.299533142110482,
"kl": 0.3428666591644287,
"learning_rate": 4.931983258840204e-06,
"loss": 0.2808,
"step": 138,
"step_loss": 0.3303877115249634
},
{
"epoch": 0.41,
"grad_norm": 1.2814240918861108,
"kl": 0.26252108812332153,
"learning_rate": 4.930961888058506e-06,
"loss": 0.2909,
"step": 139,
"step_loss": 0.23940859735012054
},
{
"epoch": 0.41,
"grad_norm": 1.2407806935373367,
"kl": 0.2580229341983795,
"learning_rate": 4.929933025095262e-06,
"loss": 0.2906,
"step": 140,
"step_loss": 0.2781831920146942
},
{
"epoch": 0.41,
"grad_norm": 1.315404278595456,
"kl": 0.3604077398777008,
"learning_rate": 4.928896673484888e-06,
"loss": 0.3072,
"step": 141,
"step_loss": 0.3967340588569641
},
{
"epoch": 0.42,
"grad_norm": 1.286676458510305,
"kl": 0.32844799757003784,
"learning_rate": 4.9278528367875275e-06,
"loss": 0.2836,
"step": 142,
"step_loss": 0.2578602731227875
},
{
"epoch": 0.42,
"grad_norm": 1.2526938379221886,
"kl": 0.32336488366127014,
"learning_rate": 4.926801518589035e-06,
"loss": 0.2991,
"step": 143,
"step_loss": 0.2511914372444153
},
{
"epoch": 0.42,
"grad_norm": 1.196299407291449,
"kl": 0.239657461643219,
"learning_rate": 4.9257427225009665e-06,
"loss": 0.2827,
"step": 144,
"step_loss": 0.23767231404781342
},
{
"epoch": 0.43,
"grad_norm": 1.253688615112022,
"kl": 0.36846810579299927,
"learning_rate": 4.924676452160568e-06,
"loss": 0.2971,
"step": 145,
"step_loss": 0.32303857803344727
},
{
"epoch": 0.43,
"grad_norm": 1.2337718709400438,
"kl": 0.292164146900177,
"learning_rate": 4.92360271123076e-06,
"loss": 0.2999,
"step": 146,
"step_loss": 0.3450307250022888
},
{
"epoch": 0.43,
"grad_norm": 1.4234900890899038,
"kl": 0.3000570237636566,
"learning_rate": 4.922521503400125e-06,
"loss": 0.297,
"step": 147,
"step_loss": 0.2996768355369568
},
{
"epoch": 0.44,
"grad_norm": 1.2489090450298066,
"kl": 0.2863319516181946,
"learning_rate": 4.921432832382901e-06,
"loss": 0.2896,
"step": 148,
"step_loss": 0.28483325242996216
},
{
"epoch": 0.44,
"grad_norm": 1.230750460392728,
"kl": 0.30758440494537354,
"learning_rate": 4.92033670191896e-06,
"loss": 0.2884,
"step": 149,
"step_loss": 0.2748796045780182
},
{
"epoch": 0.44,
"grad_norm": 1.3306176467968267,
"kl": 0.26451659202575684,
"learning_rate": 4.9192331157738e-06,
"loss": 0.29,
"step": 150,
"step_loss": 0.2788347601890564
},
{
"epoch": 0.44,
"grad_norm": 1.332331055940497,
"kl": 0.2376236617565155,
"learning_rate": 4.918122077738533e-06,
"loss": 0.2961,
"step": 151,
"step_loss": 0.24186082184314728
},
{
"epoch": 0.45,
"grad_norm": 1.332600066594834,
"kl": 0.3817494511604309,
"learning_rate": 4.917003591629867e-06,
"loss": 0.307,
"step": 152,
"step_loss": 0.33062270283699036
},
{
"epoch": 0.45,
"grad_norm": 1.2536110337354542,
"kl": 0.2526598870754242,
"learning_rate": 4.915877661290099e-06,
"loss": 0.291,
"step": 153,
"step_loss": 0.24536053836345673
},
{
"epoch": 0.45,
"grad_norm": 1.2674122835595976,
"kl": 0.37763291597366333,
"learning_rate": 4.914744290587096e-06,
"loss": 0.2976,
"step": 154,
"step_loss": 0.3559175431728363
},
{
"epoch": 0.46,
"grad_norm": 1.144770437772112,
"kl": 0.28675389289855957,
"learning_rate": 4.913603483414291e-06,
"loss": 0.2843,
"step": 155,
"step_loss": 0.26313164830207825
},
{
"epoch": 0.46,
"grad_norm": 1.2867088389290005,
"kl": 0.30408233404159546,
"learning_rate": 4.912455243690654e-06,
"loss": 0.2895,
"step": 156,
"step_loss": 0.305271178483963
},
{
"epoch": 0.46,
"grad_norm": 1.3337139732461034,
"kl": 0.33538201451301575,
"learning_rate": 4.911299575360694e-06,
"loss": 0.2829,
"step": 157,
"step_loss": 0.2805282771587372
},
{
"epoch": 0.46,
"grad_norm": 1.2603558835697868,
"kl": 0.2967783808708191,
"learning_rate": 4.910136482394439e-06,
"loss": 0.2987,
"step": 158,
"step_loss": 0.266804963350296
},
{
"epoch": 0.47,
"grad_norm": 1.1861792045426607,
"kl": 0.32278546690940857,
"learning_rate": 4.90896596878742e-06,
"loss": 0.2737,
"step": 159,
"step_loss": 0.2891842722892761
},
{
"epoch": 0.47,
"grad_norm": 1.168008822771122,
"kl": 0.3604351878166199,
"learning_rate": 4.907788038560661e-06,
"loss": 0.2703,
"step": 160,
"step_loss": 0.3211236596107483
},
{
"epoch": 0.47,
"grad_norm": 1.2273672100425606,
"kl": 0.3063991069793701,
"learning_rate": 4.906602695760665e-06,
"loss": 0.2919,
"step": 161,
"step_loss": 0.25892752408981323
},
{
"epoch": 0.48,
"grad_norm": 1.4264755544249268,
"kl": 0.2818457782268524,
"learning_rate": 4.905409944459397e-06,
"loss": 0.2934,
"step": 162,
"step_loss": 0.2786937654018402
},
{
"epoch": 0.48,
"grad_norm": 1.314284320892201,
"kl": 0.32207655906677246,
"learning_rate": 4.904209788754275e-06,
"loss": 0.2931,
"step": 163,
"step_loss": 0.23789873719215393
},
{
"epoch": 0.48,
"grad_norm": 1.1804456232659608,
"kl": 0.33073878288269043,
"learning_rate": 4.903002232768151e-06,
"loss": 0.2776,
"step": 164,
"step_loss": 0.2962447702884674
},
{
"epoch": 0.49,
"grad_norm": 1.2497392620900494,
"kl": 0.2782425284385681,
"learning_rate": 4.9017872806493e-06,
"loss": 0.2918,
"step": 165,
"step_loss": 0.2765083611011505
},
{
"epoch": 0.49,
"grad_norm": 1.2704086792818736,
"kl": 0.3183959722518921,
"learning_rate": 4.900564936571404e-06,
"loss": 0.2873,
"step": 166,
"step_loss": 0.2902598977088928
},
{
"epoch": 0.49,
"grad_norm": 1.2954983816000556,
"kl": 0.29798054695129395,
"learning_rate": 4.899335204733538e-06,
"loss": 0.3052,
"step": 167,
"step_loss": 0.2802087068557739
},
{
"epoch": 0.49,
"grad_norm": 1.2070703839621497,
"kl": 0.35096466541290283,
"learning_rate": 4.8980980893601575e-06,
"loss": 0.2918,
"step": 168,
"step_loss": 0.3088727593421936
},
{
"epoch": 0.5,
"grad_norm": 1.3158993296815968,
"kl": 0.27438434958457947,
"learning_rate": 4.8968535947010795e-06,
"loss": 0.2913,
"step": 169,
"step_loss": 0.2575715184211731
},
{
"epoch": 0.5,
"grad_norm": 1.2643025871656506,
"kl": 0.27822235226631165,
"learning_rate": 4.895601725031475e-06,
"loss": 0.2934,
"step": 170,
"step_loss": 0.2611542344093323
},
{
"epoch": 0.5,
"grad_norm": 1.152592001860982,
"kl": 0.3118104934692383,
"learning_rate": 4.894342484651846e-06,
"loss": 0.2776,
"step": 171,
"step_loss": 0.29376712441444397
},
{
"epoch": 0.51,
"grad_norm": 1.2558097414261615,
"kl": 0.31661081314086914,
"learning_rate": 4.893075877888018e-06,
"loss": 0.3067,
"step": 172,
"step_loss": 0.302161306142807
},
{
"epoch": 0.51,
"grad_norm": 1.4838421517721772,
"kl": 0.33207058906555176,
"learning_rate": 4.891801909091119e-06,
"loss": 0.3207,
"step": 173,
"step_loss": 0.3354288935661316
},
{
"epoch": 0.51,
"grad_norm": 1.270609611872136,
"kl": 0.3163268566131592,
"learning_rate": 4.8905205826375705e-06,
"loss": 0.3031,
"step": 174,
"step_loss": 0.285269558429718
},
{
"epoch": 0.51,
"grad_norm": 1.278119700217976,
"kl": 0.29490146040916443,
"learning_rate": 4.8892319029290685e-06,
"loss": 0.2906,
"step": 175,
"step_loss": 0.3601941168308258
},
{
"epoch": 0.52,
"grad_norm": 1.3328158565268786,
"kl": 0.287243127822876,
"learning_rate": 4.887935874392567e-06,
"loss": 0.3141,
"step": 176,
"step_loss": 0.27393481135368347
},
{
"epoch": 0.52,
"grad_norm": 1.146954506657001,
"kl": 0.3274082541465759,
"learning_rate": 4.886632501480269e-06,
"loss": 0.2816,
"step": 177,
"step_loss": 0.3594622015953064
},
{
"epoch": 0.52,
"grad_norm": 1.2568621036203667,
"kl": 0.32911595702171326,
"learning_rate": 4.885321788669604e-06,
"loss": 0.3038,
"step": 178,
"step_loss": 0.2939574420452118
},
{
"epoch": 0.53,
"grad_norm": 1.215908552897293,
"kl": 0.2502468228340149,
"learning_rate": 4.884003740463219e-06,
"loss": 0.2902,
"step": 179,
"step_loss": 0.24465596675872803
},
{
"epoch": 0.53,
"grad_norm": 1.1611108031885955,
"kl": 0.30213692784309387,
"learning_rate": 4.882678361388958e-06,
"loss": 0.274,
"step": 180,
"step_loss": 0.2538335919380188
},
{
"epoch": 0.53,
"grad_norm": 1.301100728893655,
"kl": 0.3141769766807556,
"learning_rate": 4.88134565599985e-06,
"loss": 0.3048,
"step": 181,
"step_loss": 0.25083863735198975
},
{
"epoch": 0.54,
"grad_norm": 1.1533263462306118,
"kl": 0.31978100538253784,
"learning_rate": 4.880005628874088e-06,
"loss": 0.2979,
"step": 182,
"step_loss": 0.2340894490480423
},
{
"epoch": 0.54,
"grad_norm": 1.2517414717516113,
"kl": 0.2578886151313782,
"learning_rate": 4.878658284615023e-06,
"loss": 0.2888,
"step": 183,
"step_loss": 0.25022444128990173
},
{
"epoch": 0.54,
"grad_norm": 1.2245537412134813,
"kl": 0.3525405824184418,
"learning_rate": 4.877303627851138e-06,
"loss": 0.2856,
"step": 184,
"step_loss": 0.30141592025756836
},
{
"epoch": 0.54,
"grad_norm": 1.191367958579159,
"kl": 0.2949683368206024,
"learning_rate": 4.875941663236039e-06,
"loss": 0.2811,
"step": 185,
"step_loss": 0.27863818407058716
},
{
"epoch": 0.55,
"grad_norm": 1.2248110810400246,
"kl": 0.31630921363830566,
"learning_rate": 4.874572395448432e-06,
"loss": 0.288,
"step": 186,
"step_loss": 0.26466599106788635
},
{
"epoch": 0.55,
"grad_norm": 1.1677774508685308,
"kl": 0.3383273482322693,
"learning_rate": 4.8731958291921174e-06,
"loss": 0.2646,
"step": 187,
"step_loss": 0.23358532786369324
},
{
"epoch": 0.55,
"grad_norm": 1.2534617173778235,
"kl": 0.3180442452430725,
"learning_rate": 4.871811969195963e-06,
"loss": 0.2795,
"step": 188,
"step_loss": 0.2650742828845978
},
{
"epoch": 0.56,
"grad_norm": 1.3623516662591668,
"kl": 0.36272916197776794,
"learning_rate": 4.870420820213896e-06,
"loss": 0.3179,
"step": 189,
"step_loss": 0.34722331166267395
},
{
"epoch": 0.56,
"grad_norm": 1.139199587731826,
"kl": 0.31969937682151794,
"learning_rate": 4.869022387024879e-06,
"loss": 0.2761,
"step": 190,
"step_loss": 0.26098594069480896
},
{
"epoch": 0.56,
"grad_norm": 1.2194709601641671,
"kl": 0.34485194087028503,
"learning_rate": 4.867616674432903e-06,
"loss": 0.3146,
"step": 191,
"step_loss": 0.3215685486793518
},
{
"epoch": 0.56,
"grad_norm": 1.2744845374487466,
"kl": 0.3000357747077942,
"learning_rate": 4.8662036872669615e-06,
"loss": 0.2898,
"step": 192,
"step_loss": 0.3152067959308624
},
{
"epoch": 0.57,
"grad_norm": 1.2219115844478998,
"kl": 0.26532527804374695,
"learning_rate": 4.864783430381039e-06,
"loss": 0.2993,
"step": 193,
"step_loss": 0.2950400710105896
},
{
"epoch": 0.57,
"grad_norm": 1.3029827184325222,
"kl": 0.35824403166770935,
"learning_rate": 4.863355908654095e-06,
"loss": 0.2951,
"step": 194,
"step_loss": 0.3127448856830597
},
{
"epoch": 0.57,
"grad_norm": 1.3611804602947277,
"kl": 0.3048401474952698,
"learning_rate": 4.861921126990045e-06,
"loss": 0.3019,
"step": 195,
"step_loss": 0.3276352882385254
},
{
"epoch": 0.58,
"grad_norm": 1.2537615861956364,
"kl": 0.32806965708732605,
"learning_rate": 4.860479090317742e-06,
"loss": 0.3018,
"step": 196,
"step_loss": 0.2849041223526001
},
{
"epoch": 0.58,
"grad_norm": 1.305638823054846,
"kl": 0.3609757423400879,
"learning_rate": 4.859029803590966e-06,
"loss": 0.3146,
"step": 197,
"step_loss": 0.304736852645874
},
{
"epoch": 0.58,
"grad_norm": 1.180396492221588,
"kl": 0.31104931235313416,
"learning_rate": 4.8575732717884e-06,
"loss": 0.307,
"step": 198,
"step_loss": 0.32223203778266907
},
{
"epoch": 0.59,
"grad_norm": 1.3152524549772768,
"kl": 0.30966895818710327,
"learning_rate": 4.856109499913615e-06,
"loss": 0.2935,
"step": 199,
"step_loss": 0.29456788301467896
},
{
"epoch": 0.59,
"grad_norm": 1.214126043731043,
"kl": 0.3455277681350708,
"learning_rate": 4.854638492995056e-06,
"loss": 0.2749,
"step": 200,
"step_loss": 0.362245112657547
},
{
"epoch": 0.59,
"grad_norm": 1.2555037212901567,
"kl": 0.23420512676239014,
"learning_rate": 4.853160256086021e-06,
"loss": 0.2803,
"step": 201,
"step_loss": 0.25304749608039856
},
{
"epoch": 0.59,
"grad_norm": 1.2278744065777365,
"kl": 0.3848443627357483,
"learning_rate": 4.8516747942646465e-06,
"loss": 0.3211,
"step": 202,
"step_loss": 0.382394403219223
},
{
"epoch": 0.6,
"grad_norm": 1.1839836158359853,
"kl": 0.3287525475025177,
"learning_rate": 4.850182112633885e-06,
"loss": 0.2913,
"step": 203,
"step_loss": 0.29712358117103577
},
{
"epoch": 0.6,
"grad_norm": 1.1211578271078935,
"kl": 0.33271652460098267,
"learning_rate": 4.8486822163214944e-06,
"loss": 0.2738,
"step": 204,
"step_loss": 0.28494173288345337
},
{
"epoch": 0.6,
"grad_norm": 1.2261387888906659,
"kl": 0.304698646068573,
"learning_rate": 4.847175110480015e-06,
"loss": 0.2984,
"step": 205,
"step_loss": 0.28125837445259094
},
{
"epoch": 0.61,
"grad_norm": 1.3388127733966415,
"kl": 0.30615222454071045,
"learning_rate": 4.8456608002867555e-06,
"loss": 0.2859,
"step": 206,
"step_loss": 0.2698904275894165
},
{
"epoch": 0.61,
"grad_norm": 1.179491051996919,
"kl": 0.31592032313346863,
"learning_rate": 4.844139290943771e-06,
"loss": 0.2909,
"step": 207,
"step_loss": 0.2879568934440613
},
{
"epoch": 0.61,
"grad_norm": 1.1817492222340549,
"kl": 0.3141896724700928,
"learning_rate": 4.84261058767785e-06,
"loss": 0.2832,
"step": 208,
"step_loss": 0.24105487763881683
},
{
"epoch": 0.61,
"grad_norm": 1.275156633488967,
"kl": 0.4037202000617981,
"learning_rate": 4.841074695740493e-06,
"loss": 0.2875,
"step": 209,
"step_loss": 0.3415408432483673
},
{
"epoch": 0.62,
"grad_norm": 1.188350536024408,
"kl": 0.32159894704818726,
"learning_rate": 4.839531620407895e-06,
"loss": 0.3031,
"step": 210,
"step_loss": 0.27604830265045166
},
{
"epoch": 0.62,
"grad_norm": 1.1429229933157632,
"kl": 0.32619747519493103,
"learning_rate": 4.837981366980928e-06,
"loss": 0.2812,
"step": 211,
"step_loss": 0.27371087670326233
},
{
"epoch": 0.62,
"grad_norm": 1.2193809489970755,
"kl": 0.33697012066841125,
"learning_rate": 4.836423940785124e-06,
"loss": 0.2775,
"step": 212,
"step_loss": 0.27547112107276917
},
{
"epoch": 0.63,
"grad_norm": 1.2348631847135687,
"kl": 0.28872644901275635,
"learning_rate": 4.834859347170654e-06,
"loss": 0.2913,
"step": 213,
"step_loss": 0.2788584232330322
},
{
"epoch": 0.63,
"grad_norm": 1.2474162160473052,
"kl": 0.3392971456050873,
"learning_rate": 4.8332875915123105e-06,
"loss": 0.2998,
"step": 214,
"step_loss": 0.34434232115745544
},
{
"epoch": 0.63,
"grad_norm": 1.323303913774139,
"kl": 0.3054981827735901,
"learning_rate": 4.831708679209491e-06,
"loss": 0.3121,
"step": 215,
"step_loss": 0.28162604570388794
},
{
"epoch": 0.64,
"grad_norm": 1.1929513261742986,
"kl": 0.3404708206653595,
"learning_rate": 4.830122615686177e-06,
"loss": 0.3063,
"step": 216,
"step_loss": 0.29514598846435547
},
{
"epoch": 0.64,
"grad_norm": 1.1911094796199106,
"kl": 0.33788585662841797,
"learning_rate": 4.828529406390917e-06,
"loss": 0.2854,
"step": 217,
"step_loss": 0.31073516607284546
},
{
"epoch": 0.64,
"grad_norm": 1.2665507498308344,
"kl": 0.36928457021713257,
"learning_rate": 4.826929056796807e-06,
"loss": 0.2902,
"step": 218,
"step_loss": 0.35527801513671875
},
{
"epoch": 0.64,
"grad_norm": 1.2680076553195618,
"kl": 0.39868634939193726,
"learning_rate": 4.8253215724014725e-06,
"loss": 0.2981,
"step": 219,
"step_loss": 0.27999529242515564
},
{
"epoch": 0.65,
"grad_norm": 1.2775727396335816,
"kl": 0.24477747082710266,
"learning_rate": 4.823706958727045e-06,
"loss": 0.3168,
"step": 220,
"step_loss": 0.2902810871601105
},
{
"epoch": 0.65,
"grad_norm": 1.1655079488814604,
"kl": 0.3722260594367981,
"learning_rate": 4.822085221320152e-06,
"loss": 0.2937,
"step": 221,
"step_loss": 0.30477461218833923
},
{
"epoch": 0.65,
"grad_norm": 1.1528789635120378,
"kl": 0.3633630871772766,
"learning_rate": 4.820456365751891e-06,
"loss": 0.2733,
"step": 222,
"step_loss": 0.2682224214076996
},
{
"epoch": 0.66,
"grad_norm": 1.2833936250857951,
"kl": 0.4017341732978821,
"learning_rate": 4.818820397617811e-06,
"loss": 0.3113,
"step": 223,
"step_loss": 0.36901068687438965
},
{
"epoch": 0.66,
"grad_norm": 1.2106488345236557,
"kl": 0.2505451738834381,
"learning_rate": 4.817177322537898e-06,
"loss": 0.3016,
"step": 224,
"step_loss": 0.2669680416584015
},
{
"epoch": 0.66,
"grad_norm": 1.2935886740881541,
"kl": 0.35546159744262695,
"learning_rate": 4.815527146156548e-06,
"loss": 0.3008,
"step": 225,
"step_loss": 0.30147963762283325
},
{
"epoch": 0.66,
"grad_norm": 1.2631269633217965,
"kl": 0.39503854513168335,
"learning_rate": 4.8138698741425535e-06,
"loss": 0.2868,
"step": 226,
"step_loss": 0.2804335057735443
},
{
"epoch": 0.67,
"grad_norm": 1.2621588928369527,
"kl": 0.3259708285331726,
"learning_rate": 4.812205512189083e-06,
"loss": 0.2943,
"step": 227,
"step_loss": 0.30732688307762146
},
{
"epoch": 0.67,
"grad_norm": 1.252311030511955,
"kl": 0.3171556293964386,
"learning_rate": 4.8105340660136614e-06,
"loss": 0.2912,
"step": 228,
"step_loss": 0.3541552722454071
},
{
"epoch": 0.67,
"grad_norm": 1.1582157784039353,
"kl": 0.33646219968795776,
"learning_rate": 4.8088555413581495e-06,
"loss": 0.2862,
"step": 229,
"step_loss": 0.3080594539642334
},
{
"epoch": 0.68,
"grad_norm": 1.1909622553262202,
"kl": 0.3843502402305603,
"learning_rate": 4.8071699439887215e-06,
"loss": 0.3056,
"step": 230,
"step_loss": 0.3147525489330292
},
{
"epoch": 0.68,
"grad_norm": 1.2594429963263456,
"kl": 0.26849618554115295,
"learning_rate": 4.805477279695852e-06,
"loss": 0.298,
"step": 231,
"step_loss": 0.2553267776966095
},
{
"epoch": 0.68,
"grad_norm": 1.1702087636485823,
"kl": 0.40651583671569824,
"learning_rate": 4.8037775542942925e-06,
"loss": 0.2827,
"step": 232,
"step_loss": 0.2711586356163025
},
{
"epoch": 0.69,
"grad_norm": 1.1234639927336083,
"kl": 0.32860204577445984,
"learning_rate": 4.802070773623047e-06,
"loss": 0.2687,
"step": 233,
"step_loss": 0.24301442503929138
},
{
"epoch": 0.69,
"grad_norm": 1.232534630116778,
"kl": 0.392067551612854,
"learning_rate": 4.80035694354536e-06,
"loss": 0.2943,
"step": 234,
"step_loss": 0.3448218107223511
},
{
"epoch": 0.69,
"grad_norm": 1.1963956326096639,
"kl": 0.3147091567516327,
"learning_rate": 4.798636069948692e-06,
"loss": 0.2856,
"step": 235,
"step_loss": 0.30948758125305176
},
{
"epoch": 0.69,
"grad_norm": 1.2201677781179745,
"kl": 0.26646876335144043,
"learning_rate": 4.7969081587446994e-06,
"loss": 0.2745,
"step": 236,
"step_loss": 0.23111504316329956
},
{
"epoch": 0.7,
"grad_norm": 1.0792412024548768,
"kl": 0.2925267517566681,
"learning_rate": 4.795173215869214e-06,
"loss": 0.2691,
"step": 237,
"step_loss": 0.2031397670507431
},
{
"epoch": 0.7,
"grad_norm": 1.3799205324021417,
"kl": 0.3084944486618042,
"learning_rate": 4.7934312472822255e-06,
"loss": 0.3316,
"step": 238,
"step_loss": 0.2557719945907593
},
{
"epoch": 0.7,
"grad_norm": 1.156691853407518,
"kl": 0.318729043006897,
"learning_rate": 4.791682258967856e-06,
"loss": 0.2844,
"step": 239,
"step_loss": 0.23593303561210632
},
{
"epoch": 0.71,
"grad_norm": 1.2618289049089157,
"kl": 0.3763369619846344,
"learning_rate": 4.789926256934344e-06,
"loss": 0.2928,
"step": 240,
"step_loss": 0.2985472083091736
},
{
"epoch": 0.71,
"grad_norm": 1.182146333032642,
"kl": 0.33732131123542786,
"learning_rate": 4.788163247214021e-06,
"loss": 0.2874,
"step": 241,
"step_loss": 0.2921789586544037
},
{
"epoch": 0.71,
"grad_norm": 1.2611562106682654,
"kl": 0.26897132396698,
"learning_rate": 4.786393235863292e-06,
"loss": 0.2863,
"step": 242,
"step_loss": 0.24371370673179626
},
{
"epoch": 0.71,
"grad_norm": 1.1667739600277132,
"kl": 0.3665682077407837,
"learning_rate": 4.7846162289626156e-06,
"loss": 0.2726,
"step": 243,
"step_loss": 0.33350181579589844
},
{
"epoch": 0.72,
"grad_norm": 1.159930396653472,
"kl": 0.294426292181015,
"learning_rate": 4.78283223261648e-06,
"loss": 0.2855,
"step": 244,
"step_loss": 0.34132063388824463
},
{
"epoch": 0.72,
"grad_norm": 1.251219143005887,
"kl": 0.3149415850639343,
"learning_rate": 4.781041252953385e-06,
"loss": 0.2863,
"step": 245,
"step_loss": 0.31999310851097107
},
{
"epoch": 0.72,
"grad_norm": 1.3041399931947542,
"kl": 0.34632056951522827,
"learning_rate": 4.779243296125821e-06,
"loss": 0.3093,
"step": 246,
"step_loss": 0.2359839826822281
},
{
"epoch": 0.73,
"grad_norm": 1.1281402721586493,
"kl": 0.3858156204223633,
"learning_rate": 4.777438368310246e-06,
"loss": 0.275,
"step": 247,
"step_loss": 0.3059740364551544
},
{
"epoch": 0.73,
"grad_norm": 1.182215527114777,
"kl": 0.3195875883102417,
"learning_rate": 4.775626475707062e-06,
"loss": 0.2887,
"step": 248,
"step_loss": 0.2916240692138672
},
{
"epoch": 0.73,
"grad_norm": 1.192317476599235,
"kl": 0.3068830370903015,
"learning_rate": 4.773807624540603e-06,
"loss": 0.2722,
"step": 249,
"step_loss": 0.23078405857086182
},
{
"epoch": 0.74,
"grad_norm": 1.1808958496558022,
"kl": 0.32459574937820435,
"learning_rate": 4.771981821059103e-06,
"loss": 0.2646,
"step": 250,
"step_loss": 0.250786155462265
},
{
"epoch": 0.74,
"grad_norm": 1.2752507061465592,
"kl": 0.3301911950111389,
"learning_rate": 4.770149071534681e-06,
"loss": 0.2828,
"step": 251,
"step_loss": 0.30227547883987427
},
{
"epoch": 0.74,
"grad_norm": 1.3749281257570973,
"kl": 0.3217305839061737,
"learning_rate": 4.7683093822633155e-06,
"loss": 0.2692,
"step": 252,
"step_loss": 0.27843326330184937
},
{
"epoch": 0.74,
"grad_norm": 1.2384381600974261,
"kl": 0.3240608274936676,
"learning_rate": 4.766462759564828e-06,
"loss": 0.3051,
"step": 253,
"step_loss": 0.2549932599067688
},
{
"epoch": 0.75,
"grad_norm": 1.1891702525457484,
"kl": 0.3199639320373535,
"learning_rate": 4.764609209782856e-06,
"loss": 0.3027,
"step": 254,
"step_loss": 0.30427688360214233
},
{
"epoch": 0.75,
"grad_norm": 1.2920719505595144,
"kl": 0.2981690764427185,
"learning_rate": 4.762748739284832e-06,
"loss": 0.2795,
"step": 255,
"step_loss": 0.25225040316581726
},
{
"epoch": 0.75,
"grad_norm": 1.3848628129737062,
"kl": 0.38679319620132446,
"learning_rate": 4.760881354461966e-06,
"loss": 0.3075,
"step": 256,
"step_loss": 0.3289315700531006
},
{
"epoch": 0.76,
"grad_norm": 1.2842520225661949,
"kl": 0.29211699962615967,
"learning_rate": 4.7590070617292214e-06,
"loss": 0.2903,
"step": 257,
"step_loss": 0.2732706367969513
},
{
"epoch": 0.76,
"grad_norm": 1.303090202772597,
"kl": 0.2750818729400635,
"learning_rate": 4.757125867525287e-06,
"loss": 0.2855,
"step": 258,
"step_loss": 0.24878114461898804
},
{
"epoch": 0.76,
"grad_norm": 1.2016632749663505,
"kl": 0.31452032923698425,
"learning_rate": 4.755237778312564e-06,
"loss": 0.2779,
"step": 259,
"step_loss": 0.27004343271255493
},
{
"epoch": 0.76,
"grad_norm": 1.2724450520997204,
"kl": 0.33003339171409607,
"learning_rate": 4.753342800577139e-06,
"loss": 0.2929,
"step": 260,
"step_loss": 0.2996165454387665
},
{
"epoch": 0.77,
"grad_norm": 1.1644344183728652,
"kl": 0.31851598620414734,
"learning_rate": 4.751440940828761e-06,
"loss": 0.2968,
"step": 261,
"step_loss": 0.31100180745124817
},
{
"epoch": 0.77,
"grad_norm": 1.1708153702363653,
"kl": 0.2956541180610657,
"learning_rate": 4.749532205600825e-06,
"loss": 0.2749,
"step": 262,
"step_loss": 0.26357224583625793
},
{
"epoch": 0.77,
"grad_norm": 1.2113982585346696,
"kl": 0.29637524485588074,
"learning_rate": 4.747616601450337e-06,
"loss": 0.2976,
"step": 263,
"step_loss": 0.2821243405342102
},
{
"epoch": 0.78,
"grad_norm": 1.2136481249314546,
"kl": 0.32876235246658325,
"learning_rate": 4.74569413495791e-06,
"loss": 0.2818,
"step": 264,
"step_loss": 0.2800009548664093
},
{
"epoch": 0.78,
"grad_norm": 1.2613044034795275,
"kl": 0.38935160636901855,
"learning_rate": 4.743764812727722e-06,
"loss": 0.3063,
"step": 265,
"step_loss": 0.3427608907222748
},
{
"epoch": 0.78,
"grad_norm": 1.0797444840559929,
"kl": 0.3119351267814636,
"learning_rate": 4.741828641387505e-06,
"loss": 0.2676,
"step": 266,
"step_loss": 0.2635708153247833
},
{
"epoch": 0.79,
"grad_norm": 1.152621213192496,
"kl": 0.36095455288887024,
"learning_rate": 4.739885627588522e-06,
"loss": 0.2825,
"step": 267,
"step_loss": 0.2881103754043579
},
{
"epoch": 0.79,
"grad_norm": 1.2119555039739955,
"kl": 0.36039823293685913,
"learning_rate": 4.73793577800554e-06,
"loss": 0.3022,
"step": 268,
"step_loss": 0.29259294271469116
},
{
"epoch": 0.79,
"grad_norm": 1.1482140459926162,
"kl": 0.2709002196788788,
"learning_rate": 4.735979099336807e-06,
"loss": 0.2862,
"step": 269,
"step_loss": 0.23157480359077454
},
{
"epoch": 0.79,
"grad_norm": 1.1692731329110126,
"kl": 0.3389941453933716,
"learning_rate": 4.734015598304034e-06,
"loss": 0.2711,
"step": 270,
"step_loss": 0.2778770327568054
},
{
"epoch": 0.8,
"grad_norm": 1.2258612213402873,
"kl": 0.35436713695526123,
"learning_rate": 4.732045281652366e-06,
"loss": 0.3091,
"step": 271,
"step_loss": 0.3284461200237274
},
{
"epoch": 0.8,
"grad_norm": 1.133815899776919,
"kl": 0.31201237440109253,
"learning_rate": 4.730068156150363e-06,
"loss": 0.2656,
"step": 272,
"step_loss": 0.3073486387729645
},
{
"epoch": 0.8,
"grad_norm": 1.2017193050777555,
"kl": 0.29038453102111816,
"learning_rate": 4.728084228589973e-06,
"loss": 0.2846,
"step": 273,
"step_loss": 0.2407042682170868
},
{
"epoch": 0.81,
"grad_norm": 1.2777490002160443,
"kl": 0.2809942364692688,
"learning_rate": 4.726093505786515e-06,
"loss": 0.281,
"step": 274,
"step_loss": 0.2713775932788849
},
{
"epoch": 0.81,
"grad_norm": 1.1867307379834706,
"kl": 0.39103490114212036,
"learning_rate": 4.724095994578648e-06,
"loss": 0.3022,
"step": 275,
"step_loss": 0.38512590527534485
},
{
"epoch": 0.81,
"grad_norm": 1.1650998402655737,
"kl": 0.36318308115005493,
"learning_rate": 4.72209170182835e-06,
"loss": 0.2815,
"step": 276,
"step_loss": 0.3397858142852783
},
{
"epoch": 0.81,
"grad_norm": 1.0795928320116555,
"kl": 0.36297377943992615,
"learning_rate": 4.720080634420899e-06,
"loss": 0.2818,
"step": 277,
"step_loss": 0.3083583116531372
},
{
"epoch": 0.82,
"grad_norm": 1.1780687430025762,
"kl": 0.3422907590866089,
"learning_rate": 4.7180627992648435e-06,
"loss": 0.2789,
"step": 278,
"step_loss": 0.27315691113471985
},
{
"epoch": 0.82,
"grad_norm": 1.2103849934862,
"kl": 0.29663997888565063,
"learning_rate": 4.7160382032919824e-06,
"loss": 0.2971,
"step": 279,
"step_loss": 0.27249854803085327
},
{
"epoch": 0.82,
"grad_norm": 1.3222585308836912,
"kl": 0.3770950436592102,
"learning_rate": 4.714006853457339e-06,
"loss": 0.2972,
"step": 280,
"step_loss": 0.325216680765152
},
{
"epoch": 0.83,
"grad_norm": 1.162522628779284,
"kl": 0.313273549079895,
"learning_rate": 4.711968756739136e-06,
"loss": 0.289,
"step": 281,
"step_loss": 0.30059880018234253
},
{
"epoch": 0.83,
"grad_norm": 1.2370630041790278,
"kl": 0.3747837245464325,
"learning_rate": 4.709923920138776e-06,
"loss": 0.3016,
"step": 282,
"step_loss": 0.28908976912498474
},
{
"epoch": 0.83,
"grad_norm": 1.140130660961975,
"kl": 0.3109118342399597,
"learning_rate": 4.707872350680816e-06,
"loss": 0.2839,
"step": 283,
"step_loss": 0.276600182056427
},
{
"epoch": 0.83,
"grad_norm": 1.082299712166179,
"kl": 0.27061912417411804,
"learning_rate": 4.705814055412938e-06,
"loss": 0.266,
"step": 284,
"step_loss": 0.2920036315917969
},
{
"epoch": 0.84,
"grad_norm": 1.21073161805326,
"kl": 0.318640798330307,
"learning_rate": 4.703749041405932e-06,
"loss": 0.2725,
"step": 285,
"step_loss": 0.2592398524284363
},
{
"epoch": 0.84,
"grad_norm": 1.219448433556087,
"kl": 0.35519281029701233,
"learning_rate": 4.701677315753668e-06,
"loss": 0.2874,
"step": 286,
"step_loss": 0.3630865514278412
},
{
"epoch": 0.84,
"grad_norm": 1.2097930175912097,
"kl": 0.41031721234321594,
"learning_rate": 4.699598885573072e-06,
"loss": 0.3032,
"step": 287,
"step_loss": 0.3249673843383789
},
{
"epoch": 0.85,
"grad_norm": 1.1521100017307662,
"kl": 0.3674570620059967,
"learning_rate": 4.697513758004102e-06,
"loss": 0.2849,
"step": 288,
"step_loss": 0.30098646879196167
},
{
"epoch": 0.85,
"grad_norm": 1.1074540740753043,
"kl": 0.32119742035865784,
"learning_rate": 4.695421940209723e-06,
"loss": 0.2832,
"step": 289,
"step_loss": 0.2897493541240692
},
{
"epoch": 0.85,
"grad_norm": 1.2293554204206822,
"kl": 0.3377346098423004,
"learning_rate": 4.693323439375885e-06,
"loss": 0.293,
"step": 290,
"step_loss": 0.2627705931663513
},
{
"epoch": 0.86,
"grad_norm": 1.2069388480758467,
"kl": 0.3485328257083893,
"learning_rate": 4.691218262711491e-06,
"loss": 0.2867,
"step": 291,
"step_loss": 0.38160112500190735
},
{
"epoch": 0.86,
"grad_norm": 1.1419057253236133,
"kl": 0.29920992255210876,
"learning_rate": 4.6891064174483816e-06,
"loss": 0.2793,
"step": 292,
"step_loss": 0.27294811606407166
},
{
"epoch": 0.86,
"grad_norm": 1.1050106622647566,
"kl": 0.31865233182907104,
"learning_rate": 4.686987910841304e-06,
"loss": 0.2834,
"step": 293,
"step_loss": 0.24811354279518127
},
{
"epoch": 0.86,
"grad_norm": 1.2316064086381218,
"kl": 0.36698243021965027,
"learning_rate": 4.684862750167891e-06,
"loss": 0.288,
"step": 294,
"step_loss": 0.3681890070438385
},
{
"epoch": 0.87,
"grad_norm": 1.0625124958649887,
"kl": 0.31484031677246094,
"learning_rate": 4.6827309427286295e-06,
"loss": 0.2633,
"step": 295,
"step_loss": 0.24335134029388428
},
{
"epoch": 0.87,
"grad_norm": 1.0972510299173677,
"kl": 0.37381210923194885,
"learning_rate": 4.680592495846845e-06,
"loss": 0.2779,
"step": 296,
"step_loss": 0.34890639781951904
},
{
"epoch": 0.87,
"grad_norm": 1.2397594353527759,
"kl": 0.3720863163471222,
"learning_rate": 4.678447416868667e-06,
"loss": 0.2813,
"step": 297,
"step_loss": 0.335337370634079
},
{
"epoch": 0.88,
"grad_norm": 1.085078132679382,
"kl": 0.37017738819122314,
"learning_rate": 4.67629571316301e-06,
"loss": 0.2752,
"step": 298,
"step_loss": 0.29323574900627136
},
{
"epoch": 0.88,
"grad_norm": 1.1972337254915437,
"kl": 0.3234942853450775,
"learning_rate": 4.6741373921215475e-06,
"loss": 0.293,
"step": 299,
"step_loss": 0.2743943929672241
},
{
"epoch": 0.88,
"grad_norm": 1.354145558510618,
"kl": 0.408852756023407,
"learning_rate": 4.671972461158682e-06,
"loss": 0.3091,
"step": 300,
"step_loss": 0.3570805788040161
},
{
"epoch": 0.88,
"grad_norm": 1.308516554192417,
"kl": 0.33082523941993713,
"learning_rate": 4.669800927711525e-06,
"loss": 0.2957,
"step": 301,
"step_loss": 0.3042528033256531
},
{
"epoch": 0.89,
"grad_norm": 1.0883509759412517,
"kl": 0.36321133375167847,
"learning_rate": 4.667622799239869e-06,
"loss": 0.2822,
"step": 302,
"step_loss": 0.3216032385826111
},
{
"epoch": 0.89,
"grad_norm": 1.263711518929017,
"kl": 0.2793586850166321,
"learning_rate": 4.665438083226163e-06,
"loss": 0.2997,
"step": 303,
"step_loss": 0.2776317000389099
},
{
"epoch": 0.89,
"grad_norm": 1.1674457067837019,
"kl": 0.33832457661628723,
"learning_rate": 4.663246787175483e-06,
"loss": 0.3,
"step": 304,
"step_loss": 0.31598201394081116
},
{
"epoch": 0.9,
"grad_norm": 1.1988014340031905,
"kl": 0.36085131764411926,
"learning_rate": 4.661048918615513e-06,
"loss": 0.2791,
"step": 305,
"step_loss": 0.302070677280426
},
{
"epoch": 0.9,
"grad_norm": 1.1315893146442402,
"kl": 0.33112025260925293,
"learning_rate": 4.658844485096512e-06,
"loss": 0.2691,
"step": 306,
"step_loss": 0.29177939891815186
},
{
"epoch": 0.9,
"grad_norm": 1.2072921211772827,
"kl": 0.3359392583370209,
"learning_rate": 4.656633494191293e-06,
"loss": 0.2941,
"step": 307,
"step_loss": 0.3084091246128082
},
{
"epoch": 0.91,
"grad_norm": 1.2218266192172693,
"kl": 0.34884458780288696,
"learning_rate": 4.654415953495196e-06,
"loss": 0.2954,
"step": 308,
"step_loss": 0.28813090920448303
},
{
"epoch": 0.91,
"grad_norm": 1.27050923164869,
"kl": 0.38197797536849976,
"learning_rate": 4.65219187062606e-06,
"loss": 0.3116,
"step": 309,
"step_loss": 0.29677310585975647
},
{
"epoch": 0.91,
"grad_norm": 1.1599676961552565,
"kl": 0.3238053321838379,
"learning_rate": 4.649961253224198e-06,
"loss": 0.2855,
"step": 310,
"step_loss": 0.31362178921699524
},
{
"epoch": 0.91,
"grad_norm": 1.1310288626730198,
"kl": 0.3672914505004883,
"learning_rate": 4.647724108952373e-06,
"loss": 0.273,
"step": 311,
"step_loss": 0.2773699462413788
},
{
"epoch": 0.92,
"grad_norm": 1.2079649280038118,
"kl": 0.29229724407196045,
"learning_rate": 4.645480445495767e-06,
"loss": 0.3097,
"step": 312,
"step_loss": 0.3600565195083618
},
{
"epoch": 0.92,
"grad_norm": 1.1441975650704101,
"kl": 0.33793753385543823,
"learning_rate": 4.643230270561956e-06,
"loss": 0.2872,
"step": 313,
"step_loss": 0.2990878224372864
},
{
"epoch": 0.92,
"grad_norm": 1.1546332487233124,
"kl": 0.2881549596786499,
"learning_rate": 4.640973591880889e-06,
"loss": 0.2827,
"step": 314,
"step_loss": 0.2339860498905182
},
{
"epoch": 0.93,
"grad_norm": 1.2470856906835874,
"kl": 0.319610059261322,
"learning_rate": 4.638710417204855e-06,
"loss": 0.2845,
"step": 315,
"step_loss": 0.327451229095459
},
{
"epoch": 0.93,
"grad_norm": 1.0504828245555484,
"kl": 0.25614118576049805,
"learning_rate": 4.636440754308458e-06,
"loss": 0.2556,
"step": 316,
"step_loss": 0.21319641172885895
},
{
"epoch": 0.93,
"grad_norm": 1.11856029436489,
"kl": 0.3035759925842285,
"learning_rate": 4.63416461098859e-06,
"loss": 0.2848,
"step": 317,
"step_loss": 0.2784833014011383
},
{
"epoch": 0.93,
"grad_norm": 1.0668814023255409,
"kl": 0.29398462176322937,
"learning_rate": 4.631881995064406e-06,
"loss": 0.2744,
"step": 318,
"step_loss": 0.26396968960762024
},
{
"epoch": 0.94,
"grad_norm": 1.0712893824198688,
"kl": 0.32271263003349304,
"learning_rate": 4.629592914377298e-06,
"loss": 0.2668,
"step": 319,
"step_loss": 0.273529052734375
},
{
"epoch": 0.94,
"grad_norm": 1.2737157435766238,
"kl": 0.344595730304718,
"learning_rate": 4.6272973767908615e-06,
"loss": 0.3066,
"step": 320,
"step_loss": 0.3095177710056305
},
{
"epoch": 0.94,
"grad_norm": 1.0251746467924616,
"kl": 0.28812479972839355,
"learning_rate": 4.624995390190878e-06,
"loss": 0.2804,
"step": 321,
"step_loss": 0.24252820014953613
},
{
"epoch": 0.95,
"grad_norm": 1.1186582847380844,
"kl": 0.40985843539237976,
"learning_rate": 4.62268696248528e-06,
"loss": 0.2782,
"step": 322,
"step_loss": 0.27699118852615356
},
{
"epoch": 0.95,
"grad_norm": 1.181794615240478,
"kl": 0.2965797185897827,
"learning_rate": 4.620372101604128e-06,
"loss": 0.294,
"step": 323,
"step_loss": 0.25799939036369324
},
{
"epoch": 0.95,
"grad_norm": 1.2137603344462602,
"kl": 0.3655190169811249,
"learning_rate": 4.618050815499582e-06,
"loss": 0.2857,
"step": 324,
"step_loss": 0.30608299374580383
},
{
"epoch": 0.96,
"grad_norm": 1.181964659595807,
"kl": 0.3023182451725006,
"learning_rate": 4.615723112145875e-06,
"loss": 0.2832,
"step": 325,
"step_loss": 0.2613093852996826
},
{
"epoch": 0.96,
"grad_norm": 1.228179157800439,
"kl": 0.3194785714149475,
"learning_rate": 4.613388999539283e-06,
"loss": 0.2976,
"step": 326,
"step_loss": 0.3578476905822754
},
{
"epoch": 0.96,
"grad_norm": 1.1277959636999064,
"kl": 0.3428605794906616,
"learning_rate": 4.6110484856981025e-06,
"loss": 0.2771,
"step": 327,
"step_loss": 0.2329404354095459
},
{
"epoch": 0.96,
"grad_norm": 1.2069845653505489,
"kl": 0.425138384103775,
"learning_rate": 4.608701578662618e-06,
"loss": 0.2977,
"step": 328,
"step_loss": 0.3749895393848419
},
{
"epoch": 0.97,
"grad_norm": 1.1076692442818552,
"kl": 0.2908833622932434,
"learning_rate": 4.606348286495074e-06,
"loss": 0.2861,
"step": 329,
"step_loss": 0.28232869505882263
},
{
"epoch": 0.97,
"grad_norm": 1.1600108842297994,
"kl": 0.3712472915649414,
"learning_rate": 4.6039886172796555e-06,
"loss": 0.2825,
"step": 330,
"step_loss": 0.29888463020324707
},
{
"epoch": 0.97,
"grad_norm": 1.0826571682025825,
"kl": 0.3151686489582062,
"learning_rate": 4.6016225791224504e-06,
"loss": 0.2826,
"step": 331,
"step_loss": 0.25253331661224365
},
{
"epoch": 0.98,
"grad_norm": 1.0721510598381356,
"kl": 0.30766239762306213,
"learning_rate": 4.5992501801514235e-06,
"loss": 0.2802,
"step": 332,
"step_loss": 0.25241219997406006
},
{
"epoch": 0.98,
"grad_norm": 1.149128958736038,
"kl": 0.31056687235832214,
"learning_rate": 4.596871428516397e-06,
"loss": 0.2778,
"step": 333,
"step_loss": 0.2913603186607361
},
{
"epoch": 0.98,
"grad_norm": 1.0863838444877447,
"kl": 0.37738847732543945,
"learning_rate": 4.594486332389011e-06,
"loss": 0.2736,
"step": 334,
"step_loss": 0.3190591335296631
},
{
"epoch": 0.98,
"grad_norm": 1.2058838263807778,
"kl": 0.3031269609928131,
"learning_rate": 4.592094899962702e-06,
"loss": 0.2911,
"step": 335,
"step_loss": 0.23730549216270447
},
{
"epoch": 0.99,
"grad_norm": 1.1861398398638574,
"kl": 0.3154381513595581,
"learning_rate": 4.589697139452673e-06,
"loss": 0.295,
"step": 336,
"step_loss": 0.30010032653808594
},
{
"epoch": 0.99,
"grad_norm": 1.1654254307836165,
"kl": 0.3234802186489105,
"learning_rate": 4.5872930590958685e-06,
"loss": 0.2821,
"step": 337,
"step_loss": 0.3050956428050995
},
{
"epoch": 0.99,
"grad_norm": 1.0142973438347946,
"kl": 0.3032388389110565,
"learning_rate": 4.584882667150939e-06,
"loss": 0.2637,
"step": 338,
"step_loss": 0.25487464666366577
},
{
"epoch": 1.0,
"grad_norm": 1.1306032144314904,
"kl": 0.37623462080955505,
"learning_rate": 4.582465971898219e-06,
"loss": 0.289,
"step": 339,
"step_loss": 0.3044344484806061
},
{
"epoch": 1.0,
"grad_norm": 1.1724697129866262,
"kl": 0.3717675507068634,
"learning_rate": 4.580042981639699e-06,
"loss": 0.2842,
"step": 340,
"step_loss": 0.30331289768218994
},
{
"epoch": 1.0,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_loss": 1.5836049318313599,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_runtime": 14.7278,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_samples_per_second": 6.79,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_steps_per_second": 0.883,
"step": 340
},
{
"epoch": 1.0,
"grad_norm": 0.9909952614947144,
"kl": 0.3604995012283325,
"learning_rate": 4.577613704698989e-06,
"loss": 0.2173,
"step": 341,
"step_loss": 1.2625408172607422
},
{
"epoch": 1.01,
"grad_norm": 1.122598641549428,
"kl": 0.37703055143356323,
"learning_rate": 4.5751781494213e-06,
"loss": 0.2233,
"step": 342,
"step_loss": 0.24784518778324127
},
{
"epoch": 1.01,
"grad_norm": 0.9555222272528335,
"kl": 0.3994651734828949,
"learning_rate": 4.572736324173409e-06,
"loss": 0.2154,
"step": 343,
"step_loss": 0.24239662289619446
},
{
"epoch": 1.01,
"grad_norm": 1.0100151801384887,
"kl": 0.40937453508377075,
"learning_rate": 4.570288237343632e-06,
"loss": 0.2098,
"step": 344,
"step_loss": 0.25453901290893555
},
{
"epoch": 1.01,
"grad_norm": 0.9870747676771315,
"kl": 0.3054189085960388,
"learning_rate": 4.567833897341797e-06,
"loss": 0.2083,
"step": 345,
"step_loss": 0.19117772579193115
},
{
"epoch": 1.02,
"grad_norm": 1.1061807368303616,
"kl": 0.41628479957580566,
"learning_rate": 4.565373312599209e-06,
"loss": 0.2152,
"step": 346,
"step_loss": 0.22663772106170654
},
{
"epoch": 1.02,
"grad_norm": 1.0384373325803509,
"kl": 0.3613884449005127,
"learning_rate": 4.56290649156863e-06,
"loss": 0.2156,
"step": 347,
"step_loss": 0.18198919296264648
},
{
"epoch": 1.02,
"grad_norm": 1.0658063843250276,
"kl": 0.3354627192020416,
"learning_rate": 4.560433442724243e-06,
"loss": 0.2078,
"step": 348,
"step_loss": 0.16816985607147217
},
{
"epoch": 1.03,
"grad_norm": 1.1109132713111654,
"kl": 0.4467100501060486,
"learning_rate": 4.5579541745616254e-06,
"loss": 0.2177,
"step": 349,
"step_loss": 0.2320152074098587
},
{
"epoch": 1.03,
"grad_norm": 1.1929174001218243,
"kl": 0.456093430519104,
"learning_rate": 4.555468695597721e-06,
"loss": 0.2167,
"step": 350,
"step_loss": 0.22836454212665558
},
{
"epoch": 1.03,
"grad_norm": 1.183660347956496,
"kl": 0.39071205258369446,
"learning_rate": 4.552977014370807e-06,
"loss": 0.2235,
"step": 351,
"step_loss": 0.2216327041387558
},
{
"epoch": 1.03,
"grad_norm": 1.113360044133729,
"kl": 0.40837928652763367,
"learning_rate": 4.550479139440466e-06,
"loss": 0.2133,
"step": 352,
"step_loss": 0.20445051789283752
},
{
"epoch": 1.04,
"grad_norm": 1.1459353950591424,
"kl": 0.355142205953598,
"learning_rate": 4.547975079387564e-06,
"loss": 0.2272,
"step": 353,
"step_loss": 0.22001862525939941
},
{
"epoch": 1.04,
"grad_norm": 1.1749772462772283,
"kl": 0.3855653405189514,
"learning_rate": 4.545464842814208e-06,
"loss": 0.2217,
"step": 354,
"step_loss": 0.22728465497493744
},
{
"epoch": 1.04,
"grad_norm": 1.015463016503802,
"kl": 0.3446847200393677,
"learning_rate": 4.542948438343726e-06,
"loss": 0.2058,
"step": 355,
"step_loss": 0.18022528290748596
},
{
"epoch": 1.05,
"grad_norm": 1.0669292034040894,
"kl": 0.36371520161628723,
"learning_rate": 4.5404258746206314e-06,
"loss": 0.21,
"step": 356,
"step_loss": 0.2281932234764099
},
{
"epoch": 1.05,
"grad_norm": 1.020434757519867,
"kl": 0.40964460372924805,
"learning_rate": 4.537897160310602e-06,
"loss": 0.2063,
"step": 357,
"step_loss": 0.20418326556682587
},
{
"epoch": 1.05,
"grad_norm": 1.0651999836970036,
"kl": 0.328068345785141,
"learning_rate": 4.535362304100439e-06,
"loss": 0.2153,
"step": 358,
"step_loss": 0.18197788298130035
},
{
"epoch": 1.06,
"grad_norm": 1.0161322060783642,
"kl": 0.3565504848957062,
"learning_rate": 4.532821314698044e-06,
"loss": 0.2113,
"step": 359,
"step_loss": 0.22694353759288788
},
{
"epoch": 1.06,
"grad_norm": 1.0601947614383682,
"kl": 0.34940865635871887,
"learning_rate": 4.5302742008323895e-06,
"loss": 0.2244,
"step": 360,
"step_loss": 0.18668481707572937
},
{
"epoch": 1.06,
"grad_norm": 1.0190544548173317,
"kl": 0.30953383445739746,
"learning_rate": 4.527720971253486e-06,
"loss": 0.2146,
"step": 361,
"step_loss": 0.2063293755054474
},
{
"epoch": 1.06,
"grad_norm": 1.0542367113097721,
"kl": 0.4005735516548157,
"learning_rate": 4.525161634732354e-06,
"loss": 0.2043,
"step": 362,
"step_loss": 0.20447780191898346
},
{
"epoch": 1.07,
"grad_norm": 1.240603468633649,
"kl": 0.40291449427604675,
"learning_rate": 4.52259620006099e-06,
"loss": 0.2211,
"step": 363,
"step_loss": 0.19387193024158478
},
{
"epoch": 1.07,
"grad_norm": 1.1166355530237915,
"kl": 0.409071147441864,
"learning_rate": 4.520024676052342e-06,
"loss": 0.2141,
"step": 364,
"step_loss": 0.2120700180530548
},
{
"epoch": 1.07,
"grad_norm": 1.1835634208796884,
"kl": 0.42897555232048035,
"learning_rate": 4.517447071540277e-06,
"loss": 0.2129,
"step": 365,
"step_loss": 0.24478043615818024
},
{
"epoch": 1.08,
"grad_norm": 1.10221612084664,
"kl": 0.4204740822315216,
"learning_rate": 4.514863395379548e-06,
"loss": 0.2127,
"step": 366,
"step_loss": 0.2228332757949829
},
{
"epoch": 1.08,
"grad_norm": 1.114889221761073,
"kl": 0.3709118664264679,
"learning_rate": 4.512273656445767e-06,
"loss": 0.2216,
"step": 367,
"step_loss": 0.204238623380661
},
{
"epoch": 1.08,
"grad_norm": 1.1697263984215087,
"kl": 0.36464443802833557,
"learning_rate": 4.509677863635373e-06,
"loss": 0.2152,
"step": 368,
"step_loss": 0.21155373752117157
},
{
"epoch": 1.08,
"grad_norm": 1.1455769887916711,
"kl": 0.33827337622642517,
"learning_rate": 4.507076025865602e-06,
"loss": 0.2025,
"step": 369,
"step_loss": 0.15999384224414825
},
{
"epoch": 1.09,
"grad_norm": 1.18888099578414,
"kl": 0.38023340702056885,
"learning_rate": 4.504468152074454e-06,
"loss": 0.2221,
"step": 370,
"step_loss": 0.22181765735149384
},
{
"epoch": 1.09,
"grad_norm": 1.1432354202041541,
"kl": 0.4160056710243225,
"learning_rate": 4.501854251220667e-06,
"loss": 0.2184,
"step": 371,
"step_loss": 0.2242666780948639
},
{
"epoch": 1.09,
"grad_norm": 1.05396341263562,
"kl": 0.324006050825119,
"learning_rate": 4.499234332283683e-06,
"loss": 0.2067,
"step": 372,
"step_loss": 0.23381561040878296
},
{
"epoch": 1.1,
"grad_norm": 1.1234179605156285,
"kl": 0.4134640395641327,
"learning_rate": 4.496608404263617e-06,
"loss": 0.2267,
"step": 373,
"step_loss": 0.2217472344636917
},
{
"epoch": 1.1,
"grad_norm": 1.0823406851201798,
"kl": 0.2976873219013214,
"learning_rate": 4.493976476181227e-06,
"loss": 0.2146,
"step": 374,
"step_loss": 0.19449269771575928
},
{
"epoch": 1.1,
"grad_norm": 1.0386992586850838,
"kl": 0.40376949310302734,
"learning_rate": 4.491338557077883e-06,
"loss": 0.2124,
"step": 375,
"step_loss": 0.2595367729663849
},
{
"epoch": 1.11,
"grad_norm": 1.1201199545655665,
"kl": 0.46266913414001465,
"learning_rate": 4.488694656015535e-06,
"loss": 0.2176,
"step": 376,
"step_loss": 0.22357900440692902
},
{
"epoch": 1.11,
"grad_norm": 1.0754291235160311,
"kl": 0.33384445309638977,
"learning_rate": 4.486044782076683e-06,
"loss": 0.2244,
"step": 377,
"step_loss": 0.16794537007808685
},
{
"epoch": 1.11,
"grad_norm": 1.0073994521995542,
"kl": 0.27469688653945923,
"learning_rate": 4.483388944364345e-06,
"loss": 0.2203,
"step": 378,
"step_loss": 0.14068152010440826
},
{
"epoch": 1.11,
"grad_norm": 1.081773692852582,
"kl": 0.4029933512210846,
"learning_rate": 4.480727152002029e-06,
"loss": 0.2315,
"step": 379,
"step_loss": 0.2484932690858841
},
{
"epoch": 1.12,
"grad_norm": 0.9668680107071213,
"kl": 0.41677016019821167,
"learning_rate": 4.478059414133695e-06,
"loss": 0.2004,
"step": 380,
"step_loss": 0.19721892476081848
},
{
"epoch": 1.12,
"grad_norm": 1.001005650539627,
"kl": 0.31512561440467834,
"learning_rate": 4.47538573992373e-06,
"loss": 0.205,
"step": 381,
"step_loss": 0.20380663871765137
},
{
"epoch": 1.12,
"grad_norm": 1.0947450062868074,
"kl": 0.357501357793808,
"learning_rate": 4.472706138556911e-06,
"loss": 0.2164,
"step": 382,
"step_loss": 0.18332478404045105
},
{
"epoch": 1.13,
"grad_norm": 1.1706570208600346,
"kl": 0.3620475232601166,
"learning_rate": 4.4700206192383796e-06,
"loss": 0.2252,
"step": 383,
"step_loss": 0.19933828711509705
},
{
"epoch": 1.13,
"grad_norm": 1.226064605249827,
"kl": 0.2993457019329071,
"learning_rate": 4.4673291911936064e-06,
"loss": 0.2265,
"step": 384,
"step_loss": 0.17195503413677216
},
{
"epoch": 1.13,
"grad_norm": 1.1576130346407478,
"kl": 0.35498231649398804,
"learning_rate": 4.464631863668357e-06,
"loss": 0.2108,
"step": 385,
"step_loss": 0.1912364363670349
},
{
"epoch": 1.13,
"grad_norm": 1.0802215632686387,
"kl": 0.3443387448787689,
"learning_rate": 4.461928645928667e-06,
"loss": 0.2105,
"step": 386,
"step_loss": 0.27471888065338135
},
{
"epoch": 1.14,
"grad_norm": 0.9802948466940768,
"kl": 0.3694034218788147,
"learning_rate": 4.459219547260805e-06,
"loss": 0.1983,
"step": 387,
"step_loss": 0.21388615667819977
},
{
"epoch": 1.14,
"grad_norm": 1.0063078315772995,
"kl": 0.2925432026386261,
"learning_rate": 4.456504576971243e-06,
"loss": 0.2123,
"step": 388,
"step_loss": 0.20165732502937317
},
{
"epoch": 1.14,
"grad_norm": 1.0786559345589601,
"kl": 0.29720914363861084,
"learning_rate": 4.453783744386621e-06,
"loss": 0.2076,
"step": 389,
"step_loss": 0.22997143864631653
},
{
"epoch": 1.15,
"grad_norm": 1.0602165185359143,
"kl": 0.31571799516677856,
"learning_rate": 4.451057058853721e-06,
"loss": 0.2134,
"step": 390,
"step_loss": 0.17782820761203766
},
{
"epoch": 1.15,
"grad_norm": 1.0133394273161123,
"kl": 0.30432724952697754,
"learning_rate": 4.448324529739429e-06,
"loss": 0.1898,
"step": 391,
"step_loss": 0.15238864719867706
},
{
"epoch": 1.15,
"grad_norm": 0.9804056306864453,
"kl": 0.3994058072566986,
"learning_rate": 4.445586166430706e-06,
"loss": 0.1903,
"step": 392,
"step_loss": 0.2173841893672943
},
{
"epoch": 1.16,
"grad_norm": 1.123296175942593,
"kl": 0.30720776319503784,
"learning_rate": 4.442841978334556e-06,
"loss": 0.2216,
"step": 393,
"step_loss": 0.19546273350715637
},
{
"epoch": 1.16,
"grad_norm": 1.0359920222691423,
"kl": 0.35486575961112976,
"learning_rate": 4.440091974877989e-06,
"loss": 0.2129,
"step": 394,
"step_loss": 0.20697163045406342
},
{
"epoch": 1.16,
"grad_norm": 1.0849629839063242,
"kl": 0.33868688344955444,
"learning_rate": 4.437336165507998e-06,
"loss": 0.2166,
"step": 395,
"step_loss": 0.21048006415367126
},
{
"epoch": 1.16,
"grad_norm": 1.0951854958685978,
"kl": 0.4097781181335449,
"learning_rate": 4.4345745596915164e-06,
"loss": 0.2149,
"step": 396,
"step_loss": 0.22375498712062836
},
{
"epoch": 1.17,
"grad_norm": 1.077971005153329,
"kl": 0.3999599516391754,
"learning_rate": 4.431807166915393e-06,
"loss": 0.1962,
"step": 397,
"step_loss": 0.20573902130126953
},
{
"epoch": 1.17,
"grad_norm": 1.1986240067763814,
"kl": 0.33355939388275146,
"learning_rate": 4.429033996686352e-06,
"loss": 0.2158,
"step": 398,
"step_loss": 0.20132023096084595
},
{
"epoch": 1.17,
"grad_norm": 1.0260499777997851,
"kl": 0.435636967420578,
"learning_rate": 4.426255058530969e-06,
"loss": 0.2093,
"step": 399,
"step_loss": 0.2543203830718994
},
{
"epoch": 1.18,
"grad_norm": 1.1062815074420902,
"kl": 0.313290536403656,
"learning_rate": 4.423470361995632e-06,
"loss": 0.2169,
"step": 400,
"step_loss": 0.1880669891834259
},
{
"epoch": 1.18,
"grad_norm": 1.1099408278287306,
"kl": 0.384952574968338,
"learning_rate": 4.420679916646512e-06,
"loss": 0.2167,
"step": 401,
"step_loss": 0.21090340614318848
},
{
"epoch": 1.18,
"grad_norm": 1.072595417050862,
"kl": 0.32443416118621826,
"learning_rate": 4.417883732069525e-06,
"loss": 0.2069,
"step": 402,
"step_loss": 0.2197069674730301
},
{
"epoch": 1.18,
"grad_norm": 1.1520310317038895,
"kl": 0.4313489496707916,
"learning_rate": 4.4150818178703084e-06,
"loss": 0.2164,
"step": 403,
"step_loss": 0.23380175232887268
},
{
"epoch": 1.19,
"grad_norm": 1.0783958233120303,
"kl": 0.4069855809211731,
"learning_rate": 4.412274183674177e-06,
"loss": 0.2254,
"step": 404,
"step_loss": 0.22361119091510773
},
{
"epoch": 1.19,
"grad_norm": 1.063343526682731,
"kl": 0.32530197501182556,
"learning_rate": 4.409460839126099e-06,
"loss": 0.1957,
"step": 405,
"step_loss": 0.20308908820152283
},
{
"epoch": 1.19,
"grad_norm": 1.1176352237327234,
"kl": 0.44647660851478577,
"learning_rate": 4.406641793890658e-06,
"loss": 0.2153,
"step": 406,
"step_loss": 0.2192254364490509
},
{
"epoch": 1.2,
"grad_norm": 1.0704852612143154,
"kl": 0.3989701271057129,
"learning_rate": 4.40381705765202e-06,
"loss": 0.2141,
"step": 407,
"step_loss": 0.2276991605758667
},
{
"epoch": 1.2,
"grad_norm": 1.1043825778150693,
"kl": 0.4035341739654541,
"learning_rate": 4.400986640113903e-06,
"loss": 0.2179,
"step": 408,
"step_loss": 0.24566595256328583
},
{
"epoch": 1.2,
"grad_norm": 1.0492597860234878,
"kl": 0.34541037678718567,
"learning_rate": 4.398150550999538e-06,
"loss": 0.2097,
"step": 409,
"step_loss": 0.22366544604301453
},
{
"epoch": 1.21,
"grad_norm": 1.104701648821187,
"kl": 0.47003430128097534,
"learning_rate": 4.395308800051645e-06,
"loss": 0.2152,
"step": 410,
"step_loss": 0.2739095389842987
},
{
"epoch": 1.21,
"grad_norm": 1.1432191403701684,
"kl": 0.3003239333629608,
"learning_rate": 4.392461397032388e-06,
"loss": 0.2209,
"step": 411,
"step_loss": 0.20217368006706238
},
{
"epoch": 1.21,
"grad_norm": 1.1006520749844693,
"kl": 0.34780094027519226,
"learning_rate": 4.389608351723354e-06,
"loss": 0.2222,
"step": 412,
"step_loss": 0.18477079272270203
},
{
"epoch": 1.21,
"grad_norm": 1.1433499590320158,
"kl": 0.29936301708221436,
"learning_rate": 4.386749673925507e-06,
"loss": 0.2136,
"step": 413,
"step_loss": 0.1923401653766632
},
{
"epoch": 1.22,
"grad_norm": 1.0604657445965895,
"kl": 0.3893413245677948,
"learning_rate": 4.383885373459162e-06,
"loss": 0.2077,
"step": 414,
"step_loss": 0.22892388701438904
},
{
"epoch": 1.22,
"grad_norm": 1.1153994570005237,
"kl": 0.4249955713748932,
"learning_rate": 4.381015460163949e-06,
"loss": 0.213,
"step": 415,
"step_loss": 0.22074082493782043
},
{
"epoch": 1.22,
"grad_norm": 1.0044730994428235,
"kl": 0.42945361137390137,
"learning_rate": 4.378139943898782e-06,
"loss": 0.201,
"step": 416,
"step_loss": 0.2300502359867096
},
{
"epoch": 1.23,
"grad_norm": 1.1201498198141744,
"kl": 0.31198564171791077,
"learning_rate": 4.375258834541819e-06,
"loss": 0.2335,
"step": 417,
"step_loss": 0.18508249521255493
},
{
"epoch": 1.23,
"grad_norm": 1.1207096425422391,
"kl": 0.3583974540233612,
"learning_rate": 4.372372141990433e-06,
"loss": 0.2065,
"step": 418,
"step_loss": 0.1857309639453888
},
{
"epoch": 1.23,
"grad_norm": 1.1496661225032172,
"kl": 0.3583613336086273,
"learning_rate": 4.369479876161179e-06,
"loss": 0.2177,
"step": 419,
"step_loss": 0.224288210272789
},
{
"epoch": 1.23,
"grad_norm": 1.0784304365856952,
"kl": 0.3118525445461273,
"learning_rate": 4.366582046989756e-06,
"loss": 0.2067,
"step": 420,
"step_loss": 0.1751691997051239
},
{
"epoch": 1.24,
"grad_norm": 1.1886899720468163,
"kl": 0.4487501382827759,
"learning_rate": 4.363678664430972e-06,
"loss": 0.2237,
"step": 421,
"step_loss": 0.25023704767227173
},
{
"epoch": 1.24,
"grad_norm": 1.1745967665142434,
"kl": 0.33707478642463684,
"learning_rate": 4.360769738458717e-06,
"loss": 0.2135,
"step": 422,
"step_loss": 0.18086591362953186
},
{
"epoch": 1.24,
"grad_norm": 1.092498622016125,
"kl": 0.3724205791950226,
"learning_rate": 4.3578552790659215e-06,
"loss": 0.2313,
"step": 423,
"step_loss": 0.22714251279830933
},
{
"epoch": 1.25,
"grad_norm": 1.1190105848672534,
"kl": 0.2946886420249939,
"learning_rate": 4.3549352962645256e-06,
"loss": 0.2036,
"step": 424,
"step_loss": 0.20033493638038635
},
{
"epoch": 1.25,
"grad_norm": 1.1415511904160704,
"kl": 0.2983555495738983,
"learning_rate": 4.352009800085442e-06,
"loss": 0.2136,
"step": 425,
"step_loss": 0.16325783729553223
},
{
"epoch": 1.25,
"grad_norm": 1.12564799402316,
"kl": 0.40603286027908325,
"learning_rate": 4.349078800578527e-06,
"loss": 0.2225,
"step": 426,
"step_loss": 0.24971996247768402
},
{
"epoch": 1.26,
"grad_norm": 1.0847789979247071,
"kl": 0.2904649078845978,
"learning_rate": 4.346142307812537e-06,
"loss": 0.2134,
"step": 427,
"step_loss": 0.21226723492145538
},
{
"epoch": 1.26,
"grad_norm": 1.1535493008057434,
"kl": 0.4135288596153259,
"learning_rate": 4.3432003318751034e-06,
"loss": 0.218,
"step": 428,
"step_loss": 0.24029088020324707
},
{
"epoch": 1.26,
"grad_norm": 1.1822688678890323,
"kl": 0.3823299705982208,
"learning_rate": 4.340252882872693e-06,
"loss": 0.2093,
"step": 429,
"step_loss": 0.23636040091514587
},
{
"epoch": 1.26,
"grad_norm": 1.218665217849662,
"kl": 0.34758618474006653,
"learning_rate": 4.337299970930573e-06,
"loss": 0.2031,
"step": 430,
"step_loss": 0.1947343796491623
},
{
"epoch": 1.27,
"grad_norm": 1.12952236007926,
"kl": 0.3034479022026062,
"learning_rate": 4.3343416061927784e-06,
"loss": 0.2125,
"step": 431,
"step_loss": 0.21196305751800537
},
{
"epoch": 1.27,
"grad_norm": 1.1167424522607692,
"kl": 0.3147704005241394,
"learning_rate": 4.331377798822074e-06,
"loss": 0.2131,
"step": 432,
"step_loss": 0.22102078795433044
},
{
"epoch": 1.27,
"grad_norm": 1.0776199042214483,
"kl": 0.38971424102783203,
"learning_rate": 4.328408558999926e-06,
"loss": 0.2129,
"step": 433,
"step_loss": 0.2175690084695816
},
{
"epoch": 1.28,
"grad_norm": 1.247807487250352,
"kl": 0.37261104583740234,
"learning_rate": 4.325433896926455e-06,
"loss": 0.233,
"step": 434,
"step_loss": 0.17759710550308228
},
{
"epoch": 1.28,
"grad_norm": 1.0828914253017792,
"kl": 0.4615696668624878,
"learning_rate": 4.322453822820416e-06,
"loss": 0.2068,
"step": 435,
"step_loss": 0.2164827585220337
},
{
"epoch": 1.28,
"grad_norm": 1.1538866091782574,
"kl": 0.34330347180366516,
"learning_rate": 4.319468346919151e-06,
"loss": 0.2141,
"step": 436,
"step_loss": 0.17444977164268494
},
{
"epoch": 1.28,
"grad_norm": 1.0771764576951577,
"kl": 0.3454141616821289,
"learning_rate": 4.316477479478562e-06,
"loss": 0.2071,
"step": 437,
"step_loss": 0.1811198741197586
},
{
"epoch": 1.29,
"grad_norm": 1.0306242648319286,
"kl": 0.33969008922576904,
"learning_rate": 4.3134812307730685e-06,
"loss": 0.2226,
"step": 438,
"step_loss": 0.20342102646827698
},
{
"epoch": 1.29,
"grad_norm": 1.204874661424694,
"kl": 0.35218775272369385,
"learning_rate": 4.310479611095579e-06,
"loss": 0.2297,
"step": 439,
"step_loss": 0.2256166785955429
},
{
"epoch": 1.29,
"grad_norm": 1.1018103259900038,
"kl": 0.3603362441062927,
"learning_rate": 4.3074726307574515e-06,
"loss": 0.2154,
"step": 440,
"step_loss": 0.1923496127128601
},
{
"epoch": 1.3,
"grad_norm": 1.1380166709119617,
"kl": 0.3439081907272339,
"learning_rate": 4.304460300088461e-06,
"loss": 0.2132,
"step": 441,
"step_loss": 0.1734137088060379
},
{
"epoch": 1.3,
"grad_norm": 1.2016707710129002,
"kl": 0.5097121000289917,
"learning_rate": 4.3014426294367585e-06,
"loss": 0.2059,
"step": 442,
"step_loss": 0.24441494047641754
},
{
"epoch": 1.3,
"grad_norm": 1.095054101053829,
"kl": 0.37071746587753296,
"learning_rate": 4.298419629168844e-06,
"loss": 0.2075,
"step": 443,
"step_loss": 0.22421976923942566
},
{
"epoch": 1.31,
"grad_norm": 1.1424892814851308,
"kl": 0.3091402053833008,
"learning_rate": 4.295391309669523e-06,
"loss": 0.2133,
"step": 444,
"step_loss": 0.19774490594863892
},
{
"epoch": 1.31,
"grad_norm": 1.0489332618798233,
"kl": 0.3951050341129303,
"learning_rate": 4.292357681341875e-06,
"loss": 0.2173,
"step": 445,
"step_loss": 0.23351700603961945
},
{
"epoch": 1.31,
"grad_norm": 1.1201440862803222,
"kl": 0.31920719146728516,
"learning_rate": 4.289318754607216e-06,
"loss": 0.2119,
"step": 446,
"step_loss": 0.19813159108161926
},
{
"epoch": 1.31,
"grad_norm": 1.109579067534783,
"kl": 0.36979711055755615,
"learning_rate": 4.286274539905064e-06,
"loss": 0.2214,
"step": 447,
"step_loss": 0.2170843780040741
},
{
"epoch": 1.32,
"grad_norm": 1.1248189065136833,
"kl": 0.4319002628326416,
"learning_rate": 4.283225047693102e-06,
"loss": 0.2216,
"step": 448,
"step_loss": 0.23914441466331482
},
{
"epoch": 1.32,
"grad_norm": 1.0143097210255512,
"kl": 0.44951027631759644,
"learning_rate": 4.280170288447145e-06,
"loss": 0.19,
"step": 449,
"step_loss": 0.23710179328918457
},
{
"epoch": 1.32,
"grad_norm": 1.1324153993387862,
"kl": 0.38769853115081787,
"learning_rate": 4.277110272661098e-06,
"loss": 0.2155,
"step": 450,
"step_loss": 0.201776921749115
},
{
"epoch": 1.33,
"grad_norm": 1.1440937390786563,
"kl": 0.4149802625179291,
"learning_rate": 4.2740450108469276e-06,
"loss": 0.2167,
"step": 451,
"step_loss": 0.22473248839378357
},
{
"epoch": 1.33,
"grad_norm": 1.1169649315923986,
"kl": 0.4232461452484131,
"learning_rate": 4.270974513534617e-06,
"loss": 0.2186,
"step": 452,
"step_loss": 0.22173169255256653
},
{
"epoch": 1.33,
"grad_norm": 1.0911796681416763,
"kl": 0.36605626344680786,
"learning_rate": 4.26789879127214e-06,
"loss": 0.2013,
"step": 453,
"step_loss": 0.24023717641830444
},
{
"epoch": 1.33,
"grad_norm": 1.1226223217769014,
"kl": 0.363466739654541,
"learning_rate": 4.2648178546254135e-06,
"loss": 0.2128,
"step": 454,
"step_loss": 0.19646257162094116
},
{
"epoch": 1.34,
"grad_norm": 1.0952440956076734,
"kl": 0.4751397669315338,
"learning_rate": 4.261731714178274e-06,
"loss": 0.2107,
"step": 455,
"step_loss": 0.27217623591423035
},
{
"epoch": 1.34,
"grad_norm": 1.140170398680307,
"kl": 0.3529389500617981,
"learning_rate": 4.25864038053243e-06,
"loss": 0.2175,
"step": 456,
"step_loss": 0.22076928615570068
},
{
"epoch": 1.34,
"grad_norm": 1.0815717361792092,
"kl": 0.3519299626350403,
"learning_rate": 4.2555438643074315e-06,
"loss": 0.2019,
"step": 457,
"step_loss": 0.19926907122135162
},
{
"epoch": 1.35,
"grad_norm": 1.1185893886506182,
"kl": 0.36596280336380005,
"learning_rate": 4.252442176140631e-06,
"loss": 0.2036,
"step": 458,
"step_loss": 0.18698200583457947
},
{
"epoch": 1.35,
"grad_norm": 1.1963180102524402,
"kl": 0.41686299443244934,
"learning_rate": 4.249335326687148e-06,
"loss": 0.2236,
"step": 459,
"step_loss": 0.23538297414779663
},
{
"epoch": 1.35,
"grad_norm": 1.1749480454138903,
"kl": 0.3308974504470825,
"learning_rate": 4.2462233266198335e-06,
"loss": 0.2188,
"step": 460,
"step_loss": 0.19405069947242737
},
{
"epoch": 1.36,
"grad_norm": 1.227170089100812,
"kl": 0.3617514967918396,
"learning_rate": 4.243106186629233e-06,
"loss": 0.2232,
"step": 461,
"step_loss": 0.1792970597743988
},
{
"epoch": 1.36,
"grad_norm": 1.0671020515492777,
"kl": 0.34086763858795166,
"learning_rate": 4.2399839174235445e-06,
"loss": 0.2123,
"step": 462,
"step_loss": 0.22149503231048584
},
{
"epoch": 1.36,
"grad_norm": 1.0270636289046342,
"kl": 0.3730754852294922,
"learning_rate": 4.236856529728593e-06,
"loss": 0.2081,
"step": 463,
"step_loss": 0.2611701488494873
},
{
"epoch": 1.36,
"grad_norm": 1.0352246387850932,
"kl": 0.31691187620162964,
"learning_rate": 4.233724034287782e-06,
"loss": 0.2062,
"step": 464,
"step_loss": 0.18789568543434143
},
{
"epoch": 1.37,
"grad_norm": 1.1036670142111487,
"kl": 0.3653966188430786,
"learning_rate": 4.230586441862063e-06,
"loss": 0.2102,
"step": 465,
"step_loss": 0.25932013988494873
},
{
"epoch": 1.37,
"grad_norm": 1.141576174412114,
"kl": 0.3486475348472595,
"learning_rate": 4.227443763229895e-06,
"loss": 0.2143,
"step": 466,
"step_loss": 0.22772841155529022
},
{
"epoch": 1.37,
"grad_norm": 1.08652115649653,
"kl": 0.38706696033477783,
"learning_rate": 4.224296009187212e-06,
"loss": 0.2092,
"step": 467,
"step_loss": 0.20331993699073792
},
{
"epoch": 1.38,
"grad_norm": 1.1132102876767822,
"kl": 0.4534846544265747,
"learning_rate": 4.221143190547384e-06,
"loss": 0.2069,
"step": 468,
"step_loss": 0.19342438876628876
},
{
"epoch": 1.38,
"grad_norm": 1.2487774294245415,
"kl": 0.34831732511520386,
"learning_rate": 4.217985318141177e-06,
"loss": 0.23,
"step": 469,
"step_loss": 0.23435795307159424
},
{
"epoch": 1.38,
"grad_norm": 1.1596986401165996,
"kl": 0.3419073224067688,
"learning_rate": 4.214822402816718e-06,
"loss": 0.2087,
"step": 470,
"step_loss": 0.17857272922992706
},
{
"epoch": 1.38,
"grad_norm": 1.0228762507304563,
"kl": 0.3407072126865387,
"learning_rate": 4.21165445543946e-06,
"loss": 0.2022,
"step": 471,
"step_loss": 0.18100890517234802
},
{
"epoch": 1.39,
"grad_norm": 1.1834791650481271,
"kl": 0.3761002719402313,
"learning_rate": 4.20848148689214e-06,
"loss": 0.2114,
"step": 472,
"step_loss": 0.20595240592956543
},
{
"epoch": 1.39,
"grad_norm": 1.0537720415925667,
"kl": 0.32567542791366577,
"learning_rate": 4.205303508074745e-06,
"loss": 0.2135,
"step": 473,
"step_loss": 0.23016318678855896
},
{
"epoch": 1.39,
"grad_norm": 1.08626518519842,
"kl": 0.39517271518707275,
"learning_rate": 4.202120529904474e-06,
"loss": 0.2076,
"step": 474,
"step_loss": 0.22103264927864075
},
{
"epoch": 1.4,
"grad_norm": 1.0419674113366015,
"kl": 0.34639662504196167,
"learning_rate": 4.1989325633157e-06,
"loss": 0.2224,
"step": 475,
"step_loss": 0.2245238721370697
},
{
"epoch": 1.4,
"grad_norm": 1.0933257642610934,
"kl": 0.3724210858345032,
"learning_rate": 4.195739619259933e-06,
"loss": 0.2081,
"step": 476,
"step_loss": 0.2216363102197647
},
{
"epoch": 1.4,
"grad_norm": 1.206514414530843,
"kl": 0.33405083417892456,
"learning_rate": 4.19254170870578e-06,
"loss": 0.2272,
"step": 477,
"step_loss": 0.19681967794895172
},
{
"epoch": 1.41,
"grad_norm": 1.127545668719584,
"kl": 0.36871930956840515,
"learning_rate": 4.18933884263891e-06,
"loss": 0.2207,
"step": 478,
"step_loss": 0.18126648664474487
},
{
"epoch": 1.41,
"grad_norm": 1.1433635783272997,
"kl": 0.3497537076473236,
"learning_rate": 4.186131032062018e-06,
"loss": 0.2206,
"step": 479,
"step_loss": 0.20069673657417297
},
{
"epoch": 1.41,
"grad_norm": 1.0952740745497684,
"kl": 0.39063096046447754,
"learning_rate": 4.182918287994781e-06,
"loss": 0.2209,
"step": 480,
"step_loss": 0.18552149832248688
},
{
"epoch": 1.41,
"grad_norm": 1.098642354287991,
"kl": 0.33700186014175415,
"learning_rate": 4.1797006214738264e-06,
"loss": 0.2026,
"step": 481,
"step_loss": 0.18201524019241333
},
{
"epoch": 1.42,
"grad_norm": 1.045712202429604,
"kl": 0.35738605260849,
"learning_rate": 4.17647804355269e-06,
"loss": 0.2026,
"step": 482,
"step_loss": 0.18481150269508362
},
{
"epoch": 1.42,
"grad_norm": 1.1438459095582905,
"kl": 0.3997901678085327,
"learning_rate": 4.1732505653017805e-06,
"loss": 0.2192,
"step": 483,
"step_loss": 0.23264771699905396
},
{
"epoch": 1.42,
"grad_norm": 1.1805883340710588,
"kl": 0.342068612575531,
"learning_rate": 4.17001819780834e-06,
"loss": 0.2131,
"step": 484,
"step_loss": 0.18483933806419373
},
{
"epoch": 1.43,
"grad_norm": 1.1642629591622176,
"kl": 0.3150475323200226,
"learning_rate": 4.1667809521764065e-06,
"loss": 0.2144,
"step": 485,
"step_loss": 0.2131662219762802
},
{
"epoch": 1.43,
"grad_norm": 1.0776552130456476,
"kl": 0.3208051025867462,
"learning_rate": 4.163538839526777e-06,
"loss": 0.2029,
"step": 486,
"step_loss": 0.1959662288427353
},
{
"epoch": 1.43,
"grad_norm": 1.0456011694309113,
"kl": 0.4914534091949463,
"learning_rate": 4.160291870996966e-06,
"loss": 0.2082,
"step": 487,
"step_loss": 0.21434994041919708
},
{
"epoch": 1.43,
"grad_norm": 1.0702724310064322,
"kl": 0.39285576343536377,
"learning_rate": 4.157040057741171e-06,
"loss": 0.2118,
"step": 488,
"step_loss": 0.20233172178268433
},
{
"epoch": 1.44,
"grad_norm": 1.022356313983593,
"kl": 0.29209986329078674,
"learning_rate": 4.15378341093023e-06,
"loss": 0.2109,
"step": 489,
"step_loss": 0.19211889803409576
},
{
"epoch": 1.44,
"grad_norm": 1.1060860463863098,
"kl": 0.31146690249443054,
"learning_rate": 4.150521941751589e-06,
"loss": 0.2284,
"step": 490,
"step_loss": 0.22323687374591827
},
{
"epoch": 1.44,
"grad_norm": 1.086327297590339,
"kl": 0.3390502333641052,
"learning_rate": 4.147255661409255e-06,
"loss": 0.2112,
"step": 491,
"step_loss": 0.19411392509937286
},
{
"epoch": 1.45,
"grad_norm": 1.0776562984953817,
"kl": 0.3938494026660919,
"learning_rate": 4.14398458112377e-06,
"loss": 0.2128,
"step": 492,
"step_loss": 0.22699597477912903
},
{
"epoch": 1.45,
"grad_norm": 1.0858023168277036,
"kl": 0.3118027448654175,
"learning_rate": 4.140708712132157e-06,
"loss": 0.2156,
"step": 493,
"step_loss": 0.21079112589359283
},
{
"epoch": 1.45,
"grad_norm": 1.1315907434090295,
"kl": 0.37194758653640747,
"learning_rate": 4.137428065687896e-06,
"loss": 0.2121,
"step": 494,
"step_loss": 0.2287684679031372
},
{
"epoch": 1.46,
"grad_norm": 1.1331173453727341,
"kl": 0.3639126121997833,
"learning_rate": 4.134142653060875e-06,
"loss": 0.2186,
"step": 495,
"step_loss": 0.2055366486310959
},
{
"epoch": 1.46,
"grad_norm": 1.1630295644530058,
"kl": 0.4070471525192261,
"learning_rate": 4.130852485537357e-06,
"loss": 0.2321,
"step": 496,
"step_loss": 0.2210061103105545
},
{
"epoch": 1.46,
"grad_norm": 1.1594682830751208,
"kl": 0.40022650361061096,
"learning_rate": 4.127557574419938e-06,
"loss": 0.2245,
"step": 497,
"step_loss": 0.2172410786151886
},
{
"epoch": 1.46,
"grad_norm": 1.0825009135777626,
"kl": 0.3248887062072754,
"learning_rate": 4.12425793102751e-06,
"loss": 0.2112,
"step": 498,
"step_loss": 0.18749205768108368
},
{
"epoch": 1.47,
"grad_norm": 1.159823925574004,
"kl": 0.4212324321269989,
"learning_rate": 4.120953566695222e-06,
"loss": 0.2334,
"step": 499,
"step_loss": 0.2644669711589813
},
{
"epoch": 1.47,
"grad_norm": 1.0191841777064992,
"kl": 0.39752498269081116,
"learning_rate": 4.117644492774441e-06,
"loss": 0.2073,
"step": 500,
"step_loss": 0.1976650357246399
},
{
"epoch": 1.47,
"grad_norm": 1.1191918326441155,
"kl": 0.3732473850250244,
"learning_rate": 4.11433072063271e-06,
"loss": 0.2132,
"step": 501,
"step_loss": 0.18028053641319275
},
{
"epoch": 1.48,
"grad_norm": 1.1922599610472286,
"kl": 0.3869403004646301,
"learning_rate": 4.111012261653716e-06,
"loss": 0.2293,
"step": 502,
"step_loss": 0.2117547243833542
},
{
"epoch": 1.48,
"grad_norm": 1.1390828459169302,
"kl": 0.3269495368003845,
"learning_rate": 4.1076891272372435e-06,
"loss": 0.2253,
"step": 503,
"step_loss": 0.20671634376049042
},
{
"epoch": 1.48,
"grad_norm": 1.1027235919075196,
"kl": 0.3744144141674042,
"learning_rate": 4.104361328799139e-06,
"loss": 0.2237,
"step": 504,
"step_loss": 0.20059865713119507
},
{
"epoch": 1.48,
"grad_norm": 1.0613715757167175,
"kl": 0.3339211940765381,
"learning_rate": 4.101028877771271e-06,
"loss": 0.2047,
"step": 505,
"step_loss": 0.17482547461986542
},
{
"epoch": 1.49,
"grad_norm": 1.09162612153003,
"kl": 0.3029939830303192,
"learning_rate": 4.09769178560149e-06,
"loss": 0.2138,
"step": 506,
"step_loss": 0.21953245997428894
},
{
"epoch": 1.49,
"grad_norm": 1.2415791947427925,
"kl": 0.3678430914878845,
"learning_rate": 4.094350063753594e-06,
"loss": 0.2275,
"step": 507,
"step_loss": 0.20582033693790436
},
{
"epoch": 1.49,
"grad_norm": 1.1048759043495957,
"kl": 0.35326552391052246,
"learning_rate": 4.0910037237072805e-06,
"loss": 0.215,
"step": 508,
"step_loss": 0.20594316720962524
},
{
"epoch": 1.5,
"grad_norm": 1.0129951540806574,
"kl": 0.3663599491119385,
"learning_rate": 4.087652776958113e-06,
"loss": 0.2142,
"step": 509,
"step_loss": 0.20410504937171936
},
{
"epoch": 1.5,
"grad_norm": 1.046767706141149,
"kl": 0.2914074659347534,
"learning_rate": 4.084297235017482e-06,
"loss": 0.2192,
"step": 510,
"step_loss": 0.1773955076932907
},
{
"epoch": 1.5,
"grad_norm": 1.0601343367693614,
"kl": 0.32609421014785767,
"learning_rate": 4.0809371094125635e-06,
"loss": 0.2082,
"step": 511,
"step_loss": 0.1615651398897171
},
{
"epoch": 1.51,
"grad_norm": 1.1354172849507647,
"kl": 0.3583594858646393,
"learning_rate": 4.077572411686277e-06,
"loss": 0.2119,
"step": 512,
"step_loss": 0.21853014826774597
},
{
"epoch": 1.51,
"grad_norm": 1.1763065280060416,
"kl": 0.34837618470191956,
"learning_rate": 4.07420315339725e-06,
"loss": 0.2182,
"step": 513,
"step_loss": 0.23535805940628052
},
{
"epoch": 1.51,
"grad_norm": 1.1126977629565107,
"kl": 0.43193358182907104,
"learning_rate": 4.070829346119778e-06,
"loss": 0.2082,
"step": 514,
"step_loss": 0.2645440101623535
},
{
"epoch": 1.51,
"grad_norm": 1.0882216657003507,
"kl": 0.29184776544570923,
"learning_rate": 4.06745100144378e-06,
"loss": 0.21,
"step": 515,
"step_loss": 0.18505370616912842
},
{
"epoch": 1.52,
"grad_norm": 1.0968084081449223,
"kl": 0.32751548290252686,
"learning_rate": 4.064068130974767e-06,
"loss": 0.2079,
"step": 516,
"step_loss": 0.20232903957366943
},
{
"epoch": 1.52,
"grad_norm": 1.099644580772103,
"kl": 0.37286919355392456,
"learning_rate": 4.060680746333793e-06,
"loss": 0.2085,
"step": 517,
"step_loss": 0.23085977137088776
},
{
"epoch": 1.52,
"grad_norm": 1.1121872730260256,
"kl": 0.25646811723709106,
"learning_rate": 4.057288859157423e-06,
"loss": 0.2174,
"step": 518,
"step_loss": 0.16534452140331268
},
{
"epoch": 1.53,
"grad_norm": 1.081954078814375,
"kl": 0.332968145608902,
"learning_rate": 4.053892481097686e-06,
"loss": 0.2119,
"step": 519,
"step_loss": 0.22676271200180054
},
{
"epoch": 1.53,
"grad_norm": 1.0690195878534157,
"kl": 0.38490551710128784,
"learning_rate": 4.050491623822041e-06,
"loss": 0.2086,
"step": 520,
"step_loss": 0.2052135318517685
},
{
"epoch": 1.53,
"grad_norm": 1.135011971238312,
"kl": 0.44730523228645325,
"learning_rate": 4.047086299013332e-06,
"loss": 0.21,
"step": 521,
"step_loss": 0.22579625248908997
},
{
"epoch": 1.53,
"grad_norm": 1.1803940053183595,
"kl": 0.3478749096393585,
"learning_rate": 4.0436765183697516e-06,
"loss": 0.2289,
"step": 522,
"step_loss": 0.20054206252098083
},
{
"epoch": 1.54,
"grad_norm": 1.06138099620151,
"kl": 0.35531559586524963,
"learning_rate": 4.040262293604799e-06,
"loss": 0.2117,
"step": 523,
"step_loss": 0.23341700434684753
},
{
"epoch": 1.54,
"grad_norm": 1.131403790342062,
"kl": 0.36995428800582886,
"learning_rate": 4.036843636447242e-06,
"loss": 0.2148,
"step": 524,
"step_loss": 0.23521539568901062
},
{
"epoch": 1.54,
"grad_norm": 1.0809651297230498,
"kl": 0.33512529730796814,
"learning_rate": 4.0334205586410706e-06,
"loss": 0.2239,
"step": 525,
"step_loss": 0.23108862340450287
},
{
"epoch": 1.55,
"grad_norm": 0.9644089850533444,
"kl": 0.3100147843360901,
"learning_rate": 4.029993071945465e-06,
"loss": 0.2021,
"step": 526,
"step_loss": 0.16294045746326447
},
{
"epoch": 1.55,
"grad_norm": 1.14530814951946,
"kl": 0.4870828688144684,
"learning_rate": 4.026561188134749e-06,
"loss": 0.2214,
"step": 527,
"step_loss": 0.23596243560314178
},
{
"epoch": 1.55,
"grad_norm": 1.1472820386014453,
"kl": 0.4136597216129303,
"learning_rate": 4.023124918998353e-06,
"loss": 0.2209,
"step": 528,
"step_loss": 0.2301231026649475
},
{
"epoch": 1.56,
"grad_norm": 1.123885543146704,
"kl": 0.3524470925331116,
"learning_rate": 4.01968427634077e-06,
"loss": 0.2253,
"step": 529,
"step_loss": 0.21960632503032684
},
{
"epoch": 1.56,
"grad_norm": 0.9967913335410991,
"kl": 0.28382453322410583,
"learning_rate": 4.016239271981519e-06,
"loss": 0.1989,
"step": 530,
"step_loss": 0.1486484706401825
},
{
"epoch": 1.56,
"grad_norm": 1.0902910989337662,
"kl": 0.38825753331184387,
"learning_rate": 4.012789917755102e-06,
"loss": 0.2212,
"step": 531,
"step_loss": 0.19914306700229645
},
{
"epoch": 1.56,
"grad_norm": 1.0173576760514342,
"kl": 0.3644031584262848,
"learning_rate": 4.0093362255109645e-06,
"loss": 0.2037,
"step": 532,
"step_loss": 0.2210751622915268
},
{
"epoch": 1.57,
"grad_norm": 1.1991664117084275,
"kl": 0.38465866446495056,
"learning_rate": 4.0058782071134544e-06,
"loss": 0.2196,
"step": 533,
"step_loss": 0.20659992098808289
},
{
"epoch": 1.57,
"grad_norm": 1.1662242070255937,
"kl": 0.4157981276512146,
"learning_rate": 4.002415874441778e-06,
"loss": 0.2163,
"step": 534,
"step_loss": 0.23980651795864105
},
{
"epoch": 1.57,
"grad_norm": 1.0904814655761281,
"kl": 0.36056768894195557,
"learning_rate": 3.998949239389968e-06,
"loss": 0.2117,
"step": 535,
"step_loss": 0.21642203629016876
},
{
"epoch": 1.58,
"grad_norm": 1.0852927276607216,
"kl": 0.3154537081718445,
"learning_rate": 3.995478313866832e-06,
"loss": 0.2147,
"step": 536,
"step_loss": 0.2088639885187149
},
{
"epoch": 1.58,
"grad_norm": 1.0681140345975404,
"kl": 0.423949658870697,
"learning_rate": 3.992003109795918e-06,
"loss": 0.231,
"step": 537,
"step_loss": 0.21801121532917023
},
{
"epoch": 1.58,
"grad_norm": 1.076997180877474,
"kl": 0.39057889580726624,
"learning_rate": 3.9885236391154725e-06,
"loss": 0.2214,
"step": 538,
"step_loss": 0.22681710124015808
},
{
"epoch": 1.58,
"grad_norm": 1.098273331215662,
"kl": 0.4600808620452881,
"learning_rate": 3.985039913778398e-06,
"loss": 0.2204,
"step": 539,
"step_loss": 0.27202731370925903
},
{
"epoch": 1.59,
"grad_norm": 1.1677412111257455,
"kl": 0.38847512006759644,
"learning_rate": 3.981551945752214e-06,
"loss": 0.2294,
"step": 540,
"step_loss": 0.22447596490383148
},
{
"epoch": 1.59,
"grad_norm": 1.014321414944766,
"kl": 0.37826013565063477,
"learning_rate": 3.978059747019014e-06,
"loss": 0.2029,
"step": 541,
"step_loss": 0.24410393834114075
},
{
"epoch": 1.59,
"grad_norm": 1.0948405619058077,
"kl": 0.3952583074569702,
"learning_rate": 3.974563329575426e-06,
"loss": 0.2234,
"step": 542,
"step_loss": 0.23489505052566528
},
{
"epoch": 1.6,
"grad_norm": 1.1174386949172748,
"kl": 0.38856184482574463,
"learning_rate": 3.971062705432569e-06,
"loss": 0.2169,
"step": 543,
"step_loss": 0.23416107892990112
},
{
"epoch": 1.6,
"grad_norm": 1.181312376726803,
"kl": 0.42837756872177124,
"learning_rate": 3.967557886616014e-06,
"loss": 0.2253,
"step": 544,
"step_loss": 0.24485422670841217
},
{
"epoch": 1.6,
"grad_norm": 1.0976730155343217,
"kl": 0.3416699767112732,
"learning_rate": 3.964048885165741e-06,
"loss": 0.2289,
"step": 545,
"step_loss": 0.2253103256225586
},
{
"epoch": 1.61,
"grad_norm": 1.1975345651778746,
"kl": 0.3401637673377991,
"learning_rate": 3.9605357131360994e-06,
"loss": 0.2249,
"step": 546,
"step_loss": 0.24569852650165558
},
{
"epoch": 1.61,
"grad_norm": 1.0984507770197158,
"kl": 0.33863207697868347,
"learning_rate": 3.957018382595765e-06,
"loss": 0.2156,
"step": 547,
"step_loss": 0.24580451846122742
},
{
"epoch": 1.61,
"grad_norm": 1.1716298673466186,
"kl": 0.33466053009033203,
"learning_rate": 3.953496905627702e-06,
"loss": 0.2147,
"step": 548,
"step_loss": 0.18085306882858276
},
{
"epoch": 1.61,
"grad_norm": 1.1734884852928138,
"kl": 0.34325727820396423,
"learning_rate": 3.949971294329112e-06,
"loss": 0.2378,
"step": 549,
"step_loss": 0.23332083225250244
},
{
"epoch": 1.62,
"grad_norm": 1.136385000201455,
"kl": 0.36762735247612,
"learning_rate": 3.946441560811407e-06,
"loss": 0.2235,
"step": 550,
"step_loss": 0.22139766812324524
},
{
"epoch": 1.62,
"grad_norm": 1.0264760838893863,
"kl": 0.3464069366455078,
"learning_rate": 3.942907717200154e-06,
"loss": 0.216,
"step": 551,
"step_loss": 0.19486932456493378
},
{
"epoch": 1.62,
"grad_norm": 1.058251659960203,
"kl": 0.352877140045166,
"learning_rate": 3.939369775635042e-06,
"loss": 0.2112,
"step": 552,
"step_loss": 0.2148822396993637
},
{
"epoch": 1.63,
"grad_norm": 1.072714998063151,
"kl": 0.33858194947242737,
"learning_rate": 3.935827748269837e-06,
"loss": 0.2112,
"step": 553,
"step_loss": 0.20188114047050476
},
{
"epoch": 1.63,
"grad_norm": 1.0835941815255097,
"kl": 0.3523325026035309,
"learning_rate": 3.932281647272341e-06,
"loss": 0.2125,
"step": 554,
"step_loss": 0.19613249599933624
},
{
"epoch": 1.63,
"grad_norm": 1.1022095639794895,
"kl": 0.41155239939689636,
"learning_rate": 3.9287314848243516e-06,
"loss": 0.2171,
"step": 555,
"step_loss": 0.2243964970111847
},
{
"epoch": 1.63,
"grad_norm": 1.0886715974181185,
"kl": 0.32835161685943604,
"learning_rate": 3.925177273121613e-06,
"loss": 0.2093,
"step": 556,
"step_loss": 0.19310572743415833
},
{
"epoch": 1.64,
"grad_norm": 1.0796872927034789,
"kl": 0.3089352548122406,
"learning_rate": 3.921619024373787e-06,
"loss": 0.2044,
"step": 557,
"step_loss": 0.19184894859790802
},
{
"epoch": 1.64,
"grad_norm": 1.0718596059733634,
"kl": 0.35348087549209595,
"learning_rate": 3.918056750804397e-06,
"loss": 0.2102,
"step": 558,
"step_loss": 0.1941283494234085
},
{
"epoch": 1.64,
"grad_norm": 1.146403256199632,
"kl": 0.339926540851593,
"learning_rate": 3.914490464650798e-06,
"loss": 0.214,
"step": 559,
"step_loss": 0.19512970745563507
},
{
"epoch": 1.65,
"grad_norm": 1.104285223312089,
"kl": 0.42092275619506836,
"learning_rate": 3.910920178164127e-06,
"loss": 0.2238,
"step": 560,
"step_loss": 0.2336612045764923
},
{
"epoch": 1.65,
"grad_norm": 1.0272793405162493,
"kl": 0.3618415892124176,
"learning_rate": 3.907345903609264e-06,
"loss": 0.2193,
"step": 561,
"step_loss": 0.2407916635274887
},
{
"epoch": 1.65,
"grad_norm": 1.105176543312791,
"kl": 0.3805115818977356,
"learning_rate": 3.903767653264787e-06,
"loss": 0.2085,
"step": 562,
"step_loss": 0.24206788837909698
},
{
"epoch": 1.66,
"grad_norm": 1.0939508934811706,
"kl": 0.36903828382492065,
"learning_rate": 3.900185439422934e-06,
"loss": 0.2092,
"step": 563,
"step_loss": 0.16102567315101624
},
{
"epoch": 1.66,
"grad_norm": 1.0506794536305846,
"kl": 0.3899340033531189,
"learning_rate": 3.896599274389558e-06,
"loss": 0.208,
"step": 564,
"step_loss": 0.22886879742145538
},
{
"epoch": 1.66,
"grad_norm": 1.0301032554040463,
"kl": 0.348848819732666,
"learning_rate": 3.893009170484086e-06,
"loss": 0.2182,
"step": 565,
"step_loss": 0.23902392387390137
},
{
"epoch": 1.66,
"grad_norm": 1.0779620821495426,
"kl": 0.34697240591049194,
"learning_rate": 3.889415140039473e-06,
"loss": 0.2148,
"step": 566,
"step_loss": 0.1859664022922516
},
{
"epoch": 1.67,
"grad_norm": 1.018795902879667,
"kl": 0.38165339827537537,
"learning_rate": 3.8858171954021695e-06,
"loss": 0.2135,
"step": 567,
"step_loss": 0.2154882550239563
},
{
"epoch": 1.67,
"grad_norm": 1.0495585663185147,
"kl": 0.389653742313385,
"learning_rate": 3.882215348932065e-06,
"loss": 0.2083,
"step": 568,
"step_loss": 0.24652042984962463
},
{
"epoch": 1.67,
"grad_norm": 1.1620507618203504,
"kl": 0.28063511848449707,
"learning_rate": 3.878609613002456e-06,
"loss": 0.2309,
"step": 569,
"step_loss": 0.21258799731731415
},
{
"epoch": 1.68,
"grad_norm": 1.0370207351765348,
"kl": 0.478799045085907,
"learning_rate": 3.875e-06,
"loss": 0.2206,
"step": 570,
"step_loss": 0.2444104254245758
},
{
"epoch": 1.68,
"grad_norm": 1.1178474477754934,
"kl": 0.33863985538482666,
"learning_rate": 3.8713865223246744e-06,
"loss": 0.2015,
"step": 571,
"step_loss": 0.18036487698554993
},
{
"epoch": 1.68,
"grad_norm": 1.1609234312290915,
"kl": 0.3761056661605835,
"learning_rate": 3.867769192389731e-06,
"loss": 0.2247,
"step": 572,
"step_loss": 0.22269845008850098
},
{
"epoch": 1.68,
"grad_norm": 1.032185592487916,
"kl": 0.3428119719028473,
"learning_rate": 3.864148022621657e-06,
"loss": 0.2071,
"step": 573,
"step_loss": 0.18176212906837463
},
{
"epoch": 1.69,
"grad_norm": 1.0666730498083177,
"kl": 0.389988511800766,
"learning_rate": 3.8605230254601275e-06,
"loss": 0.2223,
"step": 574,
"step_loss": 0.2073187381029129
},
{
"epoch": 1.69,
"grad_norm": 1.0611933920672172,
"kl": 0.4241553246974945,
"learning_rate": 3.856894213357969e-06,
"loss": 0.2166,
"step": 575,
"step_loss": 0.21341772377490997
},
{
"epoch": 1.69,
"grad_norm": 1.0463821902335075,
"kl": 0.3435147702693939,
"learning_rate": 3.853261598781112e-06,
"loss": 0.209,
"step": 576,
"step_loss": 0.2048221081495285
},
{
"epoch": 1.7,
"grad_norm": 1.1170695382643887,
"kl": 0.3356623649597168,
"learning_rate": 3.849625194208548e-06,
"loss": 0.2221,
"step": 577,
"step_loss": 0.19540490210056305
},
{
"epoch": 1.7,
"grad_norm": 0.9978130835660762,
"kl": 0.3102739751338959,
"learning_rate": 3.845985012132291e-06,
"loss": 0.2093,
"step": 578,
"step_loss": 0.1982828974723816
},
{
"epoch": 1.7,
"grad_norm": 1.1198188896714405,
"kl": 0.36010634899139404,
"learning_rate": 3.842341065057329e-06,
"loss": 0.2115,
"step": 579,
"step_loss": 0.19751714169979095
},
{
"epoch": 1.71,
"grad_norm": 1.1041808967153213,
"kl": 0.40271279215812683,
"learning_rate": 3.8386933655015855e-06,
"loss": 0.2121,
"step": 580,
"step_loss": 0.20048068463802338
},
{
"epoch": 1.71,
"grad_norm": 1.0991371658621079,
"kl": 0.3609469532966614,
"learning_rate": 3.8350419259958745e-06,
"loss": 0.2153,
"step": 581,
"step_loss": 0.2352994829416275
},
{
"epoch": 1.71,
"grad_norm": 1.0699438170139548,
"kl": 0.3842519223690033,
"learning_rate": 3.831386759083857e-06,
"loss": 0.2134,
"step": 582,
"step_loss": 0.22360186278820038
},
{
"epoch": 1.71,
"grad_norm": 1.0996292681613908,
"kl": 0.3337691128253937,
"learning_rate": 3.827727877322001e-06,
"loss": 0.2195,
"step": 583,
"step_loss": 0.1927204579114914
},
{
"epoch": 1.72,
"grad_norm": 1.184402994917098,
"kl": 0.33971020579338074,
"learning_rate": 3.824065293279532e-06,
"loss": 0.223,
"step": 584,
"step_loss": 0.18949981033802032
},
{
"epoch": 1.72,
"grad_norm": 1.0618979276884035,
"kl": 0.44578462839126587,
"learning_rate": 3.820399019538397e-06,
"loss": 0.2167,
"step": 585,
"step_loss": 0.2560417354106903
},
{
"epoch": 1.72,
"grad_norm": 1.0548994769662545,
"kl": 0.30985498428344727,
"learning_rate": 3.816729068693215e-06,
"loss": 0.2138,
"step": 586,
"step_loss": 0.17075133323669434
},
{
"epoch": 1.73,
"grad_norm": 1.1046250699328048,
"kl": 0.40717557072639465,
"learning_rate": 3.813055453351242e-06,
"loss": 0.2094,
"step": 587,
"step_loss": 0.2145349383354187
},
{
"epoch": 1.73,
"grad_norm": 1.0398697406320772,
"kl": 0.39057114720344543,
"learning_rate": 3.809378186132318e-06,
"loss": 0.2124,
"step": 588,
"step_loss": 0.22147606313228607
},
{
"epoch": 1.73,
"grad_norm": 1.0908270949218593,
"kl": 0.37297523021698,
"learning_rate": 3.805697279668829e-06,
"loss": 0.2194,
"step": 589,
"step_loss": 0.25735077261924744
},
{
"epoch": 1.73,
"grad_norm": 1.0414554175112045,
"kl": 0.3597789704799652,
"learning_rate": 3.8020127466056638e-06,
"loss": 0.2035,
"step": 590,
"step_loss": 0.19476523995399475
},
{
"epoch": 1.74,
"grad_norm": 1.042370270883592,
"kl": 0.3284936547279358,
"learning_rate": 3.7983245996001695e-06,
"loss": 0.2099,
"step": 591,
"step_loss": 0.17381024360656738
},
{
"epoch": 1.74,
"grad_norm": 1.1212450649744,
"kl": 0.29087406396865845,
"learning_rate": 3.7946328513221058e-06,
"loss": 0.225,
"step": 592,
"step_loss": 0.2309185266494751
},
{
"epoch": 1.74,
"grad_norm": 1.1047849123898952,
"kl": 0.3819228410720825,
"learning_rate": 3.7909375144536077e-06,
"loss": 0.2209,
"step": 593,
"step_loss": 0.2588649094104767
},
{
"epoch": 1.75,
"grad_norm": 1.0515086411036276,
"kl": 0.3881183862686157,
"learning_rate": 3.7872386016891342e-06,
"loss": 0.2054,
"step": 594,
"step_loss": 0.1839597374200821
},
{
"epoch": 1.75,
"grad_norm": 1.0871033893493067,
"kl": 0.38226088881492615,
"learning_rate": 3.783536125735431e-06,
"loss": 0.2089,
"step": 595,
"step_loss": 0.20572155714035034
},
{
"epoch": 1.75,
"grad_norm": 1.1196016623588247,
"kl": 0.34919315576553345,
"learning_rate": 3.7798300993114835e-06,
"loss": 0.213,
"step": 596,
"step_loss": 0.19187521934509277
},
{
"epoch": 1.76,
"grad_norm": 1.039644651891598,
"kl": 0.2882639169692993,
"learning_rate": 3.7761205351484732e-06,
"loss": 0.212,
"step": 597,
"step_loss": 0.1760086715221405
},
{
"epoch": 1.76,
"grad_norm": 1.1245600714414823,
"kl": 0.37673628330230713,
"learning_rate": 3.7724074459897346e-06,
"loss": 0.22,
"step": 598,
"step_loss": 0.1975017637014389
},
{
"epoch": 1.76,
"grad_norm": 1.1311382944584354,
"kl": 0.38365456461906433,
"learning_rate": 3.7686908445907126e-06,
"loss": 0.219,
"step": 599,
"step_loss": 0.21963948011398315
},
{
"epoch": 1.76,
"grad_norm": 1.078085850267425,
"kl": 0.382407009601593,
"learning_rate": 3.7649707437189178e-06,
"loss": 0.2195,
"step": 600,
"step_loss": 0.23141171038150787
},
{
"epoch": 1.77,
"grad_norm": 1.0427847639241463,
"kl": 0.37707236409187317,
"learning_rate": 3.761247156153881e-06,
"loss": 0.2043,
"step": 601,
"step_loss": 0.21038171648979187
},
{
"epoch": 1.77,
"grad_norm": 1.0896289777889905,
"kl": 0.36756861209869385,
"learning_rate": 3.7575200946871104e-06,
"loss": 0.2137,
"step": 602,
"step_loss": 0.23161782324314117
},
{
"epoch": 1.77,
"grad_norm": 1.0273854062297945,
"kl": 0.27731993794441223,
"learning_rate": 3.7537895721220513e-06,
"loss": 0.2046,
"step": 603,
"step_loss": 0.23568624258041382
},
{
"epoch": 1.78,
"grad_norm": 1.0737448045607119,
"kl": 0.3347272574901581,
"learning_rate": 3.7500556012740343e-06,
"loss": 0.2212,
"step": 604,
"step_loss": 0.1964089274406433
},
{
"epoch": 1.78,
"grad_norm": 1.0946594680520594,
"kl": 0.3800688683986664,
"learning_rate": 3.746318194970239e-06,
"loss": 0.2176,
"step": 605,
"step_loss": 0.22273258864879608
},
{
"epoch": 1.78,
"grad_norm": 1.1129855917797444,
"kl": 0.3622528314590454,
"learning_rate": 3.7425773660496453e-06,
"loss": 0.2186,
"step": 606,
"step_loss": 0.2413870245218277
},
{
"epoch": 1.78,
"grad_norm": 1.145527181762428,
"kl": 0.3814672827720642,
"learning_rate": 3.7388331273629914e-06,
"loss": 0.2337,
"step": 607,
"step_loss": 0.23212337493896484
},
{
"epoch": 1.79,
"grad_norm": 1.0984629724406025,
"kl": 0.2834460735321045,
"learning_rate": 3.7350854917727287e-06,
"loss": 0.2235,
"step": 608,
"step_loss": 0.17759786546230316
},
{
"epoch": 1.79,
"grad_norm": 1.1001711709509128,
"kl": 0.3364371359348297,
"learning_rate": 3.7313344721529765e-06,
"loss": 0.2146,
"step": 609,
"step_loss": 0.22853884100914001
},
{
"epoch": 1.79,
"grad_norm": 1.068141085066643,
"kl": 0.3433828353881836,
"learning_rate": 3.727580081389481e-06,
"loss": 0.2145,
"step": 610,
"step_loss": 0.21322953701019287
},
{
"epoch": 1.8,
"grad_norm": 1.0787297225275188,
"kl": 0.42644378542900085,
"learning_rate": 3.72382233237957e-06,
"loss": 0.2158,
"step": 611,
"step_loss": 0.2728964388370514
},
{
"epoch": 1.8,
"grad_norm": 1.0596709735361247,
"kl": 0.36292925477027893,
"learning_rate": 3.7200612380321034e-06,
"loss": 0.2096,
"step": 612,
"step_loss": 0.196391299366951
},
{
"epoch": 1.8,
"grad_norm": 0.9958580635836058,
"kl": 0.3467836081981659,
"learning_rate": 3.7162968112674387e-06,
"loss": 0.2074,
"step": 613,
"step_loss": 0.21914325654506683
},
{
"epoch": 1.81,
"grad_norm": 1.1091567079931492,
"kl": 0.39706793427467346,
"learning_rate": 3.7125290650173768e-06,
"loss": 0.2148,
"step": 614,
"step_loss": 0.23869748413562775
},
{
"epoch": 1.81,
"grad_norm": 1.0845278159536325,
"kl": 0.42876726388931274,
"learning_rate": 3.708758012225125e-06,
"loss": 0.217,
"step": 615,
"step_loss": 0.24647286534309387
},
{
"epoch": 1.81,
"grad_norm": 1.1069509053065003,
"kl": 0.31808775663375854,
"learning_rate": 3.7049836658452474e-06,
"loss": 0.2241,
"step": 616,
"step_loss": 0.21536415815353394
},
{
"epoch": 1.81,
"grad_norm": 1.0997318179324385,
"kl": 0.45750346779823303,
"learning_rate": 3.701206038843623e-06,
"loss": 0.2182,
"step": 617,
"step_loss": 0.23618176579475403
},
{
"epoch": 1.82,
"grad_norm": 1.098642449230739,
"kl": 0.3418111801147461,
"learning_rate": 3.697425144197402e-06,
"loss": 0.2181,
"step": 618,
"step_loss": 0.21152858436107635
},
{
"epoch": 1.82,
"grad_norm": 1.105833997472311,
"kl": 0.35120660066604614,
"learning_rate": 3.6936409948949563e-06,
"loss": 0.2235,
"step": 619,
"step_loss": 0.25562742352485657
},
{
"epoch": 1.82,
"grad_norm": 1.0909382099858478,
"kl": 0.38127419352531433,
"learning_rate": 3.689853603935843e-06,
"loss": 0.2023,
"step": 620,
"step_loss": 0.21061675250530243
},
{
"epoch": 1.83,
"grad_norm": 1.04203667585934,
"kl": 0.3696785569190979,
"learning_rate": 3.686062984330752e-06,
"loss": 0.2084,
"step": 621,
"step_loss": 0.2408429980278015
},
{
"epoch": 1.83,
"grad_norm": 1.0740199095756,
"kl": 0.35119834542274475,
"learning_rate": 3.682269149101465e-06,
"loss": 0.2249,
"step": 622,
"step_loss": 0.2259160876274109
},
{
"epoch": 1.83,
"grad_norm": 1.1244116476489243,
"kl": 0.39911478757858276,
"learning_rate": 3.6784721112808107e-06,
"loss": 0.2076,
"step": 623,
"step_loss": 0.2444825917482376
},
{
"epoch": 1.83,
"grad_norm": 1.084381841698426,
"kl": 0.364761620759964,
"learning_rate": 3.6746718839126195e-06,
"loss": 0.2136,
"step": 624,
"step_loss": 0.21551458537578583
},
{
"epoch": 1.84,
"grad_norm": 1.1078473708417569,
"kl": 0.44796106219291687,
"learning_rate": 3.6708684800516786e-06,
"loss": 0.2175,
"step": 625,
"step_loss": 0.219948410987854
},
{
"epoch": 1.84,
"grad_norm": 1.0214975768306558,
"kl": 0.2986973226070404,
"learning_rate": 3.6670619127636865e-06,
"loss": 0.2043,
"step": 626,
"step_loss": 0.16981080174446106
},
{
"epoch": 1.84,
"grad_norm": 1.0733872403257325,
"kl": 0.37517714500427246,
"learning_rate": 3.663252195125211e-06,
"loss": 0.2169,
"step": 627,
"step_loss": 0.21208983659744263
},
{
"epoch": 1.85,
"grad_norm": 1.1239709783885117,
"kl": 0.38882941007614136,
"learning_rate": 3.6594393402236405e-06,
"loss": 0.2128,
"step": 628,
"step_loss": 0.2071652114391327
},
{
"epoch": 1.85,
"grad_norm": 1.0181078127118095,
"kl": 0.35665225982666016,
"learning_rate": 3.655623361157141e-06,
"loss": 0.2065,
"step": 629,
"step_loss": 0.21186676621437073
},
{
"epoch": 1.85,
"grad_norm": 1.0980774285222674,
"kl": 0.33140304684638977,
"learning_rate": 3.65180427103461e-06,
"loss": 0.216,
"step": 630,
"step_loss": 0.23446890711784363
},
{
"epoch": 1.86,
"grad_norm": 1.1533939812862526,
"kl": 0.3695037364959717,
"learning_rate": 3.647982082975635e-06,
"loss": 0.2227,
"step": 631,
"step_loss": 0.2064458131790161
},
{
"epoch": 1.86,
"grad_norm": 1.0607104236817797,
"kl": 0.3754327893257141,
"learning_rate": 3.6441568101104434e-06,
"loss": 0.2074,
"step": 632,
"step_loss": 0.21298718452453613
},
{
"epoch": 1.86,
"grad_norm": 1.1080406855173834,
"kl": 0.372994601726532,
"learning_rate": 3.64032846557986e-06,
"loss": 0.2371,
"step": 633,
"step_loss": 0.22076019644737244
},
{
"epoch": 1.86,
"grad_norm": 1.061998718883708,
"kl": 0.36003583669662476,
"learning_rate": 3.6364970625352613e-06,
"loss": 0.2106,
"step": 634,
"step_loss": 0.236919105052948
},
{
"epoch": 1.87,
"grad_norm": 1.1629557628795668,
"kl": 0.40046006441116333,
"learning_rate": 3.6326626141385323e-06,
"loss": 0.2194,
"step": 635,
"step_loss": 0.21533732116222382
},
{
"epoch": 1.87,
"grad_norm": 1.1308113048397246,
"kl": 0.35290196537971497,
"learning_rate": 3.6288251335620185e-06,
"loss": 0.2356,
"step": 636,
"step_loss": 0.25752317905426025
},
{
"epoch": 1.87,
"grad_norm": 1.024178113211246,
"kl": 0.367868036031723,
"learning_rate": 3.6249846339884807e-06,
"loss": 0.2132,
"step": 637,
"step_loss": 0.2091902643442154
},
{
"epoch": 1.88,
"grad_norm": 1.0233015596218147,
"kl": 0.36171606183052063,
"learning_rate": 3.621141128611053e-06,
"loss": 0.211,
"step": 638,
"step_loss": 0.1969766914844513
},
{
"epoch": 1.88,
"grad_norm": 1.0521376056017973,
"kl": 0.3328798711299896,
"learning_rate": 3.617294630633193e-06,
"loss": 0.2148,
"step": 639,
"step_loss": 0.20545676350593567
},
{
"epoch": 1.88,
"grad_norm": 1.1495041143489746,
"kl": 0.31849485635757446,
"learning_rate": 3.613445153268641e-06,
"loss": 0.2273,
"step": 640,
"step_loss": 0.19370122253894806
},
{
"epoch": 1.88,
"grad_norm": 1.107261874903731,
"kl": 0.35327592492103577,
"learning_rate": 3.6095927097413697e-06,
"loss": 0.2289,
"step": 641,
"step_loss": 0.19487443566322327
},
{
"epoch": 1.89,
"grad_norm": 1.087476278188833,
"kl": 0.37533411383628845,
"learning_rate": 3.6057373132855426e-06,
"loss": 0.212,
"step": 642,
"step_loss": 0.20453642308712006
},
{
"epoch": 1.89,
"grad_norm": 1.0777004622363646,
"kl": 0.3517782390117645,
"learning_rate": 3.6018789771454686e-06,
"loss": 0.2215,
"step": 643,
"step_loss": 0.2225874960422516
},
{
"epoch": 1.89,
"grad_norm": 1.1041421512665277,
"kl": 0.3960123658180237,
"learning_rate": 3.5980177145755527e-06,
"loss": 0.2156,
"step": 644,
"step_loss": 0.19644665718078613
},
{
"epoch": 1.9,
"grad_norm": 1.107531345345793,
"kl": 0.3443949818611145,
"learning_rate": 3.5941535388402555e-06,
"loss": 0.218,
"step": 645,
"step_loss": 0.21444953978061676
},
{
"epoch": 1.9,
"grad_norm": 1.0633655556716426,
"kl": 0.32992398738861084,
"learning_rate": 3.5902864632140417e-06,
"loss": 0.1945,
"step": 646,
"step_loss": 0.17697668075561523
},
{
"epoch": 1.9,
"grad_norm": 1.0660551981501736,
"kl": 0.3524007201194763,
"learning_rate": 3.5864165009813417e-06,
"loss": 0.2062,
"step": 647,
"step_loss": 0.2261100560426712
},
{
"epoch": 1.91,
"grad_norm": 1.0902351373924726,
"kl": 0.3847709894180298,
"learning_rate": 3.5825436654365005e-06,
"loss": 0.1977,
"step": 648,
"step_loss": 0.21421456336975098
},
{
"epoch": 1.91,
"grad_norm": 1.0921552134859336,
"kl": 0.3233397603034973,
"learning_rate": 3.578667969883733e-06,
"loss": 0.2088,
"step": 649,
"step_loss": 0.1856887936592102
},
{
"epoch": 1.91,
"grad_norm": 1.067332679290162,
"kl": 0.35494789481163025,
"learning_rate": 3.5747894276370792e-06,
"loss": 0.2174,
"step": 650,
"step_loss": 0.2479093074798584
},
{
"epoch": 1.91,
"grad_norm": 1.1216410520483295,
"kl": 0.3918030261993408,
"learning_rate": 3.5709080520203593e-06,
"loss": 0.2317,
"step": 651,
"step_loss": 0.21458828449249268
},
{
"epoch": 1.92,
"grad_norm": 1.0495414510313243,
"kl": 0.383411705493927,
"learning_rate": 3.5670238563671257e-06,
"loss": 0.2046,
"step": 652,
"step_loss": 0.22937092185020447
},
{
"epoch": 1.92,
"grad_norm": 1.137161738802843,
"kl": 0.39319974184036255,
"learning_rate": 3.563136854020621e-06,
"loss": 0.2304,
"step": 653,
"step_loss": 0.2653224468231201
},
{
"epoch": 1.92,
"grad_norm": 1.1155913828745578,
"kl": 0.4275739789009094,
"learning_rate": 3.5592470583337233e-06,
"loss": 0.2204,
"step": 654,
"step_loss": 0.2726008892059326
},
{
"epoch": 1.93,
"grad_norm": 1.0500891579513196,
"kl": 0.3193601071834564,
"learning_rate": 3.5553544826689148e-06,
"loss": 0.2075,
"step": 655,
"step_loss": 0.18977577984333038
},
{
"epoch": 1.93,
"grad_norm": 1.0438682449198995,
"kl": 0.3925110697746277,
"learning_rate": 3.551459140398221e-06,
"loss": 0.2184,
"step": 656,
"step_loss": 0.24175623059272766
},
{
"epoch": 1.93,
"grad_norm": 1.1175988513740611,
"kl": 0.3625691533088684,
"learning_rate": 3.547561044903175e-06,
"loss": 0.2138,
"step": 657,
"step_loss": 0.21793964505195618
},
{
"epoch": 1.93,
"grad_norm": 1.0707034837846736,
"kl": 0.3854144215583801,
"learning_rate": 3.5436602095747665e-06,
"loss": 0.2168,
"step": 658,
"step_loss": 0.2395915985107422
},
{
"epoch": 1.94,
"grad_norm": 1.1404652940954114,
"kl": 0.3602861166000366,
"learning_rate": 3.539756647813398e-06,
"loss": 0.2356,
"step": 659,
"step_loss": 0.21894899010658264
},
{
"epoch": 1.94,
"grad_norm": 0.9838424861989383,
"kl": 0.36652958393096924,
"learning_rate": 3.535850373028839e-06,
"loss": 0.1979,
"step": 660,
"step_loss": 0.20045427978038788
},
{
"epoch": 1.94,
"grad_norm": 1.095329737833727,
"kl": 0.3199812173843384,
"learning_rate": 3.5319413986401753e-06,
"loss": 0.2183,
"step": 661,
"step_loss": 0.26433855295181274
},
{
"epoch": 1.95,
"grad_norm": 1.053846419483373,
"kl": 0.4188472032546997,
"learning_rate": 3.5280297380757692e-06,
"loss": 0.2146,
"step": 662,
"step_loss": 0.2612619400024414
},
{
"epoch": 1.95,
"grad_norm": 1.0686913624789676,
"kl": 0.3134489357471466,
"learning_rate": 3.524115404773213e-06,
"loss": 0.2118,
"step": 663,
"step_loss": 0.18129369616508484
},
{
"epoch": 1.95,
"grad_norm": 1.0608198050048145,
"kl": 0.34840127825737,
"learning_rate": 3.5201984121792753e-06,
"loss": 0.2121,
"step": 664,
"step_loss": 0.18985848128795624
},
{
"epoch": 1.96,
"grad_norm": 1.0981997640992105,
"kl": 0.3330709934234619,
"learning_rate": 3.516278773749863e-06,
"loss": 0.2064,
"step": 665,
"step_loss": 0.2144535481929779
},
{
"epoch": 1.96,
"grad_norm": 1.1140559930597989,
"kl": 0.3608890771865845,
"learning_rate": 3.512356502949973e-06,
"loss": 0.2141,
"step": 666,
"step_loss": 0.24026130139827728
},
{
"epoch": 1.96,
"grad_norm": 1.0633605809716729,
"kl": 0.32477813959121704,
"learning_rate": 3.508431613253644e-06,
"loss": 0.2104,
"step": 667,
"step_loss": 0.19436398148536682
},
{
"epoch": 1.96,
"grad_norm": 0.9553573137927444,
"kl": 0.33438920974731445,
"learning_rate": 3.5045041181439117e-06,
"loss": 0.1949,
"step": 668,
"step_loss": 0.17747747898101807
},
{
"epoch": 1.97,
"grad_norm": 1.1177038853329584,
"kl": 0.3582827150821686,
"learning_rate": 3.500574031112759e-06,
"loss": 0.223,
"step": 669,
"step_loss": 0.21867407858371735
},
{
"epoch": 1.97,
"grad_norm": 1.1246746241729981,
"kl": 0.3887527585029602,
"learning_rate": 3.496641365661079e-06,
"loss": 0.2136,
"step": 670,
"step_loss": 0.20844541490077972
},
{
"epoch": 1.97,
"grad_norm": 1.1013777287048612,
"kl": 0.36839255690574646,
"learning_rate": 3.4927061352986163e-06,
"loss": 0.2264,
"step": 671,
"step_loss": 0.23754476010799408
},
{
"epoch": 1.98,
"grad_norm": 1.0736030062333497,
"kl": 0.37584322690963745,
"learning_rate": 3.4887683535439305e-06,
"loss": 0.2149,
"step": 672,
"step_loss": 0.22229911386966705
},
{
"epoch": 1.98,
"grad_norm": 1.1162319765419337,
"kl": 0.38228553533554077,
"learning_rate": 3.484828033924343e-06,
"loss": 0.2289,
"step": 673,
"step_loss": 0.24069103598594666
},
{
"epoch": 1.98,
"grad_norm": 1.0677391357940962,
"kl": 0.37927836179733276,
"learning_rate": 3.4808851899758967e-06,
"loss": 0.2147,
"step": 674,
"step_loss": 0.19201472401618958
},
{
"epoch": 1.98,
"grad_norm": 1.0515478096285862,
"kl": 0.3583109974861145,
"learning_rate": 3.476939835243304e-06,
"loss": 0.2151,
"step": 675,
"step_loss": 0.22411265969276428
},
{
"epoch": 1.99,
"grad_norm": 1.0225232678263274,
"kl": 0.285269558429718,
"learning_rate": 3.4729919832799036e-06,
"loss": 0.199,
"step": 676,
"step_loss": 0.20125800371170044
},
{
"epoch": 1.99,
"grad_norm": 1.0899879606264502,
"kl": 0.34659573435783386,
"learning_rate": 3.46904164764761e-06,
"loss": 0.2159,
"step": 677,
"step_loss": 0.21278782188892365
},
{
"epoch": 1.99,
"grad_norm": 1.0775849606901395,
"kl": 0.3956890106201172,
"learning_rate": 3.4650888419168748e-06,
"loss": 0.2092,
"step": 678,
"step_loss": 0.2175438106060028
},
{
"epoch": 2.0,
"grad_norm": 0.9947578897117778,
"kl": 0.30330708622932434,
"learning_rate": 3.4611335796666307e-06,
"loss": 0.2031,
"step": 679,
"step_loss": 0.19578225910663605
},
{
"epoch": 2.0,
"grad_norm": 1.0764381231649376,
"kl": 0.35529449582099915,
"learning_rate": 3.457175874484251e-06,
"loss": 0.2005,
"step": 680,
"step_loss": 0.19128616154193878
},
{
"epoch": 2.0,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_loss": 1.6508517265319824,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_runtime": 14.6015,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_samples_per_second": 6.849,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_steps_per_second": 0.89,
"step": 680
},
{
"epoch": 2.0,
"grad_norm": 0.925978313670487,
"kl": 0.4176456928253174,
"learning_rate": 3.4532157399655014e-06,
"loss": 0.1679,
"step": 681,
"step_loss": 1.3116686344146729
},
{
"epoch": 2.01,
"grad_norm": 0.8940649274695096,
"kl": 0.33792364597320557,
"learning_rate": 3.4492531897144923e-06,
"loss": 0.162,
"step": 682,
"step_loss": 0.14112232625484467
},
{
"epoch": 2.01,
"grad_norm": 0.8396441200218108,
"kl": 0.3699738383293152,
"learning_rate": 3.445288237343632e-06,
"loss": 0.144,
"step": 683,
"step_loss": 0.1407199501991272
},
{
"epoch": 2.01,
"grad_norm": 0.9100745741579815,
"kl": 0.39846813678741455,
"learning_rate": 3.441320896473583e-06,
"loss": 0.1509,
"step": 684,
"step_loss": 0.16023500263690948
},
{
"epoch": 2.01,
"grad_norm": 0.9183176716722681,
"kl": 0.4535992741584778,
"learning_rate": 3.4373511807332115e-06,
"loss": 0.1555,
"step": 685,
"step_loss": 0.15503008663654327
},
{
"epoch": 2.02,
"grad_norm": 0.8961989752558022,
"kl": 0.45110467076301575,
"learning_rate": 3.433379103759542e-06,
"loss": 0.1564,
"step": 686,
"step_loss": 0.16036511957645416
},
{
"epoch": 2.02,
"grad_norm": 0.8795040894782503,
"kl": 0.3965161442756653,
"learning_rate": 3.4294046791977096e-06,
"loss": 0.146,
"step": 687,
"step_loss": 0.13043992221355438
},
{
"epoch": 2.02,
"grad_norm": 0.9844997417794465,
"kl": 0.4604860246181488,
"learning_rate": 3.4254279207009163e-06,
"loss": 0.147,
"step": 688,
"step_loss": 0.15021522343158722
},
{
"epoch": 2.03,
"grad_norm": 0.9757415983330471,
"kl": 0.4833226501941681,
"learning_rate": 3.4214488419303806e-06,
"loss": 0.1415,
"step": 689,
"step_loss": 0.14150793850421906
},
{
"epoch": 2.03,
"grad_norm": 1.0650937173916857,
"kl": 0.4186987280845642,
"learning_rate": 3.4174674565552902e-06,
"loss": 0.1507,
"step": 690,
"step_loss": 0.1668976992368698
},
{
"epoch": 2.03,
"grad_norm": 1.1276622372563851,
"kl": 0.45650917291641235,
"learning_rate": 3.413483778252759e-06,
"loss": 0.1605,
"step": 691,
"step_loss": 0.16845574975013733
},
{
"epoch": 2.03,
"grad_norm": 1.1937296491156466,
"kl": 0.4285459518432617,
"learning_rate": 3.4094978207077768e-06,
"loss": 0.1487,
"step": 692,
"step_loss": 0.1462787389755249
},
{
"epoch": 2.04,
"grad_norm": 1.0212719844193419,
"kl": 0.5114130973815918,
"learning_rate": 3.405509597613163e-06,
"loss": 0.145,
"step": 693,
"step_loss": 0.1645100861787796
},
{
"epoch": 2.04,
"grad_norm": 1.1028532757980718,
"kl": 0.520036518573761,
"learning_rate": 3.4015191226695203e-06,
"loss": 0.1545,
"step": 694,
"step_loss": 0.17864225804805756
},
{
"epoch": 2.04,
"grad_norm": 0.9733155555331748,
"kl": 0.4656696915626526,
"learning_rate": 3.397526409585185e-06,
"loss": 0.1407,
"step": 695,
"step_loss": 0.12441332638263702
},
{
"epoch": 2.05,
"grad_norm": 1.0128686328034844,
"kl": 0.4196836054325104,
"learning_rate": 3.3935314720761864e-06,
"loss": 0.1494,
"step": 696,
"step_loss": 0.12643490731716156
},
{
"epoch": 2.05,
"grad_norm": 0.9201530198545629,
"kl": 0.46877506375312805,
"learning_rate": 3.389534323866191e-06,
"loss": 0.1399,
"step": 697,
"step_loss": 0.15753169357776642
},
{
"epoch": 2.05,
"grad_norm": 0.9894624669733002,
"kl": 0.4130990505218506,
"learning_rate": 3.385534978686461e-06,
"loss": 0.1546,
"step": 698,
"step_loss": 0.17007581889629364
},
{
"epoch": 2.06,
"grad_norm": 1.0155107140429007,
"kl": 0.4775574505329132,
"learning_rate": 3.3815334502758055e-06,
"loss": 0.1489,
"step": 699,
"step_loss": 0.12813322246074677
},
{
"epoch": 2.06,
"grad_norm": 0.939102091406889,
"kl": 0.41837960481643677,
"learning_rate": 3.3775297523805365e-06,
"loss": 0.1457,
"step": 700,
"step_loss": 0.15080194175243378
},
{
"epoch": 2.06,
"grad_norm": 0.9499828261780652,
"kl": 0.46936333179473877,
"learning_rate": 3.3735238987544146e-06,
"loss": 0.152,
"step": 701,
"step_loss": 0.13999901711940765
},
{
"epoch": 2.06,
"grad_norm": 0.9757544264149329,
"kl": 0.39698389172554016,
"learning_rate": 3.369515903158607e-06,
"loss": 0.1507,
"step": 702,
"step_loss": 0.14616172015666962
},
{
"epoch": 2.07,
"grad_norm": 0.9785785921285085,
"kl": 0.5002555847167969,
"learning_rate": 3.365505779361642e-06,
"loss": 0.1487,
"step": 703,
"step_loss": 0.16984041035175323
},
{
"epoch": 2.07,
"grad_norm": 0.9897490897977824,
"kl": 0.4246920049190521,
"learning_rate": 3.3614935411393544e-06,
"loss": 0.1589,
"step": 704,
"step_loss": 0.1668768674135208
},
{
"epoch": 2.07,
"grad_norm": 0.9578147172513255,
"kl": 0.3801954388618469,
"learning_rate": 3.3574792022748463e-06,
"loss": 0.1543,
"step": 705,
"step_loss": 0.13766665756702423
},
{
"epoch": 2.08,
"grad_norm": 0.9662098961881012,
"kl": 0.43419426679611206,
"learning_rate": 3.3534627765584334e-06,
"loss": 0.1512,
"step": 706,
"step_loss": 0.15339550375938416
},
{
"epoch": 2.08,
"grad_norm": 0.9114560763013234,
"kl": 0.5094923377037048,
"learning_rate": 3.3494442777876022e-06,
"loss": 0.1439,
"step": 707,
"step_loss": 0.1555686891078949
},
{
"epoch": 2.08,
"grad_norm": 1.0718532109617787,
"kl": 0.4465751051902771,
"learning_rate": 3.3454237197669607e-06,
"loss": 0.158,
"step": 708,
"step_loss": 0.14179621636867523
},
{
"epoch": 2.08,
"grad_norm": 1.0391746116330387,
"kl": 0.426788866519928,
"learning_rate": 3.341401116308189e-06,
"loss": 0.1514,
"step": 709,
"step_loss": 0.16065070033073425
},
{
"epoch": 2.09,
"grad_norm": 0.9617041964014664,
"kl": 0.39666134119033813,
"learning_rate": 3.3373764812299954e-06,
"loss": 0.1477,
"step": 710,
"step_loss": 0.13998152315616608
},
{
"epoch": 2.09,
"grad_norm": 1.0146003249127458,
"kl": 0.3973727226257324,
"learning_rate": 3.333349828358067e-06,
"loss": 0.1507,
"step": 711,
"step_loss": 0.15805211663246155
},
{
"epoch": 2.09,
"grad_norm": 0.9869352681538952,
"kl": 0.42524510622024536,
"learning_rate": 3.3293211715250222e-06,
"loss": 0.1457,
"step": 712,
"step_loss": 0.1241888552904129
},
{
"epoch": 2.1,
"grad_norm": 0.9948029176298825,
"kl": 0.40643396973609924,
"learning_rate": 3.325290524570365e-06,
"loss": 0.151,
"step": 713,
"step_loss": 0.16702188551425934
},
{
"epoch": 2.1,
"grad_norm": 1.0201927757414222,
"kl": 0.48036760091781616,
"learning_rate": 3.321257901340434e-06,
"loss": 0.148,
"step": 714,
"step_loss": 0.1528395265340805
},
{
"epoch": 2.1,
"grad_norm": 0.9203151178241796,
"kl": 0.5000730156898499,
"learning_rate": 3.317223315688358e-06,
"loss": 0.1463,
"step": 715,
"step_loss": 0.16571396589279175
},
{
"epoch": 2.11,
"grad_norm": 0.9937513297449968,
"kl": 0.3717888593673706,
"learning_rate": 3.313186781474008e-06,
"loss": 0.1627,
"step": 716,
"step_loss": 0.17453354597091675
},
{
"epoch": 2.11,
"grad_norm": 1.0398940867357407,
"kl": 0.5787868499755859,
"learning_rate": 3.309148312563945e-06,
"loss": 0.1602,
"step": 717,
"step_loss": 0.2011195421218872
},
{
"epoch": 2.11,
"grad_norm": 0.9694706558977573,
"kl": 0.4446108639240265,
"learning_rate": 3.3051079228313815e-06,
"loss": 0.1426,
"step": 718,
"step_loss": 0.1449287086725235
},
{
"epoch": 2.11,
"grad_norm": 0.9431356634721532,
"kl": 0.4195602834224701,
"learning_rate": 3.301065626156125e-06,
"loss": 0.143,
"step": 719,
"step_loss": 0.1555873304605484
},
{
"epoch": 2.12,
"grad_norm": 0.8839846147038787,
"kl": 0.46536436676979065,
"learning_rate": 3.2970214364245352e-06,
"loss": 0.1425,
"step": 720,
"step_loss": 0.1379953920841217
},
{
"epoch": 2.12,
"grad_norm": 0.963039812210809,
"kl": 0.4512026011943817,
"learning_rate": 3.2929753675294745e-06,
"loss": 0.1529,
"step": 721,
"step_loss": 0.1425732672214508
},
{
"epoch": 2.12,
"grad_norm": 1.0145646795924912,
"kl": 0.48650485277175903,
"learning_rate": 3.2889274333702612e-06,
"loss": 0.1592,
"step": 722,
"step_loss": 0.14481569826602936
},
{
"epoch": 2.13,
"grad_norm": 0.933756411120608,
"kl": 0.45475882291793823,
"learning_rate": 3.284877647852621e-06,
"loss": 0.1427,
"step": 723,
"step_loss": 0.1405959278345108
},
{
"epoch": 2.13,
"grad_norm": 0.992827171719177,
"kl": 0.4193406403064728,
"learning_rate": 3.2808260248886375e-06,
"loss": 0.1552,
"step": 724,
"step_loss": 0.14025843143463135
},
{
"epoch": 2.13,
"grad_norm": 1.0734808867236152,
"kl": 0.4013231694698334,
"learning_rate": 3.2767725783967112e-06,
"loss": 0.1584,
"step": 725,
"step_loss": 0.16459117829799652
},
{
"epoch": 2.13,
"grad_norm": 0.9858158556441436,
"kl": 0.38677310943603516,
"learning_rate": 3.272717322301503e-06,
"loss": 0.1495,
"step": 726,
"step_loss": 0.13374319672584534
},
{
"epoch": 2.14,
"grad_norm": 0.9541473377690992,
"kl": 0.4354119598865509,
"learning_rate": 3.2686602705338906e-06,
"loss": 0.15,
"step": 727,
"step_loss": 0.15379023551940918
},
{
"epoch": 2.14,
"grad_norm": 0.9634481367476163,
"kl": 0.4873085021972656,
"learning_rate": 3.26460143703092e-06,
"loss": 0.1493,
"step": 728,
"step_loss": 0.1327579915523529
},
{
"epoch": 2.14,
"grad_norm": 0.9953389168771356,
"kl": 0.3850947916507721,
"learning_rate": 3.2605408357357624e-06,
"loss": 0.1521,
"step": 729,
"step_loss": 0.15176990628242493
},
{
"epoch": 2.15,
"grad_norm": 0.9844939750343663,
"kl": 0.44254547357559204,
"learning_rate": 3.2564784805976562e-06,
"loss": 0.1555,
"step": 730,
"step_loss": 0.13903678953647614
},
{
"epoch": 2.15,
"grad_norm": 0.9886634600223577,
"kl": 0.43694841861724854,
"learning_rate": 3.2524143855718658e-06,
"loss": 0.1565,
"step": 731,
"step_loss": 0.16289514303207397
},
{
"epoch": 2.15,
"grad_norm": 0.9484758806082827,
"kl": 0.3681836426258087,
"learning_rate": 3.2483485646196362e-06,
"loss": 0.149,
"step": 732,
"step_loss": 0.12337259948253632
},
{
"epoch": 2.16,
"grad_norm": 0.9484232485107367,
"kl": 0.4986894428730011,
"learning_rate": 3.2442810317081377e-06,
"loss": 0.1431,
"step": 733,
"step_loss": 0.13536569476127625
},
{
"epoch": 2.16,
"grad_norm": 0.9693721649945023,
"kl": 0.3540458083152771,
"learning_rate": 3.240211800810422e-06,
"loss": 0.145,
"step": 734,
"step_loss": 0.11736252903938293
},
{
"epoch": 2.16,
"grad_norm": 1.0585370770025628,
"kl": 0.43972048163414,
"learning_rate": 3.2361408859053755e-06,
"loss": 0.1481,
"step": 735,
"step_loss": 0.15756061673164368
},
{
"epoch": 2.16,
"grad_norm": 0.9537128136761093,
"kl": 0.3989626467227936,
"learning_rate": 3.2320683009776693e-06,
"loss": 0.1536,
"step": 736,
"step_loss": 0.13479158282279968
},
{
"epoch": 2.17,
"grad_norm": 0.9815792466817577,
"kl": 0.41348952054977417,
"learning_rate": 3.2279940600177097e-06,
"loss": 0.151,
"step": 737,
"step_loss": 0.12909382581710815
},
{
"epoch": 2.17,
"grad_norm": 1.04854074018167,
"kl": 0.4909619688987732,
"learning_rate": 3.223918177021594e-06,
"loss": 0.1475,
"step": 738,
"step_loss": 0.1474575400352478
},
{
"epoch": 2.17,
"grad_norm": 1.0715274180501446,
"kl": 0.45988544821739197,
"learning_rate": 3.2198406659910596e-06,
"loss": 0.1634,
"step": 739,
"step_loss": 0.16125904023647308
},
{
"epoch": 2.18,
"grad_norm": 0.9890646713016052,
"kl": 0.42592141032218933,
"learning_rate": 3.2157615409334363e-06,
"loss": 0.1499,
"step": 740,
"step_loss": 0.128481924533844
},
{
"epoch": 2.18,
"grad_norm": 0.9970158964695769,
"kl": 0.397694855928421,
"learning_rate": 3.2116808158615986e-06,
"loss": 0.1577,
"step": 741,
"step_loss": 0.1626586616039276
},
{
"epoch": 2.18,
"grad_norm": 1.0147077673417295,
"kl": 0.365612268447876,
"learning_rate": 3.207598504793917e-06,
"loss": 0.1517,
"step": 742,
"step_loss": 0.17115183174610138
},
{
"epoch": 2.18,
"grad_norm": 0.9665733618485168,
"kl": 0.41971349716186523,
"learning_rate": 3.2035146217542116e-06,
"loss": 0.1443,
"step": 743,
"step_loss": 0.14287295937538147
},
{
"epoch": 2.19,
"grad_norm": 1.0369419314745194,
"kl": 0.46320030093193054,
"learning_rate": 3.1994291807717027e-06,
"loss": 0.1556,
"step": 744,
"step_loss": 0.16582506895065308
},
{
"epoch": 2.19,
"grad_norm": 0.9781857084745722,
"kl": 0.369863361120224,
"learning_rate": 3.19534219588096e-06,
"loss": 0.1513,
"step": 745,
"step_loss": 0.1406002938747406
},
{
"epoch": 2.19,
"grad_norm": 0.9404615672126064,
"kl": 0.44528642296791077,
"learning_rate": 3.19125368112186e-06,
"loss": 0.1519,
"step": 746,
"step_loss": 0.13117581605911255
},
{
"epoch": 2.2,
"grad_norm": 1.0121457881406244,
"kl": 0.39943063259124756,
"learning_rate": 3.187163650539533e-06,
"loss": 0.155,
"step": 747,
"step_loss": 0.1424383670091629
},
{
"epoch": 2.2,
"grad_norm": 1.1106950133092461,
"kl": 0.5139979124069214,
"learning_rate": 3.1830721181843177e-06,
"loss": 0.1448,
"step": 748,
"step_loss": 0.13854211568832397
},
{
"epoch": 2.2,
"grad_norm": 0.9928293479952365,
"kl": 0.4104097783565521,
"learning_rate": 3.1789790981117103e-06,
"loss": 0.1496,
"step": 749,
"step_loss": 0.13803105056285858
},
{
"epoch": 2.21,
"grad_norm": 0.9864891298529466,
"kl": 0.5884207487106323,
"learning_rate": 3.174884604382317e-06,
"loss": 0.1457,
"step": 750,
"step_loss": 0.1734772026538849
},
{
"epoch": 2.21,
"grad_norm": 1.019026327979771,
"kl": 0.39733612537384033,
"learning_rate": 3.170788651061811e-06,
"loss": 0.1548,
"step": 751,
"step_loss": 0.15517111122608185
},
{
"epoch": 2.21,
"grad_norm": 0.9759482931761134,
"kl": 0.39855048060417175,
"learning_rate": 3.1666912522208754e-06,
"loss": 0.143,
"step": 752,
"step_loss": 0.1241132915019989
},
{
"epoch": 2.21,
"grad_norm": 0.9819776619042838,
"kl": 0.4785412549972534,
"learning_rate": 3.162592421935158e-06,
"loss": 0.1529,
"step": 753,
"step_loss": 0.1676996350288391
},
{
"epoch": 2.22,
"grad_norm": 0.9567580475400209,
"kl": 0.4445911943912506,
"learning_rate": 3.158492174285229e-06,
"loss": 0.1488,
"step": 754,
"step_loss": 0.16597110033035278
},
{
"epoch": 2.22,
"grad_norm": 0.9727635152108765,
"kl": 0.420547217130661,
"learning_rate": 3.1543905233565232e-06,
"loss": 0.1464,
"step": 755,
"step_loss": 0.12980793416500092
},
{
"epoch": 2.22,
"grad_norm": 0.9831060677637677,
"kl": 0.43531182408332825,
"learning_rate": 3.1502874832392984e-06,
"loss": 0.1502,
"step": 756,
"step_loss": 0.19235534965991974
},
{
"epoch": 2.23,
"grad_norm": 0.9741457522529087,
"kl": 0.4326942563056946,
"learning_rate": 3.146183068028582e-06,
"loss": 0.1521,
"step": 757,
"step_loss": 0.17118844389915466
},
{
"epoch": 2.23,
"grad_norm": 1.0158603395549721,
"kl": 0.4532112777233124,
"learning_rate": 3.1420772918241286e-06,
"loss": 0.1416,
"step": 758,
"step_loss": 0.1491362452507019
},
{
"epoch": 2.23,
"grad_norm": 0.9702749401691976,
"kl": 0.41253310441970825,
"learning_rate": 3.1379701687303665e-06,
"loss": 0.152,
"step": 759,
"step_loss": 0.16260167956352234
},
{
"epoch": 2.23,
"grad_norm": 0.9835143944298296,
"kl": 0.48813024163246155,
"learning_rate": 3.1338617128563505e-06,
"loss": 0.1483,
"step": 760,
"step_loss": 0.14093339443206787
},
{
"epoch": 2.24,
"grad_norm": 0.9673002377624046,
"kl": 0.45352044701576233,
"learning_rate": 3.1297519383157138e-06,
"loss": 0.1444,
"step": 761,
"step_loss": 0.14167660474777222
},
{
"epoch": 2.24,
"grad_norm": 1.0177495156776364,
"kl": 0.3557469844818115,
"learning_rate": 3.1256408592266214e-06,
"loss": 0.1545,
"step": 762,
"step_loss": 0.1343638151884079
},
{
"epoch": 2.24,
"grad_norm": 0.9023137851380141,
"kl": 0.4272156357765198,
"learning_rate": 3.121528489711718e-06,
"loss": 0.1473,
"step": 763,
"step_loss": 0.1451863944530487
},
{
"epoch": 2.25,
"grad_norm": 1.0441318127695194,
"kl": 0.4729064106941223,
"learning_rate": 3.1174148438980804e-06,
"loss": 0.151,
"step": 764,
"step_loss": 0.16162380576133728
},
{
"epoch": 2.25,
"grad_norm": 0.9950818625517187,
"kl": 0.4472399353981018,
"learning_rate": 3.113299935917174e-06,
"loss": 0.1539,
"step": 765,
"step_loss": 0.14452748000621796
},
{
"epoch": 2.25,
"grad_norm": 1.0179996166687564,
"kl": 0.5420696139335632,
"learning_rate": 3.1091837799047946e-06,
"loss": 0.1475,
"step": 766,
"step_loss": 0.18763327598571777
},
{
"epoch": 2.26,
"grad_norm": 1.0010025393208966,
"kl": 0.3447565734386444,
"learning_rate": 3.1050663900010295e-06,
"loss": 0.1485,
"step": 767,
"step_loss": 0.13680729269981384
},
{
"epoch": 2.26,
"grad_norm": 1.0142744474701502,
"kl": 0.3538120687007904,
"learning_rate": 3.1009477803502015e-06,
"loss": 0.1541,
"step": 768,
"step_loss": 0.16569288074970245
},
{
"epoch": 2.26,
"grad_norm": 1.036047382769812,
"kl": 0.4295748174190521,
"learning_rate": 3.0968279651008273e-06,
"loss": 0.1613,
"step": 769,
"step_loss": 0.14074234664440155
},
{
"epoch": 2.26,
"grad_norm": 0.9244937635890171,
"kl": 0.5195684432983398,
"learning_rate": 3.092706958405561e-06,
"loss": 0.1472,
"step": 770,
"step_loss": 0.15720242261886597
},
{
"epoch": 2.27,
"grad_norm": 0.9621433507609762,
"kl": 0.4947783052921295,
"learning_rate": 3.088584774421155e-06,
"loss": 0.1552,
"step": 771,
"step_loss": 0.16154304146766663
},
{
"epoch": 2.27,
"grad_norm": 1.0088544087696063,
"kl": 0.45518720149993896,
"learning_rate": 3.0844614273083986e-06,
"loss": 0.1537,
"step": 772,
"step_loss": 0.16250544786453247
},
{
"epoch": 2.27,
"grad_norm": 1.02735358403942,
"kl": 0.3680950999259949,
"learning_rate": 3.0803369312320834e-06,
"loss": 0.1595,
"step": 773,
"step_loss": 0.13430428504943848
},
{
"epoch": 2.28,
"grad_norm": 1.0229473541898282,
"kl": 0.45938223600387573,
"learning_rate": 3.0762113003609464e-06,
"loss": 0.1521,
"step": 774,
"step_loss": 0.16164535284042358
},
{
"epoch": 2.28,
"grad_norm": 0.9762252225185958,
"kl": 0.4909830391407013,
"learning_rate": 3.072084548867619e-06,
"loss": 0.1487,
"step": 775,
"step_loss": 0.16784648597240448
},
{
"epoch": 2.28,
"grad_norm": 1.0525530374080225,
"kl": 0.4151424765586853,
"learning_rate": 3.0679566909285865e-06,
"loss": 0.1644,
"step": 776,
"step_loss": 0.15828314423561096
},
{
"epoch": 2.28,
"grad_norm": 1.0300851369596706,
"kl": 0.3816262185573578,
"learning_rate": 3.0638277407241353e-06,
"loss": 0.1552,
"step": 777,
"step_loss": 0.1416245996952057
},
{
"epoch": 2.29,
"grad_norm": 0.9094117275175454,
"kl": 0.47151878476142883,
"learning_rate": 3.059697712438301e-06,
"loss": 0.1429,
"step": 778,
"step_loss": 0.1599782109260559
},
{
"epoch": 2.29,
"grad_norm": 0.943123316008099,
"kl": 0.4516427218914032,
"learning_rate": 3.0555666202588237e-06,
"loss": 0.1525,
"step": 779,
"step_loss": 0.17189931869506836
},
{
"epoch": 2.29,
"grad_norm": 0.9766034537311132,
"kl": 0.4513046443462372,
"learning_rate": 3.0514344783771017e-06,
"loss": 0.1516,
"step": 780,
"step_loss": 0.14572221040725708
},
{
"epoch": 2.3,
"grad_norm": 0.9983107634149363,
"kl": 0.36767828464508057,
"learning_rate": 3.0473013009881343e-06,
"loss": 0.1561,
"step": 781,
"step_loss": 0.16547314822673798
},
{
"epoch": 2.3,
"grad_norm": 1.0270127608148132,
"kl": 0.45325592160224915,
"learning_rate": 3.04316710229048e-06,
"loss": 0.1583,
"step": 782,
"step_loss": 0.14895084500312805
},
{
"epoch": 2.3,
"grad_norm": 0.9148345849077315,
"kl": 0.5390376448631287,
"learning_rate": 3.0390318964862064e-06,
"loss": 0.1519,
"step": 783,
"step_loss": 0.1619083732366562
},
{
"epoch": 2.31,
"grad_norm": 0.8725682510499037,
"kl": 0.34793299436569214,
"learning_rate": 3.03489569778084e-06,
"loss": 0.1452,
"step": 784,
"step_loss": 0.1361267864704132
},
{
"epoch": 2.31,
"grad_norm": 0.9441065859863195,
"kl": 0.4481748640537262,
"learning_rate": 3.0307585203833203e-06,
"loss": 0.1426,
"step": 785,
"step_loss": 0.15584218502044678
},
{
"epoch": 2.31,
"grad_norm": 1.0085881671115713,
"kl": 0.43633711338043213,
"learning_rate": 3.0266203785059438e-06,
"loss": 0.1528,
"step": 786,
"step_loss": 0.1516050398349762
},
{
"epoch": 2.31,
"grad_norm": 0.9369144359378735,
"kl": 0.4298512935638428,
"learning_rate": 3.0224812863643266e-06,
"loss": 0.1451,
"step": 787,
"step_loss": 0.12825755774974823
},
{
"epoch": 2.32,
"grad_norm": 1.065211790409866,
"kl": 0.43545064330101013,
"learning_rate": 3.0183412581773453e-06,
"loss": 0.1619,
"step": 788,
"step_loss": 0.16257864236831665
},
{
"epoch": 2.32,
"grad_norm": 1.0775993281913878,
"kl": 0.40961089730262756,
"learning_rate": 3.0142003081670922e-06,
"loss": 0.1628,
"step": 789,
"step_loss": 0.1613461673259735
},
{
"epoch": 2.32,
"grad_norm": 1.0237299523119836,
"kl": 0.3749983608722687,
"learning_rate": 3.010058450558827e-06,
"loss": 0.1504,
"step": 790,
"step_loss": 0.13308608531951904
},
{
"epoch": 2.33,
"grad_norm": 1.0661845972455573,
"kl": 0.472342312335968,
"learning_rate": 3.005915699580928e-06,
"loss": 0.1486,
"step": 791,
"step_loss": 0.1562490463256836
},
{
"epoch": 2.33,
"grad_norm": 0.9764395369363379,
"kl": 0.36287054419517517,
"learning_rate": 3.0017720694648407e-06,
"loss": 0.1472,
"step": 792,
"step_loss": 0.14360609650611877
},
{
"epoch": 2.33,
"grad_norm": 0.9561934092861629,
"kl": 0.41280660033226013,
"learning_rate": 2.997627574445032e-06,
"loss": 0.1425,
"step": 793,
"step_loss": 0.1299421787261963
},
{
"epoch": 2.33,
"grad_norm": 1.016934035404032,
"kl": 0.44996654987335205,
"learning_rate": 2.9934822287589404e-06,
"loss": 0.1517,
"step": 794,
"step_loss": 0.13960210978984833
},
{
"epoch": 2.34,
"grad_norm": 1.007459978061249,
"kl": 0.4418295919895172,
"learning_rate": 2.9893360466469257e-06,
"loss": 0.1586,
"step": 795,
"step_loss": 0.1698797643184662
},
{
"epoch": 2.34,
"grad_norm": 0.9777845289814279,
"kl": 0.4033919870853424,
"learning_rate": 2.9851890423522214e-06,
"loss": 0.1534,
"step": 796,
"step_loss": 0.148381307721138
},
{
"epoch": 2.34,
"grad_norm": 1.0038670483961691,
"kl": 0.41636258363723755,
"learning_rate": 2.9810412301208837e-06,
"loss": 0.1605,
"step": 797,
"step_loss": 0.15567950904369354
},
{
"epoch": 2.35,
"grad_norm": 0.9931491186767503,
"kl": 0.536481499671936,
"learning_rate": 2.976892624201747e-06,
"loss": 0.1515,
"step": 798,
"step_loss": 0.1677020788192749
},
{
"epoch": 2.35,
"grad_norm": 0.9976100824318828,
"kl": 0.3861311674118042,
"learning_rate": 2.9727432388463713e-06,
"loss": 0.1498,
"step": 799,
"step_loss": 0.14130038022994995
},
{
"epoch": 2.35,
"grad_norm": 0.9818984015412048,
"kl": 0.38746803998947144,
"learning_rate": 2.9685930883089936e-06,
"loss": 0.1524,
"step": 800,
"step_loss": 0.1593620926141739
},
{
"epoch": 2.36,
"grad_norm": 0.9288991903769893,
"kl": 0.3206455707550049,
"learning_rate": 2.9644421868464797e-06,
"loss": 0.1394,
"step": 801,
"step_loss": 0.10261634737253189
},
{
"epoch": 2.36,
"grad_norm": 0.9983514817365607,
"kl": 0.41597017645835876,
"learning_rate": 2.9602905487182758e-06,
"loss": 0.1451,
"step": 802,
"step_loss": 0.12765845656394958
},
{
"epoch": 2.36,
"grad_norm": 0.8902637518688588,
"kl": 0.35546138882637024,
"learning_rate": 2.9561381881863583e-06,
"loss": 0.1358,
"step": 803,
"step_loss": 0.11750486493110657
},
{
"epoch": 2.36,
"grad_norm": 1.1226095260209488,
"kl": 0.41053929924964905,
"learning_rate": 2.9519851195151834e-06,
"loss": 0.1441,
"step": 804,
"step_loss": 0.15928710997104645
},
{
"epoch": 2.37,
"grad_norm": 1.0162148889995983,
"kl": 0.40562787652015686,
"learning_rate": 2.9478313569716427e-06,
"loss": 0.1497,
"step": 805,
"step_loss": 0.1482928991317749
},
{
"epoch": 2.37,
"grad_norm": 0.9605017503685718,
"kl": 0.37824496626853943,
"learning_rate": 2.9436769148250107e-06,
"loss": 0.1502,
"step": 806,
"step_loss": 0.1375085562467575
},
{
"epoch": 2.37,
"grad_norm": 1.0439813106314617,
"kl": 0.4013465642929077,
"learning_rate": 2.939521807346896e-06,
"loss": 0.1515,
"step": 807,
"step_loss": 0.1387888491153717
},
{
"epoch": 2.38,
"grad_norm": 0.9915447105353192,
"kl": 0.4745499789714813,
"learning_rate": 2.935366048811192e-06,
"loss": 0.1519,
"step": 808,
"step_loss": 0.1591644287109375
},
{
"epoch": 2.38,
"grad_norm": 0.9295061078745588,
"kl": 0.39526990056037903,
"learning_rate": 2.9312096534940304e-06,
"loss": 0.1403,
"step": 809,
"step_loss": 0.14475908875465393
},
{
"epoch": 2.38,
"grad_norm": 0.9874188124202254,
"kl": 0.4027431309223175,
"learning_rate": 2.9270526356737306e-06,
"loss": 0.1551,
"step": 810,
"step_loss": 0.17166703939437866
},
{
"epoch": 2.38,
"grad_norm": 1.0386355533970422,
"kl": 0.332511842250824,
"learning_rate": 2.9228950096307477e-06,
"loss": 0.1543,
"step": 811,
"step_loss": 0.14212793111801147
},
{
"epoch": 2.39,
"grad_norm": 0.9426077934872951,
"kl": 0.5144191980361938,
"learning_rate": 2.9187367896476287e-06,
"loss": 0.1467,
"step": 812,
"step_loss": 0.17707863450050354
},
{
"epoch": 2.39,
"grad_norm": 1.033937968101504,
"kl": 0.458046019077301,
"learning_rate": 2.9145779900089603e-06,
"loss": 0.1568,
"step": 813,
"step_loss": 0.14064949750900269
},
{
"epoch": 2.39,
"grad_norm": 0.9527788628095325,
"kl": 0.46262165904045105,
"learning_rate": 2.91041862500132e-06,
"loss": 0.1517,
"step": 814,
"step_loss": 0.13187597692012787
},
{
"epoch": 2.4,
"grad_norm": 1.0259502663865276,
"kl": 0.5149811506271362,
"learning_rate": 2.9062587089132287e-06,
"loss": 0.1507,
"step": 815,
"step_loss": 0.14037488400936127
},
{
"epoch": 2.4,
"grad_norm": 0.9142149529111745,
"kl": 0.5691028237342834,
"learning_rate": 2.9020982560350987e-06,
"loss": 0.1424,
"step": 816,
"step_loss": 0.15255063772201538
},
{
"epoch": 2.4,
"grad_norm": 0.9834074836322464,
"kl": 0.5331578254699707,
"learning_rate": 2.897937280659188e-06,
"loss": 0.1522,
"step": 817,
"step_loss": 0.19720472395420074
},
{
"epoch": 2.4,
"grad_norm": 1.0291303032977386,
"kl": 0.42640256881713867,
"learning_rate": 2.893775797079548e-06,
"loss": 0.1587,
"step": 818,
"step_loss": 0.15943148732185364
},
{
"epoch": 2.41,
"grad_norm": 1.023854381889724,
"kl": 0.3928847908973694,
"learning_rate": 2.8896138195919774e-06,
"loss": 0.1478,
"step": 819,
"step_loss": 0.14019568264484406
},
{
"epoch": 2.41,
"grad_norm": 1.0242535198952862,
"kl": 0.48428666591644287,
"learning_rate": 2.885451362493971e-06,
"loss": 0.1529,
"step": 820,
"step_loss": 0.19270402193069458
},
{
"epoch": 2.41,
"grad_norm": 1.0748533004644767,
"kl": 0.5833289623260498,
"learning_rate": 2.8812884400846697e-06,
"loss": 0.1585,
"step": 821,
"step_loss": 0.15655606985092163
},
{
"epoch": 2.42,
"grad_norm": 1.0464967072640574,
"kl": 0.46840161085128784,
"learning_rate": 2.8771250666648154e-06,
"loss": 0.1546,
"step": 822,
"step_loss": 0.15261268615722656
},
{
"epoch": 2.42,
"grad_norm": 0.9831924601580038,
"kl": 0.46287956833839417,
"learning_rate": 2.872961256536697e-06,
"loss": 0.1586,
"step": 823,
"step_loss": 0.17225544154644012
},
{
"epoch": 2.42,
"grad_norm": 1.065657859586474,
"kl": 0.3879581093788147,
"learning_rate": 2.868797024004106e-06,
"loss": 0.1595,
"step": 824,
"step_loss": 0.1531311571598053
},
{
"epoch": 2.43,
"grad_norm": 1.044424305750142,
"kl": 0.43647971749305725,
"learning_rate": 2.864632383372284e-06,
"loss": 0.156,
"step": 825,
"step_loss": 0.13304683566093445
},
{
"epoch": 2.43,
"grad_norm": 0.9736991867352337,
"kl": 0.5177615880966187,
"learning_rate": 2.8604673489478736e-06,
"loss": 0.1507,
"step": 826,
"step_loss": 0.1456117331981659
},
{
"epoch": 2.43,
"grad_norm": 0.935779184315902,
"kl": 0.4033496379852295,
"learning_rate": 2.8563019350388682e-06,
"loss": 0.1441,
"step": 827,
"step_loss": 0.1455593705177307
},
{
"epoch": 2.43,
"grad_norm": 0.9724703844802562,
"kl": 0.44370484352111816,
"learning_rate": 2.852136155954573e-06,
"loss": 0.1551,
"step": 828,
"step_loss": 0.15278911590576172
},
{
"epoch": 2.44,
"grad_norm": 1.0389405085720838,
"kl": 0.3740912079811096,
"learning_rate": 2.8479700260055375e-06,
"loss": 0.1464,
"step": 829,
"step_loss": 0.14697124063968658
},
{
"epoch": 2.44,
"grad_norm": 1.045910704297703,
"kl": 0.46834734082221985,
"learning_rate": 2.8438035595035235e-06,
"loss": 0.1616,
"step": 830,
"step_loss": 0.16846756637096405
},
{
"epoch": 2.44,
"grad_norm": 1.142264846499886,
"kl": 0.47931694984436035,
"learning_rate": 2.8396367707614454e-06,
"loss": 0.1669,
"step": 831,
"step_loss": 0.18373435735702515
},
{
"epoch": 2.45,
"grad_norm": 1.0527179270858962,
"kl": 0.527995228767395,
"learning_rate": 2.835469674093326e-06,
"loss": 0.1575,
"step": 832,
"step_loss": 0.16337227821350098
},
{
"epoch": 2.45,
"grad_norm": 1.0387618411476036,
"kl": 0.42796212434768677,
"learning_rate": 2.8313022838142475e-06,
"loss": 0.1595,
"step": 833,
"step_loss": 0.16812928020954132
},
{
"epoch": 2.45,
"grad_norm": 0.9471143464094294,
"kl": 0.4106406271457672,
"learning_rate": 2.827134614240296e-06,
"loss": 0.1499,
"step": 834,
"step_loss": 0.14076904952526093
},
{
"epoch": 2.45,
"grad_norm": 0.9917655257761571,
"kl": 0.45805299282073975,
"learning_rate": 2.8229666796885224e-06,
"loss": 0.1557,
"step": 835,
"step_loss": 0.15434692800045013
},
{
"epoch": 2.46,
"grad_norm": 1.0110321513562135,
"kl": 0.40475213527679443,
"learning_rate": 2.818798494476884e-06,
"loss": 0.1465,
"step": 836,
"step_loss": 0.1469970941543579
},
{
"epoch": 2.46,
"grad_norm": 0.9764345161496416,
"kl": 0.49001234769821167,
"learning_rate": 2.814630072924201e-06,
"loss": 0.1487,
"step": 837,
"step_loss": 0.1866789162158966
},
{
"epoch": 2.46,
"grad_norm": 1.0794414278495323,
"kl": 0.4759081304073334,
"learning_rate": 2.8104614293501047e-06,
"loss": 0.166,
"step": 838,
"step_loss": 0.15782764554023743
},
{
"epoch": 2.47,
"grad_norm": 0.9236052649575086,
"kl": 0.5032958984375,
"learning_rate": 2.8062925780749913e-06,
"loss": 0.141,
"step": 839,
"step_loss": 0.1591145396232605
},
{
"epoch": 2.47,
"grad_norm": 0.9482136338056982,
"kl": 0.4338464140892029,
"learning_rate": 2.802123533419966e-06,
"loss": 0.1433,
"step": 840,
"step_loss": 0.16640107333660126
},
{
"epoch": 2.47,
"grad_norm": 1.0595480920949152,
"kl": 0.526473343372345,
"learning_rate": 2.7979543097068023e-06,
"loss": 0.1574,
"step": 841,
"step_loss": 0.1585194319486618
},
{
"epoch": 2.48,
"grad_norm": 0.9792300907182565,
"kl": 0.43886512517929077,
"learning_rate": 2.793784921257889e-06,
"loss": 0.1488,
"step": 842,
"step_loss": 0.1458326131105423
},
{
"epoch": 2.48,
"grad_norm": 0.9630706804467645,
"kl": 0.4345511794090271,
"learning_rate": 2.789615382396178e-06,
"loss": 0.1489,
"step": 843,
"step_loss": 0.15134494006633759
},
{
"epoch": 2.48,
"grad_norm": 1.0446667083350927,
"kl": 0.4915614724159241,
"learning_rate": 2.785445707445139e-06,
"loss": 0.1563,
"step": 844,
"step_loss": 0.1671372950077057
},
{
"epoch": 2.48,
"grad_norm": 1.0372490510437422,
"kl": 0.46963563561439514,
"learning_rate": 2.7812759107287092e-06,
"loss": 0.1577,
"step": 845,
"step_loss": 0.17008760571479797
},
{
"epoch": 2.49,
"grad_norm": 1.0428903957466424,
"kl": 0.4324113130569458,
"learning_rate": 2.777106006571246e-06,
"loss": 0.1637,
"step": 846,
"step_loss": 0.1375209391117096
},
{
"epoch": 2.49,
"grad_norm": 1.1117206141871079,
"kl": 0.49308332800865173,
"learning_rate": 2.7729360092974727e-06,
"loss": 0.1573,
"step": 847,
"step_loss": 0.1321687251329422
},
{
"epoch": 2.49,
"grad_norm": 1.0017834941794728,
"kl": 0.4275958836078644,
"learning_rate": 2.7687659332324348e-06,
"loss": 0.1522,
"step": 848,
"step_loss": 0.1515330672264099
},
{
"epoch": 2.5,
"grad_norm": 0.9791789290830443,
"kl": 0.3971335291862488,
"learning_rate": 2.7645957927014476e-06,
"loss": 0.1472,
"step": 849,
"step_loss": 0.1782829761505127
},
{
"epoch": 2.5,
"grad_norm": 0.9777418720782441,
"kl": 0.44619691371917725,
"learning_rate": 2.7604256020300474e-06,
"loss": 0.1504,
"step": 850,
"step_loss": 0.15114323794841766
},
{
"epoch": 2.5,
"grad_norm": 1.0171915736900594,
"kl": 0.43852925300598145,
"learning_rate": 2.7562553755439453e-06,
"loss": 0.1536,
"step": 851,
"step_loss": 0.1527268886566162
},
{
"epoch": 2.5,
"grad_norm": 0.9392503452143012,
"kl": 0.48514020442962646,
"learning_rate": 2.7520851275689705e-06,
"loss": 0.1434,
"step": 852,
"step_loss": 0.15535013377666473
},
{
"epoch": 2.51,
"grad_norm": 1.0357489994061733,
"kl": 0.40949738025665283,
"learning_rate": 2.7479148724310306e-06,
"loss": 0.1554,
"step": 853,
"step_loss": 0.15922455489635468
},
{
"epoch": 2.51,
"grad_norm": 0.9421029421438589,
"kl": 0.4990323483943939,
"learning_rate": 2.7437446244560563e-06,
"loss": 0.1501,
"step": 854,
"step_loss": 0.14648234844207764
},
{
"epoch": 2.51,
"grad_norm": 0.9961498319508436,
"kl": 0.4119671583175659,
"learning_rate": 2.739574397969953e-06,
"loss": 0.1457,
"step": 855,
"step_loss": 0.14378459751605988
},
{
"epoch": 2.52,
"grad_norm": 1.018662122435029,
"kl": 0.4358648359775543,
"learning_rate": 2.7354042072985527e-06,
"loss": 0.1596,
"step": 856,
"step_loss": 0.1559343934059143
},
{
"epoch": 2.52,
"grad_norm": 0.9455489895306939,
"kl": 0.5033860206604004,
"learning_rate": 2.731234066767566e-06,
"loss": 0.148,
"step": 857,
"step_loss": 0.14744150638580322
},
{
"epoch": 2.52,
"grad_norm": 1.046978432968794,
"kl": 0.5012477040290833,
"learning_rate": 2.727063990702528e-06,
"loss": 0.1554,
"step": 858,
"step_loss": 0.1712190806865692
},
{
"epoch": 2.53,
"grad_norm": 0.9584546228377427,
"kl": 0.4685800075531006,
"learning_rate": 2.7228939934287545e-06,
"loss": 0.1518,
"step": 859,
"step_loss": 0.1738032102584839
},
{
"epoch": 2.53,
"grad_norm": 1.0138203947600486,
"kl": 0.4495634138584137,
"learning_rate": 2.7187240892712915e-06,
"loss": 0.1542,
"step": 860,
"step_loss": 0.1489475667476654
},
{
"epoch": 2.53,
"grad_norm": 0.9471813046395302,
"kl": 0.506722092628479,
"learning_rate": 2.7145542925548625e-06,
"loss": 0.1497,
"step": 861,
"step_loss": 0.17352920770645142
},
{
"epoch": 2.53,
"grad_norm": 0.930769811268272,
"kl": 0.45764830708503723,
"learning_rate": 2.7103846176038234e-06,
"loss": 0.1463,
"step": 862,
"step_loss": 0.14699025452136993
},
{
"epoch": 2.54,
"grad_norm": 1.0613625131595172,
"kl": 0.5440824627876282,
"learning_rate": 2.7062150787421117e-06,
"loss": 0.1586,
"step": 863,
"step_loss": 0.15115922689437866
},
{
"epoch": 2.54,
"grad_norm": 1.0090354921253601,
"kl": 0.4678072929382324,
"learning_rate": 2.702045690293198e-06,
"loss": 0.1577,
"step": 864,
"step_loss": 0.19815057516098022
},
{
"epoch": 2.54,
"grad_norm": 0.9879524528369488,
"kl": 0.41325706243515015,
"learning_rate": 2.697876466580035e-06,
"loss": 0.1587,
"step": 865,
"step_loss": 0.1458713412284851
},
{
"epoch": 2.55,
"grad_norm": 0.9336052578651772,
"kl": 0.35849088430404663,
"learning_rate": 2.693707421925011e-06,
"loss": 0.1427,
"step": 866,
"step_loss": 0.1263750195503235
},
{
"epoch": 2.55,
"grad_norm": 0.962448080322168,
"kl": 0.4207032024860382,
"learning_rate": 2.689538570649896e-06,
"loss": 0.1501,
"step": 867,
"step_loss": 0.15147185325622559
},
{
"epoch": 2.55,
"grad_norm": 0.9755617125351154,
"kl": 0.35634732246398926,
"learning_rate": 2.6853699270758006e-06,
"loss": 0.1444,
"step": 868,
"step_loss": 0.13335993885993958
},
{
"epoch": 2.55,
"grad_norm": 1.0071987678638463,
"kl": 0.40423381328582764,
"learning_rate": 2.681201505523117e-06,
"loss": 0.1497,
"step": 869,
"step_loss": 0.15910114347934723
},
{
"epoch": 2.56,
"grad_norm": 0.9679292924796197,
"kl": 0.4693619906902313,
"learning_rate": 2.6770333203114783e-06,
"loss": 0.1527,
"step": 870,
"step_loss": 0.16329102218151093
},
{
"epoch": 2.56,
"grad_norm": 0.9628687579474976,
"kl": 0.4137752056121826,
"learning_rate": 2.6728653857597042e-06,
"loss": 0.1462,
"step": 871,
"step_loss": 0.12889014184474945
},
{
"epoch": 2.56,
"grad_norm": 1.0253647283281182,
"kl": 0.31751748919487,
"learning_rate": 2.6686977161857536e-06,
"loss": 0.1509,
"step": 872,
"step_loss": 0.13793063163757324
},
{
"epoch": 2.57,
"grad_norm": 0.9933774525703443,
"kl": 0.4467350244522095,
"learning_rate": 2.664530325906674e-06,
"loss": 0.1555,
"step": 873,
"step_loss": 0.15573230385780334
},
{
"epoch": 2.57,
"grad_norm": 1.0177636297789605,
"kl": 0.4431452751159668,
"learning_rate": 2.660363229238555e-06,
"loss": 0.159,
"step": 874,
"step_loss": 0.12957452237606049
},
{
"epoch": 2.57,
"grad_norm": 0.9976856899473273,
"kl": 0.415743350982666,
"learning_rate": 2.6561964404964772e-06,
"loss": 0.1414,
"step": 875,
"step_loss": 0.14293581247329712
},
{
"epoch": 2.58,
"grad_norm": 1.0243005389649271,
"kl": 0.47209423780441284,
"learning_rate": 2.6520299739944632e-06,
"loss": 0.1577,
"step": 876,
"step_loss": 0.15875697135925293
},
{
"epoch": 2.58,
"grad_norm": 1.0132980539403538,
"kl": 0.4555986821651459,
"learning_rate": 2.6478638440454287e-06,
"loss": 0.155,
"step": 877,
"step_loss": 0.14882808923721313
},
{
"epoch": 2.58,
"grad_norm": 0.9968564423415248,
"kl": 0.4383925199508667,
"learning_rate": 2.6436980649611316e-06,
"loss": 0.1527,
"step": 878,
"step_loss": 0.1468876153230667
},
{
"epoch": 2.58,
"grad_norm": 0.9946869883294787,
"kl": 0.4805772602558136,
"learning_rate": 2.6395326510521284e-06,
"loss": 0.1468,
"step": 879,
"step_loss": 0.1445452868938446
},
{
"epoch": 2.59,
"grad_norm": 1.0065650168443376,
"kl": 0.4132371246814728,
"learning_rate": 2.635367616627717e-06,
"loss": 0.1504,
"step": 880,
"step_loss": 0.1394580602645874
},
{
"epoch": 2.59,
"grad_norm": 1.0307017371698117,
"kl": 0.3783648908138275,
"learning_rate": 2.631202975995894e-06,
"loss": 0.1592,
"step": 881,
"step_loss": 0.15418383479118347
},
{
"epoch": 2.59,
"grad_norm": 1.001319848718613,
"kl": 0.5001019835472107,
"learning_rate": 2.6270387434633033e-06,
"loss": 0.1473,
"step": 882,
"step_loss": 0.14509941637516022
},
{
"epoch": 2.6,
"grad_norm": 0.9875134436983942,
"kl": 0.41825470328330994,
"learning_rate": 2.622874933335186e-06,
"loss": 0.1537,
"step": 883,
"step_loss": 0.12782949209213257
},
{
"epoch": 2.6,
"grad_norm": 1.0572877830596905,
"kl": 0.48397839069366455,
"learning_rate": 2.618711559915332e-06,
"loss": 0.1532,
"step": 884,
"step_loss": 0.15913690626621246
},
{
"epoch": 2.6,
"grad_norm": 0.9751094536600117,
"kl": 0.4292382597923279,
"learning_rate": 2.6145486375060305e-06,
"loss": 0.1522,
"step": 885,
"step_loss": 0.14782339334487915
},
{
"epoch": 2.6,
"grad_norm": 1.0673776750362411,
"kl": 0.5227698087692261,
"learning_rate": 2.610386180408023e-06,
"loss": 0.1631,
"step": 886,
"step_loss": 0.17023181915283203
},
{
"epoch": 2.61,
"grad_norm": 0.9733633359085521,
"kl": 0.35292860865592957,
"learning_rate": 2.6062242029204525e-06,
"loss": 0.1471,
"step": 887,
"step_loss": 0.16706503927707672
},
{
"epoch": 2.61,
"grad_norm": 1.0817929222314064,
"kl": 0.4509636163711548,
"learning_rate": 2.6020627193408126e-06,
"loss": 0.159,
"step": 888,
"step_loss": 0.14339911937713623
},
{
"epoch": 2.61,
"grad_norm": 1.0112704753099575,
"kl": 0.5275288820266724,
"learning_rate": 2.5979017439649016e-06,
"loss": 0.1578,
"step": 889,
"step_loss": 0.1634017825126648
},
{
"epoch": 2.62,
"grad_norm": 1.065748885440266,
"kl": 0.4825303554534912,
"learning_rate": 2.593741291086772e-06,
"loss": 0.1647,
"step": 890,
"step_loss": 0.15958373248577118
},
{
"epoch": 2.62,
"grad_norm": 1.0145482738058882,
"kl": 0.4341558814048767,
"learning_rate": 2.589581374998681e-06,
"loss": 0.1535,
"step": 891,
"step_loss": 0.15015427768230438
},
{
"epoch": 2.62,
"grad_norm": 0.965175247388308,
"kl": 0.3313429355621338,
"learning_rate": 2.5854220099910404e-06,
"loss": 0.1421,
"step": 892,
"step_loss": 0.1318102329969406
},
{
"epoch": 2.63,
"grad_norm": 0.9691866844145771,
"kl": 0.3674515187740326,
"learning_rate": 2.581263210352372e-06,
"loss": 0.1465,
"step": 893,
"step_loss": 0.12013500183820724
},
{
"epoch": 2.63,
"grad_norm": 1.0326376883901633,
"kl": 0.4059482514858246,
"learning_rate": 2.5771049903692534e-06,
"loss": 0.1576,
"step": 894,
"step_loss": 0.1739095002412796
},
{
"epoch": 2.63,
"grad_norm": 1.025107600798099,
"kl": 0.4659903943538666,
"learning_rate": 2.572947364326271e-06,
"loss": 0.1657,
"step": 895,
"step_loss": 0.1959068477153778
},
{
"epoch": 2.63,
"grad_norm": 1.0072382984139703,
"kl": 0.4354158639907837,
"learning_rate": 2.5687903465059694e-06,
"loss": 0.1544,
"step": 896,
"step_loss": 0.1440533995628357
},
{
"epoch": 2.64,
"grad_norm": 0.9879824672979248,
"kl": 0.42458993196487427,
"learning_rate": 2.5646339511888087e-06,
"loss": 0.1515,
"step": 897,
"step_loss": 0.16383150219917297
},
{
"epoch": 2.64,
"grad_norm": 0.9776984512612195,
"kl": 0.5129539966583252,
"learning_rate": 2.560478192653106e-06,
"loss": 0.145,
"step": 898,
"step_loss": 0.1519792526960373
},
{
"epoch": 2.64,
"grad_norm": 1.0045857782912961,
"kl": 0.46529197692871094,
"learning_rate": 2.5563230851749904e-06,
"loss": 0.1554,
"step": 899,
"step_loss": 0.14696285128593445
},
{
"epoch": 2.65,
"grad_norm": 1.0698829958572684,
"kl": 0.38536253571510315,
"learning_rate": 2.5521686430283584e-06,
"loss": 0.1562,
"step": 900,
"step_loss": 0.1435265839099884
},
{
"epoch": 2.65,
"grad_norm": 0.9972674616711951,
"kl": 0.5101684927940369,
"learning_rate": 2.5480148804848177e-06,
"loss": 0.1518,
"step": 901,
"step_loss": 0.17259491980075836
},
{
"epoch": 2.65,
"grad_norm": 0.9978533741995581,
"kl": 0.4602809548377991,
"learning_rate": 2.5438618118136433e-06,
"loss": 0.1524,
"step": 902,
"step_loss": 0.16445577144622803
},
{
"epoch": 2.65,
"grad_norm": 0.9469645542814312,
"kl": 0.42083609104156494,
"learning_rate": 2.539709451281725e-06,
"loss": 0.1485,
"step": 903,
"step_loss": 0.12865757942199707
},
{
"epoch": 2.66,
"grad_norm": 1.095494010676487,
"kl": 0.522094190120697,
"learning_rate": 2.5355578131535206e-06,
"loss": 0.1574,
"step": 904,
"step_loss": 0.1739048808813095
},
{
"epoch": 2.66,
"grad_norm": 0.9767740096877633,
"kl": 0.4193570613861084,
"learning_rate": 2.531406911691007e-06,
"loss": 0.1573,
"step": 905,
"step_loss": 0.1455826461315155
},
{
"epoch": 2.66,
"grad_norm": 1.017293366696319,
"kl": 0.4341852068901062,
"learning_rate": 2.5272567611536303e-06,
"loss": 0.1526,
"step": 906,
"step_loss": 0.15153871476650238
},
{
"epoch": 2.67,
"grad_norm": 0.9952002463525593,
"kl": 0.42138275504112244,
"learning_rate": 2.523107375798254e-06,
"loss": 0.1538,
"step": 907,
"step_loss": 0.1505734622478485
},
{
"epoch": 2.67,
"grad_norm": 0.9990848287474213,
"kl": 0.4363083839416504,
"learning_rate": 2.5189587698791175e-06,
"loss": 0.154,
"step": 908,
"step_loss": 0.14343413710594177
},
{
"epoch": 2.67,
"grad_norm": 0.9723709331417107,
"kl": 0.42384618520736694,
"learning_rate": 2.51481095764778e-06,
"loss": 0.1496,
"step": 909,
"step_loss": 0.1434541940689087
},
{
"epoch": 2.68,
"grad_norm": 0.953374267251741,
"kl": 0.5374601483345032,
"learning_rate": 2.510663953353075e-06,
"loss": 0.1485,
"step": 910,
"step_loss": 0.1526307612657547
},
{
"epoch": 2.68,
"grad_norm": 1.020275779519007,
"kl": 0.5095154047012329,
"learning_rate": 2.50651777124106e-06,
"loss": 0.1547,
"step": 911,
"step_loss": 0.15365783870220184
},
{
"epoch": 2.68,
"grad_norm": 1.0064179434331657,
"kl": 0.5424807667732239,
"learning_rate": 2.502372425554968e-06,
"loss": 0.1491,
"step": 912,
"step_loss": 0.1669929325580597
},
{
"epoch": 2.68,
"grad_norm": 0.9590723032371087,
"kl": 0.3669721484184265,
"learning_rate": 2.4982279305351605e-06,
"loss": 0.149,
"step": 913,
"step_loss": 0.14819172024726868
},
{
"epoch": 2.69,
"grad_norm": 1.0423700278607653,
"kl": 0.40059924125671387,
"learning_rate": 2.4940843004190727e-06,
"loss": 0.1528,
"step": 914,
"step_loss": 0.14792829751968384
},
{
"epoch": 2.69,
"grad_norm": 1.059798761869846,
"kl": 0.39160391688346863,
"learning_rate": 2.4899415494411736e-06,
"loss": 0.1494,
"step": 915,
"step_loss": 0.14644666016101837
},
{
"epoch": 2.69,
"grad_norm": 0.994269671804963,
"kl": 0.4729336202144623,
"learning_rate": 2.4857996918329093e-06,
"loss": 0.1548,
"step": 916,
"step_loss": 0.14490240812301636
},
{
"epoch": 2.7,
"grad_norm": 1.0274421441712134,
"kl": 0.4249011278152466,
"learning_rate": 2.481658741822656e-06,
"loss": 0.1528,
"step": 917,
"step_loss": 0.1569293737411499
},
{
"epoch": 2.7,
"grad_norm": 1.1032766707978614,
"kl": 0.44397133588790894,
"learning_rate": 2.4775187136356732e-06,
"loss": 0.1509,
"step": 918,
"step_loss": 0.13812614977359772
},
{
"epoch": 2.7,
"grad_norm": 1.0393047337226677,
"kl": 0.40771737694740295,
"learning_rate": 2.4733796214940565e-06,
"loss": 0.1559,
"step": 919,
"step_loss": 0.1609930545091629
},
{
"epoch": 2.7,
"grad_norm": 1.0454766342784834,
"kl": 0.40909823775291443,
"learning_rate": 2.469241479616681e-06,
"loss": 0.1562,
"step": 920,
"step_loss": 0.15960478782653809
},
{
"epoch": 2.71,
"grad_norm": 0.9710746223118797,
"kl": 0.357599139213562,
"learning_rate": 2.4651043022191605e-06,
"loss": 0.1409,
"step": 921,
"step_loss": 0.12360851466655731
},
{
"epoch": 2.71,
"grad_norm": 1.0366911538769703,
"kl": 0.44713571667671204,
"learning_rate": 2.4609681035137944e-06,
"loss": 0.1515,
"step": 922,
"step_loss": 0.13877364993095398
},
{
"epoch": 2.71,
"grad_norm": 0.9935669508473964,
"kl": 0.47438859939575195,
"learning_rate": 2.456832897709521e-06,
"loss": 0.1502,
"step": 923,
"step_loss": 0.17184007167816162
},
{
"epoch": 2.72,
"grad_norm": 1.0048074202805686,
"kl": 0.3735441565513611,
"learning_rate": 2.4526986990118672e-06,
"loss": 0.1583,
"step": 924,
"step_loss": 0.14378073811531067
},
{
"epoch": 2.72,
"grad_norm": 0.9409483433063506,
"kl": 0.3914346694946289,
"learning_rate": 2.4485655216228986e-06,
"loss": 0.1476,
"step": 925,
"step_loss": 0.15655651688575745
},
{
"epoch": 2.72,
"grad_norm": 1.0223306684022924,
"kl": 0.424472451210022,
"learning_rate": 2.444433379741176e-06,
"loss": 0.1541,
"step": 926,
"step_loss": 0.15379807353019714
},
{
"epoch": 2.73,
"grad_norm": 0.9893200206120092,
"kl": 0.47619765996932983,
"learning_rate": 2.4403022875617e-06,
"loss": 0.1467,
"step": 927,
"step_loss": 0.17208687961101532
},
{
"epoch": 2.73,
"grad_norm": 1.0262447178852225,
"kl": 0.47813111543655396,
"learning_rate": 2.436172259275866e-06,
"loss": 0.1623,
"step": 928,
"step_loss": 0.13537657260894775
},
{
"epoch": 2.73,
"grad_norm": 0.9861270661846792,
"kl": 0.38649582862854004,
"learning_rate": 2.4320433090714134e-06,
"loss": 0.1476,
"step": 929,
"step_loss": 0.1501408964395523
},
{
"epoch": 2.73,
"grad_norm": 0.9853040761251798,
"kl": 0.4063931107521057,
"learning_rate": 2.4279154511323823e-06,
"loss": 0.1615,
"step": 930,
"step_loss": 0.15353356301784515
},
{
"epoch": 2.74,
"grad_norm": 0.9205228458296352,
"kl": 0.3394644260406494,
"learning_rate": 2.4237886996390556e-06,
"loss": 0.1427,
"step": 931,
"step_loss": 0.1389724165201187
},
{
"epoch": 2.74,
"grad_norm": 0.9932025613140306,
"kl": 0.3473202884197235,
"learning_rate": 2.4196630687679173e-06,
"loss": 0.1505,
"step": 932,
"step_loss": 0.1683613657951355
},
{
"epoch": 2.74,
"grad_norm": 0.984733712974178,
"kl": 0.40478530526161194,
"learning_rate": 2.415538572691602e-06,
"loss": 0.1463,
"step": 933,
"step_loss": 0.13838434219360352
},
{
"epoch": 2.75,
"grad_norm": 0.9806916184951824,
"kl": 0.4532083570957184,
"learning_rate": 2.4114152255788466e-06,
"loss": 0.1518,
"step": 934,
"step_loss": 0.13839900493621826
},
{
"epoch": 2.75,
"grad_norm": 0.9979938112250495,
"kl": 0.389826238155365,
"learning_rate": 2.407293041594439e-06,
"loss": 0.156,
"step": 935,
"step_loss": 0.1437515914440155
},
{
"epoch": 2.75,
"grad_norm": 0.978297759451275,
"kl": 0.43591850996017456,
"learning_rate": 2.4031720348991734e-06,
"loss": 0.1506,
"step": 936,
"step_loss": 0.1359221488237381
},
{
"epoch": 2.75,
"grad_norm": 0.9961809157585862,
"kl": 0.33996838331222534,
"learning_rate": 2.399052219649799e-06,
"loss": 0.145,
"step": 937,
"step_loss": 0.14791721105575562
},
{
"epoch": 2.76,
"grad_norm": 0.9450751235156168,
"kl": 0.42210879921913147,
"learning_rate": 2.3949336099989724e-06,
"loss": 0.1503,
"step": 938,
"step_loss": 0.16048789024353027
},
{
"epoch": 2.76,
"grad_norm": 0.9925709982011522,
"kl": 0.4403047561645508,
"learning_rate": 2.390816220095207e-06,
"loss": 0.1551,
"step": 939,
"step_loss": 0.16639472544193268
},
{
"epoch": 2.76,
"grad_norm": 0.9326817674895876,
"kl": 0.4106891453266144,
"learning_rate": 2.386700064082827e-06,
"loss": 0.1437,
"step": 940,
"step_loss": 0.13783779740333557
},
{
"epoch": 2.77,
"grad_norm": 0.9993440741834876,
"kl": 0.5168544054031372,
"learning_rate": 2.38258515610192e-06,
"loss": 0.1586,
"step": 941,
"step_loss": 0.19751232862472534
},
{
"epoch": 2.77,
"grad_norm": 0.951222600504691,
"kl": 0.39257577061653137,
"learning_rate": 2.3784715102882834e-06,
"loss": 0.1512,
"step": 942,
"step_loss": 0.1248694509267807
},
{
"epoch": 2.77,
"grad_norm": 0.9983634073528408,
"kl": 0.34117716550827026,
"learning_rate": 2.3743591407733797e-06,
"loss": 0.1574,
"step": 943,
"step_loss": 0.15840350091457367
},
{
"epoch": 2.78,
"grad_norm": 1.0089856202771001,
"kl": 0.5630459785461426,
"learning_rate": 2.3702480616842865e-06,
"loss": 0.1612,
"step": 944,
"step_loss": 0.17266206443309784
},
{
"epoch": 2.78,
"grad_norm": 1.0153619278805137,
"kl": 0.518592894077301,
"learning_rate": 2.36613828714365e-06,
"loss": 0.1595,
"step": 945,
"step_loss": 0.18611447513103485
},
{
"epoch": 2.78,
"grad_norm": 0.9672443779904987,
"kl": 0.4448012113571167,
"learning_rate": 2.362029831269634e-06,
"loss": 0.1558,
"step": 946,
"step_loss": 0.14080186188220978
},
{
"epoch": 2.78,
"grad_norm": 0.9611084638649775,
"kl": 0.38354170322418213,
"learning_rate": 2.357922708175872e-06,
"loss": 0.1469,
"step": 947,
"step_loss": 0.15614628791809082
},
{
"epoch": 2.79,
"grad_norm": 1.0312340475062312,
"kl": 0.42923545837402344,
"learning_rate": 2.353816931971419e-06,
"loss": 0.1509,
"step": 948,
"step_loss": 0.15058480203151703
},
{
"epoch": 2.79,
"grad_norm": 1.0325988848719911,
"kl": 0.37678343057632446,
"learning_rate": 2.3497125167607027e-06,
"loss": 0.153,
"step": 949,
"step_loss": 0.14883080124855042
},
{
"epoch": 2.79,
"grad_norm": 0.9819062626097106,
"kl": 0.5016992688179016,
"learning_rate": 2.345609476643477e-06,
"loss": 0.1428,
"step": 950,
"step_loss": 0.15665948390960693
},
{
"epoch": 2.8,
"grad_norm": 1.0064440018956071,
"kl": 0.4498019814491272,
"learning_rate": 2.341507825714771e-06,
"loss": 0.1593,
"step": 951,
"step_loss": 0.1523018330335617
},
{
"epoch": 2.8,
"grad_norm": 0.942831756703654,
"kl": 0.508718729019165,
"learning_rate": 2.337407578064842e-06,
"loss": 0.1473,
"step": 952,
"step_loss": 0.16131407022476196
},
{
"epoch": 2.8,
"grad_norm": 1.0213628080249857,
"kl": 0.47834068536758423,
"learning_rate": 2.3333087477791257e-06,
"loss": 0.1581,
"step": 953,
"step_loss": 0.16310566663742065
},
{
"epoch": 2.8,
"grad_norm": 0.9611082313028335,
"kl": 0.3989601135253906,
"learning_rate": 2.3292113489381895e-06,
"loss": 0.152,
"step": 954,
"step_loss": 0.13949620723724365
},
{
"epoch": 2.81,
"grad_norm": 0.9853678646194656,
"kl": 0.3734014332294464,
"learning_rate": 2.325115395617683e-06,
"loss": 0.1535,
"step": 955,
"step_loss": 0.14914605021476746
},
{
"epoch": 2.81,
"grad_norm": 0.9778583931081463,
"kl": 0.45069605112075806,
"learning_rate": 2.3210209018882913e-06,
"loss": 0.1524,
"step": 956,
"step_loss": 0.15262170135974884
},
{
"epoch": 2.81,
"grad_norm": 1.014172101673639,
"kl": 0.5114811062812805,
"learning_rate": 2.316927881815683e-06,
"loss": 0.1451,
"step": 957,
"step_loss": 0.16915518045425415
},
{
"epoch": 2.82,
"grad_norm": 0.9415660896574684,
"kl": 0.39761587977409363,
"learning_rate": 2.312836349460467e-06,
"loss": 0.153,
"step": 958,
"step_loss": 0.1431863158941269
},
{
"epoch": 2.82,
"grad_norm": 1.0285671477663938,
"kl": 0.2906627655029297,
"learning_rate": 2.3087463188781408e-06,
"loss": 0.1559,
"step": 959,
"step_loss": 0.12965397536754608
},
{
"epoch": 2.82,
"grad_norm": 0.9929048693237944,
"kl": 0.4464694559574127,
"learning_rate": 2.3046578041190403e-06,
"loss": 0.1486,
"step": 960,
"step_loss": 0.15477749705314636
},
{
"epoch": 2.83,
"grad_norm": 1.0822745094493649,
"kl": 0.4634704291820526,
"learning_rate": 2.3005708192282984e-06,
"loss": 0.1604,
"step": 961,
"step_loss": 0.17266973853111267
},
{
"epoch": 2.83,
"grad_norm": 1.0941722735914765,
"kl": 0.47234082221984863,
"learning_rate": 2.2964853782457887e-06,
"loss": 0.149,
"step": 962,
"step_loss": 0.14461389183998108
},
{
"epoch": 2.83,
"grad_norm": 1.0574454523000933,
"kl": 0.39766812324523926,
"learning_rate": 2.2924014952060843e-06,
"loss": 0.1603,
"step": 963,
"step_loss": 0.14928704500198364
},
{
"epoch": 2.83,
"grad_norm": 0.9067579687361036,
"kl": 0.4438409209251404,
"learning_rate": 2.288319184138403e-06,
"loss": 0.1408,
"step": 964,
"step_loss": 0.14300301671028137
},
{
"epoch": 2.84,
"grad_norm": 1.0342452737185248,
"kl": 0.3799823820590973,
"learning_rate": 2.2842384590665644e-06,
"loss": 0.155,
"step": 965,
"step_loss": 0.15365462005138397
},
{
"epoch": 2.84,
"grad_norm": 1.0444298271978016,
"kl": 0.4924103617668152,
"learning_rate": 2.280159334008941e-06,
"loss": 0.1507,
"step": 966,
"step_loss": 0.15880931913852692
},
{
"epoch": 2.84,
"grad_norm": 0.9599603942062377,
"kl": 0.41578635573387146,
"learning_rate": 2.2760818229784065e-06,
"loss": 0.1504,
"step": 967,
"step_loss": 0.1449252814054489
},
{
"epoch": 2.85,
"grad_norm": 1.0134939283764037,
"kl": 0.46528518199920654,
"learning_rate": 2.2720059399822906e-06,
"loss": 0.1545,
"step": 968,
"step_loss": 0.1586332619190216
},
{
"epoch": 2.85,
"grad_norm": 1.0347690361891235,
"kl": 0.41343602538108826,
"learning_rate": 2.2679316990223314e-06,
"loss": 0.1578,
"step": 969,
"step_loss": 0.17001797258853912
},
{
"epoch": 2.85,
"grad_norm": 1.0055593394957056,
"kl": 0.45835837721824646,
"learning_rate": 2.263859114094625e-06,
"loss": 0.1463,
"step": 970,
"step_loss": 0.13045182824134827
},
{
"epoch": 2.85,
"grad_norm": 0.9567730457998137,
"kl": 0.5038785934448242,
"learning_rate": 2.259788199189579e-06,
"loss": 0.1515,
"step": 971,
"step_loss": 0.17427542805671692
},
{
"epoch": 2.86,
"grad_norm": 1.0010068414341897,
"kl": 0.4056503474712372,
"learning_rate": 2.255718968291864e-06,
"loss": 0.157,
"step": 972,
"step_loss": 0.15841376781463623
},
{
"epoch": 2.86,
"grad_norm": 1.0107117284340932,
"kl": 0.45705166459083557,
"learning_rate": 2.251651435380364e-06,
"loss": 0.1571,
"step": 973,
"step_loss": 0.14534001052379608
},
{
"epoch": 2.86,
"grad_norm": 0.990000349150702,
"kl": 0.5228754878044128,
"learning_rate": 2.2475856144281345e-06,
"loss": 0.1379,
"step": 974,
"step_loss": 0.16290828585624695
},
{
"epoch": 2.87,
"grad_norm": 1.0008871196700513,
"kl": 0.3806186020374298,
"learning_rate": 2.2435215194023453e-06,
"loss": 0.1525,
"step": 975,
"step_loss": 0.1635514348745346
},
{
"epoch": 2.87,
"grad_norm": 1.0960477399243882,
"kl": 0.45375317335128784,
"learning_rate": 2.239459164264238e-06,
"loss": 0.1535,
"step": 976,
"step_loss": 0.18511676788330078
},
{
"epoch": 2.87,
"grad_norm": 1.0218003843637917,
"kl": 0.37781763076782227,
"learning_rate": 2.2353985629690793e-06,
"loss": 0.1475,
"step": 977,
"step_loss": 0.12731696665287018
},
{
"epoch": 2.88,
"grad_norm": 0.9017717274035716,
"kl": 0.3964046239852905,
"learning_rate": 2.231339729466111e-06,
"loss": 0.143,
"step": 978,
"step_loss": 0.13375751674175262
},
{
"epoch": 2.88,
"grad_norm": 0.9619019793221093,
"kl": 0.36923855543136597,
"learning_rate": 2.2272826776984985e-06,
"loss": 0.1486,
"step": 979,
"step_loss": 0.15310907363891602
},
{
"epoch": 2.88,
"grad_norm": 1.0027381543220892,
"kl": 0.45206311345100403,
"learning_rate": 2.223227421603289e-06,
"loss": 0.1508,
"step": 980,
"step_loss": 0.17637795209884644
},
{
"epoch": 2.88,
"grad_norm": 1.047855188766988,
"kl": 0.44840162992477417,
"learning_rate": 2.2191739751113624e-06,
"loss": 0.1611,
"step": 981,
"step_loss": 0.1478584259748459
},
{
"epoch": 2.89,
"grad_norm": 1.0127988521429687,
"kl": 0.5084017515182495,
"learning_rate": 2.2151223521473803e-06,
"loss": 0.1474,
"step": 982,
"step_loss": 0.14254876971244812
},
{
"epoch": 2.89,
"grad_norm": 0.9277534723825935,
"kl": 0.4267195463180542,
"learning_rate": 2.2110725666297395e-06,
"loss": 0.1441,
"step": 983,
"step_loss": 0.13901741802692413
},
{
"epoch": 2.89,
"grad_norm": 1.0036892192485454,
"kl": 0.3984602987766266,
"learning_rate": 2.2070246324705253e-06,
"loss": 0.143,
"step": 984,
"step_loss": 0.1377902328968048
},
{
"epoch": 2.9,
"grad_norm": 0.9646755179948415,
"kl": 0.6093090772628784,
"learning_rate": 2.2029785635754646e-06,
"loss": 0.1476,
"step": 985,
"step_loss": 0.17706118524074554
},
{
"epoch": 2.9,
"grad_norm": 0.9905850844659927,
"kl": 0.4276701807975769,
"learning_rate": 2.1989343738438755e-06,
"loss": 0.1525,
"step": 986,
"step_loss": 0.17314979434013367
},
{
"epoch": 2.9,
"grad_norm": 1.1708291027675957,
"kl": 0.40725067257881165,
"learning_rate": 2.1948920771686196e-06,
"loss": 0.1567,
"step": 987,
"step_loss": 0.16095474362373352
},
{
"epoch": 2.9,
"grad_norm": 1.0739387979713282,
"kl": 0.39202889800071716,
"learning_rate": 2.1908516874360558e-06,
"loss": 0.1485,
"step": 988,
"step_loss": 0.13703203201293945
},
{
"epoch": 2.91,
"grad_norm": 0.9536023429872225,
"kl": 0.4466555714607239,
"learning_rate": 2.1868132185259933e-06,
"loss": 0.148,
"step": 989,
"step_loss": 0.16553649306297302
},
{
"epoch": 2.91,
"grad_norm": 0.962874004228303,
"kl": 0.36752018332481384,
"learning_rate": 2.1827766843116427e-06,
"loss": 0.1486,
"step": 990,
"step_loss": 0.1441442370414734
},
{
"epoch": 2.91,
"grad_norm": 1.0075217426405563,
"kl": 0.4437835216522217,
"learning_rate": 2.1787420986595664e-06,
"loss": 0.1485,
"step": 991,
"step_loss": 0.15905022621154785
},
{
"epoch": 2.92,
"grad_norm": 1.016123385289782,
"kl": 0.4468748867511749,
"learning_rate": 2.1747094754296353e-06,
"loss": 0.1452,
"step": 992,
"step_loss": 0.12368491291999817
},
{
"epoch": 2.92,
"grad_norm": 0.9569243857996387,
"kl": 0.4261481463909149,
"learning_rate": 2.170678828474978e-06,
"loss": 0.1471,
"step": 993,
"step_loss": 0.14896030724048615
},
{
"epoch": 2.92,
"grad_norm": 1.0626260878976141,
"kl": 0.38104549050331116,
"learning_rate": 2.1666501716419342e-06,
"loss": 0.1563,
"step": 994,
"step_loss": 0.15966196358203888
},
{
"epoch": 2.93,
"grad_norm": 0.9361293787090408,
"kl": 0.4901023805141449,
"learning_rate": 2.1626235187700057e-06,
"loss": 0.1462,
"step": 995,
"step_loss": 0.18366771936416626
},
{
"epoch": 2.93,
"grad_norm": 0.950338934995153,
"kl": 0.4179726541042328,
"learning_rate": 2.158598883691812e-06,
"loss": 0.1494,
"step": 996,
"step_loss": 0.138540118932724
},
{
"epoch": 2.93,
"grad_norm": 1.0489853611443787,
"kl": 0.506919801235199,
"learning_rate": 2.15457628023304e-06,
"loss": 0.1668,
"step": 997,
"step_loss": 0.21282246708869934
},
{
"epoch": 2.93,
"grad_norm": 1.04818521184514,
"kl": 0.5396957993507385,
"learning_rate": 2.1505557222123972e-06,
"loss": 0.1539,
"step": 998,
"step_loss": 0.1591482013463974
},
{
"epoch": 2.94,
"grad_norm": 1.0616666386652713,
"kl": 0.4631801247596741,
"learning_rate": 2.1465372234415673e-06,
"loss": 0.1555,
"step": 999,
"step_loss": 0.14960813522338867
},
{
"epoch": 2.94,
"grad_norm": 1.035119268368553,
"kl": 0.45855218172073364,
"learning_rate": 2.1425207977251544e-06,
"loss": 0.1521,
"step": 1000,
"step_loss": 0.1509908139705658
},
{
"epoch": 2.94,
"grad_norm": 0.996467609929929,
"kl": 0.3887898921966553,
"learning_rate": 2.1385064588606463e-06,
"loss": 0.1549,
"step": 1001,
"step_loss": 0.13089656829833984
},
{
"epoch": 2.95,
"grad_norm": 0.9742558665726649,
"kl": 0.40123283863067627,
"learning_rate": 2.1344942206383586e-06,
"loss": 0.1459,
"step": 1002,
"step_loss": 0.13350878655910492
},
{
"epoch": 2.95,
"grad_norm": 0.9488404858578288,
"kl": 0.382538765668869,
"learning_rate": 2.130484096841393e-06,
"loss": 0.1439,
"step": 1003,
"step_loss": 0.14286507666110992
},
{
"epoch": 2.95,
"grad_norm": 0.9513353524533048,
"kl": 0.42083343863487244,
"learning_rate": 2.1264761012455865e-06,
"loss": 0.142,
"step": 1004,
"step_loss": 0.14722788333892822
},
{
"epoch": 2.95,
"grad_norm": 1.0141753634991808,
"kl": 0.4633311629295349,
"learning_rate": 2.122470247619464e-06,
"loss": 0.1623,
"step": 1005,
"step_loss": 0.1737322211265564
},
{
"epoch": 2.96,
"grad_norm": 1.028458994383184,
"kl": 0.5300709009170532,
"learning_rate": 2.118466549724194e-06,
"loss": 0.1536,
"step": 1006,
"step_loss": 0.16598369181156158
},
{
"epoch": 2.96,
"grad_norm": 1.029391738602643,
"kl": 0.43173086643218994,
"learning_rate": 2.1144650213135397e-06,
"loss": 0.1624,
"step": 1007,
"step_loss": 0.14931721985340118
},
{
"epoch": 2.96,
"grad_norm": 0.9545265742649662,
"kl": 0.4387211501598358,
"learning_rate": 2.11046567613381e-06,
"loss": 0.1437,
"step": 1008,
"step_loss": 0.16143842041492462
},
{
"epoch": 2.97,
"grad_norm": 1.0152085404083042,
"kl": 0.46170535683631897,
"learning_rate": 2.106468527923814e-06,
"loss": 0.1575,
"step": 1009,
"step_loss": 0.14393383264541626
},
{
"epoch": 2.97,
"grad_norm": 0.9507669139022603,
"kl": 0.450935959815979,
"learning_rate": 2.1024735904148152e-06,
"loss": 0.1556,
"step": 1010,
"step_loss": 0.14951980113983154
},
{
"epoch": 2.97,
"grad_norm": 0.9775946084017444,
"kl": 0.4422980546951294,
"learning_rate": 2.098480877330481e-06,
"loss": 0.1544,
"step": 1011,
"step_loss": 0.14084143936634064
},
{
"epoch": 2.98,
"grad_norm": 0.9641237236379501,
"kl": 0.4498288035392761,
"learning_rate": 2.094490402386838e-06,
"loss": 0.1566,
"step": 1012,
"step_loss": 0.13751116394996643
},
{
"epoch": 2.98,
"grad_norm": 1.0126537288710316,
"kl": 0.39250999689102173,
"learning_rate": 2.0905021792922235e-06,
"loss": 0.1559,
"step": 1013,
"step_loss": 0.14081251621246338
},
{
"epoch": 2.98,
"grad_norm": 1.0449214471833967,
"kl": 0.5089797377586365,
"learning_rate": 2.0865162217472416e-06,
"loss": 0.1596,
"step": 1014,
"step_loss": 0.16164351999759674
},
{
"epoch": 2.98,
"grad_norm": 0.9832128900915236,
"kl": 0.39397111535072327,
"learning_rate": 2.08253254344471e-06,
"loss": 0.1489,
"step": 1015,
"step_loss": 0.13195285201072693
},
{
"epoch": 2.99,
"grad_norm": 1.081208132886941,
"kl": 0.38181591033935547,
"learning_rate": 2.0785511580696206e-06,
"loss": 0.163,
"step": 1016,
"step_loss": 0.14687636494636536
},
{
"epoch": 2.99,
"grad_norm": 0.9836684370582969,
"kl": 0.5204348564147949,
"learning_rate": 2.0745720792990836e-06,
"loss": 0.1498,
"step": 1017,
"step_loss": 0.15303507447242737
},
{
"epoch": 2.99,
"grad_norm": 1.0707243384591267,
"kl": 0.37323451042175293,
"learning_rate": 2.070595320802291e-06,
"loss": 0.1536,
"step": 1018,
"step_loss": 0.1562386006116867
},
{
"epoch": 3.0,
"grad_norm": 0.9179992183742228,
"kl": 0.4068221151828766,
"learning_rate": 2.0666208962404593e-06,
"loss": 0.1383,
"step": 1019,
"step_loss": 0.1486799120903015
},
{
"epoch": 3.0,
"grad_norm": 0.9834517584761968,
"kl": 0.4073179364204407,
"learning_rate": 2.062648819266789e-06,
"loss": 0.1498,
"step": 1020,
"step_loss": 0.15172114968299866
},
{
"epoch": 3.0,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_loss": 1.7941198348999023,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_runtime": 14.5917,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_samples_per_second": 6.853,
"eval_sft_Qwen2.5-7B-Instruct_helpful_sneaky_eval100.json_steps_per_second": 0.891,
"step": 1020
}
],
"logging_steps": 1.0,
"max_steps": 1700,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100.0,
"total_flos": 87053826223104.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}