alquistcoder-4B-secureLLM / trainer_state.json
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 183,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0547945205479452,
"grad_norm": 513.996826171875,
"learning_rate": 4.090909090909091e-07,
"logits/chosen": 2.990995407104492,
"logits/rejected": 3.0081257820129395,
"logps/chosen": -298.52886962890625,
"logps/rejected": -202.96295166015625,
"loss": 0.8843,
"rewards/accuracies": 0.526562511920929,
"rewards/chosen": 0.010423189960420132,
"rewards/margins": 0.05534166842699051,
"rewards/rejected": -0.0449184887111187,
"step": 10
},
{
"epoch": 0.1095890410958904,
"grad_norm": 461.9533996582031,
"learning_rate": 8.636363636363637e-07,
"logits/chosen": 2.934217929840088,
"logits/rejected": 2.919574737548828,
"logps/chosen": -276.70391845703125,
"logps/rejected": -200.52728271484375,
"loss": 0.6858,
"rewards/accuracies": 0.65625,
"rewards/chosen": 0.7733574509620667,
"rewards/margins": 0.9247980117797852,
"rewards/rejected": -0.15144045650959015,
"step": 20
},
{
"epoch": 0.1643835616438356,
"grad_norm": 150.57086181640625,
"learning_rate": 1.3181818181818182e-06,
"logits/chosen": 3.0546913146972656,
"logits/rejected": 3.0510308742523193,
"logps/chosen": -289.57977294921875,
"logps/rejected": -218.24765014648438,
"loss": 0.2839,
"rewards/accuracies": 0.8890625238418579,
"rewards/chosen": 2.8807666301727295,
"rewards/margins": 4.351069450378418,
"rewards/rejected": -1.4703023433685303,
"step": 30
},
{
"epoch": 0.2191780821917808,
"grad_norm": 151.46690368652344,
"learning_rate": 1.7727272727272729e-06,
"logits/chosen": 3.088348150253296,
"logits/rejected": 3.1125292778015137,
"logps/chosen": -275.5274963378906,
"logps/rejected": -214.15737915039062,
"loss": 0.1502,
"rewards/accuracies": 0.9390624761581421,
"rewards/chosen": 5.216189384460449,
"rewards/margins": 8.827147483825684,
"rewards/rejected": -3.610957384109497,
"step": 40
},
{
"epoch": 0.273972602739726,
"grad_norm": 130.53453063964844,
"learning_rate": 2.2272727272727274e-06,
"logits/chosen": 2.9992308616638184,
"logits/rejected": 3.075270175933838,
"logps/chosen": -259.50836181640625,
"logps/rejected": -203.08811950683594,
"loss": 0.1281,
"rewards/accuracies": 0.957812488079071,
"rewards/chosen": 6.97296667098999,
"rewards/margins": 12.8120698928833,
"rewards/rejected": -5.839103698730469,
"step": 50
},
{
"epoch": 0.3287671232876712,
"grad_norm": 110.53746032714844,
"learning_rate": 2.6818181818181822e-06,
"logits/chosen": 3.088064670562744,
"logits/rejected": 2.986386775970459,
"logps/chosen": -325.0431213378906,
"logps/rejected": -224.979736328125,
"loss": 0.1444,
"rewards/accuracies": 0.964062511920929,
"rewards/chosen": 10.117586135864258,
"rewards/margins": 18.425701141357422,
"rewards/rejected": -8.30811595916748,
"step": 60
},
{
"epoch": 0.3835616438356164,
"grad_norm": 95.7762222290039,
"learning_rate": 3.1363636363636367e-06,
"logits/chosen": 3.040494918823242,
"logits/rejected": 3.022307872772217,
"logps/chosen": -273.9972839355469,
"logps/rejected": -211.2712860107422,
"loss": 0.1443,
"rewards/accuracies": 0.9671875238418579,
"rewards/chosen": 9.627126693725586,
"rewards/margins": 20.590023040771484,
"rewards/rejected": -10.962896347045898,
"step": 70
},
{
"epoch": 0.4383561643835616,
"grad_norm": 179.24440002441406,
"learning_rate": 3.590909090909091e-06,
"logits/chosen": 3.0298266410827637,
"logits/rejected": 3.0730605125427246,
"logps/chosen": -280.2432861328125,
"logps/rejected": -214.884033203125,
"loss": 0.142,
"rewards/accuracies": 0.979687511920929,
"rewards/chosen": 10.719534873962402,
"rewards/margins": 23.82217788696289,
"rewards/rejected": -13.102640151977539,
"step": 80
},
{
"epoch": 0.4931506849315068,
"grad_norm": 2.0804860591888428,
"learning_rate": 4.045454545454546e-06,
"logits/chosen": 3.1072518825531006,
"logits/rejected": 3.067288875579834,
"logps/chosen": -294.1097106933594,
"logps/rejected": -219.38949584960938,
"loss": 0.1262,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 12.069157600402832,
"rewards/margins": 29.454524993896484,
"rewards/rejected": -17.385366439819336,
"step": 90
},
{
"epoch": 0.547945205479452,
"grad_norm": 134.90240478515625,
"learning_rate": 4.5e-06,
"logits/chosen": 2.9695353507995605,
"logits/rejected": 2.9900407791137695,
"logps/chosen": -270.2259826660156,
"logps/rejected": -210.32302856445312,
"loss": 0.1125,
"rewards/accuracies": 0.984375,
"rewards/chosen": 8.642024040222168,
"rewards/margins": 26.856210708618164,
"rewards/rejected": -18.21418571472168,
"step": 100
},
{
"epoch": 0.6027397260273972,
"grad_norm": 237.83163452148438,
"learning_rate": 4.954545454545455e-06,
"logits/chosen": 3.001239538192749,
"logits/rejected": 2.9165444374084473,
"logps/chosen": -261.63848876953125,
"logps/rejected": -217.56314086914062,
"loss": 0.1337,
"rewards/accuracies": 0.9765625,
"rewards/chosen": 4.362582206726074,
"rewards/margins": 21.842912673950195,
"rewards/rejected": -17.480329513549805,
"step": 110
},
{
"epoch": 0.6575342465753424,
"grad_norm": 124.83686065673828,
"learning_rate": 4.998976350571773e-06,
"logits/chosen": 3.0631394386291504,
"logits/rejected": 3.0034124851226807,
"logps/chosen": -296.0355224609375,
"logps/rejected": -219.4881134033203,
"loss": 0.176,
"rewards/accuracies": 0.96875,
"rewards/chosen": 3.6324734687805176,
"rewards/margins": 20.854042053222656,
"rewards/rejected": -17.221569061279297,
"step": 120
},
{
"epoch": 0.7123287671232876,
"grad_norm": 110.8822250366211,
"learning_rate": 4.995438885558294e-06,
"logits/chosen": 3.0476179122924805,
"logits/rejected": 2.9690792560577393,
"logps/chosen": -292.52276611328125,
"logps/rejected": -210.3925018310547,
"loss": 0.2762,
"rewards/accuracies": 0.9781249761581421,
"rewards/chosen": 5.373471736907959,
"rewards/margins": 26.076580047607422,
"rewards/rejected": -20.703105926513672,
"step": 130
},
{
"epoch": 0.7671232876712328,
"grad_norm": 72.18496704101562,
"learning_rate": 4.989378542821969e-06,
"logits/chosen": 3.0710926055908203,
"logits/rejected": 3.0577285289764404,
"logps/chosen": -284.55230712890625,
"logps/rejected": -230.9425506591797,
"loss": 0.2372,
"rewards/accuracies": 0.973437488079071,
"rewards/chosen": 5.454714775085449,
"rewards/margins": 30.868602752685547,
"rewards/rejected": -25.413890838623047,
"step": 140
},
{
"epoch": 0.821917808219178,
"grad_norm": 104.61406707763672,
"learning_rate": 4.9808014493426124e-06,
"logits/chosen": 3.053307294845581,
"logits/rejected": 3.0027899742126465,
"logps/chosen": -282.54864501953125,
"logps/rejected": -236.41592407226562,
"loss": 0.6492,
"rewards/accuracies": 0.9546874761581421,
"rewards/chosen": 0.29179587960243225,
"rewards/margins": 30.52816390991211,
"rewards/rejected": -30.23636817932129,
"step": 150
},
{
"epoch": 0.8767123287671232,
"grad_norm": 114.0179443359375,
"learning_rate": 4.9697162765239595e-06,
"logits/chosen": 3.0813591480255127,
"logits/rejected": 3.093292713165283,
"logps/chosen": -265.5400390625,
"logps/rejected": -233.4171142578125,
"loss": 0.1703,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 2.905339002609253,
"rewards/margins": 36.841583251953125,
"rewards/rejected": -33.936241149902344,
"step": 160
},
{
"epoch": 0.9315068493150684,
"grad_norm": 132.4547576904297,
"learning_rate": 4.9561342314269055e-06,
"logits/chosen": 3.124277114868164,
"logits/rejected": 3.048166036605835,
"logps/chosen": -262.30194091796875,
"logps/rejected": -232.39297485351562,
"loss": 0.4256,
"rewards/accuracies": 0.9515625238418579,
"rewards/chosen": -1.3073980808258057,
"rewards/margins": 30.139415740966797,
"rewards/rejected": -31.446813583374023,
"step": 170
},
{
"epoch": 0.9863013698630136,
"grad_norm": 1.0581492185592651,
"learning_rate": 4.940069045439226e-06,
"logits/chosen": 3.164407968521118,
"logits/rejected": 3.164742946624756,
"logps/chosen": -274.58673095703125,
"logps/rejected": -232.9495849609375,
"loss": 0.3806,
"rewards/accuracies": 0.9703124761581421,
"rewards/chosen": 0.4807693064212799,
"rewards/margins": 33.424041748046875,
"rewards/rejected": -32.943275451660156,
"step": 180
}
],
"logging_steps": 10,
"max_steps": 1098,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
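
For quick inspection, the log_history above can be summarized with a short Python sketch. This is a minimal example under stated assumptions: it assumes the JSON is saved as trainer_state.json next to the script (the filename/path is an assumption), uses only the standard library, and reads exactly the field names recorded in the file. The margin check reflects how DPO-style logging typically relates the reward columns, which the recorded values are consistent with.

# Minimal sketch: load the trainer state (assumed path) and summarize the DPO log.
import json

with open("trainer_state.json") as f:  # assumed filename/location
    state = json.load(f)

# 1098 max_steps over 6 epochs -> 183 optimizer steps per epoch, matching global_step.
steps_per_epoch = state["max_steps"] // state["num_train_epochs"]
print(f"steps per epoch: {steps_per_epoch}, logged every {state['logging_steps']} steps")

for entry in state["log_history"]:
    # The logged values are consistent with margins ≈ rewards/chosen - rewards/rejected.
    margin_check = entry["rewards/chosen"] - entry["rewards/rejected"]
    print(
        f"step {entry['step']:>3} | epoch {entry['epoch']:.3f} | "
        f"loss {entry['loss']:.4f} | acc {entry['rewards/accuracies']:.3f} | "
        f"margin {entry['rewards/margins']:.2f} (chosen-rejected {margin_check:.2f})"
    )

Run against the data above, this prints one line per logged step (10 through 180), which makes the trend easy to see: loss drops from 0.88 to roughly 0.1-0.4 while reward accuracy climbs above 0.95 and the chosen/rejected margin widens past 30 by step 160.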