{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 183,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0547945205479452,
      "grad_norm": 513.996826171875,
      "learning_rate": 4.090909090909091e-07,
      "logits/chosen": 2.990995407104492,
      "logits/rejected": 3.0081257820129395,
      "logps/chosen": -298.52886962890625,
      "logps/rejected": -202.96295166015625,
      "loss": 0.8843,
      "rewards/accuracies": 0.526562511920929,
      "rewards/chosen": 0.010423189960420132,
      "rewards/margins": 0.05534166842699051,
      "rewards/rejected": -0.0449184887111187,
      "step": 10
    },
    {
      "epoch": 0.1095890410958904,
      "grad_norm": 461.9533996582031,
      "learning_rate": 8.636363636363637e-07,
      "logits/chosen": 2.934217929840088,
      "logits/rejected": 2.919574737548828,
      "logps/chosen": -276.70391845703125,
      "logps/rejected": -200.52728271484375,
      "loss": 0.6858,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": 0.7733574509620667,
      "rewards/margins": 0.9247980117797852,
      "rewards/rejected": -0.15144045650959015,
      "step": 20
    },
    {
      "epoch": 0.1643835616438356,
      "grad_norm": 150.57086181640625,
      "learning_rate": 1.3181818181818182e-06,
      "logits/chosen": 3.0546913146972656,
      "logits/rejected": 3.0510308742523193,
      "logps/chosen": -289.57977294921875,
      "logps/rejected": -218.24765014648438,
      "loss": 0.2839,
      "rewards/accuracies": 0.8890625238418579,
      "rewards/chosen": 2.8807666301727295,
      "rewards/margins": 4.351069450378418,
      "rewards/rejected": -1.4703023433685303,
      "step": 30
    },
    {
      "epoch": 0.2191780821917808,
      "grad_norm": 151.46690368652344,
      "learning_rate": 1.7727272727272729e-06,
      "logits/chosen": 3.088348150253296,
      "logits/rejected": 3.1125292778015137,
      "logps/chosen": -275.5274963378906,
      "logps/rejected": -214.15737915039062,
      "loss": 0.1502,
      "rewards/accuracies": 0.9390624761581421,
      "rewards/chosen": 5.216189384460449,
      "rewards/margins": 8.827147483825684,
      "rewards/rejected": -3.610957384109497,
      "step": 40
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 130.53453063964844,
      "learning_rate": 2.2272727272727274e-06,
      "logits/chosen": 2.9992308616638184,
      "logits/rejected": 3.075270175933838,
      "logps/chosen": -259.50836181640625,
      "logps/rejected": -203.08811950683594,
      "loss": 0.1281,
      "rewards/accuracies": 0.957812488079071,
      "rewards/chosen": 6.97296667098999,
      "rewards/margins": 12.8120698928833,
      "rewards/rejected": -5.839103698730469,
      "step": 50
    },
    {
      "epoch": 0.3287671232876712,
      "grad_norm": 110.53746032714844,
      "learning_rate": 2.6818181818181822e-06,
      "logits/chosen": 3.088064670562744,
      "logits/rejected": 2.986386775970459,
      "logps/chosen": -325.0431213378906,
      "logps/rejected": -224.979736328125,
      "loss": 0.1444,
      "rewards/accuracies": 0.964062511920929,
      "rewards/chosen": 10.117586135864258,
      "rewards/margins": 18.425701141357422,
      "rewards/rejected": -8.30811595916748,
      "step": 60
    },
    {
      "epoch": 0.3835616438356164,
      "grad_norm": 95.7762222290039,
      "learning_rate": 3.1363636363636367e-06,
      "logits/chosen": 3.040494918823242,
      "logits/rejected": 3.022307872772217,
      "logps/chosen": -273.9972839355469,
      "logps/rejected": -211.2712860107422,
      "loss": 0.1443,
      "rewards/accuracies": 0.9671875238418579,
      "rewards/chosen": 9.627126693725586,
      "rewards/margins": 20.590023040771484,
      "rewards/rejected": -10.962896347045898,
      "step": 70
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 179.24440002441406,
      "learning_rate": 3.590909090909091e-06,
      "logits/chosen": 3.0298266410827637,
      "logits/rejected": 3.0730605125427246,
      "logps/chosen": -280.2432861328125,
      "logps/rejected": -214.884033203125,
      "loss": 0.142,
      "rewards/accuracies": 0.979687511920929,
      "rewards/chosen": 10.719534873962402,
      "rewards/margins": 23.82217788696289,
      "rewards/rejected": -13.102640151977539,
      "step": 80
    },
    {
      "epoch": 0.4931506849315068,
      "grad_norm": 2.0804860591888428,
      "learning_rate": 4.045454545454546e-06,
      "logits/chosen": 3.1072518825531006,
      "logits/rejected": 3.067288875579834,
      "logps/chosen": -294.1097106933594,
      "logps/rejected": -219.38949584960938,
      "loss": 0.1262,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": 12.069157600402832,
      "rewards/margins": 29.454524993896484,
      "rewards/rejected": -17.385366439819336,
      "step": 90
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 134.90240478515625,
      "learning_rate": 4.5e-06,
      "logits/chosen": 2.9695353507995605,
      "logits/rejected": 2.9900407791137695,
      "logps/chosen": -270.2259826660156,
      "logps/rejected": -210.32302856445312,
      "loss": 0.1125,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 8.642024040222168,
      "rewards/margins": 26.856210708618164,
      "rewards/rejected": -18.21418571472168,
      "step": 100
    },
    {
      "epoch": 0.6027397260273972,
      "grad_norm": 237.83163452148438,
      "learning_rate": 4.954545454545455e-06,
      "logits/chosen": 3.001239538192749,
      "logits/rejected": 2.9165444374084473,
      "logps/chosen": -261.63848876953125,
      "logps/rejected": -217.56314086914062,
      "loss": 0.1337,
      "rewards/accuracies": 0.9765625,
      "rewards/chosen": 4.362582206726074,
      "rewards/margins": 21.842912673950195,
      "rewards/rejected": -17.480329513549805,
      "step": 110
    },
    {
      "epoch": 0.6575342465753424,
      "grad_norm": 124.83686065673828,
      "learning_rate": 4.998976350571773e-06,
      "logits/chosen": 3.0631394386291504,
      "logits/rejected": 3.0034124851226807,
      "logps/chosen": -296.0355224609375,
      "logps/rejected": -219.4881134033203,
      "loss": 0.176,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 3.6324734687805176,
      "rewards/margins": 20.854042053222656,
      "rewards/rejected": -17.221569061279297,
      "step": 120
    },
    {
      "epoch": 0.7123287671232876,
      "grad_norm": 110.8822250366211,
      "learning_rate": 4.995438885558294e-06,
      "logits/chosen": 3.0476179122924805,
      "logits/rejected": 2.9690792560577393,
      "logps/chosen": -292.52276611328125,
      "logps/rejected": -210.3925018310547,
      "loss": 0.2762,
      "rewards/accuracies": 0.9781249761581421,
      "rewards/chosen": 5.373471736907959,
      "rewards/margins": 26.076580047607422,
      "rewards/rejected": -20.703105926513672,
      "step": 130
    },
    {
      "epoch": 0.7671232876712328,
      "grad_norm": 72.18496704101562,
      "learning_rate": 4.989378542821969e-06,
      "logits/chosen": 3.0710926055908203,
      "logits/rejected": 3.0577285289764404,
      "logps/chosen": -284.55230712890625,
      "logps/rejected": -230.9425506591797,
      "loss": 0.2372,
      "rewards/accuracies": 0.973437488079071,
      "rewards/chosen": 5.454714775085449,
      "rewards/margins": 30.868602752685547,
      "rewards/rejected": -25.413890838623047,
      "step": 140
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 104.61406707763672,
      "learning_rate": 4.9808014493426124e-06,
      "logits/chosen": 3.053307294845581,
      "logits/rejected": 3.0027899742126465,
      "logps/chosen": -282.54864501953125,
      "logps/rejected": -236.41592407226562,
      "loss": 0.6492,
      "rewards/accuracies": 0.9546874761581421,
      "rewards/chosen": 0.29179587960243225,
      "rewards/margins": 30.52816390991211,
      "rewards/rejected": -30.23636817932129,
      "step": 150
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 114.0179443359375,
      "learning_rate": 4.9697162765239595e-06,
      "logits/chosen": 3.0813591480255127,
      "logits/rejected": 3.093292713165283,
      "logps/chosen": -265.5400390625,
      "logps/rejected": -233.4171142578125,
      "loss": 0.1703,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": 2.905339002609253,
      "rewards/margins": 36.841583251953125,
      "rewards/rejected": -33.936241149902344,
      "step": 160
    },
    {
      "epoch": 0.9315068493150684,
      "grad_norm": 132.4547576904297,
      "learning_rate": 4.9561342314269055e-06,
      "logits/chosen": 3.124277114868164,
      "logits/rejected": 3.048166036605835,
      "logps/chosen": -262.30194091796875,
      "logps/rejected": -232.39297485351562,
      "loss": 0.4256,
      "rewards/accuracies": 0.9515625238418579,
      "rewards/chosen": -1.3073980808258057,
      "rewards/margins": 30.139415740966797,
      "rewards/rejected": -31.446813583374023,
      "step": 170
    },
    {
      "epoch": 0.9863013698630136,
      "grad_norm": 1.0581492185592651,
      "learning_rate": 4.940069045439226e-06,
      "logits/chosen": 3.164407968521118,
      "logits/rejected": 3.164742946624756,
      "logps/chosen": -274.58673095703125,
      "logps/rejected": -232.9495849609375,
      "loss": 0.3806,
      "rewards/accuracies": 0.9703124761581421,
      "rewards/chosen": 0.4807693064212799,
      "rewards/margins": 33.424041748046875,
      "rewards/rejected": -32.943275451660156,
      "step": 180
    }
  ],
  "logging_steps": 10,
  "max_steps": 1098,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}