{ "best_global_step": 196, "best_metric": 1.4676804542541504, "best_model_checkpoint": "./models/codet5-sequenced/checkpoint-196", "epoch": 13.067796610169491, "eval_steps": 7, "global_step": 196, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06779661016949153, "grad_norm": 35.061439514160156, "learning_rate": 0.0, "loss": 6.9968, "step": 1 }, { "epoch": 0.4745762711864407, "eval_loss": 3.7553770542144775, "eval_runtime": 31.376, "eval_samples_per_second": 4.143, "eval_steps_per_second": 0.223, "step": 7 }, { "epoch": 0.6779661016949152, "grad_norm": 2.471144676208496, "learning_rate": 0.00012857142857142855, "loss": 4.9711, "step": 10 }, { "epoch": 0.9491525423728814, "eval_loss": 2.576092481613159, "eval_runtime": 32.5287, "eval_samples_per_second": 3.996, "eval_steps_per_second": 0.215, "step": 14 }, { "epoch": 1.3389830508474576, "grad_norm": 0.970221757888794, "learning_rate": 0.0002714285714285714, "loss": 2.7955, "step": 20 }, { "epoch": 1.4067796610169492, "eval_loss": 2.0272090435028076, "eval_runtime": 32.8232, "eval_samples_per_second": 3.961, "eval_steps_per_second": 0.213, "step": 21 }, { "epoch": 1.8813559322033897, "eval_loss": 1.794872760772705, "eval_runtime": 32.1072, "eval_samples_per_second": 4.049, "eval_steps_per_second": 0.218, "step": 28 }, { "epoch": 2.0, "grad_norm": 0.253476619720459, "learning_rate": 0.00028823529411764703, "loss": 1.9943, "step": 30 }, { "epoch": 2.3389830508474576, "eval_loss": 1.6869820356369019, "eval_runtime": 33.6053, "eval_samples_per_second": 3.868, "eval_steps_per_second": 0.208, "step": 35 }, { "epoch": 2.6779661016949152, "grad_norm": 0.17588205635547638, "learning_rate": 0.00027352941176470583, "loss": 1.7476, "step": 40 }, { "epoch": 2.8135593220338984, "eval_loss": 1.633076548576355, "eval_runtime": 31.3432, "eval_samples_per_second": 4.148, "eval_steps_per_second": 0.223, "step": 42 }, { "epoch": 3.2711864406779663, "eval_loss": 1.595025658607483, "eval_runtime": 32.6382, "eval_samples_per_second": 3.983, "eval_steps_per_second": 0.214, "step": 49 }, { "epoch": 3.3389830508474576, "grad_norm": 0.1354854702949524, "learning_rate": 0.0002588235294117647, "loss": 1.6567, "step": 50 }, { "epoch": 3.7457627118644066, "eval_loss": 1.5682227611541748, "eval_runtime": 32.3796, "eval_samples_per_second": 4.015, "eval_steps_per_second": 0.216, "step": 56 }, { "epoch": 4.0, "grad_norm": 0.14623650908470154, "learning_rate": 0.0002441176470588235, "loss": 1.6035, "step": 60 }, { "epoch": 4.203389830508475, "eval_loss": 1.5545539855957031, "eval_runtime": 32.9074, "eval_samples_per_second": 3.95, "eval_steps_per_second": 0.213, "step": 63 }, { "epoch": 4.677966101694915, "grad_norm": 0.08996161818504333, "learning_rate": 0.0002294117647058823, "loss": 1.5737, "step": 70 }, { "epoch": 4.677966101694915, "eval_loss": 1.5401841402053833, "eval_runtime": 36.4472, "eval_samples_per_second": 3.567, "eval_steps_per_second": 0.192, "step": 70 }, { "epoch": 5.135593220338983, "eval_loss": 1.5304237604141235, "eval_runtime": 33.0897, "eval_samples_per_second": 3.929, "eval_steps_per_second": 0.212, "step": 77 }, { "epoch": 5.338983050847458, "grad_norm": 0.12378664314746857, "learning_rate": 0.00021470588235294116, "loss": 1.5575, "step": 80 }, { "epoch": 5.610169491525424, "eval_loss": 1.5213029384613037, "eval_runtime": 31.2672, "eval_samples_per_second": 4.158, "eval_steps_per_second": 0.224, "step": 84 }, { "epoch": 6.0, "grad_norm": 0.1171395406126976, "learning_rate": 0.00019999999999999998, "loss": 1.5386, "step": 90 }, { "epoch": 6.067796610169491, "eval_loss": 1.5130056142807007, "eval_runtime": 33.1459, "eval_samples_per_second": 3.922, "eval_steps_per_second": 0.211, "step": 91 }, { "epoch": 6.5423728813559325, "eval_loss": 1.5062706470489502, "eval_runtime": 32.4291, "eval_samples_per_second": 4.009, "eval_steps_per_second": 0.216, "step": 98 }, { "epoch": 6.677966101694915, "grad_norm": 0.06857075542211533, "learning_rate": 0.0001852941176470588, "loss": 1.5288, "step": 100 }, { "epoch": 7.0, "eval_loss": 1.5011202096939087, "eval_runtime": 32.7571, "eval_samples_per_second": 3.969, "eval_steps_per_second": 0.214, "step": 105 }, { "epoch": 7.338983050847458, "grad_norm": 0.07266195118427277, "learning_rate": 0.00017058823529411763, "loss": 1.5196, "step": 110 }, { "epoch": 7.47457627118644, "eval_loss": 1.49434232711792, "eval_runtime": 33.5153, "eval_samples_per_second": 3.879, "eval_steps_per_second": 0.209, "step": 112 }, { "epoch": 7.9491525423728815, "eval_loss": 1.493066430091858, "eval_runtime": 32.6462, "eval_samples_per_second": 3.982, "eval_steps_per_second": 0.214, "step": 119 }, { "epoch": 8.0, "grad_norm": 0.09790726006031036, "learning_rate": 0.00015588235294117646, "loss": 1.5098, "step": 120 }, { "epoch": 8.40677966101695, "eval_loss": 1.4890639781951904, "eval_runtime": 33.9906, "eval_samples_per_second": 3.825, "eval_steps_per_second": 0.206, "step": 126 }, { "epoch": 8.677966101694915, "grad_norm": 0.08605582267045975, "learning_rate": 0.00014117647058823528, "loss": 1.504, "step": 130 }, { "epoch": 8.88135593220339, "eval_loss": 1.4857922792434692, "eval_runtime": 33.6557, "eval_samples_per_second": 3.863, "eval_steps_per_second": 0.208, "step": 133 }, { "epoch": 9.338983050847457, "grad_norm": 0.07982663810253143, "learning_rate": 0.0001264705882352941, "loss": 1.501, "step": 140 }, { "epoch": 9.338983050847457, "eval_loss": 1.4824906587600708, "eval_runtime": 33.5066, "eval_samples_per_second": 3.88, "eval_steps_per_second": 0.209, "step": 140 }, { "epoch": 9.813559322033898, "eval_loss": 1.478973150253296, "eval_runtime": 33.5484, "eval_samples_per_second": 3.875, "eval_steps_per_second": 0.209, "step": 147 }, { "epoch": 10.0, "grad_norm": 0.07402677834033966, "learning_rate": 0.00011176470588235293, "loss": 1.4941, "step": 150 }, { "epoch": 10.271186440677965, "eval_loss": 1.476096749305725, "eval_runtime": 33.087, "eval_samples_per_second": 3.929, "eval_steps_per_second": 0.212, "step": 154 }, { "epoch": 10.677966101694915, "grad_norm": 0.0733569785952568, "learning_rate": 9.705882352941176e-05, "loss": 1.4894, "step": 160 }, { "epoch": 10.745762711864407, "eval_loss": 1.4757354259490967, "eval_runtime": 33.5743, "eval_samples_per_second": 3.872, "eval_steps_per_second": 0.208, "step": 161 }, { "epoch": 11.203389830508474, "eval_loss": 1.4727975130081177, "eval_runtime": 33.5918, "eval_samples_per_second": 3.87, "eval_steps_per_second": 0.208, "step": 168 }, { "epoch": 11.338983050847457, "grad_norm": 0.07810712605714798, "learning_rate": 8.23529411764706e-05, "loss": 1.4887, "step": 170 }, { "epoch": 11.677966101694915, "eval_loss": 1.4703751802444458, "eval_runtime": 33.6902, "eval_samples_per_second": 3.859, "eval_steps_per_second": 0.208, "step": 175 }, { "epoch": 12.0, "grad_norm": 0.0885886400938034, "learning_rate": 6.76470588235294e-05, "loss": 1.4844, "step": 180 }, { "epoch": 12.135593220338983, "eval_loss": 1.4694443941116333, "eval_runtime": 33.5918, "eval_samples_per_second": 3.87, "eval_steps_per_second": 0.208, "step": 182 }, { "epoch": 12.610169491525424, "eval_loss": 1.47074556350708, "eval_runtime": 33.0341, "eval_samples_per_second": 3.935, "eval_steps_per_second": 0.212, "step": 189 }, { "epoch": 12.677966101694915, "grad_norm": 0.07929002493619919, "learning_rate": 5.294117647058824e-05, "loss": 1.48, "step": 190 }, { "epoch": 13.067796610169491, "eval_loss": 1.4676804542541504, "eval_runtime": 33.3819, "eval_samples_per_second": 3.894, "eval_steps_per_second": 0.21, "step": 196 } ], "logging_steps": 10, "max_steps": 225, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 28, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2586720180633600.0, "train_batch_size": 20, "trial_name": null, "trial_params": null }