{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 3950, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.2658227848101266e-07, "loss": 0.6546, "step": 1 }, { "epoch": 0.63, "learning_rate": 3.1518987341772155e-05, "loss": 0.4879, "step": 250 }, { "epoch": 1.0, "eval_accuracy": 0.10857142857142857, "eval_f1": 0.07811852465810196, "eval_loss": 0.40317419171333313, "eval_precision": 0.17927631578947367, "eval_recall": 0.08782679738562092, "eval_runtime": 0.3221, "eval_samples_per_second": 543.295, "eval_steps_per_second": 68.3, "step": 395 }, { "epoch": 1.27, "learning_rate": 4.9894490513601396e-05, "loss": 0.4006, "step": 500 }, { "epoch": 1.9, "learning_rate": 4.878663257984936e-05, "loss": 0.3759, "step": 750 }, { "epoch": 2.0, "eval_accuracy": 0.17714285714285713, "eval_f1": 0.26697819927843125, "eval_loss": 0.37731701135635376, "eval_precision": 0.5218837535014006, "eval_recall": 0.2022453723084196, "eval_runtime": 0.286, "eval_samples_per_second": 611.901, "eval_steps_per_second": 76.925, "step": 790 }, { "epoch": 2.53, "learning_rate": 4.652248579391458e-05, "loss": 0.2948, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.24571428571428572, "eval_f1": 0.36607677500846103, "eval_loss": 0.39780309796333313, "eval_precision": 0.5472431077694235, "eval_recall": 0.3120779820710713, "eval_runtime": 0.3071, "eval_samples_per_second": 569.892, "eval_steps_per_second": 71.644, "step": 1185 }, { "epoch": 3.16, "learning_rate": 4.3212112295929505e-05, "loss": 0.2649, "step": 1250 }, { "epoch": 3.8, "learning_rate": 3.901643221443403e-05, "loss": 0.1864, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.2742857142857143, "eval_f1": 0.4266986709033833, "eval_loss": 0.4376567602157593, "eval_precision": 0.5016284584009042, "eval_recall": 0.38560897951242346, "eval_runtime": 0.2998, "eval_samples_per_second": 583.659, "eval_steps_per_second": 73.374, "step": 1580 }, { "epoch": 4.43, "learning_rate": 3.413940119927783e-05, "loss": 0.1384, "step": 1750 }, { "epoch": 5.0, "eval_accuracy": 0.2342857142857143, "eval_f1": 0.39448414058744574, "eval_loss": 0.5389729738235474, "eval_precision": 0.48297759387155675, "eval_recall": 0.37081969717487395, "eval_runtime": 0.3757, "eval_samples_per_second": 465.771, "eval_steps_per_second": 58.554, "step": 1975 }, { "epoch": 5.06, "learning_rate": 2.8818095960484663e-05, "loss": 0.111, "step": 2000 }, { "epoch": 5.7, "learning_rate": 2.3311189763638598e-05, "loss": 0.0735, "step": 2250 }, { "epoch": 6.0, "eval_accuracy": 0.24571428571428572, "eval_f1": 0.46370146148440794, "eval_loss": 0.5818756222724915, "eval_precision": 0.5038962453668336, "eval_recall": 0.4432562275600327, "eval_runtime": 0.2959, "eval_samples_per_second": 591.513, "eval_steps_per_second": 74.362, "step": 2370 }, { "epoch": 6.33, "learning_rate": 1.788637809796257e-05, "loss": 0.0593, "step": 2500 }, { "epoch": 6.96, "learning_rate": 1.2807365766276206e-05, "loss": 0.0417, "step": 2750 }, { "epoch": 7.0, "eval_accuracy": 0.2571428571428571, "eval_f1": 0.45537043929039545, "eval_loss": 0.6109325289726257, "eval_precision": 0.5327423959044721, "eval_recall": 0.41917933979084293, "eval_runtime": 0.2925, "eval_samples_per_second": 598.344, "eval_steps_per_second": 75.22, "step": 2765 }, { "epoch": 7.59, "learning_rate": 8.321047965682896e-06, "loss": 0.0247, "step": 3000 }, { "epoch": 8.0, "eval_accuracy": 0.26857142857142857, "eval_f1": 0.44079821839093025, "eval_loss": 0.6337146162986755, "eval_precision": 0.4776951058201059, "eval_recall": 0.42734534657817325, "eval_runtime": 0.3404, "eval_samples_per_second": 514.096, "eval_steps_per_second": 64.629, "step": 3160 }, { "epoch": 8.23, "learning_rate": 4.645508497778378e-06, "loss": 0.0209, "step": 3250 }, { "epoch": 8.86, "learning_rate": 1.9594185257981066e-06, "loss": 0.0159, "step": 3500 }, { "epoch": 9.0, "eval_accuracy": 0.2342857142857143, "eval_f1": 0.4276706487492863, "eval_loss": 0.6485006809234619, "eval_precision": 0.4811818562937489, "eval_recall": 0.40231404333907983, "eval_runtime": 0.3206, "eval_samples_per_second": 545.875, "eval_steps_per_second": 68.624, "step": 3555 }, { "epoch": 9.49, "learning_rate": 3.933512143182022e-07, "loss": 0.0147, "step": 3750 }, { "epoch": 10.0, "eval_accuracy": 0.2571428571428571, "eval_f1": 0.4350893581328566, "eval_loss": 0.6474436521530151, "eval_precision": 0.49936246351687524, "eval_recall": 0.40231404333907983, "eval_runtime": 0.3074, "eval_samples_per_second": 569.268, "eval_steps_per_second": 71.565, "step": 3950 }, { "epoch": 10.0, "step": 3950, "total_flos": 200561037492224.0, "train_loss": 0.0, "train_runtime": 0.0071, "train_samples_per_second": 4470373.653, "train_steps_per_second": 559504.941 }, { "epoch": 10.0, "eval_accuracy": 0.2571428571428571, "eval_f1": 0.4350893581328566, "eval_loss": 0.6474436521530151, "eval_precision": 0.49936246351687524, "eval_recall": 0.40231404333907983, "eval_runtime": 0.3645, "eval_samples_per_second": 480.122, "eval_steps_per_second": 60.358, "step": 3950 } ], "max_steps": 3950, "num_train_epochs": 10, "total_flos": 200561037492224.0, "trial_name": null, "trial_params": { "learning_rate": 5e-05 } }