| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "global_step": 3950, | |
| "is_hyper_param_search": true, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.2658227848101266e-07, | |
| "loss": 0.6546, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.1518987341772155e-05, | |
| "loss": 0.4879, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.10857142857142857, | |
| "eval_f1": 0.07811852465810196, | |
| "eval_loss": 0.40317419171333313, | |
| "eval_precision": 0.17927631578947367, | |
| "eval_recall": 0.08782679738562092, | |
| "eval_runtime": 0.3221, | |
| "eval_samples_per_second": 543.295, | |
| "eval_steps_per_second": 68.3, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.9894490513601396e-05, | |
| "loss": 0.4006, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 4.878663257984936e-05, | |
| "loss": 0.3759, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.17714285714285713, | |
| "eval_f1": 0.26697819927843125, | |
| "eval_loss": 0.37731701135635376, | |
| "eval_precision": 0.5218837535014006, | |
| "eval_recall": 0.2022453723084196, | |
| "eval_runtime": 0.286, | |
| "eval_samples_per_second": 611.901, | |
| "eval_steps_per_second": 76.925, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 4.652248579391458e-05, | |
| "loss": 0.2948, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.24571428571428572, | |
| "eval_f1": 0.36607677500846103, | |
| "eval_loss": 0.39780309796333313, | |
| "eval_precision": 0.5472431077694235, | |
| "eval_recall": 0.3120779820710713, | |
| "eval_runtime": 0.3071, | |
| "eval_samples_per_second": 569.892, | |
| "eval_steps_per_second": 71.644, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 4.3212112295929505e-05, | |
| "loss": 0.2649, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 3.901643221443403e-05, | |
| "loss": 0.1864, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.2742857142857143, | |
| "eval_f1": 0.4266986709033833, | |
| "eval_loss": 0.4376567602157593, | |
| "eval_precision": 0.5016284584009042, | |
| "eval_recall": 0.38560897951242346, | |
| "eval_runtime": 0.2998, | |
| "eval_samples_per_second": 583.659, | |
| "eval_steps_per_second": 73.374, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 3.413940119927783e-05, | |
| "loss": 0.1384, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.2342857142857143, | |
| "eval_f1": 0.39448414058744574, | |
| "eval_loss": 0.5389729738235474, | |
| "eval_precision": 0.48297759387155675, | |
| "eval_recall": 0.37081969717487395, | |
| "eval_runtime": 0.3757, | |
| "eval_samples_per_second": 465.771, | |
| "eval_steps_per_second": 58.554, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 2.8818095960484663e-05, | |
| "loss": 0.111, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.3311189763638598e-05, | |
| "loss": 0.0735, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.24571428571428572, | |
| "eval_f1": 0.46370146148440794, | |
| "eval_loss": 0.5818756222724915, | |
| "eval_precision": 0.5038962453668336, | |
| "eval_recall": 0.4432562275600327, | |
| "eval_runtime": 0.2959, | |
| "eval_samples_per_second": 591.513, | |
| "eval_steps_per_second": 74.362, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.788637809796257e-05, | |
| "loss": 0.0593, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 1.2807365766276206e-05, | |
| "loss": 0.0417, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.2571428571428571, | |
| "eval_f1": 0.45537043929039545, | |
| "eval_loss": 0.6109325289726257, | |
| "eval_precision": 0.5327423959044721, | |
| "eval_recall": 0.41917933979084293, | |
| "eval_runtime": 0.2925, | |
| "eval_samples_per_second": 598.344, | |
| "eval_steps_per_second": 75.22, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 8.321047965682896e-06, | |
| "loss": 0.0247, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.26857142857142857, | |
| "eval_f1": 0.44079821839093025, | |
| "eval_loss": 0.6337146162986755, | |
| "eval_precision": 0.4776951058201059, | |
| "eval_recall": 0.42734534657817325, | |
| "eval_runtime": 0.3404, | |
| "eval_samples_per_second": 514.096, | |
| "eval_steps_per_second": 64.629, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 4.645508497778378e-06, | |
| "loss": 0.0209, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 1.9594185257981066e-06, | |
| "loss": 0.0159, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.2342857142857143, | |
| "eval_f1": 0.4276706487492863, | |
| "eval_loss": 0.6485006809234619, | |
| "eval_precision": 0.4811818562937489, | |
| "eval_recall": 0.40231404333907983, | |
| "eval_runtime": 0.3206, | |
| "eval_samples_per_second": 545.875, | |
| "eval_steps_per_second": 68.624, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 3.933512143182022e-07, | |
| "loss": 0.0147, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.2571428571428571, | |
| "eval_f1": 0.4350893581328566, | |
| "eval_loss": 0.6474436521530151, | |
| "eval_precision": 0.49936246351687524, | |
| "eval_recall": 0.40231404333907983, | |
| "eval_runtime": 0.3074, | |
| "eval_samples_per_second": 569.268, | |
| "eval_steps_per_second": 71.565, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 3950, | |
| "total_flos": 200561037492224.0, | |
| "train_loss": 0.0, | |
| "train_runtime": 0.0071, | |
| "train_samples_per_second": 4470373.653, | |
| "train_steps_per_second": 559504.941 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.2571428571428571, | |
| "eval_f1": 0.4350893581328566, | |
| "eval_loss": 0.6474436521530151, | |
| "eval_precision": 0.49936246351687524, | |
| "eval_recall": 0.40231404333907983, | |
| "eval_runtime": 0.3645, | |
| "eval_samples_per_second": 480.122, | |
| "eval_steps_per_second": 60.358, | |
| "step": 3950 | |
| } | |
| ], | |
| "max_steps": 3950, | |
| "num_train_epochs": 10, | |
| "total_flos": 200561037492224.0, | |
| "trial_name": null, | |
| "trial_params": { | |
| "learning_rate": 5e-05 | |
| } | |
| } | |