{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 50.0,
  "global_step": 86,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0237741456166419,
      "grad_norm": 3.34375,
      "learning_rate": 2e-05,
      "loss": 1.1976238489151,
      "step": 1,
      "token_acc": 0.7184428746928747
    },
    {
      "epoch": 0.1188707280832095,
      "grad_norm": 3.765625,
      "learning_rate": 0.0001,
      "loss": 1.2833234071731567,
      "step": 5,
      "token_acc": 0.6957437157113635
    },
    {
      "epoch": 0.237741456166419,
      "grad_norm": 300.0,
      "learning_rate": 9.906276553136923e-05,
      "loss": 2.3091596603393554,
      "step": 10,
      "token_acc": 0.6008980821950969
    },
    {
      "epoch": 0.35661218424962854,
      "grad_norm": 3.28125,
      "learning_rate": 9.628619846344454e-05,
      "loss": 1.3249249458312988,
      "step": 15,
      "token_acc": 0.7083573751573646
    },
    {
      "epoch": 0.475482912332838,
      "grad_norm": 2.203125,
      "learning_rate": 9.177439057064683e-05,
      "loss": 0.9449030876159668,
      "step": 20,
      "token_acc": 0.7733868243825437
    },
    {
      "epoch": 0.5943536404160475,
      "grad_norm": 1.4375,
      "learning_rate": 8.569648672789497e-05,
      "loss": 0.804572582244873,
      "step": 25,
      "token_acc": 0.8030009224918576
    },
    {
      "epoch": 0.7132243684992571,
      "grad_norm": 1.1953125,
      "learning_rate": 7.828034377432693e-05,
      "loss": 0.6343977928161622,
      "step": 30,
      "token_acc": 0.8435477802859292
    },
    {
      "epoch": 0.8320950965824666,
      "grad_norm": 1.0546875,
      "learning_rate": 6.980398830195785e-05,
      "loss": 0.3958749771118164,
      "step": 35,
      "token_acc": 0.8998997800202961
    },
    {
      "epoch": 0.950965824665676,
      "grad_norm": 0.74609375,
      "learning_rate": 6.058519361147055e-05,
      "loss": 0.38391425609588625,
      "step": 40,
      "token_acc": 0.9056173526140155
    },
    {
      "epoch": 1.0475482912332839,
      "grad_norm": 0.82421875,
      "learning_rate": 5.096956658859122e-05,
      "loss": 0.29045734405517576,
      "step": 45,
      "token_acc": 0.9264699310283186
    },
    {
      "epoch": 1.1664190193164934,
      "grad_norm": 0.87109375,
      "learning_rate": 4.131759111665349e-05,
      "loss": 0.13265597820281982,
      "step": 50,
      "token_acc": 0.9676710929519918
    },
    {
      "epoch": 1.2852897473997027,
      "grad_norm": 0.48046875,
      "learning_rate": 3.199111375976449e-05,
      "loss": 0.11603926420211792,
      "step": 55,
      "token_acc": 0.9712261616975557
    },
    {
      "epoch": 1.4041604754829122,
      "grad_norm": 0.5703125,
      "learning_rate": 2.333977835991545e-05,
      "loss": 0.09179887771606446,
      "step": 60,
      "token_acc": 0.9757825780724563
    },
    {
      "epoch": 1.5230312035661218,
      "grad_norm": 0.59765625,
      "learning_rate": 1.5687918106563326e-05,
      "loss": 0.11050317287445069,
      "step": 65,
      "token_acc": 0.9738332162768583
    },
    {
      "epoch": 1.6419019316493313,
      "grad_norm": 0.69140625,
      "learning_rate": 9.322396486851626e-06,
      "loss": 0.0894925355911255,
      "step": 70,
      "token_acc": 0.9766682632919051
    },
    {
      "epoch": 1.7607726597325408,
      "grad_norm": 0.58984375,
      "learning_rate": 4.4818529516926726e-06,
      "loss": 0.0985899806022644,
      "step": 75,
      "token_acc": 0.9736953912894402
    },
    {
      "epoch": 1.8796433878157504,
      "grad_norm": 0.6640625,
      "learning_rate": 1.3477564710088098e-06,
      "loss": 0.11772974729537963,
      "step": 80,
      "token_acc": 0.970642493007756
    },
    {
      "epoch": 1.9985141158989599,
      "grad_norm": 0.72265625,
      "learning_rate": 3.760237478849793e-08,
      "loss": 0.10874439477920532,
      "step": 85,
      "token_acc": 0.9721623048495361
    }
  ],
  "logging_steps": 5,
  "max_steps": 86,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3391554666248192e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}