| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 29.997830802603037, |
| "global_step": 6900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.43, |
| "eval_loss": 3.3600032329559326, |
| "eval_runtime": 177.4912, |
| "eval_samples_per_second": 27.601, |
| "eval_steps_per_second": 0.868, |
| "eval_wer": 1.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.87, |
| "eval_loss": 3.088737726211548, |
| "eval_runtime": 176.8112, |
| "eval_samples_per_second": 27.708, |
| "eval_steps_per_second": 0.871, |
| "eval_wer": 1.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_loss": 3.0779149532318115, |
| "eval_runtime": 176.3294, |
| "eval_samples_per_second": 27.783, |
| "eval_steps_per_second": 0.873, |
| "eval_wer": 1.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.74, |
| "eval_loss": 3.05513334274292, |
| "eval_runtime": 176.7029, |
| "eval_samples_per_second": 27.724, |
| "eval_steps_per_second": 0.872, |
| "eval_wer": 1.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 0.00029699999999999996, |
| "loss": 4.8553, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_loss": 3.052617311477661, |
| "eval_runtime": 176.6413, |
| "eval_samples_per_second": 27.734, |
| "eval_steps_per_second": 0.872, |
| "eval_wer": 1.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_loss": 3.0559935569763184, |
| "eval_runtime": 175.4042, |
| "eval_samples_per_second": 27.93, |
| "eval_steps_per_second": 0.878, |
| "eval_wer": 1.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.04, |
| "eval_loss": 3.125081777572632, |
| "eval_runtime": 174.6598, |
| "eval_samples_per_second": 28.049, |
| "eval_steps_per_second": 0.882, |
| "eval_wer": 1.0, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.48, |
| "eval_loss": 3.087021589279175, |
| "eval_runtime": 177.8472, |
| "eval_samples_per_second": 27.546, |
| "eval_steps_per_second": 0.866, |
| "eval_wer": 1.0, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.91, |
| "eval_loss": 3.08219575881958, |
| "eval_runtime": 180.0973, |
| "eval_samples_per_second": 27.202, |
| "eval_steps_per_second": 0.855, |
| "eval_wer": 1.0, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 0.00027679687499999997, |
| "loss": 3.1133, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.35, |
| "eval_loss": 3.048403739929199, |
| "eval_runtime": 177.987, |
| "eval_samples_per_second": 27.524, |
| "eval_steps_per_second": 0.865, |
| "eval_wer": 1.0, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.78, |
| "eval_loss": 3.0558109283447266, |
| "eval_runtime": 176.2514, |
| "eval_samples_per_second": 27.796, |
| "eval_steps_per_second": 0.874, |
| "eval_wer": 1.0, |
| "step": 1100 |
| }, |
| { |
| "epoch": 5.22, |
| "eval_loss": 3.1018614768981934, |
| "eval_runtime": 174.5505, |
| "eval_samples_per_second": 28.066, |
| "eval_steps_per_second": 0.882, |
| "eval_wer": 1.0, |
| "step": 1200 |
| }, |
| { |
| "epoch": 5.65, |
| "eval_loss": 3.0914077758789062, |
| "eval_runtime": 174.0307, |
| "eval_samples_per_second": 28.15, |
| "eval_steps_per_second": 0.885, |
| "eval_wer": 1.0, |
| "step": 1300 |
| }, |
| { |
| "epoch": 6.09, |
| "eval_loss": 3.069120168685913, |
| "eval_runtime": 175.4381, |
| "eval_samples_per_second": 27.924, |
| "eval_steps_per_second": 0.878, |
| "eval_wer": 1.0, |
| "step": 1400 |
| }, |
| { |
| "epoch": 6.52, |
| "learning_rate": 0.00025335937499999995, |
| "loss": 3.109, |
| "step": 1500 |
| }, |
| { |
| "epoch": 6.52, |
| "eval_loss": 3.0588901042938232, |
| "eval_runtime": 175.6572, |
| "eval_samples_per_second": 27.89, |
| "eval_steps_per_second": 0.877, |
| "eval_wer": 1.0, |
| "step": 1500 |
| }, |
| { |
| "epoch": 6.95, |
| "eval_loss": 3.050849199295044, |
| "eval_runtime": 175.685, |
| "eval_samples_per_second": 27.885, |
| "eval_steps_per_second": 0.877, |
| "eval_wer": 1.0, |
| "step": 1600 |
| }, |
| { |
| "epoch": 7.39, |
| "eval_loss": 3.054013252258301, |
| "eval_runtime": 177.236, |
| "eval_samples_per_second": 27.641, |
| "eval_steps_per_second": 0.869, |
| "eval_wer": 1.0, |
| "step": 1700 |
| }, |
| { |
| "epoch": 7.82, |
| "eval_loss": 3.0545613765716553, |
| "eval_runtime": 177.8822, |
| "eval_samples_per_second": 27.541, |
| "eval_steps_per_second": 0.866, |
| "eval_wer": 1.0, |
| "step": 1800 |
| }, |
| { |
| "epoch": 8.26, |
| "eval_loss": 3.0523643493652344, |
| "eval_runtime": 172.9222, |
| "eval_samples_per_second": 28.331, |
| "eval_steps_per_second": 0.891, |
| "eval_wer": 1.0, |
| "step": 1900 |
| }, |
| { |
| "epoch": 8.69, |
| "learning_rate": 0.00022992187499999996, |
| "loss": 3.1106, |
| "step": 2000 |
| }, |
| { |
| "epoch": 8.69, |
| "eval_loss": 3.056912422180176, |
| "eval_runtime": 175.8694, |
| "eval_samples_per_second": 27.856, |
| "eval_steps_per_second": 0.876, |
| "eval_wer": 1.0, |
| "step": 2000 |
| }, |
| { |
| "epoch": 9.13, |
| "eval_loss": 3.0621554851531982, |
| "eval_runtime": 175.7147, |
| "eval_samples_per_second": 27.88, |
| "eval_steps_per_second": 0.876, |
| "eval_wer": 1.0, |
| "step": 2100 |
| }, |
| { |
| "epoch": 9.56, |
| "eval_loss": 3.0517823696136475, |
| "eval_runtime": 174.9875, |
| "eval_samples_per_second": 27.996, |
| "eval_steps_per_second": 0.88, |
| "eval_wer": 1.0, |
| "step": 2200 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 3.0749499797821045, |
| "eval_runtime": 176.3933, |
| "eval_samples_per_second": 27.773, |
| "eval_steps_per_second": 0.873, |
| "eval_wer": 1.0, |
| "step": 2300 |
| }, |
| { |
| "epoch": 10.43, |
| "eval_loss": 3.0697524547576904, |
| "eval_runtime": 175.8486, |
| "eval_samples_per_second": 27.859, |
| "eval_steps_per_second": 0.876, |
| "eval_wer": 1.0, |
| "step": 2400 |
| }, |
| { |
| "epoch": 10.87, |
| "learning_rate": 0.00020648437499999996, |
| "loss": 3.1058, |
| "step": 2500 |
| }, |
| { |
| "epoch": 10.87, |
| "eval_loss": 3.0664749145507812, |
| "eval_runtime": 176.8396, |
| "eval_samples_per_second": 27.703, |
| "eval_steps_per_second": 0.871, |
| "eval_wer": 1.0, |
| "step": 2500 |
| }, |
| { |
| "epoch": 11.3, |
| "eval_loss": 3.055528402328491, |
| "eval_runtime": 176.8954, |
| "eval_samples_per_second": 27.694, |
| "eval_steps_per_second": 0.871, |
| "eval_wer": 1.0, |
| "step": 2600 |
| }, |
| { |
| "epoch": 11.74, |
| "eval_loss": 3.0589022636413574, |
| "eval_runtime": 177.2054, |
| "eval_samples_per_second": 27.646, |
| "eval_steps_per_second": 0.869, |
| "eval_wer": 1.0, |
| "step": 2700 |
| }, |
| { |
| "epoch": 12.17, |
| "eval_loss": 3.061063051223755, |
| "eval_runtime": 176.5606, |
| "eval_samples_per_second": 27.747, |
| "eval_steps_per_second": 0.872, |
| "eval_wer": 1.0, |
| "step": 2800 |
| }, |
| { |
| "epoch": 12.61, |
| "eval_loss": 3.056131601333618, |
| "eval_runtime": 175.9193, |
| "eval_samples_per_second": 27.848, |
| "eval_steps_per_second": 0.875, |
| "eval_wer": 1.0, |
| "step": 2900 |
| }, |
| { |
| "epoch": 13.04, |
| "learning_rate": 0.00018304687499999997, |
| "loss": 3.1071, |
| "step": 3000 |
| }, |
| { |
| "epoch": 13.04, |
| "eval_loss": 3.0480217933654785, |
| "eval_runtime": 175.6518, |
| "eval_samples_per_second": 27.89, |
| "eval_steps_per_second": 0.877, |
| "eval_wer": 1.0, |
| "step": 3000 |
| }, |
| { |
| "epoch": 13.48, |
| "eval_loss": 3.0491693019866943, |
| "eval_runtime": 173.0223, |
| "eval_samples_per_second": 28.314, |
| "eval_steps_per_second": 0.89, |
| "eval_wer": 1.0, |
| "step": 3100 |
| }, |
| { |
| "epoch": 13.91, |
| "eval_loss": 3.057448387145996, |
| "eval_runtime": 175.4684, |
| "eval_samples_per_second": 27.92, |
| "eval_steps_per_second": 0.878, |
| "eval_wer": 1.0, |
| "step": 3200 |
| }, |
| { |
| "epoch": 14.35, |
| "eval_loss": 3.053784132003784, |
| "eval_runtime": 176.0074, |
| "eval_samples_per_second": 27.834, |
| "eval_steps_per_second": 0.875, |
| "eval_wer": 1.0, |
| "step": 3300 |
| }, |
| { |
| "epoch": 14.78, |
| "eval_loss": 3.050539016723633, |
| "eval_runtime": 175.3243, |
| "eval_samples_per_second": 27.943, |
| "eval_steps_per_second": 0.878, |
| "eval_wer": 1.0, |
| "step": 3400 |
| }, |
| { |
| "epoch": 15.22, |
| "learning_rate": 0.00015960937499999997, |
| "loss": 3.1061, |
| "step": 3500 |
| }, |
| { |
| "epoch": 15.22, |
| "eval_loss": 3.059952735900879, |
| "eval_runtime": 176.5589, |
| "eval_samples_per_second": 27.747, |
| "eval_steps_per_second": 0.872, |
| "eval_wer": 1.0, |
| "step": 3500 |
| }, |
| { |
| "epoch": 15.65, |
| "eval_loss": 3.0595669746398926, |
| "eval_runtime": 177.5778, |
| "eval_samples_per_second": 27.588, |
| "eval_steps_per_second": 0.867, |
| "eval_wer": 1.0, |
| "step": 3600 |
| }, |
| { |
| "epoch": 16.09, |
| "eval_loss": 3.0623462200164795, |
| "eval_runtime": 174.5228, |
| "eval_samples_per_second": 28.071, |
| "eval_steps_per_second": 0.882, |
| "eval_wer": 1.0, |
| "step": 3700 |
| }, |
| { |
| "epoch": 16.52, |
| "eval_loss": 3.079986095428467, |
| "eval_runtime": 175.7994, |
| "eval_samples_per_second": 27.867, |
| "eval_steps_per_second": 0.876, |
| "eval_wer": 1.0, |
| "step": 3800 |
| }, |
| { |
| "epoch": 16.95, |
| "eval_loss": 3.0583465099334717, |
| "eval_runtime": 176.6021, |
| "eval_samples_per_second": 27.74, |
| "eval_steps_per_second": 0.872, |
| "eval_wer": 1.0, |
| "step": 3900 |
| }, |
| { |
| "epoch": 17.39, |
| "learning_rate": 0.00013617187499999998, |
| "loss": 3.1036, |
| "step": 4000 |
| }, |
| { |
| "epoch": 17.39, |
| "eval_loss": 3.053365468978882, |
| "eval_runtime": 175.9714, |
| "eval_samples_per_second": 27.84, |
| "eval_steps_per_second": 0.875, |
| "eval_wer": 1.0, |
| "step": 4000 |
| }, |
| { |
| "epoch": 17.82, |
| "eval_loss": 3.0563225746154785, |
| "eval_runtime": 175.7204, |
| "eval_samples_per_second": 27.88, |
| "eval_steps_per_second": 0.876, |
| "eval_wer": 1.0, |
| "step": 4100 |
| }, |
| { |
| "epoch": 18.26, |
| "eval_loss": 3.0481250286102295, |
| "eval_runtime": 175.8171, |
| "eval_samples_per_second": 27.864, |
| "eval_steps_per_second": 0.876, |
| "eval_wer": 1.0, |
| "step": 4200 |
| }, |
| { |
| "epoch": 18.69, |
| "eval_loss": 3.0476744174957275, |
| "eval_runtime": 175.2697, |
| "eval_samples_per_second": 27.951, |
| "eval_steps_per_second": 0.879, |
| "eval_wer": 1.0, |
| "step": 4300 |
| }, |
| { |
| "epoch": 19.13, |
| "eval_loss": 3.0504729747772217, |
| "eval_runtime": 175.8225, |
| "eval_samples_per_second": 27.863, |
| "eval_steps_per_second": 0.876, |
| "eval_wer": 1.0, |
| "step": 4400 |
| }, |
| { |
| "epoch": 19.56, |
| "learning_rate": 0.00011273437499999999, |
| "loss": 3.1086, |
| "step": 4500 |
| }, |
| { |
| "epoch": 19.56, |
| "eval_loss": 3.0484793186187744, |
| "eval_runtime": 176.184, |
| "eval_samples_per_second": 27.806, |
| "eval_steps_per_second": 0.874, |
| "eval_wer": 1.0, |
| "step": 4500 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 3.0480639934539795, |
| "eval_runtime": 175.5218, |
| "eval_samples_per_second": 27.911, |
| "eval_steps_per_second": 0.877, |
| "eval_wer": 1.0, |
| "step": 4600 |
| }, |
| { |
| "epoch": 20.43, |
| "eval_loss": 3.061495780944824, |
| "eval_runtime": 176.4313, |
| "eval_samples_per_second": 27.767, |
| "eval_steps_per_second": 0.873, |
| "eval_wer": 1.0, |
| "step": 4700 |
| }, |
| { |
| "epoch": 20.87, |
| "eval_loss": 3.0657691955566406, |
| "eval_runtime": 175.5853, |
| "eval_samples_per_second": 27.901, |
| "eval_steps_per_second": 0.877, |
| "eval_wer": 1.0, |
| "step": 4800 |
| }, |
| { |
| "epoch": 21.3, |
| "eval_loss": 3.050532341003418, |
| "eval_runtime": 176.2169, |
| "eval_samples_per_second": 27.801, |
| "eval_steps_per_second": 0.874, |
| "eval_wer": 1.0, |
| "step": 4900 |
| }, |
| { |
| "epoch": 21.74, |
| "learning_rate": 8.9296875e-05, |
| "loss": 3.1028, |
| "step": 5000 |
| }, |
| { |
| "epoch": 21.74, |
| "eval_loss": 3.0491702556610107, |
| "eval_runtime": 175.9502, |
| "eval_samples_per_second": 27.843, |
| "eval_steps_per_second": 0.875, |
| "eval_wer": 1.0, |
| "step": 5000 |
| }, |
| { |
| "epoch": 22.17, |
| "eval_loss": 3.048527479171753, |
| "eval_runtime": 174.9586, |
| "eval_samples_per_second": 28.001, |
| "eval_steps_per_second": 0.88, |
| "eval_wer": 1.0, |
| "step": 5100 |
| }, |
| { |
| "epoch": 22.61, |
| "eval_loss": 3.0482711791992188, |
| "eval_runtime": 176.76, |
| "eval_samples_per_second": 27.716, |
| "eval_steps_per_second": 0.871, |
| "eval_wer": 1.0, |
| "step": 5200 |
| }, |
| { |
| "epoch": 23.04, |
| "eval_loss": 3.0478527545928955, |
| "eval_runtime": 174.8893, |
| "eval_samples_per_second": 28.012, |
| "eval_steps_per_second": 0.881, |
| "eval_wer": 1.0, |
| "step": 5300 |
| }, |
| { |
| "epoch": 23.48, |
| "eval_loss": 3.05094313621521, |
| "eval_runtime": 175.0794, |
| "eval_samples_per_second": 27.982, |
| "eval_steps_per_second": 0.88, |
| "eval_wer": 1.0, |
| "step": 5400 |
| }, |
| { |
| "epoch": 23.91, |
| "learning_rate": 6.5859375e-05, |
| "loss": 3.1087, |
| "step": 5500 |
| }, |
| { |
| "epoch": 23.91, |
| "eval_loss": 3.0529990196228027, |
| "eval_runtime": 176.1904, |
| "eval_samples_per_second": 27.805, |
| "eval_steps_per_second": 0.874, |
| "eval_wer": 1.0, |
| "step": 5500 |
| }, |
| { |
| "epoch": 24.35, |
| "eval_loss": 3.048621654510498, |
| "eval_runtime": 175.2104, |
| "eval_samples_per_second": 27.961, |
| "eval_steps_per_second": 0.879, |
| "eval_wer": 1.0, |
| "step": 5600 |
| }, |
| { |
| "epoch": 24.78, |
| "eval_loss": 3.051391124725342, |
| "eval_runtime": 175.9351, |
| "eval_samples_per_second": 27.845, |
| "eval_steps_per_second": 0.875, |
| "eval_wer": 1.0, |
| "step": 5700 |
| }, |
| { |
| "epoch": 25.22, |
| "eval_loss": 3.050508499145508, |
| "eval_runtime": 175.1722, |
| "eval_samples_per_second": 27.967, |
| "eval_steps_per_second": 0.879, |
| "eval_wer": 1.0, |
| "step": 5800 |
| }, |
| { |
| "epoch": 25.65, |
| "eval_loss": 3.050753355026245, |
| "eval_runtime": 175.581, |
| "eval_samples_per_second": 27.902, |
| "eval_steps_per_second": 0.877, |
| "eval_wer": 1.0, |
| "step": 5900 |
| }, |
| { |
| "epoch": 26.09, |
| "learning_rate": 4.2421875e-05, |
| "loss": 3.1043, |
| "step": 6000 |
| }, |
| { |
| "epoch": 26.09, |
| "eval_loss": 3.050074815750122, |
| "eval_runtime": 175.9337, |
| "eval_samples_per_second": 27.846, |
| "eval_steps_per_second": 0.875, |
| "eval_wer": 1.0, |
| "step": 6000 |
| }, |
| { |
| "epoch": 26.52, |
| "eval_loss": 3.046748638153076, |
| "eval_runtime": 176.2651, |
| "eval_samples_per_second": 27.793, |
| "eval_steps_per_second": 0.874, |
| "eval_wer": 1.0, |
| "step": 6100 |
| }, |
| { |
| "epoch": 26.95, |
| "eval_loss": 3.046581268310547, |
| "eval_runtime": 175.4906, |
| "eval_samples_per_second": 27.916, |
| "eval_steps_per_second": 0.878, |
| "eval_wer": 1.0, |
| "step": 6200 |
| }, |
| { |
| "epoch": 27.39, |
| "eval_loss": 3.0465457439422607, |
| "eval_runtime": 174.3257, |
| "eval_samples_per_second": 28.103, |
| "eval_steps_per_second": 0.883, |
| "eval_wer": 1.0, |
| "step": 6300 |
| }, |
| { |
| "epoch": 27.82, |
| "eval_loss": 3.0464954376220703, |
| "eval_runtime": 174.3395, |
| "eval_samples_per_second": 28.1, |
| "eval_steps_per_second": 0.883, |
| "eval_wer": 1.0, |
| "step": 6400 |
| }, |
| { |
| "epoch": 28.26, |
| "learning_rate": 1.8984375e-05, |
| "loss": 3.1175, |
| "step": 6500 |
| }, |
| { |
| "epoch": 28.26, |
| "eval_loss": 3.046614170074463, |
| "eval_runtime": 174.756, |
| "eval_samples_per_second": 28.033, |
| "eval_steps_per_second": 0.881, |
| "eval_wer": 1.0, |
| "step": 6500 |
| }, |
| { |
| "epoch": 28.69, |
| "eval_loss": 3.046605110168457, |
| "eval_runtime": 174.8316, |
| "eval_samples_per_second": 28.021, |
| "eval_steps_per_second": 0.881, |
| "eval_wer": 1.0, |
| "step": 6600 |
| }, |
| { |
| "epoch": 29.13, |
| "eval_loss": 3.0464911460876465, |
| "eval_runtime": 174.5543, |
| "eval_samples_per_second": 28.066, |
| "eval_steps_per_second": 0.882, |
| "eval_wer": 1.0, |
| "step": 6700 |
| }, |
| { |
| "epoch": 29.56, |
| "eval_loss": 3.046463966369629, |
| "eval_runtime": 175.0973, |
| "eval_samples_per_second": 27.979, |
| "eval_steps_per_second": 0.88, |
| "eval_wer": 1.0, |
| "step": 6800 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_loss": 3.046398878097534, |
| "eval_runtime": 185.0373, |
| "eval_samples_per_second": 26.476, |
| "eval_steps_per_second": 0.832, |
| "eval_wer": 1.0, |
| "step": 6900 |
| }, |
| { |
| "epoch": 30.0, |
| "step": 6900, |
| "total_flos": 1.7654566052477592e+19, |
| "train_loss": 0.04483215774314991, |
| "train_runtime": 852.37, |
| "train_samples_per_second": 518.437, |
| "train_steps_per_second": 8.095 |
| } |
| ], |
| "max_steps": 6900, |
| "num_train_epochs": 30, |
| "total_flos": 1.7654566052477592e+19, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|