{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.988593155893536,
  "eval_steps": 500,
  "global_step": 1970,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0025348542458808617,
      "grad_norm": 1.1835554838180542,
      "learning_rate": 0.0,
      "loss": 2.7162,
      "step": 1
    },
    {
      "epoch": 0.005069708491761723,
      "grad_norm": 1.1406067609786987,
      "learning_rate": 4e-05,
      "loss": 2.7021,
      "step": 2
    },
    {
      "epoch": 0.0076045627376425855,
      "grad_norm": 1.1929512023925781,
      "learning_rate": 8e-05,
      "loss": 2.5728,
      "step": 3
    },
    {
      "epoch": 0.010139416983523447,
      "grad_norm": 1.523325800895691,
      "learning_rate": 0.00012,
      "loss": 2.5825,
      "step": 4
    },
    {
      "epoch": 0.012674271229404309,
      "grad_norm": 1.712708592414856,
      "learning_rate": 0.00016,
      "loss": 2.1986,
      "step": 5
    },
    {
      "epoch": 0.015209125475285171,
      "grad_norm": 1.263485312461853,
      "learning_rate": 0.0002,
      "loss": 2.1478,
      "step": 6
    },
    {
      "epoch": 0.017743979721166033,
      "grad_norm": 1.2837083339691162,
      "learning_rate": 0.00019989821882951655,
      "loss": 2.2153,
      "step": 7
    },
    {
      "epoch": 0.020278833967046894,
      "grad_norm": 1.0831111669540405,
      "learning_rate": 0.0001997964376590331,
      "loss": 1.9272,
      "step": 8
    },
    {
      "epoch": 0.022813688212927757,
      "grad_norm": 0.7921498417854309,
      "learning_rate": 0.00019969465648854963,
      "loss": 1.4929,
      "step": 9
    },
    {
      "epoch": 0.025348542458808618,
      "grad_norm": 0.9243067502975464,
      "learning_rate": 0.00019959287531806617,
      "loss": 1.4312,
      "step": 10
    },
    {
      "epoch": 0.02788339670468948,
      "grad_norm": 1.2378944158554077,
      "learning_rate": 0.0001994910941475827,
      "loss": 1.1605,
      "step": 11
    },
    {
      "epoch": 0.030418250950570342,
      "grad_norm": 1.401106834411621,
      "learning_rate": 0.00019938931297709925,
      "loss": 1.0236,
      "step": 12
    },
    {
      "epoch": 0.032953105196451206,
      "grad_norm": 1.0503413677215576,
      "learning_rate": 0.00019928753180661578,
      "loss": 0.8441,
      "step": 13
    },
    {
      "epoch": 0.035487959442332066,
      "grad_norm": 0.928716778755188,
      "learning_rate": 0.00019918575063613232,
      "loss": 0.8098,
      "step": 14
    },
    {
      "epoch": 0.03802281368821293,
      "grad_norm": 0.6546494364738464,
      "learning_rate": 0.00019908396946564886,
      "loss": 0.5083,
      "step": 15
    },
    {
      "epoch": 0.04055766793409379,
      "grad_norm": 0.8399775624275208,
      "learning_rate": 0.0001989821882951654,
      "loss": 0.5798,
      "step": 16
    },
    {
      "epoch": 0.043092522179974654,
      "grad_norm": 0.6111662983894348,
      "learning_rate": 0.00019888040712468194,
      "loss": 0.471,
      "step": 17
    },
    {
      "epoch": 0.045627376425855515,
      "grad_norm": 0.6786199808120728,
      "learning_rate": 0.00019877862595419848,
      "loss": 0.5124,
      "step": 18
    },
    {
      "epoch": 0.048162230671736375,
      "grad_norm": 0.7001961469650269,
      "learning_rate": 0.00019867684478371502,
      "loss": 0.5764,
      "step": 19
    },
    {
      "epoch": 0.050697084917617236,
      "grad_norm": 0.5670634508132935,
      "learning_rate": 0.00019857506361323156,
      "loss": 0.5595,
      "step": 20
    },
    {
      "epoch": 0.053231939163498096,
      "grad_norm": 0.6825580596923828,
      "learning_rate": 0.0001984732824427481,
      "loss": 0.6601,
      "step": 21
    },
    {
      "epoch": 0.05576679340937896,
      "grad_norm": 0.5777536630630493,
      "learning_rate": 0.00019837150127226464,
      "loss": 0.6232,
      "step": 22
    },
    {
      "epoch": 0.058301647655259824,
      "grad_norm": 0.7791958451271057,
      "learning_rate": 0.00019826972010178118,
      "loss": 0.4741,
      "step": 23
    },
    {
      "epoch": 0.060836501901140684,
      "grad_norm": 0.7647196054458618,
      "learning_rate": 0.00019816793893129772,
      "loss": 0.574,
      "step": 24
    },
    {
      "epoch": 0.06337135614702155,
      "grad_norm": 0.6175855398178101,
      "learning_rate": 0.00019806615776081426,
      "loss": 0.6792,
      "step": 25
    },
    {
      "epoch": 0.06590621039290241,
      "grad_norm": 0.7071298360824585,
      "learning_rate": 0.0001979643765903308,
      "loss": 0.6333,
      "step": 26
    },
    {
      "epoch": 0.06844106463878327,
      "grad_norm": 0.7675352692604065,
      "learning_rate": 0.00019786259541984734,
      "loss": 0.5004,
      "step": 27
    },
    {
      "epoch": 0.07097591888466413,
      "grad_norm": 0.6224766969680786,
      "learning_rate": 0.00019776081424936387,
      "loss": 0.5649,
      "step": 28
    },
    {
      "epoch": 0.07351077313054499,
      "grad_norm": 0.6023550629615784,
      "learning_rate": 0.00019765903307888041,
      "loss": 0.4004,
      "step": 29
    },
    {
      "epoch": 0.07604562737642585,
      "grad_norm": 0.6253474354743958,
      "learning_rate": 0.00019755725190839695,
      "loss": 0.548,
      "step": 30
    },
    {
      "epoch": 0.07858048162230671,
      "grad_norm": 0.43560266494750977,
      "learning_rate": 0.00019745547073791352,
      "loss": 0.4721,
      "step": 31
    },
    {
      "epoch": 0.08111533586818757,
      "grad_norm": 0.6321932077407837,
      "learning_rate": 0.00019735368956743003,
      "loss": 0.4671,
      "step": 32
    },
    {
      "epoch": 0.08365019011406843,
      "grad_norm": 0.41977155208587646,
      "learning_rate": 0.00019725190839694657,
      "loss": 0.3716,
      "step": 33
    },
    {
      "epoch": 0.08618504435994931,
      "grad_norm": 0.4449223279953003,
      "learning_rate": 0.0001971501272264631,
      "loss": 0.6045,
      "step": 34
    },
    {
      "epoch": 0.08871989860583017,
      "grad_norm": 0.5593668222427368,
      "learning_rate": 0.00019704834605597965,
      "loss": 0.3789,
      "step": 35
    },
    {
      "epoch": 0.09125475285171103,
      "grad_norm": 0.4293775260448456,
      "learning_rate": 0.0001969465648854962,
      "loss": 0.3834,
      "step": 36
    },
    {
      "epoch": 0.09378960709759189,
      "grad_norm": 0.49535441398620605,
      "learning_rate": 0.00019684478371501273,
      "loss": 0.5504,
      "step": 37
    },
    {
      "epoch": 0.09632446134347275,
      "grad_norm": 0.4620949625968933,
      "learning_rate": 0.00019674300254452927,
      "loss": 0.3212,
      "step": 38
    },
    {
      "epoch": 0.09885931558935361,
      "grad_norm": 0.46665605902671814,
      "learning_rate": 0.0001966412213740458,
      "loss": 0.4868,
      "step": 39
    },
    {
      "epoch": 0.10139416983523447,
      "grad_norm": 0.4120428264141083,
      "learning_rate": 0.00019653944020356235,
      "loss": 0.4926,
      "step": 40
    },
    {
      "epoch": 0.10392902408111533,
      "grad_norm": 0.41570335626602173,
      "learning_rate": 0.00019643765903307889,
      "loss": 0.5068,
      "step": 41
    },
    {
      "epoch": 0.10646387832699619,
      "grad_norm": 0.4141896665096283,
      "learning_rate": 0.00019633587786259542,
      "loss": 0.4064,
      "step": 42
    },
    {
      "epoch": 0.10899873257287707,
      "grad_norm": 0.3192928433418274,
      "learning_rate": 0.00019623409669211196,
      "loss": 0.4581,
      "step": 43
    },
    {
      "epoch": 0.11153358681875793,
      "grad_norm": 0.4188425838947296,
      "learning_rate": 0.00019613231552162853,
      "loss": 0.371,
      "step": 44
    },
    {
      "epoch": 0.11406844106463879,
      "grad_norm": 0.3750368654727936,
      "learning_rate": 0.00019603053435114504,
      "loss": 0.3728,
      "step": 45
    },
    {
      "epoch": 0.11660329531051965,
      "grad_norm": 0.5102046728134155,
      "learning_rate": 0.00019592875318066158,
      "loss": 0.357,
      "step": 46
    },
    {
      "epoch": 0.11913814955640051,
      "grad_norm": 0.4143039882183075,
      "learning_rate": 0.00019582697201017812,
      "loss": 0.4373,
      "step": 47
    },
    {
      "epoch": 0.12167300380228137,
      "grad_norm": 0.42558473348617554,
      "learning_rate": 0.00019572519083969466,
      "loss": 0.5877,
      "step": 48
    },
    {
      "epoch": 0.12420785804816223,
      "grad_norm": 0.35768038034439087,
      "learning_rate": 0.0001956234096692112,
      "loss": 0.3326,
      "step": 49
    },
    {
      "epoch": 0.1267427122940431,
      "grad_norm": 0.32826319336891174,
      "learning_rate": 0.00019552162849872774,
      "loss": 0.3521,
      "step": 50
    },
    {
      "epoch": 0.12927756653992395,
      "grad_norm": 0.3507271409034729,
      "learning_rate": 0.00019541984732824428,
      "loss": 0.4157,
      "step": 51
    },
    {
      "epoch": 0.13181242078580482,
      "grad_norm": 0.5069169402122498,
      "learning_rate": 0.00019531806615776082,
      "loss": 0.4453,
      "step": 52
    },
    {
      "epoch": 0.13434727503168567,
      "grad_norm": 0.4759957492351532,
      "learning_rate": 0.00019521628498727736,
      "loss": 0.5131,
      "step": 53
    },
    {
      "epoch": 0.13688212927756654,
      "grad_norm": 0.4045158326625824,
      "learning_rate": 0.0001951145038167939,
      "loss": 0.3927,
      "step": 54
    },
    {
      "epoch": 0.1394169835234474,
      "grad_norm": 0.49629393219947815,
      "learning_rate": 0.00019501272264631046,
      "loss": 0.4708,
      "step": 55
    },
    {
      "epoch": 0.14195183776932827,
      "grad_norm": 0.3735599219799042,
      "learning_rate": 0.00019491094147582698,
      "loss": 0.4076,
      "step": 56
    },
    {
      "epoch": 0.1444866920152091,
      "grad_norm": 0.4713466763496399,
      "learning_rate": 0.00019480916030534354,
      "loss": 0.4187,
      "step": 57
    },
    {
      "epoch": 0.14702154626108999,
      "grad_norm": 0.6454377770423889,
      "learning_rate": 0.00019470737913486005,
      "loss": 0.4032,
      "step": 58
    },
    {
      "epoch": 0.14955640050697086,
      "grad_norm": 0.39378786087036133,
      "learning_rate": 0.00019460559796437662,
      "loss": 0.3508,
      "step": 59
    },
    {
      "epoch": 0.1520912547528517,
      "grad_norm": 0.3768695592880249,
      "learning_rate": 0.00019450381679389313,
      "loss": 0.3129,
      "step": 60
    },
    {
      "epoch": 0.15462610899873258,
      "grad_norm": 0.4250476062297821,
      "learning_rate": 0.00019440203562340967,
      "loss": 0.3426,
      "step": 61
    },
    {
      "epoch": 0.15716096324461343,
      "grad_norm": 0.3653964698314667,
      "learning_rate": 0.0001943002544529262,
      "loss": 0.3339,
      "step": 62
    },
    {
      "epoch": 0.1596958174904943,
      "grad_norm": 0.4973353445529938,
      "learning_rate": 0.00019419847328244275,
      "loss": 0.4759,
      "step": 63
    },
    {
      "epoch": 0.16223067173637515,
      "grad_norm": 0.41738295555114746,
      "learning_rate": 0.0001940966921119593,
      "loss": 0.3809,
      "step": 64
    },
    {
      "epoch": 0.16476552598225602,
      "grad_norm": 0.42326119542121887,
      "learning_rate": 0.00019399491094147583,
      "loss": 0.3399,
      "step": 65
    },
    {
      "epoch": 0.16730038022813687,
      "grad_norm": 0.4244116246700287,
      "learning_rate": 0.00019389312977099237,
      "loss": 0.4085,
      "step": 66
    },
    {
      "epoch": 0.16983523447401774,
      "grad_norm": 0.40235379338264465,
      "learning_rate": 0.0001937913486005089,
      "loss": 0.3016,
      "step": 67
    },
    {
      "epoch": 0.17237008871989862,
      "grad_norm": 0.3983120322227478,
      "learning_rate": 0.00019368956743002547,
      "loss": 0.5101,
      "step": 68
    },
    {
      "epoch": 0.17490494296577946,
      "grad_norm": 0.4857071042060852,
      "learning_rate": 0.00019358778625954199,
      "loss": 0.3131,
      "step": 69
    },
    {
      "epoch": 0.17743979721166034,
      "grad_norm": 0.5238108038902283,
      "learning_rate": 0.00019348600508905855,
      "loss": 0.5841,
      "step": 70
    },
    {
      "epoch": 0.17997465145754118,
      "grad_norm": 0.5322052240371704,
      "learning_rate": 0.00019338422391857506,
      "loss": 0.3895,
      "step": 71
    },
    {
      "epoch": 0.18250950570342206,
      "grad_norm": 0.4643409252166748,
      "learning_rate": 0.00019328244274809163,
      "loss": 0.364,
      "step": 72
    },
    {
      "epoch": 0.1850443599493029,
      "grad_norm": 0.36517271399497986,
      "learning_rate": 0.00019318066157760814,
      "loss": 0.4092,
      "step": 73
    },
    {
      "epoch": 0.18757921419518378,
      "grad_norm": 0.49409031867980957,
      "learning_rate": 0.00019307888040712468,
      "loss": 0.3359,
      "step": 74
    },
    {
      "epoch": 0.19011406844106463,
      "grad_norm": 0.44665688276290894,
      "learning_rate": 0.00019297709923664122,
      "loss": 0.3275,
      "step": 75
    },
    {
      "epoch": 0.1926489226869455,
      "grad_norm": 0.353208065032959,
      "learning_rate": 0.00019287531806615776,
      "loss": 0.3396,
      "step": 76
    },
    {
      "epoch": 0.19518377693282637,
      "grad_norm": 0.4061962366104126,
      "learning_rate": 0.0001927735368956743,
      "loss": 0.4658,
      "step": 77
    },
    {
      "epoch": 0.19771863117870722,
      "grad_norm": 0.4785591959953308,
      "learning_rate": 0.00019267175572519084,
      "loss": 0.4705,
      "step": 78
    },
    {
      "epoch": 0.2002534854245881,
      "grad_norm": 0.44644224643707275,
      "learning_rate": 0.00019256997455470738,
      "loss": 0.3573,
      "step": 79
    },
    {
      "epoch": 0.20278833967046894,
      "grad_norm": 0.4554955065250397,
      "learning_rate": 0.00019246819338422392,
      "loss": 0.3822,
      "step": 80
    },
    {
      "epoch": 0.20532319391634982,
      "grad_norm": 0.4537349343299866,
      "learning_rate": 0.00019236641221374049,
      "loss": 0.5222,
      "step": 81
    },
    {
      "epoch": 0.20785804816223066,
      "grad_norm": 0.32820987701416016,
      "learning_rate": 0.000192264631043257,
      "loss": 0.3185,
      "step": 82
    },
    {
      "epoch": 0.21039290240811154,
      "grad_norm": 0.39827391505241394,
      "learning_rate": 0.00019216284987277356,
      "loss": 0.3693,
      "step": 83
    },
    {
      "epoch": 0.21292775665399238,
      "grad_norm": 0.4188093841075897,
      "learning_rate": 0.00019206106870229008,
      "loss": 0.4168,
      "step": 84
    },
    {
      "epoch": 0.21546261089987326,
      "grad_norm": 0.4770517349243164,
      "learning_rate": 0.00019195928753180664,
      "loss": 0.4113,
      "step": 85
    },
    {
      "epoch": 0.21799746514575413,
      "grad_norm": 0.346224844455719,
      "learning_rate": 0.00019185750636132315,
      "loss": 0.4238,
      "step": 86
    },
    {
      "epoch": 0.22053231939163498,
      "grad_norm": 0.37398770451545715,
      "learning_rate": 0.00019175572519083972,
      "loss": 0.4285,
      "step": 87
    },
    {
      "epoch": 0.22306717363751585,
      "grad_norm": 0.35467982292175293,
      "learning_rate": 0.00019165394402035623,
      "loss": 0.3201,
      "step": 88
    },
    {
      "epoch": 0.2256020278833967,
      "grad_norm": 0.3411659002304077,
      "learning_rate": 0.00019155216284987277,
      "loss": 0.3428,
      "step": 89
    },
    {
      "epoch": 0.22813688212927757,
      "grad_norm": 0.4002087712287903,
      "learning_rate": 0.0001914503816793893,
      "loss": 0.5375,
      "step": 90
    },
    {
      "epoch": 0.23067173637515842,
      "grad_norm": 0.4339190423488617,
      "learning_rate": 0.00019134860050890585,
      "loss": 0.3355,
      "step": 91
    },
    {
      "epoch": 0.2332065906210393,
      "grad_norm": 0.43449410796165466,
      "learning_rate": 0.00019124681933842242,
      "loss": 0.4355,
      "step": 92
    },
    {
      "epoch": 0.23574144486692014,
      "grad_norm": 0.4565323293209076,
      "learning_rate": 0.00019114503816793893,
      "loss": 0.3178,
      "step": 93
    },
    {
      "epoch": 0.23827629911280102,
      "grad_norm": 0.46309894323349,
      "learning_rate": 0.0001910432569974555,
      "loss": 0.3308,
      "step": 94
    },
    {
      "epoch": 0.24081115335868186,
      "grad_norm": 0.3554096817970276,
      "learning_rate": 0.000190941475826972,
      "loss": 0.3358,
      "step": 95
    },
    {
      "epoch": 0.24334600760456274,
      "grad_norm": 0.39129987359046936,
      "learning_rate": 0.00019083969465648857,
      "loss": 0.3988,
      "step": 96
    },
    {
      "epoch": 0.2458808618504436,
      "grad_norm": 0.4193456470966339,
      "learning_rate": 0.0001907379134860051,
      "loss": 0.4064,
      "step": 97
    },
    {
      "epoch": 0.24841571609632446,
      "grad_norm": 0.39571425318717957,
      "learning_rate": 0.00019063613231552165,
      "loss": 0.3213,
      "step": 98
    },
    {
      "epoch": 0.2509505703422053,
      "grad_norm": 0.48566195368766785,
      "learning_rate": 0.00019053435114503817,
      "loss": 0.3505,
      "step": 99
    },
    {
      "epoch": 0.2534854245880862,
      "grad_norm": 0.43266433477401733,
      "learning_rate": 0.00019043256997455473,
      "loss": 0.3579,
      "step": 100
    },
    {
      "epoch": 0.25602027883396705,
      "grad_norm": 0.31110769510269165,
      "learning_rate": 0.00019033078880407124,
      "loss": 0.2832,
      "step": 101
    },
    {
      "epoch": 0.2585551330798479,
      "grad_norm": 0.40166690945625305,
      "learning_rate": 0.00019022900763358778,
      "loss": 0.2964,
      "step": 102
    },
    {
      "epoch": 0.26108998732572875,
      "grad_norm": 0.554072380065918,
      "learning_rate": 0.00019012722646310432,
      "loss": 0.3661,
      "step": 103
    },
    {
      "epoch": 0.26362484157160965,
      "grad_norm": 0.45009374618530273,
      "learning_rate": 0.00019002544529262086,
      "loss": 0.3812,
      "step": 104
    },
    {
      "epoch": 0.2661596958174905,
      "grad_norm": 0.48349273204803467,
      "learning_rate": 0.00018992366412213743,
      "loss": 0.4183,
      "step": 105
    },
    {
      "epoch": 0.26869455006337134,
      "grad_norm": 0.4157555103302002,
      "learning_rate": 0.00018982188295165394,
      "loss": 0.2962,
      "step": 106
    },
    {
      "epoch": 0.27122940430925224,
      "grad_norm": 0.3300265073776245,
      "learning_rate": 0.0001897201017811705,
      "loss": 0.3351,
      "step": 107
    },
    {
      "epoch": 0.2737642585551331,
      "grad_norm": 0.3690893054008484,
      "learning_rate": 0.00018961832061068702,
      "loss": 0.3251,
      "step": 108
    },
    {
      "epoch": 0.27629911280101394,
      "grad_norm": 0.49013710021972656,
      "learning_rate": 0.00018951653944020359,
      "loss": 0.4757,
      "step": 109
    },
    {
      "epoch": 0.2788339670468948,
      "grad_norm": 0.4416143000125885,
      "learning_rate": 0.0001894147582697201,
      "loss": 0.4421,
      "step": 110
    },
    {
      "epoch": 0.2813688212927757,
      "grad_norm": 0.3613321781158447,
      "learning_rate": 0.00018931297709923666,
      "loss": 0.3475,
      "step": 111
    },
    {
      "epoch": 0.28390367553865653,
      "grad_norm": 0.45548489689826965,
      "learning_rate": 0.00018921119592875318,
      "loss": 0.3587,
      "step": 112
    },
    {
      "epoch": 0.2864385297845374,
      "grad_norm": 0.49439120292663574,
      "learning_rate": 0.00018910941475826974,
      "loss": 0.4017,
      "step": 113
    },
    {
      "epoch": 0.2889733840304182,
      "grad_norm": 0.35214680433273315,
      "learning_rate": 0.00018900763358778626,
      "loss": 0.2645,
      "step": 114
    },
    {
      "epoch": 0.2915082382762991,
      "grad_norm": 0.5512099266052246,
      "learning_rate": 0.00018890585241730282,
      "loss": 0.3736,
      "step": 115
    },
    {
      "epoch": 0.29404309252217997,
      "grad_norm": 0.4146886467933655,
      "learning_rate": 0.00018880407124681936,
      "loss": 0.3361,
      "step": 116
    },
    {
      "epoch": 0.2965779467680608,
      "grad_norm": 0.42954355478286743,
      "learning_rate": 0.00018870229007633587,
      "loss": 0.3841,
      "step": 117
    },
    {
      "epoch": 0.2991128010139417,
      "grad_norm": 0.47189798951148987,
      "learning_rate": 0.00018860050890585244,
      "loss": 0.3591,
      "step": 118
    },
    {
      "epoch": 0.30164765525982257,
      "grad_norm": 0.5082337260246277,
      "learning_rate": 0.00018849872773536895,
      "loss": 0.4249,
      "step": 119
    },
    {
      "epoch": 0.3041825095057034,
      "grad_norm": 0.4005051255226135,
      "learning_rate": 0.00018839694656488552,
      "loss": 0.4433,
      "step": 120
    },
    {
      "epoch": 0.30671736375158426,
      "grad_norm": 0.4730987250804901,
      "learning_rate": 0.00018829516539440203,
      "loss": 0.3575,
      "step": 121
    },
    {
      "epoch": 0.30925221799746516,
      "grad_norm": 0.5227373242378235,
      "learning_rate": 0.0001881933842239186,
      "loss": 0.3511,
      "step": 122
    },
    {
      "epoch": 0.311787072243346,
      "grad_norm": 0.3693684935569763,
      "learning_rate": 0.0001880916030534351,
      "loss": 0.3097,
      "step": 123
    },
    {
      "epoch": 0.31432192648922685,
      "grad_norm": 0.45321500301361084,
      "learning_rate": 0.00018798982188295168,
      "loss": 0.4464,
      "step": 124
    },
    {
      "epoch": 0.31685678073510776,
      "grad_norm": 0.3797638714313507,
      "learning_rate": 0.0001878880407124682,
      "loss": 0.328,
      "step": 125
    },
    {
      "epoch": 0.3193916349809886,
      "grad_norm": 0.3996891975402832,
      "learning_rate": 0.00018778625954198475,
      "loss": 0.28,
      "step": 126
    },
    {
      "epoch": 0.32192648922686945,
      "grad_norm": 0.3931027352809906,
      "learning_rate": 0.00018768447837150127,
      "loss": 0.2439,
      "step": 127
    },
    {
      "epoch": 0.3244613434727503,
      "grad_norm": 0.4259742200374603,
      "learning_rate": 0.00018758269720101783,
      "loss": 0.3068,
      "step": 128
    },
    {
      "epoch": 0.3269961977186312,
      "grad_norm": 0.4267159402370453,
      "learning_rate": 0.00018748091603053437,
      "loss": 0.3405,
      "step": 129
    },
    {
      "epoch": 0.32953105196451205,
      "grad_norm": 0.41900908946990967,
      "learning_rate": 0.0001873791348600509,
      "loss": 0.327,
      "step": 130
    },
    {
      "epoch": 0.3320659062103929,
      "grad_norm": 0.436499685049057,
      "learning_rate": 0.00018727735368956745,
      "loss": 0.5089,
      "step": 131
    },
    {
      "epoch": 0.33460076045627374,
      "grad_norm": 0.43961402773857117,
      "learning_rate": 0.00018717557251908396,
      "loss": 0.339,
      "step": 132
    },
    {
      "epoch": 0.33713561470215464,
      "grad_norm": 0.45645856857299805,
      "learning_rate": 0.00018707379134860053,
      "loss": 0.3738,
      "step": 133
    },
    {
      "epoch": 0.3396704689480355,
      "grad_norm": 0.36948803067207336,
      "learning_rate": 0.00018697201017811704,
      "loss": 0.2777,
      "step": 134
    },
    {
      "epoch": 0.34220532319391633,
      "grad_norm": 0.32040536403656006,
      "learning_rate": 0.0001868702290076336,
      "loss": 0.3679,
      "step": 135
    },
    {
      "epoch": 0.34474017743979724,
      "grad_norm": 0.37474381923675537,
      "learning_rate": 0.00018676844783715012,
      "loss": 0.4282,
      "step": 136
    },
    {
      "epoch": 0.3472750316856781,
      "grad_norm": 0.4243752360343933,
      "learning_rate": 0.0001866666666666667,
      "loss": 0.533,
      "step": 137
    },
    {
      "epoch": 0.34980988593155893,
      "grad_norm": 0.39162227511405945,
      "learning_rate": 0.0001865648854961832,
      "loss": 0.2989,
      "step": 138
    },
    {
      "epoch": 0.3523447401774398,
      "grad_norm": 0.3585897386074066,
      "learning_rate": 0.00018646310432569977,
      "loss": 0.3368,
      "step": 139
    },
    {
      "epoch": 0.3548795944233207,
      "grad_norm": 0.39330482482910156,
      "learning_rate": 0.00018636132315521628,
      "loss": 0.4904,
      "step": 140
    },
    {
      "epoch": 0.3574144486692015,
      "grad_norm": 0.3404198884963989,
      "learning_rate": 0.00018625954198473284,
      "loss": 0.2684,
      "step": 141
    },
    {
      "epoch": 0.35994930291508237,
      "grad_norm": 0.34813976287841797,
      "learning_rate": 0.00018615776081424938,
      "loss": 0.2988,
      "step": 142
    },
    {
      "epoch": 0.36248415716096327,
      "grad_norm": 0.4100090265274048,
      "learning_rate": 0.00018605597964376592,
      "loss": 0.3325,
      "step": 143
    },
    {
      "epoch": 0.3650190114068441,
      "grad_norm": 0.2897261083126068,
      "learning_rate": 0.00018595419847328246,
      "loss": 0.2487,
      "step": 144
    },
    {
      "epoch": 0.36755386565272496,
      "grad_norm": 0.43023669719696045,
      "learning_rate": 0.00018585241730279897,
      "loss": 0.4875,
      "step": 145
    },
    {
      "epoch": 0.3700887198986058,
      "grad_norm": 0.39708128571510315,
      "learning_rate": 0.00018575063613231554,
      "loss": 0.3742,
      "step": 146
    },
    {
      "epoch": 0.3726235741444867,
      "grad_norm": 0.4191845953464508,
      "learning_rate": 0.00018564885496183205,
      "loss": 0.3253,
      "step": 147
    },
    {
      "epoch": 0.37515842839036756,
      "grad_norm": 0.3373403549194336,
      "learning_rate": 0.00018554707379134862,
      "loss": 0.2636,
      "step": 148
    },
    {
      "epoch": 0.3776932826362484,
      "grad_norm": 0.3522009551525116,
      "learning_rate": 0.00018544529262086513,
      "loss": 0.2413,
      "step": 149
    },
    {
      "epoch": 0.38022813688212925,
      "grad_norm": 0.4140997529029846,
      "learning_rate": 0.0001853435114503817,
      "loss": 0.3663,
      "step": 150
    },
    {
      "epoch": 0.38276299112801015,
      "grad_norm": 0.3986112177371979,
      "learning_rate": 0.0001852417302798982,
      "loss": 0.276,
      "step": 151
    },
    {
      "epoch": 0.385297845373891,
      "grad_norm": 0.46847087144851685,
      "learning_rate": 0.00018513994910941478,
      "loss": 0.3369,
      "step": 152
    },
    {
      "epoch": 0.38783269961977185,
      "grad_norm": 0.43623679876327515,
      "learning_rate": 0.00018503816793893132,
      "loss": 0.37,
      "step": 153
    },
    {
      "epoch": 0.39036755386565275,
      "grad_norm": 0.4128822684288025,
      "learning_rate": 0.00018493638676844785,
      "loss": 0.3763,
      "step": 154
    },
    {
      "epoch": 0.3929024081115336,
      "grad_norm": 0.3352810740470886,
      "learning_rate": 0.0001848346055979644,
      "loss": 0.2446,
      "step": 155
    },
    {
      "epoch": 0.39543726235741444,
      "grad_norm": 0.580634355545044,
      "learning_rate": 0.00018473282442748093,
      "loss": 0.3691,
      "step": 156
    },
    {
      "epoch": 0.3979721166032953,
      "grad_norm": 0.452499657869339,
      "learning_rate": 0.00018463104325699747,
      "loss": 0.4361,
      "step": 157
    },
    {
      "epoch": 0.4005069708491762,
      "grad_norm": 0.4160007834434509,
      "learning_rate": 0.000184529262086514,
      "loss": 0.4003,
      "step": 158
    },
    {
      "epoch": 0.40304182509505704,
      "grad_norm": 0.3049513101577759,
      "learning_rate": 0.00018442748091603055,
      "loss": 0.2167,
      "step": 159
    },
    {
      "epoch": 0.4055766793409379,
      "grad_norm": 0.38912078738212585,
      "learning_rate": 0.00018432569974554706,
      "loss": 0.2766,
      "step": 160
    },
    {
      "epoch": 0.40811153358681873,
      "grad_norm": 0.4433249831199646,
      "learning_rate": 0.00018422391857506363,
      "loss": 0.3331,
      "step": 161
    },
    {
      "epoch": 0.41064638783269963,
      "grad_norm": 0.36410561203956604,
      "learning_rate": 0.00018412213740458014,
      "loss": 0.2719,
      "step": 162
    },
    {
      "epoch": 0.4131812420785805,
      "grad_norm": 0.47044846415519714,
      "learning_rate": 0.0001840203562340967,
      "loss": 0.3602,
      "step": 163
    },
    {
      "epoch": 0.4157160963244613,
      "grad_norm": 0.38755008578300476,
      "learning_rate": 0.00018391857506361322,
      "loss": 0.2815,
      "step": 164
    },
    {
      "epoch": 0.41825095057034223,
      "grad_norm": 0.39241930842399597,
      "learning_rate": 0.0001838167938931298,
      "loss": 0.3642,
      "step": 165
    },
    {
      "epoch": 0.4207858048162231,
      "grad_norm": 0.37138187885284424,
      "learning_rate": 0.00018371501272264633,
      "loss": 0.267,
      "step": 166
    },
    {
      "epoch": 0.4233206590621039,
      "grad_norm": 0.4508083462715149,
      "learning_rate": 0.00018361323155216287,
      "loss": 0.4093,
      "step": 167
    },
    {
      "epoch": 0.42585551330798477,
      "grad_norm": 0.4390806257724762,
      "learning_rate": 0.0001835114503816794,
      "loss": 0.424,
      "step": 168
    },
    {
      "epoch": 0.42839036755386567,
      "grad_norm": 0.4640062153339386,
      "learning_rate": 0.00018340966921119594,
      "loss": 0.4065,
      "step": 169
    },
    {
      "epoch": 0.4309252217997465,
      "grad_norm": 0.37822040915489197,
      "learning_rate": 0.00018330788804071248,
      "loss": 0.2854,
      "step": 170
    },
    {
      "epoch": 0.43346007604562736,
      "grad_norm": 0.3658731281757355,
      "learning_rate": 0.00018320610687022902,
      "loss": 0.2826,
      "step": 171
    },
    {
      "epoch": 0.43599493029150826,
      "grad_norm": 0.4271928369998932,
      "learning_rate": 0.00018310432569974556,
      "loss": 0.4538,
      "step": 172
    },
    {
      "epoch": 0.4385297845373891,
      "grad_norm": 0.33550775051116943,
      "learning_rate": 0.00018300254452926207,
      "loss": 0.3015,
      "step": 173
    },
    {
      "epoch": 0.44106463878326996,
      "grad_norm": 0.5374005436897278,
      "learning_rate": 0.00018290076335877864,
      "loss": 0.2771,
      "step": 174
    },
    {
      "epoch": 0.4435994930291508,
      "grad_norm": 0.4630737602710724,
      "learning_rate": 0.00018279898218829515,
      "loss": 0.3786,
      "step": 175
    },
    {
      "epoch": 0.4461343472750317,
      "grad_norm": 0.4163656234741211,
      "learning_rate": 0.00018269720101781172,
      "loss": 0.3224,
      "step": 176
    },
    {
      "epoch": 0.44866920152091255,
      "grad_norm": 0.43972182273864746,
      "learning_rate": 0.00018259541984732826,
      "loss": 0.4192,
      "step": 177
    },
    {
      "epoch": 0.4512040557667934,
      "grad_norm": 0.4114130437374115,
      "learning_rate": 0.0001824936386768448,
      "loss": 0.2979,
      "step": 178
    },
    {
      "epoch": 0.45373891001267425,
      "grad_norm": 0.5002878308296204,
      "learning_rate": 0.00018239185750636134,
      "loss": 0.3339,
      "step": 179
    },
    {
      "epoch": 0.45627376425855515,
      "grad_norm": 0.42383208870887756,
      "learning_rate": 0.00018229007633587788,
      "loss": 0.2958,
      "step": 180
    },
    {
      "epoch": 0.458808618504436,
      "grad_norm": 0.3234981894493103,
      "learning_rate": 0.00018218829516539442,
      "loss": 0.2215,
      "step": 181
    },
    {
      "epoch": 0.46134347275031684,
      "grad_norm": 0.33356910943984985,
      "learning_rate": 0.00018208651399491096,
      "loss": 0.3017,
      "step": 182
    },
    {
      "epoch": 0.46387832699619774,
      "grad_norm": 0.442376047372818,
      "learning_rate": 0.0001819847328244275,
      "loss": 0.2751,
      "step": 183
    },
    {
      "epoch": 0.4664131812420786,
      "grad_norm": 0.4563845992088318,
      "learning_rate": 0.00018188295165394403,
      "loss": 0.3001,
      "step": 184
    },
    {
      "epoch": 0.46894803548795944,
      "grad_norm": 0.3957296907901764,
      "learning_rate": 0.00018178117048346057,
      "loss": 0.3864,
      "step": 185
    },
    {
      "epoch": 0.4714828897338403,
      "grad_norm": 0.32932132482528687,
      "learning_rate": 0.0001816793893129771,
      "loss": 0.2528,
      "step": 186
    },
    {
      "epoch": 0.4740177439797212,
      "grad_norm": 0.3960365951061249,
      "learning_rate": 0.00018157760814249365,
      "loss": 0.3975,
      "step": 187
    },
    {
      "epoch": 0.47655259822560203,
      "grad_norm": 0.38450995087623596,
      "learning_rate": 0.00018147582697201016,
      "loss": 0.2552,
      "step": 188
    },
    {
      "epoch": 0.4790874524714829,
      "grad_norm": 0.4259994626045227,
      "learning_rate": 0.00018137404580152673,
      "loss": 0.3,
      "step": 189
    },
    {
      "epoch": 0.4816223067173637,
      "grad_norm": 0.4965859055519104,
      "learning_rate": 0.00018127226463104327,
      "loss": 0.3099,
      "step": 190
    },
    {
      "epoch": 0.4841571609632446,
      "grad_norm": 0.38229548931121826,
      "learning_rate": 0.0001811704834605598,
      "loss": 0.3799,
      "step": 191
    },
    {
      "epoch": 0.4866920152091255,
      "grad_norm": 0.4622017741203308,
      "learning_rate": 0.00018106870229007635,
      "loss": 0.4815,
      "step": 192
    },
    {
      "epoch": 0.4892268694550063,
      "grad_norm": 0.3207991123199463,
      "learning_rate": 0.0001809669211195929,
      "loss": 0.2534,
      "step": 193
    },
    {
      "epoch": 0.4917617237008872,
      "grad_norm": 0.3322354555130005,
      "learning_rate": 0.00018086513994910943,
      "loss": 0.2331,
      "step": 194
    },
    {
      "epoch": 0.49429657794676807,
      "grad_norm": 0.35752132534980774,
      "learning_rate": 0.00018076335877862597,
      "loss": 0.3621,
      "step": 195
    },
    {
      "epoch": 0.4968314321926489,
      "grad_norm": 0.2801353633403778,
      "learning_rate": 0.0001806615776081425,
      "loss": 0.2198,
      "step": 196
    },
    {
      "epoch": 0.49936628643852976,
      "grad_norm": 0.5065000057220459,
      "learning_rate": 0.00018055979643765905,
      "loss": 0.3806,
      "step": 197
    },
    {
      "epoch": 0.5019011406844106,
      "grad_norm": 0.4308508336544037,
      "learning_rate": 0.00018045801526717558,
      "loss": 0.4028,
      "step": 198
    },
    {
      "epoch": 0.5044359949302915,
      "grad_norm": 0.5432320833206177,
      "learning_rate": 0.00018035623409669212,
      "loss": 0.506,
      "step": 199
    },
    {
      "epoch": 0.5069708491761724,
      "grad_norm": 0.37079155445098877,
      "learning_rate": 0.00018025445292620866,
      "loss": 0.2242,
      "step": 200
    },
    {
      "epoch": 0.5095057034220533,
      "grad_norm": 0.3533012568950653,
      "learning_rate": 0.00018015267175572518,
      "loss": 0.3462,
      "step": 201
    },
    {
      "epoch": 0.5120405576679341,
      "grad_norm": 0.37727662920951843,
      "learning_rate": 0.00018005089058524174,
      "loss": 0.2421,
      "step": 202
    },
    {
      "epoch": 0.514575411913815,
      "grad_norm": 0.42737269401550293,
      "learning_rate": 0.00017994910941475828,
      "loss": 0.3338,
      "step": 203
    },
    {
      "epoch": 0.5171102661596958,
      "grad_norm": 0.41085687279701233,
      "learning_rate": 0.00017984732824427482,
      "loss": 0.4233,
      "step": 204
    },
    {
      "epoch": 0.5196451204055766,
      "grad_norm": 0.4871644675731659,
      "learning_rate": 0.00017974554707379136,
      "loss": 0.3504,
      "step": 205
    },
    {
      "epoch": 0.5221799746514575,
      "grad_norm": 0.308347225189209,
      "learning_rate": 0.0001796437659033079,
      "loss": 0.27,
      "step": 206
    },
    {
      "epoch": 0.5247148288973384,
      "grad_norm": 0.31587716937065125,
      "learning_rate": 0.00017954198473282444,
      "loss": 0.3161,
      "step": 207
    },
    {
      "epoch": 0.5272496831432193,
      "grad_norm": 0.471392959356308,
      "learning_rate": 0.00017944020356234098,
      "loss": 0.3758,
      "step": 208
    },
    {
      "epoch": 0.5297845373891001,
      "grad_norm": 0.33414778113365173,
      "learning_rate": 0.00017933842239185752,
      "loss": 0.3095,
      "step": 209
    },
    {
      "epoch": 0.532319391634981,
      "grad_norm": 0.26553916931152344,
      "learning_rate": 0.00017923664122137406,
      "loss": 0.232,
      "step": 210
    },
    {
      "epoch": 0.5348542458808618,
      "grad_norm": 0.27914223074913025,
      "learning_rate": 0.0001791348600508906,
      "loss": 0.2438,
      "step": 211
    },
    {
      "epoch": 0.5373891001267427,
      "grad_norm": 0.36625003814697266,
      "learning_rate": 0.00017903307888040713,
      "loss": 0.2479,
      "step": 212
    },
    {
      "epoch": 0.5399239543726235,
      "grad_norm": 0.3876325488090515,
      "learning_rate": 0.00017893129770992367,
      "loss": 0.3428,
      "step": 213
    },
    {
      "epoch": 0.5424588086185045,
      "grad_norm": 0.5402606129646301,
      "learning_rate": 0.0001788295165394402,
      "loss": 0.394,
      "step": 214
    },
    {
      "epoch": 0.5449936628643853,
      "grad_norm": 0.4023256301879883,
      "learning_rate": 0.00017872773536895675,
      "loss": 0.3348,
      "step": 215
    },
    {
      "epoch": 0.5475285171102662,
      "grad_norm": 0.4440263509750366,
      "learning_rate": 0.0001786259541984733,
      "loss": 0.3001,
      "step": 216
    },
    {
      "epoch": 0.550063371356147,
      "grad_norm": 0.39178457856178284,
      "learning_rate": 0.00017852417302798983,
      "loss": 0.2561,
      "step": 217
    },
    {
      "epoch": 0.5525982256020279,
      "grad_norm": 0.5261508226394653,
      "learning_rate": 0.00017842239185750637,
      "loss": 0.4583,
      "step": 218
    },
    {
      "epoch": 0.5551330798479087,
      "grad_norm": 0.3981377184391022,
      "learning_rate": 0.0001783206106870229,
      "loss": 0.265,
      "step": 219
    },
    {
      "epoch": 0.5576679340937896,
      "grad_norm": 0.3689790666103363,
      "learning_rate": 0.00017821882951653945,
      "loss": 0.3965,
      "step": 220
    },
    {
      "epoch": 0.5602027883396705,
      "grad_norm": 0.38442498445510864,
      "learning_rate": 0.000178117048346056,
      "loss": 0.268,
      "step": 221
    },
    {
      "epoch": 0.5627376425855514,
      "grad_norm": 0.3051845133304596,
      "learning_rate": 0.00017801526717557253,
      "loss": 0.2362,
      "step": 222
    },
    {
      "epoch": 0.5652724968314322,
      "grad_norm": 0.41551336646080017,
      "learning_rate": 0.00017791348600508907,
      "loss": 0.3428,
      "step": 223
    },
    {
      "epoch": 0.5678073510773131,
      "grad_norm": 0.2885109484195709,
      "learning_rate": 0.0001778117048346056,
      "loss": 0.2328,
      "step": 224
    },
    {
      "epoch": 0.5703422053231939,
      "grad_norm": 0.48813045024871826,
      "learning_rate": 0.00017770992366412215,
      "loss": 0.3502,
      "step": 225
    },
    {
      "epoch": 0.5728770595690748,
      "grad_norm": 0.4413661062717438,
      "learning_rate": 0.00017760814249363869,
      "loss": 0.2687,
      "step": 226
    },
    {
      "epoch": 0.5754119138149556,
      "grad_norm": 0.422799289226532,
      "learning_rate": 0.00017750636132315522,
      "loss": 0.4776,
      "step": 227
    },
    {
      "epoch": 0.5779467680608364,
      "grad_norm": 0.39486098289489746,
      "learning_rate": 0.00017740458015267176,
      "loss": 0.3551,
      "step": 228
    },
    {
      "epoch": 0.5804816223067174,
      "grad_norm": 0.366207480430603,
      "learning_rate": 0.0001773027989821883,
      "loss": 0.2639,
      "step": 229
    },
    {
      "epoch": 0.5830164765525983,
      "grad_norm": 0.334626704454422,
      "learning_rate": 0.00017720101781170484,
      "loss": 0.2407,
      "step": 230
    },
    {
      "epoch": 0.5855513307984791,
      "grad_norm": 0.5580838918685913,
      "learning_rate": 0.00017709923664122138,
      "loss": 0.3856,
      "step": 231
    },
    {
      "epoch": 0.5880861850443599,
      "grad_norm": 0.3495747148990631,
      "learning_rate": 0.00017699745547073792,
      "loss": 0.3113,
      "step": 232
    },
    {
      "epoch": 0.5906210392902408,
      "grad_norm": 0.38515543937683105,
      "learning_rate": 0.00017689567430025446,
      "loss": 0.3765,
      "step": 233
    },
    {
      "epoch": 0.5931558935361216,
      "grad_norm": 0.43240851163864136,
      "learning_rate": 0.000176793893129771,
      "loss": 0.3094,
      "step": 234
    },
    {
      "epoch": 0.5956907477820025,
      "grad_norm": 0.42353445291519165,
      "learning_rate": 0.00017669211195928754,
      "loss": 0.2992,
      "step": 235
    },
    {
      "epoch": 0.5982256020278834,
      "grad_norm": 0.42463192343711853,
      "learning_rate": 0.00017659033078880408,
      "loss": 0.2486,
      "step": 236
    },
    {
      "epoch": 0.6007604562737643,
      "grad_norm": 0.4749039113521576,
      "learning_rate": 0.00017648854961832062,
      "loss": 0.3742,
      "step": 237
    },
    {
      "epoch": 0.6032953105196451,
      "grad_norm": 0.5651363730430603,
      "learning_rate": 0.00017638676844783716,
      "loss": 0.3079,
      "step": 238
    },
    {
      "epoch": 0.605830164765526,
      "grad_norm": 0.34195011854171753,
      "learning_rate": 0.0001762849872773537,
      "loss": 0.3236,
      "step": 239
    },
    {
      "epoch": 0.6083650190114068,
      "grad_norm": 0.5522583723068237,
      "learning_rate": 0.00017618320610687024,
      "loss": 0.3026,
      "step": 240
    },
    {
      "epoch": 0.6108998732572877,
      "grad_norm": 0.41445448994636536,
      "learning_rate": 0.00017608142493638677,
      "loss": 0.32,
      "step": 241
    },
    {
      "epoch": 0.6134347275031685,
      "grad_norm": 0.5023159384727478,
      "learning_rate": 0.00017597964376590331,
      "loss": 0.2658,
      "step": 242
    },
    {
      "epoch": 0.6159695817490495,
      "grad_norm": 0.39539164304733276,
      "learning_rate": 0.00017587786259541985,
      "loss": 0.2687,
      "step": 243
    },
    {
      "epoch": 0.6185044359949303,
      "grad_norm": 0.3105890154838562,
      "learning_rate": 0.0001757760814249364,
      "loss": 0.2224,
      "step": 244
    },
    {
      "epoch": 0.6210392902408112,
      "grad_norm": 0.3665928840637207,
      "learning_rate": 0.00017567430025445293,
      "loss": 0.3101,
      "step": 245
    },
    {
      "epoch": 0.623574144486692,
      "grad_norm": 0.28569111227989197,
      "learning_rate": 0.00017557251908396947,
      "loss": 0.2316,
      "step": 246
    },
    {
      "epoch": 0.6261089987325729,
      "grad_norm": 0.24598725140094757,
      "learning_rate": 0.000175470737913486,
      "loss": 0.2314,
      "step": 247
    },
    {
      "epoch": 0.6286438529784537,
      "grad_norm": 0.4301004111766815,
      "learning_rate": 0.00017536895674300255,
      "loss": 0.2606,
      "step": 248
    },
    {
      "epoch": 0.6311787072243346,
      "grad_norm": 0.36598455905914307,
      "learning_rate": 0.0001752671755725191,
      "loss": 0.2243,
      "step": 249
    },
    {
      "epoch": 0.6337135614702155,
      "grad_norm": 0.31714677810668945,
      "learning_rate": 0.00017516539440203563,
      "loss": 0.2561,
      "step": 250
    },
    {
      "epoch": 0.6362484157160964,
      "grad_norm": 0.5131182670593262,
      "learning_rate": 0.0001750636132315522,
      "loss": 0.3216,
      "step": 251
    },
    {
      "epoch": 0.6387832699619772,
      "grad_norm": 0.4067549407482147,
      "learning_rate": 0.0001749618320610687,
      "loss": 0.3032,
      "step": 252
    },
    {
      "epoch": 0.641318124207858,
      "grad_norm": 0.6457440853118896,
      "learning_rate": 0.00017486005089058525,
      "loss": 0.349,
      "step": 253
    },
    {
      "epoch": 0.6438529784537389,
      "grad_norm": 0.3759848177433014,
      "learning_rate": 0.00017475826972010179,
      "loss": 0.2974,
      "step": 254
    },
    {
      "epoch": 0.6463878326996197,
      "grad_norm": 0.40348076820373535,
      "learning_rate": 0.00017465648854961833,
      "loss": 0.2781,
      "step": 255
    },
    {
      "epoch": 0.6489226869455006,
      "grad_norm": 0.2639053463935852,
      "learning_rate": 0.00017455470737913486,
      "loss": 0.2413,
      "step": 256
    },
    {
      "epoch": 0.6514575411913816,
      "grad_norm": 0.4014027416706085,
      "learning_rate": 0.0001744529262086514,
      "loss": 0.2878,
      "step": 257
    },
    {
      "epoch": 0.6539923954372624,
      "grad_norm": 0.4871384799480438,
      "learning_rate": 0.00017435114503816794,
      "loss": 0.2527,
      "step": 258
    },
    {
      "epoch": 0.6565272496831432,
      "grad_norm": 0.28687578439712524,
      "learning_rate": 0.00017424936386768448,
      "loss": 0.2233,
      "step": 259
    },
    {
      "epoch": 0.6590621039290241,
      "grad_norm": 0.36948761343955994,
      "learning_rate": 0.00017414758269720102,
      "loss": 0.3007,
      "step": 260
    },
    {
      "epoch": 0.6615969581749049,
      "grad_norm": 0.6034134030342102,
      "learning_rate": 0.00017404580152671756,
      "loss": 0.3054,
      "step": 261
    },
    {
      "epoch": 0.6641318124207858,
      "grad_norm": 0.3481515645980835,
      "learning_rate": 0.0001739440203562341,
      "loss": 0.2388,
      "step": 262
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.3772611916065216,
      "learning_rate": 0.00017384223918575064,
      "loss": 0.317,
      "step": 263
    },
    {
      "epoch": 0.6692015209125475,
      "grad_norm": 0.4693986177444458,
      "learning_rate": 0.0001737404580152672,
      "loss": 0.3441,
      "step": 264
    },
    {
      "epoch": 0.6717363751584284,
      "grad_norm": 0.38484400510787964,
      "learning_rate": 0.00017363867684478372,
      "loss": 0.2637,
      "step": 265
    },
    {
      "epoch": 0.6742712294043093,
      "grad_norm": 0.3638555407524109,
      "learning_rate": 0.00017353689567430026,
      "loss": 0.2695,
      "step": 266
    },
    {
      "epoch": 0.6768060836501901,
      "grad_norm": 0.36848586797714233,
      "learning_rate": 0.0001734351145038168,
      "loss": 0.3149,
      "step": 267
    },
    {
      "epoch": 0.679340937896071,
      "grad_norm": 0.31740638613700867,
      "learning_rate": 0.00017333333333333334,
      "loss": 0.3049,
      "step": 268
    },
    {
      "epoch": 0.6818757921419518,
      "grad_norm": 0.41415438055992126,
      "learning_rate": 0.00017323155216284988,
      "loss": 0.231,
      "step": 269
    },
    {
      "epoch": 0.6844106463878327,
      "grad_norm": 0.41449829936027527,
      "learning_rate": 0.00017312977099236641,
      "loss": 0.3344,
      "step": 270
    },
    {
      "epoch": 0.6869455006337135,
      "grad_norm": 0.30683189630508423,
      "learning_rate": 0.00017302798982188295,
      "loss": 0.283,
      "step": 271
    },
    {
      "epoch": 0.6894803548795945,
      "grad_norm": 0.29896244406700134,
      "learning_rate": 0.0001729262086513995,
      "loss": 0.2363,
      "step": 272
    },
    {
      "epoch": 0.6920152091254753,
      "grad_norm": 0.44181492924690247,
      "learning_rate": 0.00017282442748091603,
      "loss": 0.3439,
      "step": 273
    },
    {
      "epoch": 0.6945500633713562,
      "grad_norm": 0.43460434675216675,
      "learning_rate": 0.00017272264631043257,
      "loss": 0.3004,
      "step": 274
    },
    {
      "epoch": 0.697084917617237,
      "grad_norm": 0.40781405568122864,
      "learning_rate": 0.00017262086513994914,
      "loss": 0.2554,
      "step": 275
    },
    {
      "epoch": 0.6996197718631179,
      "grad_norm": 0.39359861612319946,
      "learning_rate": 0.00017251908396946565,
      "loss": 0.3094,
      "step": 276
    },
    {
      "epoch": 0.7021546261089987,
      "grad_norm": 0.4507496953010559,
      "learning_rate": 0.00017241730279898222,
      "loss": 0.2985,
      "step": 277
    },
    {
      "epoch": 0.7046894803548795,
      "grad_norm": 0.4513093829154968,
      "learning_rate": 0.00017231552162849873,
      "loss": 0.4,
      "step": 278
    },
    {
      "epoch": 0.7072243346007605,
      "grad_norm": 0.3133571147918701,
      "learning_rate": 0.0001722137404580153,
      "loss": 0.2241,
      "step": 279
    },
    {
      "epoch": 0.7097591888466414,
      "grad_norm": 0.36957162618637085,
      "learning_rate": 0.0001721119592875318,
      "loss": 0.2461,
      "step": 280
    },
    {
      "epoch": 0.7122940430925222,
      "grad_norm": 0.4224545955657959,
      "learning_rate": 0.00017201017811704835,
      "loss": 0.3178,
      "step": 281
    },
    {
      "epoch": 0.714828897338403,
      "grad_norm": 0.4696861207485199,
      "learning_rate": 0.0001719083969465649,
      "loss": 0.3911,
      "step": 282
    },
    {
      "epoch": 0.7173637515842839,
      "grad_norm": 0.44058746099472046,
      "learning_rate": 0.00017180661577608143,
      "loss": 0.3169,
      "step": 283
    },
    {
      "epoch": 0.7198986058301647,
      "grad_norm": 0.32616788148880005,
      "learning_rate": 0.00017170483460559797,
      "loss": 0.2441,
      "step": 284
    },
    {
      "epoch": 0.7224334600760456,
      "grad_norm": 0.3941279649734497,
      "learning_rate": 0.0001716030534351145,
      "loss": 0.3433,
      "step": 285
    },
    {
      "epoch": 0.7249683143219265,
      "grad_norm": 0.3746216297149658,
      "learning_rate": 0.00017150127226463104,
      "loss": 0.3993,
      "step": 286
    },
    {
      "epoch": 0.7275031685678074,
      "grad_norm": 0.3758716881275177,
      "learning_rate": 0.00017139949109414758,
      "loss": 0.3139,
      "step": 287
    },
    {
      "epoch": 0.7300380228136882,
      "grad_norm": 0.35631927847862244,
      "learning_rate": 0.00017129770992366415,
      "loss": 0.2316,
      "step": 288
    },
    {
      "epoch": 0.7325728770595691,
      "grad_norm": 0.48128026723861694,
      "learning_rate": 0.00017119592875318066,
      "loss": 0.3306,
      "step": 289
    },
    {
      "epoch": 0.7351077313054499,
      "grad_norm": 0.3464122414588928,
      "learning_rate": 0.00017109414758269723,
      "loss": 0.3148,
      "step": 290
    },
    {
      "epoch": 0.7376425855513308,
      "grad_norm": 0.3772057294845581,
      "learning_rate": 0.00017099236641221374,
      "loss": 0.274,
      "step": 291
    },
    {
      "epoch": 0.7401774397972116,
      "grad_norm": 0.2896706759929657,
      "learning_rate": 0.0001708905852417303,
      "loss": 0.2275,
      "step": 292
    },
    {
      "epoch": 0.7427122940430925,
      "grad_norm": 0.48482832312583923,
      "learning_rate": 0.00017078880407124682,
      "loss": 0.2913,
      "step": 293
    },
    {
      "epoch": 0.7452471482889734,
      "grad_norm": 0.3086034655570984,
      "learning_rate": 0.00017068702290076336,
      "loss": 0.2453,
      "step": 294
    },
    {
      "epoch": 0.7477820025348543,
      "grad_norm": 0.42840075492858887,
      "learning_rate": 0.0001705852417302799,
      "loss": 0.352,
      "step": 295
    },
    {
      "epoch": 0.7503168567807351,
      "grad_norm": 0.4574609398841858,
      "learning_rate": 0.00017048346055979644,
      "loss": 0.3698,
      "step": 296
    },
    {
      "epoch": 0.752851711026616,
      "grad_norm": 0.4295889735221863,
      "learning_rate": 0.00017038167938931298,
      "loss": 0.3341,
      "step": 297
    },
    {
      "epoch": 0.7553865652724968,
      "grad_norm": 0.46036672592163086,
      "learning_rate": 0.00017027989821882952,
      "loss": 0.3175,
      "step": 298
    },
    {
      "epoch": 0.7579214195183777,
      "grad_norm": 0.45897790789604187,
      "learning_rate": 0.00017017811704834608,
      "loss": 0.31,
      "step": 299
    },
    {
      "epoch": 0.7604562737642585,
      "grad_norm": 0.2966432273387909,
      "learning_rate": 0.0001700763358778626,
      "loss": 0.2439,
      "step": 300
    },
    {
      "epoch": 0.7629911280101395,
      "grad_norm": 0.32714638113975525,
      "learning_rate": 0.00016997455470737916,
      "loss": 0.2653,
      "step": 301
    },
    {
      "epoch": 0.7655259822560203,
      "grad_norm": 0.32264646887779236,
      "learning_rate": 0.00016987277353689567,
      "loss": 0.2728,
      "step": 302
    },
    {
      "epoch": 0.7680608365019012,
      "grad_norm": 0.4073767066001892,
      "learning_rate": 0.00016977099236641224,
      "loss": 0.3501,
      "step": 303
    },
    {
      "epoch": 0.770595690747782,
      "grad_norm": 0.5493949055671692,
      "learning_rate": 0.00016966921119592875,
      "loss": 0.3212,
      "step": 304
    },
    {
      "epoch": 0.7731305449936628,
      "grad_norm": 0.335705429315567,
      "learning_rate": 0.00016956743002544532,
      "loss": 0.299,
      "step": 305
    },
    {
      "epoch": 0.7756653992395437,
      "grad_norm": 0.32758405804634094,
      "learning_rate": 0.00016946564885496183,
      "loss": 0.2547,
      "step": 306
    },
    {
      "epoch": 0.7782002534854245,
      "grad_norm": 0.32411983609199524,
      "learning_rate": 0.0001693638676844784,
      "loss": 0.2593,
      "step": 307
    },
    {
      "epoch": 0.7807351077313055,
      "grad_norm": 0.5713444352149963,
      "learning_rate": 0.0001692620865139949,
      "loss": 0.3661,
      "step": 308
    },
    {
      "epoch": 0.7832699619771863,
      "grad_norm": 0.3287065327167511,
      "learning_rate": 0.00016916030534351145,
      "loss": 0.2559,
      "step": 309
    },
    {
      "epoch": 0.7858048162230672,
      "grad_norm": 0.3499440550804138,
      "learning_rate": 0.000169058524173028,
      "loss": 0.3489,
      "step": 310
    },
    {
      "epoch": 0.788339670468948,
      "grad_norm": 0.259787917137146,
      "learning_rate": 0.00016895674300254453,
      "loss": 0.2451,
      "step": 311
    },
    {
      "epoch": 0.7908745247148289,
      "grad_norm": 0.3902716338634491,
      "learning_rate": 0.0001688549618320611,
      "loss": 0.2821,
      "step": 312
    },
    {
      "epoch": 0.7934093789607097,
      "grad_norm": 0.4061296582221985,
      "learning_rate": 0.0001687531806615776,
      "loss": 0.4289,
      "step": 313
    },
    {
      "epoch": 0.7959442332065906,
      "grad_norm": 0.3062605857849121,
      "learning_rate": 0.00016865139949109417,
      "loss": 0.2489,
      "step": 314
    },
    {
      "epoch": 0.7984790874524715,
      "grad_norm": 0.36886945366859436,
      "learning_rate": 0.00016854961832061068,
      "loss": 0.4049,
      "step": 315
    },
    {
      "epoch": 0.8010139416983524,
      "grad_norm": 0.25828975439071655,
      "learning_rate": 0.00016844783715012725,
      "loss": 0.238,
      "step": 316
    },
    {
      "epoch": 0.8035487959442332,
      "grad_norm": 0.39747142791748047,
      "learning_rate": 0.00016834605597964376,
      "loss": 0.3928,
      "step": 317
    },
    {
      "epoch": 0.8060836501901141,
      "grad_norm": 0.3884779214859009,
      "learning_rate": 0.00016824427480916033,
      "loss": 0.2881,
      "step": 318
    },
    {
      "epoch": 0.8086185044359949,
      "grad_norm": 0.3687349855899811,
      "learning_rate": 0.00016814249363867684,
      "loss": 0.3662,
      "step": 319
    },
    {
      "epoch": 0.8111533586818758,
      "grad_norm": 0.3631541132926941,
      "learning_rate": 0.0001680407124681934,
      "loss": 0.2657,
      "step": 320
    },
    {
      "epoch": 0.8136882129277566,
      "grad_norm": 0.3174535930156708,
      "learning_rate": 0.00016793893129770992,
      "loss": 0.2636,
      "step": 321
    },
    {
      "epoch": 0.8162230671736375,
      "grad_norm": 0.44168904423713684,
      "learning_rate": 0.00016783715012722646,
      "loss": 0.2882,
      "step": 322
    },
    {
      "epoch": 0.8187579214195184,
      "grad_norm": 0.370685875415802,
      "learning_rate": 0.000167735368956743,
      "loss": 0.3228,
      "step": 323
    },
    {
      "epoch": 0.8212927756653993,
      "grad_norm": 0.3001299798488617,
      "learning_rate": 0.00016763358778625954,
      "loss": 0.2256,
      "step": 324
    },
    {
      "epoch": 0.8238276299112801,
      "grad_norm": 0.37992653250694275,
      "learning_rate": 0.0001675318066157761,
      "loss": 0.2633,
      "step": 325
    },
    {
      "epoch": 0.826362484157161,
      "grad_norm": 0.4739125072956085,
      "learning_rate": 0.00016743002544529262,
      "loss": 0.3044,
      "step": 326
    },
    {
      "epoch": 0.8288973384030418,
      "grad_norm": 0.36424344778060913,
      "learning_rate": 0.00016732824427480918,
      "loss": 0.3311,
      "step": 327
    },
    {
      "epoch": 0.8314321926489227,
      "grad_norm": 0.4474777579307556,
      "learning_rate": 0.0001672264631043257,
      "loss": 0.4099,
      "step": 328
    },
    {
      "epoch": 0.8339670468948035,
      "grad_norm": 0.4337301552295685,
      "learning_rate": 0.00016712468193384226,
      "loss": 0.3567,
      "step": 329
    },
    {
      "epoch": 0.8365019011406845,
      "grad_norm": 0.37666353583335876,
      "learning_rate": 0.00016702290076335877,
      "loss": 0.3079,
      "step": 330
    },
    {
      "epoch": 0.8390367553865653,
      "grad_norm": 0.36810433864593506,
      "learning_rate": 0.00016692111959287534,
      "loss": 0.414,
      "step": 331
    },
    {
      "epoch": 0.8415716096324461,
      "grad_norm": 0.3914581537246704,
      "learning_rate": 0.00016681933842239185,
      "loss": 0.2807,
      "step": 332
    },
    {
      "epoch": 0.844106463878327,
      "grad_norm": 0.3891938626766205,
      "learning_rate": 0.00016671755725190842,
      "loss": 0.3101,
      "step": 333
    },
    {
      "epoch": 0.8466413181242078,
      "grad_norm": 0.4397302269935608,
      "learning_rate": 0.00016661577608142493,
      "loss": 0.2659,
      "step": 334
    },
    {
      "epoch": 0.8491761723700887,
      "grad_norm": 0.3152853846549988,
      "learning_rate": 0.0001665139949109415,
      "loss": 0.308,
      "step": 335
    },
    {
      "epoch": 0.8517110266159695,
      "grad_norm": 0.2894272208213806,
      "learning_rate": 0.00016641221374045804,
      "loss": 0.2675,
      "step": 336
    },
    {
      "epoch": 0.8542458808618505,
      "grad_norm": 0.27995947003364563,
      "learning_rate": 0.00016631043256997455,
      "loss": 0.2603,
      "step": 337
    },
    {
      "epoch": 0.8567807351077313,
      "grad_norm": 0.42209070920944214,
      "learning_rate": 0.00016620865139949112,
      "loss": 0.3417,
      "step": 338
    },
    {
      "epoch": 0.8593155893536122,
      "grad_norm": 0.3781871795654297,
      "learning_rate": 0.00016610687022900763,
      "loss": 0.3441,
      "step": 339
    },
    {
      "epoch": 0.861850443599493,
      "grad_norm": 0.3438952565193176,
      "learning_rate": 0.0001660050890585242,
      "loss": 0.2249,
      "step": 340
    },
    {
      "epoch": 0.8643852978453739,
      "grad_norm": 0.32164961099624634,
      "learning_rate": 0.0001659033078880407,
      "loss": 0.2472,
      "step": 341
    },
    {
      "epoch": 0.8669201520912547,
      "grad_norm": 0.3517252504825592,
      "learning_rate": 0.00016580152671755727,
      "loss": 0.2434,
      "step": 342
    },
    {
      "epoch": 0.8694550063371356,
      "grad_norm": 0.29841092228889465,
      "learning_rate": 0.00016569974554707378,
      "loss": 0.2536,
      "step": 343
    },
    {
      "epoch": 0.8719898605830165,
      "grad_norm": 0.3351423144340515,
      "learning_rate": 0.00016559796437659035,
      "loss": 0.2501,
      "step": 344
    },
    {
      "epoch": 0.8745247148288974,
      "grad_norm": 0.3979301154613495,
      "learning_rate": 0.00016549618320610686,
      "loss": 0.2358,
      "step": 345
    },
    {
      "epoch": 0.8770595690747782,
      "grad_norm": 0.3859489858150482,
      "learning_rate": 0.00016539440203562343,
      "loss": 0.2675,
      "step": 346
    },
    {
      "epoch": 0.8795944233206591,
      "grad_norm": 0.3836475908756256,
      "learning_rate": 0.00016529262086513994,
      "loss": 0.2179,
      "step": 347
    },
    {
      "epoch": 0.8821292775665399,
      "grad_norm": 0.3986142575740814,
      "learning_rate": 0.0001651908396946565,
      "loss": 0.2599,
      "step": 348
    },
    {
      "epoch": 0.8846641318124208,
      "grad_norm": 0.4105628430843353,
      "learning_rate": 0.00016508905852417305,
      "loss": 0.242,
      "step": 349
    },
    {
      "epoch": 0.8871989860583016,
      "grad_norm": 0.34334608912467957,
      "learning_rate": 0.00016498727735368956,
      "loss": 0.2771,
      "step": 350
    },
    {
      "epoch": 0.8897338403041825,
      "grad_norm": 0.3412443995475769,
      "learning_rate": 0.00016488549618320613,
      "loss": 0.2289,
      "step": 351
    },
    {
      "epoch": 0.8922686945500634,
      "grad_norm": 0.3596668541431427,
      "learning_rate": 0.00016478371501272264,
      "loss": 0.2253,
      "step": 352
    },
    {
      "epoch": 0.8948035487959443,
      "grad_norm": 0.43112802505493164,
      "learning_rate": 0.0001646819338422392,
      "loss": 0.3116,
      "step": 353
    },
    {
      "epoch": 0.8973384030418251,
      "grad_norm": 0.4306243062019348,
      "learning_rate": 0.00016458015267175572,
      "loss": 0.3099,
      "step": 354
    },
    {
      "epoch": 0.899873257287706,
      "grad_norm": 0.2773829996585846,
      "learning_rate": 0.00016447837150127228,
      "loss": 0.2765,
      "step": 355
    },
    {
      "epoch": 0.9024081115335868,
      "grad_norm": 0.5014198422431946,
      "learning_rate": 0.0001643765903307888,
      "loss": 0.302,
      "step": 356
    },
    {
      "epoch": 0.9049429657794676,
      "grad_norm": 0.4376792013645172,
      "learning_rate": 0.00016427480916030536,
      "loss": 0.2967,
      "step": 357
    },
    {
      "epoch": 0.9074778200253485,
      "grad_norm": 0.34460946917533875,
      "learning_rate": 0.00016417302798982187,
      "loss": 0.3678,
      "step": 358
    },
    {
      "epoch": 0.9100126742712294,
      "grad_norm": 0.23346909880638123,
      "learning_rate": 0.00016407124681933844,
      "loss": 0.2409,
      "step": 359
    },
    {
      "epoch": 0.9125475285171103,
      "grad_norm": 0.35633108019828796,
      "learning_rate": 0.00016396946564885498,
      "loss": 0.3555,
      "step": 360
    },
    {
      "epoch": 0.9150823827629911,
      "grad_norm": 0.26780250668525696,
      "learning_rate": 0.00016386768447837152,
      "loss": 0.2543,
      "step": 361
    },
    {
      "epoch": 0.917617237008872,
      "grad_norm": 0.34583303332328796,
      "learning_rate": 0.00016376590330788806,
      "loss": 0.2444,
      "step": 362
    },
    {
      "epoch": 0.9201520912547528,
      "grad_norm": 0.38331279158592224,
      "learning_rate": 0.0001636641221374046,
      "loss": 0.3549,
      "step": 363
    },
    {
      "epoch": 0.9226869455006337,
      "grad_norm": 0.37290483713150024,
      "learning_rate": 0.00016356234096692114,
      "loss": 0.3311,
      "step": 364
    },
    {
      "epoch": 0.9252217997465145,
      "grad_norm": 0.406568318605423,
      "learning_rate": 0.00016346055979643765,
      "loss": 0.2774,
      "step": 365
    },
    {
      "epoch": 0.9277566539923955,
      "grad_norm": 0.35498303174972534,
      "learning_rate": 0.00016335877862595422,
      "loss": 0.2121,
      "step": 366
    },
    {
      "epoch": 0.9302915082382763,
      "grad_norm": 0.3682021498680115,
      "learning_rate": 0.00016325699745547073,
      "loss": 0.2648,
      "step": 367
    },
    {
      "epoch": 0.9328263624841572,
      "grad_norm": 0.37826359272003174,
      "learning_rate": 0.0001631552162849873,
      "loss": 0.2214,
      "step": 368
    },
    {
      "epoch": 0.935361216730038,
      "grad_norm": 0.4018029570579529,
      "learning_rate": 0.0001630534351145038,
      "loss": 0.2291,
      "step": 369
    },
    {
      "epoch": 0.9378960709759189,
      "grad_norm": 0.4628411531448364,
      "learning_rate": 0.00016295165394402037,
      "loss": 0.3486,
      "step": 370
    },
    {
      "epoch": 0.9404309252217997,
      "grad_norm": 0.5615106821060181,
      "learning_rate": 0.00016284987277353689,
      "loss": 0.3281,
      "step": 371
    },
    {
      "epoch": 0.9429657794676806,
      "grad_norm": 0.40337833762168884,
      "learning_rate": 0.00016274809160305345,
      "loss": 0.22,
      "step": 372
    },
    {
      "epoch": 0.9455006337135615,
      "grad_norm": 0.4247727692127228,
      "learning_rate": 0.00016264631043257,
      "loss": 0.2801,
      "step": 373
    },
    {
      "epoch": 0.9480354879594424,
      "grad_norm": 0.28746598958969116,
      "learning_rate": 0.00016254452926208653,
      "loss": 0.2349,
      "step": 374
    },
    {
      "epoch": 0.9505703422053232,
      "grad_norm": 0.3654968738555908,
      "learning_rate": 0.00016244274809160307,
      "loss": 0.2696,
      "step": 375
    },
    {
      "epoch": 0.9531051964512041,
      "grad_norm": 0.3999825417995453,
      "learning_rate": 0.0001623409669211196,
      "loss": 0.4228,
      "step": 376
    },
    {
      "epoch": 0.9556400506970849,
      "grad_norm": 0.3065613806247711,
      "learning_rate": 0.00016223918575063615,
      "loss": 0.2505,
      "step": 377
    },
    {
      "epoch": 0.9581749049429658,
      "grad_norm": 0.3503481149673462,
      "learning_rate": 0.0001621374045801527,
      "loss": 0.2953,
      "step": 378
    },
    {
      "epoch": 0.9607097591888466,
      "grad_norm": 0.28918176889419556,
      "learning_rate": 0.00016203562340966923,
      "loss": 0.2454,
      "step": 379
    },
    {
      "epoch": 0.9632446134347274,
      "grad_norm": 0.3047085404396057,
      "learning_rate": 0.00016193384223918574,
      "loss": 0.2639,
      "step": 380
    },
    {
      "epoch": 0.9657794676806084,
      "grad_norm": 0.3775922358036041,
      "learning_rate": 0.0001618320610687023,
      "loss": 0.3787,
      "step": 381
    },
    {
      "epoch": 0.9683143219264893,
      "grad_norm": 0.32147660851478577,
      "learning_rate": 0.00016173027989821882,
      "loss": 0.2273,
      "step": 382
    },
    {
      "epoch": 0.9708491761723701,
      "grad_norm": 0.355747252702713,
      "learning_rate": 0.00016162849872773538,
      "loss": 0.2805,
      "step": 383
    },
    {
      "epoch": 0.973384030418251,
      "grad_norm": 0.2670198082923889,
      "learning_rate": 0.0001615267175572519,
      "loss": 0.2393,
      "step": 384
    },
    {
      "epoch": 0.9759188846641318,
      "grad_norm": 0.3395114839076996,
      "learning_rate": 0.00016142493638676846,
      "loss": 0.2893,
      "step": 385
    },
    {
      "epoch": 0.9784537389100126,
      "grad_norm": 0.3189052641391754,
      "learning_rate": 0.000161323155216285,
      "loss": 0.2442,
      "step": 386
    },
    {
      "epoch": 0.9809885931558935,
      "grad_norm": 0.49379605054855347,
      "learning_rate": 0.00016122137404580154,
      "loss": 0.3126,
      "step": 387
    },
    {
      "epoch": 0.9835234474017744,
      "grad_norm": 0.2787371575832367,
      "learning_rate": 0.00016111959287531808,
      "loss": 0.2329,
      "step": 388
    },
    {
      "epoch": 0.9860583016476553,
      "grad_norm": 0.3559485673904419,
      "learning_rate": 0.00016101781170483462,
      "loss": 0.335,
      "step": 389
    },
    {
      "epoch": 0.9885931558935361,
      "grad_norm": 0.43041396141052246,
      "learning_rate": 0.00016091603053435116,
      "loss": 0.3069,
      "step": 390
    },
    {
      "epoch": 0.991128010139417,
      "grad_norm": 0.3231935203075409,
      "learning_rate": 0.0001608142493638677,
      "loss": 0.2354,
      "step": 391
    },
    {
      "epoch": 0.9936628643852978,
      "grad_norm": 0.3676549792289734,
      "learning_rate": 0.00016071246819338424,
      "loss": 0.2958,
      "step": 392
    },
    {
      "epoch": 0.9961977186311787,
      "grad_norm": 0.37902191281318665,
      "learning_rate": 0.00016061068702290075,
      "loss": 0.2792,
      "step": 393
    },
    {
      "epoch": 0.9987325728770595,
      "grad_norm": 0.47126442193984985,
      "learning_rate": 0.00016050890585241732,
      "loss": 0.4871,
      "step": 394
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.4303727447986603,
      "learning_rate": 0.00016040712468193383,
      "loss": 0.2121,
      "step": 395
    },
    {
      "epoch": 1.002534854245881,
      "grad_norm": 0.3156070411205292,
      "learning_rate": 0.0001603053435114504,
      "loss": 0.2528,
      "step": 396
    },
    {
      "epoch": 1.0050697084917617,
      "grad_norm": 0.3030865788459778,
      "learning_rate": 0.00016020356234096693,
      "loss": 0.2029,
      "step": 397
    },
    {
      "epoch": 1.0076045627376427,
      "grad_norm": 0.2900277376174927,
      "learning_rate": 0.00016010178117048347,
      "loss": 0.2192,
      "step": 398
    },
    {
      "epoch": 1.0101394169835234,
      "grad_norm": 0.4288582503795624,
      "learning_rate": 0.00016,
      "loss": 0.308,
      "step": 399
    },
    {
      "epoch": 1.0126742712294043,
      "grad_norm": 0.3376273214817047,
      "learning_rate": 0.00015989821882951655,
      "loss": 0.2569,
      "step": 400
    },
    {
      "epoch": 1.015209125475285,
      "grad_norm": 0.39375385642051697,
      "learning_rate": 0.0001597964376590331,
      "loss": 0.2104,
      "step": 401
    },
    {
      "epoch": 1.017743979721166,
      "grad_norm": 0.2907378077507019,
      "learning_rate": 0.00015969465648854963,
      "loss": 0.2057,
      "step": 402
    },
    {
      "epoch": 1.020278833967047,
      "grad_norm": 0.3524622917175293,
      "learning_rate": 0.00015959287531806617,
      "loss": 0.2296,
      "step": 403
    },
    {
      "epoch": 1.0228136882129277,
      "grad_norm": 0.36487293243408203,
      "learning_rate": 0.0001594910941475827,
      "loss": 0.2133,
      "step": 404
    },
    {
      "epoch": 1.0253485424588087,
      "grad_norm": 0.4489257335662842,
      "learning_rate": 0.00015938931297709925,
      "loss": 0.2162,
      "step": 405
    },
    {
      "epoch": 1.0278833967046894,
      "grad_norm": 0.41142696142196655,
      "learning_rate": 0.0001592875318066158,
      "loss": 0.2383,
      "step": 406
    },
    {
      "epoch": 1.0304182509505704,
      "grad_norm": 0.3364538848400116,
      "learning_rate": 0.00015918575063613233,
      "loss": 0.2077,
      "step": 407
    },
    {
      "epoch": 1.0329531051964511,
      "grad_norm": 0.576775312423706,
      "learning_rate": 0.00015908396946564884,
      "loss": 0.2435,
      "step": 408
    },
    {
      "epoch": 1.035487959442332,
      "grad_norm": 0.6190880537033081,
      "learning_rate": 0.0001589821882951654,
      "loss": 0.252,
      "step": 409
    },
    {
      "epoch": 1.038022813688213,
      "grad_norm": 0.4943700432777405,
      "learning_rate": 0.00015888040712468195,
      "loss": 0.3275,
      "step": 410
    },
    {
      "epoch": 1.0405576679340938,
      "grad_norm": 0.3160712420940399,
      "learning_rate": 0.00015877862595419848,
      "loss": 0.217,
      "step": 411
    },
    {
      "epoch": 1.0430925221799747,
      "grad_norm": 0.34546172618865967,
      "learning_rate": 0.00015867684478371502,
      "loss": 0.2509,
      "step": 412
    },
    {
      "epoch": 1.0456273764258555,
      "grad_norm": 0.3498256802558899,
      "learning_rate": 0.00015857506361323156,
      "loss": 0.2376,
      "step": 413
    },
    {
      "epoch": 1.0481622306717364,
      "grad_norm": 0.29526984691619873,
      "learning_rate": 0.0001584732824427481,
      "loss": 0.2305,
      "step": 414
    },
    {
      "epoch": 1.0506970849176172,
      "grad_norm": 0.30113956332206726,
      "learning_rate": 0.00015837150127226464,
      "loss": 0.2205,
      "step": 415
    },
    {
      "epoch": 1.053231939163498,
      "grad_norm": 0.4007863402366638,
      "learning_rate": 0.00015826972010178118,
      "loss": 0.2407,
      "step": 416
    },
    {
      "epoch": 1.055766793409379,
      "grad_norm": 0.2594064176082611,
      "learning_rate": 0.00015816793893129772,
      "loss": 0.1923,
      "step": 417
    },
    {
      "epoch": 1.0583016476552598,
      "grad_norm": 0.23412476480007172,
      "learning_rate": 0.00015806615776081426,
      "loss": 0.2158,
      "step": 418
    },
    {
      "epoch": 1.0608365019011408,
      "grad_norm": 0.397443562746048,
      "learning_rate": 0.0001579643765903308,
      "loss": 0.3666,
      "step": 419
    },
    {
      "epoch": 1.0633713561470215,
      "grad_norm": 0.3756926655769348,
      "learning_rate": 0.00015786259541984734,
      "loss": 0.2081,
      "step": 420
    },
    {
      "epoch": 1.0659062103929025,
      "grad_norm": 0.5698515772819519,
      "learning_rate": 0.00015776081424936388,
      "loss": 0.2265,
      "step": 421
    },
    {
      "epoch": 1.0684410646387832,
      "grad_norm": 0.3608737289905548,
      "learning_rate": 0.00015765903307888042,
      "loss": 0.3821,
      "step": 422
    },
    {
      "epoch": 1.0709759188846641,
      "grad_norm": 0.4109106957912445,
      "learning_rate": 0.00015755725190839696,
      "loss": 0.3484,
      "step": 423
    },
    {
      "epoch": 1.073510773130545,
      "grad_norm": 0.38270992040634155,
      "learning_rate": 0.0001574554707379135,
      "loss": 0.2365,
      "step": 424
    },
    {
      "epoch": 1.0760456273764258,
      "grad_norm": 0.2857488989830017,
      "learning_rate": 0.00015735368956743004,
      "loss": 0.263,
      "step": 425
    },
    {
      "epoch": 1.0785804816223068,
      "grad_norm": 0.25236523151397705,
      "learning_rate": 0.00015725190839694657,
      "loss": 0.2216,
      "step": 426
    },
    {
      "epoch": 1.0811153358681875,
      "grad_norm": 0.40370991826057434,
      "learning_rate": 0.00015715012722646311,
      "loss": 0.3711,
      "step": 427
    },
    {
      "epoch": 1.0836501901140685,
      "grad_norm": 0.2624306380748749,
      "learning_rate": 0.00015704834605597965,
      "loss": 0.2082,
      "step": 428
    },
    {
      "epoch": 1.0861850443599492,
      "grad_norm": 0.4375905692577362,
      "learning_rate": 0.0001569465648854962,
      "loss": 0.3474,
      "step": 429
    },
    {
      "epoch": 1.0887198986058302,
      "grad_norm": 0.3287188410758972,
      "learning_rate": 0.00015684478371501273,
      "loss": 0.3097,
      "step": 430
    },
    {
      "epoch": 1.091254752851711,
      "grad_norm": 0.2669587731361389,
      "learning_rate": 0.00015674300254452927,
      "loss": 0.229,
      "step": 431
    },
    {
      "epoch": 1.0937896070975919,
      "grad_norm": 0.28192129731178284,
      "learning_rate": 0.0001566412213740458,
      "loss": 0.2226,
      "step": 432
    },
    {
      "epoch": 1.0963244613434728,
      "grad_norm": 0.30673590302467346,
      "learning_rate": 0.00015653944020356235,
      "loss": 0.2331,
      "step": 433
    },
    {
      "epoch": 1.0988593155893536,
      "grad_norm": 0.34343135356903076,
      "learning_rate": 0.0001564376590330789,
      "loss": 0.2567,
      "step": 434
    },
    {
      "epoch": 1.1013941698352345,
      "grad_norm": 0.4853306710720062,
      "learning_rate": 0.00015633587786259543,
      "loss": 0.3688,
      "step": 435
    },
    {
      "epoch": 1.1039290240811153,
      "grad_norm": 0.42215099930763245,
      "learning_rate": 0.00015623409669211197,
      "loss": 0.3465,
      "step": 436
    },
    {
      "epoch": 1.1064638783269962,
      "grad_norm": 0.5882295370101929,
      "learning_rate": 0.0001561323155216285,
      "loss": 0.4502,
      "step": 437
    },
    {
      "epoch": 1.1089987325728772,
      "grad_norm": 0.44578316807746887,
      "learning_rate": 0.00015603053435114505,
      "loss": 0.3345,
      "step": 438
    },
    {
      "epoch": 1.111533586818758,
      "grad_norm": 0.366653174161911,
      "learning_rate": 0.00015592875318066159,
      "loss": 0.2111,
      "step": 439
    },
    {
      "epoch": 1.1140684410646389,
      "grad_norm": 0.4964495003223419,
      "learning_rate": 0.00015582697201017812,
      "loss": 0.2731,
      "step": 440
    },
    {
      "epoch": 1.1166032953105196,
      "grad_norm": 0.3171039819717407,
      "learning_rate": 0.00015572519083969466,
      "loss": 0.2148,
      "step": 441
    },
    {
      "epoch": 1.1191381495564006,
      "grad_norm": 0.3483026921749115,
      "learning_rate": 0.0001556234096692112,
      "loss": 0.2481,
      "step": 442
    },
    {
      "epoch": 1.1216730038022813,
      "grad_norm": 0.37379321455955505,
      "learning_rate": 0.00015552162849872774,
      "loss": 0.3292,
      "step": 443
    },
    {
      "epoch": 1.1242078580481623,
      "grad_norm": 0.32108721137046814,
      "learning_rate": 0.00015541984732824428,
      "loss": 0.3363,
      "step": 444
    },
    {
      "epoch": 1.126742712294043,
      "grad_norm": 0.3879946768283844,
      "learning_rate": 0.00015531806615776082,
      "loss": 0.2891,
      "step": 445
    },
    {
      "epoch": 1.129277566539924,
      "grad_norm": 0.2334345281124115,
      "learning_rate": 0.00015521628498727736,
      "loss": 0.2183,
      "step": 446
    },
    {
      "epoch": 1.131812420785805,
      "grad_norm": 0.274795264005661,
      "learning_rate": 0.0001551145038167939,
      "loss": 0.2002,
      "step": 447
    },
    {
      "epoch": 1.1343472750316856,
      "grad_norm": 0.45602667331695557,
      "learning_rate": 0.00015501272264631044,
      "loss": 0.3282,
      "step": 448
    },
    {
      "epoch": 1.1368821292775666,
      "grad_norm": 0.25433096289634705,
      "learning_rate": 0.00015491094147582698,
      "loss": 0.2195,
      "step": 449
    },
    {
      "epoch": 1.1394169835234473,
      "grad_norm": 0.3606742024421692,
      "learning_rate": 0.00015480916030534352,
      "loss": 0.244,
      "step": 450
    },
    {
      "epoch": 1.1419518377693283,
      "grad_norm": 0.3597625494003296,
      "learning_rate": 0.00015470737913486006,
      "loss": 0.2117,
      "step": 451
    },
    {
      "epoch": 1.144486692015209,
      "grad_norm": 0.32967302203178406,
      "learning_rate": 0.0001546055979643766,
      "loss": 0.2662,
      "step": 452
    },
    {
      "epoch": 1.14702154626109,
      "grad_norm": 0.32538869976997375,
      "learning_rate": 0.00015450381679389314,
      "loss": 0.2439,
      "step": 453
    },
    {
      "epoch": 1.149556400506971,
      "grad_norm": 0.36263129115104675,
      "learning_rate": 0.00015440203562340968,
      "loss": 0.2688,
      "step": 454
    },
    {
      "epoch": 1.1520912547528517,
      "grad_norm": 0.4200229346752167,
      "learning_rate": 0.00015430025445292621,
      "loss": 0.3201,
      "step": 455
    },
    {
      "epoch": 1.1546261089987326,
      "grad_norm": 0.35889115929603577,
      "learning_rate": 0.00015419847328244275,
      "loss": 0.2584,
      "step": 456
    },
    {
      "epoch": 1.1571609632446134,
      "grad_norm": 0.36060044169425964,
      "learning_rate": 0.0001540966921119593,
      "loss": 0.2496,
      "step": 457
    },
    {
      "epoch": 1.1596958174904943,
      "grad_norm": 0.3046696186065674,
      "learning_rate": 0.00015399491094147583,
      "loss": 0.2102,
      "step": 458
    },
    {
      "epoch": 1.162230671736375,
      "grad_norm": 0.4576256275177002,
      "learning_rate": 0.00015389312977099237,
      "loss": 0.3594,
      "step": 459
    },
    {
      "epoch": 1.164765525982256,
      "grad_norm": 0.3436565697193146,
      "learning_rate": 0.0001537913486005089,
      "loss": 0.2289,
      "step": 460
    },
    {
      "epoch": 1.167300380228137,
      "grad_norm": 0.4197808802127838,
      "learning_rate": 0.00015368956743002545,
      "loss": 0.2863,
      "step": 461
    },
    {
      "epoch": 1.1698352344740177,
      "grad_norm": 0.3584151566028595,
      "learning_rate": 0.000153587786259542,
      "loss": 0.2797,
      "step": 462
    },
    {
      "epoch": 1.1723700887198987,
      "grad_norm": 0.29760056734085083,
      "learning_rate": 0.00015348600508905853,
      "loss": 0.212,
      "step": 463
    },
    {
      "epoch": 1.1749049429657794,
      "grad_norm": 0.3856862485408783,
      "learning_rate": 0.00015338422391857507,
      "loss": 0.2986,
      "step": 464
    },
    {
      "epoch": 1.1774397972116604,
      "grad_norm": 0.42522993683815,
      "learning_rate": 0.0001532824427480916,
      "loss": 0.2869,
      "step": 465
    },
    {
      "epoch": 1.179974651457541,
      "grad_norm": 0.33221253752708435,
      "learning_rate": 0.00015318066157760815,
      "loss": 0.2236,
      "step": 466
    },
    {
      "epoch": 1.182509505703422,
      "grad_norm": 0.35414496064186096,
      "learning_rate": 0.00015307888040712469,
      "loss": 0.2658,
      "step": 467
    },
    {
      "epoch": 1.1850443599493028,
      "grad_norm": 0.41883930563926697,
      "learning_rate": 0.00015297709923664123,
      "loss": 0.3939,
      "step": 468
    },
    {
      "epoch": 1.1875792141951838,
      "grad_norm": 0.3070299029350281,
      "learning_rate": 0.00015287531806615776,
      "loss": 0.2208,
      "step": 469
    },
    {
      "epoch": 1.1901140684410647,
      "grad_norm": 0.30749714374542236,
      "learning_rate": 0.0001527735368956743,
      "loss": 0.242,
      "step": 470
    },
    {
      "epoch": 1.1926489226869454,
      "grad_norm": 0.2579677104949951,
      "learning_rate": 0.00015267175572519084,
      "loss": 0.2435,
      "step": 471
    },
    {
      "epoch": 1.1951837769328264,
      "grad_norm": 0.46220460534095764,
      "learning_rate": 0.00015256997455470738,
      "loss": 0.2803,
      "step": 472
    },
    {
      "epoch": 1.1977186311787071,
      "grad_norm": 0.3824957609176636,
      "learning_rate": 0.00015246819338422392,
      "loss": 0.3143,
      "step": 473
    },
    {
      "epoch": 1.200253485424588,
      "grad_norm": 0.3049899637699127,
      "learning_rate": 0.00015236641221374046,
      "loss": 0.2231,
      "step": 474
    },
    {
      "epoch": 1.202788339670469,
      "grad_norm": 0.4378805458545685,
      "learning_rate": 0.000152264631043257,
      "loss": 0.2041,
      "step": 475
    },
    {
      "epoch": 1.2053231939163498,
      "grad_norm": 0.3902495801448822,
      "learning_rate": 0.00015216284987277354,
      "loss": 0.3055,
      "step": 476
    },
    {
      "epoch": 1.2078580481622307,
      "grad_norm": 0.3150664269924164,
      "learning_rate": 0.00015206106870229008,
      "loss": 0.2222,
      "step": 477
    },
    {
      "epoch": 1.2103929024081115,
      "grad_norm": 0.3551795184612274,
      "learning_rate": 0.00015195928753180662,
      "loss": 0.2304,
      "step": 478
    },
    {
      "epoch": 1.2129277566539924,
      "grad_norm": 0.35522422194480896,
      "learning_rate": 0.00015185750636132316,
      "loss": 0.2636,
      "step": 479
    },
    {
      "epoch": 1.2154626108998732,
      "grad_norm": 0.35261449217796326,
      "learning_rate": 0.0001517557251908397,
      "loss": 0.2743,
      "step": 480
    },
    {
      "epoch": 1.2179974651457541,
      "grad_norm": 0.4755167067050934,
      "learning_rate": 0.00015165394402035624,
      "loss": 0.321,
      "step": 481
    },
    {
      "epoch": 1.2205323193916349,
      "grad_norm": 0.36083585023880005,
      "learning_rate": 0.0001515521628498728,
      "loss": 0.2549,
      "step": 482
    },
    {
      "epoch": 1.2230671736375158,
      "grad_norm": 0.3213503956794739,
      "learning_rate": 0.00015145038167938932,
      "loss": 0.2685,
      "step": 483
    },
    {
      "epoch": 1.2256020278833968,
      "grad_norm": 0.29988422989845276,
      "learning_rate": 0.00015134860050890588,
      "loss": 0.3253,
      "step": 484
    },
    {
      "epoch": 1.2281368821292775,
      "grad_norm": 0.3549601435661316,
      "learning_rate": 0.0001512468193384224,
      "loss": 0.2574,
      "step": 485
    },
    {
      "epoch": 1.2306717363751585,
      "grad_norm": 0.33347830176353455,
      "learning_rate": 0.00015114503816793893,
      "loss": 0.3408,
      "step": 486
    },
    {
      "epoch": 1.2332065906210392,
      "grad_norm": 0.2988692820072174,
      "learning_rate": 0.00015104325699745547,
      "loss": 0.2583,
      "step": 487
    },
    {
      "epoch": 1.2357414448669202,
      "grad_norm": 0.2710984945297241,
      "learning_rate": 0.000150941475826972,
      "loss": 0.2708,
      "step": 488
    },
    {
      "epoch": 1.2382762991128011,
      "grad_norm": 0.28278592228889465,
      "learning_rate": 0.00015083969465648855,
      "loss": 0.2345,
      "step": 489
    },
    {
      "epoch": 1.2408111533586819,
      "grad_norm": 0.31838810443878174,
      "learning_rate": 0.0001507379134860051,
      "loss": 0.2193,
      "step": 490
    },
    {
      "epoch": 1.2433460076045628,
      "grad_norm": 0.31196919083595276,
      "learning_rate": 0.00015063613231552163,
      "loss": 0.2334,
      "step": 491
    },
    {
      "epoch": 1.2458808618504436,
      "grad_norm": 0.3953218460083008,
      "learning_rate": 0.00015053435114503817,
      "loss": 0.2716,
      "step": 492
    },
    {
      "epoch": 1.2484157160963245,
      "grad_norm": 0.4814457297325134,
      "learning_rate": 0.0001504325699745547,
      "loss": 0.2847,
      "step": 493
    },
    {
      "epoch": 1.2509505703422052,
      "grad_norm": 0.5870761275291443,
      "learning_rate": 0.00015033078880407125,
      "loss": 0.3685,
      "step": 494
    },
    {
      "epoch": 1.2534854245880862,
      "grad_norm": 0.30315646529197693,
      "learning_rate": 0.00015022900763358781,
      "loss": 0.2112,
      "step": 495
    },
    {
      "epoch": 1.256020278833967,
      "grad_norm": 0.4358583390712738,
      "learning_rate": 0.00015012722646310433,
      "loss": 0.279,
      "step": 496
    },
    {
      "epoch": 1.258555133079848,
      "grad_norm": 0.3699369728565216,
      "learning_rate": 0.0001500254452926209,
      "loss": 0.2941,
      "step": 497
    },
    {
      "epoch": 1.2610899873257289,
      "grad_norm": 0.338522344827652,
      "learning_rate": 0.0001499236641221374,
      "loss": 0.273,
      "step": 498
    },
    {
      "epoch": 1.2636248415716096,
      "grad_norm": 0.29661208391189575,
      "learning_rate": 0.00014982188295165397,
      "loss": 0.23,
      "step": 499
    },
    {
      "epoch": 1.2661596958174905,
      "grad_norm": 0.4247685968875885,
      "learning_rate": 0.00014972010178117048,
      "loss": 0.3112,
      "step": 500
    },
    {
      "epoch": 1.2686945500633713,
      "grad_norm": 0.44488340616226196,
      "learning_rate": 0.00014961832061068702,
      "loss": 0.3796,
      "step": 501
    },
    {
      "epoch": 1.2712294043092522,
      "grad_norm": 0.30672356486320496,
      "learning_rate": 0.00014951653944020356,
      "loss": 0.2222,
      "step": 502
    },
    {
      "epoch": 1.2737642585551332,
      "grad_norm": 0.3291172981262207,
      "learning_rate": 0.0001494147582697201,
      "loss": 0.2177,
      "step": 503
    },
    {
      "epoch": 1.276299112801014,
      "grad_norm": 0.4180152118206024,
      "learning_rate": 0.00014931297709923664,
      "loss": 0.3673,
      "step": 504
    },
    {
      "epoch": 1.2788339670468947,
      "grad_norm": 0.41350388526916504,
      "learning_rate": 0.00014921119592875318,
      "loss": 0.2544,
      "step": 505
    },
    {
      "epoch": 1.2813688212927756,
      "grad_norm": 0.3517690598964691,
      "learning_rate": 0.00014910941475826972,
      "loss": 0.2139,
      "step": 506
    },
    {
      "epoch": 1.2839036755386566,
      "grad_norm": 0.4273949861526489,
      "learning_rate": 0.00014900763358778626,
      "loss": 0.255,
      "step": 507
    },
    {
      "epoch": 1.2864385297845373,
      "grad_norm": 0.3510381877422333,
      "learning_rate": 0.00014890585241730283,
      "loss": 0.2503,
      "step": 508
    },
    {
      "epoch": 1.2889733840304183,
      "grad_norm": 0.4069119393825531,
      "learning_rate": 0.00014880407124681934,
      "loss": 0.3267,
      "step": 509
    },
    {
      "epoch": 1.291508238276299,
      "grad_norm": 0.6244072318077087,
      "learning_rate": 0.0001487022900763359,
      "loss": 0.2519,
      "step": 510
    },
    {
      "epoch": 1.29404309252218,
      "grad_norm": 0.473450630903244,
      "learning_rate": 0.00014860050890585242,
      "loss": 0.3093,
      "step": 511
    },
    {
      "epoch": 1.296577946768061,
      "grad_norm": 0.3139822781085968,
      "learning_rate": 0.00014849872773536898,
      "loss": 0.2396,
      "step": 512
    },
    {
      "epoch": 1.2991128010139417,
      "grad_norm": 0.23700624704360962,
      "learning_rate": 0.0001483969465648855,
      "loss": 0.1945,
      "step": 513
    },
    {
      "epoch": 1.3016476552598226,
      "grad_norm": 0.42849189043045044,
      "learning_rate": 0.00014829516539440203,
      "loss": 0.2275,
      "step": 514
    },
    {
      "epoch": 1.3041825095057034,
      "grad_norm": 0.4083426296710968,
      "learning_rate": 0.00014819338422391857,
      "loss": 0.3626,
      "step": 515
    },
    {
      "epoch": 1.3067173637515843,
      "grad_norm": 0.4541410207748413,
      "learning_rate": 0.0001480916030534351,
      "loss": 0.3102,
      "step": 516
    },
    {
      "epoch": 1.3092522179974653,
      "grad_norm": 0.6483343839645386,
      "learning_rate": 0.00014798982188295165,
      "loss": 0.3427,
      "step": 517
    },
    {
      "epoch": 1.311787072243346,
      "grad_norm": 0.3928525447845459,
      "learning_rate": 0.0001478880407124682,
      "loss": 0.3155,
      "step": 518
    },
    {
      "epoch": 1.3143219264892267,
      "grad_norm": 0.319035142660141,
      "learning_rate": 0.00014778625954198476,
      "loss": 0.2555,
      "step": 519
    },
    {
      "epoch": 1.3168567807351077,
      "grad_norm": 0.2855183780193329,
      "learning_rate": 0.00014768447837150127,
      "loss": 0.2115,
      "step": 520
    },
    {
      "epoch": 1.3193916349809887,
      "grad_norm": 0.3499714136123657,
      "learning_rate": 0.00014758269720101784,
      "loss": 0.254,
      "step": 521
    },
    {
      "epoch": 1.3219264892268694,
      "grad_norm": 0.40895748138427734,
      "learning_rate": 0.00014748091603053435,
      "loss": 0.2975,
      "step": 522
    },
    {
      "epoch": 1.3244613434727504,
      "grad_norm": 0.30614539980888367,
      "learning_rate": 0.00014737913486005091,
      "loss": 0.2584,
      "step": 523
    },
    {
      "epoch": 1.326996197718631,
      "grad_norm": 0.2832574248313904,
      "learning_rate": 0.00014727735368956743,
      "loss": 0.2259,
      "step": 524
    },
    {
      "epoch": 1.329531051964512,
      "grad_norm": 0.3444589674472809,
      "learning_rate": 0.000147175572519084,
      "loss": 0.2608,
      "step": 525
    },
    {
      "epoch": 1.332065906210393,
      "grad_norm": 0.35170844197273254,
      "learning_rate": 0.0001470737913486005,
      "loss": 0.3019,
      "step": 526
    },
    {
      "epoch": 1.3346007604562737,
      "grad_norm": 0.46164563298225403,
      "learning_rate": 0.00014697201017811707,
      "loss": 0.2024,
      "step": 527
    },
    {
      "epoch": 1.3371356147021547,
      "grad_norm": 0.2369971126317978,
      "learning_rate": 0.00014687022900763358,
      "loss": 0.1967,
      "step": 528
    },
    {
      "epoch": 1.3396704689480354,
      "grad_norm": 0.43180060386657715,
      "learning_rate": 0.00014676844783715012,
      "loss": 0.2415,
      "step": 529
    },
    {
      "epoch": 1.3422053231939164,
      "grad_norm": 0.3531292676925659,
      "learning_rate": 0.00014666666666666666,
      "loss": 0.2283,
      "step": 530
    },
    {
      "epoch": 1.3447401774397973,
      "grad_norm": 0.49374547600746155,
      "learning_rate": 0.0001465648854961832,
      "loss": 0.3025,
      "step": 531
    },
    {
      "epoch": 1.347275031685678,
      "grad_norm": 0.4822668731212616,
      "learning_rate": 0.00014646310432569977,
      "loss": 0.3498,
      "step": 532
    },
    {
      "epoch": 1.3498098859315588,
      "grad_norm": 0.4463392496109009,
      "learning_rate": 0.00014636132315521628,
      "loss": 0.2186,
      "step": 533
    },
    {
      "epoch": 1.3523447401774398,
      "grad_norm": 0.40042299032211304,
      "learning_rate": 0.00014625954198473285,
      "loss": 0.2316,
      "step": 534
    },
    {
      "epoch": 1.3548795944233207,
      "grad_norm": 0.41266927123069763,
      "learning_rate": 0.00014615776081424936,
      "loss": 0.2324,
      "step": 535
    },
    {
      "epoch": 1.3574144486692015,
      "grad_norm": 0.46208152174949646,
      "learning_rate": 0.00014605597964376593,
      "loss": 0.2261,
      "step": 536
    },
    {
      "epoch": 1.3599493029150824,
      "grad_norm": 0.38895705342292786,
      "learning_rate": 0.00014595419847328244,
      "loss": 0.2732,
      "step": 537
    },
    {
      "epoch": 1.3624841571609632,
      "grad_norm": 0.4489743113517761,
      "learning_rate": 0.000145852417302799,
      "loss": 0.3197,
      "step": 538
    },
    {
      "epoch": 1.3650190114068441,
      "grad_norm": 0.25082916021347046,
      "learning_rate": 0.00014575063613231552,
      "loss": 0.2096,
      "step": 539
    },
    {
      "epoch": 1.367553865652725,
      "grad_norm": 0.3681942820549011,
      "learning_rate": 0.00014564885496183208,
      "loss": 0.2496,
      "step": 540
    },
    {
      "epoch": 1.3700887198986058,
      "grad_norm": 0.30986878275871277,
      "learning_rate": 0.0001455470737913486,
      "loss": 0.2244,
      "step": 541
    },
    {
      "epoch": 1.3726235741444868,
      "grad_norm": 0.42349961400032043,
      "learning_rate": 0.00014544529262086513,
      "loss": 0.2315,
      "step": 542
    },
    {
      "epoch": 1.3751584283903675,
      "grad_norm": 0.29656872153282166,
      "learning_rate": 0.00014534351145038167,
      "loss": 0.2458,
      "step": 543
    },
    {
      "epoch": 1.3776932826362485,
      "grad_norm": 0.4033924341201782,
      "learning_rate": 0.0001452417302798982,
      "loss": 0.3506,
      "step": 544
    },
    {
      "epoch": 1.3802281368821292,
      "grad_norm": 0.3998583257198334,
      "learning_rate": 0.00014513994910941478,
      "loss": 0.3108,
      "step": 545
    },
    {
      "epoch": 1.3827629911280102,
      "grad_norm": 0.3335135281085968,
      "learning_rate": 0.0001450381679389313,
      "loss": 0.2816,
      "step": 546
    },
    {
      "epoch": 1.385297845373891,
      "grad_norm": 0.39304816722869873,
      "learning_rate": 0.00014493638676844786,
      "loss": 0.3968,
      "step": 547
    },
    {
      "epoch": 1.3878326996197718,
      "grad_norm": 0.34913384914398193,
      "learning_rate": 0.00014483460559796437,
      "loss": 0.2653,
      "step": 548
    },
    {
      "epoch": 1.3903675538656528,
      "grad_norm": 0.3312399387359619,
      "learning_rate": 0.00014473282442748094,
      "loss": 0.2629,
      "step": 549
    },
    {
      "epoch": 1.3929024081115335,
      "grad_norm": 0.31613558530807495,
      "learning_rate": 0.00014463104325699745,
      "loss": 0.2033,
      "step": 550
    },
    {
      "epoch": 1.3954372623574145,
      "grad_norm": 0.2872864603996277,
      "learning_rate": 0.00014452926208651402,
      "loss": 0.2097,
      "step": 551
    },
    {
      "epoch": 1.3979721166032952,
      "grad_norm": 0.24432098865509033,
      "learning_rate": 0.00014442748091603053,
      "loss": 0.2172,
      "step": 552
    },
    {
      "epoch": 1.4005069708491762,
      "grad_norm": 0.31649062037467957,
      "learning_rate": 0.0001443256997455471,
      "loss": 0.2255,
      "step": 553
    },
    {
      "epoch": 1.4030418250950571,
      "grad_norm": 0.2483261376619339,
      "learning_rate": 0.0001442239185750636,
      "loss": 0.1856,
      "step": 554
    },
    {
      "epoch": 1.4055766793409379,
      "grad_norm": 0.437757670879364,
      "learning_rate": 0.00014412213740458017,
      "loss": 0.2713,
      "step": 555
    },
    {
      "epoch": 1.4081115335868186,
      "grad_norm": 0.43551307916641235,
      "learning_rate": 0.0001440203562340967,
      "loss": 0.2654,
      "step": 556
    },
    {
      "epoch": 1.4106463878326996,
      "grad_norm": 0.5781947374343872,
      "learning_rate": 0.00014391857506361322,
      "loss": 0.3242,
      "step": 557
    },
    {
      "epoch": 1.4131812420785805,
      "grad_norm": 0.3809725344181061,
      "learning_rate": 0.0001438167938931298,
      "loss": 0.2176,
      "step": 558
    },
    {
      "epoch": 1.4157160963244613,
      "grad_norm": 0.38208654522895813,
      "learning_rate": 0.0001437150127226463,
      "loss": 0.2043,
      "step": 559
    },
    {
      "epoch": 1.4182509505703422,
      "grad_norm": 0.39930659532546997,
      "learning_rate": 0.00014361323155216287,
      "loss": 0.2914,
      "step": 560
    },
    {
      "epoch": 1.420785804816223,
      "grad_norm": 0.3019846975803375,
      "learning_rate": 0.00014351145038167938,
      "loss": 0.2037,
      "step": 561
    },
    {
      "epoch": 1.423320659062104,
      "grad_norm": 0.4549913704395294,
      "learning_rate": 0.00014340966921119595,
      "loss": 0.2308,
      "step": 562
    },
    {
      "epoch": 1.4258555133079849,
      "grad_norm": 0.38887929916381836,
      "learning_rate": 0.00014330788804071246,
      "loss": 0.2339,
      "step": 563
    },
    {
      "epoch": 1.4283903675538656,
      "grad_norm": 0.3481290340423584,
      "learning_rate": 0.00014320610687022903,
      "loss": 0.2206,
      "step": 564
    },
    {
      "epoch": 1.4309252217997466,
      "grad_norm": 0.46603840589523315,
      "learning_rate": 0.00014310432569974554,
      "loss": 0.3006,
      "step": 565
    },
    {
      "epoch": 1.4334600760456273,
      "grad_norm": 0.3586963713169098,
      "learning_rate": 0.0001430025445292621,
      "loss": 0.2646,
      "step": 566
    },
    {
      "epoch": 1.4359949302915083,
      "grad_norm": 0.3106522560119629,
      "learning_rate": 0.00014290076335877862,
      "loss": 0.2725,
      "step": 567
    },
    {
      "epoch": 1.4385297845373892,
      "grad_norm": 0.48086050152778625,
      "learning_rate": 0.00014279898218829518,
      "loss": 0.3007,
      "step": 568
    },
    {
      "epoch": 1.44106463878327,
      "grad_norm": 0.44636330008506775,
      "learning_rate": 0.00014269720101781172,
      "loss": 0.3755,
      "step": 569
    },
    {
      "epoch": 1.4435994930291507,
      "grad_norm": 0.3114064633846283,
      "learning_rate": 0.00014259541984732824,
      "loss": 0.2606,
      "step": 570
    },
    {
      "epoch": 1.4461343472750317,
      "grad_norm": 0.358394593000412,
      "learning_rate": 0.0001424936386768448,
      "loss": 0.27,
      "step": 571
    },
    {
      "epoch": 1.4486692015209126,
      "grad_norm": 0.3568032681941986,
      "learning_rate": 0.00014239185750636131,
      "loss": 0.2767,
      "step": 572
    },
    {
      "epoch": 1.4512040557667933,
      "grad_norm": 0.4407200515270233,
      "learning_rate": 0.00014229007633587788,
      "loss": 0.3786,
      "step": 573
    },
    {
      "epoch": 1.4537389100126743,
      "grad_norm": 0.4096840023994446,
      "learning_rate": 0.0001421882951653944,
      "loss": 0.3199,
      "step": 574
    },
    {
      "epoch": 1.456273764258555,
      "grad_norm": 0.3343110680580139,
      "learning_rate": 0.00014208651399491096,
      "loss": 0.2538,
      "step": 575
    },
    {
      "epoch": 1.458808618504436,
      "grad_norm": 0.27782517671585083,
      "learning_rate": 0.00014198473282442747,
      "loss": 0.2179,
      "step": 576
    },
    {
      "epoch": 1.461343472750317,
      "grad_norm": 0.2901310920715332,
      "learning_rate": 0.00014188295165394404,
      "loss": 0.2552,
      "step": 577
    },
    {
      "epoch": 1.4638783269961977,
      "grad_norm": 0.3634903132915497,
      "learning_rate": 0.00014178117048346055,
      "loss": 0.257,
      "step": 578
    },
    {
      "epoch": 1.4664131812420786,
      "grad_norm": 0.37307262420654297,
      "learning_rate": 0.00014167938931297712,
      "loss": 0.254,
      "step": 579
    },
    {
      "epoch": 1.4689480354879594,
      "grad_norm": 0.27726346254348755,
      "learning_rate": 0.00014157760814249366,
      "loss": 0.1938,
      "step": 580
    },
    {
      "epoch": 1.4714828897338403,
      "grad_norm": 0.3364371657371521,
      "learning_rate": 0.0001414758269720102,
      "loss": 0.2094,
      "step": 581
    },
    {
      "epoch": 1.4740177439797213,
      "grad_norm": 0.4418800473213196,
      "learning_rate": 0.00014137404580152673,
      "loss": 0.3243,
      "step": 582
    },
    {
      "epoch": 1.476552598225602,
      "grad_norm": 0.42042022943496704,
      "learning_rate": 0.00014127226463104327,
      "loss": 0.2333,
      "step": 583
    },
    {
      "epoch": 1.4790874524714828,
      "grad_norm": 0.36881470680236816,
      "learning_rate": 0.0001411704834605598,
      "loss": 0.2513,
      "step": 584
    },
    {
      "epoch": 1.4816223067173637,
      "grad_norm": 0.4009782671928406,
      "learning_rate": 0.00014106870229007632,
      "loss": 0.3085,
      "step": 585
    },
    {
      "epoch": 1.4841571609632447,
      "grad_norm": 0.43179744482040405,
      "learning_rate": 0.0001409669211195929,
      "loss": 0.3189,
      "step": 586
    },
    {
      "epoch": 1.4866920152091254,
      "grad_norm": 0.3721300959587097,
      "learning_rate": 0.0001408651399491094,
      "loss": 0.2318,
      "step": 587
    },
    {
      "epoch": 1.4892268694550064,
      "grad_norm": 0.3875066339969635,
      "learning_rate": 0.00014076335877862597,
      "loss": 0.2753,
      "step": 588
    },
    {
      "epoch": 1.491761723700887,
      "grad_norm": 0.35223937034606934,
      "learning_rate": 0.00014066157760814248,
      "loss": 0.2257,
      "step": 589
    },
    {
      "epoch": 1.494296577946768,
      "grad_norm": 0.30979710817337036,
      "learning_rate": 0.00014055979643765905,
      "loss": 0.2149,
      "step": 590
    },
    {
      "epoch": 1.496831432192649,
      "grad_norm": 0.23923753201961517,
      "learning_rate": 0.00014045801526717556,
      "loss": 0.1911,
      "step": 591
    },
    {
      "epoch": 1.4993662864385298,
      "grad_norm": 0.40893304347991943,
      "learning_rate": 0.00014035623409669213,
      "loss": 0.2756,
      "step": 592
    },
    {
      "epoch": 1.5019011406844105,
      "grad_norm": 0.2659086585044861,
      "learning_rate": 0.00014025445292620867,
      "loss": 0.2154,
      "step": 593
    },
    {
      "epoch": 1.5044359949302915,
      "grad_norm": 0.30749884247779846,
      "learning_rate": 0.0001401526717557252,
      "loss": 0.2184,
      "step": 594
    },
    {
      "epoch": 1.5069708491761724,
      "grad_norm": 0.3892879784107208,
      "learning_rate": 0.00014005089058524175,
      "loss": 0.2849,
      "step": 595
    },
    {
      "epoch": 1.5095057034220534,
      "grad_norm": 0.5041462779045105,
      "learning_rate": 0.00013994910941475828,
      "loss": 0.2551,
      "step": 596
    },
    {
      "epoch": 1.512040557667934,
      "grad_norm": 0.4143123924732208,
      "learning_rate": 0.00013984732824427482,
      "loss": 0.2485,
      "step": 597
    },
    {
      "epoch": 1.5145754119138148,
      "grad_norm": 0.5315548181533813,
      "learning_rate": 0.00013974554707379136,
      "loss": 0.3242,
      "step": 598
    },
    {
      "epoch": 1.5171102661596958,
      "grad_norm": 0.28680169582366943,
      "learning_rate": 0.0001396437659033079,
      "loss": 0.227,
      "step": 599
    },
    {
      "epoch": 1.5196451204055768,
      "grad_norm": 0.3015950620174408,
      "learning_rate": 0.00013954198473282441,
      "loss": 0.2122,
      "step": 600
    },
    {
      "epoch": 1.5221799746514575,
      "grad_norm": 0.30785971879959106,
      "learning_rate": 0.00013944020356234098,
      "loss": 0.2194,
      "step": 601
    },
    {
      "epoch": 1.5247148288973384,
      "grad_norm": 0.3596206605434418,
      "learning_rate": 0.0001393384223918575,
      "loss": 0.2574,
      "step": 602
    },
    {
      "epoch": 1.5272496831432192,
      "grad_norm": 0.18499840795993805,
      "learning_rate": 0.00013923664122137406,
      "loss": 0.1944,
      "step": 603
    },
    {
      "epoch": 1.5297845373891001,
      "grad_norm": 0.4346081614494324,
      "learning_rate": 0.00013913486005089057,
      "loss": 0.3187,
      "step": 604
    },
    {
      "epoch": 1.532319391634981,
      "grad_norm": 0.46154457330703735,
      "learning_rate": 0.00013903307888040714,
      "loss": 0.3149,
      "step": 605
    },
    {
      "epoch": 1.5348542458808618,
      "grad_norm": 0.3444209098815918,
      "learning_rate": 0.00013893129770992368,
      "loss": 0.2801,
      "step": 606
    },
    {
      "epoch": 1.5373891001267426,
      "grad_norm": 0.550620436668396,
      "learning_rate": 0.00013882951653944022,
      "loss": 0.3038,
      "step": 607
    },
    {
      "epoch": 1.5399239543726235,
      "grad_norm": 0.36603689193725586,
      "learning_rate": 0.00013872773536895676,
      "loss": 0.3224,
      "step": 608
    },
    {
      "epoch": 1.5424588086185045,
      "grad_norm": 0.213638037443161,
      "learning_rate": 0.0001386259541984733,
      "loss": 0.2081,
      "step": 609
    },
    {
      "epoch": 1.5449936628643854,
      "grad_norm": 0.34508904814720154,
      "learning_rate": 0.00013852417302798983,
      "loss": 0.2474,
      "step": 610
    },
    {
      "epoch": 1.5475285171102662,
      "grad_norm": 0.42072099447250366,
      "learning_rate": 0.00013842239185750637,
      "loss": 0.3049,
      "step": 611
    },
    {
      "epoch": 1.550063371356147,
      "grad_norm": 0.3760271966457367,
      "learning_rate": 0.0001383206106870229,
      "loss": 0.2499,
      "step": 612
    },
    {
      "epoch": 1.5525982256020279,
      "grad_norm": 0.24040678143501282,
      "learning_rate": 0.00013821882951653943,
      "loss": 0.2134,
      "step": 613
    },
    {
      "epoch": 1.5551330798479088,
      "grad_norm": 0.458035945892334,
      "learning_rate": 0.000138117048346056,
      "loss": 0.3375,
      "step": 614
    },
    {
      "epoch": 1.5576679340937896,
      "grad_norm": 0.30446937680244446,
      "learning_rate": 0.0001380152671755725,
      "loss": 0.2252,
      "step": 615
    },
    {
      "epoch": 1.5602027883396705,
      "grad_norm": 0.3036455810070038,
      "learning_rate": 0.00013791348600508907,
      "loss": 0.2095,
      "step": 616
    },
    {
      "epoch": 1.5627376425855513,
      "grad_norm": 0.4190979301929474,
      "learning_rate": 0.0001378117048346056,
      "loss": 0.2932,
      "step": 617
    },
    {
      "epoch": 1.5652724968314322,
      "grad_norm": 0.27648523449897766,
      "learning_rate": 0.00013770992366412215,
      "loss": 0.2133,
      "step": 618
    },
    {
      "epoch": 1.5678073510773132,
      "grad_norm": 0.28326693177223206,
      "learning_rate": 0.0001376081424936387,
      "loss": 0.2087,
      "step": 619
    },
    {
      "epoch": 1.570342205323194,
      "grad_norm": 0.3020143508911133,
      "learning_rate": 0.00013750636132315523,
      "loss": 0.2321,
      "step": 620
    },
    {
      "epoch": 1.5728770595690746,
      "grad_norm": 0.3246900141239166,
      "learning_rate": 0.00013740458015267177,
      "loss": 0.2121,
      "step": 621
    },
    {
      "epoch": 1.5754119138149556,
      "grad_norm": 0.3806106448173523,
      "learning_rate": 0.0001373027989821883,
      "loss": 0.2856,
      "step": 622
    },
    {
      "epoch": 1.5779467680608366,
      "grad_norm": 0.3568238317966461,
      "learning_rate": 0.00013720101781170485,
      "loss": 0.2579,
      "step": 623
    },
    {
      "epoch": 1.5804816223067175,
      "grad_norm": 0.45590534806251526,
      "learning_rate": 0.00013709923664122139,
      "loss": 0.2059,
      "step": 624
    },
    {
      "epoch": 1.5830164765525983,
      "grad_norm": 0.41996893286705017,
      "learning_rate": 0.00013699745547073792,
      "loss": 0.2154,
      "step": 625
    },
    {
      "epoch": 1.585551330798479,
      "grad_norm": 0.5142170190811157,
      "learning_rate": 0.00013689567430025446,
      "loss": 0.2708,
      "step": 626
    },
    {
      "epoch": 1.58808618504436,
      "grad_norm": 0.36335933208465576,
      "learning_rate": 0.000136793893129771,
      "loss": 0.2501,
      "step": 627
    },
    {
      "epoch": 1.590621039290241,
      "grad_norm": 0.3186666667461395,
      "learning_rate": 0.00013669211195928752,
      "loss": 0.2227,
      "step": 628
    },
    {
      "epoch": 1.5931558935361216,
      "grad_norm": 0.29709601402282715,
      "learning_rate": 0.00013659033078880408,
      "loss": 0.2265,
      "step": 629
    },
    {
      "epoch": 1.5956907477820024,
      "grad_norm": 0.2891612648963928,
      "learning_rate": 0.00013648854961832062,
      "loss": 0.2298,
      "step": 630
    },
    {
      "epoch": 1.5982256020278833,
      "grad_norm": 0.2191978096961975,
      "learning_rate": 0.00013638676844783716,
      "loss": 0.2049,
      "step": 631
    },
    {
      "epoch": 1.6007604562737643,
      "grad_norm": 0.37781399488449097,
      "learning_rate": 0.0001362849872773537,
      "loss": 0.3664,
      "step": 632
    },
    {
      "epoch": 1.6032953105196452,
      "grad_norm": 0.3082154393196106,
      "learning_rate": 0.00013618320610687024,
      "loss": 0.2063,
      "step": 633
    },
    {
      "epoch": 1.605830164765526,
      "grad_norm": 0.318317711353302,
      "learning_rate": 0.00013608142493638678,
      "loss": 0.2085,
      "step": 634
    },
    {
      "epoch": 1.6083650190114067,
      "grad_norm": 0.45566102862358093,
      "learning_rate": 0.00013597964376590332,
      "loss": 0.2876,
      "step": 635
    },
    {
      "epoch": 1.6108998732572877,
      "grad_norm": 0.3186021149158478,
      "learning_rate": 0.00013587786259541986,
      "loss": 0.2704,
      "step": 636
    },
    {
      "epoch": 1.6134347275031686,
      "grad_norm": 0.28905680775642395,
      "learning_rate": 0.0001357760814249364,
      "loss": 0.209,
      "step": 637
    },
    {
      "epoch": 1.6159695817490496,
      "grad_norm": 0.23341360688209534,
      "learning_rate": 0.00013567430025445294,
      "loss": 0.1835,
      "step": 638
    },
    {
      "epoch": 1.6185044359949303,
      "grad_norm": 0.336247056722641,
      "learning_rate": 0.00013557251908396947,
      "loss": 0.2547,
      "step": 639
    },
    {
      "epoch": 1.621039290240811,
      "grad_norm": 0.3736225366592407,
      "learning_rate": 0.00013547073791348601,
      "loss": 0.3053,
      "step": 640
    },
    {
      "epoch": 1.623574144486692,
      "grad_norm": 0.3983825743198395,
      "learning_rate": 0.00013536895674300255,
      "loss": 0.2395,
      "step": 641
    },
    {
      "epoch": 1.626108998732573,
      "grad_norm": 0.35913559794425964,
      "learning_rate": 0.0001352671755725191,
      "loss": 0.2918,
      "step": 642
    },
    {
      "epoch": 1.6286438529784537,
      "grad_norm": 0.2984326183795929,
      "learning_rate": 0.00013516539440203563,
      "loss": 0.2148,
      "step": 643
    },
    {
      "epoch": 1.6311787072243344,
      "grad_norm": 0.3113880753517151,
      "learning_rate": 0.00013506361323155217,
      "loss": 0.2044,
      "step": 644
    },
    {
      "epoch": 1.6337135614702154,
      "grad_norm": 0.5340004563331604,
      "learning_rate": 0.0001349618320610687,
      "loss": 0.3234,
      "step": 645
    },
    {
      "epoch": 1.6362484157160964,
      "grad_norm": 0.38927194476127625,
      "learning_rate": 0.00013486005089058525,
      "loss": 0.2866,
      "step": 646
    },
    {
      "epoch": 1.6387832699619773,
      "grad_norm": 0.38895881175994873,
      "learning_rate": 0.0001347582697201018,
      "loss": 0.2324,
      "step": 647
    },
    {
      "epoch": 1.641318124207858,
      "grad_norm": 0.41959917545318604,
      "learning_rate": 0.00013465648854961833,
      "loss": 0.2666,
      "step": 648
    },
    {
      "epoch": 1.6438529784537388,
      "grad_norm": 0.4299626648426056,
      "learning_rate": 0.00013455470737913487,
      "loss": 0.2905,
      "step": 649
    },
    {
      "epoch": 1.6463878326996197,
      "grad_norm": 0.4236285090446472,
      "learning_rate": 0.0001344529262086514,
      "loss": 0.292,
      "step": 650
    },
    {
      "epoch": 1.6489226869455007,
      "grad_norm": 0.8049849271774292,
      "learning_rate": 0.00013435114503816795,
      "loss": 0.2351,
      "step": 651
    },
    {
      "epoch": 1.6514575411913817,
      "grad_norm": 0.3420075476169586,
      "learning_rate": 0.00013424936386768449,
      "loss": 0.2355,
      "step": 652
    },
    {
      "epoch": 1.6539923954372624,
      "grad_norm": 0.3632122874259949,
      "learning_rate": 0.00013414758269720103,
      "loss": 0.2377,
      "step": 653
    },
    {
      "epoch": 1.6565272496831431,
      "grad_norm": 0.27961722016334534,
      "learning_rate": 0.00013404580152671756,
      "loss": 0.2299,
      "step": 654
    },
    {
      "epoch": 1.659062103929024,
      "grad_norm": 0.3043057918548584,
      "learning_rate": 0.0001339440203562341,
      "loss": 0.2321,
      "step": 655
    },
    {
      "epoch": 1.661596958174905,
      "grad_norm": 0.3421036899089813,
      "learning_rate": 0.00013384223918575064,
      "loss": 0.2492,
      "step": 656
    },
    {
      "epoch": 1.6641318124207858,
      "grad_norm": 0.39606526494026184,
      "learning_rate": 0.00013374045801526718,
      "loss": 0.3401,
      "step": 657
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.35081973671913147,
      "learning_rate": 0.00013363867684478372,
      "loss": 0.2175,
      "step": 658
    },
    {
      "epoch": 1.6692015209125475,
      "grad_norm": 0.420175701379776,
      "learning_rate": 0.00013353689567430026,
      "loss": 0.2813,
      "step": 659
    },
    {
      "epoch": 1.6717363751584284,
      "grad_norm": 0.24181438982486725,
      "learning_rate": 0.0001334351145038168,
      "loss": 0.219,
      "step": 660
    },
    {
      "epoch": 1.6742712294043094,
      "grad_norm": 0.6243584752082825,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.3087,
      "step": 661
    },
    {
      "epoch": 1.6768060836501901,
      "grad_norm": 0.4036748707294464,
      "learning_rate": 0.00013323155216284988,
      "loss": 0.251,
      "step": 662
    },
    {
      "epoch": 1.6793409378960709,
      "grad_norm": 0.39555415511131287,
      "learning_rate": 0.00013312977099236642,
      "loss": 0.3279,
      "step": 663
    },
    {
      "epoch": 1.6818757921419518,
      "grad_norm": 0.4018571674823761,
      "learning_rate": 0.00013302798982188296,
      "loss": 0.2337,
      "step": 664
    },
    {
      "epoch": 1.6844106463878328,
      "grad_norm": 0.36354130506515503,
      "learning_rate": 0.0001329262086513995,
      "loss": 0.2503,
      "step": 665
    },
    {
      "epoch": 1.6869455006337135,
      "grad_norm": 0.32249706983566284,
      "learning_rate": 0.00013282442748091604,
      "loss": 0.27,
      "step": 666
    },
    {
      "epoch": 1.6894803548795945,
      "grad_norm": 0.33560654520988464,
      "learning_rate": 0.00013272264631043258,
      "loss": 0.203,
      "step": 667
    },
    {
      "epoch": 1.6920152091254752,
      "grad_norm": 0.39997267723083496,
      "learning_rate": 0.00013262086513994911,
      "loss": 0.2662,
      "step": 668
    },
    {
      "epoch": 1.6945500633713562,
      "grad_norm": 0.6739961504936218,
      "learning_rate": 0.00013251908396946565,
      "loss": 0.2803,
      "step": 669
    },
    {
      "epoch": 1.6970849176172371,
      "grad_norm": 0.5863606929779053,
      "learning_rate": 0.0001324173027989822,
      "loss": 0.351,
      "step": 670
    },
    {
      "epoch": 1.6996197718631179,
      "grad_norm": 0.4408819079399109,
      "learning_rate": 0.00013231552162849873,
      "loss": 0.1814,
      "step": 671
    },
    {
      "epoch": 1.7021546261089986,
      "grad_norm": 0.3341253697872162,
      "learning_rate": 0.00013221374045801527,
      "loss": 0.2156,
      "step": 672
    },
    {
      "epoch": 1.7046894803548795,
      "grad_norm": 0.3035176992416382,
      "learning_rate": 0.0001321119592875318,
      "loss": 0.2308,
      "step": 673
    },
    {
      "epoch": 1.7072243346007605,
      "grad_norm": 0.4395483136177063,
      "learning_rate": 0.00013201017811704835,
      "loss": 0.3418,
      "step": 674
    },
    {
      "epoch": 1.7097591888466415,
      "grad_norm": 0.22972792387008667,
      "learning_rate": 0.0001319083969465649,
      "loss": 0.1873,
      "step": 675
    },
    {
      "epoch": 1.7122940430925222,
      "grad_norm": 0.47378918528556824,
      "learning_rate": 0.00013180661577608143,
      "loss": 0.2514,
      "step": 676
    },
    {
      "epoch": 1.714828897338403,
      "grad_norm": 0.3947070240974426,
      "learning_rate": 0.00013170483460559797,
      "loss": 0.2289,
      "step": 677
    },
    {
      "epoch": 1.717363751584284,
      "grad_norm": 0.3789718747138977,
      "learning_rate": 0.0001316030534351145,
      "loss": 0.2476,
      "step": 678
    },
    {
      "epoch": 1.7198986058301649,
      "grad_norm": 0.4904823899269104,
      "learning_rate": 0.00013150127226463105,
      "loss": 0.2163,
      "step": 679
    },
    {
      "epoch": 1.7224334600760456,
      "grad_norm": 0.3285132646560669,
      "learning_rate": 0.0001313994910941476,
      "loss": 0.2786,
      "step": 680
    },
    {
      "epoch": 1.7249683143219265,
      "grad_norm": 0.4326847493648529,
      "learning_rate": 0.00013129770992366413,
      "loss": 0.2409,
      "step": 681
    },
    {
      "epoch": 1.7275031685678073,
      "grad_norm": 0.3819947838783264,
      "learning_rate": 0.00013119592875318067,
      "loss": 0.2076,
      "step": 682
    },
    {
      "epoch": 1.7300380228136882,
      "grad_norm": 0.4046533703804016,
      "learning_rate": 0.0001310941475826972,
      "loss": 0.2717,
      "step": 683
    },
    {
      "epoch": 1.7325728770595692,
      "grad_norm": 0.34681758284568787,
      "learning_rate": 0.00013099236641221374,
      "loss": 0.2389,
      "step": 684
    },
    {
      "epoch": 1.73510773130545,
      "grad_norm": 0.35155028104782104,
      "learning_rate": 0.00013089058524173028,
      "loss": 0.2407,
      "step": 685
    },
    {
      "epoch": 1.7376425855513307,
      "grad_norm": 0.3306678533554077,
      "learning_rate": 0.00013078880407124682,
      "loss": 0.2767,
      "step": 686
    },
    {
      "epoch": 1.7401774397972116,
      "grad_norm": 0.27715572714805603,
      "learning_rate": 0.00013068702290076336,
      "loss": 0.1955,
      "step": 687
    },
    {
      "epoch": 1.7427122940430926,
      "grad_norm": 0.3591010272502899,
      "learning_rate": 0.0001305852417302799,
      "loss": 0.2269,
      "step": 688
    },
    {
      "epoch": 1.7452471482889735,
      "grad_norm": 0.39104408025741577,
      "learning_rate": 0.00013048346055979644,
      "loss": 0.2392,
      "step": 689
    },
    {
      "epoch": 1.7477820025348543,
      "grad_norm": 0.44545605778694153,
      "learning_rate": 0.00013038167938931298,
      "loss": 0.2823,
      "step": 690
    },
    {
      "epoch": 1.750316856780735,
      "grad_norm": 0.29502785205841064,
      "learning_rate": 0.00013027989821882952,
      "loss": 0.1899,
      "step": 691
    },
    {
      "epoch": 1.752851711026616,
      "grad_norm": 0.40423381328582764,
      "learning_rate": 0.00013017811704834606,
      "loss": 0.2069,
      "step": 692
    },
    {
      "epoch": 1.755386565272497,
      "grad_norm": 0.38649502396583557,
      "learning_rate": 0.0001300763358778626,
      "loss": 0.1938,
      "step": 693
    },
    {
      "epoch": 1.7579214195183777,
      "grad_norm": 0.40014389157295227,
      "learning_rate": 0.00012997455470737914,
      "loss": 0.2825,
      "step": 694
    },
    {
      "epoch": 1.7604562737642584,
      "grad_norm": 0.4783387780189514,
      "learning_rate": 0.00012987277353689568,
      "loss": 0.2629,
      "step": 695
    },
    {
      "epoch": 1.7629911280101394,
      "grad_norm": 0.4938651919364929,
      "learning_rate": 0.00012977099236641222,
      "loss": 0.2976,
      "step": 696
    },
    {
      "epoch": 1.7655259822560203,
      "grad_norm": 0.32507607340812683,
      "learning_rate": 0.00012966921119592875,
      "loss": 0.2097,
      "step": 697
    },
    {
      "epoch": 1.7680608365019013,
      "grad_norm": 0.31158536672592163,
      "learning_rate": 0.0001295674300254453,
      "loss": 0.223,
      "step": 698
    },
    {
      "epoch": 1.770595690747782,
      "grad_norm": 0.5594013333320618,
      "learning_rate": 0.00012946564885496183,
      "loss": 0.3523,
      "step": 699
    },
    {
      "epoch": 1.7731305449936627,
      "grad_norm": 0.5820282697677612,
      "learning_rate": 0.00012936386768447837,
      "loss": 0.3181,
      "step": 700
    },
    {
      "epoch": 1.7756653992395437,
      "grad_norm": 0.3635233938694,
      "learning_rate": 0.0001292620865139949,
      "loss": 0.2387,
      "step": 701
    },
    {
      "epoch": 1.7782002534854247,
      "grad_norm": 0.3195054531097412,
      "learning_rate": 0.00012916030534351148,
      "loss": 0.2046,
      "step": 702
    },
    {
      "epoch": 1.7807351077313056,
      "grad_norm": 0.3483947217464447,
      "learning_rate": 0.000129058524173028,
      "loss": 0.2576,
      "step": 703
    },
    {
      "epoch": 1.7832699619771863,
      "grad_norm": 0.3419065475463867,
      "learning_rate": 0.00012895674300254456,
      "loss": 0.2361,
      "step": 704
    },
    {
      "epoch": 1.785804816223067,
      "grad_norm": 0.3142557442188263,
      "learning_rate": 0.00012885496183206107,
      "loss": 0.2172,
      "step": 705
    },
    {
      "epoch": 1.788339670468948,
      "grad_norm": 0.3502836227416992,
      "learning_rate": 0.0001287531806615776,
      "loss": 0.2621,
      "step": 706
    },
    {
      "epoch": 1.790874524714829,
      "grad_norm": 0.37896937131881714,
      "learning_rate": 0.00012865139949109415,
      "loss": 0.2374,
      "step": 707
    },
    {
      "epoch": 1.7934093789607097,
      "grad_norm": 0.3880506455898285,
      "learning_rate": 0.0001285496183206107,
      "loss": 0.2862,
      "step": 708
    },
    {
      "epoch": 1.7959442332065905,
      "grad_norm": 0.2648681700229645,
      "learning_rate": 0.00012844783715012723,
      "loss": 0.206,
      "step": 709
    },
    {
      "epoch": 1.7984790874524714,
      "grad_norm": 0.25072911381721497,
      "learning_rate": 0.00012834605597964377,
      "loss": 0.2123,
      "step": 710
    },
    {
      "epoch": 1.8010139416983524,
      "grad_norm": 0.3076663315296173,
      "learning_rate": 0.0001282442748091603,
      "loss": 0.2983,
      "step": 711
    },
    {
      "epoch": 1.8035487959442333,
      "grad_norm": 0.4219549000263214,
      "learning_rate": 0.00012814249363867684,
      "loss": 0.2213,
      "step": 712
    },
    {
      "epoch": 1.806083650190114,
      "grad_norm": 0.2831745445728302,
      "learning_rate": 0.00012804071246819338,
      "loss": 0.2062,
      "step": 713
    },
    {
      "epoch": 1.8086185044359948,
      "grad_norm": 0.4014468491077423,
      "learning_rate": 0.00012793893129770992,
      "loss": 0.2945,
      "step": 714
    },
    {
      "epoch": 1.8111533586818758,
      "grad_norm": 0.2980962097644806,
      "learning_rate": 0.0001278371501272265,
      "loss": 0.2179,
      "step": 715
    },
    {
      "epoch": 1.8136882129277567,
      "grad_norm": 0.2338070124387741,
      "learning_rate": 0.000127735368956743,
      "loss": 0.1664,
      "step": 716
    },
    {
      "epoch": 1.8162230671736375,
      "grad_norm": 0.6155439615249634,
      "learning_rate": 0.00012763358778625957,
      "loss": 0.3429,
      "step": 717
    },
    {
      "epoch": 1.8187579214195184,
      "grad_norm": 0.46969589591026306,
      "learning_rate": 0.00012753180661577608,
      "loss": 0.2584,
      "step": 718
    },
    {
      "epoch": 1.8212927756653992,
      "grad_norm": 0.5578194260597229,
      "learning_rate": 0.00012743002544529265,
      "loss": 0.2695,
      "step": 719
    },
    {
      "epoch": 1.8238276299112801,
      "grad_norm": 0.34903043508529663,
      "learning_rate": 0.00012732824427480916,
      "loss": 0.2119,
      "step": 720
    },
    {
      "epoch": 1.826362484157161,
      "grad_norm": 0.3990432322025299,
      "learning_rate": 0.0001272264631043257,
      "loss": 0.2487,
      "step": 721
    },
    {
      "epoch": 1.8288973384030418,
      "grad_norm": 0.3382611572742462,
      "learning_rate": 0.00012712468193384224,
      "loss": 0.2313,
      "step": 722
    },
    {
      "epoch": 1.8314321926489225,
      "grad_norm": 0.30938395857810974,
      "learning_rate": 0.00012702290076335878,
      "loss": 0.2113,
      "step": 723
    },
    {
      "epoch": 1.8339670468948035,
      "grad_norm": 0.39266690611839294,
      "learning_rate": 0.00012692111959287532,
      "loss": 0.2609,
      "step": 724
    },
    {
      "epoch": 1.8365019011406845,
      "grad_norm": 0.4396655261516571,
      "learning_rate": 0.00012681933842239186,
      "loss": 0.2518,
      "step": 725
    },
    {
      "epoch": 1.8390367553865654,
      "grad_norm": 0.4134500324726105,
      "learning_rate": 0.0001267175572519084,
      "loss": 0.3317,
      "step": 726
    },
    {
      "epoch": 1.8415716096324461,
      "grad_norm": 0.29644638299942017,
      "learning_rate": 0.00012661577608142493,
      "loss": 0.1912,
      "step": 727
    },
    {
      "epoch": 1.8441064638783269,
      "grad_norm": 0.3661201596260071,
      "learning_rate": 0.0001265139949109415,
      "loss": 0.2911,
      "step": 728
    },
    {
      "epoch": 1.8466413181242078,
      "grad_norm": 0.4504169225692749,
      "learning_rate": 0.000126412213740458,
      "loss": 0.3409,
      "step": 729
    },
    {
      "epoch": 1.8491761723700888,
      "grad_norm": 0.28516069054603577,
      "learning_rate": 0.00012631043256997458,
      "loss": 0.254,
      "step": 730
    },
    {
      "epoch": 1.8517110266159695,
      "grad_norm": 0.33754590153694153,
      "learning_rate": 0.0001262086513994911,
      "loss": 0.2275,
      "step": 731
    },
    {
      "epoch": 1.8542458808618505,
      "grad_norm": 0.26562589406967163,
      "learning_rate": 0.00012610687022900766,
      "loss": 0.1979,
      "step": 732
    },
    {
      "epoch": 1.8567807351077312,
      "grad_norm": 0.3081592321395874,
      "learning_rate": 0.00012600508905852417,
      "loss": 0.2099,
      "step": 733
    },
    {
      "epoch": 1.8593155893536122,
      "grad_norm": 0.34866124391555786,
      "learning_rate": 0.0001259033078880407,
      "loss": 0.3038,
      "step": 734
    },
    {
      "epoch": 1.8618504435994931,
      "grad_norm": 0.2867881953716278,
      "learning_rate": 0.00012580152671755725,
      "loss": 0.2225,
      "step": 735
    },
    {
      "epoch": 1.8643852978453739,
      "grad_norm": 0.2374526560306549,
      "learning_rate": 0.0001256997455470738,
      "loss": 0.1945,
      "step": 736
    },
    {
      "epoch": 1.8669201520912546,
      "grad_norm": 0.3072168827056885,
      "learning_rate": 0.00012559796437659033,
      "loss": 0.2135,
      "step": 737
    },
    {
      "epoch": 1.8694550063371356,
      "grad_norm": 0.36897239089012146,
      "learning_rate": 0.00012549618320610687,
      "loss": 0.3225,
      "step": 738
    },
    {
      "epoch": 1.8719898605830165,
      "grad_norm": 0.3114832937717438,
      "learning_rate": 0.00012539440203562343,
      "loss": 0.2064,
      "step": 739
    },
    {
      "epoch": 1.8745247148288975,
      "grad_norm": 0.40082940459251404,
      "learning_rate": 0.00012529262086513995,
      "loss": 0.2145,
      "step": 740
    },
    {
      "epoch": 1.8770595690747782,
      "grad_norm": 0.28362375497817993,
      "learning_rate": 0.0001251908396946565,
      "loss": 0.2044,
      "step": 741
    },
    {
      "epoch": 1.879594423320659,
      "grad_norm": 0.2738857567310333,
      "learning_rate": 0.00012508905852417302,
      "loss": 0.1852,
      "step": 742
    },
    {
      "epoch": 1.88212927756654,
      "grad_norm": 0.37283095717430115,
      "learning_rate": 0.0001249872773536896,
      "loss": 0.248,
      "step": 743
    },
    {
      "epoch": 1.8846641318124209,
      "grad_norm": 0.3065252900123596,
      "learning_rate": 0.0001248854961832061,
      "loss": 0.2028,
      "step": 744
    },
    {
      "epoch": 1.8871989860583016,
      "grad_norm": 0.2891787588596344,
      "learning_rate": 0.00012478371501272267,
      "loss": 0.1977,
      "step": 745
    },
    {
      "epoch": 1.8897338403041823,
      "grad_norm": 0.5002029538154602,
      "learning_rate": 0.00012468193384223918,
      "loss": 0.2731,
      "step": 746
    },
    {
      "epoch": 1.8922686945500633,
      "grad_norm": 0.34734681248664856,
      "learning_rate": 0.00012458015267175575,
      "loss": 0.2236,
      "step": 747
    },
    {
      "epoch": 1.8948035487959443,
      "grad_norm": 0.4372716248035431,
      "learning_rate": 0.00012447837150127226,
      "loss": 0.3787,
      "step": 748
    },
    {
      "epoch": 1.8973384030418252,
      "grad_norm": 0.41203773021698,
      "learning_rate": 0.0001243765903307888,
      "loss": 0.2385,
      "step": 749
    },
    {
      "epoch": 1.899873257287706,
      "grad_norm": 0.28231269121170044,
      "learning_rate": 0.00012427480916030534,
      "loss": 0.1966,
      "step": 750
    },
    {
      "epoch": 1.9024081115335867,
      "grad_norm": 0.3689015209674835,
      "learning_rate": 0.00012417302798982188,
      "loss": 0.2266,
      "step": 751
    },
    {
      "epoch": 1.9049429657794676,
      "grad_norm": 0.35862621665000916,
      "learning_rate": 0.00012407124681933844,
      "loss": 0.2226,
      "step": 752
    },
    {
      "epoch": 1.9074778200253486,
      "grad_norm": 0.27552056312561035,
      "learning_rate": 0.00012396946564885496,
      "loss": 0.2049,
      "step": 753
    },
    {
      "epoch": 1.9100126742712296,
      "grad_norm": 0.3665705919265747,
      "learning_rate": 0.00012386768447837152,
      "loss": 0.2262,
      "step": 754
    },
    {
      "epoch": 1.9125475285171103,
      "grad_norm": 0.37812677025794983,
      "learning_rate": 0.00012376590330788803,
      "loss": 0.2561,
      "step": 755
    },
    {
      "epoch": 1.915082382762991,
      "grad_norm": 0.34638741612434387,
      "learning_rate": 0.0001236641221374046,
      "loss": 0.2152,
      "step": 756
    },
    {
      "epoch": 1.917617237008872,
      "grad_norm": 0.3499183654785156,
      "learning_rate": 0.00012356234096692111,
      "loss": 0.2823,
      "step": 757
    },
    {
      "epoch": 1.920152091254753,
      "grad_norm": 0.3274863362312317,
      "learning_rate": 0.00012346055979643768,
      "loss": 0.202,
      "step": 758
    },
    {
      "epoch": 1.9226869455006337,
      "grad_norm": 0.4568060338497162,
      "learning_rate": 0.0001233587786259542,
      "loss": 0.3531,
      "step": 759
    },
    {
      "epoch": 1.9252217997465144,
      "grad_norm": 0.3351891040802002,
      "learning_rate": 0.00012325699745547076,
      "loss": 0.3491,
      "step": 760
    },
    {
      "epoch": 1.9277566539923954,
      "grad_norm": 0.3045225739479065,
      "learning_rate": 0.00012315521628498727,
      "loss": 0.2412,
      "step": 761
    },
    {
      "epoch": 1.9302915082382763,
      "grad_norm": 0.4453962445259094,
      "learning_rate": 0.0001230534351145038,
      "loss": 0.485,
      "step": 762
    },
    {
      "epoch": 1.9328263624841573,
      "grad_norm": 0.4568649232387543,
      "learning_rate": 0.00012295165394402038,
      "loss": 0.4203,
      "step": 763
    },
    {
      "epoch": 1.935361216730038,
      "grad_norm": 0.33376067876815796,
      "learning_rate": 0.0001228498727735369,
      "loss": 0.2287,
      "step": 764
    },
    {
      "epoch": 1.9378960709759188,
      "grad_norm": 0.2670106887817383,
      "learning_rate": 0.00012274809160305346,
      "loss": 0.2265,
      "step": 765
    },
    {
      "epoch": 1.9404309252217997,
      "grad_norm": 0.25930914282798767,
      "learning_rate": 0.00012264631043256997,
      "loss": 0.2661,
      "step": 766
    },
    {
      "epoch": 1.9429657794676807,
      "grad_norm": 0.22364859282970428,
      "learning_rate": 0.00012254452926208653,
      "loss": 0.1938,
      "step": 767
    },
    {
      "epoch": 1.9455006337135616,
      "grad_norm": 0.4107860028743744,
      "learning_rate": 0.00012244274809160305,
      "loss": 0.3227,
      "step": 768
    },
    {
      "epoch": 1.9480354879594424,
      "grad_norm": 0.24454613029956818,
      "learning_rate": 0.0001223409669211196,
      "loss": 0.2813,
      "step": 769
    },
    {
      "epoch": 1.950570342205323,
      "grad_norm": 0.28310418128967285,
      "learning_rate": 0.00012223918575063612,
      "loss": 0.2065,
      "step": 770
    },
    {
      "epoch": 1.953105196451204,
      "grad_norm": 0.28080177307128906,
      "learning_rate": 0.0001221374045801527,
      "loss": 0.1941,
      "step": 771
    },
    {
      "epoch": 1.955640050697085,
      "grad_norm": 0.365400105714798,
      "learning_rate": 0.0001220356234096692,
      "loss": 0.2657,
      "step": 772
    },
    {
      "epoch": 1.9581749049429658,
      "grad_norm": 0.3115444779396057,
      "learning_rate": 0.00012193384223918576,
      "loss": 0.2117,
      "step": 773
    },
    {
      "epoch": 1.9607097591888465,
      "grad_norm": 0.30900898575782776,
      "learning_rate": 0.00012183206106870228,
      "loss": 0.2563,
      "step": 774
    },
    {
      "epoch": 1.9632446134347274,
      "grad_norm": 0.341789573431015,
      "learning_rate": 0.00012173027989821883,
      "loss": 0.2396,
      "step": 775
    },
    {
      "epoch": 1.9657794676806084,
      "grad_norm": 0.39556756615638733,
      "learning_rate": 0.00012162849872773539,
      "loss": 0.2203,
      "step": 776
    },
    {
      "epoch": 1.9683143219264894,
      "grad_norm": 0.4282820224761963,
      "learning_rate": 0.00012152671755725191,
      "loss": 0.2476,
      "step": 777
    },
    {
      "epoch": 1.97084917617237,
      "grad_norm": 0.3683648109436035,
      "learning_rate": 0.00012142493638676847,
      "loss": 0.2414,
      "step": 778
    },
    {
      "epoch": 1.9733840304182508,
      "grad_norm": 0.19751296937465668,
      "learning_rate": 0.00012132315521628499,
      "loss": 0.1622,
      "step": 779
    },
    {
      "epoch": 1.9759188846641318,
      "grad_norm": 0.4522268772125244,
      "learning_rate": 0.00012122137404580154,
      "loss": 0.3372,
      "step": 780
    },
    {
      "epoch": 1.9784537389100127,
      "grad_norm": 0.3386411666870117,
      "learning_rate": 0.00012111959287531807,
      "loss": 0.1966,
      "step": 781
    },
    {
      "epoch": 1.9809885931558935,
      "grad_norm": 0.3266599178314209,
      "learning_rate": 0.00012101781170483461,
      "loss": 0.2507,
      "step": 782
    },
    {
      "epoch": 1.9835234474017744,
      "grad_norm": 0.395271897315979,
      "learning_rate": 0.00012091603053435115,
      "loss": 0.2626,
      "step": 783
    },
    {
      "epoch": 1.9860583016476552,
      "grad_norm": 0.23269407451152802,
      "learning_rate": 0.00012081424936386769,
      "loss": 0.1806,
      "step": 784
    },
    {
      "epoch": 1.9885931558935361,
      "grad_norm": 0.3929823040962219,
      "learning_rate": 0.00012071246819338421,
      "loss": 0.2912,
      "step": 785
    },
    {
      "epoch": 1.991128010139417,
      "grad_norm": 0.2597116529941559,
      "learning_rate": 0.00012061068702290077,
      "loss": 0.1918,
      "step": 786
    },
    {
      "epoch": 1.9936628643852978,
      "grad_norm": 0.44690757989883423,
      "learning_rate": 0.00012050890585241729,
      "loss": 0.2644,
      "step": 787
    },
    {
      "epoch": 1.9961977186311786,
      "grad_norm": 0.4133460819721222,
      "learning_rate": 0.00012040712468193385,
      "loss": 0.2541,
      "step": 788
    },
    {
      "epoch": 1.9987325728770595,
      "grad_norm": 0.33399301767349243,
      "learning_rate": 0.0001203053435114504,
      "loss": 0.2778,
      "step": 789
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.6268282532691956,
      "learning_rate": 0.00012020356234096692,
      "loss": 0.3105,
      "step": 790
    },
    {
      "epoch": 2.002534854245881,
      "grad_norm": 0.38419365882873535,
      "learning_rate": 0.00012010178117048348,
      "loss": 0.2352,
      "step": 791
    },
    {
      "epoch": 2.005069708491762,
      "grad_norm": 0.30469566583633423,
      "learning_rate": 0.00012,
      "loss": 0.2011,
      "step": 792
    },
    {
      "epoch": 2.0076045627376424,
      "grad_norm": 0.36411482095718384,
      "learning_rate": 0.00011989821882951656,
      "loss": 0.2324,
      "step": 793
    },
    {
      "epoch": 2.0101394169835234,
      "grad_norm": 0.40986311435699463,
      "learning_rate": 0.00011979643765903308,
      "loss": 0.2217,
      "step": 794
    },
    {
      "epoch": 2.0126742712294043,
      "grad_norm": 0.46682968735694885,
      "learning_rate": 0.00011969465648854963,
      "loss": 0.2688,
      "step": 795
    },
    {
      "epoch": 2.0152091254752853,
      "grad_norm": 0.31846344470977783,
      "learning_rate": 0.00011959287531806616,
      "loss": 0.1984,
      "step": 796
    },
    {
      "epoch": 2.017743979721166,
      "grad_norm": 0.48346126079559326,
      "learning_rate": 0.0001194910941475827,
      "loss": 0.2404,
      "step": 797
    },
    {
      "epoch": 2.0202788339670468,
      "grad_norm": 0.5090253949165344,
      "learning_rate": 0.00011938931297709924,
      "loss": 0.2363,
      "step": 798
    },
    {
      "epoch": 2.0228136882129277,
      "grad_norm": 0.4886679947376251,
      "learning_rate": 0.00011928753180661578,
      "loss": 0.2656,
      "step": 799
    },
    {
      "epoch": 2.0253485424588087,
      "grad_norm": 0.5652650594711304,
      "learning_rate": 0.00011918575063613233,
      "loss": 0.2444,
      "step": 800
    },
    {
      "epoch": 2.0278833967046896,
      "grad_norm": 0.7158893346786499,
      "learning_rate": 0.00011908396946564886,
      "loss": 0.2362,
      "step": 801
    },
    {
      "epoch": 2.03041825095057,
      "grad_norm": 0.5168672800064087,
      "learning_rate": 0.00011898218829516541,
      "loss": 0.2067,
      "step": 802
    },
    {
      "epoch": 2.032953105196451,
      "grad_norm": 0.7243991494178772,
      "learning_rate": 0.00011888040712468194,
      "loss": 0.2458,
      "step": 803
    },
    {
      "epoch": 2.035487959442332,
      "grad_norm": 0.4199936091899872,
      "learning_rate": 0.00011877862595419849,
      "loss": 0.2009,
      "step": 804
    },
    {
      "epoch": 2.038022813688213,
      "grad_norm": 0.41791805624961853,
      "learning_rate": 0.00011867684478371501,
      "loss": 0.2325,
      "step": 805
    },
    {
      "epoch": 2.040557667934094,
      "grad_norm": 0.6389465928077698,
      "learning_rate": 0.00011857506361323157,
      "loss": 0.2636,
      "step": 806
    },
    {
      "epoch": 2.0430925221799745,
      "grad_norm": 0.6254114508628845,
      "learning_rate": 0.00011847328244274809,
      "loss": 0.2292,
      "step": 807
    },
    {
      "epoch": 2.0456273764258555,
      "grad_norm": 0.8436942100524902,
      "learning_rate": 0.00011837150127226465,
      "loss": 0.2913,
      "step": 808
    },
    {
      "epoch": 2.0481622306717364,
      "grad_norm": 0.42698097229003906,
      "learning_rate": 0.00011826972010178117,
      "loss": 0.2107,
      "step": 809
    },
    {
      "epoch": 2.0506970849176174,
      "grad_norm": 0.432607501745224,
      "learning_rate": 0.00011816793893129771,
      "loss": 0.1851,
      "step": 810
    },
    {
      "epoch": 2.053231939163498,
      "grad_norm": 0.48241573572158813,
      "learning_rate": 0.00011806615776081425,
      "loss": 0.2333,
      "step": 811
    },
    {
      "epoch": 2.055766793409379,
      "grad_norm": 0.3920150101184845,
      "learning_rate": 0.00011796437659033079,
      "loss": 0.2256,
      "step": 812
    },
    {
      "epoch": 2.05830164765526,
      "grad_norm": 0.3601329028606415,
      "learning_rate": 0.00011786259541984734,
      "loss": 0.2428,
      "step": 813
    },
    {
      "epoch": 2.0608365019011408,
      "grad_norm": 0.428524911403656,
      "learning_rate": 0.00011776081424936387,
      "loss": 0.3109,
      "step": 814
    },
    {
      "epoch": 2.0633713561470217,
      "grad_norm": 0.22846737504005432,
      "learning_rate": 0.00011765903307888042,
      "loss": 0.1715,
      "step": 815
    },
    {
      "epoch": 2.0659062103929022,
      "grad_norm": 0.3656214475631714,
      "learning_rate": 0.00011755725190839695,
      "loss": 0.2211,
      "step": 816
    },
    {
      "epoch": 2.068441064638783,
      "grad_norm": 0.2633965015411377,
      "learning_rate": 0.0001174554707379135,
      "loss": 0.1933,
      "step": 817
    },
    {
      "epoch": 2.070975918884664,
      "grad_norm": 0.4318942129611969,
      "learning_rate": 0.00011735368956743003,
      "loss": 0.2829,
      "step": 818
    },
    {
      "epoch": 2.073510773130545,
      "grad_norm": 0.2643216848373413,
      "learning_rate": 0.00011725190839694658,
      "loss": 0.1938,
      "step": 819
    },
    {
      "epoch": 2.076045627376426,
      "grad_norm": 0.4560074508190155,
      "learning_rate": 0.0001171501272264631,
      "loss": 0.3017,
      "step": 820
    },
    {
      "epoch": 2.0785804816223066,
      "grad_norm": 0.380374550819397,
      "learning_rate": 0.00011704834605597966,
      "loss": 0.2141,
      "step": 821
    },
    {
      "epoch": 2.0811153358681875,
      "grad_norm": 0.321417897939682,
      "learning_rate": 0.00011694656488549618,
      "loss": 0.2058,
      "step": 822
    },
    {
      "epoch": 2.0836501901140685,
      "grad_norm": 0.350496768951416,
      "learning_rate": 0.00011684478371501274,
      "loss": 0.1761,
      "step": 823
    },
    {
      "epoch": 2.0861850443599494,
      "grad_norm": 0.35794898867607117,
      "learning_rate": 0.00011674300254452927,
      "loss": 0.2016,
      "step": 824
    },
    {
      "epoch": 2.08871989860583,
      "grad_norm": 0.37890860438346863,
      "learning_rate": 0.0001166412213740458,
      "loss": 0.253,
      "step": 825
    },
    {
      "epoch": 2.091254752851711,
      "grad_norm": 0.41833457350730896,
      "learning_rate": 0.00011653944020356235,
      "loss": 0.2012,
      "step": 826
    },
    {
      "epoch": 2.093789607097592,
      "grad_norm": 0.49572086334228516,
      "learning_rate": 0.00011643765903307888,
      "loss": 0.214,
      "step": 827
    },
    {
      "epoch": 2.096324461343473,
      "grad_norm": 0.44266751408576965,
      "learning_rate": 0.00011633587786259543,
      "loss": 0.2496,
      "step": 828
    },
    {
      "epoch": 2.098859315589354,
      "grad_norm": 0.7018102407455444,
      "learning_rate": 0.00011623409669211196,
      "loss": 0.3996,
      "step": 829
    },
    {
      "epoch": 2.1013941698352343,
      "grad_norm": 0.42781826853752136,
      "learning_rate": 0.00011613231552162851,
      "loss": 0.2325,
      "step": 830
    },
    {
      "epoch": 2.1039290240811153,
      "grad_norm": 0.35814788937568665,
      "learning_rate": 0.00011603053435114504,
      "loss": 0.2003,
      "step": 831
    },
    {
      "epoch": 2.106463878326996,
      "grad_norm": 0.2381380945444107,
      "learning_rate": 0.00011592875318066159,
      "loss": 0.1791,
      "step": 832
    },
    {
      "epoch": 2.108998732572877,
      "grad_norm": 0.3152197003364563,
      "learning_rate": 0.00011582697201017811,
      "loss": 0.1802,
      "step": 833
    },
    {
      "epoch": 2.111533586818758,
      "grad_norm": 0.3493264615535736,
      "learning_rate": 0.00011572519083969467,
      "loss": 0.173,
      "step": 834
    },
    {
      "epoch": 2.1140684410646386,
      "grad_norm": 0.339036762714386,
      "learning_rate": 0.0001156234096692112,
      "loss": 0.1875,
      "step": 835
    },
    {
      "epoch": 2.1166032953105196,
      "grad_norm": 0.3622972369194031,
      "learning_rate": 0.00011552162849872775,
      "loss": 0.1892,
      "step": 836
    },
    {
      "epoch": 2.1191381495564006,
      "grad_norm": 0.7021862268447876,
      "learning_rate": 0.00011541984732824429,
      "loss": 0.272,
      "step": 837
    },
    {
      "epoch": 2.1216730038022815,
      "grad_norm": 0.4027453064918518,
      "learning_rate": 0.00011531806615776081,
      "loss": 0.2296,
      "step": 838
    },
    {
      "epoch": 2.124207858048162,
      "grad_norm": 0.3509223163127899,
      "learning_rate": 0.00011521628498727736,
      "loss": 0.1812,
      "step": 839
    },
    {
      "epoch": 2.126742712294043,
      "grad_norm": 0.4156752824783325,
      "learning_rate": 0.00011511450381679389,
      "loss": 0.2444,
      "step": 840
    },
    {
      "epoch": 2.129277566539924,
      "grad_norm": 0.3596971035003662,
      "learning_rate": 0.00011501272264631044,
      "loss": 0.1944,
      "step": 841
    },
    {
      "epoch": 2.131812420785805,
      "grad_norm": 0.4088239371776581,
      "learning_rate": 0.00011491094147582697,
      "loss": 0.1892,
      "step": 842
    },
    {
      "epoch": 2.134347275031686,
      "grad_norm": 0.3603368103504181,
      "learning_rate": 0.00011480916030534352,
      "loss": 0.1955,
      "step": 843
    },
    {
      "epoch": 2.1368821292775664,
      "grad_norm": 0.3702489733695984,
      "learning_rate": 0.00011470737913486005,
      "loss": 0.2401,
      "step": 844
    },
    {
      "epoch": 2.1394169835234473,
      "grad_norm": 0.427312433719635,
      "learning_rate": 0.0001146055979643766,
      "loss": 0.2097,
      "step": 845
    },
    {
      "epoch": 2.1419518377693283,
      "grad_norm": 0.34239426255226135,
      "learning_rate": 0.00011450381679389313,
      "loss": 0.2055,
      "step": 846
    },
    {
      "epoch": 2.1444866920152093,
      "grad_norm": 0.522627055644989,
      "learning_rate": 0.00011440203562340968,
      "loss": 0.2206,
      "step": 847
    },
    {
      "epoch": 2.14702154626109,
      "grad_norm": 0.5005999207496643,
      "learning_rate": 0.0001143002544529262,
      "loss": 0.2187,
      "step": 848
    },
    {
      "epoch": 2.1495564005069707,
      "grad_norm": 0.4834093451499939,
      "learning_rate": 0.00011419847328244276,
      "loss": 0.2616,
      "step": 849
    },
    {
      "epoch": 2.1520912547528517,
      "grad_norm": 0.3305776119232178,
      "learning_rate": 0.0001140966921119593,
      "loss": 0.2193,
      "step": 850
    },
    {
      "epoch": 2.1546261089987326,
      "grad_norm": 0.3691657781600952,
      "learning_rate": 0.00011399491094147584,
      "loss": 0.2343,
      "step": 851
    },
    {
      "epoch": 2.1571609632446136,
      "grad_norm": 0.4711242914199829,
      "learning_rate": 0.00011389312977099238,
      "loss": 0.2961,
      "step": 852
    },
    {
      "epoch": 2.159695817490494,
      "grad_norm": 0.4091726839542389,
      "learning_rate": 0.0001137913486005089,
      "loss": 0.2735,
      "step": 853
    },
    {
      "epoch": 2.162230671736375,
      "grad_norm": 0.28634020686149597,
      "learning_rate": 0.00011368956743002545,
      "loss": 0.2026,
      "step": 854
    },
    {
      "epoch": 2.164765525982256,
      "grad_norm": 0.3120497763156891,
      "learning_rate": 0.00011358778625954198,
      "loss": 0.1826,
      "step": 855
    },
    {
      "epoch": 2.167300380228137,
      "grad_norm": 0.3803773522377014,
      "learning_rate": 0.00011348600508905853,
      "loss": 0.2206,
      "step": 856
    },
    {
      "epoch": 2.169835234474018,
      "grad_norm": 0.4069412648677826,
      "learning_rate": 0.00011338422391857506,
      "loss": 0.23,
      "step": 857
    },
    {
      "epoch": 2.1723700887198985,
      "grad_norm": 0.31032097339630127,
      "learning_rate": 0.00011328244274809161,
      "loss": 0.1774,
      "step": 858
    },
    {
      "epoch": 2.1749049429657794,
      "grad_norm": 0.3429819941520691,
      "learning_rate": 0.00011318066157760814,
      "loss": 0.207,
      "step": 859
    },
    {
      "epoch": 2.1774397972116604,
      "grad_norm": 0.32155394554138184,
      "learning_rate": 0.00011307888040712469,
      "loss": 0.1817,
      "step": 860
    },
    {
      "epoch": 2.1799746514575413,
      "grad_norm": 0.3859189450740814,
      "learning_rate": 0.00011297709923664124,
      "loss": 0.205,
      "step": 861
    },
    {
      "epoch": 2.182509505703422,
      "grad_norm": 0.33794042468070984,
      "learning_rate": 0.00011287531806615777,
      "loss": 0.2002,
      "step": 862
    },
    {
      "epoch": 2.185044359949303,
      "grad_norm": 0.38762131333351135,
      "learning_rate": 0.00011277353689567431,
      "loss": 0.206,
      "step": 863
    },
    {
      "epoch": 2.1875792141951838,
      "grad_norm": 0.35734203457832336,
      "learning_rate": 0.00011267175572519085,
      "loss": 0.2332,
      "step": 864
    },
    {
      "epoch": 2.1901140684410647,
      "grad_norm": 0.32456931471824646,
      "learning_rate": 0.00011256997455470739,
      "loss": 0.1873,
      "step": 865
    },
    {
      "epoch": 2.1926489226869457,
      "grad_norm": 0.5198532938957214,
      "learning_rate": 0.00011246819338422391,
      "loss": 0.2408,
      "step": 866
    },
    {
      "epoch": 2.195183776932826,
      "grad_norm": 0.3863469362258911,
      "learning_rate": 0.00011236641221374046,
      "loss": 0.1778,
      "step": 867
    },
    {
      "epoch": 2.197718631178707,
      "grad_norm": 0.39902037382125854,
      "learning_rate": 0.00011226463104325699,
      "loss": 0.1982,
      "step": 868
    },
    {
      "epoch": 2.200253485424588,
      "grad_norm": 0.3974783718585968,
      "learning_rate": 0.00011216284987277354,
      "loss": 0.2157,
      "step": 869
    },
    {
      "epoch": 2.202788339670469,
      "grad_norm": 0.33785662055015564,
      "learning_rate": 0.00011206106870229007,
      "loss": 0.2152,
      "step": 870
    },
    {
      "epoch": 2.20532319391635,
      "grad_norm": 0.4233367145061493,
      "learning_rate": 0.00011195928753180662,
      "loss": 0.2992,
      "step": 871
    },
    {
      "epoch": 2.2078580481622305,
      "grad_norm": 0.37665534019470215,
      "learning_rate": 0.00011185750636132315,
      "loss": 0.2273,
      "step": 872
    },
    {
      "epoch": 2.2103929024081115,
      "grad_norm": 0.3841243088245392,
      "learning_rate": 0.0001117557251908397,
      "loss": 0.1991,
      "step": 873
    },
    {
      "epoch": 2.2129277566539924,
      "grad_norm": 0.3544892966747284,
      "learning_rate": 0.00011165394402035625,
      "loss": 0.2098,
      "step": 874
    },
    {
      "epoch": 2.2154626108998734,
      "grad_norm": 0.43662142753601074,
      "learning_rate": 0.00011155216284987278,
      "loss": 0.2411,
      "step": 875
    },
    {
      "epoch": 2.2179974651457544,
      "grad_norm": 0.3305199146270752,
      "learning_rate": 0.00011145038167938933,
      "loss": 0.1803,
      "step": 876
    },
    {
      "epoch": 2.220532319391635,
      "grad_norm": 0.34674328565597534,
      "learning_rate": 0.00011134860050890586,
      "loss": 0.2206,
      "step": 877
    },
    {
      "epoch": 2.223067173637516,
      "grad_norm": 0.39985305070877075,
      "learning_rate": 0.0001112468193384224,
      "loss": 0.2951,
      "step": 878
    },
    {
      "epoch": 2.225602027883397,
      "grad_norm": 0.36231693625450134,
      "learning_rate": 0.00011114503816793894,
      "loss": 0.2601,
      "step": 879
    },
    {
      "epoch": 2.2281368821292777,
      "grad_norm": 0.4199659526348114,
      "learning_rate": 0.00011104325699745548,
      "loss": 0.2719,
      "step": 880
    },
    {
      "epoch": 2.2306717363751583,
      "grad_norm": 0.3472574055194855,
      "learning_rate": 0.000110941475826972,
      "loss": 0.2437,
      "step": 881
    },
    {
      "epoch": 2.233206590621039,
      "grad_norm": 0.2765200436115265,
      "learning_rate": 0.00011083969465648855,
      "loss": 0.1983,
      "step": 882
    },
    {
      "epoch": 2.23574144486692,
      "grad_norm": 0.4466260075569153,
      "learning_rate": 0.00011073791348600508,
      "loss": 0.2323,
      "step": 883
    },
    {
      "epoch": 2.238276299112801,
      "grad_norm": 0.43661364912986755,
      "learning_rate": 0.00011063613231552163,
      "loss": 0.2957,
      "step": 884
    },
    {
      "epoch": 2.240811153358682,
      "grad_norm": 0.3262166976928711,
      "learning_rate": 0.00011053435114503819,
      "loss": 0.195,
      "step": 885
    },
    {
      "epoch": 2.2433460076045626,
      "grad_norm": 0.5085666179656982,
      "learning_rate": 0.00011043256997455471,
      "loss": 0.3349,
      "step": 886
    },
    {
      "epoch": 2.2458808618504436,
      "grad_norm": 0.46551409363746643,
      "learning_rate": 0.00011033078880407126,
      "loss": 0.3318,
      "step": 887
    },
    {
      "epoch": 2.2484157160963245,
      "grad_norm": 0.425530344247818,
      "learning_rate": 0.00011022900763358779,
      "loss": 0.2857,
      "step": 888
    },
    {
      "epoch": 2.2509505703422055,
      "grad_norm": 0.3377918601036072,
      "learning_rate": 0.00011012722646310434,
      "loss": 0.2215,
      "step": 889
    },
    {
      "epoch": 2.253485424588086,
      "grad_norm": 0.3491476774215698,
      "learning_rate": 0.00011002544529262087,
      "loss": 0.2471,
      "step": 890
    },
    {
      "epoch": 2.256020278833967,
      "grad_norm": 0.3779531419277191,
      "learning_rate": 0.00010992366412213742,
      "loss": 0.1984,
      "step": 891
    },
    {
      "epoch": 2.258555133079848,
      "grad_norm": 0.425077885389328,
      "learning_rate": 0.00010982188295165395,
      "loss": 0.2535,
      "step": 892
    },
    {
      "epoch": 2.261089987325729,
      "grad_norm": 0.40296900272369385,
      "learning_rate": 0.00010972010178117049,
      "loss": 0.1955,
      "step": 893
    },
    {
      "epoch": 2.26362484157161,
      "grad_norm": 0.4394761919975281,
      "learning_rate": 0.00010961832061068703,
      "loss": 0.2638,
      "step": 894
    },
    {
      "epoch": 2.2661596958174903,
      "grad_norm": 0.4743111729621887,
      "learning_rate": 0.00010951653944020357,
      "loss": 0.1932,
      "step": 895
    },
    {
      "epoch": 2.2686945500633713,
      "grad_norm": 0.5121330618858337,
      "learning_rate": 0.00010941475826972009,
      "loss": 0.2541,
      "step": 896
    },
    {
      "epoch": 2.2712294043092522,
      "grad_norm": 0.2810382544994354,
      "learning_rate": 0.00010931297709923664,
      "loss": 0.1884,
      "step": 897
    },
    {
      "epoch": 2.273764258555133,
      "grad_norm": 0.3637334108352661,
      "learning_rate": 0.0001092111959287532,
      "loss": 0.2208,
      "step": 898
    },
    {
      "epoch": 2.2762991128010137,
      "grad_norm": 0.4116186201572418,
      "learning_rate": 0.00010910941475826972,
      "loss": 0.1898,
      "step": 899
    },
    {
      "epoch": 2.2788339670468947,
      "grad_norm": 0.4166296720504761,
      "learning_rate": 0.00010900763358778628,
      "loss": 0.2399,
      "step": 900
    },
    {
      "epoch": 2.2813688212927756,
      "grad_norm": 0.5998784303665161,
      "learning_rate": 0.0001089058524173028,
      "loss": 0.2926,
      "step": 901
    },
    {
      "epoch": 2.2839036755386566,
      "grad_norm": 0.6252371668815613,
      "learning_rate": 0.00010880407124681935,
      "loss": 0.2392,
      "step": 902
    },
    {
      "epoch": 2.2864385297845375,
      "grad_norm": 0.4495537579059601,
      "learning_rate": 0.00010870229007633588,
      "loss": 0.2142,
      "step": 903
    },
    {
      "epoch": 2.288973384030418,
      "grad_norm": 0.5659827589988708,
      "learning_rate": 0.00010860050890585243,
      "loss": 0.2993,
      "step": 904
    },
    {
      "epoch": 2.291508238276299,
      "grad_norm": 0.4290786385536194,
      "learning_rate": 0.00010849872773536896,
      "loss": 0.3127,
      "step": 905
    },
    {
      "epoch": 2.29404309252218,
      "grad_norm": 0.3835826516151428,
      "learning_rate": 0.0001083969465648855,
      "loss": 0.1927,
      "step": 906
    },
    {
      "epoch": 2.296577946768061,
      "grad_norm": 0.4915788769721985,
      "learning_rate": 0.00010829516539440204,
      "loss": 0.2553,
      "step": 907
    },
    {
      "epoch": 2.299112801013942,
      "grad_norm": 0.42122524976730347,
      "learning_rate": 0.00010819338422391858,
      "loss": 0.2133,
      "step": 908
    },
    {
      "epoch": 2.3016476552598224,
      "grad_norm": 0.3904586732387543,
      "learning_rate": 0.0001080916030534351,
      "loss": 0.2064,
      "step": 909
    },
    {
      "epoch": 2.3041825095057034,
      "grad_norm": 0.3680777847766876,
      "learning_rate": 0.00010798982188295166,
      "loss": 0.1989,
      "step": 910
    },
    {
      "epoch": 2.3067173637515843,
      "grad_norm": 0.44054466485977173,
      "learning_rate": 0.00010788804071246821,
      "loss": 0.2386,
      "step": 911
    },
    {
      "epoch": 2.3092522179974653,
      "grad_norm": 0.28730717301368713,
      "learning_rate": 0.00010778625954198473,
      "loss": 0.175,
      "step": 912
    },
    {
      "epoch": 2.3117870722433462,
      "grad_norm": 0.4209315776824951,
      "learning_rate": 0.00010768447837150129,
      "loss": 0.2197,
      "step": 913
    },
    {
      "epoch": 2.3143219264892267,
      "grad_norm": 0.41457393765449524,
      "learning_rate": 0.00010758269720101781,
      "loss": 0.202,
      "step": 914
    },
    {
      "epoch": 2.3168567807351077,
      "grad_norm": 0.40807071328163147,
      "learning_rate": 0.00010748091603053437,
      "loss": 0.3087,
      "step": 915
    },
    {
      "epoch": 2.3193916349809887,
      "grad_norm": 0.42118731141090393,
      "learning_rate": 0.00010737913486005089,
      "loss": 0.2269,
      "step": 916
    },
    {
      "epoch": 2.3219264892268696,
      "grad_norm": 0.3436257541179657,
      "learning_rate": 0.00010727735368956744,
      "loss": 0.1987,
      "step": 917
    },
    {
      "epoch": 2.32446134347275,
      "grad_norm": 0.3721463978290558,
      "learning_rate": 0.00010717557251908397,
      "loss": 0.2081,
      "step": 918
    },
    {
      "epoch": 2.326996197718631,
      "grad_norm": 0.45050719380378723,
      "learning_rate": 0.00010707379134860052,
      "loss": 0.2199,
      "step": 919
    },
    {
      "epoch": 2.329531051964512,
      "grad_norm": 0.42665717005729675,
      "learning_rate": 0.00010697201017811705,
      "loss": 0.2176,
      "step": 920
    },
    {
      "epoch": 2.332065906210393,
      "grad_norm": 0.35217922925949097,
      "learning_rate": 0.00010687022900763359,
      "loss": 0.1915,
      "step": 921
    },
    {
      "epoch": 2.334600760456274,
      "grad_norm": 0.5407602190971375,
      "learning_rate": 0.00010676844783715014,
      "loss": 0.2309,
      "step": 922
    },
    {
      "epoch": 2.3371356147021545,
      "grad_norm": 0.6984291076660156,
      "learning_rate": 0.00010666666666666667,
      "loss": 0.2779,
      "step": 923
    },
    {
      "epoch": 2.3396704689480354,
      "grad_norm": 0.5333911776542664,
      "learning_rate": 0.00010656488549618322,
      "loss": 0.2659,
      "step": 924
    },
    {
      "epoch": 2.3422053231939164,
      "grad_norm": 0.5130952596664429,
      "learning_rate": 0.00010646310432569974,
      "loss": 0.315,
      "step": 925
    },
    {
      "epoch": 2.3447401774397973,
      "grad_norm": 0.3874262869358063,
      "learning_rate": 0.0001063613231552163,
      "loss": 0.294,
      "step": 926
    },
    {
      "epoch": 2.347275031685678,
      "grad_norm": 0.37864431738853455,
      "learning_rate": 0.00010625954198473282,
      "loss": 0.1894,
      "step": 927
    },
    {
      "epoch": 2.349809885931559,
      "grad_norm": 0.406448632478714,
      "learning_rate": 0.00010615776081424938,
      "loss": 0.1913,
      "step": 928
    },
    {
      "epoch": 2.3523447401774398,
      "grad_norm": 0.4278213381767273,
      "learning_rate": 0.0001060559796437659,
      "loss": 0.2136,
      "step": 929
    },
    {
      "epoch": 2.3548795944233207,
      "grad_norm": 0.3853738009929657,
      "learning_rate": 0.00010595419847328246,
      "loss": 0.213,
      "step": 930
    },
    {
      "epoch": 2.3574144486692017,
      "grad_norm": 0.3785664737224579,
      "learning_rate": 0.00010585241730279898,
      "loss": 0.22,
      "step": 931
    },
    {
      "epoch": 2.359949302915082,
      "grad_norm": 0.5863676071166992,
      "learning_rate": 0.00010575063613231553,
      "loss": 0.2305,
      "step": 932
    },
    {
      "epoch": 2.362484157160963,
      "grad_norm": 0.36629414558410645,
      "learning_rate": 0.00010564885496183206,
      "loss": 0.2041,
      "step": 933
    },
    {
      "epoch": 2.365019011406844,
      "grad_norm": 0.44699156284332275,
      "learning_rate": 0.0001055470737913486,
      "loss": 0.2763,
      "step": 934
    },
    {
      "epoch": 2.367553865652725,
      "grad_norm": 0.4775685667991638,
      "learning_rate": 0.00010544529262086515,
      "loss": 0.2779,
      "step": 935
    },
    {
      "epoch": 2.3700887198986056,
      "grad_norm": 0.3192265033721924,
      "learning_rate": 0.00010534351145038168,
      "loss": 0.1861,
      "step": 936
    },
    {
      "epoch": 2.3726235741444865,
      "grad_norm": 0.3589562177658081,
      "learning_rate": 0.00010524173027989823,
      "loss": 0.2266,
      "step": 937
    },
    {
      "epoch": 2.3751584283903675,
      "grad_norm": 0.36193573474884033,
      "learning_rate": 0.00010513994910941476,
      "loss": 0.2105,
      "step": 938
    },
    {
      "epoch": 2.3776932826362485,
      "grad_norm": 0.4141902029514313,
      "learning_rate": 0.00010503816793893131,
      "loss": 0.2676,
      "step": 939
    },
    {
      "epoch": 2.3802281368821294,
      "grad_norm": 0.3118525445461273,
      "learning_rate": 0.00010493638676844783,
      "loss": 0.1941,
      "step": 940
    },
    {
      "epoch": 2.3827629911280104,
      "grad_norm": 0.3232119679450989,
      "learning_rate": 0.00010483460559796439,
      "loss": 0.2065,
      "step": 941
    },
    {
      "epoch": 2.385297845373891,
      "grad_norm": 0.30440258979797363,
      "learning_rate": 0.00010473282442748091,
      "loss": 0.1834,
      "step": 942
    },
    {
      "epoch": 2.387832699619772,
      "grad_norm": 0.5841143131256104,
      "learning_rate": 0.00010463104325699747,
      "loss": 0.3785,
      "step": 943
    },
    {
      "epoch": 2.390367553865653,
      "grad_norm": 0.31851619482040405,
      "learning_rate": 0.00010452926208651399,
      "loss": 0.1798,
      "step": 944
    },
    {
      "epoch": 2.3929024081115338,
      "grad_norm": 0.3820517361164093,
      "learning_rate": 0.00010442748091603054,
      "loss": 0.2376,
      "step": 945
    },
    {
      "epoch": 2.3954372623574143,
      "grad_norm": 0.4379272758960724,
      "learning_rate": 0.00010432569974554708,
      "loss": 0.2356,
      "step": 946
    },
    {
      "epoch": 2.3979721166032952,
      "grad_norm": 0.3120323419570923,
      "learning_rate": 0.00010422391857506362,
      "loss": 0.1936,
      "step": 947
    },
    {
      "epoch": 2.400506970849176,
      "grad_norm": 0.3143107295036316,
      "learning_rate": 0.00010412213740458016,
      "loss": 0.184,
      "step": 948
    },
    {
      "epoch": 2.403041825095057,
      "grad_norm": 0.44618573784828186,
      "learning_rate": 0.00010402035623409669,
      "loss": 0.2468,
      "step": 949
    },
    {
      "epoch": 2.405576679340938,
      "grad_norm": 0.3838117718696594,
      "learning_rate": 0.00010391857506361324,
      "loss": 0.2276,
      "step": 950
    },
    {
      "epoch": 2.4081115335868186,
      "grad_norm": 0.3427219092845917,
      "learning_rate": 0.00010381679389312977,
      "loss": 0.2169,
      "step": 951
    },
    {
      "epoch": 2.4106463878326996,
      "grad_norm": 0.3738270699977875,
      "learning_rate": 0.00010371501272264632,
      "loss": 0.2447,
      "step": 952
    },
    {
      "epoch": 2.4131812420785805,
      "grad_norm": 0.33645015954971313,
      "learning_rate": 0.00010361323155216285,
      "loss": 0.1939,
      "step": 953
    },
    {
      "epoch": 2.4157160963244615,
      "grad_norm": 0.45420047640800476,
      "learning_rate": 0.0001035114503816794,
      "loss": 0.242,
      "step": 954
    },
    {
      "epoch": 2.418250950570342,
      "grad_norm": 0.47141382098197937,
      "learning_rate": 0.00010340966921119592,
      "loss": 0.2923,
      "step": 955
    },
    {
      "epoch": 2.420785804816223,
      "grad_norm": 0.42177528142929077,
      "learning_rate": 0.00010330788804071248,
      "loss": 0.2827,
      "step": 956
    },
    {
      "epoch": 2.423320659062104,
      "grad_norm": 0.409502774477005,
      "learning_rate": 0.000103206106870229,
      "loss": 0.2016,
      "step": 957
    },
    {
      "epoch": 2.425855513307985,
      "grad_norm": 0.47684770822525024,
      "learning_rate": 0.00010310432569974556,
      "loss": 0.2093,
      "step": 958
    },
    {
      "epoch": 2.428390367553866,
      "grad_norm": 0.3357095718383789,
      "learning_rate": 0.0001030025445292621,
      "loss": 0.1744,
      "step": 959
    },
    {
      "epoch": 2.4309252217997463,
      "grad_norm": 0.4120575487613678,
      "learning_rate": 0.00010290076335877863,
      "loss": 0.214,
      "step": 960
    },
    {
      "epoch": 2.4334600760456273,
      "grad_norm": 0.5090222954750061,
      "learning_rate": 0.00010279898218829517,
      "loss": 0.2427,
      "step": 961
    },
    {
      "epoch": 2.4359949302915083,
      "grad_norm": 0.4142550528049469,
      "learning_rate": 0.0001026972010178117,
      "loss": 0.2412,
      "step": 962
    },
    {
      "epoch": 2.4385297845373892,
      "grad_norm": 0.3446972966194153,
      "learning_rate": 0.00010259541984732825,
      "loss": 0.1952,
      "step": 963
    },
    {
      "epoch": 2.4410646387832697,
      "grad_norm": 0.37858110666275024,
      "learning_rate": 0.00010249363867684478,
      "loss": 0.1964,
      "step": 964
    },
    {
      "epoch": 2.4435994930291507,
      "grad_norm": 0.3989041745662689,
      "learning_rate": 0.00010239185750636133,
      "loss": 0.2115,
      "step": 965
    },
    {
      "epoch": 2.4461343472750317,
      "grad_norm": 0.3948146402835846,
      "learning_rate": 0.00010229007633587786,
      "loss": 0.2067,
      "step": 966
    },
    {
      "epoch": 2.4486692015209126,
      "grad_norm": 0.3683820068836212,
      "learning_rate": 0.00010218829516539441,
      "loss": 0.1881,
      "step": 967
    },
    {
      "epoch": 2.4512040557667936,
      "grad_norm": 0.36742380261421204,
      "learning_rate": 0.00010208651399491094,
      "loss": 0.2302,
      "step": 968
    },
    {
      "epoch": 2.453738910012674,
      "grad_norm": 0.32195988297462463,
      "learning_rate": 0.00010198473282442749,
      "loss": 0.1994,
      "step": 969
    },
    {
      "epoch": 2.456273764258555,
      "grad_norm": 0.42296963930130005,
      "learning_rate": 0.00010188295165394401,
      "loss": 0.2657,
      "step": 970
    },
    {
      "epoch": 2.458808618504436,
      "grad_norm": 0.3555774688720703,
      "learning_rate": 0.00010178117048346057,
      "loss": 0.1812,
      "step": 971
    },
    {
      "epoch": 2.461343472750317,
      "grad_norm": 0.6991668343544006,
      "learning_rate": 0.00010167938931297712,
      "loss": 0.4318,
      "step": 972
    },
    {
      "epoch": 2.463878326996198,
      "grad_norm": 0.4290355443954468,
      "learning_rate": 0.00010157760814249365,
      "loss": 0.1856,
      "step": 973
    },
    {
      "epoch": 2.4664131812420784,
      "grad_norm": 0.3479045331478119,
      "learning_rate": 0.00010147582697201018,
      "loss": 0.1844,
      "step": 974
    },
    {
      "epoch": 2.4689480354879594,
      "grad_norm": 0.3862701952457428,
      "learning_rate": 0.00010137404580152672,
      "loss": 0.2108,
      "step": 975
    },
    {
      "epoch": 2.4714828897338403,
      "grad_norm": 0.34411442279815674,
      "learning_rate": 0.00010127226463104326,
      "loss": 0.1851,
      "step": 976
    },
    {
      "epoch": 2.4740177439797213,
      "grad_norm": 0.2434609979391098,
      "learning_rate": 0.00010117048346055979,
      "loss": 0.1757,
      "step": 977
    },
    {
      "epoch": 2.4765525982256023,
      "grad_norm": 0.3341599106788635,
      "learning_rate": 0.00010106870229007634,
      "loss": 0.1879,
      "step": 978
    },
    {
      "epoch": 2.4790874524714828,
      "grad_norm": 0.27678003907203674,
      "learning_rate": 0.00010096692111959287,
      "loss": 0.1943,
      "step": 979
    },
    {
      "epoch": 2.4816223067173637,
      "grad_norm": 0.2388005256652832,
      "learning_rate": 0.00010086513994910942,
      "loss": 0.1804,
      "step": 980
    },
    {
      "epoch": 2.4841571609632447,
      "grad_norm": 0.5265661478042603,
      "learning_rate": 0.00010076335877862595,
      "loss": 0.2813,
      "step": 981
    },
    {
      "epoch": 2.4866920152091256,
      "grad_norm": 0.337007075548172,
      "learning_rate": 0.0001006615776081425,
      "loss": 0.1976,
      "step": 982
    },
    {
      "epoch": 2.489226869455006,
      "grad_norm": 0.42700427770614624,
      "learning_rate": 0.00010055979643765905,
      "loss": 0.2031,
      "step": 983
    },
    {
      "epoch": 2.491761723700887,
      "grad_norm": 0.3900333642959595,
      "learning_rate": 0.00010045801526717558,
      "loss": 0.2178,
      "step": 984
    },
    {
      "epoch": 2.494296577946768,
      "grad_norm": 0.45332932472229004,
      "learning_rate": 0.00010035623409669213,
      "loss": 0.2537,
      "step": 985
    },
    {
      "epoch": 2.496831432192649,
      "grad_norm": 0.30331265926361084,
      "learning_rate": 0.00010025445292620866,
      "loss": 0.2074,
      "step": 986
    },
    {
      "epoch": 2.49936628643853,
      "grad_norm": 0.3379949927330017,
      "learning_rate": 0.0001001526717557252,
      "loss": 0.1768,
      "step": 987
    },
    {
      "epoch": 2.5019011406844105,
      "grad_norm": 0.40859973430633545,
      "learning_rate": 0.00010005089058524174,
      "loss": 0.1984,
      "step": 988
    },
    {
      "epoch": 2.5044359949302915,
      "grad_norm": 0.3993757963180542,
      "learning_rate": 9.994910941475827e-05,
      "loss": 0.2162,
      "step": 989
    },
    {
      "epoch": 2.5069708491761724,
      "grad_norm": 0.5887713432312012,
      "learning_rate": 9.984732824427481e-05,
      "loss": 0.2806,
      "step": 990
    },
    {
      "epoch": 2.5095057034220534,
      "grad_norm": 0.3590678572654724,
      "learning_rate": 9.974554707379135e-05,
      "loss": 0.2045,
      "step": 991
    },
    {
      "epoch": 2.512040557667934,
      "grad_norm": 0.3090289831161499,
      "learning_rate": 9.964376590330789e-05,
      "loss": 0.2151,
      "step": 992
    },
    {
      "epoch": 2.514575411913815,
      "grad_norm": 0.42125657200813293,
      "learning_rate": 9.954198473282443e-05,
      "loss": 0.2277,
      "step": 993
    },
    {
      "epoch": 2.517110266159696,
      "grad_norm": 0.3213401734828949,
      "learning_rate": 9.944020356234097e-05,
      "loss": 0.1927,
      "step": 994
    },
    {
      "epoch": 2.5196451204055768,
      "grad_norm": 0.4558688998222351,
      "learning_rate": 9.933842239185751e-05,
      "loss": 0.2418,
      "step": 995
    },
    {
      "epoch": 2.5221799746514577,
      "grad_norm": 0.5181113481521606,
      "learning_rate": 9.923664122137405e-05,
      "loss": 0.2955,
      "step": 996
    },
    {
      "epoch": 2.5247148288973387,
      "grad_norm": 0.409424751996994,
      "learning_rate": 9.913486005089059e-05,
      "loss": 0.226,
      "step": 997
    },
    {
      "epoch": 2.527249683143219,
      "grad_norm": 0.44536876678466797,
      "learning_rate": 9.903307888040713e-05,
      "loss": 0.2412,
      "step": 998
    },
    {
      "epoch": 2.5297845373891,
      "grad_norm": 0.5028473734855652,
      "learning_rate": 9.893129770992367e-05,
      "loss": 0.2658,
      "step": 999
    },
    {
      "epoch": 2.532319391634981,
      "grad_norm": 0.3157128691673279,
      "learning_rate": 9.882951653944021e-05,
      "loss": 0.1939,
      "step": 1000
    },
    {
      "epoch": 2.5348542458808616,
      "grad_norm": 0.3184659481048584,
      "learning_rate": 9.872773536895676e-05,
      "loss": 0.2113,
      "step": 1001
    },
    {
      "epoch": 2.5373891001267426,
      "grad_norm": 0.5658953785896301,
      "learning_rate": 9.862595419847329e-05,
      "loss": 0.2641,
      "step": 1002
    },
    {
      "epoch": 2.5399239543726235,
      "grad_norm": 0.5306189060211182,
      "learning_rate": 9.852417302798982e-05,
      "loss": 0.2495,
      "step": 1003
    },
    {
      "epoch": 2.5424588086185045,
      "grad_norm": 0.5272448062896729,
      "learning_rate": 9.842239185750636e-05,
      "loss": 0.2212,
      "step": 1004
    },
    {
      "epoch": 2.5449936628643854,
      "grad_norm": 0.3216992914676666,
      "learning_rate": 9.83206106870229e-05,
      "loss": 0.2284,
      "step": 1005
    },
    {
      "epoch": 2.5475285171102664,
      "grad_norm": 0.3573670983314514,
      "learning_rate": 9.821882951653944e-05,
      "loss": 0.2568,
      "step": 1006
    },
    {
      "epoch": 2.550063371356147,
      "grad_norm": 0.4088655710220337,
      "learning_rate": 9.811704834605598e-05,
      "loss": 0.2033,
      "step": 1007
    },
    {
      "epoch": 2.552598225602028,
      "grad_norm": 0.33729737997055054,
      "learning_rate": 9.801526717557252e-05,
      "loss": 0.1843,
      "step": 1008
    },
    {
      "epoch": 2.555133079847909,
      "grad_norm": 0.3298558294773102,
      "learning_rate": 9.791348600508906e-05,
      "loss": 0.193,
      "step": 1009
    },
    {
      "epoch": 2.5576679340937893,
      "grad_norm": 0.33454427123069763,
      "learning_rate": 9.78117048346056e-05,
      "loss": 0.1823,
      "step": 1010
    },
    {
      "epoch": 2.5602027883396703,
      "grad_norm": 0.3466435670852661,
      "learning_rate": 9.770992366412214e-05,
      "loss": 0.2204,
      "step": 1011
    },
    {
      "epoch": 2.5627376425855513,
      "grad_norm": 0.3551004230976105,
      "learning_rate": 9.760814249363868e-05,
      "loss": 0.2027,
      "step": 1012
    },
    {
      "epoch": 2.565272496831432,
      "grad_norm": 0.4317062795162201,
      "learning_rate": 9.750636132315523e-05,
      "loss": 0.2099,
      "step": 1013
    },
    {
      "epoch": 2.567807351077313,
      "grad_norm": 0.5695217847824097,
      "learning_rate": 9.740458015267177e-05,
      "loss": 0.2547,
      "step": 1014
    },
    {
      "epoch": 2.570342205323194,
      "grad_norm": 0.4523742198944092,
      "learning_rate": 9.730279898218831e-05,
      "loss": 0.2501,
      "step": 1015
    },
    {
      "epoch": 2.5728770595690746,
      "grad_norm": 0.3191470503807068,
      "learning_rate": 9.720101781170484e-05,
      "loss": 0.1918,
      "step": 1016
    },
    {
      "epoch": 2.5754119138149556,
      "grad_norm": 0.36234062910079956,
      "learning_rate": 9.709923664122138e-05,
      "loss": 0.2081,
      "step": 1017
    },
    {
      "epoch": 2.5779467680608366,
      "grad_norm": 0.42196425795555115,
      "learning_rate": 9.699745547073791e-05,
      "loss": 0.2801,
      "step": 1018
    },
    {
      "epoch": 2.5804816223067175,
      "grad_norm": 0.3382538855075836,
      "learning_rate": 9.689567430025445e-05,
      "loss": 0.221,
      "step": 1019
    },
    {
      "epoch": 2.583016476552598,
      "grad_norm": 0.5736209750175476,
      "learning_rate": 9.679389312977099e-05,
      "loss": 0.2684,
      "step": 1020
    },
    {
      "epoch": 2.585551330798479,
      "grad_norm": 0.4692763686180115,
      "learning_rate": 9.669211195928753e-05,
      "loss": 0.244,
      "step": 1021
    },
    {
      "epoch": 2.58808618504436,
      "grad_norm": 0.4888627827167511,
      "learning_rate": 9.659033078880407e-05,
      "loss": 0.2493,
      "step": 1022
    },
    {
      "epoch": 2.590621039290241,
      "grad_norm": 0.29745686054229736,
      "learning_rate": 9.648854961832061e-05,
      "loss": 0.1757,
      "step": 1023
    },
    {
      "epoch": 2.593155893536122,
      "grad_norm": 0.476639062166214,
      "learning_rate": 9.638676844783715e-05,
      "loss": 0.2031,
      "step": 1024
    },
    {
      "epoch": 2.5956907477820024,
      "grad_norm": 0.4214845895767212,
      "learning_rate": 9.628498727735369e-05,
      "loss": 0.2588,
      "step": 1025
    },
    {
      "epoch": 2.5982256020278833,
      "grad_norm": 0.3036046326160431,
      "learning_rate": 9.618320610687024e-05,
      "loss": 0.2031,
      "step": 1026
    },
    {
      "epoch": 2.6007604562737643,
      "grad_norm": 0.7941879630088806,
      "learning_rate": 9.608142493638678e-05,
      "loss": 0.2096,
      "step": 1027
    },
    {
      "epoch": 2.6032953105196452,
      "grad_norm": 0.36381933093070984,
      "learning_rate": 9.597964376590332e-05,
      "loss": 0.2102,
      "step": 1028
    },
    {
      "epoch": 2.6058301647655258,
      "grad_norm": 0.3213381767272949,
      "learning_rate": 9.587786259541986e-05,
      "loss": 0.1884,
      "step": 1029
    },
    {
      "epoch": 2.6083650190114067,
      "grad_norm": 0.38559427857398987,
      "learning_rate": 9.577608142493639e-05,
      "loss": 0.2229,
      "step": 1030
    },
    {
      "epoch": 2.6108998732572877,
      "grad_norm": 0.4000662863254547,
      "learning_rate": 9.567430025445293e-05,
      "loss": 0.198,
      "step": 1031
    },
    {
      "epoch": 2.6134347275031686,
      "grad_norm": 0.3635396659374237,
      "learning_rate": 9.557251908396946e-05,
      "loss": 0.2267,
      "step": 1032
    },
    {
      "epoch": 2.6159695817490496,
      "grad_norm": 0.31810763478279114,
      "learning_rate": 9.5470737913486e-05,
      "loss": 0.1691,
      "step": 1033
    },
    {
      "epoch": 2.6185044359949305,
      "grad_norm": 0.29606062173843384,
      "learning_rate": 9.536895674300254e-05,
      "loss": 0.1834,
      "step": 1034
    },
    {
      "epoch": 2.621039290240811,
      "grad_norm": 0.3528769612312317,
      "learning_rate": 9.526717557251908e-05,
      "loss": 0.2086,
      "step": 1035
    },
    {
      "epoch": 2.623574144486692,
      "grad_norm": 0.4795662760734558,
      "learning_rate": 9.516539440203562e-05,
      "loss": 0.2429,
      "step": 1036
    },
    {
      "epoch": 2.626108998732573,
      "grad_norm": 0.4627299904823303,
      "learning_rate": 9.506361323155216e-05,
      "loss": 0.1956,
      "step": 1037
    },
    {
      "epoch": 2.6286438529784535,
      "grad_norm": 0.3330387473106384,
      "learning_rate": 9.496183206106871e-05,
      "loss": 0.1891,
      "step": 1038
    },
    {
      "epoch": 2.6311787072243344,
      "grad_norm": 0.4265390634536743,
      "learning_rate": 9.486005089058525e-05,
      "loss": 0.2086,
      "step": 1039
    },
    {
      "epoch": 2.6337135614702154,
      "grad_norm": 0.37214142084121704,
      "learning_rate": 9.475826972010179e-05,
      "loss": 0.2321,
      "step": 1040
    },
    {
      "epoch": 2.6362484157160964,
      "grad_norm": 0.4183201491832733,
      "learning_rate": 9.465648854961833e-05,
      "loss": 0.2029,
      "step": 1041
    },
    {
      "epoch": 2.6387832699619773,
      "grad_norm": 0.5688794851303101,
      "learning_rate": 9.455470737913487e-05,
      "loss": 0.2481,
      "step": 1042
    },
    {
      "epoch": 2.6413181242078583,
      "grad_norm": 0.38355833292007446,
      "learning_rate": 9.445292620865141e-05,
      "loss": 0.1989,
      "step": 1043
    },
    {
      "epoch": 2.643852978453739,
      "grad_norm": 0.4998534023761749,
      "learning_rate": 9.435114503816794e-05,
      "loss": 0.2272,
      "step": 1044
    },
    {
      "epoch": 2.6463878326996197,
      "grad_norm": 0.2796792685985565,
      "learning_rate": 9.424936386768448e-05,
      "loss": 0.1694,
      "step": 1045
    },
    {
      "epoch": 2.6489226869455007,
      "grad_norm": 0.30551543831825256,
      "learning_rate": 9.414758269720102e-05,
      "loss": 0.1782,
      "step": 1046
    },
    {
      "epoch": 2.6514575411913817,
      "grad_norm": 0.3933429718017578,
      "learning_rate": 9.404580152671755e-05,
      "loss": 0.272,
      "step": 1047
    },
    {
      "epoch": 2.653992395437262,
      "grad_norm": 0.3543720841407776,
      "learning_rate": 9.39440203562341e-05,
      "loss": 0.2271,
      "step": 1048
    },
    {
      "epoch": 2.656527249683143,
      "grad_norm": 0.2716831564903259,
      "learning_rate": 9.384223918575063e-05,
      "loss": 0.1898,
      "step": 1049
    },
    {
      "epoch": 2.659062103929024,
      "grad_norm": 0.3037743866443634,
      "learning_rate": 9.374045801526719e-05,
      "loss": 0.1911,
      "step": 1050
    },
    {
      "epoch": 2.661596958174905,
      "grad_norm": 0.4390093982219696,
      "learning_rate": 9.363867684478373e-05,
      "loss": 0.2369,
      "step": 1051
    },
    {
      "epoch": 2.664131812420786,
      "grad_norm": 0.3383953273296356,
      "learning_rate": 9.353689567430026e-05,
      "loss": 0.2519,
      "step": 1052
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.28227975964546204,
      "learning_rate": 9.34351145038168e-05,
      "loss": 0.1926,
      "step": 1053
    },
    {
      "epoch": 2.6692015209125475,
      "grad_norm": 0.33451253175735474,
      "learning_rate": 9.333333333333334e-05,
      "loss": 0.1864,
      "step": 1054
    },
    {
      "epoch": 2.6717363751584284,
      "grad_norm": 0.4116145372390747,
      "learning_rate": 9.323155216284988e-05,
      "loss": 0.2462,
      "step": 1055
    },
    {
      "epoch": 2.6742712294043094,
      "grad_norm": 0.43822887539863586,
      "learning_rate": 9.312977099236642e-05,
      "loss": 0.2014,
      "step": 1056
    },
    {
      "epoch": 2.67680608365019,
      "grad_norm": 0.4394984841346741,
      "learning_rate": 9.302798982188296e-05,
      "loss": 0.2378,
      "step": 1057
    },
    {
      "epoch": 2.679340937896071,
      "grad_norm": 0.4073251783847809,
      "learning_rate": 9.292620865139949e-05,
      "loss": 0.2711,
      "step": 1058
    },
    {
      "epoch": 2.681875792141952,
      "grad_norm": 0.3316657841205597,
      "learning_rate": 9.282442748091603e-05,
      "loss": 0.214,
      "step": 1059
    },
    {
      "epoch": 2.6844106463878328,
      "grad_norm": 0.2994216978549957,
      "learning_rate": 9.272264631043257e-05,
      "loss": 0.1838,
      "step": 1060
    },
    {
      "epoch": 2.6869455006337137,
      "grad_norm": 0.5388765335083008,
      "learning_rate": 9.26208651399491e-05,
      "loss": 0.277,
      "step": 1061
    },
    {
      "epoch": 2.6894803548795947,
      "grad_norm": 0.3714945912361145,
      "learning_rate": 9.251908396946566e-05,
      "loss": 0.2428,
      "step": 1062
    },
    {
      "epoch": 2.692015209125475,
      "grad_norm": 0.32202383875846863,
      "learning_rate": 9.24173027989822e-05,
      "loss": 0.2063,
      "step": 1063
    },
    {
      "epoch": 2.694550063371356,
      "grad_norm": 0.4116881191730499,
      "learning_rate": 9.231552162849874e-05,
      "loss": 0.2661,
      "step": 1064
    },
    {
      "epoch": 2.697084917617237,
      "grad_norm": 0.36626386642456055,
      "learning_rate": 9.221374045801528e-05,
      "loss": 0.2897,
      "step": 1065
    },
    {
      "epoch": 2.6996197718631176,
      "grad_norm": 0.33859655261039734,
      "learning_rate": 9.211195928753181e-05,
      "loss": 0.1959,
      "step": 1066
    },
    {
      "epoch": 2.7021546261089986,
      "grad_norm": 0.38263705372810364,
      "learning_rate": 9.201017811704835e-05,
      "loss": 0.2827,
      "step": 1067
    },
    {
      "epoch": 2.7046894803548795,
      "grad_norm": 0.3557961583137512,
      "learning_rate": 9.19083969465649e-05,
      "loss": 0.176,
      "step": 1068
    },
    {
      "epoch": 2.7072243346007605,
      "grad_norm": 0.35334861278533936,
      "learning_rate": 9.180661577608143e-05,
      "loss": 0.2183,
      "step": 1069
    },
    {
      "epoch": 2.7097591888466415,
      "grad_norm": 0.4672026038169861,
      "learning_rate": 9.170483460559797e-05,
      "loss": 0.2715,
      "step": 1070
    },
    {
      "epoch": 2.7122940430925224,
      "grad_norm": 0.41585099697113037,
      "learning_rate": 9.160305343511451e-05,
      "loss": 0.1912,
      "step": 1071
    },
    {
      "epoch": 2.714828897338403,
      "grad_norm": 0.54674232006073,
      "learning_rate": 9.150127226463104e-05,
      "loss": 0.2493,
      "step": 1072
    },
    {
      "epoch": 2.717363751584284,
      "grad_norm": 0.30595988035202026,
      "learning_rate": 9.139949109414758e-05,
      "loss": 0.1843,
      "step": 1073
    },
    {
      "epoch": 2.719898605830165,
      "grad_norm": 0.3521415889263153,
      "learning_rate": 9.129770992366413e-05,
      "loss": 0.2047,
      "step": 1074
    },
    {
      "epoch": 2.7224334600760454,
      "grad_norm": 0.47393590211868286,
      "learning_rate": 9.119592875318067e-05,
      "loss": 0.3398,
      "step": 1075
    },
    {
      "epoch": 2.7249683143219263,
      "grad_norm": 0.4672793745994568,
      "learning_rate": 9.109414758269721e-05,
      "loss": 0.3569,
      "step": 1076
    },
    {
      "epoch": 2.7275031685678073,
      "grad_norm": 0.41231435537338257,
      "learning_rate": 9.099236641221375e-05,
      "loss": 0.2323,
      "step": 1077
    },
    {
      "epoch": 2.7300380228136882,
      "grad_norm": 0.36700156331062317,
      "learning_rate": 9.089058524173029e-05,
      "loss": 0.2023,
      "step": 1078
    },
    {
      "epoch": 2.732572877059569,
      "grad_norm": 0.32198184728622437,
      "learning_rate": 9.078880407124683e-05,
      "loss": 0.1814,
      "step": 1079
    },
    {
      "epoch": 2.73510773130545,
      "grad_norm": 0.46826303005218506,
      "learning_rate": 9.068702290076337e-05,
      "loss": 0.2216,
      "step": 1080
    },
    {
      "epoch": 2.7376425855513307,
      "grad_norm": 0.3026100695133209,
      "learning_rate": 9.05852417302799e-05,
      "loss": 0.1826,
      "step": 1081
    },
    {
      "epoch": 2.7401774397972116,
      "grad_norm": 0.2897210717201233,
      "learning_rate": 9.048346055979644e-05,
      "loss": 0.1853,
      "step": 1082
    },
    {
      "epoch": 2.7427122940430926,
      "grad_norm": 0.296286940574646,
      "learning_rate": 9.038167938931298e-05,
      "loss": 0.1776,
      "step": 1083
    },
    {
      "epoch": 2.7452471482889735,
      "grad_norm": 0.374600887298584,
      "learning_rate": 9.027989821882952e-05,
      "loss": 0.2031,
      "step": 1084
    },
    {
      "epoch": 2.747782002534854,
      "grad_norm": 0.5333495140075684,
      "learning_rate": 9.017811704834606e-05,
      "loss": 0.2798,
      "step": 1085
    },
    {
      "epoch": 2.750316856780735,
      "grad_norm": 0.43342864513397217,
      "learning_rate": 9.007633587786259e-05,
      "loss": 0.2063,
      "step": 1086
    },
    {
      "epoch": 2.752851711026616,
      "grad_norm": 0.5283639430999756,
      "learning_rate": 8.997455470737914e-05,
      "loss": 0.25,
      "step": 1087
    },
    {
      "epoch": 2.755386565272497,
      "grad_norm": 0.556190013885498,
      "learning_rate": 8.987277353689568e-05,
      "loss": 0.2044,
      "step": 1088
    },
    {
      "epoch": 2.757921419518378,
      "grad_norm": 0.35083258152008057,
      "learning_rate": 8.977099236641222e-05,
      "loss": 0.188,
      "step": 1089
    },
    {
      "epoch": 2.7604562737642584,
      "grad_norm": 0.42917102575302124,
      "learning_rate": 8.966921119592876e-05,
      "loss": 0.2511,
      "step": 1090
    },
    {
      "epoch": 2.7629911280101394,
      "grad_norm": 0.5665780305862427,
      "learning_rate": 8.95674300254453e-05,
      "loss": 0.3307,
      "step": 1091
    },
    {
      "epoch": 2.7655259822560203,
      "grad_norm": 0.40193435549736023,
      "learning_rate": 8.946564885496184e-05,
      "loss": 0.2453,
      "step": 1092
    },
    {
      "epoch": 2.7680608365019013,
      "grad_norm": 0.46344733238220215,
      "learning_rate": 8.936386768447838e-05,
      "loss": 0.2096,
      "step": 1093
    },
    {
      "epoch": 2.770595690747782,
      "grad_norm": 0.4600921869277954,
      "learning_rate": 8.926208651399492e-05,
      "loss": 0.2161,
      "step": 1094
    },
    {
      "epoch": 2.7731305449936627,
      "grad_norm": 0.46053385734558105,
      "learning_rate": 8.916030534351145e-05,
      "loss": 0.2369,
      "step": 1095
    },
    {
      "epoch": 2.7756653992395437,
      "grad_norm": 0.45449280738830566,
      "learning_rate": 8.9058524173028e-05,
      "loss": 0.2344,
      "step": 1096
    },
    {
      "epoch": 2.7782002534854247,
      "grad_norm": 0.39411383867263794,
      "learning_rate": 8.895674300254453e-05,
      "loss": 0.2082,
      "step": 1097
    },
    {
      "epoch": 2.7807351077313056,
      "grad_norm": 0.38967519998550415,
      "learning_rate": 8.885496183206107e-05,
      "loss": 0.2264,
      "step": 1098
    },
    {
      "epoch": 2.7832699619771866,
      "grad_norm": 0.3357069194316864,
      "learning_rate": 8.875318066157761e-05,
      "loss": 0.1896,
      "step": 1099
    },
    {
      "epoch": 2.785804816223067,
      "grad_norm": 0.4941220283508301,
      "learning_rate": 8.865139949109415e-05,
      "loss": 0.3003,
      "step": 1100
    },
    {
      "epoch": 2.788339670468948,
      "grad_norm": 0.3897833526134491,
      "learning_rate": 8.854961832061069e-05,
      "loss": 0.1907,
      "step": 1101
    },
    {
      "epoch": 2.790874524714829,
      "grad_norm": 0.4247800409793854,
      "learning_rate": 8.844783715012723e-05,
      "loss": 0.1843,
      "step": 1102
    },
    {
      "epoch": 2.7934093789607095,
      "grad_norm": 0.46850237250328064,
      "learning_rate": 8.834605597964377e-05,
      "loss": 0.2501,
      "step": 1103
    },
    {
      "epoch": 2.7959442332065905,
      "grad_norm": 0.4753093421459198,
      "learning_rate": 8.824427480916031e-05,
      "loss": 0.2277,
      "step": 1104
    },
    {
      "epoch": 2.7984790874524714,
      "grad_norm": 0.3235141932964325,
      "learning_rate": 8.814249363867685e-05,
      "loss": 0.1817,
      "step": 1105
    },
    {
      "epoch": 2.8010139416983524,
      "grad_norm": 0.48403674364089966,
      "learning_rate": 8.804071246819339e-05,
      "loss": 0.2278,
      "step": 1106
    },
    {
      "epoch": 2.8035487959442333,
      "grad_norm": 0.30417025089263916,
      "learning_rate": 8.793893129770993e-05,
      "loss": 0.1867,
      "step": 1107
    },
    {
      "epoch": 2.8060836501901143,
      "grad_norm": 0.30289140343666077,
      "learning_rate": 8.783715012722647e-05,
      "loss": 0.1898,
      "step": 1108
    },
    {
      "epoch": 2.808618504435995,
      "grad_norm": 0.47156116366386414,
      "learning_rate": 8.7735368956743e-05,
      "loss": 0.2381,
      "step": 1109
    },
    {
      "epoch": 2.8111533586818758,
      "grad_norm": 0.4420924186706543,
      "learning_rate": 8.763358778625954e-05,
      "loss": 0.251,
      "step": 1110
    },
    {
      "epoch": 2.8136882129277567,
      "grad_norm": 0.42235851287841797,
      "learning_rate": 8.75318066157761e-05,
      "loss": 0.2007,
      "step": 1111
    },
    {
      "epoch": 2.8162230671736372,
      "grad_norm": 0.40069061517715454,
      "learning_rate": 8.743002544529262e-05,
      "loss": 0.2052,
      "step": 1112
    },
    {
      "epoch": 2.818757921419518,
      "grad_norm": 0.5213333368301392,
      "learning_rate": 8.732824427480916e-05,
      "loss": 0.2236,
      "step": 1113
    },
    {
      "epoch": 2.821292775665399,
      "grad_norm": 0.3919121026992798,
      "learning_rate": 8.72264631043257e-05,
      "loss": 0.2338,
      "step": 1114
    },
    {
      "epoch": 2.82382762991128,
      "grad_norm": 0.4295049011707306,
      "learning_rate": 8.712468193384224e-05,
      "loss": 0.2713,
      "step": 1115
    },
    {
      "epoch": 2.826362484157161,
      "grad_norm": 0.25834596157073975,
      "learning_rate": 8.702290076335878e-05,
      "loss": 0.1701,
      "step": 1116
    },
    {
      "epoch": 2.828897338403042,
      "grad_norm": 0.36217084527015686,
      "learning_rate": 8.692111959287532e-05,
      "loss": 0.1963,
      "step": 1117
    },
    {
      "epoch": 2.8314321926489225,
      "grad_norm": 0.39089757204055786,
      "learning_rate": 8.681933842239186e-05,
      "loss": 0.186,
      "step": 1118
    },
    {
      "epoch": 2.8339670468948035,
      "grad_norm": 0.45900896191596985,
      "learning_rate": 8.67175572519084e-05,
      "loss": 0.22,
      "step": 1119
    },
    {
      "epoch": 2.8365019011406845,
      "grad_norm": 0.2946614623069763,
      "learning_rate": 8.661577608142494e-05,
      "loss": 0.1771,
      "step": 1120
    },
    {
      "epoch": 2.8390367553865654,
      "grad_norm": 0.4160090982913971,
      "learning_rate": 8.651399491094148e-05,
      "loss": 0.2083,
      "step": 1121
    },
    {
      "epoch": 2.841571609632446,
      "grad_norm": 0.43507587909698486,
      "learning_rate": 8.641221374045802e-05,
      "loss": 0.2595,
      "step": 1122
    },
    {
      "epoch": 2.844106463878327,
      "grad_norm": 0.449813574552536,
      "learning_rate": 8.631043256997457e-05,
      "loss": 0.2982,
      "step": 1123
    },
    {
      "epoch": 2.846641318124208,
      "grad_norm": 0.33715054392814636,
      "learning_rate": 8.620865139949111e-05,
      "loss": 0.1851,
      "step": 1124
    },
    {
      "epoch": 2.849176172370089,
      "grad_norm": 0.4767422676086426,
      "learning_rate": 8.610687022900765e-05,
      "loss": 0.2865,
      "step": 1125
    },
    {
      "epoch": 2.8517110266159698,
      "grad_norm": 0.4232870042324066,
      "learning_rate": 8.600508905852417e-05,
      "loss": 0.2355,
      "step": 1126
    },
    {
      "epoch": 2.8542458808618507,
      "grad_norm": 0.286565363407135,
      "learning_rate": 8.590330788804071e-05,
      "loss": 0.188,
      "step": 1127
    },
    {
      "epoch": 2.8567807351077312,
      "grad_norm": 0.304606169462204,
      "learning_rate": 8.580152671755725e-05,
      "loss": 0.2367,
      "step": 1128
    },
    {
      "epoch": 2.859315589353612,
      "grad_norm": 0.4730917811393738,
      "learning_rate": 8.569974554707379e-05,
      "loss": 0.2925,
      "step": 1129
    },
    {
      "epoch": 2.861850443599493,
      "grad_norm": 0.348651647567749,
      "learning_rate": 8.559796437659033e-05,
      "loss": 0.242,
      "step": 1130
    },
    {
      "epoch": 2.8643852978453737,
      "grad_norm": 0.31156882643699646,
      "learning_rate": 8.549618320610687e-05,
      "loss": 0.1865,
      "step": 1131
    },
    {
      "epoch": 2.8669201520912546,
      "grad_norm": 0.4416813254356384,
      "learning_rate": 8.539440203562341e-05,
      "loss": 0.311,
      "step": 1132
    },
    {
      "epoch": 2.8694550063371356,
      "grad_norm": 0.2997666895389557,
      "learning_rate": 8.529262086513995e-05,
      "loss": 0.1956,
      "step": 1133
    },
    {
      "epoch": 2.8719898605830165,
      "grad_norm": 0.30020904541015625,
      "learning_rate": 8.519083969465649e-05,
      "loss": 0.206,
      "step": 1134
    },
    {
      "epoch": 2.8745247148288975,
      "grad_norm": 0.4457029104232788,
      "learning_rate": 8.508905852417304e-05,
      "loss": 0.2422,
      "step": 1135
    },
    {
      "epoch": 2.8770595690747784,
      "grad_norm": 0.3519587218761444,
      "learning_rate": 8.498727735368958e-05,
      "loss": 0.2277,
      "step": 1136
    },
    {
      "epoch": 2.879594423320659,
      "grad_norm": 0.3482111394405365,
      "learning_rate": 8.488549618320612e-05,
      "loss": 0.1981,
      "step": 1137
    },
    {
      "epoch": 2.88212927756654,
      "grad_norm": 0.31978392601013184,
      "learning_rate": 8.478371501272266e-05,
      "loss": 0.1849,
      "step": 1138
    },
    {
      "epoch": 2.884664131812421,
      "grad_norm": 0.2380414754152298,
      "learning_rate": 8.46819338422392e-05,
      "loss": 0.1619,
      "step": 1139
    },
    {
      "epoch": 2.8871989860583014,
      "grad_norm": 0.25577735900878906,
      "learning_rate": 8.458015267175572e-05,
      "loss": 0.1594,
      "step": 1140
    },
    {
      "epoch": 2.8897338403041823,
      "grad_norm": 0.36093661189079285,
      "learning_rate": 8.447837150127226e-05,
      "loss": 0.1937,
      "step": 1141
    },
    {
      "epoch": 2.8922686945500633,
      "grad_norm": 0.3542689085006714,
      "learning_rate": 8.43765903307888e-05,
      "loss": 0.2219,
      "step": 1142
    },
    {
      "epoch": 2.8948035487959443,
      "grad_norm": 0.3966139853000641,
      "learning_rate": 8.427480916030534e-05,
      "loss": 0.2427,
      "step": 1143
    },
    {
      "epoch": 2.897338403041825,
      "grad_norm": 0.3684738278388977,
      "learning_rate": 8.417302798982188e-05,
      "loss": 0.2093,
      "step": 1144
    },
    {
      "epoch": 2.899873257287706,
      "grad_norm": 0.430477499961853,
      "learning_rate": 8.407124681933842e-05,
      "loss": 0.2266,
      "step": 1145
    },
    {
      "epoch": 2.9024081115335867,
      "grad_norm": 0.32896652817726135,
      "learning_rate": 8.396946564885496e-05,
      "loss": 0.2447,
      "step": 1146
    },
    {
      "epoch": 2.9049429657794676,
      "grad_norm": 0.45568832755088806,
      "learning_rate": 8.38676844783715e-05,
      "loss": 0.2251,
      "step": 1147
    },
    {
      "epoch": 2.9074778200253486,
      "grad_norm": 0.48290732502937317,
      "learning_rate": 8.376590330788805e-05,
      "loss": 0.2471,
      "step": 1148
    },
    {
      "epoch": 2.9100126742712296,
      "grad_norm": 0.40795937180519104,
      "learning_rate": 8.366412213740459e-05,
      "loss": 0.2031,
      "step": 1149
    },
    {
      "epoch": 2.91254752851711,
      "grad_norm": 0.362835168838501,
      "learning_rate": 8.356234096692113e-05,
      "loss": 0.1991,
      "step": 1150
    },
    {
      "epoch": 2.915082382762991,
      "grad_norm": 0.38601744174957275,
      "learning_rate": 8.346055979643767e-05,
      "loss": 0.1821,
      "step": 1151
    },
    {
      "epoch": 2.917617237008872,
      "grad_norm": 0.2641182541847229,
      "learning_rate": 8.335877862595421e-05,
      "loss": 0.16,
      "step": 1152
    },
    {
      "epoch": 2.920152091254753,
      "grad_norm": 0.5600478053092957,
      "learning_rate": 8.325699745547075e-05,
      "loss": 0.2476,
      "step": 1153
    },
    {
      "epoch": 2.922686945500634,
      "grad_norm": 0.3873019516468048,
      "learning_rate": 8.315521628498727e-05,
      "loss": 0.2264,
      "step": 1154
    },
    {
      "epoch": 2.9252217997465144,
      "grad_norm": 0.2946743667125702,
      "learning_rate": 8.305343511450381e-05,
      "loss": 0.1776,
      "step": 1155
    },
    {
      "epoch": 2.9277566539923954,
      "grad_norm": 0.3886416554450989,
      "learning_rate": 8.295165394402035e-05,
      "loss": 0.2123,
      "step": 1156
    },
    {
      "epoch": 2.9302915082382763,
      "grad_norm": 0.39706671237945557,
      "learning_rate": 8.284987277353689e-05,
      "loss": 0.2319,
      "step": 1157
    },
    {
      "epoch": 2.9328263624841573,
      "grad_norm": 0.30693602561950684,
      "learning_rate": 8.274809160305343e-05,
      "loss": 0.1939,
      "step": 1158
    },
    {
      "epoch": 2.935361216730038,
      "grad_norm": 0.37277474999427795,
      "learning_rate": 8.264631043256997e-05,
      "loss": 0.2194,
      "step": 1159
    },
    {
      "epoch": 2.9378960709759188,
      "grad_norm": 0.442508727312088,
      "learning_rate": 8.254452926208652e-05,
      "loss": 0.2142,
      "step": 1160
    },
    {
      "epoch": 2.9404309252217997,
      "grad_norm": 0.275898814201355,
      "learning_rate": 8.244274809160306e-05,
      "loss": 0.1791,
      "step": 1161
    },
    {
      "epoch": 2.9429657794676807,
      "grad_norm": 0.4033918082714081,
      "learning_rate": 8.23409669211196e-05,
      "loss": 0.295,
      "step": 1162
    },
    {
      "epoch": 2.9455006337135616,
      "grad_norm": 0.46713244915008545,
      "learning_rate": 8.223918575063614e-05,
      "loss": 0.2662,
      "step": 1163
    },
    {
      "epoch": 2.9480354879594426,
      "grad_norm": 0.37975406646728516,
      "learning_rate": 8.213740458015268e-05,
      "loss": 0.1915,
      "step": 1164
    },
    {
      "epoch": 2.950570342205323,
      "grad_norm": 0.31382545828819275,
      "learning_rate": 8.203562340966922e-05,
      "loss": 0.1793,
      "step": 1165
    },
    {
      "epoch": 2.953105196451204,
      "grad_norm": 0.42415499687194824,
      "learning_rate": 8.193384223918576e-05,
      "loss": 0.2375,
      "step": 1166
    },
    {
      "epoch": 2.955640050697085,
      "grad_norm": 0.4227803647518158,
      "learning_rate": 8.18320610687023e-05,
      "loss": 0.213,
      "step": 1167
    },
    {
      "epoch": 2.9581749049429655,
      "grad_norm": 0.3395853638648987,
      "learning_rate": 8.173027989821882e-05,
      "loss": 0.1942,
      "step": 1168
    },
    {
      "epoch": 2.9607097591888465,
      "grad_norm": 0.4627746641635895,
      "learning_rate": 8.162849872773536e-05,
      "loss": 0.2266,
      "step": 1169
    },
    {
      "epoch": 2.9632446134347274,
      "grad_norm": 0.36325398087501526,
      "learning_rate": 8.15267175572519e-05,
      "loss": 0.2176,
      "step": 1170
    },
    {
      "epoch": 2.9657794676806084,
      "grad_norm": 0.4188767671585083,
      "learning_rate": 8.142493638676844e-05,
      "loss": 0.1992,
      "step": 1171
    },
    {
      "epoch": 2.9683143219264894,
      "grad_norm": 0.3149709403514862,
      "learning_rate": 8.1323155216285e-05,
      "loss": 0.1829,
      "step": 1172
    },
    {
      "epoch": 2.9708491761723703,
      "grad_norm": 0.26542145013809204,
      "learning_rate": 8.122137404580153e-05,
      "loss": 0.1801,
      "step": 1173
    },
    {
      "epoch": 2.973384030418251,
      "grad_norm": 0.28748998045921326,
      "learning_rate": 8.111959287531807e-05,
      "loss": 0.1764,
      "step": 1174
    },
    {
      "epoch": 2.975918884664132,
      "grad_norm": 0.3103797733783722,
      "learning_rate": 8.101781170483461e-05,
      "loss": 0.2047,
      "step": 1175
    },
    {
      "epoch": 2.9784537389100127,
      "grad_norm": 0.3357256054878235,
      "learning_rate": 8.091603053435115e-05,
      "loss": 0.2303,
      "step": 1176
    },
    {
      "epoch": 2.9809885931558933,
      "grad_norm": 0.4399915933609009,
      "learning_rate": 8.081424936386769e-05,
      "loss": 0.2423,
      "step": 1177
    },
    {
      "epoch": 2.983523447401774,
      "grad_norm": 0.3486070930957794,
      "learning_rate": 8.071246819338423e-05,
      "loss": 0.19,
      "step": 1178
    },
    {
      "epoch": 2.986058301647655,
      "grad_norm": 0.33286648988723755,
      "learning_rate": 8.061068702290077e-05,
      "loss": 0.1788,
      "step": 1179
    },
    {
      "epoch": 2.988593155893536,
      "grad_norm": 0.2841028571128845,
      "learning_rate": 8.050890585241731e-05,
      "loss": 0.167,
      "step": 1180
    },
    {
      "epoch": 2.991128010139417,
      "grad_norm": 0.44933149218559265,
      "learning_rate": 8.040712468193385e-05,
      "loss": 0.3098,
      "step": 1181
    },
    {
      "epoch": 2.993662864385298,
      "grad_norm": 0.2849741280078888,
      "learning_rate": 8.030534351145038e-05,
      "loss": 0.1896,
      "step": 1182
    },
    {
      "epoch": 2.9961977186311786,
      "grad_norm": 0.39720216393470764,
      "learning_rate": 8.020356234096691e-05,
      "loss": 0.2426,
      "step": 1183
    },
    {
      "epoch": 2.9987325728770595,
      "grad_norm": 0.3838231563568115,
      "learning_rate": 8.010178117048347e-05,
      "loss": 0.2194,
      "step": 1184
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.6684709787368774,
      "learning_rate": 8e-05,
      "loss": 0.2783,
      "step": 1185
    },
    {
      "epoch": 3.002534854245881,
      "grad_norm": 0.44380757212638855,
      "learning_rate": 7.989821882951655e-05,
      "loss": 0.2938,
      "step": 1186
    },
    {
      "epoch": 3.005069708491762,
      "grad_norm": 0.4787996709346771,
      "learning_rate": 7.979643765903309e-05,
      "loss": 0.2998,
      "step": 1187
    },
    {
      "epoch": 3.0076045627376424,
      "grad_norm": 0.36355340480804443,
      "learning_rate": 7.969465648854962e-05,
      "loss": 0.1555,
      "step": 1188
    },
    {
      "epoch": 3.0101394169835234,
      "grad_norm": 0.37890535593032837,
      "learning_rate": 7.959287531806616e-05,
      "loss": 0.1743,
      "step": 1189
    },
    {
      "epoch": 3.0126742712294043,
      "grad_norm": 0.4317542612552643,
      "learning_rate": 7.94910941475827e-05,
      "loss": 0.1891,
      "step": 1190
    },
    {
      "epoch": 3.0152091254752853,
      "grad_norm": 0.3477863669395447,
      "learning_rate": 7.938931297709924e-05,
      "loss": 0.1576,
      "step": 1191
    },
    {
      "epoch": 3.017743979721166,
      "grad_norm": 0.414050817489624,
      "learning_rate": 7.928753180661578e-05,
      "loss": 0.2014,
      "step": 1192
    },
    {
      "epoch": 3.0202788339670468,
      "grad_norm": 0.3596842288970947,
      "learning_rate": 7.918575063613232e-05,
      "loss": 0.1482,
      "step": 1193
    },
    {
      "epoch": 3.0228136882129277,
      "grad_norm": 0.49169921875,
      "learning_rate": 7.908396946564886e-05,
      "loss": 0.1686,
      "step": 1194
    },
    {
      "epoch": 3.0253485424588087,
      "grad_norm": 0.44806674122810364,
      "learning_rate": 7.89821882951654e-05,
      "loss": 0.2044,
      "step": 1195
    },
    {
      "epoch": 3.0278833967046896,
      "grad_norm": 0.43101197481155396,
      "learning_rate": 7.888040712468194e-05,
      "loss": 0.1911,
      "step": 1196
    },
    {
      "epoch": 3.03041825095057,
      "grad_norm": 0.5595632195472717,
      "learning_rate": 7.877862595419848e-05,
      "loss": 0.1823,
      "step": 1197
    },
    {
      "epoch": 3.032953105196451,
      "grad_norm": 0.5024780035018921,
      "learning_rate": 7.867684478371502e-05,
      "loss": 0.1789,
      "step": 1198
    },
    {
      "epoch": 3.035487959442332,
      "grad_norm": 0.4227488934993744,
      "learning_rate": 7.857506361323156e-05,
      "loss": 0.1539,
      "step": 1199
    },
    {
      "epoch": 3.038022813688213,
      "grad_norm": 0.43486127257347107,
      "learning_rate": 7.84732824427481e-05,
      "loss": 0.1577,
      "step": 1200
    },
    {
      "epoch": 3.040557667934094,
      "grad_norm": 0.47951167821884155,
      "learning_rate": 7.837150127226464e-05,
      "loss": 0.1975,
      "step": 1201
    },
    {
      "epoch": 3.0430925221799745,
      "grad_norm": 0.4223075211048126,
      "learning_rate": 7.826972010178117e-05,
      "loss": 0.1719,
      "step": 1202
    },
    {
      "epoch": 3.0456273764258555,
      "grad_norm": 0.6699900031089783,
      "learning_rate": 7.816793893129771e-05,
      "loss": 0.2139,
      "step": 1203
    },
    {
      "epoch": 3.0481622306717364,
      "grad_norm": 0.6038373708724976,
      "learning_rate": 7.806615776081425e-05,
      "loss": 0.2163,
      "step": 1204
    },
    {
      "epoch": 3.0506970849176174,
      "grad_norm": 0.530208945274353,
      "learning_rate": 7.796437659033079e-05,
      "loss": 0.1482,
      "step": 1205
    },
    {
      "epoch": 3.053231939163498,
      "grad_norm": 0.6380701661109924,
      "learning_rate": 7.786259541984733e-05,
      "loss": 0.2191,
      "step": 1206
    },
    {
      "epoch": 3.055766793409379,
      "grad_norm": 0.6455860137939453,
      "learning_rate": 7.776081424936387e-05,
      "loss": 0.1812,
      "step": 1207
    },
    {
      "epoch": 3.05830164765526,
      "grad_norm": 0.5198556184768677,
      "learning_rate": 7.765903307888041e-05,
      "loss": 0.1602,
      "step": 1208
    },
    {
      "epoch": 3.0608365019011408,
      "grad_norm": 0.4842750132083893,
      "learning_rate": 7.755725190839695e-05,
      "loss": 0.1739,
      "step": 1209
    },
    {
      "epoch": 3.0633713561470217,
      "grad_norm": 0.6345165371894836,
      "learning_rate": 7.745547073791349e-05,
      "loss": 0.1841,
      "step": 1210
    },
    {
      "epoch": 3.0659062103929022,
      "grad_norm": 0.551673173904419,
      "learning_rate": 7.735368956743003e-05,
      "loss": 0.1755,
      "step": 1211
    },
    {
      "epoch": 3.068441064638783,
      "grad_norm": 0.5332705974578857,
      "learning_rate": 7.725190839694657e-05,
      "loss": 0.2175,
      "step": 1212
    },
    {
      "epoch": 3.070975918884664,
      "grad_norm": 0.6630911231040955,
      "learning_rate": 7.715012722646311e-05,
      "loss": 0.2868,
      "step": 1213
    },
    {
      "epoch": 3.073510773130545,
      "grad_norm": 0.42508792877197266,
      "learning_rate": 7.704834605597965e-05,
      "loss": 0.1811,
      "step": 1214
    },
    {
      "epoch": 3.076045627376426,
      "grad_norm": 0.504231870174408,
      "learning_rate": 7.694656488549619e-05,
      "loss": 0.1765,
      "step": 1215
    },
    {
      "epoch": 3.0785804816223066,
      "grad_norm": 0.39370813965797424,
      "learning_rate": 7.684478371501273e-05,
      "loss": 0.1739,
      "step": 1216
    },
    {
      "epoch": 3.0811153358681875,
      "grad_norm": 0.5411176085472107,
      "learning_rate": 7.674300254452926e-05,
      "loss": 0.2015,
      "step": 1217
    },
    {
      "epoch": 3.0836501901140685,
      "grad_norm": 0.58034348487854,
      "learning_rate": 7.66412213740458e-05,
      "loss": 0.2293,
      "step": 1218
    },
    {
      "epoch": 3.0861850443599494,
      "grad_norm": 0.48355352878570557,
      "learning_rate": 7.653944020356234e-05,
      "loss": 0.1858,
      "step": 1219
    },
    {
      "epoch": 3.08871989860583,
      "grad_norm": 0.3532313406467438,
      "learning_rate": 7.643765903307888e-05,
      "loss": 0.1689,
      "step": 1220
    },
    {
      "epoch": 3.091254752851711,
      "grad_norm": 0.36245197057724,
      "learning_rate": 7.633587786259542e-05,
      "loss": 0.1744,
      "step": 1221
    },
    {
      "epoch": 3.093789607097592,
      "grad_norm": 0.4752829372882843,
      "learning_rate": 7.623409669211196e-05,
      "loss": 0.1733,
      "step": 1222
    },
    {
      "epoch": 3.096324461343473,
      "grad_norm": 0.3701539933681488,
      "learning_rate": 7.61323155216285e-05,
      "loss": 0.158,
      "step": 1223
    },
    {
      "epoch": 3.098859315589354,
      "grad_norm": 0.45548319816589355,
      "learning_rate": 7.603053435114504e-05,
      "loss": 0.1822,
      "step": 1224
    },
    {
      "epoch": 3.1013941698352343,
      "grad_norm": 0.376499205827713,
      "learning_rate": 7.592875318066158e-05,
      "loss": 0.1613,
      "step": 1225
    },
    {
      "epoch": 3.1039290240811153,
      "grad_norm": 0.4430786967277527,
      "learning_rate": 7.582697201017812e-05,
      "loss": 0.1691,
      "step": 1226
    },
    {
      "epoch": 3.106463878326996,
      "grad_norm": 0.44311538338661194,
      "learning_rate": 7.572519083969466e-05,
      "loss": 0.1853,
      "step": 1227
    },
    {
      "epoch": 3.108998732572877,
      "grad_norm": 0.5815149545669556,
      "learning_rate": 7.56234096692112e-05,
      "loss": 0.2039,
      "step": 1228
    },
    {
      "epoch": 3.111533586818758,
      "grad_norm": 0.5101373195648193,
      "learning_rate": 7.552162849872774e-05,
      "loss": 0.2022,
      "step": 1229
    },
    {
      "epoch": 3.1140684410646386,
      "grad_norm": 0.6038093566894531,
      "learning_rate": 7.541984732824428e-05,
      "loss": 0.1859,
      "step": 1230
    },
    {
      "epoch": 3.1166032953105196,
      "grad_norm": 0.5133914351463318,
      "learning_rate": 7.531806615776081e-05,
      "loss": 0.1626,
      "step": 1231
    },
    {
      "epoch": 3.1191381495564006,
      "grad_norm": 0.40495821833610535,
      "learning_rate": 7.521628498727735e-05,
      "loss": 0.1739,
      "step": 1232
    },
    {
      "epoch": 3.1216730038022815,
      "grad_norm": 0.6585063934326172,
      "learning_rate": 7.511450381679391e-05,
      "loss": 0.2402,
      "step": 1233
    },
    {
      "epoch": 3.124207858048162,
      "grad_norm": 0.45598068833351135,
      "learning_rate": 7.501272264631045e-05,
      "loss": 0.1632,
      "step": 1234
    },
    {
      "epoch": 3.126742712294043,
      "grad_norm": 0.42114904522895813,
      "learning_rate": 7.491094147582699e-05,
      "loss": 0.1638,
      "step": 1235
    },
    {
      "epoch": 3.129277566539924,
      "grad_norm": 0.443198561668396,
      "learning_rate": 7.480916030534351e-05,
      "loss": 0.2148,
      "step": 1236
    },
    {
      "epoch": 3.131812420785805,
      "grad_norm": 0.5573143362998962,
      "learning_rate": 7.470737913486005e-05,
      "loss": 0.2219,
      "step": 1237
    },
    {
      "epoch": 3.134347275031686,
      "grad_norm": 0.6023311614990234,
      "learning_rate": 7.460559796437659e-05,
      "loss": 0.1987,
      "step": 1238
    },
    {
      "epoch": 3.1368821292775664,
      "grad_norm": 0.5282934904098511,
      "learning_rate": 7.450381679389313e-05,
      "loss": 0.2377,
      "step": 1239
    },
    {
      "epoch": 3.1394169835234473,
      "grad_norm": 0.49694669246673584,
      "learning_rate": 7.440203562340967e-05,
      "loss": 0.1804,
      "step": 1240
    },
    {
      "epoch": 3.1419518377693283,
      "grad_norm": 0.43045276403427124,
      "learning_rate": 7.430025445292621e-05,
      "loss": 0.1635,
      "step": 1241
    },
    {
      "epoch": 3.1444866920152093,
      "grad_norm": 0.4798453152179718,
      "learning_rate": 7.419847328244275e-05,
      "loss": 0.1696,
      "step": 1242
    },
    {
      "epoch": 3.14702154626109,
      "grad_norm": 0.5173293352127075,
      "learning_rate": 7.409669211195929e-05,
      "loss": 0.1802,
      "step": 1243
    },
    {
      "epoch": 3.1495564005069707,
      "grad_norm": 0.5398945808410645,
      "learning_rate": 7.399491094147583e-05,
      "loss": 0.1949,
      "step": 1244
    },
    {
      "epoch": 3.1520912547528517,
      "grad_norm": 0.5297830700874329,
      "learning_rate": 7.389312977099238e-05,
      "loss": 0.1987,
      "step": 1245
    },
    {
      "epoch": 3.1546261089987326,
      "grad_norm": 0.5320866703987122,
      "learning_rate": 7.379134860050892e-05,
      "loss": 0.1715,
      "step": 1246
    },
    {
      "epoch": 3.1571609632446136,
      "grad_norm": 0.6132882833480835,
      "learning_rate": 7.368956743002546e-05,
      "loss": 0.3204,
      "step": 1247
    },
    {
      "epoch": 3.159695817490494,
      "grad_norm": 0.4120640158653259,
      "learning_rate": 7.3587786259542e-05,
      "loss": 0.157,
      "step": 1248
    },
    {
      "epoch": 3.162230671736375,
      "grad_norm": 0.6765384674072266,
      "learning_rate": 7.348600508905854e-05,
      "loss": 0.2186,
      "step": 1249
    },
    {
      "epoch": 3.164765525982256,
      "grad_norm": 0.6318830847740173,
      "learning_rate": 7.338422391857506e-05,
      "loss": 0.2189,
      "step": 1250
    },
    {
      "epoch": 3.167300380228137,
      "grad_norm": 0.508305013179779,
      "learning_rate": 7.32824427480916e-05,
      "loss": 0.1962,
      "step": 1251
    },
    {
      "epoch": 3.169835234474018,
      "grad_norm": 0.603520393371582,
      "learning_rate": 7.318066157760814e-05,
      "loss": 0.2615,
      "step": 1252
    },
    {
      "epoch": 3.1723700887198985,
      "grad_norm": 0.7639157176017761,
      "learning_rate": 7.307888040712468e-05,
      "loss": 0.2982,
      "step": 1253
    },
    {
      "epoch": 3.1749049429657794,
      "grad_norm": 0.5995659232139587,
      "learning_rate": 7.297709923664122e-05,
      "loss": 0.2206,
      "step": 1254
    },
    {
      "epoch": 3.1774397972116604,
      "grad_norm": 0.6512479186058044,
      "learning_rate": 7.287531806615776e-05,
      "loss": 0.2065,
      "step": 1255
    },
    {
      "epoch": 3.1799746514575413,
      "grad_norm": 0.4128544330596924,
      "learning_rate": 7.27735368956743e-05,
      "loss": 0.1589,
      "step": 1256
    },
    {
      "epoch": 3.182509505703422,
      "grad_norm": 0.5341802835464478,
      "learning_rate": 7.267175572519084e-05,
      "loss": 0.1812,
      "step": 1257
    },
    {
      "epoch": 3.185044359949303,
      "grad_norm": 0.38032597303390503,
      "learning_rate": 7.256997455470739e-05,
      "loss": 0.1773,
      "step": 1258
    },
    {
      "epoch": 3.1875792141951838,
      "grad_norm": 0.5732728838920593,
      "learning_rate": 7.246819338422393e-05,
      "loss": 0.2047,
      "step": 1259
    },
    {
      "epoch": 3.1901140684410647,
      "grad_norm": 0.47396236658096313,
      "learning_rate": 7.236641221374047e-05,
      "loss": 0.2095,
      "step": 1260
    },
    {
      "epoch": 3.1926489226869457,
      "grad_norm": 0.4764629304409027,
      "learning_rate": 7.226463104325701e-05,
      "loss": 0.1802,
      "step": 1261
    },
    {
      "epoch": 3.195183776932826,
      "grad_norm": 0.5802401304244995,
      "learning_rate": 7.216284987277355e-05,
      "loss": 0.1821,
      "step": 1262
    },
    {
      "epoch": 3.197718631178707,
      "grad_norm": 0.47988972067832947,
      "learning_rate": 7.206106870229009e-05,
      "loss": 0.163,
      "step": 1263
    },
    {
      "epoch": 3.200253485424588,
      "grad_norm": 0.48500359058380127,
      "learning_rate": 7.195928753180661e-05,
      "loss": 0.1739,
      "step": 1264
    },
    {
      "epoch": 3.202788339670469,
      "grad_norm": 0.7479031682014465,
      "learning_rate": 7.185750636132315e-05,
      "loss": 0.2646,
      "step": 1265
    },
    {
      "epoch": 3.20532319391635,
      "grad_norm": 0.48695701360702515,
      "learning_rate": 7.175572519083969e-05,
      "loss": 0.1822,
      "step": 1266
    },
    {
      "epoch": 3.2078580481622305,
      "grad_norm": 0.712354838848114,
      "learning_rate": 7.165394402035623e-05,
      "loss": 0.1827,
      "step": 1267
    },
    {
      "epoch": 3.2103929024081115,
      "grad_norm": 0.4304606020450592,
      "learning_rate": 7.155216284987277e-05,
      "loss": 0.1759,
      "step": 1268
    },
    {
      "epoch": 3.2129277566539924,
      "grad_norm": 0.44741392135620117,
      "learning_rate": 7.145038167938931e-05,
      "loss": 0.1979,
      "step": 1269
    },
    {
      "epoch": 3.2154626108998734,
      "grad_norm": 0.3691045045852661,
      "learning_rate": 7.134860050890586e-05,
      "loss": 0.1575,
      "step": 1270
    },
    {
      "epoch": 3.2179974651457544,
      "grad_norm": 0.4908023476600647,
      "learning_rate": 7.12468193384224e-05,
      "loss": 0.1854,
      "step": 1271
    },
    {
      "epoch": 3.220532319391635,
      "grad_norm": 0.3953510820865631,
      "learning_rate": 7.114503816793894e-05,
      "loss": 0.1821,
      "step": 1272
    },
    {
      "epoch": 3.223067173637516,
      "grad_norm": 0.35227248072624207,
      "learning_rate": 7.104325699745548e-05,
      "loss": 0.173,
      "step": 1273
    },
    {
      "epoch": 3.225602027883397,
      "grad_norm": 0.41285187005996704,
      "learning_rate": 7.094147582697202e-05,
      "loss": 0.1708,
      "step": 1274
    },
    {
      "epoch": 3.2281368821292777,
      "grad_norm": 0.5076828002929688,
      "learning_rate": 7.083969465648856e-05,
      "loss": 0.2128,
      "step": 1275
    },
    {
      "epoch": 3.2306717363751583,
      "grad_norm": 0.5385151505470276,
      "learning_rate": 7.07379134860051e-05,
      "loss": 0.2181,
      "step": 1276
    },
    {
      "epoch": 3.233206590621039,
      "grad_norm": 0.4620850086212158,
      "learning_rate": 7.063613231552164e-05,
      "loss": 0.212,
      "step": 1277
    },
    {
      "epoch": 3.23574144486692,
      "grad_norm": 0.6768701672554016,
      "learning_rate": 7.053435114503816e-05,
      "loss": 0.2704,
      "step": 1278
    },
    {
      "epoch": 3.238276299112801,
      "grad_norm": 0.43216967582702637,
      "learning_rate": 7.04325699745547e-05,
      "loss": 0.1633,
      "step": 1279
    },
    {
      "epoch": 3.240811153358682,
      "grad_norm": 0.3756103813648224,
      "learning_rate": 7.033078880407124e-05,
      "loss": 0.1767,
      "step": 1280
    },
    {
      "epoch": 3.2433460076045626,
      "grad_norm": 0.612819254398346,
      "learning_rate": 7.022900763358778e-05,
      "loss": 0.2563,
      "step": 1281
    },
    {
      "epoch": 3.2458808618504436,
      "grad_norm": 0.5477813482284546,
      "learning_rate": 7.012722646310433e-05,
      "loss": 0.2053,
      "step": 1282
    },
    {
      "epoch": 3.2484157160963245,
      "grad_norm": 0.3412390351295471,
      "learning_rate": 7.002544529262087e-05,
      "loss": 0.1506,
      "step": 1283
    },
    {
      "epoch": 3.2509505703422055,
      "grad_norm": 0.34337860345840454,
      "learning_rate": 6.992366412213741e-05,
      "loss": 0.1612,
      "step": 1284
    },
    {
      "epoch": 3.253485424588086,
      "grad_norm": 0.37943509221076965,
      "learning_rate": 6.982188295165395e-05,
      "loss": 0.168,
      "step": 1285
    },
    {
      "epoch": 3.256020278833967,
      "grad_norm": 0.6030418872833252,
      "learning_rate": 6.972010178117049e-05,
      "loss": 0.2146,
      "step": 1286
    },
    {
      "epoch": 3.258555133079848,
      "grad_norm": 0.34367507696151733,
      "learning_rate": 6.961832061068703e-05,
      "loss": 0.1726,
      "step": 1287
    },
    {
      "epoch": 3.261089987325729,
      "grad_norm": 0.3952295780181885,
      "learning_rate": 6.951653944020357e-05,
      "loss": 0.1754,
      "step": 1288
    },
    {
      "epoch": 3.26362484157161,
      "grad_norm": 0.5151681900024414,
      "learning_rate": 6.941475826972011e-05,
      "loss": 0.1849,
      "step": 1289
    },
    {
      "epoch": 3.2661596958174903,
      "grad_norm": 0.496988445520401,
      "learning_rate": 6.931297709923665e-05,
      "loss": 0.1938,
      "step": 1290
    },
    {
      "epoch": 3.2686945500633713,
      "grad_norm": 0.45343711972236633,
      "learning_rate": 6.921119592875319e-05,
      "loss": 0.1845,
      "step": 1291
    },
    {
      "epoch": 3.2712294043092522,
      "grad_norm": 0.5323635935783386,
      "learning_rate": 6.910941475826971e-05,
      "loss": 0.177,
      "step": 1292
    },
    {
      "epoch": 3.273764258555133,
      "grad_norm": 0.39680036902427673,
      "learning_rate": 6.900763358778625e-05,
      "loss": 0.1843,
      "step": 1293
    },
    {
      "epoch": 3.2762991128010137,
      "grad_norm": 0.4767110049724579,
      "learning_rate": 6.89058524173028e-05,
      "loss": 0.2103,
      "step": 1294
    },
    {
      "epoch": 3.2788339670468947,
      "grad_norm": 0.5565052032470703,
      "learning_rate": 6.880407124681934e-05,
      "loss": 0.2185,
      "step": 1295
    },
    {
      "epoch": 3.2813688212927756,
      "grad_norm": 0.5472534894943237,
      "learning_rate": 6.870229007633588e-05,
      "loss": 0.2237,
      "step": 1296
    },
    {
      "epoch": 3.2839036755386566,
      "grad_norm": 0.632560133934021,
      "learning_rate": 6.860050890585242e-05,
      "loss": 0.2213,
      "step": 1297
    },
    {
      "epoch": 3.2864385297845375,
      "grad_norm": 0.5626386404037476,
      "learning_rate": 6.849872773536896e-05,
      "loss": 0.2324,
      "step": 1298
    },
    {
      "epoch": 3.288973384030418,
      "grad_norm": 0.5527671575546265,
      "learning_rate": 6.83969465648855e-05,
      "loss": 0.227,
      "step": 1299
    },
    {
      "epoch": 3.291508238276299,
      "grad_norm": 0.6093178391456604,
      "learning_rate": 6.829516539440204e-05,
      "loss": 0.2368,
      "step": 1300
    },
    {
      "epoch": 3.29404309252218,
      "grad_norm": 0.3845243453979492,
      "learning_rate": 6.819338422391858e-05,
      "loss": 0.1804,
      "step": 1301
    },
    {
      "epoch": 3.296577946768061,
      "grad_norm": 0.6384890079498291,
      "learning_rate": 6.809160305343512e-05,
      "loss": 0.2598,
      "step": 1302
    },
    {
      "epoch": 3.299112801013942,
      "grad_norm": 0.5135822892189026,
      "learning_rate": 6.798982188295166e-05,
      "loss": 0.2142,
      "step": 1303
    },
    {
      "epoch": 3.3016476552598224,
      "grad_norm": 0.4996071457862854,
      "learning_rate": 6.78880407124682e-05,
      "loss": 0.2107,
      "step": 1304
    },
    {
      "epoch": 3.3041825095057034,
      "grad_norm": 0.31445005536079407,
      "learning_rate": 6.778625954198474e-05,
      "loss": 0.1764,
      "step": 1305
    },
    {
      "epoch": 3.3067173637515843,
      "grad_norm": 0.544301450252533,
      "learning_rate": 6.768447837150128e-05,
      "loss": 0.2856,
      "step": 1306
    },
    {
      "epoch": 3.3092522179974653,
      "grad_norm": 0.5029551982879639,
      "learning_rate": 6.758269720101782e-05,
      "loss": 0.2374,
      "step": 1307
    },
    {
      "epoch": 3.3117870722433462,
      "grad_norm": 0.3769523799419403,
      "learning_rate": 6.748091603053436e-05,
      "loss": 0.1853,
      "step": 1308
    },
    {
      "epoch": 3.3143219264892267,
      "grad_norm": 0.3540287911891937,
      "learning_rate": 6.73791348600509e-05,
      "loss": 0.193,
      "step": 1309
    },
    {
      "epoch": 3.3168567807351077,
      "grad_norm": 0.42674198746681213,
      "learning_rate": 6.727735368956743e-05,
      "loss": 0.1953,
      "step": 1310
    },
    {
      "epoch": 3.3193916349809887,
      "grad_norm": 0.5152068138122559,
      "learning_rate": 6.717557251908397e-05,
      "loss": 0.1871,
      "step": 1311
    },
    {
      "epoch": 3.3219264892268696,
      "grad_norm": 0.48964372277259827,
      "learning_rate": 6.707379134860051e-05,
      "loss": 0.2142,
      "step": 1312
    },
    {
      "epoch": 3.32446134347275,
      "grad_norm": 0.5390191674232483,
      "learning_rate": 6.697201017811705e-05,
      "loss": 0.1764,
      "step": 1313
    },
    {
      "epoch": 3.326996197718631,
      "grad_norm": 0.3849482238292694,
      "learning_rate": 6.687022900763359e-05,
      "loss": 0.1681,
      "step": 1314
    },
    {
      "epoch": 3.329531051964512,
      "grad_norm": 0.36165010929107666,
      "learning_rate": 6.676844783715013e-05,
      "loss": 0.148,
      "step": 1315
    },
    {
      "epoch": 3.332065906210393,
      "grad_norm": 0.47739362716674805,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.1748,
      "step": 1316
    },
    {
      "epoch": 3.334600760456274,
      "grad_norm": 0.41228094696998596,
      "learning_rate": 6.656488549618321e-05,
      "loss": 0.2006,
      "step": 1317
    },
    {
      "epoch": 3.3371356147021545,
      "grad_norm": 0.43494951725006104,
      "learning_rate": 6.646310432569975e-05,
      "loss": 0.1821,
      "step": 1318
    },
    {
      "epoch": 3.3396704689480354,
      "grad_norm": 0.5502039194107056,
      "learning_rate": 6.636132315521629e-05,
      "loss": 0.208,
      "step": 1319
    },
    {
      "epoch": 3.3422053231939164,
      "grad_norm": 0.5151738524436951,
      "learning_rate": 6.625954198473283e-05,
      "loss": 0.2304,
      "step": 1320
    },
    {
      "epoch": 3.3447401774397973,
      "grad_norm": 0.3866114914417267,
      "learning_rate": 6.615776081424937e-05,
      "loss": 0.1738,
      "step": 1321
    },
    {
      "epoch": 3.347275031685678,
      "grad_norm": 0.5542702674865723,
      "learning_rate": 6.60559796437659e-05,
      "loss": 0.1885,
      "step": 1322
    },
    {
      "epoch": 3.349809885931559,
      "grad_norm": 0.5107680559158325,
      "learning_rate": 6.595419847328245e-05,
      "loss": 0.1856,
      "step": 1323
    },
    {
      "epoch": 3.3523447401774398,
      "grad_norm": 0.8266568183898926,
      "learning_rate": 6.585241730279898e-05,
      "loss": 0.2826,
      "step": 1324
    },
    {
      "epoch": 3.3548795944233207,
      "grad_norm": 0.45209088921546936,
      "learning_rate": 6.575063613231552e-05,
      "loss": 0.1519,
      "step": 1325
    },
    {
      "epoch": 3.3574144486692017,
      "grad_norm": 0.4708397388458252,
      "learning_rate": 6.564885496183206e-05,
      "loss": 0.1834,
      "step": 1326
    },
    {
      "epoch": 3.359949302915082,
      "grad_norm": 0.39958736300468445,
      "learning_rate": 6.55470737913486e-05,
      "loss": 0.1444,
      "step": 1327
    },
    {
      "epoch": 3.362484157160963,
      "grad_norm": 0.5764468312263489,
      "learning_rate": 6.544529262086514e-05,
      "loss": 0.2024,
      "step": 1328
    },
    {
      "epoch": 3.365019011406844,
      "grad_norm": 0.4573269188404083,
      "learning_rate": 6.534351145038168e-05,
      "loss": 0.1857,
      "step": 1329
    },
    {
      "epoch": 3.367553865652725,
      "grad_norm": 0.598423957824707,
      "learning_rate": 6.524173027989822e-05,
      "loss": 0.2206,
      "step": 1330
    },
    {
      "epoch": 3.3700887198986056,
      "grad_norm": 0.5643012523651123,
      "learning_rate": 6.513994910941476e-05,
      "loss": 0.157,
      "step": 1331
    },
    {
      "epoch": 3.3726235741444865,
      "grad_norm": 0.6568096876144409,
      "learning_rate": 6.50381679389313e-05,
      "loss": 0.2588,
      "step": 1332
    },
    {
      "epoch": 3.3751584283903675,
      "grad_norm": 0.6552339792251587,
      "learning_rate": 6.493638676844784e-05,
      "loss": 0.2032,
      "step": 1333
    },
    {
      "epoch": 3.3776932826362485,
      "grad_norm": 0.5274556279182434,
      "learning_rate": 6.483460559796438e-05,
      "loss": 0.1877,
      "step": 1334
    },
    {
      "epoch": 3.3802281368821294,
      "grad_norm": 0.43894869089126587,
      "learning_rate": 6.473282442748092e-05,
      "loss": 0.155,
      "step": 1335
    },
    {
      "epoch": 3.3827629911280104,
      "grad_norm": 0.6116171479225159,
      "learning_rate": 6.463104325699746e-05,
      "loss": 0.2978,
      "step": 1336
    },
    {
      "epoch": 3.385297845373891,
      "grad_norm": 0.4588301479816437,
      "learning_rate": 6.4529262086514e-05,
      "loss": 0.1765,
      "step": 1337
    },
    {
      "epoch": 3.387832699619772,
      "grad_norm": 0.4299813508987427,
      "learning_rate": 6.442748091603053e-05,
      "loss": 0.1725,
      "step": 1338
    },
    {
      "epoch": 3.390367553865653,
      "grad_norm": 0.4996776580810547,
      "learning_rate": 6.432569974554707e-05,
      "loss": 0.1815,
      "step": 1339
    },
    {
      "epoch": 3.3929024081115338,
      "grad_norm": 0.42195963859558105,
      "learning_rate": 6.422391857506361e-05,
      "loss": 0.1544,
      "step": 1340
    },
    {
      "epoch": 3.3954372623574143,
      "grad_norm": 0.3918668031692505,
      "learning_rate": 6.412213740458015e-05,
      "loss": 0.1677,
      "step": 1341
    },
    {
      "epoch": 3.3979721166032952,
      "grad_norm": 0.5436106324195862,
      "learning_rate": 6.402035623409669e-05,
      "loss": 0.2624,
      "step": 1342
    },
    {
      "epoch": 3.400506970849176,
      "grad_norm": 0.5056617856025696,
      "learning_rate": 6.391857506361324e-05,
      "loss": 0.1735,
      "step": 1343
    },
    {
      "epoch": 3.403041825095057,
      "grad_norm": 0.497035950422287,
      "learning_rate": 6.381679389312978e-05,
      "loss": 0.192,
      "step": 1344
    },
    {
      "epoch": 3.405576679340938,
      "grad_norm": 0.4464019238948822,
      "learning_rate": 6.371501272264632e-05,
      "loss": 0.165,
      "step": 1345
    },
    {
      "epoch": 3.4081115335868186,
      "grad_norm": 0.3940610885620117,
      "learning_rate": 6.361323155216285e-05,
      "loss": 0.1698,
      "step": 1346
    },
    {
      "epoch": 3.4106463878326996,
      "grad_norm": 0.34197869896888733,
      "learning_rate": 6.351145038167939e-05,
      "loss": 0.1676,
      "step": 1347
    },
    {
      "epoch": 3.4131812420785805,
      "grad_norm": 0.5477511286735535,
      "learning_rate": 6.340966921119593e-05,
      "loss": 0.2913,
      "step": 1348
    },
    {
      "epoch": 3.4157160963244615,
      "grad_norm": 0.47384947538375854,
      "learning_rate": 6.330788804071247e-05,
      "loss": 0.1807,
      "step": 1349
    },
    {
      "epoch": 3.418250950570342,
      "grad_norm": 0.4805784821510315,
      "learning_rate": 6.3206106870229e-05,
      "loss": 0.1844,
      "step": 1350
    },
    {
      "epoch": 3.420785804816223,
      "grad_norm": 0.4914521276950836,
      "learning_rate": 6.310432569974555e-05,
      "loss": 0.21,
      "step": 1351
    },
    {
      "epoch": 3.423320659062104,
      "grad_norm": 0.42754796147346497,
      "learning_rate": 6.300254452926209e-05,
      "loss": 0.2003,
      "step": 1352
    },
    {
      "epoch": 3.425855513307985,
      "grad_norm": 0.5367889404296875,
      "learning_rate": 6.290076335877862e-05,
      "loss": 0.2126,
      "step": 1353
    },
    {
      "epoch": 3.428390367553866,
      "grad_norm": 0.5015621781349182,
      "learning_rate": 6.279898218829516e-05,
      "loss": 0.176,
      "step": 1354
    },
    {
      "epoch": 3.4309252217997463,
      "grad_norm": 0.4498123228549957,
      "learning_rate": 6.269720101781172e-05,
      "loss": 0.1963,
      "step": 1355
    },
    {
      "epoch": 3.4334600760456273,
      "grad_norm": 0.4548507034778595,
      "learning_rate": 6.259541984732826e-05,
      "loss": 0.185,
      "step": 1356
    },
    {
      "epoch": 3.4359949302915083,
      "grad_norm": 0.5188789963722229,
      "learning_rate": 6.24936386768448e-05,
      "loss": 0.2152,
      "step": 1357
    },
    {
      "epoch": 3.4385297845373892,
      "grad_norm": 0.5717540979385376,
      "learning_rate": 6.239185750636133e-05,
      "loss": 0.2541,
      "step": 1358
    },
    {
      "epoch": 3.4410646387832697,
      "grad_norm": 0.43195176124572754,
      "learning_rate": 6.229007633587787e-05,
      "loss": 0.1841,
      "step": 1359
    },
    {
      "epoch": 3.4435994930291507,
      "grad_norm": 0.8148223161697388,
      "learning_rate": 6.21882951653944e-05,
      "loss": 0.1903,
      "step": 1360
    },
    {
      "epoch": 3.4461343472750317,
      "grad_norm": 0.39928868412971497,
      "learning_rate": 6.208651399491094e-05,
      "loss": 0.1551,
      "step": 1361
    },
    {
      "epoch": 3.4486692015209126,
      "grad_norm": 0.8072621822357178,
      "learning_rate": 6.198473282442748e-05,
      "loss": 0.1973,
      "step": 1362
    },
    {
      "epoch": 3.4512040557667936,
      "grad_norm": 0.6420927047729492,
      "learning_rate": 6.188295165394402e-05,
      "loss": 0.2304,
      "step": 1363
    },
    {
      "epoch": 3.453738910012674,
      "grad_norm": 0.4896611273288727,
      "learning_rate": 6.178117048346056e-05,
      "loss": 0.1968,
      "step": 1364
    },
    {
      "epoch": 3.456273764258555,
      "grad_norm": 0.5518379211425781,
      "learning_rate": 6.16793893129771e-05,
      "loss": 0.2136,
      "step": 1365
    },
    {
      "epoch": 3.458808618504436,
      "grad_norm": 0.35489922761917114,
      "learning_rate": 6.157760814249364e-05,
      "loss": 0.1735,
      "step": 1366
    },
    {
      "epoch": 3.461343472750317,
      "grad_norm": 0.3575512766838074,
      "learning_rate": 6.147582697201019e-05,
      "loss": 0.1704,
      "step": 1367
    },
    {
      "epoch": 3.463878326996198,
      "grad_norm": 0.46745261549949646,
      "learning_rate": 6.137404580152673e-05,
      "loss": 0.1702,
      "step": 1368
    },
    {
      "epoch": 3.4664131812420784,
      "grad_norm": 0.39378833770751953,
      "learning_rate": 6.127226463104327e-05,
      "loss": 0.1512,
      "step": 1369
    },
    {
      "epoch": 3.4689480354879594,
      "grad_norm": 0.5645838975906372,
      "learning_rate": 6.11704834605598e-05,
      "loss": 0.2053,
      "step": 1370
    },
    {
      "epoch": 3.4714828897338403,
      "grad_norm": 0.3613208830356598,
      "learning_rate": 6.106870229007635e-05,
      "loss": 0.1749,
      "step": 1371
    },
    {
      "epoch": 3.4740177439797213,
      "grad_norm": 0.573124349117279,
      "learning_rate": 6.096692111959288e-05,
      "loss": 0.2229,
      "step": 1372
    },
    {
      "epoch": 3.4765525982256023,
      "grad_norm": 0.43110212683677673,
      "learning_rate": 6.086513994910942e-05,
      "loss": 0.2082,
      "step": 1373
    },
    {
      "epoch": 3.4790874524714828,
      "grad_norm": 0.6268284320831299,
      "learning_rate": 6.076335877862596e-05,
      "loss": 0.2826,
      "step": 1374
    },
    {
      "epoch": 3.4816223067173637,
      "grad_norm": 0.5699491500854492,
      "learning_rate": 6.0661577608142496e-05,
      "loss": 0.2373,
      "step": 1375
    },
    {
      "epoch": 3.4841571609632447,
      "grad_norm": 0.451548308134079,
      "learning_rate": 6.0559796437659035e-05,
      "loss": 0.1782,
      "step": 1376
    },
    {
      "epoch": 3.4866920152091256,
      "grad_norm": 0.44955211877822876,
      "learning_rate": 6.0458015267175575e-05,
      "loss": 0.1896,
      "step": 1377
    },
    {
      "epoch": 3.489226869455006,
      "grad_norm": 0.44076019525527954,
      "learning_rate": 6.035623409669211e-05,
      "loss": 0.1854,
      "step": 1378
    },
    {
      "epoch": 3.491761723700887,
      "grad_norm": 0.8012815117835999,
      "learning_rate": 6.0254452926208646e-05,
      "loss": 0.2067,
      "step": 1379
    },
    {
      "epoch": 3.494296577946768,
      "grad_norm": 0.5558981895446777,
      "learning_rate": 6.01526717557252e-05,
      "loss": 0.1913,
      "step": 1380
    },
    {
      "epoch": 3.496831432192649,
      "grad_norm": 0.42501258850097656,
      "learning_rate": 6.005089058524174e-05,
      "loss": 0.1781,
      "step": 1381
    },
    {
      "epoch": 3.49936628643853,
      "grad_norm": 0.3618164658546448,
      "learning_rate": 5.994910941475828e-05,
      "loss": 0.1472,
      "step": 1382
    },
    {
      "epoch": 3.5019011406844105,
      "grad_norm": 0.5384409427642822,
      "learning_rate": 5.984732824427482e-05,
      "loss": 0.2063,
      "step": 1383
    },
    {
      "epoch": 3.5044359949302915,
      "grad_norm": 0.5103084444999695,
      "learning_rate": 5.974554707379135e-05,
      "loss": 0.1737,
      "step": 1384
    },
    {
      "epoch": 3.5069708491761724,
      "grad_norm": 0.37908968329429626,
      "learning_rate": 5.964376590330789e-05,
      "loss": 0.1599,
      "step": 1385
    },
    {
      "epoch": 3.5095057034220534,
      "grad_norm": 0.5049726963043213,
      "learning_rate": 5.954198473282443e-05,
      "loss": 0.1891,
      "step": 1386
    },
    {
      "epoch": 3.512040557667934,
      "grad_norm": 0.4436114430427551,
      "learning_rate": 5.944020356234097e-05,
      "loss": 0.1667,
      "step": 1387
    },
    {
      "epoch": 3.514575411913815,
      "grad_norm": 0.6733534336090088,
      "learning_rate": 5.933842239185751e-05,
      "loss": 0.2714,
      "step": 1388
    },
    {
      "epoch": 3.517110266159696,
      "grad_norm": 0.7258228659629822,
      "learning_rate": 5.9236641221374046e-05,
      "loss": 0.258,
      "step": 1389
    },
    {
      "epoch": 3.5196451204055768,
      "grad_norm": 0.6425923705101013,
      "learning_rate": 5.9134860050890586e-05,
      "loss": 0.1791,
      "step": 1390
    },
    {
      "epoch": 3.5221799746514577,
      "grad_norm": 0.45786988735198975,
      "learning_rate": 5.9033078880407125e-05,
      "loss": 0.1989,
      "step": 1391
    },
    {
      "epoch": 3.5247148288973387,
      "grad_norm": 0.43258994817733765,
      "learning_rate": 5.893129770992367e-05,
      "loss": 0.166,
      "step": 1392
    },
    {
      "epoch": 3.527249683143219,
      "grad_norm": 0.36486050486564636,
      "learning_rate": 5.882951653944021e-05,
      "loss": 0.1634,
      "step": 1393
    },
    {
      "epoch": 3.5297845373891,
      "grad_norm": 0.5883339047431946,
      "learning_rate": 5.872773536895675e-05,
      "loss": 0.2236,
      "step": 1394
    },
    {
      "epoch": 3.532319391634981,
      "grad_norm": 0.6296584010124207,
      "learning_rate": 5.862595419847329e-05,
      "loss": 0.1866,
      "step": 1395
    },
    {
      "epoch": 3.5348542458808616,
      "grad_norm": 0.4262075126171112,
      "learning_rate": 5.852417302798983e-05,
      "loss": 0.1707,
      "step": 1396
    },
    {
      "epoch": 3.5373891001267426,
      "grad_norm": 0.459573894739151,
      "learning_rate": 5.842239185750637e-05,
      "loss": 0.1654,
      "step": 1397
    },
    {
      "epoch": 3.5399239543726235,
      "grad_norm": 0.47115570306777954,
      "learning_rate": 5.83206106870229e-05,
      "loss": 0.1936,
      "step": 1398
    },
    {
      "epoch": 3.5424588086185045,
      "grad_norm": 0.41362589597702026,
      "learning_rate": 5.821882951653944e-05,
      "loss": 0.1897,
      "step": 1399
    },
    {
      "epoch": 3.5449936628643854,
      "grad_norm": 0.4314422607421875,
      "learning_rate": 5.811704834605598e-05,
      "loss": 0.172,
      "step": 1400
    },
    {
      "epoch": 3.5475285171102664,
      "grad_norm": 0.48116129636764526,
      "learning_rate": 5.801526717557252e-05,
      "loss": 0.1721,
      "step": 1401
    },
    {
      "epoch": 3.550063371356147,
      "grad_norm": 0.3902725279331207,
      "learning_rate": 5.791348600508906e-05,
      "loss": 0.1886,
      "step": 1402
    },
    {
      "epoch": 3.552598225602028,
      "grad_norm": 0.37996864318847656,
      "learning_rate": 5.78117048346056e-05,
      "loss": 0.1705,
      "step": 1403
    },
    {
      "epoch": 3.555133079847909,
      "grad_norm": 0.589279294013977,
      "learning_rate": 5.770992366412214e-05,
      "loss": 0.1848,
      "step": 1404
    },
    {
      "epoch": 3.5576679340937893,
      "grad_norm": 0.4233790636062622,
      "learning_rate": 5.760814249363868e-05,
      "loss": 0.18,
      "step": 1405
    },
    {
      "epoch": 3.5602027883396703,
      "grad_norm": 0.3760955333709717,
      "learning_rate": 5.750636132315522e-05,
      "loss": 0.1743,
      "step": 1406
    },
    {
      "epoch": 3.5627376425855513,
      "grad_norm": 0.552793562412262,
      "learning_rate": 5.740458015267176e-05,
      "loss": 0.2315,
      "step": 1407
    },
    {
      "epoch": 3.565272496831432,
      "grad_norm": 0.5440211892127991,
      "learning_rate": 5.73027989821883e-05,
      "loss": 0.186,
      "step": 1408
    },
    {
      "epoch": 3.567807351077313,
      "grad_norm": 0.5183967351913452,
      "learning_rate": 5.720101781170484e-05,
      "loss": 0.1626,
      "step": 1409
    },
    {
      "epoch": 3.570342205323194,
      "grad_norm": 0.47962069511413574,
      "learning_rate": 5.709923664122138e-05,
      "loss": 0.1813,
      "step": 1410
    },
    {
      "epoch": 3.5728770595690746,
      "grad_norm": 0.8065668940544128,
      "learning_rate": 5.699745547073792e-05,
      "loss": 0.2537,
      "step": 1411
    },
    {
      "epoch": 3.5754119138149556,
      "grad_norm": 0.46018585562705994,
      "learning_rate": 5.689567430025445e-05,
      "loss": 0.1756,
      "step": 1412
    },
    {
      "epoch": 3.5779467680608366,
      "grad_norm": 0.5229590535163879,
      "learning_rate": 5.679389312977099e-05,
      "loss": 0.1873,
      "step": 1413
    },
    {
      "epoch": 3.5804816223067175,
      "grad_norm": 0.510209321975708,
      "learning_rate": 5.669211195928753e-05,
      "loss": 0.167,
      "step": 1414
    },
    {
      "epoch": 3.583016476552598,
      "grad_norm": 0.4264031648635864,
      "learning_rate": 5.659033078880407e-05,
      "loss": 0.1705,
      "step": 1415
    },
    {
      "epoch": 3.585551330798479,
      "grad_norm": 0.6208323240280151,
      "learning_rate": 5.648854961832062e-05,
      "loss": 0.2268,
      "step": 1416
    },
    {
      "epoch": 3.58808618504436,
      "grad_norm": 0.3730670213699341,
      "learning_rate": 5.6386768447837154e-05,
      "loss": 0.1676,
      "step": 1417
    },
    {
      "epoch": 3.590621039290241,
      "grad_norm": 0.52936190366745,
      "learning_rate": 5.628498727735369e-05,
      "loss": 0.2055,
      "step": 1418
    },
    {
      "epoch": 3.593155893536122,
      "grad_norm": 0.44800981879234314,
      "learning_rate": 5.618320610687023e-05,
      "loss": 0.1782,
      "step": 1419
    },
    {
      "epoch": 3.5956907477820024,
      "grad_norm": 0.37429654598236084,
      "learning_rate": 5.608142493638677e-05,
      "loss": 0.1566,
      "step": 1420
    },
    {
      "epoch": 3.5982256020278833,
      "grad_norm": 0.5618942975997925,
      "learning_rate": 5.597964376590331e-05,
      "loss": 0.2249,
      "step": 1421
    },
    {
      "epoch": 3.6007604562737643,
      "grad_norm": 0.6893648505210876,
      "learning_rate": 5.587786259541985e-05,
      "loss": 0.2104,
      "step": 1422
    },
    {
      "epoch": 3.6032953105196452,
      "grad_norm": 0.4185943603515625,
      "learning_rate": 5.577608142493639e-05,
      "loss": 0.1729,
      "step": 1423
    },
    {
      "epoch": 3.6058301647655258,
      "grad_norm": 0.46326011419296265,
      "learning_rate": 5.567430025445293e-05,
      "loss": 0.1888,
      "step": 1424
    },
    {
      "epoch": 3.6083650190114067,
      "grad_norm": 0.4564262628555298,
      "learning_rate": 5.557251908396947e-05,
      "loss": 0.1957,
      "step": 1425
    },
    {
      "epoch": 3.6108998732572877,
      "grad_norm": 0.654411256313324,
      "learning_rate": 5.5470737913486e-05,
      "loss": 0.2101,
      "step": 1426
    },
    {
      "epoch": 3.6134347275031686,
      "grad_norm": 0.4059501886367798,
      "learning_rate": 5.536895674300254e-05,
      "loss": 0.1638,
      "step": 1427
    },
    {
      "epoch": 3.6159695817490496,
      "grad_norm": 0.4155724346637726,
      "learning_rate": 5.526717557251909e-05,
      "loss": 0.1799,
      "step": 1428
    },
    {
      "epoch": 3.6185044359949305,
      "grad_norm": 0.4041290581226349,
      "learning_rate": 5.516539440203563e-05,
      "loss": 0.1755,
      "step": 1429
    },
    {
      "epoch": 3.621039290240811,
      "grad_norm": 0.3458746373653412,
      "learning_rate": 5.506361323155217e-05,
      "loss": 0.1474,
      "step": 1430
    },
    {
      "epoch": 3.623574144486692,
      "grad_norm": 0.5046303272247314,
      "learning_rate": 5.496183206106871e-05,
      "loss": 0.2554,
      "step": 1431
    },
    {
      "epoch": 3.626108998732573,
      "grad_norm": 0.4284549951553345,
      "learning_rate": 5.4860050890585244e-05,
      "loss": 0.1855,
      "step": 1432
    },
    {
      "epoch": 3.6286438529784535,
      "grad_norm": 0.5116839408874512,
      "learning_rate": 5.475826972010178e-05,
      "loss": 0.1777,
      "step": 1433
    },
    {
      "epoch": 3.6311787072243344,
      "grad_norm": 0.4303711950778961,
      "learning_rate": 5.465648854961832e-05,
      "loss": 0.1792,
      "step": 1434
    },
    {
      "epoch": 3.6337135614702154,
      "grad_norm": 0.4602053463459015,
      "learning_rate": 5.455470737913486e-05,
      "loss": 0.1716,
      "step": 1435
    },
    {
      "epoch": 3.6362484157160964,
      "grad_norm": 0.47606271505355835,
      "learning_rate": 5.44529262086514e-05,
      "loss": 0.2063,
      "step": 1436
    },
    {
      "epoch": 3.6387832699619773,
      "grad_norm": 0.5861607193946838,
      "learning_rate": 5.435114503816794e-05,
      "loss": 0.2133,
      "step": 1437
    },
    {
      "epoch": 3.6413181242078583,
      "grad_norm": 0.42663708329200745,
      "learning_rate": 5.424936386768448e-05,
      "loss": 0.1662,
      "step": 1438
    },
    {
      "epoch": 3.643852978453739,
      "grad_norm": 0.6255937218666077,
      "learning_rate": 5.414758269720102e-05,
      "loss": 0.1875,
      "step": 1439
    },
    {
      "epoch": 3.6463878326996197,
      "grad_norm": 0.5422307252883911,
      "learning_rate": 5.404580152671755e-05,
      "loss": 0.1624,
      "step": 1440
    },
    {
      "epoch": 3.6489226869455007,
      "grad_norm": 0.540477991104126,
      "learning_rate": 5.3944020356234104e-05,
      "loss": 0.2489,
      "step": 1441
    },
    {
      "epoch": 3.6514575411913817,
      "grad_norm": 0.5656100511550903,
      "learning_rate": 5.3842239185750643e-05,
      "loss": 0.2289,
      "step": 1442
    },
    {
      "epoch": 3.653992395437262,
      "grad_norm": 0.5202456712722778,
      "learning_rate": 5.374045801526718e-05,
      "loss": 0.23,
      "step": 1443
    },
    {
      "epoch": 3.656527249683143,
      "grad_norm": 0.5069813132286072,
      "learning_rate": 5.363867684478372e-05,
      "loss": 0.1845,
      "step": 1444
    },
    {
      "epoch": 3.659062103929024,
      "grad_norm": 0.5711066126823425,
      "learning_rate": 5.353689567430026e-05,
      "loss": 0.2076,
      "step": 1445
    },
    {
      "epoch": 3.661596958174905,
      "grad_norm": 0.5115897059440613,
      "learning_rate": 5.3435114503816794e-05,
      "loss": 0.1696,
      "step": 1446
    },
    {
      "epoch": 3.664131812420786,
      "grad_norm": 0.6119818687438965,
      "learning_rate": 5.333333333333333e-05,
      "loss": 0.1905,
      "step": 1447
    },
    {
      "epoch": 3.6666666666666665,
      "grad_norm": 0.7333729863166809,
      "learning_rate": 5.323155216284987e-05,
      "loss": 0.2208,
      "step": 1448
    },
    {
      "epoch": 3.6692015209125475,
      "grad_norm": 0.5657917857170105,
      "learning_rate": 5.312977099236641e-05,
      "loss": 0.218,
      "step": 1449
    },
    {
      "epoch": 3.6717363751584284,
      "grad_norm": 0.5568459033966064,
      "learning_rate": 5.302798982188295e-05,
      "loss": 0.1957,
      "step": 1450
    },
    {
      "epoch": 3.6742712294043094,
      "grad_norm": 0.40060222148895264,
      "learning_rate": 5.292620865139949e-05,
      "loss": 0.1634,
      "step": 1451
    },
    {
      "epoch": 3.67680608365019,
      "grad_norm": 0.5395296216011047,
      "learning_rate": 5.282442748091603e-05,
      "loss": 0.2284,
      "step": 1452
    },
    {
      "epoch": 3.679340937896071,
      "grad_norm": 0.395298570394516,
      "learning_rate": 5.2722646310432576e-05,
      "loss": 0.1717,
      "step": 1453
    },
    {
      "epoch": 3.681875792141952,
      "grad_norm": 0.4693946838378906,
      "learning_rate": 5.2620865139949115e-05,
      "loss": 0.1719,
      "step": 1454
    },
    {
      "epoch": 3.6844106463878328,
      "grad_norm": 0.5206104516983032,
      "learning_rate": 5.2519083969465654e-05,
      "loss": 0.2158,
      "step": 1455
    },
    {
      "epoch": 3.6869455006337137,
      "grad_norm": 0.5576691031455994,
      "learning_rate": 5.2417302798982194e-05,
      "loss": 0.2031,
      "step": 1456
    },
    {
      "epoch": 3.6894803548795947,
      "grad_norm": 0.5826637148857117,
      "learning_rate": 5.231552162849873e-05,
      "loss": 0.2785,
      "step": 1457
    },
    {
      "epoch": 3.692015209125475,
      "grad_norm": 0.5928865075111389,
      "learning_rate": 5.221374045801527e-05,
      "loss": 0.1765,
      "step": 1458
    },
    {
      "epoch": 3.694550063371356,
      "grad_norm": 0.5932832956314087,
      "learning_rate": 5.211195928753181e-05,
      "loss": 0.1767,
      "step": 1459
    },
    {
      "epoch": 3.697084917617237,
      "grad_norm": 0.4178262948989868,
      "learning_rate": 5.2010178117048344e-05,
      "loss": 0.1636,
      "step": 1460
    },
    {
      "epoch": 3.6996197718631176,
      "grad_norm": 0.6029627919197083,
      "learning_rate": 5.1908396946564884e-05,
      "loss": 0.2086,
      "step": 1461
    },
    {
      "epoch": 3.7021546261089986,
      "grad_norm": 0.48641863465309143,
      "learning_rate": 5.180661577608142e-05,
      "loss": 0.1613,
      "step": 1462
    },
    {
      "epoch": 3.7046894803548795,
      "grad_norm": 0.40176740288734436,
      "learning_rate": 5.170483460559796e-05,
      "loss": 0.1647,
      "step": 1463
    },
    {
      "epoch": 3.7072243346007605,
      "grad_norm": 0.42600035667419434,
      "learning_rate": 5.16030534351145e-05,
      "loss": 0.1818,
      "step": 1464
    },
    {
      "epoch": 3.7097591888466415,
      "grad_norm": 0.48061972856521606,
      "learning_rate": 5.150127226463105e-05,
      "loss": 0.187,
      "step": 1465
    },
    {
      "epoch": 3.7122940430925224,
      "grad_norm": 0.4085710346698761,
      "learning_rate": 5.139949109414759e-05,
      "loss": 0.1562,
      "step": 1466
    },
    {
      "epoch": 3.714828897338403,
      "grad_norm": 0.4378439486026764,
      "learning_rate": 5.1297709923664126e-05,
      "loss": 0.1723,
      "step": 1467
    },
    {
      "epoch": 3.717363751584284,
      "grad_norm": 0.5806863307952881,
      "learning_rate": 5.1195928753180665e-05,
      "loss": 0.2069,
      "step": 1468
    },
    {
      "epoch": 3.719898605830165,
      "grad_norm": 0.4711120128631592,
      "learning_rate": 5.1094147582697205e-05,
      "loss": 0.1851,
      "step": 1469
    },
    {
      "epoch": 3.7224334600760454,
      "grad_norm": 0.47227099537849426,
      "learning_rate": 5.0992366412213744e-05,
      "loss": 0.1885,
      "step": 1470
    },
    {
      "epoch": 3.7249683143219263,
      "grad_norm": 0.4405531585216522,
      "learning_rate": 5.0890585241730283e-05,
      "loss": 0.1662,
      "step": 1471
    },
    {
      "epoch": 3.7275031685678073,
      "grad_norm": 0.5168079733848572,
      "learning_rate": 5.078880407124682e-05,
      "loss": 0.2002,
      "step": 1472
    },
    {
      "epoch": 3.7300380228136882,
      "grad_norm": 0.3839830160140991,
      "learning_rate": 5.068702290076336e-05,
      "loss": 0.168,
      "step": 1473
    },
    {
      "epoch": 3.732572877059569,
      "grad_norm": 0.338012158870697,
      "learning_rate": 5.0585241730279895e-05,
      "loss": 0.1596,
      "step": 1474
    },
    {
      "epoch": 3.73510773130545,
      "grad_norm": 0.5466023087501526,
      "learning_rate": 5.0483460559796434e-05,
      "loss": 0.2379,
      "step": 1475
    },
    {
      "epoch": 3.7376425855513307,
      "grad_norm": 0.44543328881263733,
      "learning_rate": 5.038167938931297e-05,
      "loss": 0.1778,
      "step": 1476
    },
    {
      "epoch": 3.7401774397972116,
      "grad_norm": 0.4166903793811798,
      "learning_rate": 5.0279898218829526e-05,
      "loss": 0.1554,
      "step": 1477
    },
    {
      "epoch": 3.7427122940430926,
      "grad_norm": 0.3806212544441223,
      "learning_rate": 5.0178117048346065e-05,
      "loss": 0.1648,
      "step": 1478
    },
    {
      "epoch": 3.7452471482889735,
      "grad_norm": 0.5990723967552185,
      "learning_rate": 5.00763358778626e-05,
      "loss": 0.2348,
      "step": 1479
    },
    {
      "epoch": 3.747782002534854,
      "grad_norm": 0.715096116065979,
      "learning_rate": 4.997455470737914e-05,
      "loss": 0.2201,
      "step": 1480
    },
    {
      "epoch": 3.750316856780735,
      "grad_norm": 0.6297019124031067,
      "learning_rate": 4.9872773536895677e-05,
      "loss": 0.2398,
      "step": 1481
    },
    {
      "epoch": 3.752851711026616,
      "grad_norm": 0.6131380200386047,
      "learning_rate": 4.9770992366412216e-05,
      "loss": 0.2128,
      "step": 1482
    },
    {
      "epoch": 3.755386565272497,
      "grad_norm": 0.5018277764320374,
      "learning_rate": 4.9669211195928755e-05,
      "loss": 0.1913,
      "step": 1483
    },
    {
      "epoch": 3.757921419518378,
      "grad_norm": 0.516939103603363,
      "learning_rate": 4.9567430025445294e-05,
      "loss": 0.1958,
      "step": 1484
    },
    {
      "epoch": 3.7604562737642584,
      "grad_norm": 0.4485652446746826,
      "learning_rate": 4.9465648854961834e-05,
      "loss": 0.1678,
      "step": 1485
    },
    {
      "epoch": 3.7629911280101394,
      "grad_norm": 0.6227991580963135,
      "learning_rate": 4.936386768447838e-05,
      "loss": 0.2403,
      "step": 1486
    },
    {
      "epoch": 3.7655259822560203,
      "grad_norm": 0.42331916093826294,
      "learning_rate": 4.926208651399491e-05,
      "loss": 0.1673,
      "step": 1487
    },
    {
      "epoch": 3.7680608365019013,
      "grad_norm": 0.5072351098060608,
      "learning_rate": 4.916030534351145e-05,
      "loss": 0.204,
      "step": 1488
    },
    {
      "epoch": 3.770595690747782,
      "grad_norm": 0.445578008890152,
      "learning_rate": 4.905852417302799e-05,
      "loss": 0.1908,
      "step": 1489
    },
    {
      "epoch": 3.7731305449936627,
      "grad_norm": 0.49046698212623596,
      "learning_rate": 4.895674300254453e-05,
      "loss": 0.1615,
      "step": 1490
    },
    {
      "epoch": 3.7756653992395437,
      "grad_norm": 0.37768882513046265,
      "learning_rate": 4.885496183206107e-05,
      "loss": 0.1604,
      "step": 1491
    },
    {
      "epoch": 3.7782002534854247,
      "grad_norm": 0.38343289494514465,
      "learning_rate": 4.8753180661577616e-05,
      "loss": 0.1709,
      "step": 1492
    },
    {
      "epoch": 3.7807351077313056,
      "grad_norm": 0.4102202355861664,
      "learning_rate": 4.8651399491094155e-05,
      "loss": 0.1629,
      "step": 1493
    },
    {
      "epoch": 3.7832699619771866,
      "grad_norm": 0.4545007050037384,
      "learning_rate": 4.854961832061069e-05,
      "loss": 0.1709,
      "step": 1494
    },
    {
      "epoch": 3.785804816223067,
      "grad_norm": 0.48300206661224365,
      "learning_rate": 4.844783715012723e-05,
      "loss": 0.2211,
      "step": 1495
    },
    {
      "epoch": 3.788339670468948,
      "grad_norm": 0.5301868319511414,
      "learning_rate": 4.8346055979643766e-05,
      "loss": 0.2053,
      "step": 1496
    },
    {
      "epoch": 3.790874524714829,
      "grad_norm": 0.48716598749160767,
      "learning_rate": 4.8244274809160306e-05,
      "loss": 0.2392,
      "step": 1497
    },
    {
      "epoch": 3.7934093789607095,
      "grad_norm": 0.6201879978179932,
      "learning_rate": 4.8142493638676845e-05,
      "loss": 0.2267,
      "step": 1498
    },
    {
      "epoch": 3.7959442332065905,
      "grad_norm": 0.46254560351371765,
      "learning_rate": 4.804071246819339e-05,
      "loss": 0.1824,
      "step": 1499
    },
    {
      "epoch": 3.7984790874524714,
      "grad_norm": 0.6153382658958435,
      "learning_rate": 4.793893129770993e-05,
      "loss": 0.2095,
      "step": 1500
    },
    {
      "epoch": 3.8010139416983524,
      "grad_norm": 0.6054911613464355,
      "learning_rate": 4.783715012722646e-05,
      "loss": 0.2291,
      "step": 1501
    },
    {
      "epoch": 3.8035487959442333,
      "grad_norm": 0.3899902403354645,
      "learning_rate": 4.7735368956743e-05,
      "loss": 0.1507,
      "step": 1502
    },
    {
      "epoch": 3.8060836501901143,
      "grad_norm": 0.4634632170200348,
      "learning_rate": 4.763358778625954e-05,
      "loss": 0.1436,
      "step": 1503
    },
    {
      "epoch": 3.808618504435995,
      "grad_norm": 0.6829271912574768,
      "learning_rate": 4.753180661577608e-05,
      "loss": 0.2611,
      "step": 1504
    },
    {
      "epoch": 3.8111533586818758,
      "grad_norm": 0.553393542766571,
      "learning_rate": 4.743002544529263e-05,
      "loss": 0.1862,
      "step": 1505
    },
    {
      "epoch": 3.8136882129277567,
      "grad_norm": 0.4285520315170288,
      "learning_rate": 4.7328244274809166e-05,
      "loss": 0.1522,
      "step": 1506
    },
    {
      "epoch": 3.8162230671736372,
      "grad_norm": 0.5505307912826538,
      "learning_rate": 4.7226463104325705e-05,
      "loss": 0.2056,
      "step": 1507
    },
    {
      "epoch": 3.818757921419518,
      "grad_norm": 0.635071873664856,
      "learning_rate": 4.712468193384224e-05,
      "loss": 0.1899,
      "step": 1508
    },
    {
      "epoch": 3.821292775665399,
      "grad_norm": 0.4297153353691101,
      "learning_rate": 4.702290076335878e-05,
      "loss": 0.1632,
      "step": 1509
    },
    {
      "epoch": 3.82382762991128,
      "grad_norm": 0.5538508892059326,
      "learning_rate": 4.6921119592875317e-05,
      "loss": 0.1965,
      "step": 1510
    },
    {
      "epoch": 3.826362484157161,
      "grad_norm": 0.6736975908279419,
      "learning_rate": 4.681933842239186e-05,
      "loss": 0.2334,
      "step": 1511
    },
    {
      "epoch": 3.828897338403042,
      "grad_norm": 0.49381881952285767,
      "learning_rate": 4.67175572519084e-05,
      "loss": 0.2074,
      "step": 1512
    },
    {
      "epoch": 3.8314321926489225,
      "grad_norm": 0.4285455346107483,
      "learning_rate": 4.661577608142494e-05,
      "loss": 0.176,
      "step": 1513
    },
    {
      "epoch": 3.8339670468948035,
      "grad_norm": 0.5771308541297913,
      "learning_rate": 4.651399491094148e-05,
      "loss": 0.229,
      "step": 1514
    },
    {
      "epoch": 3.8365019011406845,
      "grad_norm": 0.4749429225921631,
      "learning_rate": 4.641221374045801e-05,
      "loss": 0.1968,
      "step": 1515
    },
    {
      "epoch": 3.8390367553865654,
      "grad_norm": 0.48094430565834045,
      "learning_rate": 4.631043256997455e-05,
      "loss": 0.1982,
      "step": 1516
    },
    {
      "epoch": 3.841571609632446,
      "grad_norm": 0.49878042936325073,
      "learning_rate": 4.62086513994911e-05,
      "loss": 0.1552,
      "step": 1517
    },
    {
      "epoch": 3.844106463878327,
      "grad_norm": 0.4872034192085266,
      "learning_rate": 4.610687022900764e-05,
      "loss": 0.1808,
      "step": 1518
    },
    {
      "epoch": 3.846641318124208,
      "grad_norm": 0.4905577600002289,
      "learning_rate": 4.600508905852418e-05,
      "loss": 0.1703,
      "step": 1519
    },
    {
      "epoch": 3.849176172370089,
      "grad_norm": 0.49980783462524414,
      "learning_rate": 4.5903307888040716e-05,
      "loss": 0.1727,
      "step": 1520
    },
    {
      "epoch": 3.8517110266159698,
      "grad_norm": 0.5426180958747864,
      "learning_rate": 4.5801526717557256e-05,
      "loss": 0.2192,
      "step": 1521
    },
    {
      "epoch": 3.8542458808618507,
      "grad_norm": 0.6399853825569153,
      "learning_rate": 4.569974554707379e-05,
      "loss": 0.2387,
      "step": 1522
    },
    {
      "epoch": 3.8567807351077312,
      "grad_norm": 0.5311464667320251,
      "learning_rate": 4.5597964376590334e-05,
      "loss": 0.1976,
      "step": 1523
    },
    {
      "epoch": 3.859315589353612,
      "grad_norm": 0.5433202981948853,
      "learning_rate": 4.5496183206106874e-05,
      "loss": 0.1916,
      "step": 1524
    },
    {
      "epoch": 3.861850443599493,
      "grad_norm": 0.4024597704410553,
      "learning_rate": 4.539440203562341e-05,
      "loss": 0.1643,
      "step": 1525
    },
    {
      "epoch": 3.8643852978453737,
      "grad_norm": 0.347566157579422,
      "learning_rate": 4.529262086513995e-05,
      "loss": 0.1676,
      "step": 1526
    },
    {
      "epoch": 3.8669201520912546,
      "grad_norm": 0.45405861735343933,
      "learning_rate": 4.519083969465649e-05,
      "loss": 0.1963,
      "step": 1527
    },
    {
      "epoch": 3.8694550063371356,
      "grad_norm": 0.6430472731590271,
      "learning_rate": 4.508905852417303e-05,
      "loss": 0.2322,
      "step": 1528
    },
    {
      "epoch": 3.8719898605830165,
      "grad_norm": 0.4391939043998718,
      "learning_rate": 4.498727735368957e-05,
      "loss": 0.1871,
      "step": 1529
    },
    {
      "epoch": 3.8745247148288975,
      "grad_norm": 0.47301623225212097,
      "learning_rate": 4.488549618320611e-05,
      "loss": 0.1549,
      "step": 1530
    },
    {
      "epoch": 3.8770595690747784,
      "grad_norm": 0.4237573742866516,
      "learning_rate": 4.478371501272265e-05,
      "loss": 0.1548,
      "step": 1531
    },
    {
      "epoch": 3.879594423320659,
      "grad_norm": 0.5859849452972412,
      "learning_rate": 4.468193384223919e-05,
      "loss": 0.2023,
      "step": 1532
    },
    {
      "epoch": 3.88212927756654,
      "grad_norm": 0.45050573348999023,
      "learning_rate": 4.458015267175573e-05,
      "loss": 0.165,
      "step": 1533
    },
    {
      "epoch": 3.884664131812421,
      "grad_norm": 0.5347339510917664,
      "learning_rate": 4.447837150127227e-05,
      "loss": 0.1854,
      "step": 1534
    },
    {
      "epoch": 3.8871989860583014,
      "grad_norm": 0.375836580991745,
      "learning_rate": 4.4376590330788806e-05,
      "loss": 0.152,
      "step": 1535
    },
    {
      "epoch": 3.8897338403041823,
      "grad_norm": 0.5403718948364258,
      "learning_rate": 4.4274809160305345e-05,
      "loss": 0.2065,
      "step": 1536
    },
    {
      "epoch": 3.8922686945500633,
      "grad_norm": 0.5624736547470093,
      "learning_rate": 4.4173027989821885e-05,
      "loss": 0.1857,
      "step": 1537
    },
    {
      "epoch": 3.8948035487959443,
      "grad_norm": 0.5971560478210449,
      "learning_rate": 4.4071246819338424e-05,
      "loss": 0.1928,
      "step": 1538
    },
    {
      "epoch": 3.897338403041825,
      "grad_norm": 0.5225517153739929,
      "learning_rate": 4.396946564885496e-05,
      "loss": 0.2054,
      "step": 1539
    },
    {
      "epoch": 3.899873257287706,
      "grad_norm": 0.47341519594192505,
      "learning_rate": 4.38676844783715e-05,
      "loss": 0.1786,
      "step": 1540
    },
    {
      "epoch": 3.9024081115335867,
      "grad_norm": 0.3734676241874695,
      "learning_rate": 4.376590330788805e-05,
      "loss": 0.1447,
      "step": 1541
    },
    {
      "epoch": 3.9049429657794676,
      "grad_norm": 0.5003755688667297,
      "learning_rate": 4.366412213740458e-05,
      "loss": 0.1734,
      "step": 1542
    },
    {
      "epoch": 3.9074778200253486,
      "grad_norm": 0.41165000200271606,
      "learning_rate": 4.356234096692112e-05,
      "loss": 0.172,
      "step": 1543
    },
    {
      "epoch": 3.9100126742712296,
      "grad_norm": 0.45096197724342346,
      "learning_rate": 4.346055979643766e-05,
      "loss": 0.1726,
      "step": 1544
    },
    {
      "epoch": 3.91254752851711,
      "grad_norm": 0.5445842146873474,
      "learning_rate": 4.33587786259542e-05,
      "loss": 0.206,
      "step": 1545
    },
    {
      "epoch": 3.915082382762991,
      "grad_norm": 0.5139321088790894,
      "learning_rate": 4.325699745547074e-05,
      "loss": 0.1803,
      "step": 1546
    },
    {
      "epoch": 3.917617237008872,
      "grad_norm": 0.5652433633804321,
      "learning_rate": 4.3155216284987285e-05,
      "loss": 0.2051,
      "step": 1547
    },
    {
      "epoch": 3.920152091254753,
      "grad_norm": 0.38091734051704407,
      "learning_rate": 4.3053435114503824e-05,
      "loss": 0.1541,
      "step": 1548
    },
    {
      "epoch": 3.922686945500634,
      "grad_norm": 0.3614705801010132,
      "learning_rate": 4.2951653944020356e-05,
      "loss": 0.147,
      "step": 1549
    },
    {
      "epoch": 3.9252217997465144,
      "grad_norm": 0.4551761746406555,
      "learning_rate": 4.2849872773536896e-05,
      "loss": 0.1685,
      "step": 1550
    },
    {
      "epoch": 3.9277566539923954,
      "grad_norm": 0.5226624011993408,
      "learning_rate": 4.2748091603053435e-05,
      "loss": 0.1727,
      "step": 1551
    },
    {
      "epoch": 3.9302915082382763,
      "grad_norm": 0.3541867136955261,
      "learning_rate": 4.2646310432569974e-05,
      "loss": 0.1488,
      "step": 1552
    },
    {
      "epoch": 3.9328263624841573,
      "grad_norm": 0.4599204659461975,
      "learning_rate": 4.254452926208652e-05,
      "loss": 0.1536,
      "step": 1553
    },
    {
      "epoch": 3.935361216730038,
      "grad_norm": 0.45082637667655945,
      "learning_rate": 4.244274809160306e-05,
      "loss": 0.1671,
      "step": 1554
    },
    {
      "epoch": 3.9378960709759188,
      "grad_norm": 0.6053276658058167,
      "learning_rate": 4.23409669211196e-05,
      "loss": 0.2043,
      "step": 1555
    },
    {
      "epoch": 3.9404309252217997,
      "grad_norm": 0.506443440914154,
      "learning_rate": 4.223918575063613e-05,
      "loss": 0.1893,
      "step": 1556
    },
    {
      "epoch": 3.9429657794676807,
      "grad_norm": 0.6029784679412842,
      "learning_rate": 4.213740458015267e-05,
      "loss": 0.201,
      "step": 1557
    },
    {
      "epoch": 3.9455006337135616,
      "grad_norm": 0.3993350863456726,
      "learning_rate": 4.203562340966921e-05,
      "loss": 0.1637,
      "step": 1558
    },
    {
      "epoch": 3.9480354879594426,
      "grad_norm": 0.5887712836265564,
      "learning_rate": 4.193384223918575e-05,
      "loss": 0.2207,
      "step": 1559
    },
    {
      "epoch": 3.950570342205323,
      "grad_norm": 0.5538966059684753,
      "learning_rate": 4.1832061068702296e-05,
      "loss": 0.1674,
      "step": 1560
    },
    {
      "epoch": 3.953105196451204,
      "grad_norm": 0.4831174910068512,
      "learning_rate": 4.1730279898218835e-05,
      "loss": 0.1694,
      "step": 1561
    },
    {
      "epoch": 3.955640050697085,
      "grad_norm": 0.39700761437416077,
      "learning_rate": 4.1628498727735374e-05,
      "loss": 0.1695,
      "step": 1562
    },
    {
      "epoch": 3.9581749049429655,
      "grad_norm": 0.5388202667236328,
      "learning_rate": 4.152671755725191e-05,
      "loss": 0.1769,
      "step": 1563
    },
    {
      "epoch": 3.9607097591888465,
      "grad_norm": 0.5717085599899292,
      "learning_rate": 4.1424936386768446e-05,
      "loss": 0.2602,
      "step": 1564
    },
    {
      "epoch": 3.9632446134347274,
      "grad_norm": 0.4135623872280121,
      "learning_rate": 4.1323155216284985e-05,
      "loss": 0.1512,
      "step": 1565
    },
    {
      "epoch": 3.9657794676806084,
      "grad_norm": 0.478411465883255,
      "learning_rate": 4.122137404580153e-05,
      "loss": 0.1967,
      "step": 1566
    },
    {
      "epoch": 3.9683143219264894,
      "grad_norm": 0.4836915135383606,
      "learning_rate": 4.111959287531807e-05,
      "loss": 0.2297,
      "step": 1567
    },
    {
      "epoch": 3.9708491761723703,
      "grad_norm": 0.6355355978012085,
      "learning_rate": 4.101781170483461e-05,
      "loss": 0.2291,
      "step": 1568
    },
    {
      "epoch": 3.973384030418251,
      "grad_norm": 0.42811089754104614,
      "learning_rate": 4.091603053435115e-05,
      "loss": 0.1518,
      "step": 1569
    },
    {
      "epoch": 3.975918884664132,
      "grad_norm": 0.5778828859329224,
      "learning_rate": 4.081424936386768e-05,
      "loss": 0.1638,
      "step": 1570
    },
    {
      "epoch": 3.9784537389100127,
      "grad_norm": 0.4650358259677887,
      "learning_rate": 4.071246819338422e-05,
      "loss": 0.1658,
      "step": 1571
    },
    {
      "epoch": 3.9809885931558933,
      "grad_norm": 0.5939072966575623,
      "learning_rate": 4.061068702290077e-05,
      "loss": 0.2276,
      "step": 1572
    },
    {
      "epoch": 3.983523447401774,
      "grad_norm": 0.5296881794929504,
      "learning_rate": 4.050890585241731e-05,
      "loss": 0.1895,
      "step": 1573
    },
    {
      "epoch": 3.986058301647655,
      "grad_norm": 0.4479645788669586,
      "learning_rate": 4.0407124681933846e-05,
      "loss": 0.168,
      "step": 1574
    },
    {
      "epoch": 3.988593155893536,
      "grad_norm": 0.6041486859321594,
      "learning_rate": 4.0305343511450385e-05,
      "loss": 0.2225,
      "step": 1575
    },
    {
      "epoch": 3.991128010139417,
      "grad_norm": 1.0764771699905396,
      "learning_rate": 4.0203562340966925e-05,
      "loss": 0.1736,
      "step": 1576
    },
    {
      "epoch": 3.993662864385298,
      "grad_norm": 0.4830266535282135,
      "learning_rate": 4.010178117048346e-05,
      "loss": 0.2017,
      "step": 1577
    },
    {
      "epoch": 3.9961977186311786,
      "grad_norm": 0.4032004773616791,
      "learning_rate": 4e-05,
      "loss": 0.1723,
      "step": 1578
    },
    {
      "epoch": 3.9987325728770595,
      "grad_norm": 0.4441380798816681,
      "learning_rate": 3.989821882951654e-05,
      "loss": 0.1714,
      "step": 1579
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.673060953617096,
      "learning_rate": 3.979643765903308e-05,
      "loss": 0.1651,
      "step": 1580
    },
    {
      "epoch": 4.002534854245881,
      "grad_norm": 0.5185714960098267,
      "learning_rate": 3.969465648854962e-05,
      "loss": 0.1877,
      "step": 1581
    },
    {
      "epoch": 4.005069708491762,
      "grad_norm": 0.4302978217601776,
      "learning_rate": 3.959287531806616e-05,
      "loss": 0.1575,
      "step": 1582
    },
    {
      "epoch": 4.007604562737643,
      "grad_norm": 0.45982813835144043,
      "learning_rate": 3.94910941475827e-05,
      "loss": 0.1615,
      "step": 1583
    },
    {
      "epoch": 4.010139416983524,
      "grad_norm": 0.4118313789367676,
      "learning_rate": 3.938931297709924e-05,
      "loss": 0.1508,
      "step": 1584
    },
    {
      "epoch": 4.012674271229404,
      "grad_norm": 0.6039855480194092,
      "learning_rate": 3.928753180661578e-05,
      "loss": 0.1782,
      "step": 1585
    },
    {
      "epoch": 4.015209125475285,
      "grad_norm": 0.4311355948448181,
      "learning_rate": 3.918575063613232e-05,
      "loss": 0.1488,
      "step": 1586
    },
    {
      "epoch": 4.017743979721166,
      "grad_norm": 0.7398537993431091,
      "learning_rate": 3.908396946564886e-05,
      "loss": 0.1879,
      "step": 1587
    },
    {
      "epoch": 4.020278833967047,
      "grad_norm": 0.37064164876937866,
      "learning_rate": 3.8982188295165396e-05,
      "loss": 0.1257,
      "step": 1588
    },
    {
      "epoch": 4.022813688212928,
      "grad_norm": 0.46931344270706177,
      "learning_rate": 3.8880407124681936e-05,
      "loss": 0.1579,
      "step": 1589
    },
    {
      "epoch": 4.025348542458809,
      "grad_norm": 0.4544156789779663,
      "learning_rate": 3.8778625954198475e-05,
      "loss": 0.134,
      "step": 1590
    },
    {
      "epoch": 4.02788339670469,
      "grad_norm": 0.5562132000923157,
      "learning_rate": 3.8676844783715014e-05,
      "loss": 0.1488,
      "step": 1591
    },
    {
      "epoch": 4.030418250950571,
      "grad_norm": 0.5679481625556946,
      "learning_rate": 3.8575063613231554e-05,
      "loss": 0.1322,
      "step": 1592
    },
    {
      "epoch": 4.032953105196452,
      "grad_norm": 0.6101714372634888,
      "learning_rate": 3.847328244274809e-05,
      "loss": 0.1534,
      "step": 1593
    },
    {
      "epoch": 4.035487959442332,
      "grad_norm": 0.8060622215270996,
      "learning_rate": 3.837150127226463e-05,
      "loss": 0.1986,
      "step": 1594
    },
    {
      "epoch": 4.038022813688213,
      "grad_norm": 0.5501425266265869,
      "learning_rate": 3.826972010178117e-05,
      "loss": 0.1444,
      "step": 1595
    },
    {
      "epoch": 4.0405576679340935,
      "grad_norm": 0.5117461085319519,
      "learning_rate": 3.816793893129771e-05,
      "loss": 0.1259,
      "step": 1596
    },
    {
      "epoch": 4.0430925221799745,
      "grad_norm": 0.571770429611206,
      "learning_rate": 3.806615776081425e-05,
      "loss": 0.1413,
      "step": 1597
    },
    {
      "epoch": 4.0456273764258555,
      "grad_norm": 0.7756439447402954,
      "learning_rate": 3.796437659033079e-05,
      "loss": 0.1874,
      "step": 1598
    },
    {
      "epoch": 4.048162230671736,
      "grad_norm": 0.6393389701843262,
      "learning_rate": 3.786259541984733e-05,
      "loss": 0.1226,
      "step": 1599
    },
    {
      "epoch": 4.050697084917617,
      "grad_norm": 0.7177454233169556,
      "learning_rate": 3.776081424936387e-05,
      "loss": 0.1382,
      "step": 1600
    },
    {
      "epoch": 4.053231939163498,
      "grad_norm": 0.6561391353607178,
      "learning_rate": 3.765903307888041e-05,
      "loss": 0.1557,
      "step": 1601
    },
    {
      "epoch": 4.055766793409379,
      "grad_norm": 0.8319444060325623,
      "learning_rate": 3.7557251908396954e-05,
      "loss": 0.1608,
      "step": 1602
    },
    {
      "epoch": 4.05830164765526,
      "grad_norm": 0.7468693852424622,
      "learning_rate": 3.745547073791349e-05,
      "loss": 0.1442,
      "step": 1603
    },
    {
      "epoch": 4.06083650190114,
      "grad_norm": 0.623657763004303,
      "learning_rate": 3.7353689567430025e-05,
      "loss": 0.1395,
      "step": 1604
    },
    {
      "epoch": 4.063371356147021,
      "grad_norm": 0.5870152115821838,
      "learning_rate": 3.7251908396946565e-05,
      "loss": 0.1322,
      "step": 1605
    },
    {
      "epoch": 4.065906210392902,
      "grad_norm": 0.6840811371803284,
      "learning_rate": 3.7150127226463104e-05,
      "loss": 0.132,
      "step": 1606
    },
    {
      "epoch": 4.068441064638783,
      "grad_norm": 0.6177504658699036,
      "learning_rate": 3.704834605597964e-05,
      "loss": 0.1265,
      "step": 1607
    },
    {
      "epoch": 4.070975918884664,
      "grad_norm": 0.6908831000328064,
      "learning_rate": 3.694656488549619e-05,
      "loss": 0.1593,
      "step": 1608
    },
    {
      "epoch": 4.073510773130545,
      "grad_norm": 0.787434458732605,
      "learning_rate": 3.684478371501273e-05,
      "loss": 0.1184,
      "step": 1609
    },
    {
      "epoch": 4.076045627376426,
      "grad_norm": 0.8011195063591003,
      "learning_rate": 3.674300254452927e-05,
      "loss": 0.1341,
      "step": 1610
    },
    {
      "epoch": 4.078580481622307,
      "grad_norm": 0.5523831248283386,
      "learning_rate": 3.66412213740458e-05,
      "loss": 0.1283,
      "step": 1611
    },
    {
      "epoch": 4.081115335868188,
      "grad_norm": 0.6396963596343994,
      "learning_rate": 3.653944020356234e-05,
      "loss": 0.1424,
      "step": 1612
    },
    {
      "epoch": 4.083650190114068,
      "grad_norm": 0.7471883893013,
      "learning_rate": 3.643765903307888e-05,
      "loss": 0.1627,
      "step": 1613
    },
    {
      "epoch": 4.086185044359949,
      "grad_norm": 0.5498061776161194,
      "learning_rate": 3.633587786259542e-05,
      "loss": 0.1478,
      "step": 1614
    },
    {
      "epoch": 4.08871989860583,
      "grad_norm": 0.6853391528129578,
      "learning_rate": 3.6234096692111965e-05,
      "loss": 0.1588,
      "step": 1615
    },
    {
      "epoch": 4.091254752851711,
      "grad_norm": 0.6638361811637878,
      "learning_rate": 3.6132315521628504e-05,
      "loss": 0.1695,
      "step": 1616
    },
    {
      "epoch": 4.093789607097592,
      "grad_norm": 0.6155263781547546,
      "learning_rate": 3.603053435114504e-05,
      "loss": 0.1355,
      "step": 1617
    },
    {
      "epoch": 4.096324461343473,
      "grad_norm": 0.574590265750885,
      "learning_rate": 3.5928753180661576e-05,
      "loss": 0.1498,
      "step": 1618
    },
    {
      "epoch": 4.098859315589354,
      "grad_norm": 0.5972251296043396,
      "learning_rate": 3.5826972010178115e-05,
      "loss": 0.1684,
      "step": 1619
    },
    {
      "epoch": 4.101394169835235,
      "grad_norm": 0.668618381023407,
      "learning_rate": 3.5725190839694654e-05,
      "loss": 0.1377,
      "step": 1620
    },
    {
      "epoch": 4.103929024081116,
      "grad_norm": 0.6238232851028442,
      "learning_rate": 3.56234096692112e-05,
      "loss": 0.2025,
      "step": 1621
    },
    {
      "epoch": 4.106463878326996,
      "grad_norm": 0.9182467460632324,
      "learning_rate": 3.552162849872774e-05,
      "loss": 0.1539,
      "step": 1622
    },
    {
      "epoch": 4.108998732572877,
      "grad_norm": 0.6368919014930725,
      "learning_rate": 3.541984732824428e-05,
      "loss": 0.1421,
      "step": 1623
    },
    {
      "epoch": 4.111533586818758,
      "grad_norm": 0.7871132493019104,
      "learning_rate": 3.531806615776082e-05,
      "loss": 0.1482,
      "step": 1624
    },
    {
      "epoch": 4.114068441064639,
      "grad_norm": 0.7697343230247498,
      "learning_rate": 3.521628498727735e-05,
      "loss": 0.1607,
      "step": 1625
    },
    {
      "epoch": 4.11660329531052,
      "grad_norm": 0.5805296897888184,
      "learning_rate": 3.511450381679389e-05,
      "loss": 0.1497,
      "step": 1626
    },
    {
      "epoch": 4.119138149556401,
      "grad_norm": 0.6484183073043823,
      "learning_rate": 3.5012722646310436e-05,
      "loss": 0.1827,
      "step": 1627
    },
    {
      "epoch": 4.1216730038022815,
      "grad_norm": 1.0351064205169678,
      "learning_rate": 3.4910941475826976e-05,
      "loss": 0.2331,
      "step": 1628
    },
    {
      "epoch": 4.1242078580481625,
      "grad_norm": 0.620452344417572,
      "learning_rate": 3.4809160305343515e-05,
      "loss": 0.1516,
      "step": 1629
    },
    {
      "epoch": 4.126742712294043,
      "grad_norm": 0.6269112229347229,
      "learning_rate": 3.4707379134860054e-05,
      "loss": 0.1322,
      "step": 1630
    },
    {
      "epoch": 4.129277566539924,
      "grad_norm": 0.7780957221984863,
      "learning_rate": 3.4605597964376594e-05,
      "loss": 0.1974,
      "step": 1631
    },
    {
      "epoch": 4.1318124207858045,
      "grad_norm": 0.6183624267578125,
      "learning_rate": 3.4503816793893126e-05,
      "loss": 0.1423,
      "step": 1632
    },
    {
      "epoch": 4.134347275031685,
      "grad_norm": 0.715943455696106,
      "learning_rate": 3.440203562340967e-05,
      "loss": 0.1422,
      "step": 1633
    },
    {
      "epoch": 4.136882129277566,
      "grad_norm": 0.6383997201919556,
      "learning_rate": 3.430025445292621e-05,
      "loss": 0.1566,
      "step": 1634
    },
    {
      "epoch": 4.139416983523447,
      "grad_norm": 0.6354379653930664,
      "learning_rate": 3.419847328244275e-05,
      "loss": 0.14,
      "step": 1635
    },
    {
      "epoch": 4.141951837769328,
      "grad_norm": 0.5692049264907837,
      "learning_rate": 3.409669211195929e-05,
      "loss": 0.1315,
      "step": 1636
    },
    {
      "epoch": 4.144486692015209,
      "grad_norm": 0.5286855697631836,
      "learning_rate": 3.399491094147583e-05,
      "loss": 0.119,
      "step": 1637
    },
    {
      "epoch": 4.14702154626109,
      "grad_norm": 0.6007808446884155,
      "learning_rate": 3.389312977099237e-05,
      "loss": 0.1368,
      "step": 1638
    },
    {
      "epoch": 4.149556400506971,
      "grad_norm": 0.8727791905403137,
      "learning_rate": 3.379134860050891e-05,
      "loss": 0.1635,
      "step": 1639
    },
    {
      "epoch": 4.152091254752852,
      "grad_norm": 0.7203207015991211,
      "learning_rate": 3.368956743002545e-05,
      "loss": 0.1668,
      "step": 1640
    },
    {
      "epoch": 4.154626108998732,
      "grad_norm": 0.7178492546081543,
      "learning_rate": 3.358778625954199e-05,
      "loss": 0.1601,
      "step": 1641
    },
    {
      "epoch": 4.157160963244613,
      "grad_norm": 0.6133365035057068,
      "learning_rate": 3.3486005089058526e-05,
      "loss": 0.1438,
      "step": 1642
    },
    {
      "epoch": 4.159695817490494,
      "grad_norm": 0.690122127532959,
      "learning_rate": 3.3384223918575065e-05,
      "loss": 0.1592,
      "step": 1643
    },
    {
      "epoch": 4.162230671736375,
      "grad_norm": 0.5469484925270081,
      "learning_rate": 3.3282442748091605e-05,
      "loss": 0.1499,
      "step": 1644
    },
    {
      "epoch": 4.164765525982256,
      "grad_norm": 0.7380850911140442,
      "learning_rate": 3.3180661577608144e-05,
      "loss": 0.1724,
      "step": 1645
    },
    {
      "epoch": 4.167300380228137,
      "grad_norm": 0.6949165463447571,
      "learning_rate": 3.307888040712468e-05,
      "loss": 0.1642,
      "step": 1646
    },
    {
      "epoch": 4.169835234474018,
      "grad_norm": 0.6445840001106262,
      "learning_rate": 3.297709923664122e-05,
      "loss": 0.1576,
      "step": 1647
    },
    {
      "epoch": 4.172370088719899,
      "grad_norm": 0.577178418636322,
      "learning_rate": 3.287531806615776e-05,
      "loss": 0.1482,
      "step": 1648
    },
    {
      "epoch": 4.17490494296578,
      "grad_norm": 0.5232000350952148,
      "learning_rate": 3.27735368956743e-05,
      "loss": 0.1385,
      "step": 1649
    },
    {
      "epoch": 4.17743979721166,
      "grad_norm": 0.8429796695709229,
      "learning_rate": 3.267175572519084e-05,
      "loss": 0.2456,
      "step": 1650
    },
    {
      "epoch": 4.179974651457541,
      "grad_norm": 0.5647293925285339,
      "learning_rate": 3.256997455470738e-05,
      "loss": 0.1482,
      "step": 1651
    },
    {
      "epoch": 4.182509505703422,
      "grad_norm": 0.7679947018623352,
      "learning_rate": 3.246819338422392e-05,
      "loss": 0.1705,
      "step": 1652
    },
    {
      "epoch": 4.185044359949303,
      "grad_norm": 0.7913497686386108,
      "learning_rate": 3.236641221374046e-05,
      "loss": 0.2133,
      "step": 1653
    },
    {
      "epoch": 4.187579214195184,
      "grad_norm": 0.5105036497116089,
      "learning_rate": 3.2264631043257e-05,
      "loss": 0.1335,
      "step": 1654
    },
    {
      "epoch": 4.190114068441065,
      "grad_norm": 0.6503207087516785,
      "learning_rate": 3.216284987277354e-05,
      "loss": 0.1872,
      "step": 1655
    },
    {
      "epoch": 4.192648922686946,
      "grad_norm": 0.9579104781150818,
      "learning_rate": 3.2061068702290076e-05,
      "loss": 0.1985,
      "step": 1656
    },
    {
      "epoch": 4.195183776932827,
      "grad_norm": 0.5334345698356628,
      "learning_rate": 3.195928753180662e-05,
      "loss": 0.137,
      "step": 1657
    },
    {
      "epoch": 4.197718631178708,
      "grad_norm": 0.7031605243682861,
      "learning_rate": 3.185750636132316e-05,
      "loss": 0.1574,
      "step": 1658
    },
    {
      "epoch": 4.200253485424588,
      "grad_norm": 0.6237590909004211,
      "learning_rate": 3.1755725190839694e-05,
      "loss": 0.1686,
      "step": 1659
    },
    {
      "epoch": 4.202788339670469,
      "grad_norm": 0.827680230140686,
      "learning_rate": 3.1653944020356234e-05,
      "loss": 0.1765,
      "step": 1660
    },
    {
      "epoch": 4.20532319391635,
      "grad_norm": 0.6170578002929688,
      "learning_rate": 3.155216284987277e-05,
      "loss": 0.1699,
      "step": 1661
    },
    {
      "epoch": 4.2078580481622305,
      "grad_norm": 0.600803017616272,
      "learning_rate": 3.145038167938931e-05,
      "loss": 0.1345,
      "step": 1662
    },
    {
      "epoch": 4.2103929024081115,
      "grad_norm": 0.5505921840667725,
      "learning_rate": 3.134860050890586e-05,
      "loss": 0.1418,
      "step": 1663
    },
    {
      "epoch": 4.212927756653992,
      "grad_norm": 0.5893916487693787,
      "learning_rate": 3.12468193384224e-05,
      "loss": 0.1414,
      "step": 1664
    },
    {
      "epoch": 4.215462610899873,
      "grad_norm": 0.7622592449188232,
      "learning_rate": 3.114503816793894e-05,
      "loss": 0.1568,
      "step": 1665
    },
    {
      "epoch": 4.217997465145754,
      "grad_norm": 0.6462287306785583,
      "learning_rate": 3.104325699745547e-05,
      "loss": 0.1641,
      "step": 1666
    },
    {
      "epoch": 4.220532319391635,
      "grad_norm": 0.4971311092376709,
      "learning_rate": 3.094147582697201e-05,
      "loss": 0.1276,
      "step": 1667
    },
    {
      "epoch": 4.223067173637516,
      "grad_norm": 0.7270475029945374,
      "learning_rate": 3.083969465648855e-05,
      "loss": 0.1603,
      "step": 1668
    },
    {
      "epoch": 4.225602027883396,
      "grad_norm": 0.5765766501426697,
      "learning_rate": 3.0737913486005094e-05,
      "loss": 0.1341,
      "step": 1669
    },
    {
      "epoch": 4.228136882129277,
      "grad_norm": 0.577694296836853,
      "learning_rate": 3.0636132315521633e-05,
      "loss": 0.1415,
      "step": 1670
    },
    {
      "epoch": 4.230671736375158,
      "grad_norm": 0.6085098385810852,
      "learning_rate": 3.053435114503817e-05,
      "loss": 0.1359,
      "step": 1671
    },
    {
      "epoch": 4.233206590621039,
      "grad_norm": 0.6224119663238525,
      "learning_rate": 3.043256997455471e-05,
      "loss": 0.1494,
      "step": 1672
    },
    {
      "epoch": 4.23574144486692,
      "grad_norm": 0.4535973072052002,
      "learning_rate": 3.0330788804071248e-05,
      "loss": 0.1415,
      "step": 1673
    },
    {
      "epoch": 4.238276299112801,
      "grad_norm": 0.6283777356147766,
      "learning_rate": 3.0229007633587787e-05,
      "loss": 0.1569,
      "step": 1674
    },
    {
      "epoch": 4.240811153358682,
      "grad_norm": 0.6005566120147705,
      "learning_rate": 3.0127226463104323e-05,
      "loss": 0.1385,
      "step": 1675
    },
    {
      "epoch": 4.243346007604563,
      "grad_norm": 0.6437854766845703,
      "learning_rate": 3.002544529262087e-05,
      "loss": 0.1584,
      "step": 1676
    },
    {
      "epoch": 4.245880861850444,
      "grad_norm": 0.5184986591339111,
      "learning_rate": 2.992366412213741e-05,
      "loss": 0.1384,
      "step": 1677
    },
    {
      "epoch": 4.248415716096324,
      "grad_norm": 0.5969160199165344,
      "learning_rate": 2.9821882951653945e-05,
      "loss": 0.1609,
      "step": 1678
    },
    {
      "epoch": 4.250950570342205,
      "grad_norm": 0.85272616147995,
      "learning_rate": 2.9720101781170484e-05,
      "loss": 0.178,
      "step": 1679
    },
    {
      "epoch": 4.253485424588086,
      "grad_norm": 0.5351912379264832,
      "learning_rate": 2.9618320610687023e-05,
      "loss": 0.1465,
      "step": 1680
    },
    {
      "epoch": 4.256020278833967,
      "grad_norm": 0.5821883678436279,
      "learning_rate": 2.9516539440203562e-05,
      "loss": 0.135,
      "step": 1681
    },
    {
      "epoch": 4.258555133079848,
      "grad_norm": 0.5453548431396484,
      "learning_rate": 2.9414758269720105e-05,
      "loss": 0.1287,
      "step": 1682
    },
    {
      "epoch": 4.261089987325729,
      "grad_norm": 0.6280243396759033,
      "learning_rate": 2.9312977099236644e-05,
      "loss": 0.152,
      "step": 1683
    },
    {
      "epoch": 4.26362484157161,
      "grad_norm": 0.5709437131881714,
      "learning_rate": 2.9211195928753184e-05,
      "loss": 0.1487,
      "step": 1684
    },
    {
      "epoch": 4.266159695817491,
      "grad_norm": 0.4667048752307892,
      "learning_rate": 2.910941475826972e-05,
      "loss": 0.129,
      "step": 1685
    },
    {
      "epoch": 4.268694550063372,
      "grad_norm": 0.5744767189025879,
      "learning_rate": 2.900763358778626e-05,
      "loss": 0.1668,
      "step": 1686
    },
    {
      "epoch": 4.271229404309253,
      "grad_norm": 0.552631139755249,
      "learning_rate": 2.89058524173028e-05,
      "loss": 0.128,
      "step": 1687
    },
    {
      "epoch": 4.273764258555133,
      "grad_norm": 0.46616679430007935,
      "learning_rate": 2.880407124681934e-05,
      "loss": 0.1168,
      "step": 1688
    },
    {
      "epoch": 4.276299112801014,
      "grad_norm": 0.7842658758163452,
      "learning_rate": 2.870229007633588e-05,
      "loss": 0.1617,
      "step": 1689
    },
    {
      "epoch": 4.278833967046895,
      "grad_norm": 0.5530945062637329,
      "learning_rate": 2.860050890585242e-05,
      "loss": 0.1619,
      "step": 1690
    },
    {
      "epoch": 4.281368821292776,
      "grad_norm": 0.9341786503791809,
      "learning_rate": 2.849872773536896e-05,
      "loss": 0.231,
      "step": 1691
    },
    {
      "epoch": 4.283903675538657,
      "grad_norm": 0.8043704032897949,
      "learning_rate": 2.8396946564885495e-05,
      "loss": 0.1826,
      "step": 1692
    },
    {
      "epoch": 4.2864385297845375,
      "grad_norm": 0.4446638524532318,
      "learning_rate": 2.8295165394402034e-05,
      "loss": 0.1413,
      "step": 1693
    },
    {
      "epoch": 4.2889733840304185,
      "grad_norm": 0.6845833659172058,
      "learning_rate": 2.8193384223918577e-05,
      "loss": 0.1577,
      "step": 1694
    },
    {
      "epoch": 4.2915082382762995,
      "grad_norm": 0.6702572107315063,
      "learning_rate": 2.8091603053435116e-05,
      "loss": 0.1714,
      "step": 1695
    },
    {
      "epoch": 4.29404309252218,
      "grad_norm": 0.6405001282691956,
      "learning_rate": 2.7989821882951656e-05,
      "loss": 0.1527,
      "step": 1696
    },
    {
      "epoch": 4.2965779467680605,
      "grad_norm": 0.6155828833580017,
      "learning_rate": 2.7888040712468195e-05,
      "loss": 0.1471,
      "step": 1697
    },
    {
      "epoch": 4.299112801013941,
      "grad_norm": 0.5606924295425415,
      "learning_rate": 2.7786259541984734e-05,
      "loss": 0.1331,
      "step": 1698
    },
    {
      "epoch": 4.301647655259822,
      "grad_norm": 0.7498462200164795,
      "learning_rate": 2.768447837150127e-05,
      "loss": 0.1713,
      "step": 1699
    },
    {
      "epoch": 4.304182509505703,
      "grad_norm": 0.6262723803520203,
      "learning_rate": 2.7582697201017816e-05,
      "loss": 0.1585,
      "step": 1700
    },
    {
      "epoch": 4.306717363751584,
      "grad_norm": 0.6729116439819336,
      "learning_rate": 2.7480916030534355e-05,
      "loss": 0.1347,
      "step": 1701
    },
    {
      "epoch": 4.309252217997465,
      "grad_norm": 0.7870539426803589,
      "learning_rate": 2.737913486005089e-05,
      "loss": 0.1512,
      "step": 1702
    },
    {
      "epoch": 4.311787072243346,
      "grad_norm": 0.4943903684616089,
      "learning_rate": 2.727735368956743e-05,
      "loss": 0.1274,
      "step": 1703
    },
    {
      "epoch": 4.314321926489227,
      "grad_norm": 0.4763108193874359,
      "learning_rate": 2.717557251908397e-05,
      "loss": 0.1228,
      "step": 1704
    },
    {
      "epoch": 4.316856780735108,
      "grad_norm": 0.6400578618049622,
      "learning_rate": 2.707379134860051e-05,
      "loss": 0.1558,
      "step": 1705
    },
    {
      "epoch": 4.319391634980988,
      "grad_norm": 0.5445212125778198,
      "learning_rate": 2.6972010178117052e-05,
      "loss": 0.1328,
      "step": 1706
    },
    {
      "epoch": 4.321926489226869,
      "grad_norm": 0.6329374313354492,
      "learning_rate": 2.687022900763359e-05,
      "loss": 0.1615,
      "step": 1707
    },
    {
      "epoch": 4.32446134347275,
      "grad_norm": 0.5299343466758728,
      "learning_rate": 2.676844783715013e-05,
      "loss": 0.122,
      "step": 1708
    },
    {
      "epoch": 4.326996197718631,
      "grad_norm": 0.6486507058143616,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 0.1553,
      "step": 1709
    },
    {
      "epoch": 4.329531051964512,
      "grad_norm": 0.6306889653205872,
      "learning_rate": 2.6564885496183206e-05,
      "loss": 0.1638,
      "step": 1710
    },
    {
      "epoch": 4.332065906210393,
      "grad_norm": 0.6417018175125122,
      "learning_rate": 2.6463104325699745e-05,
      "loss": 0.1404,
      "step": 1711
    },
    {
      "epoch": 4.334600760456274,
      "grad_norm": 0.7283552289009094,
      "learning_rate": 2.6361323155216288e-05,
      "loss": 0.1837,
      "step": 1712
    },
    {
      "epoch": 4.337135614702155,
      "grad_norm": 0.7142099142074585,
      "learning_rate": 2.6259541984732827e-05,
      "loss": 0.1535,
      "step": 1713
    },
    {
      "epoch": 4.339670468948036,
      "grad_norm": 0.6059632897377014,
      "learning_rate": 2.6157760814249367e-05,
      "loss": 0.1551,
      "step": 1714
    },
    {
      "epoch": 4.342205323193916,
      "grad_norm": 0.6492133140563965,
      "learning_rate": 2.6055979643765906e-05,
      "loss": 0.1413,
      "step": 1715
    },
    {
      "epoch": 4.344740177439797,
      "grad_norm": 0.7166099548339844,
      "learning_rate": 2.5954198473282442e-05,
      "loss": 0.1534,
      "step": 1716
    },
    {
      "epoch": 4.347275031685678,
      "grad_norm": 0.6357300877571106,
      "learning_rate": 2.585241730279898e-05,
      "loss": 0.1445,
      "step": 1717
    },
    {
      "epoch": 4.349809885931559,
      "grad_norm": 0.6684461236000061,
      "learning_rate": 2.5750636132315524e-05,
      "loss": 0.1469,
      "step": 1718
    },
    {
      "epoch": 4.35234474017744,
      "grad_norm": 0.7808713912963867,
      "learning_rate": 2.5648854961832063e-05,
      "loss": 0.1892,
      "step": 1719
    },
    {
      "epoch": 4.354879594423321,
      "grad_norm": 0.6660336852073669,
      "learning_rate": 2.5547073791348602e-05,
      "loss": 0.1545,
      "step": 1720
    },
    {
      "epoch": 4.357414448669202,
      "grad_norm": 0.7266603112220764,
      "learning_rate": 2.5445292620865142e-05,
      "loss": 0.1346,
      "step": 1721
    },
    {
      "epoch": 4.359949302915083,
      "grad_norm": 0.5710493326187134,
      "learning_rate": 2.534351145038168e-05,
      "loss": 0.1199,
      "step": 1722
    },
    {
      "epoch": 4.362484157160964,
      "grad_norm": 0.6178765296936035,
      "learning_rate": 2.5241730279898217e-05,
      "loss": 0.1416,
      "step": 1723
    },
    {
      "epoch": 4.365019011406844,
      "grad_norm": 0.5881832242012024,
      "learning_rate": 2.5139949109414763e-05,
      "loss": 0.1389,
      "step": 1724
    },
    {
      "epoch": 4.367553865652725,
      "grad_norm": 0.5589767694473267,
      "learning_rate": 2.50381679389313e-05,
      "loss": 0.1356,
      "step": 1725
    },
    {
      "epoch": 4.370088719898606,
      "grad_norm": 0.611072301864624,
      "learning_rate": 2.4936386768447838e-05,
      "loss": 0.1618,
      "step": 1726
    },
    {
      "epoch": 4.3726235741444865,
      "grad_norm": 1.0045723915100098,
      "learning_rate": 2.4834605597964378e-05,
      "loss": 0.2004,
      "step": 1727
    },
    {
      "epoch": 4.3751584283903675,
      "grad_norm": 1.0154621601104736,
      "learning_rate": 2.4732824427480917e-05,
      "loss": 0.1593,
      "step": 1728
    },
    {
      "epoch": 4.3776932826362485,
      "grad_norm": 0.7933842539787292,
      "learning_rate": 2.4631043256997456e-05,
      "loss": 0.183,
      "step": 1729
    },
    {
      "epoch": 4.380228136882129,
      "grad_norm": 0.8141732811927795,
      "learning_rate": 2.4529262086513996e-05,
      "loss": 0.1412,
      "step": 1730
    },
    {
      "epoch": 4.38276299112801,
      "grad_norm": 0.6575155854225159,
      "learning_rate": 2.4427480916030535e-05,
      "loss": 0.1592,
      "step": 1731
    },
    {
      "epoch": 4.385297845373891,
      "grad_norm": 0.7710108757019043,
      "learning_rate": 2.4325699745547078e-05,
      "loss": 0.2306,
      "step": 1732
    },
    {
      "epoch": 4.387832699619771,
      "grad_norm": 0.6438276767730713,
      "learning_rate": 2.4223918575063613e-05,
      "loss": 0.143,
      "step": 1733
    },
    {
      "epoch": 4.390367553865652,
      "grad_norm": 0.7019467949867249,
      "learning_rate": 2.4122137404580153e-05,
      "loss": 0.1641,
      "step": 1734
    },
    {
      "epoch": 4.392902408111533,
      "grad_norm": 0.598584771156311,
      "learning_rate": 2.4020356234096695e-05,
      "loss": 0.1456,
      "step": 1735
    },
    {
      "epoch": 4.395437262357414,
      "grad_norm": 0.6024305820465088,
      "learning_rate": 2.391857506361323e-05,
      "loss": 0.1287,
      "step": 1736
    },
    {
      "epoch": 4.397972116603295,
      "grad_norm": 0.8446558713912964,
      "learning_rate": 2.381679389312977e-05,
      "loss": 0.1705,
      "step": 1737
    },
    {
      "epoch": 4.400506970849176,
      "grad_norm": 0.5697831511497498,
      "learning_rate": 2.3715012722646313e-05,
      "loss": 0.1386,
      "step": 1738
    },
    {
      "epoch": 4.403041825095057,
      "grad_norm": 0.6655327677726746,
      "learning_rate": 2.3613231552162853e-05,
      "loss": 0.186,
      "step": 1739
    },
    {
      "epoch": 4.405576679340938,
      "grad_norm": 1.1001065969467163,
      "learning_rate": 2.351145038167939e-05,
      "loss": 0.2531,
      "step": 1740
    },
    {
      "epoch": 4.408111533586819,
      "grad_norm": 0.5302372574806213,
      "learning_rate": 2.340966921119593e-05,
      "loss": 0.1342,
      "step": 1741
    },
    {
      "epoch": 4.4106463878327,
      "grad_norm": 0.6450605392456055,
      "learning_rate": 2.330788804071247e-05,
      "loss": 0.1499,
      "step": 1742
    },
    {
      "epoch": 4.41318124207858,
      "grad_norm": 0.5733135342597961,
      "learning_rate": 2.3206106870229007e-05,
      "loss": 0.166,
      "step": 1743
    },
    {
      "epoch": 4.415716096324461,
      "grad_norm": 0.609865665435791,
      "learning_rate": 2.310432569974555e-05,
      "loss": 0.1306,
      "step": 1744
    },
    {
      "epoch": 4.418250950570342,
      "grad_norm": 0.5957082509994507,
      "learning_rate": 2.300254452926209e-05,
      "loss": 0.1309,
      "step": 1745
    },
    {
      "epoch": 4.420785804816223,
      "grad_norm": 0.5951780080795288,
      "learning_rate": 2.2900763358778628e-05,
      "loss": 0.1366,
      "step": 1746
    },
    {
      "epoch": 4.423320659062104,
      "grad_norm": 0.7225191593170166,
      "learning_rate": 2.2798982188295167e-05,
      "loss": 0.1825,
      "step": 1747
    },
    {
      "epoch": 4.425855513307985,
      "grad_norm": 0.6427996158599854,
      "learning_rate": 2.2697201017811707e-05,
      "loss": 0.1326,
      "step": 1748
    },
    {
      "epoch": 4.428390367553866,
      "grad_norm": 0.49267786741256714,
      "learning_rate": 2.2595419847328246e-05,
      "loss": 0.1367,
      "step": 1749
    },
    {
      "epoch": 4.430925221799747,
      "grad_norm": 0.5365452766418457,
      "learning_rate": 2.2493638676844785e-05,
      "loss": 0.1456,
      "step": 1750
    },
    {
      "epoch": 4.433460076045628,
      "grad_norm": 0.65265291929245,
      "learning_rate": 2.2391857506361324e-05,
      "loss": 0.1379,
      "step": 1751
    },
    {
      "epoch": 4.435994930291509,
      "grad_norm": 0.5401502847671509,
      "learning_rate": 2.2290076335877864e-05,
      "loss": 0.1293,
      "step": 1752
    },
    {
      "epoch": 4.438529784537389,
      "grad_norm": 0.6832171678543091,
      "learning_rate": 2.2188295165394403e-05,
      "loss": 0.1448,
      "step": 1753
    },
    {
      "epoch": 4.44106463878327,
      "grad_norm": 0.8080681562423706,
      "learning_rate": 2.2086513994910942e-05,
      "loss": 0.1832,
      "step": 1754
    },
    {
      "epoch": 4.443599493029151,
      "grad_norm": 0.6201688051223755,
      "learning_rate": 2.198473282442748e-05,
      "loss": 0.159,
      "step": 1755
    },
    {
      "epoch": 4.446134347275032,
      "grad_norm": 0.8549275994300842,
      "learning_rate": 2.1882951653944024e-05,
      "loss": 0.2103,
      "step": 1756
    },
    {
      "epoch": 4.448669201520913,
      "grad_norm": 0.5879942178726196,
      "learning_rate": 2.178117048346056e-05,
      "loss": 0.1524,
      "step": 1757
    },
    {
      "epoch": 4.451204055766794,
      "grad_norm": 0.6592312455177307,
      "learning_rate": 2.16793893129771e-05,
      "loss": 0.1535,
      "step": 1758
    },
    {
      "epoch": 4.4537389100126745,
      "grad_norm": 0.6493979096412659,
      "learning_rate": 2.1577608142493642e-05,
      "loss": 0.1451,
      "step": 1759
    },
    {
      "epoch": 4.4562737642585555,
      "grad_norm": 0.7973134517669678,
      "learning_rate": 2.1475826972010178e-05,
      "loss": 0.1519,
      "step": 1760
    },
    {
      "epoch": 4.458808618504436,
      "grad_norm": 0.7703438401222229,
      "learning_rate": 2.1374045801526718e-05,
      "loss": 0.1653,
      "step": 1761
    },
    {
      "epoch": 4.4613434727503165,
      "grad_norm": 1.0013222694396973,
      "learning_rate": 2.127226463104326e-05,
      "loss": 0.2064,
      "step": 1762
    },
    {
      "epoch": 4.4638783269961975,
      "grad_norm": 0.7007017135620117,
      "learning_rate": 2.11704834605598e-05,
      "loss": 0.1401,
      "step": 1763
    },
    {
      "epoch": 4.466413181242078,
      "grad_norm": 0.5366234183311462,
      "learning_rate": 2.1068702290076335e-05,
      "loss": 0.1389,
      "step": 1764
    },
    {
      "epoch": 4.468948035487959,
      "grad_norm": 0.7167120575904846,
      "learning_rate": 2.0966921119592875e-05,
      "loss": 0.1817,
      "step": 1765
    },
    {
      "epoch": 4.47148288973384,
      "grad_norm": 0.7901313900947571,
      "learning_rate": 2.0865139949109417e-05,
      "loss": 0.1817,
      "step": 1766
    },
    {
      "epoch": 4.474017743979721,
      "grad_norm": 0.6681633591651917,
      "learning_rate": 2.0763358778625953e-05,
      "loss": 0.1458,
      "step": 1767
    },
    {
      "epoch": 4.476552598225602,
      "grad_norm": 0.5067597031593323,
      "learning_rate": 2.0661577608142493e-05,
      "loss": 0.1301,
      "step": 1768
    },
    {
      "epoch": 4.479087452471483,
      "grad_norm": 0.6582893133163452,
      "learning_rate": 2.0559796437659035e-05,
      "loss": 0.1576,
      "step": 1769
    },
    {
      "epoch": 4.481622306717364,
      "grad_norm": 0.6628451943397522,
      "learning_rate": 2.0458015267175575e-05,
      "loss": 0.168,
      "step": 1770
    },
    {
      "epoch": 4.484157160963244,
      "grad_norm": 0.5435721278190613,
      "learning_rate": 2.035623409669211e-05,
      "loss": 0.1476,
      "step": 1771
    },
    {
      "epoch": 4.486692015209125,
      "grad_norm": 0.6182110905647278,
      "learning_rate": 2.0254452926208653e-05,
      "loss": 0.1441,
      "step": 1772
    },
    {
      "epoch": 4.489226869455006,
      "grad_norm": 0.9246516823768616,
      "learning_rate": 2.0152671755725193e-05,
      "loss": 0.1747,
      "step": 1773
    },
    {
      "epoch": 4.491761723700887,
      "grad_norm": 0.5967719554901123,
      "learning_rate": 2.005089058524173e-05,
      "loss": 0.1461,
      "step": 1774
    },
    {
      "epoch": 4.494296577946768,
      "grad_norm": 0.5998682379722595,
      "learning_rate": 1.994910941475827e-05,
      "loss": 0.1276,
      "step": 1775
    },
    {
      "epoch": 4.496831432192649,
      "grad_norm": 0.6168457865715027,
      "learning_rate": 1.984732824427481e-05,
      "loss": 0.1407,
      "step": 1776
    },
    {
      "epoch": 4.49936628643853,
      "grad_norm": 0.6580602526664734,
      "learning_rate": 1.974554707379135e-05,
      "loss": 0.149,
      "step": 1777
    },
    {
      "epoch": 4.501901140684411,
      "grad_norm": 0.5117031335830688,
      "learning_rate": 1.964376590330789e-05,
      "loss": 0.1397,
      "step": 1778
    },
    {
      "epoch": 4.504435994930292,
      "grad_norm": 0.4603317975997925,
      "learning_rate": 1.954198473282443e-05,
      "loss": 0.1211,
      "step": 1779
    },
    {
      "epoch": 4.506970849176172,
      "grad_norm": 0.5981631278991699,
      "learning_rate": 1.9440203562340968e-05,
      "loss": 0.1371,
      "step": 1780
    },
    {
      "epoch": 4.509505703422053,
      "grad_norm": 0.6693590879440308,
      "learning_rate": 1.9338422391857507e-05,
      "loss": 0.1495,
      "step": 1781
    },
    {
      "epoch": 4.512040557667934,
      "grad_norm": 0.5286784172058105,
      "learning_rate": 1.9236641221374046e-05,
      "loss": 0.1304,
      "step": 1782
    },
    {
      "epoch": 4.514575411913815,
      "grad_norm": 0.7040352821350098,
      "learning_rate": 1.9134860050890586e-05,
      "loss": 0.1584,
      "step": 1783
    },
    {
      "epoch": 4.517110266159696,
      "grad_norm": 0.6396339535713196,
      "learning_rate": 1.9033078880407125e-05,
      "loss": 0.1529,
      "step": 1784
    },
    {
      "epoch": 4.519645120405577,
      "grad_norm": 0.6708245873451233,
      "learning_rate": 1.8931297709923664e-05,
      "loss": 0.1477,
      "step": 1785
    },
    {
      "epoch": 4.522179974651458,
      "grad_norm": 0.6562108993530273,
      "learning_rate": 1.8829516539440204e-05,
      "loss": 0.1499,
      "step": 1786
    },
    {
      "epoch": 4.524714828897339,
      "grad_norm": 0.5181876420974731,
      "learning_rate": 1.8727735368956746e-05,
      "loss": 0.1398,
      "step": 1787
    },
    {
      "epoch": 4.52724968314322,
      "grad_norm": 0.5952017307281494,
      "learning_rate": 1.8625954198473282e-05,
      "loss": 0.1438,
      "step": 1788
    },
    {
      "epoch": 4.5297845373891,
      "grad_norm": 0.6668636202812195,
      "learning_rate": 1.852417302798982e-05,
      "loss": 0.1805,
      "step": 1789
    },
    {
      "epoch": 4.532319391634981,
      "grad_norm": 0.5433321595191956,
      "learning_rate": 1.8422391857506364e-05,
      "loss": 0.1397,
      "step": 1790
    },
    {
      "epoch": 4.534854245880862,
      "grad_norm": 0.5353025197982788,
      "learning_rate": 1.83206106870229e-05,
      "loss": 0.1419,
      "step": 1791
    },
    {
      "epoch": 4.537389100126743,
      "grad_norm": 0.6123271584510803,
      "learning_rate": 1.821882951653944e-05,
      "loss": 0.1493,
      "step": 1792
    },
    {
      "epoch": 4.5399239543726235,
      "grad_norm": 0.6581493616104126,
      "learning_rate": 1.8117048346055982e-05,
      "loss": 0.1467,
      "step": 1793
    },
    {
      "epoch": 4.5424588086185045,
      "grad_norm": 0.5537798404693604,
      "learning_rate": 1.801526717557252e-05,
      "loss": 0.1467,
      "step": 1794
    },
    {
      "epoch": 4.544993662864385,
      "grad_norm": 0.7163582444190979,
      "learning_rate": 1.7913486005089058e-05,
      "loss": 0.1736,
      "step": 1795
    },
    {
      "epoch": 4.547528517110266,
      "grad_norm": 0.694922149181366,
      "learning_rate": 1.78117048346056e-05,
      "loss": 0.1516,
      "step": 1796
    },
    {
      "epoch": 4.550063371356147,
      "grad_norm": 0.7119778394699097,
      "learning_rate": 1.770992366412214e-05,
      "loss": 0.1899,
      "step": 1797
    },
    {
      "epoch": 4.552598225602027,
      "grad_norm": 0.7570186853408813,
      "learning_rate": 1.7608142493638675e-05,
      "loss": 0.1951,
      "step": 1798
    },
    {
      "epoch": 4.555133079847908,
      "grad_norm": 0.6789132356643677,
      "learning_rate": 1.7506361323155218e-05,
      "loss": 0.1475,
      "step": 1799
    },
    {
      "epoch": 4.557667934093789,
      "grad_norm": 0.5750378966331482,
      "learning_rate": 1.7404580152671757e-05,
      "loss": 0.1431,
      "step": 1800
    },
    {
      "epoch": 4.56020278833967,
      "grad_norm": 0.6066502332687378,
      "learning_rate": 1.7302798982188297e-05,
      "loss": 0.16,
      "step": 1801
    },
    {
      "epoch": 4.562737642585551,
      "grad_norm": 0.5730226039886475,
      "learning_rate": 1.7201017811704836e-05,
      "loss": 0.1455,
      "step": 1802
    },
    {
      "epoch": 4.565272496831432,
      "grad_norm": 0.5752687454223633,
      "learning_rate": 1.7099236641221375e-05,
      "loss": 0.1281,
      "step": 1803
    },
    {
      "epoch": 4.567807351077313,
      "grad_norm": 0.5497205853462219,
      "learning_rate": 1.6997455470737915e-05,
      "loss": 0.1431,
      "step": 1804
    },
    {
      "epoch": 4.570342205323194,
      "grad_norm": 0.7738269567489624,
      "learning_rate": 1.6895674300254454e-05,
      "loss": 0.1523,
      "step": 1805
    },
    {
      "epoch": 4.572877059569075,
      "grad_norm": 0.5750918388366699,
      "learning_rate": 1.6793893129770993e-05,
      "loss": 0.1466,
      "step": 1806
    },
    {
      "epoch": 4.575411913814955,
      "grad_norm": 0.5575040578842163,
      "learning_rate": 1.6692111959287533e-05,
      "loss": 0.1267,
      "step": 1807
    },
    {
      "epoch": 4.577946768060836,
      "grad_norm": 0.509616494178772,
      "learning_rate": 1.6590330788804072e-05,
      "loss": 0.1434,
      "step": 1808
    },
    {
      "epoch": 4.580481622306717,
      "grad_norm": 0.643009603023529,
      "learning_rate": 1.648854961832061e-05,
      "loss": 0.136,
      "step": 1809
    },
    {
      "epoch": 4.583016476552598,
      "grad_norm": 0.5133553743362427,
      "learning_rate": 1.638676844783715e-05,
      "loss": 0.1223,
      "step": 1810
    },
    {
      "epoch": 4.585551330798479,
      "grad_norm": 0.7505659461021423,
      "learning_rate": 1.628498727735369e-05,
      "loss": 0.1607,
      "step": 1811
    },
    {
      "epoch": 4.58808618504436,
      "grad_norm": 0.6981300711631775,
      "learning_rate": 1.618320610687023e-05,
      "loss": 0.1525,
      "step": 1812
    },
    {
      "epoch": 4.590621039290241,
      "grad_norm": 0.4981435537338257,
      "learning_rate": 1.608142493638677e-05,
      "loss": 0.1236,
      "step": 1813
    },
    {
      "epoch": 4.593155893536122,
      "grad_norm": 0.6467440724372864,
      "learning_rate": 1.597964376590331e-05,
      "loss": 0.153,
      "step": 1814
    },
    {
      "epoch": 4.595690747782003,
      "grad_norm": 0.6843181848526001,
      "learning_rate": 1.5877862595419847e-05,
      "loss": 0.1604,
      "step": 1815
    },
    {
      "epoch": 4.598225602027884,
      "grad_norm": 0.49898776412010193,
      "learning_rate": 1.5776081424936386e-05,
      "loss": 0.1165,
      "step": 1816
    },
    {
      "epoch": 4.600760456273765,
      "grad_norm": 0.6252351403236389,
      "learning_rate": 1.567430025445293e-05,
      "loss": 0.1228,
      "step": 1817
    },
    {
      "epoch": 4.603295310519645,
      "grad_norm": 0.5452350974082947,
      "learning_rate": 1.557251908396947e-05,
      "loss": 0.1245,
      "step": 1818
    },
    {
      "epoch": 4.605830164765526,
      "grad_norm": 0.6847854852676392,
      "learning_rate": 1.5470737913486004e-05,
      "loss": 0.1462,
      "step": 1819
    },
    {
      "epoch": 4.608365019011407,
      "grad_norm": 0.49941131472587585,
      "learning_rate": 1.5368956743002547e-05,
      "loss": 0.1268,
      "step": 1820
    },
    {
      "epoch": 4.610899873257288,
      "grad_norm": 0.581243097782135,
      "learning_rate": 1.5267175572519086e-05,
      "loss": 0.1296,
      "step": 1821
    },
    {
      "epoch": 4.613434727503169,
      "grad_norm": 0.8345553874969482,
      "learning_rate": 1.5165394402035624e-05,
      "loss": 0.1307,
      "step": 1822
    },
    {
      "epoch": 4.61596958174905,
      "grad_norm": 0.6534408926963806,
      "learning_rate": 1.5063613231552162e-05,
      "loss": 0.1446,
      "step": 1823
    },
    {
      "epoch": 4.6185044359949305,
      "grad_norm": 0.7743064165115356,
      "learning_rate": 1.4961832061068704e-05,
      "loss": 0.2027,
      "step": 1824
    },
    {
      "epoch": 4.6210392902408115,
      "grad_norm": 0.6709569096565247,
      "learning_rate": 1.4860050890585242e-05,
      "loss": 0.1427,
      "step": 1825
    },
    {
      "epoch": 4.6235741444866925,
      "grad_norm": 0.6598264575004578,
      "learning_rate": 1.4758269720101781e-05,
      "loss": 0.1399,
      "step": 1826
    },
    {
      "epoch": 4.6261089987325725,
      "grad_norm": 0.49041053652763367,
      "learning_rate": 1.4656488549618322e-05,
      "loss": 0.133,
      "step": 1827
    },
    {
      "epoch": 4.6286438529784535,
      "grad_norm": 0.6697686910629272,
      "learning_rate": 1.455470737913486e-05,
      "loss": 0.1735,
      "step": 1828
    },
    {
      "epoch": 4.6311787072243344,
      "grad_norm": 0.5481597781181335,
      "learning_rate": 1.44529262086514e-05,
      "loss": 0.1244,
      "step": 1829
    },
    {
      "epoch": 4.633713561470215,
      "grad_norm": 0.6251161694526672,
      "learning_rate": 1.435114503816794e-05,
      "loss": 0.1436,
      "step": 1830
    },
    {
      "epoch": 4.636248415716096,
      "grad_norm": 0.7515272498130798,
      "learning_rate": 1.424936386768448e-05,
      "loss": 0.1493,
      "step": 1831
    },
    {
      "epoch": 4.638783269961977,
      "grad_norm": 0.8478451371192932,
      "learning_rate": 1.4147582697201017e-05,
      "loss": 0.1519,
      "step": 1832
    },
    {
      "epoch": 4.641318124207858,
      "grad_norm": 0.5417062640190125,
      "learning_rate": 1.4045801526717558e-05,
      "loss": 0.1318,
      "step": 1833
    },
    {
      "epoch": 4.643852978453739,
      "grad_norm": 0.6493893265724182,
      "learning_rate": 1.3944020356234097e-05,
      "loss": 0.1546,
      "step": 1834
    },
    {
      "epoch": 4.64638783269962,
      "grad_norm": 0.8475616574287415,
      "learning_rate": 1.3842239185750635e-05,
      "loss": 0.172,
      "step": 1835
    },
    {
      "epoch": 4.6489226869455,
      "grad_norm": 0.5484082698822021,
      "learning_rate": 1.3740458015267178e-05,
      "loss": 0.1203,
      "step": 1836
    },
    {
      "epoch": 4.651457541191381,
      "grad_norm": 0.6533843874931335,
      "learning_rate": 1.3638676844783715e-05,
      "loss": 0.1501,
      "step": 1837
    },
    {
      "epoch": 4.653992395437262,
      "grad_norm": 0.7521854043006897,
      "learning_rate": 1.3536895674300255e-05,
      "loss": 0.1955,
      "step": 1838
    },
    {
      "epoch": 4.656527249683143,
      "grad_norm": 0.6500900983810425,
      "learning_rate": 1.3435114503816796e-05,
      "loss": 0.14,
      "step": 1839
    },
    {
      "epoch": 4.659062103929024,
      "grad_norm": 0.7133599519729614,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.1707,
      "step": 1840
    },
    {
      "epoch": 4.661596958174905,
      "grad_norm": 0.7065775394439697,
      "learning_rate": 1.3231552162849873e-05,
      "loss": 0.144,
      "step": 1841
    },
    {
      "epoch": 4.664131812420786,
      "grad_norm": 0.7716514468193054,
      "learning_rate": 1.3129770992366414e-05,
      "loss": 0.1792,
      "step": 1842
    },
    {
      "epoch": 4.666666666666667,
      "grad_norm": 0.9312828779220581,
      "learning_rate": 1.3027989821882953e-05,
      "loss": 0.2139,
      "step": 1843
    },
    {
      "epoch": 4.669201520912548,
      "grad_norm": 0.5163487792015076,
      "learning_rate": 1.292620865139949e-05,
      "loss": 0.139,
      "step": 1844
    },
    {
      "epoch": 4.671736375158428,
      "grad_norm": 0.7424818277359009,
      "learning_rate": 1.2824427480916032e-05,
      "loss": 0.1533,
      "step": 1845
    },
    {
      "epoch": 4.674271229404309,
      "grad_norm": 0.5935065150260925,
      "learning_rate": 1.2722646310432571e-05,
      "loss": 0.1319,
      "step": 1846
    },
    {
      "epoch": 4.67680608365019,
      "grad_norm": 0.7372322678565979,
      "learning_rate": 1.2620865139949108e-05,
      "loss": 0.1832,
      "step": 1847
    },
    {
      "epoch": 4.679340937896071,
      "grad_norm": 0.5936238765716553,
      "learning_rate": 1.251908396946565e-05,
      "loss": 0.1357,
      "step": 1848
    },
    {
      "epoch": 4.681875792141952,
      "grad_norm": 0.6689032316207886,
      "learning_rate": 1.2417302798982189e-05,
      "loss": 0.1709,
      "step": 1849
    },
    {
      "epoch": 4.684410646387833,
      "grad_norm": 0.6519850492477417,
      "learning_rate": 1.2315521628498728e-05,
      "loss": 0.1438,
      "step": 1850
    },
    {
      "epoch": 4.686945500633714,
      "grad_norm": 0.5853939056396484,
      "learning_rate": 1.2213740458015267e-05,
      "loss": 0.134,
      "step": 1851
    },
    {
      "epoch": 4.689480354879595,
      "grad_norm": 0.5059859752655029,
      "learning_rate": 1.2111959287531807e-05,
      "loss": 0.1088,
      "step": 1852
    },
    {
      "epoch": 4.692015209125476,
      "grad_norm": 0.6989784240722656,
      "learning_rate": 1.2010178117048348e-05,
      "loss": 0.1527,
      "step": 1853
    },
    {
      "epoch": 4.694550063371356,
      "grad_norm": 0.5851006507873535,
      "learning_rate": 1.1908396946564885e-05,
      "loss": 0.143,
      "step": 1854
    },
    {
      "epoch": 4.697084917617237,
      "grad_norm": 0.5606602430343628,
      "learning_rate": 1.1806615776081426e-05,
      "loss": 0.1288,
      "step": 1855
    },
    {
      "epoch": 4.699619771863118,
      "grad_norm": 0.6175526976585388,
      "learning_rate": 1.1704834605597966e-05,
      "loss": 0.1564,
      "step": 1856
    },
    {
      "epoch": 4.702154626108999,
      "grad_norm": 0.5776654481887817,
      "learning_rate": 1.1603053435114503e-05,
      "loss": 0.1323,
      "step": 1857
    },
    {
      "epoch": 4.7046894803548795,
      "grad_norm": 0.5664159059524536,
      "learning_rate": 1.1501272264631044e-05,
      "loss": 0.1371,
      "step": 1858
    },
    {
      "epoch": 4.7072243346007605,
      "grad_norm": 0.7187889218330383,
      "learning_rate": 1.1399491094147584e-05,
      "loss": 0.1476,
      "step": 1859
    },
    {
      "epoch": 4.7097591888466415,
      "grad_norm": 0.5795005559921265,
      "learning_rate": 1.1297709923664123e-05,
      "loss": 0.1373,
      "step": 1860
    },
    {
      "epoch": 4.712294043092522,
      "grad_norm": 0.5491251945495605,
      "learning_rate": 1.1195928753180662e-05,
      "loss": 0.1192,
      "step": 1861
    },
    {
      "epoch": 4.714828897338403,
      "grad_norm": 0.4715762734413147,
      "learning_rate": 1.1094147582697202e-05,
      "loss": 0.1106,
      "step": 1862
    },
    {
      "epoch": 4.7173637515842834,
      "grad_norm": 0.6300286054611206,
      "learning_rate": 1.099236641221374e-05,
      "loss": 0.138,
      "step": 1863
    },
    {
      "epoch": 4.719898605830164,
      "grad_norm": 0.7265313267707825,
      "learning_rate": 1.089058524173028e-05,
      "loss": 0.2246,
      "step": 1864
    },
    {
      "epoch": 4.722433460076045,
      "grad_norm": 0.7080928087234497,
      "learning_rate": 1.0788804071246821e-05,
      "loss": 0.1335,
      "step": 1865
    },
    {
      "epoch": 4.724968314321926,
      "grad_norm": 0.605714738368988,
      "learning_rate": 1.0687022900763359e-05,
      "loss": 0.1412,
      "step": 1866
    },
    {
      "epoch": 4.727503168567807,
      "grad_norm": 0.6648192405700684,
      "learning_rate": 1.05852417302799e-05,
      "loss": 0.1648,
      "step": 1867
    },
    {
      "epoch": 4.730038022813688,
      "grad_norm": 0.6057281494140625,
      "learning_rate": 1.0483460559796437e-05,
      "loss": 0.1266,
      "step": 1868
    },
    {
      "epoch": 4.732572877059569,
      "grad_norm": 0.6135514974594116,
      "learning_rate": 1.0381679389312977e-05,
      "loss": 0.1457,
      "step": 1869
    },
    {
      "epoch": 4.73510773130545,
      "grad_norm": 0.6599459052085876,
      "learning_rate": 1.0279898218829518e-05,
      "loss": 0.1558,
      "step": 1870
    },
    {
      "epoch": 4.737642585551331,
      "grad_norm": 0.5975873470306396,
      "learning_rate": 1.0178117048346055e-05,
      "loss": 0.134,
      "step": 1871
    },
    {
      "epoch": 4.740177439797211,
      "grad_norm": 0.6581792235374451,
      "learning_rate": 1.0076335877862596e-05,
      "loss": 0.1463,
      "step": 1872
    },
    {
      "epoch": 4.742712294043092,
      "grad_norm": 0.5627064108848572,
      "learning_rate": 9.974554707379136e-06,
      "loss": 0.1238,
      "step": 1873
    },
    {
      "epoch": 4.745247148288973,
      "grad_norm": 0.6461361050605774,
      "learning_rate": 9.872773536895675e-06,
      "loss": 0.1621,
      "step": 1874
    },
    {
      "epoch": 4.747782002534854,
      "grad_norm": 0.5615333914756775,
      "learning_rate": 9.770992366412214e-06,
      "loss": 0.1387,
      "step": 1875
    },
    {
      "epoch": 4.750316856780735,
      "grad_norm": 0.6830117702484131,
      "learning_rate": 9.669211195928754e-06,
      "loss": 0.1397,
      "step": 1876
    },
    {
      "epoch": 4.752851711026616,
      "grad_norm": 0.731072187423706,
      "learning_rate": 9.567430025445293e-06,
      "loss": 0.1508,
      "step": 1877
    },
    {
      "epoch": 4.755386565272497,
      "grad_norm": 0.7469286918640137,
      "learning_rate": 9.465648854961832e-06,
      "loss": 0.1944,
      "step": 1878
    },
    {
      "epoch": 4.757921419518378,
      "grad_norm": 0.700532078742981,
      "learning_rate": 9.363867684478373e-06,
      "loss": 0.1697,
      "step": 1879
    },
    {
      "epoch": 4.760456273764259,
      "grad_norm": 0.7140323519706726,
      "learning_rate": 9.26208651399491e-06,
      "loss": 0.1597,
      "step": 1880
    },
    {
      "epoch": 4.76299112801014,
      "grad_norm": 0.6711133718490601,
      "learning_rate": 9.16030534351145e-06,
      "loss": 0.1731,
      "step": 1881
    },
    {
      "epoch": 4.765525982256021,
      "grad_norm": 0.43002957105636597,
      "learning_rate": 9.058524173027991e-06,
      "loss": 0.1181,
      "step": 1882
    },
    {
      "epoch": 4.768060836501901,
      "grad_norm": 0.669159471988678,
      "learning_rate": 8.956743002544529e-06,
      "loss": 0.1578,
      "step": 1883
    },
    {
      "epoch": 4.770595690747782,
      "grad_norm": 0.5030307769775391,
      "learning_rate": 8.85496183206107e-06,
      "loss": 0.1213,
      "step": 1884
    },
    {
      "epoch": 4.773130544993663,
      "grad_norm": 0.7841615080833435,
      "learning_rate": 8.753180661577609e-06,
      "loss": 0.1619,
      "step": 1885
    },
    {
      "epoch": 4.775665399239544,
      "grad_norm": 0.5570418834686279,
      "learning_rate": 8.651399491094148e-06,
      "loss": 0.1308,
      "step": 1886
    },
    {
      "epoch": 4.778200253485425,
      "grad_norm": 0.6690031886100769,
      "learning_rate": 8.549618320610688e-06,
      "loss": 0.1413,
      "step": 1887
    },
    {
      "epoch": 4.780735107731306,
      "grad_norm": 0.524140477180481,
      "learning_rate": 8.447837150127227e-06,
      "loss": 0.1354,
      "step": 1888
    },
    {
      "epoch": 4.783269961977187,
      "grad_norm": 0.5612379908561707,
      "learning_rate": 8.346055979643766e-06,
      "loss": 0.1375,
      "step": 1889
    },
    {
      "epoch": 4.7858048162230675,
      "grad_norm": 0.851925790309906,
      "learning_rate": 8.244274809160306e-06,
      "loss": 0.1783,
      "step": 1890
    },
    {
      "epoch": 4.7883396704689485,
      "grad_norm": 0.8507834672927856,
      "learning_rate": 8.142493638676845e-06,
      "loss": 0.1743,
      "step": 1891
    },
    {
      "epoch": 4.7908745247148286,
      "grad_norm": 0.8136033415794373,
      "learning_rate": 8.040712468193384e-06,
      "loss": 0.1381,
      "step": 1892
    },
    {
      "epoch": 4.7934093789607095,
      "grad_norm": 0.7247329354286194,
      "learning_rate": 7.938931297709924e-06,
      "loss": 0.1793,
      "step": 1893
    },
    {
      "epoch": 4.7959442332065905,
      "grad_norm": 0.5494823455810547,
      "learning_rate": 7.837150127226465e-06,
      "loss": 0.1231,
      "step": 1894
    },
    {
      "epoch": 4.798479087452471,
      "grad_norm": 0.6107218861579895,
      "learning_rate": 7.735368956743002e-06,
      "loss": 0.1358,
      "step": 1895
    },
    {
      "epoch": 4.801013941698352,
      "grad_norm": 0.6297575235366821,
      "learning_rate": 7.633587786259543e-06,
      "loss": 0.1699,
      "step": 1896
    },
    {
      "epoch": 4.803548795944233,
      "grad_norm": 0.8669266700744629,
      "learning_rate": 7.531806615776081e-06,
      "loss": 0.1982,
      "step": 1897
    },
    {
      "epoch": 4.806083650190114,
      "grad_norm": 0.583975076675415,
      "learning_rate": 7.430025445292621e-06,
      "loss": 0.1517,
      "step": 1898
    },
    {
      "epoch": 4.808618504435995,
      "grad_norm": 0.6059403419494629,
      "learning_rate": 7.328244274809161e-06,
      "loss": 0.138,
      "step": 1899
    },
    {
      "epoch": 4.811153358681876,
      "grad_norm": 1.0802148580551147,
      "learning_rate": 7.2264631043257e-06,
      "loss": 0.1677,
      "step": 1900
    },
    {
      "epoch": 4.813688212927756,
      "grad_norm": 0.5637528300285339,
      "learning_rate": 7.12468193384224e-06,
      "loss": 0.1517,
      "step": 1901
    },
    {
      "epoch": 4.816223067173637,
      "grad_norm": 0.6925719976425171,
      "learning_rate": 7.022900763358779e-06,
      "loss": 0.1636,
      "step": 1902
    },
    {
      "epoch": 4.818757921419518,
      "grad_norm": 0.6529707908630371,
      "learning_rate": 6.9211195928753175e-06,
      "loss": 0.1587,
      "step": 1903
    },
    {
      "epoch": 4.821292775665399,
      "grad_norm": 1.1477290391921997,
      "learning_rate": 6.819338422391858e-06,
      "loss": 0.1655,
      "step": 1904
    },
    {
      "epoch": 4.82382762991128,
      "grad_norm": 0.7867985367774963,
      "learning_rate": 6.717557251908398e-06,
      "loss": 0.1955,
      "step": 1905
    },
    {
      "epoch": 4.826362484157161,
      "grad_norm": 0.617871105670929,
      "learning_rate": 6.615776081424936e-06,
      "loss": 0.1554,
      "step": 1906
    },
    {
      "epoch": 4.828897338403042,
      "grad_norm": 0.5985192656517029,
      "learning_rate": 6.5139949109414765e-06,
      "loss": 0.1484,
      "step": 1907
    },
    {
      "epoch": 4.831432192648923,
      "grad_norm": 0.6069400310516357,
      "learning_rate": 6.412213740458016e-06,
      "loss": 0.1326,
      "step": 1908
    },
    {
      "epoch": 4.833967046894804,
      "grad_norm": 0.9009010195732117,
      "learning_rate": 6.310432569974554e-06,
      "loss": 0.1999,
      "step": 1909
    },
    {
      "epoch": 4.836501901140684,
      "grad_norm": 0.5913792848587036,
      "learning_rate": 6.208651399491094e-06,
      "loss": 0.1381,
      "step": 1910
    },
    {
      "epoch": 4.839036755386565,
      "grad_norm": 0.5730859637260437,
      "learning_rate": 6.106870229007634e-06,
      "loss": 0.1346,
      "step": 1911
    },
    {
      "epoch": 4.841571609632446,
      "grad_norm": 0.6579172611236572,
      "learning_rate": 6.005089058524174e-06,
      "loss": 0.1572,
      "step": 1912
    },
    {
      "epoch": 4.844106463878327,
      "grad_norm": 0.5854265093803406,
      "learning_rate": 5.903307888040713e-06,
      "loss": 0.1359,
      "step": 1913
    },
    {
      "epoch": 4.846641318124208,
      "grad_norm": 0.7668277025222778,
      "learning_rate": 5.801526717557252e-06,
      "loss": 0.1728,
      "step": 1914
    },
    {
      "epoch": 4.849176172370089,
      "grad_norm": 0.8092861175537109,
      "learning_rate": 5.699745547073792e-06,
      "loss": 0.1741,
      "step": 1915
    },
    {
      "epoch": 4.85171102661597,
      "grad_norm": 0.6868001818656921,
      "learning_rate": 5.597964376590331e-06,
      "loss": 0.1604,
      "step": 1916
    },
    {
      "epoch": 4.854245880861851,
      "grad_norm": 0.6506228446960449,
      "learning_rate": 5.49618320610687e-06,
      "loss": 0.1459,
      "step": 1917
    },
    {
      "epoch": 4.856780735107732,
      "grad_norm": 0.6033440232276917,
      "learning_rate": 5.394402035623411e-06,
      "loss": 0.1435,
      "step": 1918
    },
    {
      "epoch": 4.859315589353612,
      "grad_norm": 0.7446348071098328,
      "learning_rate": 5.29262086513995e-06,
      "loss": 0.165,
      "step": 1919
    },
    {
      "epoch": 4.861850443599493,
      "grad_norm": 0.5380656123161316,
      "learning_rate": 5.190839694656488e-06,
      "loss": 0.1504,
      "step": 1920
    },
    {
      "epoch": 4.864385297845374,
      "grad_norm": 0.6752755641937256,
      "learning_rate": 5.089058524173028e-06,
      "loss": 0.1616,
      "step": 1921
    },
    {
      "epoch": 4.866920152091255,
      "grad_norm": 0.6897322535514832,
      "learning_rate": 4.987277353689568e-06,
      "loss": 0.1409,
      "step": 1922
    },
    {
      "epoch": 4.869455006337136,
      "grad_norm": 0.5405673980712891,
      "learning_rate": 4.885496183206107e-06,
      "loss": 0.1215,
      "step": 1923
    },
    {
      "epoch": 4.8719898605830165,
      "grad_norm": 0.6921371221542358,
      "learning_rate": 4.7837150127226464e-06,
      "loss": 0.1554,
      "step": 1924
    },
    {
      "epoch": 4.8745247148288975,
      "grad_norm": 0.6672477722167969,
      "learning_rate": 4.681933842239187e-06,
      "loss": 0.1685,
      "step": 1925
    },
    {
      "epoch": 4.8770595690747784,
      "grad_norm": 0.5887411236763,
      "learning_rate": 4.580152671755725e-06,
      "loss": 0.1495,
      "step": 1926
    },
    {
      "epoch": 4.879594423320659,
      "grad_norm": 0.8119281530380249,
      "learning_rate": 4.478371501272264e-06,
      "loss": 0.1778,
      "step": 1927
    },
    {
      "epoch": 4.8821292775665395,
      "grad_norm": 0.6423155665397644,
      "learning_rate": 4.3765903307888045e-06,
      "loss": 0.1532,
      "step": 1928
    },
    {
      "epoch": 4.88466413181242,
      "grad_norm": 0.576859712600708,
      "learning_rate": 4.274809160305344e-06,
      "loss": 0.1474,
      "step": 1929
    },
    {
      "epoch": 4.887198986058301,
      "grad_norm": 0.668792188167572,
      "learning_rate": 4.173027989821883e-06,
      "loss": 0.1583,
      "step": 1930
    },
    {
      "epoch": 4.889733840304182,
      "grad_norm": 0.727428138256073,
      "learning_rate": 4.0712468193384225e-06,
      "loss": 0.1759,
      "step": 1931
    },
    {
      "epoch": 4.892268694550063,
      "grad_norm": 0.7260742783546448,
      "learning_rate": 3.969465648854962e-06,
      "loss": 0.1665,
      "step": 1932
    },
    {
      "epoch": 4.894803548795944,
      "grad_norm": 0.6192269921302795,
      "learning_rate": 3.867684478371501e-06,
      "loss": 0.1377,
      "step": 1933
    },
    {
      "epoch": 4.897338403041825,
      "grad_norm": 0.7672135233879089,
      "learning_rate": 3.7659033078880404e-06,
      "loss": 0.1696,
      "step": 1934
    },
    {
      "epoch": 4.899873257287706,
      "grad_norm": 0.5162369012832642,
      "learning_rate": 3.6641221374045806e-06,
      "loss": 0.1384,
      "step": 1935
    },
    {
      "epoch": 4.902408111533587,
      "grad_norm": 0.6594913601875305,
      "learning_rate": 3.56234096692112e-06,
      "loss": 0.1714,
      "step": 1936
    },
    {
      "epoch": 4.904942965779467,
      "grad_norm": 0.7748851776123047,
      "learning_rate": 3.4605597964376588e-06,
      "loss": 0.2014,
      "step": 1937
    },
    {
      "epoch": 4.907477820025348,
      "grad_norm": 0.6400601267814636,
      "learning_rate": 3.358778625954199e-06,
      "loss": 0.1522,
      "step": 1938
    },
    {
      "epoch": 4.910012674271229,
      "grad_norm": 0.5443174839019775,
      "learning_rate": 3.2569974554707382e-06,
      "loss": 0.1276,
      "step": 1939
    },
    {
      "epoch": 4.91254752851711,
      "grad_norm": 0.6544225811958313,
      "learning_rate": 3.155216284987277e-06,
      "loss": 0.1441,
      "step": 1940
    },
    {
      "epoch": 4.915082382762991,
      "grad_norm": 0.6579450368881226,
      "learning_rate": 3.053435114503817e-06,
      "loss": 0.1688,
      "step": 1941
    },
    {
      "epoch": 4.917617237008872,
      "grad_norm": 0.594393253326416,
      "learning_rate": 2.9516539440203566e-06,
      "loss": 0.1586,
      "step": 1942
    },
    {
      "epoch": 4.920152091254753,
      "grad_norm": 0.6417977213859558,
      "learning_rate": 2.849872773536896e-06,
      "loss": 0.1389,
      "step": 1943
    },
    {
      "epoch": 4.922686945500634,
      "grad_norm": 0.5247513055801392,
      "learning_rate": 2.748091603053435e-06,
      "loss": 0.1282,
      "step": 1944
    },
    {
      "epoch": 4.925221799746515,
      "grad_norm": 0.6372106075286865,
      "learning_rate": 2.646310432569975e-06,
      "loss": 0.1391,
      "step": 1945
    },
    {
      "epoch": 4.927756653992396,
      "grad_norm": 0.5967155694961548,
      "learning_rate": 2.544529262086514e-06,
      "loss": 0.1358,
      "step": 1946
    },
    {
      "epoch": 4.930291508238277,
      "grad_norm": 0.6050627827644348,
      "learning_rate": 2.4427480916030536e-06,
      "loss": 0.1449,
      "step": 1947
    },
    {
      "epoch": 4.932826362484157,
      "grad_norm": 0.7595526576042175,
      "learning_rate": 2.3409669211195933e-06,
      "loss": 0.1838,
      "step": 1948
    },
    {
      "epoch": 4.935361216730038,
      "grad_norm": 0.7220463156700134,
      "learning_rate": 2.239185750636132e-06,
      "loss": 0.1695,
      "step": 1949
    },
    {
      "epoch": 4.937896070975919,
      "grad_norm": 0.4891555905342102,
      "learning_rate": 2.137404580152672e-06,
      "loss": 0.1394,
      "step": 1950
    },
    {
      "epoch": 4.9404309252218,
      "grad_norm": 0.5262938141822815,
      "learning_rate": 2.0356234096692112e-06,
      "loss": 0.1452,
      "step": 1951
    },
    {
      "epoch": 4.942965779467681,
      "grad_norm": 0.7193884253501892,
      "learning_rate": 1.9338422391857505e-06,
      "loss": 0.176,
      "step": 1952
    },
    {
      "epoch": 4.945500633713562,
      "grad_norm": 0.7117200493812561,
      "learning_rate": 1.8320610687022903e-06,
      "loss": 0.1697,
      "step": 1953
    },
    {
      "epoch": 4.948035487959443,
      "grad_norm": 0.7884610891342163,
      "learning_rate": 1.7302798982188294e-06,
      "loss": 0.1864,
      "step": 1954
    },
    {
      "epoch": 4.9505703422053235,
      "grad_norm": 0.8606098890304565,
      "learning_rate": 1.6284987277353691e-06,
      "loss": 0.1568,
      "step": 1955
    },
    {
      "epoch": 4.9531051964512045,
      "grad_norm": 0.5030885338783264,
      "learning_rate": 1.5267175572519084e-06,
      "loss": 0.1306,
      "step": 1956
    },
    {
      "epoch": 4.955640050697085,
      "grad_norm": 0.5155559182167053,
      "learning_rate": 1.424936386768448e-06,
      "loss": 0.1311,
      "step": 1957
    },
    {
      "epoch": 4.9581749049429655,
      "grad_norm": 0.4945980906486511,
      "learning_rate": 1.3231552162849875e-06,
      "loss": 0.1212,
      "step": 1958
    },
    {
      "epoch": 4.9607097591888465,
      "grad_norm": 0.79302978515625,
      "learning_rate": 1.2213740458015268e-06,
      "loss": 0.1763,
      "step": 1959
    },
    {
      "epoch": 4.9632446134347274,
      "grad_norm": 0.6397921442985535,
      "learning_rate": 1.119592875318066e-06,
      "loss": 0.1416,
      "step": 1960
    },
    {
      "epoch": 4.965779467680608,
      "grad_norm": 0.6680799722671509,
      "learning_rate": 1.0178117048346056e-06,
      "loss": 0.1519,
      "step": 1961
    },
    {
      "epoch": 4.968314321926489,
      "grad_norm": 0.5919336080551147,
      "learning_rate": 9.160305343511451e-07,
      "loss": 0.16,
      "step": 1962
    },
    {
      "epoch": 4.97084917617237,
      "grad_norm": 0.5929127335548401,
      "learning_rate": 8.142493638676846e-07,
      "loss": 0.143,
      "step": 1963
    },
    {
      "epoch": 4.973384030418251,
      "grad_norm": 0.5678686499595642,
      "learning_rate": 7.12468193384224e-07,
      "loss": 0.1236,
      "step": 1964
    },
    {
      "epoch": 4.975918884664132,
      "grad_norm": 0.5478057861328125,
      "learning_rate": 6.106870229007634e-07,
      "loss": 0.1407,
      "step": 1965
    },
    {
      "epoch": 4.978453738910012,
      "grad_norm": 0.6003939509391785,
      "learning_rate": 5.089058524173028e-07,
      "loss": 0.1315,
      "step": 1966
    },
    {
      "epoch": 4.980988593155893,
      "grad_norm": 0.5943416357040405,
      "learning_rate": 4.071246819338423e-07,
      "loss": 0.1451,
      "step": 1967
    },
    {
      "epoch": 4.983523447401774,
      "grad_norm": 0.5419045090675354,
      "learning_rate": 3.053435114503817e-07,
      "loss": 0.1338,
      "step": 1968
    },
    {
      "epoch": 4.986058301647655,
      "grad_norm": 0.5665134787559509,
      "learning_rate": 2.0356234096692114e-07,
      "loss": 0.1347,
      "step": 1969
    },
    {
      "epoch": 4.988593155893536,
      "grad_norm": 0.5646002292633057,
      "learning_rate": 1.0178117048346057e-07,
      "loss": 0.1352,
      "step": 1970
    }
  ],
  "logging_steps": 1,
  "max_steps": 1970,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5558390987853286e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}