Text Generation
Transformers
Safetensors
chess_transformer
chess
llm-course
chess-challenge
custom_code
Instructions to use LLM-course/chess-yassine with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use LLM-course/chess-yassine with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="LLM-course/chess-yassine", trust_remote_code=True)
```

```python
# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("LLM-course/chess-yassine", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use LLM-course/chess-yassine with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "LLM-course/chess-yassine"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "LLM-course/chess-yassine",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/LLM-course/chess-yassine
- SGLang
How to use LLM-course/chess-yassine with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "LLM-course/chess-yassine" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "LLM-course/chess-yassine",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "LLM-course/chess-yassine" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "LLM-course/chess-yassine",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use LLM-course/chess-yassine with Docker Model Runner:
docker model run hf.co/LLM-course/chess-yassine
| { | |
| "best_global_step": 148440, | |
| "best_metric": 0.7365977168083191, | |
| "best_model_checkpoint": "./output_optimized/checkpoint-148440", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 148440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0033683643222850983, | |
| "grad_norm": 1.9248961210250854, | |
| "learning_rate": 2.000808407437348e-06, | |
| "loss": 5.0547, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.006736728644570197, | |
| "grad_norm": 1.0100284814834595, | |
| "learning_rate": 4.021827000808407e-06, | |
| "loss": 4.9226, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.010105092966855295, | |
| "grad_norm": 0.8452675938606262, | |
| "learning_rate": 6.042845594179466e-06, | |
| "loss": 4.8032, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.013473457289140393, | |
| "grad_norm": 0.878520131111145, | |
| "learning_rate": 8.063864187550524e-06, | |
| "loss": 4.7193, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.016841821611425493, | |
| "grad_norm": 1.0584090948104858, | |
| "learning_rate": 1.0084882780921583e-05, | |
| "loss": 4.5769, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.02021018593371059, | |
| "grad_norm": 1.1221024990081787, | |
| "learning_rate": 1.2105901374292643e-05, | |
| "loss": 4.3638, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.02357855025599569, | |
| "grad_norm": 1.0861926078796387, | |
| "learning_rate": 1.41269199676637e-05, | |
| "loss": 4.155, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.026946914578280787, | |
| "grad_norm": 0.9905880093574524, | |
| "learning_rate": 1.6147938561034762e-05, | |
| "loss": 3.9701, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.030315278900565887, | |
| "grad_norm": 0.9011399149894714, | |
| "learning_rate": 1.816895715440582e-05, | |
| "loss": 3.7974, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.03368364322285099, | |
| "grad_norm": 0.8480072617530823, | |
| "learning_rate": 2.0189975747776877e-05, | |
| "loss": 3.6336, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.03705200754513608, | |
| "grad_norm": 0.7879598736763, | |
| "learning_rate": 2.2210994341147935e-05, | |
| "loss": 3.4894, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.04042037186742118, | |
| "grad_norm": 0.7154058814048767, | |
| "learning_rate": 2.4232012934518997e-05, | |
| "loss": 3.3559, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.04378873618970628, | |
| "grad_norm": 0.6783929467201233, | |
| "learning_rate": 2.6253031527890058e-05, | |
| "loss": 3.235, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.04715710051199138, | |
| "grad_norm": 0.6816830635070801, | |
| "learning_rate": 2.8274050121261112e-05, | |
| "loss": 3.1325, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.05052546483427647, | |
| "grad_norm": 0.6334635615348816, | |
| "learning_rate": 3.029506871463217e-05, | |
| "loss": 3.0338, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.05389382915656157, | |
| "grad_norm": 0.6427187323570251, | |
| "learning_rate": 3.2316087308003235e-05, | |
| "loss": 2.9431, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.057262193478846674, | |
| "grad_norm": 1.2221870422363281, | |
| "learning_rate": 3.433710590137429e-05, | |
| "loss": 2.8522, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.060630557801131774, | |
| "grad_norm": 1.4814372062683105, | |
| "learning_rate": 3.635812449474535e-05, | |
| "loss": 2.7769, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.06399892212341687, | |
| "grad_norm": 1.1986867189407349, | |
| "learning_rate": 3.837914308811641e-05, | |
| "loss": 2.7072, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.06736728644570197, | |
| "grad_norm": 1.0901182889938354, | |
| "learning_rate": 4.0400161681487466e-05, | |
| "loss": 2.6491, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.07073565076798706, | |
| "grad_norm": 1.8428642749786377, | |
| "learning_rate": 4.2421180274858524e-05, | |
| "loss": 2.5944, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.07410401509027216, | |
| "grad_norm": 1.2476333379745483, | |
| "learning_rate": 4.444219886822959e-05, | |
| "loss": 2.5473, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.07747237941255726, | |
| "grad_norm": 1.3376599550247192, | |
| "learning_rate": 4.6463217461600646e-05, | |
| "loss": 2.5115, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.08084074373484236, | |
| "grad_norm": 2.028588056564331, | |
| "learning_rate": 4.84842360549717e-05, | |
| "loss": 2.4653, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.08420910805712746, | |
| "grad_norm": 1.5413949489593506, | |
| "learning_rate": 5.0505254648342755e-05, | |
| "loss": 2.4203, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.08757747237941256, | |
| "grad_norm": 1.3644788265228271, | |
| "learning_rate": 5.252627324171382e-05, | |
| "loss": 2.3903, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.09094583670169766, | |
| "grad_norm": 1.130286693572998, | |
| "learning_rate": 5.454729183508488e-05, | |
| "loss": 2.3527, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.09431420102398276, | |
| "grad_norm": 1.6935234069824219, | |
| "learning_rate": 5.6568310428455935e-05, | |
| "loss": 2.3229, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.09768256534626785, | |
| "grad_norm": 1.9905304908752441, | |
| "learning_rate": 5.8589329021827e-05, | |
| "loss": 2.2926, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.10105092966855295, | |
| "grad_norm": 1.4488565921783447, | |
| "learning_rate": 6.061034761519806e-05, | |
| "loss": 2.2595, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.10441929399083805, | |
| "grad_norm": 1.6210366487503052, | |
| "learning_rate": 6.263136620856912e-05, | |
| "loss": 2.2317, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.10778765831312315, | |
| "grad_norm": 1.6805219650268555, | |
| "learning_rate": 6.465238480194017e-05, | |
| "loss": 2.1987, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.11115602263540825, | |
| "grad_norm": 1.6502385139465332, | |
| "learning_rate": 6.667340339531123e-05, | |
| "loss": 2.181, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.11452438695769335, | |
| "grad_norm": 1.831292986869812, | |
| "learning_rate": 6.869442198868228e-05, | |
| "loss": 2.1532, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.11789275127997845, | |
| "grad_norm": 1.9188601970672607, | |
| "learning_rate": 7.071544058205335e-05, | |
| "loss": 2.1127, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.12126111560226355, | |
| "grad_norm": 1.6280624866485596, | |
| "learning_rate": 7.273645917542441e-05, | |
| "loss": 2.0921, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.12462947992454863, | |
| "grad_norm": 1.5468984842300415, | |
| "learning_rate": 7.475747776879546e-05, | |
| "loss": 2.0626, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.12799784424683375, | |
| "grad_norm": 1.7183716297149658, | |
| "learning_rate": 7.677849636216651e-05, | |
| "loss": 2.0302, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.13136620856911885, | |
| "grad_norm": 1.8704299926757812, | |
| "learning_rate": 7.879951495553758e-05, | |
| "loss": 1.9946, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.13473457289140395, | |
| "grad_norm": 1.719117522239685, | |
| "learning_rate": 8.082053354890864e-05, | |
| "loss": 1.955, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.13810293721368902, | |
| "grad_norm": 1.8330260515213013, | |
| "learning_rate": 8.28415521422797e-05, | |
| "loss": 1.9137, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.14147130153597412, | |
| "grad_norm": 2.341217517852783, | |
| "learning_rate": 8.486257073565076e-05, | |
| "loss": 1.8765, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.14483966585825922, | |
| "grad_norm": 1.792738914489746, | |
| "learning_rate": 8.688358932902182e-05, | |
| "loss": 1.837, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.14820803018054432, | |
| "grad_norm": 2.237147092819214, | |
| "learning_rate": 8.890460792239287e-05, | |
| "loss": 1.7995, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.15157639450282942, | |
| "grad_norm": 1.8367396593093872, | |
| "learning_rate": 9.092562651576394e-05, | |
| "loss": 1.7706, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.15494475882511452, | |
| "grad_norm": 2.0587222576141357, | |
| "learning_rate": 9.2946645109135e-05, | |
| "loss": 1.7431, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.15831312314739962, | |
| "grad_norm": 1.8988635540008545, | |
| "learning_rate": 9.496766370250605e-05, | |
| "loss": 1.7179, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.16168148746968472, | |
| "grad_norm": 2.192547559738159, | |
| "learning_rate": 9.698868229587712e-05, | |
| "loss": 1.6932, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.16504985179196982, | |
| "grad_norm": 1.6913732290267944, | |
| "learning_rate": 9.900970088924818e-05, | |
| "loss": 1.6705, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.16841821611425492, | |
| "grad_norm": 1.705934762954712, | |
| "learning_rate": 0.00010103071948261923, | |
| "loss": 1.6509, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.17178658043654002, | |
| "grad_norm": 1.9764398336410522, | |
| "learning_rate": 0.0001030517380759903, | |
| "loss": 1.6212, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.17515494475882512, | |
| "grad_norm": 2.109279155731201, | |
| "learning_rate": 0.00010507275666936134, | |
| "loss": 1.6087, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.17852330908111022, | |
| "grad_norm": 2.036076784133911, | |
| "learning_rate": 0.0001070937752627324, | |
| "loss": 1.5878, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.18189167340339532, | |
| "grad_norm": 2.1286652088165283, | |
| "learning_rate": 0.00010911479385610347, | |
| "loss": 1.5641, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.18526003772568042, | |
| "grad_norm": 2.04007625579834, | |
| "learning_rate": 0.00011113581244947452, | |
| "loss": 1.5462, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.18862840204796552, | |
| "grad_norm": 1.9469410181045532, | |
| "learning_rate": 0.00011315683104284558, | |
| "loss": 1.528, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.1919967663702506, | |
| "grad_norm": 1.9078123569488525, | |
| "learning_rate": 0.00011517784963621665, | |
| "loss": 1.5101, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.1953651306925357, | |
| "grad_norm": 2.1443777084350586, | |
| "learning_rate": 0.0001171988682295877, | |
| "loss": 1.4885, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.1987334950148208, | |
| "grad_norm": 1.8993617296218872, | |
| "learning_rate": 0.00011921988682295876, | |
| "loss": 1.478, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.2021018593371059, | |
| "grad_norm": 1.7812656164169312, | |
| "learning_rate": 0.00012124090541632983, | |
| "loss": 1.4621, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.205470223659391, | |
| "grad_norm": 1.7858940362930298, | |
| "learning_rate": 0.0001232619240097009, | |
| "loss": 1.4494, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.2088385879816761, | |
| "grad_norm": 1.9336419105529785, | |
| "learning_rate": 0.00012528294260307193, | |
| "loss": 1.4393, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.2122069523039612, | |
| "grad_norm": 1.85440993309021, | |
| "learning_rate": 0.000127303961196443, | |
| "loss": 1.43, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.2155753166262463, | |
| "grad_norm": 1.583737850189209, | |
| "learning_rate": 0.00012932497978981406, | |
| "loss": 1.4198, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.2189436809485314, | |
| "grad_norm": 2.076510429382324, | |
| "learning_rate": 0.00013134599838318512, | |
| "loss": 1.4034, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.2223120452708165, | |
| "grad_norm": 1.6790341138839722, | |
| "learning_rate": 0.0001333670169765562, | |
| "loss": 1.3937, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.2256804095931016, | |
| "grad_norm": 1.9147748947143555, | |
| "learning_rate": 0.00013538803556992725, | |
| "loss": 1.3818, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.2290487739153867, | |
| "grad_norm": 1.6485368013381958, | |
| "learning_rate": 0.0001374090541632983, | |
| "loss": 1.3758, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.2324171382376718, | |
| "grad_norm": 2.087151527404785, | |
| "learning_rate": 0.00013943007275666935, | |
| "loss": 1.3696, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.2357855025599569, | |
| "grad_norm": 1.7565312385559082, | |
| "learning_rate": 0.0001414510913500404, | |
| "loss": 1.3579, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.239153866882242, | |
| "grad_norm": 1.83383047580719, | |
| "learning_rate": 0.00014347210994341146, | |
| "loss": 1.3532, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.2425222312045271, | |
| "grad_norm": 1.8977510929107666, | |
| "learning_rate": 0.00014549312853678252, | |
| "loss": 1.3465, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.24589059552681217, | |
| "grad_norm": 2.1144802570343018, | |
| "learning_rate": 0.00014751414713015358, | |
| "loss": 1.3389, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.24925895984909727, | |
| "grad_norm": 1.6824164390563965, | |
| "learning_rate": 0.00014953516572352465, | |
| "loss": 1.3283, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.2526273241713824, | |
| "grad_norm": 1.8628549575805664, | |
| "learning_rate": 0.00015155618431689571, | |
| "loss": 1.3225, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.2559956884936675, | |
| "grad_norm": 1.651308298110962, | |
| "learning_rate": 0.00015357720291026675, | |
| "loss": 1.3153, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.2593640528159526, | |
| "grad_norm": 1.6041486263275146, | |
| "learning_rate": 0.00015559822150363782, | |
| "loss": 1.3056, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.2627324171382377, | |
| "grad_norm": 1.9386959075927734, | |
| "learning_rate": 0.00015761924009700888, | |
| "loss": 1.3046, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.2661007814605228, | |
| "grad_norm": 1.8345019817352295, | |
| "learning_rate": 0.00015964025869037995, | |
| "loss": 1.2884, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.2694691457828079, | |
| "grad_norm": 1.4670854806900024, | |
| "learning_rate": 0.000161661277283751, | |
| "loss": 1.286, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.27283751010509294, | |
| "grad_norm": 1.8858684301376343, | |
| "learning_rate": 0.00016368229587712205, | |
| "loss": 1.2774, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.27620587442737804, | |
| "grad_norm": 1.8875221014022827, | |
| "learning_rate": 0.0001657033144704931, | |
| "loss": 1.2736, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.27957423874966314, | |
| "grad_norm": 1.7752630710601807, | |
| "learning_rate": 0.00016772433306386418, | |
| "loss": 1.2746, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.28294260307194824, | |
| "grad_norm": 1.5532513856887817, | |
| "learning_rate": 0.00016974535165723524, | |
| "loss": 1.267, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.28631096739423334, | |
| "grad_norm": 1.6465749740600586, | |
| "learning_rate": 0.0001717663702506063, | |
| "loss": 1.2599, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.28967933171651844, | |
| "grad_norm": 1.5771738290786743, | |
| "learning_rate": 0.00017378738884397737, | |
| "loss": 1.2557, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.29304769603880354, | |
| "grad_norm": 1.4705991744995117, | |
| "learning_rate": 0.0001758084074373484, | |
| "loss": 1.252, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.29641606036108864, | |
| "grad_norm": 1.489914059638977, | |
| "learning_rate": 0.00017782942603071947, | |
| "loss": 1.2425, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.29978442468337374, | |
| "grad_norm": 1.5901821851730347, | |
| "learning_rate": 0.00017985044462409054, | |
| "loss": 1.2341, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.30315278900565884, | |
| "grad_norm": 1.5143710374832153, | |
| "learning_rate": 0.0001818714632174616, | |
| "loss": 1.2309, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.30652115332794394, | |
| "grad_norm": 1.5409547090530396, | |
| "learning_rate": 0.00018389248181083267, | |
| "loss": 1.2258, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.30988951765022904, | |
| "grad_norm": 1.4094816446304321, | |
| "learning_rate": 0.00018591350040420368, | |
| "loss": 1.2197, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.31325788197251414, | |
| "grad_norm": 1.6748660802841187, | |
| "learning_rate": 0.00018793451899757474, | |
| "loss": 1.2176, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.31662624629479924, | |
| "grad_norm": 1.587318778038025, | |
| "learning_rate": 0.0001899555375909458, | |
| "loss": 1.2091, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.31999461061708434, | |
| "grad_norm": 1.5422818660736084, | |
| "learning_rate": 0.00019197655618431687, | |
| "loss": 1.2079, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.32336297493936944, | |
| "grad_norm": 1.30134117603302, | |
| "learning_rate": 0.00019399757477768793, | |
| "loss": 1.2017, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.32673133926165454, | |
| "grad_norm": 1.368249773979187, | |
| "learning_rate": 0.00019601859337105897, | |
| "loss": 1.2026, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.33009970358393964, | |
| "grad_norm": 1.5968406200408936, | |
| "learning_rate": 0.00019803961196443004, | |
| "loss": 1.1902, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.33346806790622474, | |
| "grad_norm": 1.435455083847046, | |
| "learning_rate": 0.0002000606305578011, | |
| "loss": 1.1894, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.33683643222850984, | |
| "grad_norm": 1.4132752418518066, | |
| "learning_rate": 0.00020208164915117217, | |
| "loss": 1.1841, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.34020479655079494, | |
| "grad_norm": 1.5038225650787354, | |
| "learning_rate": 0.00020410266774454323, | |
| "loss": 1.1859, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.34357316087308004, | |
| "grad_norm": 1.4201886653900146, | |
| "learning_rate": 0.0002061236863379143, | |
| "loss": 1.1743, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.34694152519536514, | |
| "grad_norm": 1.3544988632202148, | |
| "learning_rate": 0.00020814470493128533, | |
| "loss": 1.1724, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.35030988951765024, | |
| "grad_norm": 1.6340460777282715, | |
| "learning_rate": 0.0002101657235246564, | |
| "loss": 1.1679, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.35367825383993534, | |
| "grad_norm": 1.3137534856796265, | |
| "learning_rate": 0.00021218674211802746, | |
| "loss": 1.1602, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.35704661816222044, | |
| "grad_norm": 1.3838586807250977, | |
| "learning_rate": 0.00021420776071139853, | |
| "loss": 1.1602, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.36041498248450554, | |
| "grad_norm": 1.46292245388031, | |
| "learning_rate": 0.0002162287793047696, | |
| "loss": 1.154, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.36378334680679064, | |
| "grad_norm": 1.2189207077026367, | |
| "learning_rate": 0.00021824979789814066, | |
| "loss": 1.1518, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.36715171112907574, | |
| "grad_norm": 1.3567001819610596, | |
| "learning_rate": 0.0002202708164915117, | |
| "loss": 1.1437, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.37052007545136084, | |
| "grad_norm": 1.3379132747650146, | |
| "learning_rate": 0.00022229183508488276, | |
| "loss": 1.1418, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.37388843977364594, | |
| "grad_norm": 1.2323216199874878, | |
| "learning_rate": 0.00022431285367825382, | |
| "loss": 1.1382, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.37725680409593104, | |
| "grad_norm": 1.9002209901809692, | |
| "learning_rate": 0.0002263338722716249, | |
| "loss": 1.1323, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.3806251684182161, | |
| "grad_norm": 1.3124207258224487, | |
| "learning_rate": 0.00022835489086499595, | |
| "loss": 1.1336, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.3839935327405012, | |
| "grad_norm": 1.3445236682891846, | |
| "learning_rate": 0.000230375909458367, | |
| "loss": 1.1259, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.3873618970627863, | |
| "grad_norm": 1.2994790077209473, | |
| "learning_rate": 0.00023239692805173805, | |
| "loss": 1.1298, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.3907302613850714, | |
| "grad_norm": 1.2289458513259888, | |
| "learning_rate": 0.00023441794664510912, | |
| "loss": 1.1226, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.3940986257073565, | |
| "grad_norm": 1.1772109270095825, | |
| "learning_rate": 0.00023643896523848018, | |
| "loss": 1.1215, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.3974669900296416, | |
| "grad_norm": 1.3592746257781982, | |
| "learning_rate": 0.00023845998383185125, | |
| "loss": 1.1179, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.4008353543519267, | |
| "grad_norm": 1.1571407318115234, | |
| "learning_rate": 0.0002404810024252223, | |
| "loss": 1.1124, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.4042037186742118, | |
| "grad_norm": 1.3592592477798462, | |
| "learning_rate": 0.00024250202101859335, | |
| "loss": 1.1083, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.4075720829964969, | |
| "grad_norm": 1.2856664657592773, | |
| "learning_rate": 0.00024452303961196444, | |
| "loss": 1.1083, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.410940447318782, | |
| "grad_norm": 1.2512900829315186, | |
| "learning_rate": 0.0002465440582053355, | |
| "loss": 1.0984, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.4143088116410671, | |
| "grad_norm": 1.239823818206787, | |
| "learning_rate": 0.0002485650767987065, | |
| "loss": 1.0984, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.4176771759633522, | |
| "grad_norm": 1.4879858493804932, | |
| "learning_rate": 0.0002505860953920776, | |
| "loss": 1.0962, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.4210455402856373, | |
| "grad_norm": 1.1708803176879883, | |
| "learning_rate": 0.00025260711398544865, | |
| "loss": 1.0914, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.4244139046079224, | |
| "grad_norm": 1.092463731765747, | |
| "learning_rate": 0.0002546281325788197, | |
| "loss": 1.089, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.4277822689302075, | |
| "grad_norm": 1.2004188299179077, | |
| "learning_rate": 0.0002566491511721908, | |
| "loss": 1.0884, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.4311506332524926, | |
| "grad_norm": 1.1339149475097656, | |
| "learning_rate": 0.0002586701697655618, | |
| "loss": 1.0857, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.4345189975747777, | |
| "grad_norm": 1.3253908157348633, | |
| "learning_rate": 0.00026069118835893285, | |
| "loss": 1.0821, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.4378873618970628, | |
| "grad_norm": 1.2655895948410034, | |
| "learning_rate": 0.00026271220695230394, | |
| "loss": 1.0775, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.4412557262193479, | |
| "grad_norm": 1.4968757629394531, | |
| "learning_rate": 0.000264733225545675, | |
| "loss": 1.0764, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.444624090541633, | |
| "grad_norm": 1.200173020362854, | |
| "learning_rate": 0.00026675424413904607, | |
| "loss": 1.0768, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.4479924548639181, | |
| "grad_norm": 1.3085741996765137, | |
| "learning_rate": 0.0002687752627324171, | |
| "loss": 1.0739, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.4513608191862032, | |
| "grad_norm": 1.181569218635559, | |
| "learning_rate": 0.00027079628132578815, | |
| "loss": 1.0685, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.4547291835084883, | |
| "grad_norm": 1.2471662759780884, | |
| "learning_rate": 0.00027281729991915924, | |
| "loss": 1.0668, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.4580975478307734, | |
| "grad_norm": 1.0714460611343384, | |
| "learning_rate": 0.0002748383185125303, | |
| "loss": 1.0687, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.4614659121530585, | |
| "grad_norm": 1.2705806493759155, | |
| "learning_rate": 0.00027685933710590137, | |
| "loss": 1.0602, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.4648342764753436, | |
| "grad_norm": 1.199216365814209, | |
| "learning_rate": 0.0002788803556992724, | |
| "loss": 1.0572, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.4682026407976287, | |
| "grad_norm": 1.1781370639801025, | |
| "learning_rate": 0.00028090137429264344, | |
| "loss": 1.056, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.4715710051199138, | |
| "grad_norm": 1.1787018775939941, | |
| "learning_rate": 0.00028292239288601453, | |
| "loss": 1.0582, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.4749393694421989, | |
| "grad_norm": 1.0408787727355957, | |
| "learning_rate": 0.00028494341147938557, | |
| "loss": 1.0509, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.478307733764484, | |
| "grad_norm": 1.2054550647735596, | |
| "learning_rate": 0.00028696443007275666, | |
| "loss": 1.0496, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.4816760980867691, | |
| "grad_norm": 1.059328317642212, | |
| "learning_rate": 0.0002889854486661277, | |
| "loss": 1.0484, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.4850444624090542, | |
| "grad_norm": 1.0218919515609741, | |
| "learning_rate": 0.0002910064672594988, | |
| "loss": 1.0443, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.4884128267313393, | |
| "grad_norm": 1.569550633430481, | |
| "learning_rate": 0.00029302748585286983, | |
| "loss": 1.0439, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.49178119105362433, | |
| "grad_norm": 1.2660326957702637, | |
| "learning_rate": 0.00029504850444624087, | |
| "loss": 1.0412, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.49514955537590943, | |
| "grad_norm": 0.9795782566070557, | |
| "learning_rate": 0.00029706952303961196, | |
| "loss": 1.0362, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.49851791969819453, | |
| "grad_norm": 1.0511739253997803, | |
| "learning_rate": 0.000299090541632983, | |
| "loss": 1.0379, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.5018862840204796, | |
| "grad_norm": 1.077668309211731, | |
| "learning_rate": 0.00029987649330818285, | |
| "loss": 1.034, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.5052546483427648, | |
| "grad_norm": 0.9709302186965942, | |
| "learning_rate": 0.0002996519356866972, | |
| "loss": 1.0297, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.5086230126650498, | |
| "grad_norm": 1.038855791091919, | |
| "learning_rate": 0.0002994273780652115, | |
| "loss": 1.0295, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.511991376987335, | |
| "grad_norm": 1.1095309257507324, | |
| "learning_rate": 0.00029920282044372586, | |
| "loss": 1.0241, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.51535974130962, | |
| "grad_norm": 1.0058341026306152, | |
| "learning_rate": 0.00029897826282224014, | |
| "loss": 1.0233, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.5187281056319052, | |
| "grad_norm": 1.1029912233352661, | |
| "learning_rate": 0.00029875370520075447, | |
| "loss": 1.0253, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.5220964699541902, | |
| "grad_norm": 1.1280447244644165, | |
| "learning_rate": 0.0002985291475792688, | |
| "loss": 1.02, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.5254648342764754, | |
| "grad_norm": 1.0095000267028809, | |
| "learning_rate": 0.00029830458995778314, | |
| "loss": 1.0185, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.5288331985987604, | |
| "grad_norm": 1.071540117263794, | |
| "learning_rate": 0.0002980800323362975, | |
| "loss": 1.0133, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.5322015629210456, | |
| "grad_norm": 0.9709872007369995, | |
| "learning_rate": 0.0002978554747148118, | |
| "loss": 1.0189, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.5355699272433306, | |
| "grad_norm": 1.1805214881896973, | |
| "learning_rate": 0.0002976309170933261, | |
| "loss": 1.0145, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.5389382915656158, | |
| "grad_norm": 1.1302651166915894, | |
| "learning_rate": 0.0002974063594718405, | |
| "loss": 1.0088, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.5423066558879008, | |
| "grad_norm": 1.280207872390747, | |
| "learning_rate": 0.00029718180185035476, | |
| "loss": 1.0126, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.5456750202101859, | |
| "grad_norm": 1.024566888809204, | |
| "learning_rate": 0.0002969572442288691, | |
| "loss": 1.006, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.549043384532471, | |
| "grad_norm": 1.192209243774414, | |
| "learning_rate": 0.00029673268660738343, | |
| "loss": 1.0076, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.5524117488547561, | |
| "grad_norm": 0.9792165756225586, | |
| "learning_rate": 0.0002965081289858977, | |
| "loss": 1.005, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.5557801131770412, | |
| "grad_norm": 1.6060813665390015, | |
| "learning_rate": 0.0002962835713644121, | |
| "loss": 0.9999, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.5591484774993263, | |
| "grad_norm": 0.9630849957466125, | |
| "learning_rate": 0.0002960590137429264, | |
| "loss": 1.0024, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.5625168418216114, | |
| "grad_norm": 0.9396387934684753, | |
| "learning_rate": 0.0002958344561214407, | |
| "loss": 1.0004, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.5658852061438965, | |
| "grad_norm": 0.9031047821044922, | |
| "learning_rate": 0.00029560989849995506, | |
| "loss": 0.9984, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.5692535704661816, | |
| "grad_norm": 1.0625028610229492, | |
| "learning_rate": 0.0002953853408784694, | |
| "loss": 0.9932, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.5726219347884667, | |
| "grad_norm": 0.9275569319725037, | |
| "learning_rate": 0.0002951607832569837, | |
| "loss": 0.9925, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.5759902991107518, | |
| "grad_norm": 0.8847247362136841, | |
| "learning_rate": 0.00029493622563549806, | |
| "loss": 0.9909, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.5793586634330369, | |
| "grad_norm": 0.9581294059753418, | |
| "learning_rate": 0.0002947116680140124, | |
| "loss": 0.9923, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.582727027755322, | |
| "grad_norm": 1.003164529800415, | |
| "learning_rate": 0.0002944871103925267, | |
| "loss": 0.9914, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.5860953920776071, | |
| "grad_norm": 1.010026454925537, | |
| "learning_rate": 0.000294262552771041, | |
| "loss": 0.9854, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.5894637563998922, | |
| "grad_norm": 0.9266247153282166, | |
| "learning_rate": 0.00029403799514955535, | |
| "loss": 0.9879, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.5928321207221773, | |
| "grad_norm": 0.9909249544143677, | |
| "learning_rate": 0.0002938134375280697, | |
| "loss": 0.9876, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.5962004850444624, | |
| "grad_norm": 0.8353651165962219, | |
| "learning_rate": 0.000293588879906584, | |
| "loss": 0.9846, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.5995688493667475, | |
| "grad_norm": 0.9142294526100159, | |
| "learning_rate": 0.00029336432228509835, | |
| "loss": 0.9877, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.6029372136890326, | |
| "grad_norm": 0.9139926433563232, | |
| "learning_rate": 0.00029313976466361263, | |
| "loss": 0.9831, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.6063055780113177, | |
| "grad_norm": 0.8871977925300598, | |
| "learning_rate": 0.000292915207042127, | |
| "loss": 0.9817, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.6096739423336028, | |
| "grad_norm": 0.9932221174240112, | |
| "learning_rate": 0.0002926906494206413, | |
| "loss": 0.979, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.6130423066558879, | |
| "grad_norm": 0.9240766167640686, | |
| "learning_rate": 0.00029246609179915564, | |
| "loss": 0.9774, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.616410670978173, | |
| "grad_norm": 0.932101845741272, | |
| "learning_rate": 0.00029224153417767, | |
| "loss": 0.9797, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.6197790353004581, | |
| "grad_norm": 0.9871794581413269, | |
| "learning_rate": 0.00029201697655618426, | |
| "loss": 0.9809, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.6231473996227432, | |
| "grad_norm": 0.8771729469299316, | |
| "learning_rate": 0.00029179241893469865, | |
| "loss": 0.9761, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.6265157639450283, | |
| "grad_norm": 1.103968858718872, | |
| "learning_rate": 0.0002915678613132129, | |
| "loss": 0.9779, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.6298841282673134, | |
| "grad_norm": 1.0848268270492554, | |
| "learning_rate": 0.00029134330369172726, | |
| "loss": 0.9722, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.6332524925895985, | |
| "grad_norm": 0.8907010555267334, | |
| "learning_rate": 0.0002911187460702416, | |
| "loss": 0.9709, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.6366208569118836, | |
| "grad_norm": 0.9081377983093262, | |
| "learning_rate": 0.00029089418844875593, | |
| "loss": 0.9681, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.6399892212341687, | |
| "grad_norm": 0.873905599117279, | |
| "learning_rate": 0.00029066963082727027, | |
| "loss": 0.9735, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.6433575855564538, | |
| "grad_norm": 0.9111950397491455, | |
| "learning_rate": 0.0002904450732057846, | |
| "loss": 0.97, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.6467259498787389, | |
| "grad_norm": 0.9769060611724854, | |
| "learning_rate": 0.0002902205155842989, | |
| "loss": 0.9688, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.650094314201024, | |
| "grad_norm": 1.0822559595108032, | |
| "learning_rate": 0.0002899959579628132, | |
| "loss": 0.968, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.6534626785233091, | |
| "grad_norm": 0.8573871850967407, | |
| "learning_rate": 0.00028977140034132755, | |
| "loss": 0.967, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.6568310428455941, | |
| "grad_norm": 0.989267885684967, | |
| "learning_rate": 0.0002895468427198419, | |
| "loss": 0.9652, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.6601994071678793, | |
| "grad_norm": 0.9819543361663818, | |
| "learning_rate": 0.0002893222850983562, | |
| "loss": 0.9597, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.6635677714901643, | |
| "grad_norm": 0.9204864501953125, | |
| "learning_rate": 0.00028909772747687056, | |
| "loss": 0.9641, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.6669361358124495, | |
| "grad_norm": 1.1161561012268066, | |
| "learning_rate": 0.0002888731698553849, | |
| "loss": 0.9619, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.6703045001347345, | |
| "grad_norm": 0.8925914764404297, | |
| "learning_rate": 0.0002886486122338992, | |
| "loss": 0.9599, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.6736728644570197, | |
| "grad_norm": 0.9228368401527405, | |
| "learning_rate": 0.0002884240546124135, | |
| "loss": 0.9603, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.6770412287793047, | |
| "grad_norm": 0.8357170224189758, | |
| "learning_rate": 0.00028819949699092785, | |
| "loss": 0.9562, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.6804095931015899, | |
| "grad_norm": 0.9358044266700745, | |
| "learning_rate": 0.0002879749393694422, | |
| "loss": 0.963, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.6837779574238749, | |
| "grad_norm": 0.83344966173172, | |
| "learning_rate": 0.0002877503817479565, | |
| "loss": 0.9555, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.6871463217461601, | |
| "grad_norm": 0.8138599395751953, | |
| "learning_rate": 0.0002875258241264708, | |
| "loss": 0.9572, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.6905146860684451, | |
| "grad_norm": 0.886358380317688, | |
| "learning_rate": 0.0002873012665049852, | |
| "loss": 0.9525, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.6938830503907303, | |
| "grad_norm": 0.9100881814956665, | |
| "learning_rate": 0.00028707670888349947, | |
| "loss": 0.9545, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.6972514147130153, | |
| "grad_norm": 0.8902551531791687, | |
| "learning_rate": 0.0002868521512620138, | |
| "loss": 0.9542, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.7006197790353005, | |
| "grad_norm": 1.24783456325531, | |
| "learning_rate": 0.00028662759364052814, | |
| "loss": 0.9536, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.7039881433575855, | |
| "grad_norm": 0.831633448600769, | |
| "learning_rate": 0.0002864030360190425, | |
| "loss": 0.9526, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.7073565076798707, | |
| "grad_norm": 0.8300578594207764, | |
| "learning_rate": 0.0002861784783975568, | |
| "loss": 0.9478, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.7107248720021557, | |
| "grad_norm": 0.9245336055755615, | |
| "learning_rate": 0.0002859539207760711, | |
| "loss": 0.9531, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.7140932363244409, | |
| "grad_norm": 0.8717476725578308, | |
| "learning_rate": 0.0002857293631545854, | |
| "loss": 0.9487, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.7174616006467259, | |
| "grad_norm": 0.8705008625984192, | |
| "learning_rate": 0.00028550480553309976, | |
| "loss": 0.9486, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.7208299649690111, | |
| "grad_norm": 0.8324209451675415, | |
| "learning_rate": 0.0002852802479116141, | |
| "loss": 0.9506, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.7241983292912961, | |
| "grad_norm": 0.9107707738876343, | |
| "learning_rate": 0.00028505569029012843, | |
| "loss": 0.9465, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.7275666936135813, | |
| "grad_norm": 0.8204140067100525, | |
| "learning_rate": 0.00028483113266864277, | |
| "loss": 0.9462, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.7309350579358663, | |
| "grad_norm": 0.8158605098724365, | |
| "learning_rate": 0.00028460657504715705, | |
| "loss": 0.9483, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.7343034222581515, | |
| "grad_norm": 0.8872929811477661, | |
| "learning_rate": 0.00028438201742567144, | |
| "loss": 0.9461, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.7376717865804365, | |
| "grad_norm": 0.8722573518753052, | |
| "learning_rate": 0.0002841574598041857, | |
| "loss": 0.943, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.7410401509027217, | |
| "grad_norm": 0.8522630929946899, | |
| "learning_rate": 0.00028393290218270005, | |
| "loss": 0.9449, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.7444085152250067, | |
| "grad_norm": 0.850837767124176, | |
| "learning_rate": 0.0002837083445612144, | |
| "loss": 0.9441, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.7477768795472919, | |
| "grad_norm": 0.7917930483818054, | |
| "learning_rate": 0.0002834837869397287, | |
| "loss": 0.9442, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.7511452438695769, | |
| "grad_norm": 0.9168843030929565, | |
| "learning_rate": 0.00028325922931824306, | |
| "loss": 0.9427, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.7545136081918621, | |
| "grad_norm": 0.9117637276649475, | |
| "learning_rate": 0.00028303467169675734, | |
| "loss": 0.9436, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.7578819725141471, | |
| "grad_norm": 0.8345003724098206, | |
| "learning_rate": 0.0002828101140752717, | |
| "loss": 0.9394, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.7612503368364322, | |
| "grad_norm": 0.8290796875953674, | |
| "learning_rate": 0.000282585556453786, | |
| "loss": 0.938, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.7646187011587173, | |
| "grad_norm": 0.7990386486053467, | |
| "learning_rate": 0.00028236099883230034, | |
| "loss": 0.9393, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.7679870654810024, | |
| "grad_norm": 0.7871207594871521, | |
| "learning_rate": 0.0002821364412108147, | |
| "loss": 0.9376, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.7713554298032875, | |
| "grad_norm": 0.7800641059875488, | |
| "learning_rate": 0.000281911883589329, | |
| "loss": 0.9366, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.7747237941255726, | |
| "grad_norm": 0.8521484136581421, | |
| "learning_rate": 0.00028168732596784335, | |
| "loss": 0.9377, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.7780921584478577, | |
| "grad_norm": 0.8726188540458679, | |
| "learning_rate": 0.00028146276834635763, | |
| "loss": 0.9344, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.7814605227701428, | |
| "grad_norm": 0.9499660730361938, | |
| "learning_rate": 0.00028123821072487197, | |
| "loss": 0.9357, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.7848288870924279, | |
| "grad_norm": 0.8431583642959595, | |
| "learning_rate": 0.0002810136531033863, | |
| "loss": 0.9373, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.788197251414713, | |
| "grad_norm": 0.8140375018119812, | |
| "learning_rate": 0.00028078909548190064, | |
| "loss": 0.9352, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.7915656157369981, | |
| "grad_norm": 0.8821849226951599, | |
| "learning_rate": 0.00028056453786041497, | |
| "loss": 0.9346, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.7949339800592832, | |
| "grad_norm": 1.0091123580932617, | |
| "learning_rate": 0.0002803399802389293, | |
| "loss": 0.9328, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.7983023443815683, | |
| "grad_norm": 1.0306652784347534, | |
| "learning_rate": 0.0002801154226174436, | |
| "loss": 0.9332, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.8016707087038534, | |
| "grad_norm": 0.8444597721099854, | |
| "learning_rate": 0.000279890864995958, | |
| "loss": 0.9372, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.8050390730261385, | |
| "grad_norm": 0.8706274628639221, | |
| "learning_rate": 0.00027966630737447226, | |
| "loss": 0.9351, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.8084074373484236, | |
| "grad_norm": 0.8325883150100708, | |
| "learning_rate": 0.0002794417497529866, | |
| "loss": 0.9296, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.8117758016707087, | |
| "grad_norm": 0.7604151964187622, | |
| "learning_rate": 0.00027921719213150093, | |
| "loss": 0.9292, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.8151441659929938, | |
| "grad_norm": 0.916677713394165, | |
| "learning_rate": 0.0002789926345100152, | |
| "loss": 0.9274, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.8185125303152789, | |
| "grad_norm": 0.8420447111129761, | |
| "learning_rate": 0.0002787680768885296, | |
| "loss": 0.9293, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.821880894637564, | |
| "grad_norm": 0.8805976510047913, | |
| "learning_rate": 0.0002785435192670439, | |
| "loss": 0.9243, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.8252492589598491, | |
| "grad_norm": 0.8013381361961365, | |
| "learning_rate": 0.0002783189616455582, | |
| "loss": 0.9296, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.8286176232821342, | |
| "grad_norm": 0.8348533511161804, | |
| "learning_rate": 0.00027809440402407255, | |
| "loss": 0.9256, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.8319859876044193, | |
| "grad_norm": 0.7982239723205566, | |
| "learning_rate": 0.0002778698464025869, | |
| "loss": 0.9273, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.8353543519267044, | |
| "grad_norm": 0.9021079540252686, | |
| "learning_rate": 0.0002776452887811012, | |
| "loss": 0.9238, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.8387227162489895, | |
| "grad_norm": 0.7782942652702332, | |
| "learning_rate": 0.0002774207311596155, | |
| "loss": 0.9248, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.8420910805712746, | |
| "grad_norm": 0.8153879046440125, | |
| "learning_rate": 0.00027719617353812984, | |
| "loss": 0.9272, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.8454594448935597, | |
| "grad_norm": 0.9402956366539001, | |
| "learning_rate": 0.00027697161591664417, | |
| "loss": 0.9243, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.8488278092158448, | |
| "grad_norm": 0.7734837532043457, | |
| "learning_rate": 0.0002767470582951585, | |
| "loss": 0.9262, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.8521961735381299, | |
| "grad_norm": 0.7865928411483765, | |
| "learning_rate": 0.00027652250067367284, | |
| "loss": 0.9247, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.855564537860415, | |
| "grad_norm": 0.8157060742378235, | |
| "learning_rate": 0.0002762979430521872, | |
| "loss": 0.9235, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.8589329021827001, | |
| "grad_norm": 0.9254295825958252, | |
| "learning_rate": 0.0002760733854307015, | |
| "loss": 0.9201, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.8623012665049852, | |
| "grad_norm": 0.7986881136894226, | |
| "learning_rate": 0.00027584882780921585, | |
| "loss": 0.9215, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.8656696308272703, | |
| "grad_norm": 1.0841712951660156, | |
| "learning_rate": 0.00027562427018773013, | |
| "loss": 0.9234, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.8690379951495554, | |
| "grad_norm": 0.8564463257789612, | |
| "learning_rate": 0.00027539971256624446, | |
| "loss": 0.9238, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.8724063594718404, | |
| "grad_norm": 0.7546072602272034, | |
| "learning_rate": 0.0002751751549447588, | |
| "loss": 0.9195, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.8757747237941256, | |
| "grad_norm": 1.1822130680084229, | |
| "learning_rate": 0.00027495059732327313, | |
| "loss": 0.9204, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.8791430881164106, | |
| "grad_norm": 0.7953358292579651, | |
| "learning_rate": 0.00027472603970178747, | |
| "loss": 0.9234, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.8825114524386958, | |
| "grad_norm": 0.7510402798652649, | |
| "learning_rate": 0.00027450148208030175, | |
| "loss": 0.9193, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.8858798167609808, | |
| "grad_norm": 1.1970155239105225, | |
| "learning_rate": 0.00027427692445881614, | |
| "loss": 0.9165, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.889248181083266, | |
| "grad_norm": 1.0077903270721436, | |
| "learning_rate": 0.0002740523668373304, | |
| "loss": 0.9136, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.892616545405551, | |
| "grad_norm": 0.7909373044967651, | |
| "learning_rate": 0.00027382780921584476, | |
| "loss": 0.9162, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.8959849097278362, | |
| "grad_norm": 0.7484354376792908, | |
| "learning_rate": 0.0002736032515943591, | |
| "loss": 0.9151, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.8993532740501212, | |
| "grad_norm": 0.7733712792396545, | |
| "learning_rate": 0.0002733786939728734, | |
| "loss": 0.9171, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.9027216383724064, | |
| "grad_norm": 1.0612813234329224, | |
| "learning_rate": 0.00027315413635138776, | |
| "loss": 0.9202, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.9060900026946914, | |
| "grad_norm": 0.8826086521148682, | |
| "learning_rate": 0.00027292957872990204, | |
| "loss": 0.9184, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.9094583670169766, | |
| "grad_norm": 0.7911275029182434, | |
| "learning_rate": 0.0002727050211084164, | |
| "loss": 0.9144, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.9128267313392616, | |
| "grad_norm": 0.7702584862709045, | |
| "learning_rate": 0.0002724804634869307, | |
| "loss": 0.9086, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.9161950956615468, | |
| "grad_norm": 0.7692966461181641, | |
| "learning_rate": 0.00027225590586544505, | |
| "loss": 0.9127, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.9195634599838318, | |
| "grad_norm": 0.7272462844848633, | |
| "learning_rate": 0.0002720313482439594, | |
| "loss": 0.9129, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.922931824306117, | |
| "grad_norm": 0.7643866539001465, | |
| "learning_rate": 0.0002718067906224737, | |
| "loss": 0.9127, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.926300188628402, | |
| "grad_norm": 0.8528723120689392, | |
| "learning_rate": 0.000271582233000988, | |
| "loss": 0.9118, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.9296685529506872, | |
| "grad_norm": 0.9294866323471069, | |
| "learning_rate": 0.0002713576753795024, | |
| "loss": 0.913, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.9330369172729722, | |
| "grad_norm": 0.7819476127624512, | |
| "learning_rate": 0.00027113311775801667, | |
| "loss": 0.9094, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.9364052815952574, | |
| "grad_norm": 0.9195557236671448, | |
| "learning_rate": 0.000270908560136531, | |
| "loss": 0.9102, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.9397736459175424, | |
| "grad_norm": 0.8723948001861572, | |
| "learning_rate": 0.00027068400251504534, | |
| "loss": 0.9099, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.9431420102398276, | |
| "grad_norm": 0.7643413543701172, | |
| "learning_rate": 0.0002704594448935597, | |
| "loss": 0.9101, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.9465103745621126, | |
| "grad_norm": 0.8065772652626038, | |
| "learning_rate": 0.000270234887272074, | |
| "loss": 0.9099, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.9498787388843978, | |
| "grad_norm": 0.7567630410194397, | |
| "learning_rate": 0.0002700103296505883, | |
| "loss": 0.9092, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.9532471032066828, | |
| "grad_norm": 0.8164196014404297, | |
| "learning_rate": 0.0002697857720291026, | |
| "loss": 0.9115, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.956615467528968, | |
| "grad_norm": 0.7787773013114929, | |
| "learning_rate": 0.00026956121440761696, | |
| "loss": 0.9062, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.959983831851253, | |
| "grad_norm": 0.724734902381897, | |
| "learning_rate": 0.0002693366567861313, | |
| "loss": 0.9078, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.9633521961735382, | |
| "grad_norm": 0.813438355922699, | |
| "learning_rate": 0.00026911209916464563, | |
| "loss": 0.9075, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.9667205604958232, | |
| "grad_norm": 0.9264329671859741, | |
| "learning_rate": 0.00026888754154315997, | |
| "loss": 0.9052, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.9700889248181084, | |
| "grad_norm": 0.8521473407745361, | |
| "learning_rate": 0.0002686629839216743, | |
| "loss": 0.9034, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.9734572891403934, | |
| "grad_norm": 0.7979128360748291, | |
| "learning_rate": 0.0002684384263001886, | |
| "loss": 0.9052, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.9768256534626786, | |
| "grad_norm": 0.7806681394577026, | |
| "learning_rate": 0.0002682138686787029, | |
| "loss": 0.9039, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.9801940177849636, | |
| "grad_norm": 0.7266373038291931, | |
| "learning_rate": 0.00026798931105721725, | |
| "loss": 0.9072, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.9835623821072487, | |
| "grad_norm": 0.8608720302581787, | |
| "learning_rate": 0.0002677647534357316, | |
| "loss": 0.9062, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.9869307464295338, | |
| "grad_norm": 0.7421156764030457, | |
| "learning_rate": 0.0002675401958142459, | |
| "loss": 0.9059, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.9902991107518189, | |
| "grad_norm": 0.7319119572639465, | |
| "learning_rate": 0.00026731563819276026, | |
| "loss": 0.9074, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.993667475074104, | |
| "grad_norm": 1.1928077936172485, | |
| "learning_rate": 0.00026709108057127454, | |
| "loss": 0.9024, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.9970358393963891, | |
| "grad_norm": 0.7490862011909485, | |
| "learning_rate": 0.00026686652294978893, | |
| "loss": 0.904, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.840101957321167, | |
| "eval_runtime": 1.8082, | |
| "eval_samples_per_second": 2765.256, | |
| "eval_steps_per_second": 43.691, | |
| "step": 29688 | |
| }, | |
| { | |
| "epoch": 1.0004042037186742, | |
| "grad_norm": 0.7771435379981995, | |
| "learning_rate": 0.0002666419653283032, | |
| "loss": 0.9014, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.0037725680409593, | |
| "grad_norm": 0.7980936765670776, | |
| "learning_rate": 0.00026641740770681755, | |
| "loss": 0.9045, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.0071409323632443, | |
| "grad_norm": 0.7477487921714783, | |
| "learning_rate": 0.0002661928500853319, | |
| "loss": 0.904, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.0105092966855296, | |
| "grad_norm": 0.7866721153259277, | |
| "learning_rate": 0.00026596829246384616, | |
| "loss": 0.905, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.0138776610078146, | |
| "grad_norm": 0.8210276365280151, | |
| "learning_rate": 0.00026574373484236055, | |
| "loss": 0.9023, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.0172460253300997, | |
| "grad_norm": 0.683873176574707, | |
| "learning_rate": 0.00026551917722087483, | |
| "loss": 0.899, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.0206143896523847, | |
| "grad_norm": 0.772179901599884, | |
| "learning_rate": 0.00026529461959938917, | |
| "loss": 0.8972, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.02398275397467, | |
| "grad_norm": 0.9590583443641663, | |
| "learning_rate": 0.0002650700619779035, | |
| "loss": 0.9048, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.027351118296955, | |
| "grad_norm": 0.8030016422271729, | |
| "learning_rate": 0.00026484550435641784, | |
| "loss": 0.9007, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.03071948261924, | |
| "grad_norm": 0.7494839429855347, | |
| "learning_rate": 0.0002646209467349322, | |
| "loss": 0.9002, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.0340878469415251, | |
| "grad_norm": 0.7339987754821777, | |
| "learning_rate": 0.00026439638911344645, | |
| "loss": 0.8995, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.0374562112638104, | |
| "grad_norm": 0.7874321937561035, | |
| "learning_rate": 0.0002641718314919608, | |
| "loss": 0.8982, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.0408245755860954, | |
| "grad_norm": 0.6752808690071106, | |
| "learning_rate": 0.0002639472738704751, | |
| "loss": 0.8986, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.0441929399083805, | |
| "grad_norm": 0.758162796497345, | |
| "learning_rate": 0.00026372271624898946, | |
| "loss": 0.8963, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.0475613042306655, | |
| "grad_norm": 1.2287030220031738, | |
| "learning_rate": 0.0002634981586275038, | |
| "loss": 0.8999, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.0509296685529508, | |
| "grad_norm": 0.7714762687683105, | |
| "learning_rate": 0.00026327360100601813, | |
| "loss": 0.8952, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.0542980328752358, | |
| "grad_norm": 0.7721061706542969, | |
| "learning_rate": 0.00026304904338453247, | |
| "loss": 0.895, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.0576663971975209, | |
| "grad_norm": 0.7775672078132629, | |
| "learning_rate": 0.0002628244857630468, | |
| "loss": 0.8973, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.061034761519806, | |
| "grad_norm": 0.8213030695915222, | |
| "learning_rate": 0.0002625999281415611, | |
| "loss": 0.8958, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.0644031258420912, | |
| "grad_norm": 1.3196107149124146, | |
| "learning_rate": 0.0002623753705200754, | |
| "loss": 0.8953, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.0677714901643762, | |
| "grad_norm": 0.7078897953033447, | |
| "learning_rate": 0.00026215081289858975, | |
| "loss": 0.8981, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.0711398544866613, | |
| "grad_norm": 0.7529389262199402, | |
| "learning_rate": 0.0002619262552771041, | |
| "loss": 0.8963, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.0745082188089463, | |
| "grad_norm": 0.7418708801269531, | |
| "learning_rate": 0.0002617016976556184, | |
| "loss": 0.8931, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.0778765831312316, | |
| "grad_norm": 0.6806597113609314, | |
| "learning_rate": 0.0002614771400341327, | |
| "loss": 0.8938, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.0812449474535166, | |
| "grad_norm": 0.8359425067901611, | |
| "learning_rate": 0.0002612525824126471, | |
| "loss": 0.8909, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.0846133117758017, | |
| "grad_norm": 0.7026517391204834, | |
| "learning_rate": 0.0002610280247911614, | |
| "loss": 0.8932, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.0879816760980867, | |
| "grad_norm": 0.9777092933654785, | |
| "learning_rate": 0.0002608034671696757, | |
| "loss": 0.8936, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.0913500404203718, | |
| "grad_norm": 0.7617833018302917, | |
| "learning_rate": 0.00026057890954819004, | |
| "loss": 0.8957, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.094718404742657, | |
| "grad_norm": 0.7257563471794128, | |
| "learning_rate": 0.0002603543519267044, | |
| "loss": 0.8947, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.098086769064942, | |
| "grad_norm": 0.6984378695487976, | |
| "learning_rate": 0.0002601297943052187, | |
| "loss": 0.8933, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.1014551333872271, | |
| "grad_norm": 0.7216737270355225, | |
| "learning_rate": 0.000259905236683733, | |
| "loss": 0.8912, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 1.1048234977095122, | |
| "grad_norm": 0.8986912965774536, | |
| "learning_rate": 0.00025968067906224733, | |
| "loss": 0.8924, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.1081918620317974, | |
| "grad_norm": 0.7360677719116211, | |
| "learning_rate": 0.00025945612144076167, | |
| "loss": 0.8912, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 1.1115602263540825, | |
| "grad_norm": 0.7221621870994568, | |
| "learning_rate": 0.000259231563819276, | |
| "loss": 0.8903, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.1149285906763675, | |
| "grad_norm": 0.8666985034942627, | |
| "learning_rate": 0.00025900700619779034, | |
| "loss": 0.8869, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 1.1182969549986526, | |
| "grad_norm": 0.7224547266960144, | |
| "learning_rate": 0.00025878244857630467, | |
| "loss": 0.8909, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.1216653193209378, | |
| "grad_norm": 1.036876916885376, | |
| "learning_rate": 0.000258557890954819, | |
| "loss": 0.8931, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 1.1250336836432229, | |
| "grad_norm": 0.795993447303772, | |
| "learning_rate": 0.00025833333333333334, | |
| "loss": 0.8853, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.128402047965508, | |
| "grad_norm": 0.7044444680213928, | |
| "learning_rate": 0.0002581087757118476, | |
| "loss": 0.8913, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.131770412287793, | |
| "grad_norm": 0.7511852979660034, | |
| "learning_rate": 0.00025788421809036196, | |
| "loss": 0.8886, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.1351387766100782, | |
| "grad_norm": 0.8065422177314758, | |
| "learning_rate": 0.0002576596604688763, | |
| "loss": 0.8882, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 1.1385071409323633, | |
| "grad_norm": 0.7372477650642395, | |
| "learning_rate": 0.00025743510284739063, | |
| "loss": 0.8886, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.1418755052546483, | |
| "grad_norm": 0.7889522314071655, | |
| "learning_rate": 0.00025721054522590496, | |
| "loss": 0.8886, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 1.1452438695769334, | |
| "grad_norm": 0.6768883466720581, | |
| "learning_rate": 0.00025698598760441925, | |
| "loss": 0.8895, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.1486122338992186, | |
| "grad_norm": 0.8599404096603394, | |
| "learning_rate": 0.00025676142998293363, | |
| "loss": 0.8904, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 1.1519805982215037, | |
| "grad_norm": 0.716754138469696, | |
| "learning_rate": 0.0002565368723614479, | |
| "loss": 0.8898, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.1553489625437887, | |
| "grad_norm": 0.7366636991500854, | |
| "learning_rate": 0.00025631231473996225, | |
| "loss": 0.8872, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 1.1587173268660738, | |
| "grad_norm": 0.7212886810302734, | |
| "learning_rate": 0.0002560877571184766, | |
| "loss": 0.8897, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.162085691188359, | |
| "grad_norm": 0.768371045589447, | |
| "learning_rate": 0.00025586319949699087, | |
| "loss": 0.8871, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.165454055510644, | |
| "grad_norm": 0.7179331183433533, | |
| "learning_rate": 0.00025563864187550526, | |
| "loss": 0.8864, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.1688224198329291, | |
| "grad_norm": 0.7503130435943604, | |
| "learning_rate": 0.00025541408425401954, | |
| "loss": 0.8857, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 1.1721907841552142, | |
| "grad_norm": 0.7497594952583313, | |
| "learning_rate": 0.00025518952663253387, | |
| "loss": 0.8858, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.1755591484774994, | |
| "grad_norm": 0.6965382695198059, | |
| "learning_rate": 0.0002549649690110482, | |
| "loss": 0.8867, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 1.1789275127997845, | |
| "grad_norm": 0.7309035658836365, | |
| "learning_rate": 0.00025474041138956254, | |
| "loss": 0.8865, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.1822958771220695, | |
| "grad_norm": 0.692935049533844, | |
| "learning_rate": 0.0002545158537680769, | |
| "loss": 0.8839, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 1.1856642414443546, | |
| "grad_norm": 0.7206865549087524, | |
| "learning_rate": 0.0002542912961465912, | |
| "loss": 0.8833, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.1890326057666396, | |
| "grad_norm": 0.718701958656311, | |
| "learning_rate": 0.0002540667385251055, | |
| "loss": 0.8831, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 1.1924009700889249, | |
| "grad_norm": 0.797205924987793, | |
| "learning_rate": 0.0002538421809036199, | |
| "loss": 0.8851, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.19576933441121, | |
| "grad_norm": 0.6977315545082092, | |
| "learning_rate": 0.00025361762328213416, | |
| "loss": 0.8834, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.199137698733495, | |
| "grad_norm": 0.768139660358429, | |
| "learning_rate": 0.0002533930656606485, | |
| "loss": 0.8822, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.2025060630557802, | |
| "grad_norm": 0.7986385226249695, | |
| "learning_rate": 0.00025316850803916283, | |
| "loss": 0.8816, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.2058744273780653, | |
| "grad_norm": 0.7320616841316223, | |
| "learning_rate": 0.00025294395041767717, | |
| "loss": 0.8827, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.2092427917003503, | |
| "grad_norm": 0.7750969529151917, | |
| "learning_rate": 0.0002527193927961915, | |
| "loss": 0.8851, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.2126111560226354, | |
| "grad_norm": 0.7603546380996704, | |
| "learning_rate": 0.0002524948351747058, | |
| "loss": 0.8834, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.2159795203449204, | |
| "grad_norm": 0.6906490325927734, | |
| "learning_rate": 0.0002522702775532201, | |
| "loss": 0.8835, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.2193478846672057, | |
| "grad_norm": 0.7611385583877563, | |
| "learning_rate": 0.00025204571993173446, | |
| "loss": 0.8832, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.2227162489894907, | |
| "grad_norm": 0.732187807559967, | |
| "learning_rate": 0.0002518211623102488, | |
| "loss": 0.8813, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.2260846133117758, | |
| "grad_norm": 0.7710540890693665, | |
| "learning_rate": 0.0002515966046887631, | |
| "loss": 0.8835, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.2294529776340608, | |
| "grad_norm": 0.7388759255409241, | |
| "learning_rate": 0.0002513720470672774, | |
| "loss": 0.8846, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.232821341956346, | |
| "grad_norm": 0.7000882029533386, | |
| "learning_rate": 0.0002511474894457918, | |
| "loss": 0.8818, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.2361897062786311, | |
| "grad_norm": 0.7058309316635132, | |
| "learning_rate": 0.0002509229318243061, | |
| "loss": 0.8827, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.2395580706009162, | |
| "grad_norm": 0.80439692735672, | |
| "learning_rate": 0.0002506983742028204, | |
| "loss": 0.8793, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.2429264349232012, | |
| "grad_norm": 0.7056805491447449, | |
| "learning_rate": 0.00025047381658133475, | |
| "loss": 0.8763, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.2462947992454865, | |
| "grad_norm": 0.7477532625198364, | |
| "learning_rate": 0.0002502492589598491, | |
| "loss": 0.8803, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.2496631635677715, | |
| "grad_norm": 0.8339030742645264, | |
| "learning_rate": 0.0002500247013383634, | |
| "loss": 0.8782, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.2530315278900566, | |
| "grad_norm": 0.7052040100097656, | |
| "learning_rate": 0.00024980014371687775, | |
| "loss": 0.8834, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.2563998922123416, | |
| "grad_norm": 0.7932031750679016, | |
| "learning_rate": 0.00024957558609539204, | |
| "loss": 0.882, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.2597682565346267, | |
| "grad_norm": 0.7311996817588806, | |
| "learning_rate": 0.00024935102847390637, | |
| "loss": 0.8802, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.263136620856912, | |
| "grad_norm": 0.7274471521377563, | |
| "learning_rate": 0.0002491264708524207, | |
| "loss": 0.8801, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.266504985179197, | |
| "grad_norm": 0.6959684491157532, | |
| "learning_rate": 0.00024890191323093504, | |
| "loss": 0.8742, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.269873349501482, | |
| "grad_norm": 0.7355061173439026, | |
| "learning_rate": 0.0002486773556094494, | |
| "loss": 0.8763, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.2732417138237673, | |
| "grad_norm": 0.6900234818458557, | |
| "learning_rate": 0.00024845279798796366, | |
| "loss": 0.879, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.2766100781460523, | |
| "grad_norm": 0.7259871363639832, | |
| "learning_rate": 0.00024822824036647805, | |
| "loss": 0.8769, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.2799784424683374, | |
| "grad_norm": 0.7086994051933289, | |
| "learning_rate": 0.00024800368274499233, | |
| "loss": 0.877, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.2833468067906224, | |
| "grad_norm": 0.7197995185852051, | |
| "learning_rate": 0.00024777912512350666, | |
| "loss": 0.8757, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 1.2867151711129075, | |
| "grad_norm": 0.7302994728088379, | |
| "learning_rate": 0.000247554567502021, | |
| "loss": 0.8801, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 1.2900835354351927, | |
| "grad_norm": 0.6908255815505981, | |
| "learning_rate": 0.00024733000988053533, | |
| "loss": 0.8766, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 1.2934518997574778, | |
| "grad_norm": 0.8338357210159302, | |
| "learning_rate": 0.00024710545225904967, | |
| "loss": 0.874, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 1.2968202640797628, | |
| "grad_norm": 0.7342631220817566, | |
| "learning_rate": 0.00024688089463756395, | |
| "loss": 0.8786, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.300188628402048, | |
| "grad_norm": 0.7488550543785095, | |
| "learning_rate": 0.0002466563370160783, | |
| "loss": 0.8813, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 1.3035569927243331, | |
| "grad_norm": 0.7316462993621826, | |
| "learning_rate": 0.0002464317793945926, | |
| "loss": 0.8755, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 1.3069253570466182, | |
| "grad_norm": 0.7482060790061951, | |
| "learning_rate": 0.00024620722177310695, | |
| "loss": 0.8737, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 1.3102937213689032, | |
| "grad_norm": 0.7243971228599548, | |
| "learning_rate": 0.0002459826641516213, | |
| "loss": 0.8759, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 1.3136620856911883, | |
| "grad_norm": 0.8776415586471558, | |
| "learning_rate": 0.0002457581065301356, | |
| "loss": 0.8757, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.3170304500134735, | |
| "grad_norm": 0.6730260848999023, | |
| "learning_rate": 0.00024553354890864996, | |
| "loss": 0.873, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 1.3203988143357586, | |
| "grad_norm": 0.6937080025672913, | |
| "learning_rate": 0.0002453089912871643, | |
| "loss": 0.8737, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 1.3237671786580436, | |
| "grad_norm": 0.7004138827323914, | |
| "learning_rate": 0.0002450844336656786, | |
| "loss": 0.8771, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 1.3271355429803289, | |
| "grad_norm": 0.6719589829444885, | |
| "learning_rate": 0.0002448598760441929, | |
| "loss": 0.8758, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 1.330503907302614, | |
| "grad_norm": 0.738394021987915, | |
| "learning_rate": 0.00024463531842270725, | |
| "loss": 0.8743, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.333872271624899, | |
| "grad_norm": 0.7103344202041626, | |
| "learning_rate": 0.0002444107608012216, | |
| "loss": 0.8712, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 1.337240635947184, | |
| "grad_norm": 0.69880610704422, | |
| "learning_rate": 0.0002441862031797359, | |
| "loss": 0.8737, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 1.340609000269469, | |
| "grad_norm": 0.8922190070152283, | |
| "learning_rate": 0.00024396164555825023, | |
| "loss": 0.8718, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 1.3439773645917543, | |
| "grad_norm": 0.7306973338127136, | |
| "learning_rate": 0.00024373708793676456, | |
| "loss": 0.8712, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 1.3473457289140394, | |
| "grad_norm": 0.7311689257621765, | |
| "learning_rate": 0.00024351253031527887, | |
| "loss": 0.875, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.3507140932363244, | |
| "grad_norm": 0.7093273997306824, | |
| "learning_rate": 0.00024328797269379323, | |
| "loss": 0.8711, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 1.3540824575586097, | |
| "grad_norm": 0.6579886078834534, | |
| "learning_rate": 0.00024306341507230754, | |
| "loss": 0.8773, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 1.3574508218808947, | |
| "grad_norm": 0.6832658648490906, | |
| "learning_rate": 0.00024283885745082185, | |
| "loss": 0.8727, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 1.3608191862031798, | |
| "grad_norm": 0.7372367978096008, | |
| "learning_rate": 0.00024261429982933618, | |
| "loss": 0.8718, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 1.3641875505254648, | |
| "grad_norm": 0.691271185874939, | |
| "learning_rate": 0.0002423897422078505, | |
| "loss": 0.8716, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.3675559148477499, | |
| "grad_norm": 0.7311553359031677, | |
| "learning_rate": 0.00024216518458636485, | |
| "loss": 0.8711, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 1.3709242791700351, | |
| "grad_norm": 0.7508808970451355, | |
| "learning_rate": 0.00024194062696487916, | |
| "loss": 0.8708, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 1.3742926434923202, | |
| "grad_norm": 0.707360029220581, | |
| "learning_rate": 0.0002417160693433935, | |
| "loss": 0.8727, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 1.3776610078146052, | |
| "grad_norm": 0.6759727001190186, | |
| "learning_rate": 0.0002414915117219078, | |
| "loss": 0.8681, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 1.3810293721368903, | |
| "grad_norm": 0.6768555045127869, | |
| "learning_rate": 0.00024126695410042217, | |
| "loss": 0.8709, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.3843977364591753, | |
| "grad_norm": 0.7042515873908997, | |
| "learning_rate": 0.00024104239647893647, | |
| "loss": 0.8711, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 1.3877661007814606, | |
| "grad_norm": 0.7361947894096375, | |
| "learning_rate": 0.00024081783885745078, | |
| "loss": 0.87, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 1.3911344651037456, | |
| "grad_norm": 0.7217181921005249, | |
| "learning_rate": 0.00024059328123596512, | |
| "loss": 0.8673, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 1.3945028294260307, | |
| "grad_norm": 0.6747815608978271, | |
| "learning_rate": 0.00024036872361447945, | |
| "loss": 0.8686, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 1.397871193748316, | |
| "grad_norm": 0.8375660181045532, | |
| "learning_rate": 0.0002401441659929938, | |
| "loss": 0.8711, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.401239558070601, | |
| "grad_norm": 0.7188234925270081, | |
| "learning_rate": 0.0002399196083715081, | |
| "loss": 0.8687, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 1.404607922392886, | |
| "grad_norm": 0.7048287987709045, | |
| "learning_rate": 0.00023969505075002243, | |
| "loss": 0.8663, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.407976286715171, | |
| "grad_norm": 0.703464925289154, | |
| "learning_rate": 0.00023947049312853677, | |
| "loss": 0.8698, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.411344651037456, | |
| "grad_norm": 0.725428581237793, | |
| "learning_rate": 0.0002392459355070511, | |
| "loss": 0.8704, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.4147130153597414, | |
| "grad_norm": 0.7408603429794312, | |
| "learning_rate": 0.0002390213778855654, | |
| "loss": 0.868, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.4180813796820264, | |
| "grad_norm": 0.718657910823822, | |
| "learning_rate": 0.00023879682026407974, | |
| "loss": 0.8684, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.4214497440043115, | |
| "grad_norm": 0.6897197961807251, | |
| "learning_rate": 0.00023857226264259408, | |
| "loss": 0.8638, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.4248181083265967, | |
| "grad_norm": 0.7002888917922974, | |
| "learning_rate": 0.0002383477050211084, | |
| "loss": 0.8694, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.4281864726488818, | |
| "grad_norm": 0.6644707322120667, | |
| "learning_rate": 0.00023812314739962272, | |
| "loss": 0.8684, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.4315548369711668, | |
| "grad_norm": 0.7583789229393005, | |
| "learning_rate": 0.00023789858977813703, | |
| "loss": 0.8671, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.4349232012934519, | |
| "grad_norm": 1.1469764709472656, | |
| "learning_rate": 0.0002376740321566514, | |
| "loss": 0.8659, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.438291565615737, | |
| "grad_norm": 0.7378877997398376, | |
| "learning_rate": 0.0002374494745351657, | |
| "loss": 0.8668, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.4416599299380222, | |
| "grad_norm": 0.6720541715621948, | |
| "learning_rate": 0.00023722491691368004, | |
| "loss": 0.867, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.4450282942603072, | |
| "grad_norm": 0.6979005336761475, | |
| "learning_rate": 0.00023700035929219435, | |
| "loss": 0.865, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.4483966585825923, | |
| "grad_norm": 0.6991161108016968, | |
| "learning_rate": 0.0002367758016707087, | |
| "loss": 0.8646, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.4517650229048775, | |
| "grad_norm": 0.7050434350967407, | |
| "learning_rate": 0.00023655124404922302, | |
| "loss": 0.8663, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.4551333872271626, | |
| "grad_norm": 0.733540415763855, | |
| "learning_rate": 0.00023632668642773732, | |
| "loss": 0.869, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.4585017515494476, | |
| "grad_norm": 0.6488509774208069, | |
| "learning_rate": 0.00023610212880625166, | |
| "loss": 0.8645, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.4618701158717327, | |
| "grad_norm": 0.760858416557312, | |
| "learning_rate": 0.00023587757118476597, | |
| "loss": 0.8671, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.4652384801940177, | |
| "grad_norm": 0.6800232529640198, | |
| "learning_rate": 0.00023565301356328033, | |
| "loss": 0.8647, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.468606844516303, | |
| "grad_norm": 0.6814864873886108, | |
| "learning_rate": 0.00023542845594179464, | |
| "loss": 0.8679, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.471975208838588, | |
| "grad_norm": 0.6680698394775391, | |
| "learning_rate": 0.00023520389832030897, | |
| "loss": 0.8638, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.475343573160873, | |
| "grad_norm": 0.7103099226951599, | |
| "learning_rate": 0.00023497934069882328, | |
| "loss": 0.8625, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.478711937483158, | |
| "grad_norm": 0.6946255564689636, | |
| "learning_rate": 0.00023475478307733764, | |
| "loss": 0.8636, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.4820803018054431, | |
| "grad_norm": 0.7505577802658081, | |
| "learning_rate": 0.00023453022545585195, | |
| "loss": 0.8622, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.4854486661277284, | |
| "grad_norm": 0.623587965965271, | |
| "learning_rate": 0.00023430566783436626, | |
| "loss": 0.8617, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.4888170304500135, | |
| "grad_norm": 0.7254552841186523, | |
| "learning_rate": 0.0002340811102128806, | |
| "loss": 0.8631, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.4921853947722985, | |
| "grad_norm": 0.7500590682029724, | |
| "learning_rate": 0.00023385655259139493, | |
| "loss": 0.8665, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.4955537590945838, | |
| "grad_norm": 0.6879542469978333, | |
| "learning_rate": 0.00023363199496990926, | |
| "loss": 0.8652, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.4989221234168688, | |
| "grad_norm": 1.074501872062683, | |
| "learning_rate": 0.00023340743734842357, | |
| "loss": 0.8635, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.5022904877391539, | |
| "grad_norm": 0.6877649426460266, | |
| "learning_rate": 0.0002331828797269379, | |
| "loss": 0.8639, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.5056588520614391, | |
| "grad_norm": 0.6956282258033752, | |
| "learning_rate": 0.00023295832210545224, | |
| "loss": 0.8625, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.509027216383724, | |
| "grad_norm": 0.6834612488746643, | |
| "learning_rate": 0.00023273376448396658, | |
| "loss": 0.863, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.5123955807060092, | |
| "grad_norm": 0.6679215431213379, | |
| "learning_rate": 0.00023250920686248089, | |
| "loss": 0.8604, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.5157639450282943, | |
| "grad_norm": 0.7261891961097717, | |
| "learning_rate": 0.00023228464924099525, | |
| "loss": 0.8614, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.5191323093505793, | |
| "grad_norm": 0.6932804584503174, | |
| "learning_rate": 0.00023206009161950956, | |
| "loss": 0.8629, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.5225006736728646, | |
| "grad_norm": 0.9139769673347473, | |
| "learning_rate": 0.00023183553399802386, | |
| "loss": 0.8592, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.5258690379951494, | |
| "grad_norm": 0.7025532126426697, | |
| "learning_rate": 0.0002316109763765382, | |
| "loss": 0.8623, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.5292374023174347, | |
| "grad_norm": 0.6872456073760986, | |
| "learning_rate": 0.0002313864187550525, | |
| "loss": 0.8634, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.5326057666397197, | |
| "grad_norm": 0.6915197968482971, | |
| "learning_rate": 0.00023116186113356687, | |
| "loss": 0.8604, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.5359741309620047, | |
| "grad_norm": 0.6892699599266052, | |
| "learning_rate": 0.00023093730351208118, | |
| "loss": 0.8591, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.53934249528429, | |
| "grad_norm": 0.7258453369140625, | |
| "learning_rate": 0.00023071274589059551, | |
| "loss": 0.8611, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.542710859606575, | |
| "grad_norm": 0.6410971879959106, | |
| "learning_rate": 0.00023048818826910982, | |
| "loss": 0.8585, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.54607922392886, | |
| "grad_norm": 0.7481923699378967, | |
| "learning_rate": 0.00023026363064762418, | |
| "loss": 0.8612, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.5494475882511454, | |
| "grad_norm": 0.6729730367660522, | |
| "learning_rate": 0.0002300390730261385, | |
| "loss": 0.8594, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.5528159525734302, | |
| "grad_norm": 0.6492398977279663, | |
| "learning_rate": 0.0002298145154046528, | |
| "loss": 0.8596, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.5561843168957155, | |
| "grad_norm": 0.7804999947547913, | |
| "learning_rate": 0.00022958995778316714, | |
| "loss": 0.8588, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.5595526812180005, | |
| "grad_norm": 0.692258358001709, | |
| "learning_rate": 0.00022936540016168144, | |
| "loss": 0.8644, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.5629210455402855, | |
| "grad_norm": 0.8399534821510315, | |
| "learning_rate": 0.0002291408425401958, | |
| "loss": 0.8563, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.5662894098625708, | |
| "grad_norm": 0.7541986107826233, | |
| "learning_rate": 0.00022891628491871011, | |
| "loss": 0.8597, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.5696577741848559, | |
| "grad_norm": 0.7269881367683411, | |
| "learning_rate": 0.00022869172729722445, | |
| "loss": 0.8613, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.573026138507141, | |
| "grad_norm": 0.6875913143157959, | |
| "learning_rate": 0.00022846716967573876, | |
| "loss": 0.8569, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.5763945028294262, | |
| "grad_norm": 0.6820959448814392, | |
| "learning_rate": 0.00022824261205425312, | |
| "loss": 0.8578, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.579762867151711, | |
| "grad_norm": 0.63446044921875, | |
| "learning_rate": 0.00022801805443276743, | |
| "loss": 0.8583, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.5831312314739963, | |
| "grad_norm": 0.7082163691520691, | |
| "learning_rate": 0.00022779349681128174, | |
| "loss": 0.8591, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.5864995957962813, | |
| "grad_norm": 0.6886746883392334, | |
| "learning_rate": 0.0002275689391897961, | |
| "loss": 0.8569, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.5898679601185663, | |
| "grad_norm": 0.6877068281173706, | |
| "learning_rate": 0.0002273443815683104, | |
| "loss": 0.8563, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.5932363244408516, | |
| "grad_norm": 0.7213451266288757, | |
| "learning_rate": 0.00022711982394682474, | |
| "loss": 0.8576, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.5966046887631367, | |
| "grad_norm": 0.6784006357192993, | |
| "learning_rate": 0.00022689526632533905, | |
| "loss": 0.8561, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.5999730530854217, | |
| "grad_norm": 0.7651084661483765, | |
| "learning_rate": 0.0002266707087038534, | |
| "loss": 0.8533, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.603341417407707, | |
| "grad_norm": 0.6762063503265381, | |
| "learning_rate": 0.00022644615108236772, | |
| "loss": 0.8593, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.6067097817299918, | |
| "grad_norm": 0.6897014379501343, | |
| "learning_rate": 0.00022622159346088205, | |
| "loss": 0.8584, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.610078146052277, | |
| "grad_norm": 0.642955482006073, | |
| "learning_rate": 0.00022599703583939636, | |
| "loss": 0.8536, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.613446510374562, | |
| "grad_norm": 0.676459014415741, | |
| "learning_rate": 0.00022577247821791067, | |
| "loss": 0.8561, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.6168148746968471, | |
| "grad_norm": 0.7143009901046753, | |
| "learning_rate": 0.00022554792059642503, | |
| "loss": 0.8593, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.6201832390191324, | |
| "grad_norm": 0.7367132306098938, | |
| "learning_rate": 0.00022532336297493934, | |
| "loss": 0.8547, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.6235516033414175, | |
| "grad_norm": 0.6949586868286133, | |
| "learning_rate": 0.00022509880535345368, | |
| "loss": 0.8577, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.6269199676637025, | |
| "grad_norm": 0.672558605670929, | |
| "learning_rate": 0.00022487424773196798, | |
| "loss": 0.8526, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.6302883319859878, | |
| "grad_norm": 0.6996020078659058, | |
| "learning_rate": 0.00022464969011048235, | |
| "loss": 0.8527, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.6336566963082726, | |
| "grad_norm": 0.7007562518119812, | |
| "learning_rate": 0.00022442513248899666, | |
| "loss": 0.8536, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.6370250606305579, | |
| "grad_norm": 0.7103307247161865, | |
| "learning_rate": 0.000224200574867511, | |
| "loss": 0.8557, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.640393424952843, | |
| "grad_norm": 0.7483230829238892, | |
| "learning_rate": 0.0002239760172460253, | |
| "loss": 0.857, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.643761789275128, | |
| "grad_norm": 0.7092981338500977, | |
| "learning_rate": 0.00022375145962453966, | |
| "loss": 0.8589, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.6471301535974132, | |
| "grad_norm": 0.7270293235778809, | |
| "learning_rate": 0.00022352690200305397, | |
| "loss": 0.8553, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.650498517919698, | |
| "grad_norm": 0.6916648149490356, | |
| "learning_rate": 0.00022330234438156828, | |
| "loss": 0.8566, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.6538668822419833, | |
| "grad_norm": 0.6687049269676208, | |
| "learning_rate": 0.0002230777867600826, | |
| "loss": 0.8532, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.6572352465642683, | |
| "grad_norm": 0.689947783946991, | |
| "learning_rate": 0.00022285322913859695, | |
| "loss": 0.8556, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.6606036108865534, | |
| "grad_norm": 0.7345608472824097, | |
| "learning_rate": 0.00022262867151711128, | |
| "loss": 0.8526, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.6639719752088387, | |
| "grad_norm": 0.6420150399208069, | |
| "learning_rate": 0.0002224041138956256, | |
| "loss": 0.8584, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.6673403395311237, | |
| "grad_norm": 0.7315524816513062, | |
| "learning_rate": 0.00022217955627413993, | |
| "loss": 0.8516, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.6707087038534087, | |
| "grad_norm": 0.674518883228302, | |
| "learning_rate": 0.00022195499865265426, | |
| "loss": 0.8564, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.674077068175694, | |
| "grad_norm": 0.6845901608467102, | |
| "learning_rate": 0.0002217304410311686, | |
| "loss": 0.8493, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.6774454324979788, | |
| "grad_norm": 0.6932186484336853, | |
| "learning_rate": 0.0002215058834096829, | |
| "loss": 0.8516, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.680813796820264, | |
| "grad_norm": 0.6544727087020874, | |
| "learning_rate": 0.0002212813257881972, | |
| "loss": 0.8519, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.6841821611425492, | |
| "grad_norm": 0.7253223657608032, | |
| "learning_rate": 0.00022105676816671157, | |
| "loss": 0.8529, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.6875505254648342, | |
| "grad_norm": 0.6576797962188721, | |
| "learning_rate": 0.00022083221054522588, | |
| "loss": 0.8528, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 1.6909188897871195, | |
| "grad_norm": 0.715744137763977, | |
| "learning_rate": 0.00022060765292374022, | |
| "loss": 0.8537, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 1.6942872541094045, | |
| "grad_norm": 0.6994728446006775, | |
| "learning_rate": 0.00022038309530225453, | |
| "loss": 0.852, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 1.6976556184316896, | |
| "grad_norm": 0.6934739947319031, | |
| "learning_rate": 0.0002201585376807689, | |
| "loss": 0.8509, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 1.7010239827539748, | |
| "grad_norm": 0.7031779885292053, | |
| "learning_rate": 0.0002199339800592832, | |
| "loss": 0.853, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 1.7043923470762596, | |
| "grad_norm": 0.7014051079750061, | |
| "learning_rate": 0.00021970942243779753, | |
| "loss": 0.8514, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 1.707760711398545, | |
| "grad_norm": 0.8519027233123779, | |
| "learning_rate": 0.00021948486481631184, | |
| "loss": 0.8525, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 1.71112907572083, | |
| "grad_norm": 0.6523889899253845, | |
| "learning_rate": 0.00021926030719482615, | |
| "loss": 0.8514, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 1.714497440043115, | |
| "grad_norm": 0.7346422076225281, | |
| "learning_rate": 0.0002190357495733405, | |
| "loss": 0.856, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 1.7178658043654003, | |
| "grad_norm": 0.7833214402198792, | |
| "learning_rate": 0.00021881119195185482, | |
| "loss": 0.8518, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.7212341686876853, | |
| "grad_norm": 0.599700391292572, | |
| "learning_rate": 0.00021858663433036915, | |
| "loss": 0.8539, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 1.7246025330099704, | |
| "grad_norm": 0.7568506002426147, | |
| "learning_rate": 0.00021836207670888346, | |
| "loss": 0.8504, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 1.7279708973322556, | |
| "grad_norm": 0.71254563331604, | |
| "learning_rate": 0.00021813751908739782, | |
| "loss": 0.849, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 1.7313392616545404, | |
| "grad_norm": 0.6965556144714355, | |
| "learning_rate": 0.00021791296146591213, | |
| "loss": 0.8506, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 1.7347076259768257, | |
| "grad_norm": 0.6838847994804382, | |
| "learning_rate": 0.00021768840384442647, | |
| "loss": 0.8487, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 1.7380759902991108, | |
| "grad_norm": 0.6968240737915039, | |
| "learning_rate": 0.00021746384622294077, | |
| "loss": 0.8498, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 1.7414443546213958, | |
| "grad_norm": 0.6424885988235474, | |
| "learning_rate": 0.00021723928860145514, | |
| "loss": 0.8495, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 1.744812718943681, | |
| "grad_norm": 0.6591705083847046, | |
| "learning_rate": 0.00021701473097996945, | |
| "loss": 0.8474, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 1.7481810832659659, | |
| "grad_norm": 0.9693089723587036, | |
| "learning_rate": 0.00021679017335848375, | |
| "loss": 0.8468, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 1.7515494475882512, | |
| "grad_norm": 0.6744303107261658, | |
| "learning_rate": 0.0002165656157369981, | |
| "loss": 0.8488, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.7549178119105362, | |
| "grad_norm": 0.7250896096229553, | |
| "learning_rate": 0.00021634105811551242, | |
| "loss": 0.8501, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 1.7582861762328212, | |
| "grad_norm": 0.6125505566596985, | |
| "learning_rate": 0.00021611650049402676, | |
| "loss": 0.8481, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 1.7616545405551065, | |
| "grad_norm": 0.6853183507919312, | |
| "learning_rate": 0.00021589194287254107, | |
| "loss": 0.8496, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 1.7650229048773916, | |
| "grad_norm": 0.6613684892654419, | |
| "learning_rate": 0.0002156673852510554, | |
| "loss": 0.8496, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 1.7683912691996766, | |
| "grad_norm": 0.6338353157043457, | |
| "learning_rate": 0.00021544282762956974, | |
| "loss": 0.8473, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.7717596335219619, | |
| "grad_norm": 0.7311720848083496, | |
| "learning_rate": 0.00021521827000808407, | |
| "loss": 0.8494, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 1.7751279978442467, | |
| "grad_norm": 0.8986194133758545, | |
| "learning_rate": 0.00021499371238659838, | |
| "loss": 0.847, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 1.778496362166532, | |
| "grad_norm": 0.7145596146583557, | |
| "learning_rate": 0.0002147691547651127, | |
| "loss": 0.8513, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 1.781864726488817, | |
| "grad_norm": 0.673674464225769, | |
| "learning_rate": 0.00021454459714362705, | |
| "loss": 0.8496, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 1.785233090811102, | |
| "grad_norm": 0.6568505167961121, | |
| "learning_rate": 0.00021432003952214136, | |
| "loss": 0.8477, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.7886014551333873, | |
| "grad_norm": 0.8661649227142334, | |
| "learning_rate": 0.0002140954819006557, | |
| "loss": 0.8437, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 1.7919698194556724, | |
| "grad_norm": 0.636997401714325, | |
| "learning_rate": 0.00021387092427917, | |
| "loss": 0.8498, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 1.7953381837779574, | |
| "grad_norm": 0.6258701682090759, | |
| "learning_rate": 0.00021364636665768436, | |
| "loss": 0.846, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 1.7987065481002427, | |
| "grad_norm": 0.7167558073997498, | |
| "learning_rate": 0.00021342180903619867, | |
| "loss": 0.8496, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 1.8020749124225275, | |
| "grad_norm": 0.725511372089386, | |
| "learning_rate": 0.000213197251414713, | |
| "loss": 0.8503, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 1.8054432767448128, | |
| "grad_norm": 0.7038520574569702, | |
| "learning_rate": 0.00021297269379322732, | |
| "loss": 0.8486, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 1.8088116410670978, | |
| "grad_norm": 0.6768267154693604, | |
| "learning_rate": 0.00021274813617174162, | |
| "loss": 0.8475, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 1.8121800053893828, | |
| "grad_norm": 0.6625707745552063, | |
| "learning_rate": 0.000212523578550256, | |
| "loss": 0.8466, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 1.815548369711668, | |
| "grad_norm": 0.6589378118515015, | |
| "learning_rate": 0.0002122990209287703, | |
| "loss": 0.8449, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 1.8189167340339532, | |
| "grad_norm": 0.8694218993186951, | |
| "learning_rate": 0.00021207446330728463, | |
| "loss": 0.8464, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.8222850983562382, | |
| "grad_norm": 0.6879692673683167, | |
| "learning_rate": 0.00021184990568579894, | |
| "loss": 0.8464, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 1.8256534626785235, | |
| "grad_norm": 0.7406269907951355, | |
| "learning_rate": 0.0002116253480643133, | |
| "loss": 0.8451, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 1.8290218270008083, | |
| "grad_norm": 0.645255982875824, | |
| "learning_rate": 0.0002114007904428276, | |
| "loss": 0.8459, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 1.8323901913230936, | |
| "grad_norm": 0.6275530457496643, | |
| "learning_rate": 0.00021117623282134194, | |
| "loss": 0.848, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 1.8357585556453786, | |
| "grad_norm": 0.673546314239502, | |
| "learning_rate": 0.00021095167519985625, | |
| "loss": 0.8421, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 1.8391269199676636, | |
| "grad_norm": 0.6627029180526733, | |
| "learning_rate": 0.00021072711757837061, | |
| "loss": 0.8443, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 1.842495284289949, | |
| "grad_norm": 0.6747744083404541, | |
| "learning_rate": 0.00021050255995688492, | |
| "loss": 0.8439, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 1.845863648612234, | |
| "grad_norm": 0.6658967733383179, | |
| "learning_rate": 0.00021027800233539923, | |
| "loss": 0.8435, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 1.849232012934519, | |
| "grad_norm": 0.6818722486495972, | |
| "learning_rate": 0.00021005344471391357, | |
| "loss": 0.8433, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 1.852600377256804, | |
| "grad_norm": 0.6817068457603455, | |
| "learning_rate": 0.0002098288870924279, | |
| "loss": 0.8449, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.855968741579089, | |
| "grad_norm": 0.6656964421272278, | |
| "learning_rate": 0.00020960432947094224, | |
| "loss": 0.8448, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 1.8593371059013744, | |
| "grad_norm": 0.7307213544845581, | |
| "learning_rate": 0.00020937977184945654, | |
| "loss": 0.8439, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 1.8627054702236594, | |
| "grad_norm": 0.7471979856491089, | |
| "learning_rate": 0.00020915521422797088, | |
| "loss": 0.8461, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 1.8660738345459444, | |
| "grad_norm": 0.6660134792327881, | |
| "learning_rate": 0.00020893065660648521, | |
| "loss": 0.849, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 1.8694421988682297, | |
| "grad_norm": 0.6332405209541321, | |
| "learning_rate": 0.00020870609898499955, | |
| "loss": 0.8417, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 1.8728105631905145, | |
| "grad_norm": 0.6369178295135498, | |
| "learning_rate": 0.00020848154136351386, | |
| "loss": 0.844, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 1.8761789275127998, | |
| "grad_norm": 0.6878752112388611, | |
| "learning_rate": 0.00020825698374202817, | |
| "loss": 0.8435, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 1.8795472918350848, | |
| "grad_norm": 0.7077382206916809, | |
| "learning_rate": 0.00020803242612054253, | |
| "loss": 0.8448, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 1.8829156561573699, | |
| "grad_norm": 0.6542516350746155, | |
| "learning_rate": 0.00020780786849905684, | |
| "loss": 0.8441, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 1.8862840204796552, | |
| "grad_norm": 0.6779966950416565, | |
| "learning_rate": 0.00020758331087757117, | |
| "loss": 0.8452, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.8896523848019402, | |
| "grad_norm": 0.7874273657798767, | |
| "learning_rate": 0.00020735875325608548, | |
| "loss": 0.8408, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 1.8930207491242252, | |
| "grad_norm": 0.7043356895446777, | |
| "learning_rate": 0.00020713419563459984, | |
| "loss": 0.8459, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 1.8963891134465105, | |
| "grad_norm": 0.6597324013710022, | |
| "learning_rate": 0.00020690963801311415, | |
| "loss": 0.8418, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 1.8997574777687953, | |
| "grad_norm": 0.6724010705947876, | |
| "learning_rate": 0.00020668508039162848, | |
| "loss": 0.8459, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 1.9031258420910806, | |
| "grad_norm": 0.7157804369926453, | |
| "learning_rate": 0.0002064605227701428, | |
| "loss": 0.8459, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 1.9064942064133656, | |
| "grad_norm": 0.730675995349884, | |
| "learning_rate": 0.0002062359651486571, | |
| "loss": 0.8477, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 1.9098625707356507, | |
| "grad_norm": 0.6512274146080017, | |
| "learning_rate": 0.00020601140752717146, | |
| "loss": 0.8424, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 1.913230935057936, | |
| "grad_norm": 0.679719090461731, | |
| "learning_rate": 0.00020578684990568577, | |
| "loss": 0.8465, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 1.916599299380221, | |
| "grad_norm": 0.7341735363006592, | |
| "learning_rate": 0.0002055622922842001, | |
| "loss": 0.8465, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 1.919967663702506, | |
| "grad_norm": 0.6398690342903137, | |
| "learning_rate": 0.00020533773466271441, | |
| "loss": 0.8417, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.9233360280247913, | |
| "grad_norm": 0.8629583716392517, | |
| "learning_rate": 0.00020511317704122878, | |
| "loss": 0.8438, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 1.9267043923470761, | |
| "grad_norm": 0.6348667740821838, | |
| "learning_rate": 0.00020488861941974308, | |
| "loss": 0.8411, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 1.9300727566693614, | |
| "grad_norm": 1.0562591552734375, | |
| "learning_rate": 0.00020466406179825742, | |
| "loss": 0.8401, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 1.9334411209916464, | |
| "grad_norm": 0.7126754522323608, | |
| "learning_rate": 0.00020443950417677173, | |
| "loss": 0.8469, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 1.9368094853139315, | |
| "grad_norm": 0.624739408493042, | |
| "learning_rate": 0.00020421494655528606, | |
| "loss": 0.8422, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.9401778496362168, | |
| "grad_norm": 0.6490176916122437, | |
| "learning_rate": 0.0002039903889338004, | |
| "loss": 0.841, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 1.9435462139585018, | |
| "grad_norm": 0.6586236357688904, | |
| "learning_rate": 0.0002037658313123147, | |
| "loss": 0.8412, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 1.9469145782807868, | |
| "grad_norm": 0.66822749376297, | |
| "learning_rate": 0.00020354127369082904, | |
| "loss": 0.8418, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 1.9502829426030721, | |
| "grad_norm": 0.6851320266723633, | |
| "learning_rate": 0.00020331671606934338, | |
| "loss": 0.8414, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 1.953651306925357, | |
| "grad_norm": 0.6610788702964783, | |
| "learning_rate": 0.0002030921584478577, | |
| "loss": 0.8409, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.9570196712476422, | |
| "grad_norm": 0.6205683946609497, | |
| "learning_rate": 0.00020286760082637202, | |
| "loss": 0.8384, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 1.9603880355699272, | |
| "grad_norm": 0.6619114875793457, | |
| "learning_rate": 0.00020264304320488636, | |
| "loss": 0.8404, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 1.9637563998922123, | |
| "grad_norm": 0.6718551516532898, | |
| "learning_rate": 0.0002024184855834007, | |
| "loss": 0.8428, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 1.9671247642144976, | |
| "grad_norm": 0.696954607963562, | |
| "learning_rate": 0.00020219392796191503, | |
| "loss": 0.8377, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 1.9704931285367824, | |
| "grad_norm": 0.7444251775741577, | |
| "learning_rate": 0.00020196937034042933, | |
| "loss": 0.8399, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 1.9738614928590676, | |
| "grad_norm": 1.1385860443115234, | |
| "learning_rate": 0.00020174481271894364, | |
| "loss": 0.8424, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 1.9772298571813527, | |
| "grad_norm": 0.6890325546264648, | |
| "learning_rate": 0.000201520255097458, | |
| "loss": 0.8441, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 1.9805982215036377, | |
| "grad_norm": 0.6715498566627502, | |
| "learning_rate": 0.0002012956974759723, | |
| "loss": 0.8426, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 1.983966585825923, | |
| "grad_norm": 0.683191180229187, | |
| "learning_rate": 0.00020107113985448665, | |
| "loss": 0.8408, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 1.987334950148208, | |
| "grad_norm": 0.7549853324890137, | |
| "learning_rate": 0.00020084658223300096, | |
| "loss": 0.8406, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.990703314470493, | |
| "grad_norm": 0.6465044617652893, | |
| "learning_rate": 0.00020062202461151532, | |
| "loss": 0.8398, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 1.9940716787927784, | |
| "grad_norm": 0.6415732502937317, | |
| "learning_rate": 0.00020039746699002963, | |
| "loss": 0.8385, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 1.9974400431150632, | |
| "grad_norm": 0.6739877462387085, | |
| "learning_rate": 0.00020017290936854396, | |
| "loss": 0.8405, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.7839618921279907, | |
| "eval_runtime": 1.804, | |
| "eval_samples_per_second": 2771.644, | |
| "eval_steps_per_second": 43.792, | |
| "step": 59376 | |
| }, | |
| { | |
| "epoch": 2.0008084074373484, | |
| "grad_norm": 0.7029405832290649, | |
| "learning_rate": 0.00019994835174705827, | |
| "loss": 0.8394, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 2.0041767717596337, | |
| "grad_norm": 1.100113034248352, | |
| "learning_rate": 0.00019972379412557258, | |
| "loss": 0.8415, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 2.0075451360819185, | |
| "grad_norm": 0.6430971026420593, | |
| "learning_rate": 0.00019949923650408694, | |
| "loss": 0.8368, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 2.010913500404204, | |
| "grad_norm": 0.6618381142616272, | |
| "learning_rate": 0.00019927467888260125, | |
| "loss": 0.8374, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 2.0142818647264886, | |
| "grad_norm": 0.7243971228599548, | |
| "learning_rate": 0.00019905012126111558, | |
| "loss": 0.84, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 2.017650229048774, | |
| "grad_norm": 0.6579388380050659, | |
| "learning_rate": 0.0001988255636396299, | |
| "loss": 0.8395, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 2.021018593371059, | |
| "grad_norm": 0.6798832416534424, | |
| "learning_rate": 0.00019860100601814425, | |
| "loss": 0.8378, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.024386957693344, | |
| "grad_norm": 0.7237014174461365, | |
| "learning_rate": 0.00019837644839665856, | |
| "loss": 0.8403, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 2.0277553220156292, | |
| "grad_norm": 0.6715816259384155, | |
| "learning_rate": 0.0001981518907751729, | |
| "loss": 0.8412, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 2.0311236863379145, | |
| "grad_norm": 0.6749352812767029, | |
| "learning_rate": 0.0001979273331536872, | |
| "loss": 0.8387, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 2.0344920506601993, | |
| "grad_norm": 0.6498907804489136, | |
| "learning_rate": 0.00019770277553220154, | |
| "loss": 0.8365, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 2.0378604149824846, | |
| "grad_norm": 0.6706451177597046, | |
| "learning_rate": 0.00019747821791071588, | |
| "loss": 0.8353, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 2.0412287793047694, | |
| "grad_norm": 0.6508938670158386, | |
| "learning_rate": 0.00019725366028923018, | |
| "loss": 0.8386, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 2.0445971436270547, | |
| "grad_norm": 0.6738646626472473, | |
| "learning_rate": 0.00019702910266774452, | |
| "loss": 0.8357, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 2.04796550794934, | |
| "grad_norm": 0.6249734163284302, | |
| "learning_rate": 0.00019680454504625885, | |
| "loss": 0.8381, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 2.051333872271625, | |
| "grad_norm": 0.72087162733078, | |
| "learning_rate": 0.0001965799874247732, | |
| "loss": 0.8377, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 2.05470223659391, | |
| "grad_norm": 0.7066697478294373, | |
| "learning_rate": 0.0001963554298032875, | |
| "loss": 0.8397, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 2.0580706009161953, | |
| "grad_norm": 0.7159758806228638, | |
| "learning_rate": 0.00019613087218180186, | |
| "loss": 0.8382, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 2.06143896523848, | |
| "grad_norm": 0.7034109830856323, | |
| "learning_rate": 0.00019590631456031617, | |
| "loss": 0.8386, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 2.0648073295607654, | |
| "grad_norm": 0.648295521736145, | |
| "learning_rate": 0.0001956817569388305, | |
| "loss": 0.8353, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 2.0681756938830502, | |
| "grad_norm": 0.909184992313385, | |
| "learning_rate": 0.0001954571993173448, | |
| "loss": 0.8366, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 2.0715440582053355, | |
| "grad_norm": 0.6607633233070374, | |
| "learning_rate": 0.00019523264169585912, | |
| "loss": 0.8389, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 2.0749124225276208, | |
| "grad_norm": 0.6451642513275146, | |
| "learning_rate": 0.00019500808407437348, | |
| "loss": 0.8379, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 2.0782807868499056, | |
| "grad_norm": 0.6556397080421448, | |
| "learning_rate": 0.0001947835264528878, | |
| "loss": 0.837, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 2.081649151172191, | |
| "grad_norm": 0.6268017292022705, | |
| "learning_rate": 0.00019455896883140212, | |
| "loss": 0.8379, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 2.0850175154944757, | |
| "grad_norm": 0.6620351672172546, | |
| "learning_rate": 0.00019433441120991643, | |
| "loss": 0.8377, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 2.088385879816761, | |
| "grad_norm": 0.6334593296051025, | |
| "learning_rate": 0.0001941098535884308, | |
| "loss": 0.8367, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 2.091754244139046, | |
| "grad_norm": 0.6498568058013916, | |
| "learning_rate": 0.0001938852959669451, | |
| "loss": 0.836, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 2.095122608461331, | |
| "grad_norm": 0.6664881110191345, | |
| "learning_rate": 0.00019366073834545944, | |
| "loss": 0.8366, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 2.0984909727836163, | |
| "grad_norm": 0.6434110999107361, | |
| "learning_rate": 0.00019343618072397375, | |
| "loss": 0.8339, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 2.1018593371059016, | |
| "grad_norm": 0.6958553194999695, | |
| "learning_rate": 0.00019321162310248805, | |
| "loss": 0.8363, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 2.1052277014281864, | |
| "grad_norm": 0.7187645435333252, | |
| "learning_rate": 0.00019298706548100242, | |
| "loss": 0.8337, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 2.1085960657504716, | |
| "grad_norm": 0.6984855532646179, | |
| "learning_rate": 0.00019276250785951672, | |
| "loss": 0.8363, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 2.1119644300727565, | |
| "grad_norm": 0.7342332601547241, | |
| "learning_rate": 0.00019253795023803106, | |
| "loss": 0.8385, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 2.1153327943950417, | |
| "grad_norm": 0.7250052690505981, | |
| "learning_rate": 0.00019231339261654537, | |
| "loss": 0.8333, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 2.118701158717327, | |
| "grad_norm": 0.6984183192253113, | |
| "learning_rate": 0.00019208883499505973, | |
| "loss": 0.8343, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 2.122069523039612, | |
| "grad_norm": 1.0108722448349, | |
| "learning_rate": 0.00019186427737357404, | |
| "loss": 0.836, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 2.125437887361897, | |
| "grad_norm": 0.6725150942802429, | |
| "learning_rate": 0.00019163971975208837, | |
| "loss": 0.8372, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 2.1288062516841824, | |
| "grad_norm": 0.6805692911148071, | |
| "learning_rate": 0.0001914151621306027, | |
| "loss": 0.8311, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 2.132174616006467, | |
| "grad_norm": 0.9061579704284668, | |
| "learning_rate": 0.00019119060450911702, | |
| "loss": 0.8343, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 2.1355429803287524, | |
| "grad_norm": 0.6339781880378723, | |
| "learning_rate": 0.00019096604688763135, | |
| "loss": 0.8405, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 2.1389113446510373, | |
| "grad_norm": 0.6159859895706177, | |
| "learning_rate": 0.00019074148926614566, | |
| "loss": 0.8352, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 2.1422797089733225, | |
| "grad_norm": 0.6404457092285156, | |
| "learning_rate": 0.00019051693164466002, | |
| "loss": 0.835, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 2.145648073295608, | |
| "grad_norm": 0.619216799736023, | |
| "learning_rate": 0.00019029237402317433, | |
| "loss": 0.8323, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 2.1490164376178926, | |
| "grad_norm": 0.6672142744064331, | |
| "learning_rate": 0.00019006781640168867, | |
| "loss": 0.8318, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 2.152384801940178, | |
| "grad_norm": 0.6713272929191589, | |
| "learning_rate": 0.00018984325878020297, | |
| "loss": 0.835, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 2.155753166262463, | |
| "grad_norm": 0.6657119989395142, | |
| "learning_rate": 0.00018961870115871734, | |
| "loss": 0.8312, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 2.159121530584748, | |
| "grad_norm": 0.7010686993598938, | |
| "learning_rate": 0.00018939414353723164, | |
| "loss": 0.8334, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 2.1624898949070332, | |
| "grad_norm": 0.67507404088974, | |
| "learning_rate": 0.00018916958591574598, | |
| "loss": 0.8337, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 2.165858259229318, | |
| "grad_norm": 0.7008448243141174, | |
| "learning_rate": 0.0001889450282942603, | |
| "loss": 0.8334, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 2.1692266235516033, | |
| "grad_norm": 0.634978175163269, | |
| "learning_rate": 0.0001887204706727746, | |
| "loss": 0.8372, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 2.1725949878738886, | |
| "grad_norm": 0.7248919010162354, | |
| "learning_rate": 0.00018849591305128896, | |
| "loss": 0.8355, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 2.1759633521961734, | |
| "grad_norm": 0.6367628574371338, | |
| "learning_rate": 0.00018827135542980327, | |
| "loss": 0.8328, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 2.1793317165184587, | |
| "grad_norm": 0.712471604347229, | |
| "learning_rate": 0.0001880467978083176, | |
| "loss": 0.8296, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 2.1827000808407435, | |
| "grad_norm": 0.671159565448761, | |
| "learning_rate": 0.0001878222401868319, | |
| "loss": 0.8339, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 2.186068445163029, | |
| "grad_norm": 0.6872281432151794, | |
| "learning_rate": 0.00018759768256534627, | |
| "loss": 0.8313, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 2.189436809485314, | |
| "grad_norm": 0.7017882466316223, | |
| "learning_rate": 0.00018737312494386058, | |
| "loss": 0.8327, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 2.192805173807599, | |
| "grad_norm": 0.7563288807868958, | |
| "learning_rate": 0.00018714856732237491, | |
| "loss": 0.8342, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 2.196173538129884, | |
| "grad_norm": 0.6648709177970886, | |
| "learning_rate": 0.00018692400970088922, | |
| "loss": 0.8318, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 2.1995419024521694, | |
| "grad_norm": 0.6673408150672913, | |
| "learning_rate": 0.00018669945207940356, | |
| "loss": 0.8328, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 2.2029102667744542, | |
| "grad_norm": 0.8705912232398987, | |
| "learning_rate": 0.0001864748944579179, | |
| "loss": 0.831, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 2.2062786310967395, | |
| "grad_norm": 0.6253674030303955, | |
| "learning_rate": 0.0001862503368364322, | |
| "loss": 0.8322, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 2.2096469954190243, | |
| "grad_norm": 0.7175179719924927, | |
| "learning_rate": 0.00018602577921494654, | |
| "loss": 0.8316, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 2.2130153597413096, | |
| "grad_norm": 0.6952610611915588, | |
| "learning_rate": 0.00018580122159346087, | |
| "loss": 0.831, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 2.216383724063595, | |
| "grad_norm": 0.6487058401107788, | |
| "learning_rate": 0.0001855766639719752, | |
| "loss": 0.8323, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 2.2197520883858797, | |
| "grad_norm": 0.7087188959121704, | |
| "learning_rate": 0.00018535210635048951, | |
| "loss": 0.8337, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 2.223120452708165, | |
| "grad_norm": 0.689513087272644, | |
| "learning_rate": 0.00018512754872900385, | |
| "loss": 0.834, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 2.22648881703045, | |
| "grad_norm": 1.3590748310089111, | |
| "learning_rate": 0.00018490299110751818, | |
| "loss": 0.8328, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 2.229857181352735, | |
| "grad_norm": 0.640274167060852, | |
| "learning_rate": 0.0001846784334860325, | |
| "loss": 0.8333, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 2.2332255456750203, | |
| "grad_norm": 0.794620156288147, | |
| "learning_rate": 0.00018445387586454683, | |
| "loss": 0.8343, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 2.236593909997305, | |
| "grad_norm": 0.6511592268943787, | |
| "learning_rate": 0.00018422931824306114, | |
| "loss": 0.8328, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 2.2399622743195904, | |
| "grad_norm": 0.6505751609802246, | |
| "learning_rate": 0.0001840047606215755, | |
| "loss": 0.8302, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 2.2433306386418757, | |
| "grad_norm": 0.7967960834503174, | |
| "learning_rate": 0.0001837802030000898, | |
| "loss": 0.83, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 2.2466990029641605, | |
| "grad_norm": 0.708914577960968, | |
| "learning_rate": 0.00018355564537860414, | |
| "loss": 0.8305, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 2.2500673672864457, | |
| "grad_norm": 0.6149790287017822, | |
| "learning_rate": 0.00018333108775711845, | |
| "loss": 0.8334, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 2.2534357316087306, | |
| "grad_norm": 0.6834396719932556, | |
| "learning_rate": 0.0001831065301356328, | |
| "loss": 0.8314, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 2.256804095931016, | |
| "grad_norm": 0.7162107229232788, | |
| "learning_rate": 0.00018288197251414712, | |
| "loss": 0.8327, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 2.260172460253301, | |
| "grad_norm": 0.6464916467666626, | |
| "learning_rate": 0.00018265741489266143, | |
| "loss": 0.8311, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 2.263540824575586, | |
| "grad_norm": 0.680050253868103, | |
| "learning_rate": 0.00018243285727117576, | |
| "loss": 0.8293, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 2.266909188897871, | |
| "grad_norm": 0.7750843167304993, | |
| "learning_rate": 0.00018220829964969007, | |
| "loss": 0.8286, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 2.2702775532201565, | |
| "grad_norm": 0.6611768007278442, | |
| "learning_rate": 0.00018198374202820443, | |
| "loss": 0.8311, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 2.2736459175424413, | |
| "grad_norm": 0.7073565125465393, | |
| "learning_rate": 0.00018175918440671874, | |
| "loss": 0.829, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 2.2770142818647265, | |
| "grad_norm": 0.6587579846382141, | |
| "learning_rate": 0.00018153462678523308, | |
| "loss": 0.8301, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 2.280382646187012, | |
| "grad_norm": 0.6691922545433044, | |
| "learning_rate": 0.00018131006916374739, | |
| "loss": 0.8291, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 2.2837510105092966, | |
| "grad_norm": 0.6645218133926392, | |
| "learning_rate": 0.00018108551154226175, | |
| "loss": 0.8294, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 2.287119374831582, | |
| "grad_norm": 0.6749672293663025, | |
| "learning_rate": 0.00018086095392077606, | |
| "loss": 0.8324, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 2.2904877391538667, | |
| "grad_norm": 0.6658663153648376, | |
| "learning_rate": 0.0001806363962992904, | |
| "loss": 0.8292, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 2.293856103476152, | |
| "grad_norm": 0.7315119504928589, | |
| "learning_rate": 0.0001804118386778047, | |
| "loss": 0.8289, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 2.2972244677984373, | |
| "grad_norm": 0.6700948476791382, | |
| "learning_rate": 0.00018018728105631903, | |
| "loss": 0.8266, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 2.300592832120722, | |
| "grad_norm": 0.6325820088386536, | |
| "learning_rate": 0.00017996272343483337, | |
| "loss": 0.8312, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 2.3039611964430073, | |
| "grad_norm": 0.6814998984336853, | |
| "learning_rate": 0.00017973816581334768, | |
| "loss": 0.8288, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 2.307329560765292, | |
| "grad_norm": 0.6832602024078369, | |
| "learning_rate": 0.000179513608191862, | |
| "loss": 0.8278, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 2.3106979250875774, | |
| "grad_norm": 0.7399811148643494, | |
| "learning_rate": 0.00017928905057037635, | |
| "loss": 0.8313, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 2.3140662894098627, | |
| "grad_norm": 0.6988467574119568, | |
| "learning_rate": 0.00017906449294889068, | |
| "loss": 0.8298, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 2.3174346537321475, | |
| "grad_norm": 0.6190904974937439, | |
| "learning_rate": 0.000178839935327405, | |
| "loss": 0.8307, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 2.320803018054433, | |
| "grad_norm": 0.6427486538887024, | |
| "learning_rate": 0.00017861537770591933, | |
| "loss": 0.8309, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 2.324171382376718, | |
| "grad_norm": 0.661953330039978, | |
| "learning_rate": 0.00017839082008443366, | |
| "loss": 0.8291, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 2.327539746699003, | |
| "grad_norm": 0.6629980206489563, | |
| "learning_rate": 0.00017816626246294797, | |
| "loss": 0.8268, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 2.330908111021288, | |
| "grad_norm": 0.8455718755722046, | |
| "learning_rate": 0.0001779417048414623, | |
| "loss": 0.8281, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 2.3342764753435734, | |
| "grad_norm": 0.6195480227470398, | |
| "learning_rate": 0.0001777171472199766, | |
| "loss": 0.8271, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 2.3376448396658582, | |
| "grad_norm": 0.6320804357528687, | |
| "learning_rate": 0.00017749258959849098, | |
| "loss": 0.8272, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 2.3410132039881435, | |
| "grad_norm": 0.6865086555480957, | |
| "learning_rate": 0.00017726803197700528, | |
| "loss": 0.83, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 2.3443815683104283, | |
| "grad_norm": 0.6443759799003601, | |
| "learning_rate": 0.00017704347435551962, | |
| "loss": 0.828, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 2.3477499326327136, | |
| "grad_norm": 0.6651887893676758, | |
| "learning_rate": 0.00017681891673403393, | |
| "loss": 0.8309, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 2.351118296954999, | |
| "grad_norm": 0.6643931269645691, | |
| "learning_rate": 0.0001765943591125483, | |
| "loss": 0.8285, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 2.3544866612772837, | |
| "grad_norm": 0.6697126030921936, | |
| "learning_rate": 0.0001763698014910626, | |
| "loss": 0.8264, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 2.357855025599569, | |
| "grad_norm": 0.6785570979118347, | |
| "learning_rate": 0.0001761452438695769, | |
| "loss": 0.8255, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 2.3612233899218538, | |
| "grad_norm": 0.6412234306335449, | |
| "learning_rate": 0.00017592068624809124, | |
| "loss": 0.8271, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 2.364591754244139, | |
| "grad_norm": 0.7083507180213928, | |
| "learning_rate": 0.00017569612862660555, | |
| "loss": 0.83, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 2.3679601185664243, | |
| "grad_norm": 0.6519679427146912, | |
| "learning_rate": 0.0001754715710051199, | |
| "loss": 0.8249, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 2.371328482888709, | |
| "grad_norm": 0.7082831263542175, | |
| "learning_rate": 0.00017524701338363422, | |
| "loss": 0.8287, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 2.3746968472109944, | |
| "grad_norm": 0.6893306374549866, | |
| "learning_rate": 0.00017502245576214855, | |
| "loss": 0.8241, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 2.378065211533279, | |
| "grad_norm": 0.6624453663825989, | |
| "learning_rate": 0.00017479789814066286, | |
| "loss": 0.8281, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 2.3814335758555645, | |
| "grad_norm": 0.7071661353111267, | |
| "learning_rate": 0.00017457334051917722, | |
| "loss": 0.8257, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 2.3848019401778497, | |
| "grad_norm": 0.6531967520713806, | |
| "learning_rate": 0.00017434878289769153, | |
| "loss": 0.8253, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 2.3881703045001346, | |
| "grad_norm": 0.6394172310829163, | |
| "learning_rate": 0.00017412422527620587, | |
| "loss": 0.8291, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 2.39153866882242, | |
| "grad_norm": 0.7370265126228333, | |
| "learning_rate": 0.00017389966765472018, | |
| "loss": 0.8282, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 2.394907033144705, | |
| "grad_norm": 0.6256112456321716, | |
| "learning_rate": 0.0001736751100332345, | |
| "loss": 0.8267, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 2.39827539746699, | |
| "grad_norm": 0.6319020390510559, | |
| "learning_rate": 0.00017345055241174885, | |
| "loss": 0.8271, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 2.401643761789275, | |
| "grad_norm": 0.638664186000824, | |
| "learning_rate": 0.00017322599479026315, | |
| "loss": 0.8259, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 2.4050121261115605, | |
| "grad_norm": 0.6793828010559082, | |
| "learning_rate": 0.0001730014371687775, | |
| "loss": 0.8265, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 2.4083804904338453, | |
| "grad_norm": 0.7026681303977966, | |
| "learning_rate": 0.00017277687954729182, | |
| "loss": 0.8296, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 2.4117488547561305, | |
| "grad_norm": 0.6481872200965881, | |
| "learning_rate": 0.00017255232192580616, | |
| "loss": 0.826, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 2.4151172190784154, | |
| "grad_norm": 0.68873530626297, | |
| "learning_rate": 0.00017232776430432047, | |
| "loss": 0.8283, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 2.4184855834007006, | |
| "grad_norm": 0.6869419813156128, | |
| "learning_rate": 0.0001721032066828348, | |
| "loss": 0.8251, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 2.421853947722986, | |
| "grad_norm": 0.6462306380271912, | |
| "learning_rate": 0.00017187864906134914, | |
| "loss": 0.8265, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 2.4252223120452707, | |
| "grad_norm": 0.6632818579673767, | |
| "learning_rate": 0.00017165409143986345, | |
| "loss": 0.8287, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 2.428590676367556, | |
| "grad_norm": 0.7588053345680237, | |
| "learning_rate": 0.00017142953381837778, | |
| "loss": 0.8278, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 2.431959040689841, | |
| "grad_norm": 0.6804139614105225, | |
| "learning_rate": 0.0001712049761968921, | |
| "loss": 0.8266, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 2.435327405012126, | |
| "grad_norm": 0.8674142956733704, | |
| "learning_rate": 0.00017098041857540645, | |
| "loss": 0.8242, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 2.4386957693344113, | |
| "grad_norm": 1.0071535110473633, | |
| "learning_rate": 0.00017075586095392076, | |
| "loss": 0.8294, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 2.442064133656696, | |
| "grad_norm": 0.6555808782577515, | |
| "learning_rate": 0.0001705313033324351, | |
| "loss": 0.8256, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 2.4454324979789814, | |
| "grad_norm": 0.6730812788009644, | |
| "learning_rate": 0.0001703067457109494, | |
| "loss": 0.8254, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 2.4488008623012663, | |
| "grad_norm": 0.6546606421470642, | |
| "learning_rate": 0.00017008218808946377, | |
| "loss": 0.8257, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 2.4521692266235515, | |
| "grad_norm": 0.6847641468048096, | |
| "learning_rate": 0.00016985763046797807, | |
| "loss": 0.824, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 2.455537590945837, | |
| "grad_norm": 0.6383925080299377, | |
| "learning_rate": 0.00016963307284649238, | |
| "loss": 0.8238, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 2.4589059552681216, | |
| "grad_norm": 0.6601071357727051, | |
| "learning_rate": 0.00016940851522500672, | |
| "loss": 0.8215, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 2.462274319590407, | |
| "grad_norm": 0.6254110336303711, | |
| "learning_rate": 0.00016918395760352102, | |
| "loss": 0.8254, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 2.465642683912692, | |
| "grad_norm": 0.6367729902267456, | |
| "learning_rate": 0.0001689593999820354, | |
| "loss": 0.8201, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 2.469011048234977, | |
| "grad_norm": 0.6448660492897034, | |
| "learning_rate": 0.0001687348423605497, | |
| "loss": 0.8237, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 2.4723794125572622, | |
| "grad_norm": 0.6661122441291809, | |
| "learning_rate": 0.00016851028473906403, | |
| "loss": 0.8257, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 2.4757477768795475, | |
| "grad_norm": 0.619644820690155, | |
| "learning_rate": 0.00016828572711757834, | |
| "loss": 0.8238, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 2.4791161412018323, | |
| "grad_norm": 0.7607592344284058, | |
| "learning_rate": 0.0001680611694960927, | |
| "loss": 0.8225, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 2.4824845055241176, | |
| "grad_norm": 0.6733140349388123, | |
| "learning_rate": 0.000167836611874607, | |
| "loss": 0.8212, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 2.4858528698464024, | |
| "grad_norm": 0.9612058401107788, | |
| "learning_rate": 0.00016761205425312134, | |
| "loss": 0.8214, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 2.4892212341686877, | |
| "grad_norm": 0.618076503276825, | |
| "learning_rate": 0.00016738749663163565, | |
| "loss": 0.8231, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 2.492589598490973, | |
| "grad_norm": 0.6606545448303223, | |
| "learning_rate": 0.00016716293901015, | |
| "loss": 0.8235, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 2.4959579628132578, | |
| "grad_norm": 0.6311343312263489, | |
| "learning_rate": 0.00016693838138866432, | |
| "loss": 0.8227, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 2.499326327135543, | |
| "grad_norm": 0.713347315788269, | |
| "learning_rate": 0.00016671382376717863, | |
| "loss": 0.825, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 2.502694691457828, | |
| "grad_norm": 0.658001720905304, | |
| "learning_rate": 0.00016648926614569297, | |
| "loss": 0.8227, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 2.506063055780113, | |
| "grad_norm": 0.7130460739135742, | |
| "learning_rate": 0.0001662647085242073, | |
| "loss": 0.823, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 2.5094314201023984, | |
| "grad_norm": 0.7092128992080688, | |
| "learning_rate": 0.00016604015090272164, | |
| "loss": 0.8271, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 2.512799784424683, | |
| "grad_norm": 0.6681484580039978, | |
| "learning_rate": 0.00016581559328123594, | |
| "loss": 0.8208, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 2.5161681487469685, | |
| "grad_norm": 0.649299144744873, | |
| "learning_rate": 0.00016559103565975028, | |
| "loss": 0.8229, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 2.5195365130692533, | |
| "grad_norm": 0.6099591255187988, | |
| "learning_rate": 0.00016536647803826461, | |
| "loss": 0.8221, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 2.5229048773915386, | |
| "grad_norm": 0.655128538608551, | |
| "learning_rate": 0.00016514192041677892, | |
| "loss": 0.8216, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 2.526273241713824, | |
| "grad_norm": 2.5822091102600098, | |
| "learning_rate": 0.00016491736279529326, | |
| "loss": 0.8213, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 2.529641606036109, | |
| "grad_norm": 0.6650177240371704, | |
| "learning_rate": 0.00016469280517380757, | |
| "loss": 0.8279, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 2.533009970358394, | |
| "grad_norm": 0.9380492568016052, | |
| "learning_rate": 0.00016446824755232193, | |
| "loss": 0.8245, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 2.536378334680679, | |
| "grad_norm": 0.6475105881690979, | |
| "learning_rate": 0.00016424368993083624, | |
| "loss": 0.8233, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 2.539746699002964, | |
| "grad_norm": 0.6452984809875488, | |
| "learning_rate": 0.00016401913230935057, | |
| "loss": 0.8218, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 2.5431150633252493, | |
| "grad_norm": 1.478945255279541, | |
| "learning_rate": 0.00016379457468786488, | |
| "loss": 0.825, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 2.5464834276475345, | |
| "grad_norm": 0.6364376544952393, | |
| "learning_rate": 0.00016357001706637924, | |
| "loss": 0.8215, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 2.5498517919698194, | |
| "grad_norm": 0.5982120633125305, | |
| "learning_rate": 0.00016334545944489355, | |
| "loss": 0.8244, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 2.5532201562921046, | |
| "grad_norm": 0.641855001449585, | |
| "learning_rate": 0.00016312090182340786, | |
| "loss": 0.8238, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 2.5565885206143895, | |
| "grad_norm": 0.6564063429832458, | |
| "learning_rate": 0.0001628963442019222, | |
| "loss": 0.8228, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 2.5599568849366747, | |
| "grad_norm": 0.6515690684318542, | |
| "learning_rate": 0.0001626717865804365, | |
| "loss": 0.825, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 2.56332524925896, | |
| "grad_norm": 0.7063090205192566, | |
| "learning_rate": 0.00016244722895895086, | |
| "loss": 0.8232, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 2.566693613581245, | |
| "grad_norm": 1.018594741821289, | |
| "learning_rate": 0.00016222267133746517, | |
| "loss": 0.8219, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 2.57006197790353, | |
| "grad_norm": 0.7534065842628479, | |
| "learning_rate": 0.0001619981137159795, | |
| "loss": 0.8208, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 2.573430342225815, | |
| "grad_norm": 0.6062216758728027, | |
| "learning_rate": 0.00016177355609449382, | |
| "loss": 0.8224, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 2.5767987065481, | |
| "grad_norm": 0.6766044497489929, | |
| "learning_rate": 0.00016154899847300818, | |
| "loss": 0.823, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 2.5801670708703854, | |
| "grad_norm": 0.668302595615387, | |
| "learning_rate": 0.00016132444085152249, | |
| "loss": 0.8211, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 2.5835354351926707, | |
| "grad_norm": 0.8911457061767578, | |
| "learning_rate": 0.0001610998832300368, | |
| "loss": 0.8212, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 2.5869037995149555, | |
| "grad_norm": 0.718221127986908, | |
| "learning_rate": 0.00016087532560855113, | |
| "loss": 0.8228, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 2.590272163837241, | |
| "grad_norm": 0.7011673450469971, | |
| "learning_rate": 0.00016065076798706546, | |
| "loss": 0.8216, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 2.5936405281595256, | |
| "grad_norm": 0.6552968621253967, | |
| "learning_rate": 0.0001604262103655798, | |
| "loss": 0.8216, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 2.597008892481811, | |
| "grad_norm": 0.6576195955276489, | |
| "learning_rate": 0.0001602016527440941, | |
| "loss": 0.8197, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 2.600377256804096, | |
| "grad_norm": 0.6853031516075134, | |
| "learning_rate": 0.00015997709512260847, | |
| "loss": 0.8186, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 2.603745621126381, | |
| "grad_norm": 0.631230890750885, | |
| "learning_rate": 0.00015975253750112278, | |
| "loss": 0.8212, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 2.6071139854486662, | |
| "grad_norm": 0.6358488202095032, | |
| "learning_rate": 0.0001595279798796371, | |
| "loss": 0.8221, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 2.610482349770951, | |
| "grad_norm": 1.2744354009628296, | |
| "learning_rate": 0.00015930342225815142, | |
| "loss": 0.8214, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 2.6138507140932363, | |
| "grad_norm": 0.9848027229309082, | |
| "learning_rate": 0.00015907886463666578, | |
| "loss": 0.8238, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 2.6172190784155216, | |
| "grad_norm": 0.7024006843566895, | |
| "learning_rate": 0.0001588543070151801, | |
| "loss": 0.8203, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 2.6205874427378064, | |
| "grad_norm": 0.628036379814148, | |
| "learning_rate": 0.0001586297493936944, | |
| "loss": 0.8214, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 2.6239558070600917, | |
| "grad_norm": 1.1920697689056396, | |
| "learning_rate": 0.00015840519177220873, | |
| "loss": 0.8203, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 2.6273241713823765, | |
| "grad_norm": 0.6503965258598328, | |
| "learning_rate": 0.00015818063415072304, | |
| "loss": 0.8213, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 2.6306925357046618, | |
| "grad_norm": 0.6561105847358704, | |
| "learning_rate": 0.0001579560765292374, | |
| "loss": 0.8215, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 2.634060900026947, | |
| "grad_norm": 0.9598469734191895, | |
| "learning_rate": 0.0001577315189077517, | |
| "loss": 0.8189, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 2.637429264349232, | |
| "grad_norm": 0.6700890064239502, | |
| "learning_rate": 0.00015750696128626605, | |
| "loss": 0.8217, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 2.640797628671517, | |
| "grad_norm": 0.6581085920333862, | |
| "learning_rate": 0.00015728240366478036, | |
| "loss": 0.8207, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 2.644165992993802, | |
| "grad_norm": 0.6442667841911316, | |
| "learning_rate": 0.00015705784604329472, | |
| "loss": 0.8198, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 2.647534357316087, | |
| "grad_norm": 1.1659986972808838, | |
| "learning_rate": 0.00015683328842180903, | |
| "loss": 0.8232, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 2.6509027216383725, | |
| "grad_norm": 0.6638743877410889, | |
| "learning_rate": 0.00015660873080032333, | |
| "loss": 0.8184, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 2.6542710859606578, | |
| "grad_norm": 0.649681806564331, | |
| "learning_rate": 0.00015638417317883767, | |
| "loss": 0.8195, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 2.6576394502829426, | |
| "grad_norm": 1.0518876314163208, | |
| "learning_rate": 0.00015615961555735198, | |
| "loss": 0.8222, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 2.661007814605228, | |
| "grad_norm": 0.6661698222160339, | |
| "learning_rate": 0.00015593505793586634, | |
| "loss": 0.8192, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 2.6643761789275127, | |
| "grad_norm": 0.6558882594108582, | |
| "learning_rate": 0.00015571050031438065, | |
| "loss": 0.8151, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 2.667744543249798, | |
| "grad_norm": 0.621672511100769, | |
| "learning_rate": 0.00015548594269289498, | |
| "loss": 0.8185, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 2.671112907572083, | |
| "grad_norm": 0.6606272459030151, | |
| "learning_rate": 0.00015526138507140932, | |
| "loss": 0.8178, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 2.674481271894368, | |
| "grad_norm": 0.685043215751648, | |
| "learning_rate": 0.00015503682744992365, | |
| "loss": 0.8199, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 2.6778496362166533, | |
| "grad_norm": 0.6863681077957153, | |
| "learning_rate": 0.00015481226982843796, | |
| "loss": 0.8209, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 2.681218000538938, | |
| "grad_norm": 0.6480740308761597, | |
| "learning_rate": 0.00015458771220695227, | |
| "loss": 0.8209, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 2.6845863648612234, | |
| "grad_norm": 0.6727776527404785, | |
| "learning_rate": 0.00015436315458546663, | |
| "loss": 0.8207, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 2.6879547291835086, | |
| "grad_norm": 0.7330679893493652, | |
| "learning_rate": 0.00015413859696398094, | |
| "loss": 0.8217, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 2.6913230935057935, | |
| "grad_norm": 0.7128227949142456, | |
| "learning_rate": 0.00015391403934249528, | |
| "loss": 0.8229, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 2.6946914578280787, | |
| "grad_norm": 0.6492688059806824, | |
| "learning_rate": 0.00015368948172100958, | |
| "loss": 0.8162, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 2.6980598221503636, | |
| "grad_norm": 0.8806473016738892, | |
| "learning_rate": 0.00015346492409952395, | |
| "loss": 0.8227, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 2.701428186472649, | |
| "grad_norm": 0.8799885511398315, | |
| "learning_rate": 0.00015324036647803825, | |
| "loss": 0.8165, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 2.704796550794934, | |
| "grad_norm": 0.6979735493659973, | |
| "learning_rate": 0.0001530158088565526, | |
| "loss": 0.8198, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 2.7081649151172194, | |
| "grad_norm": 0.6624419689178467, | |
| "learning_rate": 0.0001527912512350669, | |
| "loss": 0.8194, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 2.711533279439504, | |
| "grad_norm": 0.7164821624755859, | |
| "learning_rate": 0.00015256669361358126, | |
| "loss": 0.8193, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 2.7149016437617894, | |
| "grad_norm": 0.6688589453697205, | |
| "learning_rate": 0.00015234213599209557, | |
| "loss": 0.8156, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 2.7182700080840743, | |
| "grad_norm": 0.6661184430122375, | |
| "learning_rate": 0.00015211757837060988, | |
| "loss": 0.8199, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 2.7216383724063595, | |
| "grad_norm": 0.6526447534561157, | |
| "learning_rate": 0.0001518930207491242, | |
| "loss": 0.8187, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 2.725006736728645, | |
| "grad_norm": 0.6510984897613525, | |
| "learning_rate": 0.00015166846312763852, | |
| "loss": 0.8189, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 2.7283751010509296, | |
| "grad_norm": 0.6921165585517883, | |
| "learning_rate": 0.00015144390550615288, | |
| "loss": 0.8182, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 2.731743465373215, | |
| "grad_norm": 0.7041354179382324, | |
| "learning_rate": 0.0001512193478846672, | |
| "loss": 0.8208, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 2.7351118296954997, | |
| "grad_norm": 0.639445424079895, | |
| "learning_rate": 0.00015099479026318152, | |
| "loss": 0.8202, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 2.738480194017785, | |
| "grad_norm": 0.7262235879898071, | |
| "learning_rate": 0.00015077023264169583, | |
| "loss": 0.8184, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 2.7418485583400702, | |
| "grad_norm": 0.6470584273338318, | |
| "learning_rate": 0.0001505456750202102, | |
| "loss": 0.8199, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 2.745216922662355, | |
| "grad_norm": 0.6758275628089905, | |
| "learning_rate": 0.0001503211173987245, | |
| "loss": 0.817, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 2.7485852869846403, | |
| "grad_norm": 0.6550074815750122, | |
| "learning_rate": 0.0001500965597772388, | |
| "loss": 0.82, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 2.751953651306925, | |
| "grad_norm": 0.6312419176101685, | |
| "learning_rate": 0.00014987200215575315, | |
| "loss": 0.8209, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 2.7553220156292104, | |
| "grad_norm": 0.6456059813499451, | |
| "learning_rate": 0.00014964744453426748, | |
| "loss": 0.8173, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 2.7586903799514957, | |
| "grad_norm": 0.6251012086868286, | |
| "learning_rate": 0.00014942288691278182, | |
| "loss": 0.8147, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 2.7620587442737805, | |
| "grad_norm": 0.6424401998519897, | |
| "learning_rate": 0.00014919832929129613, | |
| "loss": 0.8173, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 2.7654271085960658, | |
| "grad_norm": 0.7199423313140869, | |
| "learning_rate": 0.00014897377166981046, | |
| "loss": 0.8163, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 2.7687954729183506, | |
| "grad_norm": 0.6573197841644287, | |
| "learning_rate": 0.0001487492140483248, | |
| "loss": 0.8145, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 2.772163837240636, | |
| "grad_norm": 0.6857665777206421, | |
| "learning_rate": 0.0001485246564268391, | |
| "loss": 0.8188, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 2.775532201562921, | |
| "grad_norm": 0.6398062705993652, | |
| "learning_rate": 0.00014830009880535344, | |
| "loss": 0.8173, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 2.7789005658852064, | |
| "grad_norm": 0.6743867993354797, | |
| "learning_rate": 0.00014807554118386777, | |
| "loss": 0.8176, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 2.782268930207491, | |
| "grad_norm": 0.6104719042778015, | |
| "learning_rate": 0.0001478509835623821, | |
| "loss": 0.8171, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 2.7856372945297765, | |
| "grad_norm": 0.6518858075141907, | |
| "learning_rate": 0.00014762642594089642, | |
| "loss": 0.8203, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 2.7890056588520613, | |
| "grad_norm": 0.7392122745513916, | |
| "learning_rate": 0.00014740186831941075, | |
| "loss": 0.8171, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 2.7923740231743466, | |
| "grad_norm": 0.6652575135231018, | |
| "learning_rate": 0.0001471773106979251, | |
| "loss": 0.8155, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 2.795742387496632, | |
| "grad_norm": 0.698665201663971, | |
| "learning_rate": 0.0001469527530764394, | |
| "loss": 0.8202, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 2.7991107518189167, | |
| "grad_norm": 0.6012236475944519, | |
| "learning_rate": 0.00014672819545495373, | |
| "loss": 0.817, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 2.802479116141202, | |
| "grad_norm": 0.6791641116142273, | |
| "learning_rate": 0.00014650363783346804, | |
| "loss": 0.8185, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 2.8058474804634868, | |
| "grad_norm": 0.694733738899231, | |
| "learning_rate": 0.00014627908021198237, | |
| "loss": 0.8161, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 2.809215844785772, | |
| "grad_norm": 1.395378589630127, | |
| "learning_rate": 0.0001460545225904967, | |
| "loss": 0.8136, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 2.8125842091080573, | |
| "grad_norm": 1.1096270084381104, | |
| "learning_rate": 0.00014582996496901104, | |
| "loss": 0.8187, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 2.815952573430342, | |
| "grad_norm": 0.6426212787628174, | |
| "learning_rate": 0.00014560540734752535, | |
| "loss": 0.817, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 2.8193209377526274, | |
| "grad_norm": 0.6201661229133606, | |
| "learning_rate": 0.0001453808497260397, | |
| "loss": 0.8166, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 2.822689302074912, | |
| "grad_norm": 0.6646463871002197, | |
| "learning_rate": 0.00014515629210455402, | |
| "loss": 0.82, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 2.8260576663971975, | |
| "grad_norm": 0.6990267038345337, | |
| "learning_rate": 0.00014493173448306833, | |
| "loss": 0.8169, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 2.8294260307194827, | |
| "grad_norm": 0.671810507774353, | |
| "learning_rate": 0.00014470717686158267, | |
| "loss": 0.8167, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 2.8327943950417676, | |
| "grad_norm": 0.686876118183136, | |
| "learning_rate": 0.000144482619240097, | |
| "loss": 0.8137, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 2.836162759364053, | |
| "grad_norm": 0.6434644460678101, | |
| "learning_rate": 0.0001442580616186113, | |
| "loss": 0.8157, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 2.8395311236863376, | |
| "grad_norm": 0.652999758720398, | |
| "learning_rate": 0.00014403350399712564, | |
| "loss": 0.8121, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 2.842899488008623, | |
| "grad_norm": 0.6468531489372253, | |
| "learning_rate": 0.00014380894637563998, | |
| "loss": 0.8175, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 2.846267852330908, | |
| "grad_norm": 0.6018803119659424, | |
| "learning_rate": 0.00014358438875415432, | |
| "loss": 0.816, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 2.8496362166531934, | |
| "grad_norm": 0.6775005459785461, | |
| "learning_rate": 0.00014335983113266862, | |
| "loss": 0.8128, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 2.8530045809754783, | |
| "grad_norm": 0.664910078048706, | |
| "learning_rate": 0.00014313527351118296, | |
| "loss": 0.8172, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 2.8563729452977635, | |
| "grad_norm": 0.6959900259971619, | |
| "learning_rate": 0.0001429107158896973, | |
| "loss": 0.8135, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 2.8597413096200484, | |
| "grad_norm": 0.6213033199310303, | |
| "learning_rate": 0.0001426861582682116, | |
| "loss": 0.8153, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 2.8631096739423336, | |
| "grad_norm": 0.6437749266624451, | |
| "learning_rate": 0.00014246160064672594, | |
| "loss": 0.8114, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 2.866478038264619, | |
| "grad_norm": 0.6763966083526611, | |
| "learning_rate": 0.00014223704302524027, | |
| "loss": 0.8138, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 2.8698464025869037, | |
| "grad_norm": 1.10175621509552, | |
| "learning_rate": 0.00014201248540375458, | |
| "loss": 0.8153, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 2.873214766909189, | |
| "grad_norm": 0.6517946124076843, | |
| "learning_rate": 0.00014178792778226892, | |
| "loss": 0.8147, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 2.876583131231474, | |
| "grad_norm": 0.7234548926353455, | |
| "learning_rate": 0.00014156337016078325, | |
| "loss": 0.8182, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 2.879951495553759, | |
| "grad_norm": 0.6792501211166382, | |
| "learning_rate": 0.00014133881253929759, | |
| "loss": 0.8154, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 2.8833198598760443, | |
| "grad_norm": 0.6222261786460876, | |
| "learning_rate": 0.0001411142549178119, | |
| "loss": 0.816, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 2.886688224198329, | |
| "grad_norm": 0.6346508264541626, | |
| "learning_rate": 0.00014088969729632623, | |
| "loss": 0.8159, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 2.8900565885206144, | |
| "grad_norm": 0.7334688305854797, | |
| "learning_rate": 0.00014066513967484054, | |
| "loss": 0.8166, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 2.8934249528428992, | |
| "grad_norm": 0.6864719986915588, | |
| "learning_rate": 0.00014044058205335487, | |
| "loss": 0.819, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 2.8967933171651845, | |
| "grad_norm": 0.5927285552024841, | |
| "learning_rate": 0.0001402160244318692, | |
| "loss": 0.8148, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 2.90016168148747, | |
| "grad_norm": 0.660213828086853, | |
| "learning_rate": 0.00013999146681038354, | |
| "loss": 0.815, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 2.903530045809755, | |
| "grad_norm": 0.6887788772583008, | |
| "learning_rate": 0.00013976690918889785, | |
| "loss": 0.8143, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 2.90689841013204, | |
| "grad_norm": 1.1385151147842407, | |
| "learning_rate": 0.00013954235156741219, | |
| "loss": 0.8139, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 2.910266774454325, | |
| "grad_norm": 0.6464802026748657, | |
| "learning_rate": 0.00013931779394592652, | |
| "loss": 0.8149, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 2.91363513877661, | |
| "grad_norm": 0.6867853999137878, | |
| "learning_rate": 0.00013909323632444086, | |
| "loss": 0.8139, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 2.9170035030988952, | |
| "grad_norm": 0.8868036866188049, | |
| "learning_rate": 0.00013886867870295516, | |
| "loss": 0.8139, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 2.9203718674211805, | |
| "grad_norm": 0.6516538262367249, | |
| "learning_rate": 0.0001386441210814695, | |
| "loss": 0.8153, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 2.9237402317434653, | |
| "grad_norm": 0.6790093779563904, | |
| "learning_rate": 0.0001384195634599838, | |
| "loss": 0.8133, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 2.9271085960657506, | |
| "grad_norm": 0.6333130598068237, | |
| "learning_rate": 0.00013819500583849814, | |
| "loss": 0.8117, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 2.9304769603880354, | |
| "grad_norm": 0.7102107405662537, | |
| "learning_rate": 0.00013797044821701248, | |
| "loss": 0.8128, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 2.9338453247103207, | |
| "grad_norm": 0.7193422913551331, | |
| "learning_rate": 0.00013774589059552679, | |
| "loss": 0.8121, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 2.937213689032606, | |
| "grad_norm": 0.7304584980010986, | |
| "learning_rate": 0.00013752133297404112, | |
| "loss": 0.8114, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 2.9405820533548908, | |
| "grad_norm": 0.8924300074577332, | |
| "learning_rate": 0.00013729677535255546, | |
| "loss": 0.8137, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 2.943950417677176, | |
| "grad_norm": 0.6813507080078125, | |
| "learning_rate": 0.0001370722177310698, | |
| "loss": 0.8109, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 2.947318781999461, | |
| "grad_norm": 0.6427081823348999, | |
| "learning_rate": 0.0001368476601095841, | |
| "loss": 0.8125, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 2.950687146321746, | |
| "grad_norm": 0.6571387052536011, | |
| "learning_rate": 0.00013662310248809843, | |
| "loss": 0.8132, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 2.9540555106440314, | |
| "grad_norm": 0.7705689072608948, | |
| "learning_rate": 0.00013639854486661277, | |
| "loss": 0.8129, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 2.957423874966316, | |
| "grad_norm": 0.7075904011726379, | |
| "learning_rate": 0.00013617398724512708, | |
| "loss": 0.8143, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 2.9607922392886015, | |
| "grad_norm": 0.6926144957542419, | |
| "learning_rate": 0.0001359494296236414, | |
| "loss": 0.8115, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 2.9641606036108863, | |
| "grad_norm": 0.7183883190155029, | |
| "learning_rate": 0.00013572487200215575, | |
| "loss": 0.8122, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 2.9675289679331716, | |
| "grad_norm": 0.6963924169540405, | |
| "learning_rate": 0.00013550031438067006, | |
| "loss": 0.8151, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 2.970897332255457, | |
| "grad_norm": 0.6360912919044495, | |
| "learning_rate": 0.0001352757567591844, | |
| "loss": 0.8129, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 2.974265696577742, | |
| "grad_norm": 0.7141982316970825, | |
| "learning_rate": 0.00013505119913769873, | |
| "loss": 0.8142, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 2.977634060900027, | |
| "grad_norm": 0.6647577285766602, | |
| "learning_rate": 0.00013482664151621306, | |
| "loss": 0.811, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 2.981002425222312, | |
| "grad_norm": 0.6686524152755737, | |
| "learning_rate": 0.00013460208389472737, | |
| "loss": 0.8149, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 2.984370789544597, | |
| "grad_norm": 0.66321861743927, | |
| "learning_rate": 0.0001343775262732417, | |
| "loss": 0.8103, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 2.9877391538668823, | |
| "grad_norm": 0.622572660446167, | |
| "learning_rate": 0.00013415296865175601, | |
| "loss": 0.8144, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 2.9911075181891675, | |
| "grad_norm": 0.667248547077179, | |
| "learning_rate": 0.00013392841103027035, | |
| "loss": 0.8108, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 2.9944758825114524, | |
| "grad_norm": 0.6621103882789612, | |
| "learning_rate": 0.00013370385340878468, | |
| "loss": 0.8106, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 2.9978442468337376, | |
| "grad_norm": 0.6303636431694031, | |
| "learning_rate": 0.00013347929578729902, | |
| "loss": 0.8104, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.760505199432373, | |
| "eval_runtime": 1.8138, | |
| "eval_samples_per_second": 2756.665, | |
| "eval_steps_per_second": 43.555, | |
| "step": 89064 | |
| }, | |
| { | |
| "epoch": 3.0012126111560224, | |
| "grad_norm": 0.6279102563858032, | |
| "learning_rate": 0.00013325473816581333, | |
| "loss": 0.813, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 3.0045809754783077, | |
| "grad_norm": 0.6355727910995483, | |
| "learning_rate": 0.00013303018054432766, | |
| "loss": 0.811, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 3.007949339800593, | |
| "grad_norm": 0.6710761189460754, | |
| "learning_rate": 0.000132805622922842, | |
| "loss": 0.8147, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 3.011317704122878, | |
| "grad_norm": 0.6306372284889221, | |
| "learning_rate": 0.00013258106530135633, | |
| "loss": 0.8151, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 3.014686068445163, | |
| "grad_norm": 0.6803897023200989, | |
| "learning_rate": 0.00013235650767987064, | |
| "loss": 0.8086, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 3.0180544327674483, | |
| "grad_norm": 0.6367260217666626, | |
| "learning_rate": 0.00013213195005838498, | |
| "loss": 0.8119, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 3.021422797089733, | |
| "grad_norm": 0.664561927318573, | |
| "learning_rate": 0.00013190739243689928, | |
| "loss": 0.8094, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 3.0247911614120184, | |
| "grad_norm": 0.6986654996871948, | |
| "learning_rate": 0.00013168283481541362, | |
| "loss": 0.8116, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 3.0281595257343032, | |
| "grad_norm": 0.6583049893379211, | |
| "learning_rate": 0.00013145827719392795, | |
| "loss": 0.8148, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 3.0315278900565885, | |
| "grad_norm": 0.6716769337654114, | |
| "learning_rate": 0.00013123371957244226, | |
| "loss": 0.8137, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 3.034896254378874, | |
| "grad_norm": 0.7089846134185791, | |
| "learning_rate": 0.0001310091619509566, | |
| "loss": 0.809, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 3.0382646187011586, | |
| "grad_norm": 0.6873953938484192, | |
| "learning_rate": 0.00013078460432947093, | |
| "loss": 0.8113, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 3.041632983023444, | |
| "grad_norm": 0.6583080887794495, | |
| "learning_rate": 0.00013056004670798527, | |
| "loss": 0.8067, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 3.045001347345729, | |
| "grad_norm": 0.6893338561058044, | |
| "learning_rate": 0.00013033548908649958, | |
| "loss": 0.8126, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 3.048369711668014, | |
| "grad_norm": 0.9407336115837097, | |
| "learning_rate": 0.0001301109314650139, | |
| "loss": 0.8077, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 3.0517380759902992, | |
| "grad_norm": 1.1255403757095337, | |
| "learning_rate": 0.00012988637384352822, | |
| "loss": 0.8104, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 3.055106440312584, | |
| "grad_norm": 0.6687456369400024, | |
| "learning_rate": 0.00012966181622204255, | |
| "loss": 0.8122, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 3.0584748046348693, | |
| "grad_norm": 0.6100497245788574, | |
| "learning_rate": 0.0001294372586005569, | |
| "loss": 0.8098, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 3.0618431689571546, | |
| "grad_norm": 0.6621761918067932, | |
| "learning_rate": 0.00012921270097907123, | |
| "loss": 0.8118, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 3.0652115332794394, | |
| "grad_norm": 0.8076705932617188, | |
| "learning_rate": 0.00012898814335758553, | |
| "loss": 0.811, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 3.0685798976017247, | |
| "grad_norm": 0.6705955266952515, | |
| "learning_rate": 0.00012876358573609987, | |
| "loss": 0.8116, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 3.0719482619240095, | |
| "grad_norm": 0.6365945339202881, | |
| "learning_rate": 0.0001285390281146142, | |
| "loss": 0.8074, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 3.0753166262462948, | |
| "grad_norm": 0.9314165115356445, | |
| "learning_rate": 0.00012831447049312854, | |
| "loss": 0.8092, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 3.07868499056858, | |
| "grad_norm": 0.6733311414718628, | |
| "learning_rate": 0.00012808991287164285, | |
| "loss": 0.8102, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 3.082053354890865, | |
| "grad_norm": 0.6910605430603027, | |
| "learning_rate": 0.00012786535525015718, | |
| "loss": 0.8065, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 3.08542171921315, | |
| "grad_norm": 0.7043003439903259, | |
| "learning_rate": 0.0001276407976286715, | |
| "loss": 0.8073, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 3.0887900835354354, | |
| "grad_norm": 0.6764921545982361, | |
| "learning_rate": 0.00012741624000718583, | |
| "loss": 0.8089, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 3.09215844785772, | |
| "grad_norm": 0.6997144818305969, | |
| "learning_rate": 0.00012719168238570016, | |
| "loss": 0.8088, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 3.0955268121800055, | |
| "grad_norm": 0.6801837086677551, | |
| "learning_rate": 0.0001269671247642145, | |
| "loss": 0.8094, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 3.0988951765022903, | |
| "grad_norm": 0.6339113116264343, | |
| "learning_rate": 0.0001267425671427288, | |
| "loss": 0.8109, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 3.1022635408245756, | |
| "grad_norm": 0.6691506505012512, | |
| "learning_rate": 0.00012651800952124314, | |
| "loss": 0.8135, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 3.105631905146861, | |
| "grad_norm": 0.6617900133132935, | |
| "learning_rate": 0.00012629345189975747, | |
| "loss": 0.8106, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 3.1090002694691456, | |
| "grad_norm": 0.6737276911735535, | |
| "learning_rate": 0.0001260688942782718, | |
| "loss": 0.8128, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 3.112368633791431, | |
| "grad_norm": 0.670802652835846, | |
| "learning_rate": 0.00012584433665678612, | |
| "loss": 0.8106, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 3.115736998113716, | |
| "grad_norm": 0.626956045627594, | |
| "learning_rate": 0.00012561977903530045, | |
| "loss": 0.8114, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 3.119105362436001, | |
| "grad_norm": 0.6243528127670288, | |
| "learning_rate": 0.00012539522141381476, | |
| "loss": 0.8115, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 3.1224737267582863, | |
| "grad_norm": 0.6828027367591858, | |
| "learning_rate": 0.0001251706637923291, | |
| "loss": 0.807, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 3.125842091080571, | |
| "grad_norm": 0.704557478427887, | |
| "learning_rate": 0.00012494610617084343, | |
| "loss": 0.8085, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 3.1292104554028564, | |
| "grad_norm": 0.6188080310821533, | |
| "learning_rate": 0.00012472154854935774, | |
| "loss": 0.8079, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 3.1325788197251416, | |
| "grad_norm": 0.6640317440032959, | |
| "learning_rate": 0.00012449699092787207, | |
| "loss": 0.808, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 3.1359471840474265, | |
| "grad_norm": 0.6489530801773071, | |
| "learning_rate": 0.0001242724333063864, | |
| "loss": 0.8089, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 3.1393155483697117, | |
| "grad_norm": 0.6527587175369263, | |
| "learning_rate": 0.00012404787568490074, | |
| "loss": 0.809, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 3.1426839126919965, | |
| "grad_norm": 0.6802580952644348, | |
| "learning_rate": 0.00012382331806341508, | |
| "loss": 0.8099, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 3.146052277014282, | |
| "grad_norm": 0.6751042604446411, | |
| "learning_rate": 0.0001235987604419294, | |
| "loss": 0.8103, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 3.149420641336567, | |
| "grad_norm": 0.723804235458374, | |
| "learning_rate": 0.0001233742028204437, | |
| "loss": 0.8043, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 3.152789005658852, | |
| "grad_norm": 0.768860399723053, | |
| "learning_rate": 0.00012314964519895803, | |
| "loss": 0.8105, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 3.156157369981137, | |
| "grad_norm": 0.6464242339134216, | |
| "learning_rate": 0.00012292508757747237, | |
| "loss": 0.808, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 3.1595257343034224, | |
| "grad_norm": 0.7125059962272644, | |
| "learning_rate": 0.0001227005299559867, | |
| "loss": 0.8103, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 3.1628940986257073, | |
| "grad_norm": 0.6584749221801758, | |
| "learning_rate": 0.000122475972334501, | |
| "loss": 0.81, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 3.1662624629479925, | |
| "grad_norm": 0.6689501404762268, | |
| "learning_rate": 0.00012225141471301534, | |
| "loss": 0.8086, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 3.169630827270278, | |
| "grad_norm": 0.6383669376373291, | |
| "learning_rate": 0.00012202685709152968, | |
| "loss": 0.8097, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 3.1729991915925626, | |
| "grad_norm": 0.6786794066429138, | |
| "learning_rate": 0.000121802299470044, | |
| "loss": 0.809, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 3.176367555914848, | |
| "grad_norm": 0.6704023480415344, | |
| "learning_rate": 0.00012157774184855834, | |
| "loss": 0.8086, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 3.1797359202371327, | |
| "grad_norm": 0.652862012386322, | |
| "learning_rate": 0.00012135318422707266, | |
| "loss": 0.808, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 3.183104284559418, | |
| "grad_norm": 0.8095204830169678, | |
| "learning_rate": 0.00012112862660558698, | |
| "loss": 0.8066, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 3.1864726488817032, | |
| "grad_norm": 0.6581931710243225, | |
| "learning_rate": 0.0001209040689841013, | |
| "loss": 0.8061, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 3.189841013203988, | |
| "grad_norm": 0.6642458438873291, | |
| "learning_rate": 0.00012067951136261564, | |
| "loss": 0.8079, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 3.1932093775262733, | |
| "grad_norm": 0.6264484524726868, | |
| "learning_rate": 0.00012045495374112996, | |
| "loss": 0.8108, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 3.196577741848558, | |
| "grad_norm": 0.6631668210029602, | |
| "learning_rate": 0.0001202303961196443, | |
| "loss": 0.8076, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 3.1999461061708434, | |
| "grad_norm": 0.636448085308075, | |
| "learning_rate": 0.00012000583849815862, | |
| "loss": 0.805, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 3.2033144704931287, | |
| "grad_norm": 0.6331253051757812, | |
| "learning_rate": 0.00011978128087667295, | |
| "loss": 0.8089, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 3.2066828348154135, | |
| "grad_norm": 0.6623615026473999, | |
| "learning_rate": 0.00011955672325518727, | |
| "loss": 0.8089, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 3.2100511991376988, | |
| "grad_norm": 0.671399712562561, | |
| "learning_rate": 0.00011933216563370161, | |
| "loss": 0.8054, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 3.213419563459984, | |
| "grad_norm": 0.6822311282157898, | |
| "learning_rate": 0.00011910760801221592, | |
| "loss": 0.8057, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 3.216787927782269, | |
| "grad_norm": 0.6769167184829712, | |
| "learning_rate": 0.00011888305039073025, | |
| "loss": 0.8065, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 3.220156292104554, | |
| "grad_norm": 0.6246688961982727, | |
| "learning_rate": 0.00011865849276924457, | |
| "loss": 0.8063, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 3.223524656426839, | |
| "grad_norm": 0.6401100754737854, | |
| "learning_rate": 0.00011843393514775891, | |
| "loss": 0.807, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 3.226893020749124, | |
| "grad_norm": 0.6910848021507263, | |
| "learning_rate": 0.00011820937752627323, | |
| "loss": 0.8068, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 3.2302613850714095, | |
| "grad_norm": 0.7022745013237, | |
| "learning_rate": 0.00011798481990478756, | |
| "loss": 0.8077, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 3.2336297493936943, | |
| "grad_norm": 0.7098489999771118, | |
| "learning_rate": 0.00011776026228330189, | |
| "loss": 0.8079, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 3.2369981137159796, | |
| "grad_norm": 1.6580332517623901, | |
| "learning_rate": 0.00011753570466181622, | |
| "loss": 0.8052, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 3.240366478038265, | |
| "grad_norm": 0.6385944485664368, | |
| "learning_rate": 0.00011731114704033054, | |
| "loss": 0.8097, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 3.2437348423605497, | |
| "grad_norm": 0.6737959384918213, | |
| "learning_rate": 0.00011708658941884488, | |
| "loss": 0.8075, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 3.247103206682835, | |
| "grad_norm": 0.6414308547973633, | |
| "learning_rate": 0.00011686203179735919, | |
| "loss": 0.8074, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 3.2504715710051197, | |
| "grad_norm": 0.6727792024612427, | |
| "learning_rate": 0.00011663747417587351, | |
| "loss": 0.8082, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 3.253839935327405, | |
| "grad_norm": 0.6280369162559509, | |
| "learning_rate": 0.00011641291655438784, | |
| "loss": 0.8098, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 3.2572082996496903, | |
| "grad_norm": 0.6988112330436707, | |
| "learning_rate": 0.00011618835893290216, | |
| "loss": 0.8079, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 3.260576663971975, | |
| "grad_norm": 0.6650980710983276, | |
| "learning_rate": 0.0001159638013114165, | |
| "loss": 0.8084, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 3.2639450282942604, | |
| "grad_norm": 0.6886364817619324, | |
| "learning_rate": 0.00011573924368993082, | |
| "loss": 0.8074, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 3.267313392616545, | |
| "grad_norm": 0.6584846377372742, | |
| "learning_rate": 0.00011551468606844516, | |
| "loss": 0.8111, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 3.2706817569388305, | |
| "grad_norm": 0.9834907650947571, | |
| "learning_rate": 0.00011529012844695948, | |
| "loss": 0.8088, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 3.2740501212611157, | |
| "grad_norm": 0.6571055054664612, | |
| "learning_rate": 0.00011506557082547381, | |
| "loss": 0.8043, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 3.2774184855834005, | |
| "grad_norm": 0.6906171441078186, | |
| "learning_rate": 0.00011484101320398814, | |
| "loss": 0.8081, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 3.280786849905686, | |
| "grad_norm": 0.7476776838302612, | |
| "learning_rate": 0.00011461645558250246, | |
| "loss": 0.8064, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 3.284155214227971, | |
| "grad_norm": 0.6860908269882202, | |
| "learning_rate": 0.00011439189796101678, | |
| "loss": 0.8073, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 3.287523578550256, | |
| "grad_norm": 0.6590797901153564, | |
| "learning_rate": 0.00011416734033953111, | |
| "loss": 0.807, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 3.290891942872541, | |
| "grad_norm": 0.7175418138504028, | |
| "learning_rate": 0.00011394278271804544, | |
| "loss": 0.8073, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 3.2942603071948264, | |
| "grad_norm": 0.6721409559249878, | |
| "learning_rate": 0.00011371822509655977, | |
| "loss": 0.806, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 3.2976286715171113, | |
| "grad_norm": 0.6370182633399963, | |
| "learning_rate": 0.00011349366747507409, | |
| "loss": 0.8094, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 3.3009970358393965, | |
| "grad_norm": 0.6653867959976196, | |
| "learning_rate": 0.00011326910985358843, | |
| "loss": 0.8021, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 3.3043654001616813, | |
| "grad_norm": 0.635477602481842, | |
| "learning_rate": 0.00011304455223210275, | |
| "loss": 0.8074, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 3.3077337644839666, | |
| "grad_norm": 0.7132477760314941, | |
| "learning_rate": 0.00011281999461061708, | |
| "loss": 0.8048, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 3.311102128806252, | |
| "grad_norm": 0.661605954170227, | |
| "learning_rate": 0.00011259543698913139, | |
| "loss": 0.8065, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 3.3144704931285367, | |
| "grad_norm": 0.7334872484207153, | |
| "learning_rate": 0.00011237087936764573, | |
| "loss": 0.8056, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 3.317838857450822, | |
| "grad_norm": 0.7113956212997437, | |
| "learning_rate": 0.00011214632174616005, | |
| "loss": 0.8068, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 3.321207221773107, | |
| "grad_norm": 0.6793413758277893, | |
| "learning_rate": 0.00011192176412467438, | |
| "loss": 0.8102, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 3.324575586095392, | |
| "grad_norm": 0.6595569849014282, | |
| "learning_rate": 0.0001116972065031887, | |
| "loss": 0.8045, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 3.3279439504176773, | |
| "grad_norm": 0.6264058351516724, | |
| "learning_rate": 0.00011147264888170304, | |
| "loss": 0.8059, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 3.331312314739962, | |
| "grad_norm": 0.7037299275398254, | |
| "learning_rate": 0.00011124809126021736, | |
| "loss": 0.8044, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 3.3346806790622474, | |
| "grad_norm": 0.6255789995193481, | |
| "learning_rate": 0.0001110235336387317, | |
| "loss": 0.8081, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 3.3380490433845322, | |
| "grad_norm": 0.6675742864608765, | |
| "learning_rate": 0.00011079897601724602, | |
| "loss": 0.8028, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 3.3414174077068175, | |
| "grad_norm": 0.6799077391624451, | |
| "learning_rate": 0.00011057441839576035, | |
| "loss": 0.804, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 3.3447857720291028, | |
| "grad_norm": 0.6606206893920898, | |
| "learning_rate": 0.00011034986077427466, | |
| "loss": 0.8064, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 3.3481541363513876, | |
| "grad_norm": 0.7276676893234253, | |
| "learning_rate": 0.00011012530315278898, | |
| "loss": 0.8074, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 3.351522500673673, | |
| "grad_norm": 0.6872825026512146, | |
| "learning_rate": 0.00010990074553130332, | |
| "loss": 0.8056, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 3.354890864995958, | |
| "grad_norm": 0.6640327572822571, | |
| "learning_rate": 0.00010967618790981764, | |
| "loss": 0.8078, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 3.358259229318243, | |
| "grad_norm": 0.9947742819786072, | |
| "learning_rate": 0.00010945163028833198, | |
| "loss": 0.8018, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 3.361627593640528, | |
| "grad_norm": 0.6360524296760559, | |
| "learning_rate": 0.0001092270726668463, | |
| "loss": 0.807, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 3.3649959579628135, | |
| "grad_norm": 0.7029403448104858, | |
| "learning_rate": 0.00010900251504536063, | |
| "loss": 0.8059, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 3.3683643222850983, | |
| "grad_norm": 0.7347244024276733, | |
| "learning_rate": 0.00010877795742387497, | |
| "loss": 0.8069, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 3.3717326866073836, | |
| "grad_norm": 0.6576654314994812, | |
| "learning_rate": 0.00010855339980238929, | |
| "loss": 0.8035, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 3.3751010509296684, | |
| "grad_norm": 0.6341889500617981, | |
| "learning_rate": 0.0001083288421809036, | |
| "loss": 0.8065, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 3.3784694152519537, | |
| "grad_norm": 0.6809447407722473, | |
| "learning_rate": 0.00010810428455941793, | |
| "loss": 0.8071, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 3.381837779574239, | |
| "grad_norm": 0.6789582967758179, | |
| "learning_rate": 0.00010787972693793226, | |
| "loss": 0.8081, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 3.3852061438965237, | |
| "grad_norm": 0.6672530174255371, | |
| "learning_rate": 0.00010765516931644659, | |
| "loss": 0.8079, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 3.388574508218809, | |
| "grad_norm": 0.6484895348548889, | |
| "learning_rate": 0.00010743061169496091, | |
| "loss": 0.8053, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 3.391942872541094, | |
| "grad_norm": 0.6628451943397522, | |
| "learning_rate": 0.00010720605407347525, | |
| "loss": 0.8041, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 3.395311236863379, | |
| "grad_norm": 0.6461237668991089, | |
| "learning_rate": 0.00010698149645198957, | |
| "loss": 0.804, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 3.3986796011856644, | |
| "grad_norm": 0.6682327389717102, | |
| "learning_rate": 0.0001067569388305039, | |
| "loss": 0.8049, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 3.402047965507949, | |
| "grad_norm": 0.6611519455909729, | |
| "learning_rate": 0.00010653238120901823, | |
| "loss": 0.8004, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 3.4054163298302345, | |
| "grad_norm": 0.6624406576156616, | |
| "learning_rate": 0.00010630782358753256, | |
| "loss": 0.8039, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 3.4087846941525197, | |
| "grad_norm": 0.6566054224967957, | |
| "learning_rate": 0.00010608326596604687, | |
| "loss": 0.8058, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 3.4121530584748045, | |
| "grad_norm": 0.6751037240028381, | |
| "learning_rate": 0.0001058587083445612, | |
| "loss": 0.8051, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 3.41552142279709, | |
| "grad_norm": 0.6572412252426147, | |
| "learning_rate": 0.00010563415072307553, | |
| "loss": 0.8048, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 3.418889787119375, | |
| "grad_norm": 0.7792493104934692, | |
| "learning_rate": 0.00010540959310158986, | |
| "loss": 0.8044, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 3.42225815144166, | |
| "grad_norm": 0.6584118604660034, | |
| "learning_rate": 0.00010518503548010418, | |
| "loss": 0.8026, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 3.425626515763945, | |
| "grad_norm": 0.6414441466331482, | |
| "learning_rate": 0.00010496047785861852, | |
| "loss": 0.8068, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 3.42899488008623, | |
| "grad_norm": 0.6849080920219421, | |
| "learning_rate": 0.00010473592023713284, | |
| "loss": 0.8052, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 3.4323632444085153, | |
| "grad_norm": 0.6778447031974792, | |
| "learning_rate": 0.00010451136261564717, | |
| "loss": 0.8069, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 3.4357316087308005, | |
| "grad_norm": 0.6514096260070801, | |
| "learning_rate": 0.0001042868049941615, | |
| "loss": 0.8042, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 3.4390999730530853, | |
| "grad_norm": 0.630409300327301, | |
| "learning_rate": 0.00010406224737267583, | |
| "loss": 0.8029, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 3.4424683373753706, | |
| "grad_norm": 0.709867000579834, | |
| "learning_rate": 0.00010383768975119014, | |
| "loss": 0.8026, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 3.4458367016976554, | |
| "grad_norm": 0.6706274151802063, | |
| "learning_rate": 0.00010361313212970447, | |
| "loss": 0.8069, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 3.4492050660199407, | |
| "grad_norm": 0.6600052118301392, | |
| "learning_rate": 0.0001033885745082188, | |
| "loss": 0.8028, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 3.452573430342226, | |
| "grad_norm": 0.6819061040878296, | |
| "learning_rate": 0.00010316401688673313, | |
| "loss": 0.8063, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 3.455941794664511, | |
| "grad_norm": 0.6513516902923584, | |
| "learning_rate": 0.00010293945926524745, | |
| "loss": 0.8008, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 3.459310158986796, | |
| "grad_norm": 0.623355507850647, | |
| "learning_rate": 0.00010271490164376179, | |
| "loss": 0.8035, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 3.462678523309081, | |
| "grad_norm": 0.6720972657203674, | |
| "learning_rate": 0.00010249034402227611, | |
| "loss": 0.8057, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 3.466046887631366, | |
| "grad_norm": 0.6593225002288818, | |
| "learning_rate": 0.00010226578640079045, | |
| "loss": 0.8026, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 3.4694152519536514, | |
| "grad_norm": 0.6316161751747131, | |
| "learning_rate": 0.00010204122877930477, | |
| "loss": 0.8019, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 3.4727836162759362, | |
| "grad_norm": 0.7166170477867126, | |
| "learning_rate": 0.00010181667115781907, | |
| "loss": 0.8046, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 3.4761519805982215, | |
| "grad_norm": 0.6723181009292603, | |
| "learning_rate": 0.00010159211353633341, | |
| "loss": 0.8038, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 3.4795203449205068, | |
| "grad_norm": 0.6460021734237671, | |
| "learning_rate": 0.00010136755591484773, | |
| "loss": 0.8049, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 3.4828887092427916, | |
| "grad_norm": 0.7485737800598145, | |
| "learning_rate": 0.00010114299829336207, | |
| "loss": 0.805, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 3.486257073565077, | |
| "grad_norm": 0.67794269323349, | |
| "learning_rate": 0.00010091844067187639, | |
| "loss": 0.8026, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 3.489625437887362, | |
| "grad_norm": 0.6387248635292053, | |
| "learning_rate": 0.00010069388305039072, | |
| "loss": 0.8043, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 3.492993802209647, | |
| "grad_norm": 0.662192702293396, | |
| "learning_rate": 0.00010046932542890505, | |
| "loss": 0.8031, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 3.496362166531932, | |
| "grad_norm": 0.7354533076286316, | |
| "learning_rate": 0.00010024476780741938, | |
| "loss": 0.8051, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 3.499730530854217, | |
| "grad_norm": 0.7070392370223999, | |
| "learning_rate": 0.0001000202101859337, | |
| "loss": 0.8036, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 3.5030988951765023, | |
| "grad_norm": 0.7134096622467041, | |
| "learning_rate": 9.979565256444804e-05, | |
| "loss": 0.8034, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 3.5064672594987876, | |
| "grad_norm": 0.6784800291061401, | |
| "learning_rate": 9.957109494296235e-05, | |
| "loss": 0.8029, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 3.5098356238210724, | |
| "grad_norm": 0.6493478417396545, | |
| "learning_rate": 9.934653732147668e-05, | |
| "loss": 0.8022, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 3.5132039881433577, | |
| "grad_norm": 0.6267081499099731, | |
| "learning_rate": 9.9121979699991e-05, | |
| "loss": 0.8027, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 3.5165723524656425, | |
| "grad_norm": 0.7198253273963928, | |
| "learning_rate": 9.889742207850534e-05, | |
| "loss": 0.8021, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 3.5199407167879277, | |
| "grad_norm": 0.7415684461593628, | |
| "learning_rate": 9.867286445701966e-05, | |
| "loss": 0.8049, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 3.523309081110213, | |
| "grad_norm": 0.6261735558509827, | |
| "learning_rate": 9.8448306835534e-05, | |
| "loss": 0.8018, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 3.526677445432498, | |
| "grad_norm": 0.6972131133079529, | |
| "learning_rate": 9.822374921404832e-05, | |
| "loss": 0.8046, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 3.530045809754783, | |
| "grad_norm": 0.657211184501648, | |
| "learning_rate": 9.799919159256265e-05, | |
| "loss": 0.8049, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 3.533414174077068, | |
| "grad_norm": 0.6501233577728271, | |
| "learning_rate": 9.777463397107697e-05, | |
| "loss": 0.8004, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 3.536782538399353, | |
| "grad_norm": 0.7275915741920471, | |
| "learning_rate": 9.75500763495913e-05, | |
| "loss": 0.804, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 3.5401509027216385, | |
| "grad_norm": 0.6765680313110352, | |
| "learning_rate": 9.732551872810562e-05, | |
| "loss": 0.8044, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 3.5435192670439237, | |
| "grad_norm": 0.7218645811080933, | |
| "learning_rate": 9.710096110661995e-05, | |
| "loss": 0.8034, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 3.5468876313662085, | |
| "grad_norm": 0.713777482509613, | |
| "learning_rate": 9.687640348513427e-05, | |
| "loss": 0.8034, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 3.550255995688494, | |
| "grad_norm": 0.6668462157249451, | |
| "learning_rate": 9.665184586364861e-05, | |
| "loss": 0.8029, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 3.5536243600107786, | |
| "grad_norm": 0.7213618755340576, | |
| "learning_rate": 9.642728824216293e-05, | |
| "loss": 0.802, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 3.556992724333064, | |
| "grad_norm": 0.6867049932479858, | |
| "learning_rate": 9.620273062067726e-05, | |
| "loss": 0.8027, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 3.560361088655349, | |
| "grad_norm": 0.6288276314735413, | |
| "learning_rate": 9.597817299919159e-05, | |
| "loss": 0.8017, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 3.563729452977634, | |
| "grad_norm": 0.6391323208808899, | |
| "learning_rate": 9.575361537770592e-05, | |
| "loss": 0.8013, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 3.5670978172999193, | |
| "grad_norm": 0.6860449314117432, | |
| "learning_rate": 9.552905775622024e-05, | |
| "loss": 0.7998, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 3.570466181622204, | |
| "grad_norm": 0.7907363772392273, | |
| "learning_rate": 9.530450013473455e-05, | |
| "loss": 0.8067, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 3.5738345459444893, | |
| "grad_norm": 0.6776504516601562, | |
| "learning_rate": 9.507994251324889e-05, | |
| "loss": 0.8017, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 3.5772029102667746, | |
| "grad_norm": 0.7182029485702515, | |
| "learning_rate": 9.485538489176321e-05, | |
| "loss": 0.7997, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 3.5805712745890594, | |
| "grad_norm": 0.6875755786895752, | |
| "learning_rate": 9.463082727027754e-05, | |
| "loss": 0.8043, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 3.5839396389113447, | |
| "grad_norm": 0.6848233938217163, | |
| "learning_rate": 9.440626964879187e-05, | |
| "loss": 0.8025, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 3.5873080032336295, | |
| "grad_norm": 0.6334069967269897, | |
| "learning_rate": 9.41817120273062e-05, | |
| "loss": 0.8032, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 3.590676367555915, | |
| "grad_norm": 0.6613095998764038, | |
| "learning_rate": 9.395715440582052e-05, | |
| "loss": 0.8014, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 3.5940447318782, | |
| "grad_norm": 0.6903994083404541, | |
| "learning_rate": 9.373259678433486e-05, | |
| "loss": 0.8043, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 3.597413096200485, | |
| "grad_norm": 0.6296311616897583, | |
| "learning_rate": 9.350803916284918e-05, | |
| "loss": 0.8003, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 3.60078146052277, | |
| "grad_norm": 0.7521107196807861, | |
| "learning_rate": 9.328348154136351e-05, | |
| "loss": 0.7999, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 3.604149824845055, | |
| "grad_norm": 0.6513036489486694, | |
| "learning_rate": 9.305892391987782e-05, | |
| "loss": 0.8039, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 3.6075181891673402, | |
| "grad_norm": 0.6684443950653076, | |
| "learning_rate": 9.283436629839216e-05, | |
| "loss": 0.8027, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 3.6108865534896255, | |
| "grad_norm": 0.6627715826034546, | |
| "learning_rate": 9.260980867690648e-05, | |
| "loss": 0.8025, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 3.6142549178119108, | |
| "grad_norm": 0.7347738742828369, | |
| "learning_rate": 9.238525105542081e-05, | |
| "loss": 0.8036, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 3.6176232821341956, | |
| "grad_norm": 0.7007977366447449, | |
| "learning_rate": 9.216069343393514e-05, | |
| "loss": 0.8035, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 3.620991646456481, | |
| "grad_norm": 0.6845256686210632, | |
| "learning_rate": 9.193613581244947e-05, | |
| "loss": 0.8032, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 3.6243600107787657, | |
| "grad_norm": 0.7089165449142456, | |
| "learning_rate": 9.171157819096379e-05, | |
| "loss": 0.8046, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 3.627728375101051, | |
| "grad_norm": 0.7869235873222351, | |
| "learning_rate": 9.148702056947813e-05, | |
| "loss": 0.8019, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 3.631096739423336, | |
| "grad_norm": 0.6592691540718079, | |
| "learning_rate": 9.126246294799245e-05, | |
| "loss": 0.8012, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 3.634465103745621, | |
| "grad_norm": 0.6670995354652405, | |
| "learning_rate": 9.103790532650677e-05, | |
| "loss": 0.8028, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 3.6378334680679063, | |
| "grad_norm": 0.7031666040420532, | |
| "learning_rate": 9.081334770502109e-05, | |
| "loss": 0.8017, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 3.641201832390191, | |
| "grad_norm": 0.7147188782691956, | |
| "learning_rate": 9.058879008353543e-05, | |
| "loss": 0.7982, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 3.6445701967124764, | |
| "grad_norm": 0.7295541763305664, | |
| "learning_rate": 9.036423246204975e-05, | |
| "loss": 0.8005, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 3.6479385610347617, | |
| "grad_norm": 0.6590719819068909, | |
| "learning_rate": 9.013967484056408e-05, | |
| "loss": 0.8037, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 3.6513069253570465, | |
| "grad_norm": 0.6898632049560547, | |
| "learning_rate": 8.99151172190784e-05, | |
| "loss": 0.8002, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 3.6546752896793318, | |
| "grad_norm": 0.661747395992279, | |
| "learning_rate": 8.969055959759274e-05, | |
| "loss": 0.8035, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 3.6580436540016166, | |
| "grad_norm": 0.7051920294761658, | |
| "learning_rate": 8.946600197610706e-05, | |
| "loss": 0.801, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 3.661412018323902, | |
| "grad_norm": 0.6946534514427185, | |
| "learning_rate": 8.92414443546214e-05, | |
| "loss": 0.803, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 3.664780382646187, | |
| "grad_norm": 0.649159848690033, | |
| "learning_rate": 8.901688673313572e-05, | |
| "loss": 0.8003, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 3.6681487469684724, | |
| "grad_norm": 0.6959517002105713, | |
| "learning_rate": 8.879232911165003e-05, | |
| "loss": 0.8025, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 3.671517111290757, | |
| "grad_norm": 0.7102181315422058, | |
| "learning_rate": 8.856777149016436e-05, | |
| "loss": 0.8003, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 3.6748854756130425, | |
| "grad_norm": 0.6565383076667786, | |
| "learning_rate": 8.83432138686787e-05, | |
| "loss": 0.7994, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 3.6782538399353273, | |
| "grad_norm": 0.6567991375923157, | |
| "learning_rate": 8.811865624719302e-05, | |
| "loss": 0.802, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 3.6816222042576126, | |
| "grad_norm": 0.6707866787910461, | |
| "learning_rate": 8.789409862570736e-05, | |
| "loss": 0.7997, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 3.684990568579898, | |
| "grad_norm": 0.6689081192016602, | |
| "learning_rate": 8.766954100422168e-05, | |
| "loss": 0.8005, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 3.6883589329021826, | |
| "grad_norm": 0.6506887078285217, | |
| "learning_rate": 8.744498338273601e-05, | |
| "loss": 0.8017, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 3.691727297224468, | |
| "grad_norm": 0.6316550970077515, | |
| "learning_rate": 8.722042576125033e-05, | |
| "loss": 0.8001, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 3.6950956615467527, | |
| "grad_norm": 0.6964483261108398, | |
| "learning_rate": 8.699586813976467e-05, | |
| "loss": 0.8005, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 3.698464025869038, | |
| "grad_norm": 0.6844159364700317, | |
| "learning_rate": 8.677131051827898e-05, | |
| "loss": 0.7971, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 3.7018323901913233, | |
| "grad_norm": 0.5916749835014343, | |
| "learning_rate": 8.65467528967933e-05, | |
| "loss": 0.8008, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 3.705200754513608, | |
| "grad_norm": 0.7180382609367371, | |
| "learning_rate": 8.632219527530763e-05, | |
| "loss": 0.7967, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 3.7085691188358934, | |
| "grad_norm": 0.6756430864334106, | |
| "learning_rate": 8.609763765382196e-05, | |
| "loss": 0.8002, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 3.711937483158178, | |
| "grad_norm": 0.6347695589065552, | |
| "learning_rate": 8.587308003233629e-05, | |
| "loss": 0.801, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 3.7153058474804634, | |
| "grad_norm": 0.712821364402771, | |
| "learning_rate": 8.564852241085061e-05, | |
| "loss": 0.8, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 3.7186742118027487, | |
| "grad_norm": 0.7329118251800537, | |
| "learning_rate": 8.542396478936495e-05, | |
| "loss": 0.7991, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 3.7220425761250335, | |
| "grad_norm": 0.6399374008178711, | |
| "learning_rate": 8.519940716787927e-05, | |
| "loss": 0.802, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 3.725410940447319, | |
| "grad_norm": 0.7076860666275024, | |
| "learning_rate": 8.49748495463936e-05, | |
| "loss": 0.8004, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 3.7287793047696036, | |
| "grad_norm": 0.6736636161804199, | |
| "learning_rate": 8.475029192490793e-05, | |
| "loss": 0.7985, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 3.732147669091889, | |
| "grad_norm": 0.7174369692802429, | |
| "learning_rate": 8.452573430342225e-05, | |
| "loss": 0.7997, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 3.735516033414174, | |
| "grad_norm": 0.6668689250946045, | |
| "learning_rate": 8.430117668193657e-05, | |
| "loss": 0.7984, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 3.7388843977364594, | |
| "grad_norm": 0.6991139054298401, | |
| "learning_rate": 8.40766190604509e-05, | |
| "loss": 0.8011, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 3.7422527620587442, | |
| "grad_norm": 0.6559997797012329, | |
| "learning_rate": 8.385206143896523e-05, | |
| "loss": 0.8002, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 3.7456211263810295, | |
| "grad_norm": 0.6859176754951477, | |
| "learning_rate": 8.362750381747956e-05, | |
| "loss": 0.799, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 3.7489894907033143, | |
| "grad_norm": 0.7021101117134094, | |
| "learning_rate": 8.340294619599388e-05, | |
| "loss": 0.796, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 3.7523578550255996, | |
| "grad_norm": 0.6929513216018677, | |
| "learning_rate": 8.317838857450822e-05, | |
| "loss": 0.7977, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 3.755726219347885, | |
| "grad_norm": 0.6644559502601624, | |
| "learning_rate": 8.295383095302254e-05, | |
| "loss": 0.7994, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 3.7590945836701697, | |
| "grad_norm": 0.6280823349952698, | |
| "learning_rate": 8.272927333153687e-05, | |
| "loss": 0.7974, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 3.762462947992455, | |
| "grad_norm": 0.699496328830719, | |
| "learning_rate": 8.25047157100512e-05, | |
| "loss": 0.7995, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 3.7658313123147398, | |
| "grad_norm": 0.7624558806419373, | |
| "learning_rate": 8.228015808856552e-05, | |
| "loss": 0.8004, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 3.769199676637025, | |
| "grad_norm": 0.7288152575492859, | |
| "learning_rate": 8.205560046707984e-05, | |
| "loss": 0.8029, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 3.7725680409593103, | |
| "grad_norm": 0.6420552730560303, | |
| "learning_rate": 8.183104284559417e-05, | |
| "loss": 0.8015, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 3.775936405281595, | |
| "grad_norm": 0.6827369332313538, | |
| "learning_rate": 8.16064852241085e-05, | |
| "loss": 0.7997, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 3.7793047696038804, | |
| "grad_norm": 0.7149909138679504, | |
| "learning_rate": 8.138192760262283e-05, | |
| "loss": 0.8008, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 3.782673133926165, | |
| "grad_norm": 0.7044945955276489, | |
| "learning_rate": 8.115736998113715e-05, | |
| "loss": 0.7974, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 3.7860414982484505, | |
| "grad_norm": 0.7744246125221252, | |
| "learning_rate": 8.093281235965149e-05, | |
| "loss": 0.7971, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 3.7894098625707358, | |
| "grad_norm": 0.6370006203651428, | |
| "learning_rate": 8.070825473816581e-05, | |
| "loss": 0.7987, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 3.792778226893021, | |
| "grad_norm": 0.6973426342010498, | |
| "learning_rate": 8.048369711668015e-05, | |
| "loss": 0.7999, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 3.796146591215306, | |
| "grad_norm": 0.7414847612380981, | |
| "learning_rate": 8.025913949519445e-05, | |
| "loss": 0.7979, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 3.799514955537591, | |
| "grad_norm": 0.6913410425186157, | |
| "learning_rate": 8.003458187370878e-05, | |
| "loss": 0.796, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 3.802883319859876, | |
| "grad_norm": 0.6863036155700684, | |
| "learning_rate": 7.981002425222311e-05, | |
| "loss": 0.7975, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 3.806251684182161, | |
| "grad_norm": 0.804077684879303, | |
| "learning_rate": 7.958546663073743e-05, | |
| "loss": 0.7968, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 3.8096200485044465, | |
| "grad_norm": 1.013036847114563, | |
| "learning_rate": 7.936090900925177e-05, | |
| "loss": 0.7999, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 3.8129884128267313, | |
| "grad_norm": 0.7045647501945496, | |
| "learning_rate": 7.913635138776609e-05, | |
| "loss": 0.7991, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 3.8163567771490166, | |
| "grad_norm": 0.6505812406539917, | |
| "learning_rate": 7.891179376628042e-05, | |
| "loss": 0.8006, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 3.8197251414713014, | |
| "grad_norm": 0.6592190861701965, | |
| "learning_rate": 7.868723614479475e-05, | |
| "loss": 0.7991, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 3.8230935057935866, | |
| "grad_norm": 0.6840342879295349, | |
| "learning_rate": 7.846267852330908e-05, | |
| "loss": 0.798, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 3.826461870115872, | |
| "grad_norm": 0.6523902416229248, | |
| "learning_rate": 7.82381209018234e-05, | |
| "loss": 0.7982, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 3.8298302344381567, | |
| "grad_norm": 0.6831576228141785, | |
| "learning_rate": 7.801356328033772e-05, | |
| "loss": 0.7988, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 3.833198598760442, | |
| "grad_norm": 0.6503862738609314, | |
| "learning_rate": 7.778900565885205e-05, | |
| "loss": 0.7995, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 3.836566963082727, | |
| "grad_norm": 0.6413772702217102, | |
| "learning_rate": 7.756444803736638e-05, | |
| "loss": 0.7992, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 3.839935327405012, | |
| "grad_norm": 0.708017885684967, | |
| "learning_rate": 7.73398904158807e-05, | |
| "loss": 0.7995, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 3.8433036917272974, | |
| "grad_norm": 0.686399519443512, | |
| "learning_rate": 7.711533279439504e-05, | |
| "loss": 0.7974, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 3.846672056049582, | |
| "grad_norm": 0.6614728569984436, | |
| "learning_rate": 7.689077517290936e-05, | |
| "loss": 0.7972, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 3.8500404203718674, | |
| "grad_norm": 1.1526070833206177, | |
| "learning_rate": 7.66662175514237e-05, | |
| "loss": 0.795, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 3.8534087846941523, | |
| "grad_norm": 0.6846965551376343, | |
| "learning_rate": 7.644165992993802e-05, | |
| "loss": 0.799, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 3.8567771490164375, | |
| "grad_norm": 0.699941098690033, | |
| "learning_rate": 7.621710230845235e-05, | |
| "loss": 0.796, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 3.860145513338723, | |
| "grad_norm": 0.7409800291061401, | |
| "learning_rate": 7.599254468696666e-05, | |
| "loss": 0.7975, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 3.863513877661008, | |
| "grad_norm": 0.7050840854644775, | |
| "learning_rate": 7.5767987065481e-05, | |
| "loss": 0.7965, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 3.866882241983293, | |
| "grad_norm": 0.6288536787033081, | |
| "learning_rate": 7.554342944399532e-05, | |
| "loss": 0.7987, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 3.870250606305578, | |
| "grad_norm": 0.69952791929245, | |
| "learning_rate": 7.531887182250965e-05, | |
| "loss": 0.7983, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 3.873618970627863, | |
| "grad_norm": 0.7967835664749146, | |
| "learning_rate": 7.509431420102397e-05, | |
| "loss": 0.7992, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 3.8769873349501482, | |
| "grad_norm": 0.6878296732902527, | |
| "learning_rate": 7.486975657953831e-05, | |
| "loss": 0.7953, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 3.8803556992724335, | |
| "grad_norm": 0.750624418258667, | |
| "learning_rate": 7.464519895805263e-05, | |
| "loss": 0.7972, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 3.8837240635947183, | |
| "grad_norm": 0.6286699771881104, | |
| "learning_rate": 7.442064133656697e-05, | |
| "loss": 0.7979, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 3.8870924279170036, | |
| "grad_norm": 0.7046734094619751, | |
| "learning_rate": 7.419608371508129e-05, | |
| "loss": 0.7991, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 3.8904607922392884, | |
| "grad_norm": 0.6798041462898254, | |
| "learning_rate": 7.397152609359561e-05, | |
| "loss": 0.8019, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 3.8938291565615737, | |
| "grad_norm": 0.6814720034599304, | |
| "learning_rate": 7.374696847210994e-05, | |
| "loss": 0.7992, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 3.897197520883859, | |
| "grad_norm": 0.6247894167900085, | |
| "learning_rate": 7.352241085062427e-05, | |
| "loss": 0.7959, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 3.900565885206144, | |
| "grad_norm": 0.6836827397346497, | |
| "learning_rate": 7.329785322913859e-05, | |
| "loss": 0.7971, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 3.903934249528429, | |
| "grad_norm": 0.6945433616638184, | |
| "learning_rate": 7.307329560765291e-05, | |
| "loss": 0.7947, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 3.907302613850714, | |
| "grad_norm": 0.7244613766670227, | |
| "learning_rate": 7.284873798616724e-05, | |
| "loss": 0.7966, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 3.910670978172999, | |
| "grad_norm": 0.6739228367805481, | |
| "learning_rate": 7.262418036468158e-05, | |
| "loss": 0.7987, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 3.9140393424952844, | |
| "grad_norm": 0.7027273774147034, | |
| "learning_rate": 7.23996227431959e-05, | |
| "loss": 0.7966, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 3.9174077068175692, | |
| "grad_norm": 0.7074873447418213, | |
| "learning_rate": 7.217506512171022e-05, | |
| "loss": 0.7953, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 3.9207760711398545, | |
| "grad_norm": 0.6511521935462952, | |
| "learning_rate": 7.195050750022454e-05, | |
| "loss": 0.8015, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 3.9241444354621393, | |
| "grad_norm": 0.7246668934822083, | |
| "learning_rate": 7.172594987873888e-05, | |
| "loss": 0.7963, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 3.9275127997844246, | |
| "grad_norm": 0.6860557198524475, | |
| "learning_rate": 7.15013922572532e-05, | |
| "loss": 0.7973, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 3.93088116410671, | |
| "grad_norm": 0.7153899669647217, | |
| "learning_rate": 7.127683463576754e-05, | |
| "loss": 0.7959, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 3.934249528428995, | |
| "grad_norm": 0.6767284274101257, | |
| "learning_rate": 7.105227701428186e-05, | |
| "loss": 0.7971, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 3.93761789275128, | |
| "grad_norm": 0.6651219129562378, | |
| "learning_rate": 7.082771939279618e-05, | |
| "loss": 0.7959, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 3.940986257073565, | |
| "grad_norm": 0.6394347548484802, | |
| "learning_rate": 7.060316177131051e-05, | |
| "loss": 0.7988, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 3.94435462139585, | |
| "grad_norm": 0.6922232508659363, | |
| "learning_rate": 7.037860414982484e-05, | |
| "loss": 0.7961, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 3.9477229857181353, | |
| "grad_norm": 0.6803662776947021, | |
| "learning_rate": 7.015404652833917e-05, | |
| "loss": 0.7982, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 3.9510913500404206, | |
| "grad_norm": 0.7751142978668213, | |
| "learning_rate": 6.992948890685349e-05, | |
| "loss": 0.7968, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 3.9544597143627054, | |
| "grad_norm": 0.6750118136405945, | |
| "learning_rate": 6.970493128536781e-05, | |
| "loss": 0.7955, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 3.9578280786849906, | |
| "grad_norm": 0.6720499992370605, | |
| "learning_rate": 6.948037366388215e-05, | |
| "loss": 0.7975, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 3.9611964430072755, | |
| "grad_norm": 0.7234087586402893, | |
| "learning_rate": 6.925581604239647e-05, | |
| "loss": 0.7954, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 3.9645648073295607, | |
| "grad_norm": 0.6893304586410522, | |
| "learning_rate": 6.903125842091079e-05, | |
| "loss": 0.7958, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 3.967933171651846, | |
| "grad_norm": 0.6690725684165955, | |
| "learning_rate": 6.880670079942513e-05, | |
| "loss": 0.794, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 3.971301535974131, | |
| "grad_norm": 0.6747570633888245, | |
| "learning_rate": 6.858214317793945e-05, | |
| "loss": 0.7962, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 3.974669900296416, | |
| "grad_norm": 0.7051836848258972, | |
| "learning_rate": 6.835758555645378e-05, | |
| "loss": 0.7947, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 3.978038264618701, | |
| "grad_norm": 0.6995989084243774, | |
| "learning_rate": 6.81330279349681e-05, | |
| "loss": 0.795, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 3.981406628940986, | |
| "grad_norm": 0.6704210638999939, | |
| "learning_rate": 6.790847031348243e-05, | |
| "loss": 0.7944, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 3.9847749932632714, | |
| "grad_norm": 0.6781893372535706, | |
| "learning_rate": 6.768391269199676e-05, | |
| "loss": 0.7954, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 3.9881433575855567, | |
| "grad_norm": 0.6674184203147888, | |
| "learning_rate": 6.745935507051109e-05, | |
| "loss": 0.7984, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 3.9915117219078415, | |
| "grad_norm": 0.6545355319976807, | |
| "learning_rate": 6.723479744902542e-05, | |
| "loss": 0.7946, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 3.994880086230127, | |
| "grad_norm": 0.6851162910461426, | |
| "learning_rate": 6.701023982753974e-05, | |
| "loss": 0.7954, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 3.9982484505524116, | |
| "grad_norm": 0.6962630152702332, | |
| "learning_rate": 6.678568220605406e-05, | |
| "loss": 0.7949, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.7465363144874573, | |
| "eval_runtime": 1.8135, | |
| "eval_samples_per_second": 2757.131, | |
| "eval_steps_per_second": 43.563, | |
| "step": 118752 | |
| }, | |
| { | |
| "epoch": 4.001616814874697, | |
| "grad_norm": 0.7494738101959229, | |
| "learning_rate": 6.65611245845684e-05, | |
| "loss": 0.7985, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 4.004985179196982, | |
| "grad_norm": 0.6624446511268616, | |
| "learning_rate": 6.633656696308272e-05, | |
| "loss": 0.7954, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 4.008353543519267, | |
| "grad_norm": 0.69657963514328, | |
| "learning_rate": 6.611200934159706e-05, | |
| "loss": 0.7957, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 4.011721907841552, | |
| "grad_norm": 0.706929087638855, | |
| "learning_rate": 6.588745172011138e-05, | |
| "loss": 0.7969, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 4.015090272163837, | |
| "grad_norm": 0.7379885911941528, | |
| "learning_rate": 6.56628940986257e-05, | |
| "loss": 0.7968, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 4.018458636486122, | |
| "grad_norm": 0.6987789869308472, | |
| "learning_rate": 6.543833647714002e-05, | |
| "loss": 0.7959, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 4.021827000808408, | |
| "grad_norm": 0.6569750308990479, | |
| "learning_rate": 6.521377885565436e-05, | |
| "loss": 0.7917, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 4.025195365130693, | |
| "grad_norm": 0.7094253301620483, | |
| "learning_rate": 6.498922123416869e-05, | |
| "loss": 0.7937, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 4.028563729452977, | |
| "grad_norm": 0.7103100419044495, | |
| "learning_rate": 6.476466361268301e-05, | |
| "loss": 0.7988, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 4.0319320937752625, | |
| "grad_norm": 0.7065451145172119, | |
| "learning_rate": 6.454010599119733e-05, | |
| "loss": 0.7905, | |
| "step": 119700 | |
| }, | |
| { | |
| "epoch": 4.035300458097548, | |
| "grad_norm": 0.690153181552887, | |
| "learning_rate": 6.431554836971166e-05, | |
| "loss": 0.7948, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 4.038668822419833, | |
| "grad_norm": 0.7387445569038391, | |
| "learning_rate": 6.409099074822599e-05, | |
| "loss": 0.7944, | |
| "step": 119900 | |
| }, | |
| { | |
| "epoch": 4.042037186742118, | |
| "grad_norm": 0.6826400756835938, | |
| "learning_rate": 6.386643312674031e-05, | |
| "loss": 0.7935, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 4.045405551064403, | |
| "grad_norm": 0.6468490362167358, | |
| "learning_rate": 6.364187550525463e-05, | |
| "loss": 0.796, | |
| "step": 120100 | |
| }, | |
| { | |
| "epoch": 4.048773915386688, | |
| "grad_norm": 0.6864830851554871, | |
| "learning_rate": 6.341731788376897e-05, | |
| "loss": 0.7948, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 4.052142279708973, | |
| "grad_norm": 0.7382389903068542, | |
| "learning_rate": 6.319276026228329e-05, | |
| "loss": 0.7943, | |
| "step": 120300 | |
| }, | |
| { | |
| "epoch": 4.0555106440312585, | |
| "grad_norm": 0.7259221076965332, | |
| "learning_rate": 6.296820264079763e-05, | |
| "loss": 0.7975, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 4.058879008353544, | |
| "grad_norm": 0.7078028917312622, | |
| "learning_rate": 6.274364501931195e-05, | |
| "loss": 0.7969, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 4.062247372675829, | |
| "grad_norm": 0.6797070503234863, | |
| "learning_rate": 6.251908739782627e-05, | |
| "loss": 0.7946, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 4.065615736998113, | |
| "grad_norm": 0.7093040347099304, | |
| "learning_rate": 6.22945297763406e-05, | |
| "loss": 0.7931, | |
| "step": 120700 | |
| }, | |
| { | |
| "epoch": 4.068984101320399, | |
| "grad_norm": 0.6741282939910889, | |
| "learning_rate": 6.206997215485493e-05, | |
| "loss": 0.7975, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 4.072352465642684, | |
| "grad_norm": 0.6645729541778564, | |
| "learning_rate": 6.184541453336926e-05, | |
| "loss": 0.7959, | |
| "step": 120900 | |
| }, | |
| { | |
| "epoch": 4.075720829964969, | |
| "grad_norm": 0.6510828137397766, | |
| "learning_rate": 6.162085691188358e-05, | |
| "loss": 0.7945, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 4.0790891942872545, | |
| "grad_norm": 0.7214908599853516, | |
| "learning_rate": 6.13962992903979e-05, | |
| "loss": 0.7934, | |
| "step": 121100 | |
| }, | |
| { | |
| "epoch": 4.082457558609539, | |
| "grad_norm": 0.7020803093910217, | |
| "learning_rate": 6.117174166891224e-05, | |
| "loss": 0.7952, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 4.085825922931824, | |
| "grad_norm": 0.7014907002449036, | |
| "learning_rate": 6.094718404742656e-05, | |
| "loss": 0.7932, | |
| "step": 121300 | |
| }, | |
| { | |
| "epoch": 4.089194287254109, | |
| "grad_norm": 0.6577868461608887, | |
| "learning_rate": 6.072262642594089e-05, | |
| "loss": 0.7904, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 4.092562651576395, | |
| "grad_norm": 0.6450611352920532, | |
| "learning_rate": 6.049806880445522e-05, | |
| "loss": 0.7917, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 4.09593101589868, | |
| "grad_norm": 0.7224249243736267, | |
| "learning_rate": 6.027351118296954e-05, | |
| "loss": 0.793, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 4.099299380220964, | |
| "grad_norm": 0.6740891337394714, | |
| "learning_rate": 6.004895356148387e-05, | |
| "loss": 0.7929, | |
| "step": 121700 | |
| }, | |
| { | |
| "epoch": 4.10266774454325, | |
| "grad_norm": 0.7219831347465515, | |
| "learning_rate": 5.98243959399982e-05, | |
| "loss": 0.7921, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 4.106036108865535, | |
| "grad_norm": 0.7117908596992493, | |
| "learning_rate": 5.9599838318512525e-05, | |
| "loss": 0.7936, | |
| "step": 121900 | |
| }, | |
| { | |
| "epoch": 4.10940447318782, | |
| "grad_norm": 0.8350685238838196, | |
| "learning_rate": 5.9375280697026854e-05, | |
| "loss": 0.7924, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 4.112772837510105, | |
| "grad_norm": 0.6902172565460205, | |
| "learning_rate": 5.9150723075541175e-05, | |
| "loss": 0.7931, | |
| "step": 122100 | |
| }, | |
| { | |
| "epoch": 4.116141201832391, | |
| "grad_norm": 0.6757128238677979, | |
| "learning_rate": 5.8926165454055504e-05, | |
| "loss": 0.7954, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 4.119509566154675, | |
| "grad_norm": 0.6877591013908386, | |
| "learning_rate": 5.870160783256983e-05, | |
| "loss": 0.7916, | |
| "step": 122300 | |
| }, | |
| { | |
| "epoch": 4.12287793047696, | |
| "grad_norm": 0.677144467830658, | |
| "learning_rate": 5.847705021108416e-05, | |
| "loss": 0.7951, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 4.1262462947992455, | |
| "grad_norm": 0.6870157122612, | |
| "learning_rate": 5.825249258959848e-05, | |
| "loss": 0.7931, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 4.129614659121531, | |
| "grad_norm": 0.7329301834106445, | |
| "learning_rate": 5.802793496811281e-05, | |
| "loss": 0.7947, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 4.132983023443816, | |
| "grad_norm": 0.7153013348579407, | |
| "learning_rate": 5.780337734662714e-05, | |
| "loss": 0.7927, | |
| "step": 122700 | |
| }, | |
| { | |
| "epoch": 4.1363513877661005, | |
| "grad_norm": 0.6681983470916748, | |
| "learning_rate": 5.757881972514147e-05, | |
| "loss": 0.7966, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 4.139719752088386, | |
| "grad_norm": 0.6760982275009155, | |
| "learning_rate": 5.7354262103655796e-05, | |
| "loss": 0.7932, | |
| "step": 122900 | |
| }, | |
| { | |
| "epoch": 4.143088116410671, | |
| "grad_norm": 0.6279290914535522, | |
| "learning_rate": 5.712970448217012e-05, | |
| "loss": 0.7936, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 4.146456480732956, | |
| "grad_norm": 0.6763176918029785, | |
| "learning_rate": 5.6905146860684446e-05, | |
| "loss": 0.7966, | |
| "step": 123100 | |
| }, | |
| { | |
| "epoch": 4.1498248450552415, | |
| "grad_norm": 0.6843128204345703, | |
| "learning_rate": 5.6680589239198774e-05, | |
| "loss": 0.7933, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 4.153193209377526, | |
| "grad_norm": 0.6466397643089294, | |
| "learning_rate": 5.64560316177131e-05, | |
| "loss": 0.7955, | |
| "step": 123300 | |
| }, | |
| { | |
| "epoch": 4.156561573699811, | |
| "grad_norm": 0.6983882188796997, | |
| "learning_rate": 5.623147399622743e-05, | |
| "loss": 0.7951, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 4.159929938022096, | |
| "grad_norm": 0.6982767581939697, | |
| "learning_rate": 5.600691637474175e-05, | |
| "loss": 0.7932, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 4.163298302344382, | |
| "grad_norm": 0.6864616870880127, | |
| "learning_rate": 5.578235875325608e-05, | |
| "loss": 0.7926, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "grad_norm": 0.7273553609848022, | |
| "learning_rate": 5.555780113177041e-05, | |
| "loss": 0.7935, | |
| "step": 123700 | |
| }, | |
| { | |
| "epoch": 4.170035030988951, | |
| "grad_norm": 0.6874902248382568, | |
| "learning_rate": 5.533324351028474e-05, | |
| "loss": 0.7913, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 4.173403395311237, | |
| "grad_norm": 0.7229161262512207, | |
| "learning_rate": 5.5108685888799067e-05, | |
| "loss": 0.7887, | |
| "step": 123900 | |
| }, | |
| { | |
| "epoch": 4.176771759633522, | |
| "grad_norm": 0.7190577387809753, | |
| "learning_rate": 5.488412826731338e-05, | |
| "loss": 0.7908, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 4.180140123955807, | |
| "grad_norm": 0.6708100438117981, | |
| "learning_rate": 5.4659570645827717e-05, | |
| "loss": 0.7937, | |
| "step": 124100 | |
| }, | |
| { | |
| "epoch": 4.183508488278092, | |
| "grad_norm": 0.8221725821495056, | |
| "learning_rate": 5.4435013024342045e-05, | |
| "loss": 0.7923, | |
| "step": 124200 | |
| }, | |
| { | |
| "epoch": 4.186876852600378, | |
| "grad_norm": 0.7301611304283142, | |
| "learning_rate": 5.4210455402856373e-05, | |
| "loss": 0.7946, | |
| "step": 124300 | |
| }, | |
| { | |
| "epoch": 4.190245216922662, | |
| "grad_norm": 0.7508817315101624, | |
| "learning_rate": 5.39858977813707e-05, | |
| "loss": 0.7921, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 4.193613581244947, | |
| "grad_norm": 0.7159464359283447, | |
| "learning_rate": 5.376134015988502e-05, | |
| "loss": 0.7916, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 4.196981945567233, | |
| "grad_norm": 0.6572859883308411, | |
| "learning_rate": 5.3536782538399345e-05, | |
| "loss": 0.7932, | |
| "step": 124600 | |
| }, | |
| { | |
| "epoch": 4.200350309889518, | |
| "grad_norm": 0.8845502138137817, | |
| "learning_rate": 5.3312224916913674e-05, | |
| "loss": 0.7914, | |
| "step": 124700 | |
| }, | |
| { | |
| "epoch": 4.203718674211803, | |
| "grad_norm": 0.7034029364585876, | |
| "learning_rate": 5.3087667295428e-05, | |
| "loss": 0.7925, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 4.2070870385340875, | |
| "grad_norm": 0.66123366355896, | |
| "learning_rate": 5.2863109673942324e-05, | |
| "loss": 0.7956, | |
| "step": 124900 | |
| }, | |
| { | |
| "epoch": 4.210455402856373, | |
| "grad_norm": 0.7376932501792908, | |
| "learning_rate": 5.263855205245665e-05, | |
| "loss": 0.7891, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 4.213823767178658, | |
| "grad_norm": 0.7154867649078369, | |
| "learning_rate": 5.241399443097098e-05, | |
| "loss": 0.7921, | |
| "step": 125100 | |
| }, | |
| { | |
| "epoch": 4.217192131500943, | |
| "grad_norm": 0.6891977190971375, | |
| "learning_rate": 5.218943680948531e-05, | |
| "loss": 0.7938, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 4.220560495823229, | |
| "grad_norm": 0.713810384273529, | |
| "learning_rate": 5.196487918799964e-05, | |
| "loss": 0.7926, | |
| "step": 125300 | |
| }, | |
| { | |
| "epoch": 4.223928860145513, | |
| "grad_norm": 0.7049047350883484, | |
| "learning_rate": 5.174032156651396e-05, | |
| "loss": 0.7955, | |
| "step": 125400 | |
| }, | |
| { | |
| "epoch": 4.227297224467798, | |
| "grad_norm": 0.7068188786506653, | |
| "learning_rate": 5.151576394502829e-05, | |
| "loss": 0.7966, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 4.2306655887900835, | |
| "grad_norm": 0.7107961177825928, | |
| "learning_rate": 5.1291206323542616e-05, | |
| "loss": 0.7959, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 4.234033953112369, | |
| "grad_norm": 0.7235453128814697, | |
| "learning_rate": 5.1066648702056944e-05, | |
| "loss": 0.7904, | |
| "step": 125700 | |
| }, | |
| { | |
| "epoch": 4.237402317434654, | |
| "grad_norm": 0.7281498908996582, | |
| "learning_rate": 5.084209108057127e-05, | |
| "loss": 0.7916, | |
| "step": 125800 | |
| }, | |
| { | |
| "epoch": 4.240770681756938, | |
| "grad_norm": 0.726445198059082, | |
| "learning_rate": 5.0617533459085594e-05, | |
| "loss": 0.7905, | |
| "step": 125900 | |
| }, | |
| { | |
| "epoch": 4.244139046079224, | |
| "grad_norm": 0.7289313673973083, | |
| "learning_rate": 5.039297583759992e-05, | |
| "loss": 0.796, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 4.247507410401509, | |
| "grad_norm": 0.6762425899505615, | |
| "learning_rate": 5.016841821611425e-05, | |
| "loss": 0.7898, | |
| "step": 126100 | |
| }, | |
| { | |
| "epoch": 4.250875774723794, | |
| "grad_norm": 0.7091333866119385, | |
| "learning_rate": 4.994386059462858e-05, | |
| "loss": 0.7946, | |
| "step": 126200 | |
| }, | |
| { | |
| "epoch": 4.2542441390460795, | |
| "grad_norm": 0.6724031567573547, | |
| "learning_rate": 4.971930297314291e-05, | |
| "loss": 0.7931, | |
| "step": 126300 | |
| }, | |
| { | |
| "epoch": 4.257612503368365, | |
| "grad_norm": 0.7157333493232727, | |
| "learning_rate": 4.949474535165723e-05, | |
| "loss": 0.7925, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 4.260980867690649, | |
| "grad_norm": 0.7710690498352051, | |
| "learning_rate": 4.927018773017156e-05, | |
| "loss": 0.795, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 4.264349232012934, | |
| "grad_norm": 0.6310470104217529, | |
| "learning_rate": 4.9045630108685886e-05, | |
| "loss": 0.7931, | |
| "step": 126600 | |
| }, | |
| { | |
| "epoch": 4.26771759633522, | |
| "grad_norm": 0.6831243634223938, | |
| "learning_rate": 4.8821072487200215e-05, | |
| "loss": 0.7926, | |
| "step": 126700 | |
| }, | |
| { | |
| "epoch": 4.271085960657505, | |
| "grad_norm": 0.675401508808136, | |
| "learning_rate": 4.859651486571454e-05, | |
| "loss": 0.7902, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 4.27445432497979, | |
| "grad_norm": 0.715557873249054, | |
| "learning_rate": 4.8371957244228865e-05, | |
| "loss": 0.7932, | |
| "step": 126900 | |
| }, | |
| { | |
| "epoch": 4.2778226893020745, | |
| "grad_norm": 0.6567670702934265, | |
| "learning_rate": 4.814739962274319e-05, | |
| "loss": 0.7925, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 4.28119105362436, | |
| "grad_norm": 0.6738788485527039, | |
| "learning_rate": 4.792284200125752e-05, | |
| "loss": 0.7932, | |
| "step": 127100 | |
| }, | |
| { | |
| "epoch": 4.284559417946645, | |
| "grad_norm": 0.6749903559684753, | |
| "learning_rate": 4.769828437977185e-05, | |
| "loss": 0.792, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 4.28792778226893, | |
| "grad_norm": 0.6749402284622192, | |
| "learning_rate": 4.7473726758286165e-05, | |
| "loss": 0.7924, | |
| "step": 127300 | |
| }, | |
| { | |
| "epoch": 4.291296146591216, | |
| "grad_norm": 0.7213864326477051, | |
| "learning_rate": 4.724916913680049e-05, | |
| "loss": 0.7923, | |
| "step": 127400 | |
| }, | |
| { | |
| "epoch": 4.2946645109135, | |
| "grad_norm": 0.6684932112693787, | |
| "learning_rate": 4.702461151531482e-05, | |
| "loss": 0.7906, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 4.298032875235785, | |
| "grad_norm": 0.676459014415741, | |
| "learning_rate": 4.680005389382916e-05, | |
| "loss": 0.791, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 4.3014012395580705, | |
| "grad_norm": 0.6472882032394409, | |
| "learning_rate": 4.6575496272343485e-05, | |
| "loss": 0.7893, | |
| "step": 127700 | |
| }, | |
| { | |
| "epoch": 4.304769603880356, | |
| "grad_norm": 0.6770561337471008, | |
| "learning_rate": 4.63509386508578e-05, | |
| "loss": 0.7907, | |
| "step": 127800 | |
| }, | |
| { | |
| "epoch": 4.308137968202641, | |
| "grad_norm": 0.6275637149810791, | |
| "learning_rate": 4.612638102937213e-05, | |
| "loss": 0.7913, | |
| "step": 127900 | |
| }, | |
| { | |
| "epoch": 4.311506332524926, | |
| "grad_norm": 0.6540498733520508, | |
| "learning_rate": 4.590182340788646e-05, | |
| "loss": 0.793, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 4.314874696847211, | |
| "grad_norm": 0.7203840017318726, | |
| "learning_rate": 4.5677265786400785e-05, | |
| "loss": 0.7913, | |
| "step": 128100 | |
| }, | |
| { | |
| "epoch": 4.318243061169496, | |
| "grad_norm": 0.6884647607803345, | |
| "learning_rate": 4.5452708164915114e-05, | |
| "loss": 0.7928, | |
| "step": 128200 | |
| }, | |
| { | |
| "epoch": 4.321611425491781, | |
| "grad_norm": 0.8688404560089111, | |
| "learning_rate": 4.5228150543429435e-05, | |
| "loss": 0.791, | |
| "step": 128300 | |
| }, | |
| { | |
| "epoch": 4.3249797898140665, | |
| "grad_norm": 0.6714907884597778, | |
| "learning_rate": 4.5003592921943764e-05, | |
| "loss": 0.7909, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 4.328348154136352, | |
| "grad_norm": 0.6698452830314636, | |
| "learning_rate": 4.477903530045809e-05, | |
| "loss": 0.7934, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 4.331716518458636, | |
| "grad_norm": 0.6693661212921143, | |
| "learning_rate": 4.455447767897242e-05, | |
| "loss": 0.7917, | |
| "step": 128600 | |
| }, | |
| { | |
| "epoch": 4.335084882780921, | |
| "grad_norm": 0.6978726983070374, | |
| "learning_rate": 4.432992005748675e-05, | |
| "loss": 0.7903, | |
| "step": 128700 | |
| }, | |
| { | |
| "epoch": 4.338453247103207, | |
| "grad_norm": 0.7052512168884277, | |
| "learning_rate": 4.410536243600107e-05, | |
| "loss": 0.7909, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 4.341821611425492, | |
| "grad_norm": 0.6648239493370056, | |
| "learning_rate": 4.38808048145154e-05, | |
| "loss": 0.793, | |
| "step": 128900 | |
| }, | |
| { | |
| "epoch": 4.345189975747777, | |
| "grad_norm": 0.6616061329841614, | |
| "learning_rate": 4.365624719302973e-05, | |
| "loss": 0.7915, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 4.348558340070062, | |
| "grad_norm": 0.7084757089614868, | |
| "learning_rate": 4.3431689571544056e-05, | |
| "loss": 0.7918, | |
| "step": 129100 | |
| }, | |
| { | |
| "epoch": 4.351926704392347, | |
| "grad_norm": 0.6875206828117371, | |
| "learning_rate": 4.3207131950058384e-05, | |
| "loss": 0.7893, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 4.355295068714632, | |
| "grad_norm": 0.6854655742645264, | |
| "learning_rate": 4.2982574328572706e-05, | |
| "loss": 0.7859, | |
| "step": 129300 | |
| }, | |
| { | |
| "epoch": 4.358663433036917, | |
| "grad_norm": 0.7505910992622375, | |
| "learning_rate": 4.2758016707087034e-05, | |
| "loss": 0.7934, | |
| "step": 129400 | |
| }, | |
| { | |
| "epoch": 4.362031797359203, | |
| "grad_norm": 0.6831994652748108, | |
| "learning_rate": 4.253345908560136e-05, | |
| "loss": 0.7941, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 4.365400161681487, | |
| "grad_norm": 0.7224695086479187, | |
| "learning_rate": 4.230890146411569e-05, | |
| "loss": 0.7907, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 4.368768526003772, | |
| "grad_norm": 0.7102277874946594, | |
| "learning_rate": 4.208434384263001e-05, | |
| "loss": 0.7926, | |
| "step": 129700 | |
| }, | |
| { | |
| "epoch": 4.372136890326058, | |
| "grad_norm": 0.6760639548301697, | |
| "learning_rate": 4.185978622114434e-05, | |
| "loss": 0.7949, | |
| "step": 129800 | |
| }, | |
| { | |
| "epoch": 4.375505254648343, | |
| "grad_norm": 0.6891269087791443, | |
| "learning_rate": 4.163522859965867e-05, | |
| "loss": 0.7901, | |
| "step": 129900 | |
| }, | |
| { | |
| "epoch": 4.378873618970628, | |
| "grad_norm": 0.6708014607429504, | |
| "learning_rate": 4.1410670978173e-05, | |
| "loss": 0.7894, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 4.382241983292913, | |
| "grad_norm": 0.7219834327697754, | |
| "learning_rate": 4.1186113356687327e-05, | |
| "loss": 0.7917, | |
| "step": 130100 | |
| }, | |
| { | |
| "epoch": 4.385610347615198, | |
| "grad_norm": 0.6797767281532288, | |
| "learning_rate": 4.096155573520165e-05, | |
| "loss": 0.79, | |
| "step": 130200 | |
| }, | |
| { | |
| "epoch": 4.388978711937483, | |
| "grad_norm": 1.1183526515960693, | |
| "learning_rate": 4.073699811371598e-05, | |
| "loss": 0.7903, | |
| "step": 130300 | |
| }, | |
| { | |
| "epoch": 4.392347076259768, | |
| "grad_norm": 0.6638786792755127, | |
| "learning_rate": 4.0512440492230305e-05, | |
| "loss": 0.7929, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 4.3957154405820535, | |
| "grad_norm": 0.6631746888160706, | |
| "learning_rate": 4.0287882870744633e-05, | |
| "loss": 0.7895, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 4.399083804904339, | |
| "grad_norm": 0.6432344317436218, | |
| "learning_rate": 4.006332524925896e-05, | |
| "loss": 0.793, | |
| "step": 130600 | |
| }, | |
| { | |
| "epoch": 4.402452169226623, | |
| "grad_norm": 0.6888932585716248, | |
| "learning_rate": 3.983876762777328e-05, | |
| "loss": 0.7863, | |
| "step": 130700 | |
| }, | |
| { | |
| "epoch": 4.4058205335489085, | |
| "grad_norm": 0.6781389713287354, | |
| "learning_rate": 3.9614210006287605e-05, | |
| "loss": 0.7894, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 4.409188897871194, | |
| "grad_norm": 0.6817391514778137, | |
| "learning_rate": 3.9389652384801934e-05, | |
| "loss": 0.7915, | |
| "step": 130900 | |
| }, | |
| { | |
| "epoch": 4.412557262193479, | |
| "grad_norm": 0.7040595412254333, | |
| "learning_rate": 3.916509476331627e-05, | |
| "loss": 0.7898, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 4.415925626515764, | |
| "grad_norm": 0.6829173564910889, | |
| "learning_rate": 3.89405371418306e-05, | |
| "loss": 0.7911, | |
| "step": 131100 | |
| }, | |
| { | |
| "epoch": 4.419293990838049, | |
| "grad_norm": 0.7113239765167236, | |
| "learning_rate": 3.871597952034491e-05, | |
| "loss": 0.7871, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 4.422662355160334, | |
| "grad_norm": 0.6862262487411499, | |
| "learning_rate": 3.849142189885924e-05, | |
| "loss": 0.7915, | |
| "step": 131300 | |
| }, | |
| { | |
| "epoch": 4.426030719482619, | |
| "grad_norm": 0.6750577092170715, | |
| "learning_rate": 3.826686427737357e-05, | |
| "loss": 0.7925, | |
| "step": 131400 | |
| }, | |
| { | |
| "epoch": 4.429399083804904, | |
| "grad_norm": 0.7142935395240784, | |
| "learning_rate": 3.80423066558879e-05, | |
| "loss": 0.7906, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 4.43276744812719, | |
| "grad_norm": 0.7272329330444336, | |
| "learning_rate": 3.7817749034402226e-05, | |
| "loss": 0.7898, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 4.436135812449475, | |
| "grad_norm": 0.7627023458480835, | |
| "learning_rate": 3.759319141291655e-05, | |
| "loss": 0.7907, | |
| "step": 131700 | |
| }, | |
| { | |
| "epoch": 4.439504176771759, | |
| "grad_norm": 0.6552876830101013, | |
| "learning_rate": 3.7368633791430876e-05, | |
| "loss": 0.7901, | |
| "step": 131800 | |
| }, | |
| { | |
| "epoch": 4.442872541094045, | |
| "grad_norm": 0.7480065822601318, | |
| "learning_rate": 3.7144076169945204e-05, | |
| "loss": 0.7917, | |
| "step": 131900 | |
| }, | |
| { | |
| "epoch": 4.44624090541633, | |
| "grad_norm": 0.7101287841796875, | |
| "learning_rate": 3.691951854845953e-05, | |
| "loss": 0.7909, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 4.449609269738615, | |
| "grad_norm": 0.7063937187194824, | |
| "learning_rate": 3.669496092697386e-05, | |
| "loss": 0.7897, | |
| "step": 132100 | |
| }, | |
| { | |
| "epoch": 4.4529776340609, | |
| "grad_norm": 0.7050164341926575, | |
| "learning_rate": 3.647040330548819e-05, | |
| "loss": 0.7924, | |
| "step": 132200 | |
| }, | |
| { | |
| "epoch": 4.456345998383185, | |
| "grad_norm": 0.7028830051422119, | |
| "learning_rate": 3.624584568400251e-05, | |
| "loss": 0.7894, | |
| "step": 132300 | |
| }, | |
| { | |
| "epoch": 4.45971436270547, | |
| "grad_norm": 0.7394464015960693, | |
| "learning_rate": 3.602128806251684e-05, | |
| "loss": 0.7916, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 4.463082727027755, | |
| "grad_norm": 0.6921053528785706, | |
| "learning_rate": 3.579673044103116e-05, | |
| "loss": 0.7888, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 4.466451091350041, | |
| "grad_norm": 0.6709455847740173, | |
| "learning_rate": 3.557217281954549e-05, | |
| "loss": 0.7915, | |
| "step": 132600 | |
| }, | |
| { | |
| "epoch": 4.469819455672326, | |
| "grad_norm": 0.7549142241477966, | |
| "learning_rate": 3.534761519805982e-05, | |
| "loss": 0.7925, | |
| "step": 132700 | |
| }, | |
| { | |
| "epoch": 4.47318781999461, | |
| "grad_norm": 0.716698944568634, | |
| "learning_rate": 3.5123057576574146e-05, | |
| "loss": 0.7893, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 4.4765561843168955, | |
| "grad_norm": 0.6615895628929138, | |
| "learning_rate": 3.4898499955088475e-05, | |
| "loss": 0.7864, | |
| "step": 132900 | |
| }, | |
| { | |
| "epoch": 4.479924548639181, | |
| "grad_norm": 0.8180909156799316, | |
| "learning_rate": 3.4673942333602796e-05, | |
| "loss": 0.7902, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 4.483292912961466, | |
| "grad_norm": 0.7339180111885071, | |
| "learning_rate": 3.4449384712117125e-05, | |
| "loss": 0.789, | |
| "step": 133100 | |
| }, | |
| { | |
| "epoch": 4.486661277283751, | |
| "grad_norm": 0.921172559261322, | |
| "learning_rate": 3.422482709063145e-05, | |
| "loss": 0.7876, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 4.490029641606036, | |
| "grad_norm": 0.7154456973075867, | |
| "learning_rate": 3.400026946914578e-05, | |
| "loss": 0.7911, | |
| "step": 133300 | |
| }, | |
| { | |
| "epoch": 4.493398005928321, | |
| "grad_norm": 0.9519571661949158, | |
| "learning_rate": 3.377571184766011e-05, | |
| "loss": 0.7895, | |
| "step": 133400 | |
| }, | |
| { | |
| "epoch": 4.496766370250606, | |
| "grad_norm": 0.7096812725067139, | |
| "learning_rate": 3.355115422617443e-05, | |
| "loss": 0.7892, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 4.5001347345728915, | |
| "grad_norm": 0.6969826817512512, | |
| "learning_rate": 3.332659660468876e-05, | |
| "loss": 0.7913, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 4.503503098895177, | |
| "grad_norm": 0.6900309920310974, | |
| "learning_rate": 3.310203898320309e-05, | |
| "loss": 0.7909, | |
| "step": 133700 | |
| }, | |
| { | |
| "epoch": 4.506871463217461, | |
| "grad_norm": 0.7131757736206055, | |
| "learning_rate": 3.287748136171742e-05, | |
| "loss": 0.7932, | |
| "step": 133800 | |
| }, | |
| { | |
| "epoch": 4.510239827539746, | |
| "grad_norm": 0.6708106398582458, | |
| "learning_rate": 3.265292374023174e-05, | |
| "loss": 0.7889, | |
| "step": 133900 | |
| }, | |
| { | |
| "epoch": 4.513608191862032, | |
| "grad_norm": 0.7623175382614136, | |
| "learning_rate": 3.242836611874607e-05, | |
| "loss": 0.7906, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 4.516976556184317, | |
| "grad_norm": 0.7173711657524109, | |
| "learning_rate": 3.2203808497260395e-05, | |
| "loss": 0.7869, | |
| "step": 134100 | |
| }, | |
| { | |
| "epoch": 4.520344920506602, | |
| "grad_norm": 0.7341943979263306, | |
| "learning_rate": 3.197925087577472e-05, | |
| "loss": 0.7897, | |
| "step": 134200 | |
| }, | |
| { | |
| "epoch": 4.5237132848288875, | |
| "grad_norm": 0.7385048866271973, | |
| "learning_rate": 3.1754693254289045e-05, | |
| "loss": 0.7884, | |
| "step": 134300 | |
| }, | |
| { | |
| "epoch": 4.527081649151172, | |
| "grad_norm": 0.7369481921195984, | |
| "learning_rate": 3.1530135632803374e-05, | |
| "loss": 0.7922, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 4.530450013473457, | |
| "grad_norm": 0.7261357307434082, | |
| "learning_rate": 3.13055780113177e-05, | |
| "loss": 0.7896, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 4.533818377795742, | |
| "grad_norm": 0.7368954420089722, | |
| "learning_rate": 3.108102038983203e-05, | |
| "loss": 0.7877, | |
| "step": 134600 | |
| }, | |
| { | |
| "epoch": 4.537186742118028, | |
| "grad_norm": 0.6391497850418091, | |
| "learning_rate": 3.085646276834635e-05, | |
| "loss": 0.7883, | |
| "step": 134700 | |
| }, | |
| { | |
| "epoch": 4.540555106440313, | |
| "grad_norm": 0.6790878176689148, | |
| "learning_rate": 3.063190514686068e-05, | |
| "loss": 0.7878, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 4.543923470762598, | |
| "grad_norm": 0.6800740361213684, | |
| "learning_rate": 3.0407347525375006e-05, | |
| "loss": 0.7879, | |
| "step": 134900 | |
| }, | |
| { | |
| "epoch": 4.5472918350848825, | |
| "grad_norm": 0.6817995309829712, | |
| "learning_rate": 3.0182789903889334e-05, | |
| "loss": 0.7893, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 4.550660199407168, | |
| "grad_norm": 0.706876814365387, | |
| "learning_rate": 2.995823228240366e-05, | |
| "loss": 0.7877, | |
| "step": 135100 | |
| }, | |
| { | |
| "epoch": 4.554028563729453, | |
| "grad_norm": 0.6842048168182373, | |
| "learning_rate": 2.9733674660917988e-05, | |
| "loss": 0.7887, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 4.557396928051738, | |
| "grad_norm": 0.8431084752082825, | |
| "learning_rate": 2.9509117039432316e-05, | |
| "loss": 0.7895, | |
| "step": 135300 | |
| }, | |
| { | |
| "epoch": 4.560765292374024, | |
| "grad_norm": 0.6965360045433044, | |
| "learning_rate": 2.928455941794664e-05, | |
| "loss": 0.7857, | |
| "step": 135400 | |
| }, | |
| { | |
| "epoch": 4.564133656696308, | |
| "grad_norm": 0.675323486328125, | |
| "learning_rate": 2.906000179646097e-05, | |
| "loss": 0.7917, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 4.567502021018593, | |
| "grad_norm": 0.6971179842948914, | |
| "learning_rate": 2.8835444174975295e-05, | |
| "loss": 0.791, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 4.5708703853408785, | |
| "grad_norm": 0.7096483111381531, | |
| "learning_rate": 2.8610886553489623e-05, | |
| "loss": 0.7886, | |
| "step": 135700 | |
| }, | |
| { | |
| "epoch": 4.574238749663164, | |
| "grad_norm": 0.6643834710121155, | |
| "learning_rate": 2.838632893200395e-05, | |
| "loss": 0.7903, | |
| "step": 135800 | |
| }, | |
| { | |
| "epoch": 4.577607113985449, | |
| "grad_norm": 0.6781139373779297, | |
| "learning_rate": 2.8161771310518276e-05, | |
| "loss": 0.7861, | |
| "step": 135900 | |
| }, | |
| { | |
| "epoch": 4.580975478307733, | |
| "grad_norm": 0.69442218542099, | |
| "learning_rate": 2.7937213689032605e-05, | |
| "loss": 0.7884, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 4.584343842630019, | |
| "grad_norm": 0.6849011778831482, | |
| "learning_rate": 2.771265606754693e-05, | |
| "loss": 0.7892, | |
| "step": 136100 | |
| }, | |
| { | |
| "epoch": 4.587712206952304, | |
| "grad_norm": 0.7133069634437561, | |
| "learning_rate": 2.7488098446061258e-05, | |
| "loss": 0.7904, | |
| "step": 136200 | |
| }, | |
| { | |
| "epoch": 4.591080571274589, | |
| "grad_norm": 0.7414596080780029, | |
| "learning_rate": 2.7263540824575583e-05, | |
| "loss": 0.7933, | |
| "step": 136300 | |
| }, | |
| { | |
| "epoch": 4.5944489355968745, | |
| "grad_norm": 0.7594481706619263, | |
| "learning_rate": 2.703898320308991e-05, | |
| "loss": 0.7901, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 4.597817299919159, | |
| "grad_norm": 0.7436200976371765, | |
| "learning_rate": 2.681442558160424e-05, | |
| "loss": 0.7863, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 4.601185664241444, | |
| "grad_norm": 0.7052866220474243, | |
| "learning_rate": 2.6589867960118562e-05, | |
| "loss": 0.7899, | |
| "step": 136600 | |
| }, | |
| { | |
| "epoch": 4.604554028563729, | |
| "grad_norm": 0.720867395401001, | |
| "learning_rate": 2.636531033863289e-05, | |
| "loss": 0.786, | |
| "step": 136700 | |
| }, | |
| { | |
| "epoch": 4.607922392886015, | |
| "grad_norm": 0.7140068411827087, | |
| "learning_rate": 2.6140752717147215e-05, | |
| "loss": 0.7886, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 4.6112907572083, | |
| "grad_norm": 0.6707795858383179, | |
| "learning_rate": 2.5916195095661544e-05, | |
| "loss": 0.792, | |
| "step": 136900 | |
| }, | |
| { | |
| "epoch": 4.614659121530584, | |
| "grad_norm": 0.6954900622367859, | |
| "learning_rate": 2.5691637474175872e-05, | |
| "loss": 0.785, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 4.61802748585287, | |
| "grad_norm": 0.695578396320343, | |
| "learning_rate": 2.5467079852690197e-05, | |
| "loss": 0.7897, | |
| "step": 137100 | |
| }, | |
| { | |
| "epoch": 4.621395850175155, | |
| "grad_norm": 0.8920716047286987, | |
| "learning_rate": 2.5242522231204525e-05, | |
| "loss": 0.7912, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 4.62476421449744, | |
| "grad_norm": 0.6703862547874451, | |
| "learning_rate": 2.501796460971885e-05, | |
| "loss": 0.79, | |
| "step": 137300 | |
| }, | |
| { | |
| "epoch": 4.628132578819725, | |
| "grad_norm": 0.7049610018730164, | |
| "learning_rate": 2.479340698823318e-05, | |
| "loss": 0.7876, | |
| "step": 137400 | |
| }, | |
| { | |
| "epoch": 4.63150094314201, | |
| "grad_norm": 0.7894124388694763, | |
| "learning_rate": 2.4568849366747504e-05, | |
| "loss": 0.7897, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 4.634869307464295, | |
| "grad_norm": 2.398905038833618, | |
| "learning_rate": 2.4344291745261832e-05, | |
| "loss": 0.7909, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 4.63823767178658, | |
| "grad_norm": 0.6975995302200317, | |
| "learning_rate": 2.411973412377616e-05, | |
| "loss": 0.789, | |
| "step": 137700 | |
| }, | |
| { | |
| "epoch": 4.641606036108866, | |
| "grad_norm": 0.717632532119751, | |
| "learning_rate": 2.3895176502290486e-05, | |
| "loss": 0.7883, | |
| "step": 137800 | |
| }, | |
| { | |
| "epoch": 4.644974400431151, | |
| "grad_norm": 0.7279338240623474, | |
| "learning_rate": 2.3670618880804814e-05, | |
| "loss": 0.787, | |
| "step": 137900 | |
| }, | |
| { | |
| "epoch": 4.648342764753436, | |
| "grad_norm": 0.6684018969535828, | |
| "learning_rate": 2.344606125931914e-05, | |
| "loss": 0.7875, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 4.6517111290757205, | |
| "grad_norm": 0.7201947569847107, | |
| "learning_rate": 2.3221503637833468e-05, | |
| "loss": 0.791, | |
| "step": 138100 | |
| }, | |
| { | |
| "epoch": 4.655079493398006, | |
| "grad_norm": 0.71978360414505, | |
| "learning_rate": 2.2996946016347796e-05, | |
| "loss": 0.7871, | |
| "step": 138200 | |
| }, | |
| { | |
| "epoch": 4.658447857720291, | |
| "grad_norm": 0.6821112632751465, | |
| "learning_rate": 2.2772388394862118e-05, | |
| "loss": 0.7888, | |
| "step": 138300 | |
| }, | |
| { | |
| "epoch": 4.661816222042576, | |
| "grad_norm": 0.7123835682868958, | |
| "learning_rate": 2.2547830773376446e-05, | |
| "loss": 0.7879, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 4.6651845863648616, | |
| "grad_norm": 0.6627094745635986, | |
| "learning_rate": 2.232327315189077e-05, | |
| "loss": 0.7901, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 4.668552950687147, | |
| "grad_norm": 0.6200422644615173, | |
| "learning_rate": 2.20987155304051e-05, | |
| "loss": 0.791, | |
| "step": 138600 | |
| }, | |
| { | |
| "epoch": 4.671921315009431, | |
| "grad_norm": 0.7372182011604309, | |
| "learning_rate": 2.1874157908919425e-05, | |
| "loss": 0.7846, | |
| "step": 138700 | |
| }, | |
| { | |
| "epoch": 4.6752896793317165, | |
| "grad_norm": 0.7178613543510437, | |
| "learning_rate": 2.1649600287433753e-05, | |
| "loss": 0.7903, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 4.678658043654002, | |
| "grad_norm": 0.7218438386917114, | |
| "learning_rate": 2.142504266594808e-05, | |
| "loss": 0.7881, | |
| "step": 138900 | |
| }, | |
| { | |
| "epoch": 4.682026407976287, | |
| "grad_norm": 0.7254891395568848, | |
| "learning_rate": 2.1200485044462406e-05, | |
| "loss": 0.7882, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 4.685394772298572, | |
| "grad_norm": 0.6931572556495667, | |
| "learning_rate": 2.0975927422976735e-05, | |
| "loss": 0.7878, | |
| "step": 139100 | |
| }, | |
| { | |
| "epoch": 4.688763136620857, | |
| "grad_norm": 0.6938092708587646, | |
| "learning_rate": 2.075136980149106e-05, | |
| "loss": 0.7859, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 4.692131500943142, | |
| "grad_norm": 0.7220520973205566, | |
| "learning_rate": 2.0526812180005388e-05, | |
| "loss": 0.7874, | |
| "step": 139300 | |
| }, | |
| { | |
| "epoch": 4.695499865265427, | |
| "grad_norm": 0.7422808408737183, | |
| "learning_rate": 2.0302254558519717e-05, | |
| "loss": 0.7861, | |
| "step": 139400 | |
| }, | |
| { | |
| "epoch": 4.698868229587712, | |
| "grad_norm": 0.6828125715255737, | |
| "learning_rate": 2.007769693703404e-05, | |
| "loss": 0.7864, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 4.702236593909998, | |
| "grad_norm": 0.7483955025672913, | |
| "learning_rate": 1.985313931554837e-05, | |
| "loss": 0.7847, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 4.705604958232282, | |
| "grad_norm": 0.7267476916313171, | |
| "learning_rate": 1.9628581694062695e-05, | |
| "loss": 0.788, | |
| "step": 139700 | |
| }, | |
| { | |
| "epoch": 4.708973322554567, | |
| "grad_norm": 0.6834740042686462, | |
| "learning_rate": 1.9404024072577024e-05, | |
| "loss": 0.7845, | |
| "step": 139800 | |
| }, | |
| { | |
| "epoch": 4.712341686876853, | |
| "grad_norm": 0.7835246324539185, | |
| "learning_rate": 1.9179466451091345e-05, | |
| "loss": 0.7856, | |
| "step": 139900 | |
| }, | |
| { | |
| "epoch": 4.715710051199138, | |
| "grad_norm": 0.6620598435401917, | |
| "learning_rate": 1.8954908829605674e-05, | |
| "loss": 0.7915, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 4.719078415521423, | |
| "grad_norm": 0.7632565498352051, | |
| "learning_rate": 1.8730351208120002e-05, | |
| "loss": 0.7878, | |
| "step": 140100 | |
| }, | |
| { | |
| "epoch": 4.7224467798437075, | |
| "grad_norm": 0.6834028363227844, | |
| "learning_rate": 1.8505793586634327e-05, | |
| "loss": 0.7852, | |
| "step": 140200 | |
| }, | |
| { | |
| "epoch": 4.725815144165993, | |
| "grad_norm": 0.8396201729774475, | |
| "learning_rate": 1.8281235965148655e-05, | |
| "loss": 0.7893, | |
| "step": 140300 | |
| }, | |
| { | |
| "epoch": 4.729183508488278, | |
| "grad_norm": 0.7341758608818054, | |
| "learning_rate": 1.8056678343662984e-05, | |
| "loss": 0.7892, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 4.732551872810563, | |
| "grad_norm": 0.74070143699646, | |
| "learning_rate": 1.783212072217731e-05, | |
| "loss": 0.7864, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 4.735920237132849, | |
| "grad_norm": 0.7226638793945312, | |
| "learning_rate": 1.7607563100691637e-05, | |
| "loss": 0.7861, | |
| "step": 140600 | |
| }, | |
| { | |
| "epoch": 4.739288601455133, | |
| "grad_norm": 0.6856757402420044, | |
| "learning_rate": 1.7383005479205962e-05, | |
| "loss": 0.7878, | |
| "step": 140700 | |
| }, | |
| { | |
| "epoch": 4.742656965777418, | |
| "grad_norm": 0.695364773273468, | |
| "learning_rate": 1.7158447857720287e-05, | |
| "loss": 0.788, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 4.7460253300997035, | |
| "grad_norm": 0.7357635498046875, | |
| "learning_rate": 1.6933890236234616e-05, | |
| "loss": 0.7853, | |
| "step": 140900 | |
| }, | |
| { | |
| "epoch": 4.749393694421989, | |
| "grad_norm": 0.6697304248809814, | |
| "learning_rate": 1.6709332614748944e-05, | |
| "loss": 0.7868, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 4.752762058744274, | |
| "grad_norm": 0.719422459602356, | |
| "learning_rate": 1.648477499326327e-05, | |
| "loss": 0.7887, | |
| "step": 141100 | |
| }, | |
| { | |
| "epoch": 4.756130423066558, | |
| "grad_norm": 0.650025486946106, | |
| "learning_rate": 1.6260217371777598e-05, | |
| "loss": 0.7901, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 4.759498787388844, | |
| "grad_norm": 0.6734464168548584, | |
| "learning_rate": 1.6035659750291923e-05, | |
| "loss": 0.7885, | |
| "step": 141300 | |
| }, | |
| { | |
| "epoch": 4.762867151711129, | |
| "grad_norm": 0.6939591765403748, | |
| "learning_rate": 1.581110212880625e-05, | |
| "loss": 0.7896, | |
| "step": 141400 | |
| }, | |
| { | |
| "epoch": 4.766235516033414, | |
| "grad_norm": 0.6683725118637085, | |
| "learning_rate": 1.558654450732058e-05, | |
| "loss": 0.7909, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 4.7696038803556995, | |
| "grad_norm": 0.7492696642875671, | |
| "learning_rate": 1.5361986885834905e-05, | |
| "loss": 0.7856, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 4.772972244677985, | |
| "grad_norm": 0.6370770931243896, | |
| "learning_rate": 1.5137429264349231e-05, | |
| "loss": 0.7858, | |
| "step": 141700 | |
| }, | |
| { | |
| "epoch": 4.776340609000269, | |
| "grad_norm": 0.7032959461212158, | |
| "learning_rate": 1.4912871642863558e-05, | |
| "loss": 0.7833, | |
| "step": 141800 | |
| }, | |
| { | |
| "epoch": 4.779708973322554, | |
| "grad_norm": 0.6765040159225464, | |
| "learning_rate": 1.4688314021377885e-05, | |
| "loss": 0.7861, | |
| "step": 141900 | |
| }, | |
| { | |
| "epoch": 4.78307733764484, | |
| "grad_norm": 0.6935933232307434, | |
| "learning_rate": 1.446375639989221e-05, | |
| "loss": 0.7834, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 4.786445701967125, | |
| "grad_norm": 0.716052770614624, | |
| "learning_rate": 1.4239198778406538e-05, | |
| "loss": 0.7857, | |
| "step": 142100 | |
| }, | |
| { | |
| "epoch": 4.78981406628941, | |
| "grad_norm": 0.7378547787666321, | |
| "learning_rate": 1.4014641156920865e-05, | |
| "loss": 0.7849, | |
| "step": 142200 | |
| }, | |
| { | |
| "epoch": 4.793182430611695, | |
| "grad_norm": 0.7271299958229065, | |
| "learning_rate": 1.3790083535435192e-05, | |
| "loss": 0.7877, | |
| "step": 142300 | |
| }, | |
| { | |
| "epoch": 4.79655079493398, | |
| "grad_norm": 0.6609006524085999, | |
| "learning_rate": 1.3565525913949518e-05, | |
| "loss": 0.7857, | |
| "step": 142400 | |
| }, | |
| { | |
| "epoch": 4.799919159256265, | |
| "grad_norm": 0.6987965703010559, | |
| "learning_rate": 1.3340968292463845e-05, | |
| "loss": 0.7848, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 4.80328752357855, | |
| "grad_norm": 0.6643743515014648, | |
| "learning_rate": 1.3116410670978172e-05, | |
| "loss": 0.7872, | |
| "step": 142600 | |
| }, | |
| { | |
| "epoch": 4.806655887900836, | |
| "grad_norm": 0.642590343952179, | |
| "learning_rate": 1.28918530494925e-05, | |
| "loss": 0.7858, | |
| "step": 142700 | |
| }, | |
| { | |
| "epoch": 4.810024252223121, | |
| "grad_norm": 0.6913971304893494, | |
| "learning_rate": 1.2667295428006827e-05, | |
| "loss": 0.7862, | |
| "step": 142800 | |
| }, | |
| { | |
| "epoch": 4.813392616545405, | |
| "grad_norm": 0.7790234684944153, | |
| "learning_rate": 1.2442737806521152e-05, | |
| "loss": 0.7869, | |
| "step": 142900 | |
| }, | |
| { | |
| "epoch": 4.8167609808676906, | |
| "grad_norm": 0.6860402226448059, | |
| "learning_rate": 1.2218180185035479e-05, | |
| "loss": 0.7872, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 4.820129345189976, | |
| "grad_norm": 0.73018479347229, | |
| "learning_rate": 1.1993622563549805e-05, | |
| "loss": 0.785, | |
| "step": 143100 | |
| }, | |
| { | |
| "epoch": 4.823497709512261, | |
| "grad_norm": 0.7606022357940674, | |
| "learning_rate": 1.1769064942064132e-05, | |
| "loss": 0.7835, | |
| "step": 143200 | |
| }, | |
| { | |
| "epoch": 4.826866073834546, | |
| "grad_norm": 0.7062585949897766, | |
| "learning_rate": 1.154450732057846e-05, | |
| "loss": 0.7863, | |
| "step": 143300 | |
| }, | |
| { | |
| "epoch": 4.830234438156831, | |
| "grad_norm": 0.7451142072677612, | |
| "learning_rate": 1.1319949699092787e-05, | |
| "loss": 0.7853, | |
| "step": 143400 | |
| }, | |
| { | |
| "epoch": 4.833602802479116, | |
| "grad_norm": 0.6708253622055054, | |
| "learning_rate": 1.1095392077607114e-05, | |
| "loss": 0.7872, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 4.836971166801401, | |
| "grad_norm": 0.6949145197868347, | |
| "learning_rate": 1.0870834456121439e-05, | |
| "loss": 0.7877, | |
| "step": 143600 | |
| }, | |
| { | |
| "epoch": 4.8403395311236865, | |
| "grad_norm": 0.7620200514793396, | |
| "learning_rate": 1.0646276834635766e-05, | |
| "loss": 0.784, | |
| "step": 143700 | |
| }, | |
| { | |
| "epoch": 4.843707895445972, | |
| "grad_norm": 0.6912148594856262, | |
| "learning_rate": 1.0421719213150092e-05, | |
| "loss": 0.7838, | |
| "step": 143800 | |
| }, | |
| { | |
| "epoch": 4.847076259768256, | |
| "grad_norm": 0.6649619936943054, | |
| "learning_rate": 1.019716159166442e-05, | |
| "loss": 0.7847, | |
| "step": 143900 | |
| }, | |
| { | |
| "epoch": 4.8504446240905414, | |
| "grad_norm": 0.6840892434120178, | |
| "learning_rate": 9.972603970178747e-06, | |
| "loss": 0.7842, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 4.853812988412827, | |
| "grad_norm": 0.6763936877250671, | |
| "learning_rate": 9.748046348693074e-06, | |
| "loss": 0.7884, | |
| "step": 144100 | |
| }, | |
| { | |
| "epoch": 4.857181352735112, | |
| "grad_norm": 0.7060201168060303, | |
| "learning_rate": 9.523488727207401e-06, | |
| "loss": 0.7869, | |
| "step": 144200 | |
| }, | |
| { | |
| "epoch": 4.860549717057397, | |
| "grad_norm": 0.6832892298698425, | |
| "learning_rate": 9.298931105721728e-06, | |
| "loss": 0.7846, | |
| "step": 144300 | |
| }, | |
| { | |
| "epoch": 4.863918081379682, | |
| "grad_norm": 0.6947805285453796, | |
| "learning_rate": 9.074373484236054e-06, | |
| "loss": 0.7876, | |
| "step": 144400 | |
| }, | |
| { | |
| "epoch": 4.867286445701967, | |
| "grad_norm": 0.6749753355979919, | |
| "learning_rate": 8.849815862750381e-06, | |
| "loss": 0.7848, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 4.870654810024252, | |
| "grad_norm": 0.7636469602584839, | |
| "learning_rate": 8.625258241264708e-06, | |
| "loss": 0.7846, | |
| "step": 144600 | |
| }, | |
| { | |
| "epoch": 4.874023174346537, | |
| "grad_norm": 0.6952735781669617, | |
| "learning_rate": 8.400700619779035e-06, | |
| "loss": 0.7889, | |
| "step": 144700 | |
| }, | |
| { | |
| "epoch": 4.877391538668823, | |
| "grad_norm": 0.6861172318458557, | |
| "learning_rate": 8.176142998293361e-06, | |
| "loss": 0.787, | |
| "step": 144800 | |
| }, | |
| { | |
| "epoch": 4.880759902991107, | |
| "grad_norm": 0.7696015238761902, | |
| "learning_rate": 7.951585376807688e-06, | |
| "loss": 0.7858, | |
| "step": 144900 | |
| }, | |
| { | |
| "epoch": 4.884128267313392, | |
| "grad_norm": 0.7360905408859253, | |
| "learning_rate": 7.727027755322015e-06, | |
| "loss": 0.7875, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 4.887496631635678, | |
| "grad_norm": 0.7402172088623047, | |
| "learning_rate": 7.502470133836341e-06, | |
| "loss": 0.7901, | |
| "step": 145100 | |
| }, | |
| { | |
| "epoch": 4.890864995957963, | |
| "grad_norm": 0.7550304532051086, | |
| "learning_rate": 7.277912512350668e-06, | |
| "loss": 0.7881, | |
| "step": 145200 | |
| }, | |
| { | |
| "epoch": 4.894233360280248, | |
| "grad_norm": 0.7490784525871277, | |
| "learning_rate": 7.053354890864996e-06, | |
| "loss": 0.7866, | |
| "step": 145300 | |
| }, | |
| { | |
| "epoch": 4.8976017246025325, | |
| "grad_norm": 0.675995945930481, | |
| "learning_rate": 6.828797269379322e-06, | |
| "loss": 0.7868, | |
| "step": 145400 | |
| }, | |
| { | |
| "epoch": 4.900970088924818, | |
| "grad_norm": 0.6862391829490662, | |
| "learning_rate": 6.604239647893648e-06, | |
| "loss": 0.7851, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 4.904338453247103, | |
| "grad_norm": 0.6939913630485535, | |
| "learning_rate": 6.379682026407976e-06, | |
| "loss": 0.7871, | |
| "step": 145600 | |
| }, | |
| { | |
| "epoch": 4.907706817569388, | |
| "grad_norm": 0.741671085357666, | |
| "learning_rate": 6.155124404922303e-06, | |
| "loss": 0.7829, | |
| "step": 145700 | |
| }, | |
| { | |
| "epoch": 4.911075181891674, | |
| "grad_norm": 0.6528514623641968, | |
| "learning_rate": 5.930566783436629e-06, | |
| "loss": 0.7869, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 4.914443546213959, | |
| "grad_norm": 0.6991773843765259, | |
| "learning_rate": 5.706009161950956e-06, | |
| "loss": 0.7847, | |
| "step": 145900 | |
| }, | |
| { | |
| "epoch": 4.917811910536243, | |
| "grad_norm": 0.6840393543243408, | |
| "learning_rate": 5.481451540465283e-06, | |
| "loss": 0.7874, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 4.9211802748585285, | |
| "grad_norm": 0.6707117557525635, | |
| "learning_rate": 5.2568939189796095e-06, | |
| "loss": 0.7874, | |
| "step": 146100 | |
| }, | |
| { | |
| "epoch": 4.924548639180814, | |
| "grad_norm": 0.7077836990356445, | |
| "learning_rate": 5.032336297493937e-06, | |
| "loss": 0.7858, | |
| "step": 146200 | |
| }, | |
| { | |
| "epoch": 4.927917003503099, | |
| "grad_norm": 0.6887540817260742, | |
| "learning_rate": 4.807778676008263e-06, | |
| "loss": 0.7834, | |
| "step": 146300 | |
| }, | |
| { | |
| "epoch": 4.931285367825384, | |
| "grad_norm": 0.710809051990509, | |
| "learning_rate": 4.5832210545225905e-06, | |
| "loss": 0.7855, | |
| "step": 146400 | |
| }, | |
| { | |
| "epoch": 4.93465373214767, | |
| "grad_norm": 0.6486634016036987, | |
| "learning_rate": 4.358663433036916e-06, | |
| "loss": 0.7833, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 4.938022096469954, | |
| "grad_norm": 0.7380653619766235, | |
| "learning_rate": 4.134105811551244e-06, | |
| "loss": 0.7825, | |
| "step": 146600 | |
| }, | |
| { | |
| "epoch": 4.941390460792239, | |
| "grad_norm": 0.672187328338623, | |
| "learning_rate": 3.909548190065571e-06, | |
| "loss": 0.7853, | |
| "step": 146700 | |
| }, | |
| { | |
| "epoch": 4.9447588251145245, | |
| "grad_norm": 0.7624046206474304, | |
| "learning_rate": 3.684990568579897e-06, | |
| "loss": 0.787, | |
| "step": 146800 | |
| }, | |
| { | |
| "epoch": 4.94812718943681, | |
| "grad_norm": 0.7582620978355408, | |
| "learning_rate": 3.460432947094224e-06, | |
| "loss": 0.7852, | |
| "step": 146900 | |
| }, | |
| { | |
| "epoch": 4.951495553759095, | |
| "grad_norm": 0.7094969153404236, | |
| "learning_rate": 3.235875325608551e-06, | |
| "loss": 0.7845, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 4.954863918081379, | |
| "grad_norm": 0.7196788787841797, | |
| "learning_rate": 3.0113177041228775e-06, | |
| "loss": 0.7844, | |
| "step": 147100 | |
| }, | |
| { | |
| "epoch": 4.958232282403665, | |
| "grad_norm": 0.6927585005760193, | |
| "learning_rate": 2.7867600826372042e-06, | |
| "loss": 0.7839, | |
| "step": 147200 | |
| }, | |
| { | |
| "epoch": 4.96160064672595, | |
| "grad_norm": 0.7253730893135071, | |
| "learning_rate": 2.5622024611515314e-06, | |
| "loss": 0.7838, | |
| "step": 147300 | |
| }, | |
| { | |
| "epoch": 4.964969011048235, | |
| "grad_norm": 0.6914359331130981, | |
| "learning_rate": 2.337644839665858e-06, | |
| "loss": 0.7853, | |
| "step": 147400 | |
| }, | |
| { | |
| "epoch": 4.9683373753705204, | |
| "grad_norm": 0.7005605101585388, | |
| "learning_rate": 2.113087218180185e-06, | |
| "loss": 0.7852, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 4.971705739692805, | |
| "grad_norm": 0.6463395357131958, | |
| "learning_rate": 1.8885295966945117e-06, | |
| "loss": 0.7868, | |
| "step": 147600 | |
| }, | |
| { | |
| "epoch": 4.97507410401509, | |
| "grad_norm": 0.6650646328926086, | |
| "learning_rate": 1.6639719752088385e-06, | |
| "loss": 0.7874, | |
| "step": 147700 | |
| }, | |
| { | |
| "epoch": 4.978442468337375, | |
| "grad_norm": 0.7527862191200256, | |
| "learning_rate": 1.4394143537231652e-06, | |
| "loss": 0.7866, | |
| "step": 147800 | |
| }, | |
| { | |
| "epoch": 4.981810832659661, | |
| "grad_norm": 0.7495971322059631, | |
| "learning_rate": 1.2148567322374921e-06, | |
| "loss": 0.786, | |
| "step": 147900 | |
| }, | |
| { | |
| "epoch": 4.985179196981946, | |
| "grad_norm": 0.6791830062866211, | |
| "learning_rate": 9.902991107518188e-07, | |
| "loss": 0.7863, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 4.98854756130423, | |
| "grad_norm": 0.6754565834999084, | |
| "learning_rate": 7.657414892661456e-07, | |
| "loss": 0.7832, | |
| "step": 148100 | |
| }, | |
| { | |
| "epoch": 4.9919159256265155, | |
| "grad_norm": 0.6369759440422058, | |
| "learning_rate": 5.411838677804724e-07, | |
| "loss": 0.7807, | |
| "step": 148200 | |
| }, | |
| { | |
| "epoch": 4.995284289948801, | |
| "grad_norm": 0.7656301259994507, | |
| "learning_rate": 3.166262462947992e-07, | |
| "loss": 0.7841, | |
| "step": 148300 | |
| }, | |
| { | |
| "epoch": 4.998652654271086, | |
| "grad_norm": 0.6411105394363403, | |
| "learning_rate": 9.206862480912601e-08, | |
| "loss": 0.787, | |
| "step": 148400 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.7365977168083191, | |
| "eval_runtime": 1.819, | |
| "eval_samples_per_second": 2748.792, | |
| "eval_steps_per_second": 43.431, | |
| "step": 148440 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 148440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8701968384000000.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |