MianzhiPan commited on
Commit
ee0318e
·
verified ·
1 Parent(s): 9f74516

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
args.json ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dir": "/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v10-20260104-235527",
3
+ "overwrite_output_dir": false,
4
+ "do_train": false,
5
+ "do_eval": false,
6
+ "do_predict": false,
7
+ "eval_strategy": "no",
8
+ "prediction_loss_only": false,
9
+ "per_device_train_batch_size": 14,
10
+ "per_device_eval_batch_size": 1,
11
+ "per_gpu_train_batch_size": null,
12
+ "per_gpu_eval_batch_size": null,
13
+ "gradient_accumulation_steps": 4,
14
+ "eval_accumulation_steps": null,
15
+ "eval_delay": 0,
16
+ "torch_empty_cache_steps": null,
17
+ "learning_rate": 5e-07,
18
+ "weight_decay": 0.1,
19
+ "adam_beta1": 0.9,
20
+ "adam_beta2": 0.95,
21
+ "adam_epsilon": 1e-08,
22
+ "max_grad_norm": 1.0,
23
+ "num_train_epochs": 1.0,
24
+ "max_steps": -1,
25
+ "lr_scheduler_type": "cosine",
26
+ "lr_scheduler_kwargs": null,
27
+ "warmup_ratio": 0.0,
28
+ "warmup_steps": 0,
29
+ "log_level": "passive",
30
+ "log_level_replica": "warning",
31
+ "log_on_each_node": true,
32
+ "logging_dir": "/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v10-20260104-235527/runs",
33
+ "logging_strategy": "steps",
34
+ "logging_first_step": true,
35
+ "logging_steps": 1,
36
+ "logging_nan_inf_filter": true,
37
+ "save_strategy": "steps",
38
+ "save_steps": 10.0,
39
+ "save_total_limit": null,
40
+ "save_safetensors": true,
41
+ "save_on_each_node": false,
42
+ "save_only_model": true,
43
+ "restore_callback_states_from_checkpoint": false,
44
+ "no_cuda": false,
45
+ "use_cpu": false,
46
+ "use_mps_device": false,
47
+ "seed": 7777,
48
+ "data_seed": 42,
49
+ "jit_mode_eval": false,
50
+ "bf16": true,
51
+ "fp16": false,
52
+ "fp16_opt_level": "O1",
53
+ "half_precision_backend": "auto",
54
+ "bf16_full_eval": false,
55
+ "fp16_full_eval": false,
56
+ "tf32": null,
57
+ "local_rank": 0,
58
+ "ddp_backend": null,
59
+ "tpu_num_cores": null,
60
+ "tpu_metrics_debug": false,
61
+ "debug": null,
62
+ "dataloader_drop_last": false,
63
+ "eval_steps": 10.0,
64
+ "dataloader_num_workers": 8,
65
+ "dataloader_prefetch_factor": null,
66
+ "past_index": -1,
67
+ "run_name": "/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v10-20260104-235527",
68
+ "disable_tqdm": null,
69
+ "remove_unused_columns": false,
70
+ "label_names": null,
71
+ "load_best_model_at_end": false,
72
+ "metric_for_best_model": "loss",
73
+ "greater_is_better": false,
74
+ "ignore_data_skip": false,
75
+ "fsdp": null,
76
+ "fsdp_min_num_params": 0,
77
+ "fsdp_config": null,
78
+ "fsdp_transformer_layer_cls_to_wrap": null,
79
+ "accelerator_config": {
80
+ "dispatch_batches": false
81
+ },
82
+ "parallelism_config": null,
83
+ "deepspeed": {
84
+ "fp16": {
85
+ "enabled": "auto",
86
+ "loss_scale": 0,
87
+ "loss_scale_window": 1000,
88
+ "initial_scale_power": 16,
89
+ "hysteresis": 2,
90
+ "min_loss_scale": 1
91
+ },
92
+ "bf16": {
93
+ "enabled": "auto"
94
+ },
95
+ "zero_optimization": {
96
+ "stage": 3,
97
+ "offload_optimizer": {
98
+ "device": "none",
99
+ "pin_memory": true
100
+ },
101
+ "offload_param": {
102
+ "device": "none",
103
+ "pin_memory": true
104
+ },
105
+ "overlap_comm": false,
106
+ "contiguous_gradients": true,
107
+ "sub_group_size": 1000000000.0,
108
+ "reduce_bucket_size": "auto",
109
+ "zero_quantized_weights": false,
110
+ "zero_quantized_gradients": false,
111
+ "stage3_prefetch_bucket_size": "auto",
112
+ "stage3_param_persistence_threshold": "auto",
113
+ "stage3_max_live_parameters": 1000000000.0,
114
+ "stage3_max_reuse_distance": 1000000000.0,
115
+ "stage3_gather_16bit_weights_on_model_save": true
116
+ },
117
+ "gradient_accumulation_steps": "auto",
118
+ "gradient_clipping": "auto",
119
+ "steps_per_print": 2000,
120
+ "train_batch_size": "auto",
121
+ "train_micro_batch_size_per_gpu": "auto",
122
+ "wall_clock_breakdown": false
123
+ },
124
+ "label_smoothing_factor": 0.0,
125
+ "optim": "adamw_torch_fused",
126
+ "optim_args": null,
127
+ "adafactor": false,
128
+ "group_by_length": false,
129
+ "length_column_name": "length",
130
+ "report_to": [
131
+ "swanlab"
132
+ ],
133
+ "project": "huggingface",
134
+ "trackio_space_id": "trackio",
135
+ "ddp_find_unused_parameters": null,
136
+ "ddp_bucket_cap_mb": null,
137
+ "ddp_broadcast_buffers": null,
138
+ "dataloader_pin_memory": true,
139
+ "dataloader_persistent_workers": false,
140
+ "skip_memory_metrics": true,
141
+ "use_legacy_prediction_loop": false,
142
+ "push_to_hub": false,
143
+ "resume_from_checkpoint": null,
144
+ "hub_model_id": null,
145
+ "hub_strategy": "every_save",
146
+ "hub_token": null,
147
+ "hub_private_repo": null,
148
+ "hub_always_push": false,
149
+ "hub_revision": null,
150
+ "gradient_checkpointing": true,
151
+ "gradient_checkpointing_kwargs": null,
152
+ "include_inputs_for_metrics": false,
153
+ "include_for_metrics": [],
154
+ "eval_do_concat_batches": true,
155
+ "fp16_backend": "auto",
156
+ "push_to_hub_model_id": null,
157
+ "push_to_hub_organization": null,
158
+ "push_to_hub_token": null,
159
+ "mp_parameters": "",
160
+ "auto_find_batch_size": false,
161
+ "full_determinism": false,
162
+ "torchdynamo": null,
163
+ "ray_scope": "last",
164
+ "ddp_timeout": 18000000,
165
+ "torch_compile": false,
166
+ "torch_compile_backend": null,
167
+ "torch_compile_mode": null,
168
+ "include_tokens_per_second": false,
169
+ "include_num_input_tokens_seen": false,
170
+ "neftune_noise_alpha": null,
171
+ "optim_target_modules": null,
172
+ "batch_eval_metrics": false,
173
+ "eval_on_start": false,
174
+ "use_liger_kernel": false,
175
+ "liger_kernel_config": null,
176
+ "eval_use_gather_object": false,
177
+ "average_tokens_across_devices": true,
178
+ "sortish_sampler": false,
179
+ "predict_with_generate": false,
180
+ "generation_max_length": null,
181
+ "generation_num_beams": null,
182
+ "generation_config": null,
183
+ "tuner_backend": "peft",
184
+ "vit_gradient_checkpointing": null,
185
+ "router_aux_loss_coef": 0.0,
186
+ "enable_dft_loss": false,
187
+ "enable_channel_loss": false,
188
+ "check_model": true,
189
+ "acc_strategy": "token",
190
+ "train_dataloader_shuffle": true,
191
+ "max_epochs": null,
192
+ "aligner_lr": null,
193
+ "vit_lr": null,
194
+ "use_logits_to_keep": null,
195
+ "ds3_gather_for_generation": true,
196
+ "resume_only_model": false,
197
+ "optimizer": null,
198
+ "loss_type": "sapo",
199
+ "metric": null,
200
+ "eval_use_evalscope": false,
201
+ "eval_dataset": [],
202
+ "eval_dataset_args": null,
203
+ "eval_limit": null,
204
+ "eval_generation_config": null,
205
+ "extra_eval_args": null,
206
+ "use_flash_ckpt": false,
207
+ "use_ray": false,
208
+ "ray_exp_name": null,
209
+ "device_groups": null,
210
+ "model": "/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v9-20260103-144759/checkpoint-330",
211
+ "model_type": "qwen3",
212
+ "model_revision": null,
213
+ "task_type": "causal_lm",
214
+ "torch_dtype": "bfloat16",
215
+ "attn_impl": "flash_attn",
216
+ "new_special_tokens": [],
217
+ "num_labels": null,
218
+ "problem_type": null,
219
+ "rope_scaling": null,
220
+ "device_map": null,
221
+ "max_memory": {},
222
+ "max_model_len": null,
223
+ "local_repo_path": null,
224
+ "init_strategy": null,
225
+ "template": "qwen3",
226
+ "system": null,
227
+ "max_length": 40960,
228
+ "truncation_strategy": "left",
229
+ "max_pixels": null,
230
+ "agent_template": null,
231
+ "norm_bbox": null,
232
+ "use_chat_template": true,
233
+ "padding_free": false,
234
+ "padding_side": "right",
235
+ "loss_scale": "last_round",
236
+ "sequence_parallel_size": 1,
237
+ "response_prefix": null,
238
+ "template_backend": "swift",
239
+ "dataset": [
240
+ "/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/data/train_data_swift_euler_sample20000_v4prompt.json"
241
+ ],
242
+ "val_dataset": [],
243
+ "cached_dataset": [],
244
+ "cached_val_dataset": [],
245
+ "split_dataset_ratio": 0.0,
246
+ "dataset_num_proc": 4,
247
+ "load_from_cache_file": true,
248
+ "dataset_shuffle": true,
249
+ "val_dataset_shuffle": false,
250
+ "streaming": false,
251
+ "interleave_prob": null,
252
+ "stopping_strategy": "first_exhausted",
253
+ "shuffle_buffer_size": 1000,
254
+ "download_mode": "reuse_dataset_if_exists",
255
+ "columns": {},
256
+ "strict": false,
257
+ "model_name": null,
258
+ "model_author": null,
259
+ "custom_dataset_info": [],
260
+ "quant_method": null,
261
+ "quant_bits": null,
262
+ "hqq_axis": null,
263
+ "bnb_4bit_compute_dtype": "bfloat16",
264
+ "bnb_4bit_quant_type": "nf4",
265
+ "bnb_4bit_use_double_quant": true,
266
+ "bnb_4bit_quant_storage": null,
267
+ "max_new_tokens": 1510,
268
+ "temperature": 0.9,
269
+ "top_k": 50,
270
+ "top_p": 0.99,
271
+ "repetition_penalty": 1.0,
272
+ "num_beams": 1,
273
+ "stream": false,
274
+ "stop_words": [],
275
+ "logprobs": false,
276
+ "top_logprobs": null,
277
+ "ckpt_dir": "/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v9-20260103-144759/checkpoint-330",
278
+ "lora_modules": [],
279
+ "train_type": "full",
280
+ "adapters": [],
281
+ "external_plugins": [],
282
+ "model_kwargs": {},
283
+ "load_args": false,
284
+ "load_data_args": false,
285
+ "packing": false,
286
+ "packing_length": null,
287
+ "packing_num_proc": 1,
288
+ "lazy_tokenize": false,
289
+ "custom_register_path": [],
290
+ "use_hf": false,
291
+ "ignore_args_error": false,
292
+ "use_swift_lora": false,
293
+ "freeze_parameters": [],
294
+ "freeze_parameters_regex": null,
295
+ "freeze_parameters_ratio": 0.0,
296
+ "trainable_parameters": [],
297
+ "trainable_parameters_regex": null,
298
+ "freeze_llm": false,
299
+ "freeze_vit": true,
300
+ "freeze_aligner": true,
301
+ "target_modules": [
302
+ "all-linear"
303
+ ],
304
+ "target_regex": null,
305
+ "target_parameters": null,
306
+ "modules_to_save": [],
307
+ "lora_rank": 8,
308
+ "lora_alpha": 32,
309
+ "lora_dropout": 0.05,
310
+ "lora_bias": "none",
311
+ "lora_dtype": null,
312
+ "lorap_lr_ratio": null,
313
+ "use_rslora": false,
314
+ "use_dora": false,
315
+ "lora_ga_batch_size": 2,
316
+ "lora_ga_iters": 2,
317
+ "lora_ga_max_length": 1024,
318
+ "lora_ga_direction": "ArB2r",
319
+ "lora_ga_scale": "stable",
320
+ "lora_ga_stable_gamma": 16,
321
+ "init_weights": true,
322
+ "fourier_n_frequency": 2000,
323
+ "fourier_scaling": 300.0,
324
+ "boft_block_size": 4,
325
+ "boft_block_num": 0,
326
+ "boft_n_butterfly_factor": 1,
327
+ "boft_dropout": 0.0,
328
+ "vera_rank": 256,
329
+ "vera_projection_prng_key": 0,
330
+ "vera_dropout": 0.0,
331
+ "vera_d_initial": 0.1,
332
+ "adapter_act": "gelu",
333
+ "adapter_length": 128,
334
+ "use_galore": false,
335
+ "galore_target_modules": null,
336
+ "galore_rank": 128,
337
+ "galore_update_proj_gap": 50,
338
+ "galore_scale": 1.0,
339
+ "galore_proj_type": "std",
340
+ "galore_optim_per_parameter": false,
341
+ "galore_with_embedding": false,
342
+ "galore_quantization": false,
343
+ "galore_proj_quant": false,
344
+ "galore_proj_bits": 4,
345
+ "galore_proj_group_size": 256,
346
+ "galore_cos_threshold": 0.4,
347
+ "galore_gamma_proj": 2,
348
+ "galore_queue_size": 5,
349
+ "adalora_target_r": 8,
350
+ "adalora_init_r": 12,
351
+ "adalora_tinit": 0,
352
+ "adalora_tfinal": 0,
353
+ "adalora_deltaT": 1,
354
+ "adalora_beta1": 0.85,
355
+ "adalora_beta2": 0.85,
356
+ "adalora_orth_reg_weight": 0.5,
357
+ "llamapro_num_new_blocks": 4,
358
+ "llamapro_num_groups": null,
359
+ "lisa_activated_layers": 0,
360
+ "lisa_step_interval": 20,
361
+ "reft_layer_key": null,
362
+ "reft_layers": null,
363
+ "reft_rank": 4,
364
+ "reft_intervention_type": "LoreftIntervention",
365
+ "reft_args": null,
366
+ "swanlab_token": null,
367
+ "swanlab_project": null,
368
+ "swanlab_workspace": null,
369
+ "swanlab_exp_name": "/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v10-20260104-235527",
370
+ "swanlab_lark_webhook_url": null,
371
+ "swanlab_lark_secret": null,
372
+ "swanlab_mode": "cloud",
373
+ "add_version": true,
374
+ "create_checkpoint_symlink": false,
375
+ "zero_hpz_partition_size": null,
376
+ "deepspeed_autotp_size": null,
377
+ "early_stop_interval": null,
378
+ "sft_alpha": 0,
379
+ "chord_sft_dataset": [],
380
+ "chord_sft_per_device_train_batch_size": null,
381
+ "chord_enable_phi_function": false,
382
+ "chord_mu_warmup_steps": null,
383
+ "chord_mu_decay_steps": null,
384
+ "chord_mu_peak": null,
385
+ "chord_mu_valley": null,
386
+ "reward_model": null,
387
+ "reward_adapters": [],
388
+ "reward_model_type": null,
389
+ "reward_model_revision": null,
390
+ "num_ppo_epochs": 4,
391
+ "whiten_rewards": false,
392
+ "kl_coef": 0.05,
393
+ "cliprange": 0.2,
394
+ "vf_coef": 0.1,
395
+ "cliprange_value": 0.2,
396
+ "gamma": 1.0,
397
+ "lam": 0.95,
398
+ "num_mini_batches": 1,
399
+ "local_rollout_forward_batch_size": 64,
400
+ "num_sample_generations": 10,
401
+ "response_length": 1510,
402
+ "missing_eos_penalty": null,
403
+ "vllm_gpu_memory_utilization": 0.5,
404
+ "vllm_tensor_parallel_size": 1,
405
+ "vllm_pipeline_parallel_size": 1,
406
+ "vllm_enable_expert_parallel": false,
407
+ "vllm_max_num_seqs": null,
408
+ "vllm_max_model_len": null,
409
+ "vllm_disable_custom_all_reduce": true,
410
+ "vllm_enforce_eager": false,
411
+ "vllm_limit_mm_per_prompt": null,
412
+ "vllm_max_lora_rank": 16,
413
+ "vllm_enable_prefix_caching": true,
414
+ "vllm_use_async_engine": false,
415
+ "vllm_quantization": null,
416
+ "vllm_reasoning_parser": null,
417
+ "vllm_disable_cascade_attn": false,
418
+ "vllm_mm_processor_cache_gb": null,
419
+ "vllm_speculative_config": null,
420
+ "vllm_engine_kwargs": {},
421
+ "vllm_data_parallel_size": 1,
422
+ "use_vllm": true,
423
+ "vllm_mode": "server",
424
+ "vllm_enable_lora": false,
425
+ "vllm_server_base_url": null,
426
+ "vllm_server_host": [
427
+ "0.0.0.0"
428
+ ],
429
+ "vllm_server_port": [
430
+ 8000
431
+ ],
432
+ "vllm_server_timeout": 240.0,
433
+ "vllm_server_group_port": null,
434
+ "enable_flattened_weight_sync": true,
435
+ "async_generate": false,
436
+ "sleep_level": 0,
437
+ "move_model_batches": null,
438
+ "offload_optimizer": false,
439
+ "offload_model": false,
440
+ "wandb_log_unique_prompts": null,
441
+ "epsilon": 0.2,
442
+ "epsilon_high": 0.3,
443
+ "delta": null,
444
+ "cosine_min_len_value_wrong": -0.5,
445
+ "cosine_max_len_value_wrong": 0.0,
446
+ "cosine_min_len_value_correct": 1.0,
447
+ "cosine_max_len_value_correct": 0.5,
448
+ "cosine_max_len": null,
449
+ "repetition_n_grams": 3,
450
+ "repetition_max_penalty": -1.0,
451
+ "reward_model_plugin": null,
452
+ "sync_ref_model": false,
453
+ "ref_model_sync_steps": 512,
454
+ "ref_model_mixup_alpha": 0.6,
455
+ "multi_turn_scheduler": null,
456
+ "max_turns": null,
457
+ "completion_length_limit_scope": "per_round",
458
+ "vllm_server_pass_dataset": false,
459
+ "dynamic_sample": true,
460
+ "max_resample_times": 2,
461
+ "overlong_filter": false,
462
+ "soft_max_length": null,
463
+ "soft_cache_length": null,
464
+ "scale_rewards": "group",
465
+ "log_entropy": false,
466
+ "top_entropy_quantile": 1.0,
467
+ "importance_sampling_level": "token",
468
+ "tau_pos": 1.0,
469
+ "tau_neg": 1.05,
470
+ "advantage_estimator": "grpo",
471
+ "kl_in_reward": false,
472
+ "generation_batch_size": null,
473
+ "steps_per_generation": null,
474
+ "rollout_importance_sampling_mode": null,
475
+ "rollout_importance_sampling_threshold": 2.0,
476
+ "log_rollout_offpolicy_metrics": false,
477
+ "off_policy_sequence_mask_delta": null,
478
+ "num_generations": 8,
479
+ "reward_funcs": [
480
+ "format"
481
+ ],
482
+ "reward_weights": null,
483
+ "log_completions": true,
484
+ "num_iterations": 1,
485
+ "teacher_model": null,
486
+ "teacher_adapters": [],
487
+ "teacher_model_type": null,
488
+ "teacher_model_revision": null,
489
+ "teacher_deepspeed": null,
490
+ "rlhf_type": "grpo",
491
+ "ref_model": null,
492
+ "ref_adapters": [],
493
+ "ref_model_type": null,
494
+ "ref_model_revision": null,
495
+ "beta": 0.0,
496
+ "label_smoothing": 0,
497
+ "max_completion_length": 1510,
498
+ "rpo_alpha": null,
499
+ "ld_alpha": null,
500
+ "discopop_tau": 0.05,
501
+ "loss_weights": null,
502
+ "cpo_alpha": 1.0,
503
+ "simpo_gamma": 1,
504
+ "desirable_weight": 1.0,
505
+ "undesirable_weight": 1.0,
506
+ "center_rewards_coefficient": null,
507
+ "lmbda": 0.5,
508
+ "seq_kd": false,
509
+ "offload_teacher_model": false,
510
+ "vllm_client": "<swift.trainers.rlhf_trainer.vllm_client.VLLMClient object at 0x7fad40903bf0>",
511
+ "rank": 0,
512
+ "global_world_size": 6,
513
+ "local_world_size": 6,
514
+ "model_suffix": "checkpoint-330",
515
+ "model_info": "ModelInfo(model_type='qwen3', model_dir='/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v9-20260103-144759/checkpoint-330', torch_dtype=torch.bfloat16, max_model_len=40960, quant_method=None, quant_bits=None, rope_scaling=None, is_moe_model=False, is_multimodal=False, config=None, task_type='causal_lm', num_labels=None)",
516
+ "model_meta": "ModelMeta(model_type='qwen3', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-0.6B-Base', hf_model_id='Qwen/Qwen3-0.6B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B-Base', hf_model_id='Qwen/Qwen3-1.7B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-Base', hf_model_id='Qwen/Qwen3-4B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-Base', hf_model_id='Qwen/Qwen3-8B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-Base', hf_model_id='Qwen/Qwen3-14B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-0.6B', hf_model_id='Qwen/Qwen3-0.6B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B', hf_model_id='Qwen/Qwen3-1.7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B', hf_model_id='Qwen/Qwen3-4B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B', hf_model_id='Qwen/Qwen3-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B', hf_model_id='Qwen/Qwen3-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B', hf_model_id='Qwen/Qwen3-32B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-0.6B-FP8', hf_model_id='Qwen/Qwen3-0.6B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B-FP8', hf_model_id='Qwen/Qwen3-1.7B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-FP8', hf_model_id='Qwen/Qwen3-4B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-FP8', hf_model_id='Qwen/Qwen3-8B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-FP8', hf_model_id='Qwen/Qwen3-14B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B-FP8', hf_model_id='Qwen/Qwen3-32B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-AWQ', hf_model_id='Qwen/Qwen3-4B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-AWQ', hf_model_id='Qwen/Qwen3-8B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-AWQ', hf_model_id='Qwen/Qwen3-14B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B-AWQ', hf_model_id='Qwen/Qwen3-32B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-32B-AWQ', hf_model_id=None, model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3', get_function=<function get_model_tokenizer_with_flash_attn at 0x7fad405618a0>, model_arch=ModelKeys(arch_name='llama', embedding='model.embed_tokens', module_list='model.layers', lm_head='lm_head', q_proj='model.layers.{}.self_attn.q_proj', k_proj='model.layers.{}.self_attn.k_proj', v_proj='model.layers.{}.self_attn.v_proj', o_proj='model.layers.{}.self_attn.o_proj', attention='model.layers.{}.self_attn', mlp='model.layers.{}.mlp', down_proj='model.layers.{}.mlp.down_proj', qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None), architectures=['Qwen3ForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, is_reranker=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.51'], tags=[])",
517
+ "model_dir": "/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v9-20260103-144759/checkpoint-330",
518
+ "_val_dataset_exists": [],
519
+ "hub": "<class 'swift.hub.hub.MSHub'>",
520
+ "evaluation_strategy": "steps",
521
+ "training_args": "GRPOConfig(output_dir='/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v10-20260104-235527', overwrite_output_dir=False, do_train=False, do_eval=False, do_predict=False, eval_strategy=<IntervalStrategy.NO: 'no'>, prediction_loss_only=False, per_device_train_batch_size=14, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=4, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=5e-07, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=1.0, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, lr_scheduler_kwargs=None, warmup_ratio=0.0, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v10-20260104-235527/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.STEPS: 'steps'>, save_steps=10, save_total_limit=None, save_safetensors=True, save_on_each_node=False, save_only_model=True, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=7777, data_seed=42, jit_mode_eval=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=True, eval_steps=10.0, dataloader_num_workers=8, dataloader_prefetch_factor=10, past_index=-1, run_name='/root/bayes-gpfs-a44dc86fb6234d1dbd414fa3ac64ecdc/lijf/swift-checkpoint/1230/v10-20260104-235527', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), parallelism_config=None, deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': False, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'zero_quantized_weights': False, 'zero_quantized_gradients': False, 'stage3_prefetch_bucket_size': 0, 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH_FUSED: 'adamw_torch_fused'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['swanlab'], project='huggingface', trackio_space_id='trackio', ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=None, hub_always_push=False, hub_revision=None, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=18000000, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, liger_kernel_config=None, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, disable_dropout=False, max_prompt_length=512, num_generations=8, max_completion_length=1510, ds3_gather_for_generation=True, shuffle_dataset=True, generation_batch_size=336, steps_per_generation=4, temperature=0.9, top_p=0.99, top_k=50, min_p=None, generation_kwargs=None, repetition_penalty=1.0, use_transformers_paged=False, cache_implementation=None, use_vllm=True, vllm_mode='server', vllm_model_impl='vllm', vllm_enable_sleep_mode=False, vllm_guided_decoding_regex=None, vllm_server_base_url=None, vllm_server_host=['0.0.0.0'], vllm_server_port=[8000], vllm_server_timeout=240.0, vllm_gpu_memory_utilization=0.5, vllm_tensor_parallel_size=1, beta=0.0, num_iterations=1, epsilon=0.2, delta=None, epsilon_high=0.3, importance_sampling_level='token', reward_weights=None, scale_rewards='group', loss_type='sapo', mask_truncated_completions=False, sync_ref_model=False, ref_model_mixup_alpha=0.6, ref_model_sync_steps=512, top_entropy_quantile=1.0, use_liger_loss=False, vllm_importance_sampling_correction=True, vllm_importance_sampling_cap=2.0, log_completions=True, num_completions_to_print=None, wandb_log_unique_prompts=None, tuner_backend='peft', vit_gradient_checkpointing=True, router_aux_loss_coef=0.0, enable_dft_loss=False, enable_channel_loss=False, check_model=True, acc_strategy='token', train_dataloader_shuffle=True, max_epochs=None, aligner_lr=None, vit_lr=None, use_logits_to_keep=None, resume_only_model=False, optimizer=None, metric=None, eval_use_evalscope=False, eval_dataset=[], eval_dataset_args=None, eval_limit=None, eval_generation_config=None, extra_eval_args=None, use_flash_ckpt=False, sft_alpha=0, chord_sft_dataset=[], chord_sft_per_device_train_batch_size=None, chord_enable_phi_function=False, chord_mu_warmup_steps=None, chord_mu_decay_steps=None, chord_mu_peak=None, chord_mu_valley=None, train_type='full', local_repo_path=None, galore_config=None, padding_side='right', padding_free=False, task_type='causal_lm', problem_type=None, vllm_pipeline_parallel_size=1, vllm_enable_expert_parallel=False, vllm_max_num_seqs=None, vllm_max_model_len=None, vllm_disable_custom_all_reduce=True, vllm_enforce_eager=False, vllm_limit_mm_per_prompt=None, vllm_max_lora_rank=16, vllm_enable_prefix_caching=True, vllm_use_async_engine=False, vllm_quantization=None, vllm_reasoning_parser=None, vllm_disable_cascade_attn=False, vllm_mm_processor_cache_gb=None, vllm_speculative_config=None, vllm_engine_kwargs={}, vllm_data_parallel_size=1, stop_words=[], vllm_enable_lora=False, lora_rank=8, vllm_server_group_port=None, enable_flattened_weight_sync=True, async_generate=False, sleep_level=0, move_model_batches=None, offload_optimizer=False, offload_model=False, cosine_min_len_value_wrong=-0.5, cosine_max_len_value_wrong=0.0, cosine_min_len_value_correct=1.0, cosine_max_len_value_correct=0.5, cosine_max_len=1510, repetition_n_grams=3, repetition_max_penalty=-1.0, reward_model=None, reward_model_plugin=None, multi_turn_scheduler=None, max_turns=None, completion_length_limit_scope='per_round', vllm_server_pass_dataset=False, dynamic_sample=True, max_resample_times=2, overlong_filter=False, soft_max_length=None, soft_cache_length=None, log_entropy=False, tau_pos=1.0, tau_neg=1.05, advantage_estimator='grpo', kl_in_reward=False, dataset_shuffle=True, rollout_importance_sampling_mode=None, rollout_importance_sampling_threshold=2.0, log_rollout_offpolicy_metrics=False, off_policy_sequence_mask_delta=None)"
522
+ }
chat_template.jinja ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n' }}
86
+ {%- if enable_thinking is defined and enable_thinking is false %}
87
+ {{- '<think>\n\n</think>\n\n' }}
88
+ {%- endif %}
89
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention"
51
+ ],
52
+ "max_position_embeddings": 40960,
53
+ "max_window_layers": 36,
54
+ "model_type": "qwen3",
55
+ "num_attention_heads": 32,
56
+ "num_hidden_layers": 36,
57
+ "num_key_value_heads": 8,
58
+ "pad_token_id": 151643,
59
+ "rms_norm_eps": 1e-06,
60
+ "rope_scaling": null,
61
+ "rope_theta": 1000000,
62
+ "sliding_window": null,
63
+ "tie_word_embeddings": false,
64
+ "transformers_version": "4.57.3",
65
+ "use_cache": false,
66
+ "use_sliding_window": false,
67
+ "vocab_size": 151936
68
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "eos_token_id": [
4
+ 151645,
5
+ 151643
6
+ ],
7
+ "pad_token_id": 151643,
8
+ "temperature": 0.6,
9
+ "top_k": 20,
10
+ "top_p": 0.95,
11
+ "transformers_version": "4.57.3"
12
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4d7c8de4103fcc21df974ba0512c9d7a84657c04cb0c93d8c365bac82c6b50f
3
+ size 4902257696
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dfea5c3c20c327a9ae36e152d5085f3ea436fe1d96aa206b8e3c563b9bccd4b
3
+ size 4915960368
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a7ca1ca94de3b668cbedb0b388d6d111e1e29aaf2a9530355a4ebd7b9b7285f
3
+ size 4983068496
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66917b77f50d8f30d6dc0a2d24217c8611f8fa37333a7b1f8c03f797799507b5
3
+ size 1580230264
model.safetensors.index.json ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 308224,
4
+ "total_size": 16381470720
5
+ },
6
+ "weight_map": {
7
+ "lm_head.weight": "model-00004-of-00004.safetensors",
8
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
9
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
10
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
11
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
12
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
13
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
14
+ "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
15
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
16
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
17
+ "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
18
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
19
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
20
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
22
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
23
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
24
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
25
+ "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
26
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
27
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
28
+ "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
29
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
30
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
31
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
32
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
33
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
34
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
35
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
36
+ "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
37
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
38
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
39
+ "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
40
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
41
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
42
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
43
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
44
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
45
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
46
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
47
+ "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
48
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
49
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
50
+ "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
51
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
52
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
53
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
54
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
55
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
56
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
57
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
58
+ "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
59
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
60
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
61
+ "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
62
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
63
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
64
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
65
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
66
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
67
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
68
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
69
+ "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
70
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
71
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
72
+ "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
73
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
74
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
75
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
76
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
77
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
78
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
79
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
80
+ "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
81
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
82
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
83
+ "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
84
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
85
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
86
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
87
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
88
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
89
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
90
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
91
+ "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
92
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
93
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
94
+ "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
95
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
96
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
97
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
98
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
99
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
100
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
101
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
102
+ "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
103
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
104
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
105
+ "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
106
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
107
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
108
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
110
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
111
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
112
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
113
+ "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
114
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
115
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
116
+ "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
117
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
118
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
119
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
120
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
121
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
122
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
123
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
124
+ "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
125
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
126
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
127
+ "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
128
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
129
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
130
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
131
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
133
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
134
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
135
+ "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
136
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
137
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
138
+ "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
139
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
140
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
141
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
142
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
143
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
144
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
145
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
146
+ "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
147
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
148
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
149
+ "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
150
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
151
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
152
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
153
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
154
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
155
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
156
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
157
+ "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
158
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
159
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
160
+ "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
161
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
162
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
163
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
164
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
165
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
166
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
167
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
168
+ "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
169
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
170
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
171
+ "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
172
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
173
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
174
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
175
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
176
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
177
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
178
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
179
+ "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
180
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
181
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
182
+ "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
183
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
184
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
185
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
186
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
187
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
188
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
189
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
190
+ "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
191
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
192
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
193
+ "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
194
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
195
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
196
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
197
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
198
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
199
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
200
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
201
+ "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
202
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
203
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
204
+ "model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
205
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
206
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
207
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
208
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
209
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
210
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
211
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
212
+ "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
213
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
214
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
215
+ "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
216
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
217
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
218
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
219
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
220
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
221
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
222
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
223
+ "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
224
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
225
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
226
+ "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
227
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
228
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
229
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
230
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
231
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
232
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
233
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
234
+ "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
235
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
236
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
237
+ "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
238
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
239
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
240
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
241
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
242
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
243
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
244
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
245
+ "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
246
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
247
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
248
+ "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
249
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
250
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
251
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
252
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
253
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
254
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
255
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
256
+ "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
257
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
258
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
259
+ "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
260
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
261
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
262
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
263
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
264
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
265
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
266
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
267
+ "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
268
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
269
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
270
+ "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
271
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
272
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
273
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
274
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
275
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
276
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
277
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
278
+ "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
279
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
280
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
281
+ "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
282
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
283
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
284
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
285
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
286
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
287
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
288
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
289
+ "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
290
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
291
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
292
+ "model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
293
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
294
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
295
+ "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
296
+ "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
297
+ "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
298
+ "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
299
+ "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
300
+ "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
301
+ "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
302
+ "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
303
+ "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
304
+ "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
305
+ "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
306
+ "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
307
+ "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
308
+ "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
309
+ "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
310
+ "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
311
+ "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
312
+ "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
313
+ "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
314
+ "model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
315
+ "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
316
+ "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
317
+ "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
318
+ "model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
319
+ "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
320
+ "model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
321
+ "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
322
+ "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
323
+ "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
324
+ "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
325
+ "model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
326
+ "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
327
+ "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
328
+ "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
329
+ "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
330
+ "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
331
+ "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
332
+ "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
333
+ "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
334
+ "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
335
+ "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
336
+ "model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
337
+ "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
338
+ "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
339
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
340
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
341
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
342
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
343
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
344
+ "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
345
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
346
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
347
+ "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
348
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
349
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
350
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
351
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
352
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
353
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
354
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
355
+ "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
356
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
357
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
358
+ "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
359
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
360
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
361
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
362
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
363
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
364
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
365
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
366
+ "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
367
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
368
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
369
+ "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
370
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
371
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
372
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
373
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
374
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
375
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
376
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
377
+ "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
378
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
379
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
380
+ "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
381
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
382
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
383
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
384
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
385
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
386
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
387
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
388
+ "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
389
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
390
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
391
+ "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
392
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
393
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
394
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
395
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
396
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
397
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
398
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
399
+ "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
400
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
401
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
402
+ "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
403
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
404
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
405
+ "model.norm.weight": "model-00004-of-00004.safetensors"
406
+ }
407
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "padding_side": "right",
237
+ "split_special_tokens": false,
238
+ "tokenizer_class": "Qwen2Tokenizer",
239
+ "unk_token": null
240
+ }
trainer_state.json ADDED
@@ -0,0 +1,2594 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.33613445378151263,
6
+ "eval_steps": 10.0,
7
+ "global_step": 160,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "completions/clipped_ratio": 0.0,
14
+ "completions/max_length": 1342.0,
15
+ "completions/mean_length": 355.04168701171875,
16
+ "completions/min_length": 198.0,
17
+ "epoch": 0.0021008403361344537,
18
+ "frac_reward_zero_std": 0.0,
19
+ "grad_norm": 0.021037648231048197,
20
+ "learning_rate": 4.999945550452306e-07,
21
+ "loss": -4.4948080812901026e-09,
22
+ "reward": 0.7855739593505859,
23
+ "reward_std": 0.24952110648155212,
24
+ "rewards/Format/mean": 0.7855738401412964,
25
+ "rewards/Format/std": 0.43342384696006775,
26
+ "step": 1
27
+ },
28
+ {
29
+ "completions/clipped_ratio": 0.0,
30
+ "completions/max_length": 1342.0,
31
+ "completions/mean_length": 302.2351379394531,
32
+ "completions/min_length": 199.0,
33
+ "epoch": 0.004201680672268907,
34
+ "frac_reward_zero_std": 0.0,
35
+ "grad_norm": 0.022925960697215003,
36
+ "learning_rate": 4.999782204181026e-07,
37
+ "loss": -8.022956521358537e-09,
38
+ "reward": 0.7519891262054443,
39
+ "reward_std": 0.23649102449417114,
40
+ "rewards/Format/mean": 0.7519891262054443,
41
+ "rewards/Format/std": 0.36793601512908936,
42
+ "step": 2
43
+ },
44
+ {
45
+ "completions/clipped_ratio": 0.0,
46
+ "completions/max_length": 1342.0,
47
+ "completions/mean_length": 344.3095397949219,
48
+ "completions/min_length": 198.0,
49
+ "epoch": 0.0063025210084033615,
50
+ "frac_reward_zero_std": 0.0,
51
+ "grad_norm": 0.02045542358968525,
52
+ "learning_rate": 4.999509968301466e-07,
53
+ "loss": -2.761371433734894e-07,
54
+ "reward": 0.7871310114860535,
55
+ "reward_std": 0.21801209449768066,
56
+ "rewards/Format/mean": 0.7871310114860535,
57
+ "rewards/Format/std": 0.4382215142250061,
58
+ "step": 3
59
+ },
60
+ {
61
+ "completions/clipped_ratio": 0.0,
62
+ "completions/max_length": 768.0,
63
+ "completions/mean_length": 312.3422546386719,
64
+ "completions/min_length": 198.0,
65
+ "epoch": 0.008403361344537815,
66
+ "frac_reward_zero_std": 0.0,
67
+ "grad_norm": 0.021377196413974633,
68
+ "learning_rate": 4.999128854672122e-07,
69
+ "loss": -4.6122656804925555e-09,
70
+ "reward": 0.7807313203811646,
71
+ "reward_std": 0.22699780762195587,
72
+ "rewards/Format/mean": 0.7807313203811646,
73
+ "rewards/Format/std": 0.43211880326271057,
74
+ "step": 4
75
+ },
76
+ {
77
+ "completions/clipped_ratio": 0.0,
78
+ "completions/max_length": 768.0,
79
+ "completions/mean_length": 337.4970397949219,
80
+ "completions/min_length": 198.0,
81
+ "epoch": 0.01050420168067227,
82
+ "frac_reward_zero_std": 0.0,
83
+ "grad_norm": 0.021104436102075432,
84
+ "learning_rate": 4.998638879894165e-07,
85
+ "loss": 2.6707615674581575e-08,
86
+ "reward": 0.773746132850647,
87
+ "reward_std": 0.22035112977027893,
88
+ "rewards/Format/mean": 0.773746132850647,
89
+ "rewards/Format/std": 0.44727665185928345,
90
+ "step": 5
91
+ },
92
+ {
93
+ "completions/clipped_ratio": 0.0,
94
+ "completions/max_length": 687.0,
95
+ "completions/mean_length": 315.9077453613281,
96
+ "completions/min_length": 198.0,
97
+ "epoch": 0.012605042016806723,
98
+ "frac_reward_zero_std": 0.0,
99
+ "grad_norm": 0.02226229034518054,
100
+ "learning_rate": 4.998040065310719e-07,
101
+ "loss": 1.478086630868347e-07,
102
+ "reward": 0.7133380174636841,
103
+ "reward_std": 0.2343178689479828,
104
+ "rewards/Format/mean": 0.7133380174636841,
105
+ "rewards/Format/std": 0.4145345687866211,
106
+ "step": 6
107
+ },
108
+ {
109
+ "completions/clipped_ratio": 0.0,
110
+ "completions/max_length": 768.0,
111
+ "completions/mean_length": 308.1547546386719,
112
+ "completions/min_length": 198.0,
113
+ "epoch": 0.014705882352941176,
114
+ "frac_reward_zero_std": 0.0,
115
+ "grad_norm": 0.021533796559532928,
116
+ "learning_rate": 4.997332437005931e-07,
117
+ "loss": 2.4917730101492452e-08,
118
+ "reward": 0.7322717308998108,
119
+ "reward_std": 0.20535993576049805,
120
+ "rewards/Format/mean": 0.732271671295166,
121
+ "rewards/Format/std": 0.4378887414932251,
122
+ "step": 7
123
+ },
124
+ {
125
+ "completions/clipped_ratio": 0.0,
126
+ "completions/max_length": 768.0,
127
+ "completions/mean_length": 313.3095397949219,
128
+ "completions/min_length": 199.0,
129
+ "epoch": 0.01680672268907563,
130
+ "frac_reward_zero_std": 0.0,
131
+ "grad_norm": 0.021304344086945183,
132
+ "learning_rate": 4.996516025803834e-07,
133
+ "loss": 1.3737007975578308e-08,
134
+ "reward": 0.7182475924491882,
135
+ "reward_std": 0.23242181539535522,
136
+ "rewards/Format/mean": 0.7182475328445435,
137
+ "rewards/Format/std": 0.44208911061286926,
138
+ "step": 8
139
+ },
140
+ {
141
+ "completions/clipped_ratio": 0.0,
142
+ "completions/max_length": 768.0,
143
+ "completions/mean_length": 317.15179443359375,
144
+ "completions/min_length": 198.0,
145
+ "epoch": 0.018907563025210083,
146
+ "frac_reward_zero_std": 0.0,
147
+ "grad_norm": 0.02017258006861427,
148
+ "learning_rate": 4.995590867267005e-07,
149
+ "loss": 7.085812825380344e-08,
150
+ "reward": 0.7011752724647522,
151
+ "reward_std": 0.20264199376106262,
152
+ "rewards/Format/mean": 0.7011752724647522,
153
+ "rewards/Format/std": 0.4038677513599396,
154
+ "step": 9
155
+ },
156
+ {
157
+ "completions/clipped_ratio": 0.0,
158
+ "completions/max_length": 768.0,
159
+ "completions/mean_length": 321.8422546386719,
160
+ "completions/min_length": 159.0,
161
+ "epoch": 0.02100840336134454,
162
+ "frac_reward_zero_std": 0.0,
163
+ "grad_norm": 0.020625774151377174,
164
+ "learning_rate": 4.994557001695013e-07,
165
+ "loss": -2.6154642895903635e-08,
166
+ "reward": 0.7294002175331116,
167
+ "reward_std": 0.21035045385360718,
168
+ "rewards/Format/mean": 0.729400098323822,
169
+ "rewards/Format/std": 0.4142078161239624,
170
+ "step": 10
171
+ },
172
+ {
173
+ "completions/clipped_ratio": 0.0,
174
+ "completions/max_length": 768.0,
175
+ "completions/mean_length": 356.3958435058594,
176
+ "completions/min_length": 198.0,
177
+ "epoch": 0.023109243697478993,
178
+ "frac_reward_zero_std": 0.0,
179
+ "grad_norm": 0.019654160962126983,
180
+ "learning_rate": 4.993414474122671e-07,
181
+ "loss": -2.0527902933054065e-08,
182
+ "reward": 0.8128222823143005,
183
+ "reward_std": 0.20116277039051056,
184
+ "rewards/Format/mean": 0.812822163105011,
185
+ "rewards/Format/std": 0.40243202447891235,
186
+ "step": 11
187
+ },
188
+ {
189
+ "completions/clipped_ratio": 0.0,
190
+ "completions/max_length": 768.0,
191
+ "completions/mean_length": 323.87799072265625,
192
+ "completions/min_length": 198.0,
193
+ "epoch": 0.025210084033613446,
194
+ "frac_reward_zero_std": 0.0,
195
+ "grad_norm": 0.020754328818283256,
196
+ "learning_rate": 4.992163334318065e-07,
197
+ "loss": -1.482355127535584e-08,
198
+ "reward": 0.6890013813972473,
199
+ "reward_std": 0.21107301115989685,
200
+ "rewards/Format/mean": 0.6890013217926025,
201
+ "rewards/Format/std": 0.39970317482948303,
202
+ "step": 12
203
+ },
204
+ {
205
+ "completions/clipped_ratio": 0.0,
206
+ "completions/max_length": 1342.0,
207
+ "completions/mean_length": 319.3035888671875,
208
+ "completions/min_length": 198.0,
209
+ "epoch": 0.0273109243697479,
210
+ "frac_reward_zero_std": 0.0,
211
+ "grad_norm": 0.020993633732131523,
212
+ "learning_rate": 4.990803636780392e-07,
213
+ "loss": 5.5336084159307575e-08,
214
+ "reward": 0.7208616137504578,
215
+ "reward_std": 0.18355540931224823,
216
+ "rewards/Format/mean": 0.7208616137504578,
217
+ "rewards/Format/std": 0.3905758857727051,
218
+ "step": 13
219
+ },
220
+ {
221
+ "completions/clipped_ratio": 0.0,
222
+ "completions/max_length": 1342.0,
223
+ "completions/mean_length": 327.8809509277344,
224
+ "completions/min_length": 198.0,
225
+ "epoch": 0.029411764705882353,
226
+ "frac_reward_zero_std": 0.0,
227
+ "grad_norm": 0.02060347390098513,
228
+ "learning_rate": 4.989335440737586e-07,
229
+ "loss": -6.441648103816533e-09,
230
+ "reward": 0.783822774887085,
231
+ "reward_std": 0.21424761414527893,
232
+ "rewards/Format/mean": 0.7838225364685059,
233
+ "rewards/Format/std": 0.40218111872673035,
234
+ "step": 14
235
+ },
236
+ {
237
+ "completions/clipped_ratio": 0.0,
238
+ "completions/max_length": 1342.0,
239
+ "completions/mean_length": 326.9345397949219,
240
+ "completions/min_length": 198.0,
241
+ "epoch": 0.031512605042016806,
242
+ "frac_reward_zero_std": 0.0,
243
+ "grad_norm": 0.020836389129103162,
244
+ "learning_rate": 4.987758810143735e-07,
245
+ "loss": -1.8005570368018198e-08,
246
+ "reward": 0.7345045208930969,
247
+ "reward_std": 0.18862709403038025,
248
+ "rewards/Format/mean": 0.7345045208930969,
249
+ "rewards/Format/std": 0.4182124137878418,
250
+ "step": 15
251
+ },
252
+ {
253
+ "completions/clipped_ratio": 0.0,
254
+ "completions/max_length": 1342.0,
255
+ "completions/mean_length": 295.19940185546875,
256
+ "completions/min_length": 198.0,
257
+ "epoch": 0.03361344537815126,
258
+ "frac_reward_zero_std": 0.0,
259
+ "grad_norm": 0.022330910640345463,
260
+ "learning_rate": 4.986073813676296e-07,
261
+ "loss": -1.6293157045765838e-07,
262
+ "reward": 0.8834701776504517,
263
+ "reward_std": 0.16254234313964844,
264
+ "rewards/Format/mean": 0.8834701776504517,
265
+ "rewards/Format/std": 0.43395134806632996,
266
+ "step": 16
267
+ },
268
+ {
269
+ "completions/clipped_ratio": 0.0,
270
+ "completions/max_length": 687.0,
271
+ "completions/mean_length": 312.9583435058594,
272
+ "completions/min_length": 198.0,
273
+ "epoch": 0.03571428571428571,
274
+ "frac_reward_zero_std": 0.0,
275
+ "grad_norm": 0.021047264068784488,
276
+ "learning_rate": 4.984280524733107e-07,
277
+ "loss": 2.6620304183211374e-08,
278
+ "reward": 0.784546434879303,
279
+ "reward_std": 0.21301837265491486,
280
+ "rewards/Format/mean": 0.784546434879303,
281
+ "rewards/Format/std": 0.3901645541191101,
282
+ "step": 17
283
+ },
284
+ {
285
+ "completions/clipped_ratio": 0.0,
286
+ "completions/max_length": 768.0,
287
+ "completions/mean_length": 288.04168701171875,
288
+ "completions/min_length": 198.0,
289
+ "epoch": 0.037815126050420166,
290
+ "frac_reward_zero_std": 0.0,
291
+ "grad_norm": 0.021946993516431364,
292
+ "learning_rate": 4.982379021429183e-07,
293
+ "loss": 2.041148761122713e-08,
294
+ "reward": 0.7687810659408569,
295
+ "reward_std": 0.20664972066879272,
296
+ "rewards/Format/mean": 0.7687809467315674,
297
+ "rewards/Format/std": 0.42514485120773315,
298
+ "step": 18
299
+ },
300
+ {
301
+ "completions/clipped_ratio": 0.0,
302
+ "completions/max_length": 686.0,
303
+ "completions/mean_length": 293.889892578125,
304
+ "completions/min_length": 198.0,
305
+ "epoch": 0.03991596638655462,
306
+ "frac_reward_zero_std": 0.0,
307
+ "grad_norm": 0.020959933687470466,
308
+ "learning_rate": 4.980369386593323e-07,
309
+ "loss": -1.2580616726154403e-07,
310
+ "reward": 0.7214851975440979,
311
+ "reward_std": 0.2365146428346634,
312
+ "rewards/Format/mean": 0.7214851975440979,
313
+ "rewards/Format/std": 0.401768296957016,
314
+ "step": 19
315
+ },
316
+ {
317
+ "completions/clipped_ratio": 0.0,
318
+ "completions/max_length": 768.0,
319
+ "completions/mean_length": 285.31549072265625,
320
+ "completions/min_length": 198.0,
321
+ "epoch": 0.04201680672268908,
322
+ "frac_reward_zero_std": 0.0,
323
+ "grad_norm": 0.022296633634109067,
324
+ "learning_rate": 4.978251707764491e-07,
325
+ "loss": 4.093384831094227e-08,
326
+ "reward": 0.8078346252441406,
327
+ "reward_std": 0.2158266305923462,
328
+ "rewards/Format/mean": 0.8078346252441406,
329
+ "rewards/Format/std": 0.42168736457824707,
330
+ "step": 20
331
+ },
332
+ {
333
+ "completions/clipped_ratio": 0.0,
334
+ "completions/max_length": 1506.0,
335
+ "completions/mean_length": 323.58929443359375,
336
+ "completions/min_length": 160.0,
337
+ "epoch": 0.04411764705882353,
338
+ "frac_reward_zero_std": 0.0,
339
+ "grad_norm": 0.023742562097239708,
340
+ "learning_rate": 4.976026077188012e-07,
341
+ "loss": 2.5494955480098724e-08,
342
+ "reward": 0.8054117560386658,
343
+ "reward_std": 0.22615350782871246,
344
+ "rewards/Format/mean": 0.8054117560386658,
345
+ "rewards/Format/std": 0.42718493938446045,
346
+ "step": 21
347
+ },
348
+ {
349
+ "completions/clipped_ratio": 0.0,
350
+ "completions/max_length": 768.0,
351
+ "completions/mean_length": 293.1696472167969,
352
+ "completions/min_length": 199.0,
353
+ "epoch": 0.046218487394957986,
354
+ "frac_reward_zero_std": 0.0,
355
+ "grad_norm": 0.022976623854574853,
356
+ "learning_rate": 4.973692591811548e-07,
357
+ "loss": -2.1614445344653177e-08,
358
+ "reward": 0.7918370962142944,
359
+ "reward_std": 0.22367800772190094,
360
+ "rewards/Format/mean": 0.7918370366096497,
361
+ "rewards/Format/std": 0.40376970171928406,
362
+ "step": 22
363
+ },
364
+ {
365
+ "completions/clipped_ratio": 0.0,
366
+ "completions/max_length": 768.0,
367
+ "completions/mean_length": 313.16070556640625,
368
+ "completions/min_length": 198.0,
369
+ "epoch": 0.04831932773109244,
370
+ "frac_reward_zero_std": 0.0,
371
+ "grad_norm": 0.019838098652474632,
372
+ "learning_rate": 4.971251353280878e-07,
373
+ "loss": -2.7066562324762344e-08,
374
+ "reward": 0.7928844690322876,
375
+ "reward_std": 0.2446475327014923,
376
+ "rewards/Format/mean": 0.7928844690322876,
377
+ "rewards/Format/std": 0.43465301394462585,
378
+ "step": 23
379
+ },
380
+ {
381
+ "completions/clipped_ratio": 0.0,
382
+ "completions/max_length": 1342.0,
383
+ "completions/mean_length": 326.0714416503906,
384
+ "completions/min_length": 199.0,
385
+ "epoch": 0.05042016806722689,
386
+ "frac_reward_zero_std": 0.0,
387
+ "grad_norm": 0.02218711944758538,
388
+ "learning_rate": 4.968702467935469e-07,
389
+ "loss": -3.345000365584383e-08,
390
+ "reward": 0.7154150605201721,
391
+ "reward_std": 0.245140939950943,
392
+ "rewards/Format/mean": 0.7154150605201721,
393
+ "rewards/Format/std": 0.4379575848579407,
394
+ "step": 24
395
+ },
396
+ {
397
+ "completions/clipped_ratio": 0.0,
398
+ "completions/max_length": 769.0,
399
+ "completions/mean_length": 340.9821472167969,
400
+ "completions/min_length": 198.0,
401
+ "epoch": 0.052521008403361345,
402
+ "frac_reward_zero_std": 0.0,
403
+ "grad_norm": 0.020876166915487417,
404
+ "learning_rate": 4.966046046803842e-07,
405
+ "loss": -7.101334631443024e-09,
406
+ "reward": 0.8230172991752625,
407
+ "reward_std": 0.18825757503509521,
408
+ "rewards/Format/mean": 0.8230172991752625,
409
+ "rewards/Format/std": 0.42464303970336914,
410
+ "step": 25
411
+ },
412
+ {
413
+ "completions/clipped_ratio": 0.0,
414
+ "completions/max_length": 687.0,
415
+ "completions/mean_length": 265.13690185546875,
416
+ "completions/min_length": 198.0,
417
+ "epoch": 0.0546218487394958,
418
+ "frac_reward_zero_std": 0.0,
419
+ "grad_norm": 0.02215521070006038,
420
+ "learning_rate": 4.963282205598744e-07,
421
+ "loss": -4.260024866198364e-07,
422
+ "reward": 0.7306621670722961,
423
+ "reward_std": 0.2076164036989212,
424
+ "rewards/Format/mean": 0.7306621074676514,
425
+ "rewards/Format/std": 0.43059706687927246,
426
+ "step": 26
427
+ },
428
+ {
429
+ "completions/clipped_ratio": 0.0,
430
+ "completions/max_length": 769.0,
431
+ "completions/mean_length": 315.25299072265625,
432
+ "completions/min_length": 198.0,
433
+ "epoch": 0.05672268907563025,
434
+ "frac_reward_zero_std": 0.0,
435
+ "grad_norm": 0.02286620326069718,
436
+ "learning_rate": 4.960411064712094e-07,
437
+ "loss": 3.905924472746847e-08,
438
+ "reward": 0.731938898563385,
439
+ "reward_std": 0.18439903855323792,
440
+ "rewards/Format/mean": 0.731938898563385,
441
+ "rewards/Format/std": 0.4483509063720703,
442
+ "step": 27
443
+ },
444
+ {
445
+ "completions/clipped_ratio": 0.0,
446
+ "completions/max_length": 768.0,
447
+ "completions/mean_length": 320.06549072265625,
448
+ "completions/min_length": 198.0,
449
+ "epoch": 0.058823529411764705,
450
+ "frac_reward_zero_std": 0.0,
451
+ "grad_norm": 0.023622422761631585,
452
+ "learning_rate": 4.957432749209755e-07,
453
+ "loss": 3.764095524161348e-08,
454
+ "reward": 0.6645475029945374,
455
+ "reward_std": 0.21483086049556732,
456
+ "rewards/Format/mean": 0.6645474433898926,
457
+ "rewards/Format/std": 0.48270633816719055,
458
+ "step": 28
459
+ },
460
+ {
461
+ "completions/clipped_ratio": 0.0,
462
+ "completions/max_length": 768.0,
463
+ "completions/mean_length": 314.22320556640625,
464
+ "completions/min_length": 199.0,
465
+ "epoch": 0.06092436974789916,
466
+ "frac_reward_zero_std": 0.0,
467
+ "grad_norm": 0.02371833835622164,
468
+ "learning_rate": 4.954347388826068e-07,
469
+ "loss": 4.532436648219118e-08,
470
+ "reward": 0.76105135679245,
471
+ "reward_std": 0.17552907764911652,
472
+ "rewards/Format/mean": 0.7610514760017395,
473
+ "rewards/Format/std": 0.4020070731639862,
474
+ "step": 29
475
+ },
476
+ {
477
+ "completions/clipped_ratio": 0.0,
478
+ "completions/max_length": 1506.0,
479
+ "completions/mean_length": 334.9077453613281,
480
+ "completions/min_length": 199.0,
481
+ "epoch": 0.06302521008403361,
482
+ "frac_reward_zero_std": 0.0,
483
+ "grad_norm": 0.023203664004931217,
484
+ "learning_rate": 4.951155117958216e-07,
485
+ "loss": 1.6763806343078613e-08,
486
+ "reward": 0.7118187546730042,
487
+ "reward_std": 0.22034096717834473,
488
+ "rewards/Format/mean": 0.7118187546730042,
489
+ "rewards/Format/std": 0.38407468795776367,
490
+ "step": 30
491
+ },
492
+ {
493
+ "completions/clipped_ratio": 0.0,
494
+ "completions/max_length": 768.0,
495
+ "completions/mean_length": 280.4702453613281,
496
+ "completions/min_length": 160.0,
497
+ "epoch": 0.06512605042016807,
498
+ "frac_reward_zero_std": 0.0,
499
+ "grad_norm": 0.023918645212801567,
500
+ "learning_rate": 4.947856075660365e-07,
501
+ "loss": -1.1688098311424255e-07,
502
+ "reward": 0.7921440005302429,
503
+ "reward_std": 0.22425949573516846,
504
+ "rewards/Format/mean": 0.7921439409255981,
505
+ "rewards/Format/std": 0.4136338233947754,
506
+ "step": 31
507
+ },
508
+ {
509
+ "completions/clipped_ratio": 0.0,
510
+ "completions/max_length": 1342.0,
511
+ "completions/mean_length": 324.1160888671875,
512
+ "completions/min_length": 199.0,
513
+ "epoch": 0.06722689075630252,
514
+ "frac_reward_zero_std": 0.0,
515
+ "grad_norm": 0.023742767036121413,
516
+ "learning_rate": 4.944450405637601e-07,
517
+ "loss": -1.1253481702055979e-08,
518
+ "reward": 0.8076608180999756,
519
+ "reward_std": 0.2165224552154541,
520
+ "rewards/Format/mean": 0.8076607584953308,
521
+ "rewards/Format/std": 0.44222164154052734,
522
+ "step": 32
523
+ },
524
+ {
525
+ "completions/clipped_ratio": 0.0,
526
+ "completions/max_length": 1506.0,
527
+ "completions/mean_length": 332.67559814453125,
528
+ "completions/min_length": 120.0,
529
+ "epoch": 0.06932773109243698,
530
+ "frac_reward_zero_std": 0.0,
531
+ "grad_norm": 0.02209550077274739,
532
+ "learning_rate": 4.940938256239681e-07,
533
+ "loss": 2.1358331991905288e-07,
534
+ "reward": 0.7858653664588928,
535
+ "reward_std": 0.2149583101272583,
536
+ "rewards/Format/mean": 0.785865306854248,
537
+ "rewards/Format/std": 0.4652771055698395,
538
+ "step": 33
539
+ },
540
+ {
541
+ "completions/clipped_ratio": 0.0,
542
+ "completions/max_length": 769.0,
543
+ "completions/mean_length": 319.1011962890625,
544
+ "completions/min_length": 120.0,
545
+ "epoch": 0.07142857142857142,
546
+ "frac_reward_zero_std": 0.0,
547
+ "grad_norm": 0.021654820418371478,
548
+ "learning_rate": 4.937319780454559e-07,
549
+ "loss": -7.737738627611179e-08,
550
+ "reward": 0.7102841734886169,
551
+ "reward_std": 0.256997287273407,
552
+ "rewards/Format/mean": 0.7102841734886169,
553
+ "rewards/Format/std": 0.424626886844635,
554
+ "step": 34
555
+ },
556
+ {
557
+ "completions/clipped_ratio": 0.0,
558
+ "completions/max_length": 768.0,
559
+ "completions/mean_length": 315.98809814453125,
560
+ "completions/min_length": 120.0,
561
+ "epoch": 0.07352941176470588,
562
+ "frac_reward_zero_std": 0.0,
563
+ "grad_norm": 0.021693853262730635,
564
+ "learning_rate": 4.933595135901732e-07,
565
+ "loss": 7.966688997385063e-08,
566
+ "reward": 0.7334724068641663,
567
+ "reward_std": 0.2136024832725525,
568
+ "rewards/Format/mean": 0.7334724068641663,
569
+ "rewards/Format/std": 0.4270455837249756,
570
+ "step": 35
571
+ },
572
+ {
573
+ "completions/clipped_ratio": 0.0,
574
+ "completions/max_length": 768.0,
575
+ "completions/mean_length": 303.7053527832031,
576
+ "completions/min_length": 120.0,
577
+ "epoch": 0.07563025210084033,
578
+ "frac_reward_zero_std": 0.0,
579
+ "grad_norm": 0.02198882336822173,
580
+ "learning_rate": 4.929764484825369e-07,
581
+ "loss": 1.1862721294164658e-07,
582
+ "reward": 0.7203904986381531,
583
+ "reward_std": 0.21221692860126495,
584
+ "rewards/Format/mean": 0.7203904986381531,
585
+ "rewards/Format/std": 0.44525712728500366,
586
+ "step": 36
587
+ },
588
+ {
589
+ "completions/clipped_ratio": 0.0,
590
+ "completions/max_length": 1342.0,
591
+ "completions/mean_length": 324.9821472167969,
592
+ "completions/min_length": 160.0,
593
+ "epoch": 0.07773109243697479,
594
+ "frac_reward_zero_std": 0.0,
595
+ "grad_norm": 0.021317827313861167,
596
+ "learning_rate": 4.925827994087244e-07,
597
+ "loss": 7.792065730427566e-07,
598
+ "reward": 0.804713785648346,
599
+ "reward_std": 0.25957217812538147,
600
+ "rewards/Format/mean": 0.8047137260437012,
601
+ "rewards/Format/std": 0.4052075445652008,
602
+ "step": 37
603
+ },
604
+ {
605
+ "completions/clipped_ratio": 0.0,
606
+ "completions/max_length": 768.0,
607
+ "completions/mean_length": 282.6785888671875,
608
+ "completions/min_length": 199.0,
609
+ "epoch": 0.07983193277310924,
610
+ "frac_reward_zero_std": 0.0,
611
+ "grad_norm": 0.023450792607016036,
612
+ "learning_rate": 4.921785835159471e-07,
613
+ "loss": -2.578599378466606e-08,
614
+ "reward": 0.8093736171722412,
615
+ "reward_std": 0.2329341322183609,
616
+ "rewards/Format/mean": 0.809373676776886,
617
+ "rewards/Format/std": 0.3801536560058594,
618
+ "step": 38
619
+ },
620
+ {
621
+ "completions/clipped_ratio": 0.0,
622
+ "completions/max_length": 768.0,
623
+ "completions/mean_length": 336.31549072265625,
624
+ "completions/min_length": 198.0,
625
+ "epoch": 0.0819327731092437,
626
+ "frac_reward_zero_std": 0.0,
627
+ "grad_norm": 0.019551884825023486,
628
+ "learning_rate": 4.917638184117029e-07,
629
+ "loss": 7.106878285867424e-08,
630
+ "reward": 0.7056736946105957,
631
+ "reward_std": 0.23160889744758606,
632
+ "rewards/Format/mean": 0.7056736946105957,
633
+ "rewards/Format/std": 0.393891841173172,
634
+ "step": 39
635
+ },
636
+ {
637
+ "completions/clipped_ratio": 0.0,
638
+ "completions/max_length": 687.0,
639
+ "completions/mean_length": 295.9940490722656,
640
+ "completions/min_length": 160.0,
641
+ "epoch": 0.08403361344537816,
642
+ "frac_reward_zero_std": 0.0,
643
+ "grad_norm": 0.022024929736522203,
644
+ "learning_rate": 4.913385221630096e-07,
645
+ "loss": -5.975986816508794e-09,
646
+ "reward": 0.7073485851287842,
647
+ "reward_std": 0.19632889330387115,
648
+ "rewards/Format/mean": 0.7073485851287842,
649
+ "rewards/Format/std": 0.42258939146995544,
650
+ "step": 40
651
+ },
652
+ {
653
+ "completions/clipped_ratio": 0.0,
654
+ "completions/max_length": 768.0,
655
+ "completions/mean_length": 293.9761962890625,
656
+ "completions/min_length": 160.0,
657
+ "epoch": 0.0861344537815126,
658
+ "frac_reward_zero_std": 0.0,
659
+ "grad_norm": 0.02232842099342926,
660
+ "learning_rate": 4.90902713295618e-07,
661
+ "loss": 2.5068100484304523e-08,
662
+ "reward": 0.7132770419120789,
663
+ "reward_std": 0.19586697220802307,
664
+ "rewards/Format/mean": 0.7132770419120789,
665
+ "rewards/Format/std": 0.42476534843444824,
666
+ "step": 41
667
+ },
668
+ {
669
+ "completions/clipped_ratio": 0.0,
670
+ "completions/max_length": 768.0,
671
+ "completions/mean_length": 311.27679443359375,
672
+ "completions/min_length": 198.0,
673
+ "epoch": 0.08823529411764706,
674
+ "frac_reward_zero_std": 0.0,
675
+ "grad_norm": 0.02140426288104263,
676
+ "learning_rate": 4.904564107932048e-07,
677
+ "loss": -1.008932848378663e-08,
678
+ "reward": 0.7778394222259521,
679
+ "reward_std": 0.21497690677642822,
680
+ "rewards/Format/mean": 0.7778393626213074,
681
+ "rewards/Format/std": 0.37954428791999817,
682
+ "step": 42
683
+ },
684
+ {
685
+ "completions/clipped_ratio": 0.0,
686
+ "completions/max_length": 1506.0,
687
+ "completions/mean_length": 315.7083435058594,
688
+ "completions/min_length": 198.0,
689
+ "epoch": 0.09033613445378151,
690
+ "frac_reward_zero_std": 0.0,
691
+ "grad_norm": 0.022736832042720416,
692
+ "learning_rate": 4.899996340965453e-07,
693
+ "loss": -1.0710209608078003e-08,
694
+ "reward": 0.755723774433136,
695
+ "reward_std": 0.24569866061210632,
696
+ "rewards/Format/mean": 0.7557238340377808,
697
+ "rewards/Format/std": 0.4109819829463959,
698
+ "step": 43
699
+ },
700
+ {
701
+ "completions/clipped_ratio": 0.0,
702
+ "completions/max_length": 768.0,
703
+ "completions/mean_length": 344.2113037109375,
704
+ "completions/min_length": 199.0,
705
+ "epoch": 0.09243697478991597,
706
+ "frac_reward_zero_std": 0.0,
707
+ "grad_norm": 0.02138771387814232,
708
+ "learning_rate": 4.89532403102667e-07,
709
+ "loss": -1.4513110713210153e-08,
710
+ "reward": 0.7406675219535828,
711
+ "reward_std": 0.21497930586338043,
712
+ "rewards/Format/mean": 0.7406675815582275,
713
+ "rewards/Format/std": 0.4190816879272461,
714
+ "step": 44
715
+ },
716
+ {
717
+ "completions/clipped_ratio": 0.0,
718
+ "completions/max_length": 768.0,
719
+ "completions/mean_length": 309.27679443359375,
720
+ "completions/min_length": 159.0,
721
+ "epoch": 0.09453781512605042,
722
+ "frac_reward_zero_std": 0.0,
723
+ "grad_norm": 0.022871068366824305,
724
+ "learning_rate": 4.890547381639833e-07,
725
+ "loss": -7.729977369308472e-08,
726
+ "reward": 0.7762974500656128,
727
+ "reward_std": 0.21736590564250946,
728
+ "rewards/Format/mean": 0.7762974500656128,
729
+ "rewards/Format/std": 0.4111401438713074,
730
+ "step": 45
731
+ },
732
+ {
733
+ "completions/clipped_ratio": 0.0,
734
+ "completions/max_length": 1342.0,
735
+ "completions/mean_length": 343.5535888671875,
736
+ "completions/min_length": 197.0,
737
+ "epoch": 0.09663865546218488,
738
+ "frac_reward_zero_std": 0.0,
739
+ "grad_norm": 0.02093852191820869,
740
+ "learning_rate": 4.885666600874058e-07,
741
+ "loss": 2.7900873078579025e-07,
742
+ "reward": 0.854030966758728,
743
+ "reward_std": 0.18537142872810364,
744
+ "rewards/Format/mean": 0.854030966758728,
745
+ "rewards/Format/std": 0.4128037691116333,
746
+ "step": 46
747
+ },
748
+ {
749
+ "completions/clipped_ratio": 0.0,
750
+ "completions/max_length": 768.0,
751
+ "completions/mean_length": 296.8452453613281,
752
+ "completions/min_length": 198.0,
753
+ "epoch": 0.09873949579831932,
754
+ "frac_reward_zero_std": 0.0,
755
+ "grad_norm": 0.023946826754021152,
756
+ "learning_rate": 4.88068190133439e-07,
757
+ "loss": -2.1082261270066738e-08,
758
+ "reward": 0.7467535138130188,
759
+ "reward_std": 0.2100360095500946,
760
+ "rewards/Format/mean": 0.7467535138130188,
761
+ "rewards/Format/std": 0.439401239156723,
762
+ "step": 47
763
+ },
764
+ {
765
+ "completions/clipped_ratio": 0.0,
766
+ "completions/max_length": 768.0,
767
+ "completions/mean_length": 298.8601379394531,
768
+ "completions/min_length": 159.0,
769
+ "epoch": 0.10084033613445378,
770
+ "frac_reward_zero_std": 0.0,
771
+ "grad_norm": 0.024341956446782127,
772
+ "learning_rate": 4.875593500152537e-07,
773
+ "loss": -9.693516034303684e-08,
774
+ "reward": 0.8396908044815063,
775
+ "reward_std": 0.2241268903017044,
776
+ "rewards/Format/mean": 0.8396908044815063,
777
+ "rewards/Format/std": 0.44729915261268616,
778
+ "step": 48
779
+ },
780
+ {
781
+ "completions/clipped_ratio": 0.0,
782
+ "completions/max_length": 768.0,
783
+ "completions/mean_length": 349.0922546386719,
784
+ "completions/min_length": 198.0,
785
+ "epoch": 0.10294117647058823,
786
+ "frac_reward_zero_std": 0.0,
787
+ "grad_norm": 0.021906888076953825,
788
+ "learning_rate": 4.870401618977415e-07,
789
+ "loss": -8.692344266592045e-08,
790
+ "reward": 0.7963646650314331,
791
+ "reward_std": 0.2095474749803543,
792
+ "rewards/Format/mean": 0.7963647246360779,
793
+ "rewards/Format/std": 0.4469922184944153,
794
+ "step": 49
795
+ },
796
+ {
797
+ "completions/clipped_ratio": 0.0,
798
+ "completions/max_length": 768.0,
799
+ "completions/mean_length": 312.8988037109375,
800
+ "completions/min_length": 197.0,
801
+ "epoch": 0.10504201680672269,
802
+ "frac_reward_zero_std": 0.0,
803
+ "grad_norm": 0.022507536722345175,
804
+ "learning_rate": 4.865106483965486e-07,
805
+ "loss": -1.067301923285413e-06,
806
+ "reward": 0.7550658583641052,
807
+ "reward_std": 0.19095541536808014,
808
+ "rewards/Format/mean": 0.7550658583641052,
809
+ "rewards/Format/std": 0.4258660078048706,
810
+ "step": 50
811
+ },
812
+ {
813
+ "completions/clipped_ratio": 0.0,
814
+ "completions/max_length": 1506.0,
815
+ "completions/mean_length": 355.8511962890625,
816
+ "completions/min_length": 198.0,
817
+ "epoch": 0.10714285714285714,
818
+ "frac_reward_zero_std": 0.0,
819
+ "grad_norm": 0.02015228453830586,
820
+ "learning_rate": 4.859708325770919e-07,
821
+ "loss": 1.4814682458563766e-07,
822
+ "reward": 0.7671193480491638,
823
+ "reward_std": 0.22645963728427887,
824
+ "rewards/Format/mean": 0.7671194672584534,
825
+ "rewards/Format/std": 0.38804346323013306,
826
+ "step": 51
827
+ },
828
+ {
829
+ "completions/clipped_ratio": 0.0,
830
+ "completions/max_length": 768.0,
831
+ "completions/mean_length": 356.6815490722656,
832
+ "completions/min_length": 198.0,
833
+ "epoch": 0.1092436974789916,
834
+ "frac_reward_zero_std": 0.0,
835
+ "grad_norm": 0.021174633775772358,
836
+ "learning_rate": 4.854207379535528e-07,
837
+ "loss": 4.93989027461339e-08,
838
+ "reward": 0.7558528780937195,
839
+ "reward_std": 0.18881027400493622,
840
+ "rewards/Format/mean": 0.7558528184890747,
841
+ "rewards/Format/std": 0.3856556713581085,
842
+ "step": 52
843
+ },
844
+ {
845
+ "completions/clipped_ratio": 0.0,
846
+ "completions/max_length": 768.0,
847
+ "completions/mean_length": 355.6190490722656,
848
+ "completions/min_length": 199.0,
849
+ "epoch": 0.11134453781512606,
850
+ "frac_reward_zero_std": 0.0,
851
+ "grad_norm": 0.020084274827431532,
852
+ "learning_rate": 4.848603884878543e-07,
853
+ "loss": -1.2091672374481277e-07,
854
+ "reward": 0.7519421577453613,
855
+ "reward_std": 0.18212375044822693,
856
+ "rewards/Format/mean": 0.7519420385360718,
857
+ "rewards/Format/std": 0.4610062837600708,
858
+ "step": 53
859
+ },
860
+ {
861
+ "completions/clipped_ratio": 0.0,
862
+ "completions/max_length": 768.0,
863
+ "completions/mean_length": 324.8571472167969,
864
+ "completions/min_length": 198.0,
865
+ "epoch": 0.1134453781512605,
866
+ "frac_reward_zero_std": 0.0,
867
+ "grad_norm": 0.021072603055607323,
868
+ "learning_rate": 4.842898085886164e-07,
869
+ "loss": 1.2945383787155151e-07,
870
+ "reward": 0.760277271270752,
871
+ "reward_std": 0.20669521391391754,
872
+ "rewards/Format/mean": 0.7602772116661072,
873
+ "rewards/Format/std": 0.419103741645813,
874
+ "step": 54
875
+ },
876
+ {
877
+ "completions/clipped_ratio": 0.0,
878
+ "completions/max_length": 769.0,
879
+ "completions/mean_length": 276.7113037109375,
880
+ "completions/min_length": 199.0,
881
+ "epoch": 0.11554621848739496,
882
+ "frac_reward_zero_std": 0.0,
883
+ "grad_norm": 0.02261640755115689,
884
+ "learning_rate": 4.837090231100927e-07,
885
+ "loss": -8.467274881240883e-08,
886
+ "reward": 0.6966712474822998,
887
+ "reward_std": 0.22269676625728607,
888
+ "rewards/Format/mean": 0.696671187877655,
889
+ "rewards/Format/std": 0.41274622082710266,
890
+ "step": 55
891
+ },
892
+ {
893
+ "completions/clipped_ratio": 0.0,
894
+ "completions/max_length": 768.0,
895
+ "completions/mean_length": 277.6220397949219,
896
+ "completions/min_length": 198.0,
897
+ "epoch": 0.11764705882352941,
898
+ "frac_reward_zero_std": 0.0,
899
+ "grad_norm": 0.022558099386400868,
900
+ "learning_rate": 4.83118057351089e-07,
901
+ "loss": -1.909090130425284e-08,
902
+ "reward": 0.8017712831497192,
903
+ "reward_std": 0.16136251389980316,
904
+ "rewards/Format/mean": 0.8017712831497192,
905
+ "rewards/Format/std": 0.4239819645881653,
906
+ "step": 56
907
+ },
908
+ {
909
+ "completions/clipped_ratio": 0.0,
910
+ "completions/max_length": 768.0,
911
+ "completions/mean_length": 313.4702453613281,
912
+ "completions/min_length": 198.0,
913
+ "epoch": 0.11974789915966387,
914
+ "frac_reward_zero_std": 0.0,
915
+ "grad_norm": 0.021065495134534075,
916
+ "learning_rate": 4.825169370538594e-07,
917
+ "loss": 4.1272002704317856e-08,
918
+ "reward": 0.8284590840339661,
919
+ "reward_std": 0.2156655192375183,
920
+ "rewards/Format/mean": 0.8284590840339661,
921
+ "rewards/Format/std": 0.46244877576828003,
922
+ "step": 57
923
+ },
924
+ {
925
+ "completions/clipped_ratio": 0.0,
926
+ "completions/max_length": 768.0,
927
+ "completions/mean_length": 271.9434509277344,
928
+ "completions/min_length": 199.0,
929
+ "epoch": 0.12184873949579832,
930
+ "frac_reward_zero_std": 0.0,
931
+ "grad_norm": 0.02105719182994569,
932
+ "learning_rate": 4.819056884029869e-07,
933
+ "loss": 4.602285841315279e-08,
934
+ "reward": 0.7805833220481873,
935
+ "reward_std": 0.23237991333007812,
936
+ "rewards/Format/mean": 0.7805833220481873,
937
+ "rewards/Format/std": 0.4038439393043518,
938
+ "step": 58
939
+ },
940
+ {
941
+ "completions/clipped_ratio": 0.0,
942
+ "completions/max_length": 1506.0,
943
+ "completions/mean_length": 336.7976379394531,
944
+ "completions/min_length": 159.0,
945
+ "epoch": 0.12394957983193278,
946
+ "frac_reward_zero_std": 0.0,
947
+ "grad_norm": 0.023453861537187842,
948
+ "learning_rate": 4.812843380242414e-07,
949
+ "loss": -1.7229467630386353e-08,
950
+ "reward": 0.7510617971420288,
951
+ "reward_std": 0.2485683560371399,
952
+ "rewards/Format/mean": 0.751061737537384,
953
+ "rewards/Format/std": 0.4163859188556671,
954
+ "step": 59
955
+ },
956
+ {
957
+ "completions/clipped_ratio": 0.0,
958
+ "completions/max_length": 768.0,
959
+ "completions/mean_length": 317.0297546386719,
960
+ "completions/min_length": 199.0,
961
+ "epoch": 0.12605042016806722,
962
+ "frac_reward_zero_std": 0.0,
963
+ "grad_norm": 0.021394320119888805,
964
+ "learning_rate": 4.806529129834207e-07,
965
+ "loss": -1.9829411002092456e-08,
966
+ "reward": 0.7956244349479675,
967
+ "reward_std": 0.21525447070598602,
968
+ "rewards/Format/mean": 0.7956244945526123,
969
+ "rewards/Format/std": 0.40508654713630676,
970
+ "step": 60
971
+ },
972
+ {
973
+ "completions/clipped_ratio": 0.0,
974
+ "completions/max_length": 768.0,
975
+ "completions/mean_length": 283.47918701171875,
976
+ "completions/min_length": 160.0,
977
+ "epoch": 0.12815126050420167,
978
+ "frac_reward_zero_std": 0.0,
979
+ "grad_norm": 0.022439700157567594,
980
+ "learning_rate": 4.80011440785171e-07,
981
+ "loss": 7.28643499314785e-07,
982
+ "reward": 0.7234399318695068,
983
+ "reward_std": 0.21129381656646729,
984
+ "rewards/Format/mean": 0.7234399318695068,
985
+ "rewards/Format/std": 0.4189586043357849,
986
+ "step": 61
987
+ },
988
+ {
989
+ "completions/clipped_ratio": 0.0,
990
+ "completions/max_length": 522.0,
991
+ "completions/mean_length": 298.375,
992
+ "completions/min_length": 198.0,
993
+ "epoch": 0.13025210084033614,
994
+ "frac_reward_zero_std": 0.0,
995
+ "grad_norm": 0.02137696717873036,
996
+ "learning_rate": 4.79359949371789e-07,
997
+ "loss": -3.476937848745365e-08,
998
+ "reward": 0.7402118444442749,
999
+ "reward_std": 0.23357845842838287,
1000
+ "rewards/Format/mean": 0.7402118444442749,
1001
+ "rewards/Format/std": 0.40814313292503357,
1002
+ "step": 62
1003
+ },
1004
+ {
1005
+ "completions/clipped_ratio": 0.0,
1006
+ "completions/max_length": 1507.0,
1007
+ "completions/mean_length": 320.3809509277344,
1008
+ "completions/min_length": 199.0,
1009
+ "epoch": 0.1323529411764706,
1010
+ "frac_reward_zero_std": 0.0,
1011
+ "grad_norm": 0.02234537350858673,
1012
+ "learning_rate": 4.786984671220053e-07,
1013
+ "loss": -1.841302399441247e-08,
1014
+ "reward": 0.658408522605896,
1015
+ "reward_std": 0.22749584913253784,
1016
+ "rewards/Format/mean": 0.6584084630012512,
1017
+ "rewards/Format/std": 0.4250440001487732,
1018
+ "step": 63
1019
+ },
1020
+ {
1021
+ "completions/clipped_ratio": 0.0,
1022
+ "completions/max_length": 768.0,
1023
+ "completions/mean_length": 298.8095397949219,
1024
+ "completions/min_length": 199.0,
1025
+ "epoch": 0.13445378151260504,
1026
+ "frac_reward_zero_std": 0.0,
1027
+ "grad_norm": 0.022527706502464447,
1028
+ "learning_rate": 4.780270228497469e-07,
1029
+ "loss": -2.9297856585230875e-08,
1030
+ "reward": 0.607234537601471,
1031
+ "reward_std": 0.22511887550354004,
1032
+ "rewards/Format/mean": 0.6072344779968262,
1033
+ "rewards/Format/std": 0.42338699102401733,
1034
+ "step": 64
1035
+ },
1036
+ {
1037
+ "completions/clipped_ratio": 0.0,
1038
+ "completions/max_length": 686.0,
1039
+ "completions/mean_length": 288.1160888671875,
1040
+ "completions/min_length": 159.0,
1041
+ "epoch": 0.13655462184873948,
1042
+ "frac_reward_zero_std": 0.0,
1043
+ "grad_norm": 0.022025433234447573,
1044
+ "learning_rate": 4.773456458028837e-07,
1045
+ "loss": -1.1494073532958282e-06,
1046
+ "reward": 0.7116742134094238,
1047
+ "reward_std": 0.23428229987621307,
1048
+ "rewards/Format/mean": 0.7116742134094238,
1049
+ "rewards/Format/std": 0.4564502239227295,
1050
+ "step": 65
1051
+ },
1052
+ {
1053
+ "completions/clipped_ratio": 0.0,
1054
+ "completions/max_length": 768.0,
1055
+ "completions/mean_length": 325.8809509277344,
1056
+ "completions/min_length": 198.0,
1057
+ "epoch": 0.13865546218487396,
1058
+ "frac_reward_zero_std": 0.0,
1059
+ "grad_norm": 0.020350840110300188,
1060
+ "learning_rate": 4.7665436566195315e-07,
1061
+ "loss": -1.0236787773010292e-07,
1062
+ "reward": 0.7899753451347351,
1063
+ "reward_std": 0.17132306098937988,
1064
+ "rewards/Format/mean": 0.7899752259254456,
1065
+ "rewards/Format/std": 0.45619064569473267,
1066
+ "step": 66
1067
+ },
1068
+ {
1069
+ "completions/clipped_ratio": 0.0,
1070
+ "completions/max_length": 768.0,
1071
+ "completions/mean_length": 302.61309814453125,
1072
+ "completions/min_length": 198.0,
1073
+ "epoch": 0.1407563025210084,
1074
+ "frac_reward_zero_std": 0.0,
1075
+ "grad_norm": 0.020800900406587757,
1076
+ "learning_rate": 4.75953212538868e-07,
1077
+ "loss": -5.005616543485303e-08,
1078
+ "reward": 0.7871248126029968,
1079
+ "reward_std": 0.1806209683418274,
1080
+ "rewards/Format/mean": 0.7871248126029968,
1081
+ "rewards/Format/std": 0.41989296674728394,
1082
+ "step": 67
1083
+ },
1084
+ {
1085
+ "completions/clipped_ratio": 0.0,
1086
+ "completions/max_length": 769.0,
1087
+ "completions/mean_length": 273.7738037109375,
1088
+ "completions/min_length": 198.0,
1089
+ "epoch": 0.14285714285714285,
1090
+ "frac_reward_zero_std": 0.0,
1091
+ "grad_norm": 0.023831714283269304,
1092
+ "learning_rate": 4.752422169756047e-07,
1093
+ "loss": -1.0067597031593323e-06,
1094
+ "reward": 0.6903886198997498,
1095
+ "reward_std": 0.17775125801563263,
1096
+ "rewards/Format/mean": 0.6903886198997498,
1097
+ "rewards/Format/std": 0.4055922329425812,
1098
+ "step": 68
1099
+ },
1100
+ {
1101
+ "completions/clipped_ratio": 0.0,
1102
+ "completions/max_length": 850.0,
1103
+ "completions/mean_length": 322.8303527832031,
1104
+ "completions/min_length": 199.0,
1105
+ "epoch": 0.14495798319327732,
1106
+ "frac_reward_zero_std": 0.0,
1107
+ "grad_norm": 0.022193015718304492,
1108
+ "learning_rate": 4.745214099428728e-07,
1109
+ "loss": 1.5910094353088766e-09,
1110
+ "reward": 0.7548283338546753,
1111
+ "reward_std": 0.2273932248353958,
1112
+ "rewards/Format/mean": 0.7548283338546753,
1113
+ "rewards/Format/std": 0.4323543906211853,
1114
+ "step": 69
1115
+ },
1116
+ {
1117
+ "completions/clipped_ratio": 0.0,
1118
+ "completions/max_length": 769.0,
1119
+ "completions/mean_length": 278.47918701171875,
1120
+ "completions/min_length": 198.0,
1121
+ "epoch": 0.14705882352941177,
1122
+ "frac_reward_zero_std": 0.0,
1123
+ "grad_norm": 0.022765015523825417,
1124
+ "learning_rate": 4.737908228387656e-07,
1125
+ "loss": 1.4939967485361194e-08,
1126
+ "reward": 0.7890443205833435,
1127
+ "reward_std": 0.20072004199028015,
1128
+ "rewards/Format/mean": 0.7890443205833435,
1129
+ "rewards/Format/std": 0.39575085043907166,
1130
+ "step": 70
1131
+ },
1132
+ {
1133
+ "completions/clipped_ratio": 0.0,
1134
+ "completions/max_length": 687.0,
1135
+ "completions/mean_length": 288.1726379394531,
1136
+ "completions/min_length": 198.0,
1137
+ "epoch": 0.14915966386554622,
1138
+ "frac_reward_zero_std": 0.0,
1139
+ "grad_norm": 0.023908175533162216,
1140
+ "learning_rate": 4.7305048748739307e-07,
1141
+ "loss": 1.121467629872086e-08,
1142
+ "reward": 0.7613670825958252,
1143
+ "reward_std": 0.2253635972738266,
1144
+ "rewards/Format/mean": 0.76136714220047,
1145
+ "rewards/Format/std": 0.42572322487831116,
1146
+ "step": 71
1147
+ },
1148
+ {
1149
+ "completions/clipped_ratio": 0.0,
1150
+ "completions/max_length": 769.0,
1151
+ "completions/mean_length": 337.3928527832031,
1152
+ "completions/min_length": 198.0,
1153
+ "epoch": 0.15126050420168066,
1154
+ "frac_reward_zero_std": 0.0,
1155
+ "grad_norm": 0.02097626897929019,
1156
+ "learning_rate": 4.7230043613749527e-07,
1157
+ "loss": 1.257285475730896e-08,
1158
+ "reward": 0.7256394624710083,
1159
+ "reward_std": 0.22580596804618835,
1160
+ "rewards/Format/mean": 0.7256394624710083,
1161
+ "rewards/Format/std": 0.39843472838401794,
1162
+ "step": 72
1163
+ },
1164
+ {
1165
+ "completions/clipped_ratio": 0.0,
1166
+ "completions/max_length": 768.0,
1167
+ "completions/mean_length": 309.66668701171875,
1168
+ "completions/min_length": 199.0,
1169
+ "epoch": 0.15336134453781514,
1170
+ "frac_reward_zero_std": 0.0,
1171
+ "grad_norm": 0.019473547154302228,
1172
+ "learning_rate": 4.715407014610376e-07,
1173
+ "loss": 7.326404727336921e-08,
1174
+ "reward": 0.708452582359314,
1175
+ "reward_std": 0.18794287741184235,
1176
+ "rewards/Format/mean": 0.708452582359314,
1177
+ "rewards/Format/std": 0.424693763256073,
1178
+ "step": 73
1179
+ },
1180
+ {
1181
+ "completions/clipped_ratio": 0.0,
1182
+ "completions/max_length": 768.0,
1183
+ "completions/mean_length": 312.514892578125,
1184
+ "completions/min_length": 199.0,
1185
+ "epoch": 0.15546218487394958,
1186
+ "frac_reward_zero_std": 0.0,
1187
+ "grad_norm": 0.023519539078701225,
1188
+ "learning_rate": 4.7077131655178763e-07,
1189
+ "loss": -1.2495244838817143e-08,
1190
+ "reward": 0.6197510361671448,
1191
+ "reward_std": 0.2203647792339325,
1192
+ "rewards/Format/mean": 0.6197510957717896,
1193
+ "rewards/Format/std": 0.4002760946750641,
1194
+ "step": 74
1195
+ },
1196
+ {
1197
+ "completions/clipped_ratio": 0.0,
1198
+ "completions/max_length": 1342.0,
1199
+ "completions/mean_length": 331.8988037109375,
1200
+ "completions/min_length": 198.0,
1201
+ "epoch": 0.15756302521008403,
1202
+ "frac_reward_zero_std": 0.0,
1203
+ "grad_norm": 0.02372442049372717,
1204
+ "learning_rate": 4.699923149238736e-07,
1205
+ "loss": -3.0578426901684e-08,
1206
+ "reward": 0.7845470905303955,
1207
+ "reward_std": 0.16474288702011108,
1208
+ "rewards/Format/mean": 0.7845470905303955,
1209
+ "rewards/Format/std": 0.4058076739311218,
1210
+ "step": 75
1211
+ },
1212
+ {
1213
+ "completions/clipped_ratio": 0.0,
1214
+ "completions/max_length": 768.0,
1215
+ "completions/mean_length": 330.875,
1216
+ "completions/min_length": 199.0,
1217
+ "epoch": 0.15966386554621848,
1218
+ "frac_reward_zero_std": 0.0,
1219
+ "grad_norm": 0.02124154532002299,
1220
+ "learning_rate": 4.6920373051032467e-07,
1221
+ "loss": -1.1069157146437192e-08,
1222
+ "reward": 0.7532238960266113,
1223
+ "reward_std": 0.194017231464386,
1224
+ "rewards/Format/mean": 0.7532238960266113,
1225
+ "rewards/Format/std": 0.3895232677459717,
1226
+ "step": 76
1227
+ },
1228
+ {
1229
+ "completions/clipped_ratio": 0.0,
1230
+ "completions/max_length": 1342.0,
1231
+ "completions/mean_length": 377.3303527832031,
1232
+ "completions/min_length": 199.0,
1233
+ "epoch": 0.16176470588235295,
1234
+ "frac_reward_zero_std": 0.0,
1235
+ "grad_norm": 0.020858800230663294,
1236
+ "learning_rate": 4.6840559766159235e-07,
1237
+ "loss": 1.0477378964424133e-08,
1238
+ "reward": 0.7387242913246155,
1239
+ "reward_std": 0.21203239262104034,
1240
+ "rewards/Format/mean": 0.7387242913246155,
1241
+ "rewards/Format/std": 0.42181792855262756,
1242
+ "step": 77
1243
+ },
1244
+ {
1245
+ "completions/clipped_ratio": 0.0,
1246
+ "completions/max_length": 687.0,
1247
+ "completions/mean_length": 259.52679443359375,
1248
+ "completions/min_length": 198.0,
1249
+ "epoch": 0.1638655462184874,
1250
+ "frac_reward_zero_std": 0.0,
1251
+ "grad_norm": 0.02286021061978266,
1252
+ "learning_rate": 4.6759795114405485e-07,
1253
+ "loss": 3.1044087300813317e-09,
1254
+ "reward": 0.9169377088546753,
1255
+ "reward_std": 0.17799963057041168,
1256
+ "rewards/Format/mean": 0.9169377684593201,
1257
+ "rewards/Format/std": 0.4258681535720825,
1258
+ "step": 78
1259
+ },
1260
+ {
1261
+ "completions/clipped_ratio": 0.0,
1262
+ "completions/max_length": 768.0,
1263
+ "completions/mean_length": 319.3511962890625,
1264
+ "completions/min_length": 198.0,
1265
+ "epoch": 0.16596638655462184,
1266
+ "frac_reward_zero_std": 0.0,
1267
+ "grad_norm": 0.021012193835424638,
1268
+ "learning_rate": 4.667808261385022e-07,
1269
+ "loss": 1.1078858719315576e-08,
1270
+ "reward": 0.7053860425949097,
1271
+ "reward_std": 0.16264012455940247,
1272
+ "rewards/Format/mean": 0.7053859829902649,
1273
+ "rewards/Format/std": 0.42969760298728943,
1274
+ "step": 79
1275
+ },
1276
+ {
1277
+ "completions/clipped_ratio": 0.0,
1278
+ "completions/max_length": 768.0,
1279
+ "completions/mean_length": 311.3809509277344,
1280
+ "completions/min_length": 199.0,
1281
+ "epoch": 0.16806722689075632,
1282
+ "frac_reward_zero_std": 0.0,
1283
+ "grad_norm": 0.021137528456480086,
1284
+ "learning_rate": 4.65954258238604e-07,
1285
+ "loss": -4.638873463136406e-08,
1286
+ "reward": 0.764985978603363,
1287
+ "reward_std": 0.19966480135917664,
1288
+ "rewards/Format/mean": 0.7649859189987183,
1289
+ "rewards/Format/std": 0.42946040630340576,
1290
+ "step": 80
1291
+ },
1292
+ {
1293
+ "completions/clipped_ratio": 0.0,
1294
+ "completions/max_length": 768.0,
1295
+ "completions/mean_length": 310.19940185546875,
1296
+ "completions/min_length": 198.0,
1297
+ "epoch": 0.17016806722689076,
1298
+ "frac_reward_zero_std": 0.0,
1299
+ "grad_norm": 0.020084748936691238,
1300
+ "learning_rate": 4.651182834493589e-07,
1301
+ "loss": -3.507981816142092e-08,
1302
+ "reward": 0.7804700136184692,
1303
+ "reward_std": 0.1991633027791977,
1304
+ "rewards/Format/mean": 0.7804700136184692,
1305
+ "rewards/Format/std": 0.367445707321167,
1306
+ "step": 81
1307
+ },
1308
+ {
1309
+ "completions/clipped_ratio": 0.0,
1310
+ "completions/max_length": 768.0,
1311
+ "completions/mean_length": 284.16668701171875,
1312
+ "completions/min_length": 199.0,
1313
+ "epoch": 0.1722689075630252,
1314
+ "frac_reward_zero_std": 0.0,
1315
+ "grad_norm": 0.02153826431184902,
1316
+ "learning_rate": 4.6427293818552613e-07,
1317
+ "loss": 6.37470947140173e-08,
1318
+ "reward": 0.7739503979682922,
1319
+ "reward_std": 0.20568089187145233,
1320
+ "rewards/Format/mean": 0.7739503979682922,
1321
+ "rewards/Format/std": 0.3993769884109497,
1322
+ "step": 82
1323
+ },
1324
+ {
1325
+ "completions/clipped_ratio": 0.0,
1326
+ "completions/max_length": 768.0,
1327
+ "completions/mean_length": 336.3125,
1328
+ "completions/min_length": 162.0,
1329
+ "epoch": 0.17436974789915966,
1330
+ "frac_reward_zero_std": 0.0,
1331
+ "grad_norm": 0.022147267057562484,
1332
+ "learning_rate": 4.634182592700396e-07,
1333
+ "loss": 1.1331091620547795e-08,
1334
+ "reward": 0.7379960417747498,
1335
+ "reward_std": 0.22984465956687927,
1336
+ "rewards/Format/mean": 0.737995982170105,
1337
+ "rewards/Format/std": 0.3927246630191803,
1338
+ "step": 83
1339
+ },
1340
+ {
1341
+ "completions/clipped_ratio": 0.0,
1342
+ "completions/max_length": 768.0,
1343
+ "completions/mean_length": 316.0446472167969,
1344
+ "completions/min_length": 199.0,
1345
+ "epoch": 0.17647058823529413,
1346
+ "frac_reward_zero_std": 0.0,
1347
+ "grad_norm": 0.02057295767392179,
1348
+ "learning_rate": 4.6255428393240354e-07,
1349
+ "loss": 1.7622369341552258e-07,
1350
+ "reward": 0.7516170144081116,
1351
+ "reward_std": 0.22444850206375122,
1352
+ "rewards/Format/mean": 0.7516169548034668,
1353
+ "rewards/Format/std": 0.4411431849002838,
1354
+ "step": 84
1355
+ },
1356
+ {
1357
+ "completions/clipped_ratio": 0.0,
1358
+ "completions/max_length": 769.0,
1359
+ "completions/mean_length": 362.54168701171875,
1360
+ "completions/min_length": 198.0,
1361
+ "epoch": 0.17857142857142858,
1362
+ "frac_reward_zero_std": 0.0,
1363
+ "grad_norm": 0.02062685572369038,
1364
+ "learning_rate": 4.6168104980707103e-07,
1365
+ "loss": -3.3930216414290726e-09,
1366
+ "reward": 0.8316090106964111,
1367
+ "reward_std": 0.1859341859817505,
1368
+ "rewards/Format/mean": 0.8316090703010559,
1369
+ "rewards/Format/std": 0.39255210757255554,
1370
+ "step": 85
1371
+ },
1372
+ {
1373
+ "completions/clipped_ratio": 0.0,
1374
+ "completions/max_length": 768.0,
1375
+ "completions/mean_length": 339.33929443359375,
1376
+ "completions/min_length": 198.0,
1377
+ "epoch": 0.18067226890756302,
1378
+ "frac_reward_zero_std": 0.0,
1379
+ "grad_norm": 0.02208004369853976,
1380
+ "learning_rate": 4.607985949318046e-07,
1381
+ "loss": 1.8766149878501892e-07,
1382
+ "reward": 0.7510151863098145,
1383
+ "reward_std": 0.2171468734741211,
1384
+ "rewards/Format/mean": 0.7510151863098145,
1385
+ "rewards/Format/std": 0.40840059518814087,
1386
+ "step": 86
1387
+ },
1388
+ {
1389
+ "completions/clipped_ratio": 0.0,
1390
+ "completions/max_length": 686.0,
1391
+ "completions/mean_length": 310.3363037109375,
1392
+ "completions/min_length": 198.0,
1393
+ "epoch": 0.18277310924369747,
1394
+ "frac_reward_zero_std": 0.0,
1395
+ "grad_norm": 0.02226476070059597,
1396
+ "learning_rate": 4.599069577460194e-07,
1397
+ "loss": 7.55729487877943e-08,
1398
+ "reward": 0.7193702459335327,
1399
+ "reward_std": 0.1840115487575531,
1400
+ "rewards/Format/mean": 0.7193701863288879,
1401
+ "rewards/Format/std": 0.40376046299934387,
1402
+ "step": 87
1403
+ },
1404
+ {
1405
+ "completions/clipped_ratio": 0.0,
1406
+ "completions/max_length": 769.0,
1407
+ "completions/mean_length": 317.16668701171875,
1408
+ "completions/min_length": 198.0,
1409
+ "epoch": 0.18487394957983194,
1410
+ "frac_reward_zero_std": 0.0,
1411
+ "grad_norm": 0.020923288046997623,
1412
+ "learning_rate": 4.590061770891085e-07,
1413
+ "loss": -3.119930624961853e-08,
1414
+ "reward": 0.6998317241668701,
1415
+ "reward_std": 0.24230313301086426,
1416
+ "rewards/Format/mean": 0.6998317241668701,
1417
+ "rewards/Format/std": 0.4203824996948242,
1418
+ "step": 88
1419
+ },
1420
+ {
1421
+ "completions/clipped_ratio": 0.0,
1422
+ "completions/max_length": 1506.0,
1423
+ "completions/mean_length": 306.81549072265625,
1424
+ "completions/min_length": 160.0,
1425
+ "epoch": 0.1869747899159664,
1426
+ "frac_reward_zero_std": 0.0,
1427
+ "grad_norm": 0.02264755656937367,
1428
+ "learning_rate": 4.5809629219875136e-07,
1429
+ "loss": -2.5688981608595896e-08,
1430
+ "reward": 0.6732820868492126,
1431
+ "reward_std": 0.20462745428085327,
1432
+ "rewards/Format/mean": 0.6732820868492126,
1433
+ "rewards/Format/std": 0.40139421820640564,
1434
+ "step": 89
1435
+ },
1436
+ {
1437
+ "completions/clipped_ratio": 0.0,
1438
+ "completions/max_length": 1342.0,
1439
+ "completions/mean_length": 346.28570556640625,
1440
+ "completions/min_length": 160.0,
1441
+ "epoch": 0.18907563025210083,
1442
+ "frac_reward_zero_std": 0.0,
1443
+ "grad_norm": 0.023182571517168665,
1444
+ "learning_rate": 4.5717734270920466e-07,
1445
+ "loss": 3.0617229640483856e-08,
1446
+ "reward": 0.7511137127876282,
1447
+ "reward_std": 0.2009718120098114,
1448
+ "rewards/Format/mean": 0.7511137127876282,
1449
+ "rewards/Format/std": 0.38486021757125854,
1450
+ "step": 90
1451
+ },
1452
+ {
1453
+ "completions/clipped_ratio": 0.0,
1454
+ "completions/max_length": 768.0,
1455
+ "completions/mean_length": 284.2708435058594,
1456
+ "completions/min_length": 198.0,
1457
+ "epoch": 0.19117647058823528,
1458
+ "frac_reward_zero_std": 0.0,
1459
+ "grad_norm": 0.02245594849872551,
1460
+ "learning_rate": 4.5624936864957555e-07,
1461
+ "loss": 5.991508800207157e-08,
1462
+ "reward": 0.8196629285812378,
1463
+ "reward_std": 0.2135486602783203,
1464
+ "rewards/Format/mean": 0.8196629285812378,
1465
+ "rewards/Format/std": 0.40691888332366943,
1466
+ "step": 91
1467
+ },
1468
+ {
1469
+ "completions/clipped_ratio": 0.0,
1470
+ "completions/max_length": 768.0,
1471
+ "completions/mean_length": 308.56549072265625,
1472
+ "completions/min_length": 199.0,
1473
+ "epoch": 0.19327731092436976,
1474
+ "frac_reward_zero_std": 0.0,
1475
+ "grad_norm": 0.023038185737041545,
1476
+ "learning_rate": 4.553124104420784e-07,
1477
+ "loss": 5.513096645870519e-09,
1478
+ "reward": 0.7160678505897522,
1479
+ "reward_std": 0.19483555853366852,
1480
+ "rewards/Format/mean": 0.7160677909851074,
1481
+ "rewards/Format/std": 0.4294646978378296,
1482
+ "step": 92
1483
+ },
1484
+ {
1485
+ "completions/clipped_ratio": 0.0,
1486
+ "completions/max_length": 768.0,
1487
+ "completions/mean_length": 257.5446472167969,
1488
+ "completions/min_length": 159.0,
1489
+ "epoch": 0.1953781512605042,
1490
+ "frac_reward_zero_std": 0.0,
1491
+ "grad_norm": 0.022949994512294538,
1492
+ "learning_rate": 4.5436650890027357e-07,
1493
+ "loss": 4.5207951160364246e-08,
1494
+ "reward": 0.6487872004508972,
1495
+ "reward_std": 0.20081423223018646,
1496
+ "rewards/Format/mean": 0.6487872004508972,
1497
+ "rewards/Format/std": 0.39187052845954895,
1498
+ "step": 93
1499
+ },
1500
+ {
1501
+ "completions/clipped_ratio": 0.0,
1502
+ "completions/max_length": 768.0,
1503
+ "completions/mean_length": 314.42559814453125,
1504
+ "completions/min_length": 199.0,
1505
+ "epoch": 0.19747899159663865,
1506
+ "frac_reward_zero_std": 0.0,
1507
+ "grad_norm": 0.02162282805513244,
1508
+ "learning_rate": 4.5341170522729e-07,
1509
+ "loss": -1.3669099985236244e-07,
1510
+ "reward": 0.7372074127197266,
1511
+ "reward_std": 0.2066640704870224,
1512
+ "rewards/Format/mean": 0.7372074127197266,
1513
+ "rewards/Format/std": 0.4436173141002655,
1514
+ "step": 94
1515
+ },
1516
+ {
1517
+ "completions/clipped_ratio": 0.0,
1518
+ "completions/max_length": 768.0,
1519
+ "completions/mean_length": 283.4047546386719,
1520
+ "completions/min_length": 160.0,
1521
+ "epoch": 0.19957983193277312,
1522
+ "frac_reward_zero_std": 0.0,
1523
+ "grad_norm": 0.022696137816341274,
1524
+ "learning_rate": 4.5244804101403025e-07,
1525
+ "loss": 1.1709441416485333e-08,
1526
+ "reward": 0.7385284900665283,
1527
+ "reward_std": 0.1931488960981369,
1528
+ "rewards/Format/mean": 0.7385285496711731,
1529
+ "rewards/Format/std": 0.3812759220600128,
1530
+ "step": 95
1531
+ },
1532
+ {
1533
+ "completions/clipped_ratio": 0.0,
1534
+ "completions/max_length": 768.0,
1535
+ "completions/mean_length": 315.264892578125,
1536
+ "completions/min_length": 198.0,
1537
+ "epoch": 0.20168067226890757,
1538
+ "frac_reward_zero_std": 0.0,
1539
+ "grad_norm": 0.02093845716856131,
1540
+ "learning_rate": 4.5147555823735875e-07,
1541
+ "loss": 2.987993497072239e-08,
1542
+ "reward": 0.8301222920417786,
1543
+ "reward_std": 0.17476744949817657,
1544
+ "rewards/Format/mean": 0.8301222920417786,
1545
+ "rewards/Format/std": 0.4103260338306427,
1546
+ "step": 96
1547
+ },
1548
+ {
1549
+ "completions/clipped_ratio": 0.0,
1550
+ "completions/max_length": 768.0,
1551
+ "completions/mean_length": 338.46429443359375,
1552
+ "completions/min_length": 198.0,
1553
+ "epoch": 0.20378151260504201,
1554
+ "frac_reward_zero_std": 0.0,
1555
+ "grad_norm": 0.021956591264555105,
1556
+ "learning_rate": 4.504942992582732e-07,
1557
+ "loss": 2.8812792152166367e-08,
1558
+ "reward": 0.7950236797332764,
1559
+ "reward_std": 0.21779192984104156,
1560
+ "rewards/Format/mean": 0.7950237989425659,
1561
+ "rewards/Format/std": 0.438232958316803,
1562
+ "step": 97
1563
+ },
1564
+ {
1565
+ "completions/clipped_ratio": 0.0,
1566
+ "completions/max_length": 768.0,
1567
+ "completions/mean_length": 294.6726379394531,
1568
+ "completions/min_length": 199.0,
1569
+ "epoch": 0.20588235294117646,
1570
+ "frac_reward_zero_std": 0.0,
1571
+ "grad_norm": 0.021076392956800603,
1572
+ "learning_rate": 4.495043068200599e-07,
1573
+ "loss": 7.821635335858446e-06,
1574
+ "reward": 0.7780522108078003,
1575
+ "reward_std": 0.20273412764072418,
1576
+ "rewards/Format/mean": 0.7780520915985107,
1577
+ "rewards/Format/std": 0.4342520833015442,
1578
+ "step": 98
1579
+ },
1580
+ {
1581
+ "completions/clipped_ratio": 0.0,
1582
+ "completions/max_length": 768.0,
1583
+ "completions/mean_length": 282.41070556640625,
1584
+ "completions/min_length": 198.0,
1585
+ "epoch": 0.20798319327731093,
1586
+ "frac_reward_zero_std": 0.0,
1587
+ "grad_norm": 0.022125236669221,
1588
+ "learning_rate": 4.48505624046431e-07,
1589
+ "loss": -1.0756775736808777e-07,
1590
+ "reward": 0.7524579763412476,
1591
+ "reward_std": 0.20691367983818054,
1592
+ "rewards/Format/mean": 0.7524579167366028,
1593
+ "rewards/Format/std": 0.4068320095539093,
1594
+ "step": 99
1595
+ },
1596
+ {
1597
+ "completions/clipped_ratio": 0.0,
1598
+ "completions/max_length": 768.0,
1599
+ "completions/mean_length": 352.04168701171875,
1600
+ "completions/min_length": 159.0,
1601
+ "epoch": 0.21008403361344538,
1602
+ "frac_reward_zero_std": 0.0,
1603
+ "grad_norm": 0.02057438016675885,
1604
+ "learning_rate": 4.47498294439647e-07,
1605
+ "loss": -3.625506561633074e-09,
1606
+ "reward": 0.7926063537597656,
1607
+ "reward_std": 0.1882854700088501,
1608
+ "rewards/Format/mean": 0.7926063537597656,
1609
+ "rewards/Format/std": 0.41169852018356323,
1610
+ "step": 100
1611
+ },
1612
+ {
1613
+ "completions/clipped_ratio": 0.0,
1614
+ "completions/max_length": 768.0,
1615
+ "completions/mean_length": 316.60418701171875,
1616
+ "completions/min_length": 199.0,
1617
+ "epoch": 0.21218487394957983,
1618
+ "frac_reward_zero_std": 0.0,
1619
+ "grad_norm": 0.021410138644511176,
1620
+ "learning_rate": 4.4648236187862087e-07,
1621
+ "loss": 3.6476802023344135e-08,
1622
+ "reward": 0.791693925857544,
1623
+ "reward_std": 0.21765516698360443,
1624
+ "rewards/Format/mean": 0.791693925857544,
1625
+ "rewards/Format/std": 0.426066517829895,
1626
+ "step": 101
1627
+ },
1628
+ {
1629
+ "completions/clipped_ratio": 0.0,
1630
+ "completions/max_length": 768.0,
1631
+ "completions/mean_length": 352.9047546386719,
1632
+ "completions/min_length": 199.0,
1633
+ "epoch": 0.21428571428571427,
1634
+ "frac_reward_zero_std": 0.0,
1635
+ "grad_norm": 0.021208677071151955,
1636
+ "learning_rate": 4.4545787061700746e-07,
1637
+ "loss": 2.8788539552238035e-08,
1638
+ "reward": 0.6777361035346985,
1639
+ "reward_std": 0.24635247886180878,
1640
+ "rewards/Format/mean": 0.6777361035346985,
1641
+ "rewards/Format/std": 0.3930741548538208,
1642
+ "step": 102
1643
+ },
1644
+ {
1645
+ "completions/clipped_ratio": 0.0,
1646
+ "completions/max_length": 768.0,
1647
+ "completions/mean_length": 315.26190185546875,
1648
+ "completions/min_length": 199.0,
1649
+ "epoch": 0.21638655462184875,
1650
+ "frac_reward_zero_std": 0.0,
1651
+ "grad_norm": 0.022644890757122185,
1652
+ "learning_rate": 4.444248652812753e-07,
1653
+ "loss": -6.675448815940399e-08,
1654
+ "reward": 0.8054797053337097,
1655
+ "reward_std": 0.23741725087165833,
1656
+ "rewards/Format/mean": 0.8054797053337097,
1657
+ "rewards/Format/std": 0.44436848163604736,
1658
+ "step": 103
1659
+ },
1660
+ {
1661
+ "completions/clipped_ratio": 0.0,
1662
+ "completions/max_length": 768.0,
1663
+ "completions/mean_length": 303.4583435058594,
1664
+ "completions/min_length": 198.0,
1665
+ "epoch": 0.2184873949579832,
1666
+ "frac_reward_zero_std": 0.0,
1667
+ "grad_norm": 0.022063876471544695,
1668
+ "learning_rate": 4.4338339086876327e-07,
1669
+ "loss": -3.948419902144451e-08,
1670
+ "reward": 0.7238919734954834,
1671
+ "reward_std": 0.2449919581413269,
1672
+ "rewards/Format/mean": 0.7238919138908386,
1673
+ "rewards/Format/std": 0.4637271761894226,
1674
+ "step": 104
1675
+ },
1676
+ {
1677
+ "completions/clipped_ratio": 0.0,
1678
+ "completions/max_length": 768.0,
1679
+ "completions/mean_length": 323.86309814453125,
1680
+ "completions/min_length": 198.0,
1681
+ "epoch": 0.22058823529411764,
1682
+ "frac_reward_zero_std": 0.0,
1683
+ "grad_norm": 0.018902399577417992,
1684
+ "learning_rate": 4.4233349274571974e-07,
1685
+ "loss": -3.1995975859899772e-06,
1686
+ "reward": 0.7562804818153381,
1687
+ "reward_std": 0.21583375334739685,
1688
+ "rewards/Format/mean": 0.7562804818153381,
1689
+ "rewards/Format/std": 0.4550980031490326,
1690
+ "step": 105
1691
+ },
1692
+ {
1693
+ "completions/clipped_ratio": 0.0,
1694
+ "completions/max_length": 768.0,
1695
+ "completions/mean_length": 351.827392578125,
1696
+ "completions/min_length": 199.0,
1697
+ "epoch": 0.22268907563025211,
1698
+ "frac_reward_zero_std": 0.0,
1699
+ "grad_norm": 0.020303164140937018,
1700
+ "learning_rate": 4.4127521664532703e-07,
1701
+ "loss": -2.4301456846842484e-07,
1702
+ "reward": 0.8513760566711426,
1703
+ "reward_std": 0.20463679730892181,
1704
+ "rewards/Format/mean": 0.8513760566711426,
1705
+ "rewards/Format/std": 0.39889758825302124,
1706
+ "step": 106
1707
+ },
1708
+ {
1709
+ "completions/clipped_ratio": 0.0,
1710
+ "completions/max_length": 768.0,
1711
+ "completions/mean_length": 326.6428527832031,
1712
+ "completions/min_length": 199.0,
1713
+ "epoch": 0.22478991596638656,
1714
+ "frac_reward_zero_std": 0.0,
1715
+ "grad_norm": 0.0219472486398814,
1716
+ "learning_rate": 4.402086086657092e-07,
1717
+ "loss": 2.6775524020195007e-08,
1718
+ "reward": 0.77166748046875,
1719
+ "reward_std": 0.2029147446155548,
1720
+ "rewards/Format/mean": 0.77166748046875,
1721
+ "rewards/Format/std": 0.4164963662624359,
1722
+ "step": 107
1723
+ },
1724
+ {
1725
+ "completions/clipped_ratio": 0.0,
1726
+ "completions/max_length": 768.0,
1727
+ "completions/mean_length": 324.1101379394531,
1728
+ "completions/min_length": 120.0,
1729
+ "epoch": 0.226890756302521,
1730
+ "frac_reward_zero_std": 0.0,
1731
+ "grad_norm": 0.022782343967976404,
1732
+ "learning_rate": 4.3913371526792395e-07,
1733
+ "loss": -4.617807913831484e-09,
1734
+ "reward": 0.6442007422447205,
1735
+ "reward_std": 0.2533365786075592,
1736
+ "rewards/Format/mean": 0.6442006826400757,
1737
+ "rewards/Format/std": 0.41148844361305237,
1738
+ "step": 108
1739
+ },
1740
+ {
1741
+ "completions/clipped_ratio": 0.0,
1742
+ "completions/max_length": 1342.0,
1743
+ "completions/mean_length": 320.0089416503906,
1744
+ "completions/min_length": 198.0,
1745
+ "epoch": 0.22899159663865545,
1746
+ "frac_reward_zero_std": 0.0,
1747
+ "grad_norm": 0.021277674903659907,
1748
+ "learning_rate": 4.380505832739387e-07,
1749
+ "loss": -9.996195871053715e-08,
1750
+ "reward": 0.715027391910553,
1751
+ "reward_std": 0.22619880735874176,
1752
+ "rewards/Format/mean": 0.7150274515151978,
1753
+ "rewards/Format/std": 0.4316016733646393,
1754
+ "step": 109
1755
+ },
1756
+ {
1757
+ "completions/clipped_ratio": 0.0,
1758
+ "completions/max_length": 1506.0,
1759
+ "completions/mean_length": 353.5833435058594,
1760
+ "completions/min_length": 198.0,
1761
+ "epoch": 0.23109243697478993,
1762
+ "frac_reward_zero_std": 0.0,
1763
+ "grad_norm": 0.02114454110529574,
1764
+ "learning_rate": 4.3695925986459107e-07,
1765
+ "loss": -3.1044088188991736e-08,
1766
+ "reward": 0.7676001191139221,
1767
+ "reward_std": 0.21530351042747498,
1768
+ "rewards/Format/mean": 0.7675999999046326,
1769
+ "rewards/Format/std": 0.4146890938282013,
1770
+ "step": 110
1771
+ },
1772
+ {
1773
+ "completions/clipped_ratio": 0.0,
1774
+ "completions/max_length": 1342.0,
1775
+ "completions/mean_length": 324.327392578125,
1776
+ "completions/min_length": 199.0,
1777
+ "epoch": 0.23319327731092437,
1778
+ "frac_reward_zero_std": 0.0,
1779
+ "grad_norm": 0.02211505664733671,
1780
+ "learning_rate": 4.3585979257753404e-07,
1781
+ "loss": -8.524511940777302e-08,
1782
+ "reward": 0.7371337413787842,
1783
+ "reward_std": 0.1817293018102646,
1784
+ "rewards/Format/mean": 0.7371336817741394,
1785
+ "rewards/Format/std": 0.3963373601436615,
1786
+ "step": 111
1787
+ },
1788
+ {
1789
+ "completions/clipped_ratio": 0.0,
1790
+ "completions/max_length": 1342.0,
1791
+ "completions/mean_length": 349.9910888671875,
1792
+ "completions/min_length": 199.0,
1793
+ "epoch": 0.23529411764705882,
1794
+ "frac_reward_zero_std": 0.0,
1795
+ "grad_norm": 0.020857981203141616,
1796
+ "learning_rate": 4.3475222930516473e-07,
1797
+ "loss": -6.961636245250702e-08,
1798
+ "reward": 0.7560895681381226,
1799
+ "reward_std": 0.19613270461559296,
1800
+ "rewards/Format/mean": 0.7560895681381226,
1801
+ "rewards/Format/std": 0.4381280541419983,
1802
+ "step": 112
1803
+ },
1804
+ {
1805
+ "completions/clipped_ratio": 0.0,
1806
+ "completions/max_length": 768.0,
1807
+ "completions/mean_length": 336.3809509277344,
1808
+ "completions/min_length": 159.0,
1809
+ "epoch": 0.23739495798319327,
1810
+ "frac_reward_zero_std": 0.0,
1811
+ "grad_norm": 0.021401398329492366,
1812
+ "learning_rate": 4.3363661829253863e-07,
1813
+ "loss": -2.4103513851514435e-08,
1814
+ "reward": 0.6850998401641846,
1815
+ "reward_std": 0.22407284379005432,
1816
+ "rewards/Format/mean": 0.6850998401641846,
1817
+ "rewards/Format/std": 0.38775303959846497,
1818
+ "step": 113
1819
+ },
1820
+ {
1821
+ "completions/clipped_ratio": 0.0,
1822
+ "completions/max_length": 768.0,
1823
+ "completions/mean_length": 313.3035888671875,
1824
+ "completions/min_length": 159.0,
1825
+ "epoch": 0.23949579831932774,
1826
+ "frac_reward_zero_std": 0.0,
1827
+ "grad_norm": 0.020810383479177904,
1828
+ "learning_rate": 4.3251300813526746e-07,
1829
+ "loss": -1.4014852922628052e-06,
1830
+ "reward": 0.771544337272644,
1831
+ "reward_std": 0.18077707290649414,
1832
+ "rewards/Format/mean": 0.771544337272644,
1833
+ "rewards/Format/std": 0.4377117156982422,
1834
+ "step": 114
1835
+ },
1836
+ {
1837
+ "completions/clipped_ratio": 0.0,
1838
+ "completions/max_length": 1506.0,
1839
+ "completions/mean_length": 365.6160888671875,
1840
+ "completions/min_length": 159.0,
1841
+ "epoch": 0.2415966386554622,
1842
+ "frac_reward_zero_std": 0.0,
1843
+ "grad_norm": 0.01993254663702268,
1844
+ "learning_rate": 4.313814477774035e-07,
1845
+ "loss": 2.9717437399767732e-08,
1846
+ "reward": 0.7557961940765381,
1847
+ "reward_std": 0.22641652822494507,
1848
+ "rewards/Format/mean": 0.7557962536811829,
1849
+ "rewards/Format/std": 0.44599175453186035,
1850
+ "step": 115
1851
+ },
1852
+ {
1853
+ "completions/clipped_ratio": 0.0,
1854
+ "completions/max_length": 768.0,
1855
+ "completions/mean_length": 350.72918701171875,
1856
+ "completions/min_length": 159.0,
1857
+ "epoch": 0.24369747899159663,
1858
+ "frac_reward_zero_std": 0.0,
1859
+ "grad_norm": 0.02234505269337804,
1860
+ "learning_rate": 4.302419865093062e-07,
1861
+ "loss": 1.707424779340272e-08,
1862
+ "reward": 0.7636775374412537,
1863
+ "reward_std": 0.21471408009529114,
1864
+ "rewards/Format/mean": 0.7636774778366089,
1865
+ "rewards/Format/std": 0.4227873682975769,
1866
+ "step": 116
1867
+ },
1868
+ {
1869
+ "completions/clipped_ratio": 0.0,
1870
+ "completions/max_length": 769.0,
1871
+ "completions/mean_length": 317.83929443359375,
1872
+ "completions/min_length": 198.0,
1873
+ "epoch": 0.24579831932773108,
1874
+ "frac_reward_zero_std": 0.0,
1875
+ "grad_norm": 0.020429239550428895,
1876
+ "learning_rate": 4.290946739654962e-07,
1877
+ "loss": 2.06831227700377e-08,
1878
+ "reward": 0.7483717799186707,
1879
+ "reward_std": 0.21330294013023376,
1880
+ "rewards/Format/mean": 0.7483717203140259,
1881
+ "rewards/Format/std": 0.42091742157936096,
1882
+ "step": 117
1883
+ },
1884
+ {
1885
+ "completions/clipped_ratio": 0.0,
1886
+ "completions/max_length": 768.0,
1887
+ "completions/mean_length": 299.8511962890625,
1888
+ "completions/min_length": 199.0,
1889
+ "epoch": 0.24789915966386555,
1890
+ "frac_reward_zero_std": 0.0,
1891
+ "grad_norm": 0.023624878887809193,
1892
+ "learning_rate": 4.2793956012249277e-07,
1893
+ "loss": -7.4062320720713615e-09,
1894
+ "reward": 0.717814564704895,
1895
+ "reward_std": 0.1896313577890396,
1896
+ "rewards/Format/mean": 0.717814564704895,
1897
+ "rewards/Format/std": 0.36752453446388245,
1898
+ "step": 118
1899
+ },
1900
+ {
1901
+ "completions/clipped_ratio": 0.0,
1902
+ "completions/max_length": 771.0,
1903
+ "completions/mean_length": 327.69940185546875,
1904
+ "completions/min_length": 199.0,
1905
+ "epoch": 0.25,
1906
+ "frac_reward_zero_std": 0.0,
1907
+ "grad_norm": 0.021914172622371157,
1908
+ "learning_rate": 4.2677669529663686e-07,
1909
+ "loss": -4.623352367616462e-09,
1910
+ "reward": 0.7293979525566101,
1911
+ "reward_std": 0.22163952887058258,
1912
+ "rewards/Format/mean": 0.7293978929519653,
1913
+ "rewards/Format/std": 0.42033547163009644,
1914
+ "step": 119
1915
+ },
1916
+ {
1917
+ "completions/clipped_ratio": 0.0,
1918
+ "completions/max_length": 768.0,
1919
+ "completions/mean_length": 306.44940185546875,
1920
+ "completions/min_length": 159.0,
1921
+ "epoch": 0.25210084033613445,
1922
+ "frac_reward_zero_std": 0.0,
1923
+ "grad_norm": 0.021303999540729524,
1924
+ "learning_rate": 4.256061301418996e-07,
1925
+ "loss": 1.514563336968422e-07,
1926
+ "reward": 0.7895549535751343,
1927
+ "reward_std": 0.1913151741027832,
1928
+ "rewards/Format/mean": 0.7895549535751343,
1929
+ "rewards/Format/std": 0.4348748028278351,
1930
+ "step": 120
1931
+ },
1932
+ {
1933
+ "completions/clipped_ratio": 0.0,
1934
+ "completions/max_length": 768.0,
1935
+ "completions/mean_length": 298.8452453613281,
1936
+ "completions/min_length": 198.0,
1937
+ "epoch": 0.2542016806722689,
1938
+ "frac_reward_zero_std": 0.0,
1939
+ "grad_norm": 0.02319407730887222,
1940
+ "learning_rate": 4.2442791564767554e-07,
1941
+ "loss": -8.02489665829853e-08,
1942
+ "reward": 0.6953231692314148,
1943
+ "reward_std": 0.21529938280582428,
1944
+ "rewards/Format/mean": 0.69532310962677,
1945
+ "rewards/Format/std": 0.4057838022708893,
1946
+ "step": 121
1947
+ },
1948
+ {
1949
+ "completions/clipped_ratio": 0.0,
1950
+ "completions/max_length": 1342.0,
1951
+ "completions/mean_length": 324.2202453613281,
1952
+ "completions/min_length": 198.0,
1953
+ "epoch": 0.25630252100840334,
1954
+ "frac_reward_zero_std": 0.0,
1955
+ "grad_norm": 0.02263642851378137,
1956
+ "learning_rate": 4.232421031365617e-07,
1957
+ "loss": -1.7530207685467758e-07,
1958
+ "reward": 0.7791425585746765,
1959
+ "reward_std": 0.2066822499036789,
1960
+ "rewards/Format/mean": 0.7791424989700317,
1961
+ "rewards/Format/std": 0.4412955343723297,
1962
+ "step": 122
1963
+ },
1964
+ {
1965
+ "completions/clipped_ratio": 0.0,
1966
+ "completions/max_length": 768.0,
1967
+ "completions/mean_length": 323.0684509277344,
1968
+ "completions/min_length": 198.0,
1969
+ "epoch": 0.25840336134453784,
1970
+ "frac_reward_zero_std": 0.0,
1971
+ "grad_norm": 0.02339538076080322,
1972
+ "learning_rate": 4.2204874426212196e-07,
1973
+ "loss": 6.678359198986072e-08,
1974
+ "reward": 0.742396891117096,
1975
+ "reward_std": 0.23194286227226257,
1976
+ "rewards/Format/mean": 0.7423968315124512,
1977
+ "rewards/Format/std": 0.3980790078639984,
1978
+ "step": 123
1979
+ },
1980
+ {
1981
+ "completions/clipped_ratio": 0.0,
1982
+ "completions/max_length": 768.0,
1983
+ "completions/mean_length": 332.5952453613281,
1984
+ "completions/min_length": 199.0,
1985
+ "epoch": 0.2605042016806723,
1986
+ "frac_reward_zero_std": 0.0,
1987
+ "grad_norm": 0.020899420916059255,
1988
+ "learning_rate": 4.2084789100663707e-07,
1989
+ "loss": -6.004813712934265e-08,
1990
+ "reward": 0.7840648889541626,
1991
+ "reward_std": 0.2170037180185318,
1992
+ "rewards/Format/mean": 0.7840649485588074,
1993
+ "rewards/Format/std": 0.4444384276866913,
1994
+ "step": 124
1995
+ },
1996
+ {
1997
+ "completions/clipped_ratio": 0.0,
1998
+ "completions/max_length": 768.0,
1999
+ "completions/mean_length": 353.702392578125,
2000
+ "completions/min_length": 159.0,
2001
+ "epoch": 0.26260504201680673,
2002
+ "frac_reward_zero_std": 0.0,
2003
+ "grad_norm": 0.021737039586716876,
2004
+ "learning_rate": 4.1963959567884045e-07,
2005
+ "loss": 1.9370345398783684e-06,
2006
+ "reward": 0.7687969207763672,
2007
+ "reward_std": 0.1971704214811325,
2008
+ "rewards/Format/mean": 0.7687969207763672,
2009
+ "rewards/Format/std": 0.4055511951446533,
2010
+ "step": 125
2011
+ },
2012
+ {
2013
+ "completions/clipped_ratio": 0.0,
2014
+ "completions/max_length": 768.0,
2015
+ "completions/mean_length": 352.6160888671875,
2016
+ "completions/min_length": 199.0,
2017
+ "epoch": 0.2647058823529412,
2018
+ "frac_reward_zero_std": 0.0,
2019
+ "grad_norm": 0.019568313468217944,
2020
+ "learning_rate": 4.1842391091163933e-07,
2021
+ "loss": 1.0647151782450237e-07,
2022
+ "reward": 0.6903642416000366,
2023
+ "reward_std": 0.21716326475143433,
2024
+ "rewards/Format/mean": 0.6903641819953918,
2025
+ "rewards/Format/std": 0.4223073422908783,
2026
+ "step": 126
2027
+ },
2028
+ {
2029
+ "completions/clipped_ratio": 0.0,
2030
+ "completions/max_length": 1506.0,
2031
+ "completions/mean_length": 346.1964416503906,
2032
+ "completions/min_length": 198.0,
2033
+ "epoch": 0.2668067226890756,
2034
+ "frac_reward_zero_std": 0.0,
2035
+ "grad_norm": 0.021781078759713437,
2036
+ "learning_rate": 4.172008896598221e-07,
2037
+ "loss": -1.932494342327118e-08,
2038
+ "reward": 0.6439427137374878,
2039
+ "reward_std": 0.23764324188232422,
2040
+ "rewards/Format/mean": 0.6439427137374878,
2041
+ "rewards/Format/std": 0.35776689648628235,
2042
+ "step": 127
2043
+ },
2044
+ {
2045
+ "completions/clipped_ratio": 0.0,
2046
+ "completions/max_length": 768.0,
2047
+ "completions/mean_length": 311.27679443359375,
2048
+ "completions/min_length": 198.0,
2049
+ "epoch": 0.2689075630252101,
2050
+ "frac_reward_zero_std": 0.0,
2051
+ "grad_norm": 0.020675785779927288,
2052
+ "learning_rate": 4.1597058519775206e-07,
2053
+ "loss": 2.434050472288618e-08,
2054
+ "reward": 0.762788712978363,
2055
+ "reward_std": 0.20824600756168365,
2056
+ "rewards/Format/mean": 0.7627886533737183,
2057
+ "rewards/Format/std": 0.4559004008769989,
2058
+ "step": 128
2059
+ },
2060
+ {
2061
+ "completions/clipped_ratio": 0.0,
2062
+ "completions/max_length": 686.0,
2063
+ "completions/mean_length": 286.1577453613281,
2064
+ "completions/min_length": 198.0,
2065
+ "epoch": 0.2710084033613445,
2066
+ "frac_reward_zero_std": 0.0,
2067
+ "grad_norm": 0.022676520851550745,
2068
+ "learning_rate": 4.1473305111704647e-07,
2069
+ "loss": -4.1715492171690016e-10,
2070
+ "reward": 0.7366788387298584,
2071
+ "reward_std": 0.2162959724664688,
2072
+ "rewards/Format/mean": 0.7366787791252136,
2073
+ "rewards/Format/std": 0.4161640703678131,
2074
+ "step": 129
2075
+ },
2076
+ {
2077
+ "completions/clipped_ratio": 0.0,
2078
+ "completions/max_length": 1342.0,
2079
+ "completions/mean_length": 344.3035888671875,
2080
+ "completions/min_length": 199.0,
2081
+ "epoch": 0.27310924369747897,
2082
+ "frac_reward_zero_std": 0.0,
2083
+ "grad_norm": 0.021195441506469768,
2084
+ "learning_rate": 4.1348834132424204e-07,
2085
+ "loss": -2.9957544001035785e-08,
2086
+ "reward": 0.6983749866485596,
2087
+ "reward_std": 0.24195848405361176,
2088
+ "rewards/Format/mean": 0.6983750462532043,
2089
+ "rewards/Format/std": 0.4164566993713379,
2090
+ "step": 130
2091
+ },
2092
+ {
2093
+ "completions/clipped_ratio": 0.0,
2094
+ "completions/max_length": 768.0,
2095
+ "completions/mean_length": 294.2172546386719,
2096
+ "completions/min_length": 199.0,
2097
+ "epoch": 0.27521008403361347,
2098
+ "frac_reward_zero_std": 0.0,
2099
+ "grad_norm": 0.022531382850310606,
2100
+ "learning_rate": 4.1223651003844686e-07,
2101
+ "loss": 1.3135529108865285e-08,
2102
+ "reward": 0.7773038148880005,
2103
+ "reward_std": 0.2455342710018158,
2104
+ "rewards/Format/mean": 0.7773038148880005,
2105
+ "rewards/Format/std": 0.422204852104187,
2106
+ "step": 131
2107
+ },
2108
+ {
2109
+ "completions/clipped_ratio": 0.0,
2110
+ "completions/max_length": 768.0,
2111
+ "completions/mean_length": 297.6845397949219,
2112
+ "completions/min_length": 159.0,
2113
+ "epoch": 0.2773109243697479,
2114
+ "frac_reward_zero_std": 0.0,
2115
+ "grad_norm": 0.022882891514577307,
2116
+ "learning_rate": 4.109776117889789e-07,
2117
+ "loss": -2.2817403078079224e-08,
2118
+ "reward": 0.6837427616119385,
2119
+ "reward_std": 0.23065118491649628,
2120
+ "rewards/Format/mean": 0.6837427020072937,
2121
+ "rewards/Format/std": 0.3885253667831421,
2122
+ "step": 132
2123
+ },
2124
+ {
2125
+ "completions/clipped_ratio": 0.0,
2126
+ "completions/max_length": 769.0,
2127
+ "completions/mean_length": 377.06549072265625,
2128
+ "completions/min_length": 198.0,
2129
+ "epoch": 0.27941176470588236,
2130
+ "frac_reward_zero_std": 0.0,
2131
+ "grad_norm": 0.02032169465392032,
2132
+ "learning_rate": 4.097117014129903e-07,
2133
+ "loss": 5.8052442852840613e-08,
2134
+ "reward": 0.7971833944320679,
2135
+ "reward_std": 0.20347455143928528,
2136
+ "rewards/Format/mean": 0.7971833348274231,
2137
+ "rewards/Format/std": 0.41090497374534607,
2138
+ "step": 133
2139
+ },
2140
+ {
2141
+ "completions/clipped_ratio": 0.0,
2142
+ "completions/max_length": 768.0,
2143
+ "completions/mean_length": 260.139892578125,
2144
+ "completions/min_length": 120.0,
2145
+ "epoch": 0.2815126050420168,
2146
+ "frac_reward_zero_std": 0.0,
2147
+ "grad_norm": 0.023671255564906197,
2148
+ "learning_rate": 4.0843883405307903e-07,
2149
+ "loss": 5.122274160385132e-09,
2150
+ "reward": 0.7170312404632568,
2151
+ "reward_std": 0.2006462663412094,
2152
+ "rewards/Format/mean": 0.7170313000679016,
2153
+ "rewards/Format/std": 0.3871210813522339,
2154
+ "step": 134
2155
+ },
2156
+ {
2157
+ "completions/clipped_ratio": 0.0,
2158
+ "completions/max_length": 768.0,
2159
+ "completions/mean_length": 301.7440490722656,
2160
+ "completions/min_length": 194.0,
2161
+ "epoch": 0.28361344537815125,
2162
+ "frac_reward_zero_std": 0.0,
2163
+ "grad_norm": 0.02260828517741192,
2164
+ "learning_rate": 4.071590651548867e-07,
2165
+ "loss": -6.402842700481415e-08,
2166
+ "reward": 0.7180067896842957,
2167
+ "reward_std": 0.20798729360103607,
2168
+ "rewards/Format/mean": 0.7180067896842957,
2169
+ "rewards/Format/std": 0.4340338706970215,
2170
+ "step": 135
2171
+ },
2172
+ {
2173
+ "completions/clipped_ratio": 0.0,
2174
+ "completions/max_length": 768.0,
2175
+ "completions/mean_length": 343.1934509277344,
2176
+ "completions/min_length": 198.0,
2177
+ "epoch": 0.2857142857142857,
2178
+ "frac_reward_zero_std": 0.0,
2179
+ "grad_norm": 0.021561434357882863,
2180
+ "learning_rate": 4.058724504646834e-07,
2181
+ "loss": -1.5360032534772472e-07,
2182
+ "reward": 0.7687699794769287,
2183
+ "reward_std": 0.1734752207994461,
2184
+ "rewards/Format/mean": 0.7687699198722839,
2185
+ "rewards/Format/std": 0.43927615880966187,
2186
+ "step": 136
2187
+ },
2188
+ {
2189
+ "completions/clipped_ratio": 0.0,
2190
+ "completions/max_length": 768.0,
2191
+ "completions/mean_length": 277.6339416503906,
2192
+ "completions/min_length": 120.0,
2193
+ "epoch": 0.28781512605042014,
2194
+ "frac_reward_zero_std": 0.0,
2195
+ "grad_norm": 0.02332708832741251,
2196
+ "learning_rate": 4.045790460269395e-07,
2197
+ "loss": -1.370984534787567e-07,
2198
+ "reward": 0.792543351650238,
2199
+ "reward_std": 0.18136709928512573,
2200
+ "rewards/Format/mean": 0.7925432920455933,
2201
+ "rewards/Format/std": 0.41060832142829895,
2202
+ "step": 137
2203
+ },
2204
+ {
2205
+ "completions/clipped_ratio": 0.0,
2206
+ "completions/max_length": 768.0,
2207
+ "completions/mean_length": 300.5863037109375,
2208
+ "completions/min_length": 199.0,
2209
+ "epoch": 0.28991596638655465,
2210
+ "frac_reward_zero_std": 0.0,
2211
+ "grad_norm": 0.02129516530146727,
2212
+ "learning_rate": 4.0327890818188424e-07,
2213
+ "loss": -1.843242714016924e-08,
2214
+ "reward": 0.7091884016990662,
2215
+ "reward_std": 0.22365686297416687,
2216
+ "rewards/Format/mean": 0.7091883420944214,
2217
+ "rewards/Format/std": 0.4319912791252136,
2218
+ "step": 138
2219
+ },
2220
+ {
2221
+ "completions/clipped_ratio": 0.0,
2222
+ "completions/max_length": 768.0,
2223
+ "completions/mean_length": 334.5208435058594,
2224
+ "completions/min_length": 198.0,
2225
+ "epoch": 0.2920168067226891,
2226
+ "frac_reward_zero_std": 0.0,
2227
+ "grad_norm": 0.023272411214161765,
2228
+ "learning_rate": 4.019720935630518e-07,
2229
+ "loss": -9.972913161959696e-09,
2230
+ "reward": 0.7660070061683655,
2231
+ "reward_std": 0.23378267884254456,
2232
+ "rewards/Format/mean": 0.7660070061683655,
2233
+ "rewards/Format/std": 0.38249534368515015,
2234
+ "step": 139
2235
+ },
2236
+ {
2237
+ "completions/clipped_ratio": 0.0,
2238
+ "completions/max_length": 768.0,
2239
+ "completions/mean_length": 302.8333435058594,
2240
+ "completions/min_length": 199.0,
2241
+ "epoch": 0.29411764705882354,
2242
+ "frac_reward_zero_std": 0.0,
2243
+ "grad_norm": 0.021849733731665922,
2244
+ "learning_rate": 4.006586590948141e-07,
2245
+ "loss": 1.9387032068607368e-07,
2246
+ "reward": 0.67587810754776,
2247
+ "reward_std": 0.21087896823883057,
2248
+ "rewards/Format/mean": 0.6758780479431152,
2249
+ "rewards/Format/std": 0.4165256917476654,
2250
+ "step": 140
2251
+ },
2252
+ {
2253
+ "completions/clipped_ratio": 0.0,
2254
+ "completions/max_length": 768.0,
2255
+ "completions/mean_length": 332.6309509277344,
2256
+ "completions/min_length": 198.0,
2257
+ "epoch": 0.296218487394958,
2258
+ "frac_reward_zero_std": 0.0,
2259
+ "grad_norm": 0.020255242809123063,
2260
+ "learning_rate": 3.993386619899013e-07,
2261
+ "loss": -1.5474368808554573e-07,
2262
+ "reward": 0.7691038250923157,
2263
+ "reward_std": 0.1774113029241562,
2264
+ "rewards/Format/mean": 0.7691037058830261,
2265
+ "rewards/Format/std": 0.4117504358291626,
2266
+ "step": 141
2267
+ },
2268
+ {
2269
+ "completions/clipped_ratio": 0.0,
2270
+ "completions/max_length": 850.0,
2271
+ "completions/mean_length": 326.6458435058594,
2272
+ "completions/min_length": 199.0,
2273
+ "epoch": 0.29831932773109243,
2274
+ "frac_reward_zero_std": 0.0,
2275
+ "grad_norm": 0.02122102709293986,
2276
+ "learning_rate": 3.980121597469095e-07,
2277
+ "loss": -2.9385555535554886e-06,
2278
+ "reward": 0.7644386291503906,
2279
+ "reward_std": 0.19999758899211884,
2280
+ "rewards/Format/mean": 0.7644386291503906,
2281
+ "rewards/Format/std": 0.4267158508300781,
2282
+ "step": 142
2283
+ },
2284
+ {
2285
+ "completions/clipped_ratio": 0.0,
2286
+ "completions/max_length": 521.0,
2287
+ "completions/mean_length": 238.9791717529297,
2288
+ "completions/min_length": 198.0,
2289
+ "epoch": 0.3004201680672269,
2290
+ "frac_reward_zero_std": 0.0,
2291
+ "grad_norm": 0.02396484053358148,
2292
+ "learning_rate": 3.966792101477967e-07,
2293
+ "loss": -5.01594854540599e-07,
2294
+ "reward": 0.7725779414176941,
2295
+ "reward_std": 0.22164370119571686,
2296
+ "rewards/Format/mean": 0.7725780010223389,
2297
+ "rewards/Format/std": 0.4088430106639862,
2298
+ "step": 143
2299
+ },
2300
+ {
2301
+ "completions/clipped_ratio": 0.0,
2302
+ "completions/max_length": 768.0,
2303
+ "completions/mean_length": 338.3958435058594,
2304
+ "completions/min_length": 159.0,
2305
+ "epoch": 0.3025210084033613,
2306
+ "frac_reward_zero_std": 0.0,
2307
+ "grad_norm": 0.020618657919954396,
2308
+ "learning_rate": 3.953398712553649e-07,
2309
+ "loss": 7.453907358012657e-08,
2310
+ "reward": 0.7254589200019836,
2311
+ "reward_std": 0.1895904392004013,
2312
+ "rewards/Format/mean": 0.7254589200019836,
2313
+ "rewards/Format/std": 0.398413747549057,
2314
+ "step": 144
2315
+ },
2316
+ {
2317
+ "completions/clipped_ratio": 0.0,
2318
+ "completions/max_length": 768.0,
2319
+ "completions/mean_length": 307.577392578125,
2320
+ "completions/min_length": 199.0,
2321
+ "epoch": 0.30462184873949577,
2322
+ "frac_reward_zero_std": 0.0,
2323
+ "grad_norm": 0.02247793403852773,
2324
+ "learning_rate": 3.939942014107318e-07,
2325
+ "loss": 2.7724308893084526e-07,
2326
+ "reward": 0.8555787801742554,
2327
+ "reward_std": 0.2160158008337021,
2328
+ "rewards/Format/mean": 0.8555786609649658,
2329
+ "rewards/Format/std": 0.42867380380630493,
2330
+ "step": 145
2331
+ },
2332
+ {
2333
+ "completions/clipped_ratio": 0.0,
2334
+ "completions/max_length": 1342.0,
2335
+ "completions/mean_length": 313.4434509277344,
2336
+ "completions/min_length": 199.0,
2337
+ "epoch": 0.3067226890756303,
2338
+ "frac_reward_zero_std": 0.0,
2339
+ "grad_norm": 0.021747669825807746,
2340
+ "learning_rate": 3.9264225923078873e-07,
2341
+ "loss": 2.951516648863617e-07,
2342
+ "reward": 0.735656201839447,
2343
+ "reward_std": 0.22219358384609222,
2344
+ "rewards/Format/mean": 0.735656201839447,
2345
+ "rewards/Format/std": 0.4243871569633484,
2346
+ "step": 146
2347
+ },
2348
+ {
2349
+ "completions/clipped_ratio": 0.0,
2350
+ "completions/max_length": 769.0,
2351
+ "completions/mean_length": 315.4285888671875,
2352
+ "completions/min_length": 199.0,
2353
+ "epoch": 0.3088235294117647,
2354
+ "frac_reward_zero_std": 0.0,
2355
+ "grad_norm": 0.023099620413964204,
2356
+ "learning_rate": 3.9128410360564793e-07,
2357
+ "loss": 1.2012703791697277e-06,
2358
+ "reward": 0.7941672205924988,
2359
+ "reward_std": 0.2046356201171875,
2360
+ "rewards/Format/mean": 0.7941672801971436,
2361
+ "rewards/Format/std": 0.415115088224411,
2362
+ "step": 147
2363
+ },
2364
+ {
2365
+ "completions/clipped_ratio": 0.0,
2366
+ "completions/max_length": 768.0,
2367
+ "completions/mean_length": 369.1458435058594,
2368
+ "completions/min_length": 199.0,
2369
+ "epoch": 0.31092436974789917,
2370
+ "frac_reward_zero_std": 0.0,
2371
+ "grad_norm": 0.019600667526561574,
2372
+ "learning_rate": 3.8991979369607704e-07,
2373
+ "loss": -5.145557224750519e-08,
2374
+ "reward": 0.7482826709747314,
2375
+ "reward_std": 0.2349366545677185,
2376
+ "rewards/Format/mean": 0.7482826709747314,
2377
+ "rewards/Format/std": 0.459494411945343,
2378
+ "step": 148
2379
+ },
2380
+ {
2381
+ "completions/clipped_ratio": 0.0,
2382
+ "completions/max_length": 768.0,
2383
+ "completions/mean_length": 315.2053527832031,
2384
+ "completions/min_length": 198.0,
2385
+ "epoch": 0.3130252100840336,
2386
+ "frac_reward_zero_std": 0.0,
2387
+ "grad_norm": 0.021300485390984402,
2388
+ "learning_rate": 3.88549388930922e-07,
2389
+ "loss": -2.4940597143086052e-08,
2390
+ "reward": 0.7685186266899109,
2391
+ "reward_std": 0.21305865049362183,
2392
+ "rewards/Format/mean": 0.7685186266899109,
2393
+ "rewards/Format/std": 0.44278663396835327,
2394
+ "step": 149
2395
+ },
2396
+ {
2397
+ "completions/clipped_ratio": 0.0,
2398
+ "completions/max_length": 768.0,
2399
+ "completions/mean_length": 291.1964416503906,
2400
+ "completions/min_length": 199.0,
2401
+ "epoch": 0.31512605042016806,
2402
+ "frac_reward_zero_std": 0.0,
2403
+ "grad_norm": 0.02244035610136545,
2404
+ "learning_rate": 3.871729490045185e-07,
2405
+ "loss": -4.181250545798321e-08,
2406
+ "reward": 0.7938781976699829,
2407
+ "reward_std": 0.20275752246379852,
2408
+ "rewards/Format/mean": 0.7938780784606934,
2409
+ "rewards/Format/std": 0.3901706039905548,
2410
+ "step": 150
2411
+ },
2412
+ {
2413
+ "completions/clipped_ratio": 0.0,
2414
+ "completions/max_length": 768.0,
2415
+ "completions/mean_length": 316.139892578125,
2416
+ "completions/min_length": 199.0,
2417
+ "epoch": 0.3172268907563025,
2418
+ "frac_reward_zero_std": 0.0,
2419
+ "grad_norm": 0.022827898878748757,
2420
+ "learning_rate": 3.8579053387409167e-07,
2421
+ "loss": -1.7547669983741798e-07,
2422
+ "reward": 0.6709676384925842,
2423
+ "reward_std": 0.17893125116825104,
2424
+ "rewards/Format/mean": 0.6709675788879395,
2425
+ "rewards/Format/std": 0.3869513273239136,
2426
+ "step": 151
2427
+ },
2428
+ {
2429
+ "completions/clipped_ratio": 0.0,
2430
+ "completions/max_length": 768.0,
2431
+ "completions/mean_length": 292.16668701171875,
2432
+ "completions/min_length": 198.0,
2433
+ "epoch": 0.31932773109243695,
2434
+ "frac_reward_zero_std": 0.0,
2435
+ "grad_norm": 0.0237070906609801,
2436
+ "learning_rate": 3.8440220375714435e-07,
2437
+ "loss": 1.8810412427683332e-07,
2438
+ "reward": 0.7782310247421265,
2439
+ "reward_std": 0.24187692999839783,
2440
+ "rewards/Format/mean": 0.7782310247421265,
2441
+ "rewards/Format/std": 0.4222787022590637,
2442
+ "step": 152
2443
+ },
2444
+ {
2445
+ "completions/clipped_ratio": 0.0,
2446
+ "completions/max_length": 1342.0,
2447
+ "completions/mean_length": 385.75,
2448
+ "completions/min_length": 199.0,
2449
+ "epoch": 0.32142857142857145,
2450
+ "frac_reward_zero_std": 0.0,
2451
+ "grad_norm": 0.019384474178349216,
2452
+ "learning_rate": 3.8300801912883414e-07,
2453
+ "loss": 4.4517219066619873e-07,
2454
+ "reward": 0.7122718691825867,
2455
+ "reward_std": 0.21986432373523712,
2456
+ "rewards/Format/mean": 0.7122718095779419,
2457
+ "rewards/Format/std": 0.37839367985725403,
2458
+ "step": 153
2459
+ },
2460
+ {
2461
+ "completions/clipped_ratio": 0.0,
2462
+ "completions/max_length": 768.0,
2463
+ "completions/mean_length": 347.327392578125,
2464
+ "completions/min_length": 199.0,
2465
+ "epoch": 0.3235294117647059,
2466
+ "frac_reward_zero_std": 0.0,
2467
+ "grad_norm": 0.02158057802527968,
2468
+ "learning_rate": 3.8160804071933894e-07,
2469
+ "loss": 2.33606751720572e-08,
2470
+ "reward": 0.7716006636619568,
2471
+ "reward_std": 0.22200366854667664,
2472
+ "rewards/Format/mean": 0.771600604057312,
2473
+ "rewards/Format/std": 0.41280749440193176,
2474
+ "step": 154
2475
+ },
2476
+ {
2477
+ "completions/clipped_ratio": 0.0,
2478
+ "completions/max_length": 1506.0,
2479
+ "completions/mean_length": 363.7053527832031,
2480
+ "completions/min_length": 199.0,
2481
+ "epoch": 0.32563025210084034,
2482
+ "frac_reward_zero_std": 0.0,
2483
+ "grad_norm": 0.02108699741197977,
2484
+ "learning_rate": 3.8020232951121166e-07,
2485
+ "loss": 2.332741289023943e-08,
2486
+ "reward": 0.6571605801582336,
2487
+ "reward_std": 0.19497917592525482,
2488
+ "rewards/Format/mean": 0.6571606397628784,
2489
+ "rewards/Format/std": 0.42475777864456177,
2490
+ "step": 155
2491
+ },
2492
+ {
2493
+ "completions/clipped_ratio": 0.0,
2494
+ "completions/max_length": 768.0,
2495
+ "completions/mean_length": 311.125,
2496
+ "completions/min_length": 199.0,
2497
+ "epoch": 0.3277310924369748,
2498
+ "frac_reward_zero_std": 0.0,
2499
+ "grad_norm": 0.022373293491625736,
2500
+ "learning_rate": 3.7879094673672396e-07,
2501
+ "loss": 9.565459535565424e-09,
2502
+ "reward": 0.6847943663597107,
2503
+ "reward_std": 0.22381485998630524,
2504
+ "rewards/Format/mean": 0.6847943067550659,
2505
+ "rewards/Format/std": 0.3858049511909485,
2506
+ "step": 156
2507
+ },
2508
+ {
2509
+ "completions/clipped_ratio": 0.0,
2510
+ "completions/max_length": 768.0,
2511
+ "completions/mean_length": 294.9226379394531,
2512
+ "completions/min_length": 198.0,
2513
+ "epoch": 0.32983193277310924,
2514
+ "frac_reward_zero_std": 0.0,
2515
+ "grad_norm": 0.022319290745013702,
2516
+ "learning_rate": 3.773739538751988e-07,
2517
+ "loss": -1.637575586244111e-08,
2518
+ "reward": 0.649658203125,
2519
+ "reward_std": 0.21089015901088715,
2520
+ "rewards/Format/mean": 0.6496582627296448,
2521
+ "rewards/Format/std": 0.41071802377700806,
2522
+ "step": 157
2523
+ },
2524
+ {
2525
+ "completions/clipped_ratio": 0.0,
2526
+ "completions/max_length": 768.0,
2527
+ "completions/mean_length": 335.27679443359375,
2528
+ "completions/min_length": 198.0,
2529
+ "epoch": 0.3319327731092437,
2530
+ "frac_reward_zero_std": 0.0,
2531
+ "grad_norm": 0.02023267713284172,
2532
+ "learning_rate": 3.759514126503324e-07,
2533
+ "loss": -5.4147676564753056e-08,
2534
+ "reward": 0.7150367498397827,
2535
+ "reward_std": 0.19841539859771729,
2536
+ "rewards/Format/mean": 0.7150366902351379,
2537
+ "rewards/Format/std": 0.40306705236434937,
2538
+ "step": 158
2539
+ },
2540
+ {
2541
+ "completions/clipped_ratio": 0.0,
2542
+ "completions/max_length": 768.0,
2543
+ "completions/mean_length": 282.11309814453125,
2544
+ "completions/min_length": 198.0,
2545
+ "epoch": 0.33403361344537813,
2546
+ "frac_reward_zero_std": 0.0,
2547
+ "grad_norm": 0.022689317429528014,
2548
+ "learning_rate": 3.745233850275058e-07,
2549
+ "loss": 2.4428009055554867e-06,
2550
+ "reward": 0.8255599141120911,
2551
+ "reward_std": 0.17811521887779236,
2552
+ "rewards/Format/mean": 0.8255599141120911,
2553
+ "rewards/Format/std": 0.44584164023399353,
2554
+ "step": 159
2555
+ },
2556
+ {
2557
+ "completions/clipped_ratio": 0.0,
2558
+ "completions/max_length": 768.0,
2559
+ "completions/mean_length": 319.94940185546875,
2560
+ "completions/min_length": 198.0,
2561
+ "epoch": 0.33613445378151263,
2562
+ "frac_reward_zero_std": 0.0,
2563
+ "grad_norm": 0.019830774041192913,
2564
+ "learning_rate": 3.730899332110855e-07,
2565
+ "loss": -1.4872490794459736e-07,
2566
+ "reward": 0.7067343592643738,
2567
+ "reward_std": 0.23453274369239807,
2568
+ "rewards/Format/mean": 0.7067343592643738,
2569
+ "rewards/Format/std": 0.3960113227367401,
2570
+ "step": 160
2571
+ }
2572
+ ],
2573
+ "logging_steps": 1,
2574
+ "max_steps": 476,
2575
+ "num_input_tokens_seen": 0,
2576
+ "num_train_epochs": 1,
2577
+ "save_steps": 10,
2578
+ "stateful_callbacks": {
2579
+ "TrainerControl": {
2580
+ "args": {
2581
+ "should_epoch_stop": false,
2582
+ "should_evaluate": false,
2583
+ "should_log": false,
2584
+ "should_save": true,
2585
+ "should_training_stop": false
2586
+ },
2587
+ "attributes": {}
2588
+ }
2589
+ },
2590
+ "total_flos": 0.0,
2591
+ "train_batch_size": 14,
2592
+ "trial_name": null,
2593
+ "trial_params": null
2594
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3c236fc4b17dcf9de63759589a576fe6b1d98b45f385ae479ad12c7e843e8bc
3
+ size 12689
vocab.json ADDED
The diff for this file is too large to render. See raw diff