---
# Supervised fine-tuning (LoRA) run configuration.
#
# NOTE(review): this file was reconstructed from a whitespace-collapsed copy.
# Section nesting (in particular `wandb` as a top-level section, and
# `early_stopping` / `resume_from_checkpoint` under `train`) is inferred from
# key order - confirm against the loader that consumes this file.

# Run output location and seed.
run:
  run_dir: ./runs/instruct_run_24b
  seed: 42

# Weights & Biases logging settings.
wandb:
  enabled: true
  project: sft-training
  entity: null
  name: null
  tags:
    - sft-lora
    - 24b-Devstral
  notes: null

# Base model to fine-tune and how to load it.
model:
  repo_id: ./CPT/runs/cpt_run_v1/merged_24b_cpt_lora
  revision: null
  base_local_dir: base_model
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: auto
  torch_dtype: bfloat16
  # 4-bit loading is disabled; the bnb_4bit_* keys below are still present.
  use_4bit: false
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: bfloat16
  attn_implementation: null

# Dataset location, field mapping and prompt formatting.
data:
  train_jsonl: ../sft_dataset.jsonl
  # No separate eval file is given; eval_split_ratio is set instead.
  eval_jsonl: null
  eval_split_ratio: 0.1
  instruction_field: instruction
  input_field: input
  output_field: output
  format_type: custom
  # The prompt is kept in double-quoted form with escaped line breaks so its
  # exact characters (including the trailing space after "terminate with" and
  # the final "\n\n") cannot be altered by editors or formatters.
  # NOTE(review): the "## Rules" list numbers two consecutive items "3."
  # (1, 2, 3, 3, 4, 5), rule 5 ends with "terminate with " followed by nothing,
  # and the example's ##SELECT entries carry no "modify::"/"add::" prefix.
  # Left unchanged because this text is fed to the model - confirm whether a
  # token was lost and whether the numbering should be corrected.
  system_prompt: "You are a Hyperswitch Rust code analyzer. Identify functions/structs\
    \ that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain\
    \ the data flow and why each component must change:\n- Flow: [Input \u2192 Processing\
    \ \u2192 Output with arrows]\n- For each component: \"The [ComponentName] ([path])\
    \ must [action] because [reason]\u2014without this, [consequence]\"\n- Explain\
    \ coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\n\
    add::crates/another/file.rs::function::AnotherComponent\n\n\n## Rules\n\n\
    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for\
    \ nested items: `status::StructName::Type::Name`\n3. Always explain \"must change\
    \ because\" and \"without this\"\n3. Types of components: function, struct, enum,\
    \ impl, trait\n4. If there is extra information (e.g., enum variants), include\
    \ that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with \n\
    \n## Example\n\n##TASK\nAdd webhook subscription support\n\n##OUTPUT\nThe webhook\
    \ system routes events via EventClass enum. Flow: webhook \u2192 EventClass \u2192\
    \ handler \u2192 processing. The EventClass enum (crates/common_enums/src/enums.rs::EventClass)\
    \ must add Subscriptions variant because it defines event routing\u2014without\
    \ this, subscription events cannot be processed. The SubscriptionStatus impl (crates/common_enums/src/transformers.rs::SubscriptionStatus)\
    \ must map to EventType because it converts status to events\u2014without this,\
    \ status changes don't trigger webhooks. These are coupled: EventClass routes\
    \ to handlers that use SubscriptionStatus mappings.\n\n##SELECT\ncrates/common_enums/src/enums.rs::EventClass\n\
    crates/common_enums/src/transformers.rs::SubscriptionStatus\n\n"
  # NOTE(review): the separators between template segments appear as single
  # spaces in the copy this was reconstructed from - confirm whether the
  # original used newlines between segments.
  custom_template: '##INSTRUCTION {instruction}<|im_end|> ##TASK {input}<|im_end|> ##OUTPUT {output}<|im_end|>'
  max_length: 2048
  shuffle: true
  num_proc: 4

# LoRA adapter settings.
peft:
  enabled: true
  r: 8
  lora_alpha: 16
  lora_dropout: 0.05
  # The string "none", not a YAML null.
  bias: none
  target_modules: auto

# Optimisation, logging, checkpointing and evaluation schedule.
train:
  num_train_epochs: 6
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8
  # Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
  # load a bare `1e-4` as the string '1e-4' rather than a float.
  learning_rate: 1.0e-4
  weight_decay: 0.0
  warmup_ratio: 0.08
  lr_scheduler_type: cosine
  optim: adamw_torch
  max_grad_norm: 0.8
  gradient_checkpointing: true
  logging_steps: 2
  save_strategy: steps
  # save_steps (500) is a whole multiple of eval_steps (100).
  save_steps: 500
  save_total_limit: 20
  evaluation_strategy: steps
  eval_steps: 100
  load_best_model_at_end: true
  early_stopping:
    enabled: true
    patience: 3
    min_delta: 0.001
    metric: eval_loss
    mode: min
  resume_from_checkpoint: auto

# Post-training merge settings.
# NOTE(review): model.torch_dtype is bfloat16 while merged_dtype is float16 -
# confirm the dtype change on merge is intended.
merge:
  enabled: true
  merged_dtype: float16
  max_shard_size: 2GB
  output_dir: ./merged_24b_instruct_lora