diff --git a/lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..513791f28a4388c67e67682dfba4b39bba173651 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3befb636a4bfdc7f407e8ec01f5b84a2869952dea6106070ab00f7d6b760ef +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4af699f9615dfd626c4a85f9df1225e08a922360 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:240c4be74f4b416addef8e472ffd17a8f0a9206792bbe14eb3617a6736ec132e +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srm/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_aati_srm/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..911007f147de04f2052e959ed723fb1f0e25df5d --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srm/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.528189480304718, + "eval_ciou": 0.6158800721168518 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.57504802942276, + "eval_ciou": 0.6519048810005188 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5494521856307983, + "eval_ciou": 0.6140078902244568 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5751751065254211, + "eval_ciou": 0.6430273652076721 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5621751546859741, + "eval_ciou": 0.6091215014457703 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5681710243225098, + "eval_ciou": 0.5827724933624268 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5692390203475952, + "eval_ciou": 0.5854980945587158 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5853511095046997, + "eval_ciou": 0.5549483895301819 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5778804421424866, + "eval_ciou": 0.5894233584403992 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.5841119885444641, + "eval_ciou": 0.5798441171646118 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6100905537605286, + "eval_ciou": 0.6119125485420227 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.799595832824707, + "eval_ciou": 0.8027365207672119 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8168815970420837, + "eval_ciou": 0.8243600130081177 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7745703458786011, + "eval_ciou": 0.7807985544204712 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7551393508911133, + "eval_ciou": 0.7453246712684631 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7942529916763306, + "eval_ciou": 0.7944912910461426 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7106485366821289, + "eval_ciou": 0.6990127563476562 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7611709833145142, + "eval_ciou": 0.7682604193687439 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7570181488990784, + "eval_ciou": 0.7642934918403625 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758303972.bask-pg0308u25a.2176098.0 b/lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758303972.bask-pg0308u25a.2176098.0 new file mode 100644 index 0000000000000000000000000000000000000000..804abe87cca4bff1cb717891c9697b75aa5186c8 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758303972.bask-pg0308u25a.2176098.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd0eb014901254de4a95eb71fc2571d348cabd50a57e396fb2c3d342a3d7fb9 +size 486 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758304185.bask-pg0308u25a.2184107.0 b/lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758304185.bask-pg0308u25a.2184107.0 new file mode 100644 index 0000000000000000000000000000000000000000..48a49817e7c3b4760daf74a99b450a63bd7d774a --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758304185.bask-pg0308u25a.2184107.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1af39459ef2a226054ba8d41e28d9351e1edf097f258d661335b41ebe8f3adc +size 212352 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-46-08_bask-pg0308u25a/events.out.tfevents.1758304037.bask-pg0308u25a.2176098.1 b/lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-46-08_bask-pg0308u25a/events.out.tfevents.1758304037.bask-pg0308u25a.2176098.1 new file mode 100644 index 0000000000000000000000000000000000000000..b3d234ccfb46e8bfc4511daa253a4042012a3eac --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-46-08_bask-pg0308u25a/events.out.tfevents.1758304037.bask-pg0308u25a.2176098.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4044136eab8f6e9cbcae192b20ebf5e4ed93ba0420ebf5e87c4b2238109fa4e +size 9325 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758304237.bask-pg0308u25a.2184107.1 b/lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758304237.bask-pg0308u25a.2184107.1 new file mode 100644 index 0000000000000000000000000000000000000000..6d38c0d96640d9133c95632433abc576e0750c7f --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758304237.bask-pg0308u25a.2184107.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94cc218abf38e00bc41bdedd1db488f808895ac0ae3c3898e84ea6a04bec0e07 +size 116381 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758339214.bask-pg0308u25a.2184107.2 b/lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758339214.bask-pg0308u25a.2184107.2 new file mode 100644 index 0000000000000000000000000000000000000000..450a7a2ef5916032728fcf461fdd60a8a60573a9 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758339214.bask-pg0308u25a.2184107.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c63060c4d02dcdbdc52b17a6208816f1c9a7a952fd8d4159426ee935d22b9aad +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45890e0262059560695893d4b3b9f8483bbc8f74 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e94e426c479692c600bd7da2a58a66d89b1a0b6105e5f4fba892bb35fa05130 +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9de4db1bf7bf115e3dd800d14b342cfbabd81e93 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9efb840060edee20988b8e9bb354bde899f2bb93875da91a53a68a5e8259ceb3 +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srs/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_aati_srs/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..2e3c8a56f2d1def77ab05008b3012b62db2112f9 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srs/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5235294699668884, + "eval_ciou": 0.5689558982849121 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.4938444197177887, + "eval_ciou": 0.5315812826156616 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.52805095911026, + "eval_ciou": 0.5778353810310364 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.539726734161377, + "eval_ciou": 0.5628448128700256 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5516190528869629, + "eval_ciou": 0.5699143409729004 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5535993576049805, + "eval_ciou": 0.5486313700675964 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5771014094352722, + "eval_ciou": 0.630760908126831 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5713648796081543, + "eval_ciou": 0.5902009606361389 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.566828727722168, + "eval_ciou": 0.5753384232521057 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.5662292838096619, + "eval_ciou": 0.5734410285949707 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5632049441337585, + "eval_ciou": 0.5677175521850586 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.8058294057846069, + "eval_ciou": 0.810309886932373 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8246700167655945, + "eval_ciou": 0.8328030705451965 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7877973914146423, + "eval_ciou": 0.7934398651123047 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7641584277153015, + "eval_ciou": 0.7556021809577942 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7998954057693481, + "eval_ciou": 0.7981683611869812 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7219251394271851, + "eval_ciou": 0.7105168104171753 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.766793966293335, + "eval_ciou": 0.7789492011070251 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7627427577972412, + "eval_ciou": 0.7680707573890686 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_aati_srs/events.out.tfevents.1758407218.bask-pg0308u25a.3988218.0 b/lisa-ivl3-2b_bi2cbe_aati_srs/events.out.tfevents.1758407218.bask-pg0308u25a.3988218.0 new file mode 100644 index 0000000000000000000000000000000000000000..14270825af42b40a29649d2fd2b35fb200494068 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srs/events.out.tfevents.1758407218.bask-pg0308u25a.3988218.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb9faddf368f10dc1f79077182ae3f0763d1d459cd00745aefdef29c4785740 +size 212352 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758407268.bask-pg0308u25a.3988218.1 b/lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758407268.bask-pg0308u25a.3988218.1 new file mode 100644 index 0000000000000000000000000000000000000000..45456673b19c3902a3eb0fe7c2e2cc26f0d77d66 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758407268.bask-pg0308u25a.3988218.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c60008ebfd9d19da94c88abc8cbca2ad3850f341e6cd01437d6e2c1a16144b9 +size 116381 diff --git a/lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758441983.bask-pg0308u25a.3988218.2 b/lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758441983.bask-pg0308u25a.3988218.2 new file mode 100644 index 0000000000000000000000000000000000000000..c6140bfb348fb9205853e581795774ff73a5ca18 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758441983.bask-pg0308u25a.3988218.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebda0e4e7c56eda12f89e0b655238dcdd19f18e497ae1d685d308e1e21d0b7dd +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb2b45286f03b160fe5598ccd53f670f6bffb540 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c525c1ae85c17caddcdb1b0d45df96d56a86227e4f990cbb36d7331a692551ad +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c06f729b1cdff26aa2e28847cee0614b6f5a84ad --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da9ac5d9ad0770700bf7b786cf18482fad5fd08e88a5e860bd99406ca19065d +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..b0947d8079832b700dbc2f0f6c95653d4cb165a0 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.4889245629310608, + "eval_ciou": 0.5533338189125061 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5513965487480164, + "eval_ciou": 0.6630034446716309 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5442399382591248, + "eval_ciou": 0.6376377940177917 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5748190879821777, + "eval_ciou": 0.6525793671607971 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5847772359848022, + "eval_ciou": 0.643996000289917 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5810279846191406, + "eval_ciou": 0.6454022526741028 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5949556827545166, + "eval_ciou": 0.6094688177108765 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6049715280532837, + "eval_ciou": 0.6379661560058594 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6034538149833679, + "eval_ciou": 0.6570442914962769 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6016661524772644, + "eval_ciou": 0.6353110671043396 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6043070554733276, + "eval_ciou": 0.608022153377533 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7902190089225769, + "eval_ciou": 0.7928427457809448 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.807979166507721, + "eval_ciou": 0.8122658729553223 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.763839066028595, + "eval_ciou": 0.764618992805481 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.738534688949585, + "eval_ciou": 0.7319899201393127 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7776216864585876, + "eval_ciou": 0.7770327925682068 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6900521516799927, + "eval_ciou": 0.676867663860321 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7514216899871826, + "eval_ciou": 0.7589317560195923 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7455593943595886, + "eval_ciou": 0.7489427924156189 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758822942.bask-pg0308u25a.3977024.0 b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758822942.bask-pg0308u25a.3977024.0 new file mode 100644 index 0000000000000000000000000000000000000000..5609290334aff0cd682f8654b35c9657740f79ec --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758822942.bask-pg0308u25a.3977024.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5090ab4b5276a748931cd1068f50c6d17618a085656691fb72a03b346bd7b3b +size 88 diff --git a/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758823030.bask-pg0308u25a.3998347.0 b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758823030.bask-pg0308u25a.3998347.0 new file mode 100644 index 0000000000000000000000000000000000000000..90e9100dd4c26a54e01bec9192f482d15c48585a --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758823030.bask-pg0308u25a.3998347.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aac91ef4c5538767282625b5123a14924a0f19f313d68d62ed42912194aebe6 +size 212352 diff --git a/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758823107.bask-pg0308u25a.3998347.1 b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758823107.bask-pg0308u25a.3998347.1 new file mode 100644 index 0000000000000000000000000000000000000000..c9c5a750534b9089321e206e92b1737b3c448830 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758823107.bask-pg0308u25a.3998347.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb225847cc5dc86b3ecdb0241d649d67245d9ca0710b65086da8f329c5700ee6 +size 116397 diff --git a/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758870402.bask-pg0308u25a.3998347.2 b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758870402.bask-pg0308u25a.3998347.2 new file mode 100644 index 0000000000000000000000000000000000000000..b0a8e3c292ebb0002baa6e33435d1dacaba10624 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758870402.bask-pg0308u25a.3998347.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0907be6c27349e6b811188c1c28862e763dfde6a47b69586b494273b0268d8f9 +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64464e1e61e1c8ced6a93fca5ecb24b7567fb5e0 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a73ee98332ed56252ee2690f1a7351446fd80c4253500657c9284e0a0f05fd +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d2a83537f34f14a8fa04fed1778a7439b899145 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833f70825712e685e5ab69b01da135f496f6a901ba5bc7b958a93796e95e8a09 +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..9f53830123fd2737d9deb46fc12cf9dc9228475d --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5344988107681274, + "eval_ciou": 0.5989851355552673 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5579254031181335, + "eval_ciou": 0.646024227142334 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5501570701599121, + "eval_ciou": 0.6018446683883667 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5774487853050232, + "eval_ciou": 0.6542478203773499 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5822131037712097, + "eval_ciou": 0.6766245365142822 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5897811055183411, + "eval_ciou": 0.6791333556175232 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5887703895568848, + "eval_ciou": 0.6910147070884705 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5998998880386353, + "eval_ciou": 0.6640490293502808 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.5920247435569763, + "eval_ciou": 0.6693744659423828 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6001232266426086, + "eval_ciou": 0.6858417987823486 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5927180051803589, + "eval_ciou": 0.6138883233070374 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7834749817848206, + "eval_ciou": 0.790122926235199 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8022208213806152, + "eval_ciou": 0.8086150884628296 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7566637396812439, + "eval_ciou": 0.7609479427337646 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7318623065948486, + "eval_ciou": 0.7281762361526489 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7749318480491638, + "eval_ciou": 0.7748793363571167 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.682511568069458, + "eval_ciou": 0.6719024777412415 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.743724524974823, + "eval_ciou": 0.7526366710662842 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7407757043838501, + "eval_ciou": 0.7478148341178894 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/events.out.tfevents.1758558655.bask-pg0308u29a.637997.0 b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/events.out.tfevents.1758558655.bask-pg0308u29a.637997.0 new file mode 100644 index 0000000000000000000000000000000000000000..4d1e31e28a4ae3f21f8260f0e424ab0f37f88260 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/events.out.tfevents.1758558655.bask-pg0308u29a.637997.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb61bef795309a3922462febab004b11ccf980e30eac016b8230fa293a7b7656 +size 212352 diff --git a/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758558731.bask-pg0308u29a.637997.1 b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758558731.bask-pg0308u29a.637997.1 new file mode 100644 index 0000000000000000000000000000000000000000..adf4599f047bc090bc582cae91775c446a9f6ef3 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758558731.bask-pg0308u29a.637997.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e87c13cd165f21e6038c79b21806f6123c013c42bd45ef063c0e1671b8751dd7 +size 116399 diff --git a/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758608704.bask-pg0308u29a.637997.2 b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758608704.bask-pg0308u29a.637997.2 new file mode 100644 index 0000000000000000000000000000000000000000..16a3325a1fa0f2602426f4ffb4ded2a27b7b7bad --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758608704.bask-pg0308u29a.637997.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf83b859f6d1900c3189c78baebb5ee0e6400bc6d00390754f89d38c552dfb86 +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c8beb107d597a2e570f8ba8227ff08605f25158 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee127087b96ff9811fbd07f160c2648b5879ccbcd7e88fbf1c57578cc6427656 +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f4139c36f6dc20d2a5b7a9965d5abd4fea5a34e0 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf8b9941672f97b3e8473f3e7078f73367e3fad3a4c4b35b69c5a79104328df +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..e45d8f9a7d3cd71e806f251b3b06e3b6adccadc2 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5294323563575745, + "eval_ciou": 0.5168058276176453 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5691018104553223, + "eval_ciou": 0.5466322302818298 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5456892848014832, + "eval_ciou": 0.6087337732315063 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5649483799934387, + "eval_ciou": 0.5830232501029968 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5766127109527588, + "eval_ciou": 0.592596709728241 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5876106023788452, + "eval_ciou": 0.6196873188018799 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5895294547080994, + "eval_ciou": 0.5830597281455994 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.5922108888626099, + "eval_ciou": 0.5886086225509644 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6001683473587036, + "eval_ciou": 0.5857241749763489 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6061425805091858, + "eval_ciou": 0.6062945127487183 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5916463136672974, + "eval_ciou": 0.5962467789649963 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7914398908615112, + "eval_ciou": 0.7944017052650452 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8115019202232361, + "eval_ciou": 0.8148282766342163 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7657762169837952, + "eval_ciou": 0.7644218802452087 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7408427000045776, + "eval_ciou": 0.7323743104934692 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7806029319763184, + "eval_ciou": 0.7790927886962891 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6951540112495422, + "eval_ciou": 0.683707058429718 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7511024475097656, + "eval_ciou": 0.7564254403114319 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7492029070854187, + "eval_ciou": 0.7516255378723145 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821654.bask-pg0308u18a.998112.0 b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821654.bask-pg0308u18a.998112.0 new file mode 100644 index 0000000000000000000000000000000000000000..971cdf08cdaeea7f89b8e3a0596e6822bec6fd17 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821654.bask-pg0308u18a.998112.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0f6402efa20cb4eb1c3dcaf9815e71b1964b12d1903fcd9a8cfc7ba8fc924f +size 486 diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821976.bask-pg0308u18a.1004472.0 b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821976.bask-pg0308u18a.1004472.0 new file mode 100644 index 0000000000000000000000000000000000000000..fc45c513dfe25de3010a15ae35d32cf4624c3394 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821976.bask-pg0308u18a.1004472.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b21a9269fe7a0131c65a9b63b6caa674fe4f8b73071b6caa68cc19ce4724062 +size 88 diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758822179.bask-pg0308u18a.1008370.0 b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758822179.bask-pg0308u18a.1008370.0 new file mode 100644 index 0000000000000000000000000000000000000000..7c8f106ade9fabb7afe2081806015c3623601a2c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758822179.bask-pg0308u18a.1008370.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28cd9c79f6c1e5c37360f0ca87b6fa4ff89de989a108bdfeadb937195000c169 +size 212352 diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-34-12_bask-pg0308u18a/events.out.tfevents.1758821725.bask-pg0308u18a.998112.1 b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-34-12_bask-pg0308u18a/events.out.tfevents.1758821725.bask-pg0308u18a.998112.1 new file mode 100644 index 0000000000000000000000000000000000000000..d8f778aae8c8a98d7e7d3f1111961febaa47d109 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-34-12_bask-pg0308u18a/events.out.tfevents.1758821725.bask-pg0308u18a.998112.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb59ce061f378068b22c6735f8430ed9ace878dcac3b831594603aaa1bb107a1 +size 9338 diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-39-34_bask-pg0308u18a/events.out.tfevents.1758822035.bask-pg0308u18a.1004472.1 b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-39-34_bask-pg0308u18a/events.out.tfevents.1758822035.bask-pg0308u18a.1004472.1 new file mode 100644 index 0000000000000000000000000000000000000000..9a3584458f90f4ad1c32ff1030cfbea29a6e92e3 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-39-34_bask-pg0308u18a/events.out.tfevents.1758822035.bask-pg0308u18a.1004472.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2050a14b591ad7960da8a1c578624e5fcadc630a64886750db14457a302231c +size 9131 diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758822240.bask-pg0308u18a.1008370.1 b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758822240.bask-pg0308u18a.1008370.1 new file mode 100644 index 0000000000000000000000000000000000000000..e1d9e183d1bf723dbd5c47a9decf69ca3449cd58 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758822240.bask-pg0308u18a.1008370.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc263fde5f12193454e49ef64849b95edfdbe9d84d7748d1bda1f0c04506652 +size 116397 diff --git a/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758869487.bask-pg0308u18a.1008370.2 b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758869487.bask-pg0308u18a.1008370.2 new file mode 100644 index 0000000000000000000000000000000000000000..f6307c9645109cd82c391598e7d0b94813794aff --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758869487.bask-pg0308u18a.1008370.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d1915b5faf4072182ed92a81dc1182e464a0d20fc50e6147b785c025d972e0f +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9987ae62ac3a38c7c98e19b04418a88420950a57 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c75bb36d150d76c1aded195dfe0969ac1e5f7b51708e3253890664b3306fe7b6 +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7c0b461198c1748f944eca2414b2b01dfa0eb58 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:586d01e0f868c297c5556fb5760bb5f53403f37d00934248a1f70708c2bbdc4d +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_coco/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_vlorati_coco/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..e35a7df97850e723f4a6184e439dbbc6592e6503 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_coco/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5369102954864502, + "eval_ciou": 0.5064759254455566 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5686467289924622, + "eval_ciou": 0.611318051815033 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5613127946853638, + "eval_ciou": 0.6206056475639343 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5933331847190857, + "eval_ciou": 0.6126891374588013 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6065125465393066, + "eval_ciou": 0.6544414162635803 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5995581150054932, + "eval_ciou": 0.6379423141479492 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6159911155700684, + "eval_ciou": 0.621420681476593 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6243378520011902, + "eval_ciou": 0.6523417234420776 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6166976690292358, + "eval_ciou": 0.6346321702003479 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6171593070030212, + "eval_ciou": 0.6407290697097778 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5836987495422363, + "eval_ciou": 0.6126533150672913 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7852296233177185, + "eval_ciou": 0.7870670557022095 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8046602010726929, + "eval_ciou": 0.8073185086250305 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7577531337738037, + "eval_ciou": 0.7593867778778076 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7351171374320984, + "eval_ciou": 0.7267892956733704 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7731851935386658, + "eval_ciou": 0.7728055119514465 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6876177191734314, + "eval_ciou": 0.6751795411109924 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7494315505027771, + "eval_ciou": 0.7570431232452393 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7449917197227478, + "eval_ciou": 0.750389039516449 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_coco/events.out.tfevents.1758236140.bask-pg0309u16a.1220041.0 b/lisa-ivl3-2b_bi2cbe_vlorati_coco/events.out.tfevents.1758236140.bask-pg0309u16a.1220041.0 new file mode 100644 index 0000000000000000000000000000000000000000..1bc7ac90964cb7ca0c6470f7218ef84a742366ea --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_coco/events.out.tfevents.1758236140.bask-pg0309u16a.1220041.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a673a2115871fc802ba01b020cb9f8f61e30b9e9406eedf604c1d500fb25c686 +size 212352 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758236201.bask-pg0309u16a.1220041.1 b/lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758236201.bask-pg0309u16a.1220041.1 new file mode 100644 index 0000000000000000000000000000000000000000..9c16b84e66fb04eb0af8149b5f8f400e5319565a --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758236201.bask-pg0309u16a.1220041.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a7180c0676429a3801a6fe71e0ec596fc2cff90bfd8a557828056eb13598e1 +size 116395 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758283609.bask-pg0309u16a.1220041.2 b/lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758283609.bask-pg0309u16a.1220041.2 new file mode 100644 index 0000000000000000000000000000000000000000..99f1fecdf2fdd391338b875b6bdd20faca3dadbb --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758283609.bask-pg0309u16a.1220041.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad3a469cb6b9847f58a453fce555b48ae34226582d16eaa962b62daec1fe25d6 +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4a8016bb7f5ddc1a0a554707c7681dce763e2e1 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c94f89e72ae01751ebb8d5912fd557799f72ea83b86b81f62cdc46fda72f35f +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a03f0544a83fb5e1c009681f0088dd3bdb28a2b --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:022471703d36a79af62e558670c53fd7ad53ee5128d4a65b85e148c16cff4cf3 +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..e6fd06d79d56f470a637d8f5837926e453866a36 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.54237961769104, + "eval_ciou": 0.5924025177955627 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5600003600120544, + "eval_ciou": 0.5568979382514954 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5723169445991516, + "eval_ciou": 0.6404613256454468 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5950077772140503, + "eval_ciou": 0.591992199420929 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5933323502540588, + "eval_ciou": 0.6031982898712158 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5994836688041687, + "eval_ciou": 0.6908509135246277 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6031705141067505, + "eval_ciou": 0.689110517501831 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6064140200614929, + "eval_ciou": 0.6731957197189331 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6144939064979553, + "eval_ciou": 0.7057955861091614 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6208682656288147, + "eval_ciou": 0.699893593788147 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5947456359863281, + "eval_ciou": 0.641963005065918 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7865906357765198, + "eval_ciou": 0.7917014360427856 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8070659637451172, + "eval_ciou": 0.8116178512573242 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7621356844902039, + "eval_ciou": 0.7609087228775024 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.736609160900116, + "eval_ciou": 0.7305412888526917 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7776393294334412, + "eval_ciou": 0.7768460512161255 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6853057742118835, + "eval_ciou": 0.6753689050674438 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7492673397064209, + "eval_ciou": 0.7572758197784424 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7419416308403015, + "eval_ciou": 0.7488384246826172 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1757946152.bask-pg0309u17a.1698773.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1757946152.bask-pg0309u17a.1698773.0 new file mode 100644 index 0000000000000000000000000000000000000000..4d3226815b08622e5651b90a4123829a81e6aa15 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1757946152.bask-pg0309u17a.1698773.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efcd0ec73096a2aabb73c7ff0159f5224773643d3926888a75e2e6ecb7e4e105 +size 144736 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1757980269.bask-pg0309u17a.2863297.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1757980269.bask-pg0309u17a.2863297.0 new file mode 100644 index 0000000000000000000000000000000000000000..970b44f7634d59c68b11e44c15268d66b6a18124 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1757980269.bask-pg0309u17a.2863297.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25af22997db371ac9e7e7510f3c240004b9448e4c24047d2e2d31a23f51c6891 +size 88 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1757981251.bask-pg0309u17a.2924107.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1757981251.bask-pg0309u17a.2924107.0 new file mode 100644 index 0000000000000000000000000000000000000000..474e30e2b65ab90e3e9aad05ccd9a2971d457bdb --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1757981251.bask-pg0309u17a.2924107.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:413edaa6065c13be730064a4530ee71cf241b153b98f272a0aab32fd23e9b54d +size 87954 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1758130061.bask-pg0309u17a.378141.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1758130061.bask-pg0309u17a.378141.0 new file mode 100644 index 0000000000000000000000000000000000000000..f0a51710820cfa64f529670a4c2883c5e0acaecc --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1758130061.bask-pg0309u17a.378141.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83e374299e487aac6c80a085f8fd833dd2eb5401b934889d241fad50e21b928 +size 88 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1758130146.bask-pg0309u17a.382569.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1758130146.bask-pg0309u17a.382569.0 new file mode 100644 index 0000000000000000000000000000000000000000..55a8234435428df93b04a7ec2f2598c455889979 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1758130146.bask-pg0309u17a.382569.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4903e1de7dd7bbc8d2d582b7980b0c08f44d48db9dc106c7276ccc164478993 +size 4336 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1758137539.bask-pg0309u17a.629478.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1758137539.bask-pg0309u17a.629478.0 new file mode 100644 index 0000000000000000000000000000000000000000..fca86cfca266d2480cc74cb36f32a326b845895e --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/events.out.tfevents.1758137539.bask-pg0309u17a.629478.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ddd48bcafcea6be03fd96818bb44261644a0c72d0d84c374b27c2352352f40 +size 4336 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep15_15-22-29_bask-pg0309u17a/events.out.tfevents.1757946234.bask-pg0309u17a.1698773.1 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep15_15-22-29_bask-pg0309u17a/events.out.tfevents.1757946234.bask-pg0309u17a.1698773.1 new file mode 100644 index 0000000000000000000000000000000000000000..2fad5116c9c06a3b4d7465ca8857e0bc1e82b6ca --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep15_15-22-29_bask-pg0309u17a/events.out.tfevents.1757946234.bask-pg0309u17a.1698773.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87de6fb53badbf7e76081676ac88f365ec82442576fc977d094ef00a1dde0b3e +size 83803 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep16_01-07-28_bask-pg0309u17a/events.out.tfevents.1757981380.bask-pg0309u17a.2924107.1 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep16_01-07-28_bask-pg0309u17a/events.out.tfevents.1757981380.bask-pg0309u17a.2924107.1 new file mode 100644 index 0000000000000000000000000000000000000000..0eaa3c9b9c81fd28fb8b19d52221436f39195d75 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep16_01-07-28_bask-pg0309u17a/events.out.tfevents.1757981380.bask-pg0309u17a.2924107.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec1e07f3920e95e2d693225c1cc10972657d674ad749249b9508ddc33eb5b1a4 +size 52263 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep16_01-07-28_bask-pg0309u17a/events.out.tfevents.1758000502.bask-pg0309u17a.2924107.2 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep16_01-07-28_bask-pg0309u17a/events.out.tfevents.1758000502.bask-pg0309u17a.2924107.2 new file mode 100644 index 0000000000000000000000000000000000000000..bdf5d1b1f6946b955277751fd5d7638cf03ca82c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep16_01-07-28_bask-pg0309u17a/events.out.tfevents.1758000502.bask-pg0309u17a.2924107.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ada0a07b618cb7fce2954782ac352c9ee55d5459d30c63e56c72becaf41fb3 +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep17_18-29-04_bask-pg0309u17a/events.out.tfevents.1758130250.bask-pg0309u17a.382569.1 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep17_18-29-04_bask-pg0309u17a/events.out.tfevents.1758130250.bask-pg0309u17a.382569.1 new file mode 100644 index 0000000000000000000000000000000000000000..ab6d27809825de601b9e24e5eb8590c72a270901 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep17_18-29-04_bask-pg0309u17a/events.out.tfevents.1758130250.bask-pg0309u17a.382569.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bd72a6704548f972192c5ef90cf04db7231d8a4d6cac7b55bc90194450666f5 +size 898 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep17_20-32-16_bask-pg0309u17a/events.out.tfevents.1758137590.bask-pg0309u17a.629478.1 b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep17_20-32-16_bask-pg0309u17a/events.out.tfevents.1758137590.bask-pg0309u17a.629478.1 new file mode 100644 index 0000000000000000000000000000000000000000..8e2c78cb27e7df603039f3887d3e612a42d0d681 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr/runs/Sep17_20-32-16_bask-pg0309u17a/events.out.tfevents.1758137590.bask-pg0309u17a.629478.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a934173c5fe44f0187ae3e6e49ff3cbd10c6b8dde68e100877efccf91fd93693 +size 898 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..532f5208219e48276e12a4f7d78a45b61183686b --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5cf6ab5d5fdf8ba740e80ef36dbfe37551e98028cc9e8abf338fe046b4fb08 +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d0e3c7b77d0166da9a6391fe236dfece45ec8c1 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e9626025e3b4f928f114d0ada92b05f16cb328f1d6329aa5081b15f35872a59 +size 7416 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..3f4b524dfca542caddf991b0b711adbcfb294a18 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5494612455368042, + "eval_ciou": 0.6438744068145752 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.59159255027771, + "eval_ciou": 0.6862638592720032 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.6055992245674133, + "eval_ciou": 0.6832388639450073 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.604692280292511, + "eval_ciou": 0.6781006455421448 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6054744124412537, + "eval_ciou": 0.6330469250679016 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6265935301780701, + "eval_ciou": 0.7067847847938538 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6277354955673218, + "eval_ciou": 0.6961160898208618 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6310630440711975, + "eval_ciou": 0.6774649620056152 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6295342445373535, + "eval_ciou": 0.7090522050857544 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6310564279556274, + "eval_ciou": 0.7113096117973328 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6080661416053772, + "eval_ciou": 0.6248094439506531 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7814750075340271, + "eval_ciou": 0.7884069085121155 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8027645945549011, + "eval_ciou": 0.8096579313278198 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7598546743392944, + "eval_ciou": 0.7646240592002869 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7313858270645142, + "eval_ciou": 0.7301313281059265 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7768750190734863, + "eval_ciou": 0.7811311483383179 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6883339285850525, + "eval_ciou": 0.6788879036903381 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7468889355659485, + "eval_ciou": 0.759071946144104 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7411845922470093, + "eval_ciou": 0.7494972944259644 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758328324.bask-pg0308u26a.1841429.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758328324.bask-pg0308u26a.1841429.0 new file mode 100644 index 0000000000000000000000000000000000000000..fb27e24e51cdc35b3d6a7925db54e67320541cc2 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758328324.bask-pg0308u26a.1841429.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f85fba2fe4e04132f3ce8dd72e717a02c3cad7ff74f62be5bfb54a5391f894d1 +size 133801 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758376987.bask-pg0308u26a.3352770.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758376987.bask-pg0308u26a.3352770.0 new file mode 100644 index 0000000000000000000000000000000000000000..df8a948157f7b086b3bfc6f2ff45e7d8a123df72 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758376987.bask-pg0308u26a.3352770.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41a4ea6e1d526a389287f18883cfbcdf103a4506b06f83c0c2079fd92965a58e +size 88 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758377494.bask-pg0308u26a.3361924.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758377494.bask-pg0308u26a.3361924.0 new file mode 100644 index 0000000000000000000000000000000000000000..673d75dcc4e171ba17cd99ce200f4d1129dcf1c0 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758377494.bask-pg0308u26a.3361924.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65e6b5d3cf03c82051147e3ca0e568f39aa4d1e2f4e9c3b0687f569d91b113a +size 88 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758378048.bask-pg0308u26a.3371520.0 b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758378048.bask-pg0308u26a.3371520.0 new file mode 100644 index 0000000000000000000000000000000000000000..b5bf89a607792d8b24aa61a28726b1522bfa38b8 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/events.out.tfevents.1758378048.bask-pg0308u26a.3371520.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ab81a38a18f49ba17d6f5cd5ec80799f2ac51f22f5c1f41522db04fe3632b5 +size 87954 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_01-32-01_bask-pg0308u26a/events.out.tfevents.1758342795.bask-pg0308u26a.1841429.1 b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_01-32-01_bask-pg0308u26a/events.out.tfevents.1758342795.bask-pg0308u26a.1841429.1 new file mode 100644 index 0000000000000000000000000000000000000000..3abbe8cef66b5120feb02fd8ab5a752a225dc09a --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_01-32-01_bask-pg0308u26a/events.out.tfevents.1758342795.bask-pg0308u26a.1841429.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51cfcf60406f39c15524243dcddf6ed96d48f8fd60a08fa415d67ebd0791265d +size 78113 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-03-04_bask-pg0308u26a/events.out.tfevents.1758377070.bask-pg0308u26a.3352770.1 b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-03-04_bask-pg0308u26a/events.out.tfevents.1758377070.bask-pg0308u26a.3352770.1 new file mode 100644 index 0000000000000000000000000000000000000000..3b630245e2053bf59c0a2d82626ca69f22c8a7da --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-03-04_bask-pg0308u26a/events.out.tfevents.1758377070.bask-pg0308u26a.3352770.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a42589078450d2af562c24c56583ae0768367165fcf0dd41458a269dffecb27 +size 9130 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-11-31_bask-pg0308u26a/events.out.tfevents.1758377566.bask-pg0308u26a.3361924.1 b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-11-31_bask-pg0308u26a/events.out.tfevents.1758377566.bask-pg0308u26a.3361924.1 new file mode 100644 index 0000000000000000000000000000000000000000..bc2a45870f31cee2bd3cb405d20ee04ec5c8d0bb --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-11-31_bask-pg0308u26a/events.out.tfevents.1758377566.bask-pg0308u26a.3361924.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9a10425daf698267dd7b41e5caf610329e5782239db07566ca7413bc1af27c5 +size 9130 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-20-46_bask-pg0308u26a/events.out.tfevents.1758378129.bask-pg0308u26a.3371520.1 b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-20-46_bask-pg0308u26a/events.out.tfevents.1758378129.bask-pg0308u26a.3371520.1 new file mode 100644 index 0000000000000000000000000000000000000000..8b022f5d4db20d1c3caa2a40a04a65bb0fc6a4b2 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-20-46_bask-pg0308u26a/events.out.tfevents.1758378129.bask-pg0308u26a.3371520.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7660cbe3a12a6e518b4cff9db02358d5f7d4d8bcb703a2c23f699602afba07 +size 52332 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-20-46_bask-pg0308u26a/events.out.tfevents.1758400639.bask-pg0308u26a.3371520.2 b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-20-46_bask-pg0308u26a/events.out.tfevents.1758400639.bask-pg0308u26a.3371520.2 new file mode 100644 index 0000000000000000000000000000000000000000..00a5de5db1161bab0844cb52e33713d60251340d --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_sr_r128/runs/Sep20_15-20-46_bask-pg0308u26a/events.out.tfevents.1758400639.bask-pg0308u26a.3371520.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82c3ef319d99dfe989081bafeb5a860a807e4e85087c2706824857b8c79bd4dc +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21c046597d064ee5c0cff1644cc94293a1cd2f01 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08bbcc1aee138c074f8a69980b57ba8fd9529c5efdbf05823820dbae6e5cdea1 +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..afb690338cdace28a57cba04b7aa67ba8197c787 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2724e416281926dae574615cfab4605c93a1daeca0dcdbf2431e71c5f9de0668 +size 7480 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..560979cdeb0d70cf804e4b9f2b6e0fcecd7fb119 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5537072420120239, + "eval_ciou": 0.6305330395698547 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5854451060295105, + "eval_ciou": 0.6421226859092712 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5920965075492859, + "eval_ciou": 0.6503556966781616 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.6044809222221375, + "eval_ciou": 0.6537495851516724 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6118924617767334, + "eval_ciou": 0.6136976480484009 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5973416566848755, + "eval_ciou": 0.6474172472953796 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6110580563545227, + "eval_ciou": 0.6548898220062256 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.601639986038208, + "eval_ciou": 0.5974438190460205 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6035985350608826, + "eval_ciou": 0.6317234039306641 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6114206314086914, + "eval_ciou": 0.6330931186676025 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6057354211807251, + "eval_ciou": 0.6372289061546326 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7906848788261414, + "eval_ciou": 0.7919746041297913 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8105133771896362, + "eval_ciou": 0.8149374127388 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7650476098060608, + "eval_ciou": 0.7661905288696289 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7436209917068481, + "eval_ciou": 0.7324531674385071 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7853293418884277, + "eval_ciou": 0.7856197357177734 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6950657963752747, + "eval_ciou": 0.6787523031234741 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7540370225906372, + "eval_ciou": 0.7600134611129761 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7480374574661255, + "eval_ciou": 0.7532860636711121 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/events.out.tfevents.1757986527.bask-pg0308u25a.4099727.0 b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/events.out.tfevents.1757986527.bask-pg0308u25a.4099727.0 new file mode 100644 index 0000000000000000000000000000000000000000..81ae8d5257f51d7fcd13a824fc6fb0063cc9d1bd --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/events.out.tfevents.1757986527.bask-pg0308u25a.4099727.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52055091eb94afa92924e3ce74b5a1eef3ead3036bedee9048f2edd29c860f28 +size 123179 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/events.out.tfevents.1758070968.bask-pg0308u25a.1203267.0 b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/events.out.tfevents.1758070968.bask-pg0308u25a.1203267.0 new file mode 100644 index 0000000000000000000000000000000000000000..d1dbd8740f6f417b023a914006cc2da0a0a37e35 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/events.out.tfevents.1758070968.bask-pg0308u25a.1203267.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76353b006ba59f134c4ced5d1f02d35667fa68bdf7ee2ed75499a507fb4dd06d +size 88 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/events.out.tfevents.1758071353.bask-pg0308u25a.1209730.0 b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/events.out.tfevents.1758071353.bask-pg0308u25a.1209730.0 new file mode 100644 index 0000000000000000000000000000000000000000..f175a2dbbf71ee4e852a7fc47f232de9fd23bca0 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/events.out.tfevents.1758071353.bask-pg0308u25a.1209730.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77cb6374115189ca88231c2b845c4854663652a17dd838c9f83c2749af44914 +size 108701 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/runs/Sep16_02-35-25_bask-pg0308u25a/events.out.tfevents.1757995592.bask-pg0308u25a.4099727.1 b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/runs/Sep16_02-35-25_bask-pg0308u25a/events.out.tfevents.1757995592.bask-pg0308u25a.4099727.1 new file mode 100644 index 0000000000000000000000000000000000000000..40faeb1f770cef498cacdf65b291f8cc5ae35154 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/runs/Sep16_02-35-25_bask-pg0308u25a/events.out.tfevents.1757995592.bask-pg0308u25a.4099727.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf97ad0dd7895f27f34a7c0fbd4d1cc18e4d4f71698fbfc691b091a1fe0669a +size 72701 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/runs/Sep17_02-09-10_bask-pg0308u25a/events.out.tfevents.1758071500.bask-pg0308u25a.1209730.1 b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/runs/Sep17_02-09-10_bask-pg0308u25a/events.out.tfevents.1758071500.bask-pg0308u25a.1209730.1 new file mode 100644 index 0000000000000000000000000000000000000000..458a23a8dedfef7ddace4412f45a291c93eda10f --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/runs/Sep17_02-09-10_bask-pg0308u25a/events.out.tfevents.1758071500.bask-pg0308u25a.1209730.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3773c04cce40eae72e9cece68331aaba8080395f9cb1148d7b808a595290d771 +size 63037 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/runs/Sep17_02-09-10_bask-pg0308u25a/events.out.tfevents.1758139621.bask-pg0308u25a.1209730.2 b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/runs/Sep17_02-09-10_bask-pg0308u25a/events.out.tfevents.1758139621.bask-pg0308u25a.1209730.2 new file mode 100644 index 0000000000000000000000000000000000000000..7278d1d00d950804dfe30b45351efd0483179b5d --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srbs10a10/runs/Sep17_02-09-10_bask-pg0308u25a/events.out.tfevents.1758139621.bask-pg0308u25a.1209730.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b346f90ce5d74ed8d376bc40bbcf666ecf0ef88bc14086f1e9de83745841e4e6 +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_vlorati_srm/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_vlorati_srm/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64d40fd80f12d963946d6d5f9e150b544eea4a84 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae3eedc504a94c09bc68c205f700c0a07631bfe2d797fea7c751249616bbadf8 +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_vlorati_srm/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..595eac81f23808a6c50e56a7243357615fb0ea35 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5db567deeae10a06a5deea189c08b9662c2783e68283dd21cfeac94eb40b771 +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_vlorati_srm/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..f33361df60c2d56ba1f6819c5078138ed7d4fcd8 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.538974404335022, + "eval_ciou": 0.5568175911903381 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5835084915161133, + "eval_ciou": 0.6460436582565308 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5795681476593018, + "eval_ciou": 0.6659083962440491 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5814592838287354, + "eval_ciou": 0.6421337723731995 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6004868745803833, + "eval_ciou": 0.6717376112937927 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6145725250244141, + "eval_ciou": 0.7002845406532288 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6179752945899963, + "eval_ciou": 0.668799102306366 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6211929321289062, + "eval_ciou": 0.662628173828125 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6220943927764893, + "eval_ciou": 0.6644752025604248 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6246389746665955, + "eval_ciou": 0.6690987944602966 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6116735935211182, + "eval_ciou": 0.6331431865692139 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7840677499771118, + "eval_ciou": 0.7879241704940796 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8055709004402161, + "eval_ciou": 0.8110809326171875 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7591402530670166, + "eval_ciou": 0.7599254846572876 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7321800589561462, + "eval_ciou": 0.7262216806411743 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7771302461624146, + "eval_ciou": 0.7806680202484131 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.686467707157135, + "eval_ciou": 0.6752142906188965 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7481905817985535, + "eval_ciou": 0.7531195878982544 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7462314367294312, + "eval_ciou": 0.7509754300117493 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm/events.out.tfevents.1758304923.bask-pg0308u29a.635887.0 b/lisa-ivl3-2b_bi2cbe_vlorati_srm/events.out.tfevents.1758304923.bask-pg0308u29a.635887.0 new file mode 100644 index 0000000000000000000000000000000000000000..9bae2a9ee88d93dd7be660c16749ea989bf20c10 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm/events.out.tfevents.1758304923.bask-pg0308u29a.635887.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dca768a1b66031d32fbf9c31ba7c45e25e1a47ad14de81d299a570be1dc1d87b +size 212352 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm/runs/Sep19_19-01-58_bask-pg0308u29a/events.out.tfevents.1758304999.bask-pg0308u29a.635887.1 b/lisa-ivl3-2b_bi2cbe_vlorati_srm/runs/Sep19_19-01-58_bask-pg0308u29a/events.out.tfevents.1758304999.bask-pg0308u29a.635887.1 new file mode 100644 index 0000000000000000000000000000000000000000..5e4d5b335e75d140240fee91746002ddc0ddf0a6 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm/runs/Sep19_19-01-58_bask-pg0308u29a/events.out.tfevents.1758304999.bask-pg0308u29a.635887.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:195f41b18d1bc8cf6d86b14d647689ea7cc1b416f8ecbcdd047c04afae49275d +size 116393 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm/runs/Sep19_19-01-58_bask-pg0308u29a/events.out.tfevents.1758354591.bask-pg0308u29a.635887.2 b/lisa-ivl3-2b_bi2cbe_vlorati_srm/runs/Sep19_19-01-58_bask-pg0308u29a/events.out.tfevents.1758354591.bask-pg0308u29a.635887.2 new file mode 100644 index 0000000000000000000000000000000000000000..f0a28af708232593d21a9f3714d5fa1da536c58f --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm/runs/Sep19_19-01-58_bask-pg0308u29a/events.out.tfevents.1758354591.bask-pg0308u29a.635887.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:355710cc0985ad9459c6a29334e8c3c63e678356f77ab17410d1beea7ca3a4d2 +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..027eecd0375ee2d88a626c74d7c936afa7967927 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23bd2c42800b2f9c606a923dc9b4c74437d910fc1c033b6c799c84af5187c8d3 +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..10bbc3e6073fc1e0ba2c2166ec760f20ca9527bf --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350813bdb181f622826abf4f8f9695f7331f4f822c15ff41093d0f7f40e84aae +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..9845b406dce24176c9385ee7ec0e2a9cbb6d9f1d --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.554678201675415, + "eval_ciou": 0.5564561486244202 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5697285532951355, + "eval_ciou": 0.6436676383018494 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5932938456535339, + "eval_ciou": 0.6398689150810242 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.6033034920692444, + "eval_ciou": 0.6905348300933838 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5898310542106628, + "eval_ciou": 0.6853882074356079 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6134674549102783, + "eval_ciou": 0.6617282629013062 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6204383969306946, + "eval_ciou": 0.641028642654419 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6218186616897583, + "eval_ciou": 0.6841350197792053 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6245032548904419, + "eval_ciou": 0.6648175716400146 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6203395128250122, + "eval_ciou": 0.6550574898719788 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.605689287185669, + "eval_ciou": 0.6104215383529663 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7883896827697754, + "eval_ciou": 0.7948012351989746 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.807920515537262, + "eval_ciou": 0.8154407739639282 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7615307569503784, + "eval_ciou": 0.7621538043022156 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7417901158332825, + "eval_ciou": 0.7400577664375305 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.780209481716156, + "eval_ciou": 0.7827674746513367 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6902382373809814, + "eval_ciou": 0.6782473921775818 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7536671161651611, + "eval_ciou": 0.7648316621780396 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7525750994682312, + "eval_ciou": 0.7618380784988403 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/events.out.tfevents.1758245236.bask-pg0308u25a.430668.0 b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/events.out.tfevents.1758245236.bask-pg0308u25a.430668.0 new file mode 100644 index 0000000000000000000000000000000000000000..1e48adc8a0088b1b38079c002174184ca46014ab --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/events.out.tfevents.1758245236.bask-pg0308u25a.430668.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8151524851f2dde96e656d927f01f962d6b6d4e9cc536b26f29023b2f4542423 +size 212352 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/runs/Sep19_02-27-14_bask-pg0308u25a/events.out.tfevents.1758250721.bask-pg0308u25a.430668.1 b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/runs/Sep19_02-27-14_bask-pg0308u25a/events.out.tfevents.1758250721.bask-pg0308u25a.430668.1 new file mode 100644 index 0000000000000000000000000000000000000000..2c13c1f13f07a8de37e56009a5bb1039e0a052bd --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/runs/Sep19_02-27-14_bask-pg0308u25a/events.out.tfevents.1758250721.bask-pg0308u25a.430668.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8e5a5b3fa94fb38ff39089872fa2f3ad87205ecadf5aca73df4991c2527e567 +size 116401 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/runs/Sep19_02-27-14_bask-pg0308u25a/events.out.tfevents.1758298425.bask-pg0308u25a.430668.2 b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/runs/Sep19_02-27-14_bask-pg0308u25a/events.out.tfevents.1758298425.bask-pg0308u25a.430668.2 new file mode 100644 index 0000000000000000000000000000000000000000..23abb5f2610983e2da885fb36dea6cc52b31da9f --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srm_r64/runs/Sep19_02-27-14_bask-pg0308u25a/events.out.tfevents.1758298425.bask-pg0308u25a.430668.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddc90af6a1b6c4a8ef700e7bdebdfb3b56a256cb02a3411af06a6dfded2019e6 +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srs/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_vlorati_srs/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srs/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srs/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_vlorati_srs/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dee6cff5ac9c08a0b875b2d54006a6bdcd3f0293 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srs/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29d0bca37b66eaeaed341c719ec27f35dc07415a43d7fa39a2904e19f3cc27a4 +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srs/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_vlorati_srs/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..37880f71ab8cb7791cd399c9cbb94a47eab184b6 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srs/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2297d9137ec9621da27dcb58d12e5f74cd863a7fc5c65c710b47c209a978f332 +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srs/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_vlorati_srs/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..57009ba0b6b97cd5fa513964ea2a004f65959edf --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srs/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5310198664665222, + "eval_ciou": 0.6241332292556763 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5797128081321716, + "eval_ciou": 0.6275715231895447 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5611972212791443, + "eval_ciou": 0.6109294891357422 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5503039956092834, + "eval_ciou": 0.5966303944587708 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5533938407897949, + "eval_ciou": 0.5984307527542114 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5851241946220398, + "eval_ciou": 0.6020902991294861 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6032989025115967, + "eval_ciou": 0.6681031584739685 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6129122376441956, + "eval_ciou": 0.6399829983711243 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6082441806793213, + "eval_ciou": 0.6599416136741638 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6099132299423218, + "eval_ciou": 0.6377223134040833 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.561841607093811, + "eval_ciou": 0.5626693367958069 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.794409990310669, + "eval_ciou": 0.798770010471344 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8144858479499817, + "eval_ciou": 0.8200773000717163 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7718109488487244, + "eval_ciou": 0.774285078048706 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7511366009712219, + "eval_ciou": 0.7442933917045593 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7922115921974182, + "eval_ciou": 0.7961419820785522 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7089573740959167, + "eval_ciou": 0.6948512196540833 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7571201920509338, + "eval_ciou": 0.7667766809463501 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7528367638587952, + "eval_ciou": 0.7601445913314819 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srs/events.out.tfevents.1758285465.bask-pg0309u16a.1979480.0 b/lisa-ivl3-2b_bi2cbe_vlorati_srs/events.out.tfevents.1758285465.bask-pg0309u16a.1979480.0 new file mode 100644 index 0000000000000000000000000000000000000000..6edb631226f8a8c238122b48e45840a7eec35897 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srs/events.out.tfevents.1758285465.bask-pg0309u16a.1979480.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c6cbaf06cb933daad6e0e9baf265c46de569ec609df8ab84ae6cf5fc7b10d5 +size 212352 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srs/runs/Sep19_13-37-42_bask-pg0309u16a/events.out.tfevents.1758285529.bask-pg0309u16a.1979480.1 b/lisa-ivl3-2b_bi2cbe_vlorati_srs/runs/Sep19_13-37-42_bask-pg0309u16a/events.out.tfevents.1758285529.bask-pg0309u16a.1979480.1 new file mode 100644 index 0000000000000000000000000000000000000000..68b7a44d8bc73223060a9e2f776c36b592975abe --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srs/runs/Sep19_13-37-42_bask-pg0309u16a/events.out.tfevents.1758285529.bask-pg0309u16a.1979480.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cccd8b521c56f1ff80ebd38bf6846c78e6f361b3e9db98e0f2fb7c5a3c765fea +size 116393 diff --git a/lisa-ivl3-2b_bi2cbe_vlorati_srs/runs/Sep19_13-37-42_bask-pg0309u16a/events.out.tfevents.1758330690.bask-pg0309u16a.1979480.2 b/lisa-ivl3-2b_bi2cbe_vlorati_srs/runs/Sep19_13-37-42_bask-pg0309u16a/events.out.tfevents.1758330690.bask-pg0309u16a.1979480.2 new file mode 100644 index 0000000000000000000000000000000000000000..8f007e4e6ac117bdb48aa807288394aef3ce4c82 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_vlorati_srs/runs/Sep19_13-37-42_bask-pg0309u16a/events.out.tfevents.1758330690.bask-pg0309u16a.1979480.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1af73f60f6addef2db7115b89fcc9fef44898b8a28d4bb6581da556fb62d7935 +size 1402 diff --git a/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..927fbd80d571a3884365c64f12a9ec71c7f0a1e8 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3564a0d8ee8ee2c766934fdb80251abf65efebc99bc12faf1c257fdc4f1576e +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8e3586cbe35bca2ceb267673d547affd06c57da --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6618639567c115ecf4d7014baee82ce24c8dde3b7cdd2fc64a232b2a18cc2aed +size 7352 diff --git a/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..4be8bfb9b9cafa0f3b74287157a7950484cbcd3f --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5656086802482605, + "eval_ciou": 0.6177656650543213 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.6077502965927124, + "eval_ciou": 0.644785463809967 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.582970917224884, + "eval_ciou": 0.6152505874633789 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.6142171025276184, + "eval_ciou": 0.6329956650733948 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6014272570610046, + "eval_ciou": 0.6434641480445862 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.599386990070343, + "eval_ciou": 0.6123512983322144 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6192695498466492, + "eval_ciou": 0.6566187739372253 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6358219981193542, + "eval_ciou": 0.6850590705871582 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6380608081817627, + "eval_ciou": 0.6882437467575073 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6343135237693787, + "eval_ciou": 0.677971601486206 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.622559130191803, + "eval_ciou": 0.6233833432197571 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7914116978645325, + "eval_ciou": 0.792972207069397 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8148249387741089, + "eval_ciou": 0.818058431148529 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.766990602016449, + "eval_ciou": 0.7622269988059998 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7441037893295288, + "eval_ciou": 0.7317826747894287 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7857435345649719, + "eval_ciou": 0.7798903584480286 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6982150077819824, + "eval_ciou": 0.6818784475326538 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7582646608352661, + "eval_ciou": 0.7621763944625854 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7516303062438965, + "eval_ciou": 0.748121440410614 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/events.out.tfevents.1757955653.bask-pg0308u25a.3625594.0 b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/events.out.tfevents.1757955653.bask-pg0308u25a.3625594.0 new file mode 100644 index 0000000000000000000000000000000000000000..28f0382595a98b630b4231b80506b6711f7afb3e --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/events.out.tfevents.1757955653.bask-pg0308u25a.3625594.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29c55c6c90c7d3aab33354a5cdcd801c56b81bd27459ff739953e4c6d563f40 +size 82495 diff --git a/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/events.out.tfevents.1757982500.bask-pg0308u25a.4034786.0 b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/events.out.tfevents.1757982500.bask-pg0308u25a.4034786.0 new file mode 100644 index 0000000000000000000000000000000000000000..d3041b33bec8cd6ec5d3b00561ef1d2ea351086f --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/events.out.tfevents.1757982500.bask-pg0308u25a.4034786.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0fa53ccc2111722c3abb4fb67ea77a8d69b316ab36dbecd1ec5ee65f8cb4b0e +size 150195 diff --git a/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/runs/Sep15_18-00-50_bask-pg0308u25a/events.out.tfevents.1757955729.bask-pg0308u25a.3625594.1 b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/runs/Sep15_18-00-50_bask-pg0308u25a/events.out.tfevents.1757955729.bask-pg0308u25a.3625594.1 new file mode 100644 index 0000000000000000000000000000000000000000..50f7995b8cc97afcf25d2cc5c2f9adce9280915f --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/runs/Sep15_18-00-50_bask-pg0308u25a/events.out.tfevents.1757955729.bask-pg0308u25a.3625594.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67a3a4c8f1b2cb30dfb3ad1cd30ec3167b54cfe15f99bfc0ec2ecf679dc5f18d +size 51718 diff --git a/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/runs/Sep16_01-28-17_bask-pg0308u25a/events.out.tfevents.1757982644.bask-pg0308u25a.4034786.1 b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/runs/Sep16_01-28-17_bask-pg0308u25a/events.out.tfevents.1757982644.bask-pg0308u25a.4034786.1 new file mode 100644 index 0000000000000000000000000000000000000000..06885e9ee8289e14530c23a089a5f55de0b345ce --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/runs/Sep16_01-28-17_bask-pg0308u25a/events.out.tfevents.1757982644.bask-pg0308u25a.4034786.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ffc2679b722cd9be6cca42d69f6616bd07084bc7bdb3b08e5ce1e92766564e1 +size 84354 diff --git a/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/runs/Sep16_01-28-17_bask-pg0308u25a/events.out.tfevents.1758028917.bask-pg0308u25a.4034786.2 b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/runs/Sep16_01-28-17_bask-pg0308u25a/events.out.tfevents.1758028917.bask-pg0308u25a.4034786.2 new file mode 100644 index 0000000000000000000000000000000000000000..a0370e1ebd4b0f2d953226a81ea14f6aca99c482 --- /dev/null +++ b/lisa-ivl3-2b_bi2cbe_x2_vlorati_sr/runs/Sep16_01-28-17_bask-pg0308u25a/events.out.tfevents.1758028917.bask-pg0308u25a.4034786.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d71e7948e4d8c9fc0af457a7c63a933cc81af3240d38d48a05698f81ed6a25 +size 1402 diff --git a/lisa-ivl3-2b_bi2cs_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_bi2cs_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bi2cs_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bi2cs_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_bi2cs_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..341d5cb5e6bf677e21784ad4ab3c447291849cbe --- /dev/null +++ b/lisa-ivl3-2b_bi2cs_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa819cdeab5af733da8383cc56051a01ed35d3aefb271a199204e3c97f03768b +size 4211070232 diff --git a/lisa-ivl3-2b_bi2cs_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_bi2cs_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..aaac6d993954ccff3cbf59974c7195323e3ecb8d --- /dev/null +++ b/lisa-ivl3-2b_bi2cs_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e538020fc96d5bbaac3d5b32843f6a83d0a43982a4dcea197bb0c577dee01ed +size 7352 diff --git a/lisa-ivl3-2b_bi2cs_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_bi2cs_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..32a4eaf5fa7627c5c3ba400e768fcb26957591d4 --- /dev/null +++ b/lisa-ivl3-2b_bi2cs_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5496846437454224, + "eval_ciou": 0.6229026913642883 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5592015981674194, + "eval_ciou": 0.6727785468101501 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5656583905220032, + "eval_ciou": 0.634355366230011 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5915398597717285, + "eval_ciou": 0.663320779800415 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5890551805496216, + "eval_ciou": 0.6735559701919556 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.5956386923789978, + "eval_ciou": 0.652233362197876 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.5987291932106018, + "eval_ciou": 0.6610731482505798 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6051978468894958, + "eval_ciou": 0.6690239906311035 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.613719642162323, + "eval_ciou": 0.6691925525665283 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.61874920129776, + "eval_ciou": 0.6677387952804565 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5987407565116882, + "eval_ciou": 0.6260291337966919 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7829092741012573, + "eval_ciou": 0.7886332273483276 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8074366450309753, + "eval_ciou": 0.8120928406715393 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7572473287582397, + "eval_ciou": 0.7591013312339783 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7329379916191101, + "eval_ciou": 0.7306557893753052 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7770923376083374, + "eval_ciou": 0.7790165543556213 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6828778982162476, + "eval_ciou": 0.671309769153595 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7486732006072998, + "eval_ciou": 0.7577866911888123 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7440311908721924, + "eval_ciou": 0.7473567724227905 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bi2cs_vlorati_sr/events.out.tfevents.1758235699.bask-pg0308u26a.243266.0 b/lisa-ivl3-2b_bi2cs_vlorati_sr/events.out.tfevents.1758235699.bask-pg0308u26a.243266.0 new file mode 100644 index 0000000000000000000000000000000000000000..56037eb31e58340a191203c896bf7afc998f6367 --- /dev/null +++ b/lisa-ivl3-2b_bi2cs_vlorati_sr/events.out.tfevents.1758235699.bask-pg0308u26a.243266.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a682fe39874e61311527b52ccd3e6b48cacae819c89dc1d7a6c525bf385bb42 +size 212352 diff --git a/lisa-ivl3-2b_bi2cs_vlorati_sr/runs/Sep18_23-48-17_bask-pg0308u26a/events.out.tfevents.1758235775.bask-pg0308u26a.243266.1 b/lisa-ivl3-2b_bi2cs_vlorati_sr/runs/Sep18_23-48-17_bask-pg0308u26a/events.out.tfevents.1758235775.bask-pg0308u26a.243266.1 new file mode 100644 index 0000000000000000000000000000000000000000..2a045b5cfc17c2bf79a943c2f321783ff5bc740b --- /dev/null +++ b/lisa-ivl3-2b_bi2cs_vlorati_sr/runs/Sep18_23-48-17_bask-pg0308u26a/events.out.tfevents.1758235775.bask-pg0308u26a.243266.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff2d4d94de05b7b54a60c0f13954a93d9d39247672b4c8dcc9abfa9e64f883b2 +size 116389 diff --git a/lisa-ivl3-2b_bi2cs_vlorati_sr/runs/Sep18_23-48-17_bask-pg0308u26a/events.out.tfevents.1758283402.bask-pg0308u26a.243266.2 b/lisa-ivl3-2b_bi2cs_vlorati_sr/runs/Sep18_23-48-17_bask-pg0308u26a/events.out.tfevents.1758283402.bask-pg0308u26a.243266.2 new file mode 100644 index 0000000000000000000000000000000000000000..bfa3d1e84763deb7d6cc6b3242134d51915bdf73 --- /dev/null +++ b/lisa-ivl3-2b_bi2cs_vlorati_sr/runs/Sep18_23-48-17_bask-pg0308u26a/events.out.tfevents.1758283402.bask-pg0308u26a.243266.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff5b05b92bcca177492f1f2085d1bd24d79c2b2cd0f5558d7f294f4626a4263 +size 1402 diff --git a/lisa-ivl3-2b_bibe_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-2b_bibe_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_bibe_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_bibe_vlorati_sr/ckpt_model/model.safetensors b/lisa-ivl3-2b_bibe_vlorati_sr/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40e5507df28800eec1ec102044cfdb981069ddb8 --- /dev/null +++ b/lisa-ivl3-2b_bibe_vlorati_sr/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef94c57fc0af99e6309db32ff6a2687e5fe737f60c79716b5791b3cd3462807 +size 4211070232 diff --git a/lisa-ivl3-2b_bibe_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-2b_bibe_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5309e3b63c5e1065ed417eb27ecfc8b44fb222da --- /dev/null +++ b/lisa-ivl3-2b_bibe_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15304d37d961ef83f90f5cf11c45f51f2d6a2127fd500d8738583ef7bb1b092a +size 7352 diff --git a/lisa-ivl3-2b_bibe_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-2b_bibe_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..4dd539c837e5c4735f86db6162d8dc5a4d0cc057 --- /dev/null +++ b/lisa-ivl3-2b_bibe_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5249593257904053, + "eval_ciou": 0.5540566444396973 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5671162605285645, + "eval_ciou": 0.6326936483383179 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5812656283378601, + "eval_ciou": 0.6858826279640198 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.6030995845794678, + "eval_ciou": 0.6155330538749695 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6062400341033936, + "eval_ciou": 0.6789443492889404 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6075817942619324, + "eval_ciou": 0.7042753100395203 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6141175031661987, + "eval_ciou": 0.7158334255218506 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6258394122123718, + "eval_ciou": 0.6839633584022522 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6258042454719543, + "eval_ciou": 0.7153687477111816 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6261785626411438, + "eval_ciou": 0.7021158933639526 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.5950783491134644, + "eval_ciou": 0.614780604839325 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7819390296936035, + "eval_ciou": 0.7876020669937134 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8033014535903931, + "eval_ciou": 0.8094930648803711 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7548945546150208, + "eval_ciou": 0.7584441900253296 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7323951721191406, + "eval_ciou": 0.7317303419113159 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7716150879859924, + "eval_ciou": 0.7732266187667847 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6848216652870178, + "eval_ciou": 0.6768019199371338 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.748109757900238, + "eval_ciou": 0.7565798759460449 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7474566698074341, + "eval_ciou": 0.756172239780426 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_bibe_vlorati_sr/events.out.tfevents.1758630890.bask-pg0308u29a.1810351.0 b/lisa-ivl3-2b_bibe_vlorati_sr/events.out.tfevents.1758630890.bask-pg0308u29a.1810351.0 new file mode 100644 index 0000000000000000000000000000000000000000..22b2383d7deba188976c25201b4359324b707e80 --- /dev/null +++ b/lisa-ivl3-2b_bibe_vlorati_sr/events.out.tfevents.1758630890.bask-pg0308u29a.1810351.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91e637834a54cdda0885e349ac7a6ac1ccd27248cca88e907ed7dceaa087d769 +size 212352 diff --git a/lisa-ivl3-2b_bibe_vlorati_sr/runs/Sep23_13-34-48_bask-pg0308u29a/events.out.tfevents.1758630953.bask-pg0308u29a.1810351.1 b/lisa-ivl3-2b_bibe_vlorati_sr/runs/Sep23_13-34-48_bask-pg0308u29a/events.out.tfevents.1758630953.bask-pg0308u29a.1810351.1 new file mode 100644 index 0000000000000000000000000000000000000000..44253f1a4bd0768de1518dfc2d0cad07ff84fa32 --- /dev/null +++ b/lisa-ivl3-2b_bibe_vlorati_sr/runs/Sep23_13-34-48_bask-pg0308u29a/events.out.tfevents.1758630953.bask-pg0308u29a.1810351.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca8679a28a1eb085c1a0bad7d99739252cde286e665e4b6317d8a4984dd6779 +size 116387 diff --git a/lisa-ivl3-2b_bibe_vlorati_sr/runs/Sep23_13-34-48_bask-pg0308u29a/events.out.tfevents.1758682291.bask-pg0308u29a.1810351.2 b/lisa-ivl3-2b_bibe_vlorati_sr/runs/Sep23_13-34-48_bask-pg0308u29a/events.out.tfevents.1758682291.bask-pg0308u29a.1810351.2 new file mode 100644 index 0000000000000000000000000000000000000000..2f85ea5c2cb6f570bc92d16bc720893e5c6b6a8e --- /dev/null +++ b/lisa-ivl3-2b_bibe_vlorati_sr/runs/Sep23_13-34-48_bask-pg0308u29a/events.out.tfevents.1758682291.bask-pg0308u29a.1810351.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff9b7006ae28d06671f615b8df37ccd797c5556fd1b325957dce368e0e5223e +size 1402 diff --git a/lisa-ivl3-2b_vlorati_sr_1/ckpt_model/config.json b/lisa-ivl3-2b_vlorati_sr_1/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa0efe9e975d906488d234e8f6ec309c18056c --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_1/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 1536, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-2b_vlorati_sr_1/ckpt_model/model.safetensors b/lisa-ivl3-2b_vlorati_sr_1/ckpt_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0bd57dc570102f564257a9d1a7d0187e14fc974 --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_1/ckpt_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08fb89fc8f34908344ed5969a3c0efa629f7393059e2ccf6a597f6bc2ac736e +size 4211067064 diff --git a/lisa-ivl3-2b_vlorati_sr_1/ckpt_model/training_args.bin b/lisa-ivl3-2b_vlorati_sr_1/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..28cf192dad9b1d455de23097d304197ab4343913 --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_1/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8bfb31f84676d292e536e0d32f5a9973c303d635ca465dc7e7fa282bbb1438e +size 7352 diff --git a/lisa-ivl3-2b_vlorati_sr_1/evaluation_metrics.json b/lisa-ivl3-2b_vlorati_sr_1/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..55836030422daa4449fbf5842220db5203bcadb9 --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_1/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5428226590156555, + "eval_ciou": 0.6486860513687134 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.576973021030426, + "eval_ciou": 0.6283387541770935 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5801995396614075, + "eval_ciou": 0.6884477138519287 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.5849356651306152, + "eval_ciou": 0.6493880748748779 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.6133280992507935, + "eval_ciou": 0.7162936925888062 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6252800226211548, + "eval_ciou": 0.7077770233154297 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6256431341171265, + "eval_ciou": 0.7088700532913208 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6295732855796814, + "eval_ciou": 0.7179023027420044 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6263434886932373, + "eval_ciou": 0.7101185321807861 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.6301770806312561, + "eval_ciou": 0.7127786874771118 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.605364203453064, + "eval_ciou": 0.6502220630645752 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.7860114574432373, + "eval_ciou": 0.7906588315963745 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8029815554618835, + "eval_ciou": 0.8074691891670227 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7556571364402771, + "eval_ciou": 0.7569315433502197 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7340661287307739, + "eval_ciou": 0.7281174659729004 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7740760445594788, + "eval_ciou": 0.7732947468757629 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.6851834058761597, + "eval_ciou": 0.6708152294158936 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7482894659042358, + "eval_ciou": 0.7577313780784607 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.7458229064941406, + "eval_ciou": 0.7469159960746765 + } +] \ No newline at end of file diff --git a/lisa-ivl3-2b_vlorati_sr_1/events.out.tfevents.1759431298.bask-pg0308u12a.1501219.0 b/lisa-ivl3-2b_vlorati_sr_1/events.out.tfevents.1759431298.bask-pg0308u12a.1501219.0 new file mode 100644 index 0000000000000000000000000000000000000000..12ea5e5bcb748c1fc19439f08464c44a6df32994 --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_1/events.out.tfevents.1759431298.bask-pg0308u12a.1501219.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2339aed120502ad88f905f648a1ce369405c0ab2967a7419c5f08af68bb2f6 +size 88 diff --git a/lisa-ivl3-2b_vlorati_sr_1/events.out.tfevents.1759431428.bask-pg0308u12a.1504252.0 b/lisa-ivl3-2b_vlorati_sr_1/events.out.tfevents.1759431428.bask-pg0308u12a.1504252.0 new file mode 100644 index 0000000000000000000000000000000000000000..950aeac416c4e6e2108ddfaa223f9053a145ee7d --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_1/events.out.tfevents.1759431428.bask-pg0308u12a.1504252.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faee330f50ffaa79e423d437d81923129a903eb7fa5a9f73a15e8e03a4fb36eb +size 212352 diff --git a/lisa-ivl3-2b_vlorati_sr_1/runs/Oct02_19-54-55_bask-pg0308u12a/events.out.tfevents.1759431364.bask-pg0308u12a.1501219.1 b/lisa-ivl3-2b_vlorati_sr_1/runs/Oct02_19-54-55_bask-pg0308u12a/events.out.tfevents.1759431364.bask-pg0308u12a.1501219.1 new file mode 100644 index 0000000000000000000000000000000000000000..759af85d096657422e020f1410acf7e25dc032a6 --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_1/runs/Oct02_19-54-55_bask-pg0308u12a/events.out.tfevents.1759431364.bask-pg0308u12a.1501219.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d0d163215621316ddf07a06378d7044647e019bdfc0aebac0c0257bdccc24c +size 9115 diff --git a/lisa-ivl3-2b_vlorati_sr_1/runs/Oct02_19-57-05_bask-pg0308u12a/events.out.tfevents.1759431494.bask-pg0308u12a.1504252.1 b/lisa-ivl3-2b_vlorati_sr_1/runs/Oct02_19-57-05_bask-pg0308u12a/events.out.tfevents.1759431494.bask-pg0308u12a.1504252.1 new file mode 100644 index 0000000000000000000000000000000000000000..7cf57a0a7f5c000fd42e837b6766c20c50d81b63 --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_1/runs/Oct02_19-57-05_bask-pg0308u12a/events.out.tfevents.1759431494.bask-pg0308u12a.1504252.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a2eb2bedeb1aa28be2a022107eb86ea274bb1ffe5ffb3012fabfd2e89bf0a1f +size 116381 diff --git a/lisa-ivl3-2b_vlorati_sr_1/runs/Oct02_19-57-05_bask-pg0308u12a/events.out.tfevents.1759478152.bask-pg0308u12a.1504252.2 b/lisa-ivl3-2b_vlorati_sr_1/runs/Oct02_19-57-05_bask-pg0308u12a/events.out.tfevents.1759478152.bask-pg0308u12a.1504252.2 new file mode 100644 index 0000000000000000000000000000000000000000..c11849fd87b9e052bef68c73c10c8bfa04ad2273 --- /dev/null +++ b/lisa-ivl3-2b_vlorati_sr_1/runs/Oct02_19-57-05_bask-pg0308u12a/events.out.tfevents.1759478152.bask-pg0308u12a.1504252.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918fdf6b12acf49b5d5eef96e56837a115f65219c1d7fb731c62b12deeb324e8 +size 1402 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/config.json b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..413e24ed9d7104d9c96eeb65b41ab698746e16f4 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 3584, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 18944, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 28, + "num_hidden_layers": 28, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00001-of-00004.safetensors b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2f9d41faaf81e4562016140687efea9d4461904 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc8e79b0c90936e4c98f858aac528456fb0a71175ef2b9e5013101f017f2213 +size 4991138296 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00002-of-00004.safetensors b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84625e3eaab54ba2b3cbd769b9e44b42e3533491 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b6c33aeed21ea7332539a94592f2845461011ea908af2c13c0dd58e61b9c21a +size 4958443072 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00003-of-00004.safetensors b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c71413b371915acc4c2846c48404196906d452a --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:949f6525f6b44bba6168bda886872f8244fb719e11c90d6ee2d8ab84717a4cd0 +size 4796984024 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00004-of-00004.safetensors b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ee5d87ccd6c7dcc4f2f06399552dad91aa20a98 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b43a8c31f1d5a2ae4fa2d8170f99839422b33963d533bcaa1db77f803dea10a +size 1322191472 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model.safetensors.index.json b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e4fbe418b4133a57764ab7223751a047719d21 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/model.safetensors.index.json @@ -0,0 +1,703 @@ +{ + "metadata": { + "total_parameters": 8034335744, + "total_size": 16068671488 + }, + "weight_map": { + "combine_decode_proj.0.weight": "model-00004-of-00004.safetensors", + "combine_decode_proj.1.bias": "model-00004-of-00004.safetensors", + "combine_decode_proj.1.weight": "model-00004-of-00004.safetensors", + "combine_decode_proj.3.bias": "model-00004-of-00004.safetensors", + "combine_decode_proj.3.weight": "model-00004-of-00004.safetensors", + "combine_seg_proj.0.bias": "model-00004-of-00004.safetensors", + "combine_seg_proj.0.weight": "model-00004-of-00004.safetensors", + "combine_seg_proj.2.bias": "model-00004-of-00004.safetensors", + "combine_seg_proj.2.weight": "model-00004-of-00004.safetensors", + "language_model.lm_head.weight": "model-00004-of-00004.safetensors", + "language_model.model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.norm.weight": "model-00003-of-00004.safetensors", + "mlp1.0.bias": "model-00004-of-00004.safetensors", + "mlp1.0.weight": "model-00004-of-00004.safetensors", + "mlp1.1.bias": "model-00004-of-00004.safetensors", + "mlp1.1.weight": "model-00004-of-00004.safetensors", + "mlp1.3.bias": "model-00004-of-00004.safetensors", + "mlp1.3.weight": "model-00004-of-00004.safetensors", + "seg_img_embed.weight": "model-00004-of-00004.safetensors", + "vision_model.embeddings.class_embedding": "model-00001-of-00004.safetensors", + "vision_model.embeddings.patch_embedding.bias": "model-00001-of-00004.safetensors", + "vision_model.embeddings.patch_embedding.weight": "model-00001-of-00004.safetensors", + "vision_model.embeddings.position_embedding": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm2.weight": "model-00001-of-00004.safetensors" + } +} diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/training_args.bin b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a146feaab68acd08d37e094de139ad75c52be355 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46031c8e341071fec97f20f3e174983da8679b34c5c234b89b0c8c727d48403d +size 7352 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/evaluation_metrics.json b/lisa-ivl3-8b_bi2cbe_vlorati_sr/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..86b8b0b1a665867a7f71658a528fcbf9fd687d44 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/evaluation_metrics.json @@ -0,0 +1,116 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5522104501724243, + "eval_ciou": 0.5921730995178223 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5862514972686768, + "eval_ciou": 0.6146094799041748 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5691287517547607, + "eval_ciou": 0.6004741787910461 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.6095577478408813, + "eval_ciou": 0.6496008038520813 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.5996950268745422, + "eval_ciou": 0.640630841255188 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.625974178314209, + "eval_ciou": 0.6630069017410278 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6291065216064453, + "eval_ciou": 0.6643728017807007 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.6354078054428101, + "eval_ciou": 0.6727522015571594 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6396750211715698, + "eval_ciou": 0.6753989458084106 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.642238438129425, + "eval_ciou": 0.6665040850639343 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6567072868347168, + "eval_ciou": 0.6823872327804565 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.8002016544342041, + "eval_ciou": 0.8052685260772705 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8198291659355164, + "eval_ciou": 0.8262685537338257 + }, + { + "val_dataset": "refcoco|unc|testB", + "epoch": 10.0, + "eval_giou": 0.7816345691680908, + "eval_ciou": 0.7817952632904053 + }, + { + "val_dataset": "refcoco+|unc|val", + "epoch": 10.0, + "eval_giou": 0.7596989870071411, + "eval_ciou": 0.7552387714385986 + }, + { + "val_dataset": "refcoco+|unc|testA", + "epoch": 10.0, + "eval_giou": 0.7958518862724304, + "eval_ciou": 0.7981890439987183 + }, + { + "val_dataset": "refcoco+|unc|testB", + "epoch": 10.0, + "eval_giou": 0.718018651008606, + "eval_ciou": 0.705427885055542 + }, + { + "val_dataset": "refcocog|umd|test", + "epoch": 10.0, + "eval_giou": 0.7654740214347839, + "eval_ciou": 0.7747682332992554 + }, + { + "val_dataset": "refcocog|umd|val", + "epoch": 10.0, + "eval_giou": 0.76044762134552, + "eval_ciou": 0.7683126926422119 + } +] \ No newline at end of file diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/events.out.tfevents.1757987137.bask-pg0309u17a.3248673.0 b/lisa-ivl3-8b_bi2cbe_vlorati_sr/events.out.tfevents.1757987137.bask-pg0309u17a.3248673.0 new file mode 100644 index 0000000000000000000000000000000000000000..a2869a708c07fa3b969efe058cd37c073436e2a9 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/events.out.tfevents.1757987137.bask-pg0309u17a.3248673.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b0fc9a32f40656f4e445f69710a804a1bc5f3c119efb97d53e0ba4b88f0f68 +size 212352 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/runs/Sep16_02-45-35_bask-pg0309u17a/events.out.tfevents.1758003495.bask-pg0309u17a.3248673.1 b/lisa-ivl3-8b_bi2cbe_vlorati_sr/runs/Sep16_02-45-35_bask-pg0309u17a/events.out.tfevents.1758003495.bask-pg0309u17a.3248673.1 new file mode 100644 index 0000000000000000000000000000000000000000..2015c00451ff4dd7625da64693b941c9038a9b63 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/runs/Sep16_02-45-35_bask-pg0309u17a/events.out.tfevents.1758003495.bask-pg0309u17a.3248673.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0d373b5b64750d777b96a86f71e689b8a6c13d63d4ced8d1c6434ffb5f7564 +size 116389 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_sr/runs/Sep16_02-45-35_bask-pg0309u17a/events.out.tfevents.1758054733.bask-pg0309u17a.3248673.2 b/lisa-ivl3-8b_bi2cbe_vlorati_sr/runs/Sep16_02-45-35_bask-pg0309u17a/events.out.tfevents.1758054733.bask-pg0309u17a.3248673.2 new file mode 100644 index 0000000000000000000000000000000000000000..05e71cd8b6fcdd15a929583fa851b430019523dc --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_sr/runs/Sep16_02-45-35_bask-pg0309u17a/events.out.tfevents.1758054733.bask-pg0309u17a.3248673.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0989d46c366b71cd51c2b335f9ea0e39819669f12993e078d19521ec3ec52cdc +size 1402 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/config.json b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..413e24ed9d7104d9c96eeb65b41ab698746e16f4 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/config.json @@ -0,0 +1,143 @@ +{ + "architectures": [ + "InternVL3Self" + ], + "auto_map": { + "AutoConfig": "configuration_internvl_chat.InternVLChatConfig", + "AutoModel": "modeling_internvl_chat.InternVLChatModel", + "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel" + }, + "downsample_ratio": 0.5, + "dtype": "bfloat16", + "dynamic_image_size": true, + "eos_token_id": 151645, + "force_image_size": 448, + "hidden_size": 3584, + "image_fold": null, + "llm_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 18944, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 70, + "model_type": "qwen2", + "moe_config": null, + "num_attention_heads": 28, + "num_hidden_layers": 28, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 2.0, + "rope_type": "dynamic", + "type": "dynamic" + }, + "rope_theta": 1000000.0, + "sliding_window": null, + "use_bfloat16": true, + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151676 + }, + "max_dynamic_patch": 12, + "min_dynamic_patch": 1, + "model_type": "internvl_chat", + "output_attentions": false, + "pad2square": false, + "pad_token_id": 151643, + "ps_version": "v2", + "select_layer": -1, + "system_message": null, + "template": "internvl2_5", + "tie_word_embeddings": false, + "transformers_version": null, + "use_backbone_lora": 0, + "use_llm_lora": 0, + "use_thumbnail": true, + "vision_config": { + "_attn_implementation_autoset": true, + "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5", + "architectures": [ + "InternVisionModel" + ], + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_intern_vit.InternVisionConfig", + "AutoModel": "modeling_intern_vit.InternVisionModel" + }, + "capacity_factor": 1.2, + "drop_path_rate": 0.1, + "dropout": 0.0, + "dtype": "bfloat16", + "eval_capacity_factor": 1.4, + "hidden_act": "gelu", + "hidden_size": 1024, + "image_size": 448, + "initializer_factor": 0.1, + "initializer_range": 1e-10, + "intermediate_size": 4096, + "laux_allreduce": "all_nodes", + "layer_norm_eps": 1e-06, + "model_type": "intern_vit_6b", + "moe_coeff_ratio": 0.5, + "moe_intermediate_size": 768, + "moe_output_scale": 4.0, + "noisy_gate_policy": "RSample_before", + "norm_type": "layer_norm", + "num_attention_heads": 16, + "num_channels": 3, + "num_experts": 8, + "num_hidden_layers": 24, + "num_routed_experts": 4, + "num_shared_experts": 4, + "patch_size": 14, + "qk_normalization": false, + "qkv_bias": true, + "shared_expert_intermediate_size": 3072, + "use_bfloat16": true, + "use_flash_attn": true, + "use_moe": false, + "use_residual": true, + "use_rts": false, + "use_weighted_residual": false + } +} diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00001-of-00004.safetensors b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32c736bf11d12ea4c3a69190081542eaa563965c --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6ee9bc9b68d8aaa0e37e645cd2d5c33d4ae76ba0aeaf39b042a1267451ecd8a +size 4991138296 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00002-of-00004.safetensors b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b49a5319cf9791b3bb80b09b434c413b1958c8e8 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba02d3aa9d1a862ca8cb2c6b58416b372805952803f782fccfb44f6d963c5c04 +size 4958443072 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00003-of-00004.safetensors b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de8173f345190efbf22c418b5e58b48c4b723235 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06148427d0e7729a60d1e027c7d4a2cb9a50d10db97c9b93f03993df86f9725e +size 4796984024 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00004-of-00004.safetensors b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d93e20c7810391eb2b5dc46006020ac9213723a4 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5858d22d799322524f0b886eb2e2040131f8ad74ed03263ce262ae46864928e9 +size 1322191472 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model.safetensors.index.json b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c2e4fbe418b4133a57764ab7223751a047719d21 --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/model.safetensors.index.json @@ -0,0 +1,703 @@ +{ + "metadata": { + "total_parameters": 8034335744, + "total_size": 16068671488 + }, + "weight_map": { + "combine_decode_proj.0.weight": "model-00004-of-00004.safetensors", + "combine_decode_proj.1.bias": "model-00004-of-00004.safetensors", + "combine_decode_proj.1.weight": "model-00004-of-00004.safetensors", + "combine_decode_proj.3.bias": "model-00004-of-00004.safetensors", + "combine_decode_proj.3.weight": "model-00004-of-00004.safetensors", + "combine_seg_proj.0.bias": "model-00004-of-00004.safetensors", + "combine_seg_proj.0.weight": "model-00004-of-00004.safetensors", + "combine_seg_proj.2.bias": "model-00004-of-00004.safetensors", + "combine_seg_proj.2.weight": "model-00004-of-00004.safetensors", + "language_model.lm_head.weight": "model-00004-of-00004.safetensors", + "language_model.model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "language_model.model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "language_model.model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "language_model.model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "language_model.model.norm.weight": "model-00003-of-00004.safetensors", + "mlp1.0.bias": "model-00004-of-00004.safetensors", + "mlp1.0.weight": "model-00004-of-00004.safetensors", + "mlp1.1.bias": "model-00004-of-00004.safetensors", + "mlp1.1.weight": "model-00004-of-00004.safetensors", + "mlp1.3.bias": "model-00004-of-00004.safetensors", + "mlp1.3.weight": "model-00004-of-00004.safetensors", + "seg_img_embed.weight": "model-00004-of-00004.safetensors", + "vision_model.embeddings.class_embedding": "model-00001-of-00004.safetensors", + "vision_model.embeddings.patch_embedding.bias": "model-00001-of-00004.safetensors", + "vision_model.embeddings.patch_embedding.weight": "model-00001-of-00004.safetensors", + "vision_model.embeddings.position_embedding": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.0.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.1.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.10.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.11.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.12.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.13.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.14.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.15.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.16.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.17.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.18.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.19.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.2.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.20.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.21.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.22.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.23.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.3.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.4.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.5.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.6.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.7.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.8.norm2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.proj.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.proj.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.qkv.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.attn.qkv.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.ls1": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.ls2": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm1.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm1.weight": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm2.bias": "model-00001-of-00004.safetensors", + "vision_model.encoder.layers.9.norm2.weight": "model-00001-of-00004.safetensors" + } +} diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/training_args.bin b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..697165eeb5417949e3159b36ea470a807f2f53cd --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/ckpt_model/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f059ec81b6e5088f4b823aee6970957ec478e5888c1a097cb10d942456f8373 +size 7352 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/evaluation_metrics.json b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/evaluation_metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..9b7e313a48237993c459324059a66a781ef9169a --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/evaluation_metrics.json @@ -0,0 +1,80 @@ +[ + { + "val_dataset": "ReasonSeg|val", + "epoch": 1.0, + "eval_giou": 0.5703674554824829, + "eval_ciou": 0.6351487636566162 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 2.0, + "eval_giou": 0.5881666541099548, + "eval_ciou": 0.6231042742729187 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 3.0, + "eval_giou": 0.5979794859886169, + "eval_ciou": 0.6206631064414978 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 4.0, + "eval_giou": 0.6222222447395325, + "eval_ciou": 0.6428196430206299 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 5.0, + "eval_giou": 0.626693606376648, + "eval_ciou": 0.6364426612854004 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 6.0, + "eval_giou": 0.6284255981445312, + "eval_ciou": 0.6394948959350586 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 7.0, + "eval_giou": 0.6391634941101074, + "eval_ciou": 0.6693043112754822 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 8.0, + "eval_giou": 0.632407546043396, + "eval_ciou": 0.6376776695251465 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 9.0, + "eval_giou": 0.6354352235794067, + "eval_ciou": 0.6508103609085083 + }, + { + "val_dataset": "ReasonSeg|val", + "epoch": 10.0, + "eval_giou": 0.63142991065979, + "eval_ciou": 0.6496836543083191 + }, + { + "val_dataset": "ReasonSeg|test", + "epoch": 10.0, + "eval_giou": 0.6590790748596191, + "eval_ciou": 0.6544433236122131 + }, + { + "val_dataset": "refcoco|unc|val", + "epoch": 10.0, + "eval_giou": 0.8068445324897766, + "eval_ciou": 0.8099668025970459 + }, + { + "val_dataset": "refcoco|unc|testA", + "epoch": 10.0, + "eval_giou": 0.8208774924278259, + "eval_ciou": 0.8279926776885986 + } +] \ No newline at end of file diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/events.out.tfevents.1758458323.bask-pg0308u25a.680398.0 b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/events.out.tfevents.1758458323.bask-pg0308u25a.680398.0 new file mode 100644 index 0000000000000000000000000000000000000000..a8e3706d44c83d55e4212f19021f03806e6340cf --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/events.out.tfevents.1758458323.bask-pg0308u25a.680398.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7fb13c482a773b1d17c7a90abccc05f9f1d53bb6ecc5a383ca62586b1110ae +size 209370 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/runs/Sep21_13-38-40_bask-pg0308u25a/events.out.tfevents.1758458492.bask-pg0308u25a.680398.1 b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/runs/Sep21_13-38-40_bask-pg0308u25a/events.out.tfevents.1758458492.bask-pg0308u25a.680398.1 new file mode 100644 index 0000000000000000000000000000000000000000..03e53904665689f1d7eda86fc929fb24ebbb944d --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/runs/Sep21_13-38-40_bask-pg0308u25a/events.out.tfevents.1758458492.bask-pg0308u25a.680398.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea568c8317b8b69c9b6a5ff9f24770b604b80aa710aa79191d7b75bfefafb6ae +size 116399 diff --git a/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/runs/Sep21_13-38-40_bask-pg0308u25a/events.out.tfevents.1758510224.bask-pg0308u25a.680398.2 b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/runs/Sep21_13-38-40_bask-pg0308u25a/events.out.tfevents.1758510224.bask-pg0308u25a.680398.2 new file mode 100644 index 0000000000000000000000000000000000000000..bf7e1ef29cacb443fdd406af37a6f251cbf1105b --- /dev/null +++ b/lisa-ivl3-8b_bi2cbe_vlorati_srm_r64/runs/Sep21_13-38-40_bask-pg0308u25a/events.out.tfevents.1758510224.bask-pg0308u25a.680398.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48230031b24753a66cdeb5ecca07b33cbf010307e08ad6bd3a53e92f53300034 +size 526