Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srm/evaluation_metrics.json +116 -0
- lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758303972.bask-pg0308u25a.2176098.0 +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758304185.bask-pg0308u25a.2184107.0 +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-46-08_bask-pg0308u25a/events.out.tfevents.1758304037.bask-pg0308u25a.2176098.1 +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758304237.bask-pg0308u25a.2184107.1 +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758339214.bask-pg0308u25a.2184107.2 +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srs/evaluation_metrics.json +116 -0
- lisa-ivl3-2b_bi2cbe_aati_srs/events.out.tfevents.1758407218.bask-pg0308u25a.3988218.0 +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758407268.bask-pg0308u25a.3988218.1 +3 -0
- lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758441983.bask-pg0308u25a.3988218.2 +3 -0
- lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/evaluation_metrics.json +116 -0
- lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758822942.bask-pg0308u25a.3977024.0 +3 -0
- lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758823030.bask-pg0308u25a.3998347.0 +3 -0
- lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758823107.bask-pg0308u25a.3998347.1 +3 -0
- lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758870402.bask-pg0308u25a.3998347.2 +3 -0
- lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/evaluation_metrics.json +116 -0
- lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/events.out.tfevents.1758558655.bask-pg0308u29a.637997.0 +3 -0
- lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758558731.bask-pg0308u29a.637997.1 +3 -0
- lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758608704.bask-pg0308u29a.637997.2 +3 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/evaluation_metrics.json +116 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821654.bask-pg0308u18a.998112.0 +3 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821976.bask-pg0308u18a.1004472.0 +3 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758822179.bask-pg0308u18a.1008370.0 +3 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-34-12_bask-pg0308u18a/events.out.tfevents.1758821725.bask-pg0308u18a.998112.1 +3 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-39-34_bask-pg0308u18a/events.out.tfevents.1758822035.bask-pg0308u18a.1004472.1 +3 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758822240.bask-pg0308u18a.1008370.1 +3 -0
- lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758869487.bask-pg0308u18a.1008370.2 +3 -0
- lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/config.json +143 -0
- lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/model.safetensors +3 -0
- lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/training_args.bin +3 -0
- lisa-ivl3-2b_bi2cbe_vlorati_coco/evaluation_metrics.json +116 -0
- lisa-ivl3-2b_bi2cbe_vlorati_coco/events.out.tfevents.1758236140.bask-pg0309u16a.1220041.0 +3 -0
- lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758236201.bask-pg0309u16a.1220041.1 +3 -0
- lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758283609.bask-pg0309u16a.1220041.2 +3 -0
- lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/config.json +143 -0
lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f3befb636a4bfdc7f407e8ec01f5b84a2869952dea6106070ab00f7d6b760ef
|
| 3 |
+
size 4211070232
|
lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:240c4be74f4b416addef8e472ffd17a8f0a9206792bbe14eb3617a6736ec132e
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_bi2cbe_aati_srm/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.528189480304718,
|
| 6 |
+
"eval_ciou": 0.6158800721168518
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.57504802942276,
|
| 12 |
+
"eval_ciou": 0.6519048810005188
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5494521856307983,
|
| 18 |
+
"eval_ciou": 0.6140078902244568
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5751751065254211,
|
| 24 |
+
"eval_ciou": 0.6430273652076721
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.5621751546859741,
|
| 30 |
+
"eval_ciou": 0.6091215014457703
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5681710243225098,
|
| 36 |
+
"eval_ciou": 0.5827724933624268
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.5692390203475952,
|
| 42 |
+
"eval_ciou": 0.5854980945587158
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.5853511095046997,
|
| 48 |
+
"eval_ciou": 0.5549483895301819
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.5778804421424866,
|
| 54 |
+
"eval_ciou": 0.5894233584403992
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.5841119885444641,
|
| 60 |
+
"eval_ciou": 0.5798441171646118
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.6100905537605286,
|
| 66 |
+
"eval_ciou": 0.6119125485420227
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.799595832824707,
|
| 72 |
+
"eval_ciou": 0.8027365207672119
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8168815970420837,
|
| 78 |
+
"eval_ciou": 0.8243600130081177
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7745703458786011,
|
| 84 |
+
"eval_ciou": 0.7807985544204712
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7551393508911133,
|
| 90 |
+
"eval_ciou": 0.7453246712684631
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7942529916763306,
|
| 96 |
+
"eval_ciou": 0.7944912910461426
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.7106485366821289,
|
| 102 |
+
"eval_ciou": 0.6990127563476562
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.7611709833145142,
|
| 108 |
+
"eval_ciou": 0.7682604193687439
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7570181488990784,
|
| 114 |
+
"eval_ciou": 0.7642934918403625
|
| 115 |
+
}
|
| 116 |
+
]
|
lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758303972.bask-pg0308u25a.2176098.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fd0eb014901254de4a95eb71fc2571d348cabd50a57e396fb2c3d342a3d7fb9
|
| 3 |
+
size 486
|
lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758304185.bask-pg0308u25a.2184107.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1af39459ef2a226054ba8d41e28d9351e1edf097f258d661335b41ebe8f3adc
|
| 3 |
+
size 212352
|
lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-46-08_bask-pg0308u25a/events.out.tfevents.1758304037.bask-pg0308u25a.2176098.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4044136eab8f6e9cbcae192b20ebf5e4ed93ba0420ebf5e87c4b2238109fa4e
|
| 3 |
+
size 9325
|
lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758304237.bask-pg0308u25a.2184107.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94cc218abf38e00bc41bdedd1db488f808895ac0ae3c3898e84ea6a04bec0e07
|
| 3 |
+
size 116381
|
lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758339214.bask-pg0308u25a.2184107.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c63060c4d02dcdbdc52b17a6208816f1c9a7a952fd8d4159426ee935d22b9aad
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e94e426c479692c600bd7da2a58a66d89b1a0b6105e5f4fba892bb35fa05130
|
| 3 |
+
size 4211070232
|
lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9efb840060edee20988b8e9bb354bde899f2bb93875da91a53a68a5e8259ceb3
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_bi2cbe_aati_srs/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5235294699668884,
|
| 6 |
+
"eval_ciou": 0.5689558982849121
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.4938444197177887,
|
| 12 |
+
"eval_ciou": 0.5315812826156616
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.52805095911026,
|
| 18 |
+
"eval_ciou": 0.5778353810310364
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.539726734161377,
|
| 24 |
+
"eval_ciou": 0.5628448128700256
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.5516190528869629,
|
| 30 |
+
"eval_ciou": 0.5699143409729004
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5535993576049805,
|
| 36 |
+
"eval_ciou": 0.5486313700675964
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.5771014094352722,
|
| 42 |
+
"eval_ciou": 0.630760908126831
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.5713648796081543,
|
| 48 |
+
"eval_ciou": 0.5902009606361389
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.566828727722168,
|
| 54 |
+
"eval_ciou": 0.5753384232521057
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.5662292838096619,
|
| 60 |
+
"eval_ciou": 0.5734410285949707
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.5632049441337585,
|
| 66 |
+
"eval_ciou": 0.5677175521850586
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.8058294057846069,
|
| 72 |
+
"eval_ciou": 0.810309886932373
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8246700167655945,
|
| 78 |
+
"eval_ciou": 0.8328030705451965
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7877973914146423,
|
| 84 |
+
"eval_ciou": 0.7934398651123047
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7641584277153015,
|
| 90 |
+
"eval_ciou": 0.7556021809577942
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7998954057693481,
|
| 96 |
+
"eval_ciou": 0.7981683611869812
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.7219251394271851,
|
| 102 |
+
"eval_ciou": 0.7105168104171753
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.766793966293335,
|
| 108 |
+
"eval_ciou": 0.7789492011070251
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7627427577972412,
|
| 114 |
+
"eval_ciou": 0.7680707573890686
|
| 115 |
+
}
|
| 116 |
+
]
|
lisa-ivl3-2b_bi2cbe_aati_srs/events.out.tfevents.1758407218.bask-pg0308u25a.3988218.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ceb9faddf368f10dc1f79077182ae3f0763d1d459cd00745aefdef29c4785740
|
| 3 |
+
size 212352
|
lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758407268.bask-pg0308u25a.3988218.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c60008ebfd9d19da94c88abc8cbca2ad3850f341e6cd01437d6e2c1a16144b9
|
| 3 |
+
size 116381
|
lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758441983.bask-pg0308u25a.3988218.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebda0e4e7c56eda12f89e0b655238dcdd19f18e497ae1d685d308e1e21d0b7dd
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c525c1ae85c17caddcdb1b0d45df96d56a86227e4f990cbb36d7331a692551ad
|
| 3 |
+
size 4211070232
|
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7da9ac5d9ad0770700bf7b786cf18482fad5fd08e88a5e860bd99406ca19065d
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.4889245629310608,
|
| 6 |
+
"eval_ciou": 0.5533338189125061
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5513965487480164,
|
| 12 |
+
"eval_ciou": 0.6630034446716309
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5442399382591248,
|
| 18 |
+
"eval_ciou": 0.6376377940177917
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5748190879821777,
|
| 24 |
+
"eval_ciou": 0.6525793671607971
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.5847772359848022,
|
| 30 |
+
"eval_ciou": 0.643996000289917
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5810279846191406,
|
| 36 |
+
"eval_ciou": 0.6454022526741028
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.5949556827545166,
|
| 42 |
+
"eval_ciou": 0.6094688177108765
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.6049715280532837,
|
| 48 |
+
"eval_ciou": 0.6379661560058594
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.6034538149833679,
|
| 54 |
+
"eval_ciou": 0.6570442914962769
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6016661524772644,
|
| 60 |
+
"eval_ciou": 0.6353110671043396
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.6043070554733276,
|
| 66 |
+
"eval_ciou": 0.608022153377533
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.7902190089225769,
|
| 72 |
+
"eval_ciou": 0.7928427457809448
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.807979166507721,
|
| 78 |
+
"eval_ciou": 0.8122658729553223
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.763839066028595,
|
| 84 |
+
"eval_ciou": 0.764618992805481
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.738534688949585,
|
| 90 |
+
"eval_ciou": 0.7319899201393127
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7776216864585876,
|
| 96 |
+
"eval_ciou": 0.7770327925682068
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.6900521516799927,
|
| 102 |
+
"eval_ciou": 0.676867663860321
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.7514216899871826,
|
| 108 |
+
"eval_ciou": 0.7589317560195923
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7455593943595886,
|
| 114 |
+
"eval_ciou": 0.7489427924156189
|
| 115 |
+
}
|
| 116 |
+
]
|
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758822942.bask-pg0308u25a.3977024.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5090ab4b5276a748931cd1068f50c6d17618a085656691fb72a03b346bd7b3b
|
| 3 |
+
size 88
|
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758823030.bask-pg0308u25a.3998347.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4aac91ef4c5538767282625b5123a14924a0f19f313d68d62ed42912194aebe6
|
| 3 |
+
size 212352
|
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758823107.bask-pg0308u25a.3998347.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb225847cc5dc86b3ecdb0241d649d67245d9ca0710b65086da8f329c5700ee6
|
| 3 |
+
size 116397
|
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758870402.bask-pg0308u25a.3998347.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0907be6c27349e6b811188c1c28862e763dfde6a47b69586b494273b0268d8f9
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9a73ee98332ed56252ee2690f1a7351446fd80c4253500657c9284e0a0f05fd
|
| 3 |
+
size 4211070232
|
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:833f70825712e685e5ab69b01da135f496f6a901ba5bc7b958a93796e95e8a09
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5344988107681274,
|
| 6 |
+
"eval_ciou": 0.5989851355552673
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5579254031181335,
|
| 12 |
+
"eval_ciou": 0.646024227142334
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5501570701599121,
|
| 18 |
+
"eval_ciou": 0.6018446683883667
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5774487853050232,
|
| 24 |
+
"eval_ciou": 0.6542478203773499
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.5822131037712097,
|
| 30 |
+
"eval_ciou": 0.6766245365142822
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5897811055183411,
|
| 36 |
+
"eval_ciou": 0.6791333556175232
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.5887703895568848,
|
| 42 |
+
"eval_ciou": 0.6910147070884705
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.5998998880386353,
|
| 48 |
+
"eval_ciou": 0.6640490293502808
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.5920247435569763,
|
| 54 |
+
"eval_ciou": 0.6693744659423828
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6001232266426086,
|
| 60 |
+
"eval_ciou": 0.6858417987823486
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.5927180051803589,
|
| 66 |
+
"eval_ciou": 0.6138883233070374
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.7834749817848206,
|
| 72 |
+
"eval_ciou": 0.790122926235199
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8022208213806152,
|
| 78 |
+
"eval_ciou": 0.8086150884628296
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7566637396812439,
|
| 84 |
+
"eval_ciou": 0.7609479427337646
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7318623065948486,
|
| 90 |
+
"eval_ciou": 0.7281762361526489
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7749318480491638,
|
| 96 |
+
"eval_ciou": 0.7748793363571167
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.682511568069458,
|
| 102 |
+
"eval_ciou": 0.6719024777412415
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.743724524974823,
|
| 108 |
+
"eval_ciou": 0.7526366710662842
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7407757043838501,
|
| 114 |
+
"eval_ciou": 0.7478148341178894
|
| 115 |
+
}
|
| 116 |
+
]
|
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/events.out.tfevents.1758558655.bask-pg0308u29a.637997.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb61bef795309a3922462febab004b11ccf980e30eac016b8230fa293a7b7656
|
| 3 |
+
size 212352
|
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758558731.bask-pg0308u29a.637997.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e87c13cd165f21e6038c79b21806f6123c013c42bd45ef063c0e1671b8751dd7
|
| 3 |
+
size 116399
|
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758608704.bask-pg0308u29a.637997.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf83b859f6d1900c3189c78baebb5ee0e6400bc6d00390754f89d38c552dfb86
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee127087b96ff9811fbd07f160c2648b5879ccbcd7e88fbf1c57578cc6427656
|
| 3 |
+
size 4211070232
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:faf8b9941672f97b3e8473f3e7078f73367e3fad3a4c4b35b69c5a79104328df
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5294323563575745,
|
| 6 |
+
"eval_ciou": 0.5168058276176453
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5691018104553223,
|
| 12 |
+
"eval_ciou": 0.5466322302818298
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5456892848014832,
|
| 18 |
+
"eval_ciou": 0.6087337732315063
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5649483799934387,
|
| 24 |
+
"eval_ciou": 0.5830232501029968
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.5766127109527588,
|
| 30 |
+
"eval_ciou": 0.592596709728241
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5876106023788452,
|
| 36 |
+
"eval_ciou": 0.6196873188018799
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.5895294547080994,
|
| 42 |
+
"eval_ciou": 0.5830597281455994
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.5922108888626099,
|
| 48 |
+
"eval_ciou": 0.5886086225509644
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.6001683473587036,
|
| 54 |
+
"eval_ciou": 0.5857241749763489
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6061425805091858,
|
| 60 |
+
"eval_ciou": 0.6062945127487183
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.5916463136672974,
|
| 66 |
+
"eval_ciou": 0.5962467789649963
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.7914398908615112,
|
| 72 |
+
"eval_ciou": 0.7944017052650452
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8115019202232361,
|
| 78 |
+
"eval_ciou": 0.8148282766342163
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7657762169837952,
|
| 84 |
+
"eval_ciou": 0.7644218802452087
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7408427000045776,
|
| 90 |
+
"eval_ciou": 0.7323743104934692
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7806029319763184,
|
| 96 |
+
"eval_ciou": 0.7790927886962891
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.6951540112495422,
|
| 102 |
+
"eval_ciou": 0.683707058429718
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.7511024475097656,
|
| 108 |
+
"eval_ciou": 0.7564254403114319
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7492029070854187,
|
| 114 |
+
"eval_ciou": 0.7516255378723145
|
| 115 |
+
}
|
| 116 |
+
]
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821654.bask-pg0308u18a.998112.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f0f6402efa20cb4eb1c3dcaf9815e71b1964b12d1903fcd9a8cfc7ba8fc924f
|
| 3 |
+
size 486
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821976.bask-pg0308u18a.1004472.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b21a9269fe7a0131c65a9b63b6caa674fe4f8b73071b6caa68cc19ce4724062
|
| 3 |
+
size 88
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758822179.bask-pg0308u18a.1008370.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28cd9c79f6c1e5c37360f0ca87b6fa4ff89de989a108bdfeadb937195000c169
|
| 3 |
+
size 212352
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-34-12_bask-pg0308u18a/events.out.tfevents.1758821725.bask-pg0308u18a.998112.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb59ce061f378068b22c6735f8430ed9ace878dcac3b831594603aaa1bb107a1
|
| 3 |
+
size 9338
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-39-34_bask-pg0308u18a/events.out.tfevents.1758822035.bask-pg0308u18a.1004472.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2050a14b591ad7960da8a1c578624e5fcadc630a64886750db14457a302231c
|
| 3 |
+
size 9131
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758822240.bask-pg0308u18a.1008370.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbc263fde5f12193454e49ef64849b95edfdbe9d84d7748d1bda1f0c04506652
|
| 3 |
+
size 116397
|
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758869487.bask-pg0308u18a.1008370.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d1915b5faf4072182ed92a81dc1182e464a0d20fc50e6147b785c025d972e0f
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|
lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c75bb36d150d76c1aded195dfe0969ac1e5f7b51708e3253890664b3306fe7b6
|
| 3 |
+
size 4211070232
|
lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:586d01e0f868c297c5556fb5760bb5f53403f37d00934248a1f70708c2bbdc4d
|
| 3 |
+
size 7352
|
lisa-ivl3-2b_bi2cbe_vlorati_coco/evaluation_metrics.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"val_dataset": "ReasonSeg|val",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_giou": 0.5369102954864502,
|
| 6 |
+
"eval_ciou": 0.5064759254455566
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"val_dataset": "ReasonSeg|val",
|
| 10 |
+
"epoch": 2.0,
|
| 11 |
+
"eval_giou": 0.5686467289924622,
|
| 12 |
+
"eval_ciou": 0.611318051815033
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"val_dataset": "ReasonSeg|val",
|
| 16 |
+
"epoch": 3.0,
|
| 17 |
+
"eval_giou": 0.5613127946853638,
|
| 18 |
+
"eval_ciou": 0.6206056475639343
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"val_dataset": "ReasonSeg|val",
|
| 22 |
+
"epoch": 4.0,
|
| 23 |
+
"eval_giou": 0.5933331847190857,
|
| 24 |
+
"eval_ciou": 0.6126891374588013
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"val_dataset": "ReasonSeg|val",
|
| 28 |
+
"epoch": 5.0,
|
| 29 |
+
"eval_giou": 0.6065125465393066,
|
| 30 |
+
"eval_ciou": 0.6544414162635803
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"val_dataset": "ReasonSeg|val",
|
| 34 |
+
"epoch": 6.0,
|
| 35 |
+
"eval_giou": 0.5995581150054932,
|
| 36 |
+
"eval_ciou": 0.6379423141479492
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"val_dataset": "ReasonSeg|val",
|
| 40 |
+
"epoch": 7.0,
|
| 41 |
+
"eval_giou": 0.6159911155700684,
|
| 42 |
+
"eval_ciou": 0.621420681476593
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"val_dataset": "ReasonSeg|val",
|
| 46 |
+
"epoch": 8.0,
|
| 47 |
+
"eval_giou": 0.6243378520011902,
|
| 48 |
+
"eval_ciou": 0.6523417234420776
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"val_dataset": "ReasonSeg|val",
|
| 52 |
+
"epoch": 9.0,
|
| 53 |
+
"eval_giou": 0.6166976690292358,
|
| 54 |
+
"eval_ciou": 0.6346321702003479
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"val_dataset": "ReasonSeg|val",
|
| 58 |
+
"epoch": 10.0,
|
| 59 |
+
"eval_giou": 0.6171593070030212,
|
| 60 |
+
"eval_ciou": 0.6407290697097778
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"val_dataset": "ReasonSeg|test",
|
| 64 |
+
"epoch": 10.0,
|
| 65 |
+
"eval_giou": 0.5836987495422363,
|
| 66 |
+
"eval_ciou": 0.6126533150672913
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"val_dataset": "refcoco|unc|val",
|
| 70 |
+
"epoch": 10.0,
|
| 71 |
+
"eval_giou": 0.7852296233177185,
|
| 72 |
+
"eval_ciou": 0.7870670557022095
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"val_dataset": "refcoco|unc|testA",
|
| 76 |
+
"epoch": 10.0,
|
| 77 |
+
"eval_giou": 0.8046602010726929,
|
| 78 |
+
"eval_ciou": 0.8073185086250305
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"val_dataset": "refcoco|unc|testB",
|
| 82 |
+
"epoch": 10.0,
|
| 83 |
+
"eval_giou": 0.7577531337738037,
|
| 84 |
+
"eval_ciou": 0.7593867778778076
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"val_dataset": "refcoco+|unc|val",
|
| 88 |
+
"epoch": 10.0,
|
| 89 |
+
"eval_giou": 0.7351171374320984,
|
| 90 |
+
"eval_ciou": 0.7267892956733704
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"val_dataset": "refcoco+|unc|testA",
|
| 94 |
+
"epoch": 10.0,
|
| 95 |
+
"eval_giou": 0.7731851935386658,
|
| 96 |
+
"eval_ciou": 0.7728055119514465
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"val_dataset": "refcoco+|unc|testB",
|
| 100 |
+
"epoch": 10.0,
|
| 101 |
+
"eval_giou": 0.6876177191734314,
|
| 102 |
+
"eval_ciou": 0.6751795411109924
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"val_dataset": "refcocog|umd|test",
|
| 106 |
+
"epoch": 10.0,
|
| 107 |
+
"eval_giou": 0.7494315505027771,
|
| 108 |
+
"eval_ciou": 0.7570431232452393
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"val_dataset": "refcocog|umd|val",
|
| 112 |
+
"epoch": 10.0,
|
| 113 |
+
"eval_giou": 0.7449917197227478,
|
| 114 |
+
"eval_ciou": 0.750389039516449
|
| 115 |
+
}
|
| 116 |
+
]
|
lisa-ivl3-2b_bi2cbe_vlorati_coco/events.out.tfevents.1758236140.bask-pg0309u16a.1220041.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a673a2115871fc802ba01b020cb9f8f61e30b9e9406eedf604c1d500fb25c686
|
| 3 |
+
size 212352
|
lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758236201.bask-pg0309u16a.1220041.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04a7180c0676429a3801a6fe71e0ec596fc2cff90bfd8a557828056eb13598e1
|
| 3 |
+
size 116395
|
lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758283609.bask-pg0309u16a.1220041.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad3a469cb6b9847f58a453fce555b48ae34226582d16eaa962b62daec1fe25d6
|
| 3 |
+
size 1402
|
lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/config.json
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"InternVL3Self"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
|
| 7 |
+
"AutoModel": "modeling_internvl_chat.InternVLChatModel",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
|
| 9 |
+
},
|
| 10 |
+
"downsample_ratio": 0.5,
|
| 11 |
+
"dtype": "bfloat16",
|
| 12 |
+
"dynamic_image_size": true,
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"force_image_size": 448,
|
| 15 |
+
"hidden_size": 1536,
|
| 16 |
+
"image_fold": null,
|
| 17 |
+
"llm_config": {
|
| 18 |
+
"_attn_implementation_autoset": true,
|
| 19 |
+
"_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
|
| 20 |
+
"architectures": [
|
| 21 |
+
"Qwen2ForCausalLM"
|
| 22 |
+
],
|
| 23 |
+
"attention_dropout": 0.0,
|
| 24 |
+
"bos_token_id": 151643,
|
| 25 |
+
"dtype": "bfloat16",
|
| 26 |
+
"eos_token_id": 151643,
|
| 27 |
+
"hidden_act": "silu",
|
| 28 |
+
"hidden_size": 1536,
|
| 29 |
+
"initializer_range": 0.02,
|
| 30 |
+
"intermediate_size": 8960,
|
| 31 |
+
"layer_types": [
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"full_attention",
|
| 51 |
+
"full_attention",
|
| 52 |
+
"full_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"full_attention",
|
| 55 |
+
"full_attention",
|
| 56 |
+
"full_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"full_attention",
|
| 59 |
+
"full_attention"
|
| 60 |
+
],
|
| 61 |
+
"max_position_embeddings": 32768,
|
| 62 |
+
"max_window_layers": 70,
|
| 63 |
+
"model_type": "qwen2",
|
| 64 |
+
"moe_config": null,
|
| 65 |
+
"num_attention_heads": 12,
|
| 66 |
+
"num_hidden_layers": 28,
|
| 67 |
+
"num_key_value_heads": 2,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": {
|
| 70 |
+
"factor": 2.0,
|
| 71 |
+
"rope_type": "dynamic",
|
| 72 |
+
"type": "dynamic"
|
| 73 |
+
},
|
| 74 |
+
"rope_theta": 1000000.0,
|
| 75 |
+
"sliding_window": null,
|
| 76 |
+
"use_bfloat16": true,
|
| 77 |
+
"use_cache": false,
|
| 78 |
+
"use_sliding_window": false,
|
| 79 |
+
"vocab_size": 151676
|
| 80 |
+
},
|
| 81 |
+
"max_dynamic_patch": 12,
|
| 82 |
+
"min_dynamic_patch": 1,
|
| 83 |
+
"model_type": "internvl_chat",
|
| 84 |
+
"output_attentions": false,
|
| 85 |
+
"pad2square": false,
|
| 86 |
+
"pad_token_id": 151643,
|
| 87 |
+
"ps_version": "v2",
|
| 88 |
+
"select_layer": -1,
|
| 89 |
+
"system_message": null,
|
| 90 |
+
"template": "internvl2_5",
|
| 91 |
+
"tie_word_embeddings": false,
|
| 92 |
+
"transformers_version": null,
|
| 93 |
+
"use_backbone_lora": 0,
|
| 94 |
+
"use_llm_lora": 0,
|
| 95 |
+
"use_thumbnail": true,
|
| 96 |
+
"vision_config": {
|
| 97 |
+
"_attn_implementation_autoset": true,
|
| 98 |
+
"_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
|
| 99 |
+
"architectures": [
|
| 100 |
+
"InternVisionModel"
|
| 101 |
+
],
|
| 102 |
+
"attention_dropout": 0.0,
|
| 103 |
+
"auto_map": {
|
| 104 |
+
"AutoConfig": "configuration_intern_vit.InternVisionConfig",
|
| 105 |
+
"AutoModel": "modeling_intern_vit.InternVisionModel"
|
| 106 |
+
},
|
| 107 |
+
"capacity_factor": 1.2,
|
| 108 |
+
"drop_path_rate": 0.1,
|
| 109 |
+
"dropout": 0.0,
|
| 110 |
+
"dtype": "bfloat16",
|
| 111 |
+
"eval_capacity_factor": 1.4,
|
| 112 |
+
"hidden_act": "gelu",
|
| 113 |
+
"hidden_size": 1024,
|
| 114 |
+
"image_size": 448,
|
| 115 |
+
"initializer_factor": 0.1,
|
| 116 |
+
"initializer_range": 1e-10,
|
| 117 |
+
"intermediate_size": 4096,
|
| 118 |
+
"laux_allreduce": "all_nodes",
|
| 119 |
+
"layer_norm_eps": 1e-06,
|
| 120 |
+
"model_type": "intern_vit_6b",
|
| 121 |
+
"moe_coeff_ratio": 0.5,
|
| 122 |
+
"moe_intermediate_size": 768,
|
| 123 |
+
"moe_output_scale": 4.0,
|
| 124 |
+
"noisy_gate_policy": "RSample_before",
|
| 125 |
+
"norm_type": "layer_norm",
|
| 126 |
+
"num_attention_heads": 16,
|
| 127 |
+
"num_channels": 3,
|
| 128 |
+
"num_experts": 8,
|
| 129 |
+
"num_hidden_layers": 24,
|
| 130 |
+
"num_routed_experts": 4,
|
| 131 |
+
"num_shared_experts": 4,
|
| 132 |
+
"patch_size": 14,
|
| 133 |
+
"qk_normalization": false,
|
| 134 |
+
"qkv_bias": true,
|
| 135 |
+
"shared_expert_intermediate_size": 3072,
|
| 136 |
+
"use_bfloat16": true,
|
| 137 |
+
"use_flash_attn": true,
|
| 138 |
+
"use_moe": false,
|
| 139 |
+
"use_residual": true,
|
| 140 |
+
"use_rts": false,
|
| 141 |
+
"use_weighted_residual": false
|
| 142 |
+
}
|
| 143 |
+
}
|