UltraDoughnut committed on
Commit
c206437
·
verified ·
1 Parent(s): 81b77f6

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/config.json +143 -0
  2. lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/model.safetensors +3 -0
  3. lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/training_args.bin +3 -0
  4. lisa-ivl3-2b_bi2cbe_aati_srm/evaluation_metrics.json +116 -0
  5. lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758303972.bask-pg0308u25a.2176098.0 +3 -0
  6. lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758304185.bask-pg0308u25a.2184107.0 +3 -0
  7. lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-46-08_bask-pg0308u25a/events.out.tfevents.1758304037.bask-pg0308u25a.2176098.1 +3 -0
  8. lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758304237.bask-pg0308u25a.2184107.1 +3 -0
  9. lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758339214.bask-pg0308u25a.2184107.2 +3 -0
  10. lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/config.json +143 -0
  11. lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/model.safetensors +3 -0
  12. lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/training_args.bin +3 -0
  13. lisa-ivl3-2b_bi2cbe_aati_srs/evaluation_metrics.json +116 -0
  14. lisa-ivl3-2b_bi2cbe_aati_srs/events.out.tfevents.1758407218.bask-pg0308u25a.3988218.0 +3 -0
  15. lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758407268.bask-pg0308u25a.3988218.1 +3 -0
  16. lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758441983.bask-pg0308u25a.3988218.2 +3 -0
  17. lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/config.json +143 -0
  18. lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/model.safetensors +3 -0
  19. lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/training_args.bin +3 -0
  20. lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/evaluation_metrics.json +116 -0
  21. lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758822942.bask-pg0308u25a.3977024.0 +3 -0
  22. lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758823030.bask-pg0308u25a.3998347.0 +3 -0
  23. lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758823107.bask-pg0308u25a.3998347.1 +3 -0
  24. lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758870402.bask-pg0308u25a.3998347.2 +3 -0
  25. lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/config.json +143 -0
  26. lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/model.safetensors +3 -0
  27. lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/training_args.bin +3 -0
  28. lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/evaluation_metrics.json +116 -0
  29. lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/events.out.tfevents.1758558655.bask-pg0308u29a.637997.0 +3 -0
  30. lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758558731.bask-pg0308u29a.637997.1 +3 -0
  31. lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758608704.bask-pg0308u29a.637997.2 +3 -0
  32. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/config.json +143 -0
  33. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/model.safetensors +3 -0
  34. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/training_args.bin +3 -0
  35. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/evaluation_metrics.json +116 -0
  36. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821654.bask-pg0308u18a.998112.0 +3 -0
  37. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821976.bask-pg0308u18a.1004472.0 +3 -0
  38. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758822179.bask-pg0308u18a.1008370.0 +3 -0
  39. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-34-12_bask-pg0308u18a/events.out.tfevents.1758821725.bask-pg0308u18a.998112.1 +3 -0
  40. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-39-34_bask-pg0308u18a/events.out.tfevents.1758822035.bask-pg0308u18a.1004472.1 +3 -0
  41. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758822240.bask-pg0308u18a.1008370.1 +3 -0
  42. lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758869487.bask-pg0308u18a.1008370.2 +3 -0
  43. lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/config.json +143 -0
  44. lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/model.safetensors +3 -0
  45. lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/training_args.bin +3 -0
  46. lisa-ivl3-2b_bi2cbe_vlorati_coco/evaluation_metrics.json +116 -0
  47. lisa-ivl3-2b_bi2cbe_vlorati_coco/events.out.tfevents.1758236140.bask-pg0309u16a.1220041.0 +3 -0
  48. lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758236201.bask-pg0309u16a.1220041.1 +3 -0
  49. lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758283609.bask-pg0309u16a.1220041.2 +3 -0
  50. lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/config.json +143 -0
lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f3befb636a4bfdc7f407e8ec01f5b84a2869952dea6106070ab00f7d6b760ef
3
+ size 4211070232
lisa-ivl3-2b_bi2cbe_aati_srm/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:240c4be74f4b416addef8e472ffd17a8f0a9206792bbe14eb3617a6736ec132e
3
+ size 7352
lisa-ivl3-2b_bi2cbe_aati_srm/evaluation_metrics.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.528189480304718,
6
+ "eval_ciou": 0.6158800721168518
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.57504802942276,
12
+ "eval_ciou": 0.6519048810005188
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5494521856307983,
18
+ "eval_ciou": 0.6140078902244568
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5751751065254211,
24
+ "eval_ciou": 0.6430273652076721
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5621751546859741,
30
+ "eval_ciou": 0.6091215014457703
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5681710243225098,
36
+ "eval_ciou": 0.5827724933624268
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5692390203475952,
42
+ "eval_ciou": 0.5854980945587158
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.5853511095046997,
48
+ "eval_ciou": 0.5549483895301819
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.5778804421424866,
54
+ "eval_ciou": 0.5894233584403992
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.5841119885444641,
60
+ "eval_ciou": 0.5798441171646118
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.6100905537605286,
66
+ "eval_ciou": 0.6119125485420227
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.799595832824707,
72
+ "eval_ciou": 0.8027365207672119
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8168815970420837,
78
+ "eval_ciou": 0.8243600130081177
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7745703458786011,
84
+ "eval_ciou": 0.7807985544204712
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7551393508911133,
90
+ "eval_ciou": 0.7453246712684631
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7942529916763306,
96
+ "eval_ciou": 0.7944912910461426
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.7106485366821289,
102
+ "eval_ciou": 0.6990127563476562
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.7611709833145142,
108
+ "eval_ciou": 0.7682604193687439
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7570181488990784,
114
+ "eval_ciou": 0.7642934918403625
115
+ }
116
+ ]
lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758303972.bask-pg0308u25a.2176098.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fd0eb014901254de4a95eb71fc2571d348cabd50a57e396fb2c3d342a3d7fb9
3
+ size 486
lisa-ivl3-2b_bi2cbe_aati_srm/events.out.tfevents.1758304185.bask-pg0308u25a.2184107.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1af39459ef2a226054ba8d41e28d9351e1edf097f258d661335b41ebe8f3adc
3
+ size 212352
lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-46-08_bask-pg0308u25a/events.out.tfevents.1758304037.bask-pg0308u25a.2176098.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4044136eab8f6e9cbcae192b20ebf5e4ed93ba0420ebf5e87c4b2238109fa4e
3
+ size 9325
lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758304237.bask-pg0308u25a.2184107.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94cc218abf38e00bc41bdedd1db488f808895ac0ae3c3898e84ea6a04bec0e07
3
+ size 116381
lisa-ivl3-2b_bi2cbe_aati_srm/runs/Sep19_18-49-42_bask-pg0308u25a/events.out.tfevents.1758339214.bask-pg0308u25a.2184107.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c63060c4d02dcdbdc52b17a6208816f1c9a7a952fd8d4159426ee935d22b9aad
3
+ size 1402
lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e94e426c479692c600bd7da2a58a66d89b1a0b6105e5f4fba892bb35fa05130
3
+ size 4211070232
lisa-ivl3-2b_bi2cbe_aati_srs/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9efb840060edee20988b8e9bb354bde899f2bb93875da91a53a68a5e8259ceb3
3
+ size 7352
lisa-ivl3-2b_bi2cbe_aati_srs/evaluation_metrics.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5235294699668884,
6
+ "eval_ciou": 0.5689558982849121
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.4938444197177887,
12
+ "eval_ciou": 0.5315812826156616
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.52805095911026,
18
+ "eval_ciou": 0.5778353810310364
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.539726734161377,
24
+ "eval_ciou": 0.5628448128700256
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5516190528869629,
30
+ "eval_ciou": 0.5699143409729004
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5535993576049805,
36
+ "eval_ciou": 0.5486313700675964
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5771014094352722,
42
+ "eval_ciou": 0.630760908126831
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.5713648796081543,
48
+ "eval_ciou": 0.5902009606361389
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.566828727722168,
54
+ "eval_ciou": 0.5753384232521057
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.5662292838096619,
60
+ "eval_ciou": 0.5734410285949707
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.5632049441337585,
66
+ "eval_ciou": 0.5677175521850586
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.8058294057846069,
72
+ "eval_ciou": 0.810309886932373
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8246700167655945,
78
+ "eval_ciou": 0.8328030705451965
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7877973914146423,
84
+ "eval_ciou": 0.7934398651123047
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7641584277153015,
90
+ "eval_ciou": 0.7556021809577942
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7998954057693481,
96
+ "eval_ciou": 0.7981683611869812
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.7219251394271851,
102
+ "eval_ciou": 0.7105168104171753
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.766793966293335,
108
+ "eval_ciou": 0.7789492011070251
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7627427577972412,
114
+ "eval_ciou": 0.7680707573890686
115
+ }
116
+ ]
lisa-ivl3-2b_bi2cbe_aati_srs/events.out.tfevents.1758407218.bask-pg0308u25a.3988218.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceb9faddf368f10dc1f79077182ae3f0763d1d459cd00745aefdef29c4785740
3
+ size 212352
lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758407268.bask-pg0308u25a.3988218.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c60008ebfd9d19da94c88abc8cbca2ad3850f341e6cd01437d6e2c1a16144b9
3
+ size 116381
lisa-ivl3-2b_bi2cbe_aati_srs/runs/Sep20_23-26-55_bask-pg0308u25a/events.out.tfevents.1758441983.bask-pg0308u25a.3988218.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebda0e4e7c56eda12f89e0b655238dcdd19f18e497ae1d685d308e1e21d0b7dd
3
+ size 1402
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c525c1ae85c17caddcdb1b0d45df96d56a86227e4f990cbb36d7331a692551ad
3
+ size 4211070232
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7da9ac5d9ad0770700bf7b786cf18482fad5fd08e88a5e860bd99406ca19065d
3
+ size 7352
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.4889245629310608,
6
+ "eval_ciou": 0.5533338189125061
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5513965487480164,
12
+ "eval_ciou": 0.6630034446716309
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5442399382591248,
18
+ "eval_ciou": 0.6376377940177917
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5748190879821777,
24
+ "eval_ciou": 0.6525793671607971
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5847772359848022,
30
+ "eval_ciou": 0.643996000289917
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5810279846191406,
36
+ "eval_ciou": 0.6454022526741028
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5949556827545166,
42
+ "eval_ciou": 0.6094688177108765
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.6049715280532837,
48
+ "eval_ciou": 0.6379661560058594
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.6034538149833679,
54
+ "eval_ciou": 0.6570442914962769
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6016661524772644,
60
+ "eval_ciou": 0.6353110671043396
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.6043070554733276,
66
+ "eval_ciou": 0.608022153377533
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.7902190089225769,
72
+ "eval_ciou": 0.7928427457809448
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.807979166507721,
78
+ "eval_ciou": 0.8122658729553223
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.763839066028595,
84
+ "eval_ciou": 0.764618992805481
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.738534688949585,
90
+ "eval_ciou": 0.7319899201393127
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7776216864585876,
96
+ "eval_ciou": 0.7770327925682068
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.6900521516799927,
102
+ "eval_ciou": 0.676867663860321
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.7514216899871826,
108
+ "eval_ciou": 0.7589317560195923
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7455593943595886,
114
+ "eval_ciou": 0.7489427924156189
115
+ }
116
+ ]
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758822942.bask-pg0308u25a.3977024.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5090ab4b5276a748931cd1068f50c6d17618a085656691fb72a03b346bd7b3b
3
+ size 88
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/events.out.tfevents.1758823030.bask-pg0308u25a.3998347.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aac91ef4c5538767282625b5123a14924a0f19f313d68d62ed42912194aebe6
3
+ size 212352
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758823107.bask-pg0308u25a.3998347.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb225847cc5dc86b3ecdb0241d649d67245d9ca0710b65086da8f329c5700ee6
3
+ size 116397
lisa-ivl3-2b_bi2cbe_ib_vlorati_sr/runs/Sep25_18-57-07_bask-pg0308u25a/events.out.tfevents.1758870402.bask-pg0308u25a.3998347.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0907be6c27349e6b811188c1c28862e763dfde6a47b69586b494273b0268d8f9
3
+ size 1402
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9a73ee98332ed56252ee2690f1a7351446fd80c4253500657c9284e0a0f05fd
3
+ size 4211070232
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:833f70825712e685e5ab69b01da135f496f6a901ba5bc7b958a93796e95e8a09
3
+ size 7352
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5344988107681274,
6
+ "eval_ciou": 0.5989851355552673
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5579254031181335,
12
+ "eval_ciou": 0.646024227142334
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5501570701599121,
18
+ "eval_ciou": 0.6018446683883667
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5774487853050232,
24
+ "eval_ciou": 0.6542478203773499
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5822131037712097,
30
+ "eval_ciou": 0.6766245365142822
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5897811055183411,
36
+ "eval_ciou": 0.6791333556175232
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5887703895568848,
42
+ "eval_ciou": 0.6910147070884705
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.5998998880386353,
48
+ "eval_ciou": 0.6640490293502808
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.5920247435569763,
54
+ "eval_ciou": 0.6693744659423828
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6001232266426086,
60
+ "eval_ciou": 0.6858417987823486
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.5927180051803589,
66
+ "eval_ciou": 0.6138883233070374
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.7834749817848206,
72
+ "eval_ciou": 0.790122926235199
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8022208213806152,
78
+ "eval_ciou": 0.8086150884628296
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7566637396812439,
84
+ "eval_ciou": 0.7609479427337646
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7318623065948486,
90
+ "eval_ciou": 0.7281762361526489
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7749318480491638,
96
+ "eval_ciou": 0.7748793363571167
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.682511568069458,
102
+ "eval_ciou": 0.6719024777412415
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.743724524974823,
108
+ "eval_ciou": 0.7526366710662842
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7407757043838501,
114
+ "eval_ciou": 0.7478148341178894
115
+ }
116
+ ]
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/events.out.tfevents.1758558655.bask-pg0308u29a.637997.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb61bef795309a3922462febab004b11ccf980e30eac016b8230fa293a7b7656
3
+ size 212352
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758558731.bask-pg0308u29a.637997.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87c13cd165f21e6038c79b21806f6123c013c42bd45ef063c0e1671b8751dd7
3
+ size 116399
lisa-ivl3-2b_bi2cbe_ivs_vlorati_sr/runs/Sep22_17-30-52_bask-pg0308u29a/events.out.tfevents.1758608704.bask-pg0308u29a.637997.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf83b859f6d1900c3189c78baebb5ee0e6400bc6d00390754f89d38c552dfb86
3
+ size 1402
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee127087b96ff9811fbd07f160c2648b5879ccbcd7e88fbf1c57578cc6427656
3
+ size 4211070232
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf8b9941672f97b3e8473f3e7078f73367e3fad3a4c4b35b69c5a79104328df
3
+ size 7352
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/evaluation_metrics.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5294323563575745,
6
+ "eval_ciou": 0.5168058276176453
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5691018104553223,
12
+ "eval_ciou": 0.5466322302818298
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5456892848014832,
18
+ "eval_ciou": 0.6087337732315063
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5649483799934387,
24
+ "eval_ciou": 0.5830232501029968
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.5766127109527588,
30
+ "eval_ciou": 0.592596709728241
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5876106023788452,
36
+ "eval_ciou": 0.6196873188018799
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.5895294547080994,
42
+ "eval_ciou": 0.5830597281455994
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.5922108888626099,
48
+ "eval_ciou": 0.5886086225509644
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.6001683473587036,
54
+ "eval_ciou": 0.5857241749763489
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6061425805091858,
60
+ "eval_ciou": 0.6062945127487183
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.5916463136672974,
66
+ "eval_ciou": 0.5962467789649963
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.7914398908615112,
72
+ "eval_ciou": 0.7944017052650452
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8115019202232361,
78
+ "eval_ciou": 0.8148282766342163
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7657762169837952,
84
+ "eval_ciou": 0.7644218802452087
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7408427000045776,
90
+ "eval_ciou": 0.7323743104934692
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7806029319763184,
96
+ "eval_ciou": 0.7790927886962891
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.6951540112495422,
102
+ "eval_ciou": 0.683707058429718
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.7511024475097656,
108
+ "eval_ciou": 0.7564254403114319
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7492029070854187,
114
+ "eval_ciou": 0.7516255378723145
115
+ }
116
+ ]
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821654.bask-pg0308u18a.998112.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0f6402efa20cb4eb1c3dcaf9815e71b1964b12d1903fcd9a8cfc7ba8fc924f
3
+ size 486
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758821976.bask-pg0308u18a.1004472.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b21a9269fe7a0131c65a9b63b6caa674fe4f8b73071b6caa68cc19ce4724062
3
+ size 88
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/events.out.tfevents.1758822179.bask-pg0308u18a.1008370.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cd9c79f6c1e5c37360f0ca87b6fa4ff89de989a108bdfeadb937195000c169
3
+ size 212352
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-34-12_bask-pg0308u18a/events.out.tfevents.1758821725.bask-pg0308u18a.998112.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb59ce061f378068b22c6735f8430ed9ace878dcac3b831594603aaa1bb107a1
3
+ size 9338
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-39-34_bask-pg0308u18a/events.out.tfevents.1758822035.bask-pg0308u18a.1004472.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2050a14b591ad7960da8a1c578624e5fcadc630a64886750db14457a302231c
3
+ size 9131
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758822240.bask-pg0308u18a.1008370.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc263fde5f12193454e49ef64849b95edfdbe9d84d7748d1bda1f0c04506652
3
+ size 116397
lisa-ivl3-2b_bi2cbe_tb_vlorati_sr/runs/Sep25_18-42-56_bask-pg0308u18a/events.out.tfevents.1758869487.bask-pg0308u18a.1008370.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d1915b5faf4072182ed92a81dc1182e464a0d20fc50e6147b785c025d972e0f
3
+ size 1402
lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }
lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c75bb36d150d76c1aded195dfe0969ac1e5f7b51708e3253890664b3306fe7b6
3
+ size 4211070232
lisa-ivl3-2b_bi2cbe_vlorati_coco/ckpt_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:586d01e0f868c297c5556fb5760bb5f53403f37d00934248a1f70708c2bbdc4d
3
+ size 7352
lisa-ivl3-2b_bi2cbe_vlorati_coco/evaluation_metrics.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "val_dataset": "ReasonSeg|val",
4
+ "epoch": 1.0,
5
+ "eval_giou": 0.5369102954864502,
6
+ "eval_ciou": 0.5064759254455566
7
+ },
8
+ {
9
+ "val_dataset": "ReasonSeg|val",
10
+ "epoch": 2.0,
11
+ "eval_giou": 0.5686467289924622,
12
+ "eval_ciou": 0.611318051815033
13
+ },
14
+ {
15
+ "val_dataset": "ReasonSeg|val",
16
+ "epoch": 3.0,
17
+ "eval_giou": 0.5613127946853638,
18
+ "eval_ciou": 0.6206056475639343
19
+ },
20
+ {
21
+ "val_dataset": "ReasonSeg|val",
22
+ "epoch": 4.0,
23
+ "eval_giou": 0.5933331847190857,
24
+ "eval_ciou": 0.6126891374588013
25
+ },
26
+ {
27
+ "val_dataset": "ReasonSeg|val",
28
+ "epoch": 5.0,
29
+ "eval_giou": 0.6065125465393066,
30
+ "eval_ciou": 0.6544414162635803
31
+ },
32
+ {
33
+ "val_dataset": "ReasonSeg|val",
34
+ "epoch": 6.0,
35
+ "eval_giou": 0.5995581150054932,
36
+ "eval_ciou": 0.6379423141479492
37
+ },
38
+ {
39
+ "val_dataset": "ReasonSeg|val",
40
+ "epoch": 7.0,
41
+ "eval_giou": 0.6159911155700684,
42
+ "eval_ciou": 0.621420681476593
43
+ },
44
+ {
45
+ "val_dataset": "ReasonSeg|val",
46
+ "epoch": 8.0,
47
+ "eval_giou": 0.6243378520011902,
48
+ "eval_ciou": 0.6523417234420776
49
+ },
50
+ {
51
+ "val_dataset": "ReasonSeg|val",
52
+ "epoch": 9.0,
53
+ "eval_giou": 0.6166976690292358,
54
+ "eval_ciou": 0.6346321702003479
55
+ },
56
+ {
57
+ "val_dataset": "ReasonSeg|val",
58
+ "epoch": 10.0,
59
+ "eval_giou": 0.6171593070030212,
60
+ "eval_ciou": 0.6407290697097778
61
+ },
62
+ {
63
+ "val_dataset": "ReasonSeg|test",
64
+ "epoch": 10.0,
65
+ "eval_giou": 0.5836987495422363,
66
+ "eval_ciou": 0.6126533150672913
67
+ },
68
+ {
69
+ "val_dataset": "refcoco|unc|val",
70
+ "epoch": 10.0,
71
+ "eval_giou": 0.7852296233177185,
72
+ "eval_ciou": 0.7870670557022095
73
+ },
74
+ {
75
+ "val_dataset": "refcoco|unc|testA",
76
+ "epoch": 10.0,
77
+ "eval_giou": 0.8046602010726929,
78
+ "eval_ciou": 0.8073185086250305
79
+ },
80
+ {
81
+ "val_dataset": "refcoco|unc|testB",
82
+ "epoch": 10.0,
83
+ "eval_giou": 0.7577531337738037,
84
+ "eval_ciou": 0.7593867778778076
85
+ },
86
+ {
87
+ "val_dataset": "refcoco+|unc|val",
88
+ "epoch": 10.0,
89
+ "eval_giou": 0.7351171374320984,
90
+ "eval_ciou": 0.7267892956733704
91
+ },
92
+ {
93
+ "val_dataset": "refcoco+|unc|testA",
94
+ "epoch": 10.0,
95
+ "eval_giou": 0.7731851935386658,
96
+ "eval_ciou": 0.7728055119514465
97
+ },
98
+ {
99
+ "val_dataset": "refcoco+|unc|testB",
100
+ "epoch": 10.0,
101
+ "eval_giou": 0.6876177191734314,
102
+ "eval_ciou": 0.6751795411109924
103
+ },
104
+ {
105
+ "val_dataset": "refcocog|umd|test",
106
+ "epoch": 10.0,
107
+ "eval_giou": 0.7494315505027771,
108
+ "eval_ciou": 0.7570431232452393
109
+ },
110
+ {
111
+ "val_dataset": "refcocog|umd|val",
112
+ "epoch": 10.0,
113
+ "eval_giou": 0.7449917197227478,
114
+ "eval_ciou": 0.750389039516449
115
+ }
116
+ ]
lisa-ivl3-2b_bi2cbe_vlorati_coco/events.out.tfevents.1758236140.bask-pg0309u16a.1220041.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a673a2115871fc802ba01b020cb9f8f61e30b9e9406eedf604c1d500fb25c686
3
+ size 212352
lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758236201.bask-pg0309u16a.1220041.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04a7180c0676429a3801a6fe71e0ec596fc2cff90bfd8a557828056eb13598e1
3
+ size 116395
lisa-ivl3-2b_bi2cbe_vlorati_coco/runs/Sep18_23-55-37_bask-pg0309u16a/events.out.tfevents.1758283609.bask-pg0309u16a.1220041.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad3a469cb6b9847f58a453fce555b48ae34226582d16eaa962b62daec1fe25d6
3
+ size 1402
lisa-ivl3-2b_bi2cbe_vlorati_sr/ckpt_model/config.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "InternVL3Self"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_internvl_chat.InternVLChatConfig",
7
+ "AutoModel": "modeling_internvl_chat.InternVLChatModel",
8
+ "AutoModelForCausalLM": "modeling_internvl_chat.InternVLChatModel"
9
+ },
10
+ "downsample_ratio": 0.5,
11
+ "dtype": "bfloat16",
12
+ "dynamic_image_size": true,
13
+ "eos_token_id": 151645,
14
+ "force_image_size": 448,
15
+ "hidden_size": 1536,
16
+ "image_fold": null,
17
+ "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
20
+ "architectures": [
21
+ "Qwen2ForCausalLM"
22
+ ],
23
+ "attention_dropout": 0.0,
24
+ "bos_token_id": 151643,
25
+ "dtype": "bfloat16",
26
+ "eos_token_id": 151643,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 1536,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8960,
31
+ "layer_types": [
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention"
60
+ ],
61
+ "max_position_embeddings": 32768,
62
+ "max_window_layers": 70,
63
+ "model_type": "qwen2",
64
+ "moe_config": null,
65
+ "num_attention_heads": 12,
66
+ "num_hidden_layers": 28,
67
+ "num_key_value_heads": 2,
68
+ "rms_norm_eps": 1e-06,
69
+ "rope_scaling": {
70
+ "factor": 2.0,
71
+ "rope_type": "dynamic",
72
+ "type": "dynamic"
73
+ },
74
+ "rope_theta": 1000000.0,
75
+ "sliding_window": null,
76
+ "use_bfloat16": true,
77
+ "use_cache": false,
78
+ "use_sliding_window": false,
79
+ "vocab_size": 151676
80
+ },
81
+ "max_dynamic_patch": 12,
82
+ "min_dynamic_patch": 1,
83
+ "model_type": "internvl_chat",
84
+ "output_attentions": false,
85
+ "pad2square": false,
86
+ "pad_token_id": 151643,
87
+ "ps_version": "v2",
88
+ "select_layer": -1,
89
+ "system_message": null,
90
+ "template": "internvl2_5",
91
+ "tie_word_embeddings": false,
92
+ "transformers_version": null,
93
+ "use_backbone_lora": 0,
94
+ "use_llm_lora": 0,
95
+ "use_thumbnail": true,
96
+ "vision_config": {
97
+ "_attn_implementation_autoset": true,
98
+ "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
99
+ "architectures": [
100
+ "InternVisionModel"
101
+ ],
102
+ "attention_dropout": 0.0,
103
+ "auto_map": {
104
+ "AutoConfig": "configuration_intern_vit.InternVisionConfig",
105
+ "AutoModel": "modeling_intern_vit.InternVisionModel"
106
+ },
107
+ "capacity_factor": 1.2,
108
+ "drop_path_rate": 0.1,
109
+ "dropout": 0.0,
110
+ "dtype": "bfloat16",
111
+ "eval_capacity_factor": 1.4,
112
+ "hidden_act": "gelu",
113
+ "hidden_size": 1024,
114
+ "image_size": 448,
115
+ "initializer_factor": 0.1,
116
+ "initializer_range": 1e-10,
117
+ "intermediate_size": 4096,
118
+ "laux_allreduce": "all_nodes",
119
+ "layer_norm_eps": 1e-06,
120
+ "model_type": "intern_vit_6b",
121
+ "moe_coeff_ratio": 0.5,
122
+ "moe_intermediate_size": 768,
123
+ "moe_output_scale": 4.0,
124
+ "noisy_gate_policy": "RSample_before",
125
+ "norm_type": "layer_norm",
126
+ "num_attention_heads": 16,
127
+ "num_channels": 3,
128
+ "num_experts": 8,
129
+ "num_hidden_layers": 24,
130
+ "num_routed_experts": 4,
131
+ "num_shared_experts": 4,
132
+ "patch_size": 14,
133
+ "qk_normalization": false,
134
+ "qkv_bias": true,
135
+ "shared_expert_intermediate_size": 3072,
136
+ "use_bfloat16": true,
137
+ "use_flash_attn": true,
138
+ "use_moe": false,
139
+ "use_residual": true,
140
+ "use_rts": false,
141
+ "use_weighted_residual": false
142
+ }
143
+ }