{ "activation": "swiglu", "bias": false, "dim_feedforward": 3072, "dpt_features": 128, "dpt_out_channels": [ 96, 192, 384, 768 ], "dpt_out_layers": null, "dropout": 0.0, "include_alpha": false, "latent_dim": 768, "norm_first": true, "norm_type": "rms_norm", "num_heads": 6, "num_layers": 12, "num_register_tokens": 16, "patch_size": 8, "pe_type": "rope", "qk_norm": true, "rope_double_max_freq": false, "rope_type": "triangle", "texture_channels": 13, "texture_encode_patch_size": 32, "texture_encoder_norm_type": "rms_norm", "turn_to_cam_coord": true, "use_dpt_decoder": true, "use_ldr": false, "use_vn_encoder": true, "vdir_num_freqs": 0, "vdir_pe_type": "nerf", "vertex_pe_num_freqs": 12, "view_indep_qk_norm": true, "view_transformer_ffn_hidden_dim": 3072, "view_transformer_include_self_attn": true, "view_transformer_latent_dim": 768, "view_transformer_n_heads": 6, "view_transformer_n_layers": 6, "view_transformer_use_swin_attn": false, "vn_encoder_norm_type": "rms_norm", "vn_pe_num_freqs": 6 }