{ "data": { "aspect_ratio_range": [0.5, 2.0], "area_range": [250000, 1000000], "clamp_max_depth": 1000.0, "center_augmentation": 0.5, "fov_range_absolute": [1, 179], "fov_range_relative": [0.01, 1.0], "image_augmentation": ["jittering", "jpeg_loss", "blurring"], "datasets": [ { "name": "TartanAir", "path": "blobmnt/data_v3/TartanAir", "label_type": "synthetic", "index": ".index.txt", "depth": "depth.png", "weight": 4.8, "center_augmentation": 0.25, "fov_range_absolute": [30, 150], "fov_range_relative": [0.5, 1.0], "image_augmentation": ["jittering", "jpeg_loss", "blurring", "shot_noise"] } ] }, "model_version": "v1", "model": { "encoder": "dinov2_vitl14", "remap_output": "exp", "intermediate_layers": 4, "dim_upsample": [256, 128, 64], "dim_times_res_block_hidden": 2, "num_res_blocks": 2, "num_tokens_range": [1200, 2500], "last_conv_channels": 32, "last_conv_size": 1 }, "optimizer": { "type": "AdamW", "params": [ {"params": {"include": ["*"], "exclude": ["*backbone.*"]}, "lr": 1e-4}, {"params": {"include": ["*backbone.*"]}, "lr": 1e-5} ] }, "lr_scheduler": { "type": "SequentialLR", "params": { "schedulers": [ {"type": "LambdaLR", "params": {"lr_lambda": ["1.0", "max(0.0, min(1.0, (epoch - 1000) / 1000))"]}}, {"type": "StepLR", "params": {"step_size": 25000, "gamma": 0.5}} ], "milestones": [2000] } }, "low_resolution_training_steps": 50000, "loss": { "invalid": {}, "synthetic": { "global": {"function": "affine_invariant_global_loss", "weight": 1.0, "params": {"align_resolution": 32}}, "patch_4": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 4, "align_resolution": 16, "num_patches": 16}}, "patch_16": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 16, "align_resolution": 8, "num_patches": 256}}, "patch_64": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 64, "align_resolution": 4, "num_patches": 4096}}, "normal": {"function": "normal_loss", "weight": 1.0}, "mask": {"function": "mask_l2_loss", "weight": 1.0} }, "sfm": { "global": {"function": "affine_invariant_global_loss", "weight": 1.0, "params": {"align_resolution": 32}}, "patch_4": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 4, "align_resolution": 16, "num_patches": 16}}, "patch_16": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 16, "align_resolution": 8, "num_patches": 256}}, "mask": {"function": "mask_l2_loss", "weight": 1.0} }, "lidar": { "global": {"function": "affine_invariant_global_loss", "weight": 1.0, "params": {"align_resolution": 32}}, "patch_4": {"function": "affine_invariant_local_loss", "weight": 1.0, "params": {"level": 4, "align_resolution": 16, "num_patches": 16}}, "mask": {"function": "mask_l2_loss", "weight": 1.0} } } }