export CUDA_VISIBLE_DEVICES=0  # Set the GPU ID. Comment this line out to use all GPUs with DDP.
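# To use a subset of GPUs rather than all of them, you can list several IDs instead
# (standard CUDA environment-variable semantics, not specific to this repo):
# export CUDA_VISIBLE_DEVICES=0,1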
# Train the commonly used VGG19-based encoder-decoder model on NWPU-Crowd.
# Change `--dataset` to `sha`, `shb`, or `qnrf` to train on ShanghaiTech A, ShanghaiTech B, or UCF-QNRF instead.
python trainer.py \
    --model vgg19_ae --input_size 448 --reduction 8 --truncation 4 --anchor_points average \
    --dataset nwpu \
    --count_loss dmcount &&
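# For example, the same VGG19-based model on ShanghaiTech A (only `--dataset` changes;
# shown commented out so it does not interrupt the `&&` chain above):
# python trainer.py \
#     --model vgg19_ae --input_size 448 --reduction 8 --truncation 4 --anchor_points average \
#     --dataset sha \
#     --count_loss dmcount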
# Train the CLIP-EBC (ResNet50) model on ShanghaiTech A. Use `--dataset shb` to train on ShanghaiTech B instead.
# To enable sliding-window prediction with a window size and stride of 448, append
# `--sliding_window --window_size 448 --stride 448` to the command below. (A commented-out
# option line cannot sit inside the `\`-continued command: the `#` would silently end
# the command and drop the arguments after it.)
python trainer.py \
    --model clip_resnet50 --input_size 448 --reduction 8 --truncation 4 --anchor_points average --prompt_type word \
    --dataset sha \
    --count_loss dmcount &&
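# Sketch of the sliding-window variant described above (commented out so it does not
# interrupt the `&&` chain; it uses only flags that appear elsewhere in this script):
# python trainer.py \
#     --model clip_resnet50 --input_size 448 --reduction 8 --truncation 4 --anchor_points average --prompt_type word \
#     --dataset sha \
#     --sliding_window --window_size 448 --stride 448 \
#     --count_loss dmcount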
# Train the CLIP-EBC (ViT-B/16) model on UCF-QNRF, using VPT during training and sliding-window prediction during testing.
# By default, 32 tokens per layer are used in VPT; set `--num_vpt` to change the number of tokens.
# By default, deep visual prompt tuning is used; pass `--shallow_vpt` to use shallow visual prompt tuning instead.
# `--amp` enables automatic mixed-precision training.
python trainer.py \
    --model clip_vit_b_16 --input_size 224 --reduction 8 --truncation 4 \
    --dataset qnrf --batch_size 16 --amp \
    --num_crops 2 --sliding_window --window_size 224 --stride 224 --warmup_lr 1e-3 \
    --count_loss dmcount
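# A variant with explicit VPT settings, per the notes above (a sketch: 16 tokens per
# layer and shallow prompt tuning; all flags appear elsewhere in this file):
# python trainer.py \
#     --model clip_vit_b_16 --input_size 224 --reduction 8 --truncation 4 \
#     --dataset qnrf --batch_size 16 --amp \
#     --num_vpt 16 --shallow_vpt \
#     --num_crops 2 --sliding_window --window_size 224 --stride 224 --warmup_lr 1e-3 \
#     --count_loss dmcount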
# Generate results on the NWPU-Crowd test set.
# python test_nwpu.py \
#     --model clip_vit_b_16 --input_size 224 --reduction 8 --truncation 4 --anchor_points average --prompt_type word \
#     --num_vpt 32 --vpt_drop 0.0 --sliding_window --stride 224 \
#     --weight_path ./checkpoints/nwpu/clip_vit_b_16_word_224_8_4_fine_1.0_dmcount/best_rmse_1.pth