Renamed Weights Folder Names; Added app.py

Browse files

Files changed (7) hide show

app.py +160 -0
pre-trained weights/{CLIP-EBC-ViT-B:16 (NWPU) → CLIP-EBC-ViT-B-16-NWPU}/README.md +0 -0
pre-trained weights/{CLIP-EBC-ViT-B:16 (NWPU) → CLIP-EBC-ViT-B-16-NWPU}/config.json +0 -0
pre-trained weights/{CLIP-EBC-ViT-B:16 (NWPU) → CLIP-EBC-ViT-B-16-NWPU}/model.safetensors +0 -0
pre-trained weights/{CLIP-EBC-ViT-L:14 (NWPU) → CLIP-EBC-ViT-L-14-NWPU}/README.md +0 -0
pre-trained weights/{CLIP-EBC-ViT-L:14 (NWPU) → CLIP-EBC-ViT-L-14-NWPU}/config.json +0 -0
pre-trained weights/{CLIP-EBC-ViT-L:14 (NWPU) → CLIP-EBC-ViT-L-14-NWPU}/model.safetensors +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,160 @@

+import torch
+from torch import nn
+import numpy as np
+from PIL import Image
+import json, os
+import gradio as gr
+import torchvision.transforms.functional as TF
+from safetensors.torch import load_file  # Import the load_file function from safetensors
+from matplotlib import cm
+from models import get_model
+from utils import resize_density_map, init_seeds
+mean = (0.485, 0.456, 0.406)
+std = (0.229, 0.224, 0.225)
+alpha = 0.8
+init_seeds(42)
+# -----------------------------
+# Define the model architecture
+# -----------------------------
+truncation = 4
+reduction = 8
+granularity = "fine"
+anchor_points = "average"
+model_name = "clip_vit_l_14"
+input_size = 224
+# Comment the lines below to test non-CLIP models.
+prompt_type = "word"
+num_vpt = 32
+vpt_drop = 0.
+deep_vpt = True
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+if truncation is None:  # regression, no truncation.
+    bins, anchor_points = None, None
+else:
+    with open(os.path.join("configs", f"reduction_{reduction}.json"), "r") as f:
+        config = json.load(f)[str(truncation)]["nwpu"]
+    bins = config["bins"][granularity]
+    anchor_points = config["anchor_points"][granularity]["average"] if anchor_points == "average" else config["anchor_points"][granularity]["middle"]
+    bins = [(float(b[0]), float(b[1])) for b in bins]
+    anchor_points = [float(p) for p in anchor_points]
+model = get_model(
+    backbone=model_name,
+    input_size=input_size,
+    reduction=reduction,
+    bins=bins,
+    anchor_points=anchor_points,
+    # CLIP parameters
+    prompt_type=prompt_type,
+    num_vpt=num_vpt,
+    vpt_drop=vpt_drop,
+    deep_vpt=deep_vpt
+)
+weights_path = os.path.join("pre-trained weights", "CLIP-EBC-ViT-L-14-NWPU", "model.safetensors")
+state_dict = load_file(weights_path)
+new_state_dict = {}
+for k, v in state_dict.items():
+    new_state_dict[k.replace("model.", "")] = v
+model.load_state_dict(new_state_dict)
+model.to(device)
+model.eval()
+# -----------------------------
+# Preprocessing function
+# -----------------------------
+# Adjust the image transforms to match what your model expects.
+def transform(image: Image.Image):
+    assert isinstance(image, Image.Image), "Input must be a PIL Image"
+    image_tensor = TF.to_tensor(image)
+    image_height, image_width = image_tensor.shape[-2:]
+    if image_height < input_size or image_width < input_size:
+        # Find the ratio to resize the image while maintaining the aspect ratio
+        ratio = max(input_size / image_height, input_size / image_width)
+        new_height = int(image_height * ratio) + 1
+        new_width = int(image_width * ratio) + 1
+        image_tensor = TF.resize(image_tensor, (new_height, new_width), interpolation=TF.InterpolationMode.BICUBIC, antialias=True)
+    image_tensor = TF.normalize(image_tensor, mean=mean, std=std)
+    return image_tensor.unsqueeze(0)  # Add batch dimension
+# -----------------------------
+# Inference function
+# -----------------------------
+def predict(image: Image.Image):
+    """
+    Given an input image, preprocess it, run the model to obtain a density map,
+    compute the total crowd count, and prepare the density map for display.
+    """
+    # Preprocess the image
+    input_width, input_height = image.size
+    input_tensor = transform(image).to(device)  # shape: (1, 3, H, W)
+    with torch.no_grad():
+        density_map = model(input_tensor)  # expected shape: (1, 1, H, W)
+        total_count = density_map.sum().item()
+        resized_density_map = resize_density_map(density_map, (input_height, input_width)).cpu().squeeze().numpy()
+    # Normalize the density map for display purposes
+    eps = 1e-8
+    density_map_norm = (resized_density_map - resized_density_map.min()) / (resized_density_map.max() - resized_density_map.min() + eps)
+    # Apply a colormap (e.g., 'jet') to get an RGBA image
+    colormap = cm.get_cmap("jet")
+    # The colormap returns values in [0,1]. Scale to [0,255] and convert to uint8.
+    density_map_color = (colormap(density_map_norm) * 255).astype(np.uint8)
+    density_map_color_img = Image.fromarray(density_map_color).convert("RGBA")
+    # Ensure the original image is in RGBA format.
+    image_rgba = image.convert("RGBA")
+    overlayed_image = Image.blend(image_rgba, density_map_color_img, alpha=alpha)
+    return image, overlayed_image, f"Predicted Count: {total_count:.2f}"
+# -----------------------------
+# Build Gradio Interface using Blocks for a two-column layout
+# -----------------------------
+with gr.Blocks() as demo:
+    gr.Markdown("# Crowd Counting Demo")
+    gr.Markdown("Upload an image or select an example below to see the predicted crowd density map and total count.")
+    with gr.Row():
+        with gr.Column():
+            input_img = gr.Image(
+                label="Input Image",
+                sources=["upload", "clipboard"],
+                type="pil",
+            )
+            submit_btn = gr.Button("Predict")
+        with gr.Column():
+            output_img = gr.Image(label="Predicted Density Map", type="pil")
+            output_text = gr.Textbox(label="Total Count")
+    submit_btn.click(fn=predict, inputs=input_img, outputs=[input_img, output_img, output_text])
+    # Optional: add example images. Ensure these files are in your repo.
+    gr.Examples(
+        examples=[
+            ["example1.jpg"],
+            ["example2.jpg"]
+        ],
+        inputs=input_img,
+        label="Try an example"
+    )
+# Launch the app
+demo.launch(share=True)

pre-trained weights/{CLIP-EBC-ViT-B:16 (NWPU) → CLIP-EBC-ViT-B-16-NWPU}/README.md RENAMED Viewed

File without changes

pre-trained weights/{CLIP-EBC-ViT-B:16 (NWPU) → CLIP-EBC-ViT-B-16-NWPU}/config.json RENAMED Viewed

File without changes

pre-trained weights/{CLIP-EBC-ViT-B:16 (NWPU) → CLIP-EBC-ViT-B-16-NWPU}/model.safetensors RENAMED Viewed

File without changes

pre-trained weights/{CLIP-EBC-ViT-L:14 (NWPU) → CLIP-EBC-ViT-L-14-NWPU}/README.md RENAMED Viewed

File without changes

pre-trained weights/{CLIP-EBC-ViT-L:14 (NWPU) → CLIP-EBC-ViT-L-14-NWPU}/config.json RENAMED Viewed

File without changes

pre-trained weights/{CLIP-EBC-ViT-L:14 (NWPU) → CLIP-EBC-ViT-L-14-NWPU}/model.safetensors RENAMED Viewed

File without changes