|
|
import os |
|
|
import cv2 |
|
|
import torch |
|
|
import numpy as np |
|
|
import gradio as gr |
|
|
import base64 |
|
|
from typing import List, Dict, Any |
|
|
import tempfile |
|
|
import time |
|
|
from PIL import Image, ImageDraw |
|
|
import json |
|
|
import io |
|
|
|
|
|
|
|
|
# The RetinaFace implementation lives in project-local packages. Without
# them nothing in this app can work, so a failed import ends the process
# with a non-zero exit status instead of continuing half-initialised.
try:
    from models.retinaface import RetinaFace
    from utils.prior_box import PriorBox
    from utils.py_cpu_nms import py_cpu_nms
    from utils.box_utils import decode, decode_landm
    print("✅ All imports successful!")
except ImportError as e:
    print(f"❌ Import error: {e}")
    import sys
    sys.exit(1)
|
|
|
|
|
|
|
|
# Model handles start out empty; load_models() fills them in.
mobilenet_model = resnet_model = None

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
|
|
|
|
|
def _load_retinaface(cfg, weights_path, label):
    """Build one RetinaFace network and populate it from a checkpoint file.

    Args:
        cfg: Backbone configuration dict passed to ``RetinaFace``.
        weights_path: Path of the ``.pth`` checkpoint to load.
        label: Human-readable backbone name used in log messages.

    Returns:
        The model in eval mode on ``device``, or ``None`` when the checkpoint
        file is missing or any step of the load fails.
    """
    if not os.path.exists(weights_path):
        print(f"❌ {weights_path} not found!")
        return None

    try:
        model = RetinaFace(cfg=cfg, phase='test')
        print(f"✅ {label} model instance created")

        # NOTE(security): torch.load unpickles the file, so only trusted
        # checkpoints must ever be placed next to the app.
        state = torch.load(weights_path, map_location=device)

        # Some checkpoints wrap the weights in a 'state_dict' entry and/or
        # carry the 'module.' key prefix added by nn.DataParallel. With
        # strict=False such keys would silently match nothing and leave the
        # network randomly initialised, so normalise them first.
        if isinstance(state, dict) and 'state_dict' in state:
            state = state['state_dict']
        prefix = 'module.'
        state = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state.items()
        }
        print(f"✅ {label} state dict loaded with {len(state)} keys")

        missing_keys, unexpected_keys = model.load_state_dict(state, strict=False)
        if missing_keys:
            print(f"⚠️ Missing keys in {label}: {missing_keys[:5]}...")
        if unexpected_keys:
            print(f"⚠️ Unexpected keys in {label}: {unexpected_keys[:5]}...")

        model.eval()
        model = model.to(device)
        print(f"✅ {label} model loaded successfully!")
        return model
    except Exception as e:
        # Best effort: one broken backbone must not prevent the other one
        # from being used.
        print(f"❌ Error loading {label}: {e}")
        return None


def load_models():
    """Load both MobileNet and ResNet RetinaFace models.

    Each backbone is loaded independently into the module-level
    ``mobilenet_model`` / ``resnet_model`` globals; a backbone whose weight
    file is missing or fails to load is left as ``None``.

    Returns:
        ``True`` if at least one model was loaded, ``False`` otherwise.
        Errors are reported on stdout and never raised.
    """
    global mobilenet_model, resnet_model

    try:
        print("Starting model loading...")

        mobilenet_cfg = {
            'name': 'mobilenet0.25',
            'min_sizes': [[16, 32], [64, 128], [256, 512]],
            'steps': [8, 16, 32],
            'variance': [0.1, 0.2],
            'clip': False,
            'loc_weight': 2.0,
            'gpu_train': True,
            'batch_size': 32,
            'ngpu': 1,
            'epoch': 250,
            'decay1': 190,
            'decay2': 220,
            'image_size': 640,
            'pretrain': False,
            'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
            'in_channel': 32,
            'out_channel': 64,
        }

        resnet_cfg = {
            'name': 'Resnet50',
            'min_sizes': [[16, 32], [64, 128], [256, 512]],
            'steps': [8, 16, 32],
            'variance': [0.1, 0.2],
            'clip': False,
            'loc_weight': 2.0,
            'gpu_train': True,
            'batch_size': 24,
            'ngpu': 4,
            'epoch': 100,
            'decay1': 70,
            'decay2': 90,
            'image_size': 840,
            'pretrain': False,
            'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
            'in_channel': 256,
            'out_channel': 256,
        }

        print("Loading MobileNet...")
        mobilenet_model = _load_retinaface(
            mobilenet_cfg, 'mobilenet0.25_Final.pth', "MobileNet"
        )

        print("Loading ResNet...")
        resnet_model = _load_retinaface(
            resnet_cfg, 'Resnet50_Final.pth', "ResNet"
        )

        if mobilenet_model is None and resnet_model is None:
            print("❌ No models loaded successfully!")
            return False

        print("✅ At least one model loaded successfully!")
        return True

    except Exception as e:
        import traceback
        print(f"❌ Error in load_models: {e}")
        print(f"❌ Full traceback: {traceback.format_exc()}")
        return False
|
|
|
|
|
def _prepare_rgb_array(image):
    """Return *image* as a contiguous (H, W, 3) uint8 RGB array.

    PIL images are converted to RGB mode (dropping alpha, expanding
    greyscale). Arrays are accepted as 2-D greyscale or 3-D with 1, 3 or 4
    channels; any other shape raises ``ValueError``.
    """
    if isinstance(image, Image.Image):
        return np.array(image.convert("RGB"))

    pixels = np.asarray(image)
    if pixels.ndim == 2:
        pixels = np.stack((pixels,) * 3, axis=-1)
    elif pixels.ndim == 3 and pixels.shape[2] == 1:
        pixels = np.repeat(pixels, 3, axis=2)
    elif pixels.ndim == 3 and pixels.shape[2] == 4:
        pixels = pixels[:, :, :3]
    if pixels.ndim != 3 or pixels.shape[2] != 3:
        raise ValueError(f"Unsupported image shape: {pixels.shape}")
    if pixels.dtype != np.uint8:
        # PIL can only build an RGB image from uint8 data.
        pixels = np.clip(pixels, 0, 255).astype(np.uint8)
    return np.ascontiguousarray(pixels)


def detect_faces(image, model_type="mobilenet", confidence_threshold=0.5, nms_threshold=0.4):
    """Detect faces in an image and draw the detections on a copy of it.

    Args:
        image: PIL image or numpy array holding the input picture (RGB).
        model_type: ``"resnet"`` selects the ResNet backbone; any other value
            selects MobileNet. If the selected backbone is not loaded the
            other one is used instead.
        confidence_threshold: Minimum face score for a detection to be kept.
        nms_threshold: Overlap threshold handed to ``py_cpu_nms``.

    Returns:
        ``(annotated_image, markdown_summary)`` on success, or
        ``(None, message)`` when no image was given, no model is loaded, or
        an error occurred. Exceptions are never propagated to the caller.
    """
    if image is None:
        return None, "❌ No image provided. Please upload an image first."

    try:
        start_time = time.time()

        # Pick the requested backbone, falling back to the other one.
        loaded = {"resnet": resnet_model, "mobilenet": mobilenet_model}
        labels = {"resnet": "ResNet", "mobilenet": "MobileNet"}
        requested = "resnet" if model_type == "resnet" else "mobilenet"
        fallback = "mobilenet" if requested == "resnet" else "resnet"

        model_type = requested
        model = loaded[requested]
        if model is None:
            print(f"⚠️ {labels[requested]} not available, falling back to {labels[fallback]}")
            model_type = fallback
            model = loaded[fallback]

        if model is None:
            return None, "❌ No models are loaded. Please check the model loading logs."

        cfg = {
            'min_sizes': [[16, 32], [64, 128], [256, 512]],
            'steps': [8, 16, 32],
            'variance': [0.1, 0.2],
            'clip': False,
            'image_size': 840 if model_type == "resnet" else 640,
        }

        rgb = _prepare_rgb_array(image)
        im_height, im_width = rgb.shape[:2]

        # The per-channel means (104, 117, 123) follow the BGR convention of
        # the reference RetinaFace pipeline, so flip RGB -> BGR before
        # subtracting them.
        img = np.ascontiguousarray(rgb[:, :, ::-1], dtype=np.float32)
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0).to(device)

        # Multipliers that bring decoded coordinates back to pixel units:
        # (w, h) repeated for the 2 box corners and the 5 landmark points.
        box_scale = torch.Tensor([im_width, im_height] * 2).to(device)
        landmark_scale = torch.Tensor([im_width, im_height] * 5).to(device)

        with torch.no_grad():
            loc, conf, landms = model(img)

        priors = PriorBox(cfg, image_size=(im_height, im_width)).forward()
        prior_data = priors.to(device).data

        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = (boxes * box_scale).cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        landms = (landms * landmark_scale).cpu().numpy()

        # Drop low-confidence candidates.
        inds = np.where(scores > confidence_threshold)[0]
        boxes, landms, scores = boxes[inds], landms[inds], scores[inds]

        # Keep at most the 5000 best-scoring candidates, best first.
        order = scores.argsort()[::-1][:5000]
        boxes, landms, scores = boxes[order], landms[order], scores[order]

        # Non-maximum suppression over (x1, y1, x2, y2, score) rows.
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        result_image = Image.fromarray(rgb)
        draw = ImageDraw.Draw(result_image)

        landmark_names = ("left_eye", "right_eye", "nose", "left_mouth", "right_mouth")
        faces = []
        for det, landmarks in zip(dets, landms):
            x1, y1, x2, y2, score = (float(value) for value in det)
            points = [
                (float(landmarks[i]), float(landmarks[i + 1]))
                for i in range(0, 10, 2)
            ]

            draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
            draw.text((x1, y1 - 15), f'{score:.2f}', fill="red")
            for px, py in points:
                draw.ellipse([px - 2, py - 2, px + 2, py + 2], fill="blue")

            faces.append({
                "bbox": {"x1": x1, "y1": y1, "x2": x2, "y2": y2},
                "confidence": score,
                "landmarks": {
                    name: [px, py]
                    for name, (px, py) in zip(landmark_names, points)
                },
            })

        processing_time = time.time() - start_time

        result_text = "\n".join([
            "",
            "**Detection Results:**",
            f"- **Faces Detected:** {len(faces)}",
            f"- **Model Used:** {model_type}",
            f"- **Processing Time:** {processing_time:.3f}s",
            f"- **Confidence Threshold:** {confidence_threshold}",
            f"- **NMS Threshold:** {nms_threshold}",
            "",
        ])

        return result_image, result_text

    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the
        # Gradio handler.
        return None, f"Error: {str(e)}"
|
|
|
|
|
|
|
|
def test_model_loading():
    """Smoke-test MobileNet loading step by step.

    Builds a MobileNet RetinaFace instance, then loads
    ``mobilenet0.25_Final.pth`` on the CPU into it with strict key matching,
    printing progress after every step.

    Returns:
        ``True`` when every step succeeds; ``False`` when the weight file is
        missing or any step raises (the traceback is printed, not raised).
    """
    weights_path = 'mobilenet0.25_Final.pth'

    try:
        print("=== Testing Model Loading ===")

        print("Testing RetinaFace import...")
        test_cfg = {
            'name': 'mobilenet0.25',
            'min_sizes': [[16, 32], [64, 128], [256, 512]],
            'steps': [8, 16, 32],
            'variance': [0.1, 0.2],
            'clip': False,
            'pretrain': False,
            'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
            'in_channel': 32,
            'out_channel': 64,
        }

        print("Creating RetinaFace instance...")
        model = RetinaFace(cfg=test_cfg, phase='test')
        print(f"✅ Model created successfully: {type(model)}")

        print("Checking model file...")
        if not os.path.exists(weights_path):
            print("❌ Model file not found")
            return False
        print("✅ Model file exists")

        print("Loading state dict...")
        state_dict = torch.load(weights_path, map_location='cpu')
        print(f"✅ State dict loaded, keys: {len(state_dict.keys())}")

        # Strict loading on purpose: any key mismatch must fail this check.
        print("Loading state dict into model...")
        model.load_state_dict(state_dict)
        print("✅ State dict loaded successfully!")

        return True

    except Exception as e:
        import traceback
        print(f"❌ Test failed: {e}")
        print(f"❌ Traceback: {traceback.format_exc()}")
        return False
|
|
|
|
|
|
|
|
def test_api_endpoint():
    """Run the detection pipeline once on a random image as a self-check.

    Returns:
        A status string: a success message when ``detect_faces`` returns an
        image, otherwise a failure message carrying the reported error.
        Exceptions are converted into an error string, never raised.
    """
    try:
        # 100x100 RGB noise; the upper bound is exclusive, so 256 covers the
        # full uint8 range.
        test_img = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        test_pil = Image.fromarray(test_img)

        result_img, result_text = detect_faces(test_pil, "mobilenet", 0.5, 0.4)

        if result_img is None:
            return f"❌ API function test failed: {result_text}"
        return "✅ API function test passed - detection pipeline works"

    except Exception as e:
        return f"❌ API test error: {str(e)}"
|
|
|
|
|
|
|
|
# Startup sequence: run the step-by-step MobileNet smoke test first and only
# attempt the full two-model load when it passes.
print("Loading RetinaFace models...")
print("Running model loading test...")
test_result = test_model_loading()
if not test_result:
    print("Test failed, skipping model loading...")
    model_loaded = False
else:
    print("Test passed, proceeding with full model loading...")
    model_loaded = load_models()
|
|
|
|
|
|
|
|
def create_interface():
    """Build the Gradio Blocks UI for the face detector.

    The page shows the model-loading status (and, when models are loaded, the
    result of ``test_api_endpoint``), the input controls, the annotated
    output, and a short API description. The detect button is wired to
    ``detect_faces`` and registered under the API name ``predict`` so the
    documented ``/api/predict`` endpoint resolves to it.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="RetinaFace Face Detection") as demo:
        gr.Markdown("# 🔥 RetinaFace Face Detection API")
        gr.Markdown("Real-time face detection using RetinaFace with MobileNet and ResNet backbones")

        if model_loaded:
            gr.Markdown("✅ **Status**: Models loaded successfully!")

            api_test_result = test_api_endpoint()
            gr.Markdown(f"🔧 **API Test**: {api_test_result}")
        else:
            gr.Markdown("❌ **Status**: Error loading models")
            gr.Markdown("🔧 **API Test**: Cannot test API - models not loaded")

        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="pil", label="Upload Image")
                model_choice = gr.Dropdown(
                    choices=["mobilenet", "resnet"],
                    value="mobilenet",
                    label="Model"
                )
                confidence = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.5, step=0.1,
                    label="Confidence"
                )
                nms = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.4, step=0.1,
                    label="NMS Threshold"
                )
                detect_btn = gr.Button("🔍 Detect Faces", variant="primary")

            with gr.Column():
                output_image = gr.Image(label="Results")
                output_text = gr.Markdown()

        # Without an explicit api_name the endpoint is named after the
        # handler function, and the /api/predict route documented below
        # would not resolve to this handler.
        detect_btn.click(
            fn=detect_faces,
            inputs=[input_image, model_choice, confidence, nms],
            outputs=[output_image, output_text],
            api_name="predict"
        )

        gr.Markdown("""
        ## 🔌 API Information

        **Your API is automatically available at these endpoints:**

        ### Main API Endpoint
        ```
        POST /api/predict
        ```

        **Request format:**
        ```json
        {
          "data": [
            "<image_as_PIL_or_path>",
            "mobilenet",
            0.5,
            0.4
          ]
        }
        ```

        **Response format:**
        ```json
        {
          "data": [
            "<processed_image>",
            "**Detection Results:**\\n- **Faces Detected:** 2\\n..."
          ]
        }
        ```

        ### For Thunkable Integration:
        - **URL:** `https://aditya-g07-retinaface-face-detection.hf.space/api/predict`
        - **Method:** POST
        - **Content-Type:** application/json

        ### API Status:
        - ✅ **Gradio auto-generates API endpoints**
        - ✅ **No additional configuration needed**
        - ✅ **"No API found" message is normal for Gradio 4.36.0**

        **Note:** The "No API found" error in the UI doesn't affect API functionality.
        """)

    return demo
|
|
|
|
|
|
|
|
# Build the UI at import time so `demo` is available as a module attribute.
demo = create_interface()

if __name__ == "__main__":
    # Listen on all interfaces, port 7860, and request a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)
|
|
|