""" Farm Object Detection API - Gradio Interface RT-DETR models for agricultural object detection """ import gradio as gr import torch import cv2 import numpy as np from PIL import Image import json import base64 import io import time from typing import List, Dict, Any # Import RT-DETR try: from transformers import RTDetrForObjectDetection, RTDetrImageProcessor MODELS_AVAILABLE = True except ImportError: MODELS_AVAILABLE = False class ObjectDetectionAPI: def __init__(self): self.models = {} self.processors = {} self.model_configs = { "r18vd": "PekingU/rtdetr_r18vd", "r34vd": "PekingU/rtdetr_r34vd", "r50vd": "PekingU/rtdetr_r50vd" } if MODELS_AVAILABLE: self.load_models() def load_models(self): """Load RT-DETR models""" for model_key, model_name in self.model_configs.items(): try: print(f"Loading {model_name}...") processor = RTDetrImageProcessor.from_pretrained(model_name) model = RTDetrForObjectDetection.from_pretrained(model_name) self.processors[model_key] = processor self.models[model_key] = model print(f"✅ {model_name} loaded successfully") except Exception as e: print(f"❌ Failed to load {model_name}: {e}") def detect_objects(self, image: Image.Image, model_key: str = "r50vd") -> Dict[str, Any]: """Detect objects in image using RT-DETR""" if not MODELS_AVAILABLE or model_key not in self.models: return {"error": "Model not available"} start_time = time.time() try: # Preprocess image processor = self.processors[model_key] model = self.models[model_key] inputs = processor(images=image, return_tensors="pt") # Run inference with torch.no_grad(): outputs = model(**inputs) # Post-process results target_sizes = torch.tensor([image.size[::-1]]) results = processor.post_process_object_detection( outputs, threshold=0.3, target_sizes=target_sizes )[0] # Format detections detections = [] for score, label, box in zip(results["scores"], results["labels"], results["boxes"]): if score > 0.3: # Confidence threshold detections.append({ "class": model.config.id2label[label.item()], "confidence": float(score), "bbox": [float(x) for x in box], "area": float((box[2] - box[0]) * (box[3] - box[1])) }) processing_time = time.time() - start_time return { "objects_detected": len(detections), "detections": detections, "processing_time": round(processing_time, 2), "model_used": f"rtdetr_{model_key}" } except Exception as e: return {"error": str(e)} def draw_detections(self, image: Image.Image, detections: List[Dict]) -> Image.Image: """Draw bounding boxes on image""" img_array = np.array(image) for det in detections: bbox = det["bbox"] x1, y1, x2, y2 = map(int, bbox) # Draw bounding box cv2.rectangle(img_array, (x1, y1), (x2, y2), (0, 255, 0), 2) # Draw label label = f"{det['class']}: {det['confidence']:.2f}" cv2.putText(img_array, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) return Image.fromarray(img_array) # Initialize API api = ObjectDetectionAPI() def predict_objects(image, model_choice): """Gradio prediction function""" if image is None: return None, "Please upload an image" # Convert to PIL Image if isinstance(image, np.ndarray): image = Image.fromarray(image) # Run detection results = api.detect_objects(image, model_choice) if "error" in results: return None, f"Error: {results['error']}" # Draw detections annotated_image = api.draw_detections(image, results["detections"]) # Format results text results_text = f""" 🔍 **Detection Results** - **Objects detected**: {results['objects_detected']} - **Processing time**: {results['processing_time']}s - **Model used**: {results['model_used']} **Detections**: """ for i, det in enumerate(results["detections"][:10], 1): # Show top 10 results_text += f"\n{i}. **{det['class']}** (confidence: {det['confidence']:.2f})" return annotated_image, results_text def predict_api(image_b64, model_choice): """API endpoint function""" try: # Decode base64 image image_data = base64.b64decode(image_b64) image = Image.open(io.BytesIO(image_data)) # Run detection results = api.detect_objects(image, model_choice) return results except Exception as e: return {"error": str(e)} # Gradio Interface with gr.Blocks(title="🔍 Farm Object Detection API") as app: gr.Markdown("# 🔍 Farm Object Detection API") gr.Markdown("Detect farm equipment, crops, workers, and animals using RT-DETR models") with gr.Tab("🖼️ Image Analysis"): with gr.Row(): with gr.Column(): image_input = gr.Image(type="pil", label="Upload Farm Image") model_choice = gr.Dropdown( choices=["r18vd", "r34vd", "r50vd"], value="r50vd", label="Select Model" ) detect_btn = gr.Button("🔍 Detect Objects", variant="primary") with gr.Column(): output_image = gr.Image(label="Detected Objects") results_text = gr.Textbox(label="Detection Results", lines=10) detect_btn.click( predict_objects, inputs=[image_input, model_choice], outputs=[output_image, results_text] ) with gr.Tab("📡 API Usage"): gr.Markdown(""" ## 🚀 API Endpoint **POST** `/api/predict` ### Request Format ```json { "data": ["", ""] } ``` ### Response Format ```json { "objects_detected": 5, "detections": [ { "class": "tractor", "confidence": 0.95, "bbox": [100, 150, 400, 350], "area": 75000 } ], "processing_time": 0.8, "model_used": "rtdetr_r50vd" } ``` ### Model Options - **r18vd**: Fast inference (recommended for real-time) - **r34vd**: Balanced performance - **r50vd**: High accuracy (recommended for analysis) """) if __name__ == "__main__": app.launch()