Spaces:
Sleeping
Sleeping
| """ | |
| Farm Object Detection API - Gradio Interface | |
| RT-DETR models for agricultural object detection | |
| """ | |
| import gradio as gr | |
| import torch | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| import json | |
| import base64 | |
| import io | |
| import time | |
| from typing import List, Dict, Any | |
| # Import RT-DETR | |
| try: | |
| from transformers import RTDetrForObjectDetection, RTDetrImageProcessor | |
| MODELS_AVAILABLE = True | |
| except ImportError: | |
| MODELS_AVAILABLE = False | |
class ObjectDetectionAPI:
    """Load RT-DETR checkpoints and run object detection on PIL images.

    Models and their image processors are stored in dictionaries keyed by the
    short names listed in ``model_configs`` ("r18vd", "r34vd", "r50vd").
    """

    def __init__(self):
        # Loaded models and processors, keyed by the short model name.
        self.models = {}
        self.processors = {}
        # Short model name -> Hugging Face Hub checkpoint id.
        self.model_configs = {
            "r18vd": "PekingU/rtdetr_r18vd",
            "r34vd": "PekingU/rtdetr_r34vd",
            "r50vd": "PekingU/rtdetr_r50vd",
        }
        # Only attempt loading when the transformers RT-DETR classes imported.
        if MODELS_AVAILABLE:
            self.load_models()

    def load_models(self):
        """Load every checkpoint in ``model_configs``.

        A checkpoint that fails to load is reported on stdout and skipped, so
        the remaining models stay usable.
        """
        for model_key, model_name in self.model_configs.items():
            try:
                print(f"Loading {model_name}...")
                processor = RTDetrImageProcessor.from_pretrained(model_name)
                model = RTDetrForObjectDetection.from_pretrained(model_name)
                # Register both only after both loaded, so the two dicts
                # never disagree about which keys are available.
                self.processors[model_key] = processor
                self.models[model_key] = model
                print(f"β {model_name} loaded successfully")
            except Exception as e:
                print(f"β Failed to load {model_name}: {e}")

    def detect_objects(
        self,
        image: Image.Image,
        model_key: str = "r50vd",
        threshold: float = 0.3,
    ) -> Dict[str, Any]:
        """Detect objects in an image using RT-DETR.

        Args:
            image: PIL image to analyse. It is converted to RGB before
                inference so RGBA, grayscale and palette images are accepted.
            model_key: Short model name; must be a key of ``self.models``.
            threshold: Minimum confidence for a detection to be reported.

        Returns:
            On success, a dict with ``objects_detected``, ``detections``
            (each with ``class``, ``confidence``, ``bbox`` as
            ``[x1, y1, x2, y2]`` and ``area``), ``processing_time`` in
            seconds and ``model_used``. On failure, ``{"error": message}``.
        """
        if not MODELS_AVAILABLE or model_key not in self.models:
            return {"error": "Model not available"}

        # perf_counter is monotonic, unlike time.time(), so the measured
        # interval cannot be skewed by wall-clock adjustments.
        start_time = time.perf_counter()
        try:
            processor = self.processors[model_key]
            model = self.models[model_key]

            # Normalise the colour mode to three channels before preprocessing.
            rgb_image = image.convert("RGB")
            inputs = processor(images=rgb_image, return_tensors="pt")

            with torch.no_grad():
                outputs = model(**inputs)

            # PIL reports (width, height); post-processing wants (height, width).
            target_sizes = torch.tensor([rgb_image.size[::-1]])
            results = processor.post_process_object_detection(
                outputs, threshold=threshold, target_sizes=target_sizes
            )[0]

            # Post-processing already applied the threshold, so every result
            # is kept as-is.
            detections = []
            for score, label, box in zip(
                results["scores"], results["labels"], results["boxes"]
            ):
                x1, y1, x2, y2 = box.tolist()
                detections.append({
                    "class": model.config.id2label[label.item()],
                    "confidence": float(score),
                    "bbox": [x1, y1, x2, y2],
                    "area": (x2 - x1) * (y2 - y1),
                })

            processing_time = time.perf_counter() - start_time
            return {
                "objects_detected": len(detections),
                "detections": detections,
                "processing_time": round(processing_time, 2),
                "model_used": f"rtdetr_{model_key}",
            }
        except Exception as e:
            # Boundary for UI/API callers: report the failure instead of raising.
            return {"error": str(e)}

    def draw_detections(self, image: Image.Image, detections: List[Dict]) -> Image.Image:
        """Draw bounding boxes and labels on a copy of the image.

        Args:
            image: Source PIL image; it is not modified.
            detections: Dicts with ``bbox`` (``[x1, y1, x2, y2]``), ``class``
                and ``confidence`` keys, as produced by ``detect_objects``.

        Returns:
            A new RGB PIL image with the annotations drawn in green.
        """
        # Convert to RGB so the 3-component colour below means green for
        # every input mode (it would not for grayscale or RGBA arrays).
        img_array = np.array(image.convert("RGB"))

        for det in detections:
            x1, y1, x2, y2 = map(int, det["bbox"])
            cv2.rectangle(img_array, (x1, y1), (x2, y2), (0, 255, 0), 2)

            label = f"{det['class']}: {det['confidence']:.2f}"
            # Keep the text origin on the canvas when the box touches the
            # top or left edge; otherwise the label would be clipped away.
            label_origin = (max(x1, 0), max(y1 - 10, 15))
            cv2.putText(img_array, label, label_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        return Image.fromarray(img_array)
# Module-level detector instance shared by the prediction functions below.
# Constructing it loads the RT-DETR models when the transformers import succeeded.
api = ObjectDetectionAPI()
def predict_objects(image, model_choice):
    """Run detection for the Gradio UI.

    Returns a pair ``(annotated_image, summary_text)``. When no image is
    given or detection fails, the image slot is ``None`` and the text
    carries the message.
    """
    if image is None:
        return None, "Please upload an image"

    # The detector works on PIL images; wrap NumPy input if that is what arrived.
    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    outcome = api.detect_objects(pil_image, model_choice)
    if "error" in outcome:
        return None, f"Error: {outcome['error']}"

    found = outcome["detections"]
    annotated = api.draw_detections(pil_image, found)

    header = f"""
π **Detection Results**
- **Objects detected**: {outcome['objects_detected']}
- **Processing time**: {outcome['processing_time']}s
- **Model used**: {outcome['model_used']}
**Detections**:
"""
    # List at most the first ten detections, numbered from 1.
    listing = "".join(
        f"\n{rank}. **{item['class']}** (confidence: {item['confidence']:.2f})"
        for rank, item in enumerate(found[:10], 1)
    )
    return annotated, header + listing
def predict_api(image_b64, model_choice):
    """Run detection on a base64-encoded image and return the result dict.

    Args:
        image_b64: Base64 image payload. A leading ``data:...;base64,``
            data-URL header is accepted and stripped.
        model_choice: Short model name passed on to ``api.detect_objects``.

    Returns:
        The dict produced by ``api.detect_objects``, or ``{"error": message}``
        when the payload is missing, cannot be decoded, or detection raises.
    """
    # Reject missing payloads up front with a clear message instead of
    # surfacing an incidental decoder error.
    if not image_b64:
        return {"error": "No image data provided"}

    try:
        # Payloads in data-URL form carry a header before the comma;
        # only the part after it is base64.
        if isinstance(image_b64, str) and image_b64.startswith("data:"):
            image_b64 = image_b64.partition(",")[2]

        image_data = base64.b64decode(image_b64)
        # Image.open is lazy; convert() forces the decode here so a corrupt
        # payload fails inside this try, and it normalises the mode to RGB.
        image = Image.open(io.BytesIO(image_data)).convert("RGB")

        return api.detect_objects(image, model_choice)
    except Exception as e:
        # API boundary: always answer with a JSON-serialisable error dict.
        return {"error": str(e)}
# Gradio Interface
# Two tabs: an interactive image-analysis UI, and an API tab that both
# documents and registers the base64/JSON endpoint backed by predict_api.
with gr.Blocks(title="π Farm Object Detection API") as app:
    gr.Markdown("# π Farm Object Detection API")
    gr.Markdown("Detect farm equipment, crops, workers, and animals using RT-DETR models")

    # Shared by both model dropdowns so the two tabs cannot drift apart.
    model_choices = ["r18vd", "r34vd", "r50vd"]

    with gr.Tab("πΌοΈ Image Analysis"):
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload Farm Image")
                model_choice = gr.Dropdown(
                    choices=model_choices,
                    value="r50vd",
                    label="Select Model"
                )
                detect_btn = gr.Button("π Detect Objects", variant="primary")
            with gr.Column():
                output_image = gr.Image(label="Detected Objects")
                results_text = gr.Textbox(label="Detection Results", lines=10)
        detect_btn.click(
            predict_objects,
            inputs=[image_input, model_choice],
            outputs=[output_image, results_text]
        )

    with gr.Tab("π‘ API Usage"):
        gr.Markdown("""
## π API Endpoint
**POST** `/api/predict`
### Request Format
```json
{
"data": ["<base64_image>", "<model_choice>"]
}
```
### Response Format
```json
{
"objects_detected": 5,
"detections": [
{
"class": "tractor",
"confidence": 0.95,
"bbox": [100, 150, 400, 350],
"area": 75000
}
],
"processing_time": 0.8,
"model_used": "rtdetr_r50vd"
}
```
### Model Options
- **r18vd**: Fast inference (recommended for real-time)
- **r34vd**: Balanced performance
- **r50vd**: High accuracy (recommended for analysis)
""")
        # Register predict_api under the name "predict" so the request and
        # response shapes documented above have a handler behind them.
        # The exact route prefix depends on the installed Gradio version.
        with gr.Row():
            api_image_b64 = gr.Textbox(label="Base64 Image", lines=4)
            api_model_choice = gr.Dropdown(
                choices=model_choices,
                value="r50vd",
                label="Select Model"
            )
        api_btn = gr.Button("Run API Request")
        api_output = gr.JSON(label="API Response")
        api_btn.click(
            predict_api,
            inputs=[api_image_b64, api_model_choice],
            outputs=api_output,
            api_name="predict"
        )
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app.launch()