updating handler for cpu-only inference
handler.py · +19 -6
@@ -2,6 +2,7 @@ import base64
 import io
 import json
 import logging
+import os
 import time
 from pathlib import Path
 from typing import Any
@@ -109,7 +110,17 @@ class EndpointHandler:
         if not mapping_file.exists():
             raise FileNotFoundError(f"Mapping file not found: {mapping_file}")
 
-
+        # Robust device selection: prefer CPU unless CUDA is truly usable
+        force_cpu = os.environ.get("FORCE_CPU", "0") in {"1", "true", "TRUE", "yes", "on"}
+        if not force_cpu and torch.cuda.is_available():
+            try:
+                # Probe that CUDA can actually be used (driver present)
+                torch.zeros(1).to("cuda")
+                self.device = "cuda"
+            except Exception:
+                self.device = "cpu"
+        else:
+            self.device = "cpu"
         self.model = load_model(str(weights_file), self.device)
         self.transform = transforms.Compose(
             [
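
The probe in this hunk guards against endpoints where torch.cuda.is_available() reports True but CUDA is not actually usable (for example, a CUDA build of PyTorch on a CPU-only host with no driver): allocating a single tensor on the GPU fails fast in that case. As a standalone illustration, a minimal sketch of the same pattern; select_device is a hypothetical helper name, not part of handler.py:

import os

import torch

def select_device() -> str:
    # Hypothetical standalone version of the handler's device probe.
    # FORCE_CPU mirrors the environment override the commit introduces.
    if os.environ.get("FORCE_CPU", "0").lower() in {"1", "true", "yes", "on"}:
        return "cpu"
    if torch.cuda.is_available():
        try:
            # A tiny allocation surfaces driver/runtime problems immediately.
            torch.zeros(1).to("cuda")
            return "cuda"
        except Exception:
            return "cpu"
    return "cpu"

Deploying with FORCE_CPU=1 (or true/yes/on) in the endpoint environment therefore forces the CPU path even when CUDA looks available.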
@@ -159,11 +170,13 @@ class EndpointHandler:
 
         inference_start_time = time.time()
         with torch.inference_mode():
-            # Preprocess image on CPU
-            image_tensor = self.transform(image).unsqueeze(0)
-
-
-
+            # Preprocess image on CPU
+            image_tensor = self.transform(image).unsqueeze(0)
+            # Pin memory and use non_blocking transfer only when using CUDA
+            if self.device == "cuda":
+                image_tensor = image_tensor.pin_memory().to(self.device, non_blocking=True)
+            else:
+                image_tensor = image_tensor.to(self.device)
 
             # Run model on GPU
             probs = self.model(image_tensor)[0]  # Get probs for the single image
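
The transfer in the last hunk is split into two paths because Tensor.pin_memory() stages data in page-locked host RAM so that non_blocking=True can overlap the host-to-device copy with GPU work; on a CPU-only endpoint the call buys nothing and can raise on builds without CUDA support, hence the self.device == "cuda" gate. A hedged sketch of the two paths outside the handler, with an illustrative input shape:

import torch

# Illustrative stand-in for the transformed image batch; the real shape
# comes from the handler's transform pipeline, not from this sketch.
image_tensor = torch.rand(1, 3, 224, 224)
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    # Pinned (page-locked) memory enables an asynchronous copy to the GPU.
    image_tensor = image_tensor.pin_memory().to(device, non_blocking=True)
else:
    # Plain move; skipping pin_memory() keeps the CPU-only path safe.
    image_tensor = image_tensor.to(device)

print(image_tensor.device)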