add sam2 yolo auto annotation
This commit is contained in:
@@ -0,0 +1,118 @@
|
||||
"""
|
||||
Object detection backends.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _retarget_model_path(path: str, target_suffix: str, source_suffixes) -> str:
    """Swap the trailing extension of ``path`` for ``target_suffix``.

    Only a suffix listed in ``source_suffixes`` is swapped, and only at the very
    end of the path, so a directory such as ``runs.pt/`` is left untouched.
    Paths ending in anything else are returned unchanged.
    """
    for suffix in source_suffixes:
        if path.endswith(suffix):
            return path[:-len(suffix)] + target_suffix
    return path


def _build_rknn_detector(config: Dict[str, Any], model_path: str):
    """Build and load the RKNN backend; log and return None on any failure."""
    try:
        from .rknn_detector import RKNNDetector

        rknn_config = config.get('rknn') or {}
        detector = RKNNDetector(
            model_path=model_path,
            target_platform=rknn_config.get('target_platform', 'rk3588'),
            core_mask=rknn_config.get('core_mask', 7),
            input_size=tuple(config.get('input_size', [640, 640])),
            conf_threshold=config.get('conf_threshold', 0.25),
            nms_threshold=config.get('nms_threshold', 0.45),
        )
        if detector.load_model():
            logger.info("RKNN detector initialized successfully")
            return detector
        logger.warning("RKNN detector failed to load")
    except ImportError as e:
        logger.warning(f"RKNN not available: {e}")
    except Exception as e:
        logger.warning(f"RKNN initialization failed: {e}")
    return None


def _build_onnx_detector(config: Dict[str, Any], model_path: str):
    """Build and load the ONNX backend; log and return None on any failure."""
    try:
        from .onnx_detector import ONNXDetector

        fallback_config = config.get('fallback') or {}
        onnx_config = config.get('onnx') or {}

        # The configured path may point at the RKNN or PyTorch weights; the
        # ONNX export is looked up next to it under the same base name.
        onnx_model_path = _retarget_model_path(model_path, '.onnx', ('.rknn', '.pt'))

        # Device comes from the onnx section first, then the fallback section.
        device = onnx_config.get('device') or fallback_config.get('device', 'cpu')

        detector = ONNXDetector(
            model_path=onnx_model_path,
            input_size=tuple(config.get('input_size', [640, 640])),
            conf_threshold=config.get('conf_threshold', 0.25),
            nms_threshold=config.get('nms_threshold', 0.45),
            device=device,
            num_threads=onnx_config.get('num_threads', 0),
            optimization_level=onnx_config.get('optimization_level', 'all'),
        )
        if detector.load_model():
            logger.info("ONNX detector initialized successfully")
            return detector
        # Previously this failure was silent.
        logger.warning(f"ONNX detector failed to load: {onnx_model_path}")
    except ImportError as e:
        logger.warning(f"ONNX runtime not available: {e}")
        logger.info("Install with: pip install onnxruntime")
    except Exception as e:
        logger.warning(f"ONNX initialization failed: {e}")
    return None


def _build_yolo_detector(config: Dict[str, Any], model_path: str):
    """Build and load the Ultralytics backend; log and return None on any failure."""
    try:
        from .yolo_detector import YOLODetector

        fallback_config = config.get('fallback') or {}
        # NOTE(review): the device is only read from the 'fallback' section, even
        # when 'yolo' is the primary type - confirm this matches the config schema.
        device = fallback_config.get('device', 'cpu')

        detector = YOLODetector(
            model_path=_retarget_model_path(model_path, '.pt', ('.rknn', '.onnx')),
            conf_threshold=config.get('conf_threshold', 0.25),
            nms_threshold=config.get('nms_threshold', 0.45),
            device=device,
        )
        if detector.load_model():
            logger.info(f"YOLO detector initialized on {device}")
            return detector
        # Previously this failure was silent.
        logger.error("Failed to initialize any detector: YOLO model could not be loaded")
    except Exception as e:
        logger.error(f"Failed to initialize any detector: {e}")
    return None


def create_detector(config: Dict[str, Any]) -> Optional[Any]:
    """
    Create a detector based on configuration.

    Backends are tried in this order:

    1. RKNN, when ``type`` is ``'rknn'``.
    2. ONNX, when ``type`` is ``'onnx'``, or when RKNN failed and
       ``fallback.enabled`` is true (the default) with ``fallback.type == 'onnx'``.
    3. Ultralytics YOLO, as the default backend and as the last fallback.

    When an explicitly requested RKNN/ONNX backend cannot be created and
    ``fallback.enabled`` is false, no other backend is tried.

    Args:
        config: Detector configuration dict. Keys read here: ``type``,
            ``model_path``, ``input_size``, ``conf_threshold``,
            ``nms_threshold``, and the ``rknn``, ``onnx`` and ``fallback``
            sub-dicts. Missing or ``None`` values fall back to defaults.

    Returns:
        A loaded detector instance, or ``None`` when no backend could be
        initialized.
    """
    # `or` (rather than a .get default) also covers keys present but set to None.
    detector_type = str(config.get('type') or 'yolo').lower()
    model_path = str(config.get('model_path') or 'models/yolov9t.pt')

    fallback_config = config.get('fallback') or {}
    fallback_enabled = fallback_config.get('enabled', True)
    fallback_type = str(fallback_config.get('type') or '').lower()

    logger.info(f"Creating detector: type={detector_type}, model={model_path}")

    if detector_type not in ('rknn', 'onnx', 'yolo'):
        logger.warning(f"Unknown detector type '{detector_type}', using YOLO")

    # Try RKNN first if specified
    if detector_type == 'rknn':
        detector = _build_rknn_detector(config, model_path)
        if detector is not None:
            return detector

    # Try ONNX if specified or as the configured fallback for RKNN
    onnx_is_fallback = (
        detector_type == 'rknn' and fallback_enabled and fallback_type == 'onnx'
    )
    if detector_type == 'onnx' or onnx_is_fallback:
        detector = _build_onnx_detector(config, model_path)
        if detector is not None:
            return detector

    # Honour an explicit "no fallback" instead of silently switching backend.
    if detector_type in ('rknn', 'onnx') and not fallback_enabled:
        logger.error(
            f"Failed to initialize {detector_type} detector and fallback is disabled"
        )
        return None

    # Use Ultralytics YOLO as default/fallback
    return _build_yolo_detector(config, model_path)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,230 @@
|
||||
"""
|
||||
Base detector interface.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Tuple, Optional
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
class BBox:
    """Bounding box stored as two corners (x1, y1) and (x2, y2).

    The box is expected to satisfy ``x2 >= x1`` and ``y2 >= y1``; ``width`` and
    ``height`` are signed differences and go negative otherwise.
    """

    x1: float
    y1: float
    x2: float
    y2: float

    def to_yolo(self, img_width: int, img_height: int) -> Tuple[float, float, float, float]:
        """Convert to YOLO format (normalized x_center, y_center, width, height).

        Args:
            img_width: Width of the image the box coordinates refer to.
            img_height: Height of the image the box coordinates refer to.

        Returns:
            ``(x_center, y_center, width, height)``, each divided by the
            corresponding image dimension.
        """
        x_center = ((self.x1 + self.x2) / 2) / img_width
        y_center = ((self.y1 + self.y2) / 2) / img_height
        width = (self.x2 - self.x1) / img_width
        height = (self.y2 - self.y1) / img_height
        return (x_center, y_center, width, height)

    def area(self) -> float:
        """Calculate area in pixels.

        An inverted box (``x2 < x1`` or ``y2 < y1``) has area 0. Multiplying the
        raw differences would give a negative area, or a bogus positive one
        when both axes are inverted.
        """
        return max(0, self.width) * max(0, self.height)

    @property
    def width(self) -> float:
        """Signed horizontal extent, ``x2 - x1``."""
        return self.x2 - self.x1

    @property
    def height(self) -> float:
        """Signed vertical extent, ``y2 - y1``."""
        return self.y2 - self.y1
|
||||
|
||||
|
||||
@dataclass
class Detection:
    """One detected object: its class, score, box and optional track id."""

    # Numeric class index reported by the model.
    class_id: int
    # Human-readable label associated with class_id.
    class_name: str
    # Detection score.
    confidence: float
    # Location of the object.
    bbox: BBox
    # Tracker-assigned identity; defaults to None when not provided.
    track_id: Optional[int] = None
|
||||
|
||||
|
||||
class BaseDetector(ABC):
    """Abstract base class for object detectors.

    Subclasses implement ``load_model``, ``detect`` and ``release``. This class
    stores the shared configuration and provides a plain resize preprocessing
    step, a stub ``postprocess`` and a NumPy non-maximum suppression.
    """

    # COCO class names
    COCO_CLASSES = {
        0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane',
        5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light',
        10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
        14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
        20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack',
        25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee',
        30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite',
        34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard',
        38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork',
        43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple',
        48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog',
        53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch',
        58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv',
        63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone',
        68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator',
        73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
        78: 'hair drier', 79: 'toothbrush'
    }

    def __init__(
        self,
        model_path: str,
        input_size: Tuple[int, int] = (640, 640),
        conf_threshold: float = 0.25,
        nms_threshold: float = 0.45,
        class_names: Optional[dict] = None,
    ):
        """
        Initialize detector.

        Args:
            model_path: Path to model file
            input_size: Model input size (width, height)
            conf_threshold: Confidence threshold
            nms_threshold: NMS IoU threshold
            class_names: Class ID to name mapping; ``COCO_CLASSES`` is used
                when this is ``None`` or empty
        """
        self.model_path = model_path
        self.input_size = input_size
        self.conf_threshold = conf_threshold
        self.nms_threshold = nms_threshold
        self.class_names = class_names or self.COCO_CLASSES
        self.model = None

    @abstractmethod
    def load_model(self) -> bool:
        """Load model. Returns True on success."""
        pass

    @abstractmethod
    def detect(self, frame: np.ndarray) -> List[Detection]:
        """
        Run detection on frame.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            List of Detection objects
        """
        pass

    @abstractmethod
    def release(self) -> None:
        """Release resources."""
        pass

    def preprocess(self, frame: np.ndarray) -> np.ndarray:
        """
        Preprocess frame for inference.

        The frame is resized straight to ``input_size`` (no letterboxing, so
        the aspect ratio is not preserved), converted BGR -> RGB, scaled to
        [0, 1] as float32 and laid out as a 1 x C x H x W batch.

        Args:
            frame: Input frame (BGR, HWC)

        Returns:
            Preprocessed input tensor
        """
        import cv2

        # Resize
        input_width, input_height = self.input_size
        resized = cv2.resize(frame, (input_width, input_height))

        # BGR to RGB
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

        # Normalize to [0, 1]
        normalized = rgb.astype(np.float32) / 255.0

        # HWC to CHW, then add the batch dimension
        transposed = normalized.transpose(2, 0, 1)
        return np.expand_dims(transposed, axis=0)

    def postprocess(
        self,
        outputs: np.ndarray,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """
        Postprocess model outputs.

        This base implementation is a placeholder that always returns an empty
        list; output layouts vary by model, so subclasses should override it.

        Args:
            outputs: Raw model outputs
            original_shape: Original frame shape (height, width)

        Returns:
            List of Detection objects
        """
        return []

    def nms(
        self,
        boxes: np.ndarray,
        scores: np.ndarray,
        iou_threshold: float = 0.45,
    ) -> List[int]:
        """
        Non-maximum suppression.

        Boxes are visited in descending score order; a box is dropped when its
        IoU with an already kept box exceeds ``iou_threshold``. Suppression is
        class-agnostic: only the geometry and the scores are considered.

        Args:
            boxes: Array of boxes [N, 4] in xyxy format (array-like accepted)
            scores: Array of scores [N] (array-like accepted)
            iou_threshold: IoU threshold

        Returns:
            List of indices to keep, as plain Python ints, highest score first
        """
        boxes = np.asarray(boxes, dtype=np.float64).reshape(-1, 4)
        scores = np.asarray(scores).reshape(-1)
        if boxes.shape[0] == 0:
            return []

        x1, y1, x2, y2 = boxes.T
        areas = (x2 - x1) * (y2 - y1)
        order = scores.argsort()[::-1]

        keep: List[int] = []
        while order.size > 0:
            best = order[0]
            keep.append(int(best))

            rest = order[1:]
            if rest.size == 0:
                break

            # Intersection of the best box with every remaining candidate.
            inter_w = np.maximum(
                0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest])
            )
            inter_h = np.maximum(
                0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest])
            )
            inter = inter_w * inter_h
            union = areas[best] + areas[rest] - inter

            # Zero-area boxes give union == 0. Treat their IoU as 0 instead of
            # dividing 0/0, which produced NaN and dropped the candidate.
            iou = np.zeros_like(inter)
            np.divide(inter, union, out=iou, where=union > 0)

            order = rest[iou <= iou_threshold]

        return keep
|
||||
@@ -0,0 +1,283 @@
|
||||
"""
|
||||
ONNX Runtime detector backend.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import logging
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
from .base import BaseDetector, Detection, BBox
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ONNXDetector(BaseDetector):
    """
    ONNX Runtime-based YOLO detector.

    Supports CPU and CUDA execution providers.
    This is the recommended backend for CPU-only inference.

    Features:
    - Cross-platform (Linux, Windows, macOS, ARM)
    - No special hardware required
    - Optimized CPU inference with threading
    - Optional CUDA support
    """

    def __init__(
        self,
        model_path: str,
        input_size: Tuple[int, int] = (640, 640),
        conf_threshold: float = 0.25,
        nms_threshold: float = 0.45,
        device: str = "cpu",
        num_threads: int = 0,
        optimization_level: str = "all",
        class_names: Optional[dict] = None,
    ):
        """
        Initialize ONNX detector.

        Args:
            model_path: Path to .onnx model file
            input_size: Model input size (width, height); replaced by the
                model's own input size in ``load_model`` when that is static
            conf_threshold: Confidence threshold
            nms_threshold: NMS IoU threshold
            device: Device ('cpu' or 'cuda'); any value starting with 'cuda'
                (case-insensitive) selects the CUDA provider
            num_threads: CPU threads (0 = auto based on CPU cores)
            optimization_level: Graph optimization ('none', 'basic', 'extended', 'all')
            class_names: Class ID to name mapping
        """
        super().__init__(
            model_path=model_path,
            input_size=input_size,
            conf_threshold=conf_threshold,
            nms_threshold=nms_threshold,
            class_names=class_names,
        )
        self.device = device
        self.num_threads = num_threads
        self.optimization_level = optimization_level
        self.session = None
        self.input_name = None
        self.output_names = None

    def load_model(self) -> bool:
        """Create the ONNX Runtime session.

        Returns:
            True on success; False (after logging) when onnxruntime is missing
            or the session cannot be created.
        """
        try:
            import onnxruntime as ort

            # Select execution providers; CPU stays listed as the fallback.
            if str(self.device).lower().startswith("cuda"):
                providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
            else:
                providers = ['CPUExecutionProvider']

            logger.info(f"Loading ONNX model: {self.model_path}")
            logger.info(f"  Device: {self.device}")
            logger.info(f"  Threads: {self.num_threads if self.num_threads > 0 else 'auto'}")

            # Create session options
            sess_options = ort.SessionOptions()

            # Set optimization level (unknown names fall back to 'all')
            opt_levels = {
                'none': ort.GraphOptimizationLevel.ORT_DISABLE_ALL,
                'basic': ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
                'extended': ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED,
                'all': ort.GraphOptimizationLevel.ORT_ENABLE_ALL,
            }
            sess_options.graph_optimization_level = opt_levels.get(
                str(self.optimization_level).lower(),
                ort.GraphOptimizationLevel.ORT_ENABLE_ALL,
            )

            # Set CPU threading options
            if self.num_threads > 0:
                sess_options.intra_op_num_threads = self.num_threads
                sess_options.inter_op_num_threads = self.num_threads

            # Enable memory optimization
            sess_options.enable_mem_pattern = True
            sess_options.enable_cpu_mem_arena = True

            # Create session
            self.session = ort.InferenceSession(
                self.model_path,
                sess_options=sess_options,
                providers=providers,
            )

            # Get input/output info
            model_input = self.session.get_inputs()[0]
            self.input_name = model_input.name
            self.output_names = [o.name for o in self.session.get_outputs()]

            # Adopt the model's spatial size only when it is static. Dynamic
            # axes are reported as names or None, which must not end up in
            # input_size because preprocessing does arithmetic on it.
            input_shape = model_input.shape
            if len(input_shape) == 4:
                height, width = input_shape[2], input_shape[3]
                if (
                    isinstance(height, int) and isinstance(width, int)
                    and height > 0 and width > 0
                ):
                    self.input_size = (width, height)  # width, height
                else:
                    logger.info(
                        f"  Dynamic input shape {input_shape}; "
                        f"keeping configured size {self.input_size}"
                    )

            actual_provider = self.session.get_providers()[0]
            logger.info("ONNX model loaded successfully")
            logger.info(f"  Provider: {actual_provider}")
            logger.info(f"  Input size: {self.input_size}")

            return True

        except ImportError:
            logger.error("onnxruntime not found. Install with: pip install onnxruntime")
            return False
        except Exception as e:
            logger.error(f"Failed to load ONNX model: {e}")
            # Never leave a half-configured session behind for detect().
            self.session = None
            return False

    def detect(self, frame: np.ndarray) -> List[Detection]:
        """
        Run detection on frame.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            List of Detection objects; empty when the session is not loaded or
            when inference raises (the error is logged, not propagated)
        """
        if self.session is None:
            logger.warning("ONNX session not initialized")
            return []

        try:
            orig_h, orig_w = frame.shape[:2]

            # Preprocess
            input_tensor, ratio, pad = self._preprocess(frame)

            # Run inference
            outputs = self.session.run(self.output_names, {self.input_name: input_tensor})

            # Postprocess
            return self._postprocess(outputs, (orig_h, orig_w), ratio, pad)

        except Exception as e:
            logger.error(f"ONNX inference error: {e}")
            return []

    def _preprocess(self, frame: np.ndarray) -> Tuple[np.ndarray, float, Tuple[float, float]]:
        """Letterbox a BGR frame into the model input tensor.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            ``(tensor, ratio, (pad_w, pad_h))``. ``tensor`` is a float32
            1 x 3 x H x W RGB array scaled to [0, 1], ``ratio`` is the resize
            factor applied to the frame, and ``pad_w`` / ``pad_h`` are the
            per-side paddings (possibly fractional) used to map boxes back.
        """
        import cv2

        input_w, input_h = self.input_size
        orig_h, orig_w = frame.shape[:2]

        # Calculate scale; round (not truncate) so an exact fit is not lost to
        # floating-point error. Keep at least one pixel per axis.
        ratio = min(input_w / orig_w, input_h / orig_h)
        new_w = max(1, int(round(orig_w * ratio)))
        new_h = max(1, int(round(orig_h * ratio)))

        # Resize
        resized = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        # Pad: the +/-0.1 splits an odd total padding into floor/ceil halves.
        pad_w = (input_w - new_w) / 2
        pad_h = (input_h - new_h) / 2

        top = int(round(pad_h - 0.1))
        bottom = int(round(pad_h + 0.1))
        left = int(round(pad_w - 0.1))
        right = int(round(pad_w + 0.1))

        padded = cv2.copyMakeBorder(
            resized, top, bottom, left, right,
            cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )

        # BGR to RGB
        rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)

        # Normalize
        normalized = rgb.astype(np.float32) / 255.0

        # HWC to NCHW
        transposed = normalized.transpose(2, 0, 1)
        batched = np.expand_dims(transposed, axis=0)

        return batched, ratio, (pad_w, pad_h)

    def _postprocess(
        self,
        outputs: list,
        original_shape: Tuple[int, int],
        ratio: float,
        pad: Tuple[float, float],
    ) -> List[Detection]:
        """Turn raw ONNX outputs into detections in original-image coordinates.

        Only ``outputs[0]`` is read. It is treated as a 2-D table (after
        dropping a leading batch axis) with one row per candidate box, and is
        transposed when it has fewer rows than columns. Rows of exactly 85
        values are parsed as ``x, y, w, h, obj_conf, 80 class scores``; any
        other width as ``x, y, w, h, class scores``.

        Args:
            outputs: Raw outputs of the session
            original_shape: Original frame shape (height, width)
            ratio: Resize factor returned by ``_preprocess``
            pad: ``(pad_w, pad_h)`` returned by ``_preprocess``

        Returns:
            Detections that passed the confidence threshold and NMS, ordered
            by descending confidence
        """
        orig_h, orig_w = original_shape
        pad_w, pad_h = pad

        # Handle different output formats
        preds = np.asarray(outputs[0])
        if preds.ndim == 3:
            preds = preds[0]
        if preds.ndim != 2:
            logger.warning(f"Unsupported ONNX output shape: {preds.shape}")
            return []
        if preds.size == 0:
            return []

        # Transpose if needed (num_classes+4 x num_boxes -> num_boxes x num_classes+4)
        if preds.shape[0] < preds.shape[1]:
            preds = preds.T
        if preds.shape[1] < 5:
            return []

        # Parse based on format
        if preds.shape[1] == 85:  # YOLOv5 format with obj_conf
            class_scores = preds[:, 5:]
            objectness = preds[:, 4]
        else:  # YOLOv8/v9 format without obj_conf
            class_scores = preds[:, 4:]
            objectness = None

        class_ids = class_scores.argmax(axis=1)
        confidences = class_scores[np.arange(class_scores.shape[0]), class_ids]
        if objectness is not None:
            confidences = objectness * confidences

        # Threshold before any box math so only surviving rows are processed.
        selected = np.flatnonzero(confidences >= self.conf_threshold)
        if selected.size == 0:
            return []

        centre_x = preds[selected, 0].astype(np.float64)
        centre_y = preds[selected, 1].astype(np.float64)
        half_w = preds[selected, 2].astype(np.float64) / 2
        half_h = preds[selected, 3].astype(np.float64) / 2

        # Convert to xyxy, undo the letterbox, and clip to the original image.
        boxes = np.stack(
            [
                np.clip((centre_x - half_w - pad_w) / ratio, 0, orig_w),
                np.clip((centre_y - half_h - pad_h) / ratio, 0, orig_h),
                np.clip((centre_x + half_w - pad_w) / ratio, 0, orig_w),
                np.clip((centre_y + half_h - pad_h) / ratio, 0, orig_h),
            ],
            axis=1,
        )
        scores = confidences[selected].astype(np.float64)

        # Apply NMS
        detections = []
        for idx in self.nms(boxes, scores, self.nms_threshold):
            class_id = int(class_ids[selected[idx]])
            x1, y1, x2, y2 = (float(v) for v in boxes[idx])
            detections.append(
                Detection(
                    class_id=class_id,
                    class_name=self.class_names.get(class_id, str(class_id)),
                    confidence=float(scores[idx]),
                    bbox=BBox(x1=x1, y1=y1, x2=x2, y2=y2),
                )
            )

        return detections

    def release(self) -> None:
        """Release ONNX session."""
        self.session = None
        logger.info("ONNX detector released")
|
||||
@@ -0,0 +1,327 @@
|
||||
"""
|
||||
RKNN detector backend for Rockchip NPU.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import logging
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
from .base import BaseDetector, Detection, BBox
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RKNNDetector(BaseDetector):
    """
    RKNN-based YOLO detector for Rockchip NPU.

    Supports: RK3588, RK3568, RK3566, RK3562, RV1106, etc.
    """

    def __init__(
        self,
        model_path: str,
        target_platform: str = "rk3588",
        core_mask: int = 7,
        input_size: Tuple[int, int] = (640, 640),
        conf_threshold: float = 0.25,
        nms_threshold: float = 0.45,
        class_names: Optional[dict] = None,
    ):
        """
        Initialize RKNN detector.

        Args:
            model_path: Path to .rknn model file
            target_platform: Target Rockchip platform
            core_mask: NPU core mask (RK3588: 7=all 3 cores)
            input_size: Model input size
            conf_threshold: Confidence threshold
            nms_threshold: NMS threshold
            class_names: Class ID to name mapping
        """
        super().__init__(
            model_path=model_path,
            input_size=input_size,
            conf_threshold=conf_threshold,
            nms_threshold=nms_threshold,
            class_names=class_names,
        )

        self.target_platform = target_platform
        self.core_mask = core_mask
        self.rknn = None

        # Letterbox parameters of the most recent frame. They are written by
        # _preprocess_rknn and read back during postprocessing; the identity
        # defaults keep postprocessing usable before the first frame.
        self._ratio = 1.0
        self._pad = (0.0, 0.0)
        self._orig_shape = None

    def _discard_runtime(self) -> None:
        """Release and forget a runtime that failed to initialise."""
        if self.rknn is None:
            return
        try:
            self.rknn.release()
        except Exception as e:
            logger.debug(f"Ignoring error while releasing RKNN runtime: {e}")
        self.rknn = None

    def load_model(self) -> bool:
        """Load RKNN model to NPU.

        RKNNLite is preferred; rknn-toolkit2 is used when RKNNLite cannot be
        imported.

        Returns:
            True on success. On any failure the error is logged, the partially
            created runtime is released and ``self.rknn`` is reset to ``None``
            so ``detect`` does not run on an uninitialised runtime.
        """
        try:
            # Try rknnlite2 first (for ARM devices)
            try:
                from rknnlite.api import RKNNLite
                self.rknn = RKNNLite()
                is_lite = True
                logger.info("Using RKNNLite2 runtime")
            except ImportError:
                # Fall back to rknn-toolkit2 (for x86 simulation)
                from rknn.api import RKNN
                self.rknn = RKNN()
                is_lite = False
                logger.info("Using RKNN-Toolkit2 runtime")

            # Load model
            logger.info(f"Loading RKNN model: {self.model_path}")
            ret = self.rknn.load_rknn(self.model_path)
            if ret != 0:
                logger.error(f"Failed to load RKNN model: {ret}")
                self._discard_runtime()
                return False

            # Initialize runtime
            if is_lite:
                ret = self.rknn.init_runtime(core_mask=self.core_mask)
            else:
                ret = self.rknn.init_runtime(
                    target=self.target_platform,
                    device_id=None,
                )

            if ret != 0:
                logger.error(f"Failed to init RKNN runtime: {ret}")
                self._discard_runtime()
                return False

            logger.info("RKNN model loaded successfully")
            return True

        except ImportError as e:
            logger.error(f"RKNN library not available: {e}")
            logger.info("Install with: pip install rknnlite2 (ARM) or rknn-toolkit2 (x86)")
            self._discard_runtime()
            return False
        except Exception as e:
            logger.error(f"Failed to load RKNN model: {e}")
            self._discard_runtime()
            return False

    def detect(self, frame: np.ndarray) -> List[Detection]:
        """
        Run detection on frame using NPU.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            List of Detection objects; empty when the runtime is not loaded,
            when inference returns nothing, or when inference raises (the
            error is logged, matching the other detector backends)
        """
        if self.rknn is None:
            logger.warning("RKNN not initialized")
            return []

        try:
            orig_h, orig_w = frame.shape[:2]

            # Preprocess
            input_data = self._preprocess_rknn(frame)

            # Run inference
            outputs = self.rknn.inference(inputs=[input_data])
        except Exception as e:
            logger.error(f"RKNN inference error: {e}")
            return []

        if outputs is None:
            logger.warning("RKNN inference returned None")
            return []

        # Postprocess
        return self._postprocess_yolo(outputs, (orig_h, orig_w))

    def _preprocess_rknn(self, frame: np.ndarray) -> np.ndarray:
        """Letterbox a BGR frame to the model input size and convert it to RGB.

        Also records the letterbox ratio, padding and original shape on the
        instance for use during postprocessing.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            Letterboxed RGB image in HWC layout, same dtype as the input
        """
        import cv2

        input_w, input_h = self.input_size

        # Resize with letterbox (the helper takes (height, width))
        img, ratio, (dw, dh) = self._letterbox(frame, (input_h, input_w))

        # Store for postprocessing
        self._ratio = ratio
        self._pad = (dw, dh)
        self._orig_shape = frame.shape[:2]

        # BGR to RGB (RKNN typically expects RGB)
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def _letterbox(
        self,
        img: np.ndarray,
        new_shape: Tuple[int, int],
        color: Tuple[int, int, int] = (114, 114, 114),
    ) -> Tuple[np.ndarray, float, Tuple[float, float]]:
        """Resize and pad image while maintaining aspect ratio.

        Args:
            img: Input image (HWC)
            new_shape: Target shape as (height, width)
            color: Border colour used for the padding

        Returns:
            ``(image, ratio, (dw, dh))`` where ``dw`` / ``dh`` are the per-side
            paddings (half of the total, possibly fractional)
        """
        import cv2

        shape = img.shape[:2]  # [height, width]

        # Scale ratio
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

        # Compute padding; new_unpad is (width, height) as cv2.resize expects
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw = (new_shape[1] - new_unpad[0]) / 2
        dh = (new_shape[0] - new_unpad[1]) / 2

        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        # The +/-0.1 splits an odd total padding into floor/ceil halves.
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
        )

        return img, r, (dw, dh)

    def _postprocess_yolo(
        self,
        outputs: list,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """
        Postprocess YOLO outputs from RKNN.

        Handles common YOLO output formats:
        - YOLOv5/v8/v9 style: [1, num_boxes, 5+num_classes]
        - Split outputs: boxes, scores, classes separate

        Any other number of outputs is reported with a warning and yields no
        detections. Errors raised while parsing are logged and also yield an
        empty list.
        """
        detections = []

        try:
            # Handle different output formats
            if len(outputs) == 1:
                # Single output tensor
                output = outputs[0]
                if output.ndim == 3:
                    output = output[0]  # Remove batch dim

                # Assume format: [num_boxes, 5+num_classes] or [5+num_classes, num_boxes]
                if output.shape[0] < output.shape[1]:
                    output = output.T

                detections = self._parse_yolo_output(output, original_shape)

            elif len(outputs) >= 3:
                # Split outputs (boxes, scores, classes)
                # This is common for quantized RKNN models
                detections = self._parse_split_outputs(outputs, original_shape)

            else:
                logger.warning(f"Unsupported RKNN output layout: {len(outputs)} tensors")

        except Exception as e:
            logger.error(f"Postprocessing error: {e}")

        return detections

    def _parse_yolo_output(
        self,
        output: np.ndarray,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """Parse standard YOLO output format.

        Each row is either ``[x, y, w, h, obj_conf, cls1_conf, ...]`` (exactly
        85 values: 4 + 1 + 80 classes) or ``[x, y, w, h, cls1_conf, ...]`` for
        any other width. Boxes are mapped back through the letterbox
        parameters stored by ``_preprocess_rknn``.

        Args:
            output: 2-D array with one candidate box per row
            original_shape: Original frame shape (height, width)

        Returns:
            Detections that passed the confidence threshold and NMS
        """
        orig_h, orig_w = original_shape
        ratio = self._ratio
        dw, dh = self._pad

        preds = np.asarray(output)
        if preds.ndim != 2 or preds.shape[0] == 0 or preds.shape[1] < 5:
            return []

        # Check if obj_conf exists
        if preds.shape[1] == 85:  # 4 + 1 + 80 classes (with obj_conf)
            class_scores = preds[:, 5:]
            objectness = preds[:, 4]
        else:  # No separate obj_conf
            class_scores = preds[:, 4:]
            objectness = None

        class_ids = class_scores.argmax(axis=1)
        confidences = class_scores[np.arange(class_scores.shape[0]), class_ids]
        if objectness is not None:
            confidences = objectness * confidences

        # Threshold before any box math so only surviving rows are processed.
        selected = np.flatnonzero(confidences >= self.conf_threshold)
        if selected.size == 0:
            return []

        centre_x = preds[selected, 0].astype(np.float64)
        centre_y = preds[selected, 1].astype(np.float64)
        half_w = preds[selected, 2].astype(np.float64) / 2
        half_h = preds[selected, 3].astype(np.float64) / 2

        # Convert to xyxy, remove padding, scale back, clip to image bounds.
        x1 = np.clip((centre_x - half_w - dw) / ratio, 0, orig_w)
        y1 = np.clip((centre_y - half_h - dh) / ratio, 0, orig_h)
        x2 = np.clip((centre_x + half_w - dw) / ratio, 0, orig_w)
        y2 = np.clip((centre_y + half_h - dh) / ratio, 0, orig_h)

        detections = []
        for pos, row in enumerate(selected):
            class_id = int(class_ids[row])
            detections.append(
                Detection(
                    class_id=class_id,
                    class_name=self.class_names.get(class_id, str(class_id)),
                    confidence=float(confidences[row]),
                    bbox=BBox(
                        x1=float(x1[pos]),
                        y1=float(y1[pos]),
                        x2=float(x2[pos]),
                        y2=float(y2[pos]),
                    ),
                )
            )

        # Apply NMS
        return self._apply_nms(detections)

    def _parse_split_outputs(
        self,
        outputs: list,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """Parse split output format (common in quantized models).

        Not implemented yet: logs a warning and returns an empty list.
        """
        # This format varies by model - implement based on specific model output
        # Common format: [boxes, scores, class_ids, num_dets]
        logger.warning("Split output parsing not fully implemented")
        return []

    def _apply_nms(self, detections: List[Detection]) -> List[Detection]:
        """Apply NMS to detections and return the survivors."""
        if not detections:
            return []

        boxes = np.array([[d.bbox.x1, d.bbox.y1, d.bbox.x2, d.bbox.y2] for d in detections])
        scores = np.array([d.confidence for d in detections])

        keep_indices = self.nms(boxes, scores, self.nms_threshold)

        return [detections[i] for i in keep_indices]

    def release(self) -> None:
        """Release RKNN resources."""
        if self.rknn is not None:
            self.rknn.release()
            self.rknn = None
        logger.info("RKNN resources released")
|
||||
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
Ultralytics YOLO detector backend.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
from .base import BaseDetector, Detection, BBox
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class YOLODetector(BaseDetector):
    """
    Ultralytics YOLO detector.

    Supports YOLOv5, YOLOv8, YOLOv9, etc.
    """

    def __init__(
        self,
        model_path: str,
        conf_threshold: float = 0.25,
        nms_threshold: float = 0.45,
        device: str = "cpu",
        class_names: Optional[dict] = None,
    ):
        """
        Initialize YOLO detector.

        Args:
            model_path: Path to .pt model file
            conf_threshold: Confidence threshold
            nms_threshold: NMS IoU threshold
            device: Device to run on ('cpu', 'cuda', '0', etc.)
            class_names: Class ID to name mapping. When given, it takes
                precedence over the names stored in the model.
        """
        super().__init__(
            model_path=model_path,
            conf_threshold=conf_threshold,
            nms_threshold=nms_threshold,
            class_names=class_names,
        )
        self.device = device
        # Remember whether the caller chose the names, so load_model does not
        # overwrite them with the model's own.
        self._has_custom_class_names = bool(class_names)

    def load_model(self) -> bool:
        """Load YOLO model and move it to the configured device.

        Returns:
            True on success; False (after logging) when ultralytics is missing
            or loading fails. On failure ``self.model`` is reset to ``None``.
        """
        try:
            from ultralytics import YOLO

            logger.info(f"Loading YOLO model: {self.model_path}")
            self.model = YOLO(self.model_path)
            self.model.to(self.device)

            # Update class names from model if available. A sequence of names
            # is converted to an id -> name dict because lookups use .get().
            model_names = getattr(self.model, 'names', None)
            if model_names and not self._has_custom_class_names:
                if isinstance(model_names, dict):
                    self.class_names = dict(model_names)
                else:
                    self.class_names = dict(enumerate(model_names))

            logger.info(f"YOLO model loaded on {self.device}")
            return True

        except ImportError:
            logger.error("ultralytics package not found. Install with: pip install ultralytics")
            self.model = None
            return False
        except Exception as e:
            logger.error(f"Failed to load YOLO model: {e}")
            # e.g. an invalid device: do not keep a model detect() would use.
            self.model = None
            return False

    def detect(self, frame: np.ndarray) -> List[Detection]:
        """
        Run detection on frame.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            List of Detection objects; empty when the model is not loaded or
            when inference raises (the error is logged, not propagated)
        """
        if self.model is None:
            logger.warning("Model not loaded")
            return []

        try:
            # Run inference
            results = self.model.predict(
                frame,
                conf=self.conf_threshold,
                iou=self.nms_threshold,
                verbose=False,
            )

            detections = []

            for result in results:
                boxes = result.boxes
                if boxes is None:
                    continue

                # Convert each tensor once per result instead of calling
                # .item() for every box.
                corners = boxes.xyxy.tolist()
                class_values = boxes.cls.tolist()
                score_values = boxes.conf.tolist()

                for (x1, y1, x2, y2), class_value, score in zip(
                    corners, class_values, score_values
                ):
                    class_id = int(class_value)
                    detections.append(
                        Detection(
                            class_id=class_id,
                            class_name=self.class_names.get(class_id, str(class_id)),
                            confidence=float(score),
                            bbox=BBox(x1=x1, y1=y1, x2=x2, y2=y2),
                        )
                    )

            return detections

        except Exception as e:
            logger.error(f"Detection error: {e}")
            return []

    def release(self) -> None:
        """Release resources."""
        self.model = None
        logger.info("YOLO detector released")
|
||||
Reference in New Issue
Block a user