328 lines
10 KiB
Python
328 lines
10 KiB
Python
"""
|
|
RKNN detector backend for Rockchip NPU.
|
|
"""
|
|
|
|
import numpy as np
|
|
import logging
|
|
from typing import List, Tuple, Optional
|
|
|
|
from .base import BaseDetector, Detection, BBox
|
|
|
|
# Logger for this module, named after the module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class RKNNDetector(BaseDetector):
    """
    RKNN-based YOLO detector for Rockchip NPU.

    Supports: RK3588, RK3568, RK3566, RK3562, RV1106, etc.

    Call ``load_model()`` once, then ``detect(frame)`` for each frame, and
    ``release()`` when the detector is no longer needed.
    """

    # Row width of a single-tensor output that carries a separate objectness
    # score for an 80-class model: 4 box values + 1 objectness + 80 classes.
    _OBJ_CONF_ROW_LEN = 85

    def __init__(
        self,
        model_path: str,
        target_platform: str = "rk3588",
        core_mask: int = 7,
        input_size: Tuple[int, int] = (640, 640),
        conf_threshold: float = 0.25,
        nms_threshold: float = 0.45,
        class_names: Optional[dict] = None,
    ):
        """
        Initialize RKNN detector.

        Args:
            model_path: Path to .rknn model file
            target_platform: Target Rockchip platform (only used with the
                full RKNN-Toolkit2 runtime)
            core_mask: NPU core mask (RK3588: 7=all 3 cores; only used with
                the RKNNLite runtime)
            input_size: Model input size, unpacked as (width, height)
            conf_threshold: Confidence threshold
            nms_threshold: NMS threshold
            class_names: Class ID to name mapping
        """
        super().__init__(
            model_path=model_path,
            input_size=input_size,
            conf_threshold=conf_threshold,
            nms_threshold=nms_threshold,
            class_names=class_names,
        )

        self.target_platform = target_platform
        self.core_mask = core_mask
        self.rknn = None

        # Letterbox parameters of the most recently preprocessed frame.
        # _preprocess_rknn overwrites them; the neutral defaults keep the
        # postprocessing helpers usable even if they are called first.
        self._ratio = 1.0
        self._pad = (0.0, 0.0)
        self._orig_shape = None

    def _discard_runtime(self) -> None:
        """Best-effort teardown of a stale or partially initialised runtime."""
        runtime, self.rknn = self.rknn, None
        if runtime is None:
            return
        try:
            runtime.release()
        except Exception as e:
            # Cleanup must never mask the error that triggered it.
            logger.debug(f"Ignoring error while discarding RKNN runtime: {e}")

    def load_model(self) -> bool:
        """
        Load RKNN model to NPU.

        Tries the RKNNLite runtime first and falls back to RKNN-Toolkit2
        when it cannot be imported.

        Returns:
            True when the model is loaded and the runtime is initialised,
            False otherwise. On failure ``self.rknn`` is reset to None so
            that ``detect`` does not run against a half-initialised runtime.
        """
        # A repeated call must not leak the runtime created by an earlier one.
        self._discard_runtime()

        loaded = False
        try:
            # Try rknnlite2 first (for ARM devices)
            try:
                from rknnlite.api import RKNNLite

                self.rknn = RKNNLite()
                is_lite = True
                logger.info("Using RKNNLite2 runtime")
            except ImportError:
                # Fall back to rknn-toolkit2 (for x86 simulation)
                from rknn.api import RKNN

                self.rknn = RKNN()
                is_lite = False
                logger.info("Using RKNN-Toolkit2 runtime")

            # Load model
            logger.info(f"Loading RKNN model: {self.model_path}")
            ret = self.rknn.load_rknn(self.model_path)
            if ret != 0:
                logger.error(f"Failed to load RKNN model: {ret}")
                return False

            # Initialize runtime
            if is_lite:
                ret = self.rknn.init_runtime(core_mask=self.core_mask)
            else:
                ret = self.rknn.init_runtime(
                    target=self.target_platform,
                    device_id=None,
                )

            if ret != 0:
                logger.error(f"Failed to init RKNN runtime: {ret}")
                return False

            logger.info("RKNN model loaded successfully")
            loaded = True
            return True

        except ImportError as e:
            logger.error(f"RKNN library not available: {e}")
            logger.info("Install with: pip install rknnlite2 (ARM) or rknn-toolkit2 (x86)")
            return False
        except Exception as e:
            logger.error(f"Failed to load RKNN model: {e}")
            return False
        finally:
            # Every failure path ends here: drop the unusable runtime object.
            if not loaded:
                self._discard_runtime()

    def detect(self, frame: np.ndarray) -> List[Detection]:
        """
        Run detection on frame using NPU.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            List of Detection objects; empty when the runtime is not
            initialised, the frame is empty, or inference returns None.
        """
        if self.rknn is None:
            logger.warning("RKNN not initialized")
            return []

        if frame is None or frame.size == 0:
            logger.warning("Empty frame passed to RKNN detector")
            return []

        orig_h, orig_w = frame.shape[:2]

        # Preprocess
        input_data = self._preprocess_rknn(frame)

        # The runtime expects a batched NHWC tensor, so add the batch axis.
        if input_data.ndim == 3:
            input_data = np.expand_dims(input_data, axis=0)

        # Run inference
        outputs = self.rknn.inference(inputs=[input_data])

        if outputs is None:
            logger.warning("RKNN inference returned None")
            return []

        # Postprocess
        return self._postprocess_yolo(outputs, (orig_h, orig_w))

    def _preprocess_rknn(self, frame: np.ndarray) -> np.ndarray:
        """
        Preprocess frame for RKNN inference.

        Letterboxes the frame to the model input size, records the letterbox
        parameters on the instance for postprocessing, and converts BGR to RGB.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            Letterboxed RGB image in HWC layout.
        """
        import cv2

        input_w, input_h = self.input_size

        # Resize with letterbox (expects the target shape as (height, width))
        img, ratio, (dw, dh) = self._letterbox(frame, (input_h, input_w))

        # Store for postprocessing
        self._ratio = ratio
        self._pad = (dw, dh)
        self._orig_shape = frame.shape[:2]

        # BGR to RGB (RKNN typically expects RGB)
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def _letterbox(
        self,
        img: np.ndarray,
        new_shape: Tuple[int, int],
        color: Tuple[int, int, int] = (114, 114, 114),
    ) -> Tuple[np.ndarray, float, Tuple[float, float]]:
        """
        Resize and pad image while maintaining aspect ratio.

        Args:
            img: Input image (HWC)
            new_shape: Target shape as (height, width)
            color: Border fill colour

        Returns:
            Tuple of (padded image, scale ratio, (dw, dh)) where dw/dh are
            the per-side horizontal/vertical padding and may be fractional
            (x.5) when the total padding is odd.
        """
        import cv2

        shape = img.shape[:2]  # [height, width]

        # Scale ratio
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

        # Compute padding; new_unpad is (width, height) as cv2.resize expects
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw = (new_shape[1] - new_unpad[0]) / 2
        dh = (new_shape[0] - new_unpad[1]) / 2

        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        # The -0.1/+0.1 offsets split an odd padding so that the top/left side
        # gets the smaller half and the bottom/right side the larger one.
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
        )

        return img, r, (dw, dh)

    def _postprocess_yolo(
        self,
        outputs: list,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """
        Postprocess YOLO outputs from RKNN.

        Handles common YOLO output formats:
        - YOLOv5/v8/v9 style: [1, num_boxes, 5+num_classes]
        - Split outputs: boxes, scores, classes separate

        Args:
            outputs: Output tensors returned by the runtime
            original_shape: (height, width) of the source frame

        Returns:
            List of Detection objects; empty on unsupported layouts or when
            parsing raises.
        """
        detections = []

        try:
            # Handle different output formats
            if len(outputs) == 1:
                # Single output tensor
                output = np.asarray(outputs[0])
                if output.ndim == 3:
                    output = output[0]  # Remove batch dim

                if output.ndim != 2:
                    logger.warning(
                        f"Unsupported RKNN output tensor shape: {output.shape}"
                    )
                    return detections

                # Assume format: [num_boxes, 5+num_classes] or
                # [5+num_classes, num_boxes]; fewer rows than columns is
                # taken to mean the latter, so transpose it.
                if output.shape[0] < output.shape[1]:
                    output = output.T

                detections = self._parse_yolo_output(output, original_shape)

            elif len(outputs) >= 3:
                # Split outputs (boxes, scores, classes)
                # This is common for quantized RKNN models
                detections = self._parse_split_outputs(outputs, original_shape)

            else:
                logger.warning(
                    f"Unsupported number of RKNN outputs: {len(outputs)}"
                )

        except Exception as e:
            logger.error(f"Postprocessing error: {e}")

        return detections

    def _parse_yolo_output(
        self,
        output: np.ndarray,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """
        Parse standard YOLO output format.

        Each row is either ``[x, y, w, h, obj_conf, cls1_conf, ...]`` (only
        recognised when the row has exactly 85 values) or
        ``[x, y, w, h, cls1_conf, ...]``. Boxes are centre/size in letterboxed
        input coordinates and are mapped back to the original frame using the
        ratio and padding recorded by ``_preprocess_rknn``.

        Args:
            output: 2-D array with one candidate box per row
            original_shape: (height, width) of the source frame

        Returns:
            Detections above the confidence threshold after NMS.
        """
        output = np.asarray(output)
        if output.ndim != 2 or output.shape[0] == 0 or output.shape[1] < 5:
            return []

        orig_h, orig_w = original_shape
        ratio = self._ratio
        dw, dh = self._pad

        # Check if obj_conf exists
        has_obj_conf = output.shape[1] == self._OBJ_CONF_ROW_LEN
        class_scores = output[:, 5:] if has_obj_conf else output[:, 4:]

        class_ids = np.argmax(class_scores, axis=1)
        confidences = class_scores[np.arange(class_scores.shape[0]), class_ids]
        if has_obj_conf:
            confidences = output[:, 4] * confidences

        # Filter first so the per-detection Python work below only touches
        # the few rows that survive the threshold.
        keep = confidences >= self.conf_threshold
        if not np.any(keep):
            return []

        boxes = output[keep, :4]
        class_ids = class_ids[keep]
        confidences = confidences[keep]

        # Convert to xyxy
        half_w = boxes[:, 2] / 2
        half_h = boxes[:, 3] / 2

        # Remove padding, scale back, and clip to image bounds
        x1 = np.clip((boxes[:, 0] - half_w - dw) / ratio, 0, orig_w)
        y1 = np.clip((boxes[:, 1] - half_h - dh) / ratio, 0, orig_h)
        x2 = np.clip((boxes[:, 0] + half_w - dw) / ratio, 0, orig_w)
        y2 = np.clip((boxes[:, 1] + half_h - dh) / ratio, 0, orig_h)

        names = self.class_names or {}
        detections = [
            Detection(
                class_id=cid,
                class_name=names.get(cid, str(cid)),
                confidence=float(conf),
                bbox=BBox(x1=float(bx1), y1=float(by1), x2=float(bx2), y2=float(by2)),
            )
            for cid, conf, bx1, by1, bx2, by2 in zip(
                class_ids.tolist(), confidences, x1, y1, x2, y2
            )
        ]

        # Apply NMS
        return self._apply_nms(detections)

    def _parse_split_outputs(
        self,
        outputs: list,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """
        Parse split output format (common in quantized models).

        Not implemented yet: logs a warning and returns an empty list.
        """
        # This format varies by model - implement based on specific model output
        # Common format: [boxes, scores, class_ids, num_dets]

        detections = []

        # Placeholder - implement based on actual model output format
        logger.warning("Split output parsing not fully implemented")

        return detections

    def _apply_nms(self, detections: List[Detection]) -> List[Detection]:
        """
        Apply NMS to detections.

        Delegates to ``self.nms`` with the detections' xyxy boxes,
        confidences and ``self.nms_threshold``, and returns the detections
        at the indices it yields.
        """
        if not detections:
            return []

        boxes = np.array([[d.bbox.x1, d.bbox.y1, d.bbox.x2, d.bbox.y2] for d in detections])
        scores = np.array([d.confidence for d in detections])

        keep_indices = self.nms(boxes, scores, self.nms_threshold)

        return [detections[i] for i in keep_indices]

    def release(self) -> None:
        """Release RKNN resources."""
        if self.rknn is None:
            return

        try:
            self.rknn.release()
        finally:
            # Never keep a handle whose release was attempted, even if it
            # raised; the exception still propagates to the caller.
            self.rknn = None

        logger.info("RKNN resources released")
|