# File: dataset-yolo-script/sam2-cpu/frigate_mini/detector/rknn_detector.py
# Listing metadata: 2026-02-04 15:29:36 +07:00 | 328 lines | 10 KiB | Python

"""
RKNN detector backend for Rockchip NPU.
"""
import numpy as np
import logging
from typing import List, Tuple, Optional
from .base import BaseDetector, Detection, BBox
logger = logging.getLogger(__name__)
class RKNNDetector(BaseDetector):
    """
    RKNN-based YOLO detector for Rockchip NPU.

    Supports: RK3588, RK3568, RK3566, RK3562, RV1106, etc.

    Typical use is ``load_model()`` once, ``detect(frame)`` per frame and
    ``release()`` when done. ``detect`` stores the letterbox parameters of
    the most recent frame on the instance, so a single instance must not
    run ``detect`` concurrently from several threads.
    """

    # Row width of a COCO YOLOv5-style tensor: 4 box + 1 objectness + 80 classes.
    _COCO_V5_ROW_WIDTH = 85

    def __init__(
        self,
        model_path: str,
        target_platform: str = "rk3588",
        core_mask: int = 7,
        input_size: Tuple[int, int] = (640, 640),
        conf_threshold: float = 0.25,
        nms_threshold: float = 0.45,
        class_names: Optional[dict] = None,
    ):
        """
        Initialize RKNN detector.

        Args:
            model_path: Path to .rknn model file
            target_platform: Target Rockchip platform (only passed to the
                runtime when the full rknn-toolkit2 package is used)
            core_mask: NPU core mask (RK3588: 7=all 3 cores); only passed
                to the runtime when RKNNLite is used
            input_size: Model input size, unpacked as (width, height)
            conf_threshold: Confidence threshold
            nms_threshold: NMS threshold
            class_names: Class ID to name mapping
        """
        super().__init__(
            model_path=model_path,
            input_size=input_size,
            conf_threshold=conf_threshold,
            nms_threshold=nms_threshold,
            class_names=class_names,
        )
        self.target_platform = target_platform
        self.core_mask = core_mask
        # Runtime handle; stays None until load_model() succeeds.
        self.rknn = None

    def load_model(self) -> bool:
        """
        Load RKNN model to NPU.

        Prefers the RKNNLite runtime and falls back to the full
        rknn-toolkit2 package when RKNNLite cannot be imported.

        Returns:
            True when the model is loaded and the runtime initialised,
            False otherwise. On failure ``self.rknn`` is reset to None so
            that ``detect`` never runs against a half-initialised runtime.
        """
        loaded = False
        try:
            # Do not leak the handle of an earlier successful load.
            if self.rknn is not None:
                self.release()

            # Try rknnlite2 first (for ARM devices)
            try:
                from rknnlite.api import RKNNLite

                self.rknn = RKNNLite()
                is_lite = True
                logger.info("Using RKNNLite2 runtime")
            except ImportError:
                # Fall back to rknn-toolkit2 (e.g. on an x86 host)
                from rknn.api import RKNN

                self.rknn = RKNN()
                is_lite = False
                logger.info("Using RKNN-Toolkit2 runtime")

            # Load model
            logger.info(f"Loading RKNN model: {self.model_path}")
            ret = self.rknn.load_rknn(self.model_path)
            if ret != 0:
                logger.error(f"Failed to load RKNN model: {ret}")
                return False

            # Initialize runtime; the two packages take different arguments.
            if is_lite:
                ret = self.rknn.init_runtime(core_mask=self.core_mask)
            else:
                ret = self.rknn.init_runtime(
                    target=self.target_platform,
                    device_id=None,
                )
            if ret != 0:
                logger.error(f"Failed to init RKNN runtime: {ret}")
                return False

            logger.info("RKNN model loaded successfully")
            loaded = True
            return True
        except ImportError as e:
            logger.error(f"RKNN library not available: {e}")
            logger.info("Install with: pip install rknnlite2 (ARM) or rknn-toolkit2 (x86)")
            return False
        except Exception as e:
            # Boundary handler: callers only get a bool, so keep the traceback in the log.
            logger.exception(f"Failed to load RKNN model: {e}")
            return False
        finally:
            # Every failure path ends here with a possibly half-built handle.
            if not loaded:
                self._discard_runtime()

    def _discard_runtime(self) -> None:
        """Release a runtime handle that never became usable, ignoring errors."""
        runtime, self.rknn = self.rknn, None
        if runtime is None:
            return
        try:
            runtime.release()
        except Exception as e:
            logger.debug(f"Ignoring error while releasing RKNN runtime: {e}")

    def detect(self, frame: np.ndarray) -> List[Detection]:
        """
        Run detection on frame using NPU.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            List of Detection objects; empty when the runtime is not
            initialised, the frame is empty, or inference returns None.
        """
        if self.rknn is None:
            logger.warning("RKNN not initialized")
            return []
        if frame is None or frame.size == 0:
            # A zero-sized frame would divide by zero in the letterbox step.
            logger.warning("Empty frame passed to RKNN detector")
            return []

        orig_h, orig_w = frame.shape[:2]

        # Preprocess
        input_data = self._preprocess_rknn(frame)

        # Run inference
        # NOTE(review): some RKNN runtime versions may expect a 4-D (NHWC)
        # input here - confirm against the installed toolkit version.
        outputs = self.rknn.inference(inputs=[input_data])
        if outputs is None:
            logger.warning("RKNN inference returned None")
            return []

        # Postprocess
        return self._postprocess_yolo(outputs, (orig_h, orig_w))

    def _preprocess_rknn(self, frame: np.ndarray) -> np.ndarray:
        """
        Preprocess frame for RKNN inference.

        Letterboxes the frame to the model input size, converts BGR to RGB
        and records the letterbox parameters on the instance for
        ``_parse_yolo_output``.

        Args:
            frame: Input image (BGR, HWC)

        Returns:
            Letterboxed RGB image (HWC, no batch dimension).
        """
        import cv2

        input_w, input_h = self.input_size

        # Resize with letterbox
        img, ratio, (dw, dh) = self._letterbox(frame, (input_h, input_w))

        # Store for postprocessing
        self._ratio = ratio
        self._pad = (dw, dh)
        self._orig_shape = frame.shape[:2]

        # BGR to RGB (RKNN typically expects RGB)
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def _letterbox(
        self,
        img: np.ndarray,
        new_shape: Tuple[int, int],
        color: Tuple[int, int, int] = (114, 114, 114),
    ) -> Tuple[np.ndarray, float, Tuple[float, float]]:
        """
        Resize and pad image while maintaining aspect ratio.

        Args:
            img: Input image (HWC)
            new_shape: Target (height, width)
            color: Border fill colour

        Returns:
            Tuple of (padded image, scale ratio, (dw, dh)) where dw/dh are
            the per-side horizontal/vertical padding and may be fractional
            (x.5) when the total padding is odd.
        """
        import cv2

        shape = img.shape[:2]  # [height, width]

        # Scale ratio
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

        # Compute padding
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
        dw = (new_shape[1] - new_unpad[0]) / 2
        dh = (new_shape[0] - new_unpad[1]) / 2

        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        # The -0.1 / +0.1 offsets split an odd padding total between the two
        # sides so the result is exactly new_shape.
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
        )
        return img, r, (dw, dh)

    def _postprocess_yolo(
        self,
        outputs: list,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """
        Postprocess YOLO outputs from RKNN.

        Handles common YOLO output formats:
        - YOLOv5/v8/v9 style: [1, num_boxes, 5+num_classes]
        - Split outputs: boxes, scores, classes separate

        Args:
            outputs: Output tensors returned by the RKNN runtime
            original_shape: (height, width) of the original frame

        Returns:
            List of Detection objects; empty on unsupported layouts or
            when postprocessing raises (the error is logged).
        """
        detections: List[Detection] = []
        try:
            # Handle different output formats
            if len(outputs) == 1:
                # Single output tensor
                output = np.asarray(outputs[0])
                if output.ndim == 3:
                    output = output[0]  # Remove batch dim
                if output.ndim != 2:
                    logger.warning(f"Unsupported RKNN output shape: {output.shape}")
                    return detections
                # Assume format: [num_boxes, 5+num_classes] or [5+num_classes, num_boxes];
                # the longer axis is taken to be the box axis.
                if output.shape[0] < output.shape[1]:
                    output = output.T
                detections = self._parse_yolo_output(output, original_shape)
            elif len(outputs) >= 3:
                # Split outputs (boxes, scores, classes)
                # This is common for quantized RKNN models
                detections = self._parse_split_outputs(outputs, original_shape)
            else:
                logger.warning(f"Unsupported number of RKNN outputs: {len(outputs)}")
        except Exception as e:
            logger.error(f"Postprocessing error: {e}")
        return detections

    def _has_objectness(self, num_values: int) -> bool:
        """
        Decide whether rows of width ``num_values`` carry an objectness score.

        A width of 85 (COCO, 80 classes) is always treated as having one.
        When ``class_names`` is populated, a width of ``len(class_names) + 5``
        is recognised as well, so custom-class models are handled too.
        """
        if num_values == self._COCO_V5_ROW_WIDTH:
            return True
        names = getattr(self, "class_names", None)
        return bool(names) and num_values == len(names) + 5

    def _parse_yolo_output(
        self,
        output: np.ndarray,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """
        Parse standard YOLO output format.

        Row format: [x, y, w, h, obj_conf, cls1_conf, cls2_conf, ...]
        or:         [x, y, w, h, cls1_conf, cls2_conf, ...]

        Boxes are mapped back to original-image coordinates using the
        letterbox parameters stored by ``_preprocess_rknn``, clipped to the
        image and passed through NMS.

        Args:
            output: 2-D array with one candidate box per row
            original_shape: (height, width) of the original frame

        Returns:
            List of Detection objects that survive thresholding and NMS.
        """
        orig_h, orig_w = original_shape
        ratio = self._ratio
        dw, dh = self._pad

        output = np.asarray(output)
        if output.ndim != 2 or output.shape[0] == 0 or output.shape[1] < 5:
            return []

        row_index = np.arange(output.shape[0])
        if self._has_objectness(output.shape[1]):
            class_scores = output[:, 5:]
            class_ids = class_scores.argmax(axis=1)
            confidences = output[:, 4] * class_scores[row_index, class_ids]
        else:
            # No separate obj_conf: the best class score is the confidence.
            class_scores = output[:, 4:]
            class_ids = class_scores.argmax(axis=1)
            confidences = class_scores[row_index, class_ids]

        # Threshold before any per-box Python work; most rows are dropped here.
        # NaN scores compare False and are dropped as well.
        keep = confidences >= self.conf_threshold
        if not keep.any():
            return []

        kept = output[keep]
        half_w = kept[:, 2] / 2
        half_h = kept[:, 3] / 2

        # Centre/size -> corners, remove padding, scale back, clip to image bounds.
        x1 = np.clip((kept[:, 0] - half_w - dw) / ratio, 0, orig_w)
        y1 = np.clip((kept[:, 1] - half_h - dh) / ratio, 0, orig_h)
        x2 = np.clip((kept[:, 0] + half_w - dw) / ratio, 0, orig_w)
        y2 = np.clip((kept[:, 1] + half_h - dh) / ratio, 0, orig_h)

        class_names = self.class_names
        detections = [
            Detection(
                class_id=class_id,
                class_name=class_names.get(class_id, str(class_id)),
                confidence=confidence,
                bbox=BBox(x1=bx1, y1=by1, x2=bx2, y2=by2),
            )
            # tolist() yields plain Python ints/floats rather than NumPy scalars.
            for class_id, confidence, bx1, by1, bx2, by2 in zip(
                class_ids[keep].tolist(),
                confidences[keep].tolist(),
                x1.tolist(),
                y1.tolist(),
                x2.tolist(),
                y2.tolist(),
            )
        ]

        # Apply NMS
        return self._apply_nms(detections)

    def _parse_split_outputs(
        self,
        outputs: list,
        original_shape: Tuple[int, int],
    ) -> List[Detection]:
        """
        Parse split output format (common in quantized models).

        Not implemented yet: the layout varies by model (a common one is
        [boxes, scores, class_ids, num_dets]). Logs a warning and returns
        an empty list.
        """
        # Placeholder - implement based on actual model output format
        logger.warning("Split output parsing not fully implemented")
        return []

    def _apply_nms(self, detections: List[Detection]) -> List[Detection]:
        """
        Apply NMS to detections.

        All boxes are suppressed together regardless of class. The
        suppression itself is delegated to ``self.nms``, which is not
        defined in this class (presumably provided by the base class).
        """
        if not detections:
            return []
        boxes = np.array([[d.bbox.x1, d.bbox.y1, d.bbox.x2, d.bbox.y2] for d in detections])
        scores = np.array([d.confidence for d in detections])
        keep_indices = self.nms(boxes, scores, self.nms_threshold)
        return [detections[i] for i in keep_indices]

    def release(self) -> None:
        """
        Release RKNN resources.

        Safe to call repeatedly. The handle is cleared before the runtime
        is released, so a failing release is never retried on the same
        handle.
        """
        if self.rknn is None:
            return
        runtime, self.rknn = self.rknn, None
        runtime.release()
        logger.info("RKNN resources released")