add sam2 yolo auto annotation

2026-02-04 15:29:36 +07:00
parent 7e56948ece
commit 5a951d8812
2061 changed files with 316473 additions and 0 deletions
@@ -0,0 +1,8 @@
+"""
+Output writers for snapshots and annotations.
+"""
+
+from .snapshot import SnapshotWriter
+from .annotation import AnnotationWriter
+
+# Names this package exposes as its public API (honoured by ``import *``).
+__all__ = ["SnapshotWriter", "AnnotationWriter"]
@@ -0,0 +1,143 @@
+"""
+YOLO format annotation writer.
+"""
+
+import logging
+from pathlib import Path
+from typing import List, Dict, Tuple, Optional, Any
+
+# Module-level logger, named after this module's import path.
+logger = logging.getLogger(__name__)
+
+
+class AnnotationWriter:
+    """
+    Write YOLO format annotation files.
+    
+    Output format:
+        class_id x_center y_center width height
+        (all values normalized to 0-1)
+    """
+    
+    def __init__(
+        self,
+        output_dir: str,
+        camera_name: str = "default",
+        class_names: Optional[Dict[int, str]] = None,
+    ):
+        """
+        Initialize annotation writer.
+        
+        Args:
+            output_dir: Base output directory
+            camera_name: Camera name (creates subdirectory)
+            class_names: Class ID to name mapping
+        """
+        if camera_name:
+            self.output_dir = Path(output_dir) / camera_name
+        else:
+            self.output_dir = Path(output_dir)
+        self.camera_name = camera_name
+        self.class_names = class_names or {}
+        
+        # Create output directory
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        
+        self._label_count = 0
+        self._total_objects = 0
+    
+    def save(
+        self,
+        detections: List[Any],
+        image_name: str,
+        image_size: Tuple[int, int],
+    ) -> str:
+        """
+        Save YOLO label file.
+        
+        Args:
+            detections: List of Detection objects
+            image_name: Image filename (without extension)
+            image_size: Image dimensions (width, height)
+            
+        Returns:
+            Path to saved label file
+        """
+        img_w, img_h = image_size
+        
+        lines = []
+        for det in detections:
+            # Get bbox coordinates
+            if hasattr(det, 'bbox'):
+                x_c, y_c, w, h = det.bbox.to_yolo(img_w, img_h)
+                class_id = det.class_id
+            else:
+                # Handle dict format
+                bbox = det.get('bbox', [0, 0, 0, 0])
+                class_id = det.get('class_id', 0)
+                x1, y1, x2, y2 = bbox
+                x_c = ((x1 + x2) / 2) / img_w
+                y_c = ((y1 + y2) / 2) / img_h
+                w = (x2 - x1) / img_w
+                h = (y2 - y1) / img_h
+            
+            # Clamp values
+            x_c = max(0, min(1, x_c))
+            y_c = max(0, min(1, y_c))
+            w = max(0, min(1, w))
+            h = max(0, min(1, h))
+            
+            line = f"{class_id} {x_c:.6f} {y_c:.6f} {w:.6f} {h:.6f}"
+            lines.append(line)
+        
+        # Write label file
+        label_path = self.output_dir / f"{image_name}.txt"
+        with open(label_path, 'w') as f:
+            f.write('\n'.join(lines))
+        
+        self._label_count += 1
+        self._total_objects += len(lines)
+        
+        logger.debug(f"Saved annotation: {label_path} ({len(lines)} objects)")
+        
+        return str(label_path)
+    
+    def detection_to_yolo(
+        self,
+        detection: Any,
+        img_width: int,
+        img_height: int,
+    ) -> str:
+        """
+        Convert single detection to YOLO format string.
+        
+        Args:
+            detection: Detection object
+            img_width: Image width
+            img_height: Image height
+            
+        Returns:
+            YOLO format string
+        """
+        if hasattr(detection, 'bbox'):
+            x_c, y_c, w, h = detection.bbox.to_yolo(img_width, img_height)
+            class_id = detection.class_id
+        else:
+            bbox = detection.get('bbox', [0, 0, 0, 0])
+            class_id = detection.get('class_id', 0)
+            x1, y1, x2, y2 = bbox
+            x_c = ((x1 + x2) / 2) / img_width
+            y_c = ((y1 + y2) / 2) / img_height
+            w = (x2 - x1) / img_width
+            h = (y2 - y1) / img_height
+        
+        return f"{class_id} {x_c:.6f} {y_c:.6f} {w:.6f} {h:.6f}"
+    
+    @property
+    def count(self) -> int:
+        """Get total label file count."""
+        return self._label_count
+    
+    @property
+    def total_objects(self) -> int:
+        """Get total object count across all files."""
+        return self._total_objects
@@ -0,0 +1,137 @@
+"""
+Snapshot capture and saving.
+"""
+
+import cv2
+import logging
+import numpy as np
+from pathlib import Path
+from datetime import datetime
+from typing import Optional, Tuple
+
+# Module-level logger, named after this module's import path.
+logger = logging.getLogger(__name__)
+
+
+class SnapshotWriter:
+    """
+    Save snapshot images.
+    
+    Features:
+    - Clean snapshots (no overlays)
+    - Automatic timestamped naming
+    - JPEG/PNG support
+    """
+    
+    def __init__(
+        self,
+        output_dir: str,
+        camera_name: str = "default",
+        image_format: str = "jpg",
+        quality: int = 95,
+        clean: bool = True,
+    ):
+        """
+        Initialize snapshot writer.
+        
+        Args:
+            output_dir: Base output directory
+            camera_name: Camera/source name (creates subdirectory)
+            image_format: Output format ('jpg' or 'png')
+            quality: JPEG quality (1-100)
+            clean: Save clean images without overlays
+        """
+        if camera_name:
+            self.output_dir = Path(output_dir) / camera_name
+        else:
+            self.output_dir = Path(output_dir)
+        self.camera_name = camera_name
+        self.image_format = image_format
+        self.quality = quality
+        self.clean = clean
+        
+        # Create output directory
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        
+        self._snapshot_count = 0
+    
+    def save(
+        self,
+        frame: np.ndarray,
+        frame_id: int = 0,
+        prefix: Optional[str] = None,
+    ) -> str:
+        """
+        Save snapshot image.
+        
+        Args:
+            frame: Image frame (BGR)
+            frame_id: Frame index for naming
+            prefix: Optional filename prefix
+            
+        Returns:
+            Path to saved file
+        """
+        # Generate filename
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
+        
+        if prefix:
+            filename = f"{prefix}_{timestamp}_{frame_id:06d}.{self.image_format}"
+        else:
+            filename = f"{timestamp}_{frame_id:06d}.{self.image_format}"
+        
+        filepath = self.output_dir / filename
+        
+        # Save image
+        if self.image_format == "jpg":
+            cv2.imwrite(str(filepath), frame, [cv2.IMWRITE_JPEG_QUALITY, self.quality])
+        else:
+            cv2.imwrite(str(filepath), frame)
+        
+        self._snapshot_count += 1
+        logger.debug(f"Saved snapshot: {filepath}")
+        
+        return str(filepath)
+    
+    def save_crop(
+        self,
+        frame: np.ndarray,
+        bbox: Tuple[float, float, float, float],
+        frame_id: int = 0,
+        padding: float = 0.1,
+    ) -> str:
+        """
+        Save cropped snapshot around bounding box.
+        
+        Args:
+            frame: Image frame
+            bbox: Bounding box (x1, y1, x2, y2)
+            frame_id: Frame index
+            padding: Relative padding around bbox
+            
+        Returns:
+            Path to saved file
+        """
+        h, w = frame.shape[:2]
+        x1, y1, x2, y2 = bbox
+        
+        # Calculate padding
+        bw = x2 - x1
+        bh = y2 - y1
+        pad_x = int(bw * padding)
+        pad_y = int(bh * padding)
+        
+        # Apply padding with bounds checking
+        x1 = max(0, int(x1 - pad_x))
+        y1 = max(0, int(y1 - pad_y))
+        x2 = min(w, int(x2 + pad_x))
+        y2 = min(h, int(y2 + pad_y))
+        
+        # Crop
+        cropped = frame[y1:y2, x1:x2]
+        
+        return self.save(cropped, frame_id, prefix="crop")
+    
+    @property
+    def count(self) -> int:
+        """Get total snapshot count."""
+        return self._snapshot_count