727 lines
22 KiB
Plaintext
727 lines
22 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Create YOLO Dataset from SAM2 Annotations\n",
|
|
"\n",
|
|
"Convert SAM2 mask annotations to YOLO detection format (bounding boxes).\n",
|
|
"\n",
|
|
"## Input\n",
|
|
"- Frames from `01_sam2_video_annotation.ipynb`\n",
|
|
"- Annotations JSON file\n",
|
|
"\n",
|
|
"## Output\n",
|
|
"- YOLO format dataset ready for training\n",
|
|
"- `data.yaml` configuration file\n",
|
|
"\n",
|
|
"**Platform:** Kaggle"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 1. Setup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import cv2\n",
|
|
"import json\n",
|
|
"import yaml\n",
|
|
"import shutil\n",
|
|
"import random\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from pathlib import Path\n",
|
|
"from tqdm.notebook import tqdm\n",
|
|
"from collections import defaultdict\n",
|
|
"\n",
|
|
"print(\"Setup complete!\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Configuration - UPDATE THESE PATHS\n",
"# Everything a reader is expected to tune lives in this cell; later cells only read these names.\n",
"FRAMES_DIR = './frames' # From notebook 01\n",
"ANNOTATIONS_FILE = './annotations/annotations.json' # From notebook 01\n",
"# NOTE(review): MASKS_DIR is not referenced by any other cell of this notebook.\n",
"MASKS_DIR = './annotations/masks' # Optional: mask images\n",
"\n",
"# Output dataset\n",
"DATASET_DIR = './yolo_dataset'\n",
"\n",
"# Dataset settings\n",
"CLASS_NAMES = ['object'] # Single class for generic objects\n",
"VAL_SPLIT = 0.2 # 20% validation\n",
"SEED = 42 # Random seed for reproducibility\n",
"\n",
"# Filtering (passed to filter_annotations by process_frame)\n",
"# MIN_BBOX_AREA is compared with each annotation's 'area' value, not with bbox width * height.\n",
"MIN_BBOX_AREA = 100 # Minimum bbox area in pixels\n",
"# MIN_BBOX_SIZE must be met by both width / image width and height / image height.\n",
"MIN_BBOX_SIZE = 0.01 # Minimum bbox dimension (normalized, 0-1)\n",
"# Above this limit only the annotations with the highest 'predicted_iou' are kept.\n",
"MAX_OBJECTS_PER_IMAGE = 100 # Maximum objects per image\n",
"MIN_IOU_SCORE = 0.5 # Minimum SAM2 IoU score\n",
"\n",
"# Seed the global RNGs of both the standard library and NumPy.\n",
"random.seed(SEED)\n",
"np.random.seed(SEED)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 2. Load Annotations"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load the per-frame annotations (see ANNOTATIONS_FILE in the configuration cell)\n",
"with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as f:\n",
"    annotations = json.load(f)\n",
"\n",
"print(f\"Loaded annotations for {len(annotations)} frames\")\n",
"\n",
"# Show a sample: the first frame in file order, truncated to two annotations.\n",
"# An annotations file without any frame is reported instead of raising IndexError.\n",
"sample_frame = next(iter(annotations), None)\n",
"if sample_frame is None:\n",
"    print(\"\\nWarning: annotations file contains no frames\")\n",
"else:\n",
"    print(f\"\\nSample annotation ({sample_frame}):\")\n",
"    print(json.dumps(annotations[sample_frame][:2], indent=2))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Verify frames exist\n",
"frames_path = Path(FRAMES_DIR)\n",
"frame_files = [*frames_path.glob(\"*.jpg\"), *frames_path.glob(\"*.png\")]\n",
"\n",
"print(f\"Found {len(frame_files)} frame images\")\n",
"\n",
"# Annotation keys are compared with image file names (name including extension)\n",
"annotation_frames = set(annotations)\n",
"image_frames = {image_file.name for image_file in frame_files}\n",
"\n",
"matched = annotation_frames.intersection(image_frames)\n",
"print(f\"Matched frames: {len(matched)}\")\n",
"\n",
"# Annotated frames whose image is not in FRAMES_DIR\n",
"missing = annotation_frames.difference(image_frames)\n",
"if missing:\n",
"    print(f\"Warning: {len(missing)} annotated frames missing images\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 3. Create YOLO Dataset Structure"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create directory structure\n",
"dataset_path = Path(DATASET_DIR)\n",
"\n",
"images_train = dataset_path / 'images' / 'train'\n",
"images_val = dataset_path / 'images' / 'val'\n",
"labels_train = dataset_path / 'labels' / 'train'\n",
"labels_val = dataset_path / 'labels' / 'val'\n",
"\n",
"for dir_path in [images_train, images_val, labels_train, labels_val]:\n",
"    existed = dir_path.exists()\n",
"    dir_path.mkdir(parents=True, exist_ok=True)\n",
"    print(f\"{'Exists' if existed else 'Created'}: {dir_path}\")\n",
"\n",
"    # Nothing in this notebook removes old output. Files left from an earlier run\n",
"    # (for example with another SEED or VAL_SPLIT) would be mixed into the new split,\n",
"    # so point them out instead of silently reusing the directory.\n",
"    leftover = sum(1 for _ in dir_path.iterdir()) if existed else 0\n",
"    if leftover:\n",
"        print(f\"Warning: {dir_path} already contains {leftover} entries from a previous run\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 4. Convert Annotations to YOLO Format"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def bbox_xywh_to_yolo(bbox, image_width, image_height):\n",
"    \"\"\"\n",
"    Convert a pixel [x, y, w, h] bbox to YOLO format, clipped to the image.\n",
"\n",
"    Args:\n",
"        bbox: Sequence of four numbers: left, top, width, height in pixels.\n",
"        image_width: Image width in pixels.\n",
"        image_height: Image height in pixels.\n",
"\n",
"    Returns:\n",
"        Tuple (x_center, y_center, width, height), each divided by the matching\n",
"        image dimension. A box lying completely outside the image comes back\n",
"        with zero width and/or height.\n",
"    \"\"\"\n",
"    x, y, w, h = bbox\n",
"\n",
"    # Clip the corners rather than the centre and size: clamping those independently\n",
"    # leaves a box that crosses the image border sticking out of [0, 1].\n",
"    x_min = max(0, min(image_width, x))\n",
"    y_min = max(0, min(image_height, y))\n",
"    # The far corner never precedes the near one, so the size cannot go negative.\n",
"    x_max = max(x_min, min(image_width, x + w))\n",
"    y_max = max(y_min, min(image_height, y + h))\n",
"\n",
"    x_center = (x_min + x_max) / 2 / image_width\n",
"    y_center = (y_min + y_max) / 2 / image_height\n",
"    width = (x_max - x_min) / image_width\n",
"    height = (y_max - y_min) / image_height\n",
"\n",
"    return x_center, y_center, width, height\n",
|
|
"\n",
|
|
"\n",
|
|
"def filter_annotations(anns, img_width, img_height,\n",
"                       min_area=100, min_size=0.01,\n",
"                       min_iou=0.5, max_objects=100):\n",
"    \"\"\"\n",
"    Filter annotations based on criteria.\n",
"\n",
"    Args:\n",
"        anns: Iterable of annotation dicts; the keys read are 'bbox'\n",
"            ([x, y, w, h]), 'area' and 'predicted_iou'.\n",
"        img_width: Image width in pixels, used to normalize the bbox width.\n",
"        img_height: Image height in pixels, used to normalize the bbox height.\n",
"        min_area: Annotations whose 'area' is below this are dropped. When\n",
"            'area' is absent, bbox width * height is used instead.\n",
"        min_size: Minimum bbox width and height as a fraction of the image.\n",
"        min_iou: Minimum 'predicted_iou'; a missing score counts as 1.0.\n",
"        max_objects: Maximum number of annotations returned.\n",
"\n",
"    Returns:\n",
"        List of the annotation dicts that pass every check. If more than\n",
"        max_objects pass, only those with the highest score are returned,\n",
"        ordered by descending score.\n",
"    \"\"\"\n",
"    def _score(ann):\n",
"        # One default for both thresholding and ranking, so an annotation without\n",
"        # a score is not treated as perfect in one place and worthless in the other.\n",
"        return ann.get('predicted_iou', 1.0)\n",
"\n",
"    filtered = []\n",
"\n",
"    for ann in anns:\n",
"        bbox = ann.get('bbox') or []\n",
"\n",
"        # A bbox that cannot be unpacked can never become a label; drop it here\n",
"        # so that it does not take one of the max_objects slots.\n",
"        if len(bbox) != 4:\n",
"            continue\n",
"        _, _, w, h = bbox\n",
"\n",
"        # Check area\n",
"        if ann.get('area', w * h) < min_area:\n",
"            continue\n",
"\n",
"        # Check IoU score\n",
"        if _score(ann) < min_iou:\n",
"            continue\n",
"\n",
"        # Check bbox dimensions\n",
"        if w / img_width < min_size or h / img_height < min_size:\n",
"            continue\n",
"\n",
"        filtered.append(ann)\n",
"\n",
"    # Limit number of objects (keep highest IoU)\n",
"    if len(filtered) > max_objects:\n",
"        filtered = sorted(filtered, key=_score, reverse=True)[:max_objects]\n",
"\n",
"    return filtered"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def process_frame(frame_name, frame_anns, frames_dir, images_dir, labels_dir, class_id=0):\n",
"    \"\"\"\n",
"    Process a single frame: copy image and create YOLO label file.\n",
"\n",
"    Args:\n",
"        frame_name: Image file name inside frames_dir.\n",
"        frame_anns: Annotation dicts for this frame (see filter_annotations).\n",
"        frames_dir: Directory holding the source frames (str or Path).\n",
"        images_dir: Destination directory for the image copy (str or Path).\n",
"        labels_dir: Destination directory for the label file (str or Path).\n",
"        class_id: Class index written at the start of every label line.\n",
"\n",
"    Returns:\n",
"        Number of label lines written. 0 is also returned, and nothing is\n",
"        written, when the frame is missing or cannot be decoded.\n",
"    \"\"\"\n",
"    frame_path = Path(frames_dir) / frame_name\n",
"\n",
"    if not frame_path.exists():\n",
"        return 0\n",
"\n",
"    # Read image dimensions (cv2.imread returns None when decoding fails)\n",
"    image = cv2.imread(str(frame_path))\n",
"    if image is None:\n",
"        return 0\n",
"\n",
"    height, width = image.shape[:2]\n",
"\n",
"    # Filter annotations\n",
"    filtered_anns = filter_annotations(\n",
"        frame_anns, width, height,\n",
"        min_area=MIN_BBOX_AREA,\n",
"        min_size=MIN_BBOX_SIZE,\n",
"        min_iou=MIN_IOU_SCORE,\n",
"        max_objects=MAX_OBJECTS_PER_IMAGE\n",
"    )\n",
"\n",
"    # Accept plain strings for the output directories too, as frames_dir already does\n",
"    images_dir = Path(images_dir)\n",
"    labels_dir = Path(labels_dir)\n",
"\n",
"    # Copy image\n",
"    shutil.copy2(frame_path, images_dir / frame_name)\n",
"\n",
"    # Create YOLO labels\n",
"    labels = []\n",
"    for ann in filtered_anns:\n",
"        bbox = ann.get('bbox') or []\n",
"        if len(bbox) != 4:\n",
"            continue\n",
"\n",
"        x_center, y_center, w, h = bbox_xywh_to_yolo(bbox, width, height)\n",
"\n",
"        # A box without extent describes no object, so it is not written\n",
"        if w <= 0 or h <= 0:\n",
"            continue\n",
"\n",
"        # YOLO format: class x_center y_center width height\n",
"        labels.append(f\"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\")\n",
"\n",
"    # Write label file (written even when empty, so every copied image has a label file)\n",
"    label_path = labels_dir / (Path(frame_name).stem + '.txt')\n",
"    with open(label_path, 'w') as f:\n",
"        f.write('\\n'.join(labels))\n",
"\n",
"    return len(labels)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Split frames into train/val\n",
"# Only frames whose image is on disk take part: process_frame skips the others,\n",
"# so keeping them would distort both the split ratio and the reported counts.\n",
"frame_names = [name for name in annotations if (Path(FRAMES_DIR) / name).exists()]\n",
"\n",
"# A dedicated RNG makes this cell idempotent: re-running it yields the same split\n",
"# no matter how far the global random state has advanced.\n",
"random.Random(SEED).shuffle(frame_names)\n",
"\n",
"# NOTE(review): frames are split individually; if they come from one video, near-identical\n",
"# neighbouring frames can land on both sides of the split - confirm this is acceptable.\n",
"split_idx = int(len(frame_names) * (1 - VAL_SPLIT))\n",
"train_frames = frame_names[:split_idx]\n",
"val_frames = frame_names[split_idx:]\n",
"\n",
"print(f\"Train frames: {len(train_frames)}\")\n",
"print(f\"Val frames: {len(val_frames)}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Process training frames: copy each image, write its label file, total the labels\n",
"train_objects = sum(\n",
"    process_frame(\n",
"        name,\n",
"        annotations.get(name, []),\n",
"        FRAMES_DIR,\n",
"        images_train,\n",
"        labels_train,\n",
"        class_id=0,\n",
"    )\n",
"    for name in tqdm(train_frames, desc=\"Processing train\")\n",
")\n",
"\n",
"print(f\"\\nTrain: {len(train_frames)} images, {train_objects} objects\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Process validation frames: copy each image, write its label file, total the labels\n",
"val_objects = sum(\n",
"    process_frame(\n",
"        name,\n",
"        annotations.get(name, []),\n",
"        FRAMES_DIR,\n",
"        images_val,\n",
"        labels_val,\n",
"        class_id=0,\n",
"    )\n",
"    for name in tqdm(val_frames, desc=\"Processing val\")\n",
")\n",
"\n",
"print(f\"\\nVal: {len(val_frames)} images, {val_objects} objects\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 5. Create data.yaml"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create YOLO data.yaml configuration\n",
"yaml_path = dataset_path / 'data.yaml'\n",
"\n",
"# NOTE(review): 'path' is the absolute location on the machine running this cell;\n",
"# presumably it has to be adjusted once the zip is unpacked elsewhere - confirm\n",
"# how the training notebook consumes it.\n",
"data_config = {\n",
"    'path': str(Path(DATASET_DIR).absolute()),\n",
"    'train': 'images/train',\n",
"    'val': 'images/val',\n",
"    'names': dict(enumerate(CLASS_NAMES)),\n",
"    'nc': len(CLASS_NAMES),\n",
"}\n",
"\n",
"# sort_keys=False keeps the keys in the order written above\n",
"with open(yaml_path, 'w') as f:\n",
"    yaml.dump(data_config, f, default_flow_style=False, sort_keys=False)\n",
"\n",
"print(f\"Created: {yaml_path}\")\n",
"print(\"\\nContents:\")\n",
"print(yaml_path.read_text())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 6. Validate Dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def _is_valid_label_line(line):\n",
"    \"\"\"Return True if line is 'class x_center y_center width height' with coordinates in [0, 1].\"\"\"\n",
"    parts = line.split()\n",
"    if len(parts) != 5:\n",
"        return False\n",
"    try:\n",
"        class_id = int(parts[0])\n",
"        coords = [float(part) for part in parts[1:]]\n",
"    except ValueError:\n",
"        return False\n",
"    return class_id >= 0 and all(0.0 <= value <= 1.0 for value in coords)\n",
"\n",
"\n",
"def validate_dataset(dataset_dir):\n",
"    \"\"\"\n",
"    Validate YOLO dataset structure.\n",
"\n",
"    Args:\n",
"        dataset_dir: Dataset root containing data.yaml, images/<split> and\n",
"            labels/<split> for the splits 'train' and 'val'.\n",
"\n",
"    Returns:\n",
"        Dict with the keys 'valid' (bool), 'errors' (list of str), 'warnings'\n",
"        (list of str) and 'stats'. 'stats' always holds '<split>_images',\n",
"        '<split>_labels' and '<split>_objects' for both splits (0 when the\n",
"        split's image directory is missing), plus 'num_classes' and\n",
"        'class_names' when data.yaml exists.\n",
"    \"\"\"\n",
"    dataset_path = Path(dataset_dir)\n",
"    results = {'valid': True, 'errors': [], 'warnings': [], 'stats': {}}\n",
"\n",
"    # Check data.yaml\n",
"    yaml_path = dataset_path / 'data.yaml'\n",
"    if not yaml_path.exists():\n",
"        results['errors'].append(\"Missing data.yaml\")\n",
"        results['valid'] = False\n",
"    else:\n",
"        with open(yaml_path) as f:\n",
"            # safe_load returns None for an empty document; fall back to an empty mapping\n",
"            config = yaml.safe_load(f) or {}\n",
"        results['stats']['num_classes'] = config.get('nc', 0)\n",
"        results['stats']['class_names'] = config.get('names', {})\n",
"\n",
"    # Check directories and count files\n",
"    for split in ['train', 'val']:\n",
"        images_dir = dataset_path / 'images' / split\n",
"        labels_dir = dataset_path / 'labels' / split\n",
"\n",
"        # Pre-fill the counters so callers can read them even if the split is missing\n",
"        for counter in ('images', 'labels', 'objects'):\n",
"            results['stats'][f'{split}_{counter}'] = 0\n",
"\n",
"        if not images_dir.exists():\n",
"            results['errors'].append(f\"Missing images/{split}\")\n",
"            results['valid'] = False\n",
"            continue\n",
"\n",
"        image_files = list(images_dir.glob(\"*.jpg\")) + list(images_dir.glob(\"*.png\"))\n",
"        label_files = list(labels_dir.glob(\"*.txt\"))\n",
"\n",
"        results['stats'][f'{split}_images'] = len(image_files)\n",
"        results['stats'][f'{split}_labels'] = len(label_files)\n",
"\n",
"        # Check for missing labels (images and labels are paired by file stem)\n",
"        image_stems = {f.stem for f in image_files}\n",
"        label_stems = {f.stem for f in label_files}\n",
"        missing = image_stems - label_stems\n",
"\n",
"        if missing:\n",
"            results['warnings'].append(f\"{len(missing)} {split} images missing labels\")\n",
"\n",
"        # Count total objects (one non-blank line per object) and check each line's format\n",
"        total_objects = 0\n",
"        malformed = 0\n",
"        for label_file in label_files:\n",
"            with open(label_file) as f:\n",
"                lines = [l.strip() for l in f if l.strip()]\n",
"            total_objects += len(lines)\n",
"            malformed += sum(1 for line in lines if not _is_valid_label_line(line))\n",
"        results['stats'][f'{split}_objects'] = total_objects\n",
"\n",
"        if malformed:\n",
"            results['warnings'].append(f\"{malformed} {split} label lines are malformed\")\n",
"\n",
"    return results\n",
|
|
"\n",
|
|
"# Validate the dataset written above and print the report\n",
"validation = validate_dataset(DATASET_DIR)\n",
"\n",
"print(\"Dataset Validation:\")\n",
"print(f\" Valid: {validation['valid']}\")\n",
"\n",
"print(f\"\\nStatistics:\")\n",
"for stat_name, stat_value in validation['stats'].items():\n",
"    print(f\" {stat_name}: {stat_value}\")\n",
"\n",
"# Errors first, then warnings; a section is printed only when it has entries\n",
"for heading, messages in ((\"Errors\", validation['errors']), (\"Warnings\", validation['warnings'])):\n",
"    if not messages:\n",
"        continue\n",
"    print(f\"\\n{heading}:\")\n",
"    for message in messages:\n",
"        print(f\" - {message}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 7. Visualize Dataset Samples"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def visualize_yolo_sample(image_path, label_path, class_names):\n",
"    \"\"\"\n",
"    Visualize YOLO annotation on image.\n",
"\n",
"    Args:\n",
"        image_path: Path of the image to draw on (str or Path).\n",
"        label_path: Path of the YOLO label file (str or Path); a missing file\n",
"            means no boxes.\n",
"        class_names: Mapping from class id to display name; ids without an\n",
"            entry are shown as the number itself.\n",
"\n",
"    Returns:\n",
"        Tuple (image, n): the RGB image with the boxes drawn, and the number\n",
"        of label lines read.\n",
"\n",
"    Raises:\n",
"        OSError: If the image is missing or cannot be decoded.\n",
"    \"\"\"\n",
"    image = cv2.imread(str(image_path))\n",
"    if image is None:\n",
"        # cv2.imread reports failure by returning None; fail here with the path\n",
"        # instead of letting cvtColor raise an error that does not mention it.\n",
"        raise OSError(f\"Could not read image (missing or not decodable): {image_path}\")\n",
"    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
"    height, width = image.shape[:2]\n",
"\n",
"    # Read labels\n",
"    label_path = Path(label_path)\n",
"    if label_path.exists():\n",
"        with open(label_path) as f:\n",
"            labels = [l.strip().split() for l in f if l.strip()]\n",
"    else:\n",
"        labels = []\n",
"\n",
"    # Draw bboxes; ten colours, reused cyclically by class id\n",
"    colors = plt.cm.tab10(np.linspace(0, 1, 10))\n",
"\n",
"    for label in labels:\n",
"        class_id = int(label[0])\n",
"        x_center, y_center, w, h = map(float, label[1:5])\n",
"\n",
"        # Convert to pixel coordinates\n",
"        x1 = int((x_center - w/2) * width)\n",
"        y1 = int((y_center - h/2) * height)\n",
"        x2 = int((x_center + w/2) * width)\n",
"        y2 = int((y_center + h/2) * height)\n",
"\n",
"        color = tuple(int(c * 255) for c in colors[class_id % 10][:3])\n",
"        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)\n",
"\n",
"        # Add label above the box, or just inside it when the box touches the top\n",
"        # edge and the text would otherwise fall outside the image\n",
"        class_name = class_names.get(class_id, str(class_id))\n",
"        text_y = y1 - 5 if y1 >= 15 else y1 + 15\n",
"        cv2.putText(image, class_name, (x1, text_y),\n",
"                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)\n",
"\n",
"    return image, len(labels)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Visualize train samples\n",
"# Both extensions handled elsewhere in this notebook are shown, not only *.jpg\n",
"train_images = sorted(list(images_train.glob(\"*.jpg\")) + list(images_train.glob(\"*.png\")))[:6]\n",
"\n",
"fig, axes = plt.subplots(2, 3, figsize=(18, 12))\n",
"class_names_dict = {i: name for i, name in enumerate(CLASS_NAMES)}\n",
"\n",
"for ax, img_path in zip(axes.flat, train_images):\n",
"    label_path = labels_train / (img_path.stem + '.txt')\n",
"    vis, count = visualize_yolo_sample(img_path, label_path, class_names_dict)\n",
"\n",
"    ax.imshow(vis)\n",
"    ax.set_title(f\"{img_path.name} ({count} objects)\")\n",
"    ax.axis('off')\n",
"\n",
"# With fewer than six images the remaining panels would show empty axes; hide them\n",
"for ax in list(axes.flat)[len(train_images):]:\n",
"    ax.axis('off')\n",
"\n",
"plt.suptitle('Training Samples with YOLO Annotations', fontsize=14)\n",
"plt.tight_layout()\n",
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Object count distribution\n",
"train_label_files = list(labels_train.glob(\"*.txt\"))\n",
"val_label_files = list(labels_val.glob(\"*.txt\"))\n",
"\n",
"def count_objects_in_labels(label_files):\n",
"    \"\"\"Return, for each label file in input order, its number of non-blank lines.\"\"\"\n",
"    counts = []\n",
"    for lf in label_files:\n",
"        with open(lf) as f:\n",
"            counts.append(sum(1 for l in f if l.strip()))\n",
"    return counts\n",
"\n",
"train_counts = count_objects_in_labels(train_label_files)\n",
"val_counts = count_objects_in_labels(val_label_files)\n",
"\n",
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
"\n",
"axes[0].hist(train_counts, bins=30, edgecolor='black', alpha=0.7, label='Train')\n",
"axes[0].hist(val_counts, bins=30, edgecolor='black', alpha=0.7, label='Val')\n",
"axes[0].set_xlabel('Objects per image')\n",
"axes[0].set_ylabel('Frequency')\n",
"axes[0].set_title('Objects per Image Distribution')\n",
"axes[0].legend()\n",
"\n",
"# Bbox size distribution: normalized width * height of every training label\n",
"bbox_sizes = []\n",
"for lf in train_label_files:\n",
"    with open(lf) as f:\n",
"        for line in f:\n",
"            parts = line.strip().split()\n",
"            if len(parts) >= 5:\n",
"                w, h = float(parts[3]), float(parts[4])\n",
"                bbox_sizes.append(w * h)\n",
"\n",
"axes[1].hist(bbox_sizes, bins=50, edgecolor='black', alpha=0.7)\n",
"axes[1].set_xlabel('Bbox area (normalized)')\n",
"axes[1].set_ylabel('Frequency')\n",
"axes[1].set_title('Bounding Box Size Distribution')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"print(f\"\\nBbox size stats:\")\n",
"if bbox_sizes:\n",
"    print(f\" Min: {min(bbox_sizes):.4f}\")\n",
"    print(f\" Max: {max(bbox_sizes):.4f}\")\n",
"    print(f\" Mean: {np.mean(bbox_sizes):.4f}\")\n",
"    print(f\" Median: {np.median(bbox_sizes):.4f}\")\n",
"else:\n",
"    # min() and max() raise ValueError on an empty list\n",
"    print(\" No bounding boxes found in the training labels\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 8. Export for Kaggle"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create zip archive for Kaggle dataset\n",
"import zipfile\n",
"\n",
"EXPORT_ZIP = 'yolo_dataset.zip'\n",
"\n",
"print(f\"Creating {EXPORT_ZIP}...\")\n",
"\n",
"# Archive names are taken relative to the parent of DATASET_DIR, so with the\n",
"# configured './yolo_dataset' every entry sits under a top-level 'yolo_dataset' folder\n",
"archive_root = os.path.dirname(DATASET_DIR)\n",
"\n",
"with zipfile.ZipFile(EXPORT_ZIP, mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:\n",
"    for folder, _subdirs, filenames in os.walk(DATASET_DIR):\n",
"        for filename in filenames:\n",
"            source = os.path.join(folder, filename)\n",
"            zipf.write(source, os.path.relpath(source, archive_root))\n",
"\n",
"zip_size = os.path.getsize(EXPORT_ZIP) / 1024 / 1024\n",
"print(f\"\\nExport complete!\")\n",
"print(f\" File: {EXPORT_ZIP}\")\n",
"print(f\" Size: {zip_size:.1f} MB\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Alternative: Create Kaggle dataset directly (if kaggle CLI available)\n",
|
|
"# Uncomment to use\n",
|
|
"\n",
|
|
"# KAGGLE_USERNAME = 'your-username'\n",
|
|
"# DATASET_NAME = 'sam2-yolo-custom'\n",
|
|
"\n",
|
|
"# # Create dataset metadata\n",
|
|
"# metadata = {\n",
|
|
"# 'title': 'SAM2 Auto-Annotated YOLO Dataset',\n",
|
|
"# 'id': f'{KAGGLE_USERNAME}/{DATASET_NAME}',\n",
|
|
"# 'licenses': [{'name': 'CC0-1.0'}]\n",
|
|
"# }\n",
|
|
"\n",
|
|
"# metadata_path = dataset_path / 'dataset-metadata.json'\n",
|
|
"# with open(metadata_path, 'w') as f:\n",
|
|
"# json.dump(metadata, f, indent=2)\n",
|
|
"\n",
|
|
"# # Upload to Kaggle\n",
|
|
"# !kaggle datasets create -p {DATASET_DIR}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 9. Dataset Summary"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Final summary, built from the validation result, data.yaml path and zip name of earlier cells\n",
"rule = \"=\" * 50\n",
"stats = validation['stats']\n",
"\n",
"print(rule)\n",
"print(\"YOLO DATASET SUMMARY\")\n",
"print(rule)\n",
"print(f\"\\nDataset location: {Path(DATASET_DIR).absolute()}\")\n",
"\n",
"print(f\"\\nClasses ({len(CLASS_NAMES)}):\")\n",
"for index, class_name in enumerate(CLASS_NAMES):\n",
"    print(f\" {index}: {class_name}\")\n",
"\n",
"print(f\"\\nSplit:\")\n",
"print(f\" Train: {stats['train_images']} images, {stats['train_objects']} objects\")\n",
"print(f\" Val: {stats['val_images']} images, {stats['val_objects']} objects\")\n",
"print(f\" Total: {stats['train_images'] + stats['val_images']} images\")\n",
"\n",
"print(f\"\\nFiles:\")\n",
"print(f\" data.yaml: {yaml_path}\")\n",
"print(f\" Export: {EXPORT_ZIP}\")\n",
"\n",
"print(\"\\n\" + rule)\n",
"print(\"Ready for YOLOv9t training!\")\n",
"print(rule)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"---\n",
|
|
"\n",
|
|
"## Next Steps\n",
|
|
"\n",
|
|
"1. **Upload dataset to Kaggle** (if not already done)\n",
|
|
" - Go to kaggle.com/datasets/new\n",
|
|
" - Upload `yolo_dataset.zip`\n",
|
|
" \n",
|
|
"2. **Run `03_train_yolov9t.ipynb`** to train YOLOv9t\n",
|
|
"\n",
|
|
"### Dataset Structure\n",
|
|
"```\n",
|
|
"yolo_dataset/\n",
|
|
"├── data.yaml\n",
|
|
"├── images/\n",
|
|
"│ ├── train/\n",
|
|
"│ └── val/\n",
|
|
"└── labels/\n",
|
|
" ├── train/\n",
|
|
" └── val/\n",
|
|
"```"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python",
|
|
"version": "3.10.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|