From f458eeee82b67f99293a863b9417fdd452b6a05c Mon Sep 17 00:00:00 2001 From: dillonj Date: Mon, 22 Dec 2025 14:48:17 -0700 Subject: [PATCH] Add multi-framework dataset setup for RF-DETR, YOLOX, and YOLOv6 - Create dataset_coco/ for RF-DETR (COCO format) - Rename dataset_split/ to dataset_yolo/ for clarity - Add setup_datasets.py script for automated multi-format setup - Update YOLOv6 script with correct 10-class configuration - Update README with framework comparison and training instructions - Update .gitignore to exclude both dataset directories --- .gitignore | 3 +- README.md | 83 +++++++++++++++++++++++++--------- setup_datasets.py | 111 ++++++++++++++++++++++++++++++++++++++++++++++ train_yolov6.py | 4 +- 4 files changed, 178 insertions(+), 23 deletions(-) create mode 100644 setup_datasets.py diff --git a/.gitignore b/.gitignore index d325debd..ef631791 100644 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,8 @@ runs/ # Dataset (large files) IMAGE/ images/ -dataset_split/ +dataset_yolo/ +dataset_coco/ *.jpg *.jpeg *.png diff --git a/README.md b/README.md index c1353d6b..cbe4c95b 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,9 @@ This repository contains a complete wood defect detection system using YOLOX/YOL - Valid: 2,027 images - Test: 2,029 images -**Format**: YOLO format (images/ and labels/ subdirectories with data.yaml configuration) +**Formats Available**: +- `dataset_coco/` → COCO format for RF-DETR +- `dataset_yolo/` → YOLO format for YOLOX, YOLOv6, YOLOv8 ## 🚀 Quick Start @@ -48,21 +50,19 @@ source .venv/bin/activate # Install dependencies pip install -U pip -pip install ultralytics gradio +pip install ultralytics gradio rfdetr ``` -### 2. Download Dataset - -The dataset is not included in the repository due to size. Download from Kaggle and organize as follows: +### 2. 
Setup Datasets ```bash -# Download from Kaggle (requires Kaggle API) +# Download dataset from Kaggle (requires Kaggle API) kaggle datasets download -d kirs0816/wood-surface-defects unzip wood-surface-defects.zip -# Run the dataset preparation script -python split_coco_dataset.py -python reorganize_dataset.py +# Create multi-format datasets +python split_coco_dataset.py # Creates dataset_yolo/ +python setup_datasets.py # Creates dataset_coco/ and updates configs ``` ### 3. Launch Annotation GUI @@ -77,10 +77,38 @@ Open http://localhost:7860 in your browser to access the web-based annotation in - Manual annotation tools - Real-time result visualization -### 4. Train Model +### 4. Train Models +Choose from three different frameworks: + +#### RF-DETR (Highest accuracy, slower training) ```bash -python train_yolox.py --dataset-dir dataset_split --model yolox-nano --epochs 5 --batch-size 4 +python train_rfdetr.py \ + --dataset-dir dataset_coco \ + --output-dir runs/rfdetr_medium \ + --model medium \ + --epochs 50 \ + --batch-size 4 \ + --grad-accum-steps 4 \ + --lr 1e-4 +``` + +#### YOLOX (Balanced performance/speed) +```bash +python train_yolox.py \ + --dataset-dir dataset_yolo \ + --model yolox-nano \ + --epochs 50 \ + --batch-size 8 +``` + +#### YOLOv6 (Fastest, edge-optimized) +```bash +python train_yolov6.py \ + --dataset-dir dataset_yolo \ + --model yolov6n \ + --epochs 50 \ + --batch-size 8 ``` ## 📁 Project Structure @@ -88,11 +116,23 @@ python train_yolox.py --dataset-dir dataset_split --model yolox-nano --epochs 5 ``` saw_mill_knot_detection/ ├── annotation_gui.py # Gradio web interface for annotation +├── train_rfdetr.py # RF-DETR training script ├── train_yolox.py # YOLOX training script -├── split_coco_dataset.py # Dataset splitting utility -├── reorganize_dataset.py # Dataset reorganization to YOLO format -├── config.py # Configuration settings -├── dataset_split/ # Training data (excluded from git) +├── train_yolov6.py # YOLOv6 training script +├── 
setup_datasets.py # Multi-format dataset setup script +├── split_coco_dataset.py # Dataset splitting utility +├── config.py # Configuration settings +├── dataset_coco/ # RF-DETR dataset (COCO format) +│ ├── train/ +│ │ ├── *.jpg # Training images +│ │ └── _annotations.coco.json +│ ├── valid/ +│ │ ├── *.jpg # Validation images +│ │ └── _annotations.coco.json +│ └── test/ +│ ├── *.jpg # Test images +│ └── _annotations.coco.json +├── dataset_yolo/ # YOLOX/YOLOv6/YOLOv8 dataset (YOLO format) │ ├── train/ │ │ ├── images/ # Training images │ │ └── labels/ # YOLO format labels @@ -104,17 +144,20 @@ saw_mill_knot_detection/ │ │ └── labels/ # YOLO format labels │ └── data.yaml # YOLO dataset configuration ├── runs/ # Training outputs (excluded from git) -│ └── yolox_training/ -│ └── training/ -│ └── weights/ -│ ├── best.pt # Best model weights -│ └── last.pt # Latest model weights ├── bbox_coco_dataset.json # Original COCO annotations ├── requirements.txt # Python dependencies ├── .gitignore # Excludes large data files └── README.md # This file ``` +## 🤖 Framework Comparison + +| Framework | Accuracy | Speed | Memory | Deployment | Best For | +|-----------|----------|-------|--------|------------|----------| +| **RF-DETR** | ⭐⭐⭐⭐⭐ | ⭐⭐ | ⭐⭐⭐ | CPU/GPU | Highest accuracy, research | +| **YOLOX** | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐ | Edge devices | Balanced performance | +| **YOLOv6** | ⭐⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐ | Mobile/Edge | Fast inference, production | + ## 🛠️ Usage Guide ### Annotation GUI Features
diff --git a/setup_datasets.py b/setup_datasets.py
new file mode 100644
index 00000000..f7ef989d
--- /dev/null
+++ b/setup_datasets.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+"""
+Set up multi-format datasets for different model frameworks.
+
+Creates:
+- dataset_coco/ for RF-DETR (COCO format), copied from dataset_yolo/
+
+Requires:
+- dataset_yolo/ for YOLOX/YOLOv6/YOLOv8 (YOLO format), created beforehand by
+  split_coco_dataset.py
+
+Usage:
+    python setup_datasets.py
+"""
+
+import json
+import shutil
+import sys
+from pathlib import Path
+
+SPLITS = ("train", "valid", "test")
+ANNOTATION_FILE = "_annotations.coco.json"
+DEFAULT_YOLO_DIR = Path("dataset_yolo")
+DEFAULT_COCO_DIR = Path("dataset_coco")
+DEFAULT_YOLOV6_SCRIPT = Path("train_yolov6.py")
+
+
+def setup_coco_dataset(yolo_dir=DEFAULT_YOLO_DIR, coco_dir=DEFAULT_COCO_DIR):
+    """Build the COCO-format dataset for RF-DETR from the YOLO dataset.
+
+    For each split, the regular files in ``<yolo_dir>/<split>/images`` and
+    ``<yolo_dir>/<split>/_annotations.coco.json`` are copied into
+    ``<coco_dir>/<split>/``.
+
+    Args:
+        yolo_dir: Source dataset directory.
+        coco_dir: Destination directory; created if missing.
+
+    Returns:
+        True if at least one split received both images and annotations,
+        False if the source is missing or no usable split was produced.
+    """
+    print("Setting up COCO format dataset...")
+
+    yolo_dir = Path(yolo_dir)
+    coco_dir = Path(coco_dir)
+
+    if not yolo_dir.is_dir():
+        print(f"Error: {yolo_dir}/ not found. Run split_coco_dataset.py first!")
+        return False
+
+    usable_splits = 0
+    for split in SPLITS:
+        split_dir = coco_dir / split
+        split_dir.mkdir(parents=True, exist_ok=True)
+
+        # Copy regular files only: shutil.copy2() raises on a directory, and
+        # a stray sub-directory should not abort the whole setup.
+        yolo_images = yolo_dir / split / "images"
+        copied = 0
+        if yolo_images.is_dir():
+            for img_file in yolo_images.iterdir():
+                if img_file.is_file():
+                    shutil.copy2(img_file, split_dir)
+                    copied += 1
+        if not copied:
+            print(f"Warning: no images found in {yolo_images}")
+
+        coco_ann = yolo_dir / split / ANNOTATION_FILE
+        has_annotations = coco_ann.is_file()
+        if has_annotations:
+            shutil.copy2(coco_ann, split_dir)
+        else:
+            print(f"Warning: missing annotations file {coco_ann}")
+
+        if copied and has_annotations:
+            usable_splits += 1
+
+    # Reporting success for a dataset with no usable split would only defer
+    # the failure to training time.
+    if not usable_splits:
+        print("Error: no split has both images and COCO annotations")
+        return False
+
+    print(f"COCO dataset created at: {coco_dir}")
+    return True
+
+
+def update_yolov6_data_config(
+    coco_file=DEFAULT_YOLO_DIR / "train" / ANNOTATION_FILE,
+    yolov6_script=DEFAULT_YOLOV6_SCRIPT,
+):
+    """Replace the single-class placeholder in the YOLOv6 training script.
+
+    The class count and names are read from the ``categories`` list of the
+    COCO annotations and substituted for ``nc: 1`` / ``names: ['knot']`` in
+    the script text. A script without that placeholder is left untouched.
+
+    Args:
+        coco_file: COCO annotations file providing the categories.
+        yolov6_script: Training script whose embedded config is rewritten.
+    """
+    print("Updating YOLOv6 data configuration...")
+
+    coco_file = Path(coco_file)
+    if not coco_file.exists():
+        print("Warning: Cannot find COCO annotations to update YOLOv6 config")
+        return
+
+    with coco_file.open("r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    # An empty class list would write "nc: 0" into the script; refuse instead.
+    categories = data.get("categories") or []
+    if not categories:
+        print("Warning: COCO annotations list no categories; YOLOv6 config unchanged")
+        return
+    nc = len(categories)
+    names = [cat["name"] for cat in categories]
+
+    yolov6_script = Path(yolov6_script)
+    if not yolov6_script.exists():
+        print(f"Warning: {yolov6_script} not found; YOLOv6 config unchanged")
+        return
+
+    content = yolov6_script.read_text(encoding="utf-8")
+    old_config = "nc: 1\nnames: ['knot']"
+    # NOTE: in train_yolov6.py this text sits inside an f-string, so class
+    # names containing "{" or "}" would need escaping before substitution.
+    new_config = f"nc: {nc}\nnames: {names}"
+
+    if old_config in content:
+        yolov6_script.write_text(
+            content.replace(old_config, new_config), encoding="utf-8"
+        )
+        print(f"Updated YOLOv6 script with {nc} classes: {names}")
+    else:
+        print("YOLOv6 config already updated or not found")
+
+
+def main():
+    """Run the dataset setup and return a process exit code (0 on success)."""
+    print("Setting up multi-format datasets for different ML frameworks...\n")
+
+    # Setup COCO format for RF-DETR
+    if not setup_coco_dataset():
+        print("❌ Failed to setup COCO dataset")
+        return 1
+    print("✅ COCO format dataset ready for RF-DETR")
+
+    # Update YOLOv6 configuration
+    update_yolov6_data_config()
+
+    print("\n" + "=" * 60)
+    print("DATASET SETUP COMPLETE!")
+    print("=" * 60)
+    print("Available datasets:")
+    print(" 📁 dataset_coco/ → RF-DETR (COCO format)")
+    print(" 📁 dataset_yolo/ → YOLOX, YOLOv6, YOLOv8 (YOLO format)")
+    print()
+    print("Training commands:")
+    print(" 🔶 RF-DETR: python train_rfdetr.py --dataset-dir dataset_coco --output-dir runs/rfdetr")
+    print(" 🔵 YOLOX: python train_yolox.py --dataset-dir dataset_yolo --model yolox-nano")
+    print(" 🟡 YOLOv6: python train_yolov6.py --dataset-dir dataset_yolo --model yolov6n")
+    print("=" * 60)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/train_yolov6.py b/train_yolov6.py index ed72fd81..b0e421b0 100644 --- a/train_yolov6.py +++ b/train_yolov6.py @@ -68,8 +68,8 @@ def train_yolov6( f.write(f"""train: {train_dir.absolute()} val: {valid_dir.absolute()} -nc: 1 -names: ['knot'] +nc: 10 +names: ['Live knot', 'Dead knot', 'Knot with crack', 'Crack', 'Resin', 'Marrow', 'Quartzity', 'Knot missing', 'Blue stain', 'Overgrown'] """) print(f"\n{'='*60}")