Add multi-framework dataset setup for RF-DETR, YOLOX, and YOLOv6
- Create dataset_coco/ for RF-DETR (COCO format)
- Rename dataset_split/ to dataset_yolo/ for clarity
- Add setup_datasets.py script for automated multi-format setup
- Update YOLOv6 script with correct 10-class configuration
- Update README with framework comparison and training instructions
- Update .gitignore to exclude both dataset directories
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@ -36,7 +36,8 @@ runs/
|
|||||||
# Dataset (large files)
|
# Dataset (large files)
|
||||||
IMAGE/
|
IMAGE/
|
||||||
images/
|
images/
|
||||||
dataset_split/
|
dataset_yolo/
|
||||||
|
dataset_coco/
|
||||||
*.jpg
|
*.jpg
|
||||||
*.jpeg
|
*.jpeg
|
||||||
*.png
|
*.png
|
||||||
|
|||||||
83
README.md
83
README.md
@ -31,7 +31,9 @@ This repository contains a complete wood defect detection system using YOLOX/YOL
|
|||||||
- Valid: 2,027 images
|
- Valid: 2,027 images
|
||||||
- Test: 2,029 images
|
- Test: 2,029 images
|
||||||
|
|
||||||
**Format**: YOLO format (images/ and labels/ subdirectories with data.yaml configuration)
|
**Formats Available**:
|
||||||
|
- `dataset_coco/` → COCO format for RF-DETR
|
||||||
|
- `dataset_yolo/` → YOLO format for YOLOX, YOLOv6, YOLOv8
|
||||||
|
|
||||||
## 🚀 Quick Start
|
## 🚀 Quick Start
|
||||||
|
|
||||||
@ -48,21 +50,19 @@ source .venv/bin/activate
|
|||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
pip install -U pip
|
pip install -U pip
|
||||||
pip install ultralytics gradio
|
pip install ultralytics gradio rfdetr
|
||||||
```
|
```
|
||||||
|
|
||||||
### 2. Download Dataset
|
### 2. Setup Datasets
|
||||||
|
|
||||||
The dataset is not included in the repository due to size. Download from Kaggle and organize as follows:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Download from Kaggle (requires Kaggle API)
|
# Download dataset from Kaggle (requires Kaggle API)
|
||||||
kaggle datasets download -d kirs0816/wood-surface-defects
|
kaggle datasets download -d kirs0816/wood-surface-defects
|
||||||
unzip wood-surface-defects.zip
|
unzip wood-surface-defects.zip
|
||||||
|
|
||||||
# Run the dataset preparation script
|
# Create multi-format datasets
|
||||||
python split_coco_dataset.py
|
python split_coco_dataset.py # Creates dataset_yolo/
|
||||||
python reorganize_dataset.py
|
python setup_datasets.py # Creates dataset_coco/ and updates configs
|
||||||
```
|
```
|
||||||
|
|
||||||
### 3. Launch Annotation GUI
|
### 3. Launch Annotation GUI
|
||||||
@ -77,10 +77,38 @@ Open http://localhost:7860 in your browser to access the web-based annotation in
|
|||||||
- Manual annotation tools
|
- Manual annotation tools
|
||||||
- Real-time result visualization
|
- Real-time result visualization
|
||||||
|
|
||||||
### 4. Train Model
|
### 4. Train Models
|
||||||
|
|
||||||
|
Choose from three different frameworks:
|
||||||
|
|
||||||
|
#### RF-DETR (Highest accuracy, slower training)
|
||||||
```bash
|
```bash
|
||||||
python train_yolox.py --dataset-dir dataset_split --model yolox-nano --epochs 5 --batch-size 4
|
python train_rfdetr.py \
|
||||||
|
--dataset-dir dataset_coco \
|
||||||
|
--output-dir runs/rfdetr_medium \
|
||||||
|
--model medium \
|
||||||
|
--epochs 50 \
|
||||||
|
--batch-size 4 \
|
||||||
|
--grad-accum-steps 4 \
|
||||||
|
--lr 1e-4
|
||||||
|
```
|
||||||
|
|
||||||
|
#### YOLOX (Balanced performance/speed)
|
||||||
|
```bash
|
||||||
|
python train_yolox.py \
|
||||||
|
--dataset-dir dataset_yolo \
|
||||||
|
--model yolox-nano \
|
||||||
|
--epochs 50 \
|
||||||
|
--batch-size 8
|
||||||
|
```
|
||||||
|
|
||||||
|
#### YOLOv6 (Fastest, edge-optimized)
|
||||||
|
```bash
|
||||||
|
python train_yolov6.py \
|
||||||
|
--dataset-dir dataset_yolo \
|
||||||
|
--model yolov6n \
|
||||||
|
--epochs 50 \
|
||||||
|
--batch-size 8
|
||||||
```
|
```
|
||||||
|
|
||||||
## 📁 Project Structure
|
## 📁 Project Structure
|
||||||
@ -88,11 +116,23 @@ python train_yolox.py --dataset-dir dataset_split --model yolox-nano --epochs 5
|
|||||||
```
|
```
|
||||||
saw_mill_knot_detection/
|
saw_mill_knot_detection/
|
||||||
├── annotation_gui.py # Gradio web interface for annotation
|
├── annotation_gui.py # Gradio web interface for annotation
|
||||||
|
├── train_rfdetr.py # RF-DETR training script
|
||||||
├── train_yolox.py # YOLOX training script
|
├── train_yolox.py # YOLOX training script
|
||||||
├── split_coco_dataset.py # Dataset splitting utility
|
├── train_yolov6.py # YOLOv6 training script
|
||||||
├── reorganize_dataset.py # Dataset reorganization to YOLO format
|
├── setup_datasets.py # Multi-format dataset setup script
|
||||||
├── config.py # Configuration settings
|
├── split_coco_dataset.py # Dataset splitting utility
|
||||||
├── dataset_split/ # Training data (excluded from git)
|
├── config.py # Configuration settings
|
||||||
|
├── dataset_coco/ # RF-DETR dataset (COCO format)
|
||||||
|
│ ├── train/
|
||||||
|
│ │ ├── *.jpg # Training images
|
||||||
|
│ │ └── _annotations.coco.json
|
||||||
|
│ ├── valid/
|
||||||
|
│ │ ├── *.jpg # Validation images
|
||||||
|
│ │ └── _annotations.coco.json
|
||||||
|
│ └── test/
|
||||||
|
│ ├── *.jpg # Test images
|
||||||
|
│ └── _annotations.coco.json
|
||||||
|
├── dataset_yolo/ # YOLOX/YOLOv6/YOLOv8 dataset (YOLO format)
|
||||||
│ ├── train/
|
│ ├── train/
|
||||||
│ │ ├── images/ # Training images
|
│ │ ├── images/ # Training images
|
||||||
│ │ └── labels/ # YOLO format labels
|
│ │ └── labels/ # YOLO format labels
|
||||||
@ -104,17 +144,20 @@ saw_mill_knot_detection/
|
|||||||
│ │ └── labels/ # YOLO format labels
|
│ │ └── labels/ # YOLO format labels
|
||||||
│ └── data.yaml # YOLO dataset configuration
|
│ └── data.yaml # YOLO dataset configuration
|
||||||
├── runs/ # Training outputs (excluded from git)
|
├── runs/ # Training outputs (excluded from git)
|
||||||
│ └── yolox_training/
|
|
||||||
│ └── training/
|
|
||||||
│ └── weights/
|
|
||||||
│ ├── best.pt # Best model weights
|
|
||||||
│ └── last.pt # Latest model weights
|
|
||||||
├── bbox_coco_dataset.json # Original COCO annotations
|
├── bbox_coco_dataset.json # Original COCO annotations
|
||||||
├── requirements.txt # Python dependencies
|
├── requirements.txt # Python dependencies
|
||||||
├── .gitignore # Excludes large data files
|
├── .gitignore # Excludes large data files
|
||||||
└── README.md # This file
|
└── README.md # This file
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 🤖 Framework Comparison
|
||||||
|
|
||||||
|
| Framework | Accuracy | Speed | Memory | Deployment | Best For |
|
||||||
|
|-----------|----------|-------|--------|------------|----------|
|
||||||
|
| **RF-DETR** | ⭐⭐⭐⭐⭐ | ⭐⭐ | ⭐⭐⭐ | CPU/GPU | Highest accuracy, research |
|
||||||
|
| **YOLOX** | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐ | Edge devices | Balanced performance |
|
||||||
|
| **YOLOv6** | ⭐⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐ | Mobile/Edge | Fast inference, production |
|
||||||
|
|
||||||
## 🛠️ Usage Guide
|
## 🛠️ Usage Guide
|
||||||
|
|
||||||
### Annotation GUI Features
|
### Annotation GUI Features
|
||||||
|
|||||||
111
setup_datasets.py
Normal file
111
setup_datasets.py
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Setup multi-format datasets for different model frameworks.
|
||||||
|
|
||||||
|
Creates:
|
||||||
|
- dataset_coco/ for RF-DETR (COCO format)
|
||||||
|
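- (dataset_yolo/ for YOLOX/YOLOv6/YOLOv8 is not created here; it must already exist - run split_coco_dataset.py first - and is the source that dataset_coco/ is copied from)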
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python setup_datasets.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def setup_coco_dataset():
    """Build ``dataset_coco/`` (COCO layout for RF-DETR) from ``dataset_yolo/``.

    For each of the ``train``, ``valid`` and ``test`` splits, the regular
    files found in ``dataset_yolo/<split>/images`` and the split's
    ``_annotations.coco.json`` are copied flat into ``dataset_coco/<split>/``.
    All paths are relative to the current working directory.

    A missing image folder or annotation file does not abort the run, but a
    warning is printed so that an incomplete split is not mistaken for a
    usable one.

    Returns:
        bool: ``False`` when ``dataset_yolo/`` does not exist (nothing is
        created in that case), ``True`` otherwise.
    """
    print("Setting up COCO format dataset...")

    coco_dir = Path("dataset_coco")
    yolo_dir = Path("dataset_yolo")

    if not yolo_dir.exists():
        print("Error: dataset_yolo/ not found. Run split_coco_dataset.py first!")
        return False

    for split in ["train", "valid", "test"]:
        split_dir = coco_dir / split
        split_dir.mkdir(parents=True, exist_ok=True)

        # Copy images from the YOLO dataset into the flat COCO split folder.
        yolo_images = yolo_dir / split / "images"
        if yolo_images.is_dir():
            for img_file in yolo_images.glob("*"):
                # shutil.copy2 handles regular files only and raises when
                # handed a directory, so stray sub-folders are skipped
                # instead of aborting the whole setup.
                if img_file.is_file():
                    shutil.copy2(img_file, split_dir)
        else:
            print(f"Warning: {yolo_images} not found; no images copied for '{split}'")

        # Copy the COCO annotations that sit next to the YOLO split.
        coco_ann = yolo_dir / split / "_annotations.coco.json"
        if coco_ann.is_file():
            shutil.copy2(coco_ann, split_dir)
        else:
            print(f"Warning: {coco_ann} not found; '{split}' has no COCO annotations")

    print(f"COCO dataset created at: {coco_dir}")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def update_yolov6_data_config():
    """Patch the class configuration embedded in ``train_yolov6.py``.

    The category list is read from
    ``dataset_yolo/train/_annotations.coco.json``. The literal text
    ``nc: 1`` / ``names: ['knot']`` inside ``train_yolov6.py`` is then
    replaced with the real class count and the class names, in the order the
    categories appear in the annotation file. Paths are relative to the
    current working directory.

    Nothing is written when the annotation file is missing, when
    ``train_yolov6.py`` is missing, or when the placeholder text is not
    present (for example because it was already replaced); a message is
    printed in each of those cases.

    Returns:
        None
    """
    print("Updating YOLOv6 data configuration...")

    # Load the COCO annotations to get class information.
    coco_file = Path("dataset_yolo/train/_annotations.coco.json")
    if not coco_file.exists():
        print("Warning: Cannot find COCO annotations to update YOLOv6 config")
        return

    # Explicit UTF-8: the platform default encoding may not be able to decode
    # non-ASCII characters in the annotation file.
    with coco_file.open('r', encoding='utf-8') as f:
        data = json.load(f)

    categories = data['categories']
    nc = len(categories)
    names = [cat['name'] for cat in categories]

    yolov6_script = Path("train_yolov6.py")
    if not yolov6_script.exists():
        print("YOLOv6 config already updated or not found")
        return

    content = yolov6_script.read_text(encoding='utf-8')

    # Replace hardcoded nc: 1 and names: ['knot'].
    old_config = "nc: 1\nnames: ['knot']"
    if old_config not in content:
        print("YOLOv6 config already updated or not found")
        return

    # The placeholder sits inside an f-string in train_yolov6.py, so literal
    # braces in a class name have to be doubled to survive formatting there.
    names_literal = str(names).replace("{", "{{").replace("}", "}}")
    new_config = f"nc: {nc}\nnames: {names_literal}"

    yolov6_script.write_text(content.replace(old_config, new_config), encoding='utf-8')
    print(f"Updated YOLOv6 script with {nc} classes: {names}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the full dataset setup and print a usage summary.

    Builds the COCO dataset first; if that step reports failure the function
    stops there. Otherwise the YOLOv6 configuration is refreshed and a banner
    with the available datasets and example training commands is printed.
    """
    print("Setting up multi-format datasets for different ML frameworks...\n")

    # Setup COCO format for RF-DETR; bail out early when it cannot be built.
    if not setup_coco_dataset():
        print("❌ Failed to setup COCO dataset")
        return
    print("✅ COCO format dataset ready for RF-DETR")

    # Update YOLOv6 configuration
    update_yolov6_data_config()

    rule = "="*60
    banner = (
        "\n" + rule,
        "DATASET SETUP COMPLETE!",
        rule,
        "Available datasets:",
        " 📁 dataset_coco/ → RF-DETR (COCO format)",
        " 📁 dataset_yolo/ → YOLOX, YOLOv6, YOLOv8 (YOLO format)",
        "",
        "Training commands:",
        " 🔶 RF-DETR: python train_rfdetr.py --dataset-dir dataset_coco --output-dir runs/rfdetr",
        " 🔵 YOLOX: python train_yolox.py --dataset-dir dataset_yolo --model yolox-nano",
        " 🟡 YOLOv6: python train_yolov6.py --dataset-dir dataset_yolo --model yolov6n",
        rule,
    )
    for text in banner:
        print(text)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the setup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
@ -68,8 +68,8 @@ def train_yolov6(
|
|||||||
f.write(f"""train: {train_dir.absolute()}
|
f.write(f"""train: {train_dir.absolute()}
|
||||||
val: {valid_dir.absolute()}
|
val: {valid_dir.absolute()}
|
||||||
|
|
||||||
nc: 1
|
nc: 10
|
||||||
names: ['knot']
|
names: ['Live knot', 'Dead knot', 'Knot with crack', 'Crack', 'Resin', 'Marrow', 'Quartzity', 'Knot missing', 'Blue stain', 'Overgrown']
|
||||||
""")
|
""")
|
||||||
|
|
||||||
print(f"\n{'='*60}")
|
print(f"\n{'='*60}")
|
||||||
|
|||||||
Reference in New Issue
Block a user