111 lines
3.4 KiB
Python
111 lines
3.4 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Set up multi-format datasets for different model frameworks.

Creates:
- dataset_coco/ for RF-DETR (COCO format), copied from dataset_yolo/

Requires (must already exist; run split_coco_dataset.py first):
- dataset_yolo/ for YOLOX/YOLOv6/YOLOv8 (YOLO format)

Usage:
    python setup_datasets.py
|
||
|
|
"""
|
||
|
|
|
||
|
|
import json
|
||
|
|
import shutil
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
|
||
|
|
def setup_coco_dataset():
    """Build the COCO-format dataset tree used by RF-DETR.

    For each of the ``train``, ``valid`` and ``test`` splits this creates
    ``dataset_coco/<split>/`` and copies into it:

    * every regular file found directly in ``dataset_yolo/<split>/images/``
      (if that directory exists), and
    * ``dataset_yolo/<split>/_annotations.coco.json`` (if that file exists).

    All paths are relative to the current working directory.

    Returns:
        bool: ``False`` when ``dataset_yolo/`` does not exist (nothing is
        created in that case), ``True`` otherwise.
    """
    print("Setting up COCO format dataset...")

    coco_dir = Path("dataset_coco")
    yolo_dir = Path("dataset_yolo")

    if not yolo_dir.exists():
        print("Error: dataset_yolo/ not found. Run split_coco_dataset.py first!")
        return False

    for split in ("train", "valid", "test"):
        split_dir = coco_dir / split
        split_dir.mkdir(parents=True, exist_ok=True)

        # Copy images from the YOLO dataset.
        yolo_images = yolo_dir / split / "images"
        if yolo_images.is_dir():
            for img_file in yolo_images.iterdir():
                # shutil.copy2 cannot copy directories; a stray sub-folder
                # must not abort the whole setup, so only copy regular files.
                if img_file.is_file():
                    shutil.copy2(img_file, split_dir)

        # Copy the COCO annotations that live next to the images folder.
        coco_ann = yolo_dir / split / "_annotations.coco.json"
        if coco_ann.is_file():
            shutil.copy2(coco_ann, split_dir)

    print(f"COCO dataset created at: {coco_dir}")
    return True
|
||
|
|
|
||
|
|
|
||
|
|
def update_yolov6_data_config():
    """Sync the class count and names embedded in ``train_yolov6.py``.

    Reads the ``categories`` list from
    ``dataset_yolo/train/_annotations.coco.json`` and replaces the hard-coded
    ``nc: 1`` / ``names: ['knot']`` snippet inside ``train_yolov6.py`` with
    the number of categories and their names, in the order they appear in the
    annotation file. All paths are relative to the current working directory.

    Nothing is written, and a message is printed instead, when the annotation
    file is missing, when it lists no categories, when ``train_yolov6.py``
    does not exist, or when the hard-coded snippet is not present in it.

    Returns:
        None
    """
    print("Updating YOLOv6 data configuration...")

    # Load the COCO annotations to get class information.
    coco_file = Path("dataset_yolo/train/_annotations.coco.json")
    if not coco_file.exists():
        print("Warning: Cannot find COCO annotations to update YOLOv6 config")
        return

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with coco_file.open("r", encoding="utf-8") as f:
        data = json.load(f)

    categories = data.get("categories") if isinstance(data, dict) else None
    if not categories:
        # Writing "nc: 0" into the training script would only break it later.
        print("Warning: No categories in COCO annotations; YOLOv6 config not updated")
        return

    nc = len(categories)
    names = [cat["name"] for cat in categories]

    yolov6_script = Path("train_yolov6.py")
    if not yolov6_script.exists():
        print("YOLOv6 config already updated or not found")
        return

    # Python source files are UTF-8 by default, so read/write them as such.
    content = yolov6_script.read_text(encoding="utf-8")

    # Replace hardcoded nc: 1 and names: ['knot'].
    old_config = "nc: 1\nnames: ['knot']"
    new_config = f"nc: {nc}\nnames: {names}"

    if old_config not in content:
        print("YOLOv6 config already updated or not found")
        return

    yolov6_script.write_text(content.replace(old_config, new_config), encoding="utf-8")
    print(f"Updated YOLOv6 script with {nc} classes: {names}")
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Run the full dataset setup and print a summary of what to do next.

    Sets up the COCO dataset first; if that fails, the failure is reported
    and nothing else runs. Otherwise the YOLOv6 configuration is updated and
    the available datasets and training commands are printed.
    """
    print("Setting up multi-format datasets for different ML frameworks...\n")

    # The COCO layout (for RF-DETR) is a prerequisite for everything below.
    if not setup_coco_dataset():
        print("❌ Failed to setup COCO dataset")
        return
    print("✅ COCO format dataset ready for RF-DETR")

    # Bring the YOLOv6 class settings in line with the annotations.
    update_yolov6_data_config()

    rule = "=" * 60
    summary_lines = (
        "\n" + rule,
        "DATASET SETUP COMPLETE!",
        rule,
        "Available datasets:",
        " 📁 dataset_coco/ → RF-DETR (COCO format)",
        " 📁 dataset_yolo/ → YOLOX, YOLOv6, YOLOv8 (YOLO format)",
        "",
        "Training commands:",
        " 🔶 RF-DETR: python train_rfdetr.py --dataset-dir dataset_coco --output-dir runs/rfdetr",
        " 🔵 YOLOX: python train_yolox.py --dataset-dir dataset_yolo --model yolox-nano",
        " 🟡 YOLOv6: python train_yolov6.py --dataset-dir dataset_yolo --model yolov6n",
        rule,
    )
    for line in summary_lines:
        print(line)
|
||
|
|
|
||
|
|
|
||
|
|
# Script entry point: run the setup only when executed directly, not on import.
if __name__ == "__main__":
    main()
|