#!/usr/bin/env python3
"""
Set up multi-format datasets for different model frameworks.

Reads:
- dataset_yolo/ (must already exist; produced by split_coco_dataset.py)

Creates:
- dataset_coco/ for RF-DETR (COCO format), populated from dataset_yolo/

Also rewrites the class configuration embedded in train_yolov6.py so that it
matches the categories found in the training annotations.

Usage:
    python setup_datasets.py
"""

import json
import shutil
from pathlib import Path

# Dataset splits mirrored from the YOLO layout into the COCO layout.
SPLITS = ("train", "valid", "test")

# Name of the COCO annotation file stored next to each split.
COCO_ANNOTATION_NAME = "_annotations.coco.json"


def setup_coco_dataset() -> bool:
    """Set up COCO format dataset for RF-DETR.

    For every split, copies the files in ``dataset_yolo/<split>/images`` and
    the split's ``_annotations.coco.json`` flat into ``dataset_coco/<split>``.
    Paths are relative to the current working directory.

    Returns:
        ``False`` if ``dataset_yolo/`` is missing, ``True`` otherwise (split
        directories are created even when a split has nothing to copy).
    """
    print("Setting up COCO format dataset...")

    coco_dir = Path("dataset_coco")
    yolo_dir = Path("dataset_yolo")

    if not yolo_dir.is_dir():
        print("Error: dataset_yolo/ not found. Run split_coco_dataset.py first!")
        return False

    for split in SPLITS:
        split_dir = coco_dir / split
        split_dir.mkdir(parents=True, exist_ok=True)

        # Copy images from the YOLO dataset. Only regular files are copied:
        # shutil.copy2 raises on directories, so sub-folders are skipped.
        yolo_images = yolo_dir / split / "images"
        if yolo_images.is_dir():
            for img_file in yolo_images.iterdir():
                if img_file.is_file():
                    shutil.copy2(img_file, split_dir)

        # Copy COCO annotations, when the split has them.
        coco_ann = yolo_dir / split / COCO_ANNOTATION_NAME
        if coco_ann.is_file():
            shutil.copy2(coco_ann, split_dir)

    print(f"COCO dataset created at: {coco_dir}")
    return True


def update_yolov6_data_config() -> None:
    """Update YOLOv6 data config to use the correct number of classes.

    Reads the category list from ``dataset_yolo/train/_annotations.coco.json``
    and replaces the hard-coded ``nc: 1`` / ``names: ['knot']`` pair inside
    ``train_yolov6.py`` with the real class count and names (in the order the
    categories appear in the annotation file).

    The function is best-effort: it prints a message and returns without
    modifying anything when the annotations, the categories, the training
    script, or the hard-coded snippet cannot be found.
    """
    print("Updating YOLOv6 data configuration...")

    # Load the COCO annotations to get class information.
    coco_file = Path("dataset_yolo/train") / COCO_ANNOTATION_NAME
    if not coco_file.is_file():
        print("Warning: Cannot find COCO annotations to update YOLOv6 config")
        return

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with coco_file.open("r", encoding="utf-8") as f:
        data = json.load(f)

    categories = data.get("categories") if isinstance(data, dict) else None
    if not categories:
        # Writing "nc: 0" would only break training, so leave the script alone.
        print("Warning: No categories in COCO annotations; YOLOv6 config not updated")
        return

    nc = len(categories)
    names = [cat["name"] for cat in categories]

    yolov6_script = Path("train_yolov6.py")
    if not yolov6_script.is_file():
        print("Warning: train_yolov6.py not found; YOLOv6 config not updated")
        return

    content = yolov6_script.read_text(encoding="utf-8")

    # Replace hardcoded nc: 1 and names: ['knot'].
    old_config = "nc: 1\nnames: ['knot']"
    new_config = f"nc: {nc}\nnames: {names}"

    if old_config in content:
        yolov6_script.write_text(
            content.replace(old_config, new_config), encoding="utf-8"
        )
        print(f"Updated YOLOv6 script with {nc} classes: {names}")
    else:
        print("YOLOv6 config already updated or not found")


def main() -> None:
    """Build the COCO dataset, refresh the YOLOv6 config and print a summary.

    Stops early (after printing a failure message) when the COCO dataset
    cannot be set up.
    """
    print("Setting up multi-format datasets for different ML frameworks...\n")

    # Setup COCO format for RF-DETR
    if setup_coco_dataset():
        print("✅ COCO format dataset ready for RF-DETR")
    else:
        print("❌ Failed to setup COCO dataset")
        return

    # Update YOLOv6 configuration
    update_yolov6_data_config()

    print("\n" + "=" * 60)
    print("DATASET SETUP COMPLETE!")
    print("=" * 60)
    print("Available datasets:")
    print(" 📁 dataset_coco/ → RF-DETR (COCO format)")
    print(" 📁 dataset_yolo/ → YOLOX, YOLOv6, YOLOv8 (YOLO format)")
    print()
    print("Training commands:")
    print(" 🔶 RF-DETR: python train_rfdetr.py --dataset-dir dataset_coco --output-dir runs/rfdetr")
    print(" 🔵 YOLOX: python train_yolox.py --dataset-dir dataset_yolo --model yolox-nano")
    print(" 🟡 YOLOv6: python train_yolov6.py --dataset-dir dataset_yolo --model yolov6n")
    print("=" * 60)


if __name__ == "__main__":
    main()