""" Reorganize dataset to YOLO format with images/ and labels/ subdirectories. """ from pathlib import Path import shutil def reorganize_split(split_dir: Path): """Reorganize one split (train/valid/test) to YOLO format.""" print(f"Reorganizing {split_dir.name}...") # Create images directory images_dir = split_dir / "images" images_dir.mkdir(exist_ok=True) # Move all .jpg files to images/ moved_count = 0 for img_file in split_dir.glob("*.jpg"): dest = images_dir / img_file.name if not dest.exists(): shutil.move(str(img_file), str(dest)) moved_count += 1 print(f" Moved {moved_count} images to {split_dir.name}/images/") # Check labels directory labels_dir = split_dir / "labels" if labels_dir.exists(): label_count = len(list(labels_dir.glob("*.txt"))) print(f" Found {label_count} labels in {split_dir.name}/labels/") else: print(f" WARNING: No labels directory in {split_dir.name}/") def update_data_yaml(dataset_dir: Path): """Update data.yaml to reflect new structure.""" data_yaml = dataset_dir / "data.yaml" content = f"""# YOLO dataset configuration path: {dataset_dir.absolute()} # dataset root dir train: train/images # train images (relative to 'path') val: valid/images # val images (relative to 'path') test: test/images # test images (relative to 'path') # Classes names: 0: Live knot 1: Dead knot 2: Knot with crack 3: Crack 4: Resin 5: Marrow 6: Quartzity 7: Knot missing 8: Blue stain 9: Overgrown """ data_yaml.write_text(content) print(f"\n✓ Updated {data_yaml}") def main(): dataset_dir = Path("dataset_split") if not dataset_dir.exists(): print(f"Error: {dataset_dir} not found") return # Reorganize each split for split_name in ["train", "valid", "test"]: split_dir = dataset_dir / split_name if split_dir.exists(): reorganize_split(split_dir) else: print(f"Warning: {split_dir} not found") # Update data.yaml update_data_yaml(dataset_dir) print("\n" + "="*60) print("Dataset reorganization complete!") print("="*60) print("\nNew structure:") print("dataset_split/") print(" ├── train/") print(" │ ├── images/") print(" │ └── labels/") print(" ├── valid/") print(" │ ├── images/") print(" │ └── labels/") print(" ├── test/") print(" │ ├── images/") print(" │ └── labels/") print(" └── data.yaml") if __name__ == "__main__": main()