Files
saw_mill_knot_detection/reorganize_dataset.py

97 lines
2.7 KiB
Python
Raw Permalink Normal View History

"""
Reorganize dataset to YOLO format with images/ and labels/ subdirectories.
"""
from pathlib import Path
import shutil
def reorganize_split(
    split_dir: Path,
    *,
    image_suffixes: "tuple[str, ...]" = (".jpg",),
) -> None:
    """Move the images of one split (train/valid/test) into ``<split>/images/``.

    Image files found directly inside ``split_dir`` are moved into the
    ``images`` subdirectory, which is created when missing.  A file whose name
    already exists in ``images/`` is left where it is and reported, never
    overwritten.  The ``labels`` subdirectory is only inspected: the number of
    ``*.txt`` files is printed, or a warning when the directory is absent.

    Args:
        split_dir: Directory of a single split; it must already exist.
        image_suffixes: File-name endings treated as images.  Matching is
            case-insensitive so ``.JPG`` files are handled the same way on
            case-sensitive and case-insensitive filesystems.  A single string
            is accepted as well.
    """
    print(f"Reorganizing {split_dir.name}...")

    images_dir = split_dir / "images"
    images_dir.mkdir(exist_ok=True)

    if isinstance(image_suffixes, str):
        image_suffixes = (image_suffixes,)
    suffixes = tuple(suffix.lower() for suffix in image_suffixes)

    # Snapshot the listing before moving anything: the loop below removes
    # entries from ``split_dir``, and a directory should not be mutated while
    # it is still being lazily iterated.  Sorting keeps the order stable.
    candidates = sorted(
        entry
        for entry in split_dir.iterdir()
        if entry.is_file() and entry.name.lower().endswith(suffixes)
    )

    moved_count = 0
    skipped_count = 0
    for img_file in candidates:
        dest = images_dir / img_file.name
        if dest.exists():
            # Never overwrite an image that is already in place.
            skipped_count += 1
            continue
        shutil.move(str(img_file), str(dest))
        moved_count += 1

    print(f" Moved {moved_count} images to {split_dir.name}/images/")
    if skipped_count:
        print(
            f" WARNING: Skipped {skipped_count} images already present in "
            f"{split_dir.name}/images/"
        )

    # Labels are expected to be in place already; only report on them.
    labels_dir = split_dir / "labels"
    if labels_dir.is_dir():
        label_count = sum(1 for _ in labels_dir.glob("*.txt"))
        print(f" Found {label_count} labels in {split_dir.name}/labels/")
    else:
        print(f" WARNING: No labels directory in {split_dir.name}/")
def update_data_yaml(dataset_dir: Path) -> None:
    """Write ``data.yaml`` in ``dataset_dir`` for the images/labels layout.

    The file records the absolute dataset root, the per-split image
    directories relative to that root, and the numbered class names.  An
    existing ``data.yaml`` is overwritten.

    Args:
        dataset_dir: Dataset root that contains the split directories.
    """
    data_yaml = dataset_dir / "data.yaml"

    # Emit the root as a single-quoted YAML scalar (a literal quote is written
    # as two quotes) so that a path containing " #" or ": " is not cut short
    # or misparsed.
    quoted_root = "'" + str(dataset_dir.absolute()).replace("'", "''") + "'"

    # The position in this tuple is the class id written to the file.
    # NOTE(review): presumably these ids must match the class ids used in the
    # label files -- confirm before reordering.
    class_names = (
        "Live knot",
        "Dead knot",
        "Knot with crack",
        "Crack",
        "Resin",
        "Marrow",
        "Quartzity",
        "Knot missing",
        "Blue stain",
        "Overgrown",
    )

    lines = [
        "# YOLO dataset configuration",
        f"path: {quoted_root} # dataset root dir",
        "train: train/images # train images (relative to 'path')",
        "val: valid/images # val images (relative to 'path')",
        "test: test/images # test images (relative to 'path')",
        "# Classes",
        "names:",
    ]
    # Entries are indented so they nest under the ``names`` mapping.
    lines.extend(f"  {index}: {name}" for index, name in enumerate(class_names))

    # Explicit encoding: the path may contain non-ASCII characters and the
    # platform default encoding is not guaranteed to be UTF-8.
    data_yaml.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"\n✓ Updated {data_yaml}")
def main(dataset_dir: "str | Path" = "dataset_split") -> int:
    """Reorganize every split of the dataset and rewrite its ``data.yaml``.

    Each of the ``train``, ``valid`` and ``test`` subdirectories that exists
    is passed to ``reorganize_split``; a missing one only produces a warning.
    Afterwards ``data.yaml`` is regenerated and a summary of the resulting
    layout is printed.

    Args:
        dataset_dir: Dataset root; defaults to ``dataset_split`` relative to
            the current working directory.

    Returns:
        ``0`` on success, ``1`` when ``dataset_dir`` is not an existing
        directory (nothing is modified in that case).
    """
    dataset_dir = Path(dataset_dir)
    if not dataset_dir.is_dir():
        print(f"Error: {dataset_dir} not found")
        return 1

    # Remember which splits were really handled so the summary below does not
    # advertise directories that do not exist.
    processed = []
    for split_name in ("train", "valid", "test"):
        split_dir = dataset_dir / split_name
        if split_dir.is_dir():
            reorganize_split(split_dir)
            processed.append(split_name)
        else:
            print(f"Warning: {split_dir} not found")

    update_data_yaml(dataset_dir)

    print("\n" + "=" * 60)
    print("Dataset reorganization complete!")
    print("=" * 60)
    print("\nNew structure:")
    print(f"{dataset_dir}/")
    for split_name in processed:
        print(f" ├── {split_name}/")
        print(" │ ├── images/")
        print(" │ └── labels/")
    print(" └── data.yaml")
    return 0


if __name__ == "__main__":
    # Propagate the result as the process exit status so that shell scripts
    # and CI can detect a missing dataset directory.
    raise SystemExit(main())