97 lines
2.7 KiB
Python
97 lines
2.7 KiB
Python
|
|
"""
|
||
|
|
Reorganize dataset to YOLO format with images/ and labels/ subdirectories.
|
||
|
|
"""
|
||
|
|
|
||
|
|
from pathlib import Path
|
||
|
|
import shutil
|
||
|
|
|
||
|
|
def reorganize_split(split_dir: Path, image_suffixes: tuple = (".jpg",)) -> None:
    """Move the images of one split (train/valid/test) into ``images/``.

    Regular files directly inside ``split_dir`` whose suffix matches one of
    ``image_suffixes`` (compared case-insensitively, so ``.JPG`` is treated
    like ``.jpg``) are moved into ``split_dir / "images"``, which is created
    if it does not exist. A file whose name is already present in ``images/``
    is left where it is and reported instead of being overwritten.

    The ``labels/`` subdirectory is only inspected: its ``*.txt`` files are
    counted, or a warning is printed when the directory is absent.

    Args:
        split_dir: Directory of a single split. It must already exist.
        image_suffixes: File suffixes (including the leading dot) that are
            treated as images.
    """
    print(f"Reorganizing {split_dir.name}...")

    images_dir = split_dir / "images"
    images_dir.mkdir(exist_ok=True)

    wanted = {suffix.lower() for suffix in image_suffixes}

    # Snapshot the listing before moving anything so the directory is not
    # modified while it is still being iterated; sorting makes the order
    # deterministic.
    candidates = sorted(
        entry
        for entry in split_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in wanted
    )

    moved_count = 0
    skipped_count = 0
    for img_file in candidates:
        dest = images_dir / img_file.name
        if dest.exists():
            # Never overwrite an image that is already in place.
            skipped_count += 1
            continue
        shutil.move(str(img_file), str(dest))
        moved_count += 1

    print(f" Moved {moved_count} images to {split_dir.name}/images/")
    if skipped_count:
        print(
            f" WARNING: {skipped_count} images already exist in "
            f"{split_dir.name}/images/ and were left in place"
        )

    # Labels are not moved by this function; only report what is there.
    labels_dir = split_dir / "labels"
    if labels_dir.exists():
        label_count = sum(1 for _ in labels_dir.glob("*.txt"))
        print(f" Found {label_count} labels in {split_dir.name}/labels/")
    else:
        print(f" WARNING: No labels directory in {split_dir.name}/")
|
||
|
|
|
||
|
|
def update_data_yaml(dataset_dir: Path):
    """Write ``data.yaml`` in ``dataset_dir`` describing the YOLO layout.

    Any existing ``data.yaml`` is overwritten. The file records the absolute
    dataset root, the ``train``/``valid``/``test`` image directories relative
    to that root, and the fixed list of ten class names keyed by index.

    Args:
        dataset_dir: Dataset root directory; ``data.yaml`` is written
            directly inside it.
    """
    data_yaml = dataset_dir / "data.yaml"

    # Class names in index order; the list position is the class id.
    class_names = [
        "Live knot",
        "Dead knot",
        "Knot with crack",
        "Crack",
        "Resin",
        "Marrow",
        "Quartzity",
        "Knot missing",
        "Blue stain",
        "Overgrown",
    ]

    # Single-quote the root path so sequences that are special in a plain
    # YAML scalar (" #", ": ") cannot truncate or break it. Inside single
    # quotes the only escape is a doubled quote; backslashes stay literal.
    root = str(dataset_dir.absolute()).replace("'", "''")

    lines = [
        "# YOLO dataset configuration",
        f"path: '{root}' # dataset root dir",
        "train: train/images # train images (relative to 'path')",
        "val: valid/images # val images (relative to 'path')",
        "test: test/images # test images (relative to 'path')",
        "",
        "# Classes",
        "names:",
    ]
    # Entries must be indented to be nested under `names:`; at column 0 they
    # would become unrelated top-level keys and `names` would be null.
    lines.extend(f"  {index}: {name}" for index, name in enumerate(class_names))

    # Explicit UTF-8 so a non-ASCII dataset path is written the same way on
    # every platform, regardless of the locale's default encoding.
    data_yaml.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(f"\n✓ Updated {data_yaml}")
|
||
|
|
|
||
|
|
def main():
    """Reorganize every split under ``dataset_split`` and rewrite data.yaml.

    The dataset root is the ``dataset_split`` directory relative to the
    current working directory. Each of the ``train``, ``valid`` and ``test``
    splits that exists is reorganized; a missing split only produces a
    warning. Afterwards ``data.yaml`` is rewritten and a summary of the
    resulting layout is printed.

    Raises:
        SystemExit: If ``dataset_split`` is not an existing directory. The
            error message is written to stderr and the process exit status
            is 1, so calling scripts can detect the failure.
    """
    dataset_dir = Path("dataset_split")

    # is_dir() also rejects a regular file that happens to carry this name.
    if not dataset_dir.is_dir():
        # A string argument makes the interpreter print it to stderr and
        # exit with status 1 instead of reporting success.
        raise SystemExit(f"Error: {dataset_dir} not found")

    # Reorganize each split that is present.
    for split_name in ("train", "valid", "test"):
        split_dir = dataset_dir / split_name
        if split_dir.is_dir():
            reorganize_split(split_dir)
        else:
            print(f"Warning: {split_dir} not found")

    # Rewrite data.yaml so it points at the new images/ directories.
    update_data_yaml(dataset_dir)

    print("\n" + "="*60)
    print("Dataset reorganization complete!")
    print("="*60)
    print("\nNew structure:")
    print("dataset_split/")
    print(" ├── train/")
    print(" │ ├── images/")
    print(" │ └── labels/")
    print(" ├── valid/")
    print(" │ ├── images/")
    print(" │ └── labels/")
    print(" ├── test/")
    print(" │ ├── images/")
    print(" │ └── labels/")
    print(" └── data.yaml")
|
||
|
|
|
||
|
|
# Script entry point: run the reorganization only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    # main() returns None, which SystemExit treats as a successful exit.
    raise SystemExit(main())
|