Initial commit: Wood knot detection model and GUI
This commit is contained in:
96
reorganize_dataset.py
Normal file
96
reorganize_dataset.py
Normal file
@ -0,0 +1,96 @@
|
||||
"""
|
||||
Reorganize dataset to YOLO format with images/ and labels/ subdirectories.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
|
||||
def reorganize_split(split_dir: Path, extensions: tuple = (".jpg",)) -> int:
    """Reorganize one split (train/valid/test) to YOLO format.

    Moves every image file that sits directly inside ``split_dir`` into
    ``split_dir/images/`` (created if missing) and then reports how many
    ``.txt`` label files are present in ``split_dir/labels/``. The labels
    directory is only inspected, never created or modified.

    Args:
        split_dir: Directory of a single dataset split.
        extensions: File suffixes (including the leading dot) that count as
            images. Matching is case-insensitive so that ``.JPG`` files are
            handled the same way on every platform.

    Returns:
        The number of images moved by this call.
    """
    print(f"Reorganizing {split_dir.name}...")

    # Create images directory
    images_dir = split_dir / "images"
    images_dir.mkdir(exist_ok=True)

    wanted_suffixes = {ext.lower() for ext in extensions}

    # Snapshot (and sort) the listing before touching anything: files are
    # moved out of this very directory while we loop, and a fixed order makes
    # the run reproducible. Only regular files are considered, so a directory
    # that happens to be named "something.jpg" is left alone.
    candidates = sorted(
        entry
        for entry in split_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in wanted_suffixes
    )

    moved_count = 0
    skipped_count = 0
    for img_file in candidates:
        dest = images_dir / img_file.name
        if dest.exists():
            # Never overwrite an image that is already in place; count it so
            # the leftover file does not go unnoticed.
            skipped_count += 1
            continue
        shutil.move(str(img_file), str(dest))
        moved_count += 1

    print(f" Moved {moved_count} images to {split_dir.name}/images/")
    if skipped_count:
        print(
            f" WARNING: Skipped {skipped_count} images already present in "
            f"{split_dir.name}/images/"
        )

    # Check labels directory
    labels_dir = split_dir / "labels"
    if labels_dir.exists():
        label_count = len(list(labels_dir.glob("*.txt")))
        print(f" Found {label_count} labels in {split_dir.name}/labels/")
    else:
        print(f" WARNING: No labels directory in {split_dir.name}/")

    return moved_count
|
||||
|
||||
def update_data_yaml(dataset_dir: Path):
    """Update data.yaml to reflect new structure.

    Writes ``dataset_dir/data.yaml`` from scratch (any existing file is
    overwritten) with the absolute dataset root, the ``train``/``val``/``test``
    image directories relative to that root, and the ten class names.

    Args:
        dataset_dir: Dataset root directory; must already exist.
    """
    import json  # local import: only needed to quote the path below

    data_yaml = dataset_dir / "data.yaml"

    # Emit the root path as a double-quoted scalar. A JSON string is a valid
    # YAML double-quoted string, so paths containing " #", ": ", backslashes
    # or a leading YAML indicator character are read back exactly instead of
    # being truncated or rejected by the YAML parser.
    root = json.dumps(str(dataset_dir.absolute()), ensure_ascii=False)

    # The class entries are indented under "names:" so they form one mapping.
    content = f"""# YOLO dataset configuration
path: {root} # dataset root dir
train: train/images # train images (relative to 'path')
val: valid/images # val images (relative to 'path')
test: test/images # test images (relative to 'path')

# Classes
names:
  0: Live knot
  1: Dead knot
  2: Knot with crack
  3: Crack
  4: Resin
  5: Marrow
  6: Quartzity
  7: Knot missing
  8: Blue stain
  9: Overgrown
"""

    # Explicit encoding: the quoted path may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to be UTF-8.
    data_yaml.write_text(content, encoding="utf-8")
    print(f"\n✓ Updated {data_yaml}")
|
||||
|
||||
def main(dataset_dir: Path = Path("dataset_split")):
    """Reorganize every split of the dataset and rewrite its data.yaml.

    For each of the ``train``, ``valid`` and ``test`` sub-directories that
    exists, the images are moved into an ``images/`` sub-directory; afterwards
    ``data.yaml`` in the dataset root is regenerated and a summary of the
    resulting layout is printed.

    Args:
        dataset_dir: Dataset root. Defaults to ``dataset_split`` relative to
            the current working directory.

    Returns:
        None. If the dataset root is missing, an error is printed and nothing
        is changed.
    """
    dataset_dir = Path(dataset_dir)

    # is_dir() rather than exists(): a regular file with this name would pass
    # an existence check and then fail later when data.yaml is written.
    if not dataset_dir.is_dir():
        print(f"Error: {dataset_dir} not found")
        return

    split_names = ("train", "valid", "test")

    # Reorganize each split
    for split_name in split_names:
        split_dir = dataset_dir / split_name
        if split_dir.is_dir():
            reorganize_split(split_dir)
        else:
            print(f"Warning: {split_dir} not found")

    # Update data.yaml
    update_data_yaml(dataset_dir)

    print("\n" + "=" * 60)
    print("Dataset reorganization complete!")
    print("=" * 60)
    print("\nNew structure:")
    print(f"{dataset_dir}/")
    for split_name in split_names:
        print(f" ├── {split_name}/")
        print(" │ ├── images/")
        print(" │ └── labels/")
    print(" └── data.yaml")
|
||||
|
||||
# Script entry point: run the reorganization only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user