Initial commit: Wood knot detection model and GUI

New file: validate_coco_dataset.py (89 lines)
@@ -0,0 +1,89 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Split subdirectories expected under the dataset root (Roboflow-style COCO layout).
SPLITS = ("train", "valid", "test")
|
||||
|
||||
|
||||
def _load_json(path: Path) -> dict:
|
||||
with path.open("r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
def _validate_split(split_dir: Path) -> list[str]:
    """Validate one split directory (train/, valid/, or test/) of a COCO dataset.

    Checks that ``_annotations.coco.json`` exists and parses, that the required
    top-level keys are present, and that the image files it references exist on
    disk (scanning at most the first 5000 entries to keep validation fast).

    Returns a list of human-readable error strings; an empty list means the
    split passed validation.
    """
    errors: list[str] = []
    ann_path = split_dir / "_annotations.coco.json"
    if not ann_path.exists():
        return [f"Missing {ann_path}"]

    # A corrupt or unreadable annotation file should be reported as a
    # validation error, not crash the whole run with a traceback.
    try:
        data = _load_json(ann_path)
    except (json.JSONDecodeError, OSError) as exc:
        return [f"{ann_path}: unreadable or invalid JSON ({exc})"]
    if not isinstance(data, dict):
        return [f"{ann_path}: top-level JSON value must be an object"]

    for key in ("images", "annotations", "categories"):
        if key not in data:
            errors.append(f"{ann_path}: missing key '{key}'")

    images = data.get("images", [])
    categories = data.get("categories", [])

    if not isinstance(images, list) or not images:
        errors.append(f"{ann_path}: 'images' must be a non-empty list")

    if not isinstance(categories, list) or not categories:
        errors.append(f"{ann_path}: 'categories' must be a non-empty list")

    # Verify referenced image files exist (capped at 5000 to bound runtime).
    missing_files = 0
    checked = 0
    if isinstance(images, list):  # guard: slicing a non-list would raise TypeError
        for img in images[:5000]:
            if not isinstance(img, dict):  # skip malformed entries
                continue
            file_name = img.get("file_name")
            if not file_name:
                continue
            checked += 1
            if not (split_dir / file_name).exists():
                missing_files += 1

    if checked and missing_files:
        errors.append(
            f"{ann_path}: {missing_files}/{checked} referenced image files are missing in {split_dir}"
        )

    return errors
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: validate the dataset layout and print a report.

    Returns 0 when all splits validate cleanly, 2 when any error was found.
    Exits via SystemExit if the dataset root itself does not exist.
    """
    parser = argparse.ArgumentParser(description="Validate COCO dataset structure for RF-DETR.")
    parser.add_argument(
        "--dataset-dir",
        type=Path,
        required=True,
        help="Path to dataset root containing train/ valid/ test/",
    )
    args = parser.parse_args()

    dataset_dir: Path = args.dataset_dir
    if not dataset_dir.exists():
        raise SystemExit(f"Dataset dir not found: {dataset_dir}")

    all_errors: list[str] = []
    for split in SPLITS:
        split_dir = dataset_dir / split
        if split_dir.exists():
            all_errors.extend(_validate_split(split_dir))
        else:
            all_errors.append(f"Missing split directory: {split_dir}")

    if not all_errors:
        print("Dataset validation: OK")
        return 0

    print("Dataset validation: FAILED")
    for err in all_errors:
        print(f"- {err}")
    return 2
|
||||
|
||||
|
||||
# Script entry point: exit with main()'s return code (0 = OK, 2 = validation failed).
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user