From b918dd14b70a551f32f203671f2c18c470c1f0c9 Mon Sep 17 00:00:00 2001 From: dillonj Date: Fri, 26 Dec 2025 15:17:02 -0700 Subject: [PATCH] UI improvements --- OAK_D_WORKFLOW_README.md | 257 ++++++++++++++++++++++++++++++++++++ README.md | 3 +- tk_annotation_gui.py | 272 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 523 insertions(+), 9 deletions(-) create mode 100644 OAK_D_WORKFLOW_README.md diff --git a/OAK_D_WORKFLOW_README.md b/OAK_D_WORKFLOW_README.md new file mode 100644 index 00000000..2b152b85 --- /dev/null +++ b/OAK_D_WORKFLOW_README.md @@ -0,0 +1,257 @@ +# OAK-D 4 Pro Workflow: Label, Train, and Convert AI Model + +This guide walks you through the complete workflow for creating a custom wood knot detection model optimized for the OAK-D 4 Pro camera: from manual image annotation to trained model conversion for edge deployment. + +## ๐Ÿ“‹ Prerequisites + +### Environment Setup +```bash +# Clone the repository +git clone git@143.244.157.110:dillon_stuff/saw_mill_knot_detection.git +cd saw_mill_knot_detection + +# Create virtual environment +python -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate + +# Install dependencies +pip install -r requirements.txt +``` + +### Required Dependencies +- Python 3.8+ +- Pillow (for image processing) +- Ultralytics (for YOLO/RT-DETR models) +- RF-DETR (optional, for RF-DETR models) +- OpenVINO (installed via convert script) + +## ๐Ÿท๏ธ Step 1: Label Images + +Use the Tkinter-based annotation GUI to manually label your wood surface images. + +### 1.1 Prepare Images +Place your images in a directory (e.g., `IMAGE/`): +``` +IMAGE/ +โ”œโ”€โ”€ image1.jpg +โ”œโ”€โ”€ image2.jpg +โ””โ”€โ”€ annotations.json # Will be created/updated +``` + +### 1.2 Launch Annotation GUI +```bash +# Using the convenience script +./run_tk_gui.sh --images-dir IMAGE/ + +# Or directly +python tk_annotation_gui.py --images-dir IMAGE/ +``` + +### 1.3 Annotate Images +1. **Navigate**: Use Prev/Next buttons or click image thumbnails +2. **Draw Boxes**: Click and drag on the image to create bounding boxes +3. **Auto-Label** (optional): Load trained weights and auto-detect knots + - Enter weights path (e.g., `runs/yolox_training/training/weights/best.pt`) + - Select model type (auto-detect usually works) + - Set confidence threshold (0.3-0.7 recommended) + - Click "Load Model" then "Auto-Label Current" +4. **Edit Annotations**: Double-click list items to delete, or manually draw corrections +5. **Save**: Annotations auto-save to `IMAGE/annotations.json` + +### 1.4 Annotation Format +Each image gets entries like: +```json +{ + "image1.jpg": [ + { + "bbox": [x1, y1, x2, y2], + "label": "knot", + "confidence": 1.0, + "source": "manual" + } + ] +} +``` + +**Tips**: +- Aim for 100-500 annotated images for good results +- Focus on challenging cases (small knots, lighting variations) +- Use auto-labeling to speed up the process, then manually correct + +## ๐Ÿ‹๏ธ Step 2: Train Model + +Train a detection model using your annotated images. 
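Before launching a training run, it is worth sanity-checking `annotations.json`, since empty or degenerate boxes can silently hurt training. The snippet below is a minimal sketch of such a check (it assumes the `IMAGE/annotations.json` layout from Step 1.4; adjust the path to your setup), after which you can proceed with dataset preparation below:

```python
import json
from pathlib import Path

# Path from Step 1.1 -- adjust if your images live elsewhere.
annotations = json.loads(Path("IMAGE/annotations.json").read_text())

total_boxes = 0
for image_name, boxes in annotations.items():
    for box in boxes:
        x1, y1, x2, y2 = box["bbox"]
        # Flag degenerate boxes (zero or negative width/height).
        if x2 <= x1 or y2 <= y1:
            print(f"Degenerate box in {image_name}: {box['bbox']}")
        total_boxes += 1

print(f"{len(annotations)} images, {total_boxes} annotated boxes")
```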
### 2.1 Prepare Dataset (Optional)
The training script can prepare the dataset automatically, but you can also do it manually:
```bash
python train_model.py --prepare-dataset --images-dir IMAGE --annotations annotations.json --dataset dataset_prepared
```

### 2.2 Choose Model Framework
Available frameworks (all MIT/Apache 2.0 licensed):
- **RF-DETR**: Highest accuracy, slower inference
- **RT-DETR**: Good balance, optimized for edge devices
- **YOLOv6**: Fast inference, good for real-time
- **YOLOX**: Versatile, widely supported

### 2.3 Train Model
```bash
# Basic training
python train_model.py \
    --framework rtdetr \
    --dataset dataset_prepared \
    --output runs/rtdetr_training \
    --model-size small \
    --epochs 100

# Advanced options
python train_model.py \
    --framework yolox \
    --dataset dataset_prepared \
    --output runs/yolox_training \
    --model-size nano \
    --epochs 50 \
    --batch-size 8 \
    --lr 0.001 \
    --prepare-dataset \
    --images-dir IMAGE \
    --annotations annotations.json
```

### 2.4 Monitor Training
- Check `runs/*/training/` for logs and checkpoints
- Training saves best model as `best.pt`
- Use TensorBoard or Weights & Biases for monitoring (if configured)

**Training Tips**:
- Start with `nano` or `small` models for faster iteration
- 50-200 epochs are typically sufficient
- Monitor validation mAP for convergence
- Use data augmentation for better generalization

## 🔄 Step 3: Convert for OAK-D Deployment

Convert the trained model to OpenVINO format for OAK-D 4 Pro.

### 3.1 Run Conversion
```bash
# Basic conversion
python convert_for_deployment.py \
    --model runs/rtdetr_training/training/weights/best.pt \
    --output oak_d_deployment

# Advanced options
python convert_for_deployment.py \
    --model runs/yolox_training/training/weights/best.pt \
    --output oak_d_deployment \
    --img-size 640 \
    --framework auto
```

### 3.2 Output Files
After conversion, you'll get:
```
oak_d_deployment/
├── model.xml    # OpenVINO IR model
├── model.bin    # OpenVINO IR weights
├── model.onnx   # ONNX format (intermediate)
└── config.yaml  # Model configuration
```

### 3.3 Convert to Blob Format
For OAK-D deployment, convert to `.blob` format:

**Option A: Online Converter (Recommended)**
1. Go to https://blobconverter.luxonis.com/
2. Upload `model.xml` and `model.bin` (the IR model needs both files)
3. Select "OAK-D 4 Pro"
4. Download `.blob` file

**Option B: Command Line**
```bash
pip install blobconverter
blobconverter --openvino-xml oak_d_deployment/model.xml --openvino-bin oak_d_deployment/model.bin
```

## 🧪 Step 4: Test and Deploy

### 4.1 Test OpenVINO Model
```bash
# Verify model loads
python -c "from openvino.runtime import Core; core = Core(); model = core.read_model('oak_d_deployment/model.xml'); print('✓ Model loaded')"
```

### 4.2 Deploy to OAK-D
Use the DepthAI Python API or the OAK-D examples:
```python
import depthai as dai

# Create pipeline
pipeline = dai.Pipeline()

# Load your blob
detection_nn = pipeline.create(dai.node.NeuralNetwork)
detection_nn.setBlobPath("model.blob")

# Configure camera and output streams
# ... 
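# The wiring below is a hedged sketch under stated assumptions: standard
# DepthAI v2 API, a 416x416 model input, and an output stream named "nn".
# Match the preview size to the resolution your model was exported with.
cam = pipeline.create(dai.node.ColorCamera)
cam.setPreviewSize(416, 416)            # must match the model's input size
cam.setInterleaved(False)
cam.preview.link(detection_nn.input)    # feed camera frames to the network

xout = pipeline.create(dai.node.XLinkOut)
xout.setStreamName("nn")
detection_nn.out.link(xout.input)       # send raw NN output back to the host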
# (see DepthAI documentation for complete pipeline examples)
```

### 4.3 Performance Optimization
- **Quantization**: Use 8-bit quantization for faster inference
- **Model Size**: Nano models work best on edge devices
- **Input Resolution**: 320x320 or 416x416 balances speed/accuracy
- **Calibration**: Test with real-world images for best results

## 🔧 Troubleshooting

### Common Issues

**GUI won't start**:
- Ensure Pillow and Tkinter are installed
- Check Python version (3.8+ required)

**Training fails**:
- Verify dataset format (COCO for RF-DETR, YOLO for others)
- Check GPU memory if using CUDA
- Reduce batch size if out of memory

**Conversion fails**:
- Ensure model is compatible with OpenVINO
- Check input/output shapes match expectations
- Try different image sizes (320, 416, 512, 640)

**OAK-D deployment issues**:
- Verify blob was created for correct OAK model (4 Pro)
- Check camera calibration and input preprocessing
- Ensure model input size matches camera output

### Getting Help
- Check existing issues in the repository
- Review DepthAI documentation: https://docs.luxonis.com/
- Test with provided example models first

## 📊 Performance Benchmarks

Expected performance on OAK-D 4 Pro:

| Model | Size | FPS | mAP | Use Case |
|-------|------|-----|-----|----------|
| RT-DETR | Nano | 25-35 | 0.75 | Balanced |
| YOLOX | Nano | 30-45 | 0.70 | Fast |
| RF-DETR | Nano | 15-25 | 0.80 | Accurate |

*Results vary based on model training and calibration*

## 🎯 Next Steps

1. **Iterate**: Collect more data, retrain, redeploy
2. **Optimize**: Experiment with quantization and pruning
3. **Integrate**: Add your model to production applications
4. **Monitor**: Track performance in real-world conditions

---

**License**: All models are MIT/Apache 2.0 licensed - free for commercial use!
diff --git a/README.md b/README.md
index d2e046a1..47e652f4 100644
--- a/README.md
+++ b/README.md
@@ -104,7 +104,7 @@ python train_model.py --framework yolov6 --dataset dataset_prepared --output run
 # Convert trained model for edge deployment
 python convert_for_deployment.py --model runs/training/weights/best.pt --output oak_d_deployment --img-size 640
 
-# See TRAINING_README.md for deployment instructions
+# See OAK_D_WORKFLOW_README.md for complete labeling, training, and deployment workflow
 ```
 
 ## 📁 Project Structure
@@ -115,6 +115,7 @@ saw_mill_knot_detection/
 ├── run_tk_gui.sh              # Convenience launcher
 ├── train_model.py             # Unified training script for all frameworks
 ├── convert_for_deployment.py  # Model conversion for OAK-D deployment
+├── OAK_D_WORKFLOW_README.md   # Complete workflow guide for OAK-D deployment
 ├── TRAINING_README.md         # Detailed training and deployment guide
 ├── setup_datasets.py          # Multi-format dataset setup script
 ├── split_coco_dataset.py      # Dataset splitting utility
diff --git a/tk_annotation_gui.py b/tk_annotation_gui.py
index 2df47dab..93ed6828 100644
--- a/tk_annotation_gui.py
+++ b/tk_annotation_gui.py
@@ -38,6 +38,12 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
 
+try:
+    import torch
+    CUDA_AVAILABLE = torch.cuda.is_available()
+except ImportError:
+    CUDA_AVAILABLE = False
+
 import tkinter as tk
 from tkinter import ttk
 
@@ -88,6 +94,16 @@ class TkAnnotationApp:
         self._draw_start: tuple[float, float] | None = None
         self._preview_rect_id: int | None = None
 
+        # New variables for box selection and editing
+        self.selected_box_index: int | None = None
+        self.dragging: bool = False
+        self.drag_start: tuple[float, float] | None = None
+        self.drag_mode: str | None = None  # 'move' or 'resize'
+        self.resize_corner: str | None = None  # 'nw', 'ne', 'sw', 'se'
+        self._is_selecting: bool = False
+        self._potential_select: int | None = None
+        self._mouse_moved: bool = False
+
         self.model: Any | None = None
         self.model_type: str | None = None  # rf-detr | rt-detr | yolov6 | yolox
         self.model_path: Path | None = None
@@ -100,6 +116,12 @@ class TkAnnotationApp:
 
         self._build_ui()
         self._load_images_dir(self.images_dir)
+        self._auto_load_model()
+
+    def _auto_load_model(self) -> None:
+        if DEFAULT_MODEL_WEIGHTS and Path(DEFAULT_MODEL_WEIGHTS).expanduser().exists():
+            self._set_model_status("Auto-loading model...")
+            self.load_model()
 
     # ------------------------- UI -------------------------
 
@@ -150,6 +172,12 @@ class TkAnnotationApp:
         self.canvas.bind("<B1-Motion>", self._on_mouse_move)
         self.canvas.bind("<ButtonRelease-1>", self._on_mouse_up)
 
+        # New binds for right-click resize and delete key
+        self.canvas.bind("<ButtonPress-3>", self._on_right_mouse_down)
+        self.canvas.bind("<B3-Motion>", self._on_right_mouse_move)
+        self.canvas.bind("<ButtonRelease-3>", self._on_right_mouse_up)
+        self.root.bind("<Delete>", self._on_delete_key)
+
         # Right: boxes list + controls
         right = ttk.Frame(container)
         right.grid(row=2, column=1, sticky="nsew")
@@ -203,6 +231,7 @@ class TkAnnotationApp:
         right.rowconfigure(3, weight=1)
 
         self.box_list.bind("<Double-Button-1>", self._on_box_double_click)
+        self.box_list.bind("<<ListboxSelect>>", self._on_box_select)
 
         buttons = ttk.Frame(right)
         buttons.grid(row=4, column=0, columnspan=2, sticky="ew", pady=(6, 0))
@@ -299,7 +328,8 @@ class TkAnnotationApp:
             )
         else:
             # Ultralytics models
-            results = self.model.predict(source=str(img_path), conf=threshold, save=False, verbose=False)
+            device = 'cuda' if CUDA_AVAILABLE else 'cpu'
+            results = self.model.predict(source=self.current_image, conf=threshold, save=False, verbose=False, device=device)
             for result in results:
                 for box in result.boxes:
                     x1, y1, x2, y2 = box.xyxy[0].tolist()
@@ -323,7 +353,9 @@ class TkAnnotationApp:
 
         # Match legacy behavior: append auto boxes to existing
         key = img_path.name
-        self.annotations.setdefault(key, [])
+        # Remove previous auto labels
+        existing_boxes = self.annotations.get(key, [])
+        self.annotations[key] = [box for box in existing_boxes if box.get("source") != "auto"]
         self.annotations[key].extend(new_boxes)
 
         self._save_annotations()
@@ -402,6 +434,7 @@ class TkAnnotationApp:
             return
 
         self.current_image_path = self.image_paths[self.current_idx]
+        self.selected_box_index = None  # Reset selection
 
         try:
             img = Image.open(self.current_image_path).convert("RGB")
@@ -479,7 +512,16 @@ class TkAnnotationApp:
             x1, y1, x2, y2 = bbox
             dx1, dy1 = self._img_to_disp(x1, y1)
             dx2, dy2 = self._img_to_disp(x2, y2)
-            self.canvas.create_rectangle(dx1, dy1, dx2, dy2, outline="#00FF66", width=2, tags=("box", f"box_{i}"))
+            color = "#FF4444" if i == self.selected_box_index else "#00FF66"
+            width = 3 if i == self.selected_box_index else 2
+            self.canvas.create_rectangle(dx1, dy1, dx2, dy2, outline=color, width=width, tags=("box", f"box_{i}"))
+            if i == self.selected_box_index:
+                # Draw resize handles
+                handle_size = 6
+                self.canvas.create_rectangle(dx1-handle_size, dy1-handle_size, dx1+handle_size, dy1+handle_size, fill=color, tags=("box", f"box_{i}"))
+                self.canvas.create_rectangle(dx2-handle_size, dy1-handle_size, dx2+handle_size, dy1+handle_size, fill=color, tags=("box", f"box_{i}"))
+                self.canvas.create_rectangle(dx1-handle_size, dy2-handle_size, dx1+handle_size, dy2+handle_size, fill=color, tags=("box", f"box_{i}"))
+                self.canvas.create_rectangle(dx2-handle_size, dy2-handle_size, dx2+handle_size, dy2+handle_size, fill=color, tags=("box", f"box_{i}"))
 
     def _img_to_disp(self, x: float, y: float) -> tuple[float, float]:
         assert self.transform is not None
@@ -496,17 +538,137 @@ class TkAnnotationApp:
             iy = min(max(iy, 0.0), float(h))
         return ix, iy
 
+    def _find_box_at_point(self, x: float, y: float) -> int | None:
+        """Find the box at the given display coordinates, prioritizing smaller boxes."""
+        if self.current_image_path is None:
+            return None
+        boxes = self.annotations.get(self.current_image_path.name, []) or []
+        candidates = []
+        for i, box in enumerate(boxes):
+            bbox = box.get("bbox")
+            if not bbox or len(bbox) != 4:
+                continue
+            x1, y1, x2, y2 = bbox
+            dx1, dy1 = self._img_to_disp(x1, y1)
+            dx2, dy2 = self._img_to_disp(x2, y2)
+            if dx1 <= x <= dx2 and dy1 <= y <= dy2:
+                area = (x2 - x1) * (y2 - y1)
+                candidates.append((area, i))
+        if not candidates:
+            return None
+        # Sort by area ascending (smaller first)
+        candidates.sort()
+        return candidates[0][1]
+
+    def _find_resize_corner(self, x: float, y: float, box_index: int) -> str | None:
+        """Find which corner/handle is clicked for resizing."""
+        if self.current_image_path is None:
+            return None
+        boxes = self.annotations.get(self.current_image_path.name, []) or []
+        if box_index >= len(boxes):
+            return None
+        bbox = boxes[box_index].get("bbox")
+        if not bbox or len(bbox) != 4:
+            return None
+        x1, y1, x2, y2 = bbox
+        dx1, dy1 = self._img_to_disp(x1, y1)
+        dx2, dy2 = self._img_to_disp(x2, y2)
+        handle_size = 10  # Slightly larger for easier clicking
+        corners = {
+            'nw': (dx1, dy1),
+            'ne': (dx2, dy1),
+            'sw': (dx1, dy2),
+            'se': (dx2, dy2)
+        }
+        for corner, (cx, cy) in corners.items():
+            if cx - handle_size <= x <= cx + handle_size and cy - handle_size <= y <= cy + handle_size:
+                return corner
+        return None
+
     # ------------------------- Mouse interactions -------------------------
 
     def _on_mouse_down(self, event: tk.Event) -> None:
         if self.current_image is None or self.current_image_path is None or self.transform is None:
             return
+
+        # Check if Ctrl is held for moving or resizing boxes
+        if event.state & 0x4:  # Ctrl key
+            # First, check if clicking on a corner of the selected box for resizing
+            if self.selected_box_index is not None:
+                corner = self._find_resize_corner(event.x, event.y, self.selected_box_index)
+                if corner:
+                    self.dragging = True
+                    self.drag_mode = 'resize'
+                    self.resize_corner = corner
+                    self.drag_start = (event.x, event.y)
+                    return
+
+            # Otherwise, select and move a box
+            box_index = self._find_box_at_point(event.x, event.y)
+            if box_index is not None:
+                self.selected_box_index = box_index
+                self.dragging = True
+                self.drag_mode = 'move'
+                self.drag_start = (event.x, event.y)
+                self._refresh_box_list()
+                self._redraw_boxes()
+                return
+
+        # Normal mode: check if clicking on corner of selected box for resizing
+        if self.selected_box_index is not None:
+            corner = self._find_resize_corner(event.x, event.y, self.selected_box_index)
+            if corner:
+                self.dragging = True
+                self.drag_mode = 'resize'
+                self.resize_corner = corner
+                self.drag_start = (event.x, event.y)
+                return
+
+        # Normal mode: check if clicking inside a box to potentially select it
+        box_index = self._find_box_at_point(event.x, event.y)
+        if box_index is not None:
+            self._potential_select = box_index
+            self._is_selecting = True
+            self._mouse_moved = False
+            return
+
+        # Otherwise, start drawing
         self._draw_start = (event.x, event.y)
+        self._is_selecting = False
         if self._preview_rect_id is not None:
             self.canvas.delete(self._preview_rect_id)
             self._preview_rect_id = None
 
     def _on_mouse_move(self, event: tk.Event) -> None:
+        if self.dragging and self.drag_mode == 'move' and self.drag_start and self.selected_box_index is not None:
+            # Move the box
+            dx = event.x - self.drag_start[0]
+            dy = event.y - self.drag_start[1]
+            if self.current_image_path is None:
+                return
+            boxes = self.annotations.get(self.current_image_path.name, []) or []
+            if self.selected_box_index >= len(boxes):
+                return
+            bbox = boxes[self.selected_box_index]["bbox"]
+            x1, y1, x2, y2 = bbox
+            # Convert to display coords, move, convert back
+            dx1, dy1 = self._img_to_disp(x1, y1)
+            dx2, dy2 = self._img_to_disp(x2, y2)
+            dx1 += dx
+            dy1 += dy
+            dx2 += dx
+            dy2 += dy
+            ix1, iy1 = self._disp_to_img(dx1, dy1)
+            ix2, iy2 = self._disp_to_img(dx2, dy2)
+            boxes[self.selected_box_index]["bbox"] = [ix1, iy1, ix2, iy2]
+            self.drag_start = (event.x, event.y)
+            self._redraw_boxes()
+            return
+
+        if self._is_selecting:
+            self._mouse_moved = True
+            return
+
         if self._draw_start is None or self.current_image is None or self.transform is None:
             return
 
@@ -521,6 +683,23 @@ class TkAnnotationApp:
         )
 
     def _on_mouse_up(self, event: tk.Event) -> None:
+        if self.dragging:
+            self.dragging = False
+            self.drag_mode = None
+            self.drag_start = None
+            self._save_annotations()
+            return
+
+        if self._is_selecting:
+            if not self._mouse_moved and self._potential_select is not None:
+                self.selected_box_index = self._potential_select
+                self._refresh_box_list()
+                self._redraw_boxes()
+            self._is_selecting = False
+            self._potential_select = None
+            self._mouse_moved = False
+            return
+
         if self._draw_start is None or self.current_image is None or self.current_image_path is None or self.transform is None:
             self._draw_start = None
             return
@@ -558,7 +737,71 @@ class TkAnnotationApp:
         self._refresh_box_list()
         self._redraw_boxes()
 
-    # ------------------------- Box list actions -------------------------
+    def _on_right_mouse_down(self, event: tk.Event) -> None:
+        if self.current_image is None or self.current_image_path is None or self.transform is None:
+            return
+        box_index = self._find_box_at_point(event.x, event.y)
+        if box_index is not None:
+            corner = self._find_resize_corner(event.x, event.y, box_index)
+            if corner:
+                self.selected_box_index = box_index
+                self.dragging = True
+                self.drag_mode = 'resize'
+                self.resize_corner = corner
+                self.drag_start = (event.x, event.y)
+                self._refresh_box_list()
+                self._redraw_boxes()
+
+    def _on_right_mouse_move(self, event: tk.Event) -> None:
+        if not self.dragging or self.drag_mode != 'resize' or self.resize_corner is None or self.selected_box_index is None or self.drag_start is None:
+            return
+        if self.current_image_path is None:
+            return
+        boxes = self.annotations.get(self.current_image_path.name, []) or []
+        if self.selected_box_index >= len(boxes):
+            return
+        bbox = boxes[self.selected_box_index]["bbox"]
+        x1, y1, x2, y2 = bbox
+        dx = event.x - self.drag_start[0]
+        dy = event.y - self.drag_start[1]
+
+        # Convert to display coords
+        dx1, dy1 = self._img_to_disp(x1, y1)
+        dx2, dy2 = self._img_to_disp(x2, y2)
+
+        if 'n' in self.resize_corner:
+            dy1 += dy
+        if 's' in self.resize_corner:
+            dy2 += dy
+        if 'w' in self.resize_corner:
+            dx1 += dx
+        if 'e' in self.resize_corner:
+            dx2 += dx
+
+        # Convert back to image coords
+        ix1, iy1 = self._disp_to_img(dx1, dy1)
+        ix2, iy2 = self._disp_to_img(dx2, dy2)
+
+        # Ensure min size
+        if abs(ix2 - ix1) < 2:
+            ix2 = ix1 + 2 if ix2 > ix1 else ix1 - 2
+        if abs(iy2 - iy1) < 2:
+            iy2 = iy1 + 2 if iy2 > iy1 else iy1 - 2
+
+        boxes[self.selected_box_index]["bbox"] = [min(ix1, ix2), min(iy1, iy2), max(ix1, ix2), max(iy1, iy2)]
+        self.drag_start = (event.x, event.y)
+        self._redraw_boxes()
+
+    def _on_right_mouse_up(self, event: tk.Event) -> None:
+        if self.dragging and self.drag_mode == 'resize':
+            self.dragging = False
+            self.drag_mode = None
+            self.resize_corner = None
+            self.drag_start = None
+            self._save_annotations()
+
+    def _on_delete_key(self, event: tk.Event) -> None:
+        self.delete_selected_box()
 
     def _refresh_box_list(self) -> None:
         self.box_list.delete(0, tk.END)
@@ -572,17 +815,23 @@ class TkAnnotationApp:
             label = str(box.get("label", "knot"))
             src = str(box.get("source", "manual"))
             conf = box.get("confidence", 1.0)
+            marker = "[x]" if idx == self.selected_box_index else "[ ]"
             self.box_list.insert(
                 tk.END,
-                f"[x] {idx}: {label} ({src}, {conf:.3f}) ({x1:.1f},{y1:.1f})-({x2:.1f},{y2:.1f})",
+                f"{marker} {idx}: {label} ({src}, {conf:.3f}) ({x1:.1f},{y1:.1f})-({x2:.1f},{y2:.1f})",
             )
+        # Select the item in listbox if selected
+        if self.selected_box_index is not None and self.selected_box_index < self.box_list.size():
+            self.box_list.selection_set(self.selected_box_index)
 
     def _selected_box_index(self) -> int | None:
         sel = self.box_list.curselection()
         if not sel:
             return None
-        # Listbox index corresponds to displayed entries, which correspond to boxes in order
-        return int(sel[0])
+        idx = int(sel[0])
+        self.selected_box_index = idx
+        self._redraw_boxes()
+        return idx
 
     def delete_selected_box(self) -> None:
         if self.current_image_path is None:
@@ -594,10 +843,18 @@ class TkAnnotationApp:
         boxes = self.annotations.get(self.current_image_path.name, []) or []
         if 0 <= idx < len(boxes):
             del boxes[idx]
+            if self.selected_box_index == idx:
+                self.selected_box_index = None
+            elif self.selected_box_index is not None and self.selected_box_index > idx:
+                self.selected_box_index -= 1
         self._save_annotations()
         self._refresh_box_list()
         self._redraw_boxes()
 
+    def _on_box_select(self, event: tk.Event) -> None:
+        self._selected_box_index()
+        self._refresh_box_list()  # To update markers
+
     def _on_box_double_click(self, _event: tk.Event) -> None:
         self.delete_selected_box()
 
@@ -605,6 +862,7 @@ class TkAnnotationApp:
         if self.current_image_path is None:
             return
         self.annotations[self.current_image_path.name] = []
+        self.selected_box_index = None
         self._save_annotations()
         self._refresh_box_list()
         self._redraw_boxes()