removed gradio

2025-12-23 18:24:40 -07:00
parent 43a34aaf00
commit 8804b45067
12 changed files with 1363 additions and 2271 deletions
--- a/tk_annotation_gui.py
+++ b/tk_annotation_gui.py
@ -0,0 +1,645 @@
+#!/usr/bin/env python3
+"""Tkinter-based annotation GUI.
+
+This is a standalone GUI for manual bounding-box annotation that writes
+`annotations.json` in the same format used by the project:
+
+{
+  "image.jpg": [
+    {"bbox": [x1, y1, x2, y2], "label": "knot", "confidence": 1.0, "source": "manual"},
+    ...
+  ],
+  ...
+}
+
+This project uses the Tkinter GUI as the annotation interface.
+
+Run:
+  python tk_annotation_gui.py
+
+Optional:
+  python tk_annotation_gui.py --images-dir IMAGE/
+
+Controls:
+- Click-drag on the image to create a box
+- Double-click a box entry to delete it
+- Prev/Next to navigate
+
+Notes:
+- Boxes are stored in ORIGINAL image pixel coordinates.
+- The displayed image is scaled to fit the canvas; coordinates are converted.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import tkinter as tk
+from tkinter import ttk
+
+from PIL import Image, ImageTk
+
+# Built-in defaults; an optional project-level config.py may override any of them.
+DEFAULT_IMAGES_DIR = "IMAGE/"
+DEFAULT_MODEL_WEIGHTS = ""
+ANNOTATION_CATEGORIES = ["knot"]
+DEFAULT_DETECTION_THRESHOLD = 0.5
+
+import warnings  # only needed here, to report config.py problems
+
+try:
+    import config as _cfg
+except ImportError:
+    # config.py is optional: without it the built-in defaults stay in effect.
+    _cfg = None
+except Exception as _cfg_error:
+    # config.py exists but failed while importing. Keep running on the
+    # defaults, but report why the overrides were ignored.
+    warnings.warn(f"Ignoring config.py: {_cfg_error}")
+    _cfg = None
+
+if _cfg is not None:
+    DEFAULT_IMAGES_DIR = getattr(_cfg, "DEFAULT_IMAGES_DIR", DEFAULT_IMAGES_DIR)
+    DEFAULT_MODEL_WEIGHTS = getattr(_cfg, "DEFAULT_MODEL_WEIGHTS", DEFAULT_MODEL_WEIGHTS)
+    ANNOTATION_CATEGORIES = getattr(_cfg, "ANNOTATION_CATEGORIES", ANNOTATION_CATEGORIES)
+    try:
+        DEFAULT_DETECTION_THRESHOLD = float(
+            getattr(_cfg, "DEFAULT_DETECTION_THRESHOLD", DEFAULT_DETECTION_THRESHOLD)
+        )
+    except (TypeError, ValueError):
+        # Keep the built-in threshold; the other overrides above still apply.
+        warnings.warn("Ignoring non-numeric DEFAULT_DETECTION_THRESHOLD from config.py")
+
+
+@dataclass
+class DisplayTransform:
+    """Mapping between original-image pixels and canvas (display) pixels.
+
+    A display coordinate is ``image_coordinate * scale + offset``.
+    """
+
+    scale: float  # display pixels per original-image pixel
+    offset_x: float  # canvas x of the displayed image's left edge
+    offset_y: float  # canvas y of the displayed image's top edge
+
+
+class TkAnnotationApp:
+    def __init__(self, root: tk.Tk, images_dir: Path):
+        """Initialise state, build the widgets and load the initial directory.
+
+        Args:
+            root: Tk root window that hosts the application.
+            images_dir: Directory holding the images and ``annotations.json``.
+        """
+        self.root = root
+        self.root.title("Wood Knot Annotation Tool (Tkinter)")
+
+        # --- image set and annotation store ---
+        self.images_dir = images_dir
+        self.image_paths: list[Path] = []
+        self.current_idx: int = 0
+        self.ann_file: Path = self.images_dir / "annotations.json"
+        self.annotations: dict[str, list[dict[str, Any]]] = {}
+
+        # --- image currently shown on the canvas ---
+        self.current_image: Image.Image | None = None
+        self.current_image_path: Path | None = None
+        # Kept as an attribute so the PhotoImage is not garbage-collected
+        # while the canvas still displays it.
+        self.current_photo: ImageTk.PhotoImage | None = None
+        self.transform: DisplayTransform | None = None
+
+        # --- in-progress click-drag ---
+        self._draw_start: tuple[float, float] | None = None
+        self._preview_rect_id: int | None = None
+
+        # --- auto-label model ---
+        self.model: Any | None = None
+        self.model_type: str | None = None  # rf-detr | rt-detr | yolov6 | yolox
+        self.model_path: Path | None = None
+
+        # --- Tk variables backing the input widgets ---
+        if DEFAULT_MODEL_WEIGHTS:
+            initial_weights = str(DEFAULT_MODEL_WEIGHTS)
+        else:
+            initial_weights = ""
+        self.model_path_var = tk.StringVar(value=initial_weights)
+        self.model_type_var = tk.StringVar(value="auto")
+        self.threshold_var = tk.DoubleVar(value=float(DEFAULT_DETECTION_THRESHOLD))
+
+        if ANNOTATION_CATEGORIES:
+            initial_label = ANNOTATION_CATEGORIES[0]
+        else:
+            initial_label = "knot"
+        self.label_var = tk.StringVar(value=initial_label)
+
+        self._build_ui()
+        self._load_images_dir(self.images_dir)
+
+    # ------------------------- UI -------------------------
+
+    def _build_ui(self) -> None:
+        """Create and lay out all widgets and bind the canvas / listbox events.
+
+        Layout: a top bar (directory entry, "Load" button, image index), a
+        separator, then the canvas with Prev/Next on the left and the
+        auto-label, label and annotation-list controls on the right.
+        """
+        container = ttk.Frame(self.root, padding=8)
+        container.grid(row=0, column=0, sticky="nsew")
+
+        # Let the container grow with the window
+        self.root.rowconfigure(0, weight=1)
+        self.root.columnconfigure(0, weight=1)
+
+        # Top controls
+        top = ttk.Frame(container)
+        top.grid(row=0, column=0, columnspan=2, sticky="ew", pady=(0, 8))
+        top.columnconfigure(1, weight=1)
+
+        ttk.Label(top, text="Images dir:").grid(row=0, column=0, sticky="w")
+        self.images_dir_var = tk.StringVar(value=str(self.images_dir))
+        self.images_dir_entry = ttk.Entry(top, textvariable=self.images_dir_var)
+        self.images_dir_entry.grid(row=0, column=1, sticky="ew", padx=6)
+        ttk.Button(top, text="Load", command=self._on_load_dir).grid(row=0, column=2, sticky="ew")
+
+        self.index_label = ttk.Label(top, text="Image: -/-")
+        self.index_label.grid(row=0, column=3, sticky="e", padx=(10, 0))
+
+        ttk.Separator(container, orient="horizontal").grid(row=1, column=0, columnspan=2, sticky="ew", pady=(0, 8))
+
+        # Left: Canvas
+        left = ttk.Frame(container)
+        left.grid(row=2, column=0, sticky="nsew", padx=(0, 8))
+        container.rowconfigure(2, weight=1)
+        container.columnconfigure(0, weight=3)
+
+        nav = ttk.Frame(left)
+        nav.grid(row=0, column=0, sticky="ew", pady=(0, 6))
+        nav.columnconfigure(2, weight=1)
+
+        ttk.Button(nav, text="Prev", command=self.prev_image).grid(row=0, column=0, sticky="w")
+        ttk.Button(nav, text="Next", command=self.next_image).grid(row=0, column=1, sticky="w", padx=(6, 0))
+        self.status_label = ttk.Label(nav, text="", foreground="#444")
+        self.status_label.grid(row=0, column=2, sticky="w", padx=(10, 0))
+
+        self.canvas = tk.Canvas(left, width=1200, height=800, bg="#111", highlightthickness=0)
+        self.canvas.grid(row=1, column=0, sticky="nsew")
+        left.rowconfigure(1, weight=1)
+        left.columnconfigure(0, weight=1)
+
+        # Click-drag with the left mouse button draws a new box
+        self.canvas.bind("<ButtonPress-1>", self._on_mouse_down)
+        self.canvas.bind("<B1-Motion>", self._on_mouse_move)
+        self.canvas.bind("<ButtonRelease-1>", self._on_mouse_up)
+
+        # Right: boxes list + controls
+        right = ttk.Frame(container)
+        right.grid(row=2, column=1, sticky="nsew")
+        container.columnconfigure(1, weight=1)
+
+        # Model controls
+        model_frame = ttk.LabelFrame(right, text="Auto-Label", padding=8)
+        model_frame.grid(row=0, column=0, columnspan=2, sticky="ew")
+        model_frame.columnconfigure(1, weight=1)
+
+        ttk.Label(model_frame, text="Weights:").grid(row=0, column=0, sticky="w")
+        self.model_entry = ttk.Entry(model_frame, textvariable=self.model_path_var)
+        self.model_entry.grid(row=0, column=1, sticky="ew", padx=(6, 0))
+
+        ttk.Label(model_frame, text="Type:").grid(row=1, column=0, sticky="w", pady=(6, 0))
+        self.model_type_menu = ttk.OptionMenu(
+            model_frame,
+            self.model_type_var,
+            self.model_type_var.get(),
+            "auto",
+            "rf-detr",
+            "rt-detr",
+            "yolov6",
+            "yolox",
+        )
+        self.model_type_menu.grid(row=1, column=1, sticky="ew", padx=(6, 0), pady=(6, 0))
+
+        ttk.Label(model_frame, text="Threshold:").grid(row=2, column=0, sticky="w", pady=(6, 0))
+        self.threshold_scale = ttk.Scale(model_frame, from_=0.05, to=0.95, variable=self.threshold_var)
+        self.threshold_scale.grid(row=2, column=1, sticky="ew", padx=(6, 0), pady=(6, 0))
+
+        model_buttons = ttk.Frame(model_frame)
+        model_buttons.grid(row=3, column=0, columnspan=2, sticky="ew", pady=(8, 0))
+        model_buttons.columnconfigure(0, weight=1)
+        model_buttons.columnconfigure(1, weight=1)
+        ttk.Button(model_buttons, text="Load Model", command=self.load_model).grid(row=0, column=0, sticky="ew")
+        ttk.Button(model_buttons, text="Auto-Label Current", command=self.auto_label_current).grid(row=0, column=1, sticky="ew", padx=(6, 0))
+
+        self.model_status = ttk.Label(model_frame, text="No model loaded")
+        self.model_status.grid(row=4, column=0, columnspan=2, sticky="w", pady=(6, 0))
+
+        # Label applied to manually drawn boxes
+        ttk.Label(right, text="Label:").grid(row=1, column=0, sticky="w", pady=(10, 0))
+        self.label_menu = ttk.OptionMenu(right, self.label_var, self.label_var.get(), *ANNOTATION_CATEGORIES)
+        self.label_menu.grid(row=1, column=1, sticky="ew", padx=(6, 0), pady=(10, 0))
+        right.columnconfigure(1, weight=1)
+
+        ttk.Label(right, text="Annotations:").grid(row=2, column=0, columnspan=2, sticky="w", pady=(10, 4))
+
+        self.box_list = tk.Listbox(right, height=18)
+        self.box_list.grid(row=3, column=0, columnspan=2, sticky="nsew")
+        right.rowconfigure(3, weight=1)
+
+        # Double-clicking an entry deletes it
+        self.box_list.bind("<Double-Button-1>", self._on_box_double_click)
+
+        buttons = ttk.Frame(right)
+        buttons.grid(row=4, column=0, columnspan=2, sticky="ew", pady=(6, 0))
+        ttk.Button(buttons, text="Delete Selected", command=self.delete_selected_box).grid(row=0, column=0, sticky="ew")
+        ttk.Button(buttons, text="Clear All", command=self.clear_all_boxes).grid(row=0, column=1, sticky="ew", padx=(6, 0))
+
+        # Make buttons frame expand reasonably
+        buttons.columnconfigure(0, weight=1)
+        buttons.columnconfigure(1, weight=1)
+
+    # ------------------------- Model loading / auto-label -------------------------
+
+    def _guess_model_type_from_path(self, path: Path) -> str:
+        """Guess the detector family from a weights path.
+
+        An explicit family name anywhere in the path wins. Only when none is
+        present do the generic hints apply: a standalone "rf" token in the
+        file name, "checkpoint" in the path, or a ``.pth`` suffix all mean
+        RF-DETR. Anything else is treated as RT-DETR.
+
+        Args:
+            path: Path to the weights file.
+
+        Returns:
+            One of "rf-detr", "rt-detr", "yolov6" or "yolox".
+        """
+        lowered = str(path).lower()
+        # Drop separators so "rt-detr", "rt_detr" and "rtdetr" compare equal.
+        compact = lowered.replace("-", "").replace("_", "")
+
+        # Explicit names are checked before the generic hints; otherwise a
+        # yolox/yolov6/rt-detr path ending in ".pth" never reaches its own branch.
+        explicit_names = (
+            ("rfdetr", "rf-detr"),
+            ("rtdetr", "rt-detr"),
+            ("yolov6", "yolov6"),
+            ("yolox", "yolox"),
+        )
+        for needle, family in explicit_names:
+            if needle in compact:
+                return family
+
+        # "rf" only counts as a whole token of the file name: as a bare
+        # substring it also matches unrelated path parts such as "surfer".
+        name_tokens = (
+            path.name.lower().replace("-", " ").replace("_", " ").replace(".", " ").split()
+        )
+        if "rf" in name_tokens or "checkpoint" in lowered or lowered.endswith(".pth"):
+            return "rf-detr"
+
+        # Default to ultralytics RT-DETR if ambiguous
+        return "rt-detr"
+
+    def load_model(self) -> None:
+        """Load detector weights from the path typed into the "Weights" entry.
+
+        The model type comes from the "Type" menu; "auto" defers to
+        ``_guess_model_type_from_path``. The outcome (success or the reason
+        for failure) is shown in the model status label. On failure the
+        model, its type and its path are all reset to ``None``.
+        """
+        entered = self.model_path_var.get().strip()
+        if entered == "":
+            self._set_model_status("No weights path provided")
+            return
+
+        weights_path = Path(entered).expanduser()
+        if not weights_path.exists():
+            self._set_model_status(f"File not found: {weights_path}")
+            return
+
+        choice = (self.model_type_var.get() or "auto").strip().lower()
+        if choice == "auto":
+            model_type = self._guess_model_type_from_path(weights_path)
+        else:
+            model_type = choice
+
+        weights = str(weights_path)
+        try:
+            # The detector packages are optional, so each is imported only
+            # when its model type is actually requested.
+            if model_type == "rf-detr":
+                from rfdetr import RFDETRNano
+
+                self.model = RFDETRNano(pretrain_weights=weights)
+            elif model_type == "rt-detr":
+                from ultralytics import RTDETR
+
+                self.model = RTDETR(weights)
+            else:
+                from ultralytics import YOLO
+
+                self.model = YOLO(weights)
+
+            self.model_type = model_type
+            self.model_path = weights_path
+            self._set_model_status(f"Loaded: {weights_path.name} ({model_type})")
+        except Exception as e:
+            self.model = self.model_type = self.model_path = None
+            self._set_model_status(f"Load failed: {e}")
+
+    def auto_label_current(self) -> None:
+        """Run the loaded detector on the current image and append its boxes.
+
+        Detections at or above the threshold slider value are appended to the
+        current image's annotations with ``"source": "auto"``, saved, and
+        drawn. Any failure is reported in the model status label.
+        """
+        if self.current_image_path is None:
+            return
+        if self.model is None or self.model_type is None:
+            self._set_model_status("No model loaded")
+            return
+
+        threshold = float(self.threshold_var.get())
+        img_path = self.current_image_path
+
+        def make_entry(left, top, right, bottom, label, confidence):
+            # One detector-produced entry in the annotations.json box format.
+            return {
+                "bbox": [float(left), float(top), float(right), float(bottom)],
+                "label": label,
+                "confidence": confidence,
+                "source": "auto",
+            }
+
+        try:
+            new_boxes: list[dict[str, Any]] = []
+
+            if self.model_type != "rf-detr":
+                # Ultralytics models read the image from disk themselves
+                results = self.model.predict(source=str(img_path), conf=threshold, save=False, verbose=False)
+                for result in results:
+                    for box in result.boxes:
+                        x1, y1, x2, y2 = box.xyxy[0].tolist()
+                        conf = float(box.conf[0])
+                        # Use the model's class name when it can be resolved,
+                        # otherwise fall back to "knot".
+                        label = "knot"
+                        try:
+                            cls = int(box.cls[0])
+                            if hasattr(self.model, "names") and cls in self.model.names:
+                                label = str(self.model.names[cls])
+                        except Exception:
+                            pass
+                        new_boxes.append(make_entry(x1, y1, x2, y2, label, conf))
+            else:
+                # RF-DETR model expects PIL image
+                if self.current_image is None:
+                    return
+                detections = self.model.predict(self.current_image, threshold=threshold)
+                for i in range(len(detections)):
+                    xyxy = detections.xyxy[i]
+                    if detections.confidence is not None:
+                        conf = float(detections.confidence[i])
+                    else:
+                        conf = 1.0
+                    x1, y1, x2, y2 = xyxy
+                    new_boxes.append(make_entry(x1, y1, x2, y2, "knot", conf))
+
+            # Match legacy behavior: append auto boxes to existing
+            self.annotations.setdefault(img_path.name, [])
+            self.annotations[img_path.name].extend(new_boxes)
+            self._save_annotations()
+
+            self._refresh_box_list()
+            self._redraw_boxes()
+            self._set_model_status(f"Auto-labeled: {len(new_boxes)}")
+        except Exception as e:
+            self._set_model_status(f"Auto-label failed: {e}")
+
+    def _set_model_status(self, msg: str) -> None:
+        """Show ``msg`` in the status label of the Auto-Label panel."""
+        self.model_status.config(text=msg)
+
+    # ------------------------- Data load/save -------------------------
+
+    def _load_images_dir(self, images_dir: Path) -> None:
+        """Switch to ``images_dir``: index its images and load its annotations.
+
+        Files ending in .jpg, .jpeg or .png (any letter case) are collected in
+        sorted order. ``annotations.json`` is read from the same directory if
+        it exists. Problems are reported in the status label and are not
+        cleared by the rest of the load.
+
+        Args:
+            images_dir: Directory to load; ``~`` is expanded and the path is
+                resolved before use.
+        """
+        images_dir = images_dir.expanduser().resolve()
+        if not images_dir.is_dir():
+            self._set_status(f"Invalid images dir: {images_dir}")
+            return
+
+        # Match on the lower-cased suffix so e.g. IMG_001.JPG is also found
+        # on case-sensitive file systems.
+        image_suffixes = {".jpg", ".jpeg", ".png"}
+        try:
+            found = sorted(
+                p for p in images_dir.iterdir() if p.is_file() and p.suffix.lower() in image_suffixes
+            )
+        except OSError as e:
+            self._set_status(f"Cannot read images dir: {e}")
+            return
+
+        self.images_dir = images_dir
+        self.ann_file = self.images_dir / "annotations.json"
+        self.image_paths = found
+        self.current_idx = 0
+        # Forget the image of the previously loaded directory so nothing can
+        # be annotated or saved under its name in the new directory.
+        self.current_image = None
+        self.current_image_path = None
+
+        notes: list[str] = []
+
+        # Load annotations (if present)
+        self.annotations = {}
+        if self.ann_file.exists():
+            try:
+                with self.ann_file.open("r", encoding="utf-8") as f:
+                    data = json.load(f)
+            except (OSError, ValueError) as e:
+                # ValueError covers both malformed JSON and undecodable bytes.
+                notes.append(f"Failed to load annotations.json: {e}")
+            else:
+                if isinstance(data, dict):
+                    self.annotations = data
+                else:
+                    notes.append("Ignored annotations.json: top-level value is not an object")
+
+        if not self.image_paths:
+            notes.append("No images found")
+            self._set_status("; ".join(notes))
+            self._clear_canvas()
+            self._update_index_label()
+            self._refresh_box_list()
+            return
+
+        # Keep any annotation-loading problem visible instead of blanking it.
+        self._set_status("; ".join(notes))
+        self.load_current_image()
+
+    def _save_annotations(self) -> None:
+        """Write all annotations to ``annotations.json``.
+
+        The data is first written to a temporary file next to the target and
+        then moved over it, so a failed write leaves the previous file
+        untouched. Failures are reported in the status label.
+        """
+        # Ensure we always have an entry for current image
+        if self.current_image_path is not None:
+            self.annotations.setdefault(self.current_image_path.name, [])
+
+        # Opening the real file with "w" would truncate it before json.dump
+        # runs, so an error while serialising would destroy existing data.
+        tmp_file = self.ann_file.with_name(self.ann_file.name + ".tmp")
+        try:
+            with tmp_file.open("w", encoding="utf-8") as f:
+                json.dump(self.annotations, f, indent=2)
+            tmp_file.replace(self.ann_file)
+        except Exception as e:
+            self._set_status(f"Failed to save annotations: {e}")
+            try:
+                tmp_file.unlink()
+            except OSError:
+                # Best-effort cleanup: the temp file may never have been created.
+                pass
+
+    # ------------------------- Navigation -------------------------
+
+    def prev_image(self) -> None:
+        """Step back one image (staying on the first one) and reload it."""
+        if len(self.image_paths) == 0:
+            return
+        # Clamp at the first image
+        self.current_idx = self.current_idx - 1 if self.current_idx > 0 else 0
+        self.load_current_image()
+
+    def next_image(self) -> None:
+        """Step forward one image (staying on the last one) and reload it."""
+        if len(self.image_paths) == 0:
+            return
+        last_idx = len(self.image_paths) - 1
+        # Clamp at the last image
+        self.current_idx = self.current_idx + 1 if self.current_idx < last_idx else last_idx
+        self.load_current_image()
+
+    def load_current_image(self) -> None:
+        """Open the image at ``current_idx`` and show it with its annotations.
+
+        If the file cannot be opened, the error is shown in the status label
+        and the canvas is cleared, so the previously shown image cannot be
+        annotated under the new file's name.
+        """
+        if not self.image_paths:
+            return
+
+        self.current_image_path = self.image_paths[self.current_idx]
+
+        try:
+            # convert() returns an independent image, so the source file can
+            # be closed as soon as the RGB copy exists.
+            with Image.open(self.current_image_path) as src:
+                img = src.convert("RGB")
+        except Exception as e:
+            # Drop the stale image: the mouse handlers refuse to draw while
+            # current_image is None.
+            self.current_image = None
+            self._clear_canvas()
+            self._update_index_label()
+            self._refresh_box_list()
+            self._set_status(f"Failed to open image: {e}")
+            return
+
+        self.current_image = img
+        self._update_index_label()
+
+        # Ensure annotation list exists
+        self.annotations.setdefault(self.current_image_path.name, [])
+
+        self._render_image_and_boxes()
+        self._refresh_box_list()
+
+    def _update_index_label(self) -> None:
+        """Show "Image i/N: filename" in the top bar, or a placeholder if empty."""
+        if not self.image_paths:
+            caption = "Image: -/-"
+        else:
+            shown = self.image_paths[self.current_idx]
+            caption = f"Image {self.current_idx + 1}/{len(self.image_paths)}: {shown.name}"
+        self.index_label.config(text=caption)
+
+    # ------------------------- Canvas rendering -------------------------
+
+    def _clear_canvas(self) -> None:
+        """Remove every canvas item and forget the displayed photo and transform."""
+        self.canvas.delete("all")
+        self.current_photo = None
+        # With no transform the mouse handlers and _redraw_boxes do nothing.
+        self.transform = None
+
+    def _render_image_and_boxes(self) -> None:
+        """Scale the current image to fit the canvas, draw it, then its boxes.
+
+        The image keeps its aspect ratio and is centred on the canvas. The
+        scale and offsets used are stored in ``self.transform`` so that mouse
+        and box coordinates can be converted.
+        """
+        self._clear_canvas()
+        if self.current_image is None:
+            return
+
+        canvas_w = int(self.canvas.winfo_width())
+        canvas_h = int(self.canvas.winfo_height())
+        # If not yet realized, fall back to configured size
+        if canvas_w <= 2:
+            canvas_w = int(self.canvas["width"])
+        if canvas_h <= 2:
+            canvas_h = int(self.canvas["height"])
+
+        orig_w, orig_h = self.current_image.size
+        if orig_w <= 0 or orig_h <= 0:
+            # Nothing drawable, and the scale below would divide by zero.
+            return
+        scale = max(min(canvas_w / orig_w, canvas_h / orig_h), 1e-6)
+
+        # int() truncates, so a very wide or very tall image could otherwise
+        # end up with a zero-sized resize target.
+        disp_w = max(1, int(orig_w * scale))
+        disp_h = max(1, int(orig_h * scale))
+
+        # Centre the scaled image on the canvas
+        offset_x = (canvas_w - disp_w) / 2
+        offset_y = (canvas_h - disp_h) / 2
+
+        disp_img = self.current_image.resize((disp_w, disp_h), Image.Resampling.BILINEAR)
+        # Keep a reference on self so the PhotoImage is not garbage-collected
+        self.current_photo = ImageTk.PhotoImage(disp_img)
+
+        # Draw image
+        self.canvas.create_image(offset_x, offset_y, anchor="nw", image=self.current_photo)
+        self.transform = DisplayTransform(scale=scale, offset_x=offset_x, offset_y=offset_y)
+
+        # Draw boxes
+        self._redraw_boxes()
+
+    def _redraw_boxes(self) -> None:
+        """Redraw the current image's boxes as green canvas rectangles.
+
+        Entries that are not dicts, or whose "bbox" is missing or does not
+        have four values, are not drawn. Each rectangle is tagged "box" and
+        "box_<index>", where the index is the entry's position in the list.
+        """
+        self.canvas.delete("box")
+        if self.current_image_path is None or self.transform is None:
+            return
+
+        entries = self.annotations.get(self.current_image_path.name, []) or []
+        for index, entry in enumerate(entries):
+            coords = entry.get("bbox") if isinstance(entry, dict) else None
+            if not coords or len(coords) != 4:
+                continue
+            left, top, right, bottom = coords
+            # Stored boxes are in image pixels; convert both corners
+            top_left = self._img_to_disp(left, top)
+            bottom_right = self._img_to_disp(right, bottom)
+            self.canvas.create_rectangle(
+                *top_left,
+                *bottom_right,
+                outline="#00FF66",
+                width=2,
+                tags=("box", f"box_{index}"),
+            )
+
+    def _img_to_disp(self, x: float, y: float) -> tuple[float, float]:
+        """Convert original-image pixel coordinates to canvas coordinates.
+
+        Requires ``self.transform`` to be set.
+        """
+        assert self.transform is not None
+        tf = self.transform
+        disp_x = x * tf.scale + tf.offset_x
+        disp_y = y * tf.scale + tf.offset_y
+        return (disp_x, disp_y)
+
+    def _disp_to_img(self, x: float, y: float) -> tuple[float, float]:
+        """Convert canvas coordinates to original-image pixel coordinates.
+
+        When an image is loaded the result is clamped to ``[0, width]`` and
+        ``[0, height]``. Requires ``self.transform`` to be set.
+        """
+        assert self.transform is not None
+        tf = self.transform
+        img_x = (x - tf.offset_x) / tf.scale
+        img_y = (y - tf.offset_y) / tf.scale
+
+        image = self.current_image
+        if image is not None:
+            # Keep points dragged outside the picture on its border
+            width, height = image.size
+            img_x = min(max(img_x, 0.0), float(width))
+            img_y = min(max(img_y, 0.0), float(height))
+        return img_x, img_y
+
+    # ------------------------- Mouse interactions -------------------------
+
+    def _on_mouse_down(self, event: tk.Event) -> None:
+        """Start a box drag at the pointer position (no-op without an image)."""
+        prerequisites = (self.current_image, self.current_image_path, self.transform)
+        if any(item is None for item in prerequisites):
+            return
+
+        self._draw_start = (event.x, event.y)
+
+        # Remove a preview rectangle left over from an earlier drag
+        leftover = self._preview_rect_id
+        if leftover is not None:
+            self.canvas.delete(leftover)
+            self._preview_rect_id = None
+
+    def _on_mouse_move(self, event: tk.Event) -> None:
+        """Update the dashed preview rectangle while a box is being dragged."""
+        start = self._draw_start
+        if start is None or self.current_image is None or self.transform is None:
+            return
+
+        anchor_x, anchor_y = start
+        pointer_x, pointer_y = event.x, event.y
+
+        # The preview is replaced by a fresh rectangle on every motion event
+        if self._preview_rect_id is not None:
+            self.canvas.delete(self._preview_rect_id)
+
+        self._preview_rect_id = self.canvas.create_rectangle(
+            anchor_x,
+            anchor_y,
+            pointer_x,
+            pointer_y,
+            outline="#FFCC00",
+            width=2,
+            dash=(4, 2),
+        )
+
+    def _on_mouse_up(self, event: tk.Event) -> None:
+        """Finish a drag: store the drawn box for the current image and save.
+
+        The box is converted to original-image pixels. Drags smaller than two
+        image pixels in either direction are ignored.
+        """
+        # The drag ends here whatever happens next
+        start = self._draw_start
+        self._draw_start = None
+        if start is None or self.current_image is None or self.current_image_path is None or self.transform is None:
+            return
+
+        if self._preview_rect_id is not None:
+            self.canvas.delete(self._preview_rect_id)
+            self._preview_rect_id = None
+
+        # Convert to image coords
+        ix0, iy0 = self._disp_to_img(*start)
+        ix1, iy1 = self._disp_to_img(event.x, event.y)
+
+        # Order the corners so the box is always (left, top, right, bottom)
+        if ix1 < ix0:
+            ix0, ix1 = ix1, ix0
+        if iy1 < iy0:
+            iy0, iy1 = iy1, iy0
+
+        # Ignore tiny drags
+        if min(ix1 - ix0, iy1 - iy0) < 2:
+            return
+
+        self.annotations.setdefault(self.current_image_path.name, []).append(
+            {
+                "bbox": [float(ix0), float(iy0), float(ix1), float(iy1)],
+                "label": self.label_var.get() or "knot",
+                "confidence": 1.0,
+                "source": "manual",
+            }
+        )
+        self._save_annotations()
+
+        self._refresh_box_list()
+        self._redraw_boxes()
+
+    # ------------------------- Box list actions -------------------------
+
+    def _refresh_box_list(self) -> None:
+        """Refill the listbox with one row per stored box of the current image.
+
+        Every stored entry gets exactly one row, including malformed ones
+        (shown as "<invalid entry>"). Row ``i`` therefore always refers to
+        box ``i``, which deletion by listbox index relies on.
+        """
+        self.box_list.delete(0, tk.END)
+        if self.current_image_path is None:
+            return
+        boxes = self.annotations.get(self.current_image_path.name, []) or []
+        for idx, box in enumerate(boxes):
+            try:
+                x1, y1, x2, y2 = (float(v) for v in box["bbox"])
+                conf = float(box.get("confidence", 1.0))
+                label = str(box.get("label", "knot"))
+                src = str(box.get("source", "manual"))
+            except (TypeError, ValueError, KeyError, AttributeError):
+                # Skipping the row would shift all later rows against the
+                # stored indices; show a placeholder instead so the entry
+                # stays visible and deletable.
+                row = f"[x] {idx}: <invalid entry>"
+            else:
+                row = f"[x] {idx}: {label} ({src}, {conf:.3f})  ({x1:.1f},{y1:.1f})-({x2:.1f},{y2:.1f})"
+            self.box_list.insert(tk.END, row)
+
+    def _selected_box_index(self) -> int | None:
+        """Return the index of the first selected listbox row, or ``None``.
+
+        Callers use the row index as the index into the stored box list.
+        """
+        picked = self.box_list.curselection()
+        return int(picked[0]) if picked else None
+
+    def delete_selected_box(self) -> None:
+        """Delete the box selected in the listbox, then save and redraw.
+
+        Does nothing when no image is loaded, nothing is selected, or the
+        selected index is out of range.
+        """
+        if self.current_image_path is None:
+            return
+
+        row = self._selected_box_index()
+        if row is None:
+            return
+
+        boxes = self.annotations.get(self.current_image_path.name, []) or []
+        if not 0 <= row < len(boxes):
+            return
+
+        boxes.pop(row)
+        self._save_annotations()
+        self._refresh_box_list()
+        self._redraw_boxes()
+
+    def _on_box_double_click(self, _event: tk.Event) -> None:
+        """Handle a double-click in the listbox by deleting the selected box."""
+        self.delete_selected_box()
+
+    def clear_all_boxes(self) -> None:
+        """Remove every box of the current image, then save and redraw."""
+        shown = self.current_image_path
+        if shown is None:
+            return
+        # Replace the list rather than emptying it in place
+        self.annotations[shown.name] = []
+        self._save_annotations()
+        self._refresh_box_list()
+        self._redraw_boxes()
+
+    # ------------------------- Misc -------------------------
+
+    def _set_status(self, msg: str) -> None:
+        """Show ``msg`` in the status label next to the Prev/Next buttons."""
+        self.status_label.config(text=msg)
+
+    def _on_load_dir(self) -> None:
+        """Handle the "Load" button: load the directory typed into the entry.
+
+        An empty entry is rejected with a status message. ``Path("")`` is the
+        current working directory, which would otherwise be loaded silently.
+        """
+        raw = self.images_dir_var.get().strip()
+        if not raw:
+            self._set_status("No images dir provided")
+            return
+        self._load_images_dir(Path(raw))
+
+
+def main() -> None:
+    """Parse the command line, create the window and run the Tk main loop."""
+    fallback_dir = Path(DEFAULT_IMAGES_DIR) if DEFAULT_IMAGES_DIR else Path("IMAGE/")
+
+    parser = argparse.ArgumentParser(description="Tkinter annotation GUI")
+    parser.add_argument(
+        "--images-dir",
+        type=Path,
+        default=fallback_dir,
+        help="Directory containing images and annotations.json",
+    )
+    cli = parser.parse_args()
+
+    window = tk.Tk()
+    gui = TkAnnotationApp(window, cli.images_dir)
+
+    # Re-render on first layout so scaling is correct
+    def _rerender_after_layout() -> None:
+        if gui.current_image is None:
+            return
+        gui._render_image_and_boxes()
+
+    window.after(50, _rerender_after_layout)
+    window.mainloop()
+
+
+# Start the GUI only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()