# TalkEdit/backend/services/background_removal.py

"""
AI background removal using MediaPipe for person segmentation.
Applied during export as a post-processing step — no real-time preview.
"""

import logging
import subprocess
import tempfile
import os
from pathlib import Path

logger = logging.getLogger(__name__)

# Probe for the optional MediaPipe dependency once, at import time. The flag
# decides which backend remove_background_on_export() dispatches to.
try:
    import mediapipe as mp
except ImportError:
    MEDIAPIPE_AVAILABLE = False
else:
    MEDIAPIPE_AVAILABLE = True


def is_available() -> bool:
    """Return True if the `mediapipe` package was importable when this module loaded."""
    return MEDIAPIPE_AVAILABLE


def remove_background_on_export(
    input_path: str,
    output_path: str,
    replacement: str = "blur",
    replacement_value: str = "",
) -> str:
    """
    Replace the background of a video as an export post-processing step.

    Both paths are resolved to absolute paths, then the work is handed to the
    MediaPipe segmentation backend when MediaPipe was importable at module
    load, and to the FFmpeg-only fallback otherwise.

    Args:
        input_path: source video
        output_path: destination
        replacement: 'blur', 'color', or 'image'
        replacement_value: hex color or image path (for color/image modes)

    Returns:
        The value returned by the selected backend (the resolved output path).
    """
    source = str(Path(input_path).resolve())
    target = str(Path(output_path).resolve())

    # Pick the backend first, then make a single call with identical arguments.
    backend = _remove_with_mediapipe if MEDIAPIPE_AVAILABLE else _remove_with_ffmpeg_portrait
    return backend(source, target, replacement, replacement_value)


def _remove_with_mediapipe(
    input_path: str,
    output_path: str,
    replacement: str = "blur",
    replacement_value: str = "",
) -> str:
    """Replace a video's background using MediaPipe Selfie Segmentation + FFmpeg.

    Frames are read with OpenCV, segmented, composited over the requested
    replacement and written as PNGs into a temporary directory. FFmpeg then
    encodes the PNG sequence with libx264 and maps the first audio stream of
    the input (if there is one), encoded as AAC. The temporary directory and
    the capture handle are released whether or not processing succeeds.

    Args:
        input_path: Source video path.
        output_path: Destination video path (FFmpeg is run with ``-y``).
        replacement: 'blur' composites the person over a Gaussian-blurred copy
            of the frame, 'color' over a solid fill, 'image' over a still image
            resized to the frame size. Any other value, or 'image' with an
            image that cannot be loaded, leaves the frames unmodified.
        replacement_value: Hex colour for 'color' ('#RRGGBB', 'RRGGBB' or the
            'RGB' shorthand; green when empty) or the image path for 'image'.

    Returns:
        output_path.

    Raises:
        RuntimeError: If the input cannot be opened or decoded, the colour
            cannot be parsed, a frame cannot be written, or FFmpeg fails.

    If cv2, numpy or mediapipe cannot be imported, the call is delegated to
    _remove_with_ffmpeg_portrait instead of raising.
    """
    import shutil

    cap = None
    temp_dir = None
    try:
        import cv2
        import numpy as np
        import mediapipe as mp

        mp_selfie_segmentation = mp.solutions.selfie_segmentation

        # Resolve the replacement background up front.
        bg_color = None
        bg_image = None
        if replacement == "color":
            color_hex = (replacement_value or "#00FF00").lstrip("#")
            if len(color_hex) == 3:
                # Expand CSS-style shorthand: "0F0" -> "00FF00".
                color_hex = "".join(ch * 2 for ch in color_hex)
            red, green, blue = (int(color_hex[i:i + 2], 16) for i in (0, 2, 4))
            bg_color = (blue, green, red)  # OpenCV frames are BGR
        elif replacement == "image":
            bg_image = cv2.imread(replacement_value) if replacement_value else None
            if bg_image is None:
                logger.warning(
                    "Background image %r could not be loaded; frames are left unmodified",
                    replacement_value,
                )

        is_blur = replacement == "blur"
        has_static_bg = bg_color is not None or bg_image is not None

        # Open video
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            raise RuntimeError(f"Could not open input video: {input_path}")
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # also rejects NaN
            raise RuntimeError(f"Could not determine frame rate of: {input_path}")
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Temp directory for processed frames
        temp_dir = tempfile.mkdtemp(prefix="aive_bgrem_")
        frame_dir = os.path.join(temp_dir, "frames")
        os.makedirs(frame_dir, exist_ok=True)

        # Solid-colour / resized-image background, built once per frame shape
        # instead of once per frame.
        static_bg = None
        frame_idx = 0

        with mp_selfie_segmentation.SelfieSegmentation(model_selection=0) as segmenter:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                if is_blur or has_static_bg:
                    # MediaPipe expects RGB input.
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    mask = segmenter.process(rgb).segmentation_mask
                    # Threshold the mask; add a channel axis so it broadcasts.
                    person = (mask > 0.5)[..., None]

                    if is_blur:
                        background = cv2.GaussianBlur(frame, (99, 99), 0)
                    else:
                        if static_bg is None or static_bg.shape != frame.shape:
                            if bg_image is not None:
                                static_bg = cv2.resize(
                                    bg_image, (frame.shape[1], frame.shape[0])
                                )
                            else:
                                static_bg = np.full(frame.shape, bg_color, dtype=np.uint8)
                        background = static_bg

                    output_frame = np.where(person, frame, background)
                else:
                    output_frame = frame

                out_path = os.path.join(frame_dir, f"frame_{frame_idx:06d}.png")
                if not cv2.imwrite(out_path, output_frame):
                    raise RuntimeError(f"Could not write frame: {out_path}")
                frame_idx += 1

                if frame_idx % 100 == 0:
                    logger.info("Background removal: %d/%d frames", frame_idx, total_frames)

        # Release the input before FFmpeg reads it for the audio track.
        cap.release()
        cap = None

        if frame_idx == 0:
            raise RuntimeError(f"No frames could be decoded from: {input_path}")

        # Encode frames back to video using FFmpeg
        ffmpeg = "ffmpeg"
        cmd = [
            ffmpeg, "-y",
            "-framerate", str(fps),
            "-i", os.path.join(frame_dir, "frame_%06d.png"),
            "-i", input_path,
            "-map", "0:v:0",
            "-map", "1:a:0?",
            "-c:v", "libx264", "-preset", "medium", "-crf", "18",
            "-c:a", "aac", "-b:a", "192k",
            "-shortest",
            "-pix_fmt", "yuv420p",
            output_path,
        ]
        encode = subprocess.run(cmd, capture_output=True, text=True)
        if encode.returncode != 0:
            raise RuntimeError(f"FFmpeg frame encode failed: {encode.stderr[-500:]}")

        logger.info("MediaPipe background removal completed -> %s", output_path)
        return output_path

    except ImportError:
        logger.warning("mediapipe/cv2 not available, falling back to FFmpeg portrait mode")
        return _remove_with_ffmpeg_portrait(input_path, output_path, replacement, replacement_value)
    except Exception as e:
        raise RuntimeError(f"MediaPipe background removal failed: {e}") from e
    finally:
        # Best-effort cleanup on both success and failure.
        if cap is not None:
            cap.release()
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)


def _remove_with_ffmpeg_portrait(
    input_path: str,
    output_path: str,
    replacement: str = "blur",
    replacement_value: str = "",
) -> str:
    """Fallback: basic FFmpeg-only background processing.

    Builds a single FFmpeg video filter from the requested mode and re-encodes
    the input with libx264/AAC. This is a crude stand-in: it uses no person
    segmentation. For proper person segmentation (color/image replacement),
    install:
      pip install mediapipe opencv-python

    Args:
        input_path: Source video path.
        output_path: Destination video path (FFmpeg is run with ``-y``).
        replacement: 'blur', 'color' or 'image'. Any other value, or 'image'
            with an empty replacement_value, stream-copies the input unchanged.
        replacement_value: Hex colour for 'color' (with or without a leading
            '#'; green when empty) or the image path for 'image'.

    Returns:
        output_path.

    Raises:
        RuntimeError: If FFmpeg exits with a non-zero status.
    """
    ffmpeg = "ffmpeg"

    # None means "no filter": the input is stream-copied as-is.
    video_filter = None
    if replacement == "blur":
        video_filter = "gblur=sigma=30"
    elif replacement == "color":
        # Accept '#RRGGBB' like the MediaPipe path does; the filter string
        # already supplies the '0x' prefix, so a leading '#' must be dropped.
        color = (replacement_value or "00FF00").lstrip("#") or "00FF00"
        video_filter = (
            "split[fg][bg];"
            f"[bg]colorkey=0x{color}:0.3:0.1[bg_key];"
            "[fg][bg_key]overlay"
        )
    elif replacement == "image" and replacement_value:
        # Normalise backslashes and escape ':' for the filtergraph syntax.
        escaped = replacement_value.replace("\\", "/").replace(":", "\\:")
        video_filter = (
            f"movie='{escaped}':loop=0,scale=iw:ih[bg];"
            "[0:v][bg]overlay=0:0:shortest=1"
        )

    if video_filter is None:
        cmd = [ffmpeg, "-y", "-i", input_path, "-c", "copy", output_path]
    else:
        cmd = [
            ffmpeg, "-y",
            "-i", input_path,
            "-vf", video_filter,
            "-c:v", "libx264", "-preset", "medium", "-crf", "18",
            "-c:a", "aac", "-b:a", "192k",
            "-movflags", "+faststart",
            output_path,
        ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg background removal failed: {result.stderr[-500:]}")

    logger.warning(
        "FFmpeg fallback background removal used (no MediaPipe). "
        "Install 'mediapipe' and 'opencv-python' for proper person segmentation."
    )
    return output_path