Files
TalkEdit/backend/services/background_removal.py
2026-05-05 23:31:18 -06:00

233 lines
7.5 KiB
Python

"""
AI background removal using MediaPipe for person segmentation.
Applied during export as a post-processing step — no real-time preview.
"""
import logging
import subprocess
import tempfile
import os
from pathlib import Path
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# MediaPipe is an optional dependency: record whether it could be imported so
# the export path can pick between the segmentation and FFmpeg-only backends.
try:
    import mediapipe as mp
except ImportError:
    MEDIAPIPE_AVAILABLE = False
else:
    MEDIAPIPE_AVAILABLE = True
def is_available() -> bool:
    """Return True when the optional ``mediapipe`` package was importable.

    The value is the module-level ``MEDIAPIPE_AVAILABLE`` flag, which is set
    once when this module is loaded.
    """
    return MEDIAPIPE_AVAILABLE
def remove_background_on_export(
    input_path: str,
    output_path: str,
    replacement: str = "blur",
    replacement_value: str = "",
) -> str:
    """
    Run background removal on a video as an export post-processing step.

    Both paths are resolved to absolute form, then the work is handed to the
    MediaPipe segmentation backend when ``MEDIAPIPE_AVAILABLE`` is true, and
    to the FFmpeg-only fallback otherwise.

    Args:
        input_path: source video
        output_path: destination
        replacement: 'blur', 'color', or 'image'
        replacement_value: hex color or image path (for color/image modes)

    Returns:
        Whatever the selected backend returns (the resolved output path).
    """
    source = str(Path(input_path).resolve())
    target = str(Path(output_path).resolve())

    # Only the chosen backend name is looked up, mirroring an if/else dispatch.
    backend = (
        _remove_with_mediapipe
        if MEDIAPIPE_AVAILABLE
        else _remove_with_ffmpeg_portrait
    )
    return backend(source, target, replacement, replacement_value)
def _hex_to_bgr(color_hex: str) -> tuple:
    """Convert an ``RRGGBB`` hex string (leading ``#`` optional) to a BGR tuple.

    The channels are returned blue-first because the frames they are
    composited with come from OpenCV in BGR order.

    Raises:
        ValueError: if three hex byte pairs cannot be parsed from the string.
    """
    digits = color_hex.lstrip("#")
    try:
        red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    except ValueError as exc:
        raise ValueError(f"Invalid hex color: {color_hex!r}") from exc
    return (blue, green, red)


def _remove_with_mediapipe(
    input_path: str,
    output_path: str,
    replacement: str = "blur",
    replacement_value: str = "",
) -> str:
    """Use MediaPipe Selfie Segmentation + FFmpeg for background removal.

    Every frame of ``input_path`` is segmented; pixels whose mask value is
    above 0.5 are kept and the rest are replaced according to ``replacement``.
    Processed frames are written as PNGs into a temporary directory and then
    encoded by FFmpeg (H.264 video, AAC audio taken from the input when it
    has an audio stream) into ``output_path``.

    Args:
        input_path: source video.
        output_path: destination video.
        replacement: 'blur' (Gaussian-blurred copy of the frame), 'color'
            (solid color) or 'image' (image resized to the frame). Any other
            value leaves the frames unmodified.
        replacement_value: hex color for 'color' (defaults to ``#00FF00``) or
            an image path for 'image'. If the image cannot be read, a solid
            green background is used instead.

    Returns:
        ``output_path``. If cv2/mediapipe cannot be imported, the result of
        the FFmpeg-only fallback is returned instead.

    Raises:
        RuntimeError: on any failure other than a missing import (unreadable
            input, invalid color, frame write or FFmpeg encode failure).
    """
    try:
        import shutil

        import cv2
        import numpy as np
        import mediapipe as mp

        mp_selfie_segmentation = mp.solutions.selfie_segmentation

        # Work out the effective mode and its static background source once.
        mode = replacement
        bg_color = None
        bg_image = None
        if mode == "color":
            bg_color = _hex_to_bgr(replacement_value or "#00FF00")
        elif mode == "image":
            bg_image = cv2.imread(replacement_value) if replacement_value else None
            if bg_image is None:
                # Missing or unreadable image: fall back to a green backdrop
                # instead of silently exporting untouched frames.
                logger.warning(
                    "Background image %r could not be read; using solid green",
                    replacement_value,
                )
                mode = "color"
                bg_color = (0, 255, 0)

        temp_dir = tempfile.mkdtemp(prefix="aive_bgrem_")
        try:
            frame_dir = os.path.join(temp_dir, "frames")
            os.makedirs(frame_dir, exist_ok=True)

            cap = cv2.VideoCapture(input_path)
            try:
                if not cap.isOpened():
                    raise RuntimeError(f"Could not open input video: {input_path}")

                fps = cap.get(cv2.CAP_PROP_FPS)
                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                if not fps > 0:
                    # Also catches NaN. FFmpeg rejects a non-positive
                    # -framerate, so assume a common default rather than
                    # failing at encode time.
                    logger.warning(
                        "Input reports no valid FPS (%s); assuming 30", fps
                    )
                    fps = 30.0

                # Static backgrounds are built lazily from the first frame's
                # shape and then reused for every subsequent frame.
                background = None
                frame_idx = 0
                with mp_selfie_segmentation.SelfieSegmentation(
                    model_selection=0
                ) as segmenter:
                    while True:
                        ret, frame = cap.read()
                        if not ret:
                            break

                        # MediaPipe expects RGB; OpenCV decodes to BGR.
                        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        mask = segmenter.process(rgb).segmentation_mask
                        person = mask > 0.5

                        if mode == "blur":
                            fill = cv2.GaussianBlur(frame, (99, 99), 0)
                        elif mode == "color":
                            if background is None or background.shape != frame.shape:
                                background = np.full(
                                    frame.shape, bg_color, dtype=np.uint8
                                )
                            fill = background
                        elif mode == "image":
                            if (
                                background is None
                                or background.shape[:2] != frame.shape[:2]
                            ):
                                background = cv2.resize(
                                    bg_image, (frame.shape[1], frame.shape[0])
                                )
                            fill = background
                        else:
                            fill = None

                        if fill is None:
                            output_frame = frame
                        else:
                            output_frame = np.where(person[..., None], frame, fill)

                        out_path = os.path.join(
                            frame_dir, f"frame_{frame_idx:06d}.png"
                        )
                        if not cv2.imwrite(out_path, output_frame):
                            raise RuntimeError(f"Could not write frame: {out_path}")
                        frame_idx += 1
                        if frame_idx % 100 == 0:
                            logger.info(
                                "Background removal: %d/%d frames",
                                frame_idx,
                                total_frames,
                            )
            finally:
                cap.release()

            if frame_idx == 0:
                raise RuntimeError(f"No frames decoded from: {input_path}")

            # Encode frames back to video using FFmpeg; audio (if any) is
            # taken from the original input.
            ffmpeg = "ffmpeg"
            cmd = [
                ffmpeg, "-y",
                "-framerate", str(fps),
                "-i", os.path.join(frame_dir, "frame_%06d.png"),
                "-i", input_path,
                "-map", "0:v:0",
                "-map", "1:a:0?",
                "-c:v", "libx264", "-preset", "medium", "-crf", "18",
                "-c:a", "aac", "-b:a", "192k",
                "-shortest",
                "-pix_fmt", "yuv420p",
                output_path,
            ]
            encode = subprocess.run(cmd, capture_output=True, text=True)
            if encode.returncode != 0:
                raise RuntimeError(
                    f"FFmpeg frame encode failed: {encode.stderr[-500:]}"
                )
        finally:
            # Best-effort cleanup that also runs when processing fails.
            shutil.rmtree(temp_dir, ignore_errors=True)

        logger.info("MediaPipe background removal completed -> %s", output_path)
        return output_path
    except ImportError:
        logger.warning("mediapipe/cv2 not available, falling back to FFmpeg portrait mode")
        return _remove_with_ffmpeg_portrait(input_path, output_path, replacement, replacement_value)
    except Exception as e:
        raise RuntimeError(f"MediaPipe background removal failed: {e}") from e
def _remove_with_ffmpeg_portrait(
    input_path: str,
    output_path: str,
    replacement: str = "blur",
    replacement_value: str = "",
) -> str:
    """Fallback: crude FFmpeg-only processing used when MediaPipe is missing.

    No person segmentation happens here. Depending on ``replacement``:

    * 'blur'  - applies ``gblur=sigma=30`` to the video stream.
    * 'color' - splits the stream, colorkeys one copy against the given
      color and overlays it on the other copy.
    * 'image' - loads ``replacement_value`` through the ``movie`` source and
      combines it with the video using ``overlay``.
    * anything else (or 'image' without a path) - stream-copies the input.

    For proper person segmentation (color/image replacement), install:
        pip install mediapipe opencv-python

    Args:
        input_path: source video.
        output_path: destination video.
        replacement: 'blur', 'color', or 'image'.
        replacement_value: hex color (with or without a leading ``#``,
            default ``00FF00``) or image path.

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: if FFmpeg exits with a non-zero status.
    """
    ffmpeg = "ffmpeg"

    if replacement == "blur":
        video_filter = "gblur=sigma=30"
    elif replacement == "color":
        # The value is emitted after a "0x" prefix, so a leading '#' (which
        # the MediaPipe path accepts) must be dropped to keep the color valid.
        color = (replacement_value or "00FF00").lstrip("#")
        video_filter = (
            "split[fg][bg];"
            f"[bg]colorkey=0x{color}:0.3:0.1[bg_key];"
            "[fg][bg_key]overlay"
        )
    elif replacement == "image" and replacement_value:
        # Normalise Windows separators and escape ':' for the filter parser.
        escaped = replacement_value.replace("\\", "/").replace(":", "\\:")
        video_filter = (
            f"movie='{escaped}':loop=0,scale=iw:ih[bg];"
            "[0:v][bg]overlay=0:0:shortest=1"
        )
    else:
        video_filter = "null"

    if video_filter == "null":
        # Nothing to filter: copy the streams without re-encoding.
        cmd = [ffmpeg, "-y", "-i", input_path, "-c", "copy", output_path]
    else:
        cmd = [
            ffmpeg, "-y",
            "-i", input_path,
            "-vf", video_filter,
            "-c:v", "libx264", "-preset", "medium", "-crf", "18",
            "-c:a", "aac", "-b:a", "192k",
            "-movflags", "+faststart",
            output_path,
        ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg background removal failed: {result.stderr[-500:]}")

    logger.warning(
        "FFmpeg fallback background removal used (no MediaPipe). "
        "Install 'mediapipe' and 'opencv-python' for proper person segmentation."
    )
    return output_path