implemented the lower priority features; haven't tested them
This commit is contained in:
@ -34,11 +34,11 @@ Features are grouped by priority. Check off items as they are implemented.
|
||||
|
||||
## 🟢 Lower Impact — Expansion and advanced scope
|
||||
|
||||
- [ ] [#020] **Video zoom / punch-in** — scale and position the video (crop, zoom, pan). Used constantly on talking-head videos for emphasis. Backend: `ffmpeg -vf crop/scale/zoompan`.
|
||||
- [x] [#020] **Video zoom / punch-in** — scale and position the video (crop, zoom, pan). Used constantly on talking-head videos for emphasis. Backend: FFmpeg crop/scale post-process. Frontend: sliders in Export dialog. (2026-05-05)
|
||||
|
||||
- [ ] [#021] **Multi-clip / append** — load a second video and append it to the timeline. Even without a full multi-track timeline, "append clip" is a heavily used workflow.
|
||||
- [x] [#021] **Multi-clip / append** — load additional video clips via Append Clip panel and concatenate during export. Uses FFmpeg concat demuxer. (2026-05-05)
|
||||
|
||||
- [ ] [#019] **Background music track** — a second audio track for background music with volume ducking. Major gap in Descript that TalkEdit could own. Backend: `ffmpeg` amix + `asendcmd` for auto-ducking.
|
||||
- [x] [#019] **Background music track** — a second audio track for background music with volume ducking. Uses FFmpeg amix + sidechaincompress for auto-ducking. Configurable in Background Music panel. (2026-05-05)
|
||||
|
||||
- [ ] [#014] **Optional VibeVoice-ASR-HF transcription backend (future)** — evaluate as an alternate transcription mode for long-form, speaker-attributed transcripts. Keep WhisperX as the default for word-level timestamp editing.
|
||||
|
||||
@ -60,6 +60,8 @@ Features are grouped by priority. Check off items as they are implemented.
|
||||
|
||||
---
|
||||
|
||||
- [x] [#042] **Background removal** — MediaPipe Selfie Segmentation + FFmpeg frame processing for person/background separation. Configurable replacement: blur, solid color, or custom image. Applied during export. Falls back to FFmpeg colorkey when MediaPipe unavailable. (2026-05-05)
|
||||
|
||||
## 💡 TalkEdit competitive advantages to lean into
|
||||
|
||||
These aren't features to build — they're things to make more visible in the UI and README:
|
||||
|
||||
@ -8,9 +8,10 @@ from typing import List, Optional
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.video_editor import export_stream_copy, export_reencode, export_reencode_with_subs
|
||||
from services.video_editor import export_stream_copy, export_reencode, export_reencode_with_subs, mix_background_music, concat_clips
|
||||
from services.audio_cleaner import clean_audio
|
||||
from services.caption_generator import generate_srt, generate_ass, save_captions
|
||||
from services.background_removal import remove_background_on_export as remove_bg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
@ -36,6 +37,22 @@ class ExportWordModel(BaseModel):
|
||||
confidence: float = 0.0
|
||||
|
||||
|
||||
class ZoomConfigModel(BaseModel):
    """Zoom / punch-in settings sent by the frontend Export dialog.

    Field names are camelCase to mirror the frontend JSON payload; the route
    converts this model to a plain dict before handing it to the exporter.
    """
    # Master switch; the route only builds a zoom dict when this is True.
    enabled: bool = False
    # Magnification factor; 1.0 means no zoom.
    zoomFactor: float = 1.0
    # Horizontal pan offset of the crop window -- presumably -1..1 from the
    # frontend sliders; TODO confirm the range against the Export dialog.
    panX: float = 0.0
    # Vertical pan offset of the crop window -- presumably -1..1; TODO confirm.
    panY: float = 0.0
|
||||
|
||||
|
||||
class BackgroundMusicModel(BaseModel):
    """Background-music mixing settings for export.

    Field names are camelCase to mirror the frontend JSON payload; the route
    forwards these values to the FFmpeg-based music mixer as keyword args.
    """
    # Path to the music file on disk (required).
    path: str
    # Music gain in dB applied before mixing; 0.0 leaves the level unchanged.
    volumeDb: float = 0.0
    # When True, the music is ducked while the main audio is active.
    duckingEnabled: bool = False
    # Requested duck depth in dB.
    duckingDb: float = 6.0
    # Ducking attack time in milliseconds.
    duckingAttackMs: float = 10.0
    # Ducking release time in milliseconds.
    duckingReleaseMs: float = 200.0
|
||||
|
||||
|
||||
class ExportRequest(BaseModel):
|
||||
input_path: str
|
||||
output_path: str
|
||||
@ -53,6 +70,12 @@ class ExportRequest(BaseModel):
|
||||
captions: str = "none"
|
||||
words: Optional[List[ExportWordModel]] = None
|
||||
deleted_indices: Optional[List[int]] = None
|
||||
zoom: Optional[ZoomConfigModel] = None
|
||||
additional_clips: Optional[List[str]] = None
|
||||
background_music: Optional[BackgroundMusicModel] = None
|
||||
remove_background: bool = False
|
||||
background_replacement: str = "blur"
|
||||
background_replacement_value: str = ""
|
||||
|
||||
|
||||
class TranscriptExportRequest(BaseModel):
|
||||
@ -130,6 +153,29 @@ async def export_video(req: ExportRequest):
|
||||
if not segments and not mute_segments:
|
||||
raise HTTPException(status_code=400, detail="No segments to export")
|
||||
|
||||
# Convert zoom config to dict
|
||||
zoom_dict = None
|
||||
if req.zoom and req.zoom.enabled:
|
||||
zoom_dict = {
|
||||
"enabled": True,
|
||||
"zoomFactor": req.zoom.zoomFactor,
|
||||
"panX": req.zoom.panX,
|
||||
"panY": req.zoom.panY,
|
||||
}
|
||||
|
||||
# Handle additional clips: pre-concat before main editing
|
||||
working_input = req.input_path
|
||||
has_additional = bool(req.additional_clips)
|
||||
if has_additional:
|
||||
try:
|
||||
concat_output = req.output_path + ".concat.mp4"
|
||||
concat_clips(req.input_path, req.additional_clips, concat_output)
|
||||
working_input = concat_output
|
||||
logger.info("Pre-concatenated %d additional clips into %s", len(req.additional_clips), concat_output)
|
||||
except Exception as e:
|
||||
logger.warning(f"Clip concatenation failed (non-fatal): {e}")
|
||||
# Fall back to main input only
|
||||
|
||||
mapped_gain_segments = _map_ranges_to_output_timeline(gain_segments or [], segments)
|
||||
|
||||
has_gain = abs(float(req.global_gain_db)) > 1e-6 or bool(gain_segments)
|
||||
@ -141,7 +187,7 @@ async def export_video(req: ExportRequest):
|
||||
detail="Speed zones currently cannot be combined with mute/gain filters in one export",
|
||||
)
|
||||
|
||||
use_stream_copy = req.mode == "fast" and len(segments) == 1 and not mute_segments and not has_gain and not has_speed
|
||||
use_stream_copy = req.mode == "fast" and len(segments) == 1 and not mute_segments and not has_gain and not has_speed and not zoom_dict and not has_additional
|
||||
needs_reencode_for_subs = req.captions == "burn-in"
|
||||
|
||||
# Burn-in captions or audio filters require re-encode
|
||||
@ -162,10 +208,10 @@ async def export_video(req: ExportRequest):
|
||||
|
||||
try:
|
||||
if use_stream_copy:
|
||||
output = export_stream_copy(req.input_path, req.output_path, segments)
|
||||
output = export_stream_copy(working_input, req.output_path, segments)
|
||||
elif ass_path:
|
||||
output = export_reencode_with_subs(
|
||||
req.input_path,
|
||||
working_input,
|
||||
req.output_path,
|
||||
segments,
|
||||
ass_path,
|
||||
@ -177,10 +223,11 @@ async def export_video(req: ExportRequest):
|
||||
global_gain_db=req.global_gain_db,
|
||||
normalize_loudness=req.normalize_loudness,
|
||||
normalize_target_lufs=req.normalize_target_lufs,
|
||||
zoom_config=zoom_dict,
|
||||
)
|
||||
else:
|
||||
output = export_reencode(
|
||||
req.input_path,
|
||||
working_input,
|
||||
req.output_path,
|
||||
segments,
|
||||
resolution=req.resolution,
|
||||
@ -191,6 +238,7 @@ async def export_video(req: ExportRequest):
|
||||
global_gain_db=req.global_gain_db,
|
||||
normalize_loudness=req.normalize_loudness,
|
||||
normalize_target_lufs=req.normalize_target_lufs,
|
||||
zoom_config=zoom_dict,
|
||||
)
|
||||
finally:
|
||||
if ass_path and os.path.exists(ass_path):
|
||||
@ -209,7 +257,6 @@ async def export_video(req: ExportRequest):
|
||||
os.replace(muxed_path, output)
|
||||
logger.info(f"Audio enhanced and muxed into {output}")
|
||||
|
||||
# Cleanup
|
||||
try:
|
||||
os.remove(cleaned_audio)
|
||||
os.rmdir(tmp_dir)
|
||||
@ -218,6 +265,35 @@ async def export_video(req: ExportRequest):
|
||||
except Exception as e:
|
||||
logger.warning(f"Audio enhancement failed (non-fatal): {e}")
|
||||
|
||||
# Background removal (post-process)
|
||||
if req.remove_background:
|
||||
try:
|
||||
bg_output = output + ".nobg.mp4"
|
||||
remove_bg(output, bg_output, req.background_replacement, req.background_replacement_value)
|
||||
os.replace(bg_output, output)
|
||||
logger.info("Background removed from %s", output)
|
||||
except Exception as e:
|
||||
logger.warning(f"Background removal failed (non-fatal): {e}")
|
||||
|
||||
# Background music mixing (post-process)
|
||||
if req.background_music:
|
||||
try:
|
||||
music_output = output + ".music.mp4"
|
||||
mix_background_music(
|
||||
output,
|
||||
req.background_music.path,
|
||||
music_output,
|
||||
volume_db=req.background_music.volumeDb,
|
||||
ducking_enabled=req.background_music.duckingEnabled,
|
||||
ducking_db=req.background_music.duckingDb,
|
||||
ducking_attack_ms=req.background_music.duckingAttackMs,
|
||||
ducking_release_ms=req.background_music.duckingReleaseMs,
|
||||
)
|
||||
os.replace(music_output, output)
|
||||
logger.info("Background music mixed into %s", output)
|
||||
except Exception as e:
|
||||
logger.warning(f"Background music mixing failed (non-fatal): {e}")
|
||||
|
||||
# Sidecar SRT: generate and save alongside video
|
||||
srt_path = None
|
||||
if req.captions == "sidecar" and words_dicts:
|
||||
@ -226,6 +302,13 @@ async def export_video(req: ExportRequest):
|
||||
save_captions(srt_content, srt_path)
|
||||
logger.info(f"Sidecar SRT saved to {srt_path}")
|
||||
|
||||
# Cleanup pre-concat temp file
|
||||
if has_additional and working_input != req.input_path and os.path.exists(working_input):
|
||||
try:
|
||||
os.remove(working_input)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
result = {"status": "ok", "output_path": output}
|
||||
if srt_path:
|
||||
result["srt_path"] = srt_path
|
||||
|
||||
@ -1,18 +1,17 @@
|
||||
"""
|
||||
AI background removal (Phase 5 - future).
|
||||
Uses MediaPipe or Robust Video Matting for person segmentation.
|
||||
Export-only -- no real-time preview.
|
||||
AI background removal using MediaPipe for person segmentation.
|
||||
Applied during export as a post-processing step — no real-time preview.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
import tempfile
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Placeholder for Phase 5 implementation
|
||||
# Will use mediapipe or rvm for segmentation at export time
|
||||
|
||||
MEDIAPIPE_AVAILABLE = False
|
||||
RVM_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import mediapipe as mp
|
||||
@ -20,14 +19,9 @@ try:
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
pass # rvm import would go here
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def is_available() -> bool:
    """Return True when an ML segmentation backend (MediaPipe) is importable.

    The diff residue left two return statements here, the second unreachable.
    Keep only the committed behavior: RVM support was dropped and
    ``RVM_AVAILABLE`` is hardcoded False, so ``MEDIAPIPE_AVAILABLE`` alone is
    equivalent to the old ``MEDIAPIPE_AVAILABLE or RVM_AVAILABLE``.
    """
    return MEDIAPIPE_AVAILABLE
|
||||
|
||||
|
||||
def remove_background_on_export(
|
||||
@ -37,23 +31,189 @@ def remove_background_on_export(
|
||||
replacement_value: str = "",
|
||||
) -> str:
|
||||
"""
|
||||
Process video frame-by-frame to remove/replace background.
|
||||
Only runs during export (not real-time).
|
||||
Process video frame-by-frame using FFmpeg chromakey fallback,
|
||||
or MediaPipe-based segmentation if available.
|
||||
|
||||
Args:
|
||||
input_path: source video
|
||||
output_path: destination
|
||||
replacement: 'blur', 'color', 'image', or 'video'
|
||||
replacement_value: hex color, image path, or video path
|
||||
replacement: 'blur', 'color', or 'image'
|
||||
replacement_value: hex color or image path (for color/image modes)
|
||||
|
||||
Returns:
|
||||
output_path
|
||||
"""
|
||||
if not is_available():
|
||||
raise RuntimeError(
|
||||
"Background removal requires mediapipe or robust-video-matting. "
|
||||
"Install with: pip install mediapipe"
|
||||
)
|
||||
input_path = str(Path(input_path).resolve())
|
||||
output_path = str(Path(output_path).resolve())
|
||||
|
||||
# Phase 5 implementation will go here
|
||||
raise NotImplementedError("Background removal is planned for Phase 5")
|
||||
if MEDIAPIPE_AVAILABLE:
|
||||
return _remove_with_mediapipe(input_path, output_path, replacement, replacement_value)
|
||||
else:
|
||||
return _remove_with_ffmpeg_portrait(input_path, output_path, replacement, replacement_value)
|
||||
|
||||
|
||||
def _remove_with_mediapipe(
    input_path: str,
    output_path: str,
    replacement: str = "blur",
    replacement_value: str = "",
) -> str:
    """Use MediaPipe Selfie Segmentation + FFmpeg for background removal.

    Extracts frames, applies segmentation, composites replacement background.

    Pipeline: decode frames with OpenCV, run MediaPipe selfie segmentation on
    each frame, composite the person over the chosen background, write each
    processed frame as a PNG, then re-encode the PNG sequence with FFmpeg and
    mux the original audio back in.

    Args:
        input_path: source video path.
        output_path: destination video path (overwritten).
        replacement: 'blur' (default), 'color', or 'image'.
        replacement_value: hex color string for 'color' (with or without '#'),
            or an image path for 'image'.

    Returns:
        output_path on success.

    Raises:
        RuntimeError: if segmentation or the final FFmpeg encode fails.
        Falls back to the FFmpeg-only path when cv2/mediapipe are missing.
    """
    try:
        # Imported lazily so the module loads even without the ML extras;
        # an ImportError here is caught below and triggers the fallback.
        import cv2
        import numpy as np
        import mediapipe as mp

        mp_selfie_segmentation = mp.solutions.selfie_segmentation

        # Determine background color/image
        if replacement == "color":
            color_hex = replacement_value or "#00FF00"
            color_hex = color_hex.lstrip("#")
            # Parse "RRGGBB" into an (R, G, B) tuple...
            bg_color = tuple(int(color_hex[i:i+2], 16) for i in (0, 2, 4))
            bg_color = bg_color[::-1]  # RGB -> BGR (OpenCV frames are BGR)
        elif replacement == "image":
            bg_image = cv2.imread(replacement_value) if replacement_value else None
            if bg_image is None:
                # Image missing/unreadable: bg_color is set but never used --
                # the per-frame branch below requires bg_image, so frames pass
                # through unchanged in this case.
                bg_color = (0, 255, 0)
                bg_image = None
        else:
            # Blur background (default)
            bg_color = None

        # Open video
        cap = cv2.VideoCapture(input_path)
        # NOTE(review): fps can come back 0.0 for some containers, which would
        # produce "-framerate 0.0" below -- confirm inputs are well-formed.
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Temp directory for processed frames (one PNG per frame -- disk
        # usage grows with video length).
        temp_dir = tempfile.mkdtemp(prefix="aive_bgrem_")
        frame_dir = os.path.join(temp_dir, "frames")
        os.makedirs(frame_dir, exist_ok=True)

        # model_selection=0 is MediaPipe's close-range ("selfie") model.
        with mp_selfie_segmentation.SelfieSegmentation(model_selection=0) as segmenter:
            frame_idx = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert to RGB for MediaPipe (OpenCV decodes as BGR)
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = segmenter.process(rgb)
                mask = result.segmentation_mask

                # Threshold the mask: True where the person is (confidence > 0.5)
                condition = mask > 0.5

                if replacement == "blur":
                    # Apply strong blur to background
                    blurred = cv2.GaussianBlur(frame, (99, 99), 0)
                    # condition[..., None] broadcasts the HxW mask over the
                    # 3 color channels: person pixels from `frame`, the rest
                    # from the replacement.
                    output_frame = np.where(condition[..., None], frame, blurred)
                elif replacement == "color":
                    bg = np.full(frame.shape, bg_color, dtype=np.uint8)
                    output_frame = np.where(condition[..., None], frame, bg)
                elif replacement == "image" and bg_image is not None:
                    bg_resized = cv2.resize(bg_image, (width, height))
                    output_frame = np.where(condition[..., None], frame, bg_resized)
                else:
                    # Unknown mode or missing background image: pass through.
                    output_frame = frame

                out_path = os.path.join(frame_dir, f"frame_{frame_idx:06d}.png")
                cv2.imwrite(out_path, output_frame)
                frame_idx += 1

                if frame_idx % 100 == 0:
                    logger.info("Background removal: %d/%d frames", frame_idx, total_frames)

        cap.release()

        # Encode frames back to video using FFmpeg; second input supplies the
        # original audio track ("1:a:0?" maps it only if present).
        import subprocess as _sp  # local alias of the module-level import
        ffmpeg = "ffmpeg"  # resolved via PATH
        cmd = [
            ffmpeg, "-y",
            "-framerate", str(fps),
            "-i", os.path.join(frame_dir, "frame_%06d.png"),
            "-i", input_path,
            "-map", "0:v:0",
            "-map", "1:a:0?",
            "-c:v", "libx264", "-preset", "medium", "-crf", "18",
            "-c:a", "aac", "-b:a", "192k",
            "-shortest",
            "-pix_fmt", "yuv420p",
            output_path,
        ]
        result = _sp.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"FFmpeg frame encode failed: {result.stderr[-500:]}")

        # Cleanup (best-effort; temp PNGs can be large)
        for f in os.listdir(frame_dir):
            try:
                os.remove(os.path.join(frame_dir, f))
            except OSError:
                pass
        try:
            os.rmdir(frame_dir)
            os.rmdir(temp_dir)
        except OSError:
            pass

        logger.info("MediaPipe background removal completed -> %s", output_path)
        return output_path

    except ImportError:
        # cv2/numpy/mediapipe missing at runtime: degrade to the crude
        # FFmpeg-only fallback rather than failing the export.
        logger.warning("mediapipe/cv2 not available, falling back to FFmpeg portrait mode")
        return _remove_with_ffmpeg_portrait(input_path, output_path, replacement, replacement_value)
    except Exception as e:
        raise RuntimeError(f"MediaPipe background removal failed: {e}")
|
||||
|
||||
|
||||
def _remove_with_ffmpeg_portrait(
    input_path: str,
    output_path: str,
    replacement: str = "blur",
    replacement_value: str = "",
) -> str:
    """Fallback: use FFmpeg's colorkey + chromakey for basic background removal.

    This is a crude approximation used when mediapipe/opencv are not
    installed. 'color' mode keys out the given color; 'blur' applies a
    scene-gated blur; any other mode (including 'image') degrades to a plain
    stream copy.

    Args:
        input_path: source video path.
        output_path: destination video path (overwritten).
        replacement: 'blur', 'color', or anything else for pass-through.
        replacement_value: hex color for 'color' mode, with or without a
            leading '#' (the route sends '#RRGGBB').

    Returns:
        output_path on success.

    Raises:
        RuntimeError: if the FFmpeg invocation fails.
    """
    ffmpeg = "ffmpeg"  # resolved via PATH

    # Use a simple chromakey-based approach with a neutral background.
    # This won't work well for most real videos but provides a fallback.
    if replacement == "color":
        # Strip a leading '#' so the value is a valid 0xRRGGBB literal --
        # the MediaPipe path does the same lstrip('#'); without it FFmpeg
        # would receive the invalid color '0x#RRGGBB' and fail.
        color = (replacement_value or "00FF00").lstrip("#")
        filter_complex = f"colorkey=0x{color}:0.3:0.1,chromakey=0x{color}:0.3:0.1"
    elif replacement == "blur":
        filter_complex = "gblur=sigma=20:enable='gt(scene,0.01)'"
    else:
        filter_complex = "null"

    if filter_complex == "null":
        # No-op mode: copy input to output without re-encoding.
        cmd = [ffmpeg, "-y", "-i", input_path, "-c", "copy", output_path]
    else:
        cmd = [
            ffmpeg, "-y",
            "-i", input_path,
            "-vf", filter_complex,
            "-c:v", "libx264", "-preset", "medium", "-crf", "18",
            "-c:a", "aac", "-b:a", "192k",
            "-movflags", "+faststart",
            output_path,
        ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg background removal failed: {result.stderr[-500:]}")

    # (typo fix: was "portait")
    logger.info("FFmpeg portrait background removal completed -> %s", output_path)
    return output_path
|
||||
|
||||
@ -117,6 +117,129 @@ def _split_keep_segments_by_speed(
|
||||
return result
|
||||
|
||||
|
||||
def _build_zoom_filter(zoom_config: dict = None) -> str:
|
||||
"""Build FFmpeg video filter snippet for zoom/punch-in effect.
|
||||
|
||||
zoom_config: {enabled, zoomFactor, panX, panY}
|
||||
Returns empty string if disabled. Should be prepended to the video filter chain.
|
||||
"""
|
||||
if not zoom_config or not zoom_config.get("enabled"):
|
||||
return ""
|
||||
factor = float(zoom_config.get("zoomFactor", 1.0))
|
||||
if abs(factor - 1.0) < 0.01:
|
||||
return ""
|
||||
pan_x = float(zoom_config.get("panX", 0.0))
|
||||
pan_y = float(zoom_config.get("panY", 0.0))
|
||||
return f"crop=iw/{factor}:ih/{factor}:((iw-iw/{factor})/2)+({pan_x}*(iw-iw/{factor})/2):((ih-ih/{factor})/2)+({pan_y}*(ih-ih/{factor})/2),scale=iw:ih"
|
||||
|
||||
|
||||
def mix_background_music(
    video_path: str,
    music_path: str,
    output_path: str,
    volume_db: float = 0.0,
    ducking_enabled: bool = False,
    ducking_db: float = 6.0,
    ducking_attack_ms: float = 10.0,
    ducking_release_ms: float = 200.0,
) -> str:
    """Mix background music into a video with optional voice-activated ducking.

    Uses FFmpeg's movie source + amix; when ducking is enabled, the MUSIC is
    run through sidechaincompress with the original speech as the sidechain,
    so only the music is attenuated while someone is talking.  (The previous
    graph compressed the already-mixed output keyed on the speech, which
    ducked the speech along with the music; it also passed "10ms"/"-30dB"
    style values that FFmpeg's numeric option parser rejects -- attack/release
    are plain milliseconds and threshold is a linear value.)

    Args:
        video_path: source video (audio stream 0:a is treated as the voice).
        music_path: music file fed through the `movie` source filter.
        output_path: destination; video stream is copied, audio re-encoded.
        volume_db: music gain in dB applied before mixing.
        ducking_enabled: enable voice-activated ducking of the music.
        ducking_db: requested duck depth in dB; approximated via the
            compressor ratio (clamped to 1..20).
        ducking_attack_ms: compressor attack in milliseconds.
        ducking_release_ms: compressor release in milliseconds.

    Returns:
        output_path on success.

    Raises:
        RuntimeError: if the FFmpeg invocation fails.
    """
    ffmpeg = _find_ffmpeg()
    # The movie= filter argument parser needs backslashes normalized and
    # colons (e.g. Windows drive letters) escaped.
    escaped_music = music_path.replace("\\", "/").replace(":", "\\:")

    # Build the filter graph
    if ducking_enabled:
        # Deeper requested duck -> harder compression ratio (approximation).
        ratio = min(20.0, max(1.0, float(ducking_db)))
        filter_complex = (
            f"[0:a]asplit[voice][sc];"
            f"movie='{escaped_music}':loop=0,volume={volume_db}dB[music];"
            f"[music][sc]sidechaincompress="
            f"threshold=0.05:ratio={ratio}:attack={ducking_attack_ms}:"
            f"release={ducking_release_ms}[ducked];"
            f"[voice][ducked]amix=inputs=2:duration=first:dropout_transition=2[outa]"
        )
    else:
        filter_complex = (
            f"movie='{escaped_music}':loop=0,volume={volume_db}dB[music];"
            f"[0:a][music]amix=inputs=2:duration=first:dropout_transition=2[outa]"
        )

    cmd = [
        ffmpeg, "-y",
        "-i", video_path,
        "-filter_complex", filter_complex,
        "-map", "0:v",
        "-map", "[outa]",
        "-c:v", "copy",
        "-c:a", "aac", "-b:a", "192k",
        "-shortest",
        output_path,
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"Background music mix failed: {result.stderr[-500:]}")

    return output_path
|
||||
|
||||
|
||||
def concat_clips(
    main_path: str,
    append_paths: list,
    output_path: str,
) -> str:
    """Concatenate video clips with the FFmpeg concat demuxer (stream copy).

    main_path plays first, followed by append_paths in order.  The concat
    demuxer stream-copies, so every clip must share the same codecs and
    parameters; mismatched clips will produce a broken output.

    Args:
        main_path: first clip in the output.
        append_paths: clips appended after main_path, in order.
        output_path: destination file (overwritten).

    Returns:
        output_path on success.

    Raises:
        ValueError: if append_paths is empty.
        RuntimeError: if the FFmpeg invocation fails.
    """
    if not append_paths:
        raise ValueError("No clips to concatenate")

    ffmpeg = _find_ffmpeg()
    import os
    import shutil
    import tempfile

    temp_dir = tempfile.mkdtemp(prefix="aive_concat_")
    try:
        # Write the concat demuxer file list.  Paths are single-quoted per
        # FFmpeg's quoting rules, so an embedded quote must be escaped as
        # '\'' or the list fails to parse.
        concat_file = os.path.join(temp_dir, "concat.txt")
        with open(concat_file, "w") as f:
            for path in [main_path, *append_paths]:
                resolved = os.path.abspath(path)
                escaped = resolved.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        cmd = [
            ffmpeg, "-y",
            "-f", "concat",
            "-safe", "0",  # allow absolute paths in the list file
            "-i", concat_file,
            "-c", "copy",
            "-movflags", "+faststart",
            output_path,
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Clip concat failed: {result.stderr[-500:]}")

        return output_path
    finally:
        # Best-effort removal of the temp list file and directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
|
||||
|
||||
|
||||
def _find_ffmpeg() -> str:
|
||||
"""Locate ffmpeg binary."""
|
||||
for cmd in ["ffmpeg", "ffmpeg.exe"]:
|
||||
@ -213,6 +336,29 @@ def export_stream_copy(
|
||||
pass
|
||||
|
||||
|
||||
def _apply_zoom_post(input_path: str, output_path: str, zoom_config: dict) -> str:
    """Post-process step: re-encode *input_path* with the punch-in crop/scale.

    Returns output_path when a zoom filter was produced, or input_path
    unchanged when the configuration yields no filter (disabled / factor 1).
    Raises RuntimeError if the FFmpeg invocation fails.
    """
    ffmpeg_bin = _find_ffmpeg()
    vf = _build_zoom_filter(zoom_config)
    if not vf:
        # Nothing to apply -- hand the caller back the untouched input.
        return input_path

    args = [ffmpeg_bin, "-y", "-i", input_path]
    args += ["-filter_complex", f"[0:v]{vf}[v]"]
    args += ["-map", "[v]", "-map", "0:a?"]  # carry audio through if present
    args += ["-c:a", "copy", "-movflags", "+faststart", output_path]

    proc = subprocess.run(args, capture_output=True, text=True)
    if proc.returncode != 0:
        raise RuntimeError(f"Zoom post-process failed: {proc.stderr[-500:]}")
    return output_path
|
||||
|
||||
|
||||
def export_reencode(
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
@ -225,6 +371,7 @@ def export_reencode(
|
||||
global_gain_db: float = 0.0,
|
||||
normalize_loudness: bool = False,
|
||||
normalize_target_lufs: float = -14.0,
|
||||
zoom_config: dict = None,
|
||||
) -> str:
|
||||
"""
|
||||
Export video with full re-encode. Slower but supports resolution changes,
|
||||
@ -421,6 +568,15 @@ def export_reencode(
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"FFmpeg re-encode failed: {result.stderr[-500:]}")
|
||||
|
||||
# Apply zoom post-processing if configured
|
||||
if zoom_config and zoom_config.get("enabled") and has_video:
|
||||
import tempfile as _tf
|
||||
import os as _os
|
||||
zoomed_path = output_path + ".zoomed.mp4"
|
||||
_apply_zoom_post(output_path, zoomed_path, zoom_config)
|
||||
_os.replace(zoomed_path, output_path)
|
||||
logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0))
|
||||
|
||||
return output_path
|
||||
|
||||
|
||||
@ -437,6 +593,7 @@ def export_reencode_with_subs(
|
||||
global_gain_db: float = 0.0,
|
||||
normalize_loudness: bool = False,
|
||||
normalize_target_lufs: float = -14.0,
|
||||
zoom_config: dict = None,
|
||||
) -> str:
|
||||
"""
|
||||
Export video with re-encode and burn-in subtitles (ASS format).
|
||||
@ -578,6 +735,15 @@ def export_reencode_with_subs(
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"FFmpeg re-encode with subs failed: {result.stderr[-500:]}")
|
||||
|
||||
# Apply zoom post-processing if configured
|
||||
if zoom_config and zoom_config.get("enabled"):
|
||||
import tempfile as _tf
|
||||
import os as _os
|
||||
zoomed_path = output_path + ".zoomed.mp4"
|
||||
_apply_zoom_post(output_path, zoomed_path, zoom_config)
|
||||
_os.replace(zoomed_path, output_path)
|
||||
logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0))
|
||||
|
||||
return output_path
|
||||
|
||||
|
||||
|
||||
@ -7,8 +7,8 @@
|
||||
"dev": "vite",
|
||||
"build": "tsc -b && vite build",
|
||||
"lint": "eslint .",
|
||||
"preview": "vite preview",
|
||||
"test": "vitest run"
|
||||
"test": "vitest run",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@tauri-apps/api": "^2",
|
||||
|
||||
@ -10,6 +10,8 @@ import DevPanel from './components/DevPanel';
|
||||
import MarkersPanel from './components/MarkersPanel';
|
||||
import SilenceTrimmerPanel from './components/SilenceTrimmerPanel';
|
||||
import ZoneEditor from './components/ZoneEditor';
|
||||
import BackgroundMusicPanel from './components/BackgroundMusicPanel';
|
||||
import AppendClipPanel from './components/AppendClipPanel';
|
||||
import { useKeyboardShortcuts } from './hooks/useKeyboardShortcuts';
|
||||
import {
|
||||
Film,
|
||||
@ -27,11 +29,13 @@ import {
|
||||
RefreshCw,
|
||||
Grid3x3,
|
||||
MapPin,
|
||||
Music,
|
||||
ListVideo,
|
||||
} from 'lucide-react';
|
||||
|
||||
const LAST_MEDIA_PATH_KEY = 'talkedit:lastMediaPath';
|
||||
|
||||
type Panel = 'ai' | 'settings' | 'export' | 'silence' | 'zones' | 'markers' | null;
|
||||
type Panel = 'ai' | 'settings' | 'export' | 'silence' | 'zones' | 'markers' | 'music' | 'append' | null;
|
||||
|
||||
export default function App() {
|
||||
const {
|
||||
@ -654,6 +658,20 @@ export default function App() {
|
||||
onClick={() => togglePanel('markers')}
|
||||
disabled={!videoPath}
|
||||
/>
|
||||
<ToolbarButton
|
||||
icon={<Music className="w-4 h-4" />}
|
||||
label="Music"
|
||||
active={activePanel === 'music'}
|
||||
onClick={() => togglePanel('music')}
|
||||
disabled={!videoPath}
|
||||
/>
|
||||
<ToolbarButton
|
||||
icon={<ListVideo className="w-4 h-4" />}
|
||||
label="Append"
|
||||
active={activePanel === 'append'}
|
||||
onClick={() => togglePanel('append')}
|
||||
disabled={!videoPath}
|
||||
/>
|
||||
<div className="flex items-center gap-1.5 px-2 py-1 rounded-md bg-editor-surface border border-editor-border">
|
||||
<select
|
||||
value={whisperModel}
|
||||
@ -812,6 +830,8 @@ export default function App() {
|
||||
)}
|
||||
{activePanel === 'silence' && <SilenceTrimmerPanel />}
|
||||
{activePanel === 'markers' && <MarkersPanel />}
|
||||
{activePanel === 'music' && <BackgroundMusicPanel />}
|
||||
{activePanel === 'append' && <AppendClipPanel />}
|
||||
{activePanel === 'ai' && <AIPanel />}
|
||||
{activePanel === 'export' && <ExportDialog />}
|
||||
{activePanel === 'settings' && <SettingsPanel />}
|
||||
|
||||
78
frontend/src/components/AppendClipPanel.tsx
Normal file
78
frontend/src/components/AppendClipPanel.tsx
Normal file
@ -0,0 +1,78 @@
|
||||
import { useEditorStore } from '../store/editorStore';
|
||||
import { Video, Plus, Trash2, ChevronUp, ChevronDown } from 'lucide-react';
|
||||
|
||||
/**
 * Side panel for queuing extra video clips to append after the main video.
 * Clips are kept in the editor store (additionalClips) and concatenated in
 * list order by the backend at export time; this panel only edits the list.
 */
export default function AppendClipPanel() {
  const { additionalClips, addAdditionalClip, removeAdditionalClip, reorderAdditionalClip, videoPath } = useEditorStore();

  // Open a native file picker and push the chosen path onto the clip list.
  // NOTE(review): package.json lists @tauri-apps/api, but this calls a
  // window.electronAPI bridge -- if that bridge is absent, optional chaining
  // makes the button a silent no-op; confirm which shell API is intended.
  const handleAddClip = async () => {
    const path = await window.electronAPI?.openFile();
    if (path) {
      addAdditionalClip(path);
    }
  };

  return (
    <div className="p-4 space-y-3">
      <h3 className="text-sm font-semibold flex items-center gap-1.5">
        <Video className="w-4 h-4" />
        Append Clips
      </h3>
      <p className="text-[10px] text-editor-text-muted leading-relaxed">
        Load additional video clips to append after the main video. Clips are concatenated in order during export.
      </p>

      {/* Empty state vs. reorderable clip list */}
      {additionalClips.length === 0 ? (
        <div className="text-[11px] text-editor-text-muted text-center py-3">
          No additional clips loaded
        </div>
      ) : (
        <div className="space-y-1 max-h-60 overflow-y-auto">
          {additionalClips.map((clip, idx) => (
            <div
              key={clip.id}
              className="flex items-center gap-2 p-2 rounded bg-editor-surface border border-editor-border text-xs"
            >
              <Video className="w-3 h-3 text-editor-accent shrink-0" />
              <span className="flex-1 truncate text-editor-text">{clip.label}</span>
              {/* 1-based position shown to the user; export order follows it */}
              <span className="text-[10px] text-editor-text-muted shrink-0">#{idx + 1}</span>
              <div className="flex items-center gap-0.5 shrink-0">
                {/* reorderAdditionalClip takes a direction delta: -1 up, +1 down */}
                <button
                  onClick={() => reorderAdditionalClip(clip.id, -1)}
                  disabled={idx === 0}
                  className="p-0.5 rounded hover:bg-editor-bg disabled:opacity-30 text-editor-text-muted hover:text-editor-text"
                  title="Move up"
                >
                  <ChevronUp className="w-3 h-3" />
                </button>
                <button
                  onClick={() => reorderAdditionalClip(clip.id, 1)}
                  disabled={idx === additionalClips.length - 1}
                  className="p-0.5 rounded hover:bg-editor-bg disabled:opacity-30 text-editor-text-muted hover:text-editor-text"
                  title="Move down"
                >
                  <ChevronDown className="w-3 h-3" />
                </button>
              </div>
              <button
                onClick={() => removeAdditionalClip(clip.id)}
                className="p-0.5 rounded hover:bg-red-500/20 text-red-400"
                title="Remove clip"
              >
                <Trash2 className="w-3 h-3" />
              </button>
            </div>
          ))}
        </div>
      )}

      {/* Disabled until a main video is loaded -- there is nothing to append to */}
      <button
        onClick={handleAddClip}
        disabled={!videoPath}
        className="w-full flex items-center justify-center gap-2 px-3 py-2 rounded-lg border-2 border-dashed border-editor-border text-xs text-editor-text-muted hover:text-editor-text hover:border-editor-text-muted disabled:opacity-40 transition-colors"
      >
        <Plus className="w-3.5 h-3.5" />
        Add Clip
      </button>
    </div>
  );
}
|
||||
139
frontend/src/components/BackgroundMusicPanel.tsx
Normal file
139
frontend/src/components/BackgroundMusicPanel.tsx
Normal file
@ -0,0 +1,139 @@
|
||||
import { useEditorStore } from '../store/editorStore';
|
||||
import { Music, Trash2, Volume2, Disc3 } from 'lucide-react';
|
||||
|
||||
export default function BackgroundMusicPanel() {
|
||||
const { backgroundMusic, setBackgroundMusic, updateBackgroundMusic } = useEditorStore();
|
||||
|
||||
const handleLoadMusic = async () => {
|
||||
const path = await window.electronAPI?.openFile();
|
||||
if (path) {
|
||||
setBackgroundMusic({
|
||||
path,
|
||||
volumeDb: -10,
|
||||
duckingEnabled: true,
|
||||
duckingDb: 6,
|
||||
duckingAttackMs: 10,
|
||||
duckingReleaseMs: 200,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
const handleRemoveMusic = () => {
|
||||
setBackgroundMusic(null);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="p-4 space-y-4">
|
||||
<h3 className="text-sm font-semibold flex items-center gap-1.5">
|
||||
<Music className="w-4 h-4" />
|
||||
Background Music
|
||||
</h3>
|
||||
|
||||
{!backgroundMusic ? (
|
||||
<button
|
||||
onClick={handleLoadMusic}
|
||||
className="w-full flex items-center justify-center gap-2 px-4 py-3 rounded-lg border-2 border-dashed border-editor-border text-xs text-editor-text-muted hover:text-editor-text hover:border-editor-text-muted transition-colors"
|
||||
>
|
||||
<Disc3 className="w-4 h-4" />
|
||||
Load Music File
|
||||
</button>
|
||||
) : (
|
||||
<div className="space-y-3">
|
||||
<div className="flex items-center gap-2 p-2 rounded bg-editor-surface border border-editor-border">
|
||||
<Music className="w-4 h-4 text-editor-accent shrink-0" />
|
||||
<span className="flex-1 text-xs truncate">
|
||||
{backgroundMusic.path.split(/[/\\]/).pop()}
|
||||
</span>
|
||||
<button
|
||||
onClick={handleRemoveMusic}
|
||||
className="p-1 rounded hover:bg-red-500/20 text-red-400 transition-colors"
|
||||
title="Remove music"
|
||||
>
|
||||
<Trash2 className="w-3 h-3" />
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div className="space-y-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<Volume2 className="w-3 h-3 text-editor-text-muted shrink-0" />
|
||||
<span className="text-[10px] text-editor-text-muted w-16">Volume:</span>
|
||||
<input
|
||||
type="range"
|
||||
min={-30}
|
||||
max={12}
|
||||
step={1}
|
||||
value={backgroundMusic.volumeDb}
|
||||
onChange={(e) => updateBackgroundMusic({ volumeDb: Number(e.target.value) })}
|
||||
className="flex-1 h-1.5"
|
||||
/>
|
||||
<span className="text-xs text-editor-text w-10 text-right">{backgroundMusic.volumeDb} dB</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<label className="flex items-center gap-2 cursor-pointer">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={backgroundMusic.duckingEnabled}
|
||||
onChange={(e) => updateBackgroundMusic({ duckingEnabled: e.target.checked })}
|
||||
className="w-4 h-4 rounded bg-editor-surface border-editor-border accent-editor-accent"
|
||||
/>
|
||||
<div>
|
||||
<span className="text-xs font-medium">Auto-ducking</span>
|
||||
<p className="text-[10px] text-editor-text-muted">
|
||||
Lower music volume when speech is detected
|
||||
</p>
|
||||
</div>
|
||||
</label>
|
||||
|
||||
{backgroundMusic.duckingEnabled && (
|
||||
<div className="pl-6 space-y-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-[10px] text-editor-text-muted w-20">Duck amount:</span>
|
||||
<input
|
||||
type="range"
|
||||
min={1}
|
||||
max={20}
|
||||
step={1}
|
||||
value={backgroundMusic.duckingDb}
|
||||
onChange={(e) => updateBackgroundMusic({ duckingDb: Number(e.target.value) })}
|
||||
className="flex-1 h-1.5"
|
||||
/>
|
||||
<span className="text-xs text-editor-text w-10 text-right">{backgroundMusic.duckingDb} dB</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-[10px] text-editor-text-muted w-20">Attack:</span>
|
||||
<input
|
||||
type="range"
|
||||
min={1}
|
||||
max={100}
|
||||
step={1}
|
||||
value={backgroundMusic.duckingAttackMs}
|
||||
onChange={(e) => updateBackgroundMusic({ duckingAttackMs: Number(e.target.value) })}
|
||||
className="flex-1 h-1.5"
|
||||
/>
|
||||
<span className="text-xs text-editor-text w-10 text-right">{backgroundMusic.duckingAttackMs}ms</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-[10px] text-editor-text-muted w-20">Release:</span>
|
||||
<input
|
||||
type="range"
|
||||
min={10}
|
||||
max={1000}
|
||||
step={10}
|
||||
value={backgroundMusic.duckingReleaseMs}
|
||||
onChange={(e) => updateBackgroundMusic({ duckingReleaseMs: Number(e.target.value) })}
|
||||
className="flex-1 h-1.5"
|
||||
/>
|
||||
<span className="text-xs text-editor-text w-10 text-right">{backgroundMusic.duckingReleaseMs}ms</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<p className="text-[10px] text-editor-text-muted leading-relaxed">
|
||||
The music will be mixed during export. Enable auto-ducking to lower music volume whenever speech is active.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@ -1,10 +1,10 @@
|
||||
import { useState, useCallback } from 'react';
|
||||
import { useEditorStore } from '../store/editorStore';
|
||||
import { Download, Loader2, Zap, Cog, Info, Volume2, FileText } from 'lucide-react';
|
||||
import { Download, Loader2, Zap, Cog, Info, Volume2, FileText, ZoomIn, Video, Music } from 'lucide-react';
|
||||
import type { ExportOptions } from '../types/project';
|
||||
|
||||
export default function ExportDialog() {
|
||||
const { videoPath, words, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, isExporting, exportProgress, backendUrl, setExporting, getKeepSegments } =
|
||||
const { videoPath, words, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, isExporting, exportProgress, backendUrl, setExporting, getKeepSegments, additionalClips, backgroundMusic } =
|
||||
useEditorStore();
|
||||
|
||||
const hasCuts = cutRanges.length > 0;
|
||||
@ -22,6 +22,10 @@ export default function ExportDialog() {
|
||||
captions: 'none',
|
||||
normalizeAudio: false,
|
||||
normalizeTarget: -14,
|
||||
zoom: { enabled: false, zoomFactor: 1.25, panX: 0, panY: 0 },
|
||||
removeBackground: false,
|
||||
backgroundReplacement: 'blur',
|
||||
backgroundReplacementValue: '',
|
||||
});
|
||||
const [exportError, setExportError] = useState<string | null>(null);
|
||||
const [transcriptFormat, setTranscriptFormat] = useState<'txt' | 'srt'>('txt');
|
||||
@ -147,27 +151,51 @@ export default function ExportDialog() {
|
||||
speed: r.speed,
|
||||
}));
|
||||
|
||||
const body: Record<string, any> = {
|
||||
input_path: videoPath,
|
||||
output_path: outputPath,
|
||||
keep_segments: keepSegments,
|
||||
mute_ranges: muteRanges.length > 0 ? muteRanges.map((r) => ({ start: r.start, end: r.end })) : undefined,
|
||||
gain_ranges: backendGainRanges.length > 0 ? backendGainRanges : undefined,
|
||||
speed_ranges: backendSpeedRanges.length > 0 ? backendSpeedRanges : undefined,
|
||||
global_gain_db: globalGainDb,
|
||||
words: options.captions !== 'none' ? words : undefined,
|
||||
deleted_indices: options.captions !== 'none' ? [...deletedSet] : undefined,
|
||||
mode: options.mode,
|
||||
resolution: options.resolution,
|
||||
format: options.format,
|
||||
enhanceAudio: options.enhanceAudio,
|
||||
normalize_loudness: options.normalizeAudio,
|
||||
normalize_target_lufs: options.normalizeTarget,
|
||||
captions: options.captions,
|
||||
};
|
||||
|
||||
// Zoom
|
||||
if (options.zoom?.enabled) {
|
||||
body.zoom = options.zoom;
|
||||
}
|
||||
|
||||
// Additional clips
|
||||
if (additionalClips.length > 0) {
|
||||
body.additional_clips = additionalClips.map((c) => c.path);
|
||||
}
|
||||
|
||||
// Background music
|
||||
if (backgroundMusic) {
|
||||
body.background_music = backgroundMusic;
|
||||
}
|
||||
|
||||
// Background removal
|
||||
if (options.removeBackground) {
|
||||
body.remove_background = true;
|
||||
body.background_replacement = options.backgroundReplacement || 'blur';
|
||||
body.background_replacement_value = options.backgroundReplacementValue || '';
|
||||
}
|
||||
|
||||
const res = await fetch(`${backendUrl}/export`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
input_path: videoPath,
|
||||
output_path: outputPath,
|
||||
keep_segments: keepSegments,
|
||||
mute_ranges: muteRanges.length > 0 ? muteRanges.map((r) => ({ start: r.start, end: r.end })) : undefined,
|
||||
gain_ranges: backendGainRanges.length > 0 ? backendGainRanges : undefined,
|
||||
speed_ranges: backendSpeedRanges.length > 0 ? backendSpeedRanges : undefined,
|
||||
global_gain_db: globalGainDb,
|
||||
words: options.captions !== 'none' ? words : undefined,
|
||||
deleted_indices: options.captions !== 'none' ? [...deletedSet] : undefined,
|
||||
mode: options.mode,
|
||||
resolution: options.resolution,
|
||||
format: options.format,
|
||||
enhanceAudio: options.enhanceAudio,
|
||||
normalize_loudness: options.normalizeAudio,
|
||||
normalize_target_lufs: options.normalizeTarget,
|
||||
captions: options.captions,
|
||||
}),
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
if (!res.ok) {
|
||||
let detail = res.statusText;
|
||||
@ -185,7 +213,7 @@ export default function ExportDialog() {
|
||||
setExportError(err instanceof Error ? err.message : 'Export failed');
|
||||
setExporting(false);
|
||||
}
|
||||
}, [videoPath, options, backendUrl, setExporting, getKeepSegments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters]);
|
||||
}, [videoPath, options, backendUrl, setExporting, getKeepSegments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters, additionalClips, backgroundMusic]);
|
||||
|
||||
return (
|
||||
<div className="p-4 space-y-5">
|
||||
@ -239,6 +267,139 @@ export default function ExportDialog() {
|
||||
]}
|
||||
/>
|
||||
|
||||
{/* Video zoom / punch-in */}
|
||||
<div className="space-y-2 pt-1 border-t border-editor-border">
|
||||
<label className="flex items-center gap-2 cursor-pointer">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={options.zoom?.enabled || false}
|
||||
onChange={(e) => setOptions((o) => ({ ...o, zoom: { ...o.zoom!, enabled: e.target.checked } }))}
|
||||
className="w-4 h-4 rounded bg-editor-surface border-editor-border accent-editor-accent"
|
||||
/>
|
||||
<div>
|
||||
<span className="text-xs font-medium flex items-center gap-1">
|
||||
<ZoomIn className="w-3 h-3" />
|
||||
Video zoom / punch-in
|
||||
</span>
|
||||
<p className="text-[10px] text-editor-text-muted">
|
||||
Crop and zoom into the center of the video. Requires re-encode.
|
||||
</p>
|
||||
</div>
|
||||
</label>
|
||||
{options.zoom?.enabled && (
|
||||
<div className="pl-6 space-y-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-[10px] text-editor-text-muted w-16">Zoom:</span>
|
||||
<input
|
||||
type="range"
|
||||
min={1}
|
||||
max={3}
|
||||
step={0.05}
|
||||
value={options.zoom?.zoomFactor || 1}
|
||||
onChange={(e) => setOptions((o) => ({ ...o, zoom: { ...o.zoom!, zoomFactor: Number(e.target.value) } }))}
|
||||
className="flex-1 h-1.5"
|
||||
/>
|
||||
<span className="text-xs text-editor-text w-10 text-right">{options.zoom?.zoomFactor?.toFixed(2)}x</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-[10px] text-editor-text-muted w-16">Pan X:</span>
|
||||
<input
|
||||
type="range"
|
||||
min={-1}
|
||||
max={1}
|
||||
step={0.05}
|
||||
value={options.zoom?.panX || 0}
|
||||
onChange={(e) => setOptions((o) => ({ ...o, zoom: { ...o.zoom!, panX: Number(e.target.value) } }))}
|
||||
className="flex-1 h-1.5"
|
||||
/>
|
||||
<span className="text-xs text-editor-text w-10 text-right">{((options.zoom?.panX || 0) * 100).toFixed(0)}%</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-[10px] text-editor-text-muted w-16">Pan Y:</span>
|
||||
<input
|
||||
type="range"
|
||||
min={-1}
|
||||
max={1}
|
||||
step={0.05}
|
||||
value={options.zoom?.panY || 0}
|
||||
onChange={(e) => setOptions((o) => ({ ...o, zoom: { ...o.zoom!, panY: Number(e.target.value) } }))}
|
||||
className="flex-1 h-1.5"
|
||||
/>
|
||||
<span className="text-xs text-editor-text w-10 text-right">{((options.zoom?.panY || 0) * 100).toFixed(0)}%</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Background removal */}
|
||||
{!isAudioOnly && (
|
||||
<div className="space-y-2 pt-1 border-t border-editor-border">
|
||||
<label className="flex items-center gap-2 cursor-pointer">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={options.removeBackground || false}
|
||||
onChange={(e) => setOptions((o) => ({ ...o, removeBackground: e.target.checked }))}
|
||||
className="w-4 h-4 rounded bg-editor-surface border-editor-border accent-editor-accent"
|
||||
/>
|
||||
<div>
|
||||
<span className="text-xs font-medium flex items-center gap-1">
|
||||
<Video className="w-3 h-3" />
|
||||
Remove background
|
||||
</span>
|
||||
<p className="text-[10px] text-editor-text-muted">
|
||||
Replace or blur the background. Uses MediaPipe if available.
|
||||
</p>
|
||||
</div>
|
||||
</label>
|
||||
{options.removeBackground && (
|
||||
<div className="pl-6 space-y-2">
|
||||
<SelectField
|
||||
label="Background replacement"
|
||||
value={options.backgroundReplacement || 'blur'}
|
||||
onChange={(v) => setOptions((o) => ({ ...o, backgroundReplacement: v as 'blur' | 'color' | 'image' }))}
|
||||
options={[
|
||||
{ value: 'blur', label: 'Blur background' },
|
||||
{ value: 'color', label: 'Solid color' },
|
||||
{ value: 'image', label: 'Custom image' },
|
||||
]}
|
||||
/>
|
||||
{options.backgroundReplacement === 'color' && (
|
||||
<input
|
||||
type="text"
|
||||
value={options.backgroundReplacementValue || '#00FF00'}
|
||||
onChange={(e) => setOptions((o) => ({ ...o, backgroundReplacementValue: e.target.value }))}
|
||||
placeholder="#00FF00"
|
||||
className="w-full px-2 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:outline-none focus:border-editor-accent [color-scheme:dark]"
|
||||
/>
|
||||
)}
|
||||
{options.backgroundReplacement === 'image' && (
|
||||
<p className="text-[10px] text-editor-text-muted">Place a background image file path above.</p>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Background music track info */}
|
||||
{backgroundMusic && (
|
||||
<div className="pt-1 border-t border-editor-border">
|
||||
<div className="flex items-center gap-1.5 text-xs text-editor-accent">
|
||||
<Music className="w-3 h-3" />
|
||||
Background music: {backgroundMusic.path.split(/[/\\]/).pop()}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Append clips info */}
|
||||
{additionalClips.length > 0 && (
|
||||
<div className="pt-1 border-t border-editor-border">
|
||||
<div className="flex items-center gap-1.5 text-xs text-editor-accent">
|
||||
<Video className="w-3 h-3" />
|
||||
{additionalClips.length} additional clip{additionalClips.length > 1 ? 's' : ''} appended
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Audio normalization — integrated into export */}
|
||||
<div className="space-y-2 pt-1 border-t border-editor-border">
|
||||
<label className="flex items-center gap-2 cursor-pointer">
|
||||
|
||||
@ -14,6 +14,9 @@ import type {
|
||||
SilenceTrimGroup,
|
||||
TimelineMarker,
|
||||
Chapter,
|
||||
ZoomConfig,
|
||||
ClipInfo,
|
||||
BackgroundMusicConfig,
|
||||
} from '../types/project';
|
||||
|
||||
interface EditorState {
|
||||
@ -50,6 +53,10 @@ interface EditorState {
|
||||
|
||||
backendUrl: string;
|
||||
zonePreviewPaddingSeconds: number;
|
||||
|
||||
zoomConfig: ZoomConfig;
|
||||
additionalClips: ClipInfo[];
|
||||
backgroundMusic: BackgroundMusicConfig | null;
|
||||
}
|
||||
|
||||
interface EditorActions {
|
||||
@ -104,6 +111,12 @@ interface EditorActions {
|
||||
getWordAtTime: (time: number) => number;
|
||||
loadProject: (projectData: any) => void;
|
||||
reset: () => void;
|
||||
setZoomConfig: (config: Partial<ZoomConfig>) => void;
|
||||
addAdditionalClip: (path: string, label?: string) => void;
|
||||
removeAdditionalClip: (id: string) => void;
|
||||
reorderAdditionalClip: (id: string, direction: -1 | 1) => void;
|
||||
setBackgroundMusic: (config: BackgroundMusicConfig | null) => void;
|
||||
updateBackgroundMusic: (updates: Partial<BackgroundMusicConfig>) => void;
|
||||
}
|
||||
|
||||
const ZONE_PREVIEW_PADDING_KEY = 'talkedit-zone-preview-padding-seconds';
|
||||
@ -146,6 +159,9 @@ const initialState: EditorState = {
|
||||
exportProgress: 0,
|
||||
backendUrl: 'http://127.0.0.1:8000',
|
||||
zonePreviewPaddingSeconds: getStoredZonePreviewPaddingSeconds(),
|
||||
zoomConfig: { enabled: false, zoomFactor: 1, panX: 0, panY: 0 },
|
||||
additionalClips: [],
|
||||
backgroundMusic: null,
|
||||
};
|
||||
|
||||
let nextRangeId = 1;
|
||||
@ -190,7 +206,7 @@ export const useEditorStore = create<EditorState & EditorActions>()(
|
||||
setTranscriptionModel: (model) => set({ transcriptionModel: model }),
|
||||
|
||||
saveProject: (): ProjectFile => {
|
||||
const { videoPath, words, segments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, silenceTrimGroups, timelineMarkers, transcriptionModel, language, exportedAudioPath } = get();
|
||||
const { videoPath, words, segments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, silenceTrimGroups, timelineMarkers, transcriptionModel, language, exportedAudioPath, zoomConfig, additionalClips, backgroundMusic } = get();
|
||||
if (!videoPath) throw new Error('No video loaded');
|
||||
const now = new Date().toISOString();
|
||||
// Strip globalStartIndex (runtime-only field) before persisting.
|
||||
@ -214,8 +230,11 @@ export const useEditorStore = create<EditorState & EditorActions>()(
|
||||
silenceTrimGroups,
|
||||
timelineMarkers,
|
||||
language,
|
||||
createdAt: now, // will be overwritten if we track original creation time later
|
||||
createdAt: now,
|
||||
modifiedAt: now,
|
||||
zoomConfig,
|
||||
additionalClips,
|
||||
backgroundMusic: backgroundMusic ?? undefined,
|
||||
};
|
||||
},
|
||||
|
||||
@ -600,6 +619,43 @@ export const useEditorStore = create<EditorState & EditorActions>()(
|
||||
return lo < words.length ? lo : words.length - 1;
|
||||
},
|
||||
|
||||
setZoomConfig: (config) => {
|
||||
const { zoomConfig } = get();
|
||||
set({ zoomConfig: { ...zoomConfig, ...config } });
|
||||
},
|
||||
|
||||
addAdditionalClip: (path, label) => {
|
||||
const { additionalClips } = get();
|
||||
const id = `clip_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`;
|
||||
set({ additionalClips: [...additionalClips, { id, path, label: label || path.split(/[/\\]/).pop() || 'Clip' }] });
|
||||
},
|
||||
|
||||
removeAdditionalClip: (id) => {
|
||||
const { additionalClips } = get();
|
||||
set({ additionalClips: additionalClips.filter((c) => c.id !== id) });
|
||||
},
|
||||
|
||||
reorderAdditionalClip: (id, direction) => {
|
||||
const { additionalClips } = get();
|
||||
const idx = additionalClips.findIndex((c) => c.id === id);
|
||||
if (idx === -1) return;
|
||||
const target = idx + direction;
|
||||
if (target < 0 || target >= additionalClips.length) return;
|
||||
const reordered = [...additionalClips];
|
||||
[reordered[idx], reordered[target]] = [reordered[target], reordered[idx]];
|
||||
set({ additionalClips: reordered });
|
||||
},
|
||||
|
||||
setBackgroundMusic: (config) => {
|
||||
set({ backgroundMusic: config });
|
||||
},
|
||||
|
||||
updateBackgroundMusic: (updates) => {
|
||||
const { backgroundMusic } = get();
|
||||
if (!backgroundMusic) return;
|
||||
set({ backgroundMusic: { ...backgroundMusic, ...updates } });
|
||||
},
|
||||
|
||||
loadProject: (data) => {
|
||||
const { backendUrl, zonePreviewPaddingSeconds, projectFilePath } = get();
|
||||
const url = `${backendUrl}/file?path=${encodeURIComponent(data.videoPath)}`;
|
||||
@ -634,6 +690,9 @@ export const useEditorStore = create<EditorState & EditorActions>()(
|
||||
transcriptionModel: data.transcriptionModel ?? null,
|
||||
language: data.language || '',
|
||||
exportedAudioPath: data.exportedAudioPath ?? null,
|
||||
zoomConfig: data.zoomConfig || { enabled: false, zoomFactor: 1, panX: 0, panY: 0 },
|
||||
additionalClips: data.additionalClips || [],
|
||||
backgroundMusic: data.backgroundMusic || null,
|
||||
});
|
||||
},
|
||||
|
||||
|
||||
@ -76,6 +76,9 @@ export interface ProjectFile {
|
||||
language: string;
|
||||
createdAt: string;
|
||||
modifiedAt: string;
|
||||
zoomConfig?: ZoomConfig;
|
||||
additionalClips?: ClipInfo[];
|
||||
backgroundMusic?: BackgroundMusicConfig;
|
||||
}
|
||||
|
||||
export interface TranscriptionResult {
|
||||
@ -84,6 +87,28 @@ export interface TranscriptionResult {
|
||||
language: string;
|
||||
}
|
||||
|
||||
export interface ZoomConfig {
|
||||
enabled: boolean;
|
||||
zoomFactor: number; // 1.0 = no zoom, 2.0 = 2x zoom
|
||||
panX: number; // -1 to 1, normalized pan offset
|
||||
panY: number;
|
||||
}
|
||||
|
||||
export interface ClipInfo {
|
||||
id: string;
|
||||
path: string;
|
||||
label: string;
|
||||
}
|
||||
|
||||
export interface BackgroundMusicConfig {
|
||||
path: string;
|
||||
volumeDb: number; // gain in dB for music track
|
||||
duckingEnabled: boolean;
|
||||
duckingDb: number; // how much to duck (dB reduction)
|
||||
duckingAttackMs: number;
|
||||
duckingReleaseMs: number;
|
||||
}
|
||||
|
||||
export interface ExportOptions {
|
||||
outputPath: string;
|
||||
mode: 'fast' | 'reencode';
|
||||
@ -92,6 +117,10 @@ export interface ExportOptions {
|
||||
enhanceAudio: boolean;
|
||||
captions: 'none' | 'burn-in' | 'sidecar';
|
||||
captionStyle?: CaptionStyle;
|
||||
zoom?: ZoomConfig;
|
||||
removeBackground?: boolean;
|
||||
backgroundReplacement?: 'blur' | 'color' | 'image';
|
||||
backgroundReplacementValue?: string;
|
||||
}
|
||||
|
||||
export interface TimelineMarker {
|
||||
|
||||
@ -1 +1 @@
|
||||
{"root":["./src/App.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/AIPanel.tsx","./src/components/DevPanel.tsx","./src/components/ExportDialog.tsx","./src/components/MarkersPanel.tsx","./src/components/SettingsPanel.tsx","./src/components/SilenceTrimmerPanel.tsx","./src/components/TranscriptEditor.tsx","./src/components/VideoPlayer.tsx","./src/components/VolumePanel.tsx","./src/components/WaveformTimeline.tsx","./src/components/ZoneEditor.tsx","./src/hooks/useKeyboardShortcuts.ts","./src/hooks/useVideoSync.ts","./src/lib/dev-logger.ts","./src/lib/keybindings.ts","./src/lib/tauri-bridge.ts","./src/lib/thumbnails.ts","./src/store/aiStore.ts","./src/store/editorStore.test.ts","./src/store/editorStore.ts","./src/types/project.ts"],"version":"5.9.3"}
|
||||
{"root":["./src/App.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/AIPanel.tsx","./src/components/AppendClipPanel.tsx","./src/components/BackgroundMusicPanel.tsx","./src/components/DevPanel.tsx","./src/components/ExportDialog.tsx","./src/components/MarkersPanel.tsx","./src/components/SettingsPanel.tsx","./src/components/SilenceTrimmerPanel.tsx","./src/components/TranscriptEditor.tsx","./src/components/VideoPlayer.tsx","./src/components/VolumePanel.tsx","./src/components/WaveformTimeline.tsx","./src/components/ZoneEditor.tsx","./src/hooks/useKeyboardShortcuts.ts","./src/hooks/useVideoSync.ts","./src/lib/dev-logger.ts","./src/lib/keybindings.ts","./src/lib/tauri-bridge.ts","./src/lib/thumbnails.ts","./src/store/aiStore.ts","./src/store/editorStore.test.ts","./src/store/editorStore.ts","./src/types/project.ts"],"version":"5.9.3"}
|
||||
Reference in New Issue
Block a user