From 810957747bfd96146ba1a77e9d354bf48db368b1 Mon Sep 17 00:00:00 2001 From: dillonj Date: Tue, 5 May 2026 23:31:18 -0600 Subject: [PATCH] clean up of features --- FEATURES.md | 36 +++++ backend/services/background_removal.py | 33 +++-- backend/services/video_editor.py | 130 ++++++++++++------ frontend/src/components/AppendClipPanel.tsx | 7 +- .../src/components/BackgroundMusicPanel.tsx | 7 +- frontend/src/components/ExportDialog.tsx | 35 +++-- frontend/src/components/WaveformTimeline.tsx | 7 +- 7 files changed, 178 insertions(+), 77 deletions(-) diff --git a/FEATURES.md b/FEATURES.md index 7a7fccb..fb26ccf 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -62,6 +62,42 @@ Features are grouped by priority. Check off items as they are implemented. - [x] [#042] **Background removal** — MediaPipe Selfie Segmentation + FFmpeg frame processing for person/background separation. Configurable replacement: blur, solid color, or custom image. Applied during export. Falls back to FFmpeg colorkey when MediaPipe unavailable. (2026-05-05) +## 🔮 Future — AI-powered editing & resource library + +All AI features use the existing Ollama/OpenAI/Claude provider config — no new auth or setup needed. + +- [ ] [#043] **AI Smart Clean** — one-click chain: filler removal + silence trim + noise reduction + loudness normalization in a single pass. `POST /ai/smart-clean` calls existing services sequentially. + +- [ ] [#044] **AI Transcript Summarization** — generate bullet-point summary from transcript. `POST /ai/summarize`. AIPanel new tab. + +- [ ] [#045] **AI Sentence Rephrase** — right-click word/sentence in transcript → "Rephrase with AI" → see 3 alternatives → click to replace. `POST /ai/rephrase`. TranscriptEditor context menu. + +- [ ] [#046] **AI Smart Speed** — detect slow/low-energy sections → mark as suggested SpeedRange segments. `POST /ai/smart-speed`. Preview in AIPanel. + +- [ ] [#047] **AI Auto-Chapters** — detect topic shifts in transcript → create TimelineMarkers automatically. `POST /ai/chapters`. + +- [ ] [#048] **AI Show Notes** — generate title, description, soundbites, keywords from transcript + markers. `POST /ai/show-notes`. Copy to clipboard or save to file. + +- [ ] [#049] **AI Find Fluff** — AI marks rambles, intros, off-topic chatter for deletion. Extends existing filler detection. `POST /ai/find-fluff`. AIPanel tab showing suggested cut ranges. + +- [ ] [#050] **AI Smooth Cuts** — remove jump cuts between deleted segments using crossfade/blend during re-encode. Export option toggle. + +- [ ] [#051] **AI B-roll** — generate footage from a text prompt to fill visual gaps in the timeline. Uses local SD or API. New "B-roll" section in AIPanel. + +- [ ] [#052] **Smart Layouts** — auto-switch video layout between speakers based on who's talking. Detects active speaker from diarization + volume, applies crop/pad to focus on current speaker during export. + +- [ ] [#053] **Per-track audio levels** — individual gain per speaker track. Extend `GainRange` model with `track_id`, apply per-stream via FFmpeg. + +- [ ] [#054] **Intro/Outro templates** — save segment ranges as reusable templates, apply with one click on export. + +- [ ] [#055] **Built-in free music library** — 5–10 CC0/royalty-free short loops shipped in `frontend/public/resources/music/`. BackgroundMusicPanel gets a "Built-in" tab with play/preview. + +- [ ] [#056] **Stock media browser** — new `MediaLibraryPanel` that browses local `resources/media/` for images, video, audio with thumbnails. Frontend-only via Tauri `readDir`. Drag-to-add for bg removal images, append clips, or music. + +- [ ] [#057] **Sample content downloader** — "Get Sample Video" button on empty state downloads a short public-domain test video + pre-made transcription JSON for trying the app without your own media. + +--- + ## 💡 TalkEdit competitive advantages to lean into These aren't features to build — they're things to make more visible in the UI and README: diff --git a/backend/services/background_removal.py b/backend/services/background_removal.py index 3b5879e..aa4d95f 100644 --- a/backend/services/background_removal.py +++ b/backend/services/background_removal.py @@ -175,30 +175,40 @@ def _remove_with_mediapipe( raise RuntimeError(f"MediaPipe background removal failed: {e}") + def _remove_with_ffmpeg_portrait( input_path: str, output_path: str, replacement: str = "blur", replacement_value: str = "", ) -> str: - """Fallback: use FFmpeg's colorkey + chromakey for basic background removal. + """Fallback: basic FFmpeg-only background blur. - This is a crude approximation. For best results, install mediapipe + opencv-python. + Uses a strong gaussian blur as a crude background replacement. + For proper person segmentation (color/image replacement), install: + pip install mediapipe opencv-python """ ffmpeg = "ffmpeg" - # Use a simple chromakey-based approach with a neutral background - # This won't work well for most real videos but provides a fallback - if replacement == "color": + if replacement == "blur": + filter_complex = "gblur=sigma=30" + elif replacement == "color": color = replacement_value or "00FF00" - filter_complex = f"colorkey=0x{color}:0.3:0.1,chromakey=0x{color}:0.3:0.1" - elif replacement == "blur": - filter_complex = "gblur=sigma=20:enable='gt(scene,0.01)'" + filter_complex = ( + f"split[fg][bg];" + f"[bg]colorkey=0x{color}:0.3:0.1[bg_key];" + f"[fg][bg_key]overlay" + ) + elif replacement == "image" and replacement_value: + escaped = replacement_value.replace("\\", "/").replace(":", "\\:") + filter_complex = ( + f"movie='{escaped}':loop=0,scale=iw:ih[bg];" + f"[0:v][bg]overlay=0:0:shortest=1" + ) else: filter_complex = "null" if filter_complex == "null": - # No-op, copy input to output cmd = [ffmpeg, "-y", "-i", input_path, "-c", "copy", output_path] else: cmd = [ @@ -215,5 +225,8 @@ def _remove_with_ffmpeg_portrait( if result.returncode != 0: raise RuntimeError(f"FFmpeg background removal failed: {result.stderr[-500:]}") - logger.info("FFmpeg portait background removal completed -> %s", output_path) + logger.warning( + "FFmpeg fallback background removal used (no MediaPipe). " + "Install 'mediapipe' and 'opencv-python' for proper person segmentation." + ) return output_path diff --git a/backend/services/video_editor.py b/backend/services/video_editor.py index 00a7d31..7a12a61 100644 --- a/backend/services/video_editor.py +++ b/backend/services/video_editor.py @@ -45,6 +45,24 @@ def _input_has_video_stream(ffmpeg_cmd: str, input_path: str) -> bool: return False +def _input_has_audio_stream(ffmpeg_cmd: str, input_path: str) -> bool: + """Return True if the input contains at least one audio stream.""" + ffprobe = ffmpeg_cmd.replace("ffmpeg", "ffprobe") + cmd = [ + ffprobe, + "-v", "error", + "-select_streams", "a:0", + "-show_entries", "stream=index", + "-of", "csv=p=0", + str(input_path), + ] + try: + result = subprocess.run(cmd, capture_output=True, text=True) + return result.returncode == 0 and bool(result.stdout.strip()) + except Exception: + return False + + def _clamp_speed(speed: float) -> float: return max(0.25, min(4.0, float(speed))) @@ -144,39 +162,65 @@ def mix_background_music( ducking_release_ms: float = 200.0, ) -> str: """Mix background music into a video with optional ducking. - - Uses FFmpeg amix + sidechaincompress. Output is written to output_path. + + Uses FFmpeg amix + sidechaincompress. If the input has no audio, + the music track becomes the sole audio track. Output is written to output_path. """ ffmpeg = _find_ffmpeg() escaped_music = music_path.replace("\\", "/").replace(":", "\\:") - - # Build the filter graph - if ducking_enabled: + has_audio_result = _input_has_audio_stream(ffmpeg, video_path) + + if not has_audio_result: + cmd = [ + ffmpeg, "-y", + "-i", video_path, + "-i", music_path, + "-map", "0:v", + "-map", "1:a", + "-c:v", "copy", + "-c:a", "aac", "-b:a", "192k", + "-shortest", + "-movflags", "+faststart", + output_path, + ] + elif ducking_enabled: + music_source = f"amovie='{escaped_music}',volume={volume_db}dB[music]" filter_complex = ( f"[0:a]asplit[main][sidechain];" - f"movie='{escaped_music}':loop=0,volume={volume_db}dB[music];" + f"{music_source};" f"[main][music]amix=inputs=2:duration=first:dropout_transition=2[mixed];" f"[mixed][sidechain]sidechaincompress=" - f"threshold=-30dB:ratio=100:attack={ducking_attack_ms}ms:" - f"release={ducking_release_ms}ms:makeup=1:level_sc={ducking_db}[outa]" + f"threshold=-30dB:ratio=20:attack={ducking_attack_ms / 1000}:" + f"release={ducking_release_ms / 1000}:makeup=1:level_sc={ducking_db}[outa]" ) + cmd = [ + ffmpeg, "-y", + "-i", video_path, + "-filter_complex", filter_complex, + "-map", "0:v", + "-map", "[outa]", + "-c:v", "copy", + "-c:a", "aac", "-b:a", "192k", + "-shortest", + output_path, + ] else: + music_source = f"amovie='{escaped_music}',volume={volume_db}dB[music]" filter_complex = ( - f"movie='{escaped_music}':loop=0,volume={volume_db}dB[music];" + f"{music_source};" f"[0:a][music]amix=inputs=2:duration=first:dropout_transition=2[outa]" ) - - cmd = [ - ffmpeg, "-y", - "-i", video_path, - "-filter_complex", filter_complex, - "-map", "0:v", - "-map", "[outa]", - "-c:v", "copy", - "-c:a", "aac", "-b:a", "192k", - "-shortest", - output_path, - ] + cmd = [ + ffmpeg, "-y", + "-i", video_path, + "-filter_complex", filter_complex, + "-map", "0:v", + "-map", "[outa]", + "-c:v", "copy", + "-c:a", "aac", "-b:a", "192k", + "-shortest", + output_path, + ] result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: @@ -191,28 +235,29 @@ def concat_clips( output_path: str, ) -> str: """Concatenate multiple video clips using FFmpeg concat demuxer. - + The main_path is kept as-is. append_paths are appended after it. """ if not append_paths: raise ValueError("No clips to concatenate") - + ffmpeg = _find_ffmpeg() - import tempfile - import os - + resolved_main = str(Path(main_path).resolve()) + + # If output_path collides with an input, write to temp first + all_inputs = [resolved_main] + [str(Path(p).resolve()) for p in append_paths] + needs_rename = str(Path(output_path).resolve()) in all_inputs + final_output = output_path + if needs_rename: + final_output = output_path + ".concat_tmp.mp4" + temp_dir = tempfile.mkdtemp(prefix="aive_concat_") try: - segment_files = [main_path] - segment_files.extend(append_paths) - - # Create concat file list concat_file = os.path.join(temp_dir, "concat.txt") with open(concat_file, "w") as f: - for path in segment_files: - resolved = os.path.abspath(path) - f.write(f"file '{resolved}'\n") - + for path in all_inputs: + f.write(f"file '{path}'\n") + cmd = [ ffmpeg, "-y", "-f", "concat", @@ -220,13 +265,16 @@ def concat_clips( "-i", concat_file, "-c", "copy", "-movflags", "+faststart", - output_path, + final_output, ] - + result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: raise RuntimeError(f"Clip concat failed: {result.stderr[-500:]}") - + + if needs_rename: + os.replace(final_output, output_path) + return output_path finally: for f in os.listdir(temp_dir): @@ -570,11 +618,9 @@ def export_reencode( # Apply zoom post-processing if configured if zoom_config and zoom_config.get("enabled") and has_video: - import tempfile as _tf - import os as _os zoomed_path = output_path + ".zoomed.mp4" _apply_zoom_post(output_path, zoomed_path, zoom_config) - _os.replace(zoomed_path, output_path) + os.replace(zoomed_path, output_path) logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0)) return output_path @@ -737,11 +783,9 @@ def export_reencode_with_subs( # Apply zoom post-processing if configured if zoom_config and zoom_config.get("enabled"): - import tempfile as _tf - import os as _os zoomed_path = output_path + ".zoomed.mp4" _apply_zoom_post(output_path, zoomed_path, zoom_config) - _os.replace(zoomed_path, output_path) + os.replace(zoomed_path, output_path) logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0)) return output_path diff --git a/frontend/src/components/AppendClipPanel.tsx b/frontend/src/components/AppendClipPanel.tsx index a40eb84..edec663 100644 --- a/frontend/src/components/AppendClipPanel.tsx +++ b/frontend/src/components/AppendClipPanel.tsx @@ -5,7 +5,12 @@ export default function AppendClipPanel() { const { additionalClips, addAdditionalClip, removeAdditionalClip, reorderAdditionalClip, videoPath } = useEditorStore(); const handleAddClip = async () => { - const path = await window.electronAPI?.openFile(); + const path = await window.electronAPI?.openFile({ + filters: [ + { name: 'Video Files', extensions: ['mp4', 'mkv', 'mov', 'avi', 'webm'] }, + { name: 'All Files', extensions: ['*'] }, + ], + }); if (path) { addAdditionalClip(path); } diff --git a/frontend/src/components/BackgroundMusicPanel.tsx b/frontend/src/components/BackgroundMusicPanel.tsx index 88da7fb..0f360ba 100644 --- a/frontend/src/components/BackgroundMusicPanel.tsx +++ b/frontend/src/components/BackgroundMusicPanel.tsx @@ -5,7 +5,12 @@ export default function BackgroundMusicPanel() { const { backgroundMusic, setBackgroundMusic, updateBackgroundMusic } = useEditorStore(); const handleLoadMusic = async () => { - const path = await window.electronAPI?.openFile(); + const path = await window.electronAPI?.openFile({ + filters: [ + { name: 'Audio Files', extensions: ['mp3', 'wav', 'm4a', 'flac', 'ogg', 'aac', 'wma'] }, + { name: 'All Files', extensions: ['*'] }, + ], + }); if (path) { setBackgroundMusic({ path, diff --git a/frontend/src/components/ExportDialog.tsx b/frontend/src/components/ExportDialog.tsx index 95ac0ad..66d4e73 100644 --- a/frontend/src/components/ExportDialog.tsx +++ b/frontend/src/components/ExportDialog.tsx @@ -9,6 +9,19 @@ export default function ExportDialog() { const hasCuts = cutRanges.length > 0; + // Compute set of deleted word indices from cutRanges + const getDeletedSet = useCallback(() => { + const deletedSet = new Set(); + for (const range of cutRanges) { + for (let i = 0; i < words.length; i++) { + if (words[i].start >= range.start && words[i].end <= range.end) { + deletedSet.add(i); + } + } + } + return deletedSet; + }, [cutRanges, words]); + // Detect if input is audio-only by its extension const audioExtensions = new Set(['.wav', '.mp3', '.flac', '.m4a', '.ogg', '.aac', '.wma']); const inputExt = videoPath ? '.' + videoPath.split('.').pop()?.toLowerCase() : ''; @@ -46,14 +59,7 @@ export default function ExportDialog() { setIsTranscribingTranscript(true); try { // Compute deleted word set - const deletedSet = new Set(); - for (const range of cutRanges) { - for (let i = 0; i < words.length; i++) { - if (words[i].start >= range.start && words[i].end <= range.end) { - deletedSet.add(i); - } - } - } + const deletedSet = getDeletedSet(); // Generate content entirely on the frontend — no backend needed let content: string; @@ -103,7 +109,7 @@ export default function ExportDialog() { } finally { setIsTranscribingTranscript(false); } - }, [videoPath, words, cutRanges, transcriptFormat]); + }, [videoPath, words, getDeletedSet, transcriptFormat]); const HANDLE_EXPORT_filters = useCallback(() => { const ext = options.format; @@ -130,14 +136,7 @@ export default function ExportDialog() { setExportError(null); try { const keepSegments = getKeepSegments(); - - const deletedSet = new Set(); - for (const range of cutRanges) { - for (let i = 0; i < words.length; i++) { - const w = words[i]; - if (w.start >= range.start && w.end <= range.end) deletedSet.add(i); - } - } + const deletedSet = getDeletedSet(); // Map frontend camelCase gain/speed fields to backend snake_case const backendGainRanges = gainRanges.map((r) => ({ @@ -213,7 +212,7 @@ export default function ExportDialog() { setExportError(err instanceof Error ? err.message : 'Export failed'); setExporting(false); } - }, [videoPath, options, backendUrl, setExporting, getKeepSegments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters, additionalClips, backgroundMusic]); + }, [videoPath, options, backendUrl, setExporting, getKeepSegments, getDeletedSet, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters, additionalClips, backgroundMusic]); return (
diff --git a/frontend/src/components/WaveformTimeline.tsx b/frontend/src/components/WaveformTimeline.tsx index a594e49..85972e6 100644 --- a/frontend/src/components/WaveformTimeline.tsx +++ b/frontend/src/components/WaveformTimeline.tsx @@ -287,10 +287,9 @@ export default function WaveformTimeline({ const [showAdjustedTimeline, setShowAdjustedTimeline] = useState(false); const sourceDuration = duration || waveformDataRef.current?.duration || 0; - const timelineCutRanges = showAdjustedTimeline ? cutRanges : []; const { segments: timelineSegments, displayDuration } = useMemo( - () => buildTimelineSegments(sourceDuration, timelineCutRanges), - [sourceDuration, timelineCutRanges], + () => buildTimelineSegments(sourceDuration, showAdjustedTimeline ? cutRanges : []), + [sourceDuration, cutRanges, showAdjustedTimeline], ); useEffect(() => { @@ -687,7 +686,6 @@ export default function WaveformTimeline({ gainMode, speedMode, selectedZone, - showAdjustedTimeline, markInTime, markOutTime, displayDuration, @@ -696,6 +694,7 @@ export default function WaveformTimeline({ showGainZones, showSpeedZones, timelineSegments, + timelineMarkers, ]); // Keep the ref in sync with the latest drawStaticWaveform closure