From 810957747bfd96146ba1a77e9d354bf48db368b1 Mon Sep 17 00:00:00 2001
From: dillonj <dilljens@gmail.com>
Date: Tue, 5 May 2026 23:31:18 -0600
Subject: [PATCH] Clean up export features: music mixing, clip concat, bg-removal fallback

---
 FEATURES.md                                   |  36 +++++
 backend/services/background_removal.py        |  33 +++--
 backend/services/video_editor.py              | 130 ++++++++++++------
 frontend/src/components/AppendClipPanel.tsx   |   7 +-
 .../src/components/BackgroundMusicPanel.tsx   |   7 +-
 frontend/src/components/ExportDialog.tsx      |  35 +++--
 frontend/src/components/WaveformTimeline.tsx  |   7 +-
 7 files changed, 178 insertions(+), 77 deletions(-)

diff --git a/FEATURES.md b/FEATURES.md
index 7a7fccb..fb26ccf 100644
--- a/FEATURES.md
+++ b/FEATURES.md
@@ -62,6 +62,42 @@ Features are grouped by priority. Check off items as they are implemented.
 
 - [x] [#042] **Background removal** — MediaPipe Selfie Segmentation + FFmpeg frame processing for person/background separation. Configurable replacement: blur, solid color, or custom image. Applied during export. Falls back to FFmpeg colorkey when MediaPipe unavailable. (2026-05-05)
 
+## 🔮 Future — AI-powered editing & resource library
+
+All AI features use the existing Ollama/OpenAI/Claude provider config — no new auth or setup needed.
+
+- [ ] [#043] **AI Smart Clean** — one-click chain: filler removal + silence trim + noise reduction + loudness normalization in a single action. `POST /ai/smart-clean` calls the existing services sequentially.
+
+- [ ] [#044] **AI Transcript Summarization** — generate a bullet-point summary from the transcript. `POST /ai/summarize`. Shown in a new AIPanel tab.
+
+- [ ] [#045] **AI Sentence Rephrase** — right-click word/sentence in transcript → "Rephrase with AI" → see 3 alternatives → click to replace. `POST /ai/rephrase`. TranscriptEditor context menu.
+
+- [ ] [#046] **AI Smart Speed** — detect slow/low-energy sections → mark as suggested SpeedRange segments. `POST /ai/smart-speed`. Preview in AIPanel.
+
+- [ ] [#047] **AI Auto-Chapters** — detect topic shifts in transcript → create TimelineMarkers automatically. `POST /ai/chapters`.
+
+- [ ] [#048] **AI Show Notes** — generate title, description, soundbites, keywords from transcript + markers. `POST /ai/show-notes`. Copy to clipboard or save to file.
+
+- [ ] [#049] **AI Find Fluff** — AI marks rambles, intros, off-topic chatter for deletion. Extends existing filler detection. `POST /ai/find-fluff`. AIPanel tab showing suggested cut ranges.
+
+- [ ] [#050] **AI Smooth Cuts** — remove jump cuts between deleted segments using crossfade/blend during re-encode. Export option toggle.
+
+- [ ] [#051] **AI B-roll** — generate footage from a text prompt to fill visual gaps in the timeline. Uses a local Stable Diffusion model or a hosted generation API. New "B-roll" section in AIPanel.
+
+- [ ] [#052] **Smart Layouts** — auto-switch video layout between speakers based on who's talking. Detects active speaker from diarization + volume, applies crop/pad to focus on current speaker during export.
+
+- [ ] [#053] **Per-track audio levels** — individual gain per speaker track. Extend `GainRange` model with `track_id`, apply per-stream via FFmpeg.
+
+- [ ] [#054] **Intro/Outro templates** — save segment ranges as reusable templates, apply with one click on export.
+
+- [ ] [#055] **Built-in free music library** — 5–10 CC0/royalty-free short loops shipped in `frontend/public/resources/music/`. BackgroundMusicPanel gets a "Built-in" tab with play/preview.
+
+- [ ] [#056] **Stock media browser** — new `MediaLibraryPanel` that browses local `resources/media/` for images, video, audio with thumbnails. Frontend-only via Tauri `readDir`. Drag-to-add for bg removal images, append clips, or music.
+
+- [ ] [#057] **Sample content downloader** — "Get Sample Video" button on empty state downloads a short public-domain test video + pre-made transcription JSON for trying the app without your own media.
+
+---
+
 ## 💡 TalkEdit competitive advantages to lean into
 
 These aren't features to build — they're things to make more visible in the UI and README:
diff --git a/backend/services/background_removal.py b/backend/services/background_removal.py
index 3b5879e..aa4d95f 100644
--- a/backend/services/background_removal.py
+++ b/backend/services/background_removal.py
@@ -175,30 +175,40 @@ def _remove_with_mediapipe(
         raise RuntimeError(f"MediaPipe background removal failed: {e}")
 
 
+
 def _remove_with_ffmpeg_portrait(
     input_path: str,
     output_path: str,
     replacement: str = "blur",
     replacement_value: str = "",
 ) -> str:
-    """Fallback: use FFmpeg's colorkey + chromakey for basic background removal.
+    """Fallback: basic FFmpeg-only background blur.
 
-    This is a crude approximation. For best results, install mediapipe + opencv-python.
+    Uses a strong gaussian blur as a crude background replacement.
+    For proper person segmentation (color/image replacement), install:
+      pip install mediapipe opencv-python
     """
     ffmpeg = "ffmpeg"
 
-    # Use a simple chromakey-based approach with a neutral background
-    # This won't work well for most real videos but provides a fallback
-    if replacement == "color":
+    if replacement == "blur":
+        filter_complex = "gblur=sigma=30"
+    elif replacement == "color":
         color = replacement_value or "00FF00"
-        filter_complex = f"colorkey=0x{color}:0.3:0.1,chromakey=0x{color}:0.3:0.1"
-    elif replacement == "blur":
-        filter_complex = "gblur=sigma=20:enable='gt(scene,0.01)'"
+        filter_complex = (
+            f"split[fg][bg];"
+            f"[bg]colorkey=0x{color}:0.3:0.1[bg_key];"
+            f"[fg][bg_key]overlay"
+        )
+    elif replacement == "image" and replacement_value:
+        escaped = replacement_value.replace("\\", "/").replace(":", "\\:")
+        filter_complex = (
+            f"movie='{escaped}':loop=0,scale=iw:ih[bg];"
+            f"[0:v][bg]overlay=0:0:shortest=1"
+        )
     else:
         filter_complex = "null"
 
     if filter_complex == "null":
-        # No-op, copy input to output
         cmd = [ffmpeg, "-y", "-i", input_path, "-c", "copy", output_path]
     else:
         cmd = [
@@ -215,5 +225,8 @@ def _remove_with_ffmpeg_portrait(
     if result.returncode != 0:
         raise RuntimeError(f"FFmpeg background removal failed: {result.stderr[-500:]}")
 
-    logger.info("FFmpeg portait background removal completed -> %s", output_path)
+    logger.warning(
+        "FFmpeg fallback background removal used (no MediaPipe). "
+        "Install 'mediapipe' and 'opencv-python' for proper person segmentation."
+    )
     return output_path
diff --git a/backend/services/video_editor.py b/backend/services/video_editor.py
index 00a7d31..7a12a61 100644
--- a/backend/services/video_editor.py
+++ b/backend/services/video_editor.py
@@ -45,6 +45,24 @@ def _input_has_video_stream(ffmpeg_cmd: str, input_path: str) -> bool:
         return False
 
 
+def _input_has_audio_stream(ffmpeg_cmd: str, input_path: str) -> bool:
+    """Return True if the input contains at least one audio stream."""
+    ffprobe = ffmpeg_cmd.replace("ffmpeg", "ffprobe")
+    cmd = [
+        ffprobe,
+        "-v", "error",
+        "-select_streams", "a:0",
+        "-show_entries", "stream=index",
+        "-of", "csv=p=0",
+        str(input_path),
+    ]
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        return result.returncode == 0 and bool(result.stdout.strip())
+    except Exception:
+        return False
+
+
 def _clamp_speed(speed: float) -> float:
     return max(0.25, min(4.0, float(speed)))
 
@@ -144,39 +162,65 @@ def mix_background_music(
     ducking_release_ms: float = 200.0,
 ) -> str:
     """Mix background music into a video with optional ducking.
-    
-    Uses FFmpeg amix + sidechaincompress. Output is written to output_path.
+
+    Uses FFmpeg amix + sidechaincompress. If the input has no audio,
+    the music track becomes the sole audio track. Output is written to output_path.
     """
     ffmpeg = _find_ffmpeg()
     escaped_music = music_path.replace("\\", "/").replace(":", "\\:")
-    
-    # Build the filter graph
-    if ducking_enabled:
+    has_audio_result = _input_has_audio_stream(ffmpeg, video_path)
+
+    if not has_audio_result:
+        cmd = [
+            ffmpeg, "-y",
+            "-i", video_path,
+            "-i", music_path,
+            "-map", "0:v",
+            "-map", "1:a",
+            "-c:v", "copy",
+            "-c:a", "aac", "-b:a", "192k",
+            "-shortest",
+            "-movflags", "+faststart",
+            output_path,
+        ]
+    elif ducking_enabled:
+        music_source = f"amovie='{escaped_music}',volume={volume_db}dB[music]"
         filter_complex = (
             f"[0:a]asplit[main][sidechain];"
-            f"movie='{escaped_music}':loop=0,volume={volume_db}dB[music];"
+            f"{music_source};"
             f"[main][music]amix=inputs=2:duration=first:dropout_transition=2[mixed];"
             f"[mixed][sidechain]sidechaincompress="
-            f"threshold=-30dB:ratio=100:attack={ducking_attack_ms}ms:"
-            f"release={ducking_release_ms}ms:makeup=1:level_sc={ducking_db}[outa]"
+            f"threshold=-30dB:ratio=20:attack={ducking_attack_ms / 1000}:"
+            f"release={ducking_release_ms / 1000}:makeup=1:level_sc={ducking_db}[outa]"
         )
+        cmd = [
+            ffmpeg, "-y",
+            "-i", video_path,
+            "-filter_complex", filter_complex,
+            "-map", "0:v",
+            "-map", "[outa]",
+            "-c:v", "copy",
+            "-c:a", "aac", "-b:a", "192k",
+            "-shortest",
+            output_path,
+        ]
     else:
+        music_source = f"amovie='{escaped_music}',volume={volume_db}dB[music]"
         filter_complex = (
-            f"movie='{escaped_music}':loop=0,volume={volume_db}dB[music];"
+            f"{music_source};"
             f"[0:a][music]amix=inputs=2:duration=first:dropout_transition=2[outa]"
         )
-    
-    cmd = [
-        ffmpeg, "-y",
-        "-i", video_path,
-        "-filter_complex", filter_complex,
-        "-map", "0:v",
-        "-map", "[outa]",
-        "-c:v", "copy",
-        "-c:a", "aac", "-b:a", "192k",
-        "-shortest",
-        output_path,
-    ]
+        cmd = [
+            ffmpeg, "-y",
+            "-i", video_path,
+            "-filter_complex", filter_complex,
+            "-map", "0:v",
+            "-map", "[outa]",
+            "-c:v", "copy",
+            "-c:a", "aac", "-b:a", "192k",
+            "-shortest",
+            output_path,
+        ]
     
     result = subprocess.run(cmd, capture_output=True, text=True)
     if result.returncode != 0:
@@ -191,28 +235,29 @@ def concat_clips(
     output_path: str,
 ) -> str:
     """Concatenate multiple video clips using FFmpeg concat demuxer.
-    
+
     The main_path is kept as-is. append_paths are appended after it.
     """
     if not append_paths:
         raise ValueError("No clips to concatenate")
-    
+
     ffmpeg = _find_ffmpeg()
-    import tempfile
-    import os
-    
+    resolved_main = str(Path(main_path).resolve())
+
+    # If output_path collides with an input, write to temp first
+    all_inputs = [resolved_main] + [str(Path(p).resolve()) for p in append_paths]
+    needs_rename = str(Path(output_path).resolve()) in all_inputs
+    final_output = output_path
+    if needs_rename:
+        final_output = output_path + ".concat_tmp.mp4"
+
     temp_dir = tempfile.mkdtemp(prefix="aive_concat_")
     try:
-        segment_files = [main_path]
-        segment_files.extend(append_paths)
-        
-        # Create concat file list
         concat_file = os.path.join(temp_dir, "concat.txt")
         with open(concat_file, "w") as f:
-            for path in segment_files:
-                resolved = os.path.abspath(path)
-                f.write(f"file '{resolved}'\n")
-        
+            for path in all_inputs:
+                f.write(f"file '{path}'\n")
+
         cmd = [
             ffmpeg, "-y",
             "-f", "concat",
@@ -220,13 +265,16 @@ def concat_clips(
             "-i", concat_file,
             "-c", "copy",
             "-movflags", "+faststart",
-            output_path,
+            final_output,
         ]
-        
+
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode != 0:
             raise RuntimeError(f"Clip concat failed: {result.stderr[-500:]}")
-        
+
+        if needs_rename:
+            os.replace(final_output, output_path)
+
         return output_path
     finally:
         for f in os.listdir(temp_dir):
@@ -570,11 +618,9 @@ def export_reencode(
 
     # Apply zoom post-processing if configured
     if zoom_config and zoom_config.get("enabled") and has_video:
-        import tempfile as _tf
-        import os as _os
         zoomed_path = output_path + ".zoomed.mp4"
         _apply_zoom_post(output_path, zoomed_path, zoom_config)
-        _os.replace(zoomed_path, output_path)
+        os.replace(zoomed_path, output_path)
         logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0))
 
     return output_path
@@ -737,11 +783,9 @@ def export_reencode_with_subs(
 
     # Apply zoom post-processing if configured
     if zoom_config and zoom_config.get("enabled"):
-        import tempfile as _tf
-        import os as _os
         zoomed_path = output_path + ".zoomed.mp4"
         _apply_zoom_post(output_path, zoomed_path, zoom_config)
-        _os.replace(zoomed_path, output_path)
+        os.replace(zoomed_path, output_path)
         logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0))
 
     return output_path
diff --git a/frontend/src/components/AppendClipPanel.tsx b/frontend/src/components/AppendClipPanel.tsx
index a40eb84..edec663 100644
--- a/frontend/src/components/AppendClipPanel.tsx
+++ b/frontend/src/components/AppendClipPanel.tsx
@@ -5,7 +5,12 @@ export default function AppendClipPanel() {
   const { additionalClips, addAdditionalClip, removeAdditionalClip, reorderAdditionalClip, videoPath } = useEditorStore();
 
   const handleAddClip = async () => {
-    const path = await window.electronAPI?.openFile();
+    const path = await window.electronAPI?.openFile({
+      filters: [
+        { name: 'Video Files', extensions: ['mp4', 'mkv', 'mov', 'avi', 'webm'] },
+        { name: 'All Files', extensions: ['*'] },
+      ],
+    });
     if (path) {
       addAdditionalClip(path);
     }
diff --git a/frontend/src/components/BackgroundMusicPanel.tsx b/frontend/src/components/BackgroundMusicPanel.tsx
index 88da7fb..0f360ba 100644
--- a/frontend/src/components/BackgroundMusicPanel.tsx
+++ b/frontend/src/components/BackgroundMusicPanel.tsx
@@ -5,7 +5,12 @@ export default function BackgroundMusicPanel() {
   const { backgroundMusic, setBackgroundMusic, updateBackgroundMusic } = useEditorStore();
 
   const handleLoadMusic = async () => {
-    const path = await window.electronAPI?.openFile();
+    const path = await window.electronAPI?.openFile({
+      filters: [
+        { name: 'Audio Files', extensions: ['mp3', 'wav', 'm4a', 'flac', 'ogg', 'aac', 'wma'] },
+        { name: 'All Files', extensions: ['*'] },
+      ],
+    });
     if (path) {
       setBackgroundMusic({
         path,
diff --git a/frontend/src/components/ExportDialog.tsx b/frontend/src/components/ExportDialog.tsx
index 95ac0ad..66d4e73 100644
--- a/frontend/src/components/ExportDialog.tsx
+++ b/frontend/src/components/ExportDialog.tsx
@@ -9,6 +9,19 @@ export default function ExportDialog() {
 
   const hasCuts = cutRanges.length > 0;
 
+  // Compute set of deleted word indices from cutRanges
+  const getDeletedSet = useCallback(() => {
+    const deletedSet = new Set<number>();
+    for (const range of cutRanges) {
+      for (let i = 0; i < words.length; i++) {
+        if (words[i].start >= range.start && words[i].end <= range.end) {
+          deletedSet.add(i);
+        }
+      }
+    }
+    return deletedSet;
+  }, [cutRanges, words]);
+
   // Detect if input is audio-only by its extension
   const audioExtensions = new Set(['.wav', '.mp3', '.flac', '.m4a', '.ogg', '.aac', '.wma']);
   const inputExt = videoPath ? '.' + videoPath.split('.').pop()?.toLowerCase() : '';
@@ -46,14 +59,7 @@ export default function ExportDialog() {
     setIsTranscribingTranscript(true);
     try {
       // Compute deleted word set
-      const deletedSet = new Set<number>();
-      for (const range of cutRanges) {
-        for (let i = 0; i < words.length; i++) {
-          if (words[i].start >= range.start && words[i].end <= range.end) {
-            deletedSet.add(i);
-          }
-        }
-      }
+      const deletedSet = getDeletedSet();
 
       // Generate content entirely on the frontend — no backend needed
       let content: string;
@@ -103,7 +109,7 @@ export default function ExportDialog() {
     } finally {
       setIsTranscribingTranscript(false);
     }
-  }, [videoPath, words, cutRanges, transcriptFormat]);
+  }, [videoPath, words, getDeletedSet, transcriptFormat]);
 
   const HANDLE_EXPORT_filters = useCallback(() => {
     const ext = options.format;
@@ -130,14 +136,7 @@ export default function ExportDialog() {
     setExportError(null);
     try {
       const keepSegments = getKeepSegments();
-
-      const deletedSet = new Set<number>();
-      for (const range of cutRanges) {
-        for (let i = 0; i < words.length; i++) {
-          const w = words[i];
-          if (w.start >= range.start && w.end <= range.end) deletedSet.add(i);
-        }
-      }
+      const deletedSet = getDeletedSet();
 
       // Map frontend camelCase gain/speed fields to backend snake_case
       const backendGainRanges = gainRanges.map((r) => ({
@@ -213,7 +212,7 @@ export default function ExportDialog() {
       setExportError(err instanceof Error ? err.message : 'Export failed');
       setExporting(false);
     }
-  }, [videoPath, options, backendUrl, setExporting, getKeepSegments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters, additionalClips, backgroundMusic]);
+  }, [videoPath, options, backendUrl, setExporting, getKeepSegments, getDeletedSet, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters, additionalClips, backgroundMusic]);
 
   return (
     <div className="p-4 space-y-5">
diff --git a/frontend/src/components/WaveformTimeline.tsx b/frontend/src/components/WaveformTimeline.tsx
index a594e49..85972e6 100644
--- a/frontend/src/components/WaveformTimeline.tsx
+++ b/frontend/src/components/WaveformTimeline.tsx
@@ -287,10 +287,9 @@ export default function WaveformTimeline({
   const [showAdjustedTimeline, setShowAdjustedTimeline] = useState(false);
 
   const sourceDuration = duration || waveformDataRef.current?.duration || 0;
-  const timelineCutRanges = showAdjustedTimeline ? cutRanges : [];
   const { segments: timelineSegments, displayDuration } = useMemo(
-    () => buildTimelineSegments(sourceDuration, timelineCutRanges),
-    [sourceDuration, timelineCutRanges],
+    () => buildTimelineSegments(sourceDuration, showAdjustedTimeline ? cutRanges : []),
+    [sourceDuration, cutRanges, showAdjustedTimeline],
   );
 
   useEffect(() => {
@@ -687,7 +686,6 @@ export default function WaveformTimeline({
     gainMode,
     speedMode,
     selectedZone,
-    showAdjustedTimeline,
     markInTime,
     markOutTime,
     displayDuration,
@@ -696,6 +694,7 @@ export default function WaveformTimeline({
     showGainZones,
     showSpeedZones,
     timelineSegments,
+    timelineMarkers,
   ]);
 
   // Keep the ref in sync with the latest drawStaticWaveform closure