From d80ff847d81e5310dac29046eb19759c87f995c6 Mon Sep 17 00:00:00 2001
From: dillonj <dilljens@gmail.com>
Date: Fri, 3 Apr 2026 12:05:44 -0600
Subject: [PATCH] Add silence/pause trimmer (detect-silence endpoint and trimmer panel)

---
 FEATURES.md                                   |  11 +-
 backend/routers/audio.py                      |  26 ++-
 backend/services/audio_cleaner.py             |  52 +++++
 frontend/src/App.tsx                          |  15 +-
 .../src/components/SilenceTrimmerPanel.tsx    | 184 ++++++++++++++++++
 5 files changed, 284 insertions(+), 4 deletions(-)
 create mode 100644 frontend/src/components/SilenceTrimmerPanel.tsx

diff --git a/FEATURES.md b/FEATURES.md
index 0e70f74..667748d 100644
--- a/FEATURES.md
+++ b/FEATURES.md
@@ -6,9 +6,15 @@ Features are grouped by priority. Check off items as they are implemented.
 
 ## 🔴 High Priority — Core editing gaps
 
-- [ ] **Cut / Mute sections** — select a time range and choose to cut (remove entirely) or mute (silence audio while video continues). Cut sections show as red overlays, mute sections as transparent blue overlays on the timeline over the transcript text and audio waveform. Backend: `ffmpeg -af volume=0` for mute, time-based cutting for removal.
+- [x] **Cut / Mute sections** — select a time range and choose to cut (remove entirely) or mute (silence audio while video continues). Cut sections show as red overlays, mute sections as transparent blue overlays on the timeline over the transcript text and audio waveform. Backend: `ffmpeg -af volume=0` for mute, time-based cutting for removal.
 
-- [ ] **Silence / pause trimmer** — detect and auto-remove pauses longer than X ms. One backend endpoint (`/audio/remove-silence`) + a button in the UI. Saves enormous time in podcast/interview editing.
+- [ ] **Silence / pause trimmer (in progress)** — detect pauses using min duration (ms) + amplitude threshold (dB), then apply detected pauses as cut ranges. Initial endpoint: `/audio/detect-silence`; UI includes filter controls and an "Apply As Cuts" action.
+
+- [ ] **Operation-level undo for batch actions** — explicit undo entry for actions like "Apply Silence Trim" so one shortcut/click reverts the whole operation, while still allowing normal fine-grained undo/redo steps.
+
+- [ ] **Grouped silence-trim zones (editable batch)** — when pauses are applied, tag them as a batch (`trim_group_id`) so the user can: (1) delete all zones from that auto-trim pass at once, and (2) still select/resize/delete individual zones independently.
+
+- [ ] **Edit silence-trim group settings after apply** — allow reopening a trim group and changing its detection settings (min pause ms, threshold dB, pre/post buffers), then reapplying updates to that group without affecting unrelated edits.
 
 - [ ] **Volume / gain control** — per-selection or global audio gain slider. Every editor has this. Descript users constantly complain it's missing. Backend: `ffmpeg -af volume=Xdb`.
 
@@ -84,3 +90,4 @@ These aren't features to build — they're things to make more visible in the UI
 - Multi-format input (MP4, MKV, MOV, AVI, WebM, M4A)
 - Keyboard shortcuts (Space, J/K/L, arrows, Ctrl+Z/Shift+Z, Ctrl+S, Ctrl+E)
 - Settings panel: AI provider config (Ollama, OpenAI, Claude)
+- Cut/mute range creation on timeline with draggable zone edits and Delete-to-remove
diff --git a/backend/routers/audio.py b/backend/routers/audio.py
index 381552f..423ac0d 100644
--- a/backend/routers/audio.py
+++ b/backend/routers/audio.py
@@ -11,7 +11,7 @@ from fastapi import APIRouter, HTTPException, Query
 from fastapi.responses import FileResponse
 from pydantic import BaseModel
 
-from services.audio_cleaner import clean_audio, is_deepfilter_available
+from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -25,6 +25,12 @@ class AudioCleanRequest(BaseModel):
     output_path: Optional[str] = None
 
 
+class SilenceDetectRequest(BaseModel):  # JSON body of POST /audio/detect-silence
+    input_path: str  # media file path, forwarded to detect_silence_ranges()
+    min_silence_ms: int = 500  # shortest pause to report, in milliseconds
+    silence_db: float = -35.0  # noise threshold in dB handed to ffmpeg silencedetect
+
+
 @router.post("/audio/clean")
 async def clean_audio_endpoint(req: AudioCleanRequest):
     try:
@@ -46,6 +52,24 @@ async def audio_capabilities():
     }
 
 
+@router.post("/audio/detect-silence")
+async def detect_silence_endpoint(req: SilenceDetectRequest):
+    """Run silence detection on ``req.input_path``; reply with ``ranges`` and ``count``."""
+    # Function-scope import: fastapi is already a dependency of this module.
+    from fastapi.concurrency import run_in_threadpool
+
+    try:
+        # detect_silence_ranges blocks on an ffmpeg subprocess; running it in the
+        # threadpool keeps the event loop free to serve other requests meanwhile.
+        ranges = await run_in_threadpool(
+            detect_silence_ranges, req.input_path, req.min_silence_ms, req.silence_db
+        )
+        return {"status": "ok", "ranges": ranges, "count": len(ranges)}
+    except Exception as e:
+        logger.error(f"Silence detection failed: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+
+
 @router.get("/audio/waveform")
 async def get_waveform_audio(path: str = Query(...)):
     """
diff --git a/backend/services/audio_cleaner.py b/backend/services/audio_cleaner.py
index 6e708d7..97d75d5 100644
--- a/backend/services/audio_cleaner.py
+++ b/backend/services/audio_cleaner.py
@@ -4,6 +4,7 @@ Falls back to a basic FFmpeg noise filter if DeepFilterNet is not installed.
 """
 
 import logging
+import re
 import subprocess
 import tempfile
 from pathlib import Path
@@ -77,3 +78,54 @@ def _clean_with_ffmpeg(input_path: str, output_path: str) -> str:
 
 def is_deepfilter_available() -> bool:
     return DEEPFILTER_AVAILABLE
+
+
+def detect_silence_ranges(input_path: str, min_silence_ms: int, silence_db: float):
+    """Detect silence ranges using ffmpeg silencedetect.
+
+    Args:
+        input_path: Media file passed to ``ffmpeg -i``.
+        min_silence_ms: Shortest pause to report, in milliseconds (floored at 50 ms).
+        silence_db: Noise threshold in dB, passed to the filter as ``noise=``.
+
+    Returns:
+        A list of dicts: {start, end, duration} in seconds, rounded to 3 decimals.
+
+    Raises:
+        RuntimeError: If ffmpeg exits non-zero, instead of reporting "no silence".
+    """
+    min_silence_seconds = max(0.05, float(min_silence_ms) / 1000.0)
+    detect_filter = f"silencedetect=noise={float(silence_db)}dB:d={min_silence_seconds}"
+    cmd = ["ffmpeg", "-hide_banner", "-i", input_path, "-af", detect_filter, "-f", "null", "-"]
+    # errors="replace": undecodable bytes echoed by ffmpeg must not abort the parse.
+    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
+    output = result.stderr or ""  # silencedetect prints to stderr even on success
+    if result.returncode != 0:
+        tail = " | ".join(output.strip().splitlines()[-3:])
+        raise RuntimeError(f"ffmpeg silencedetect failed (exit {result.returncode}): {tail}")
+
+    # Accept a sign and an exponent too: the old [0-9.]+ pattern skipped values such
+    # as "-0.0231" or "1e-05", which then shifted every later start/end pairing.
+    num = r"(-?[0-9]+(?:\.[0-9]+)?(?:[eE][-+]?[0-9]+)?)"
+    event_pat = re.compile(
+        rf"silence_start:\s*{num}|silence_end:\s*{num}\s*\|\s*silence_duration:\s*{num}"
+    )
+
+    ranges = []
+    pending_start = None
+    # Pair markers in stream order so one unmatched marker cannot misalign the rest.
+    for m in event_pat.finditer(output):
+        start_txt, end_txt, duration_txt = m.groups()
+        if start_txt is not None:
+            pending_start = max(0.0, float(start_txt))
+        elif pending_start is not None:
+            start, end, duration = pending_start, float(end_txt), float(duration_txt)
+            pending_start = None
+            if end > start and duration >= min_silence_seconds:
+                ranges.append(
+                    {"start": round(start, 3), "end": round(end, 3), "duration": round(duration, 3)}
+                )
+
+    summary = "Detected %s silence ranges in %s (min=%sms, threshold=%sdB)"
+    logger.info(summary, len(ranges), input_path, min_silence_ms, silence_db)
+    return ranges
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 86d7cf4..76feaf8 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -7,6 +7,7 @@ import AIPanel from './components/AIPanel';
 import ExportDialog from './components/ExportDialog';
 import SettingsPanel from './components/SettingsPanel';
 import DevPanel from './components/DevPanel';
+import SilenceTrimmerPanel from './components/SilenceTrimmerPanel';
 import { useKeyboardShortcuts } from './hooks/useKeyboardShortcuts';
 import {
   Film,
@@ -23,7 +24,7 @@ import {
 
 const IS_ELECTRON = !!window.electronAPI;
 
-type Panel = 'ai' | 'settings' | 'export' | null;
+type Panel = 'ai' | 'settings' | 'export' | 'silence' | null;
 
 export default function App() {
   const {
@@ -166,6 +167,10 @@ export default function App() {
     }
   };
 
+  // Select `panel`, or clear the selection (null) when it is already the active one.
+  const togglePanel = (panel: Panel) =>
+    setActivePanel((current) => (current === panel ? null : panel));
+
   const handleCut = () => {
     if (selectedWordIndices.length > 0) {
       // If words are selected, apply cut immediately
@@ -337,6 +342,13 @@ export default function App() {
             onClick={handleMute}
             active={muteMode}
           />
+          <ToolbarButton
+            icon={<span className="text-[10px] font-semibold">PA</span>}
+            label="Pause Trim"
+            active={activePanel === 'silence'}
+            onClick={() => togglePanel('silence')}
+            disabled={!videoPath}
+          />
           <ToolbarButton
             icon={<Sparkles className="w-4 h-4" />}
             label="AI"
@@ -411,6 +423,7 @@ export default function App() {
         {/* Right panel (AI / Export / Settings) */}
         {activePanel && (
           <div className="w-80 border-l border-editor-border overflow-y-auto shrink-0">
+            {activePanel === 'silence' && <SilenceTrimmerPanel />}
             {activePanel === 'ai' && <AIPanel />}
             {activePanel === 'export' && <ExportDialog />}
             {activePanel === 'settings' && <SettingsPanel />}
diff --git a/frontend/src/components/SilenceTrimmerPanel.tsx b/frontend/src/components/SilenceTrimmerPanel.tsx
new file mode 100644
index 0000000..cc62d41
--- /dev/null
+++ b/frontend/src/components/SilenceTrimmerPanel.tsx
@@ -0,0 +1,184 @@
+import { useState } from 'react';
+import { useEditorStore } from '../store/editorStore';
+import { Loader2, Scissors } from 'lucide-react';
+
+type SilenceRange = { // one entry of the `ranges` array returned by /audio/detect-silence
+  start: number; // seconds
+  end: number; // seconds
+  duration: number; // seconds
+};
+
+/** Side panel: detect pauses via the backend, preview them, and apply them as cut ranges. */
+export default function SilenceTrimmerPanel() {
+  const { videoPath, backendUrl, addCutRange, duration } = useEditorStore();
+  const [minSilenceMs, setMinSilenceMs] = useState(500);
+  const [silenceDb, setSilenceDb] = useState(-35);
+  const [preBufferMs, setPreBufferMs] = useState(80);
+  const [postBufferMs, setPostBufferMs] = useState(120);
+  const [isDetecting, setIsDetecting] = useState(false);
+  const [ranges, setRanges] = useState<SilenceRange[]>([]);
+  const maxListed = 50; // rows rendered in the preview list
+  const labelClass = 'text-[11px] text-editor-text-muted font-medium';
+  const inputClass = 'w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none';
+
+  // `Number(v) || fallback` would discard a legitimate 0 (e.g. 0 dB); fall back only for empty/NaN.
+  const numberOr = (raw: string, fallback: number) => {
+    const parsed = Number(raw);
+    return raw.trim() === '' || Number.isNaN(parsed) ? fallback : parsed;
+  };
+
+  // Ask the backend for silence ranges using the current length/threshold settings.
+  const detectSilence = async () => {
+    if (!videoPath) return;
+    setIsDetecting(true);
+    setRanges([]);
+    try {
+      const res = await fetch(`${backendUrl}/audio/detect-silence`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          input_path: videoPath,
+          min_silence_ms: minSilenceMs,
+          silence_db: silenceDb,
+        }),
+      });
+
+      if (!res.ok) {
+        let detail = `HTTP ${res.status} ${res.statusText}`;
+        // Error bodies are not always JSON; treat an unparsable body as "no detail".
+        const err = await res.json().catch(() => null);
+        if (err?.detail) detail += ` - ${String(err.detail)}`;
+        if (res.status === 404) detail += ' (endpoint missing: restart backend to load /audio/detect-silence)';
+        throw new Error(detail);
+      }
+
+      const data = await res.json();
+      setRanges(data.ranges || []);
+    } catch (err) {
+      console.error(err);
+      const message = err instanceof Error ? err.message : 'Unknown error';
+      alert(`Silence detection failed: ${message}`);
+    } finally {
+      setIsDetecting(false);
+    }
+  };
+
+  // Turn every detected pause into a cut range, adjusted by the two buffers.
+  const applyAsCuts = () => {
+    const preBufferSeconds = preBufferMs / 1000;
+    const postBufferSeconds = postBufferMs / 1000;
+    const maxEnd = duration > 0 ? duration : Number.POSITIVE_INFINITY;
+
+    for (const r of ranges) {
+      // Positive buffers shrink the cut, negative buffers expand it.
+      const start = Math.max(0, r.start + preBufferSeconds);
+      const end = Math.min(maxEnd, r.end - postBufferSeconds);
+      if (end - start >= 0.01) addCutRange(start, end);
+    }
+    // Clear the preview so a second click cannot add the same cuts again.
+    setRanges([]);
+  };
+
+  return (
+    <div className="p-4 space-y-4">
+      <div className="space-y-1">
+        <h3 className="text-sm font-semibold">Silence / Pause Trimmer</h3>
+        <p className="text-xs text-editor-text-muted">Detect pauses and convert them into cut ranges.</p>
+      </div>
+
+      <div className="space-y-3">
+        <div className="space-y-1.5">
+          <label className={labelClass}>Minimum pause length (ms)</label>
+          <input
+            type="number"
+            min={100}
+            step={50}
+            value={minSilenceMs}
+            onChange={(e) => setMinSilenceMs(Number(e.target.value) || 500)}
+            className={inputClass}
+          />
+        </div>
+
+        <div className="space-y-1.5">
+          <label className={labelClass}>Silence threshold (dB)</label>
+          <input
+            type="number"
+            min={-80}
+            max={0}
+            step={1}
+            value={silenceDb}
+            onChange={(e) => setSilenceDb(numberOr(e.target.value, -35))}
+            className={inputClass}
+          />
+        </div>
+
+        <div className="grid grid-cols-2 gap-2">
+          <div className="space-y-1.5">
+            <label className={labelClass}>Buffer before (ms, +shrink / -expand)</label>
+            <input
+              type="number"
+              min={-5000}
+              max={5000}
+              step={10}
+              value={preBufferMs}
+              onChange={(e) => setPreBufferMs(Number(e.target.value) || 0)}
+              className={inputClass}
+            />
+          </div>
+          <div className="space-y-1.5">
+            <label className={labelClass}>Buffer after (ms, +shrink / -expand)</label>
+            <input
+              type="number"
+              min={-5000}
+              max={5000}
+              step={10}
+              value={postBufferMs}
+              onChange={(e) => setPostBufferMs(Number(e.target.value) || 0)}
+              className={inputClass}
+            />
+          </div>
+        </div>
+
+        <button
+          onClick={detectSilence}
+          disabled={isDetecting || !videoPath}
+          className="w-full flex items-center justify-center gap-2 px-4 py-2.5 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-50 rounded-lg text-sm font-medium transition-colors"
+        >
+          {isDetecting ? (
+            <>
+              <Loader2 className="w-4 h-4 animate-spin" />
+              Detecting pauses...
+            </>
+          ) : (
+            'Detect Pauses'
+          )}
+        </button>
+      </div>
+
+      {ranges.length > 0 && (
+        <div className="space-y-2">
+          <div className="flex items-center justify-between">
+            <span className="text-xs font-medium">Detected {ranges.length} pause ranges</span>
+            <button
+              onClick={applyAsCuts}
+              className="flex items-center gap-1 px-2 py-1 text-xs bg-editor-accent/20 text-editor-accent rounded hover:bg-editor-accent/30"
+            >
+              <Scissors className="w-3 h-3" />
+              Apply As Cuts
+            </button>
+          </div>
+          <div className="max-h-56 overflow-y-auto space-y-1 pr-1">
+            {ranges.slice(0, maxListed).map((r, i) => (
+              <div key={`${r.start}-${r.end}-${i}`} className="px-2 py-1.5 rounded bg-editor-surface border border-editor-border text-xs">
+                {r.start.toFixed(2)}s - {r.end.toFixed(2)}s ({r.duration.toFixed(2)}s)
+              </div>
+            ))}
+            {ranges.length > maxListed && (
+              <div className="px-2 text-[11px] text-editor-text-muted">Showing the first {maxListed} of {ranges.length} ranges; all are applied.</div>
+            )}
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}
\ No newline at end of file