From d80ff847d81e5310dac29046eb19759c87f995c6 Mon Sep 17 00:00:00 2001 From: dillonj Date: Fri, 3 Apr 2026 12:05:44 -0600 Subject: [PATCH] silence trimmer --- FEATURES.md | 11 +- backend/routers/audio.py | 26 ++- backend/services/audio_cleaner.py | 52 +++++ frontend/src/App.tsx | 15 +- .../src/components/SilenceTrimmerPanel.tsx | 184 ++++++++++++++++++ 5 files changed, 284 insertions(+), 4 deletions(-) create mode 100644 frontend/src/components/SilenceTrimmerPanel.tsx diff --git a/FEATURES.md b/FEATURES.md index 0e70f74..667748d 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -6,9 +6,15 @@ Features are grouped by priority. Check off items as they are implemented. ## 🔴 High Priority — Core editing gaps -- [ ] **Cut / Mute sections** — select a time range and choose to cut (remove entirely) or mute (silence audio while video continues). Cut sections show as red overlays, mute sections as transparent blue overlays on the timeline over the transcript text and audio waveform. Backend: `ffmpeg -af volume=0` for mute, time-based cutting for removal. +- [x] **Cut / Mute sections** — select a time range and choose to cut (remove entirely) or mute (silence audio while video continues). Cut sections show as red overlays, mute sections as transparent blue overlays on the timeline over the transcript text and audio waveform. Backend: `ffmpeg -af volume=0` for mute, time-based cutting for removal. -- [ ] **Silence / pause trimmer** — detect and auto-remove pauses longer than X ms. One backend endpoint (`/audio/remove-silence`) + a button in the UI. Saves enormous time in podcast/interview editing. +- [ ] **Silence / pause trimmer (in progress)** — detect pauses using min duration (ms) + amplitude threshold (dB), then apply detected pauses as cut ranges. Initial endpoint: `/audio/detect-silence`; UI includes filter controls and an "Apply As Cuts" action. + +- [ ] **Operation-level undo for batch actions** — explicit undo entry for actions like "Apply Silence Trim" so one shortcut/click reverts the whole operation, while still allowing normal fine-grained undo/redo steps. + +- [ ] **Grouped silence-trim zones (editable batch)** — when pauses are applied, tag them as a batch (`trim_group_id`) so the user can: (1) delete all zones from that auto-trim pass at once, and (2) still select/resize/delete individual zones independently. + +- [ ] **Edit silence-trim group settings after apply** — allow reopening a trim group and changing its detection settings (min pause ms, threshold dB, pre/post buffers), then reapplying updates to that group without affecting unrelated edits. - [ ] **Volume / gain control** — per-selection or global audio gain slider. Every editor has this. Descript users constantly complain it's missing. Backend: `ffmpeg -af volume=Xdb`. @@ -84,3 +90,4 @@ These aren't features to build — they're things to make more visible in the UI - Multi-format input (MP4, MKV, MOV, AVI, WebM, M4A) - Keyboard shortcuts (Space, J/K/L, arrows, Ctrl+Z/Shift+Z, Ctrl+S, Ctrl+E) - Settings panel: AI provider config (Ollama, OpenAI, Claude) +- Cut/mute range creation on timeline with draggable zone edits and Delete-to-remove diff --git a/backend/routers/audio.py b/backend/routers/audio.py index 381552f..423ac0d 100644 --- a/backend/routers/audio.py +++ b/backend/routers/audio.py @@ -11,7 +11,7 @@ from fastapi import APIRouter, HTTPException, Query from fastapi.responses import FileResponse from pydantic import BaseModel -from services.audio_cleaner import clean_audio, is_deepfilter_available +from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available logger = logging.getLogger(__name__) router = APIRouter() @@ -25,6 +25,12 @@ class AudioCleanRequest(BaseModel): output_path: Optional[str] = None +class SilenceDetectRequest(BaseModel): + input_path: str + min_silence_ms: int = 500 + silence_db: float = -35.0 + + @router.post("/audio/clean") async def clean_audio_endpoint(req: AudioCleanRequest): try: @@ -46,6 +52,24 @@ async def audio_capabilities(): } +@router.post("/audio/detect-silence") +async def detect_silence_endpoint(req: SilenceDetectRequest): + try: + ranges = detect_silence_ranges( + req.input_path, + req.min_silence_ms, + req.silence_db, + ) + return { + "status": "ok", + "ranges": ranges, + "count": len(ranges), + } + except Exception as e: + logger.error(f"Silence detection failed: {e}", exc_info=True) + raise HTTPException(status_code=500, detail=str(e)) + + @router.get("/audio/waveform") async def get_waveform_audio(path: str = Query(...)): """ diff --git a/backend/services/audio_cleaner.py b/backend/services/audio_cleaner.py index 6e708d7..97d75d5 100644 --- a/backend/services/audio_cleaner.py +++ b/backend/services/audio_cleaner.py @@ -4,6 +4,7 @@ Falls back to a basic FFmpeg noise filter if DeepFilterNet is not installed. """ import logging +import re import subprocess import tempfile from pathlib import Path @@ -77,3 +78,54 @@ def _clean_with_ffmpeg(input_path: str, output_path: str) -> str: def is_deepfilter_available() -> bool: return DEEPFILTER_AVAILABLE + + +def detect_silence_ranges(input_path: str, min_silence_ms: int, silence_db: float): + """Detect silence ranges using ffmpeg silencedetect. + + Returns a list of dicts: {start, end, duration} in seconds. + """ + min_silence_seconds = max(0.05, float(min_silence_ms) / 1000.0) + noise_threshold = float(silence_db) + + cmd = [ + "ffmpeg", + "-hide_banner", + "-i", + input_path, + "-af", + f"silencedetect=noise={noise_threshold}dB:d={min_silence_seconds}", + "-f", + "null", + "-", + ] + result = subprocess.run(cmd, capture_output=True, text=True) + + # silencedetect prints to stderr even on success. + output = result.stderr or "" + start_pat = re.compile(r"silence_start:\s*([0-9.]+)") + end_pat = re.compile(r"silence_end:\s*([0-9.]+)\s*\|\s*silence_duration:\s*([0-9.]+)") + + starts = [float(m.group(1)) for m in start_pat.finditer(output)] + ends = [(float(m.group(1)), float(m.group(2))) for m in end_pat.finditer(output)] + + ranges = [] + pair_count = min(len(starts), len(ends)) + for i in range(pair_count): + start = max(0.0, starts[i]) + end, duration = ends[i] + if end > start and duration >= min_silence_seconds: + ranges.append({ + "start": round(start, 3), + "end": round(end, 3), + "duration": round(duration, 3), + }) + + logger.info( + "Detected %s silence ranges in %s (min=%sms, threshold=%sdB)", + len(ranges), + input_path, + min_silence_ms, + silence_db, + ) + return ranges diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 86d7cf4..76feaf8 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -7,6 +7,7 @@ import AIPanel from './components/AIPanel'; import ExportDialog from './components/ExportDialog'; import SettingsPanel from './components/SettingsPanel'; import DevPanel from './components/DevPanel'; +import SilenceTrimmerPanel from './components/SilenceTrimmerPanel'; import { useKeyboardShortcuts } from './hooks/useKeyboardShortcuts'; import { Film, @@ -23,7 +24,7 @@ import { const IS_ELECTRON = !!window.electronAPI; -type Panel = 'ai' | 'settings' | 'export' | null; +type Panel = 'ai' | 'settings' | 'export' | 'silence' | null; export default function App() { const { @@ -166,6 +167,10 @@ export default function App() { } }; + const togglePanel = (panel: Panel) => { + setActivePanel((prev) => (prev === panel ? null : panel)); + }; + const handleCut = () => { if (selectedWordIndices.length > 0) { // If words are selected, apply cut immediately @@ -337,6 +342,13 @@ export default function App() { onClick={handleMute} active={muteMode} /> + PA} + label="Pause Trim" + active={activePanel === 'silence'} + onClick={() => togglePanel('silence')} + disabled={!videoPath} + /> } label="AI" @@ -411,6 +423,7 @@ export default function App() { {/* Right panel (AI / Export / Settings) */} {activePanel && (
+ {activePanel === 'silence' && } {activePanel === 'ai' && } {activePanel === 'export' && } {activePanel === 'settings' && } diff --git a/frontend/src/components/SilenceTrimmerPanel.tsx b/frontend/src/components/SilenceTrimmerPanel.tsx new file mode 100644 index 0000000..cc62d41 --- /dev/null +++ b/frontend/src/components/SilenceTrimmerPanel.tsx @@ -0,0 +1,184 @@ +import { useState } from 'react'; +import { useEditorStore } from '../store/editorStore'; +import { Loader2, Scissors } from 'lucide-react'; + +type SilenceRange = { + start: number; + end: number; + duration: number; +}; + +export default function SilenceTrimmerPanel() { + const { videoPath, backendUrl, addCutRange, duration } = useEditorStore(); + const [minSilenceMs, setMinSilenceMs] = useState(500); + const [silenceDb, setSilenceDb] = useState(-35); + const [preBufferMs, setPreBufferMs] = useState(80); + const [postBufferMs, setPostBufferMs] = useState(120); + const [isDetecting, setIsDetecting] = useState(false); + const [ranges, setRanges] = useState([]); + + const detectSilence = async () => { + if (!videoPath) return; + setIsDetecting(true); + setRanges([]); + try { + const res = await fetch(`${backendUrl}/audio/detect-silence`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + input_path: videoPath, + min_silence_ms: minSilenceMs, + silence_db: silenceDb, + }), + }); + + if (!res.ok) { + let detail = `HTTP ${res.status} ${res.statusText}`; + try { + const err = await res.json(); + if (err?.detail) detail += ` - ${String(err.detail)}`; + } catch { + // ignore JSON parse errors for non-JSON error responses + } + if (res.status === 404) { + detail += ' (endpoint missing: restart backend to load /audio/detect-silence)'; + } + throw new Error(detail); + } + + const data = await res.json(); + setRanges(data.ranges || []); + } catch (err) { + console.error(err); + const message = err instanceof Error ? err.message : 'Unknown error'; + alert(`Silence detection failed: ${message}`); + } finally { + setIsDetecting(false); + } + }; + + const applyAsCuts = () => { + const preBufferSeconds = preBufferMs / 1000; + const postBufferSeconds = postBufferMs / 1000; + const maxEnd = duration > 0 ? duration : Number.POSITIVE_INFINITY; + + for (const r of ranges) { + // Positive buffers shrink the cut, negative buffers expand it. + const start = Math.max(0, r.start + preBufferSeconds); + const end = Math.min(maxEnd, r.end - postBufferSeconds); + if (end - start >= 0.01) { + addCutRange(start, end); + } + } + }; + + return ( +
+
+

Silence / Pause Trimmer

+

+ Detect pauses and convert them into cut ranges. +

+
+ +
+
+ + setMinSilenceMs(Number(e.target.value) || 500)} + className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none" + /> +
+ +
+ + setSilenceDb(Number(e.target.value) || -35)} + className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none" + /> +
+ +
+
+ + setPreBufferMs(Number(e.target.value) || 0)} + className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none" + /> +
+
+ + setPostBufferMs(Number(e.target.value) || 0)} + className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none" + /> +
+
+ + +
+ + {ranges.length > 0 && ( +
+
+ Detected {ranges.length} pause ranges + +
+
+ {ranges.slice(0, 50).map((r, i) => ( +
+ {r.start.toFixed(2)}s - {r.end.toFixed(2)}s ({r.duration.toFixed(2)}s) +
+ ))} +
+
+ )} +
+ ); +} \ No newline at end of file