silence trimmer

This commit is contained in:
2026-04-03 12:05:44 -06:00
parent 8a7c94d594
commit d80ff847d8
5 changed files with 284 additions and 4 deletions

View File

@ -6,9 +6,15 @@ Features are grouped by priority. Check off items as they are implemented.
## 🔴 High Priority — Core editing gaps
- [ ] **Cut / Mute sections** — select a time range and choose to cut (remove entirely) or mute (silence audio while video continues). Cut sections show as red overlays, mute sections as transparent blue overlays on the timeline over the transcript text and audio waveform. Backend: `ffmpeg -af volume=0` for mute, time-based cutting for removal.
- [x] **Cut / Mute sections** — select a time range and choose to cut (remove entirely) or mute (silence audio while video continues). Cut sections show as red overlays, mute sections as transparent blue overlays on the timeline over the transcript text and audio waveform. Backend: `ffmpeg -af volume=0` for mute, time-based cutting for removal.
- [ ] **Silence / pause trimmer** — detect and auto-remove pauses longer than X ms. One backend endpoint (`/audio/remove-silence`) + a button in the UI. Saves enormous time in podcast/interview editing.
- [ ] **Silence / pause trimmer (in progress)** — detect pauses using min duration (ms) + amplitude threshold (dB), then apply detected pauses as cut ranges. Initial endpoint: `/audio/detect-silence`; UI includes filter controls and an "Apply As Cuts" action.
- [ ] **Operation-level undo for batch actions** — explicit undo entry for actions like "Apply Silence Trim" so one shortcut/click reverts the whole operation, while still allowing normal fine-grained undo/redo steps.
- [ ] **Grouped silence-trim zones (editable batch)** — when pauses are applied, tag them as a batch (`trim_group_id`) so the user can: (1) delete all zones from that auto-trim pass at once, and (2) still select/resize/delete individual zones independently.
- [ ] **Edit silence-trim group settings after apply** — allow reopening a trim group and changing its detection settings (min pause ms, threshold dB, pre/post buffers), then reapplying updates to that group without affecting unrelated edits.
- [ ] **Volume / gain control** — per-selection or global audio gain slider. Every editor has this. Descript users constantly complain it's missing. Backend: `ffmpeg -af volume=Xdb`.
@ -84,3 +90,4 @@ These aren't features to build — they're things to make more visible in the UI
- Multi-format input (MP4, MKV, MOV, AVI, WebM, M4A)
- Keyboard shortcuts (Space, J/K/L, arrows, Ctrl+Z/Shift+Z, Ctrl+S, Ctrl+E)
- Settings panel: AI provider config (Ollama, OpenAI, Claude)
- Cut/mute range creation on timeline with draggable zone edits and Delete-to-remove

View File

@ -11,7 +11,7 @@ from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel
from services.audio_cleaner import clean_audio, is_deepfilter_available
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available
logger = logging.getLogger(__name__)
router = APIRouter()
@ -25,6 +25,12 @@ class AudioCleanRequest(BaseModel):
output_path: Optional[str] = None
# Request body for POST /audio/detect-silence.
# The three fields are forwarded positionally to detect_silence_ranges().
class SilenceDetectRequest(BaseModel):
# Path of the media file to scan; handed to ffmpeg as its input.
input_path: str
# Shortest pause to report, in milliseconds (default 500).
min_silence_ms: int = 500
# Silence threshold in dB (default -35.0).
# NOTE(review): presumably audio below this level counts as silence — confirm against ffmpeg silencedetect.
silence_db: float = -35.0
@router.post("/audio/clean")
async def clean_audio_endpoint(req: AudioCleanRequest):
try:
@ -46,6 +52,24 @@ async def audio_capabilities():
}
@router.post("/audio/detect-silence")
async def detect_silence_endpoint(req: SilenceDetectRequest):
    """Detect silent ranges in the media file named by the request.

    Args:
        req: Input path plus the minimum pause length (ms) and the
            silence threshold (dB) to detect with.

    Returns:
        A dict with ``status`` ("ok"), ``ranges`` (the list returned by
        ``detect_silence_ranges``) and ``count`` (its length).

    Raises:
        HTTPException: 500 with the error text when detection fails.
    """
    # Local import keeps this block self-contained; asyncio is stdlib.
    import asyncio

    # detect_silence_ranges runs a blocking ffmpeg subprocess over the whole
    # file. Calling it directly inside an async handler would stall the event
    # loop (and every other request) until ffmpeg exits, so hand it to the
    # default thread pool instead.
    loop = asyncio.get_running_loop()
    try:
        ranges = await loop.run_in_executor(
            None,
            detect_silence_ranges,
            req.input_path,
            req.min_silence_ms,
            req.silence_db,
        )
    except Exception as e:
        logger.error("Silence detection failed: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {
        "status": "ok",
        "ranges": ranges,
        "count": len(ranges),
    }
@router.get("/audio/waveform")
async def get_waveform_audio(path: str = Query(...)):
"""

View File

@ -4,6 +4,7 @@ Falls back to a basic FFmpeg noise filter if DeepFilterNet is not installed.
"""
import logging
import re
import subprocess
import tempfile
from pathlib import Path
@ -77,3 +78,54 @@ def _clean_with_ffmpeg(input_path: str, output_path: str) -> str:
# Report the module-level DEEPFILTER_AVAILABLE flag to callers.
# NOTE(review): presumably True when DeepFilterNet could be imported — the flag is set outside this view; confirm.
def is_deepfilter_available() -> bool:
return DEEPFILTER_AVAILABLE
# A decimal number as ffmpeg may print it for silencedetect events: optional
# sign (silence at the very start of a file can be reported with a slightly
# negative timestamp) and optional exponent.
_SILENCE_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

# One silencedetect event. "start" events carry only a timestamp; "end"
# events normally also carry "| silence_duration: <seconds>".
_SILENCE_EVENT_RE = re.compile(
    r"silence_(?P<kind>start|end):\s*(?P<time>" + _SILENCE_NUMBER + r")"
    r"(?:\s*\|\s*silence_duration:\s*(?P<duration>" + _SILENCE_NUMBER + r"))?"
)


def detect_silence_ranges(input_path: str, min_silence_ms: int, silence_db: float) -> list:
    """Detect silence ranges using ffmpeg silencedetect.

    Args:
        input_path: Media file to scan; passed to ffmpeg as ``-i``.
        min_silence_ms: Minimum pause length in milliseconds. Values below
            50 ms are raised to 50 ms.
        silence_db: Noise threshold in dB handed to ``silencedetect``.

    Returns:
        A list of dicts ``{start, end, duration}`` in seconds, rounded to
        milliseconds, in the order ffmpeg reported them.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status (for example the
            input is missing or unreadable). Previously this case silently
            produced an empty list.
        OSError: If the ffmpeg executable cannot be started.
    """
    min_silence_seconds = max(0.05, float(min_silence_ms) / 1000.0)
    noise_threshold = float(silence_db)

    cmd = [
        "ffmpeg",
        "-hide_banner",
        # Never let ffmpeg read the server's stdin while it runs.
        "-nostdin",
        "-i",
        input_path,
        "-af",
        f"silencedetect=noise={noise_threshold}dB:d={min_silence_seconds}",
        "-f",
        "null",
        "-",
    ]

    # errors="replace": stderr echoes file metadata that is not guaranteed to
    # be valid in the locale encoding; a stray byte must not abort detection.
    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")

    # silencedetect prints to stderr even on success.
    output = result.stderr or ""

    if result.returncode != 0:
        last_lines = output.strip().splitlines()[-1:]
        reason = last_lines[0] if last_lines else "no error output"
        raise RuntimeError(
            f"ffmpeg silencedetect failed (exit code {result.returncode}): {reason}"
        )

    # Walk the events in output order and pair each end with the start that
    # precedes it. Pairing by list index would shift every later range as soon
    # as a single start or end line failed to parse.
    ranges = []
    pending_start = None
    for event in _SILENCE_EVENT_RE.finditer(output):
        timestamp = float(event.group("time"))
        if event.group("kind") == "start":
            pending_start = timestamp
            continue
        if pending_start is None:
            # An end without a preceding start cannot form a range.
            continue

        start = max(0.0, pending_start)
        pending_start = None
        end = timestamp
        reported = event.group("duration")
        duration = float(reported) if reported is not None else end - start

        if end > start and duration >= min_silence_seconds:
            ranges.append({
                "start": round(start, 3),
                "end": round(end, 3),
                "duration": round(duration, 3),
            })

    logger.info(
        "Detected %s silence ranges in %s (min=%sms, threshold=%sdB)",
        len(ranges),
        input_path,
        min_silence_ms,
        silence_db,
    )
    return ranges

View File

@ -7,6 +7,7 @@ import AIPanel from './components/AIPanel';
import ExportDialog from './components/ExportDialog';
import SettingsPanel from './components/SettingsPanel';
import DevPanel from './components/DevPanel';
import SilenceTrimmerPanel from './components/SilenceTrimmerPanel';
import { useKeyboardShortcuts } from './hooks/useKeyboardShortcuts';
import {
Film,
@ -23,7 +24,7 @@ import {
const IS_ELECTRON = !!window.electronAPI;
type Panel = 'ai' | 'settings' | 'export' | null;
type Panel = 'ai' | 'settings' | 'export' | 'silence' | null;
export default function App() {
const {
@ -166,6 +167,10 @@ export default function App() {
}
};
// Opens the requested side panel, or closes it when it is already the active one.
const togglePanel = (panel: Panel) => {
  setActivePanel((current) => {
    if (current === panel) {
      return null;
    }
    return panel;
  });
};
const handleCut = () => {
if (selectedWordIndices.length > 0) {
// If words are selected, apply cut immediately
@ -337,6 +342,13 @@ export default function App() {
onClick={handleMute}
active={muteMode}
/>
<ToolbarButton
icon={<span className="text-[10px] font-semibold">PA</span>}
label="Pause Trim"
active={activePanel === 'silence'}
onClick={() => togglePanel('silence')}
disabled={!videoPath}
/>
<ToolbarButton
icon={<Sparkles className="w-4 h-4" />}
label="AI"
@ -411,6 +423,7 @@ export default function App() {
{/* Right panel (Pause Trim / AI / Export / Settings) */}
{activePanel && (
<div className="w-80 border-l border-editor-border overflow-y-auto shrink-0">
{activePanel === 'silence' && <SilenceTrimmerPanel />}
{activePanel === 'ai' && <AIPanel />}
{activePanel === 'export' && <ExportDialog />}
{activePanel === 'settings' && <SettingsPanel />}

View File

@ -0,0 +1,184 @@
import { useState } from 'react';
import { useEditorStore } from '../store/editorStore';
import { Loader2, Scissors } from 'lucide-react';
/** One detected pause, as listed in the `ranges` array of the `/audio/detect-silence` response. */
type SilenceRange = {
// Where the pause begins (rendered with an "s" suffix in the results list).
start: number;
// Where the pause ends.
end: number;
// Length of the pause as reported by the backend.
duration: number;
};
/** Maximum number of detected ranges rendered in the preview list. */
const MAX_LISTED_RANGES = 50;

/**
 * Parses the raw text of a numeric <input>.
 *
 * Returns `fallback` only when the field is empty or not a finite number, so
 * a legitimate `0` is kept (a plain `Number(raw) || fallback` would discard it).
 */
function parseNumberInput(raw: string, fallback: number): number {
  if (raw.trim() === '') return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}

/**
 * Side panel that asks the backend to detect pauses in the current media file
 * and converts the detected ranges into cut ranges in the editor store.
 */
export default function SilenceTrimmerPanel() {
  const { videoPath, backendUrl, addCutRange, duration } = useEditorStore();
  const [minSilenceMs, setMinSilenceMs] = useState(500);
  const [silenceDb, setSilenceDb] = useState(-35);
  const [preBufferMs, setPreBufferMs] = useState(80);
  const [postBufferMs, setPostBufferMs] = useState(120);
  const [isDetecting, setIsDetecting] = useState(false);
  const [ranges, setRanges] = useState<SilenceRange[]>([]);
  // Path of the media file that `ranges` was detected for.
  const [rangesSourcePath, setRangesSourcePath] = useState<string | null>(null);

  // Only expose ranges that belong to the file currently loaded. If the user
  // switches media while the panel is open (or while a detection request is
  // still in flight), results for the previous file must never be shown or
  // applied as cuts to the new one.
  const activeRanges = rangesSourcePath === videoPath ? ranges : [];

  const detectSilence = async () => {
    if (!videoPath) return;
    // Remember which file this request is for; `videoPath` may change before
    // the response arrives.
    const targetPath = videoPath;
    setIsDetecting(true);
    setRanges([]);
    setRangesSourcePath(null);
    try {
      const res = await fetch(`${backendUrl}/audio/detect-silence`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          input_path: targetPath,
          min_silence_ms: minSilenceMs,
          silence_db: silenceDb,
        }),
      });
      if (!res.ok) {
        let detail = `HTTP ${res.status} ${res.statusText}`;
        try {
          const err = await res.json();
          if (err?.detail) detail += ` - ${String(err.detail)}`;
        } catch {
          // ignore JSON parse errors for non-JSON error responses
        }
        if (res.status === 404) {
          detail += ' (endpoint missing: restart backend to load /audio/detect-silence)';
        }
        throw new Error(detail);
      }
      const data = await res.json();
      setRanges(Array.isArray(data?.ranges) ? data.ranges : []);
      setRangesSourcePath(targetPath);
    } catch (err) {
      console.error(err);
      const message = err instanceof Error ? err.message : 'Unknown error';
      alert(`Silence detection failed: ${message}`);
    } finally {
      setIsDetecting(false);
    }
  };

  const applyAsCuts = () => {
    const preBufferSeconds = preBufferMs / 1000;
    const postBufferSeconds = postBufferMs / 1000;
    const maxEnd = duration > 0 ? duration : Number.POSITIVE_INFINITY;
    for (const r of activeRanges) {
      // Positive buffers shrink the cut, negative buffers expand it.
      const start = Math.max(0, r.start + preBufferSeconds);
      const end = Math.min(maxEnd, r.end - postBufferSeconds);
      // Skip ranges that the buffers have shrunk to (almost) nothing.
      if (end - start >= 0.01) {
        addCutRange(start, end);
      }
    }
  };

  return (
    <div className="p-4 space-y-4">
      <div className="space-y-1">
        <h3 className="text-sm font-semibold">Silence / Pause Trimmer</h3>
        <p className="text-xs text-editor-text-muted">
          Detect pauses and convert them into cut ranges.
        </p>
      </div>
      <div className="space-y-3">
        <div className="space-y-1.5">
          <label className="text-[11px] text-editor-text-muted font-medium">
            Minimum pause length (ms)
          </label>
          <input
            type="number"
            min={100}
            step={50}
            value={minSilenceMs}
            onChange={(e) => setMinSilenceMs(parseNumberInput(e.target.value, 500))}
            className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
          />
        </div>
        <div className="space-y-1.5">
          <label className="text-[11px] text-editor-text-muted font-medium">
            Silence threshold (dB)
          </label>
          <input
            type="number"
            min={-80}
            max={0}
            step={1}
            value={silenceDb}
            onChange={(e) => setSilenceDb(parseNumberInput(e.target.value, -35))}
            className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
          />
        </div>
        <div className="grid grid-cols-2 gap-2">
          <div className="space-y-1.5">
            <label className="text-[11px] text-editor-text-muted font-medium">
              Buffer before (ms, +shrink / -expand)
            </label>
            <input
              type="number"
              min={-5000}
              max={5000}
              step={10}
              value={preBufferMs}
              onChange={(e) => setPreBufferMs(parseNumberInput(e.target.value, 0))}
              className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
            />
          </div>
          <div className="space-y-1.5">
            <label className="text-[11px] text-editor-text-muted font-medium">
              Buffer after (ms, +shrink / -expand)
            </label>
            <input
              type="number"
              min={-5000}
              max={5000}
              step={10}
              value={postBufferMs}
              onChange={(e) => setPostBufferMs(parseNumberInput(e.target.value, 0))}
              className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
            />
          </div>
        </div>
        <button
          onClick={detectSilence}
          disabled={isDetecting || !videoPath}
          className="w-full flex items-center justify-center gap-2 px-4 py-2.5 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-50 rounded-lg text-sm font-medium transition-colors"
        >
          {isDetecting ? (
            <>
              <Loader2 className="w-4 h-4 animate-spin" />
              Detecting pauses...
            </>
          ) : (
            'Detect Pauses'
          )}
        </button>
      </div>
      {activeRanges.length > 0 && (
        <div className="space-y-2">
          <div className="flex items-center justify-between">
            <span className="text-xs font-medium">Detected {activeRanges.length} pause ranges</span>
            <button
              onClick={applyAsCuts}
              className="flex items-center gap-1 px-2 py-1 text-xs bg-editor-accent/20 text-editor-accent rounded hover:bg-editor-accent/30"
            >
              <Scissors className="w-3 h-3" />
              Apply As Cuts
            </button>
          </div>
          <div className="max-h-56 overflow-y-auto space-y-1 pr-1">
            {activeRanges.slice(0, MAX_LISTED_RANGES).map((r, i) => (
              <div key={`${r.start}-${r.end}-${i}`} className="px-2 py-1.5 rounded bg-editor-surface border border-editor-border text-xs">
                {r.start.toFixed(2)}s - {r.end.toFixed(2)}s ({r.duration.toFixed(2)}s)
              </div>
            ))}
          </div>
          {/* The preview is capped; say so rather than silently hiding entries. */}
          {activeRanges.length > MAX_LISTED_RANGES && (
            <p className="text-[11px] text-editor-text-muted">
              Showing the first {MAX_LISTED_RANGES} of {activeRanges.length} ranges. Apply As Cuts uses all of them.
            </p>
          )}
        </div>
      )}
    </div>
  );
}