silence trimmer

2026-04-03 12:05:44 -06:00
parent 8a7c94d594
commit d80ff847d8
5 changed files with 284 additions and 4 deletions
--- a/backend/routers/audio.py
+++ b/backend/routers/audio.py
@ -11,7 +11,7 @@ from fastapi import APIRouter, HTTPException, Query
 from fastapi.responses import FileResponse
 from pydantic import BaseModel

-from services.audio_cleaner import clean_audio, is_deepfilter_available
+from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available

 logger = logging.getLogger(__name__)
 router = APIRouter()
@ -25,6 +25,12 @@ class AudioCleanRequest(BaseModel):
    output_path: Optional[str] = None


+class SilenceDetectRequest(BaseModel):
+    """Request body for ``POST /audio/detect-silence``."""
+
+    # Path of the media file to analyse; handed to ffmpeg as its ``-i`` input.
+    input_path: str
+    # Minimum length, in milliseconds, for a stretch to count as silence.
+    min_silence_ms: int = 500
+    # Level threshold in dB; forwarded as silencedetect's ``noise`` option.
+    silence_db: float = -35.0
+
+
@router.post("/audio/clean")
 async def clean_audio_endpoint(req: AudioCleanRequest):
    try:
@ -46,6 +52,24 @@ async def audio_capabilities():
    }


+@router.post("/audio/detect-silence")
+async def detect_silence_endpoint(req: SilenceDetectRequest):
+    """Detect silent stretches in the file named by ``req.input_path``.
+
+    Returns a JSON object with ``status`` ("ok"), ``ranges`` (the list
+    produced by ``detect_silence_ranges``) and ``count`` (its length).
+
+    Raises:
+        HTTPException: 500 with the error text if detection raises.
+    """
+    # Local import: asyncio is not among the imports visible for this module.
+    import asyncio
+
+    try:
+        # detect_silence_ranges blocks on an ffmpeg subprocess; run it on the
+        # default executor so the event loop keeps serving other requests.
+        loop = asyncio.get_running_loop()
+        ranges = await loop.run_in_executor(
+            None,
+            detect_silence_ranges,
+            req.input_path,
+            req.min_silence_ms,
+            req.silence_db,
+        )
+    except Exception as e:
+        logger.exception("Silence detection failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+    return {
+        "status": "ok",
+        "ranges": ranges,
+        "count": len(ranges),
+    }
+
+
@router.get("/audio/waveform")
 async def get_waveform_audio(path: str = Query(...)):
    """
--- a/backend/services/audio_cleaner.py
+++ b/backend/services/audio_cleaner.py
@ -4,6 +4,7 @@ Falls back to a basic FFmpeg noise filter if DeepFilterNet is not installed.
 """

 import logging
+import re
 import subprocess
 import tempfile
 from pathlib import Path
@ -77,3 +78,54 @@ def _clean_with_ffmpeg(input_path: str, output_path: str) -> str:

 def is_deepfilter_available() -> bool:
+    """Return the module-level ``DEEPFILTER_AVAILABLE`` flag."""
     return DEEPFILTER_AVAILABLE
+
+
+def detect_silence_ranges(input_path: str, min_silence_ms: int, silence_db: float):
+    """Detect silence ranges using ffmpeg's ``silencedetect`` filter.
+
+    Args:
+        input_path: Path of the media file handed to ``ffmpeg -i``.
+        min_silence_ms: Minimum silence length in milliseconds. Values below
+            50 ms are clamped up to 50 ms.
+        silence_db: Noise threshold in dB, passed as ``noise=<value>dB``.
+
+    Returns:
+        A list of dicts ``{"start", "end", "duration"}`` in seconds, rounded
+        to three decimals, in the order ffmpeg reported them.
+
+    Raises:
+        RuntimeError: If ffmpeg exits with a non-zero status (for example a
+            missing or unreadable input, or a rejected filter argument).
+        OSError: If the ``ffmpeg`` executable cannot be started.
+    """
+    min_silence_seconds = max(0.05, float(min_silence_ms) / 1000.0)
+    noise_threshold = float(silence_db)
+
+    cmd = [
+        "ffmpeg",
+        "-hide_banner",
+        # Never let ffmpeg read (or wait on) the server process's stdin.
+        "-nostdin",
+        "-i",
+        input_path,
+        "-af",
+        f"silencedetect=noise={noise_threshold}dB:d={min_silence_seconds}",
+        "-f",
+        "null",
+        "-",
+    ]
+    # errors="replace": ffmpeg echoes file names and metadata that may not be
+    # valid in the locale encoding; a decode failure must not abort detection.
+    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
+
+    # silencedetect prints to stderr even on success.
+    output = result.stderr or ""
+
+    # A failed run would otherwise look exactly like "no silence found".
+    if result.returncode != 0:
+        stderr_lines = output.strip().splitlines()
+        reason = stderr_lines[-1] if stderr_lines else "no diagnostic output"
+        raise RuntimeError(
+            f"ffmpeg silencedetect failed (exit {result.returncode}): {reason}"
+        )
+
+    # Timestamps may be negative (a silence that begins before t=0) or use
+    # exponent notation, so accept a sign and an optional exponent.
+    number = r"(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)"
+    event_pat = re.compile(
+        rf"silence_start:\s*{number}"
+        rf"|silence_end:\s*{number}\s*\|\s*silence_duration:\s*{number}"
+    )
+
+    # Walk the events in log order so each end is paired with the start that
+    # actually precedes it; an unmatched event can no longer shift later pairs.
+    ranges = []
+    pending_start = None
+    for match in event_pat.finditer(output):
+        start_text, end_text, duration_text = match.groups()
+        if start_text is not None:
+            pending_start = max(0.0, float(start_text))
+            continue
+        if pending_start is None:
+            # silence_end without a preceding silence_start: nothing to pair.
+            continue
+        start, pending_start = pending_start, None
+        end = float(end_text)
+        duration = float(duration_text)
+        if end > start and duration >= min_silence_seconds:
+            ranges.append({
+                "start": round(start, 3),
+                "end": round(end, 3),
+                "duration": round(duration, 3),
+            })
+
+    logger.info(
+        "Detected %s silence ranges in %s (min=%sms, threshold=%sdB)",
+        len(ranges),
+        input_path,
+        min_silence_ms,
+        silence_db,
+    )
+    return ranges