silence trimmer
This commit is contained in:
@ -11,7 +11,7 @@ from fastapi import APIRouter, HTTPException, Query
|
||||
from fastapi.responses import FileResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.audio_cleaner import clean_audio, is_deepfilter_available
|
||||
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
@ -25,6 +25,12 @@ class AudioCleanRequest(BaseModel):
|
||||
output_path: Optional[str] = None
|
||||
|
||||
|
||||
class SilenceDetectRequest(BaseModel):
    # Request body accepted by the POST /audio/detect-silence handler.
    # Comments are used instead of a class docstring so the generated
    # schema description stays exactly as before.

    # Path of the media file to analyse; passed through to
    # detect_silence_ranges unchanged.
    input_path: str

    # Minimum length, in milliseconds, a quiet stretch must have to be
    # reported.
    min_silence_ms: int = 500

    # Noise threshold in dB handed to the silence detector.
    silence_db: float = -35.0
|
||||
|
||||
|
||||
@router.post("/audio/clean")
|
||||
async def clean_audio_endpoint(req: AudioCleanRequest):
|
||||
try:
|
||||
@ -46,6 +52,24 @@ async def audio_capabilities():
|
||||
}
|
||||
|
||||
|
||||
@router.post("/audio/detect-silence")
async def detect_silence_endpoint(req: SilenceDetectRequest):
    """Run silence detection on ``req.input_path`` and return the ranges.

    Args:
        req: Request body carrying the input path, the minimum silence
            length in milliseconds and the noise threshold in dB.

    Returns:
        A dict with ``status`` (always ``"ok"``), ``ranges`` (the list
        produced by ``detect_silence_ranges``) and ``count`` (its length).

    Raises:
        HTTPException: With status 500 and the error text as detail when
            detection fails for any reason.
    """
    # Imported here so the handler is self-contained with respect to the
    # module's import block.
    from fastapi.concurrency import run_in_threadpool

    try:
        # detect_silence_ranges blocks on an ffmpeg subprocess; running it in
        # the threadpool keeps the event loop free to serve other requests.
        ranges = await run_in_threadpool(
            detect_silence_ranges,
            req.input_path,
            req.min_silence_ms,
            req.silence_db,
        )
    except Exception as e:
        logger.error(f"Silence detection failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {
        "status": "ok",
        "ranges": ranges,
        "count": len(ranges),
    }
|
||||
|
||||
|
||||
@router.get("/audio/waveform")
|
||||
async def get_waveform_audio(path: str = Query(...)):
|
||||
"""
|
||||
|
||||
@ -4,6 +4,7 @@ Falls back to a basic FFmpeg noise filter if DeepFilterNet is not installed.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
@ -77,3 +78,54 @@ def _clean_with_ffmpeg(input_path: str, output_path: str) -> str:
|
||||
|
||||
def is_deepfilter_available() -> bool:
    """Return the module-level ``DEEPFILTER_AVAILABLE`` flag."""
    return DEEPFILTER_AVAILABLE
|
||||
|
||||
|
||||
def detect_silence_ranges(input_path: str, min_silence_ms: int, silence_db: float):
    """Detect silent stretches in a media file with ffmpeg's ``silencedetect``.

    Args:
        input_path: Path of the media file passed to ``ffmpeg -i``.
        min_silence_ms: Minimum silence length in milliseconds. Values below
            50 ms are clamped up to 50 ms.
        silence_db: Noise threshold in dB passed to the filter.

    Returns:
        A list of ``{"start", "end", "duration"}`` dicts in seconds, each
        value rounded to three decimals, in the order ffmpeg reported them.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status (for example an
            unreadable input), instead of reporting "no silence".
    """
    min_silence_seconds = max(0.05, float(min_silence_ms) / 1000.0)
    noise_threshold = float(silence_db)

    cmd = [
        "ffmpeg",
        "-hide_banner",
        # Never let ffmpeg read the parent process's stdin.
        "-nostdin",
        "-i",
        input_path,
        "-af",
        f"silencedetect=noise={noise_threshold}dB:d={min_silence_seconds}",
        "-f",
        "null",
        "-",
    ]
    # errors="replace" keeps stray non-decodable bytes in ffmpeg's log (for
    # example file metadata) from aborting the parse with UnicodeDecodeError.
    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")

    # silencedetect prints to stderr even on success.
    output = result.stderr or ""

    if result.returncode != 0:
        stderr_lines = output.strip().splitlines()
        reason = stderr_lines[-1] if stderr_lines else "no error output"
        raise RuntimeError(
            f"ffmpeg silencedetect failed (exit code {result.returncode}): {reason}"
        )

    # Timestamps may be negative (silence beginning before t=0) or printed in
    # exponent form for very small values, so accept a sign and an exponent.
    number = r"-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?"
    start_part = r"silence_start:\s*(?P<start>" + number + r")"
    end_part = (
        r"silence_end:\s*(?P<end>" + number + r")"
        r"\s*\|\s*silence_duration:\s*(?P<duration>" + number + r")"
    )
    event_pat = re.compile(start_part + "|" + end_part)

    ranges = []
    pending_start = None
    # Walk the events in log order and pair each silence_end with the most
    # recent silence_start, so one unmatched event cannot shift later pairs.
    for match in event_pat.finditer(output):
        if match.group("start") is not None:
            pending_start = float(match.group("start"))
            continue
        if pending_start is None:
            # A silence_end with no preceding start cannot form a range.
            continue

        start = max(0.0, pending_start)
        pending_start = None
        end = float(match.group("end"))
        duration = float(match.group("duration"))
        if end > start and duration >= min_silence_seconds:
            ranges.append({
                "start": round(start, 3),
                "end": round(end, 3),
                "duration": round(duration, 3),
            })

    logger.info(
        "Detected %s silence ranges in %s (min=%sms, threshold=%sdB)",
        len(ranges),
        input_path,
        min_silence_ms,
        silence_db,
    )
    return ranges
|
||||
|
||||
Reference in New Issue
Block a user