Files
TalkEdit/backend/routers/audio.py

126 lines
4.2 KiB
Python
Raw Normal View History

"""Audio processing endpoint (noise reduction / Studio Sound)."""
2026-03-28 12:26:45 -06:00
import hashlib
import logging
2026-03-28 12:26:45 -06:00
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
2026-03-28 12:26:45 -06:00
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel
2026-04-03 12:05:44 -06:00
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available
# Module-level logger, named after this module so log records can be traced
# back to the audio router.
logger = logging.getLogger(__name__)
# Router on which every endpoint in this module registers its route.
router = APIRouter()
2026-03-28 12:26:45 -06:00
# Simple in-process cache: md5 hex digest of "<path>:<mtime>" → extracted WAV path.
# The mapping lives only in this process's memory; in the code visible here an
# entry is removed only when its WAV file is found to be missing on lookup.
_waveform_cache: dict[str, str] = {}
class AudioCleanRequest(BaseModel):
    """Request body accepted by the ``/audio/clean`` route."""

    # Path of the file to clean; forwarded as the first argument of
    # ``clean_audio``.
    input_path: str

    # Optional destination path. When omitted, the endpoint passes an empty
    # string to ``clean_audio`` instead.
    output_path: Optional[str] = None
2026-04-03 12:05:44 -06:00
class SilenceDetectRequest(BaseModel):
    """Request body accepted by the ``/audio/detect-silence`` route."""

    # Path of the file to scan; forwarded as the first argument of
    # ``detect_silence_ranges``.
    input_path: str

    # Forwarded as the second argument of ``detect_silence_ranges``.
    # Presumably the minimum silence duration in milliseconds — confirm
    # against the service implementation.
    min_silence_ms: int = 500

    # Forwarded as the third argument of ``detect_silence_ranges``.
    # Presumably the loudness threshold in dB below which audio counts as
    # silence — confirm against the service implementation.
    silence_db: float = -35.0
@router.post("/audio/clean")
async def clean_audio_endpoint(req: AudioCleanRequest):
    """Clean the audio of ``req.input_path`` and report the result.

    Args:
        req: Request body carrying the input path and an optional output
            path (an empty string is passed on when none is given).

    Returns:
        A dict with ``status`` ("ok"), ``output_path`` (the value returned by
        ``clean_audio``) and ``engine`` ("deepfilternet" when
        ``is_deepfilter_available()`` is true, otherwise "ffmpeg_anlmdn").

    Raises:
        HTTPException: 500 with the error text as detail when anything in
            the cleaning step fails.
    """
    # Local import keeps this block self-contained; asyncio is stdlib.
    import asyncio

    try:
        # clean_audio is a synchronous call; running it directly inside an
        # async endpoint would stall the event loop (and every other request)
        # until it finishes, so hand it to a worker thread instead.
        # NOTE(review): assumes clean_audio is safe to run off the event-loop
        # thread — confirm against services.audio_cleaner.
        output = await asyncio.to_thread(
            clean_audio, req.input_path, req.output_path or ""
        )
        return {
            "status": "ok",
            "output_path": output,
            "engine": "deepfilternet" if is_deepfilter_available() else "ffmpeg_anlmdn",
        }
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception("Audio cleaning failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/audio/capabilities")
async def audio_capabilities():
    """Report whether the DeepFilterNet engine is available.

    Returns:
        A dict with the single key ``deepfilternet_available`` holding the
        result of ``is_deepfilter_available()``.
    """
    deepfilter_ready = is_deepfilter_available()
    return {"deepfilternet_available": deepfilter_ready}
2026-03-28 12:26:45 -06:00
2026-04-03 12:05:44 -06:00
@router.post("/audio/detect-silence")
async def detect_silence_endpoint(req: SilenceDetectRequest):
    """Detect silent ranges in ``req.input_path``.

    Args:
        req: Request body carrying the input path plus the
            ``min_silence_ms`` and ``silence_db`` settings, all of which are
            forwarded unchanged to ``detect_silence_ranges``.

    Returns:
        A dict with ``status`` ("ok"), ``ranges`` (the value returned by
        ``detect_silence_ranges``) and ``count`` (its length).

    Raises:
        HTTPException: 500 with the error text as detail when detection
            fails.
    """
    # Local import keeps this block self-contained; asyncio is stdlib.
    import asyncio

    try:
        # detect_silence_ranges is a synchronous call; run it in a worker
        # thread so the event loop stays free to serve other requests.
        # NOTE(review): assumes detect_silence_ranges is safe to run off the
        # event-loop thread — confirm against services.audio_cleaner.
        ranges = await asyncio.to_thread(
            detect_silence_ranges,
            req.input_path,
            req.min_silence_ms,
            req.silence_db,
        )
        return {
            "status": "ok",
            "ranges": ranges,
            "count": len(ranges),
        }
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception("Silence detection failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
2026-03-28 12:26:45 -06:00
@router.get("/audio/waveform")
async def get_waveform_audio(path: str = Query(...)):
    """
    Extract audio from any video/audio file and return it as a WAV.

    The WAV is written to a fresh temporary directory and its location is
    remembered in the in-process ``_waveform_cache`` so later requests for
    the same file (same path and mtime) reuse it.
    Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.

    Args:
        path: Filesystem path of the source media file (query parameter).

    Returns:
        A ``FileResponse`` serving the mono 22050 Hz 16-bit PCM WAV.

    Raises:
        HTTPException: 404 when ``path`` is not an existing file; 500 when
            FFmpeg cannot be started, exits non-zero, or produces an empty
            or missing output file.
    """
    # Local imports keep this block self-contained; both are stdlib.
    import asyncio
    import shutil

    file_path = Path(path)
    if not file_path.is_file():
        logger.warning("[waveform] File not found: %s", path)
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # Cache key based on path + mtime so stale cache is auto-invalidated
    mtime = file_path.stat().st_mtime
    cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()

    cached_path = _waveform_cache.get(cache_key)
    if cached_path is not None:
        if Path(cached_path).exists():
            logger.info("[waveform] Cache hit for %s", file_path.name)
            return FileResponse(cached_path, media_type="audio/wav")
        # The cached WAV vanished from disk; forget it and re-extract.
        _waveform_cache.pop(cache_key, None)

    logger.info("[waveform] Extracting audio from: %s", file_path.name)
    tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
    out_wav = Path(tmp_dir) / f"{cache_key}.wav"

    # Downsample to mono 22050 Hz — enough for waveform drawing, small file
    cmd = [
        "ffmpeg", "-y",
        "-i", str(file_path),
        "-vn",                   # drop video
        "-ac", "1",              # mono
        "-ar", "22050",          # 22 kHz sample rate
        "-acodec", "pcm_s16le",  # 16-bit PCM WAV
        str(out_wav),
    ]

    extracted = False
    try:
        try:
            # subprocess.run blocks until FFmpeg exits; run it in a worker
            # thread so the event loop keeps serving other requests.
            # errors="replace" keeps undecodable bytes in FFmpeg's stderr
            # from raising UnicodeDecodeError.
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                errors="replace",
            )
        except FileNotFoundError as e:
            # Raised when the "ffmpeg" executable cannot be found.
            logger.error("[waveform] FFmpeg executable not found")
            raise HTTPException(
                status_code=500,
                detail="Failed to extract audio: FFmpeg executable not found",
            ) from e

        if result.returncode != 0:
            logger.error(
                "[waveform] FFmpeg failed for %s: %s",
                file_path.name,
                result.stderr[-500:],
            )
            raise HTTPException(
                status_code=500,
                detail=f"Failed to extract audio: {result.stderr[-300:]}"
            )

        if not out_wav.exists() or out_wav.stat().st_size == 0:
            logger.error("[waveform] FFmpeg produced empty WAV for %s", file_path.name)
            raise HTTPException(status_code=500, detail="Audio extraction produced empty file")

        extracted = True
    finally:
        # On any failure the temp directory holds nothing worth keeping;
        # remove it so failed extractions do not pile up on disk.
        if not extracted:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    logger.info(
        "[waveform] Extracted %s bytes for %s",
        out_wav.stat().st_size,
        file_path.name,
    )
    _waveform_cache[cache_key] = str(out_wav)
    return FileResponse(str(out_wav), media_type="audio/wav")