"""Audio processing endpoint (noise reduction / Studio Sound)."""

import hashlib
import logging
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel

from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available

logger = logging.getLogger(__name__)
router = APIRouter()

# Simple in-process cache: md5("path:mtime") -> extracted WAV path.
# NOTE(review): unbounded, and WAV files on disk are never deleted (entries
# are only dropped when the cached file vanishes). Acceptable for a
# single-process tool; revisit if this server runs long-lived.
_waveform_cache: dict[str, str] = {}

# Shared scratch directory for extracted WAVs, created lazily so importing
# this module has no filesystem side effects.
_waveform_tmp_dir: Optional[Path] = None


def _get_waveform_tmp_dir() -> Path:
    """Return (creating on first use) the shared temp dir for extracted WAVs."""
    global _waveform_tmp_dir
    if _waveform_tmp_dir is None or not _waveform_tmp_dir.is_dir():
        _waveform_tmp_dir = Path(tempfile.mkdtemp(prefix="talkedit_waveform_"))
    return _waveform_tmp_dir


class AudioCleanRequest(BaseModel):
    """Request body for POST /audio/clean."""

    input_path: str
    output_path: Optional[str] = None  # cleaner picks a default path when omitted


class SilenceDetectRequest(BaseModel):
    """Request body for POST /audio/detect-silence."""

    input_path: str
    min_silence_ms: int = 500   # ignore gaps shorter than this
    silence_db: float = -35.0   # loudness threshold treated as silence


# Plain `def` (not `async def`): clean_audio does heavy blocking work, so
# FastAPI should run this handler in its threadpool instead of blocking
# the event loop.
@router.post("/audio/clean")
def clean_audio_endpoint(req: AudioCleanRequest):
    """Denoise an audio file and report which engine handled it."""
    try:
        output = clean_audio(req.input_path, req.output_path or "")
        return {
            "status": "ok",
            "output_path": output,
            "engine": "deepfilternet" if is_deepfilter_available() else "ffmpeg_anlmdn",
        }
    except Exception as e:
        logger.exception("Audio cleaning failed")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/audio/capabilities")
async def audio_capabilities():
    """Report which optional audio engines are available."""
    return {
        "deepfilternet_available": is_deepfilter_available(),
    }


# Plain `def` for the same threadpool reason as /audio/clean.
@router.post("/audio/detect-silence")
def detect_silence_endpoint(req: SilenceDetectRequest):
    """Find silent ranges in an audio file."""
    try:
        ranges = detect_silence_ranges(
            req.input_path,
            req.min_silence_ms,
            req.silence_db,
        )
        return {
            "status": "ok",
            "ranges": ranges,
            "count": len(ranges),
        }
    except Exception as e:
        logger.exception("Silence detection failed")
        raise HTTPException(status_code=500, detail=str(e)) from e


# Plain `def`: the blocking FFmpeg subprocess would otherwise stall the
# event loop for the entire extraction.
@router.get("/audio/waveform")
def get_waveform_audio(path: str = Query(...)):
    """
    Extract audio from any video/audio file and return it as a WAV.

    The WAV is cached on disk for subsequent requests. Uses FFmpeg
    directly so it works with MKV, MOV, AVI, MP4, etc.
    """
    file_path = Path(path)
    if not file_path.is_file():
        logger.warning("[waveform] File not found: %s", path)
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # Cache key based on path + mtime so stale cache is auto-invalidated
    mtime = file_path.stat().st_mtime
    cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()

    if cache_key in _waveform_cache:
        cached = Path(_waveform_cache[cache_key])
        if cached.exists():
            logger.info("[waveform] Cache hit for %s", file_path.name)
            return FileResponse(str(cached), media_type="audio/wav")
        # Cached WAV was deleted out from under us -- drop the stale entry.
        del _waveform_cache[cache_key]

    logger.info("[waveform] Extracting audio from: %s", file_path.name)
    out_wav = _get_waveform_tmp_dir() / f"{cache_key}.wav"

    # Downsample to mono 22050 Hz -- enough for waveform drawing, small file
    cmd = [
        "ffmpeg", "-y",
        "-i", str(file_path),
        "-vn",                    # drop video
        "-ac", "1",               # mono
        "-ar", "22050",           # 22 kHz sample rate
        "-acodec", "pcm_s16le",   # 16-bit PCM WAV
        str(out_wav),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        logger.error(
            "[waveform] FFmpeg failed for %s: %s", file_path.name, result.stderr[-500:]
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to extract audio: {result.stderr[-300:]}",
        )

    if not out_wav.exists() or out_wav.stat().st_size == 0:
        logger.error("[waveform] FFmpeg produced empty WAV for %s", file_path.name)
        raise HTTPException(status_code=500, detail="Audio extraction produced empty file")

    logger.info(
        "[waveform] Extracted %d bytes for %s", out_wav.stat().st_size, file_path.name
    )
    _waveform_cache[cache_key] = str(out_wav)
    return FileResponse(str(out_wav), media_type="audio/wav")