# TalkEdit/backend/routers/audio.py

"""Audio processing endpoints (noise reduction / Studio Sound, silence detection, waveform extraction, loudness normalization)."""

import asyncio
import hashlib
import logging
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import FileResponse
from pydantic import BaseModel

from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available, normalize_audio

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router on which the audio endpoints in this module are registered.
router = APIRouter()

# Simple in-process cache: cache key (MD5 hex digest of the source path and its
# mtime) → path of the extracted WAV. Entries are written by the
# /audio/waveform handler and dropped only when the cached file has gone
# missing; the mapping lives for the lifetime of the process.
_waveform_cache: dict[str, str] = {}


class AudioCleanRequest(BaseModel):
    """Request body for ``POST /audio/clean``."""

    # Path of the file to clean; forwarded unchanged to clean_audio().
    input_path: str
    # Optional destination path. When omitted (or empty) the endpoint passes ""
    # to clean_audio(); presumably the service then chooses its own output
    # location — confirm in services.audio_cleaner.
    output_path: Optional[str] = None


class SilenceDetectRequest(BaseModel):
    """Request body for ``POST /audio/detect-silence``."""

    # Path of the file to analyse; forwarded unchanged to detect_silence_ranges().
    input_path: str
    # Second positional argument to detect_silence_ranges(). Presumably the
    # minimum duration (milliseconds) a quiet stretch must last to count as
    # silence — confirm against the service.
    min_silence_ms: int = 500
    # Third positional argument to detect_silence_ranges(). Presumably the
    # level threshold (dB) below which audio is treated as silent — confirm
    # against the service.
    silence_db: float = -35.0


@router.post("/audio/clean")
async def clean_audio_endpoint(req: AudioCleanRequest):
    """Run noise reduction on ``req.input_path`` and report where the result is.

    Args:
        req: Input path and optional output path. A missing/empty output path
            is forwarded to ``clean_audio`` as ``""``.

    Returns:
        ``{"status": "ok", "output_path": ..., "engine": ...}``. ``engine`` is
        ``"deepfilternet"`` when ``is_deepfilter_available()`` is true and
        ``"ffmpeg_anlmdn"`` otherwise; it is derived from that availability
        check, not reported by ``clean_audio`` itself.

    Raises:
        HTTPException: 500 carrying the error text if cleaning fails.
    """
    try:
        # clean_audio is a synchronous call; run it in a worker thread so the
        # event loop keeps serving other requests while it works.
        output = await asyncio.to_thread(
            clean_audio, req.input_path, req.output_path or ""
        )
        return {
            "status": "ok",
            "output_path": output,
            "engine": "deepfilternet" if is_deepfilter_available() else "ffmpeg_anlmdn",
        }
    except Exception as e:
        # logger.exception records the traceback; chain the cause for debuggers.
        logger.exception("Audio cleaning failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/audio/capabilities")
async def audio_capabilities():
    """Report whether DeepFilterNet is available, per ``is_deepfilter_available()``."""
    has_deepfilter = is_deepfilter_available()
    return dict(deepfilternet_available=has_deepfilter)


@router.post("/audio/detect-silence")
async def detect_silence_endpoint(req: SilenceDetectRequest):
    """Detect silent ranges in ``req.input_path``.

    Args:
        req: Input path plus the ``min_silence_ms`` / ``silence_db`` settings,
            all forwarded positionally to ``detect_silence_ranges``.

    Returns:
        ``{"status": "ok", "ranges": ..., "count": ...}`` where ``ranges`` is
        whatever ``detect_silence_ranges`` returned and ``count`` is its length.

    Raises:
        HTTPException: 500 carrying the error text if detection fails.
    """
    try:
        # detect_silence_ranges is a synchronous call; run it in a worker
        # thread so the event loop stays free for other requests.
        ranges = await asyncio.to_thread(
            detect_silence_ranges,
            req.input_path,
            req.min_silence_ms,
            req.silence_db,
        )
        return {
            "status": "ok",
            "ranges": ranges,
            "count": len(ranges),
        }
    except Exception as e:
        # logger.exception records the traceback; chain the cause for debuggers.
        logger.exception("Silence detection failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/audio/waveform")
async def get_waveform_audio(request: Request, path: str = Query(...)):
    """
    Extract audio from any video/audio file and return it as a WAV.

    The WAV is written into a fresh temporary directory and remembered in the
    in-process ``_waveform_cache`` (keyed by source path + mtime), so repeat
    requests for an unmodified file are served without re-running FFmpeg.
    Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.

    Args:
        request: Incoming request; used only for diagnostic logging.
        path: Path of the source media file on the backend's filesystem. It is
            used as given when it names a file; otherwise the ``~``-expanded,
            resolved form is tried.

    Returns:
        A ``FileResponse`` serving a mono, 8 kHz, 16-bit PCM WAV.

    Raises:
        HTTPException: 404 if the path is not an existing regular file;
            500 if FFmpeg cannot be started, exits non-zero, or produces an
            empty/missing output file.
    """
    req_id = hashlib.md5(f"{path}:{request.url}".encode()).hexdigest()[:10]
    file_path = Path(path)
    logger.info(
        "[waveform:%s] request raw_url=%s raw_query=%s decoded_path=%r path_len=%s",
        req_id,
        str(request.url),
        request.url.query,
        path,
        len(path),
    )

    try:
        resolved_path = file_path.expanduser().resolve(strict=False)
    except Exception:
        # Normalization is deliberately best-effort: fall back to the raw path.
        resolved_path = file_path

    # Use the path exactly as given when it already names a file; otherwise
    # fall back to the expanded/resolved form so inputs like "~/clip.mp4" work
    # instead of being logged as resolved but then rejected.
    source_path = file_path if file_path.is_file() else resolved_path
    source_is_file = source_path.is_file()

    logger.info(
        "[waveform:%s] normalized path=%s exists=%s is_file=%s",
        req_id,
        resolved_path,
        source_path.exists(),
        source_is_file,
    )

    if not source_is_file:
        logger.warning("[waveform:%s] file_not_found path=%r", req_id, path)
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # Cache key based on path + mtime so stale cache is auto-invalidated
    mtime = source_path.stat().st_mtime
    cache_key = hashlib.md5(f"{source_path}:{mtime}".encode()).hexdigest()
    logger.info("[waveform:%s] cache_key=%s mtime=%s", req_id, cache_key, mtime)

    cached_entry = _waveform_cache.get(cache_key)
    if cached_entry is not None:
        cached = Path(cached_entry)
        if cached.exists():
            logger.info("[waveform:%s] cache_hit cached=%s", req_id, cached)
            return FileResponse(str(cached), media_type="audio/wav")
        # The cached WAV has disappeared (e.g. temp dir cleaned); re-extract.
        _waveform_cache.pop(cache_key, None)

    logger.info("[waveform:%s] cache_miss extracting file=%s", req_id, source_path)
    tmp_dir = Path(tempfile.mkdtemp(prefix="talkedit_waveform_"))
    out_wav = tmp_dir / f"{cache_key}.wav"

    # Downsample to mono 8000 Hz — enough for waveform drawing and much smaller payloads
    cmd = [
        "ffmpeg", "-y",
        "-i", str(source_path),
        "-vn",                    # drop video
        "-ac", "1",               # mono
        "-ar", "8000",            # 8 kHz sample rate
        "-acodec", "pcm_s16le",   # 16-bit PCM WAV
        str(out_wav),
    ]
    logger.info("[waveform:%s] ffmpeg_cmd=%s", req_id, " ".join(cmd))

    extracted = False
    try:
        try:
            # Run FFmpeg in a worker thread so a long extraction does not
            # block the event loop. stdin is detached because FFmpeg reads it
            # by default; errors="replace" keeps undecodable stderr bytes from
            # raising while the output is captured as text.
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                stdin=subprocess.DEVNULL,
                capture_output=True,
                text=True,
                errors="replace",
            )
        except OSError as exc:
            # Typically FileNotFoundError when the ffmpeg binary is not on PATH.
            logger.error("[waveform:%s] ffmpeg_unavailable error=%s", req_id, exc)
            raise HTTPException(
                status_code=500,
                detail=f"Failed to run ffmpeg: {exc}"
            ) from exc

        if result.returncode != 0:
            logger.error(
                "[waveform:%s] ffmpeg_failed returncode=%s stderr_tail=%s",
                req_id,
                result.returncode,
                result.stderr[-2000:],
            )
            raise HTTPException(
                status_code=500,
                detail=f"Failed to extract audio: {result.stderr[-300:]}"
            )

        # -1 marks "file missing" in the log line, matching the size reported
        # for a missing file before.
        out_exists = out_wav.exists()
        out_size = out_wav.stat().st_size if out_exists else -1
        if out_size <= 0:
            logger.error(
                "[waveform:%s] empty_output out_wav=%s exists=%s size=%s",
                req_id,
                out_wav,
                out_exists,
                out_size,
            )
            raise HTTPException(status_code=500, detail="Audio extraction produced empty file")

        extracted = True
    finally:
        # Do not leave an orphaned temp directory behind when extraction
        # fails or the request is cancelled mid-run.
        if not extracted:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    logger.info(
        "[waveform:%s] extracted_bytes=%s out_wav=%s",
        req_id,
        out_size,
        out_wav,
    )
    _waveform_cache[cache_key] = str(out_wav)
    return FileResponse(str(out_wav), media_type="audio/wav")


class NormalizeRequest(BaseModel):
    """Request body for ``POST /audio/normalize``."""

    # Path of the file to normalize; forwarded unchanged to normalize_audio().
    input_path: str
    # Optional destination path; the endpoint substitutes "" when it is
    # omitted or empty.
    output_path: Optional[str] = None
    # Target loudness in LUFS. The endpoint rejects values outside [-70, 0]
    # with HTTP 400.
    target_lufs: float = -14.0


@router.post("/audio/normalize")
async def normalize_audio_endpoint(req: NormalizeRequest):
    """Normalize audio loudness to a target LUFS level using FFmpeg loudnorm.

    Args:
        req: Input path, optional output path (forwarded as ``""`` when
            missing/empty) and ``target_lufs``.

    Returns:
        ``{"status": "ok", "output_path": ..., "target_lufs": ...}`` where
        ``output_path`` is whatever ``normalize_audio`` returned.

    Raises:
        HTTPException: 400 if ``target_lufs`` is not within [-70, 0] (NaN
            included); 500 carrying the error text if normalization fails.
    """
    # Chained comparison on purpose: NaN compares False against both bounds,
    # so the "< -70 or > 0" form would let it through.
    # NOTE(review): FFmpeg's loudnorm documents its integrated-loudness target
    # as -70..-5; values in (-5, 0] pass this check — confirm normalize_audio
    # handles them.
    if not (-70 <= req.target_lufs <= 0):
        raise HTTPException(status_code=400, detail="target_lufs must be between -70 and 0")
    try:
        # normalize_audio is a synchronous call; run it in a worker thread so
        # the event loop keeps serving other requests while it works.
        output = await asyncio.to_thread(
            normalize_audio,
            req.input_path,
            req.output_path or "",
            target_lufs=req.target_lufs,
        )
        return {
            "status": "ok",
            "output_path": output,
            "target_lufs": req.target_lufs,
        }
    except Exception as e:
        # logger.exception records the traceback; chain the cause for debuggers.
        logger.exception("Audio normalization failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e