Files
TalkEdit/backend/routers/audio.py

167 lines
5.3 KiB
Python
Raw Normal View History

"""Audio processing endpoint (noise reduction / Studio Sound)."""
2026-03-28 12:26:45 -06:00
import hashlib
import logging
2026-03-28 12:26:45 -06:00
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
2026-04-09 01:36:28 -06:00
from fastapi import APIRouter, HTTPException, Query, Request
2026-03-28 12:26:45 -06:00
from fastapi.responses import FileResponse
from pydantic import BaseModel
2026-04-03 12:05:44 -06:00
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that the /audio/* endpoints defined in this module register on.
router = APIRouter()
2026-03-28 12:26:45 -06:00
# Simple in-process cache: md5(path + mtime) hex digest → path of the extracted WAV.
# The key includes the source file's mtime, so a modified file gets a new entry.
_waveform_cache: dict[str, str] = {}
class AudioCleanRequest(BaseModel):
    """Request body for ``POST /audio/clean``."""

    # Passed to clean_audio as the file to process.
    input_path: str
    # Optional destination; the endpoint hands clean_audio "" when this is unset.
    output_path: Optional[str] = None
2026-04-03 12:05:44 -06:00
class SilenceDetectRequest(BaseModel):
    """Request body for ``POST /audio/detect-silence``."""

    # Passed to detect_silence_ranges as the file to analyse.
    input_path: str
    # Second argument to detect_silence_ranges; by its name, a duration in
    # milliseconds — presumably the shortest gap that counts as silence (confirm).
    min_silence_ms: int = 500
    # Third argument to detect_silence_ranges; by its name, a level in dB —
    # presumably the threshold below which audio is treated as silent (confirm).
    silence_db: float = -35.0
@router.post("/audio/clean")
async def clean_audio_endpoint(req: AudioCleanRequest):
    """Clean the audio at ``req.input_path`` and report the result.

    Returns:
        A dict with ``status`` ("ok"), ``output_path`` (whatever ``clean_audio``
        returned) and ``engine`` ("deepfilternet" when
        ``is_deepfilter_available()`` is true, otherwise "ffmpeg_anlmdn").

    Raises:
        HTTPException: 500 with the exception text as detail when anything
            in the cleaning step fails.
    """
    try:
        # clean_audio receives "" when the client did not name an output file.
        destination = req.output_path or ""
        cleaned_path = clean_audio(req.input_path, destination)

        if is_deepfilter_available():
            engine_name = "deepfilternet"
        else:
            engine_name = "ffmpeg_anlmdn"

        return {
            "status": "ok",
            "output_path": cleaned_path,
            "engine": engine_name,
        }
    except Exception as e:
        # Boundary handler: log with traceback, then surface the failure as HTTP 500.
        logger.error(f"Audio cleaning failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/audio/capabilities")
async def audio_capabilities():
    """Report which optional audio engines this backend can use.

    Returns:
        A dict with ``deepfilternet_available`` set to the result of
        ``is_deepfilter_available()``.
    """
    has_deepfilter = is_deepfilter_available()
    return {"deepfilternet_available": has_deepfilter}
2026-03-28 12:26:45 -06:00
2026-04-03 12:05:44 -06:00
@router.post("/audio/detect-silence")
async def detect_silence_endpoint(req: SilenceDetectRequest):
    """Run silence detection on ``req.input_path`` and return the ranges found.

    Returns:
        A dict with ``status`` ("ok"), ``ranges`` (the value returned by
        ``detect_silence_ranges``) and ``count`` (its length).

    Raises:
        HTTPException: 500 with the exception text as detail when detection fails.
    """
    try:
        detection_args = (req.input_path, req.min_silence_ms, req.silence_db)
        silent_ranges = detect_silence_ranges(*detection_args)
        total = len(silent_ranges)
        return {
            "status": "ok",
            "ranges": silent_ranges,
            "count": total,
        }
    except Exception as e:
        # Boundary handler: log with traceback, then surface the failure as HTTP 500.
        logger.error(f"Silence detection failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
2026-03-28 12:26:45 -06:00
@router.get("/audio/waveform")
async def get_waveform_audio(request: Request, path: str = Query(...)):
    """
    Extract audio from any video/audio file and return it as a WAV.

    The WAV is written to a fresh temporary directory and its location is
    remembered in the in-process ``_waveform_cache`` for subsequent requests.
    The cache key is derived from the path and the file's mtime, so a
    modified source file is re-extracted automatically.
    Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.

    Args:
        request: Incoming request; only its URL is used, for diagnostic logging.
        path: Filesystem path of the media file (``path`` query parameter).

    Returns:
        A ``FileResponse`` serving the mono, 8 kHz, 16-bit PCM WAV.

    Raises:
        HTTPException: 404 when ``path`` is not an existing file; 500 when
            FFmpeg cannot be started, exits non-zero, or produces an empty file.
    """
    # Imported locally so this block does not depend on edits to the
    # module's import section.
    import asyncio
    import shutil

    # Short id that ties together all log lines belonging to this request.
    req_id = hashlib.md5(f"{path}:{request.url}".encode()).hexdigest()[:10]
    file_path = Path(path)

    logger.info(
        "[waveform:%s] request raw_url=%s raw_query=%s decoded_path=%r path_len=%s",
        req_id,
        str(request.url),
        request.url.query,
        path,
        len(path),
    )

    # The resolved path is used for diagnostics only; a failure to resolve
    # must never fail the request, hence the deliberately broad except.
    try:
        resolved_path = file_path.expanduser().resolve(strict=False)
    except Exception:
        resolved_path = file_path

    logger.info(
        "[waveform:%s] normalized path=%s exists=%s is_file=%s",
        req_id,
        resolved_path,
        file_path.exists(),
        file_path.is_file(),
    )

    if not file_path.is_file():
        logger.warning("[waveform:%s] file_not_found path=%r", req_id, path)
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # Cache key based on path + mtime so stale cache is auto-invalidated
    mtime = file_path.stat().st_mtime
    cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()
    logger.info("[waveform:%s] cache_key=%s mtime=%s", req_id, cache_key, mtime)

    cached_entry = _waveform_cache.get(cache_key)
    if cached_entry is not None:
        cached = Path(cached_entry)
        if cached.exists():
            logger.info("[waveform:%s] cache_hit cached=%s", req_id, cached)
            return FileResponse(str(cached), media_type="audio/wav")
        # The cached file disappeared from disk: forget it and re-extract.
        _waveform_cache.pop(cache_key, None)

    logger.info("[waveform:%s] cache_miss extracting file=%s", req_id, file_path)

    tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
    out_wav = Path(tmp_dir) / f"{cache_key}.wav"

    # Downsample to mono 8000 Hz — enough for waveform drawing and much smaller payloads
    cmd = [
        "ffmpeg", "-y",
        "-i", str(file_path),
        "-vn",                   # drop video
        "-ac", "1",              # mono
        "-ar", "8000",           # 8 kHz sample rate
        "-acodec", "pcm_s16le",  # 16-bit PCM WAV
        str(out_wav),
    ]
    logger.info("[waveform:%s] ffmpeg_cmd=%s", req_id, " ".join(cmd))

    extracted = False
    try:
        try:
            # Run FFmpeg in a worker thread so a long extraction does not
            # block the event loop. errors="replace" keeps undecodable bytes
            # in FFmpeg's stderr from raising UnicodeDecodeError.
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                errors="replace",
            )
        except FileNotFoundError as exc:
            logger.error("[waveform:%s] ffmpeg_not_found error=%s", req_id, exc)
            raise HTTPException(
                status_code=500,
                detail="Failed to extract audio: ffmpeg executable not found",
            ) from exc

        if result.returncode != 0:
            logger.error(
                "[waveform:%s] ffmpeg_failed returncode=%s stderr_tail=%s",
                req_id,
                result.returncode,
                result.stderr[-2000:],
            )
            raise HTTPException(
                status_code=500,
                detail=f"Failed to extract audio: {result.stderr[-300:]}"
            )

        output_exists = out_wav.exists()
        output_size = out_wav.stat().st_size if output_exists else -1
        if output_size <= 0:
            logger.error(
                "[waveform:%s] empty_output out_wav=%s exists=%s size=%s",
                req_id,
                out_wav,
                output_exists,
                output_size,
            )
            raise HTTPException(status_code=500, detail="Audio extraction produced empty file")

        extracted = True
    finally:
        # On any failure (including cancellation) remove the temporary
        # directory so failed extractions do not accumulate on disk.
        if not extracted:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    logger.info(
        "[waveform:%s] extracted_bytes=%s out_wav=%s",
        req_id,
        output_size,
        out_wav,
    )

    _waveform_cache[cache_key] = str(out_wav)
    return FileResponse(str(out_wav), media_type="audio/wav")