2026-03-03 06:31:04 -05:00
|
|
|
"""Audio processing endpoint (noise reduction / Studio Sound)."""
|
|
|
|
|
|
2026-03-28 12:26:45 -06:00
|
|
|
import hashlib
|
2026-03-03 06:31:04 -05:00
|
|
|
import logging
|
2026-03-28 12:26:45 -06:00
|
|
|
import subprocess
|
|
|
|
|
import tempfile
|
|
|
|
|
from pathlib import Path
|
2026-03-03 06:31:04 -05:00
|
|
|
from typing import Optional
|
|
|
|
|
|
2026-04-09 01:36:28 -06:00
|
|
|
from fastapi import APIRouter, HTTPException, Query, Request
|
2026-03-28 12:26:45 -06:00
|
|
|
from fastapi.responses import FileResponse
|
2026-03-03 06:31:04 -05:00
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
2026-05-04 16:37:25 -06:00
|
|
|
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available, normalize_audio
|
2026-03-03 06:31:04 -05:00
|
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router that collects every /audio/* route declared in this module.
router = APIRouter()

# Simple in-process cache for /audio/waveform:
# MD5 key derived from (source path, mtime) → path of the extracted WAV.
# Entries live only as long as this process; nothing in this module evicts
# them except a lookup that finds the cached WAV missing on disk.
_waveform_cache: dict[str, str] = {}
|
|
|
|
|
|
2026-03-03 06:31:04 -05:00
|
|
|
|
|
|
|
|
class AudioCleanRequest(BaseModel):
    """Request body for ``POST /audio/clean``."""

    # Path of the file to clean; forwarded unchanged to ``clean_audio``.
    input_path: str

    # Optional destination path. When omitted (or empty) the endpoint passes
    # an empty string to ``clean_audio``.
    # NOTE(review): presumably the service then picks its own output
    # location — confirm against services.audio_cleaner.
    output_path: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
|
2026-04-03 12:05:44 -06:00
|
|
|
class SilenceDetectRequest(BaseModel):
    """Request body for ``POST /audio/detect-silence``.

    All three fields are forwarded positionally, in declaration order, to
    ``detect_silence_ranges``.
    """

    # Path of the file to analyse.
    input_path: str

    # Minimum silence length; the name suggests milliseconds — TODO confirm
    # against services.audio_cleaner.
    min_silence_ms: int = 500

    # Silence threshold; the name suggests decibels — TODO confirm against
    # services.audio_cleaner.
    silence_db: float = -35.0
|
|
|
|
|
|
|
|
|
|
|
2026-03-03 06:31:04 -05:00
|
|
|
@router.post("/audio/clean")
async def clean_audio_endpoint(req: AudioCleanRequest):
    """Run ``clean_audio`` on the requested file and report the result.

    Returns a dict with ``status``, the ``output_path`` returned by
    ``clean_audio`` and an ``engine`` label. The label is derived from
    ``is_deepfilter_available()`` after the call; it is not reported by
    ``clean_audio`` itself.

    Raises:
        HTTPException: 500 with the exception text when anything fails.
    """
    destination = req.output_path or ""
    try:
        cleaned_path = clean_audio(req.input_path, destination)
        if is_deepfilter_available():
            engine_name = "deepfilternet"
        else:
            engine_name = "ffmpeg_anlmdn"
    except Exception as exc:
        logger.error(f"Audio cleaning failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(exc))

    return {
        "status": "ok",
        "output_path": cleaned_path,
        "engine": engine_name,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/audio/capabilities")
async def audio_capabilities():
    """Report which optional audio engines are usable in this process.

    Returns a dict whose single key, ``deepfilternet_available``, holds the
    result of ``is_deepfilter_available()``.
    """
    deepfilter_ready = is_deepfilter_available()
    return {"deepfilternet_available": deepfilter_ready}
|
2026-03-28 12:26:45 -06:00
|
|
|
|
|
|
|
|
|
2026-04-03 12:05:44 -06:00
|
|
|
@router.post("/audio/detect-silence")
async def detect_silence_endpoint(req: SilenceDetectRequest):
    """Detect silent ranges in a file via ``detect_silence_ranges``.

    Returns a dict with ``status``, the ``ranges`` value returned by the
    service, and ``count`` (its length).

    Raises:
        HTTPException: 500 with the exception text when anything fails.
    """
    try:
        silent_spans = detect_silence_ranges(
            req.input_path,
            req.min_silence_ms,
            req.silence_db,
        )
        # len() stays inside the try so an unexpected return type is reported
        # through the same 500 path as a service failure.
        span_count = len(silent_spans)
    except Exception as exc:
        logger.error(f"Silence detection failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(exc))

    return {
        "status": "ok",
        "ranges": silent_spans,
        "count": span_count,
    }
|
|
|
|
|
|
|
|
|
|
|
2026-03-28 12:26:45 -06:00
|
|
|
def _discard_waveform_output(tmp_dir: Path, out_wav: Path) -> None:
    """Best-effort removal of the temp dir left behind by a failed extraction."""
    try:
        out_wav.unlink(missing_ok=True)
        tmp_dir.rmdir()
    except OSError as exc:
        # Cleanup must never mask the original extraction error.
        logger.warning("[waveform] temp_cleanup_failed dir=%s error=%s", tmp_dir, exc)


@router.get("/audio/waveform")
async def get_waveform_audio(request: Request, path: str = Query(...)):
    """
    Extract audio from any video/audio file and return it as a WAV.

    The audio is downmixed to mono, resampled to 8 kHz and written as 16-bit
    PCM into a fresh temporary directory. The resulting path is remembered in
    the in-process ``_waveform_cache`` under a key derived from the
    normalized source path and its mtime, so later requests for an unchanged
    file are served from disk without re-running FFmpeg.
    Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.

    Args:
        request: Incoming request; used only for diagnostic logging.
        path: Filesystem path of the source media (``path`` query parameter).
            ``~`` is expanded and the path is resolved before use.

    Returns:
        A ``FileResponse`` streaming the extracted WAV (``audio/wav``).

    Raises:
        HTTPException: 404 if the path is not an existing file; 500 if FFmpeg
            cannot be started, exits non-zero, or produces an empty file.
    """
    req_id = hashlib.md5(f"{path}:{request.url}".encode()).hexdigest()[:10]
    logger.info(
        "[waveform:%s] request raw_url=%s raw_query=%s decoded_path=%r path_len=%s",
        req_id,
        str(request.url),
        request.url.query,
        path,
        len(path),
    )

    # Normalize once and use the normalized path everywhere below, so the
    # logged path, the existence check, the cache key and the FFmpeg input
    # all refer to the same file.
    raw_path = Path(path)
    try:
        file_path = raw_path.expanduser().resolve(strict=False)
    except (OSError, RuntimeError, ValueError):
        # Best effort: fall back to the path exactly as supplied.
        file_path = raw_path

    is_regular_file = file_path.is_file()
    logger.info(
        "[waveform:%s] normalized path=%s exists=%s is_file=%s",
        req_id,
        file_path,
        file_path.exists(),
        is_regular_file,
    )

    if not is_regular_file:
        logger.warning("[waveform:%s] file_not_found path=%r", req_id, path)
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # Cache key based on path + mtime so stale cache is auto-invalidated
    mtime = file_path.stat().st_mtime
    cache_key = hashlib.md5(f"{file_path}:{mtime}".encode()).hexdigest()
    logger.info("[waveform:%s] cache_key=%s mtime=%s", req_id, cache_key, mtime)

    cached_entry = _waveform_cache.get(cache_key)
    if cached_entry is not None:
        cached = Path(cached_entry)
        if cached.exists():
            logger.info("[waveform:%s] cache_hit cached=%s", req_id, cached)
            return FileResponse(str(cached), media_type="audio/wav")
        # The cached WAV vanished from disk; forget it and re-extract.
        _waveform_cache.pop(cache_key, None)

    logger.info("[waveform:%s] cache_miss extracting file=%s", req_id, file_path)
    tmp_dir = Path(tempfile.mkdtemp(prefix="talkedit_waveform_"))
    out_wav = tmp_dir / f"{cache_key}.wav"

    # Downsample to mono 8000 Hz — enough for waveform drawing and much smaller payloads
    cmd = [
        "ffmpeg", "-y",
        "-i", str(file_path),
        "-vn",                      # drop video
        "-ac", "1",                 # mono
        "-ar", "8000",              # 8 kHz sample rate
        "-acodec", "pcm_s16le",     # 16-bit PCM WAV
        str(out_wav),
    ]
    logger.info("[waveform:%s] ffmpeg_cmd=%s", req_id, " ".join(cmd))

    try:
        # NOTE(review): subprocess.run blocks the event loop for the whole
        # extraction because this handler is ``async def``. Offloading it to
        # a worker thread would also allow concurrent duplicate extractions
        # of the same file, so it is left as-is here.
        try:
            result = subprocess.run(
                cmd,
                stdin=subprocess.DEVNULL,   # never let ffmpeg read the server's stdin
                capture_output=True,
                text=True,
                errors="replace",           # stderr may contain non-UTF-8 bytes
            )
        except OSError as exc:
            # Typically: the ffmpeg binary is missing or not executable.
            logger.error("[waveform:%s] ffmpeg_unavailable error=%s", req_id, exc)
            raise HTTPException(
                status_code=500,
                detail=f"Failed to extract audio: {exc}",
            ) from exc

        if result.returncode != 0:
            logger.error(
                "[waveform:%s] ffmpeg_failed returncode=%s stderr_tail=%s",
                req_id,
                result.returncode,
                result.stderr[-2000:],
            )
            raise HTTPException(
                status_code=500,
                detail=f"Failed to extract audio: {result.stderr[-300:]}"
            )

        # -1 marks "file missing" so both failure modes share one check.
        wav_size = out_wav.stat().st_size if out_wav.exists() else -1
        if wav_size <= 0:
            logger.error(
                "[waveform:%s] empty_output out_wav=%s exists=%s size=%s",
                req_id,
                out_wav,
                wav_size >= 0,
                wav_size,
            )
            raise HTTPException(status_code=500, detail="Audio extraction produced empty file")
    except Exception:
        # Nothing usable was produced: do not leak the temp directory.
        _discard_waveform_output(tmp_dir, out_wav)
        raise

    logger.info(
        "[waveform:%s] extracted_bytes=%s out_wav=%s",
        req_id,
        wav_size,
        out_wav,
    )
    _waveform_cache[cache_key] = str(out_wav)
    return FileResponse(str(out_wav), media_type="audio/wav")
|
2026-05-04 16:37:25 -06:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class NormalizeRequest(BaseModel):
    """Request body for ``POST /audio/normalize``."""

    # Path of the file to normalize; forwarded unchanged to ``normalize_audio``.
    input_path: str

    # Optional destination path. When omitted (or empty) the endpoint passes
    # an empty string to ``normalize_audio``.
    output_path: Optional[str] = None

    # Target loudness in LUFS. The endpoint rejects values outside -70..0.
    target_lufs: float = -14.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.post("/audio/normalize")
async def normalize_audio_endpoint(req: NormalizeRequest):
    """Normalize audio loudness to a target LUFS level using FFmpeg loudnorm.

    Returns a dict with ``status``, the ``output_path`` returned by
    ``normalize_audio`` and the ``target_lufs`` that was applied.

    Raises:
        HTTPException: 400 if ``target_lufs`` is not a number within
            -70..0 (NaN included); 500 with the exception text if
            normalization fails.
    """
    # A chained comparison is False for NaN, so NaN is rejected here as well;
    # the previous ``x < -70 or x > 0`` test let NaN through to the service.
    if not (-70 <= req.target_lufs <= 0):
        raise HTTPException(status_code=400, detail="target_lufs must be between -70 and 0")

    try:
        output = normalize_audio(
            req.input_path,
            req.output_path or "",
            target_lufs=req.target_lufs,
        )
        return {
            "status": "ok",
            "output_path": output,
            "target_lufs": req.target_lufs,
        }
    except Exception as e:
        logger.error(f"Audio normalization failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|