2026-03-03 06:31:04 -05:00
|
|
|
"""Audio processing endpoint (noise reduction / Studio Sound)."""
|
|
|
|
|
|
2026-03-28 12:26:45 -06:00
|
|
|
import hashlib
|
2026-03-03 06:31:04 -05:00
|
|
|
import logging
|
2026-03-28 12:26:45 -06:00
|
|
|
import subprocess
|
|
|
|
|
import tempfile
|
|
|
|
|
from pathlib import Path
|
2026-03-03 06:31:04 -05:00
|
|
|
from typing import Optional
|
|
|
|
|
|
2026-03-28 12:26:45 -06:00
|
|
|
from fastapi import APIRouter, HTTPException, Query
|
|
|
|
|
from fastapi.responses import FileResponse
|
2026-03-03 06:31:04 -05:00
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
2026-04-03 12:05:44 -06:00
|
|
|
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available
|
2026-03-03 06:31:04 -05:00
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
router = APIRouter()
|
|
|
|
|
|
2026-03-28 12:26:45 -06:00
|
|
|
# Simple in-process cache: video path → extracted WAV path.
# Keyed by md5(path + mtime) (see get_waveform_audio), value is the absolute
# path of the extracted WAV in a tempdir.
# NOTE(review): grows without bound for the process lifetime — entries are
# never evicted, only replaced when the source file's mtime changes.
_waveform_cache: dict[str, str] = {}
|
|
|
|
|
|
2026-03-03 06:31:04 -05:00
|
|
|
|
|
|
|
|
class AudioCleanRequest(BaseModel):
    """Request body for POST /audio/clean."""

    # Path to the source media file to denoise.
    input_path: str
    # Optional destination path for the cleaned audio. When omitted the
    # endpoint forwards "" to clean_audio, which then chooses the output
    # location itself — TODO confirm against services.audio_cleaner.
    output_path: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
|
2026-04-03 12:05:44 -06:00
|
|
|
class SilenceDetectRequest(BaseModel):
    """Request body for POST /audio/detect-silence."""

    # Path to the media file to scan for silent stretches.
    input_path: str
    # Minimum gap length, in milliseconds, for a stretch to count as silence.
    min_silence_ms: int = 500
    # Loudness threshold in dB below which audio is treated as silent
    # (negative = quieter; presumably dBFS — passed straight to
    # detect_silence_ranges, verify there).
    silence_db: float = -35.0
|
|
|
|
|
|
|
|
|
|
|
2026-03-03 06:31:04 -05:00
|
|
|
@router.post("/audio/clean")
async def clean_audio_endpoint(req: AudioCleanRequest):
    """Denoise the file named in the request and report which engine ran.

    Returns a JSON object with the output path and the engine used
    ("deepfilternet" when available, otherwise FFmpeg's anlmdn filter).
    Any failure is logged and surfaced as HTTP 500.
    """
    try:
        destination = req.output_path or ""
        cleaned_path = clean_audio(req.input_path, destination)
        # Engine selection mirrors what clean_audio itself falls back to.
        if is_deepfilter_available():
            engine = "deepfilternet"
        else:
            engine = "ffmpeg_anlmdn"
    except Exception as exc:
        logger.error(f"Audio cleaning failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(exc))
    return {
        "status": "ok",
        "output_path": cleaned_path,
        "engine": engine,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@router.get("/audio/capabilities")
async def audio_capabilities():
    """Report which optional audio-processing engines are installed."""
    capabilities = {"deepfilternet_available": is_deepfilter_available()}
    return capabilities
|
2026-03-28 12:26:45 -06:00
|
|
|
|
|
|
|
|
|
2026-04-03 12:05:44 -06:00
|
|
|
@router.post("/audio/detect-silence")
async def detect_silence_endpoint(req: SilenceDetectRequest):
    """Locate silent stretches in the given media file.

    Delegates to detect_silence_ranges with the request's threshold and
    minimum-duration settings; failures are logged and mapped to HTTP 500.
    """
    try:
        silent_spans = detect_silence_ranges(
            req.input_path, req.min_silence_ms, req.silence_db
        )
        span_count = len(silent_spans)
    except Exception as exc:
        logger.error(f"Silence detection failed: {exc}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(exc))
    return {"status": "ok", "ranges": silent_spans, "count": span_count}
|
|
|
|
|
|
|
|
|
|
|
2026-03-28 12:26:45 -06:00
|
|
|
@router.get("/audio/waveform")
async def get_waveform_audio(path: str = Query(...)):
    """
    Extract audio from any video/audio file and return it as a WAV.

    The WAV is cached on disk for subsequent requests (keyed on path + mtime,
    so an edited source file automatically invalidates its cache entry).
    Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.

    Raises:
        HTTPException(404): the source file does not exist.
        HTTPException(500): FFmpeg failed or produced an empty file.
    """
    # Local stdlib imports: only this endpoint needs them.
    import asyncio
    import shutil

    file_path = Path(path)
    if not file_path.is_file():
        logger.warning(f"[waveform] File not found: {path}")
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # Cache key based on path + mtime so stale cache is auto-invalidated.
    # md5 is fine here — the key is a cache identifier, not security-sensitive.
    mtime = file_path.stat().st_mtime
    cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()

    if cache_key in _waveform_cache:
        cached = Path(_waveform_cache[cache_key])
        if cached.exists():
            logger.info(f"[waveform] Cache hit for {file_path.name}")
            return FileResponse(str(cached), media_type="audio/wav")
        # Cached WAV vanished (e.g. OS temp cleanup); drop the stale entry
        # and fall through to re-extract.
        del _waveform_cache[cache_key]

    logger.info(f"[waveform] Extracting audio from: {file_path.name}")
    tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
    out_wav = Path(tmp_dir) / f"{cache_key}.wav"

    # Downsample to mono 22050 Hz — enough for waveform drawing, small file
    cmd = [
        "ffmpeg", "-y",
        "-nostdin",              # never read stdin: avoids hangs when run under a server
        "-i", str(file_path),
        "-vn",                   # drop video
        "-ac", "1",              # mono
        "-ar", "22050",          # 22 kHz sample rate
        "-acodec", "pcm_s16le",  # 16-bit PCM WAV
        str(out_wav),
    ]
    # Run FFmpeg in a worker thread — a blocking subprocess.run inside an
    # async endpoint would stall the entire event loop for the whole encode.
    result = await asyncio.to_thread(subprocess.run, cmd, capture_output=True, text=True)

    if result.returncode != 0:
        # Don't leak the temp dir (and any partial WAV) on failure.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to extract audio: {result.stderr[-300:]}"
        )

    if not out_wav.exists() or out_wav.stat().st_size == 0:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}")
        raise HTTPException(status_code=500, detail="Audio extraction produced empty file")

    logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}")
    _waveform_cache[cache_key] = str(out_wav)
    return FileResponse(str(out_wav), media_type="audio/wav")
|