changed to python312

2026-03-28 12:26:45 -06:00
parent 4a857d8cbf
commit 2ffc406b10
9 changed files with 443 additions and 64 deletions
--- a/backend/routers/audio.py
+++ b/backend/routers/audio.py
@ -1,9 +1,14 @@
 """Audio processing endpoint (noise reduction / Studio Sound)."""

+import hashlib
 import logging
+import subprocess
+import tempfile
+from pathlib import Path
 from typing import Optional

-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import FileResponse
 from pydantic import BaseModel

 from services.audio_cleaner import clean_audio, is_deepfilter_available
@ -11,6 +16,9 @@ from services.audio_cleaner import clean_audio, is_deepfilter_available
 logger = logging.getLogger(__name__)
 router = APIRouter()

+# Simple in-process cache: MD5 of "path:mtime" → path of the extracted WAV
+_waveform_cache: dict[str, str] = {}
+

 class AudioCleanRequest(BaseModel):
    input_path: str
@ -36,3 +44,58 @@ async def audio_capabilities():
    return {
        "deepfilternet_available": is_deepfilter_available(),
    }
+
+
+@router.get("/audio/waveform")
+async def get_waveform_audio(path: str = Query(...)):
+    """
+    Extract audio from any video/audio file and return it as a WAV.
+    The WAV is cached on disk for subsequent requests.
+    Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.
+    """
+    file_path = Path(path)
+    if not file_path.is_file():
+        logger.warning(f"[waveform] File not found: {path}")
+        raise HTTPException(status_code=404, detail=f"File not found: {path}")
+
+    # Cache key based on path + mtime so stale cache is auto-invalidated
+    mtime = file_path.stat().st_mtime
+    cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()
+
+    if cache_key in _waveform_cache:
+        cached = Path(_waveform_cache[cache_key])
+        if cached.exists():
+            logger.info(f"[waveform] Cache hit for {file_path.name}")
+            return FileResponse(str(cached), media_type="audio/wav")
+        else:
+            del _waveform_cache[cache_key]
+
+    logger.info(f"[waveform] Extracting audio from: {file_path.name}")
+    tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
+    out_wav = Path(tmp_dir) / f"{cache_key}.wav"
+
+    # Downsample to mono 22050 Hz — enough for waveform drawing, small file
+    cmd = [
+        "ffmpeg", "-y",
+        "-i", str(file_path),
+        "-vn",                    # drop video
+        "-ac", "1",               # mono
+        "-ar", "22050",           # 22 kHz sample rate
+        "-acodec", "pcm_s16le",   # 16-bit PCM WAV
+        str(out_wav),
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    if result.returncode != 0:
+        logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}")
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to extract audio: {result.stderr[-300:]}"
+        )
+
+    if not out_wav.exists() or out_wav.stat().st_size == 0:
+        logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}")
+        raise HTTPException(status_code=500, detail="Audio extraction produced empty file")
+
+    logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}")
+    _waveform_cache[cache_key] = str(out_wav)
+    return FileResponse(str(out_wav), media_type="audio/wav")