trying to fix bug

This commit is contained in:
2026-04-09 01:36:28 -06:00
parent f9cd2bf579
commit 1d17a8f19a
10 changed files with 133 additions and 27 deletions

View File

@@ -7,7 +7,7 @@ import tempfile
from pathlib import Path
from typing import Optional
from fastapi import APIRouter, HTTPException, Query
from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import FileResponse
from pydantic import BaseModel
@@ -71,30 +71,54 @@ async def detect_silence_endpoint(req: SilenceDetectRequest):
@router.get("/audio/waveform")
async def get_waveform_audio(path: str = Query(...)):
async def get_waveform_audio(request: Request, path: str = Query(...)):
"""
Extract audio from any video/audio file and return it as a WAV.
The WAV is cached on disk for subsequent requests.
Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.
"""
req_id = hashlib.md5(f"{path}:{request.url}".encode()).hexdigest()[:10]
file_path = Path(path)
logger.info(
"[waveform:%s] request raw_url=%s raw_query=%s decoded_path=%r path_len=%s",
req_id,
str(request.url),
request.url.query,
path,
len(path),
)
try:
resolved_path = file_path.expanduser().resolve(strict=False)
except Exception:
resolved_path = file_path
logger.info(
"[waveform:%s] normalized path=%s exists=%s is_file=%s",
req_id,
resolved_path,
file_path.exists(),
file_path.is_file(),
)
if not file_path.is_file():
logger.warning(f"[waveform] File not found: {path}")
logger.warning("[waveform:%s] file_not_found path=%r", req_id, path)
raise HTTPException(status_code=404, detail=f"File not found: {path}")
# Cache key based on path + mtime so stale cache is auto-invalidated
mtime = file_path.stat().st_mtime
cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()
logger.info("[waveform:%s] cache_key=%s mtime=%s", req_id, cache_key, mtime)
if cache_key in _waveform_cache:
cached = Path(_waveform_cache[cache_key])
if cached.exists():
logger.info(f"[waveform] Cache hit for {file_path.name}")
logger.info("[waveform:%s] cache_hit cached=%s", req_id, cached)
return FileResponse(str(cached), media_type="audio/wav")
else:
del _waveform_cache[cache_key]
logger.info(f"[waveform] Extracting audio from: {file_path.name}")
logger.info("[waveform:%s] cache_miss extracting file=%s", req_id, file_path)
tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
out_wav = Path(tmp_dir) / f"{cache_key}.wav"
@@ -108,18 +132,35 @@ async def get_waveform_audio(path: str = Query(...)):
"-acodec", "pcm_s16le", # 16-bit PCM WAV
str(out_wav),
]
logger.info("[waveform:%s] ffmpeg_cmd=%s", req_id, " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}")
logger.error(
"[waveform:%s] ffmpeg_failed returncode=%s stderr_tail=%s",
req_id,
result.returncode,
result.stderr[-2000:],
)
raise HTTPException(
status_code=500,
detail=f"Failed to extract audio: {result.stderr[-300:]}"
)
if not out_wav.exists() or out_wav.stat().st_size == 0:
logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}")
logger.error(
"[waveform:%s] empty_output out_wav=%s exists=%s size=%s",
req_id,
out_wav,
out_wav.exists(),
out_wav.stat().st_size if out_wav.exists() else -1,
)
raise HTTPException(status_code=500, detail="Audio extraction produced empty file")
logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}")
logger.info(
"[waveform:%s] extracted_bytes=%s out_wav=%s",
req_id,
out_wav.stat().st_size,
out_wav,
)
_waveform_cache[cache_key] = str(out_wav)
return FileResponse(str(out_wav), media_type="audio/wav")