changed to python312

2026-03-28 12:26:45 -06:00
parent 4a857d8cbf
commit 2ffc406b10
9 changed files with 443 additions and 64 deletions
--- a/backend/main.py
+++ b/backend/main.py
@ -61,18 +61,37 @@ async def serve_local_file(request: Request, path: str = Query(...)):
    """Stream a local file with HTTP Range support (required for video seeking)."""
    file_path = Path(path)
    if not file_path.is_file():
+        logger.warning(f"[serve_file] File not found: {path}")
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    file_size = file_path.stat().st_size
    content_type = MIME_MAP.get(file_path.suffix.lower(), "application/octet-stream")
-
    range_header = request.headers.get("range")
+
+    logger.info(
+        f"[serve_file] {file_path.name} | size={file_size} | "
+        f"type={content_type} | range={range_header or 'none'}"
+    )
+
+    if content_type == "application/octet-stream":
+        logger.warning(
+            f"[serve_file] Unknown MIME type for extension '{file_path.suffix}' — "
+            f"browser may fail to decode audio/video for '{file_path.name}'"
+        )
+
+    if file_size == 0:
+        logger.error(f"[serve_file] File is empty: {path}")
+        raise HTTPException(status_code=422, detail=f"File is empty: {path}")
    if range_header:
-        range_spec = range_header.replace("bytes=", "")
-        range_start_str, range_end_str = range_spec.split("-")
-        range_start = int(range_start_str) if range_start_str else 0
-        range_end = int(range_end_str) if range_end_str else file_size - 1
-        range_end = min(range_end, file_size - 1)
+        try:
+            range_spec = range_header.replace("bytes=", "")
+            range_start_str, range_end_str = range_spec.split("-")
+            range_start = int(range_start_str) if range_start_str else 0
+            range_end = int(range_end_str) if range_end_str else file_size - 1
+            range_end = min(range_end, file_size - 1)
+        except (ValueError, TypeError) as e:
+            logger.error(f"[serve_file] Malformed Range header '{range_header}': {e}")
+            raise HTTPException(status_code=416, detail=f"Invalid Range header: {range_header}")
        content_length = range_end - range_start + 1

        def iter_range():
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -1,33 +1,164 @@
-# FastAPI backend
-fastapi>=0.115.0
-uvicorn[standard]>=0.32.0
-websockets>=14.0
-python-multipart>=0.0.12
-
-# Transcription (WhisperX for word-level alignment)
-whisperx>=3.1.0
-faster-whisper>=1.0.0
-
-# Audio / Video processing
-moviepy>=1.0.3
-ffmpeg-python>=0.2.0
-soundfile>=0.10.3
-
-# ML / GPU
-torch>=2.0.0
-torchaudio>=2.0.0
-numpy>=1.24.0
-
-# Speaker diarization
-pyannote.audio>=3.1.1
-
-# AI providers
-openai>=1.50.0
-anthropic>=0.39.0
-requests>=2.28.0
-
-# Audio cleanup
-deepfilternet>=0.5.0
-
-# Utilities
-pydantic>=2.0.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.13.4
+aiosignal==1.4.0
+alembic==1.18.4
+annotated-doc==0.0.4
+annotated-types==0.7.0
+anthropic==0.86.0
+antlr4-python3-runtime==4.9.3
+anyio==4.13.0
+appdirs==1.4.4
+asteroid-filterbanks==0.4.0
+attrs==26.1.0
+av==17.0.0
+certifi==2026.2.25
+cffi==2.0.0
+charset-normalizer==3.4.6
+click==8.3.1
+colorlog==6.10.1
+contourpy==1.3.3
+ctranslate2==4.7.1
+cuda-bindings==12.9.4
+cuda-pathfinder==1.2.2
+cuda-toolkit==12.6.3
+cycler==0.12.1
+Cython==0.29.37
+decorator==5.2.1
+DeepFilterLib==0.5.6
+DeepFilterNet==0.5.6
+distro==1.9.0
+docstring_parser==0.17.0
+einops==0.8.2
+fastapi==0.135.2
+faster-whisper==1.2.1
+ffmpeg-python==0.2.0
+filelock==3.25.2
+flatbuffers==25.12.19
+fonttools==4.62.1
+frozenlist==1.8.0
+fsspec==2026.2.0
+future==1.0.0
+googleapis-common-protos==1.73.1
+greenlet==3.3.2
+grpcio==1.78.0
+h11==0.16.0
+hf-xet==1.4.2
+httpcore==1.0.9
+httptools==0.7.1
+httpx==0.28.1
+huggingface_hub==0.36.2
+HyperPyYAML==1.2.3
+idna==3.11
+ImageIO==2.37.3
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.1
+Jinja2==3.1.6
+jiter==0.13.0
+joblib==1.5.3
+julius==0.2.7
+kiwisolver==1.5.0
+lightning==2.6.1
+lightning-utilities==0.15.3
+loguru==0.7.3
+Mako==1.3.10
+markdown-it-py==4.0.0
+MarkupSafe==3.0.3
+matplotlib==3.10.8
+maturin==1.12.6
+mdurl==0.1.2
+moviepy==2.2.1
+mpmath==1.3.0
+multidict==6.7.1
+networkx==3.6.1
+nltk==3.9.4
+numpy==2.4.3
+nvidia-cublas-cu12==12.8.4.1
+nvidia-cuda-cupti-cu12==12.8.90
+nvidia-cuda-nvrtc-cu12==12.8.93
+nvidia-cuda-runtime-cu12==12.8.90
+nvidia-cudnn-cu12==9.10.2.21
+nvidia-cufft-cu12==11.3.3.83
+nvidia-cufile-cu12==1.13.1.3
+nvidia-curand-cu12==10.3.9.90
+nvidia-cusolver-cu12==11.7.3.90
+nvidia-cusparse-cu12==12.5.8.93
+nvidia-cusparselt-cu12==0.7.1
+nvidia-nccl-cu12==2.27.3
+nvidia-nvjitlink-cu12==12.8.93
+nvidia-nvshmem-cu12==3.4.5
+nvidia-nvtx-cu12==12.8.90
+omegaconf==2.3.0
+onnxruntime==1.24.4
+openai==2.30.0
+opentelemetry-api==1.40.0
+opentelemetry-exporter-otlp==1.40.0
+opentelemetry-exporter-otlp-proto-common==1.40.0
+opentelemetry-exporter-otlp-proto-grpc==1.40.0
+opentelemetry-exporter-otlp-proto-http==1.40.0
+opentelemetry-proto==1.40.0
+opentelemetry-sdk==1.40.0
+opentelemetry-semantic-conventions==0.61b0
+optuna==4.8.0
+packaging==23.2
+pandas==3.0.1
+pillow==11.3.0
+primePy==1.3
+proglog==0.1.12
+propcache==0.4.1
+protobuf==6.33.6
+pyannote-audio==4.0.4
+pyannote-core==6.0.1
+pyannote-database==6.1.1
+pyannote-metrics==4.0.0
+pyannote-pipeline==4.0.0
+pyannoteai-sdk==0.4.0
+pycparser==3.0
+pydantic==2.12.5
+pydantic_core==2.41.5
+Pygments==2.19.2
+pyparsing==3.3.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.2.2
+python-multipart==0.0.22
+pytorch-lightning==2.6.1
+pytorch-metric-learning==2.9.0
+PyYAML==6.0.3
+regex==2026.2.28
+requests==2.33.0
+rich==14.3.3
+ruamel.yaml==0.18.17
+ruamel.yaml.clib==0.2.15
+safetensors==0.7.0
+scikit-learn==1.8.0
+scipy==1.17.1
+setuptools==70.2.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+sortedcontainers==2.4.0
+soundfile==0.13.1
+SQLAlchemy==2.0.48
+starlette==1.0.0
+sympy==1.14.0
+threadpoolctl==3.6.0
+tokenizers==0.22.2
+torch==2.8.0
+torch-audiomentations==0.12.0
+torch_pitch_shift==1.2.5
+torchaudio==2.8.0
+torchmetrics==1.9.0
+tqdm==4.67.3
+transformers==4.57.6
+triton==3.4.0
+typer==0.24.1
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+urllib3==2.6.3
+uvicorn==0.42.0
+uvloop==0.22.1
+watchfiles==1.1.1
+websockets==16.0
+wheel==0.46.3
+whisperx==3.8.4
+yarl==1.23.0
+zipp==3.23.0
--- a/backend/routers/audio.py
+++ b/backend/routers/audio.py
@ -1,9 +1,14 @@
 """Audio processing endpoint (noise reduction / Studio Sound)."""

+import hashlib
 import logging
+import subprocess
+import tempfile
+from pathlib import Path
 from typing import Optional

-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import FileResponse
 from pydantic import BaseModel

 from services.audio_cleaner import clean_audio, is_deepfilter_available
@ -11,6 +16,9 @@ from services.audio_cleaner import clean_audio, is_deepfilter_available
 logger = logging.getLogger(__name__)
 router = APIRouter()

+# Simple in-process cache: md5 hex digest of "<path>:<mtime>" → extracted WAV path
+_waveform_cache: dict[str, str] = {}
+

 class AudioCleanRequest(BaseModel):
    input_path: str
@ -36,3 +44,58 @@ async def audio_capabilities():
    return {
        "deepfilternet_available": is_deepfilter_available(),
    }
+
+
+@router.get("/audio/waveform")
+async def get_waveform_audio(path: str = Query(...)):
+    """
+    Extract audio from any video/audio file and return it as a WAV.
+
+    The WAV is written to a fresh temp directory and remembered in the
+    in-process ``_waveform_cache`` so repeat requests for an unchanged file
+    skip FFmpeg. Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.
+
+    Args:
+        path: Filesystem path of the source media file (query parameter).
+
+    Returns:
+        FileResponse serving the extracted mono 22050 Hz 16-bit PCM WAV.
+
+    Raises:
+        HTTPException: 404 if ``path`` is not a file; 500 if FFmpeg cannot be
+            started, exits non-zero, or produces a missing/empty WAV.
+    """
+    # Function-scope imports: only this handler needs them.
+    import asyncio
+    import shutil
+
+    # NOTE(review): `path` is caller-supplied and used unvalidated, so any file
+    # readable by the server process can be handed to FFmpeg — confirm this
+    # endpoint is only reachable by trusted clients.
+    file_path = Path(path)
+    if not file_path.is_file():
+        logger.warning(f"[waveform] File not found: {path}")
+        raise HTTPException(status_code=404, detail=f"File not found: {path}")
+
+    # Cache key based on path + mtime so stale cache is auto-invalidated.
+    # md5 is only a cache key here, not a security primitive; saying so keeps
+    # it usable on FIPS-restricted Python builds.
+    mtime = file_path.stat().st_mtime
+    cache_key = hashlib.md5(
+        f"{path}:{mtime}".encode(), usedforsecurity=False
+    ).hexdigest()
+
+    if cache_key in _waveform_cache:
+        cached = Path(_waveform_cache[cache_key])
+        if cached.exists():
+            logger.info(f"[waveform] Cache hit for {file_path.name}")
+            return FileResponse(str(cached), media_type="audio/wav")
+        # The cached WAV was removed from disk; forget it and re-extract.
+        del _waveform_cache[cache_key]
+
+    logger.info(f"[waveform] Extracting audio from: {file_path.name}")
+    tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
+    out_wav = Path(tmp_dir) / f"{cache_key}.wav"
+
+    # Downsample to mono 22050 Hz — enough for waveform drawing, small file
+    cmd = [
+        "ffmpeg", "-y",
+        "-i", str(file_path),
+        "-vn",                    # drop video
+        "-ac", "1",               # mono
+        "-ar", "22050",           # 22 kHz sample rate
+        "-acodec", "pcm_s16le",   # 16-bit PCM WAV
+        str(out_wav),
+    ]
+    try:
+        # Run FFmpeg in a worker thread: a blocking subprocess.run() inside
+        # this coroutine would stall the event loop (and every other request)
+        # until extraction finishes. errors="replace" keeps undecodable bytes
+        # in FFmpeg's stderr from raising UnicodeDecodeError.
+        result = await asyncio.to_thread(
+            subprocess.run,
+            cmd,
+            capture_output=True,
+            text=True,
+            errors="replace",
+        )
+    except OSError as e:
+        # Raised when the process cannot be started, e.g. ffmpeg is missing
+        # from PATH or is not executable.
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+        logger.error(f"[waveform] Could not run FFmpeg for {file_path.name}: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to run FFmpeg: {e}"
+        ) from e
+
+    if result.returncode != 0:
+        # Do not leave the per-request temp directory behind on failure.
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+        logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}")
+        raise HTTPException(
+            status_code=500,
+            detail=f"Failed to extract audio: {result.stderr[-300:]}"
+        )
+
+    if not out_wav.exists() or out_wav.stat().st_size == 0:
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+        logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}")
+        raise HTTPException(status_code=500, detail="Audio extraction produced empty file")
+
+    logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}")
+    _waveform_cache[cache_key] = str(out_wav)
+    return FileResponse(str(out_wav), media_type="audio/wav")
--- a/backend/utils/audio_processing.py
+++ b/backend/utils/audio_processing.py
@ -15,8 +15,13 @@ _temp_audio_files = []

 def extract_audio(video_path: Path):
    """Extract audio from a video file into a temp directory for automatic cleanup."""
+    logger.info(f"[extract_audio] Extracting audio from: {video_path}")
    try:
        audio = AudioFileClip(str(video_path))
+        if audio.duration is None or audio.duration == 0:
+            logger.error(f"[extract_audio] File has no audio track or zero duration: {video_path}")
+            raise RuntimeError(f"File has no audio track: {video_path}")
+        logger.info(f"[extract_audio] Duration: {audio.duration:.2f}s, fps: {audio.fps}")
        temp_dir = tempfile.mkdtemp(prefix="videotranscriber_")
        audio_path = Path(temp_dir) / f"{video_path.stem}_audio.wav"
        try:
@ -25,9 +30,16 @@ def extract_audio(video_path: Path):
            # moviepy 1.x uses verbose parameter; moviepy 2.x removed it
            audio.write_audiofile(str(audio_path), verbose=False, logger=None)
        audio.close()
+        if not audio_path.exists() or audio_path.stat().st_size == 0:
+            logger.error(f"[extract_audio] Output WAV is empty or missing: {audio_path}")
+            raise RuntimeError(f"Audio extraction produced empty file: {audio_path}")
+        logger.info(f"[extract_audio] Extracted to: {audio_path} ({audio_path.stat().st_size} bytes)")
        _temp_audio_files.append(str(audio_path))
        return audio_path
+    except RuntimeError:
+        raise
    except Exception as e:
+        logger.error(f"[extract_audio] Failed for '{video_path}': {e}", exc_info=True)
        raise RuntimeError(f"Audio extraction failed: {e}")


@ -54,6 +66,9 @@ def get_video_duration(video_path: Path):
        clip = AudioFileClip(str(video_path))
        duration = clip.duration
        clip.close()
+        if duration is None or duration == 0:
+            logger.warning(f"[get_video_duration] Zero or null duration for: {video_path}")
        return duration
-    except Exception:
+    except Exception as e:
+        logger.error(f"[get_video_duration] Failed for '{video_path}': {e}", exc_info=True)
        return None