changed to python312
This commit is contained in:
@ -61,18 +61,37 @@ async def serve_local_file(request: Request, path: str = Query(...)):
|
||||
"""Stream a local file with HTTP Range support (required for video seeking)."""
|
||||
file_path = Path(path)
|
||||
if not file_path.is_file():
|
||||
logger.warning(f"[serve_file] File not found: {path}")
|
||||
raise HTTPException(status_code=404, detail=f"File not found: {path}")
|
||||
|
||||
file_size = file_path.stat().st_size
|
||||
content_type = MIME_MAP.get(file_path.suffix.lower(), "application/octet-stream")
|
||||
|
||||
range_header = request.headers.get("range")
|
||||
|
||||
logger.info(
|
||||
f"[serve_file] {file_path.name} | size={file_size} | "
|
||||
f"type={content_type} | range={range_header or 'none'}"
|
||||
)
|
||||
|
||||
if content_type == "application/octet-stream":
|
||||
logger.warning(
|
||||
f"[serve_file] Unknown MIME type for extension '{file_path.suffix}' — "
|
||||
f"browser may fail to decode audio/video for '{file_path.name}'"
|
||||
)
|
||||
|
||||
if file_size == 0:
|
||||
logger.error(f"[serve_file] File is empty: {path}")
|
||||
raise HTTPException(status_code=422, detail=f"File is empty: {path}")
|
||||
if range_header:
|
||||
range_spec = range_header.replace("bytes=", "")
|
||||
range_start_str, range_end_str = range_spec.split("-")
|
||||
range_start = int(range_start_str) if range_start_str else 0
|
||||
range_end = int(range_end_str) if range_end_str else file_size - 1
|
||||
range_end = min(range_end, file_size - 1)
|
||||
try:
|
||||
range_spec = range_header.replace("bytes=", "")
|
||||
range_start_str, range_end_str = range_spec.split("-")
|
||||
range_start = int(range_start_str) if range_start_str else 0
|
||||
range_end = int(range_end_str) if range_end_str else file_size - 1
|
||||
range_end = min(range_end, file_size - 1)
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.error(f"[serve_file] Malformed Range header '{range_header}': {e}")
|
||||
raise HTTPException(status_code=416, detail=f"Invalid Range header: {range_header}")
|
||||
content_length = range_end - range_start + 1
|
||||
|
||||
def iter_range():
|
||||
|
||||
@ -1,33 +1,164 @@
|
||||
# FastAPI backend
|
||||
fastapi>=0.115.0
|
||||
uvicorn[standard]>=0.32.0
|
||||
websockets>=14.0
|
||||
python-multipart>=0.0.12
|
||||
|
||||
# Transcription (WhisperX for word-level alignment)
|
||||
whisperx>=3.1.0
|
||||
faster-whisper>=1.0.0
|
||||
|
||||
# Audio / Video processing
|
||||
moviepy>=1.0.3
|
||||
ffmpeg-python>=0.2.0
|
||||
soundfile>=0.10.3
|
||||
|
||||
# ML / GPU
|
||||
torch>=2.0.0
|
||||
torchaudio>=2.0.0
|
||||
numpy>=1.24.0
|
||||
|
||||
# Speaker diarization
|
||||
pyannote.audio>=3.1.1
|
||||
|
||||
# AI providers
|
||||
openai>=1.50.0
|
||||
anthropic>=0.39.0
|
||||
requests>=2.28.0
|
||||
|
||||
# Audio cleanup
|
||||
deepfilternet>=0.5.0
|
||||
|
||||
# Utilities
|
||||
pydantic>=2.0.0
|
||||
aiohappyeyeballs==2.6.1
|
||||
aiohttp==3.13.4
|
||||
aiosignal==1.4.0
|
||||
alembic==1.18.4
|
||||
annotated-doc==0.0.4
|
||||
annotated-types==0.7.0
|
||||
anthropic==0.86.0
|
||||
antlr4-python3-runtime==4.9.3
|
||||
anyio==4.13.0
|
||||
appdirs==1.4.4
|
||||
asteroid-filterbanks==0.4.0
|
||||
attrs==26.1.0
|
||||
av==17.0.0
|
||||
certifi==2026.2.25
|
||||
cffi==2.0.0
|
||||
charset-normalizer==3.4.6
|
||||
click==8.3.1
|
||||
colorlog==6.10.1
|
||||
contourpy==1.3.3
|
||||
ctranslate2==4.7.1
|
||||
cuda-bindings==12.9.4
|
||||
cuda-pathfinder==1.2.2
|
||||
cuda-toolkit==12.6.3
|
||||
cycler==0.12.1
|
||||
Cython==0.29.37
|
||||
decorator==5.2.1
|
||||
DeepFilterLib==0.5.6
|
||||
DeepFilterNet==0.5.6
|
||||
distro==1.9.0
|
||||
docstring_parser==0.17.0
|
||||
einops==0.8.2
|
||||
fastapi==0.135.2
|
||||
faster-whisper==1.2.1
|
||||
ffmpeg-python==0.2.0
|
||||
filelock==3.25.2
|
||||
flatbuffers==25.12.19
|
||||
fonttools==4.62.1
|
||||
frozenlist==1.8.0
|
||||
fsspec==2026.2.0
|
||||
future==1.0.0
|
||||
googleapis-common-protos==1.73.1
|
||||
greenlet==3.3.2
|
||||
grpcio==1.78.0
|
||||
h11==0.16.0
|
||||
hf-xet==1.4.2
|
||||
httpcore==1.0.9
|
||||
httptools==0.7.1
|
||||
httpx==0.28.1
|
||||
huggingface_hub==0.36.2
|
||||
HyperPyYAML==1.2.3
|
||||
idna==3.11
|
||||
ImageIO==2.37.3
|
||||
imageio-ffmpeg==0.6.0
|
||||
importlib_metadata==8.7.1
|
||||
Jinja2==3.1.6
|
||||
jiter==0.13.0
|
||||
joblib==1.5.3
|
||||
julius==0.2.7
|
||||
kiwisolver==1.5.0
|
||||
lightning==2.6.1
|
||||
lightning-utilities==0.15.3
|
||||
loguru==0.7.3
|
||||
Mako==1.3.10
|
||||
markdown-it-py==4.0.0
|
||||
MarkupSafe==3.0.3
|
||||
matplotlib==3.10.8
|
||||
maturin==1.12.6
|
||||
mdurl==0.1.2
|
||||
moviepy==2.2.1
|
||||
mpmath==1.3.0
|
||||
multidict==6.7.1
|
||||
networkx==3.6.1
|
||||
nltk==3.9.4
|
||||
numpy==2.4.3
|
||||
nvidia-cublas-cu12==12.8.4.1
|
||||
nvidia-cuda-cupti-cu12==12.8.90
|
||||
nvidia-cuda-nvrtc-cu12==12.8.93
|
||||
nvidia-cuda-runtime-cu12==12.8.90
|
||||
nvidia-cudnn-cu12==9.10.2.21
|
||||
nvidia-cufft-cu12==11.3.3.83
|
||||
nvidia-cufile-cu12==1.13.1.3
|
||||
nvidia-curand-cu12==10.3.9.90
|
||||
nvidia-cusolver-cu12==11.7.3.90
|
||||
nvidia-cusparse-cu12==12.5.8.93
|
||||
nvidia-cusparselt-cu12==0.7.1
|
||||
nvidia-nccl-cu12==2.27.3
|
||||
nvidia-nvjitlink-cu12==12.8.93
|
||||
nvidia-nvshmem-cu12==3.4.5
|
||||
nvidia-nvtx-cu12==12.8.90
|
||||
omegaconf==2.3.0
|
||||
onnxruntime==1.24.4
|
||||
openai==2.30.0
|
||||
opentelemetry-api==1.40.0
|
||||
opentelemetry-exporter-otlp==1.40.0
|
||||
opentelemetry-exporter-otlp-proto-common==1.40.0
|
||||
opentelemetry-exporter-otlp-proto-grpc==1.40.0
|
||||
opentelemetry-exporter-otlp-proto-http==1.40.0
|
||||
opentelemetry-proto==1.40.0
|
||||
opentelemetry-sdk==1.40.0
|
||||
opentelemetry-semantic-conventions==0.61b0
|
||||
optuna==4.8.0
|
||||
packaging==23.2
|
||||
pandas==3.0.1
|
||||
pillow==11.3.0
|
||||
primePy==1.3
|
||||
proglog==0.1.12
|
||||
propcache==0.4.1
|
||||
protobuf==6.33.6
|
||||
pyannote-audio==4.0.4
|
||||
pyannote-core==6.0.1
|
||||
pyannote-database==6.1.1
|
||||
pyannote-metrics==4.0.0
|
||||
pyannote-pipeline==4.0.0
|
||||
pyannoteai-sdk==0.4.0
|
||||
pycparser==3.0
|
||||
pydantic==2.12.5
|
||||
pydantic_core==2.41.5
|
||||
Pygments==2.19.2
|
||||
pyparsing==3.3.2
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.2.2
|
||||
python-multipart==0.0.22
|
||||
pytorch-lightning==2.6.1
|
||||
pytorch-metric-learning==2.9.0
|
||||
PyYAML==6.0.3
|
||||
regex==2026.2.28
|
||||
requests==2.33.0
|
||||
rich==14.3.3
|
||||
ruamel.yaml==0.18.17
|
||||
ruamel.yaml.clib==0.2.15
|
||||
safetensors==0.7.0
|
||||
scikit-learn==1.8.0
|
||||
scipy==1.17.1
|
||||
setuptools==70.2.0
|
||||
shellingham==1.5.4
|
||||
six==1.17.0
|
||||
sniffio==1.3.1
|
||||
sortedcontainers==2.4.0
|
||||
soundfile==0.13.1
|
||||
SQLAlchemy==2.0.48
|
||||
starlette==1.0.0
|
||||
sympy==1.14.0
|
||||
threadpoolctl==3.6.0
|
||||
tokenizers==0.22.2
|
||||
torch==2.8.0
|
||||
torch-audiomentations==0.12.0
|
||||
torch_pitch_shift==1.2.5
|
||||
torchaudio==2.8.0
|
||||
torchmetrics==1.9.0
|
||||
tqdm==4.67.3
|
||||
transformers==4.57.6
|
||||
triton==3.4.0
|
||||
typer==0.24.1
|
||||
typing-inspection==0.4.2
|
||||
typing_extensions==4.15.0
|
||||
urllib3==2.6.3
|
||||
uvicorn==0.42.0
|
||||
uvloop==0.22.1
|
||||
watchfiles==1.1.1
|
||||
websockets==16.0
|
||||
wheel==0.46.3
|
||||
whisperx==3.8.4
|
||||
yarl==1.23.0
|
||||
zipp==3.23.0
|
||||
|
||||
@ -1,9 +1,14 @@
|
||||
"""Audio processing endpoint (noise reduction / Studio Sound)."""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from fastapi.responses import FileResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.audio_cleaner import clean_audio, is_deepfilter_available
|
||||
@ -11,6 +16,9 @@ from services.audio_cleaner import clean_audio, is_deepfilter_available
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
# Simple in-process cache: video path → extracted WAV path
|
||||
_waveform_cache: dict[str, str] = {}
|
||||
|
||||
|
||||
class AudioCleanRequest(BaseModel):
|
||||
input_path: str
|
||||
@ -36,3 +44,58 @@ async def audio_capabilities():
|
||||
return {
|
||||
"deepfilternet_available": is_deepfilter_available(),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/audio/waveform")
async def get_waveform_audio(path: str = Query(...)):
    """
    Extract audio from any video/audio file and return it as a WAV.

    The WAV is cached on disk for subsequent requests.
    Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.

    Args:
        path: Filesystem path of the source media file (query parameter).

    Returns:
        A ``FileResponse`` serving a mono, 22050 Hz, 16-bit PCM WAV.

    Raises:
        HTTPException: 404 when ``path`` is not an existing file; 500 when
            FFmpeg cannot be started, exits non-zero, or produces a missing
            or empty output file.
    """
    # Function-scope imports: only this handler needs them.
    import asyncio
    import shutil

    file_path = Path(path)
    if not file_path.is_file():
        logger.warning(f"[waveform] File not found: {path}")
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # Cache key based on path + mtime so stale cache is auto-invalidated
    mtime = file_path.stat().st_mtime
    cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()

    cached_path = _waveform_cache.get(cache_key)
    if cached_path is not None:
        if Path(cached_path).exists():
            logger.info(f"[waveform] Cache hit for {file_path.name}")
            return FileResponse(cached_path, media_type="audio/wav")
        # The cached WAV vanished from disk. pop() instead of del: another
        # request may drop the same entry while this handler is suspended.
        _waveform_cache.pop(cache_key, None)

    logger.info(f"[waveform] Extracting audio from: {file_path.name}")
    tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
    out_wav = Path(tmp_dir) / f"{cache_key}.wav"

    # Downsample to mono 22050 Hz — enough for waveform drawing, small file
    cmd = [
        "ffmpeg", "-y",
        "-i", str(file_path),
        "-vn",                   # drop video
        "-ac", "1",              # mono
        "-ar", "22050",          # 22 kHz sample rate
        "-acodec", "pcm_s16le",  # 16-bit PCM WAV
        str(out_wav),
    ]

    try:
        # Run FFmpeg in a worker thread: subprocess.run() blocks until the
        # process exits, which would otherwise stall the event loop (and every
        # other request) for the whole extraction.
        # errors="replace" keeps non-UTF-8 bytes in FFmpeg's diagnostics from
        # raising UnicodeDecodeError while decoding stderr.
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            errors="replace",
        )
    except OSError as e:
        # Typically FileNotFoundError when the ffmpeg binary is not on PATH.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.error(f"[waveform] Could not run FFmpeg for {file_path.name}: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to extract audio: {e}",
        ) from e

    if result.returncode != 0:
        # Do not leave the temp directory (and any partial WAV) behind.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to extract audio: {result.stderr[-300:]}"
        )

    wav_size = out_wav.stat().st_size if out_wav.exists() else 0
    if wav_size == 0:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}")
        raise HTTPException(status_code=500, detail="Audio extraction produced empty file")

    logger.info(f"[waveform] Extracted {wav_size} bytes for {file_path.name}")
    _waveform_cache[cache_key] = str(out_wav)
    return FileResponse(str(out_wav), media_type="audio/wav")
|
||||
|
||||
@ -15,8 +15,13 @@ _temp_audio_files = []
|
||||
|
||||
def extract_audio(video_path: Path):
|
||||
"""Extract audio from a video file into a temp directory for automatic cleanup."""
|
||||
logger.info(f"[extract_audio] Extracting audio from: {video_path}")
|
||||
try:
|
||||
audio = AudioFileClip(str(video_path))
|
||||
if audio.duration is None or audio.duration == 0:
|
||||
logger.error(f"[extract_audio] File has no audio track or zero duration: {video_path}")
|
||||
raise RuntimeError(f"File has no audio track: {video_path}")
|
||||
logger.info(f"[extract_audio] Duration: {audio.duration:.2f}s, fps: {audio.fps}")
|
||||
temp_dir = tempfile.mkdtemp(prefix="videotranscriber_")
|
||||
audio_path = Path(temp_dir) / f"{video_path.stem}_audio.wav"
|
||||
try:
|
||||
@ -25,9 +30,16 @@ def extract_audio(video_path: Path):
|
||||
# moviepy 1.x uses verbose parameter; moviepy 2.x removed it
|
||||
audio.write_audiofile(str(audio_path), verbose=False, logger=None)
|
||||
audio.close()
|
||||
if not audio_path.exists() or audio_path.stat().st_size == 0:
|
||||
logger.error(f"[extract_audio] Output WAV is empty or missing: {audio_path}")
|
||||
raise RuntimeError(f"Audio extraction produced empty file: {audio_path}")
|
||||
logger.info(f"[extract_audio] Extracted to: {audio_path} ({audio_path.stat().st_size} bytes)")
|
||||
_temp_audio_files.append(str(audio_path))
|
||||
return audio_path
|
||||
except RuntimeError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"[extract_audio] Failed for '{video_path}': {e}", exc_info=True)
|
||||
raise RuntimeError(f"Audio extraction failed: {e}")
|
||||
|
||||
|
||||
@ -54,6 +66,9 @@ def get_video_duration(video_path: Path):
|
||||
clip = AudioFileClip(str(video_path))
|
||||
duration = clip.duration
|
||||
clip.close()
|
||||
if duration is None or duration == 0:
|
||||
logger.warning(f"[get_video_duration] Zero or null duration for: {video_path}")
|
||||
return duration
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
logger.error(f"[get_video_duration] Failed for '{video_path}': {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user