diff --git a/.gitignore b/.gitignore index 9f38f14..1f89a79 100644 --- a/.gitignore +++ b/.gitignore @@ -6,10 +6,15 @@ frontend/dist/ # Python venv/ +.venv312/ __pycache__/ *.pyc *.pyo *.egg-info/ +.pytest_cache/ +.mypy_cache/ +.coverage +htmlcov/ # IDE / Editor .vscode/ @@ -18,16 +23,33 @@ __pycache__/ # OS files .env +.env.local +.env.*.local .DS_Store Thumbs.db +*.swp +*.tmp # Logs *.log +logs/ -# Lock files (root only — frontend lock is committed) -/package-lock.json - -# Electron build output +# Build output +frontend/dist/ dist/ build/ *.asar +target/ +src-tauri/target/ + +# Node.js +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Rust +Cargo.lock + +# Lock files (root only — frontend lock is committed) +/package-lock.json diff --git a/backend/main.py b/backend/main.py index 7954733..c995cad 100644 --- a/backend/main.py +++ b/backend/main.py @@ -61,18 +61,37 @@ async def serve_local_file(request: Request, path: str = Query(...)): """Stream a local file with HTTP Range support (required for video seeking).""" file_path = Path(path) if not file_path.is_file(): + logger.warning(f"[serve_file] File not found: {path}") raise HTTPException(status_code=404, detail=f"File not found: {path}") file_size = file_path.stat().st_size content_type = MIME_MAP.get(file_path.suffix.lower(), "application/octet-stream") - range_header = request.headers.get("range") + + logger.info( + f"[serve_file] {file_path.name} | size={file_size} | " + f"type={content_type} | range={range_header or 'none'}" + ) + + if content_type == "application/octet-stream": + logger.warning( + f"[serve_file] Unknown MIME type for extension '{file_path.suffix}' — " + f"browser may fail to decode audio/video for '{file_path.name}'" + ) + + if file_size == 0: + logger.error(f"[serve_file] File is empty: {path}") + raise HTTPException(status_code=422, detail=f"File is empty: {path}") if range_header: - range_spec = range_header.replace("bytes=", "") - range_start_str, range_end_str = 
range_spec.split("-") - range_start = int(range_start_str) if range_start_str else 0 - range_end = int(range_end_str) if range_end_str else file_size - 1 - range_end = min(range_end, file_size - 1) + try: + range_spec = range_header.replace("bytes=", "") + range_start_str, range_end_str = range_spec.split("-") + range_start = int(range_start_str) if range_start_str else 0 + range_end = int(range_end_str) if range_end_str else file_size - 1 + range_end = min(range_end, file_size - 1) + except (ValueError, TypeError) as e: + logger.error(f"[serve_file] Malformed Range header '{range_header}': {e}") + raise HTTPException(status_code=416, detail=f"Invalid Range header: {range_header}") content_length = range_end - range_start + 1 def iter_range(): diff --git a/backend/requirements.txt b/backend/requirements.txt index b31aa6d..5908a95 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,33 +1,164 @@ -# FastAPI backend -fastapi>=0.115.0 -uvicorn[standard]>=0.32.0 -websockets>=14.0 -python-multipart>=0.0.12 - -# Transcription (WhisperX for word-level alignment) -whisperx>=3.1.0 -faster-whisper>=1.0.0 - -# Audio / Video processing -moviepy>=1.0.3 -ffmpeg-python>=0.2.0 -soundfile>=0.10.3 - -# ML / GPU -torch>=2.0.0 -torchaudio>=2.0.0 -numpy>=1.24.0 - -# Speaker diarization -pyannote.audio>=3.1.1 - -# AI providers -openai>=1.50.0 -anthropic>=0.39.0 -requests>=2.28.0 - -# Audio cleanup -deepfilternet>=0.5.0 - -# Utilities -pydantic>=2.0.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.13.4 +aiosignal==1.4.0 +alembic==1.18.4 +annotated-doc==0.0.4 +annotated-types==0.7.0 +anthropic==0.86.0 +antlr4-python3-runtime==4.9.3 +anyio==4.13.0 +appdirs==1.4.4 +asteroid-filterbanks==0.4.0 +attrs==26.1.0 +av==17.0.0 +certifi==2026.2.25 +cffi==2.0.0 +charset-normalizer==3.4.6 +click==8.3.1 +colorlog==6.10.1 +contourpy==1.3.3 +ctranslate2==4.7.1 +cuda-bindings==12.9.4 +cuda-pathfinder==1.2.2 +cuda-toolkit==12.6.3 +cycler==0.12.1 +Cython==0.29.37 +decorator==5.2.1 
+DeepFilterLib==0.5.6 +DeepFilterNet==0.5.6 +distro==1.9.0 +docstring_parser==0.17.0 +einops==0.8.2 +fastapi==0.135.2 +faster-whisper==1.2.1 +ffmpeg-python==0.2.0 +filelock==3.25.2 +flatbuffers==25.12.19 +fonttools==4.62.1 +frozenlist==1.8.0 +fsspec==2026.2.0 +future==1.0.0 +googleapis-common-protos==1.73.1 +greenlet==3.3.2 +grpcio==1.78.0 +h11==0.16.0 +hf-xet==1.4.2 +httpcore==1.0.9 +httptools==0.7.1 +httpx==0.28.1 +huggingface_hub==0.36.2 +HyperPyYAML==1.2.3 +idna==3.11 +ImageIO==2.37.3 +imageio-ffmpeg==0.6.0 +importlib_metadata==8.7.1 +Jinja2==3.1.6 +jiter==0.13.0 +joblib==1.5.3 +julius==0.2.7 +kiwisolver==1.5.0 +lightning==2.6.1 +lightning-utilities==0.15.3 +loguru==0.7.3 +Mako==1.3.10 +markdown-it-py==4.0.0 +MarkupSafe==3.0.3 +matplotlib==3.10.8 +maturin==1.12.6 +mdurl==0.1.2 +moviepy==2.2.1 +mpmath==1.3.0 +multidict==6.7.1 +networkx==3.6.1 +nltk==3.9.4 +numpy==2.4.3 +nvidia-cublas-cu12==12.8.4.1 +nvidia-cuda-cupti-cu12==12.8.90 +nvidia-cuda-nvrtc-cu12==12.8.93 +nvidia-cuda-runtime-cu12==12.8.90 +nvidia-cudnn-cu12==9.10.2.21 +nvidia-cufft-cu12==11.3.3.83 +nvidia-cufile-cu12==1.13.1.3 +nvidia-curand-cu12==10.3.9.90 +nvidia-cusolver-cu12==11.7.3.90 +nvidia-cusparse-cu12==12.5.8.93 +nvidia-cusparselt-cu12==0.7.1 +nvidia-nccl-cu12==2.27.3 +nvidia-nvjitlink-cu12==12.8.93 +nvidia-nvshmem-cu12==3.4.5 +nvidia-nvtx-cu12==12.8.90 +omegaconf==2.3.0 +onnxruntime==1.24.4 +openai==2.30.0 +opentelemetry-api==1.40.0 +opentelemetry-exporter-otlp==1.40.0 +opentelemetry-exporter-otlp-proto-common==1.40.0 +opentelemetry-exporter-otlp-proto-grpc==1.40.0 +opentelemetry-exporter-otlp-proto-http==1.40.0 +opentelemetry-proto==1.40.0 +opentelemetry-sdk==1.40.0 +opentelemetry-semantic-conventions==0.61b0 +optuna==4.8.0 +packaging==23.2 +pandas==3.0.1 +pillow==11.3.0 +primePy==1.3 +proglog==0.1.12 +propcache==0.4.1 +protobuf==6.33.6 +pyannote-audio==4.0.4 +pyannote-core==6.0.1 +pyannote-database==6.1.1 +pyannote-metrics==4.0.0 +pyannote-pipeline==4.0.0 +pyannoteai-sdk==0.4.0 
+pycparser==3.0 +pydantic==2.12.5 +pydantic_core==2.41.5 +Pygments==2.19.2 +pyparsing==3.3.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.2.2 +python-multipart==0.0.22 +pytorch-lightning==2.6.1 +pytorch-metric-learning==2.9.0 +PyYAML==6.0.3 +regex==2026.2.28 +requests==2.33.0 +rich==14.3.3 +ruamel.yaml==0.18.17 +ruamel.yaml.clib==0.2.15 +safetensors==0.7.0 +scikit-learn==1.8.0 +scipy==1.17.1 +setuptools==70.2.0 +shellingham==1.5.4 +six==1.17.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soundfile==0.13.1 +SQLAlchemy==2.0.48 +starlette==1.0.0 +sympy==1.14.0 +threadpoolctl==3.6.0 +tokenizers==0.22.2 +torch==2.8.0 +torch-audiomentations==0.12.0 +torch_pitch_shift==1.2.5 +torchaudio==2.8.0 +torchmetrics==1.9.0 +tqdm==4.67.3 +transformers==4.57.6 +triton==3.4.0 +typer==0.24.1 +typing-inspection==0.4.2 +typing_extensions==4.15.0 +urllib3==2.6.3 +uvicorn==0.42.0 +uvloop==0.22.1 +watchfiles==1.1.1 +websockets==16.0 +wheel==0.46.3 +whisperx==3.8.4 +yarl==1.23.0 +zipp==3.23.0 diff --git a/backend/routers/audio.py b/backend/routers/audio.py index 5e42f94..381552f 100644 --- a/backend/routers/audio.py +++ b/backend/routers/audio.py @@ -1,9 +1,14 @@ """Audio processing endpoint (noise reduction / Studio Sound).""" +import hashlib import logging +import subprocess +import tempfile +from pathlib import Path from typing import Optional -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, HTTPException, Query +from fastapi.responses import FileResponse from pydantic import BaseModel from services.audio_cleaner import clean_audio, is_deepfilter_available @@ -11,6 +16,9 @@ from services.audio_cleaner import clean_audio, is_deepfilter_available logger = logging.getLogger(__name__) router = APIRouter() +# Simple in-process cache: video path → extracted WAV path +_waveform_cache: dict[str, str] = {} + class AudioCleanRequest(BaseModel): input_path: str @@ -36,3 +44,58 @@ async def audio_capabilities(): return { "deepfilternet_available": 
is_deepfilter_available(), } + + +@router.get("/audio/waveform") +def get_waveform_audio(path: str = Query(...)): + """ + Extract audio from any video/audio file and return it as a WAV. + The WAV is cached on disk for subsequent requests. + Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc. Declared as a plain def (not async) so FastAPI runs the blocking FFmpeg call in its threadpool instead of stalling the event loop. + """ + file_path = Path(path) + if not file_path.is_file(): + logger.warning(f"[waveform] File not found: {path}") + raise HTTPException(status_code=404, detail=f"File not found: {path}") + + # Cache key based on path + mtime so stale cache is auto-invalidated + mtime = file_path.stat().st_mtime + cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest() + + if cache_key in _waveform_cache: + cached = Path(_waveform_cache[cache_key]) + if cached.exists(): + logger.info(f"[waveform] Cache hit for {file_path.name}") + return FileResponse(str(cached), media_type="audio/wav") + else: + _waveform_cache.pop(cache_key, None) + + logger.info(f"[waveform] Extracting audio from: {file_path.name}") + tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_") + out_wav = Path(tmp_dir) / f"{cache_key}.wav" + + # Downsample to mono 22050 Hz — enough for waveform drawing, small file + cmd = [ + "ffmpeg", "-y", + "-i", str(file_path), + "-vn", # drop video + "-ac", "1", # mono + "-ar", "22050", # 22 kHz sample rate + "-acodec", "pcm_s16le", # 16-bit PCM WAV + str(out_wav), + ] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}") + raise HTTPException( + status_code=500, + detail=f"Failed to extract audio: {result.stderr[-300:]}" + ) + + if not out_wav.exists() or out_wav.stat().st_size == 0: + logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}") + raise HTTPException(status_code=500, detail="Audio extraction produced empty file") + + logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}") +
_waveform_cache[cache_key] = str(out_wav) + return FileResponse(str(out_wav), media_type="audio/wav") diff --git a/backend/utils/audio_processing.py b/backend/utils/audio_processing.py index 7da6527..ae54242 100644 --- a/backend/utils/audio_processing.py +++ b/backend/utils/audio_processing.py @@ -15,8 +15,13 @@ _temp_audio_files = [] def extract_audio(video_path: Path): """Extract audio from a video file into a temp directory for automatic cleanup.""" + logger.info(f"[extract_audio] Extracting audio from: {video_path}") try: audio = AudioFileClip(str(video_path)) + if audio.duration is None or audio.duration == 0: + logger.error(f"[extract_audio] File has no audio track or zero duration: {video_path}") + raise RuntimeError(f"File has no audio track: {video_path}") + logger.info(f"[extract_audio] Duration: {audio.duration:.2f}s, fps: {audio.fps}") temp_dir = tempfile.mkdtemp(prefix="videotranscriber_") audio_path = Path(temp_dir) / f"{video_path.stem}_audio.wav" try: @@ -25,9 +30,16 @@ def extract_audio(video_path: Path): # moviepy 1.x uses verbose parameter; moviepy 2.x removed it audio.write_audiofile(str(audio_path), verbose=False, logger=None) audio.close() + if not audio_path.exists() or audio_path.stat().st_size == 0: + logger.error(f"[extract_audio] Output WAV is empty or missing: {audio_path}") + raise RuntimeError(f"Audio extraction produced empty file: {audio_path}") + logger.info(f"[extract_audio] Extracted to: {audio_path} ({audio_path.stat().st_size} bytes)") _temp_audio_files.append(str(audio_path)) return audio_path + except RuntimeError: + raise except Exception as e: + logger.error(f"[extract_audio] Failed for '{video_path}': {e}", exc_info=True) raise RuntimeError(f"Audio extraction failed: {e}") @@ -54,6 +66,9 @@ def get_video_duration(video_path: Path): clip = AudioFileClip(str(video_path)) duration = clip.duration clip.close() + if duration is None or duration == 0: + logger.warning(f"[get_video_duration] Zero or null duration for: 
{video_path}") return duration - except Exception: + except Exception as e: + logger.error(f"[get_video_duration] Failed for '{video_path}': {e}", exc_info=True) return None diff --git a/frontend/src/components/WaveformTimeline.tsx b/frontend/src/components/WaveformTimeline.tsx index 080a481..6572302 100644 --- a/frontend/src/components/WaveformTimeline.tsx +++ b/frontend/src/components/WaveformTimeline.tsx @@ -10,6 +10,7 @@ export default function WaveformTimeline() { const videoUrl = useEditorStore((s) => s.videoUrl); const videoPath = useEditorStore((s) => s.videoPath); + const backendUrl = useEditorStore((s) => s.backendUrl); const duration = useEditorStore((s) => s.duration); const deletedRanges = useEditorStore((s) => s.deletedRanges); const setCurrentTime = useEditorStore((s) => s.setCurrentTime); @@ -25,18 +26,62 @@ export default function WaveformTimeline() { const loadAudio = async () => { try { + const waveformUrl = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath!)}`; + console.log('[WaveformTimeline] Loading audio from waveform endpoint:', waveformUrl); const ctx = new AudioContext(); audioContextRef.current = ctx; - const response = await fetch(videoUrl); - if (!response.ok) throw new Error(`HTTP ${response.status}`); + const response = await fetch(waveformUrl); + if (!response.ok) { + const body = await response.text().catch(() => ''); + console.error( + `[WaveformTimeline] Fetch failed — HTTP ${response.status} ${response.statusText}`, + { url: waveformUrl, body } + ); + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const contentType = response.headers.get('content-type') ?? 'unknown'; + const contentLength = response.headers.get('content-length'); + console.log( + `[WaveformTimeline] Fetch OK — content-type: ${contentType}, size: ${contentLength ?? 
'unknown'} bytes` + ); + const arrayBuffer = await response.arrayBuffer(); - const audioBuffer = await ctx.decodeAudioData(arrayBuffer); + console.log(`[WaveformTimeline] ArrayBuffer size: ${arrayBuffer.byteLength} bytes`); + + if (arrayBuffer.byteLength === 0) { + throw new Error('Server returned an empty file'); + } + + let audioBuffer: AudioBuffer; + try { + audioBuffer = await ctx.decodeAudioData(arrayBuffer); + } catch (decodeErr) { + console.error( + '[WaveformTimeline] decodeAudioData failed — browser cannot decode this format.', + { + contentType, + byteLength: arrayBuffer.byteLength, + videoPath, + error: decodeErr, + } + ); + throw new Error( + `Browser could not decode audio (${contentType}). ` + + `For best compatibility use MP4/AAC or WebM/Opus. Raw error: ${decodeErr}` + ); + } + + console.log( + `[WaveformTimeline] Decoded OK — duration: ${audioBuffer.duration.toFixed(2)}s, ` + + `channels: ${audioBuffer.numberOfChannels}, sampleRate: ${audioBuffer.sampleRate}Hz` + ); audioBufferRef.current = audioBuffer; drawStaticWaveform(); } catch (err) { - console.warn('Could not decode audio for waveform:', err); - setAudioError('Waveform unavailable — audio could not be decoded'); + console.error('[WaveformTimeline] Waveform load failed:', err); + setAudioError(`Waveform unavailable — ${err instanceof Error ? 
err.message : 'audio could not be decoded'}`); } }; @@ -45,7 +90,7 @@ export default function WaveformTimeline() { return () => { audioContextRef.current?.close(); }; - }, [videoUrl, videoPath]); + }, [videoUrl, videoPath, backendUrl]); const drawStaticWaveform = useCallback(() => { const canvas = waveCanvasRef.current; diff --git a/open b/open index 48a801a..b6b9cab 100755 --- a/open +++ b/open @@ -1,4 +1,50 @@ #!/bin/bash # Open TalkEdit app (Tauri dev mode) cd "$(dirname "$0")" +PROJECT_DIR="$PWD" + +BACKEND_PORT=8000 +BACKEND_URL="http://127.0.0.1:${BACKEND_PORT}/health" + +# Check if backend is already running +if curl -sf "$BACKEND_URL" > /dev/null 2>&1; then + echo "Backend already running on port ${BACKEND_PORT}." +else + echo "Backend not running — starting it in a new terminal..." + VENV_PYTHON="${PROJECT_DIR}/.venv312/bin/python" + BACKEND_DIR="${PROJECT_DIR}/backend" + + # Try common terminal emulators in order + if command -v ghostty &>/dev/null; then + ghostty -e bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" & + elif command -v kitty &>/dev/null; then + kitty --title "TalkEdit Backend" -- bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" & + elif command -v alacritty &>/dev/null; then + alacritty --title "TalkEdit Backend" -e bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" & + elif command -v konsole &>/dev/null; then + konsole --new-tab -e bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" & + elif command -v gnome-terminal &>/dev/null; then + gnome-terminal --title "TalkEdit Backend" -- bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" & + elif command -v xterm &>/dev/null; then + xterm -T 
"TalkEdit Backend" -e bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" & + else + echo "No supported terminal emulator found. Starting backend in background..." + cd "${BACKEND_DIR}" && "${VENV_PYTHON}" -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT} & + fi + + # Wait up to 15s for backend to become ready + echo -n "Waiting for backend" + for i in $(seq 1 15); do + sleep 1 + echo -n "." + if curl -sf "$BACKEND_URL" > /dev/null 2>&1; then + echo " ready!" + break + fi + if [[ $i -eq 15 ]]; then + echo " timed out. Check the backend terminal for errors." + fi + done +fi + npx tauri dev diff --git a/package.json b/package.json index 38342d7..84b421e 100644 --- a/package.json +++ b/package.json @@ -23,8 +23,8 @@ "python-shell": "^5.0.0" }, "build": { - "appId": "com.dataants.cutscript", - "productName": "CutScript", + "appId": "com.talkedit.app", + "productName": "TalkEdit", "files": [ "electron/**/*", "frontend/dist/**/*", diff --git a/src-tauri/src/paths.rs b/src-tauri/src/paths.rs index 1f518c2..e47f73e 100644 --- a/src-tauri/src/paths.rs +++ b/src-tauri/src/paths.rs @@ -1,22 +1,60 @@ use std::path::PathBuf; -/// Resolve the project root from the executable path. -/// In dev mode, the binary lives at: /src-tauri/target/debug/ -/// So the project root is 4 levels above the binary. +/// Resolve the project root at runtime. +/// +/// Dev layout: /src-tauri/target/debug/ → walk up 4 levels +/// Packaged: use TAURI_RESOURCE_DIR env var set by the Tauri runtime, +/// falling back to a sibling `resources/` directory next to the exe. pub fn project_root() -> PathBuf { + // Tauri sets this env var when running packaged; prefer it. 
+ if let Some(res) = std::env::var_os("TAURI_RESOURCE_DIR").filter(|v| !v.is_empty()) { // var_os: accept non-UTF-8 paths; ignore an empty value + return PathBuf::from(res); + } let exe = std::env::current_exe().expect("Failed to get executable path"); - // exe -> debug/ -> target/ -> src-tauri/ -> root + // Dev: exe is at /src-tauri/target/debug/, walk up 4 levels. + if let Some(root) = exe + .parent() // debug/ + .and_then(|p| p.parent()) // target/ + .and_then(|p| p.parent()) // src-tauri/ + .and_then(|p| p.parent()) // project root + { + if root.join("backend").exists() { + return root.to_path_buf(); + } + } + // Packaged fallback: resources/ lives next to the exe. exe.parent() - .and_then(|p| p.parent()) - .and_then(|p| p.parent()) - .and_then(|p| p.parent()) - .map(|p| p.to_path_buf()) - .unwrap_or_else(|| PathBuf::from(".")) + .map(|p| p.join("resources")) + .unwrap_or_else(|| PathBuf::from("resources")) } -/// Absolute path to the venv Python 3.10 interpreter. +/// Absolute path to the bundled Python interpreter. +/// Tries the packaged python/bin/python3 first, then .venv312, then .venv (legacy). pub fn python_exe() -> PathBuf { - project_root().join(".venv/bin/python3.10") + let root = project_root(); + // Packaged layout: resources/python/bin/python3 + let bundled = root.join("python").join("bin").join("python3"); + if bundled.is_file() { + return bundled; + } + // Dev: prefer .venv312 (Python 3.12), fall back to .venv + let venv312 = root.join(".venv312").join("bin").join("python3.12"); + if venv312.is_file() { + return venv312; + } + root.join(".venv").join("bin").join("python3") +} + +/// Absolute path to the bundled ffmpeg binary. +/// Uses a sidecar in resources/bin/ when packaged, otherwise expects it on PATH. +pub fn ffmpeg_exe() -> PathBuf { + let root = project_root(); + let bundled = root.join("bin").join("ffmpeg"); + if bundled.is_file() { + return bundled; + } + // Fallback to system ffmpeg during development + PathBuf::from("ffmpeg") } /// Absolute path to a script in the backend directory.