diff --git a/.gitignore b/.gitignore index 8f4d854..ff7950b 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ Thumbs.db *.log logs/ cache/ +*.aive # Build output frontend/dist/ diff --git a/FEATURES.md b/FEATURES.md index 667748d..ed49a76 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -36,6 +36,8 @@ Features are grouped by priority. Check off items as they are implemented. - [ ] **Re-transcribe selection** — if Whisper gets a section wrong, let the user select a word range and re-run transcription on just that segment (optionally with a different model or language). +- [ ] **Optional VibeVoice-ASR-HF transcription backend (future)** — evaluate as an alternate transcription mode for long-form, speaker-attributed transcripts. Keep WhisperX as the default for word-level timestamp editing. + - [ ] **Word text correction** — allow editing the transcript text of a word without affecting its timing. Whisper gets homophones/proper nouns wrong constantly. Pure frontend state change; no backend needed. - [ ] **Named timeline markers** — drop named marker pins on the waveform (like Resolve markers). Store as `{ id, time, label, color }` in the project. Rendered as colored triangles on the timeline canvas. diff --git a/backend/routers/audio.py b/backend/routers/audio.py index 423ac0d..509e91a 100644 --- a/backend/routers/audio.py +++ b/backend/routers/audio.py @@ -7,7 +7,7 @@ import tempfile from pathlib import Path from typing import Optional -from fastapi import APIRouter, HTTPException, Query +from fastapi import APIRouter, HTTPException, Query, Request from fastapi.responses import FileResponse from pydantic import BaseModel @@ -71,30 +71,54 @@ async def detect_silence_endpoint(req: SilenceDetectRequest): @router.get("/audio/waveform") -async def get_waveform_audio(path: str = Query(...)): +async def get_waveform_audio(request: Request, path: str = Query(...)): """ Extract audio from any video/audio file and return it as a WAV. The WAV is cached on disk for subsequent requests. Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc. """ + req_id = hashlib.md5(f"{path}:{request.url}".encode()).hexdigest()[:10] file_path = Path(path) + logger.info( + "[waveform:%s] request raw_url=%s raw_query=%s decoded_path=%r path_len=%s", + req_id, + str(request.url), + request.url.query, + path, + len(path), + ) + + try: + resolved_path = file_path.expanduser().resolve(strict=False) + except Exception: + resolved_path = file_path + + logger.info( + "[waveform:%s] normalized path=%s exists=%s is_file=%s", + req_id, + resolved_path, + file_path.exists(), + file_path.is_file(), + ) + if not file_path.is_file(): - logger.warning(f"[waveform] File not found: {path}") + logger.warning("[waveform:%s] file_not_found path=%r", req_id, path) raise HTTPException(status_code=404, detail=f"File not found: {path}") # Cache key based on path + mtime so stale cache is auto-invalidated mtime = file_path.stat().st_mtime cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest() + logger.info("[waveform:%s] cache_key=%s mtime=%s", req_id, cache_key, mtime) if cache_key in _waveform_cache: cached = Path(_waveform_cache[cache_key]) if cached.exists(): - logger.info(f"[waveform] Cache hit for {file_path.name}") + logger.info("[waveform:%s] cache_hit cached=%s", req_id, cached) return FileResponse(str(cached), media_type="audio/wav") else: del _waveform_cache[cache_key] - logger.info(f"[waveform] Extracting audio from: {file_path.name}") + logger.info("[waveform:%s] cache_miss extracting file=%s", req_id, file_path) tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_") out_wav = Path(tmp_dir) / f"{cache_key}.wav" @@ -108,18 +132,35 @@ async def get_waveform_audio(path: str = Query(...)): "-acodec", "pcm_s16le", # 16-bit PCM WAV str(out_wav), ] + logger.info("[waveform:%s] ffmpeg_cmd=%s", req_id, " ".join(cmd)) result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: - logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}") + logger.error( + "[waveform:%s] ffmpeg_failed returncode=%s stderr_tail=%s", + req_id, + result.returncode, + result.stderr[-2000:], + ) raise HTTPException( status_code=500, detail=f"Failed to extract audio: {result.stderr[-300:]}" ) if not out_wav.exists() or out_wav.stat().st_size == 0: - logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}") + logger.error( + "[waveform:%s] empty_output out_wav=%s exists=%s size=%s", + req_id, + out_wav, + out_wav.exists(), + out_wav.stat().st_size if out_wav.exists() else -1, + ) raise HTTPException(status_code=500, detail="Audio extraction produced empty file") - logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}") + logger.info( + "[waveform:%s] extracted_bytes=%s out_wav=%s", + req_id, + out_wav.stat().st_size, + out_wav, + ) _waveform_cache[cache_key] = str(out_wav) return FileResponse(str(out_wav), media_type="audio/wav") diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 76feaf8..3d1cfe3 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -23,6 +23,7 @@ import { } from 'lucide-react'; const IS_ELECTRON = !!window.electronAPI; +const LAST_MEDIA_PATH_KEY = 'talkedit:lastMediaPath'; type Panel = 'ai' | 'settings' | 'export' | 'silence' | null; @@ -74,6 +75,23 @@ export default function App() { // The backend URL is fixed at 127.0.0.1:8000 so we rely on the store default. }, [setBackendUrl]); + useEffect(() => { + if (!IS_ELECTRON || videoPath) return; + const savedPath = sessionStorage.getItem(LAST_MEDIA_PATH_KEY); + if (savedPath) { + loadVideo(savedPath); + } + }, [videoPath, loadVideo]); + + useEffect(() => { + if (!IS_ELECTRON) return; + if (videoPath) { + sessionStorage.setItem(LAST_MEDIA_PATH_KEY, videoPath); + return; + } + sessionStorage.removeItem(LAST_MEDIA_PATH_KEY); + }, [videoPath]); + const handleLoadProject = async () => { if (!IS_ELECTRON) return; try { diff --git a/frontend/src/components/WaveformTimeline.tsx b/frontend/src/components/WaveformTimeline.tsx index d32759d..7509f53 100644 --- a/frontend/src/components/WaveformTimeline.tsx +++ b/frontend/src/components/WaveformTimeline.tsx @@ -67,19 +67,27 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole setAudioError(null); const loadAudio = async () => { + const requestId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`; try { const waveformUrl = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath!)}`; - console.log('[WaveformTimeline] backendUrl:', backendUrl, '| videoPath:', videoPath); - console.log('[WaveformTimeline] Fetching:', waveformUrl); + console.log('[WaveformTimeline] req=', requestId, 'backendUrl=', backendUrl, 'videoPath=', videoPath); + console.log('[WaveformTimeline] req=', requestId, 'fetching=', waveformUrl); const ctx = new AudioContext(); audioContextRef.current = ctx; + const startedAt = performance.now(); const response = await fetch(waveformUrl); + const elapsedMs = Math.round(performance.now() - startedAt); if (!response.ok) { const body = await response.text().catch(() => ''); console.error( - `[WaveformTimeline] Fetch failed — HTTP ${response.status} ${response.statusText}`, - { url: waveformUrl, body } + `[WaveformTimeline] req=${requestId} fetch failed — HTTP ${response.status} ${response.statusText}`, + { + url: waveformUrl, + decodedPath: videoPath, + elapsedMs, + body, + } ); throw new Error(`HTTP ${response.status}: ${response.statusText}`); } @@ -87,11 +95,11 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole const contentType = response.headers.get('content-type') ?? 'unknown'; const contentLength = response.headers.get('content-length'); console.log( - `[WaveformTimeline] Fetch OK — content-type: ${contentType}, size: ${contentLength ?? 'unknown'} bytes` + `[WaveformTimeline] req=${requestId} fetch ok — content-type: ${contentType}, size: ${contentLength ?? 'unknown'} bytes, elapsed: ${elapsedMs}ms` ); const arrayBuffer = await response.arrayBuffer(); - console.log(`[WaveformTimeline] ArrayBuffer size: ${arrayBuffer.byteLength} bytes`); + console.log(`[WaveformTimeline] req=${requestId} arrayBuffer size: ${arrayBuffer.byteLength} bytes`); if (arrayBuffer.byteLength === 0) { throw new Error('Server returned an empty file'); @@ -104,6 +112,7 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole console.error( '[WaveformTimeline] decodeAudioData failed — browser cannot decode this format.', { + requestId, contentType, byteLength: arrayBuffer.byteLength, videoPath, @@ -117,13 +126,19 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole } console.log( - `[WaveformTimeline] Decoded OK — duration: ${audioBuffer.duration.toFixed(2)}s, ` + + `[WaveformTimeline] req=${requestId} decoded ok — duration: ${audioBuffer.duration.toFixed(2)}s, ` + `channels: ${audioBuffer.numberOfChannels}, sampleRate: ${audioBuffer.sampleRate}Hz` ); audioBufferRef.current = audioBuffer; drawStaticWaveform(); } catch (err) { - console.error('[WaveformTimeline] Waveform load failed:', err); + console.error('[WaveformTimeline] waveform load failed', { + requestId, + error: err, + videoPath, + backendUrl, + encodedPath: encodeURIComponent(videoPath ?? ''), + }); const waveformUrl2 = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath ?? '')}`; setAudioError(`Waveform unavailable — ${err instanceof Error ? err.message : 'audio could not be decoded'} [URL: ${waveformUrl2}]`); } diff --git a/open b/open index b6b9cab..ac5e248 100755 --- a/open +++ b/open @@ -11,7 +11,25 @@ if curl -sf "$BACKEND_URL" > /dev/null 2>&1; then echo "Backend already running on port ${BACKEND_PORT}." else echo "Backend not running — starting it in a new terminal..." - VENV_PYTHON="${PROJECT_DIR}/.venv312/bin/python" + VENV_PYTHON="" + PYTHON_CANDIDATES=( + "${PROJECT_DIR}/.venv312/bin/python3.12" + "${PROJECT_DIR}/.venv312/bin/python" + "${PROJECT_DIR}/.venv/bin/python3" + "${PROJECT_DIR}/.venv/bin/python" + "${PROJECT_DIR}/venv/bin/python3" + "${PROJECT_DIR}/venv/bin/python" + ) + for candidate in "${PYTHON_CANDIDATES[@]}"; do + if [[ -x "${candidate}" ]]; then + VENV_PYTHON="${candidate}" + break + fi + done + if [[ -z "${VENV_PYTHON}" ]]; then + echo "No project virtualenv Python found. Checked: .venv312, .venv, venv" + exit 1 + fi BACKEND_DIR="${PROJECT_DIR}/backend" # Try common terminal emulators in order diff --git a/package.json b/package.json index 84b421e..d376a1e 100644 --- a/package.json +++ b/package.json @@ -7,10 +7,10 @@ "scripts": { "tauri": "tauri", "dev": "cd frontend && npm run dev -- --host", - "dev:tauri": "cd backend && python -m uvicorn main:app --reload --port 8642 & cd frontend && cargo tauri dev", + "dev:tauri": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; (cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642) & cd frontend && cargo tauri dev'", "build:tauri": "cd frontend && cargo tauri build", "dev:frontend": "cd frontend && npm run dev", - "dev:backend": "cd backend && python -m uvicorn main:app --reload --port 8642", + "dev:backend": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642'", "lint": "cd frontend && npm run lint" }, "devDependencies": { diff --git a/src-tauri/src/paths.rs b/src-tauri/src/paths.rs index fc3df6d..641eeee 100644 --- a/src-tauri/src/paths.rs +++ b/src-tauri/src/paths.rs @@ -29,7 +29,7 @@ pub fn project_root() -> PathBuf { } /// Absolute path to the bundled Python interpreter. -/// Tries .venv312 first (new), falls back to .venv (legacy). +/// Tries project virtualenvs in a fixed order so all runtime paths agree. pub fn python_exe() -> PathBuf { let root = project_root(); // Packaged layout: resources/python/bin/python3 @@ -37,12 +37,24 @@ pub fn python_exe() -> PathBuf { if bundled.exists() { return bundled; } - // Dev: prefer .venv312 (Python 3.12), fall back to .venv - let venv312 = root.join(".venv312").join("bin").join("python3.12"); - if venv312.exists() { - return venv312; + + let candidates = [ + root.join(".venv312").join("bin").join("python3.12"), + root.join(".venv312").join("bin").join("python"), + root.join(".venv").join("bin").join("python3"), + root.join(".venv").join("bin").join("python"), + root.join("venv").join("bin").join("python3"), + root.join("venv").join("bin").join("python"), + ]; + + for candidate in candidates { + if candidate.exists() { + return candidate; + } } - root.join(".venv").join("bin").join("python3") + + // Last-resort path if no environment is present. + root.join(".venv312").join("bin").join("python3.12") } /// Absolute path to a script in the backend directory. diff --git a/src-tauri/src/transcription.rs b/src-tauri/src/transcription.rs index 1a228cb..b60a18d 100644 --- a/src-tauri/src/transcription.rs +++ b/src-tauri/src/transcription.rs @@ -46,7 +46,6 @@ pub fn transcribe_audio( // Run Python script with timeout let output = Command::new(python_exe) .args(&args) - .env("PYTHONPATH", crate::paths::project_root().join(".venv312").join("lib").join("python3.12").join("site-packages")) .output() .map_err(|e| format!("Failed to run Python script: {}", e))?; diff --git a/test_api.py b/test_api.py index 0553c6f..806c675 100755 --- a/test_api.py +++ b/test_api.py @@ -1,4 +1,4 @@ -#!/home/dillon/_code/TalkEdit/.venv312/bin/python3.12 +#!/usr/bin/env python3 """ Test script for the TalkEdit API. This script tests the new Tauri commands that expose all backend functions.