Debug waveform loading: add request-scoped logging, restore last media path, unify virtualenv Python lookup

2026-04-09 01:36:28 -06:00
parent f9cd2bf579
commit 1d17a8f19a
10 changed files with 133 additions and 27 deletions
--- a/.gitignore
+++ b/.gitignore
@ -37,6 +37,7 @@ Thumbs.db
 *.log
 logs/
 cache/
+*.aive

 # Build output
 frontend/dist/
--- a/FEATURES.md
+++ b/FEATURES.md
@ -36,6 +36,8 @@ Features are grouped by priority. Check off items as they are implemented.

 - [ ] **Re-transcribe selection** — if Whisper gets a section wrong, let the user select a word range and re-run transcription on just that segment (optionally with a different model or language).

+- [ ] **Optional VibeVoice-ASR-HF transcription backend (future)** — evaluate as an alternate transcription mode for long-form, speaker-attributed transcripts. Keep WhisperX as the default for word-level timestamp editing.
+
 - [ ] **Word text correction** — allow editing the transcript text of a word without affecting its timing. Whisper gets homophones/proper nouns wrong constantly. Pure frontend state change; no backend needed.

 - [ ] **Named timeline markers** — drop named marker pins on the waveform (like Resolve markers). Store as `{ id, time, label, color }` in the project. Rendered as colored triangles on the timeline canvas.
--- a/backend/routers/audio.py
+++ b/backend/routers/audio.py
@ -7,7 +7,7 @@ import tempfile
 from pathlib import Path
 from typing import Optional

-from fastapi import APIRouter, HTTPException, Query
+from fastapi import APIRouter, HTTPException, Query, Request
 from fastapi.responses import FileResponse
 from pydantic import BaseModel

@ -71,30 +71,54 @@ async def detect_silence_endpoint(req: SilenceDetectRequest):


@router.get("/audio/waveform")
-async def get_waveform_audio(path: str = Query(...)):
+async def get_waveform_audio(request: Request, path: str = Query(...)):
    """
    Extract audio from any video/audio file and return it as a WAV.
    The WAV is cached on disk for subsequent requests.
    Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.
    """
+    req_id = hashlib.md5(f"{path}:{request.url}".encode()).hexdigest()[:10]
    file_path = Path(path)
+    logger.info(
+        "[waveform:%s] request raw_url=%s raw_query=%s decoded_path=%r path_len=%s",
+        req_id,
+        str(request.url),
+        request.url.query,
+        path,
+        len(path),
+    )
+
+    try:
+        resolved_path = file_path.expanduser().resolve(strict=False)
+    except Exception:
+        resolved_path = file_path
+
+    logger.info(
+        "[waveform:%s] normalized path=%s exists=%s is_file=%s",
+        req_id,
+        resolved_path,
+        file_path.exists(),
+        file_path.is_file(),
+    )
+
    if not file_path.is_file():
-        logger.warning(f"[waveform] File not found: {path}")
+        logger.warning("[waveform:%s] file_not_found path=%r", req_id, path)
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # Cache key based on path + mtime so stale cache is auto-invalidated
    mtime = file_path.stat().st_mtime
    cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()
+    logger.info("[waveform:%s] cache_key=%s mtime=%s", req_id, cache_key, mtime)

    if cache_key in _waveform_cache:
        cached = Path(_waveform_cache[cache_key])
        if cached.exists():
-            logger.info(f"[waveform] Cache hit for {file_path.name}")
+            logger.info("[waveform:%s] cache_hit cached=%s", req_id, cached)
            return FileResponse(str(cached), media_type="audio/wav")
        else:
            del _waveform_cache[cache_key]

-    logger.info(f"[waveform] Extracting audio from: {file_path.name}")
+    logger.info("[waveform:%s] cache_miss extracting file=%s", req_id, file_path)
    tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
    out_wav = Path(tmp_dir) / f"{cache_key}.wav"

@ -108,18 +132,35 @@ async def get_waveform_audio(path: str = Query(...)):
        "-acodec", "pcm_s16le",   # 16-bit PCM WAV
        str(out_wav),
    ]
+    logger.info("[waveform:%s] ffmpeg_cmd=%s", req_id, " ".join(cmd))
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
-        logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}")
+        logger.error(
+            "[waveform:%s] ffmpeg_failed returncode=%s stderr_tail=%s",
+            req_id,
+            result.returncode,
+            result.stderr[-2000:],
+        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to extract audio: {result.stderr[-300:]}"
        )

    if not out_wav.exists() or out_wav.stat().st_size == 0:
-        logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}")
+        logger.error(
+            "[waveform:%s] empty_output out_wav=%s exists=%s size=%s",
+            req_id,
+            out_wav,
+            out_wav.exists(),
+            out_wav.stat().st_size if out_wav.exists() else -1,
+        )
        raise HTTPException(status_code=500, detail="Audio extraction produced empty file")

-    logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}")
+    logger.info(
+        "[waveform:%s] extracted_bytes=%s out_wav=%s",
+        req_id,
+        out_wav.stat().st_size,
+        out_wav,
+    )
    _waveform_cache[cache_key] = str(out_wav)
    return FileResponse(str(out_wav), media_type="audio/wav")
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@ -23,6 +23,7 @@ import {
 } from 'lucide-react';

 const IS_ELECTRON = !!window.electronAPI;
+const LAST_MEDIA_PATH_KEY = 'talkedit:lastMediaPath';

 type Panel = 'ai' | 'settings' | 'export' | 'silence' | null;

@ -74,6 +75,23 @@ export default function App() {
    // The backend URL is fixed at 127.0.0.1:8000 so we rely on the store default.
  }, [setBackendUrl]);

+  useEffect(() => {
+    if (!IS_ELECTRON || videoPath) return;
+    const savedPath = sessionStorage.getItem(LAST_MEDIA_PATH_KEY);
+    if (savedPath) {
+      loadVideo(savedPath);
+    }
+  }, [videoPath, loadVideo]);
+
+  useEffect(() => {
+    if (!IS_ELECTRON) return;
+    if (videoPath) {
+      sessionStorage.setItem(LAST_MEDIA_PATH_KEY, videoPath);
+      return;
+    }
+    sessionStorage.removeItem(LAST_MEDIA_PATH_KEY);
+  }, [videoPath]);
+
  const handleLoadProject = async () => {
    if (!IS_ELECTRON) return;
    try {
--- a/frontend/src/components/WaveformTimeline.tsx
+++ b/frontend/src/components/WaveformTimeline.tsx
@ -67,19 +67,27 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
    setAudioError(null);

    const loadAudio = async () => {
+      const requestId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
      try {
        const waveformUrl = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath!)}`;
-        console.log('[WaveformTimeline] backendUrl:', backendUrl, '| videoPath:', videoPath);
-        console.log('[WaveformTimeline] Fetching:', waveformUrl);
+        console.log('[WaveformTimeline] req=', requestId, 'backendUrl=', backendUrl, 'videoPath=', videoPath);
+        console.log('[WaveformTimeline] req=', requestId, 'fetching=', waveformUrl);
        const ctx = new AudioContext();
        audioContextRef.current = ctx;

+        const startedAt = performance.now();
        const response = await fetch(waveformUrl);
+        const elapsedMs = Math.round(performance.now() - startedAt);
        if (!response.ok) {
          const body = await response.text().catch(() => '');
          console.error(
-            `[WaveformTimeline] Fetch failed — HTTP ${response.status} ${response.statusText}`,
-            { url: waveformUrl, body }
+            `[WaveformTimeline] req=${requestId} fetch failed — HTTP ${response.status} ${response.statusText}`,
+            {
+              url: waveformUrl,
+              decodedPath: videoPath,
+              elapsedMs,
+              body,
+            }
          );
          throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }
@ -87,11 +95,11 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
        const contentType = response.headers.get('content-type') ?? 'unknown';
        const contentLength = response.headers.get('content-length');
        console.log(
-          `[WaveformTimeline] Fetch OK — content-type: ${contentType}, size: ${contentLength ?? 'unknown'} bytes`
+          `[WaveformTimeline] req=${requestId} fetch ok — content-type: ${contentType}, size: ${contentLength ?? 'unknown'} bytes, elapsed: ${elapsedMs}ms`
        );

        const arrayBuffer = await response.arrayBuffer();
-        console.log(`[WaveformTimeline] ArrayBuffer size: ${arrayBuffer.byteLength} bytes`);
+        console.log(`[WaveformTimeline] req=${requestId} arrayBuffer size: ${arrayBuffer.byteLength} bytes`);

        if (arrayBuffer.byteLength === 0) {
          throw new Error('Server returned an empty file');
@ -104,6 +112,7 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
          console.error(
            '[WaveformTimeline] decodeAudioData failed — browser cannot decode this format.',
            {
+              requestId,
              contentType,
              byteLength: arrayBuffer.byteLength,
              videoPath,
@ -117,13 +126,19 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
        }

        console.log(
-          `[WaveformTimeline] Decoded OK — duration: ${audioBuffer.duration.toFixed(2)}s, ` +
+          `[WaveformTimeline] req=${requestId} decoded ok — duration: ${audioBuffer.duration.toFixed(2)}s, ` +
          `channels: ${audioBuffer.numberOfChannels}, sampleRate: ${audioBuffer.sampleRate}Hz`
        );
        audioBufferRef.current = audioBuffer;
        drawStaticWaveform();
      } catch (err) {
-        console.error('[WaveformTimeline] Waveform load failed:', err);
+        console.error('[WaveformTimeline] waveform load failed', {
+          requestId,
+          error: err,
+          videoPath,
+          backendUrl,
+          encodedPath: encodeURIComponent(videoPath ?? ''),
+        });
        const waveformUrl2 = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath ?? '')}`;
        setAudioError(`Waveform unavailable — ${err instanceof Error ? err.message : 'audio could not be decoded'} [URL: ${waveformUrl2}]`);
      }
--- a/20
+++ b/20
@ -11,7 +11,25 @@ if curl -sf "$BACKEND_URL" > /dev/null 2>&1; then
    echo "Backend already running on port ${BACKEND_PORT}."
 else
    echo "Backend not running — starting it in a new terminal..."
-    VENV_PYTHON="${PROJECT_DIR}/.venv312/bin/python"
+    VENV_PYTHON=""
+    PYTHON_CANDIDATES=(
+        "${PROJECT_DIR}/.venv312/bin/python3.12"
+        "${PROJECT_DIR}/.venv312/bin/python"
+        "${PROJECT_DIR}/.venv/bin/python3"
+        "${PROJECT_DIR}/.venv/bin/python"
+        "${PROJECT_DIR}/venv/bin/python3"
+        "${PROJECT_DIR}/venv/bin/python"
+    )
+    for candidate in "${PYTHON_CANDIDATES[@]}"; do
+        if [[ -x "${candidate}" ]]; then
+            VENV_PYTHON="${candidate}"
+            break
+        fi
+    done
+    if [[ -z "${VENV_PYTHON}" ]]; then
+        echo "No project virtualenv Python found. Checked: .venv312, .venv, venv"
+        exit 1
+    fi
    BACKEND_DIR="${PROJECT_DIR}/backend"

    # Try common terminal emulators in order
--- a/package.json
+++ b/package.json
@ -7,10 +7,10 @@
  "scripts": {
    "tauri": "tauri",
    "dev": "cd frontend && npm run dev -- --host",
-    "dev:tauri": "cd backend && python -m uvicorn main:app --reload --port 8642 & cd frontend && cargo tauri dev",
+    "dev:tauri": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; (cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642) & cd frontend && cargo tauri dev'",
    "build:tauri": "cd frontend && cargo tauri build",
    "dev:frontend": "cd frontend && npm run dev",
-    "dev:backend": "cd backend && python -m uvicorn main:app --reload --port 8642",
+    "dev:backend": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642'",
    "lint": "cd frontend && npm run lint"
  },
  "devDependencies": {
--- a/src-tauri/src/paths.rs
+++ b/src-tauri/src/paths.rs
@ -29,7 +29,7 @@ pub fn project_root() -> PathBuf {
 }

 /// Absolute path to the bundled Python interpreter.
-/// Tries .venv312 first (new), falls back to .venv (legacy).
+/// Prefers the packaged interpreter; otherwise tries project virtualenvs (.venv312, .venv, venv) in a fixed order so all runtime paths agree.
 pub fn python_exe() -> PathBuf {
    let root = project_root();
    // Packaged layout: resources/python/bin/python3
@ -37,12 +37,24 @@ pub fn python_exe() -> PathBuf {
    if bundled.exists() {
        return bundled;
    }
-    // Dev: prefer .venv312 (Python 3.12), fall back to .venv
-    let venv312 = root.join(".venv312").join("bin").join("python3.12");
-    if venv312.exists() {
-        return venv312;
+
+    let candidates = [
+        root.join(".venv312").join("bin").join("python3.12"),
+        root.join(".venv312").join("bin").join("python"),
+        root.join(".venv").join("bin").join("python3"),
+        root.join(".venv").join("bin").join("python"),
+        root.join("venv").join("bin").join("python3"),
+        root.join("venv").join("bin").join("python"),
+    ];
+
+    for candidate in candidates {
+        if candidate.exists() {
+            return candidate;
+        }
    }
-    root.join(".venv").join("bin").join("python3")
+
+    // Last-resort path if no environment is present.
+    root.join(".venv312").join("bin").join("python3.12")
 }

 /// Absolute path to a script in the backend directory.
--- a/src-tauri/src/transcription.rs
+++ b/src-tauri/src/transcription.rs
@ -46,7 +46,6 @@ pub fn transcribe_audio(
    // Run Python script with timeout
    let output = Command::new(python_exe)
        .args(&args)
-        .env("PYTHONPATH", crate::paths::project_root().join(".venv312").join("lib").join("python3.12").join("site-packages"))
        .output()
        .map_err(|e| format!("Failed to run Python script: {}", e))?;

--- a/test_api.py
+++ b/test_api.py
@ -1,4 +1,4 @@
-#!/home/dillon/_code/TalkEdit/.venv312/bin/python3.12
+#!/usr/bin/env python3
 """
 Test script for the TalkEdit API.
 This script tests the new Tauri commands that expose all backend functions.