Trying to fix bug: add waveform request diagnostics, restore the last media path, and unify virtualenv Python lookup

This commit is contained in:
2026-04-09 01:36:28 -06:00
parent f9cd2bf579
commit 1d17a8f19a
10 changed files with 133 additions and 27 deletions

1
.gitignore vendored
View File

@ -37,6 +37,7 @@ Thumbs.db
*.log *.log
logs/ logs/
cache/ cache/
*.aive
# Build output # Build output
frontend/dist/ frontend/dist/

View File

@ -36,6 +36,8 @@ Features are grouped by priority. Check off items as they are implemented.
- [ ] **Re-transcribe selection** if Whisper gets a section wrong, let the user select a word range and re-run transcription on just that segment (optionally with a different model or language). - [ ] **Re-transcribe selection** if Whisper gets a section wrong, let the user select a word range and re-run transcription on just that segment (optionally with a different model or language).
- [ ] **Optional VibeVoice-ASR-HF transcription backend (future)** evaluate as an alternate transcription mode for long-form, speaker-attributed transcripts. Keep WhisperX as the default for word-level timestamp editing.
- [ ] **Word text correction** allow editing the transcript text of a word without affecting its timing. Whisper gets homophones/proper nouns wrong constantly. Pure frontend state change; no backend needed. - [ ] **Word text correction** allow editing the transcript text of a word without affecting its timing. Whisper gets homophones/proper nouns wrong constantly. Pure frontend state change; no backend needed.
- [ ] **Named timeline markers** drop named marker pins on the waveform (like Resolve markers). Store as `{ id, time, label, color }` in the project. Rendered as colored triangles on the timeline canvas. - [ ] **Named timeline markers** drop named marker pins on the waveform (like Resolve markers). Store as `{ id, time, label, color }` in the project. Rendered as colored triangles on the timeline canvas.

View File

@ -7,7 +7,7 @@ import tempfile
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import FileResponse from fastapi.responses import FileResponse
from pydantic import BaseModel from pydantic import BaseModel
@ -71,30 +71,54 @@ async def detect_silence_endpoint(req: SilenceDetectRequest):
@router.get("/audio/waveform") @router.get("/audio/waveform")
async def get_waveform_audio(path: str = Query(...)): async def get_waveform_audio(request: Request, path: str = Query(...)):
""" """
Extract audio from any video/audio file and return it as a WAV. Extract audio from any video/audio file and return it as a WAV.
The WAV is cached on disk for subsequent requests. The WAV is cached on disk for subsequent requests.
Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc. Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.
""" """
req_id = hashlib.md5(f"{path}:{request.url}".encode()).hexdigest()[:10]
file_path = Path(path) file_path = Path(path)
logger.info(
"[waveform:%s] request raw_url=%s raw_query=%s decoded_path=%r path_len=%s",
req_id,
str(request.url),
request.url.query,
path,
len(path),
)
try:
resolved_path = file_path.expanduser().resolve(strict=False)
except Exception:
resolved_path = file_path
logger.info(
"[waveform:%s] normalized path=%s exists=%s is_file=%s",
req_id,
resolved_path,
file_path.exists(),
file_path.is_file(),
)
if not file_path.is_file(): if not file_path.is_file():
logger.warning(f"[waveform] File not found: {path}") logger.warning("[waveform:%s] file_not_found path=%r", req_id, path)
raise HTTPException(status_code=404, detail=f"File not found: {path}") raise HTTPException(status_code=404, detail=f"File not found: {path}")
# Cache key based on path + mtime so stale cache is auto-invalidated # Cache key based on path + mtime so stale cache is auto-invalidated
mtime = file_path.stat().st_mtime mtime = file_path.stat().st_mtime
cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest() cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()
logger.info("[waveform:%s] cache_key=%s mtime=%s", req_id, cache_key, mtime)
if cache_key in _waveform_cache: if cache_key in _waveform_cache:
cached = Path(_waveform_cache[cache_key]) cached = Path(_waveform_cache[cache_key])
if cached.exists(): if cached.exists():
logger.info(f"[waveform] Cache hit for {file_path.name}") logger.info("[waveform:%s] cache_hit cached=%s", req_id, cached)
return FileResponse(str(cached), media_type="audio/wav") return FileResponse(str(cached), media_type="audio/wav")
else: else:
del _waveform_cache[cache_key] del _waveform_cache[cache_key]
logger.info(f"[waveform] Extracting audio from: {file_path.name}") logger.info("[waveform:%s] cache_miss extracting file=%s", req_id, file_path)
tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_") tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
out_wav = Path(tmp_dir) / f"{cache_key}.wav" out_wav = Path(tmp_dir) / f"{cache_key}.wav"
@ -108,18 +132,35 @@ async def get_waveform_audio(path: str = Query(...)):
"-acodec", "pcm_s16le", # 16-bit PCM WAV "-acodec", "pcm_s16le", # 16-bit PCM WAV
str(out_wav), str(out_wav),
] ]
logger.info("[waveform:%s] ffmpeg_cmd=%s", req_id, " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True) result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0: if result.returncode != 0:
logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}") logger.error(
"[waveform:%s] ffmpeg_failed returncode=%s stderr_tail=%s",
req_id,
result.returncode,
result.stderr[-2000:],
)
raise HTTPException( raise HTTPException(
status_code=500, status_code=500,
detail=f"Failed to extract audio: {result.stderr[-300:]}" detail=f"Failed to extract audio: {result.stderr[-300:]}"
) )
if not out_wav.exists() or out_wav.stat().st_size == 0: if not out_wav.exists() or out_wav.stat().st_size == 0:
logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}") logger.error(
"[waveform:%s] empty_output out_wav=%s exists=%s size=%s",
req_id,
out_wav,
out_wav.exists(),
out_wav.stat().st_size if out_wav.exists() else -1,
)
raise HTTPException(status_code=500, detail="Audio extraction produced empty file") raise HTTPException(status_code=500, detail="Audio extraction produced empty file")
logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}") logger.info(
"[waveform:%s] extracted_bytes=%s out_wav=%s",
req_id,
out_wav.stat().st_size,
out_wav,
)
_waveform_cache[cache_key] = str(out_wav) _waveform_cache[cache_key] = str(out_wav)
return FileResponse(str(out_wav), media_type="audio/wav") return FileResponse(str(out_wav), media_type="audio/wav")

View File

@ -23,6 +23,7 @@ import {
} from 'lucide-react'; } from 'lucide-react';
const IS_ELECTRON = !!window.electronAPI; const IS_ELECTRON = !!window.electronAPI;
const LAST_MEDIA_PATH_KEY = 'talkedit:lastMediaPath';
type Panel = 'ai' | 'settings' | 'export' | 'silence' | null; type Panel = 'ai' | 'settings' | 'export' | 'silence' | null;
@ -74,6 +75,23 @@ export default function App() {
// The backend URL is fixed at 127.0.0.1:8000 so we rely on the store default. // The backend URL is fixed at 127.0.0.1:8000 so we rely on the store default.
}, [setBackendUrl]); }, [setBackendUrl]);
useEffect(() => {
if (!IS_ELECTRON || videoPath) return;
const savedPath = sessionStorage.getItem(LAST_MEDIA_PATH_KEY);
if (savedPath) {
loadVideo(savedPath);
}
}, [videoPath, loadVideo]);
useEffect(() => {
if (!IS_ELECTRON) return;
if (videoPath) {
sessionStorage.setItem(LAST_MEDIA_PATH_KEY, videoPath);
return;
}
sessionStorage.removeItem(LAST_MEDIA_PATH_KEY);
}, [videoPath]);
const handleLoadProject = async () => { const handleLoadProject = async () => {
if (!IS_ELECTRON) return; if (!IS_ELECTRON) return;
try { try {

View File

@ -67,19 +67,27 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
setAudioError(null); setAudioError(null);
const loadAudio = async () => { const loadAudio = async () => {
const requestId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
try { try {
const waveformUrl = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath!)}`; const waveformUrl = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath!)}`;
console.log('[WaveformTimeline] backendUrl:', backendUrl, '| videoPath:', videoPath); console.log('[WaveformTimeline] req=', requestId, 'backendUrl=', backendUrl, 'videoPath=', videoPath);
console.log('[WaveformTimeline] Fetching:', waveformUrl); console.log('[WaveformTimeline] req=', requestId, 'fetching=', waveformUrl);
const ctx = new AudioContext(); const ctx = new AudioContext();
audioContextRef.current = ctx; audioContextRef.current = ctx;
const startedAt = performance.now();
const response = await fetch(waveformUrl); const response = await fetch(waveformUrl);
const elapsedMs = Math.round(performance.now() - startedAt);
if (!response.ok) { if (!response.ok) {
const body = await response.text().catch(() => ''); const body = await response.text().catch(() => '');
console.error( console.error(
`[WaveformTimeline] Fetch failed — HTTP ${response.status} ${response.statusText}`, `[WaveformTimeline] req=${requestId} fetch failed — HTTP ${response.status} ${response.statusText}`,
{ url: waveformUrl, body } {
url: waveformUrl,
decodedPath: videoPath,
elapsedMs,
body,
}
); );
throw new Error(`HTTP ${response.status}: ${response.statusText}`); throw new Error(`HTTP ${response.status}: ${response.statusText}`);
} }
@ -87,11 +95,11 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
const contentType = response.headers.get('content-type') ?? 'unknown'; const contentType = response.headers.get('content-type') ?? 'unknown';
const contentLength = response.headers.get('content-length'); const contentLength = response.headers.get('content-length');
console.log( console.log(
`[WaveformTimeline] Fetch OK — content-type: ${contentType}, size: ${contentLength ?? 'unknown'} bytes` `[WaveformTimeline] req=${requestId} fetch ok — content-type: ${contentType}, size: ${contentLength ?? 'unknown'} bytes, elapsed: ${elapsedMs}ms`
); );
const arrayBuffer = await response.arrayBuffer(); const arrayBuffer = await response.arrayBuffer();
console.log(`[WaveformTimeline] ArrayBuffer size: ${arrayBuffer.byteLength} bytes`); console.log(`[WaveformTimeline] req=${requestId} arrayBuffer size: ${arrayBuffer.byteLength} bytes`);
if (arrayBuffer.byteLength === 0) { if (arrayBuffer.byteLength === 0) {
throw new Error('Server returned an empty file'); throw new Error('Server returned an empty file');
@ -104,6 +112,7 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
console.error( console.error(
'[WaveformTimeline] decodeAudioData failed — browser cannot decode this format.', '[WaveformTimeline] decodeAudioData failed — browser cannot decode this format.',
{ {
requestId,
contentType, contentType,
byteLength: arrayBuffer.byteLength, byteLength: arrayBuffer.byteLength,
videoPath, videoPath,
@ -117,13 +126,19 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
} }
console.log( console.log(
`[WaveformTimeline] Decoded OK — duration: ${audioBuffer.duration.toFixed(2)}s, ` + `[WaveformTimeline] req=${requestId} decoded ok — duration: ${audioBuffer.duration.toFixed(2)}s, ` +
`channels: ${audioBuffer.numberOfChannels}, sampleRate: ${audioBuffer.sampleRate}Hz` `channels: ${audioBuffer.numberOfChannels}, sampleRate: ${audioBuffer.sampleRate}Hz`
); );
audioBufferRef.current = audioBuffer; audioBufferRef.current = audioBuffer;
drawStaticWaveform(); drawStaticWaveform();
} catch (err) { } catch (err) {
console.error('[WaveformTimeline] Waveform load failed:', err); console.error('[WaveformTimeline] waveform load failed', {
requestId,
error: err,
videoPath,
backendUrl,
encodedPath: encodeURIComponent(videoPath ?? ''),
});
const waveformUrl2 = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath ?? '')}`; const waveformUrl2 = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath ?? '')}`;
setAudioError(`Waveform unavailable — ${err instanceof Error ? err.message : 'audio could not be decoded'} [URL: ${waveformUrl2}]`); setAudioError(`Waveform unavailable — ${err instanceof Error ? err.message : 'audio could not be decoded'} [URL: ${waveformUrl2}]`);
} }

20
open
View File

@ -11,7 +11,25 @@ if curl -sf "$BACKEND_URL" > /dev/null 2>&1; then
echo "Backend already running on port ${BACKEND_PORT}." echo "Backend already running on port ${BACKEND_PORT}."
else else
echo "Backend not running — starting it in a new terminal..." echo "Backend not running — starting it in a new terminal..."
VENV_PYTHON="${PROJECT_DIR}/.venv312/bin/python" VENV_PYTHON=""
PYTHON_CANDIDATES=(
"${PROJECT_DIR}/.venv312/bin/python3.12"
"${PROJECT_DIR}/.venv312/bin/python"
"${PROJECT_DIR}/.venv/bin/python3"
"${PROJECT_DIR}/.venv/bin/python"
"${PROJECT_DIR}/venv/bin/python3"
"${PROJECT_DIR}/venv/bin/python"
)
for candidate in "${PYTHON_CANDIDATES[@]}"; do
if [[ -x "${candidate}" ]]; then
VENV_PYTHON="${candidate}"
break
fi
done
if [[ -z "${VENV_PYTHON}" ]]; then
echo "No project virtualenv Python found. Checked: .venv312, .venv, venv"
exit 1
fi
BACKEND_DIR="${PROJECT_DIR}/backend" BACKEND_DIR="${PROJECT_DIR}/backend"
# Try common terminal emulators in order # Try common terminal emulators in order

View File

@ -7,10 +7,10 @@
"scripts": { "scripts": {
"tauri": "tauri", "tauri": "tauri",
"dev": "cd frontend && npm run dev -- --host", "dev": "cd frontend && npm run dev -- --host",
"dev:tauri": "cd backend && python -m uvicorn main:app --reload --port 8642 & cd frontend && cargo tauri dev", "dev:tauri": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; (cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642) & cd frontend && cargo tauri dev'",
"build:tauri": "cd frontend && cargo tauri build", "build:tauri": "cd frontend && cargo tauri build",
"dev:frontend": "cd frontend && npm run dev", "dev:frontend": "cd frontend && npm run dev",
"dev:backend": "cd backend && python -m uvicorn main:app --reload --port 8642", "dev:backend": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642'",
"lint": "cd frontend && npm run lint" "lint": "cd frontend && npm run lint"
}, },
"devDependencies": { "devDependencies": {

View File

@ -29,7 +29,7 @@ pub fn project_root() -> PathBuf {
} }
/// Absolute path to the bundled Python interpreter. /// Absolute path to the bundled Python interpreter.
/// Tries .venv312 first (new), falls back to .venv (legacy). /// Tries project virtualenvs in a fixed order so all runtime paths agree.
pub fn python_exe() -> PathBuf { pub fn python_exe() -> PathBuf {
let root = project_root(); let root = project_root();
// Packaged layout: resources/python/bin/python3 // Packaged layout: resources/python/bin/python3
@ -37,12 +37,24 @@ pub fn python_exe() -> PathBuf {
if bundled.exists() { if bundled.exists() {
return bundled; return bundled;
} }
// Dev: prefer .venv312 (Python 3.12), fall back to .venv
let venv312 = root.join(".venv312").join("bin").join("python3.12"); let candidates = [
if venv312.exists() { root.join(".venv312").join("bin").join("python3.12"),
return venv312; root.join(".venv312").join("bin").join("python"),
root.join(".venv").join("bin").join("python3"),
root.join(".venv").join("bin").join("python"),
root.join("venv").join("bin").join("python3"),
root.join("venv").join("bin").join("python"),
];
for candidate in candidates {
if candidate.exists() {
return candidate;
} }
root.join(".venv").join("bin").join("python3") }
// Last-resort path if no environment is present.
root.join(".venv312").join("bin").join("python3.12")
} }
/// Absolute path to a script in the backend directory. /// Absolute path to a script in the backend directory.

View File

@ -46,7 +46,6 @@ pub fn transcribe_audio(
// Run Python script with timeout // Run Python script with timeout
let output = Command::new(python_exe) let output = Command::new(python_exe)
.args(&args) .args(&args)
.env("PYTHONPATH", crate::paths::project_root().join(".venv312").join("lib").join("python3.12").join("site-packages"))
.output() .output()
.map_err(|e| format!("Failed to run Python script: {}", e))?; .map_err(|e| format!("Failed to run Python script: {}", e))?;

View File

@ -1,4 +1,4 @@
#!/home/dillon/_code/TalkEdit/.venv312/bin/python3.12 #!/usr/bin/env python3
""" """
Test script for the TalkEdit API. Test script for the TalkEdit API.
This script tests the new Tauri commands that expose all backend functions. This script tests the new Tauri commands that expose all backend functions.