trying to fix bug
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -37,6 +37,7 @@ Thumbs.db
|
||||
*.log
|
||||
logs/
|
||||
cache/
|
||||
*.aive
|
||||
|
||||
# Build output
|
||||
frontend/dist/
|
||||
|
||||
@ -36,6 +36,8 @@ Features are grouped by priority. Check off items as they are implemented.
|
||||
|
||||
- [ ] **Re-transcribe selection** — if Whisper gets a section wrong, let the user select a word range and re-run transcription on just that segment (optionally with a different model or language).
|
||||
|
||||
- [ ] **Optional VibeVoice-ASR-HF transcription backend (future)** — evaluate as an alternate transcription mode for long-form, speaker-attributed transcripts. Keep WhisperX as the default for word-level timestamp editing.
|
||||
|
||||
- [ ] **Word text correction** — allow editing the transcript text of a word without affecting its timing. Whisper gets homophones/proper nouns wrong constantly. Pure frontend state change; no backend needed.
|
||||
|
||||
- [ ] **Named timeline markers** — drop named marker pins on the waveform (like Resolve markers). Store as `{ id, time, label, color }` in the project. Rendered as colored triangles on the timeline canvas.
|
||||
|
||||
@ -7,7 +7,7 @@ import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from fastapi import APIRouter, HTTPException, Query, Request
|
||||
from fastapi.responses import FileResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
@ -71,30 +71,54 @@ async def detect_silence_endpoint(req: SilenceDetectRequest):
|
||||
|
||||
|
||||
@router.get("/audio/waveform")
|
||||
async def get_waveform_audio(path: str = Query(...)):
|
||||
async def get_waveform_audio(request: Request, path: str = Query(...)):
|
||||
"""
|
||||
Extract audio from any video/audio file and return it as a WAV.
|
||||
The WAV is cached on disk for subsequent requests.
|
||||
Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.
|
||||
"""
|
||||
req_id = hashlib.md5(f"{path}:{request.url}".encode()).hexdigest()[:10]
|
||||
file_path = Path(path)
|
||||
logger.info(
|
||||
"[waveform:%s] request raw_url=%s raw_query=%s decoded_path=%r path_len=%s",
|
||||
req_id,
|
||||
str(request.url),
|
||||
request.url.query,
|
||||
path,
|
||||
len(path),
|
||||
)
|
||||
|
||||
try:
|
||||
resolved_path = file_path.expanduser().resolve(strict=False)
|
||||
except Exception:
|
||||
resolved_path = file_path
|
||||
|
||||
logger.info(
|
||||
"[waveform:%s] normalized path=%s exists=%s is_file=%s",
|
||||
req_id,
|
||||
resolved_path,
|
||||
file_path.exists(),
|
||||
file_path.is_file(),
|
||||
)
|
||||
|
||||
if not file_path.is_file():
|
||||
logger.warning(f"[waveform] File not found: {path}")
|
||||
logger.warning("[waveform:%s] file_not_found path=%r", req_id, path)
|
||||
raise HTTPException(status_code=404, detail=f"File not found: {path}")
|
||||
|
||||
# Cache key based on path + mtime so stale cache is auto-invalidated
|
||||
mtime = file_path.stat().st_mtime
|
||||
cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()
|
||||
logger.info("[waveform:%s] cache_key=%s mtime=%s", req_id, cache_key, mtime)
|
||||
|
||||
if cache_key in _waveform_cache:
|
||||
cached = Path(_waveform_cache[cache_key])
|
||||
if cached.exists():
|
||||
logger.info(f"[waveform] Cache hit for {file_path.name}")
|
||||
logger.info("[waveform:%s] cache_hit cached=%s", req_id, cached)
|
||||
return FileResponse(str(cached), media_type="audio/wav")
|
||||
else:
|
||||
del _waveform_cache[cache_key]
|
||||
|
||||
logger.info(f"[waveform] Extracting audio from: {file_path.name}")
|
||||
logger.info("[waveform:%s] cache_miss extracting file=%s", req_id, file_path)
|
||||
tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
|
||||
out_wav = Path(tmp_dir) / f"{cache_key}.wav"
|
||||
|
||||
@ -108,18 +132,35 @@ async def get_waveform_audio(path: str = Query(...)):
|
||||
"-acodec", "pcm_s16le", # 16-bit PCM WAV
|
||||
str(out_wav),
|
||||
]
|
||||
logger.info("[waveform:%s] ffmpeg_cmd=%s", req_id, " ".join(cmd))
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
if result.returncode != 0:
|
||||
logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}")
|
||||
logger.error(
|
||||
"[waveform:%s] ffmpeg_failed returncode=%s stderr_tail=%s",
|
||||
req_id,
|
||||
result.returncode,
|
||||
result.stderr[-2000:],
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to extract audio: {result.stderr[-300:]}"
|
||||
)
|
||||
|
||||
if not out_wav.exists() or out_wav.stat().st_size == 0:
|
||||
logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}")
|
||||
logger.error(
|
||||
"[waveform:%s] empty_output out_wav=%s exists=%s size=%s",
|
||||
req_id,
|
||||
out_wav,
|
||||
out_wav.exists(),
|
||||
out_wav.stat().st_size if out_wav.exists() else -1,
|
||||
)
|
||||
raise HTTPException(status_code=500, detail="Audio extraction produced empty file")
|
||||
|
||||
logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}")
|
||||
logger.info(
|
||||
"[waveform:%s] extracted_bytes=%s out_wav=%s",
|
||||
req_id,
|
||||
out_wav.stat().st_size,
|
||||
out_wav,
|
||||
)
|
||||
_waveform_cache[cache_key] = str(out_wav)
|
||||
return FileResponse(str(out_wav), media_type="audio/wav")
|
||||
|
||||
@ -23,6 +23,7 @@ import {
|
||||
} from 'lucide-react';
|
||||
|
||||
const IS_ELECTRON = !!window.electronAPI;
|
||||
const LAST_MEDIA_PATH_KEY = 'talkedit:lastMediaPath';
|
||||
|
||||
type Panel = 'ai' | 'settings' | 'export' | 'silence' | null;
|
||||
|
||||
@ -74,6 +75,23 @@ export default function App() {
|
||||
// The backend URL is fixed at 127.0.0.1:8000 so we rely on the store default.
|
||||
}, [setBackendUrl]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!IS_ELECTRON || videoPath) return;
|
||||
const savedPath = sessionStorage.getItem(LAST_MEDIA_PATH_KEY);
|
||||
if (savedPath) {
|
||||
loadVideo(savedPath);
|
||||
}
|
||||
}, [videoPath, loadVideo]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!IS_ELECTRON) return;
|
||||
if (videoPath) {
|
||||
sessionStorage.setItem(LAST_MEDIA_PATH_KEY, videoPath);
|
||||
return;
|
||||
}
|
||||
sessionStorage.removeItem(LAST_MEDIA_PATH_KEY);
|
||||
}, [videoPath]);
|
||||
|
||||
const handleLoadProject = async () => {
|
||||
if (!IS_ELECTRON) return;
|
||||
try {
|
||||
|
||||
@ -67,19 +67,27 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
|
||||
setAudioError(null);
|
||||
|
||||
const loadAudio = async () => {
|
||||
const requestId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
try {
|
||||
const waveformUrl = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath!)}`;
|
||||
console.log('[WaveformTimeline] backendUrl:', backendUrl, '| videoPath:', videoPath);
|
||||
console.log('[WaveformTimeline] Fetching:', waveformUrl);
|
||||
console.log('[WaveformTimeline] req=', requestId, 'backendUrl=', backendUrl, 'videoPath=', videoPath);
|
||||
console.log('[WaveformTimeline] req=', requestId, 'fetching=', waveformUrl);
|
||||
const ctx = new AudioContext();
|
||||
audioContextRef.current = ctx;
|
||||
|
||||
const startedAt = performance.now();
|
||||
const response = await fetch(waveformUrl);
|
||||
const elapsedMs = Math.round(performance.now() - startedAt);
|
||||
if (!response.ok) {
|
||||
const body = await response.text().catch(() => '');
|
||||
console.error(
|
||||
`[WaveformTimeline] Fetch failed — HTTP ${response.status} ${response.statusText}`,
|
||||
{ url: waveformUrl, body }
|
||||
`[WaveformTimeline] req=${requestId} fetch failed — HTTP ${response.status} ${response.statusText}`,
|
||||
{
|
||||
url: waveformUrl,
|
||||
decodedPath: videoPath,
|
||||
elapsedMs,
|
||||
body,
|
||||
}
|
||||
);
|
||||
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
||||
}
|
||||
@ -87,11 +95,11 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
|
||||
const contentType = response.headers.get('content-type') ?? 'unknown';
|
||||
const contentLength = response.headers.get('content-length');
|
||||
console.log(
|
||||
`[WaveformTimeline] Fetch OK — content-type: ${contentType}, size: ${contentLength ?? 'unknown'} bytes`
|
||||
`[WaveformTimeline] req=${requestId} fetch ok — content-type: ${contentType}, size: ${contentLength ?? 'unknown'} bytes, elapsed: ${elapsedMs}ms`
|
||||
);
|
||||
|
||||
const arrayBuffer = await response.arrayBuffer();
|
||||
console.log(`[WaveformTimeline] ArrayBuffer size: ${arrayBuffer.byteLength} bytes`);
|
||||
console.log(`[WaveformTimeline] req=${requestId} arrayBuffer size: ${arrayBuffer.byteLength} bytes`);
|
||||
|
||||
if (arrayBuffer.byteLength === 0) {
|
||||
throw new Error('Server returned an empty file');
|
||||
@ -104,6 +112,7 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
|
||||
console.error(
|
||||
'[WaveformTimeline] decodeAudioData failed — browser cannot decode this format.',
|
||||
{
|
||||
requestId,
|
||||
contentType,
|
||||
byteLength: arrayBuffer.byteLength,
|
||||
videoPath,
|
||||
@ -117,13 +126,19 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole
|
||||
}
|
||||
|
||||
console.log(
|
||||
`[WaveformTimeline] Decoded OK — duration: ${audioBuffer.duration.toFixed(2)}s, ` +
|
||||
`[WaveformTimeline] req=${requestId} decoded ok — duration: ${audioBuffer.duration.toFixed(2)}s, ` +
|
||||
`channels: ${audioBuffer.numberOfChannels}, sampleRate: ${audioBuffer.sampleRate}Hz`
|
||||
);
|
||||
audioBufferRef.current = audioBuffer;
|
||||
drawStaticWaveform();
|
||||
} catch (err) {
|
||||
console.error('[WaveformTimeline] Waveform load failed:', err);
|
||||
console.error('[WaveformTimeline] waveform load failed', {
|
||||
requestId,
|
||||
error: err,
|
||||
videoPath,
|
||||
backendUrl,
|
||||
encodedPath: encodeURIComponent(videoPath ?? ''),
|
||||
});
|
||||
const waveformUrl2 = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath ?? '')}`;
|
||||
setAudioError(`Waveform unavailable — ${err instanceof Error ? err.message : 'audio could not be decoded'} [URL: ${waveformUrl2}]`);
|
||||
}
|
||||
|
||||
20
open
20
open
@ -11,7 +11,25 @@ if curl -sf "$BACKEND_URL" > /dev/null 2>&1; then
|
||||
echo "Backend already running on port ${BACKEND_PORT}."
|
||||
else
|
||||
echo "Backend not running — starting it in a new terminal..."
|
||||
VENV_PYTHON="${PROJECT_DIR}/.venv312/bin/python"
|
||||
VENV_PYTHON=""
|
||||
PYTHON_CANDIDATES=(
|
||||
"${PROJECT_DIR}/.venv312/bin/python3.12"
|
||||
"${PROJECT_DIR}/.venv312/bin/python"
|
||||
"${PROJECT_DIR}/.venv/bin/python3"
|
||||
"${PROJECT_DIR}/.venv/bin/python"
|
||||
"${PROJECT_DIR}/venv/bin/python3"
|
||||
"${PROJECT_DIR}/venv/bin/python"
|
||||
)
|
||||
for candidate in "${PYTHON_CANDIDATES[@]}"; do
|
||||
if [[ -x "${candidate}" ]]; then
|
||||
VENV_PYTHON="${candidate}"
|
||||
break
|
||||
fi
|
||||
done
|
||||
if [[ -z "${VENV_PYTHON}" ]]; then
|
||||
echo "No project virtualenv Python found. Checked: .venv312, .venv, venv"
|
||||
exit 1
|
||||
fi
|
||||
BACKEND_DIR="${PROJECT_DIR}/backend"
|
||||
|
||||
# Try common terminal emulators in order
|
||||
|
||||
@ -7,10 +7,10 @@
|
||||
"scripts": {
|
||||
"tauri": "tauri",
|
||||
"dev": "cd frontend && npm run dev -- --host",
|
||||
"dev:tauri": "cd backend && python -m uvicorn main:app --reload --port 8642 & cd frontend && cargo tauri dev",
|
||||
"dev:tauri": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; (cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642) & cd frontend && cargo tauri dev'",
|
||||
"build:tauri": "cd frontend && cargo tauri build",
|
||||
"dev:frontend": "cd frontend && npm run dev",
|
||||
"dev:backend": "cd backend && python -m uvicorn main:app --reload --port 8642",
|
||||
"dev:backend": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642'",
|
||||
"lint": "cd frontend && npm run lint"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@ -29,7 +29,7 @@ pub fn project_root() -> PathBuf {
|
||||
}
|
||||
|
||||
/// Absolute path to the bundled Python interpreter.
|
||||
/// Tries .venv312 first (new), falls back to .venv (legacy).
|
||||
/// Tries project virtualenvs in a fixed order so all runtime paths agree.
|
||||
pub fn python_exe() -> PathBuf {
|
||||
let root = project_root();
|
||||
// Packaged layout: resources/python/bin/python3
|
||||
@ -37,12 +37,24 @@ pub fn python_exe() -> PathBuf {
|
||||
if bundled.exists() {
|
||||
return bundled;
|
||||
}
|
||||
// Dev: prefer .venv312 (Python 3.12), fall back to .venv
|
||||
let venv312 = root.join(".venv312").join("bin").join("python3.12");
|
||||
if venv312.exists() {
|
||||
return venv312;
|
||||
|
||||
let candidates = [
|
||||
root.join(".venv312").join("bin").join("python3.12"),
|
||||
root.join(".venv312").join("bin").join("python"),
|
||||
root.join(".venv").join("bin").join("python3"),
|
||||
root.join(".venv").join("bin").join("python"),
|
||||
root.join("venv").join("bin").join("python3"),
|
||||
root.join("venv").join("bin").join("python"),
|
||||
];
|
||||
|
||||
for candidate in candidates {
|
||||
if candidate.exists() {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
root.join(".venv").join("bin").join("python3")
|
||||
|
||||
// Last-resort path if no environment is present.
|
||||
root.join(".venv312").join("bin").join("python3.12")
|
||||
}
|
||||
|
||||
/// Absolute path to a script in the backend directory.
|
||||
|
||||
@ -46,7 +46,6 @@ pub fn transcribe_audio(
|
||||
// Run Python script with timeout
|
||||
let output = Command::new(python_exe)
|
||||
.args(&args)
|
||||
.env("PYTHONPATH", crate::paths::project_root().join(".venv312").join("lib").join("python3.12").join("site-packages"))
|
||||
.output()
|
||||
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#!/home/dillon/_code/TalkEdit/.venv312/bin/python3.12
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for the TalkEdit API.
|
||||
This script tests the new Tauri commands that expose all backend functions.
|
||||
|
||||
Reference in New Issue
Block a user