diff --git a/backend/routers/audio.py b/backend/routers/audio.py index 509e91a..16ad113 100644 --- a/backend/routers/audio.py +++ b/backend/routers/audio.py @@ -122,13 +122,13 @@ async def get_waveform_audio(request: Request, path: str = Query(...)): tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_") out_wav = Path(tmp_dir) / f"{cache_key}.wav" - # Downsample to mono 22050 Hz — enough for waveform drawing, small file + # Downsample to mono 8000 Hz — enough for waveform drawing and much smaller payloads cmd = [ "ffmpeg", "-y", "-i", str(file_path), "-vn", # drop video "-ac", "1", # mono - "-ar", "22050", # 22 kHz sample rate + "-ar", "8000", # 8 kHz sample rate "-acodec", "pcm_s16le", # 16-bit PCM WAV str(out_wav), ] diff --git a/frontend/src/components/WaveformTimeline.tsx b/frontend/src/components/WaveformTimeline.tsx index 7509f53..934cc01 100644 --- a/frontend/src/components/WaveformTimeline.tsx +++ b/frontend/src/components/WaveformTimeline.tsx @@ -4,6 +4,85 @@ import { AlertTriangle } from 'lucide-react'; const RULER_H = 20; // px reserved at top of canvas for the time ruler +type WaveformData = { + samples: Float32Array; + sampleRate: number; + duration: number; +}; + +function parsePcm16Wav(arrayBuffer: ArrayBuffer): WaveformData { + const view = new DataView(arrayBuffer); + if (view.byteLength < 44) { + throw new Error('WAV file too small'); + } + + const text = (offset: number, length: number) => { + let s = ''; + for (let i = 0; i < length; i++) { + s += String.fromCharCode(view.getUint8(offset + i)); + } + return s; + }; + + if (text(0, 4) !== 'RIFF' || text(8, 4) !== 'WAVE') { + throw new Error('Not a RIFF/WAVE file'); + } + + let fmtOffset = -1; + let dataOffset = -1; + let dataSize = 0; + let offset = 12; + + while (offset + 8 <= view.byteLength) { + const chunkId = text(offset, 4); + const chunkSize = view.getUint32(offset + 4, true); + const chunkDataStart = offset + 8; + + if (chunkId === 'fmt ') { + fmtOffset = chunkDataStart; + } else if (chunkId === 'data') { + dataOffset = chunkDataStart; + dataSize = chunkSize; + break; + } + + offset = chunkDataStart + chunkSize + (chunkSize % 2); + } + + if (fmtOffset < 0 || dataOffset < 0) { + throw new Error('Missing WAV fmt/data chunk'); + } + + const audioFormat = view.getUint16(fmtOffset, true); + const channels = view.getUint16(fmtOffset + 2, true); + const sampleRate = view.getUint32(fmtOffset + 4, true); + const bitsPerSample = view.getUint16(fmtOffset + 14, true); + + if (audioFormat !== 1 || bitsPerSample !== 16) { + throw new Error(`Unsupported WAV format (format=${audioFormat}, bits=${bitsPerSample})`); + } + if (channels < 1) { + throw new Error('Invalid channel count in WAV'); + } + + const bytesPerSample = bitsPerSample / 8; + const frameCount = Math.floor(dataSize / (channels * bytesPerSample)); + const samples = new Float32Array(frameCount); + + let p = dataOffset; + for (let i = 0; i < frameCount; i++) { + const sample = view.getInt16(p, true); + samples[i] = sample / 32768; + p += channels * bytesPerSample; + } + + return { + samples, + sampleRate, + duration: frameCount / sampleRate, + }; +} + function formatTime(secs: number): string { const m = Math.floor(secs / 60); const s = secs % 60; @@ -45,8 +124,7 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole const removeCutRange = useEditorStore((s) => s.removeCutRange); const removeMuteRange = useEditorStore((s) => s.removeMuteRange); - const audioContextRef = useRef(null); - const audioBufferRef = useRef(null); + const waveformDataRef = useRef(null); const zoomRef = useRef(1); // 1 = show all, >1 = zoomed in const scrollSecsRef = useRef(0); // seconds scrolled from left const rafRef = useRef(0); @@ -66,17 +144,18 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole if (!videoUrl || !videoPath) return; setAudioError(null); + let cancelled = false; + const controller = new AbortController(); const loadAudio = async () => { const requestId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`; try { const waveformUrl = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath!)}`; console.log('[WaveformTimeline] req=', requestId, 'backendUrl=', backendUrl, 'videoPath=', videoPath); console.log('[WaveformTimeline] req=', requestId, 'fetching=', waveformUrl); - const ctx = new AudioContext(); - audioContextRef.current = ctx; const startedAt = performance.now(); - const response = await fetch(waveformUrl); + const response = await fetch(waveformUrl, { signal: controller.signal }); + if (cancelled) return; const elapsedMs = Math.round(performance.now() - startedAt); if (!response.ok) { const body = await response.text().catch(() => ''); @@ -99,39 +178,27 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole ); const arrayBuffer = await response.arrayBuffer(); + if (cancelled) return; console.log(`[WaveformTimeline] req=${requestId} arrayBuffer size: ${arrayBuffer.byteLength} bytes`); if (arrayBuffer.byteLength === 0) { throw new Error('Server returned an empty file'); } - let audioBuffer: AudioBuffer; - try { - audioBuffer = await ctx.decodeAudioData(arrayBuffer); - } catch (decodeErr) { - console.error( - '[WaveformTimeline] decodeAudioData failed — browser cannot decode this format.', - { - requestId, - contentType, - byteLength: arrayBuffer.byteLength, - videoPath, - error: decodeErr, - } - ); - throw new Error( - `Browser could not decode audio (${contentType}). ` + - `For best compatibility use MP4/AAC or WebM/Opus. Raw error: ${decodeErr}` - ); - } + const waveformData = parsePcm16Wav(arrayBuffer); console.log( - `[WaveformTimeline] req=${requestId} decoded ok — duration: ${audioBuffer.duration.toFixed(2)}s, ` + - `channels: ${audioBuffer.numberOfChannels}, sampleRate: ${audioBuffer.sampleRate}Hz` + `[WaveformTimeline] req=${requestId} parsed wav ok — duration: ${waveformData.duration.toFixed(2)}s, ` + + `sampleRate: ${waveformData.sampleRate}Hz, samples: ${waveformData.samples.length}` ); - audioBufferRef.current = audioBuffer; + if (cancelled) return; + waveformDataRef.current = waveformData; drawStaticWaveform(); } catch (err) { + if (cancelled || (err instanceof DOMException && err.name === 'AbortError')) { + console.log('[WaveformTimeline] req=', requestId, 'aborted/cancelled'); + return; + } console.error('[WaveformTimeline] waveform load failed', { requestId, error: err, @@ -147,14 +214,15 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole loadAudio(); return () => { - audioContextRef.current?.close(); + cancelled = true; + controller.abort(); }; }, [videoUrl, videoPath, backendUrl]); const drawStaticWaveform = useCallback(() => { const canvas = waveCanvasRef.current; - const buffer = audioBufferRef.current; - if (!canvas || !buffer) return; + const waveformData = waveformDataRef.current; + if (!canvas || !waveformData) return; const ctx = canvas.getContext('2d'); if (!ctx) return; @@ -167,12 +235,12 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole const width = rect.width; const height = rect.height; - const dur = buffer.duration; + const dur = waveformData.duration; const zoom = zoomRef.current; const scroll = scrollSecsRef.current; const pxPerSec = (width * zoom) / dur; - const sampleRate = buffer.sampleRate; - const channelData = buffer.getChannelData(0); + const sampleRate = waveformData.sampleRate; + const channelData = waveformData.samples; ctx.clearRect(0, 0, width, height); @@ -344,9 +412,8 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole const ctx = headCanvas.getContext('2d'); if (!ctx) { rafRef.current = requestAnimationFrame(tick); return; } - const buffer = audioBufferRef.current; const video = document.querySelector('video') as HTMLVideoElement | null; - const dur = buffer?.duration ?? 0; + const dur = waveformDataRef.current?.duration ?? 0; const dpr = window.devicePixelRatio || 1; const rect = headCanvas.getBoundingClientRect(); @@ -399,10 +466,10 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole const handleWheel = useCallback((e: React.WheelEvent) => { e.preventDefault(); - const buffer = audioBufferRef.current; const canvas = waveCanvasRef.current; - if (!buffer || !canvas) return; - const dur = buffer.duration; + if (!canvas) return; + const dur = waveformDataRef.current?.duration; + if (!dur) return; const width = canvas.getBoundingClientRect().width; if (e.ctrlKey || e.metaKey) { @@ -428,37 +495,37 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole }, [drawStaticWaveform]); const seekToClientX = useCallback((clientX: number) => { - const buffer = audioBufferRef.current; const canvas = headCanvasRef.current; - if (!canvas || !buffer) return; + const dur = waveformDataRef.current?.duration; + if (!canvas || !dur) return; const rect = canvas.getBoundingClientRect(); const x = clientX - rect.left; - const pxPerSec = (rect.width * zoomRef.current) / buffer.duration; - const newTime = Math.max(0, Math.min(buffer.duration, scrollSecsRef.current + x / pxPerSec)); + const pxPerSec = (rect.width * zoomRef.current) / dur; + const newTime = Math.max(0, Math.min(dur, scrollSecsRef.current + x / pxPerSec)); setCurrentTime(newTime); const video = document.querySelector('video') as HTMLVideoElement | null; if (video) video.currentTime = newTime; }, [setCurrentTime]); const clientXToTime = useCallback((clientX: number): number => { - const buffer = audioBufferRef.current; const canvas = headCanvasRef.current; - if (!canvas || !buffer) return 0; + const dur = waveformDataRef.current?.duration; + if (!canvas || !dur) return 0; const rect = canvas.getBoundingClientRect(); const x = clientX - rect.left; - const pxPerSec = (rect.width * zoomRef.current) / buffer.duration; - return Math.max(0, Math.min(buffer.duration, scrollSecsRef.current + x / pxPerSec)); + const pxPerSec = (rect.width * zoomRef.current) / dur; + return Math.max(0, Math.min(dur, scrollSecsRef.current + x / pxPerSec)); }, []); const getZoneAtPosition = useCallback((clientX: number, clientY: number, forHover: boolean = false) => { - const buffer = audioBufferRef.current; + const dur = waveformDataRef.current?.duration; const canvas = waveCanvasRef.current; - if (!canvas || !buffer) return null; + if (!canvas || !dur) return null; const rect = canvas.getBoundingClientRect(); const x = clientX - rect.left; const y = clientY - rect.top; - const pxPerSec = (rect.width * zoomRef.current) / buffer.duration; + const pxPerSec = (rect.width * zoomRef.current) / dur; const scroll = scrollSecsRef.current; const waveTop = RULER_H + 1; const waveH = canvas.height - waveTop; @@ -760,9 +827,14 @@ export default function WaveformTimeline({ cutMode, muteMode }: { cutMode: boole {audioError ? ( -
- - {audioError} +
+ +
+            {audioError}
+          
) : (