From 1678d28db72b57c755dcb9f1c3f65763502a87fe Mon Sep 17 00:00:00 2001 From: dillonj Date: Mon, 4 May 2026 23:54:14 -0600 Subject: [PATCH] able to re-transcribe --- .github/copilot-instructions.md | 2 + FEATURES.md | 4 +- backend/routers/export.py | 38 ++++++ backend/routers/transcribe.py | 96 ++++++++++++++++ backend/services/transcription.py | 6 +- frontend/src/components/ExportDialog.tsx | 115 ++++++++++++++++++- frontend/src/components/TranscriptEditor.tsx | 58 +++++++++- frontend/src/store/editorStore.ts | 36 ++++++ 8 files changed, 346 insertions(+), 9 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index e96803a..5809cfd 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -71,6 +71,8 @@ Use project virtualenvs where available (`.venv312`, `.venv`, or `venv`) for bac - **Normalization moved to export**: No longer a standalone button. Integrated as `normalizeAudio` checkbox + LUFS target selector in ExportPanel. Sent as `normalize_loudness`/`normalize_target_lufs` to backend. Applied via `loudnorm` in FFmpeg audio filter chain during export. - **Export camelCase fix**: `ExportDialog.tsx` now manually maps `gainRanges`→`gain_db` and `muteRanges`→`{start,end}` before sending to backend. Prevents Pydantic v2 field rejection. - **color-scheme:dark**: All ` setTranscriptFormat(e.target.value as 'txt' | 'srt')} + className="flex-1 px-2 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:outline-none focus:border-editor-accent [color-scheme:dark]" + > + + + + + + + + {/* Export video button */} diff --git a/frontend/src/components/TranscriptEditor.tsx b/frontend/src/components/TranscriptEditor.tsx index 98ac465..327b656 100644 --- a/frontend/src/components/TranscriptEditor.tsx +++ b/frontend/src/components/TranscriptEditor.tsx @@ -1,7 +1,7 @@ import { useCallback, useRef, useEffect, useMemo, useState } from 'react'; import { useEditorStore } from '../store/editorStore'; import { Virtuoso } from 'react-virtuoso'; -import { Scissors, VolumeX, SlidersHorizontal, Gauge, RotateCcw, Search, ChevronUp, ChevronDown, X } from 'lucide-react'; +import { Scissors, VolumeX, SlidersHorizontal, Gauge, RotateCcw, Search, ChevronUp, ChevronDown, X, RefreshCw } from 'lucide-react'; interface TranscriptEditorProps { cutMode: boolean; @@ -30,6 +30,9 @@ export default function TranscriptEditor({ const hoveredWordIndex = useEditorStore((s) => s.hoveredWordIndex); const setSelectedWordIndices = useEditorStore((s) => s.setSelectedWordIndices); const setHoveredWordIndex = useEditorStore((s) => s.setHoveredWordIndex); + const videoPath = useEditorStore((s) => s.videoPath); + const backendUrl = useEditorStore((s) => s.backendUrl); + const replaceWordRange = useEditorStore((s) => s.replaceWordRange); const removeCutRange = useEditorStore((s) => s.removeCutRange); const removeMuteRange = useEditorStore((s) => s.removeMuteRange); const removeGainRange = useEditorStore((s) => s.removeGainRange); @@ -254,6 +257,50 @@ export default function TranscriptEditor({ setEditText(''); }, []); + const [isReTranscribing, setIsReTranscribing] = useState(false); + const reTranscribeGuard = useRef(false); + + const handleReTranscribe = useCallback(async () => { + if (!videoPath || selectedWordIndices.length === 0 || reTranscribeGuard.current) return; + reTranscribeGuard.current = true; + setIsReTranscribing(true); + + // Snapshot indices and word timings before the async gap + const sorted = [...selectedWordIndices].sort((a, b) => a - b); + const startWord = words[sorted[0]]; + const endWord = words[sorted[sorted.length - 1]]; + if (!startWord || !endWord) { + reTranscribeGuard.current = false; + setIsReTranscribing(false); + return; + } + + try { + const res = await fetch(`${backendUrl}/transcribe/segment`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + file_path: videoPath, + start: startWord.start, + end: endWord.end, + }), + }); + if (!res.ok) { + let detail = res.statusText; + try { const body = await res.json(); if (body?.detail) detail = String(body.detail); } catch { /* keep statusText fallback */ } + throw new Error(`Re-transcribe failed: ${detail}`); + } + const data = await res.json(); + replaceWordRange(sorted[0], sorted[sorted.length - 1], data.words); + } catch (err) { + console.error('Re-transcribe error:', err); + alert(err instanceof Error ? err.message : 'Re-transcribe failed'); + } finally { + reTranscribeGuard.current = false; + setIsReTranscribing(false); + } + }, [videoPath, selectedWordIndices, words, backendUrl, replaceWordRange]); + const handleWordDoubleClick = useCallback((index: number) => { if (cutMode || muteMode || gainMode || speedMode) return; startEditing(index); @@ -535,6 +582,15 @@ export default function TranscriptEditor({ Speed {speedModeValue.toFixed(2)}x + )} diff --git a/frontend/src/store/editorStore.ts b/frontend/src/store/editorStore.ts index c698963..955baf3 100644 --- a/frontend/src/store/editorStore.ts +++ b/frontend/src/store/editorStore.ts @@ -92,6 +92,7 @@ interface EditorActions { setTranscribing: (active: boolean, progress?: number, status?: string) => void; setExporting: (active: boolean, progress?: number) => void; setZonePreviewPaddingSeconds: (seconds: number) => void; + replaceWordRange: (startIndex: number, endIndex: number, newWords: Word[]) => void; getKeepSegments: () => Array<{ start: number; end: number }>; getWordAtTime: (time: number) => number; loadProject: (projectData: any) => void; @@ -473,6 +474,41 @@ export const useEditorStore = create()( set({ zonePreviewPaddingSeconds: nextSeconds }); }, + replaceWordRange: (startIndex, endIndex, newWords) => { + const { words } = get(); + if (startIndex < 0 || endIndex >= words.length || startIndex > endIndex) return; + + // Replace words in the range with new words + const before = words.slice(0, startIndex); + const after = words.slice(endIndex + 1); + const updatedWords = [...before, ...newWords, ...after]; + + // Rebuild segments from updated words, grouping by speaker + const rebuiltSegments: Segment[] = []; + let wordIdx = 0; + let cumIdx = 0; + while (wordIdx < updatedWords.length) { + const currentSpeaker = updatedWords[wordIdx].speaker; + const groupWords: Word[] = []; + while (wordIdx < updatedWords.length && updatedWords[wordIdx].speaker === currentSpeaker) { + groupWords.push(updatedWords[wordIdx]); + wordIdx++; + } + rebuiltSegments.push({ + id: rebuiltSegments.length, + start: groupWords[0].start, + end: groupWords[groupWords.length - 1].end, + text: groupWords.map((w) => w.word).join(' '), + words: groupWords, + speaker: currentSpeaker, + globalStartIndex: cumIdx, + }); + cumIdx += groupWords.length; + } + + set({ words: updatedWords, segments: rebuiltSegments, selectedWordIndices: [] }); + }, + getKeepSegments: () => { const { words, cutRanges, duration } = get(); if (words.length === 0) return [{ start: 0, end: duration }];