Files
TalkEdit/frontend/src/components/TranscriptEditor.tsx

622 lines
26 KiB
TypeScript

import { useCallback, useRef, useEffect, useMemo, useState } from 'react';
import { useEditorStore } from '../store/editorStore';
import { useLicenseStore } from '../store/licenseStore';
import { Virtuoso } from 'react-virtuoso';
import { Scissors, VolumeX, SlidersHorizontal, Gauge, RotateCcw, Search, ChevronUp, ChevronDown, X, RefreshCw } from 'lucide-react';
interface TranscriptEditorProps {
cutMode: boolean;
muteMode: boolean;
gainMode: boolean;
gainModeDb: number;
speedMode: boolean;
speedModeValue: number;
}
export default function TranscriptEditor({
cutMode,
muteMode,
gainMode,
gainModeDb,
speedMode,
speedModeValue,
}: TranscriptEditorProps) {
const words = useEditorStore((s) => s.words);
const segments = useEditorStore((s) => s.segments);
const cutRanges = useEditorStore((s) => s.cutRanges);
const muteRanges = useEditorStore((s) => s.muteRanges);
const gainRanges = useEditorStore((s) => s.gainRanges);
const speedRanges = useEditorStore((s) => s.speedRanges);
const selectedWordIndices = useEditorStore((s) => s.selectedWordIndices);
const hoveredWordIndex = useEditorStore((s) => s.hoveredWordIndex);
const setSelectedWordIndices = useEditorStore((s) => s.setSelectedWordIndices);
const setHoveredWordIndex = useEditorStore((s) => s.setHoveredWordIndex);
const videoPath = useEditorStore((s) => s.videoPath);
const backendUrl = useEditorStore((s) => s.backendUrl);
const replaceWordRange = useEditorStore((s) => s.replaceWordRange);
const removeCutRange = useEditorStore((s) => s.removeCutRange);
const removeMuteRange = useEditorStore((s) => s.removeMuteRange);
const removeGainRange = useEditorStore((s) => s.removeGainRange);
const removeSpeedRange = useEditorStore((s) => s.removeSpeedRange);
const addCutRange = useEditorStore((s) => s.addCutRange);
const addMuteRange = useEditorStore((s) => s.addMuteRange);
const addGainRange = useEditorStore((s) => s.addGainRange);
const addSpeedRange = useEditorStore((s) => s.addSpeedRange);
const getWordAtTime = useEditorStore((s) => s.getWordAtTime);
const canEdit = useLicenseStore((s) => s.canEdit);
const selectionStart = useRef<number | null>(null);
const wasDragging = useRef(false);
const virtuosoRef = useRef<any>(null);
const zoneDragStart = useRef<number | null>(null);
const [zoneDragRange, setZoneDragRange] = useState<{ start: number; end: number } | null>(null);
const [searchOpen, setSearchOpen] = useState(false);
const [searchQuery, setSearchQuery] = useState('');
const [activeMatchIdx, setActiveMatchIdx] = useState(0);
const searchInputRef = useRef<HTMLInputElement | null>(null);
const updateWordText = useEditorStore((s) => s.updateWordText);
const [editingWordIndex, setEditingWordIndex] = useState<number | null>(null);
const [editText, setEditText] = useState('');
const editInputRef = useRef<HTMLInputElement | null>(null);
const selectedSet = useMemo(() => new Set(selectedWordIndices), [selectedWordIndices]);
const matchIndices = useMemo(() => {
const q = searchQuery.trim().toLowerCase();
if (!q) return [] as number[];
const matches: number[] = [];
for (let i = 0; i < words.length; i++) {
if (words[i].word.toLowerCase().includes(q)) matches.push(i);
}
return matches;
}, [searchQuery, words]);
const matchSet = useMemo(() => new Set(matchIndices), [matchIndices]);
const safeActiveMatchIdx = matchIndices.length === 0
? 0
: Math.min(activeMatchIdx, matchIndices.length - 1);
const jumpToMatch = useCallback((idx: number) => {
if (matchIndices.length === 0) return;
const nextIdx = ((idx % matchIndices.length) + matchIndices.length) % matchIndices.length;
setActiveMatchIdx(nextIdx);
const wordIndex = matchIndices[nextIdx];
const el = document.getElementById(`word-${wordIndex}`);
if (el) {
el.scrollIntoView({ behavior: 'smooth', block: 'center', inline: 'nearest' });
}
}, [matchIndices]);
useEffect(() => {
const onKeyDown = (e: KeyboardEvent) => {
const target = e.target as HTMLElement | null;
const isInInput = !!target && (target.tagName === 'INPUT' || target.tagName === 'TEXTAREA' || target.tagName === 'SELECT');
if ((e.ctrlKey || e.metaKey) && e.key.toLowerCase() === 'f') {
e.preventDefault();
setSearchOpen(true);
requestAnimationFrame(() => searchInputRef.current?.focus());
return;
}
if (!searchOpen) return;
if (e.key === 'Escape') {
e.preventDefault();
setSearchOpen(false);
return;
}
if (e.key === 'Enter' && !isInInput) {
e.preventDefault();
jumpToMatch(safeActiveMatchIdx + (e.shiftKey ? -1 : 1));
return;
}
if (e.key === 'Enter' && isInInput && target === searchInputRef.current) {
e.preventDefault();
jumpToMatch(safeActiveMatchIdx + (e.shiftKey ? -1 : 1));
}
};
window.addEventListener('keydown', onKeyDown);
return () => window.removeEventListener('keydown', onKeyDown);
}, [jumpToMatch, searchOpen, safeActiveMatchIdx]);
const [activeWordIndex, setActiveWordIndex] = useState(-1);
useEffect(() => {
if (words.length === 0) return;
const interval = setInterval(() => {
const video = document.querySelector('video') as HTMLVideoElement | null;
if (!video) return;
const idx = getWordAtTime(video.currentTime);
setActiveWordIndex((prev) => (prev === idx ? prev : idx));
}, 250);
return () => clearInterval(interval);
}, [words, getWordAtTime]);
// Auto-scroll to active segment via Virtuoso
useEffect(() => {
if (activeWordIndex < 0 || segments.length === 0) return;
const segIdx = segments.findIndex((seg) => {
const start = seg.globalStartIndex ?? 0;
return activeWordIndex >= start && activeWordIndex < start + seg.words.length;
});
if (segIdx >= 0 && virtuosoRef.current) {
virtuosoRef.current.scrollIntoView({ index: segIdx, behavior: 'smooth', align: 'center' });
}
}, [activeWordIndex, segments]);
const handleWordMouseDown = useCallback(
(index: number, e: React.MouseEvent) => {
e.preventDefault();
// Ctrl+click → seek video to this word's start time
if (e.ctrlKey) {
const word = words[index];
if (word) {
const video = document.querySelector('video') as HTMLVideoElement | null;
if (video) video.currentTime = word.start;
}
return;
}
if (cutMode || muteMode || gainMode || speedMode) {
zoneDragStart.current = index;
setZoneDragRange({ start: index, end: index });
selectionStart.current = null;
return;
}
wasDragging.current = false;
if (e.shiftKey && selectedWordIndices.length > 0) {
const first = selectedWordIndices[0];
const start = Math.min(first, index);
const end = Math.max(first, index);
const indices = [];
for (let i = start; i <= end; i++) indices.push(i);
setSelectedWordIndices(indices);
} else {
selectionStart.current = index;
setSelectedWordIndices([index]);
}
},
[words, selectedWordIndices, setSelectedWordIndices, cutMode, muteMode, gainMode, speedMode],
);
const handleWordMouseEnter = useCallback(
(index: number) => {
setHoveredWordIndex(index);
if (zoneDragStart.current !== null) {
setZoneDragRange({
start: Math.min(zoneDragStart.current, index),
end: Math.max(zoneDragStart.current, index),
});
return;
}
if (selectionStart.current !== null) {
wasDragging.current = true;
const start = Math.min(selectionStart.current, index);
const end = Math.max(selectionStart.current, index);
const indices = [];
for (let i = start; i <= end; i++) indices.push(i);
setSelectedWordIndices(indices);
}
},
[setHoveredWordIndex, setSelectedWordIndices],
);
const handleMouseUp = useCallback(() => {
if (zoneDragStart.current !== null && zoneDragRange) {
const startWord = words[zoneDragRange.start];
const endWord = words[zoneDragRange.end];
if (startWord && endWord && canEdit) {
if (cutMode) addCutRange(startWord.start, endWord.end);
if (muteMode) addMuteRange(startWord.start, endWord.end);
if (gainMode) addGainRange(startWord.start, endWord.end, gainModeDb);
if (speedMode) addSpeedRange(startWord.start, endWord.end, speedModeValue);
}
}
zoneDragStart.current = null;
setZoneDragRange(null);
selectionStart.current = null;
}, [zoneDragRange, words, cutMode, muteMode, gainMode, gainModeDb, speedMode, speedModeValue, addCutRange, addMuteRange, addGainRange, addSpeedRange, canEdit]);
const handleClickOutside = useCallback(
(e: React.MouseEvent) => {
if (wasDragging.current) {
wasDragging.current = false;
return;
}
if ((e.target as HTMLElement).dataset.wordIndex === undefined) {
setSelectedWordIndices([]);
}
},
[setSelectedWordIndices],
);
const startEditing = useCallback((index: number) => {
const word = words[index];
if (!word) return;
setEditingWordIndex(index);
setEditText(word.word);
requestAnimationFrame(() => {
editInputRef.current?.focus();
editInputRef.current?.select();
});
}, [words]);
const commitEdit = useCallback(() => {
if (editingWordIndex === null) return;
const trimmed = editText.trim();
if (trimmed && trimmed !== words[editingWordIndex]?.word) {
updateWordText(editingWordIndex, trimmed);
}
setEditingWordIndex(null);
setEditText('');
}, [editingWordIndex, editText, words, updateWordText]);
const cancelEdit = useCallback(() => {
setEditingWordIndex(null);
setEditText('');
}, []);
const [isReTranscribing, setIsReTranscribing] = useState(false);
const reTranscribeGuard = useRef(false);
const handleReTranscribe = useCallback(async () => {
if (!videoPath || selectedWordIndices.length === 0 || reTranscribeGuard.current) return;
reTranscribeGuard.current = true;
setIsReTranscribing(true);
// Snapshot indices and word timings before the async gap
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
const startWord = words[sorted[0]];
const endWord = words[sorted[sorted.length - 1]];
if (!startWord || !endWord) {
reTranscribeGuard.current = false;
setIsReTranscribing(false);
return;
}
try {
const res = await fetch(`${backendUrl}/transcribe/segment`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
file_path: videoPath,
start: startWord.start,
end: endWord.end,
}),
});
if (!res.ok) {
let detail = res.statusText;
try { const body = await res.json(); if (body?.detail) detail = String(body.detail); } catch { /* keep statusText fallback */ }
throw new Error(`Re-transcribe failed: ${detail}`);
}
const data = await res.json();
replaceWordRange(sorted[0], sorted[sorted.length - 1], data.words);
} catch (err) {
console.error('Re-transcribe error:', err);
alert(err instanceof Error ? err.message : 'Re-transcribe failed');
} finally {
reTranscribeGuard.current = false;
setIsReTranscribing(false);
}
}, [videoPath, selectedWordIndices, words, backendUrl, replaceWordRange]);
const handleWordDoubleClick = useCallback((index: number) => {
if (cutMode || muteMode || gainMode || speedMode) return;
if (!canEdit) return;
startEditing(index);
}, [cutMode, muteMode, gainMode, speedMode, startEditing, canEdit]);
// Focus edit input when it appears
useEffect(() => {
if (editingWordIndex !== null && editInputRef.current) {
editInputRef.current.focus();
editInputRef.current.select();
}
}, [editingWordIndex]);
// Global key handler for edit mode
useEffect(() => {
const onKeyDown = (e: KeyboardEvent) => {
if (editingWordIndex === null) return;
if (e.key === 'Enter') {
e.preventDefault();
commitEdit();
} else if (e.key === 'Escape') {
e.preventDefault();
cancelEdit();
}
};
window.addEventListener('keydown', onKeyDown);
return () => window.removeEventListener('keydown', onKeyDown);
}, [editingWordIndex, commitEdit, cancelEdit]);
const cutSelectedWords = useCallback(() => {
if (selectedWordIndices.length === 0) return;
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
const startTime = words[sorted[0]].start;
const endTime = words[sorted[sorted.length - 1]].end;
addCutRange(startTime, endTime);
}, [selectedWordIndices, words, addCutRange]);
const muteSelectedWords = useCallback(() => {
if (selectedWordIndices.length === 0) return;
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
const startTime = words[sorted[0]].start;
const endTime = words[sorted[sorted.length - 1]].end;
addMuteRange(startTime, endTime);
}, [selectedWordIndices, words, addMuteRange]);
const gainSelectedWords = useCallback(() => {
if (selectedWordIndices.length === 0) return;
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
const startTime = words[sorted[0]].start;
const endTime = words[sorted[sorted.length - 1]].end;
addGainRange(startTime, endTime, gainModeDb);
}, [selectedWordIndices, words, addGainRange, gainModeDb]);
const speedSelectedWords = useCallback(() => {
if (selectedWordIndices.length === 0) return;
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
const startTime = words[sorted[0]].start;
const endTime = words[sorted[sorted.length - 1]].end;
addSpeedRange(startTime, endTime, speedModeValue);
}, [selectedWordIndices, words, addSpeedRange, speedModeValue]);
const getCutRangeForWord = useCallback(
(wordIndex: number) => {
const word = words[wordIndex];
if (!word) return null;
return cutRanges.find((r) => word.start >= r.start && word.end <= r.end);
},
[words, cutRanges],
);
const getMuteRangeForWord = useCallback(
(wordIndex: number) => {
const word = words[wordIndex];
if (!word) return null;
return muteRanges.find((r) => word.start >= r.start && word.end <= r.end);
},
[words, muteRanges],
);
const getGainRangeForWord = useCallback(
(wordIndex: number) => {
const word = words[wordIndex];
if (!word) return null;
return gainRanges.find((r) => word.start >= r.start && word.end <= r.end);
},
[words, gainRanges],
);
const getSpeedRangeForWord = useCallback(
(wordIndex: number) => {
const word = words[wordIndex];
if (!word) return null;
return speedRanges.find((r) => word.start >= r.start && word.end <= r.end);
},
[words, speedRanges],
);
const renderSegment = useCallback(
(index: number) => {
const segment = segments[index];
if (!segment) return null;
return (
<div className="mb-3 px-4">
{segment.speaker && (
<div className="text-xs text-editor-accent font-medium mb-1">
{segment.speaker}
</div>
)}
<p className="text-sm leading-relaxed flex flex-wrap">
{segment.words.map((word, localIndex) => {
const globalIndex = (segment.globalStartIndex ?? 0) + localIndex;
const isSelected = selectedSet.has(globalIndex);
const isActive = globalIndex === activeWordIndex;
const isHovered = globalIndex === hoveredWordIndex;
const isZoneDragSelected = zoneDragRange
? globalIndex >= zoneDragRange.start && globalIndex <= zoneDragRange.end
: false;
const cutRange = getCutRangeForWord(globalIndex);
const muteRange = getMuteRangeForWord(globalIndex);
const gainRange = getGainRangeForWord(globalIndex);
const speedRange = getSpeedRangeForWord(globalIndex);
const isSearchMatch = matchSet.has(globalIndex);
const isActiveSearchMatch = matchIndices.length > 0 && matchIndices[safeActiveMatchIdx] === globalIndex;
const isEditing = globalIndex === editingWordIndex;
// Low-confidence highlighting
const CONFIDENCE_THRESHOLD_KEY = 'talkedit:confidenceThreshold';
const storedThreshold = typeof window !== 'undefined' ? Number(window.localStorage.getItem(CONFIDENCE_THRESHOLD_KEY)) : 0;
const confidenceThreshold = Number.isFinite(storedThreshold) ? storedThreshold : 0.6;
const isLowConfidence = word.confidence > 0 && word.confidence < confidenceThreshold && !cutRange && !muteRange && !gainRange && !speedRange;
const confidencePct = word.confidence > 0 ? Math.round(word.confidence * 100) : null;
return (
<span
key={globalIndex}
id={`word-${globalIndex}`}
data-word-index={globalIndex}
title={`${word.start.toFixed(2)}s — confidence: ${confidencePct !== null ? confidencePct + '%' : 'N/A'}${isLowConfidence ? ' ⚠️ Low confidence' : ''} — Ctrl+click to seek, double-click to edit`}
onMouseDown={(e) => handleWordMouseDown(globalIndex, e)}
onMouseEnter={() => handleWordMouseEnter(globalIndex)}
onMouseLeave={() => setHoveredWordIndex(null)}
onDoubleClick={() => handleWordDoubleClick(globalIndex)}
className={`
relative px-[2px] py-[1px] rounded cursor-pointer transition-colors
${cutRange ? 'bg-red-500/20 text-red-100' : ''}
${muteRange ? 'bg-blue-500/20 text-blue-100' : ''}
${gainRange ? 'bg-amber-500/20 text-amber-100' : ''}
${speedRange ? 'bg-emerald-500/20 text-emerald-100' : ''}
${isZoneDragSelected && cutMode ? 'bg-red-500/30 ring-1 ring-red-400/60' : ''}
${isZoneDragSelected && muteMode ? 'bg-blue-500/30 ring-1 ring-blue-400/60' : ''}
${isZoneDragSelected && gainMode ? 'bg-amber-500/30 ring-1 ring-amber-400/60' : ''}
${isZoneDragSelected && speedMode ? 'bg-emerald-500/30 ring-1 ring-emerald-400/60' : ''}
${isSearchMatch && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-accent/15 ring-1 ring-editor-accent/35' : ''}
${isActiveSearchMatch && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-accent/35 ring-1 ring-editor-accent text-white' : ''}
${isSelected && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-word-selected text-white' : ''}
${isActive && !isSelected && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-accent/20 text-editor-accent' : ''}
${isHovered && !isSelected && !isActive && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-word-hover' : ''}
${isLowConfidence ? 'border-b border-dashed border-orange-400/60' : ''}
`}
>
{isEditing ? (
<input
ref={editInputRef}
value={editText}
onChange={(e) => setEditText(e.target.value)}
onBlur={commitEdit}
className="w-24 px-1 py-0 text-xs bg-editor-bg border border-editor-accent rounded text-editor-text focus:outline-none"
style={{ minWidth: `${Math.max(word.word.length * 8, 48)}px` }}
/>
) : (
<>{word.word}{' '}</>
)}
{(cutRange || muteRange || gainRange || speedRange) && isHovered && (
<button
onClick={(e) => {
e.stopPropagation();
if (cutRange) removeCutRange(cutRange.id);
if (muteRange) removeMuteRange(muteRange.id);
if (gainRange) removeGainRange(gainRange.id);
if (speedRange) removeSpeedRange(speedRange.id);
}}
className="absolute -top-5 left-1/2 -translate-x-1/2 flex items-center gap-0.5 px-1.5 py-0.5 bg-editor-surface border border-editor-border rounded text-[10px] text-editor-success whitespace-nowrap z-10"
>
<RotateCcw className="w-2.5 h-2.5" /> Restore
</button>
)}
</span>
);
})}
</p>
</div>
);
},
[segments, selectedSet, matchSet, matchIndices, safeActiveMatchIdx, activeWordIndex, hoveredWordIndex, handleWordMouseDown, handleWordMouseEnter, setHoveredWordIndex, getCutRangeForWord, getMuteRangeForWord, getGainRangeForWord, getSpeedRangeForWord, removeCutRange, removeMuteRange, removeGainRange, removeSpeedRange, zoneDragRange, cutMode, muteMode, gainMode, speedMode, editingWordIndex, editText, editInputRef, handleWordDoubleClick, commitEdit, setEditText],
);
return (
<div className="flex-1 flex flex-col min-h-0">
<div className="flex items-center justify-between gap-2 px-4 py-2 border-b border-editor-border shrink-0">
<div className="flex items-center gap-1.5">
<button
onClick={() => {
setSearchOpen(true);
requestAnimationFrame(() => searchInputRef.current?.focus());
}}
className="flex items-center gap-1 px-2 py-1 text-xs text-editor-text-muted hover:text-editor-text hover:bg-editor-surface rounded"
title="Find (Ctrl+F)"
>
<Search className="w-3 h-3" />
Find
</button>
{searchOpen && (
<div className="flex items-center gap-1.5 px-2 py-1 rounded border border-editor-border bg-editor-surface">
<input
ref={searchInputRef}
value={searchQuery}
onChange={(e) => {
setSearchQuery(e.target.value);
setActiveMatchIdx(0);
}}
placeholder="Search transcript"
className="w-40 bg-transparent text-xs text-editor-text focus:outline-none"
/>
<span className="text-[10px] text-editor-text-muted min-w-[52px] text-right">
{matchIndices.length === 0 ? '0/0' : `${safeActiveMatchIdx + 1}/${matchIndices.length}`}
</span>
<button
onClick={() => jumpToMatch(safeActiveMatchIdx - 1)}
className="p-0.5 rounded hover:bg-editor-bg text-editor-text-muted hover:text-editor-text"
title="Previous match (Shift+Enter)"
>
<ChevronUp className="w-3 h-3" />
</button>
<button
onClick={() => jumpToMatch(safeActiveMatchIdx + 1)}
className="p-0.5 rounded hover:bg-editor-bg text-editor-text-muted hover:text-editor-text"
title="Next match (Enter)"
>
<ChevronDown className="w-3 h-3" />
</button>
<button
onClick={() => setSearchOpen(false)}
className="p-0.5 rounded hover:bg-editor-bg text-editor-text-muted hover:text-editor-text"
title="Close search (Esc)"
>
<X className="w-3 h-3" />
</button>
</div>
)}
</div>
{selectedWordIndices.length > 0 && (
<div className="flex items-center gap-1">
<button
onClick={cutSelectedWords}
disabled={!canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-red-500/20 text-red-300 rounded hover:bg-red-500/30 transition-colors disabled:opacity-40"
>
<Scissors className="w-3 h-3" />
Cut
</button>
<button
onClick={muteSelectedWords}
disabled={!canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-blue-500/20 text-blue-300 rounded hover:bg-blue-500/30 transition-colors disabled:opacity-40"
>
<VolumeX className="w-3 h-3" />
Mute
</button>
<button
onClick={gainSelectedWords}
disabled={!canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-amber-500/20 text-amber-300 rounded hover:bg-amber-500/30 transition-colors disabled:opacity-40"
>
<SlidersHorizontal className="w-3 h-3" />
Gain ({gainModeDb > 0 ? '+' : ''}{gainModeDb.toFixed(1)} dB)
</button>
<button
onClick={speedSelectedWords}
disabled={!canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-emerald-500/20 text-emerald-300 rounded hover:bg-emerald-500/30 transition-colors disabled:opacity-40"
>
<Gauge className="w-3 h-3" />
Speed {speedModeValue.toFixed(2)}x
</button>
<button
onClick={handleReTranscribe}
disabled={isReTranscribing || !canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-purple-500/20 text-purple-300 rounded hover:bg-purple-500/30 disabled:opacity-40 transition-colors"
title="Re-run Whisper transcription on this segment"
>
<RefreshCw className={`w-3 h-3 ${isReTranscribing ? 'animate-spin' : ''}`} />
{isReTranscribing ? 'Re-transcribing...' : 'Re-transcribe'}
</button>
</div>
)}
</div>
<div
className="flex-1 min-h-0 select-none"
onMouseUp={handleMouseUp}
onClick={handleClickOutside}
>
<Virtuoso
ref={virtuosoRef}
totalCount={segments.length}
itemContent={renderSegment}
overscan={200}
className="h-full"
style={{ height: '100%' }}
/>
</div>
</div>
);
}