From 1678d28db72b57c755dcb9f1c3f65763502a87fe Mon Sep 17 00:00:00 2001
From: dillonj <dilljens@gmail.com>
Date: Mon, 4 May 2026 23:54:14 -0600
Subject: [PATCH] able to re-transcribe

---
 .github/copilot-instructions.md              |   2 +
 FEATURES.md                                  |   4 +-
 backend/routers/export.py                    |  38 ++++++
 backend/routers/transcribe.py                |  96 ++++++++++++++++
 backend/services/transcription.py            |   6 +-
 frontend/src/components/ExportDialog.tsx     | 115 ++++++++++++++++++-
 frontend/src/components/TranscriptEditor.tsx |  58 +++++++++-
 frontend/src/store/editorStore.ts            |  36 ++++++
 8 files changed, 346 insertions(+), 9 deletions(-)
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index e96803a..5809cfd 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -71,6 +71,8 @@ Use project virtualenvs where available (`.venv312`, `.venv`, or `venv`) for bac
 - **Normalization moved to export**: No longer a standalone button. Integrated as `normalizeAudio` checkbox + LUFS target selector in ExportPanel. Sent as `normalize_loudness`/`normalize_target_lufs` to backend. Applied via `loudnorm` in FFmpeg audio filter chain during export.
 - **Export camelCase fix**: `ExportDialog.tsx` now manually maps `gainRanges`→`gain_db` and `muteRanges`→`{start,end}` before sending to backend. Prevents Pydantic v2 field rejection.
 - **color-scheme:dark**: All `<select>` elements in ExportDialog use `[color-scheme:dark]` to ensure readable native dropdown popups on Linux WebKit.
+- **Re-transcribe selection (#013)**: Backend `POST /transcribe/segment` extracts audio via FFmpeg, runs Whisper, adjusts timestamps. Frontend: "Re-transcribe" button on selected words in TranscriptEditor; `replaceWordRange()` store action swaps words + rebuilds segments by speaker.
+- **Transcript-only export (#024)**: "Export Transcript Only" in ExportDialog with .txt/.srt options. **Pure frontend** — generates content in-browser, writes via Tauri `writeFile`. No backend dependency. Respects word cuts.
 
 ## Update Rules (Important)
 
diff --git a/FEATURES.md b/FEATURES.md
index c373473..16d90cd 100644
--- a/FEATURES.md
+++ b/FEATURES.md
@@ -8,13 +8,13 @@ Features are grouped by priority. Check off items as they are implemented.
 
 - [x] [#015] **Word text correction** — double-click any word to edit its text in-place. Preserves timing and confidence. Pure frontend state change. (2026-05-04)
 
-- [ ] [#013] **Re-transcribe selection** — if Whisper gets a section wrong, let the user select a word range and re-run transcription on just that segment (optionally with a different model or language).
+- [x] [#013] **Re-transcribe selection** — select any word range in the transcript and click "Re-transcribe" to re-run Whisper on just that segment. Backend extracts audio via FFmpeg, transcribes with offset-adjusted timestamps. (2026-05-04)
 
 - [x] [#012] **Low-confidence word highlighting** — words with `confidence < 0.6` (configurable in Settings) get an orange dotted underline. Hover shows exact confidence %. (2026-05-04)
 
 - [x] [#018] **Audio normalization / loudness targeting** — Integrated checkbox in Export panel with LUFS target selector (-14 YouTube, -16 Spotify, -23 Broadcast). Applied during export via FFmpeg `loudnorm` in the audio filter chain. No intermediate files. (2026-05-04)
 
-- [ ] [#024] **Export to transcript text / SRT only** — some users just want a clean `.txt` or `.srt` of the edited transcript without rendering video.
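+- [x] [#024] **Export to transcript text / SRT only** — "Export Transcript Only" section in Export panel with format selector (plain text or SRT). Content is generated in the frontend and written via `window.electronAPI.writeFile`; a `POST /export/transcript` backend endpoint is also available but is not called by the Export panel. Respects word cuts. (2026-05-04)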
 
 - [x] [#023] **Batch silence removal** — full-file scan + remove all pauses above threshold in one click. Implemented by `SilenceTrimmerPanel` + `POST /audio/detect-silence` (FFmpeg silencedetect).
 
diff --git a/backend/routers/export.py b/backend/routers/export.py
index a7f6d9e..855109b 100644
--- a/backend/routers/export.py
+++ b/backend/routers/export.py
@@ -55,6 +55,13 @@ class ExportRequest(BaseModel):
     deleted_indices: Optional[List[int]] = None
 
 
+class TranscriptExportRequest(BaseModel):  # request body for POST /export/transcript
+    words: List[ExportWordModel]  # words to export; list position is the index used by deleted_indices
+    deleted_indices: Optional[List[int]] = None  # positions in `words` to leave out of the output
+    output_path: str  # file the endpoint writes; missing parent directories are created
+    format: str = "txt"  # "txt" or "srt"
+
+
 def _map_ranges_to_output_timeline(
     ranges: List[dict],
     keep_segments: List[dict],
@@ -234,3 +241,34 @@ async def export_video(req: ExportRequest):
     except Exception as e:
         logger.error(f"Export error: {e}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/export/transcript")
+async def export_transcript(req: TranscriptExportRequest):
+    """Export transcript as plain text or SRT without rendering video.
+
+    Skips ``req.deleted_indices``; responds 400 to an unknown ``format``, 500 on any other failure.
+    """
+    if req.format not in ("txt", "srt"):
+        raise HTTPException(status_code=400, detail=f"Unsupported transcript format: {req.format}")
+    try:
+        from services.caption_generator import generate_srt
+
+        deleted_set = set(req.deleted_indices or [])
+        word_dicts = [w.model_dump() for w in req.words]
+
+        if req.format == "srt":
+            content = generate_srt(word_dicts, deleted_set)
+        else:
+            content = " ".join(w["word"] for i, w in enumerate(word_dicts) if i not in deleted_set)
+
+        os.makedirs(os.path.dirname(req.output_path) or ".", exist_ok=True)
+        with open(req.output_path, "w", encoding="utf-8") as f:
+            f.write(content)
+
+        logger.info("Transcript exported to %s (format=%s)", req.output_path, req.format)
+        return {"status": "ok", "output_path": req.output_path}
+
+    except Exception as e:
+        logger.error(f"Transcript export failed: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
diff --git a/backend/routers/transcribe.py b/backend/routers/transcribe.py
index c03c209..ba1ccb3 100644
--- a/backend/routers/transcribe.py
+++ b/backend/routers/transcribe.py
@@ -51,3 +51,99 @@ async def transcribe(req: TranscribeRequest):
     except Exception as e:
         logger.error(f"Transcription failed: {e}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))
+
+
+class ReTranscribeSegmentRequest(BaseModel):  # request body for POST /transcribe/segment
+    file_path: str  # media file handed to FFmpeg as the input (-i)
+    start: float  # passed to FFmpeg -ss; also the offset added to the returned timestamps
+    end: float  # passed to FFmpeg -to
+    model: str = "base"  # forwarded to transcribe_audio as model_name
+    language: Optional[str] = None  # forwarded to transcribe_audio; None presumably means auto-detect — confirm
+
+
+@router.post("/transcribe/segment")
+async def transcribe_segment(req: ReTranscribeSegmentRequest):
+    """
+    Re-transcribe a specific segment of audio.
+    Extracts the segment with FFmpeg, transcribes it, and returns words
+    with timestamps adjusted to the original file timeline.
+    Responds 404 if the input file does not exist and 500 on any other failure.
+    """
+    import shutil
+    import subprocess
+    import tempfile
+    import os
+
+    # FFmpeg reports a missing input only through its exit code, so check up front.
+    if not os.path.isfile(req.file_path):
+        raise HTTPException(status_code=404, detail=f"File not found: {req.file_path}")
+
+    # Extract the segment to a temp file
+    tmp_dir = tempfile.mkdtemp(prefix="talkedit_segment_")
+    try:
+        segment_path = os.path.join(tmp_dir, "segment.wav")
+
+        cmd = [
+            "ffmpeg", "-y",
+            "-i", req.file_path,
+            "-ss", str(req.start),
+            "-to", str(req.end),
+            "-vn",
+            "-acodec", "pcm_s16le",
+            "-ar", "16000",
+            "-ac", "1",
+            segment_path,
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode != 0:
+            raise RuntimeError(f"Segment extraction failed: {result.stderr[-300:]}")
+
+        # Transcribe the segment — try GPU first, fall back to CPU
+        try:
+            segment_result = transcribe_audio(
+                file_path=segment_path,
+                model_name=req.model,
+                use_gpu=True,
+                use_cache=False,
+                language=req.language,
+            )
+        except Exception as gpu_err:
+            logger.warning("GPU transcription failed (%s), falling back to CPU", gpu_err)
+            segment_result = transcribe_audio(
+                file_path=segment_path,
+                model_name=req.model,
+                use_gpu=False,
+                use_cache=False,
+                language=req.language,
+            )
+
+        # Shift timestamps onto the original file timeline. Each dict is shifted once,
+        # so words shared by the flat list and a segment never get the offset twice.
+        offset = req.start
+        shifted_ids = set()
+
+        def shift(item):
+            if id(item) not in shifted_ids:
+                shifted_ids.add(id(item))
+                item["start"] = round(item["start"] + offset, 3)
+                item["end"] = round(item["end"] + offset, 3)
+            return item
+
+        adjusted_words = [shift(w) for w in segment_result.get("words", [])]
+        adjusted_segments = []
+        for seg in segment_result.get("segments", []):
+            for w in seg.get("words", []):
+                shift(w)
+            adjusted_segments.append(shift(seg))
+
+        return {
+            "words": adjusted_words,
+            "segments": adjusted_segments,
+            "language": segment_result.get("language", "en"),
+        }
+    except Exception as e:
+        logger.error(f"Segment transcription failed: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        # Best-effort cleanup on success and failure alike.
+        shutil.rmtree(tmp_dir, ignore_errors=True)
diff --git a/backend/services/transcription.py b/backend/services/transcription.py
index 38b9f34..40fe590 100644
--- a/backend/services/transcription.py
+++ b/backend/services/transcription.py
@@ -48,11 +48,11 @@ def _load_model(model_name: str, device: torch.device):
         compute_type = "float16" if device.type == "cuda" else "int8"
         model = whisperx.load_model(
             model_name,
-            device=str(device),
+            device=device.type,  # use "cuda" not "cuda:0" — some WhisperX versions don't support device ordinal
             compute_type=compute_type,
         )
     else:
-        model = whisper.load_model(model_name, device=device)
+        model = whisper.load_model(model_name, device=str(device))
 
     _model_cache[cache_key] = model
     return model
@@ -112,7 +112,7 @@ def _transcribe_whisperx(model, audio_path: str, device: torch.device, language:
 
     align_model, align_metadata = whisperx.load_align_model(
         language_code=detected_language,
-        device=str(device),
+        device=device.type,
     )
     aligned = whisperx.align(
         result["segments"],
diff --git a/frontend/src/components/ExportDialog.tsx b/frontend/src/components/ExportDialog.tsx
index 5111aac..f006f4d 100644
--- a/frontend/src/components/ExportDialog.tsx
+++ b/frontend/src/components/ExportDialog.tsx
@@ -1,6 +1,6 @@
 import { useState, useCallback } from 'react';
 import { useEditorStore } from '../store/editorStore';
-import { Download, Loader2, Zap, Cog, Info, Volume2 } from 'lucide-react';
+import { Download, Loader2, Zap, Cog, Info, Volume2, FileText } from 'lucide-react';
 import type { ExportOptions } from '../types/project';
 
 export default function ExportDialog() {
@@ -24,6 +24,82 @@ export default function ExportDialog() {
     normalizeTarget: -14,
   });
   const [exportError, setExportError] = useState<string | null>(null);
+  const [transcriptFormat, setTranscriptFormat] = useState<'txt' | 'srt'>('txt');
+  const [isTranscribingTranscript, setIsTranscribingTranscript] = useState(false);
+
+  /**
+   * Export the edited transcript (every word not fully inside a cut range) as .txt or .srt.
+   * Content is generated in the frontend and written via `window.electronAPI.writeFile`;
+   * failures are reported through `exportError`.
+   */
+  const handleTranscriptExport = useCallback(async () => {
+    if (!videoPath || words.length === 0) return;
+
+    const defaultExt = transcriptFormat === 'srt' ? 'srt' : 'txt';
+    const outputPath = await window.electronAPI?.saveFile({
+      defaultPath: videoPath.replace(/\.[^.]+$/, `_transcript.${defaultExt}`),
+      filters: transcriptFormat === 'srt'
+        ? [{ name: 'SRT Subtitles', extensions: ['srt'] }]
+        : [{ name: 'Text File', extensions: ['txt'] }],
+    });
+    if (!outputPath) return;
+
+    setExportError(null); // drop any stale error from an earlier attempt
+    setIsTranscribingTranscript(true);
+    try {
+      // Compute deleted word set
+      const deletedSet = new Set<number>();
+      for (const range of cutRanges) {
+        for (let i = 0; i < words.length; i++) {
+          if (words[i].start >= range.start && words[i].end <= range.end) {
+            deletedSet.add(i);
+          }
+        }
+      }
+      const activeWords = words.filter((_, i) => !deletedSet.has(i));
+
+      // Generate content entirely on the frontend — no backend needed
+      let content: string;
+      if (transcriptFormat === 'srt') {
+        // Format from whole milliseconds: `Math.floor((s % 1) * 1000)` turns 2.3 s into 299 ms.
+        const fmt = (s: number) => {
+          const totalMs = Math.round(s * 1000);
+          const h = Math.floor(totalMs / 3600000);
+          const m = Math.floor((totalMs % 3600000) / 60000);
+          const sec = Math.floor((totalMs % 60000) / 1000);
+          const ms = totalMs % 1000;
+          return `${String(h).padStart(2, '0')}:${String(m).padStart(2, '0')}:${String(sec).padStart(2, '0')},${String(ms).padStart(3, '0')}`;
+        };
+
+        const lines: string[] = [];
+        let counter = 1;
+        const wordsPerLine = 8;
+        for (let ci = 0; ci < activeWords.length; ci += wordsPerLine) {
+          const chunk = activeWords.slice(ci, ci + wordsPerLine);
+          const startTime = chunk[0].start;
+          const endTime = chunk[chunk.length - 1].end;
+
+          lines.push(String(counter));
+          lines.push(`${fmt(startTime)} --> ${fmt(endTime)}`);
+          lines.push(chunk.map((w) => w.word).join(' '));
+          lines.push('');
+          counter++;
+        }
+        content = lines.join('\n');
+      } else {
+        // Plain text
+        content = activeWords.map((w) => w.word).join(' ');
+      }
+
+      // Write through the desktop bridge (window.electronAPI) — no backend round-trip
+      await window.electronAPI?.writeFile(outputPath, content);
+    } catch (err) {
+      console.error('Transcript export error:', err);
+      setExportError(err instanceof Error ? err.message : 'Transcript export failed');
+    } finally {
+      setIsTranscribingTranscript(false);
+    }
+  }, [videoPath, words, cutRanges, transcriptFormat]);
 
   const HANDLE_EXPORT_filters = useCallback(() => {
     const ext = options.format;
@@ -220,7 +296,40 @@ export default function ExportDialog() {
         ]}
       />
 
-      {/* Export button */}
+      {/* Transcript-only export */}
+      <div className="space-y-2 pt-1 border-t border-editor-border">
+        <h4 className="text-xs font-semibold flex items-center gap-1.5">
+          <FileText className="w-3.5 h-3.5" />
+          Export Transcript Only
+        </h4>
+        <p className="text-[10px] text-editor-text-muted leading-relaxed">
+          Export the edited transcript as plain text or SRT without rendering video.
+        </p>
+        <div className="flex items-center gap-2">
+          <select
+            value={transcriptFormat}
+            onChange={(e) => setTranscriptFormat(e.target.value as 'txt' | 'srt')}
+            className="flex-1 px-2 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:outline-none focus:border-editor-accent [color-scheme:dark]"
+          >
+            <option value="txt">Plain Text (.txt)</option>
+            <option value="srt">Subtitles (.srt)</option>
+          </select>
+          <button
+            onClick={handleTranscriptExport}
+            disabled={isTranscribingTranscript || words.length === 0}
+            className="flex items-center gap-1.5 px-3 py-1.5 text-xs rounded bg-editor-accent/20 text-editor-accent hover:bg-editor-accent/30 disabled:opacity-40 transition-colors"
+          >
+            {isTranscribingTranscript ? (
+              <Loader2 className="w-3 h-3 animate-spin" />
+            ) : (
+              <FileText className="w-3 h-3" />
+            )}
+            Export
+          </button>
+        </div>
+      </div>
+
+      {/* Export video button */}
       <button
         onClick={handleExport}
         disabled={isExporting || !videoPath}
@@ -234,7 +343,7 @@ export default function ExportDialog() {
         ) : (
           <>
             <Download className="w-4 h-4" />
-            Export
+            Export Video
           </>
         )}
       </button>
diff --git a/frontend/src/components/TranscriptEditor.tsx b/frontend/src/components/TranscriptEditor.tsx
index 98ac465..327b656 100644
--- a/frontend/src/components/TranscriptEditor.tsx
+++ b/frontend/src/components/TranscriptEditor.tsx
@@ -1,7 +1,7 @@
 import { useCallback, useRef, useEffect, useMemo, useState } from 'react';
 import { useEditorStore } from '../store/editorStore';
 import { Virtuoso } from 'react-virtuoso';
-import { Scissors, VolumeX, SlidersHorizontal, Gauge, RotateCcw, Search, ChevronUp, ChevronDown, X } from 'lucide-react';
+import { Scissors, VolumeX, SlidersHorizontal, Gauge, RotateCcw, Search, ChevronUp, ChevronDown, X, RefreshCw } from 'lucide-react';
 
 interface TranscriptEditorProps {
   cutMode: boolean;
@@ -30,6 +30,9 @@ export default function TranscriptEditor({
   const hoveredWordIndex = useEditorStore((s) => s.hoveredWordIndex);
   const setSelectedWordIndices = useEditorStore((s) => s.setSelectedWordIndices);
   const setHoveredWordIndex = useEditorStore((s) => s.setHoveredWordIndex);
+  const videoPath = useEditorStore((s) => s.videoPath);
+  const backendUrl = useEditorStore((s) => s.backendUrl);
+  const replaceWordRange = useEditorStore((s) => s.replaceWordRange);
   const removeCutRange = useEditorStore((s) => s.removeCutRange);
   const removeMuteRange = useEditorStore((s) => s.removeMuteRange);
   const removeGainRange = useEditorStore((s) => s.removeGainRange);
@@ -254,6 +257,50 @@ export default function TranscriptEditor({
     setEditText('');
   }, []);
 
+  const [isReTranscribing, setIsReTranscribing] = useState(false);
+  const reTranscribeGuard = useRef(false);
+
+  // Re-run transcription for the time span of the selected words and splice the result into the store.
+  const handleReTranscribe = useCallback(async () => {
+    if (!videoPath || selectedWordIndices.length === 0 || reTranscribeGuard.current) return;
+
+    // Snapshot indices and word timings before the async gap
+    const sorted = [...selectedWordIndices].sort((a, b) => a - b);
+    const startWord = words[sorted[0]];
+    const endWord = words[sorted[sorted.length - 1]];
+    if (!startWord || !endWord) return;
+
+    reTranscribeGuard.current = true;
+    setIsReTranscribing(true);
+    try {
+      const res = await fetch(`${backendUrl}/transcribe/segment`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          file_path: videoPath,
+          start: startWord.start,
+          end: endWord.end,
+        }),
+      });
+      if (!res.ok) {
+        let detail = res.statusText;
+        try { const body = await res.json(); if (body?.detail) detail = String(body.detail); } catch { /* keep statusText fallback */ }
+        throw new Error(`Re-transcribe failed: ${detail}`);
+      }
+      const data: unknown = await res.json();
+      const newWords = (data as { words?: unknown } | null)?.words;
+      // Validate before touching the store so a malformed body cannot corrupt the transcript.
+      if (!Array.isArray(newWords)) throw new Error('Re-transcribe failed: response contained no word list');
+      replaceWordRange(sorted[0], sorted[sorted.length - 1], newWords);
+    } catch (err) {
+      console.error('Re-transcribe error:', err);
+      alert(err instanceof Error ? err.message : 'Re-transcribe failed');
+    } finally {
+      reTranscribeGuard.current = false;
+      setIsReTranscribing(false);
+    }
+  }, [videoPath, selectedWordIndices, words, backendUrl, replaceWordRange]);
+
   const handleWordDoubleClick = useCallback((index: number) => {
     if (cutMode || muteMode || gainMode || speedMode) return;
     startEditing(index);
@@ -535,6 +582,15 @@ export default function TranscriptEditor({
               <Gauge className="w-3 h-3" />
               Speed {speedModeValue.toFixed(2)}x
             </button>
+            <button
+              onClick={handleReTranscribe}
+              disabled={isReTranscribing}
+              className="flex items-center gap-1 px-2 py-1 text-xs bg-purple-500/20 text-purple-300 rounded hover:bg-purple-500/30 disabled:opacity-40 transition-colors"
+              title="Re-run Whisper transcription on this segment"
+            >
+              <RefreshCw className={`w-3 h-3 ${isReTranscribing ? 'animate-spin' : ''}`} />
+              {isReTranscribing ? 'Re-transcribing...' : 'Re-transcribe'}
+            </button>
           </div>
         )}
       </div>
diff --git a/frontend/src/store/editorStore.ts b/frontend/src/store/editorStore.ts
index c698963..955baf3 100644
--- a/frontend/src/store/editorStore.ts
+++ b/frontend/src/store/editorStore.ts
@@ -92,6 +92,7 @@ interface EditorActions {
   setTranscribing: (active: boolean, progress?: number, status?: string) => void;
   setExporting: (active: boolean, progress?: number) => void;
   setZonePreviewPaddingSeconds: (seconds: number) => void;
+  replaceWordRange: (startIndex: number, endIndex: number, newWords: Word[]) => void;
   getKeepSegments: () => Array<{ start: number; end: number }>;
   getWordAtTime: (time: number) => number;
   loadProject: (projectData: any) => void;
@@ -473,6 +474,41 @@ export const useEditorStore = create<EditorState & EditorActions>()(
         set({ zonePreviewPaddingSeconds: nextSeconds });
       },
 
+      // Replace words[startIndex..endIndex] (inclusive) with newWords, then rebuild segments
+      // as runs of consecutive same-speaker words. Out-of-range bounds are ignored.
+      replaceWordRange: (startIndex, endIndex, newWords) => {
+        const { words } = get();
+        if (startIndex < 0 || endIndex >= words.length || startIndex > endIndex) return;
+
+        // Words that arrive without a speaker inherit the replaced range's speaker so the
+        // surrounding segment is not split apart by the grouping below.
+        const fallbackSpeaker = words[startIndex].speaker;
+        const inserted = newWords.map((w) => ({ ...w, speaker: w.speaker ?? fallbackSpeaker }));
+        const updatedWords = [...words.slice(0, startIndex), ...inserted, ...words.slice(endIndex + 1)];
+
+        const rebuiltSegments: Segment[] = [];
+        let wordIdx = 0;
+        while (wordIdx < updatedWords.length) {
+          const groupStart = wordIdx;
+          const currentSpeaker = updatedWords[wordIdx].speaker;
+          while (wordIdx < updatedWords.length && updatedWords[wordIdx].speaker === currentSpeaker) {
+            wordIdx++;
+          }
+          const groupWords = updatedWords.slice(groupStart, wordIdx);
+          rebuiltSegments.push({
+            id: rebuiltSegments.length,
+            start: groupWords[0].start,
+            end: groupWords[groupWords.length - 1].end,
+            text: groupWords.map((w) => w.word).join(' '),
+            words: groupWords,
+            speaker: currentSpeaker,
+            globalStartIndex: groupStart,
+          });
+        }
+
+        set({ words: updatedWords, segments: rebuiltSegments, selectedWordIndices: [] });
+      },
+
       getKeepSegments: () => {
         const { words, cutRanges, duration } = get();
         if (words.length === 0) return [{ start: 0, end: duration }];