TalkEdit/backend/routers/transcribe.py

"""Transcription endpoint using WhisperX."""

import logging
from typing import Optional

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from services.transcription import transcribe_audio
from services.diarization import diarize_and_label

logger = logging.getLogger(__name__)
router = APIRouter()


class TranscribeRequest(BaseModel):
    """Request body accepted by the ``POST /transcribe`` endpoint."""

    # Path of the media file to process. Forwarded to transcribe_audio as
    # ``file_path`` and to diarize_and_label as ``audio_path``.
    file_path: str
    # Forwarded to transcribe_audio as ``model_name``.
    model: str = "base"
    # Forwarded to transcribe_audio unchanged. None presumably means
    # "auto-detect" -- confirm in services.transcription.
    language: Optional[str] = None
    # Forwarded to both transcribe_audio and diarize_and_label.
    use_gpu: bool = True
    # Forwarded to transcribe_audio; the caching behavior itself lives there.
    use_cache: bool = True
    # Requests the diarization step. The endpoint only runs it when this is
    # true AND hf_token is non-empty.
    diarize: bool = False
    # Forwarded to diarize_and_label. Presumably a Hugging Face access token
    # -- confirm in services.diarization.
    hf_token: Optional[str] = None
    # Forwarded to diarize_and_label. None presumably lets the service decide
    # the speaker count -- confirm in services.diarization.
    num_speakers: Optional[int] = None


@router.post("/transcribe")
async def transcribe(req: TranscribeRequest):
    """Transcribe the file named in *req* and optionally run diarization.

    ``transcribe_audio`` is always called on ``req.file_path``. When
    ``req.diarize`` is set and ``req.hf_token`` is non-empty, its result is
    then passed through ``diarize_and_label``. If diarization was requested
    without a token, the step is skipped and a warning is logged.

    Args:
        req: Parsed request body; see ``TranscribeRequest``.

    Returns:
        The value returned by the last service call that ran (the
        transcription result, or the diarized result).

    Raises:
        HTTPException: 404 when a ``FileNotFoundError`` is raised by either
            service call; 500 for any other exception, with ``str(error)``
            as the detail.
    """
    # NOTE(review): both service calls below are invoked synchronously (not
    # awaited) inside an async handler. If they are long-running they will
    # presumably stall the event loop for other requests -- confirm whether
    # they should be dispatched to a threadpool.
    try:
        result = transcribe_audio(
            file_path=req.file_path,
            model_name=req.model,
            use_gpu=req.use_gpu,
            use_cache=req.use_cache,
            language=req.language,
        )

        if req.diarize:
            if req.hf_token:
                result = diarize_and_label(
                    transcription_result=result,
                    audio_path=req.file_path,
                    hf_token=req.hf_token,
                    num_speakers=req.num_speakers,
                    use_gpu=req.use_gpu,
                )
            else:
                # Keep the best-effort behavior (return the plain result),
                # but make the skipped step visible instead of silent.
                logger.warning(
                    "Diarization requested without hf_token; skipping diarization"
                )

        return result

    except FileNotFoundError as exc:
        raise HTTPException(
            status_code=404, detail=f"File not found: {req.file_path}"
        ) from exc
    except Exception as exc:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception("Transcription failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
Initial CutScript release - Open-source AI-powered text-based video editor

CutScript is a local-first, Descript-like video editor where you edit video by editing text. Delete a word from the transcript and it's cut from the video.

Features:
- Word-level transcription with WhisperX
- Text-based video editing with undo/redo
- AI filler word removal (Ollama/OpenAI/Claude)
- AI clip creation for shorts
- Waveform timeline with virtualized transcript
- FFmpeg stream-copy (fast) and re-encode (4K) export
- Caption burn-in and sidecar SRT generation
- Studio Sound audio enhancement (DeepFilterNet)
- Keyboard shortcuts (J/K/L, Space, Delete, Ctrl+Z/S/E)
- Encrypted API key storage
- Project save/load (.aive files)

Architecture:
- Electron + React + Tailwind (frontend)
- FastAPI + Python (backend)
- WhisperX for transcription
- FFmpeg for video processing
- Multi-provider AI support

Performance optimizations:
- RAF-throttled time updates
- Zustand selectors for granular subscriptions
- Dual-canvas waveform rendering
- Virtualized transcript with react-virtuoso

Built on top of DataAnts-AI/VideoTranscriber, completely rewritten as a desktop application.

License: MIT

2026-03-03 06:31:04 -05:00
			`"""Transcription endpoint using WhisperX."""`

			`import logging`
			`from typing import Optional`

			`from fastapi import APIRouter, HTTPException`
			`from pydantic import BaseModel`

			`from services.transcription import transcribe_audio`
			`from services.diarization import diarize_and_label`

			`logger = logging.getLogger(__name__)`
			`router = APIRouter()`


			`class TranscribeRequest(BaseModel):`
			`file_path: str`
			`model: str = "base"`
			`language: Optional[str] = None`
			`use_gpu: bool = True`
			`use_cache: bool = True`
			`diarize: bool = False`
			`hf_token: Optional[str] = None`
			`num_speakers: Optional[int] = None`


			`@router.post("/transcribe")`
			`async def transcribe(req: TranscribeRequest):`
			`try:`
			`result = transcribe_audio(`
			`file_path=req.file_path,`
			`model_name=req.model,`
			`use_gpu=req.use_gpu,`
			`use_cache=req.use_cache,`
			`language=req.language,`
			`)`

			`if req.diarize and req.hf_token:`
			`result = diarize_and_label(`
			`transcription_result=result,`
			`audio_path=req.file_path,`
			`hf_token=req.hf_token,`
			`num_speakers=req.num_speakers,`
			`use_gpu=req.use_gpu,`
			`)`

			`return result`

			`except FileNotFoundError:`
			`raise HTTPException(status_code=404, detail=f"File not found: {req.file_path}")`
			`except Exception as e:`
			`logger.error(f"Transcription failed: {e}", exc_info=True)`
			`raise HTTPException(status_code=500, detail=str(e))`