Initial CutScript release - Open-source AI-powered text-based video editor
CutScript is a local-first, Descript-like video editor where you edit video by editing text. Delete a word from the transcript and it's cut from the video. Features: - Word-level transcription with WhisperX - Text-based video editing with undo/redo - AI filler word removal (Ollama/OpenAI/Claude) - AI clip creation for shorts - Waveform timeline with virtualized transcript - FFmpeg stream-copy (fast) and re-encode (4K) export - Caption burn-in and sidecar SRT generation - Studio Sound audio enhancement (DeepFilterNet) - Keyboard shortcuts (J/K/L, Space, Delete, Ctrl+Z/S/E) - Encrypted API key storage - Project save/load (.aive files) Architecture: - Electron + React + Tailwind (frontend) - FastAPI + Python (backend) - WhisperX for transcription - FFmpeg for video processing - Multi-provider AI support Performance optimizations: - RAF-throttled time updates - Zustand selectors for granular subscriptions - Dual-canvas waveform rendering - Virtualized transcript with react-virtuoso Built on top of DataAnts-AI/VideoTranscriber, completely rewritten as a desktop application. License: MIT
This commit is contained in:
148
backend/services/caption_generator.py
Normal file
148
backend/services/caption_generator.py
Normal file
@ -0,0 +1,148 @@
|
||||
"""
|
||||
Generate caption files (SRT, VTT, ASS) from word-level timestamps.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _format_srt_time(seconds: float) -> str:
|
||||
h = int(seconds // 3600)
|
||||
m = int((seconds % 3600) // 60)
|
||||
s = int(seconds % 60)
|
||||
ms = int((seconds % 1) * 1000)
|
||||
return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
|
||||
|
||||
|
||||
def _format_vtt_time(seconds: float) -> str:
|
||||
h = int(seconds // 3600)
|
||||
m = int((seconds % 3600) // 60)
|
||||
s = int(seconds % 60)
|
||||
ms = int((seconds % 1) * 1000)
|
||||
return f"{h:02d}:{m:02d}:{s:02d}.{ms:03d}"
|
||||
|
||||
|
||||
def _format_ass_time(seconds: float) -> str:
|
||||
h = int(seconds // 3600)
|
||||
m = int((seconds % 3600) // 60)
|
||||
s = int(seconds % 60)
|
||||
cs = int((seconds % 1) * 100)
|
||||
return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
|
||||
|
||||
|
||||
def generate_srt(
|
||||
words: List[dict],
|
||||
deleted_indices: Optional[set] = None,
|
||||
words_per_line: int = 8,
|
||||
) -> str:
|
||||
"""Generate SRT caption content from word-level timestamps."""
|
||||
deleted_indices = deleted_indices or set()
|
||||
active_words = [(i, w) for i, w in enumerate(words) if i not in deleted_indices]
|
||||
|
||||
lines = []
|
||||
counter = 1
|
||||
for chunk_start in range(0, len(active_words), words_per_line):
|
||||
chunk = active_words[chunk_start:chunk_start + words_per_line]
|
||||
if not chunk:
|
||||
continue
|
||||
|
||||
start_time = chunk[0][1]["start"]
|
||||
end_time = chunk[-1][1]["end"]
|
||||
text = " ".join(w["word"] for _, w in chunk)
|
||||
|
||||
lines.append(str(counter))
|
||||
lines.append(f"{_format_srt_time(start_time)} --> {_format_srt_time(end_time)}")
|
||||
lines.append(text)
|
||||
lines.append("")
|
||||
counter += 1
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_vtt(
|
||||
words: List[dict],
|
||||
deleted_indices: Optional[set] = None,
|
||||
words_per_line: int = 8,
|
||||
) -> str:
|
||||
"""Generate WebVTT caption content."""
|
||||
deleted_indices = deleted_indices or set()
|
||||
active_words = [(i, w) for i, w in enumerate(words) if i not in deleted_indices]
|
||||
|
||||
lines = ["WEBVTT", ""]
|
||||
for chunk_start in range(0, len(active_words), words_per_line):
|
||||
chunk = active_words[chunk_start:chunk_start + words_per_line]
|
||||
if not chunk:
|
||||
continue
|
||||
|
||||
start_time = chunk[0][1]["start"]
|
||||
end_time = chunk[-1][1]["end"]
|
||||
text = " ".join(w["word"] for _, w in chunk)
|
||||
|
||||
lines.append(f"{_format_vtt_time(start_time)} --> {_format_vtt_time(end_time)}")
|
||||
lines.append(text)
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_ass(
|
||||
words: List[dict],
|
||||
deleted_indices: Optional[set] = None,
|
||||
words_per_line: int = 8,
|
||||
style: Optional[dict] = None,
|
||||
) -> str:
|
||||
"""Generate ASS subtitle content with styling."""
|
||||
deleted_indices = deleted_indices or set()
|
||||
active_words = [(i, w) for i, w in enumerate(words) if i not in deleted_indices]
|
||||
|
||||
s = style or {}
|
||||
font = s.get("fontName", "Arial")
|
||||
size = s.get("fontSize", 48)
|
||||
color = s.get("fontColor", "&H00FFFFFF")
|
||||
bold = "-1" if s.get("bold", True) else "0"
|
||||
alignment = 2
|
||||
|
||||
header = f"""[Script Info]
|
||||
Title: AI Video Editor Captions
|
||||
ScriptType: v4.00+
|
||||
PlayResX: 1920
|
||||
PlayResY: 1080
|
||||
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
Style: Default,{font},{size},{color},&H000000FF,&H00000000,&H80000000,{bold},0,0,0,100,100,0,0,1,2,1,{alignment},20,20,40,1
|
||||
|
||||
[Events]
|
||||
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
"""
|
||||
|
||||
events = []
|
||||
for chunk_start in range(0, len(active_words), words_per_line):
|
||||
chunk = active_words[chunk_start:chunk_start + words_per_line]
|
||||
if not chunk:
|
||||
continue
|
||||
|
||||
start_time = chunk[0][1]["start"]
|
||||
end_time = chunk[-1][1]["end"]
|
||||
text = " ".join(w["word"] for _, w in chunk)
|
||||
|
||||
events.append(
|
||||
f"Dialogue: 0,{_format_ass_time(start_time)},{_format_ass_time(end_time)},Default,,0,0,0,,{text}"
|
||||
)
|
||||
|
||||
return header + "\n".join(events) + "\n"
|
||||
|
||||
|
||||
def save_captions(
|
||||
content: str,
|
||||
output_path: str,
|
||||
) -> str:
|
||||
"""Write caption content to a file."""
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
output_path.write_text(content, encoding="utf-8")
|
||||
logger.info(f"Saved captions to {output_path}")
|
||||
return str(output_path)
|
||||
Reference in New Issue
Block a user