Initial CutScript release - Open-source AI-powered text-based video editor
CutScript is a local-first, Descript-like video editor where you edit video by editing text. Delete a word from the transcript and it's cut from the video.

Features:
- Word-level transcription with WhisperX
- Text-based video editing with undo/redo
- AI filler word removal (Ollama/OpenAI/Claude)
- AI clip creation for shorts
- Waveform timeline with virtualized transcript
- FFmpeg stream-copy (fast) and re-encode (4K) export
- Caption burn-in and sidecar SRT generation
- Studio Sound audio enhancement (DeepFilterNet)
- Keyboard shortcuts (J/K/L, Space, Delete, Ctrl+Z/S/E)
- Encrypted API key storage
- Project save/load (.aive files)

Architecture:
- Electron + React + Tailwind (frontend)
- FastAPI + Python (backend)
- WhisperX for transcription
- FFmpeg for video processing
- Multi-provider AI support

Performance optimizations:
- RAF-throttled time updates
- Zustand selectors for granular subscriptions
- Dual-canvas waveform rendering
- Virtualized transcript with react-virtuoso

Built on top of DataAnts-AI/VideoTranscriber, completely rewritten as a desktop application.

License: MIT
This commit is contained in:
0
backend/routers/__init__.py
Normal file
0
backend/routers/__init__.py
Normal file
83
backend/routers/ai.py
Normal file
83
backend/routers/ai.py
Normal file
@ -0,0 +1,83 @@
|
||||
"""AI feature endpoints: filler word detection, clip creation, Ollama model listing."""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.ai_provider import AIProvider, detect_filler_words, create_clip_suggestion
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that the AI endpoints in this module register themselves on.
router = APIRouter()
|
||||
|
||||
|
||||
class WordInfo(BaseModel):
    """One transcript word as sent to the AI endpoints."""

    # Index of the word (presumably its position in the transcript word
    # list; confirm against the caller).
    index: int
    # The word text itself.
    word: str
    # Optional timing of the word; units are not established in this module
    # (presumably seconds; confirm).
    start: Optional[float] = None
    end: Optional[float] = None
|
||||
|
||||
|
||||
class FillerRequest(BaseModel):
    """Request body for ``POST /ai/filler-removal``.

    Every field is forwarded to ``detect_filler_words`` by the endpoint.
    """

    # Full transcript text.
    transcript: str
    # Word-level entries accompanying the transcript.
    words: List[WordInfo]
    # AI provider identifier; defaults to "ollama".
    provider: str = "ollama"
    # Optional provider settings; None leaves the choice to the service layer.
    model: Optional[str] = None
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    # Extra filler words supplied by the user. The expected string format is
    # not visible here. TODO: confirm against services.ai_provider.
    custom_filler_words: Optional[str] = None
|
||||
|
||||
|
||||
class ClipRequest(BaseModel):
    """Request body for ``POST /ai/create-clip``.

    Every field is forwarded to ``create_clip_suggestion`` by the endpoint.
    """

    # Full transcript text.
    transcript: str
    # Word-level entries accompanying the transcript.
    words: List[WordInfo]
    # AI provider identifier; defaults to "ollama".
    provider: str = "ollama"
    # Optional provider settings; None leaves the choice to the service layer.
    model: Optional[str] = None
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    # Desired clip length (presumably in seconds; confirm with the service).
    target_duration: int = 60
|
||||
|
||||
|
||||
@router.post("/ai/filler-removal")
async def filler_removal(req: FillerRequest):
    """Run filler-word detection on the submitted transcript.

    The word models are converted to plain dicts and handed, together with
    the provider settings, to ``detect_filler_words``; its result is returned
    unchanged.

    Raises:
        HTTPException: status 500 carrying the error text when detection
            raises any exception.
    """
    try:
        return detect_filler_words(
            transcript=req.transcript,
            words=[item.model_dump() for item in req.words],
            provider=req.provider,
            model=req.model,
            api_key=req.api_key,
            base_url=req.base_url,
            custom_filler_words=req.custom_filler_words,
        )
    except Exception as e:
        # Log with traceback, then surface the message to the client.
        logger.error(f"Filler detection failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/ai/create-clip")
async def create_clip(req: ClipRequest):
    """Ask the AI provider for a clip suggestion based on the transcript.

    The word models are converted to plain dicts and handed, together with
    the target duration and provider settings, to ``create_clip_suggestion``;
    its result is returned unchanged.

    Raises:
        HTTPException: status 500 carrying the error text when the
            suggestion call raises any exception.
    """
    try:
        return create_clip_suggestion(
            transcript=req.transcript,
            words=[item.model_dump() for item in req.words],
            target_duration=req.target_duration,
            provider=req.provider,
            model=req.model,
            api_key=req.api_key,
            base_url=req.base_url,
        )
    except Exception as e:
        # Log with traceback, then surface the message to the client.
        logger.error(f"Clip creation failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/ai/ollama-models")
async def ollama_models(base_url: str = "http://localhost:11434"):
    """Return the models reported by ``AIProvider.list_ollama_models``.

    Args:
        base_url: Address of the Ollama server to query; defaults to
            ``http://localhost:11434``.

    Returns:
        A dict with a single ``"models"`` key holding the provider's result.
    """
    available = AIProvider.list_ollama_models(base_url)
    return {"models": available}
|
||||
38
backend/routers/audio.py
Normal file
38
backend/routers/audio.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""Audio processing endpoint (noise reduction / Studio Sound)."""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.audio_cleaner import clean_audio, is_deepfilter_available
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that the audio endpoints in this module register themselves on.
router = APIRouter()
|
||||
|
||||
|
||||
class AudioCleanRequest(BaseModel):
    """Request body for ``POST /audio/clean``."""

    # Path of the media file to clean.
    input_path: str
    # Optional destination path; when omitted the endpoint passes an empty
    # string to ``clean_audio``.
    output_path: Optional[str] = None
|
||||
|
||||
|
||||
@router.post("/audio/clean")
async def clean_audio_endpoint(req: AudioCleanRequest):
    """Clean the audio of the given file and report where the result landed.

    Returns:
        A dict with ``status``, the ``output_path`` returned by
        ``clean_audio``, and ``engine``: ``"deepfilternet"`` when
        ``is_deepfilter_available()`` is true, otherwise ``"ffmpeg_anlmdn"``.

    Raises:
        HTTPException: status 500 carrying the error text when cleaning
            raises any exception.
    """
    try:
        # A missing output path is passed on as an empty string.
        cleaned_path = clean_audio(req.input_path, req.output_path or "")
        if is_deepfilter_available():
            engine = "deepfilternet"
        else:
            engine = "ffmpeg_anlmdn"
        return {"status": "ok", "output_path": cleaned_path, "engine": engine}
    except Exception as e:
        logger.error(f"Audio cleaning failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/audio/capabilities")
async def audio_capabilities():
    """Report whether DeepFilterNet is available, per ``is_deepfilter_available``."""
    deepfilter_ready = is_deepfilter_available()
    return {"deepfilternet_available": deepfilter_ready}
|
||||
65
backend/routers/captions.py
Normal file
65
backend/routers/captions.py
Normal file
@ -0,0 +1,65 @@
|
||||
"""Caption generation endpoint."""
|
||||
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi.responses import PlainTextResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.caption_generator import generate_srt, generate_vtt, generate_ass, save_captions
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that the caption endpoint in this module registers itself on.
router = APIRouter()
|
||||
|
||||
|
||||
class CaptionWord(BaseModel):
    """One timed word used as input for caption generation."""

    # The word text.
    word: str
    # Word timing (presumably seconds; confirm with the caption generator).
    start: float
    end: float
    # Confidence value for the word; defaults to 0.0 when not supplied.
    confidence: float = 0.0
|
||||
|
||||
|
||||
class CaptionStyle(BaseModel):
    """Styling options; the ``/captions`` endpoint passes them to ``generate_ass``."""

    fontName: str = "Arial"
    fontSize: int = 48
    # NOTE(review): the colour defaults look like ASS "&HAABBGGRR" values.
    # Confirm against the caption generator.
    fontColor: str = "&H00FFFFFF"
    backgroundColor: str = "&H80000000"
    position: str = "bottom"
    bold: bool = True
|
||||
|
||||
|
||||
class CaptionRequest(BaseModel):
    """Request body for ``POST /captions``."""

    # Timed words to caption.
    words: List[CaptionWord]
    # Indices the endpoint turns into a set and passes to the generators
    # (presumably positions in ``words`` that were deleted; confirm).
    deleted_indices: List[int] = []
    # Output format; the endpoint accepts "srt", "vtt" and "ass".
    format: str = "srt"
    # Passed to the generators as the words-per-line setting.
    words_per_line: int = 8
    # Only consulted for the "ass" format.
    style: Optional[CaptionStyle] = None
    # When set, captions are saved to this path instead of being returned.
    output_path: Optional[str] = None
|
||||
|
||||
|
||||
@router.post("/captions")
async def generate_captions(req: CaptionRequest):
    """Generate captions in SRT, VTT or ASS format.

    When ``req.output_path`` is set the captions are written with
    ``save_captions`` and a JSON status is returned; otherwise the caption
    text itself is returned as a plain-text response.

    Raises:
        HTTPException: status 400 when ``req.format`` is not one of
            ``"srt"``, ``"vtt"`` or ``"ass"``; status 500 carrying the error
            text when generation or saving raises any other exception.
    """
    try:
        words_dicts = [w.model_dump() for w in req.words]
        deleted_set = set(req.deleted_indices)

        if req.format == "srt":
            content = generate_srt(words_dicts, deleted_set, req.words_per_line)
        elif req.format == "vtt":
            content = generate_vtt(words_dicts, deleted_set, req.words_per_line)
        elif req.format == "ass":
            # Styling only applies to ASS output.
            style_dict = req.style.model_dump() if req.style else None
            content = generate_ass(words_dicts, deleted_set, req.words_per_line, style_dict)
        else:
            raise HTTPException(status_code=400, detail=f"Unknown format: {req.format}")

        if req.output_path:
            saved = save_captions(content, req.output_path)
            return {"status": "ok", "output_path": saved}

        return PlainTextResponse(content, media_type="text/plain")

    except HTTPException:
        # HTTPException is an Exception subclass: without this clause the
        # deliberate 400 above would be caught below and reported as a 500.
        raise
    except Exception as e:
        logger.error(f"Caption generation failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
156
backend/routers/export.py
Normal file
156
backend/routers/export.py
Normal file
@ -0,0 +1,156 @@
|
||||
"""Export endpoint for video cutting and rendering."""
|
||||
|
||||
import logging
|
||||
import tempfile
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.video_editor import export_stream_copy, export_reencode, export_reencode_with_subs
|
||||
from services.audio_cleaner import clean_audio
|
||||
from services.caption_generator import generate_srt, generate_ass, save_captions
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that the export endpoint in this module registers itself on.
router = APIRouter()
|
||||
|
||||
|
||||
class SegmentModel(BaseModel):
    """A span of the source video to keep in the export."""

    # Span boundaries (presumably seconds; confirm with the video editor
    # service).
    start: float
    end: float
|
||||
|
||||
|
||||
class ExportWordModel(BaseModel):
    """One timed word used when the export also produces captions."""

    # The word text.
    word: str
    # Word timing (presumably seconds; confirm with the caption generator).
    start: float
    end: float
    # Confidence value for the word; defaults to 0.0 when not supplied.
    confidence: float = 0.0
|
||||
|
||||
|
||||
class ExportRequest(BaseModel):
    """Request body for ``POST /export``."""

    # Source video and destination file.
    input_path: str
    output_path: str
    # Spans of the source to keep; the endpoint rejects an empty list.
    keep_segments: List[SegmentModel]
    # "fast" selects stream copy, but only for a single segment without
    # burn-in captions; every other case is re-encoded.
    mode: str = "fast"
    # Passed to the re-encode functions as ``resolution`` / ``format_hint``.
    resolution: str = "1080p"
    format: str = "mp4"
    # When true, the exported file's audio is cleaned and muxed back in.
    enhanceAudio: bool = False
    # The endpoint acts on "burn-in" and "sidecar"; the default is "none".
    captions: str = "none"
    # Word data and indices used only for caption generation.
    words: Optional[List[ExportWordModel]] = None
    deleted_indices: Optional[List[int]] = None
|
||||
|
||||
|
||||
def _mux_audio(video_path: str, audio_path: str, output_path: str) -> str:
    """Combine the video stream of one file with the audio stream of another.

    Runs FFmpeg with the video copied as-is (``-c:v copy``), taking the first
    video stream of ``video_path`` and the first audio stream of
    ``audio_path``, stopping at the shorter of the two (``-shortest``).

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: when FFmpeg exits with a non-zero status; the message
            carries the last 300 characters of its stderr.
    """
    import subprocess

    ffmpeg_args = ["ffmpeg", "-y"]                   # overwrite existing output
    ffmpeg_args += ["-i", video_path]                # input 0: video source
    ffmpeg_args += ["-i", audio_path]                # input 1: audio source
    ffmpeg_args += ["-c:v", "copy"]                  # do not re-encode video
    ffmpeg_args += ["-map", "0:v:0", "-map", "1:a:0"]
    ffmpeg_args += ["-shortest", output_path]

    proc = subprocess.run(ffmpeg_args, capture_output=True, text=True)
    if proc.returncode == 0:
        return output_path
    raise RuntimeError(f"Audio mux failed: {proc.stderr[-300:]}")
|
||||
|
||||
|
||||
def _write_temp_ass(ass_content: str) -> str:
    """Write ASS subtitle text to a new temporary ``.ass`` file; return its path."""
    with tempfile.NamedTemporaryFile(
        suffix=".ass", delete=False, mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(ass_content)
        return tmp.name


def _enhance_audio_in_place(video_path: str) -> None:
    """Clean the audio of ``video_path`` and mux it back into the same file."""
    # Keep the export's own extension so FFmpeg writes the same container
    # type that the final file name promises.
    ext = os.path.splitext(video_path)[1] or ".mp4"
    muxed_path = f"{video_path}.muxed{ext}"
    try:
        # The context manager removes the scratch directory on failure too.
        with tempfile.TemporaryDirectory(prefix="cutscript_audio_") as tmp_dir:
            cleaned_audio = os.path.join(tmp_dir, "cleaned.wav")
            clean_audio(video_path, cleaned_audio)
            _mux_audio(video_path, cleaned_audio, muxed_path)
        os.replace(muxed_path, video_path)
    finally:
        # A failed mux can leave a partial file next to the export.
        if os.path.exists(muxed_path):
            os.remove(muxed_path)


@router.post("/export")
async def export_video(req: ExportRequest):
    """Cut the kept segments out of the input video and write the export.

    Stream copy is used only when ``mode`` is ``"fast"``, there is exactly
    one segment and captions are not burned in; otherwise the video is
    re-encoded (with subtitles when burn-in captions and words are given).
    Optional audio enhancement is best-effort: a failure is logged and the
    un-enhanced export is still returned. With ``captions == "sidecar"`` an
    ``.srt`` file is saved next to the output.

    Returns:
        A dict with ``status`` and ``output_path``, plus ``srt_path`` when a
        sidecar file was written.

    Raises:
        HTTPException: status 400 when there are no segments or a
            ``ValueError`` is raised during export; status 500 carrying the
            error text for any other failure.
    """
    try:
        segments = [{"start": s.start, "end": s.end} for s in req.keep_segments]
        if not segments:
            raise HTTPException(status_code=400, detail="No segments to export")

        burn_in = req.captions == "burn-in"
        # Burn-in captions require a re-encode, so they rule out stream copy.
        use_stream_copy = req.mode == "fast" and len(segments) == 1 and not burn_in

        words_dicts = [w.model_dump() for w in req.words] if req.words else []
        deleted_set = set(req.deleted_indices or [])

        # Generate the ASS file used for burn-in.
        ass_path = None
        if burn_in and words_dicts:
            ass_path = _write_temp_ass(generate_ass(words_dicts, deleted_set))

        try:
            if use_stream_copy:
                output = export_stream_copy(req.input_path, req.output_path, segments)
            elif ass_path:
                output = export_reencode_with_subs(
                    req.input_path,
                    req.output_path,
                    segments,
                    ass_path,
                    resolution=req.resolution,
                    format_hint=req.format,
                )
            else:
                output = export_reencode(
                    req.input_path,
                    req.output_path,
                    segments,
                    resolution=req.resolution,
                    format_hint=req.format,
                )
        finally:
            # The temporary subtitle file is only needed during the encode.
            if ass_path and os.path.exists(ass_path):
                os.unlink(ass_path)

        # Audio enhancement: clean, then mux back into the exported video.
        if req.enhanceAudio:
            try:
                _enhance_audio_in_place(output)
                logger.info(f"Audio enhanced and muxed into {output}")
            except Exception as e:
                # Deliberately non-fatal: the export itself already succeeded.
                logger.warning(f"Audio enhancement failed (non-fatal): {e}")

        # Sidecar SRT: generate and save alongside the video.
        srt_path = None
        if req.captions == "sidecar" and words_dicts:
            srt_content = generate_srt(words_dicts, deleted_set)
            # splitext only strips a real file extension, so a dot in a
            # directory name cannot truncate the path.
            srt_path = os.path.splitext(req.output_path)[0] + ".srt"
            save_captions(srt_content, srt_path)
            logger.info(f"Sidecar SRT saved to {srt_path}")

        result = {"status": "ok", "output_path": output}
        if srt_path:
            result["srt_path"] = srt_path
        return result

    except HTTPException:
        # HTTPException is an Exception subclass: without this clause the
        # deliberate 400 above would be reported as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except RuntimeError as e:
        logger.error(f"Export failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        logger.error(f"Export error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
53
backend/routers/transcribe.py
Normal file
53
backend/routers/transcribe.py
Normal file
@ -0,0 +1,53 @@
|
||||
"""Transcription endpoint using WhisperX."""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from services.transcription import transcribe_audio
|
||||
from services.diarization import diarize_and_label
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that the transcription endpoint in this module registers itself on.
router = APIRouter()
|
||||
|
||||
|
||||
class TranscribeRequest(BaseModel):
    """Request body for ``POST /transcribe``."""

    # Path of the media file to transcribe.
    file_path: str
    # Passed to ``transcribe_audio`` as ``model_name``.
    model: str = "base"
    # Optional language passed to ``transcribe_audio``.
    language: Optional[str] = None
    # Forwarded to both transcription and diarization.
    use_gpu: bool = True
    use_cache: bool = True
    # Diarization runs only when ``diarize`` is true AND ``hf_token`` is set.
    diarize: bool = False
    hf_token: Optional[str] = None
    # Optional speaker count passed to ``diarize_and_label``.
    num_speakers: Optional[int] = None
|
||||
|
||||
|
||||
@router.post("/transcribe")
async def transcribe(req: TranscribeRequest):
    """Transcribe a media file and optionally label speakers.

    The file is transcribed with ``transcribe_audio``. Speaker labelling via
    ``diarize_and_label`` is applied only when the request both enables
    ``diarize`` and supplies ``hf_token``; otherwise the plain transcription
    result is returned.

    Raises:
        HTTPException: status 404 when a ``FileNotFoundError`` is raised;
            status 500 carrying the error text for any other exception.
    """
    try:
        transcription = transcribe_audio(
            file_path=req.file_path,
            model_name=req.model,
            use_gpu=req.use_gpu,
            use_cache=req.use_cache,
            language=req.language,
        )

        # Without both the flag and a token, diarization is skipped.
        if not (req.diarize and req.hf_token):
            return transcription

        return diarize_and_label(
            transcription_result=transcription,
            audio_path=req.file_path,
            hf_token=req.hf_token,
            num_speakers=req.num_speakers,
            use_gpu=req.use_gpu,
        )

    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"File not found: {req.file_path}")
    except Exception as e:
        logger.error(f"Transcription failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
Reference in New Issue
Block a user