Initial CutScript release - Open-source AI-powered text-based video editor

CutScript is a local-first, Descript-like video editor where you edit video by editing text. Delete a word from the transcript and it's cut from the video. Features: - Word-level transcription with WhisperX - Text-based video editing with undo/redo - AI filler word removal (Ollama/OpenAI/Claude) - AI clip creation for shorts - Waveform timeline with virtualized transcript - FFmpeg stream-copy (fast) and re-encode (4K) export - Caption burn-in and sidecar SRT generation - Studio Sound audio enhancement (DeepFilterNet) - Keyboard shortcuts (J/K/L, Space, Delete, Ctrl+Z/S/E) - Encrypted API key storage - Project save/load (.aive files) Architecture: - Electron + React + Tailwind (frontend) - FastAPI + Python (backend) - WhisperX for transcription - FFmpeg for video processing - Multi-provider AI support Performance optimizations: - RAF-throttled time updates - Zustand selectors for granular subscriptions - Dual-canvas waveform rendering - Virtualized transcript with react-virtuoso Built on top of DataAnts-AI/VideoTranscriber, completely rewritten as a desktop application. License: MIT
2026-03-03 06:31:04 -05:00
parent d1e1fedcae
commit 33cca5f552
73 changed files with 7463 additions and 3906 deletions
--- a/backend/services/ai_provider.py
+++ b/backend/services/ai_provider.py
@ -0,0 +1,211 @@
+"""
+Unified AI provider interface for Ollama, OpenAI, and Claude.
+"""
+
+import json
+import logging
+from typing import Optional, List
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+
+class AIProvider:
+    """Routes completion requests to the configured provider."""
+
+    @staticmethod
+    def complete(
+        prompt: str,
+        provider: str = "ollama",
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        system_prompt: Optional[str] = None,
+        temperature: float = 0.3,
+    ) -> str:
+        if provider == "ollama":
+            return _ollama_complete(prompt, model or "llama3", base_url or "http://localhost:11434", system_prompt, temperature)
+        elif provider == "openai":
+            return _openai_complete(prompt, model or "gpt-4o", api_key or "", system_prompt, temperature)
+        elif provider == "claude":
+            return _claude_complete(prompt, model or "claude-sonnet-4-20250514", api_key or "", system_prompt, temperature)
+        else:
+            raise ValueError(f"Unknown provider: {provider}")
+
+    @staticmethod
+    def list_ollama_models(base_url: str = "http://localhost:11434") -> List[str]:
+        try:
+            resp = requests.get(f"{base_url}/api/tags", timeout=3)
+            if resp.status_code == 200:
+                return [m["name"] for m in resp.json().get("models", [])]
+        except Exception:
+            pass
+        return []
+
+
+def _ollama_complete(prompt: str, model: str, base_url: str, system_prompt: Optional[str], temperature: float) -> str:
+    body = {
+        "model": model,
+        "prompt": prompt,
+        "stream": False,
+        "options": {"temperature": temperature},
+    }
+    if system_prompt:
+        body["system"] = system_prompt
+
+    try:
+        resp = requests.post(f"{base_url}/api/generate", json=body, timeout=120)
+        resp.raise_for_status()
+        return resp.json().get("response", "").strip()
+    except Exception as e:
+        logger.error(f"Ollama error: {e}")
+        raise
+
+
+def _openai_complete(prompt: str, model: str, api_key: str, system_prompt: Optional[str], temperature: float) -> str:
+    try:
+        from openai import OpenAI
+        client = OpenAI(api_key=api_key)
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
+
+        response = client.chat.completions.create(
+            model=model,
+            messages=messages,
+            temperature=temperature,
+        )
+        return response.choices[0].message.content.strip()
+    except Exception as e:
+        logger.error(f"OpenAI error: {e}")
+        raise
+
+
+def _claude_complete(prompt: str, model: str, api_key: str, system_prompt: Optional[str], temperature: float) -> str:
+    try:
+        import anthropic
+        client = anthropic.Anthropic(api_key=api_key)
+        kwargs = {
+            "model": model,
+            "max_tokens": 4096,
+            "temperature": temperature,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+        if system_prompt:
+            kwargs["system"] = system_prompt
+
+        response = client.messages.create(**kwargs)
+        return response.content[0].text.strip()
+    except Exception as e:
+        logger.error(f"Claude error: {e}")
+        raise
+
+
+def detect_filler_words(
+    transcript: str,
+    words: List[dict],
+    provider: str = "ollama",
+    model: Optional[str] = None,
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+    custom_filler_words: Optional[str] = None,
+) -> dict:
+    """
+    Use an LLM to identify filler words in the transcript.
+    Returns {"wordIndices": [...], "fillerWords": [{"index": N, "word": "...", "reason": "..."}]}
+    """
+    word_list = "\n".join(f"{w['index']}: {w['word']}" for w in words)
+
+    custom_line = ""
+    if custom_filler_words and custom_filler_words.strip():
+        custom_line = f"\n\nAdditionally, flag these user-specified filler words/phrases: {custom_filler_words.strip()}"
+
+    prompt = f"""Analyze this transcript for filler words and verbal hesitations.
+
+Filler words include: um, uh, uh huh, hmm, like (when used as filler), you know, so (when starting sentences unnecessarily), basically, actually, literally, right, I mean, kind of, sort of, well (when used as filler).
+
+Also flag repeated words that indicate stammering (e.g., "I I I" or "the the").{custom_line}
+
+Here are the words with their indices:
+{word_list}
+
+Return ONLY a valid JSON object with this exact structure:
+{{"wordIndices": [list of integer indices to remove], "fillerWords": [{{"index": integer, "word": "the word", "reason": "brief reason"}}]}}
+
+Be conservative -- only flag clear filler words, not words that are part of meaningful sentences."""
+
+    system = "You are a precise text analysis tool. Return only valid JSON, no explanation."
+
+    result_text = AIProvider.complete(
+        prompt=prompt,
+        provider=provider,
+        model=model,
+        api_key=api_key,
+        base_url=base_url,
+        system_prompt=system,
+        temperature=0.1,
+    )
+
+    try:
+        start = result_text.find("{")
+        end = result_text.rfind("}") + 1
+        if start >= 0 and end > start:
+            return json.loads(result_text[start:end])
+    except json.JSONDecodeError:
+        logger.error(f"Failed to parse AI response as JSON: {result_text[:200]}")
+
+    return {"wordIndices": [], "fillerWords": []}
+
+
+def create_clip_suggestion(
+    transcript: str,
+    words: List[dict],
+    target_duration: int = 60,
+    provider: str = "ollama",
+    model: Optional[str] = None,
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+) -> dict:
+    """
+    Use an LLM to find the best clip segments in a transcript.
+    """
+    word_list = "\n".join(
+        f"{w['index']}: \"{w['word']}\" ({w.get('start', 0):.1f}s - {w.get('end', 0):.1f}s)"
+        for w in words
+    )
+
+    prompt = f"""Analyze this transcript and find the most engaging {target_duration}-second segment(s) that would work well as a YouTube Short or social media clip.
+
+Look for: compelling stories, surprising facts, emotional moments, clear explanations, humor, or quotable statements.
+
+Words with indices and timestamps:
+{word_list}
+
+Return ONLY a valid JSON object:
+{{"clips": [{{"title": "short catchy title", "startWordIndex": integer, "endWordIndex": integer, "startTime": float, "endTime": float, "reason": "why this segment is engaging"}}]}}
+
+Suggest 1-3 clips, each approximately {target_duration} seconds long."""
+
+    system = "You are a viral content expert. Return only valid JSON, no explanation."
+
+    result_text = AIProvider.complete(
+        prompt=prompt,
+        provider=provider,
+        model=model,
+        api_key=api_key,
+        base_url=base_url,
+        system_prompt=system,
+        temperature=0.5,
+    )
+
+    try:
+        start = result_text.find("{")
+        end = result_text.rfind("}") + 1
+        if start >= 0 and end > start:
+            return json.loads(result_text[start:end])
+    except json.JSONDecodeError:
+        logger.error(f"Failed to parse clip suggestions: {result_text[:200]}")
+
+    return {"clips": []}