diff --git a/backend/.python-version b/backend/.python-version new file mode 100644 index 0000000..ed7d51a --- /dev/null +++ b/backend/.python-version @@ -0,0 +1 @@ +3.11.15 diff --git a/electron/python-bridge.js b/electron/python-bridge.js index 4650f28..7db1b41 100644 --- a/electron/python-bridge.js +++ b/electron/python-bridge.js @@ -24,7 +24,7 @@ class PythonBackend { ? path.join(__dirname, '..', 'backend') : path.join(process.resourcesPath, 'backend'); - const pythonCmd = process.platform === 'win32' ? 'python' : 'python3'; + const pythonCmd = process.platform === 'win32' ? 'python' : '/home/dillon/.pyenv/versions/3.11.15/bin/python'; this.process = spawn(pythonCmd, [ '-m', 'uvicorn', 'main:app', diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 42eb0ed..d578310 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1,11 +1,11 @@ { - "name": "ai-video-editor-frontend", + "name": "cutscript-frontend", "version": "0.1.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "ai-video-editor-frontend", + "name": "cutscript-frontend", "version": "0.1.0", "dependencies": { "lucide-react": "^0.468.0", diff --git a/idea summary.md b/idea summary.md new file mode 100644 index 0000000..3679d70 --- /dev/null +++ b/idea summary.md @@ -0,0 +1,60 @@ +Here's a clear, actionable **summary** of what you (as a solo developer using AI tools heavily) should do to build and monetize this product, based on current market demand in 2026. + +### What You Should Do (Step-by-Step Plan) +1. **Fork an existing open-source base** (don't start from scratch) + - Best choice: **CutScript** (newest, explicitly built as "offline Descript alternative" with text-based editing) or **Audapolis** (more mature, ~1.8k stars, wordprocessor-like experience for spoken-word video/audio). 
+ - Reason: The hard parts (local Whisper transcription with word-level timestamps, syncing text deletions to video cuts, FFmpeg handling) are already solved. You save 4–8 weeks and focus on polish. + +2. **Migrate/refactor to Tauri 2.0** (Rust backend + React/Vite + Tailwind + shadcn-ui frontend) + - This gives tiny installers (~5–15 MB), excellent performance, full cross-platform (Windows/macOS/Linux), and a modern, native feel. AI can help you do the migration quickly. + +3. **Keep scope minimal** — ship a delightful MVP in **6–10 weeks**. + - Open-source the core engine on GitHub for trust, feedback, and virality. + - Sell a polished "Pro" version via Gumroad/Stripe (one-time license preferred). + +4. **Monetization model** (low-risk, high-margin): + - **Free forever** for core local use (unlimited processing, no uploads). + - **One-time Pro license** ($49–$69): unlocks batch processing, extra polish presets, custom filler lists, and priority support/updates. + - Optional later: cheap cloud credits for very long videos or faster transcription. + - Launch on Product Hunt, Reddit (r/podcasting, r/videoediting, r/selfhosted), and X. + +5. **Launch & marketing** + - Position it as: **"Offline Descript alternative — edit video like a Google Doc, fully local, no subscriptions, no uploads."** + - Target: Indie podcasters, YouTubers, and creators doing talking-head/interview content who hate cloud costs/privacy issues. + - Goal: Get 500–2,000 users in the first month, with 15–25% converting to Pro. + +This approach minimizes your risk and burn rate while hitting the exact gap: polished, local text-based editing that existing open-source tools lack. + +### Recommended Minimal but Useful Features (MVP) +Focus only on what creators repeatedly say they want for spoken-word content (text-based editing + quick cleanup). Nothing more. + +1. **Drag-and-drop video import** (auto-extracts audio). +2. 
**One-click local transcription** (using faster-whisper or whisper.cpp — accurate word-level timestamps, runs offline on most laptops). +3. **Text-based editing** (scrollable, Google-Doc-style transcript): + - Click any word → video jumps to that spot. + - Highlight + Delete (or cut) text → corresponding video + audio is automatically removed with smart 150–250 ms crossfades (no jarring jumps). +4. **One magic "Clean it" button** (your original idea): + - Auto-removes long pauses/silences (>0.8s). + - Auto-removes common fillers ("um", "uh", "like", "you know", etc.). + - Optional simple local check for more accuracy. +5. **One-click audio polish** (FFmpeg chain): + - Volume normalization + light compression. + - Basic noise reduction. + - Makes dialogue sound professional instantly. +6. **Simple synced preview + undo stack + project save/load**. +7. **Export** clean MP4 (with optional SRT subtitles or burned-in captions). + +That's it. No multi-track timelines, no voice cloning, no collaboration, no fancy effects. This already cuts editing time dramatically for 80% of podcast/YouTube talking-head work and directly addresses the biggest complaints about Descript (cost, privacy, complexity). + +### Why This Will Work +- **Market demand is real**: Creators love text-based editing because it feels revolutionary for dialogue-heavy videos. They want it faster, cheaper, and private/offline. Existing alternatives are either cloud-based with subscriptions or clunky open-source tools. +- **Competition gap**: CutScript and Audapolis prove interest but lack slick UX and the "one magic button" polish. You can own the "delightful local Descript killer" niche. +- **Solo-dev friendly**: Forking + AI code generation makes this realistic without a team. + +Once you ship the MVP and get initial users, you can add nice-to-haves (e.g., custom filler lists, better subtitle export, optional cloud boost) based on real feedback. 
+ +**Next immediate actions**: +- Clone CutScript or Audapolis today and run it locally to see the current state. +- Set up a new Tauri project and start refactoring the UI/transcript editor. + +If you want, I can give you the exact Git commands, first AI prompts for refactoring, folder structure, or even sample code for the "Clean it" button + FFmpeg polish chain. diff --git a/package.json b/package.json index 8311061..0cb84a3 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,8 @@ { - "name": "cutscript", + "name": "talkedit", "version": "0.1.0", "private": true, - "description": "CutScript — Open-source AI-powered text-based video editor", + "description": "TalkEdit — Open-source AI-powered text-based video editor", "main": "electron/main.js", "scripts": { "dev": "concurrently \"npm run dev:backend\" \"npm run dev:frontend\" \"wait-on http://localhost:5173 && npm run dev:electron\"", diff --git a/plan.md b/plan.md new file mode 100644 index 0000000..6b1b6d5 --- /dev/null +++ b/plan.md @@ -0,0 +1,78 @@ +# Plan for Building TalkEdit (Whisper.cpp + Tauri) + +Based on your original idea summary and our discussions, here's a detailed plan to build a standalone, local audio/video editor app. We'll modify CutScript as the base, migrate to **Tauri 2.0** (Rust backend + React frontend) for tiny, dependency-free installers, and use **Whisper.cpp** for fast, accurate transcription. This keeps the scope minimal, focuses on text-based editing for spoken content, and targets podcasters/YouTubers. + +## 1. Overview +- **Goal**: Create an offline Descript alternative with word-level editing, transcription, and export. Users download one file (~10–20MB), install, and run—no Python, FFmpeg, or external deps. +- **Why This Stack**: Tauri bundles everything into a native app; Whisper.cpp (C++ lib) integrates seamlessly with Rust for CPU-efficient transcription. Faster than rebuilding from scratch. +- **Target Users**: Creators editing podcasts/videos; free core + Pro upgrades. 
+- **Key Differentiators**: Fully local, text-based editing like Google Docs, smart cuts with fades. + +## 2. Tech Stack +- **Frontend**: React + Vite + Tailwind CSS + shadcn/ui (from CutScript; minimal changes). +- **Backend**: Tauri 2.0 (Rust) – handles file I/O, FFmpeg calls, Whisper.cpp integration. +- **Transcription**: Whisper.cpp (via Rust bindings like `whisper-cpp-sys` or `whisper-rs`). +- **Audio/Video Processing**: FFmpeg (bundled or called via Rust wrappers like `ffmpeg-next`). +- **State Management**: Zustand (from CutScript). +- **Packaging**: Tauri's `tauri build` for cross-platform installers. +- **AI Features**: Local models only (no APIs); optional Ollama for fillers. + +## 3. Step-by-Step Development Plan +1. **Set Up Tauri in CutScript** (1–2 weeks): + - Install the Tauri CLI as a dev dependency (`npm install -D @tauri-apps/cli`) so that `npx tauri` resolves. + - In CutScript root: `npx tauri init` (choose Rust backend, link to existing React frontend). + - Migrate Electron main.js to Tauri's `src-tauri/src/lib.rs` (`src-tauri/src/main.rs` stays a thin entry point; handle window, file dialogs). + - Update `tauri.conf.json` for app metadata, bundle settings. + +2. **Integrate Whisper.cpp in Rust** (2–3 weeks): + - Add a whisper.cpp binding crate (e.g., `whisper-rs`, as listed in the Tech Stack) as a dependency in `Cargo.toml`. + - Create a Rust module for transcription: Load models, process audio, return word-level timestamps. + - Replace Python backend calls with Tauri commands (e.g., `invoke` from frontend to Rust for transcription). + - Handle model downloads on first run (store in app data dir). + +3. **Migrate Audio/Video Logic** (2 weeks): + - Port FFmpeg calls to Rust (use `ffmpeg-next` for cutting/export). + - Implement segment calculation: From edited transcript, build keep_segments with padding/fades. + - Add audio cleaning (noise reduction via bundled tools or Rust libs). + +4. **Frontend Polish** (1–2 weeks): + - Update UI for Tauri (file dialogs via `tauri-plugin-dialog`). + - Refine transcript editor: Better timestamp syncing, manual adjustments. + - Add export options (MP4 with subs, audio-only). + +5.
**Testing & Packaging** (1 week): + - Test on Windows/macOS/Linux; ensure Whisper runs offline. + - Bundle with `tauri build`; verify no external deps. + - Add auto-updater for Pro features. + +6. **Launch & Iterate** (Ongoing): + - Open-source core on GitHub. + - Market on Product Hunt, Reddit; gather feedback. + +## 4. MVP Features (Minimal but Useful) +Focus on what creators need for spoken content: +- **Drag-and-drop import**: Audio/video files; auto-extract audio. +- **One-click transcription**: Whisper.cpp with model choice (Fast - less accurate: tiny/base; Slow - more accurate: small/medium/large). +- **Text-based editing**: Scrollable transcript; click word → jump to video; select/delete words → auto-cut audio with 150ms fades. +- **Smart cleanup**: Remove fillers (e.g., "um") and long pauses (>0.8s) via local AI. +- **Preview & Export**: Synced preview; export MP4/audio with optional SRT subs. +- **Undo/Redo**: Full edit history. + +No multi-track, voice cloning, or collaboration—keep it simple. + +## 5. Monetization Model +- **Free Forever**: Core editing/transcription (unlimited local use). +- **Pro License** ($29–49 one-time): Batch processing, high-quality voices (if adding TTS), custom presets, priority support. +- **Optional Add-Ons**: Cloud credits for long videos (rarely needed). + +## 6. Timeline & Milestones +- **Weeks 1–4**: Tauri setup + Whisper integration. +- **Weeks 5–6**: Audio logic migration + frontend tweaks. +- **Weeks 7–8**: Testing, packaging, launch prep. +- **Total**: 6–10 weeks to MVP (solo dev + AI). + +## 7. Risks & Tips +- **Risks**: Whisper.cpp compilation issues; Rust learning curve if new to it. +- **Tips**: Start with small models (tiny ~75MB, base ~142MB); test timestamp accuracy early. Use Tauri's docs for migration. If stuck, fall back to bundling Python for Whisper (but avoid for true standalone). +- **Resources**: Tauri docs, Whisper.cpp GitHub, Rust audio crates.
+/home/dillon/_code/audio_editor/plan.md \ No newline at end of file diff --git a/src-tauri/.gitignore b/src-tauri/.gitignore new file mode 100644 index 0000000..502406b --- /dev/null +++ b/src-tauri/.gitignore @@ -0,0 +1,4 @@ +# Generated by Cargo +# will have compiled files and executables +/target/ +/gen/schemas diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml new file mode 100644 index 0000000..1f8a5a5 --- /dev/null +++ b/src-tauri/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "app" +version = "0.1.0" +description = "A Tauri App" +authors = ["you"] +license = "" +repository = "" +edition = "2021" +rust-version = "1.77.2" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[lib] +name = "app_lib" +crate-type = ["staticlib", "cdylib", "rlib"] + +[build-dependencies] +tauri-build = { version = "2.5.6" } + +[dependencies] +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } +log = "0.4" +tauri = { version = "2.10.3" } +tauri-plugin-log = "2" diff --git a/src-tauri/build.rs b/src-tauri/build.rs new file mode 100644 index 0000000..795b9b7 --- /dev/null +++ b/src-tauri/build.rs @@ -0,0 +1,3 @@ +/* Build-script entry point (build.rs): delegates all work to `tauri_build::build()`. */ fn main() { + tauri_build::build() +} diff --git a/src-tauri/capabilities/default.json b/src-tauri/capabilities/default.json new file mode 100644 index 0000000..c135d7f --- /dev/null +++ b/src-tauri/capabilities/default.json @@ -0,0 +1,11 @@ +{ + "$schema": "../gen/schemas/desktop-schema.json", + "identifier": "default", + "description": "enables the default permissions", + "windows": [ + "main" + ], + "permissions": [ + "core:default" + ] +} diff --git a/src-tauri/icons/128x128.png b/src-tauri/icons/128x128.png new file mode 100644 index 0000000..77e7d23 Binary files /dev/null and b/src-tauri/icons/128x128.png differ diff --git a/src-tauri/icons/128x128@2x.png b/src-tauri/icons/128x128@2x.png new file mode 100644 index 0000000..0f7976f Binary files /dev/null and b/src-tauri/icons/128x128@2x.png
differ diff --git a/src-tauri/icons/32x32.png b/src-tauri/icons/32x32.png new file mode 100644 index 0000000..98fda06 Binary files /dev/null and b/src-tauri/icons/32x32.png differ diff --git a/src-tauri/icons/Square107x107Logo.png b/src-tauri/icons/Square107x107Logo.png new file mode 100644 index 0000000..f35d84f Binary files /dev/null and b/src-tauri/icons/Square107x107Logo.png differ diff --git a/src-tauri/icons/Square142x142Logo.png b/src-tauri/icons/Square142x142Logo.png new file mode 100644 index 0000000..1823bb2 Binary files /dev/null and b/src-tauri/icons/Square142x142Logo.png differ diff --git a/src-tauri/icons/Square150x150Logo.png b/src-tauri/icons/Square150x150Logo.png new file mode 100644 index 0000000..dc2b22c Binary files /dev/null and b/src-tauri/icons/Square150x150Logo.png differ diff --git a/src-tauri/icons/Square284x284Logo.png b/src-tauri/icons/Square284x284Logo.png new file mode 100644 index 0000000..0ed3984 Binary files /dev/null and b/src-tauri/icons/Square284x284Logo.png differ diff --git a/src-tauri/icons/Square30x30Logo.png b/src-tauri/icons/Square30x30Logo.png new file mode 100644 index 0000000..60bf0ea Binary files /dev/null and b/src-tauri/icons/Square30x30Logo.png differ diff --git a/src-tauri/icons/Square310x310Logo.png b/src-tauri/icons/Square310x310Logo.png new file mode 100644 index 0000000..c8ca0ad Binary files /dev/null and b/src-tauri/icons/Square310x310Logo.png differ diff --git a/src-tauri/icons/Square44x44Logo.png b/src-tauri/icons/Square44x44Logo.png new file mode 100644 index 0000000..8756459 Binary files /dev/null and b/src-tauri/icons/Square44x44Logo.png differ diff --git a/src-tauri/icons/Square71x71Logo.png b/src-tauri/icons/Square71x71Logo.png new file mode 100644 index 0000000..2c8023c Binary files /dev/null and b/src-tauri/icons/Square71x71Logo.png differ diff --git a/src-tauri/icons/Square89x89Logo.png b/src-tauri/icons/Square89x89Logo.png new file mode 100644 index 0000000..2c5e603 Binary files /dev/null and 
b/src-tauri/icons/Square89x89Logo.png differ diff --git a/src-tauri/icons/StoreLogo.png b/src-tauri/icons/StoreLogo.png new file mode 100644 index 0000000..17d142c Binary files /dev/null and b/src-tauri/icons/StoreLogo.png differ diff --git a/src-tauri/icons/icon.icns b/src-tauri/icons/icon.icns new file mode 100644 index 0000000..a2993ad Binary files /dev/null and b/src-tauri/icons/icon.icns differ diff --git a/src-tauri/icons/icon.ico b/src-tauri/icons/icon.ico new file mode 100644 index 0000000..06c23c8 Binary files /dev/null and b/src-tauri/icons/icon.ico differ diff --git a/src-tauri/icons/icon.png b/src-tauri/icons/icon.png new file mode 100644 index 0000000..d1756ce Binary files /dev/null and b/src-tauri/icons/icon.png differ diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs new file mode 100644 index 0000000..9c3118c --- /dev/null +++ b/src-tauri/src/lib.rs @@ -0,0 +1,16 @@ +/** Builds the Tauri application with the default builder and runs it. Panics with the `expect` message below if `run` returns an error. */ #[cfg_attr(mobile, tauri::mobile_entry_point)] +pub fn run() { + tauri::Builder::default() + .setup(|app| { + if cfg!(debug_assertions) { /* the log plugin (Info level) is registered only when debug assertions are enabled */ + app.handle().plugin( + tauri_plugin_log::Builder::default() + .level(log::LevelFilter::Info) + .build(), + )?; /* a plugin registration error is propagated out of the setup closure */ + } + Ok(()) + }) + .run(tauri::generate_context!()) + .expect("error while running tauri application"); +} diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs new file mode 100644 index 0000000..ad5fe83 --- /dev/null +++ b/src-tauri/src/main.rs @@ -0,0 +1,6 @@ +// Prevents additional console window on Windows in release, DO NOT REMOVE!! +#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] + +/* Binary entry point: hands control to `run` in the `app_lib` library crate. */ fn main() { + app_lib::run(); +}