diff --git a/backend/dev_main.py b/backend/dev_main.py index dc6548b..cd603dd 100644 --- a/backend/dev_main.py +++ b/backend/dev_main.py @@ -9,6 +9,8 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse from pathlib import Path +from routers import audio + app = FastAPI(title="TalkEdit Dev Backend", version="0.0.1") app.add_middleware( @@ -34,6 +36,8 @@ MIME_MAP = { } +app.include_router(audio.router) + @app.get("/health") async def health(): return {"status": "ok"} diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index bc4f545..81dc8df 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -17,7 +17,6 @@ import HelpContent from './components/HelpContent'; import { useKeyboardShortcuts } from './hooks/useKeyboardShortcuts'; import { useLicenseStore } from './store/licenseStore'; import { - Film, FolderOpen, Settings, Sparkles, @@ -136,9 +135,6 @@ export default function App() { const [speedMode, setSpeedMode] = useState(false); const [speedModeValue, setSpeedModeValue] = useState(1.25); const [showReprocessConfirm, setShowReprocessConfirm] = useState(false); - const [showWelcomeOverlay, setShowWelcomeOverlay] = useState(() => { - return localStorage.getItem('talkedit:welcomeDismissed') !== 'true'; - }); const [showUnsavedPrompt, setShowUnsavedPrompt] = useState(false); const [pendingProceedAction, setPendingProceedAction] = useState<(() => Promise) | null>(null); const [lastSavedSignature, setLastSavedSignature] = useState(null); @@ -582,164 +578,6 @@ export default function App() { } }; - if (!videoPath) { - return ( -
- {/* Background pattern */} -
- - {/* Animated audio waveform background — left and right sides filling to center */} -
- {[ - [60, 1.3, 0.0], [100, 1.0, 0.12], [40, 1.6, 0.24], [120, 0.9, 0.08], - [80, 1.2, 0.2], [30, 1.8, 0.04], [110, 1.1, 0.28], [50, 1.5, 0.16], - [70, 1.4, 0.08], [140, 0.85, 0.24], [60, 1.3, 0.12], [90, 1.2, 0.0], - [130, 0.95, 0.2], [45, 1.7, 0.28], [80, 1.4, 0.04], [55, 1.1, 0.16], - [100, 1.3, 0.24], [35, 1.6, 0.12], [120, 0.9, 0.0], [65, 1.5, 0.2], - [85, 1.2, 0.08], [150, 0.8, 0.28], [40, 1.9, 0.16], [95, 1.1, 0.04], - [75, 1.4, 0.24], [25, 2.0, 0.12], [105, 1.05, 0.2], [155, 0.82, 0.08], - [50, 1.55, 0.28], [115, 0.92, 0.16], [70, 1.35, 0.04], [135, 0.88, 0.24], - [90, 1.15, 0.12], [42, 1.75, 0.0], [125, 0.98, 0.2], [58, 1.45, 0.08], - ].map(([peak, dur, delay], i) => ( -
- ))} -
-
- {[ - [100, 1.0, 0.0], [60, 1.3, 0.08], [130, 0.9, 0.16], [40, 1.6, 0.04], - [80, 1.2, 0.12], [150, 0.85, 0.24], [50, 1.5, 0.2], [110, 1.1, 0.28], - [70, 1.4, 0.08], [30, 1.8, 0.16], [140, 0.9, 0.0], [90, 1.2, 0.24], - [60, 1.3, 0.12], [120, 0.95, 0.04], [85, 1.4, 0.2], [45, 1.7, 0.28], - [160, 0.8, 0.08], [55, 1.5, 0.24], [100, 1.1, 0.0], [75, 1.3, 0.16], - [35, 1.9, 0.2], [115, 1.0, 0.12], [65, 1.6, 0.28], [140, 0.88, 0.04], - [95, 1.25, 0.24], [25, 1.85, 0.08], [125, 0.93, 0.16], [155, 0.78, 0.28], - [48, 1.65, 0.12], [82, 1.32, 0.2], [108, 1.08, 0.04], [72, 1.42, 0.24], - [135, 0.9, 0.16], [38, 1.78, 0.0], [62, 1.48, 0.08], [118, 1.02, 0.28], - ].map(([peak, dur, delay], i) => ( -
- ))} -
- -
-
- -
- {/* App icon */} -
- - - - - - -
- -
-

- TalkEdit -

-

- The offline video editor that doesn't slow down on long files. -

-
- - {/* Action row — button + model selector side by side */} -
- -
- Transcription model: - -
-
- - -
- - {licenseStatus?.tag === 'Trial' && ( -
- - - Free trial: {licenseStatus.days_remaining} day{licenseStatus.days_remaining !== 1 ? 's' : ''} remaining - - -
- )} - - {licenseStatus?.tag === 'Expired' && ( -
- - Trial expired - -
- )} -
- ); - } - // Health check timer useEffect(() => { const checkHealth = async () => { @@ -919,7 +757,36 @@ export default function App() {
{/* Video player */}
- + {videoPath ? ( + + ) : ( +
+
+ + + + + + +
+
+ + +
+
+ )}
{/* Draggable divider */} @@ -1044,53 +911,6 @@ export default function App() { - {showWelcomeOverlay && ( -
-
-
- -

Welcome to TalkEdit

-

- The offline video editor for long-form content. No account. No subscription. Your data never leaves your machine. -

-
- -
-
- 1 - - Open a video — TalkEdit transcribes it into a word-level transcript. - -
-
- 2 - - Edit by selecting words — deleting words cuts the matching video. Like editing a doc. - -
-
- 3 - - Export your final cut — with captions, background music, AI cleanup, and more. - -
-
- -
- -

- Press ? anytime to see shortcuts, or click Help in the toolbar. -

-
-
-
- )} - {showReprocessConfirm && (
(null); const containerRef = useRef(null); const [audioError, setAudioError] = useState(null); + const [waveformReady, setWaveformReady] = useState(false); const videoUrl = useEditorStore((s) => s.videoUrl); const videoPath = useEditorStore((s) => s.videoPath); @@ -349,6 +350,7 @@ export default function WaveformTimeline({ if (cancelled) return; waveformDataRef.current = waveformData; drawStaticWaveformRef.current(); + setWaveformReady(true); } catch (err) { if (cancelled || (err instanceof DOMException && err.name === 'AbortError')) { console.log('[WaveformTimeline] req=', requestId, 'aborted/cancelled'); @@ -1328,7 +1330,7 @@ export default function WaveformTimeline({ Retry
- ) : !waveformDataRef.current ? ( + ) : !waveformReady ? (
diff --git a/frontend/src/index.css b/frontend/src/index.css index dc97a47..06fe4ce 100644 --- a/frontend/src/index.css +++ b/frontend/src/index.css @@ -19,15 +19,6 @@ transform-origin: bottom; } -.welcome-audio-bar { - width: 4px; - border-radius: 2px; - background: #6366f1; - transform-origin: bottom; - animation: audioBounce var(--bar-duration) ease-in-out infinite; - animation-delay: var(--bar-delay); -} - * { margin: 0; padding: 0; diff --git a/frontend/tsconfig.tsbuildinfo b/frontend/tsconfig.tsbuildinfo index 6ada096..c6ef652 100644 --- a/frontend/tsconfig.tsbuildinfo +++ b/frontend/tsconfig.tsbuildinfo @@ -1 +1 @@ -{"root":["./src/App.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/AIPanel.tsx","./src/components/AppendClipPanel.tsx","./src/components/BackgroundMusicPanel.tsx","./src/components/DevPanel.tsx","./src/components/ExportDialog.tsx","./src/components/LicenseDialog.tsx","./src/components/MarkersPanel.tsx","./src/components/SettingsPanel.tsx","./src/components/SilenceTrimmerPanel.tsx","./src/components/TranscriptEditor.tsx","./src/components/VideoPlayer.tsx","./src/components/VolumePanel.tsx","./src/components/WaveformTimeline.tsx","./src/components/ZoneEditor.tsx","./src/hooks/useKeyboardShortcuts.ts","./src/hooks/useVideoSync.ts","./src/lib/dev-logger.ts","./src/lib/keybindings.ts","./src/lib/tauri-bridge.ts","./src/lib/thumbnails.ts","./src/store/aiStore.ts","./src/store/editorStore.test.ts","./src/store/editorStore.ts","./src/store/licenseStore.ts","./src/types/project.ts"],"version":"5.9.3"} \ No newline at end of file +{"root":["./src/App.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/AIPanel.tsx","./src/components/AppendClipPanel.tsx","./src/components/BackgroundMusicPanel.tsx","./src/components/DevPanel.tsx","./src/components/ErrorBoundary.tsx","./src/components/ExportDialog.tsx","./src/components/HelpContent.tsx","./src/components/LicenseDialog.tsx","./src/components/MarkersPanel.tsx","./src/components/SettingsPanel.tsx","./src/components/SilenceTrimmerPanel.tsx","./src/components/TranscriptEditor.tsx","./src/components/VideoPlayer.tsx","./src/components/WaveformTimeline.tsx","./src/components/ZoneEditor.tsx","./src/hooks/useKeyboardShortcuts.ts","./src/hooks/useVideoSync.ts","./src/lib/assert.test.ts","./src/lib/assert.ts","./src/lib/dev-logger.ts","./src/lib/keybindings.ts","./src/lib/tauri-bridge.ts","./src/lib/thumbnails.ts","./src/store/aiStore.test.ts","./src/store/aiStore.ts","./src/store/editorStore.test.ts","./src/store/editorStore.ts","./src/store/licenseStore.test.ts","./src/store/licenseStore.ts","./src/types/project.ts"],"version":"5.9.3"} \ No newline at end of file diff --git a/plan.md b/plan.md index 0a55417..6241457 100644 --- a/plan.md +++ b/plan.md @@ -65,50 +65,69 @@ TalkEdit's defensible position: **works on hour+ files without degrading**, full --- -## Phase 2: Standout features (post-beta) +## Phase 2: Beta Launch (🚧 next — 2–4 weeks) -### Long-form content -- [x] Chapter-based navigation — markers auto-sorted, click to jump (partially done) -- [x] Per-segment re-transcription (done) -- [x] Append multiple clips into one timeline (done) -- [ ] Project stitching — load multiple `.aive` projects, combine into one export -- [ ] Smart chunking for transcription — for files >2hr +**Goal:** Get working builds into real podcasters' hands. Validate the core promise (long-form, offline) before investing in edge-case features. -### Export -- [x] YouTube chapters from markers (done) -- [x] Export transcript formats: SRT, VTT, TXT (done) -- [ ] Batch export — multiple projects/cuts in sequence +### Must-have for beta -### AI features -- [x] AI Smart Clean — filler removal + silence trim + normalize (done) -- [x] AI sentence rephrase (done) -- [x] AI clip suggestions for social media (done) -- [ ] Smart Shorts finder — scan transcript for 10–90s segments -- [ ] AI auto-chapters — topic detection from transcript -- [ ] AI show notes — title, description, key moments -- [ ] AI dead-air finder — content-based silence detection +- [ ] **Smart chunking for transcription** — files >2hr. Without this the niche promise is unproven. Breaks transcription into overlapping chunks, reassembles with correct timestamps. +- [ ] **Hardware detection & model selection** — detect CUDA/ROCm/MPS at startup; expose model backend choice in Settings so beta users can configure their system. +- [ ] **GitHub v1.0.0 release** — tag, binary builds (AppImage + .deb), release notes. + +### Sales & distribution +- [ ] **Stripe integration** — payment processing for one-time purchases (Pro $39, Business $79). License key generation + email delivery on payment success. +- [ ] **Landing page + download site** — simple site with: feature overview, pricing tiers, download links (AppImage/.deb), license activation flow. No auth system needed — Stripe handles payments, license keys unlock the app. + +### Beta program + +- [ ] **Free licenses to 20 podcasters** — in exchange for feedback + permission to quote. Target: r/podcasting regulars, small-to-medium shows (30min–2hr episodes). +- [ ] **Bug/feedback pipeline** — GitHub Issues template for beta testers. Weekly triage. +- [ ] **Messaging for beta landing page:** + 1. "The offline video editor that doesn't slow down on long files" + 2. "No subscription. One price, owned forever." + 3. "AI-powered editing — bring your own API key (Ollama, OpenAI, Claude)" + +--- + +## Phase 3: Post-Beta Enhancements (user-driven priority) + +**Goal:** Build what beta testers actually ask for. Deferred items below are ordered by likely demand, not engineering convenience. ### Bundled local LLM - [ ] Integrate llama.cpp Rust bindings - [ ] Auto-download Qwen3 on first AI use (4B: 2.5GB / 1.7B: 1GB) - [ ] Hardware detection at runtime, model selection in Settings +### Long-form content +- [ ] Project stitching — load multiple `.aive` projects, combine into one export + +### Export +- [ ] Batch export — multiple projects/cuts in sequence + +### AI features +- [ ] Smart Shorts finder — scan transcript for 10–90s segments +- [ ] AI auto-chapters — topic detection from transcript +- [ ] AI show notes — title, description, key moments +- [ ] AI dead-air finder — content-based silence detection + --- -## Phase 3: Marketing & launch (post-beta) +## Phase 4: Public Launch -### Messaging pillars +**Goal:** Convert beta momentum + testimonials into a public release. + +### Messaging pillars (updated) 1. "The offline video editor that doesn't slow down on long files" 2. "No subscription. One price, owned forever." -3. "Zero-setup AI" — bundled Qwen3, no API keys -4. "Your podcast → 10 TikToks in one click" — Smart Shorts finder +3. "Zero-setup AI" — bundled Qwen3, no API keys *(activate when Phase 3 ships)* +4. "Your podcast → 10 TikToks in one click" — Smart Shorts finder *(activate when Phase 3 ships)* ### Channels -- [ ] r/podcasting, r/VideoEditing, r/selfhosted +- [ ] r/podcasting, r/VideoEditing, r/selfhosted — anchored by beta tester testimonials - [ ] Product Hunt, Hacker News "Show HN" -- [ ] YouTube demo (3-5 min walkthrough) -- [ ] Free licenses to 20 podcasters for testimonials -- [ ] GitHub v1.0.0 release with binaries +- [ ] YouTube demo (3-5 min walkthrough) — feature the beta tester stories +- [ ] Pricing goes live publicly ### Pricing - 7-day free trial (no CC, no account) diff --git a/src-tauri/capabilities/default.json b/src-tauri/capabilities/default.json index ad0b7f8..a6d38d0 100644 --- a/src-tauri/capabilities/default.json +++ b/src-tauri/capabilities/default.json @@ -11,8 +11,8 @@ "dialog:allow-open", "dialog:allow-save", "fs:default", - { "identifier": "fs:allow-read-text-file", "allow": [{ "path": "$HOME/**" }] }, - { "identifier": "fs:allow-write-text-file", "allow": [{ "path": "$HOME/**" }] }, + { "identifier": "fs:allow-read-text-file", "allow": [{ "path": "$HOME/**" }, { "path": "**" }] }, + { "identifier": "fs:allow-write-text-file", "allow": [{ "path": "$HOME/**" }, { "path": "**" }] }, "fs:allow-app-read-recursive", "fs:allow-app-write-recursive" ] diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index a34ddd1..cb2e6e9 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -6,7 +6,7 @@ "build": { "frontendDist": "../frontend/dist", "devUrl": "http://localhost:5173", - "beforeDevCommand": "cd frontend && npm run dev", + "beforeDevCommand": "lsof -ti:5173 | xargs kill -9 2>/dev/null; cd frontend && npm run dev", "beforeBuildCommand": "cd frontend && npm run build" }, "app": { diff --git a/transcribe.py b/transcribe.py index c830e51..68f3166 100644 --- a/transcribe.py +++ b/transcribe.py @@ -45,7 +45,16 @@ def main(): device = "cpu" compute_type = "int8" - model = WhisperModel(model_name, device=device, compute_type=compute_type) + try: + model = WhisperModel(model_name, device=device, compute_type=compute_type) + except RuntimeError as e: + if "out of memory" in str(e).lower() and device == "cuda": + print(f"CUDA OOM, falling back to CPU (int8)", file=sys.stderr) + device = "cpu" + compute_type = "int8" + model = WhisperModel(model_name, device=device, compute_type=compute_type) + else: + raise # Transcribe with progress reporting print(f"Starting transcription of {wav_path} with model {model_name}", file=sys.stderr)