Compare commits

..

5 Commits

Author SHA1 Message Date
1e02bf32d9 still working on crashes 2026-04-08 01:42:00 -06:00
2406b0a2e7 trying to fix crashes 2026-04-08 01:04:27 -06:00
38ca9cfbad crashing from wav file size i think 2026-04-08 00:48:05 -06:00
56be227245 fixed issues from removing other frontend 2026-04-08 00:02:56 -06:00
e25f8a9b63 removed electron 2026-04-07 23:08:27 -06:00
100 changed files with 46415 additions and 13882 deletions

View File

@ -1,16 +0,0 @@
# backend_health_check
# cmd: /home/dillon/_code/TalkEdit/.venv312/bin/python3.12 -c import importlib; importlib.import_module('backend.main'); print('backend import OK')
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/usr/lib/python3.12/importlib/__init__.py", line 90, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<frozen importlib._bootstrap>", line 1387, in _gcd_import
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 999, in exec_module
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "/home/dillon/_code/TalkEdit/backend/main.py", line 12, in <module>
from routers import transcribe, export, ai, captions, audio
ModuleNotFoundError: No module named 'routers'

View File

@ -1,3 +0,0 @@
# backend_python_version
# cmd: /home/dillon/_code/TalkEdit/.venv312/bin/python3.12 --version
Python 3.12.13

View File

@ -1,3 +0,0 @@
# env_git_head
# cmd: git -C /home/dillon/_code/TalkEdit rev-parse --short HEAD
4f90750

View File

@ -1,10 +0,0 @@
# env_git_status
# cmd: git -C /home/dillon/_code/TalkEdit status --short
M frontend/src/App.tsx
M frontend/src/components/VolumePanel.tsx
M frontend/src/components/WaveformTimeline.tsx
M frontend/src/store/editorStore.ts
?? .diagnostics/
?? AI_dev.md
?? docs/
?? scripts/

View File

@ -1,3 +0,0 @@
# env_node_version
# cmd: node --version
v22.18.0

View File

@ -1,3 +0,0 @@
# env_npm_version
# cmd: npm --version
10.9.3

View File

@ -1,3 +0,0 @@
# env_uname
# cmd: uname -a
Linux cachyos-x 6.19.10-1-cachyos #1 SMP PREEMPT_DYNAMIC Wed, 25 Mar 2026 23:30:07 +0000 x86_64 GNU/Linux

View File

@ -1,11 +0,0 @@
# frontend_build
# cmd: bash -lc cd '/home/dillon/_code/TalkEdit/frontend' && npm run -s build
vite v6.4.1 building for production...
transforming...
✓ 1606 modules transformed.
rendering chunks...
computing gzip size...
dist/index.html 1.20 kB │ gzip: 0.57 kB
dist/assets/index-gyhcOzhr.css 19.31 kB │ gzip: 4.48 kB
dist/assets/index-B5NnH24A.js 354.13 kB │ gzip: 108.13 kB
✓ built in 2.43s

View File

@ -1,3 +0,0 @@
# frontend_lint
# cmd: bash -lc cd '/home/dillon/_code/TalkEdit/frontend' && npm run -s lint
sh: line 1: eslint: command not found

View File

@ -1,72 +0,0 @@
# list_recent_files
# cmd: find /home/dillon/_code/TalkEdit -maxdepth 2 -type f
/home/dillon/_code/TalkEdit/.git/description
/home/dillon/_code/TalkEdit/.git/packed-refs
/home/dillon/_code/TalkEdit/.git/COMMIT_EDITMSG
/home/dillon/_code/TalkEdit/.git/FETCH_HEAD
/home/dillon/_code/TalkEdit/.git/ORIG_HEAD
/home/dillon/_code/TalkEdit/.git/REBASE_HEAD
/home/dillon/_code/TalkEdit/.git/HEAD
/home/dillon/_code/TalkEdit/.git/config
/home/dillon/_code/TalkEdit/.git/index
/home/dillon/_code/TalkEdit/backend/requirements.txt
/home/dillon/_code/TalkEdit/backend/.python-version
/home/dillon/_code/TalkEdit/backend/dev_main.py
/home/dillon/_code/TalkEdit/backend/video_editor.py
/home/dillon/_code/TalkEdit/backend/audio_cleaner.py
/home/dillon/_code/TalkEdit/backend/diarization.py
/home/dillon/_code/TalkEdit/backend/ai_provider.py
/home/dillon/_code/TalkEdit/backend/caption_generator.py
/home/dillon/_code/TalkEdit/backend/background_removal.py
/home/dillon/_code/TalkEdit/backend/main.py
/home/dillon/_code/TalkEdit/frontend/postcss.config.js
/home/dillon/_code/TalkEdit/frontend/tailwind.config.js
/home/dillon/_code/TalkEdit/frontend/tsconfig.json
/home/dillon/_code/TalkEdit/frontend/vite.config.ts
/home/dillon/_code/TalkEdit/frontend/frontend_dev.log
/home/dillon/_code/TalkEdit/frontend/index.html
/home/dillon/_code/TalkEdit/frontend/package-lock.json
/home/dillon/_code/TalkEdit/frontend/package.json
/home/dillon/_code/TalkEdit/frontend/tsconfig.tsbuildinfo
/home/dillon/_code/TalkEdit/shared/project-schema.json
/home/dillon/_code/TalkEdit/node_modules/.package-lock.json
/home/dillon/_code/TalkEdit/src-tauri/.gitignore
/home/dillon/_code/TalkEdit/src-tauri/Cargo.toml
/home/dillon/_code/TalkEdit/src-tauri/build.rs
/home/dillon/_code/TalkEdit/src-tauri/tauri_dev.log
/home/dillon/_code/TalkEdit/src-tauri/Cargo.lock
/home/dillon/_code/TalkEdit/src-tauri/tauri.conf.json
/home/dillon/_code/TalkEdit/.dockerignore
/home/dillon/_code/TalkEdit/.gitattributes
/home/dillon/_code/TalkEdit/FIX-GITHUB-ACTIONS.md
/home/dillon/_code/TalkEdit/LICENSE
/home/dillon/_code/TalkEdit/M4A-SUPPORT.md
/home/dillon/_code/TalkEdit/package-lock.json
/home/dillon/_code/TalkEdit/TECH_FEATURES.md
/home/dillon/_code/TalkEdit/FFmpeg_COMPLIANCE.md
/home/dillon/_code/TalkEdit/transcribe.py
/home/dillon/_code/TalkEdit/test_api.py
/home/dillon/_code/TalkEdit/.vscode/settings.json
/home/dillon/_code/TalkEdit/.venv312/pyvenv.cfg
/home/dillon/_code/TalkEdit/webview.log
/home/dillon/_code/TalkEdit/.gitmodules
/home/dillon/_code/TalkEdit/split_audio.sh
/home/dillon/_code/TalkEdit/venv/.gitignore
/home/dillon/_code/TalkEdit/venv/pyvenv.cfg
/home/dillon/_code/TalkEdit/.gitignore
/home/dillon/_code/TalkEdit/FEATURES.md
/home/dillon/_code/TalkEdit/README.md
/home/dillon/_code/TalkEdit/close
/home/dillon/_code/TalkEdit/electron/main.js
/home/dillon/_code/TalkEdit/electron/preload.js
/home/dillon/_code/TalkEdit/electron/python-bridge.js
/home/dillon/_code/TalkEdit/idea summary.md
/home/dillon/_code/TalkEdit/open
/home/dillon/_code/TalkEdit/package.json
/home/dillon/_code/TalkEdit/plan.md
/home/dillon/_code/TalkEdit/.github/copilot-instructions.md
/home/dillon/_code/TalkEdit/AI_dev.md
/home/dillon/_code/TalkEdit/docs/spec-template.md
/home/dillon/_code/TalkEdit/docs/ai-policy.md
/home/dillon/_code/TalkEdit/scripts/validate-all.sh
/home/dillon/_code/TalkEdit/scripts/collect-diagnostics.sh

View File

@ -1,109 +0,0 @@
# TalkEdit Copilot Instructions (Living Project Context)
Purpose: give AI assistants immediate, accurate context for this repository and define what must be kept in sync when the project evolves.
## How To Use This File
- This is a workspace instruction file for VS Code Chat/Copilot.
- Treat this as the first source of truth for architecture and workflow expectations.
- If your code changes make any section outdated, update this file in the same change.
## Project Snapshot
- Name: TalkEdit
- Product: local-first, AI-powered, text-based audio/video editor.
- Primary runtime: Tauri + React frontend + Python FastAPI backend.
- Desktop only (Electron has been removed; Tauri is the exclusive desktop runtime).
## Tech Stack
- Frontend: React 19, TypeScript, Vite, Tailwind, Zustand.
- Desktop bridge: Tauri API (IPC commands via `window.electronAPI` polyfill in `frontend/src/lib/tauri-bridge.ts` for unified call-site interface).
- Backend: FastAPI + Uvicorn (`backend/main.py`) with routers in `backend/routers` and core services in `backend/services`.
- Media tooling: FFmpeg for edit/export and codec operations.
- AI tooling: WhisperX/faster-whisper for transcription; provider layer supports OpenAI/Anthropic/Ollama.
## Code Map
- `frontend/src/components`: editor UI (player, transcript, waveform, settings, export, AI panel).
- `frontend/src/store`: Zustand state (`editorStore`, `aiStore`).
- `frontend/src/hooks`: keyboard/video sync behavior.
- `backend/routers`: API surface (`/transcribe`, `/export`, `/ai/*`, `/captions`, `/audio/*`).
- `backend/services`: heavy operations (transcription, captioning, diarization, video editing, cleanup).
- `shared/project-schema.json`: saved project schema contract.
- `src-tauri`: Rust/Tauri host code and app configuration.
## Run And Build (Preferred)
- Frontend dev: `npm run dev`
- Backend dev: `npm run dev:backend`
- Tauri dev: `npm run dev:tauri`
- Tauri build: `npm run build:tauri`
Use project virtualenvs where available (`.venv312`, `.venv`, or `venv`) for backend execution.
## Working Conventions
- Keep router files thin; put heavy logic in `backend/services`.
- Preserve response compatibility for existing frontend callers unless task explicitly allows API breakage.
- Frontend uses unified `window.electronAPI` interface (Tauri-backed via tauri-bridge.ts); desktop APIs are implemented exclusively in Tauri.
- Prefer small, focused edits over broad refactors.
## Known Risk Areas
- Startup/rendering on Linux WebKit can regress when reintroducing remote fonts/CSP allowances; prefer local font assets.
- Media URL handling between project load paths should remain consistent to avoid format-specific regressions (especially WAV/MP3 behavior).
- Export pipeline changes must preserve caption modes (`none`, `sidecar`, `burn-in`) and audio enhancement behavior.
- WAV export uses `pcm_s16le` codec — only available for audio-only inputs (no video stream). Format selector conditionally shows WAV based on input file extension.
- `<select>` dropdowns need `[color-scheme:dark]` Tailwind class on Linux WebKit or the native popup renders white-on-light-gray.
- Frontend gain ranges use camelCase (`gainDb`) but the backend expects snake_case (`gain_db`). The ExportDialog maps them before sending. Any new call sites must do the same.
## Recent Changes
### 2026-05-04 — Word text correction, low-confidence highlighting, audio normalization
- **Word text correction (#015)**: Double-click any word in the transcript editor to edit its text inline. Press Enter to commit, Escape to cancel. State is updated in both `words[]` and `segments[]` arrays (segment text recomposed from updated words). Pure frontend; no backend changes needed.
- **Low-confidence word highlighting (#012)**: Words with `confidence < threshold` (default 0.6, configurable in Settings panel) render with an orange dotted underline. Tooltip shows exact confidence percentage. Threshold is persisted in `localStorage` key `talkedit:confidenceThreshold`.
- **Audio normalization (#018)**: New backend endpoint `POST /audio/normalize` in `backend/routers/audio.py`. Two-pass FFmpeg `loudnorm` (measure then apply) implemented in `backend/services/audio_cleaner.py:normalize_audio()`. Falls back to single-pass if measurement fails. Frontend UI in Export panel: target selector (YouTube -14, Spotify -16, Broadcast -23, etc.) with "Normalize" button.
- **Store**: New `updateWordText(index, text)` action in `editorStore.ts` updates both `words[]` and recomputes `segments[].text`.
- **Settings panel**: New confidence threshold slider (0–1 range).
- **WAV export format**: Format selector shows "WAV (Uncompressed)" for audio-only inputs. Backend uses `pcm_s16le` codec via `_get_codec_args()` helper. Codec selection centralized in `backend/services/video_editor.py:_get_codec_args(format_hint, has_video)`.
- **Normalization moved to export**: No longer a standalone button. Integrated as `normalizeAudio` checkbox + LUFS target selector in ExportPanel. Sent as `normalize_loudness`/`normalize_target_lufs` to backend. Applied via `loudnorm` in FFmpeg audio filter chain during export.
- **Export camelCase fix**: `ExportDialog.tsx` now manually maps `gainRanges` → `gain_db` and `muteRanges` → `{start,end}` before sending to backend. Prevents Pydantic v2 field rejection.
- **color-scheme:dark**: All `<select>` elements in ExportDialog use `[color-scheme:dark]` to ensure readable native dropdown popups on Linux WebKit.
- **Re-transcribe selection (#013)**: Backend `POST /transcribe/segment` extracts audio via FFmpeg, runs Whisper, adjusts timestamps. Frontend: "Re-transcribe" button on selected words in TranscriptEditor; `replaceWordRange()` store action swaps words + rebuilds segments by speaker.
- **Transcript-only export (#024)**: "Export Transcript Only" in ExportDialog with .txt/.srt options. **Pure frontend** — generates content in-browser, writes via Tauri `writeFile`. No backend dependency. Respects word cuts.
- **Named timeline markers (#016)**: `TimelineMarker` type in `project.ts`. Store actions: `addTimelineMarker`, `updateTimelineMarker`, `removeTimelineMarker`. Colored pins on waveform canvas. MarkersPanel UI for add/edit/delete. Persisted in project.
- **Chapters (#017)**: `getChapters()` store action derives from sorted markers. "Copy as YouTube timestamps" in MarkersPanel. Zero backend.
- **Clip thumbnail strip (#022)**: `lib/thumbnails.ts` — frontend canvas capture from `<video>`. Toggle button in WaveformTimeline. Clickable frames at 10s intervals.
- **Customizable hotkeys (#041)**: `lib/keybindings.ts` with two presets (standard + left-hand). `useKeyboardShortcuts.ts` reads bindings dynamically. Settings panel includes key remapper with conflict detection and per-key reset. `?` key shows dynamic cheatsheet.
## Update Rules (Important)
When a task changes architecture, app wiring, commands, API shape, project schema, or major conventions, update this file before finishing.
Always update these sections if affected:
- `Project Snapshot`
- `Tech Stack`
- `Code Map`
- `Run And Build (Preferred)`
- `Working Conventions`
- `Known Risk Areas`
- Recent changes section (if applicable)
If behavior changed significantly, add a short note under a new `Recent Changes` section with:
- Date (`YYYY-MM-DD`)
- What changed
- What future edits should preserve
## Assistant Behavior For This Repo
- Validate assumptions against current files before editing.
- Prefer existing patterns in neighboring files over introducing new patterns.
- Call out uncertainty explicitly when code and docs disagree.
- If you discover stale docs, fix them as part of the same task when reasonable.

View File

@ -1,23 +0,0 @@
## Summary
Describe what changed and why.
## Spec Link (Required For Feature Changes)
- Spec file in `docs/specs/`: <!-- e.g. docs/specs/2026-04-15-speed-adjustment.md -->
## Acceptance Criteria Checklist
- [ ] Acceptance criteria reviewed against the linked spec
- [ ] User-visible behavior verified for this change
- [ ] Backward compatibility impact assessed
## Validation
- [ ] `./scripts/validate-all.sh` passes locally
- [ ] Added/updated tests for changed behavior
## Risk And Rollback
- Risk level: Low / Medium / High
- Rollback plan:

View File

@ -1,45 +0,0 @@
name: CI
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
rust:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- run: cargo test
working-directory: src-tauri
- run: cargo check --release
working-directory: src-tauri
- run: cargo clippy -- -D warnings
working-directory: src-tauri
continue-on-error: true
frontend:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 20
- run: npm ci
working-directory: frontend
- run: npx tsc --noEmit
working-directory: frontend
- run: npx vitest run
working-directory: frontend
python:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- run: pip install pytest
- run: python -m pytest backend/tests/ || true

View File

@ -1,79 +0,0 @@
name: Release
on:
push:
tags:
- 'v*'
jobs:
build:
strategy:
fail-fast: false
matrix:
include:
- platform: ubuntu-24.04
target: x86_64-unknown-linux-gnu
bundles: deb
- platform: ubuntu-24.04
target: x86_64-unknown-linux-gnu
bundles: rpm
- platform: ubuntu-24.04
target: x86_64-unknown-linux-gnu
bundles: archlinux
- platform: windows-latest
target: x86_64-pc-windows-msvc
bundles: msi
runs-on: ${{ matrix.platform }}
steps:
- uses: actions/checkout@v4
- name: Setup Node
uses: actions/setup-node@v4
with:
node-version: 20
cache: npm
cache-dependency-path: frontend/package-lock.json
- name: Install frontend dependencies
run: npm ci
working-directory: frontend
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
with:
targets: ${{ matrix.target }}
- name: Install system dependencies (Linux)
if: runner.os == 'Linux'
run: |
sudo apt-get update
sudo apt-get install -y \
libwebkit2gtk-4.1-dev \
libappindicator3-dev \
librsvg2-dev \
patchelf \
libssl-dev \
libgtk-3-dev \
libayatana-appindicator3-dev
- name: Install RPM build tools
if: matrix.bundles == 'rpm'
run: sudo apt-get install -y rpm
- name: Install ArchLinux build tools
if: matrix.bundles == 'archlinux'
run: sudo apt-get install -y pacman-pkg-strap
- name: Build Tauri app
uses: tauri-apps/tauri-action@v0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tagName: ${{ github.ref_name }}
releaseName: 'TalkEdit ${{ github.ref_name }}'
releaseBody: 'See the assets to download and install this version.'
releaseDraft: false
includeUpdaterJson: true
args: --bundles ${{ matrix.bundles }} --target ${{ matrix.target }}

View File

@ -1,58 +0,0 @@
name: Validate All
on:
pull_request:
push:
branches:
- main
jobs:
validate-all:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Node
uses: actions/setup-node@v4
with:
node-version: '20'
cache: npm
cache-dependency-path: |
frontend/package-lock.json
package-lock.json
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Enforce feature spec policy (PR only)
if: github.event_name == 'pull_request'
env:
BASE_SHA: ${{ github.event.pull_request.base.sha }}
run: ./scripts/check-feature-spec.sh
- name: Install frontend dependencies
run: |
cd frontend
npm install
- name: Run validate-all
env:
SKIP_BACKEND_IMPORT_SMOKE: '1'
run: ./scripts/validate-all.sh
- name: Collect diagnostics on failure
if: failure()
run: ./scripts/collect-diagnostics.sh
- name: Upload diagnostics artifact
if: failure()
uses: actions/upload-artifact@v4
with:
name: diagnostics
path: .diagnostics

5
.gitignore vendored
View File

@ -21,9 +21,6 @@ htmlcov/
.idea/
.cursor/
# Submodules (can be cloned separately if needed)
CutScript/
# OS files
.env
.env.local
@ -36,8 +33,6 @@ Thumbs.db
# Logs
*.log
logs/
cache/
*.aive
# Build output
frontend/dist/

View File

@ -1,157 +0,0 @@
# AI Dev Plan (Must-Haves Only)
## Purpose
This is the minimum implementation needed for AI to reliably build, test, and debug TalkEdit with high confidence.
Target: reliable 80-90% autonomous implementation/debugging on scoped tasks.
## Must-Have Pillars
## 1. Single Validation Command
Required:
1. One command that runs lint, build, backend tests, and smoke checks.
2. Works locally and in CI.
Current status:
1. Implemented via scripts/validate-all.sh.
2. Enforced in CI via .github/workflows/validate-all.yml.
## 2. CI Quality Gate
Required:
1. Pull requests fail if validation fails.
2. Failures produce diagnostics artifacts.
Current status:
1. Implemented in .github/workflows/validate-all.yml.
2. Diagnostics collected by scripts/collect-diagnostics.sh on failure.
## 3. Spec Requirement for Feature Changes
Required:
1. Feature code changes must include a spec file update.
2. Spec format must be standardized.
Current status:
1. Implemented via scripts/check-feature-spec.sh.
2. Spec template exists at docs/spec-template.md.
3. Specs folder guidance exists at docs/specs/README.md.
## 4. Backend Contract Test Coverage
Required:
1. Router-level contract tests for success and error paths.
2. Tests are deterministic and mock heavy services.
Current status:
1. Implemented in backend/tests/test_router_contracts.py.
2. Cache utility baseline tests implemented in backend/tests/test_cache_utils.py.
## 5. Error-Tolerant Router Contracts
Required:
1. Expected client errors must remain 4xx.
2. Server failures must return 5xx with useful detail.
Current status:
1. Implemented for captions/export HTTPException passthrough.
2. Covered by contract tests.
## 6. Basic Autonomy Policy
Required:
1. Clear autonomous scope and escalation rules.
2. Clear restrictions for high-risk changes.
Current status:
1. Implemented in docs/ai-policy.md.
## Must-Have Remaining Work
No remaining must-have items.
Completed in this pass:
1. Added lightweight frontend tests and integrated them into scripts/validate-all.sh.
2. Added pull request template with required spec link and acceptance criteria checklist.
3. Added endpoint-level contract assertions for /file range requests and /audio/waveform cache-hit/cache-miss behavior.
4. Confirmed scripts/validate-all.sh passes end-to-end with frontend tests + expanded backend contracts.
## Out of Scope for Must-Have Baseline
Useful later, but not required for strong day-to-day autonomous implementation:
1. Full quality dashboards.
2. Advanced autonomy telemetry.
3. Complete long-term governance expansion.
4. High-autonomy optimization beyond 90% reliability target.
## Definition of Done (Must-Have Plan)
Must-have plan is complete when all are true:
1. scripts/validate-all.sh passes locally and in CI.
2. Feature PRs without spec updates are blocked.
3. Backend router contracts cover core success and error paths.
4. Frontend has at least one stable test command integrated into validation.
## 7. AI Tools Validation Strategy
Required:
1. **Per-edit validation**: After each code change (file edit, replacement, or creation), validate immediately with appropriate tools.
2. **Tool selection by change type**:
- Frontend changes: ESLint (`npm run -s lint`), then TypeScript build (`npm run build`)
- Backend changes: Syntax check via Python import, then run relevant test suite
- Type/interface changes: Full type check via build or `tsc -b`
3. **Failure handling**: If validation fails, fix immediately before proceeding to next edit.
4. **Documentation updates**: When changing architecture, always update [.github/copilot-instructions.md](.github/copilot-instructions.md) as part of the same PR.
5. **Large multi-edit operations**: Use `multi_replace_string_in_file` to batch independent edits and reduce tool call overhead.
6. **Error collection**: Use `get_errors` tool to identify issues across multiple files in one call post-change.
Current implementation:
1. Electron removal completed with post-edit lint and build validation at each phase.
2. Zone editor feature implemented with immediate lint/build validation after component creation and UI integration.
3. Validation tools: `npm run -s lint`, `npm run build`, `get_errors`, `run_in_terminal` for test scripts.
Best practices established:
- Always run lint before build to catch TypeScript errors early
- Run full build after component changes to verify tree-shaking and bundling
- Use `get_errors` for multi-file error detection rather than sequential file reads
- Batch unrelated edits with `multi_replace_string_in_file` for efficiency
- Cache key decisions in session memory to avoid repeated exploration
5. AI policy + diagnostics workflow are active.
## Current State Summary
Completed:
1. Validation and CI enforcement.
2. Diagnostics capture.
3. Spec policy and templates.
4. Backend contract test foundation (including AI endpoints).
5. Core router error-path correctness.
6. Autonomy policy baseline.
7. Frontend test command integrated into validation.
8. PR template requirement added.
9. /file and /audio/waveform contract assertions implemented.
Remaining:
1. No must-have items remaining.

View File

@ -1,181 +1,93 @@
# TalkEdit — Features & Roadmap
# TalkEdit — Feature Roadmap
**Niche:** "Descript for long-form content" — works on hour+ files without degrading, fully offline, one-time payment.
Features are grouped by priority. Check off items as they are implemented.
---
## 🔴 High Priority — Core editing gaps
1. [x] **Cut / Mute sections** — select a time range and choose to cut (remove entirely) or mute (silence audio while video continues). Cut sections show as red overlays, mute sections as transparent blue overlays on the timeline over the transcript text and audio waveform. Backend: `ffmpeg -af volume=0` for mute, time-based cutting for removal.
2. [x] **Silence / pause trimmer** — detect pauses using min duration (ms) + amplitude threshold (dB), then apply detected pauses as cut ranges. Initial endpoint: `/audio/detect-silence`; UI includes filter controls and an "Apply As Cuts" action.
3. [x] **Operation-level undo for batch actions** — explicit undo entry for actions like "Apply Silence Trim" so one shortcut/click reverts the whole operation, while still allowing normal fine-grained undo/redo steps.
4. [ ] **Grouped silence-trim zones (editable batch)** — when pauses are applied, tag them as a batch (`trim_group_id`) so the user can: (1) delete all zones from that auto-trim pass at once, and (2) still select/resize/delete individual zones independently.
5. [ ] **Edit silence-trim group settings after apply** — allow reopening a trim group and changing its detection settings (min pause ms, threshold dB, pre/post buffers), then reapplying updates to that group without affecting unrelated edits.
6. [ ] **Volume / gain control** — per-selection or global audio gain slider. Every editor has this. Descript users constantly complain it's missing. Backend: `ffmpeg -af volume=Xdb`.
7. [ ] **Speed adjustment** — slow down or speed up a selection or the whole clip. Backend: `ffmpeg -filter:v setpts` + `atempo`. Common use case: slightly speed up boring sections.
8. [ ] **Cut preview** — before committing a delete, play what the audio will sound like with that section removed (pre-listen across the edit point). Pure frontend using Web Audio API — splice the AudioBuffer and play the join.
9. [ ] **Timeline shows output length** — deleted regions should visually collapse (or show as narrow gaps) so the user sees the *output* duration, not just the source duration.
---
## 🟡 Medium Priority — Widely expected features
10. [ ] **Transcript search (Ctrl+F)** — find words/phrases in the transcript and highlight matches. Pure frontend. Critical for long-form content. Jump between matches with Enter.
11. [ ] **Mark In / Out + delete (I / O keys)** — keyboard shortcuts to mark a time range on the timeline, then delete it. Faster than click-dragging words. Store the in/out points in state, `Delete` removes them.
12. [ ] **Low-confidence word highlighting** — WhisperX already returns `confidence` per word. Words below a threshold (e.g. < 0.6) should be visually underlined or tinted so the user knows where to double-check.
13. [ ] **Re-transcribe selection** — if Whisper gets a section wrong, let the user select a word range and re-run transcription on just that segment (optionally with a different model or language).
14. [ ] **Word text correction** — allow editing the transcript text of a word without affecting its timing. Whisper gets homophones/proper nouns wrong constantly. Pure frontend state change; no backend needed.
15. [ ] **Named timeline markers** — drop named marker pins on the waveform (like Resolve markers). Store as `{ id, time, label, color }` in the project. Rendered as colored triangles on the timeline canvas.
16. [ ] **Chapters** — group markers into named chapter ranges. Useful for podcasts and lectures. Exportable as YouTube chapter timestamps in the description.
---
## 🟢 Lower Priority — Differentiating / power features
17. [ ] **Audio normalization / loudness targeting** — single "Normalize" button that targets a LUFS level (-14 for YouTube, -16 for Spotify). Backend: `ffmpeg -af loudnorm`. Very high value for podcasters, ~2–3 hours of work.
18. [ ] **Background music track** — a second audio track for background music with volume ducking. Major gap in Descript that TalkEdit could own. Backend: `ffmpeg` amix + `asendcmd` for auto-ducking.
19. [ ] **Video zoom / punch-in** — scale and position the video (crop, zoom, pan). Used constantly on talking-head videos for emphasis. Backend: `ffmpeg -vf crop/scale/zoompan`.
20. [ ] **Multi-clip / append** — load a second video and append it to the timeline. Even without a full multi-track timeline, "append clip" is a heavily used workflow.
21. [ ] **Clip thumbnail strip** — video frame thumbnails along the timeline so users can navigate visually, not only by waveform. Backend: `ffmpeg` thumbnail extraction at regular intervals.
22. [ ] **Batch silence removal** — full-file scan + remove all pauses above threshold in one click. Distinct from the manual trimmer above; this is a "fix the whole file" operation.
23. [ ] **Export to transcript text / SRT only** — some users just want a clean `.txt` or `.srt` of the edited transcript without rendering video.
---
## 💡 TalkEdit competitive advantages to lean into
These aren't features to build — they're things to make more visible in the UI and README:
24. **100% offline / no account required** — CapCut requires login and sends data to servers. Descript is cloud-first. TalkEdit never leaves the machine.
25. **Local AI models** — Ollama support means no API costs and no data leaving the device.
26. **Word-level precision** — editing by deleting words (not dragging razor cuts) is faster for talking-head content than any timeline-based editor.
27. **Works on long files** — virtualized transcript + chunked waveform handles 1hr+ content that bogs down CapCut.
---
## ✅ Already Implemented
### Core editing
- [x] [#001] **Cut / Mute sections** — remove or silence segments from output
- [x] [#002] **Silence / pause trimmer** — batch detect and remove silent pauses
- [x] [#006] **Volume / gain control** — per-zone and global gain adjustment
- [x] [#007] **Speed adjustment** — per-zone playback speed changes (0.25x–4x)
- [x] [#008] **Cut preview** — preview zones before export with configurable padding
- [x] [#009] **Timeline shows output length** — adjusted timeline with cut compression
- [x] [#011] **Mark In / Out** — I/O keys to set selection range on timeline
### Transcript
- [x] [#010] **Transcript search (Ctrl+F)** — find words, navigate matches
- [x] [#012] **Low-confidence word highlighting** — orange dotted underline with confidence %
- [x] [#013] **Re-transcribe selection** — re-run Whisper on a selected word range
- [x] [#015] **Word text correction** — double-click any word to edit text in-place
- [x] [#016] **Named timeline markers** — colored pins with labels, editable
- [x] [#017] **Chapters** — auto-form from markers, copy as YouTube timestamps
- [x] [#025] Word-level transcript editing (click, shift+click, drag select)
- [x] [#026] Ctrl+click word → seek video to that timestamp
- [x] [#027] Waveform timeline with zoom (Ctrl+scroll), scroll, drag-to-scrub
- [x] [#028] Auto-scroll waveform when playhead goes off-screen
### AI features
- [x] [#029] **AI filler word detection** — find and remove "um", "uh", "like" etc.
- [x] [#030] **AI clip suggestions** — find best 20-60s segments for social media
- [x] [#031] **Noise reduction** — DeepFilterNet or FFmpeg ANLMDN
- [x] [#034] **Speaker diarization** — label speakers in transcript
- [x] [#042] **Background removal** — MediaPipe segmentation, blur/color/image replacement
### Export
- [x] [#018] **Audio loudness normalization** — LUFS targets (-14 YouTube, -16 Spotify, -23 Broadcast)
- [x] [#019] **Background music** — auto-ducking via FFmpeg sidechain compress
- [x] [#020] **Video zoom / punch-in** — crop, zoom, pan during export
- [x] [#021] **Multi-clip / append** — concatenate multiple video files
- [x] [#024] **Export transcript** — plain text or SRT without video
- [x] [#032] **Export** — fast stream-copy or full re-encode (MP4/MOV/WebM/WAV, 720p–4K)
- [x] [#033] **Captions** — SRT, VTT, ASS burn-in with font/color/position options
### Project & state
- [x] [#003] **Undo / redo** — 100-level history via Zundo
- [x] [#004] **Grouped silence-trim zones** — editable batch groups
- [x] [#005] **Edit silence-trim group** settings after applying
- [x] [#022] **Clip thumbnail strip** — canvas capture from video, clickable
- [x] [#035] **Project save / load** — .aive JSON format
- [x] [#037] **Multi-format input** — MP4, MKV, MOV, AVI, WebM, M4A
- [x] [#038] **Keyboard shortcuts** — Space, J/K/L, arrows, Ctrl+Z/S/E, ?
- [x] [#039] **Settings panel** — AI provider config (Ollama, OpenAI, Claude)
- [x] [#040] **Zone creation on timeline** — draggable edits, Delete to remove
- [x] [#041] **Customizable hotkeys** — two presets, click-to-remap, conflict detection
- [x] **[M] Manage Models** — view/delete downloaded Whisper and LLM files
- [x] **[M] Keyboard cheatsheet** — `?` overlay with close button, preset indicator
- [x] **[M] Visual toolbar** — grouped buttons with section dividers
- [x] **[M] Help panel** — full feature documentation in sidebar
- [x] **[M] First-run welcome overlay** — 3-step quick-start guide
- [x] **[M] Responsive welcome screen** — animated audio bars, model picker
- [x] **[M] Error boundary** — catches React crashes, shows fallback + reload
- [x] **[M] Global error logging** — uncaught errors logged to Rust backend
- [x] **[M] Store input validation** — NaN rejection, bounds clamping, min zone duration
- [x] **[M] Runtime assertions** — dev-mode guards in critical paths
- [x] **[M] Backend health check** — polls every 30s, shows reconnecting banner
### Licensing
- [x] **[L] 7-day free trial** — no credit card required
- [x] **[L] License activation** — email confirmation step to deter key sharing
- [x] **[L] Ed25519-signed license keys** — offline verification
- [x] **[L] Trial integrity** — sentinel file prevents delete-and-reset, XOR checksum deters timestamp editing
- [x] **[L] canEdit gate** — defaults to locked, only unlocks after verified status
- [x] **[L] Expired state** — export and loading still work, editing and AI locked
### Robustness
- [x] **[R] Auto-save crash recovery** — every 60s, restore prompt on next launch
- [x] **[R] Bad project state recovery** — auto-prunes invalid zones on load
- [x] **[R] Zone/marker deletion confirmations** — prevents accidental removals
- [x] **[R] Progress bars** — export (determinate), transcription (indeterminate)
- [x] **[R] Loading spinners** — waveform, AI processing
- [x] **[R] Error states with retry** — AIPanel, WaveformTimeline
- [x] **[R] Empty states** — MarkersPanel, AIPanel, ZoneEditor
- [x] **[R] Canvas zone handles enlarged** — radius 6px, hit area increased
- [x] **[R] Search match contrast** — thicker rings, higher opacity
- [x] **[R] Split panes keyboard-accessible** — arrow keys, tabIndex, ARIA
### Testing
- [x] **95 frontend tests** — editorStore (68), licenseStore (22), aiStore (15), assert (4)
- [x] **12 Rust tests** — licensing (7), models (5)
- [x] **CI pipeline** — GitHub Actions (Rust: test+clippy, Frontend: tsc+vitest, Python: pytest)
---
## 🔴 What's Next — highest impact
- [ ] **[LLM] Bundled Qwen3 LLM** — auto-download on first AI use, no API keys needed. Replace Python `ai_provider.py` with llama.cpp Rust bindings. Two sizes: 4B (2.5GB, 8GB+ RAM) and 1.7B (1GB, 4GB+ RAM)
- [ ] **[SHORTS] Smart Shorts finder** — scan transcript for self-contained 10–90s segments, ranked by engagement. One-click export as separate clips
- [ ] **[PAYMENT] Wire checkout** — payment page at talked.it, Stripe → license key generation → delivery email
- [ ] **[BETA] Beta testers** — give 5–10 podcasters free licenses in exchange for feedback
- [ ] **[BUILD] Production builds** — `cargo tauri build` for Windows, macOS, Linux
---
## 🟡 Medium impact — AI features
- [ ] [#044] **AI Transcript Summarization** — bullet-point summary from transcript
- [ ] [#045] **AI Sentence Rephrase** — right-click word → see alternatives → replace
- [ ] [#046] **AI Smart Speed** — detect slow sections → suggest speed adjustments
- [ ] [#047] **AI Auto-Chapters** — topic detection from transcript → markers
- [ ] [#048] **AI Show Notes** — title, description, keywords, timestamps
- [ ] [#049] **AI Find Fluff** — detect rambles, off-topic chatter
- [ ] [#050] **AI Smooth Cuts** — crossfade between deleted segments
---
## 🟢 Lower impact — expansion
- [ ] **Project stitching** — load multiple .aive projects into one export
- [ ] **Batch export** — multiple projects/cuts in sequence
- [ ] **Smart chunking** — overlapping chunks for files >2hr
- [ ] [#014] Alternate transcription backend (VibeVoice-ASR-HF)
- [ ] [#051] **AI B-roll** — generate footage from text prompt
- [ ] [#052] **Smart Layouts** — auto-switch speakers in video frame
- [ ] [#053] **Per-track audio levels** — gain per speaker track
- [ ] [#054] **Intro/Outro templates** — reusable segment presets
- [ ] [#055] **Built-in free music library** — CC0 loops shipped with app
- [ ] [#056] **Stock media browser** — browse local resources/media/
- [ ] [#057] **Sample content downloader** — test video with pre-made transcript
---
## 🎬 OpenShot-inspired (long-term)
- [ ] Keyframe animations — clip position, scale, opacity over time
- [ ] Video transitions — crossfade, wipe between clips
- [ ] Title / text overlays — SVG templates, adjustable font/color
- [ ] Chroma key / greenscreen — per-clip effect
- [ ] Speed ramps — animate speed within a clip
- [ ] Frame-accurate stepping — arrow keys frame by frame
- [ ] Clip trimming on timeline — drag edges to trim
- [ ] Snapping — magnetic snap to markers and edges
---
## 💡 Competitive advantages
- **7-day free trial (no CC)** — full features, no risk
- **One-time purchase** — $39 Pro, $79 Business, no subscription
- **100% offline** — no account, no cloud, no data leaves your machine
- **Local AI** — filler detection, clip suggestions, Smart Clean work offline
- **Word-level precision** — edit video by deleting words, not razor cuts
- **Per-segment re-transcription** — fix transcription errors on just the bad part
- **Auto-ducking background music** — music lowers when speech detected, no keyframing
- **Works on long files** — virtualized transcript + chunked waveform handles 1hr+
---
## 🚫 Explicitly deferred
- Cloud sync / collaboration
- Voice cloning / TTS
- Full multi-track NLE (compositing, keyframes, nested sequences)
- Mobile app
- Subscription model
- Image/video generation models
TalkEdit's advantage is that it isn't a timeline editor — the text-is-the-timeline model makes spoken-word editing drastically faster than dragging razor cuts.
---
## 📦 Launch checklist
- [ ] Landing page at talked.it (features, screenshots, pricing, downloads)
- [ ] Demo video (3–5 min walkthrough)
- [ ] Product Hunt listing + 50 free licenses
- [ ] r/podcasting, r/VideoEditing, r/selfhosted posts
- [ ] Hacker News "Show HN"
- [ ] GitHub v1.0.0 release with Windows/macOS/Linux binaries
- [ ] Compare page: TalkEdit vs Descript
28. Word-level transcript editing (select, drag, shift-click, delete)
29. Ctrl+click word → seek timeline to that position
30. Waveform timeline with zoom (Ctrl+scroll), scroll, drag-to-scrub playhead
31. Auto-scroll waveform when playhead goes off-screen
32. AI filler word detection and removal (Ollama / OpenAI / Claude)
33. AI clip suggestions for social media
34. Noise reduction (DeepFilterNet or FFmpeg ANLMDN)
35. Export: fast stream-copy or full re-encode (MP4/MOV/WebM, 720p/1080p/4K)
36. Captions: SRT, VTT, ASS burn-in with font/color/position options
37. Speaker diarization
38. Project save / load (.aive JSON format)
39. Undo / redo (100-level history via Zundo)
40. Multi-format input (MP4, MKV, MOV, AVI, WebM, M4A)
41. Keyboard shortcuts (Space, J/K/L, arrows, Ctrl+Z/Shift+Z, Ctrl+S, Ctrl+E)
42. Settings panel: AI provider config (Ollama, OpenAI, Claude)
43. Cut/mute range creation on timeline with draggable zone edits and Delete-to-remove

205
README.md
View File

@ -1,64 +1,34 @@
# TalkEdit
**Edit video by editing text.** An offline, local-first desktop video editor where deleting a word from the transcript cuts it from the video.
An open-source, local-first, Descript-like text-based audio and video editor powered by AI. Edit audio/video by editing text — delete a word from the transcript and it's cut from the audio/video.
<img width="1034" height="661" alt="TalkEdit screenshot" src="https://github.com/user-attachments/assets/b1ed9505-792e-42ca-bb73-85458d0f02a5" />
<img width="1034" height="661" alt="image" src="https://github.com/user-attachments/assets/b1ed9505-792e-42ca-bb73-85458d0f02a5" />
---
## Features
## Architecture
- **Text-based editing** — delete, reorder, or correct words in the transcript to edit the underlying video. No razor tool, no timeline slicing.
- **Word-level transcription** — Whisper.cpp with per-word timestamps and confidence scores. Low-confidence words get a visual warning.
- **Four zone types** — Cut, Mute, Sound Gain, and Speed Adjust. Create zones on the waveform timeline and drag edges to refine.
- **Waveform timeline** — zoomable, scrollable waveform with playhead scrubbing, zone visualization, markers, chapters, and thumbnail strips.
- **AI-powered editing**
- Filler word detection and removal
- Smart Clean: one-click filler removal + silence trim + noise reduction + loudness normalization
- Clip suggestions for social media shorts
- Sentence rephrase with AI alternatives
- Supports **Ollama** (local), **OpenAI**, and **Claude** backends
- **Background music** — import a second audio track with auto-ducking via sidechain compression.
- **Export** — fast stream-copy or full re-encode to MP4, MOV, WebM, or WAV. Resolution up to 4K.
- **Captions** — generate SRT, VTT, or burn-in ASS subtitles with configurable font, color, and position.
- **Speaker diarization** — identify and label multiple speakers.
- **Audio tools** — noise reduction (DeepFilterNet), loudness normalization (LUFS targeting), background removal (MediaPipe), batch silence removal, video zoom/punch-in.
- **Project save/load** — `.aive` JSON format preserves all edits, zones, markers, and AI config.
- **Customizable hotkeys** — two presets (Standard / Left-hand) with per-key remapping and conflict detection.
- **100% offline, no account required** — everything runs on your machine. No telemetry, no cloud dependency.
- **7-day free trial** with one-time license key purchase. No subscription.
---
## Tech Stack
| Layer | Technology |
|-------|------------|
| Desktop shell | **Tauri 2.0** (Rust) |
| Frontend | **React** + **TypeScript** + **Tailwind CSS** |
| State management | **Zustand** with Zundo undo/redo |
| Transcription | **Whisper.cpp** (word-level timestamps) |
| AI / LLM | **Ollama**, **OpenAI**, **Claude** (plugable backends) |
| Media processing | **FFmpeg** |
| Python services | **FastAPI** (spawned as a child process) |
---
- **Tauri + React** desktop app with Tailwind CSS
- **FastAPI** Python backend (spawned as child process)
- **WhisperX** for word-level transcription with alignment
- **FFmpeg** for video processing (stream-copy and re-encode)
- **Ollama / OpenAI / Claude** for AI features (filler removal, clip creation)
## Quick Start
### Prerequisites
- **Node.js** 18+
- **Python** 3.10+
- **FFmpeg** (in PATH)
- **Rust** toolchain (for Tauri)
- **Ollama** (optional, for local AI features)
- Node.js 18+
- Python 3.10+
- FFmpeg (in PATH)
- (Optional) Ollama for local AI features
### Install
```bash
# Root and frontend dependencies
# Root scripts
npm install
# Frontend dependencies (React, Tailwind, Zustand)
cd frontend && npm install && cd ..
# Backend dependencies
@ -67,90 +37,82 @@ cd backend && pip install -r requirements.txt && cd ..
### Run (Development)
Set a custom backend port once (optional):
```bash
# Start everything: backend + frontend + Tauri
export BACKEND_PORT=8000
```
If you run frontend separately, you can also set:
```bash
export VITE_BACKEND_PORT=$BACKEND_PORT
```
```bash
# Start frontend in browser
npm run dev
# Or start the full desktop app (backend + tauri)
npm run dev:tauri
```
Or run components separately:
Or run them separately:
```bash
# Terminal 1: Python backend
npm run dev:backend
# Terminal 1: Backend
cd backend && python -m uvicorn main:app --reload --port 8000
# Terminal 2: Frontend + Tauri
# Terminal 2: Frontend
cd frontend && npm run dev
# Terminal 3: Tauri app shell
cd frontend && cargo tauri dev
```
### Build
```bash
npm run build:tauri
```
---
## Project Structure
```
talkedit/
├── src-tauri/ # Tauri 2.0 Rust runtime
│ ├── Cargo.toml
├── src-tauri/ # Tauri Rust host
│ ├── src/main.rs # App entry and backend orchestration
│ └── tauri.conf.json
├── frontend/ # React + Vite + Tailwind
│ └── src/
│ ├── main.rs # App entry, backend spawner
│ ├── lib.rs # Command handlers (IPC bridge)
│ ├── transcription.rs # Whisper.cpp integration
── video_editor.rs # FFmpeg-based editing
│ ├── caption_generator.rs
│ ├── diarization.rs
│ ├── ai_provider.rs # Ollama / OpenAI / Claude
│ ├── audio_cleaner.rs
│ ├── background_removal.rs
│ ├── licensing.rs # Trial + key activation
│ ├── models.rs # Shared data types
│ └── paths.rs
├── frontend/ # React + Vite + Tailwind
│ └── src/
│ ├── components/ # UI components
│ │ ├── TranscriptEditor.tsx
│ │ ├── WaveformTimeline.tsx
│ │ ├── VideoPlayer.tsx
│ │ ├── AIPanel.tsx
│ │ ├── ExportDialog.tsx
│ │ ├── SettingsPanel.tsx
│ │ ├── BackgroundMusicPanel.tsx
│ │ ├── MarkersPanel.tsx
│ │ ├── ZoneEditor.tsx
│ │ ├── SilenceTrimmerPanel.tsx
│ │ ├── AppendClipPanel.tsx
│ │ ├── LicenseDialog.tsx
│ │ └── DevPanel.tsx
│ ├── store/ # Zustand state (editorStore, aiStore, settingsStore)
│ ├── hooks/ # Custom React hooks
│ ├── lib/ # Utilities and Tauri bridge
│ └── types/ # TypeScript interfaces
├── backend/ # FastAPI Python services
│ ├── components/ # VideoPlayer, TranscriptEditor, etc.
│ ├── store/ # Zustand state (editorStore, aiStore)
│ ├── hooks/ # useVideoSync, useKeyboardShortcuts
── types/ # TypeScript interfaces
├── backend/ # FastAPI Python backend
│ ├── main.py
│ ├── routers/ # API endpoints
│ ├── transcribe.py
│ ├── ai.py
│ │ ├── audio.py
│ │ ├── captions.py
│ │ └── export.py
│ ├── services/ # Core logic
│ ├── video_editor.py
│ ├── caption_generator.py
│ ├── ai_provider.py
│ ├── diarization.py
│ ├── audio_cleaner.py
│ ├── background_removal.py
│ └── license_server.py
├── shared/ # Schema definitions (project format)
├── models/ # Whisper model storage
└── docs/ # Documentation
│ ├── routers/ # API endpoints
├── services/ # Core logic (transcription, editing, AI)
└── utils/ # GPU, cache, audio helpers
└── shared/ # Project schema
```
---
## Features
| Feature | Status |
|---------|--------|
| Word-level transcription (WhisperX) | Done |
| Text-based video editing | Done |
| Undo/redo | Done |
| Waveform timeline | Done |
| FFmpeg stream-copy export | Done |
| FFmpeg re-encode (up to 4K) | Done |
| AI filler word removal | Done |
| AI clip creation (Shorts) | Done |
| Ollama + OpenAI + Claude | Done |
| Word-level captions (SRT/VTT/ASS) | Done |
| Caption burn-in on export | Done |
| Studio Sound (DeepFilterNet) | Done |
| Keyboard shortcuts (J/K/L) | Done |
| Speaker diarization | Done |
| Virtualized transcript (react-virtuoso) | Done |
| Encrypted API key storage | Done |
| Project save/load (.aive) | Done |
| AI background removal | Planned |
## Keyboard Shortcuts
@ -158,19 +120,28 @@ talkedit/
|-----|--------|
| Space | Play / Pause |
| J / K / L | Reverse / Pause / Forward |
| I / O | Mark In / Mark Out |
| ← / → | Seek ±5 seconds |
| Delete | Delete selected words or zones |
| Delete | Delete selected words |
| Ctrl+Z | Undo |
| Ctrl+Shift+Z | Redo |
| Ctrl+S | Save project |
| Ctrl+E | Export |
| Ctrl+F | Search transcript |
| Ctrl+Scroll | Zoom waveform |
| ? | Shortcut cheatsheet |
---
## API Endpoints
| Method | Endpoint | Description |
|--------|----------|-------------|
| GET | /health | Health check |
| POST | /transcribe | Transcribe video with WhisperX |
| POST | /export | Export edited video (stream copy or re-encode) |
| POST | /ai/filler-removal | Detect filler words via LLM |
| POST | /ai/create-clip | AI-suggested clips for shorts |
| GET | /ai/ollama-models | List local Ollama models |
| POST | /captions | Generate SRT/VTT/ASS captions |
| POST | /audio/clean | Noise reduction (DeepFilterNet) |
| GET | /audio/capabilities | Check audio processing availability |
## License
Source code is MIT — see [LICENSE](LICENSE) for details. The distributed binary includes a 7-day free trial requiring a one-time license key purchase for continued use.
MIT License — see [LICENSE](LICENSE) for details.

View File

@ -1,62 +1,50 @@
# TalkEdit — Tech Stack, Tools, and Features
# TalkEdit — Tech Stack, Tools, and Planned Features
This document summarizes the chosen technology, tooling, the full feature set, recommended additions, and items on the back burner.
This document summarizes the chosen technology, tooling, the full planned feature set for the MVP, recommended additions, removals, and items to put on the back burner.
## Overview
- Goal: Offline, local text-based audio/video editor (Descript-style) focused on spoken-word creators (podcasters, YouTubers). Fast, privacy-first, single-file installer.
## Tech Stack
- Frontend: React 19 + Vite + TypeScript + Tailwind CSS + Zustand (with zundo undo/redo) + Virtuoso (virtualized transcript)
- Backend: Tauri 2.0 (Rust) for file I/O, licensing, licensing crypto (Ed25519), model management, error logging
- Transcription: Python faster-whisper with WhisperX for word-level alignment. Models downloaded on demand.
- Audio/Video Processing: FFmpeg invoked from Rust via Python scripts (video_editor.py, audio_cleaner.py, caption_generator.py)
- AI: Ollama, OpenAI, Claude through Python ai_provider.py. Bundled Qwen3 LLM planned.
- State: Zustand (in-frontend store) + zundo middleware for undo/redo history
- Frontend: React + Vite + Tailwind CSS + shadcn/ui
- Backend: Tauri 2.0 (Rust) for file I/O, invoking native binaries, and exposing commands to the UI
- Transcription: Whisper.cpp (Rust bindings like `whisper-rs` / `whisper-cpp-sys`) — word-level timestamps
- Audio/Video Processing: FFmpeg invoked from Rust (or `ffmpeg-next` Rust crate)
- State: Zustand (in-frontend store)
- Packaging: Tauri `tauri build` for cross-platform installers
- Optional local tools: Ollama (optional local LLMs) for advanced on-device heuristics
## Developer Tools
- Rust toolchain (cargo, rustc)
- Node.js + npm for frontend
- Python 3.11+ (faster-whisper, WhisperX, AI providers)
- Node.js + npm/yarn for frontend
- FFmpeg binaries (platform-specific; bundled or downloaded at install)
- Build/test: Tauri CLI, Vite dev server
- Testing: Vitest (frontend), cargo test (Rust), pytest (Python)
- CI: GitHub Actions (Rust clippy/test, Frontend tsc/vitest, Python pytest)
## Implemented Features
- [x] 1. Media import via file dialog (audio/video auto audio-extract)
- [x] 2. One-click local transcription with model selector (tiny/base → larger models) and model-size chooser
- [x] 3. Scrollable, Google-Doc-style transcript editor (Virtuoso virtualized)
## MVP Feature List (Planned)
1. Drag-and-drop import (audio/video auto audio-extract)
2. One-click local transcription (model selector: tiny/base → larger models)
3. Scrollable, Google-Doc-style transcript editor
- Click word → seek video/audio
- Select words → cut corresponding media segment (smart 150–250ms fades)
- [x] 4. Smart Cleanup
- Filler word removal (configurable list per-project)
- Silence trimming
- [x] 5. Audio Polish chain (FFmpeg): normalize, compression, noise reduction
- [x] 6. Preview with synced playback, undo/redo (zundo), project save/load
- [x] 7. Export MP4/audio with SRT/VTT/ASS captions (speaker-labeled)
- [x] 8. Speaker diarization
- [x] 9. Custom filler lists per-project
- [x] 10. Background music with auto-ducking
- [x] 11. Append clips (concatenation)
- [x] 12. Settings: AI provider config (Ollama, OpenAI, Claude)
- [x] 13. Keyboard shortcuts with custom remapping
- [x] 14. Help panel + cheatsheet
- [x] 15. 7-day licensing with Ed25519-signed license keys
- Highlight + Delete → remove corresponding media segment (smart 150–250ms fades)
4. One-click "Clean it" button
- Remove fillers (configurable list)
- Remove long pauses (>0.8s) by default
5. One-click audio polish chain (FFmpeg): normalize, light compression, basic noise reduction
6. Preview with synced playback, undo/redo, project save/load
7. Export MP4/audio with optional SRT/VTT captions and burned-in captions
## Recommended Additions (near-term, high ROI)
- [ ] Local GPU/CPU detection & recommended model/settings UI
- [ ] Per-project incremental transcription: re-run only edited segments
- [ ] "Preview cleaning" dry-run that highlights candidate removals before applying
- [ ] Export size/time estimator and suggested export presets
- [ ] Accessibility export presets (podcast vs YouTube presets)
- [ ] Bundled Qwen3 LLM for offline AI features
- Model-size chooser + progressive fallback (start fast, upgrade model later)
- Local GPU/CPU detection & recommended model/settings UI
- Per-project incremental transcription: re-run only edited segments
- "Preview cleaning" dry-run that highlights candidate removals before applying
- Export size/time estimator and suggested export presets
- Custom filler lists per-project and import/export of filler lists
- High-quality offline captions export (SRT + VTT + speaker labels)
- Accessibility export presets (podcast vs YouTube presets)
## Remove / Defer (Back Burner)
These broaden scope or add legal/privacy surface — defer for now.
- Voice cloning / TTS: DEFER
- Multi-track, full timeline NLE features: DEFER
- Real-time collaboration / cloud sync: DEFER
@ -64,20 +52,18 @@ These broaden scope or add legal/privacy surface — defer for now.
## Risks & Mitigations
- Large model sizes: don't bundle large models; download on-demand and document storage location.
- Timestamp accuracy: WhisperX word-level alignment + manual per-segment re-run available.
- Timestamp accuracy: provide manual word-adjust UI and per-segment re-run.
- FFmpeg packaging/licensing: ship platform-specific binaries or use Tauri bundling guidance; document license compliance.
## Prioritized Quick Wins
1. Per-project incremental transcription
1. Model chooser UI + auto-fallback settings
2. "Preview cleaning" dry-run UI
3. Export presets (podcast vs YouTube)
3. Per-project incremental transcription saving
## Next Steps for Implementation
- Bundle Qwen3 LLM for offline AI processing.
- Implement incremental transcription to speed up re-editing workflows.
- Add export presets and size estimation.
- Improve GPU/CPU detection and model recommendations.
- Add model chooser UI and capability detection early in the frontend iteration.
- Implement Rust transcription command and a compact API for incremental transcription.
- Implement FFmpeg polish templates and a minimal preview pipeline.
---
Generated to capture tech, tools, implemented features, and the recommended add/remove/defer list.
Generated as requested to capture tech, tools, planned features, and the recommended add/remove/defer list.

View File

@ -9,8 +9,6 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pathlib import Path
from routers import audio
app = FastAPI(title="TalkEdit Dev Backend", version="0.0.1")
app.add_middleware(
@ -36,8 +34,6 @@ MIME_MAP = {
}
app.include_router(audio.router)
@app.get("/health")
async def health():
    """Health-check endpoint: report that the backend process is alive.

    Returns:
        dict: ``{"status": "ok"}`` — no other state is inspected.
    """
    return {"status": "ok"}

View File

@ -1,223 +0,0 @@
#!/usr/bin/env python3
"""
TalkEdit License Server — Stripe webhook + license key generator.
Usage (development):
python backend/license_server.py
Then create a test license:
python backend/license_server.py generate --email test@example.com --tier pro
This is a minimal server. In production, deploy as a Cloudflare Worker,
Vercel function, or a small VPS behind nginx.
"""
import base64
import json
import os
import time
import hmac
import hashlib
from nacl.bindings import (
crypto_sign_seed_keypair,
crypto_sign,
crypto_sign_BYTES,
)
# === CONFIGURATION ===
# The Ed25519 private key (base64-encoded). Keep this secret!
# Prefer setting LICENSE_PRIVATE_KEY in the environment; the hard-coded value
# below is a development-only fallback and must never ship in production.
# Generate with: python3 -c "import os,base64; print(base64.b64encode(os.urandom(32)).decode())"
LICENSE_PRIVATE_KEY_B64 = os.environ.get(
    "LICENSE_PRIVATE_KEY",
    "ONTdT2Hn367fMlovqulz7WYQPQru7uFa/GaSfjhnR9x7Qoe7uBPNwIFeW4p7A0g05Qj14rvaQ4Mm1u/LzgeEsA==",
)

# Stripe webhook secret (set this in production)
STRIPE_WEBHOOK_SECRET = os.environ.get("STRIPE_WEBHOOK_SECRET", "")

# === TIER DEFINITIONS ===
# Each tier carries the Stripe price it maps to, the feature flags embedded in
# the license payload, an activation limit, and the default license duration.
# generate_license_key() reads these as defaults when arguments are omitted.
TIERS = {
    "pro": {
        "price_id": "price_pro_monthly",  # Replace with your Stripe price ID
        "features": ["bundled_deps", "auto_updates", "priority_support"],
        "max_activations": 1,
        "duration_days": 365,
    },
    "business": {
        "price_id": "price_business_monthly",
        "features": ["bundled_deps", "auto_updates", "priority_support",
                     "white_label", "audit_logging", "bulk_deployment"],
        "max_activations": 10,
        "duration_days": 365,
    },
}
def generate_license_key(
    customer_email: str,
    tier: str = "pro",
    license_id: str | None = None,
    duration_days: int | None = None,
    features: list | None = None,
    max_activations: int | None = None,
) -> str:
    """Generate a signed TalkEdit license key.

    The payload (license id, customer email, tier, feature list, issue/expiry
    epoch seconds, activation limit) is JSON-encoded and Ed25519-signed with
    the server's private key. Arguments left as None default to the tier's
    configuration in TIERS. An unknown tier falls back to the "pro" tier
    config, but the payload still records the tier name that was passed in.

    Returns a string like: talkedit_v1_<base64(payload)>.<base64(signature)>
    """
    if license_id is None:
        # Unique-enough id: epoch seconds plus 4 random bytes.
        license_id = f"lic_{int(time.time())}_{os.urandom(4).hex()}"
    tier_config = TIERS.get(tier, TIERS["pro"])
    if duration_days is None:
        duration_days = tier_config["duration_days"]
    if features is None:
        features = tier_config["features"]
    if max_activations is None:
        max_activations = tier_config["max_activations"]
    now = int(time.time())
    payload = {
        "license_id": license_id,
        "customer_email": customer_email,
        "tier": tier,
        "features": features,
        "issued_at": now,
        "expires_at": now + duration_days * 86400,
        "max_activations": max_activations,
    }
    # Compact separators keep the key short and the signed byte stream canonical.
    payload_bytes = json.dumps(payload, separators=(",", ":")).encode("utf-8")
    # Sign with Ed25519
    seed = base64.b64decode(LICENSE_PRIVATE_KEY_B64)
    if len(seed) == 64:
        seed = seed[:32]  # First 32 bytes are the actual seed
    pk, sk = crypto_sign_seed_keypair(seed)
    # crypto_sign returns signature||message; keep only the signature bytes.
    signed = crypto_sign(payload_bytes, sk)
    signature = signed[:crypto_sign_BYTES]
    # Strip base64 padding from both parts for a compact key string.
    payload_b64 = base64.b64encode(payload_bytes).decode().rstrip("=")
    sig_b64 = base64.b64encode(signature).decode().rstrip("=")
    return f"talkedit_v1_{payload_b64}.{sig_b64}"
def verify_stripe_webhook(payload: bytes, sig_header: str, tolerance_seconds: int | None = None) -> dict:
    """Verify a Stripe webhook signature and return the decoded event.

    Args:
        payload: raw request body bytes, exactly as received.
        sig_header: value of the ``Stripe-Signature`` header
            (format: ``t=<timestamp>,v1=<signature>``).
        tolerance_seconds: if given, reject events whose timestamp differs
            from the current time by more than this many seconds (replay
            protection, per Stripe's recommendation). ``None`` (default)
            skips the check, preserving the original behavior.

    Raises:
        ValueError: if the secret is unconfigured, the signature does not
            match, or the timestamp is outside the tolerance window.
    """
    if not STRIPE_WEBHOOK_SECRET:
        raise ValueError("STRIPE_WEBHOOK_SECRET not configured")
    # Stripe sends signature in the `stripe-signature` header
    # Format: t=timestamp,v1=signature
    parts = {}
    for item in sig_header.split(","):
        key, _, value = item.partition("=")
        parts[key.strip()] = value.strip()
    timestamp = parts.get("t", "")
    expected_sig = parts.get("v1", "")
    # Optional replay protection: reject stale or unparseable timestamps.
    if tolerance_seconds is not None:
        try:
            ts = int(timestamp)
        except ValueError:
            raise ValueError("Invalid webhook timestamp")
        if abs(time.time() - ts) > tolerance_seconds:
            raise ValueError("Webhook timestamp outside tolerance window")
    # Recompute HMAC-SHA256 over "<timestamp>.<payload>" with the endpoint secret.
    signed_payload = f"{timestamp}.{payload.decode()}".encode()
    computed_sig = hmac.new(
        STRIPE_WEBHOOK_SECRET.encode(),
        signed_payload,
        hashlib.sha256,
    ).hexdigest()
    # compare_digest prevents timing attacks on the signature comparison.
    if not hmac.compare_digest(computed_sig, expected_sig):
        raise ValueError("Invalid webhook signature")
    return json.loads(payload)
# === CLI ===
def main():
    """CLI entry point.

    Two modes:
      * ``python license_server.py generate --email ... --tier ...`` prints a
        test license key and exits.
      * ``python license_server.py`` starts a minimal HTTP server exposing
        POST /webhook/stripe for Stripe checkout events.
    """
    import sys
    if len(sys.argv) > 1 and sys.argv[1] == "generate":
        # CLI mode: generate a test license key
        import argparse
        parser = argparse.ArgumentParser(description="Generate TalkEdit license key")
        parser.add_argument("--email", default="test@example.com")
        parser.add_argument("--tier", default="pro", choices=["pro", "business"])
        parser.add_argument("--days", type=int, default=None)
        args = parser.parse_args(sys.argv[2:])
        key = generate_license_key(
            customer_email=args.email,
            tier=args.tier,
            duration_days=args.days,
        )
        print()
        print("=== TALKEDIT LICENSE KEY ===")
        print(key)
        print()
        print("Paste this into the TalkEdit app to activate.")
        return
    # Server mode
    from http.server import HTTPServer, BaseHTTPRequestHandler
    import urllib.parse

    class LicenseHandler(BaseHTTPRequestHandler):
        # Minimal handler: only POST /webhook/stripe is served; everything else 404s.
        def do_POST(self):
            path = urllib.parse.urlparse(self.path).path
            if path == "/webhook/stripe":
                content_length = int(self.headers.get("Content-Length", 0))
                body = self.rfile.read(content_length)
                sig_header = self.headers.get("Stripe-Signature", "")
                try:
                    event = verify_stripe_webhook(body, sig_header)
                    event_type = event.get("type", "")
                    if event_type == "checkout.session.completed":
                        session = event["data"]["object"]
                        # Prefer top-level customer_email, fall back to customer_details.email.
                        email = session.get("customer_email", session.get("customer_details", {}).get("email", "unknown"))
                        tier = "pro"  # Map from session["metadata"]["tier"] or line items
                        license_key = generate_license_key(
                            customer_email=email,
                            tier=tier,
                        )
                        # In production: email the license key to the customer
                        print(f"License generated for {email}: {license_key[:40]}...")
                        self.send_response(200)
                        self.send_header("Content-Type", "application/json")
                        self.end_headers()
                        self.wfile.write(json.dumps({"status": "ok"}).encode())
                    else:
                        # Acknowledge other event types so Stripe stops retrying.
                        self.send_response(200)
                        self.end_headers()
                except Exception as e:
                    # Signature/parse failure: 400 tells Stripe to retry later.
                    print(f"Webhook error: {e}")
                    self.send_response(400)
                    self.end_headers()
                    self.wfile.write(str(e).encode())
            else:
                self.send_response(404)
                self.end_headers()

        def log_message(self, format, *args):
            # Route http.server's default stderr access log through stdout with a tag.
            print(f"[license-server] {args}")

    port = int(os.environ.get("PORT", 8643))
    server = HTTPServer(("0.0.0.0", port), LicenseHandler)
    print(f"License server listening on http://0.0.0.0:{port}")
    print(f" POST /webhook/stripe - Stripe webhook")
    print()
    print("To generate a test license:")
    print(f" python {__file__} generate --email you@example.com --tier pro")
    server.serve_forever()


if __name__ == "__main__":
    main()

View File

@ -2,20 +2,25 @@ import logging
import os
import stat
import sys
import tempfile
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import FastAPI, Query, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.responses import StreamingResponse, FileResponse
import ffmpeg
from routers import transcribe, export, ai, captions, audio
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Dev log file — frontend forwards console.error/warn here so the agent can read it
DEV_LOG_PATH = Path(__file__).parent.parent / "webview.log"
# Dev log file — keep outside workspace to avoid dev watcher reload loops.
DEV_LOG_PATH = Path(
os.environ.get("TALKEDIT_DEV_LOG_PATH", str(Path(tempfile.gettempdir()) / "talkedit-webview.log"))
)
@asynccontextmanager
@ -61,20 +66,69 @@ MIME_MAP = {
@app.get("/file")
async def serve_local_file(request: Request, path: str = Query(...)):
"""Stream a local file with HTTP Range support (required for video seeking)."""
async def serve_local_file(request: Request, path: str = Query(...), format: str = Query(None)):
"""Stream a local file with HTTP Range support (required for video seeking).
Optionally transcode audio files to MP3 for better browser compatibility."""
file_path = Path(path)
if not file_path.is_file():
logger.warning(f"[serve_file] File not found: {path}")
raise HTTPException(status_code=404, detail=f"File not found: {path}")
file_size = file_path.stat().st_size
content_type = MIME_MAP.get(file_path.suffix.lower(), "application/octet-stream")
original_ext = file_path.suffix.lower()
# Check if we should transcode this file
should_transcode = (
original_ext == '.wav' and
(format == 'mp3' or file_size > 10 * 1024 * 1024) # Transcode WAV if > 10MB or explicitly requested
)
if should_transcode:
logger.info(f"[serve_file] Transcoding {file_path.name} to MP3 (size: {file_size})")
# Create cache directory
cache_dir = Path(__file__).parent / "cache"
cache_dir.mkdir(exist_ok=True)
# Create cache filename
import hashlib
file_hash = hashlib.md5(str(file_path).encode()).hexdigest()
cache_path = cache_dir / f"{file_hash}.mp3"
# Check if cached version exists
if not cache_path.exists():
logger.info(f"[serve_file] Creating cached MP3: {cache_path}")
try:
# Transcode to MP3 using ffmpeg
(
ffmpeg
.input(str(file_path))
.output(str(cache_path), acodec='libmp3lame', ab='128k')
.run(overwrite_output=True, quiet=True)
)
except ffmpeg.Error as e:
logger.error(f"[serve_file] Transcoding failed: {e}")
# Fall back to original file
cache_path = file_path
else:
logger.info(f"[serve_file] Transcoding completed: {cache_path}")
else:
logger.info(f"[serve_file] Using cached MP3: {cache_path}")
# Use the transcoded file
file_path = cache_path
file_size = file_path.stat().st_size
content_type = "audio/mpeg"
else:
content_type = MIME_MAP.get(original_ext, "application/octet-stream")
range_header = request.headers.get("range")
logger.info(
f"[serve_file] {file_path.name} | size={file_size} | "
f"type={content_type} | range={range_header or 'none'}"
f"[serve_file] Serving {file_path.name} | size={file_size} | "
f"type={content_type} | range={range_header or 'none'} | transcoded={should_transcode}"
)
if content_type == "application/octet-stream":
@ -153,6 +207,7 @@ async def dev_log(request: Request):
if args:
line += " " + " ".join(args)
line += "\n"
DEV_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
with open(DEV_LOG_PATH, "a") as f:
f.write(line)
return {"ok": True}
return {"ok": True, "path": str(DEV_LOG_PATH)}

View File

@ -7,11 +7,11 @@ import tempfile
from pathlib import Path
from typing import Optional
from fastapi import APIRouter, HTTPException, Query, Request
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available, normalize_audio
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available
logger = logging.getLogger(__name__)
router = APIRouter()
@ -71,123 +71,55 @@ async def detect_silence_endpoint(req: SilenceDetectRequest):
@router.get("/audio/waveform")
async def get_waveform_audio(request: Request, path: str = Query(...)):
async def get_waveform_audio(path: str = Query(...)):
"""
Extract audio from any video/audio file and return it as a WAV.
The WAV is cached on disk for subsequent requests.
Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.
"""
req_id = hashlib.md5(f"{path}:{request.url}".encode()).hexdigest()[:10]
file_path = Path(path)
logger.info(
"[waveform:%s] request raw_url=%s raw_query=%s decoded_path=%r path_len=%s",
req_id,
str(request.url),
request.url.query,
path,
len(path),
)
try:
resolved_path = file_path.expanduser().resolve(strict=False)
except Exception:
resolved_path = file_path
logger.info(
"[waveform:%s] normalized path=%s exists=%s is_file=%s",
req_id,
resolved_path,
file_path.exists(),
file_path.is_file(),
)
if not file_path.is_file():
logger.warning("[waveform:%s] file_not_found path=%r", req_id, path)
logger.warning(f"[waveform] File not found: {path}")
raise HTTPException(status_code=404, detail=f"File not found: {path}")
# Cache key based on path + mtime so stale cache is auto-invalidated
mtime = file_path.stat().st_mtime
cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()
logger.info("[waveform:%s] cache_key=%s mtime=%s", req_id, cache_key, mtime)
if cache_key in _waveform_cache:
cached = Path(_waveform_cache[cache_key])
if cached.exists():
logger.info("[waveform:%s] cache_hit cached=%s", req_id, cached)
logger.info(f"[waveform] Cache hit for {file_path.name}")
return FileResponse(str(cached), media_type="audio/wav")
else:
del _waveform_cache[cache_key]
logger.info("[waveform:%s] cache_miss extracting file=%s", req_id, file_path)
logger.info(f"[waveform] Extracting audio from: {file_path.name}")
tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
out_wav = Path(tmp_dir) / f"{cache_key}.wav"
# Downsample to mono 8000 Hz — enough for waveform drawing and much smaller payloads
# Downsample to mono 22050 Hz — enough for waveform drawing, small file
cmd = [
"ffmpeg", "-y",
"-i", str(file_path),
"-vn", # drop video
"-ac", "1", # mono
"-ar", "8000", # 8 kHz sample rate
"-ar", "22050", # 22 kHz sample rate
"-acodec", "pcm_s16le", # 16-bit PCM WAV
str(out_wav),
]
logger.info("[waveform:%s] ffmpeg_cmd=%s", req_id, " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
logger.error(
"[waveform:%s] ffmpeg_failed returncode=%s stderr_tail=%s",
req_id,
result.returncode,
result.stderr[-2000:],
)
logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}")
raise HTTPException(
status_code=500,
detail=f"Failed to extract audio: {result.stderr[-300:]}"
)
if not out_wav.exists() or out_wav.stat().st_size == 0:
logger.error(
"[waveform:%s] empty_output out_wav=%s exists=%s size=%s",
req_id,
out_wav,
out_wav.exists(),
out_wav.stat().st_size if out_wav.exists() else -1,
)
logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}")
raise HTTPException(status_code=500, detail="Audio extraction produced empty file")
logger.info(
"[waveform:%s] extracted_bytes=%s out_wav=%s",
req_id,
out_wav.stat().st_size,
out_wav,
)
logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}")
_waveform_cache[cache_key] = str(out_wav)
return FileResponse(str(out_wav), media_type="audio/wav")
class NormalizeRequest(BaseModel):
input_path: str
output_path: Optional[str] = None
target_lufs: float = -14.0
@router.post("/audio/normalize")
async def normalize_audio_endpoint(req: NormalizeRequest):
"""Normalize audio loudness to a target LUFS level using FFmpeg loudnorm."""
if req.target_lufs < -70 or req.target_lufs > 0:
raise HTTPException(status_code=400, detail="target_lufs must be between -70 and 0")
try:
output = normalize_audio(
req.input_path,
req.output_path or "",
target_lufs=req.target_lufs,
)
return {
"status": "ok",
"output_path": output,
"target_lufs": req.target_lufs,
}
except Exception as e:
logger.error(f"Audio normalization failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))

View File

@ -60,8 +60,6 @@ async def generate_captions(req: CaptionRequest):
return PlainTextResponse(content, media_type="text/plain")
except HTTPException:
raise
except Exception as e:
logger.error(f"Caption generation failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))

View File

@ -8,10 +8,9 @@ from typing import List, Optional
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from services.video_editor import export_stream_copy, export_reencode, export_reencode_with_subs, mix_background_music, concat_clips
from services.video_editor import export_stream_copy, export_reencode, export_reencode_with_subs
from services.audio_cleaner import clean_audio
from services.caption_generator import generate_srt, generate_ass, save_captions
from services.background_removal import remove_background_on_export as remove_bg
logger = logging.getLogger(__name__)
router = APIRouter()
@ -22,14 +21,6 @@ class SegmentModel(BaseModel):
end: float
class GainRangeModel(SegmentModel):
gain_db: float
class SpeedRangeModel(SegmentModel):
speed: float
class ExportWordModel(BaseModel):
word: str
start: float
@ -37,90 +28,18 @@ class ExportWordModel(BaseModel):
confidence: float = 0.0
class ZoomConfigModel(BaseModel):
enabled: bool = False
zoomFactor: float = 1.0
panX: float = 0.0
panY: float = 0.0
class BackgroundMusicModel(BaseModel):
path: str
volumeDb: float = 0.0
duckingEnabled: bool = False
duckingDb: float = 6.0
duckingAttackMs: float = 10.0
duckingReleaseMs: float = 200.0
class ExportRequest(BaseModel):
input_path: str
output_path: str
keep_segments: List[SegmentModel]
mute_ranges: Optional[List[SegmentModel]] = None
gain_ranges: Optional[List[GainRangeModel]] = None
speed_ranges: Optional[List[SpeedRangeModel]] = None
global_gain_db: float = 0.0
mode: str = "fast"
resolution: str = "1080p"
format: str = "mp4"
enhanceAudio: bool = False
normalize_loudness: bool = False
normalize_target_lufs: float = -14.0
captions: str = "none"
words: Optional[List[ExportWordModel]] = None
deleted_indices: Optional[List[int]] = None
zoom: Optional[ZoomConfigModel] = None
additional_clips: Optional[List[str]] = None
background_music: Optional[BackgroundMusicModel] = None
remove_background: bool = False
background_replacement: str = "blur"
background_replacement_value: str = ""
class TranscriptExportRequest(BaseModel):
    """Request body for POST /export/transcript — text-only export, no video render."""
    words: List[ExportWordModel]  # full transcript word list with timestamps
    deleted_indices: Optional[List[int]] = None  # indices into `words` to omit
    output_path: str  # destination file path for the exported transcript
    format: str = "txt" # "txt" or "srt"
def _map_ranges_to_output_timeline(
ranges: List[dict],
keep_segments: List[dict],
) -> List[dict]:
"""Map source-time ranges to output timeline after cuts are applied."""
if not ranges or not keep_segments:
return []
mapped: List[dict] = []
output_cursor = 0.0
for keep in keep_segments:
keep_start = float(keep["start"])
keep_end = float(keep["end"])
keep_len = max(0.0, keep_end - keep_start)
if keep_len <= 0:
continue
for src_range in ranges:
overlap_start = max(keep_start, float(src_range["start"]))
overlap_end = min(keep_end, float(src_range["end"]))
if overlap_end <= overlap_start:
continue
mapped_range = {
"start": output_cursor + (overlap_start - keep_start),
"end": output_cursor + (overlap_end - keep_start),
}
if "gain_db" in src_range:
mapped_range["gain_db"] = float(src_range["gain_db"])
if "speed" in src_range:
mapped_range["speed"] = float(src_range["speed"])
mapped.append(mapped_range)
output_cursor += keep_len
return mapped
def _mux_audio(video_path: str, audio_path: str, output_path: str) -> str:
@ -147,51 +66,15 @@ async def export_video(req: ExportRequest):
try:
segments = [{"start": s.start, "end": s.end} for s in req.keep_segments]
mute_segments = [{"start": s.start, "end": s.end} for s in req.mute_ranges] if req.mute_ranges else None
gain_segments = [{"start": s.start, "end": s.end, "gain_db": s.gain_db} for s in req.gain_ranges] if req.gain_ranges else None
speed_segments = [{"start": s.start, "end": s.end, "speed": s.speed} for s in req.speed_ranges] if req.speed_ranges else None
if not segments and not mute_segments:
raise HTTPException(status_code=400, detail="No segments to export")
# Convert zoom config to dict
zoom_dict = None
if req.zoom and req.zoom.enabled:
zoom_dict = {
"enabled": True,
"zoomFactor": req.zoom.zoomFactor,
"panX": req.zoom.panX,
"panY": req.zoom.panY,
}
# Handle additional clips: pre-concat before main editing
working_input = req.input_path
has_additional = bool(req.additional_clips)
if has_additional:
try:
concat_output = req.output_path + ".concat.mp4"
concat_clips(req.input_path, req.additional_clips, concat_output)
working_input = concat_output
logger.info("Pre-concatenated %d additional clips into %s", len(req.additional_clips), concat_output)
except Exception as e:
logger.warning(f"Clip concatenation failed (non-fatal): {e}")
# Fall back to main input only
mapped_gain_segments = _map_ranges_to_output_timeline(gain_segments or [], segments)
has_gain = abs(float(req.global_gain_db)) > 1e-6 or bool(gain_segments)
has_speed = bool(speed_segments)
if has_speed and (mute_segments or has_gain):
raise HTTPException(
status_code=400,
detail="Speed zones currently cannot be combined with mute/gain filters in one export",
)
use_stream_copy = req.mode == "fast" and len(segments) == 1 and not mute_segments and not has_gain and not has_speed and not zoom_dict and not has_additional
use_stream_copy = req.mode == "fast" and len(segments) == 1 and not mute_segments
needs_reencode_for_subs = req.captions == "burn-in"
# Burn-in captions or audio filters require re-encode
if needs_reencode_for_subs or mute_segments or has_gain or has_speed:
# Burn-in captions or mute ranges require re-encode
if needs_reencode_for_subs or mute_segments:
use_stream_copy = False
words_dicts = [w.model_dump() for w in req.words] if req.words else []
@ -208,37 +91,25 @@ async def export_video(req: ExportRequest):
try:
if use_stream_copy:
output = export_stream_copy(working_input, req.output_path, segments)
output = export_stream_copy(req.input_path, req.output_path, segments)
elif ass_path:
output = export_reencode_with_subs(
working_input,
req.input_path,
req.output_path,
segments,
ass_path,
resolution=req.resolution,
format_hint=req.format,
mute_ranges=mute_segments,
gain_ranges=mapped_gain_segments,
speed_ranges=speed_segments,
global_gain_db=req.global_gain_db,
normalize_loudness=req.normalize_loudness,
normalize_target_lufs=req.normalize_target_lufs,
zoom_config=zoom_dict,
)
else:
output = export_reencode(
working_input,
req.input_path,
req.output_path,
segments,
resolution=req.resolution,
format_hint=req.format,
mute_ranges=mute_segments,
gain_ranges=mapped_gain_segments,
speed_ranges=speed_segments,
global_gain_db=req.global_gain_db,
normalize_loudness=req.normalize_loudness,
normalize_target_lufs=req.normalize_target_lufs,
zoom_config=zoom_dict,
)
finally:
if ass_path and os.path.exists(ass_path):
@ -247,7 +118,7 @@ async def export_video(req: ExportRequest):
# Audio enhancement: clean, then mux back into the exported video
if req.enhanceAudio:
try:
tmp_dir = tempfile.mkdtemp(prefix="cutscript_audio_")
tmp_dir = tempfile.mkdtemp(prefix="talkedit_audio_")
cleaned_audio = os.path.join(tmp_dir, "cleaned.wav")
clean_audio(output, cleaned_audio)
@ -257,6 +128,7 @@ async def export_video(req: ExportRequest):
os.replace(muxed_path, output)
logger.info(f"Audio enhanced and muxed into {output}")
# Cleanup
try:
os.remove(cleaned_audio)
os.rmdir(tmp_dir)
@ -265,35 +137,6 @@ async def export_video(req: ExportRequest):
except Exception as e:
logger.warning(f"Audio enhancement failed (non-fatal): {e}")
# Background removal (post-process)
if req.remove_background:
try:
bg_output = output + ".nobg.mp4"
remove_bg(output, bg_output, req.background_replacement, req.background_replacement_value)
os.replace(bg_output, output)
logger.info("Background removed from %s", output)
except Exception as e:
logger.warning(f"Background removal failed (non-fatal): {e}")
# Background music mixing (post-process)
if req.background_music:
try:
music_output = output + ".music.mp4"
mix_background_music(
output,
req.background_music.path,
music_output,
volume_db=req.background_music.volumeDb,
ducking_enabled=req.background_music.duckingEnabled,
ducking_db=req.background_music.duckingDb,
ducking_attack_ms=req.background_music.duckingAttackMs,
ducking_release_ms=req.background_music.duckingReleaseMs,
)
os.replace(music_output, output)
logger.info("Background music mixed into %s", output)
except Exception as e:
logger.warning(f"Background music mixing failed (non-fatal): {e}")
# Sidecar SRT: generate and save alongside video
srt_path = None
if req.captions == "sidecar" and words_dicts:
@ -302,20 +145,11 @@ async def export_video(req: ExportRequest):
save_captions(srt_content, srt_path)
logger.info(f"Sidecar SRT saved to {srt_path}")
# Cleanup pre-concat temp file
if has_additional and working_input != req.input_path and os.path.exists(working_input):
try:
os.remove(working_input)
except OSError:
pass
result = {"status": "ok", "output_path": output}
if srt_path:
result["srt_path"] = srt_path
return result
except HTTPException:
raise
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except RuntimeError as e:
@ -324,34 +158,3 @@ async def export_video(req: ExportRequest):
except Exception as e:
logger.error(f"Export error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.post("/export/transcript")
async def export_transcript(req: TranscriptExportRequest):
"""Export transcript as plain text or SRT without rendering video."""
try:
from services.caption_generator import generate_srt
deleted_set = set(req.deleted_indices or [])
word_dicts = [w.model_dump() for w in req.words]
if req.format == "srt":
content = generate_srt(word_dicts, deleted_set)
else:
# Plain text: join non-deleted words
active_words = []
for i, w in enumerate(word_dicts):
if i not in deleted_set:
active_words.append(w["word"])
content = " ".join(active_words)
os.makedirs(os.path.dirname(req.output_path) or ".", exist_ok=True)
with open(req.output_path, "w", encoding="utf-8") as f:
f.write(content)
logger.info("Transcript exported to %s (format=%s)", req.output_path, req.format)
return {"status": "ok", "output_path": req.output_path}
except Exception as e:
logger.error(f"Transcript export failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))

View File

@ -51,99 +51,3 @@ async def transcribe(req: TranscribeRequest):
except Exception as e:
logger.error(f"Transcription failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
class ReTranscribeSegmentRequest(BaseModel):
    """Request body for POST /transcribe/segment."""
    file_path: str  # absolute path to the source media file
    start: float  # segment start time in seconds (source timeline)
    end: float  # segment end time in seconds (source timeline)
    model: str = "base"  # Whisper model size to use for this segment
    language: Optional[str] = None  # force a language; None = auto-detect
@router.post("/transcribe/segment")
async def transcribe_segment(req: ReTranscribeSegmentRequest):
"""
Re-transcribe a specific segment of audio.
Extracts the segment with FFmpeg, transcribes it, and returns words
with timestamps adjusted to the original file timeline.
"""
import subprocess
import tempfile
import os
try:
# Extract the segment to a temp file
tmp_dir = tempfile.mkdtemp(prefix="talkedit_segment_")
segment_path = os.path.join(tmp_dir, "segment.wav")
cmd = [
"ffmpeg", "-y",
"-i", req.file_path,
"-ss", str(req.start),
"-to", str(req.end),
"-vn",
"-acodec", "pcm_s16le",
"-ar", "16000",
"-ac", "1",
segment_path,
]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"Segment extraction failed: {result.stderr[-300:]}")
# Transcribe the segment — try GPU first, fall back to CPU
try:
segment_result = transcribe_audio(
file_path=segment_path,
model_name=req.model,
use_gpu=True,
use_cache=False,
language=req.language,
)
except Exception as gpu_err:
logger.warning(f"GPU transcription failed (%s), falling back to CPU", gpu_err)
segment_result = transcribe_audio(
file_path=segment_path,
model_name=req.model,
use_gpu=False,
use_cache=False,
language=req.language,
)
# Adjust timestamps to be relative to the original file
offset = req.start
adjusted_words = []
for w in segment_result.get("words", []):
w["start"] = round(w["start"] + offset, 3)
w["end"] = round(w["end"] + offset, 3)
adjusted_words.append(w)
adjusted_segments = []
for seg in segment_result.get("segments", []):
seg["start"] = round(seg["start"] + offset, 3)
seg["end"] = round(seg["end"] + offset, 3)
# Also adjust words within each segment
for w in seg.get("words", []):
w["start"] = round(w["start"] + offset, 3)
w["end"] = round(w["end"] + offset, 3)
adjusted_segments.append(seg)
# Cleanup
try:
os.remove(segment_path)
os.rmdir(tmp_dir)
except OSError:
pass
return {
"words": adjusted_words,
"segments": adjusted_segments,
"language": segment_result.get("language", "en"),
}
except FileNotFoundError:
raise HTTPException(status_code=404, detail=f"File not found: {req.file_path}")
except Exception as e:
logger.error(f"Segment transcription failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))

View File

@ -7,52 +7,23 @@ import logging
import re
import subprocess
import tempfile
import warnings
from pathlib import Path
logger = logging.getLogger(__name__)
DEEPFILTER_AVAILABLE = None
enhance = None
init_df = None
load_audio = None
save_audio = None
try:
from df.enhance import enhance, init_df, load_audio, save_audio
DEEPFILTER_AVAILABLE = True
except ImportError:
DEEPFILTER_AVAILABLE = False
_df_model = None
_df_state = None
def _ensure_deepfilter_loaded() -> bool:
global DEEPFILTER_AVAILABLE, enhance, init_df, load_audio, save_audio
if DEEPFILTER_AVAILABLE is not None:
return DEEPFILTER_AVAILABLE
try:
# DeepFilterNet currently triggers a third-party torchaudio deprecation warning
# on import in some environments; suppress only this known warning.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=r".*torchaudio\._backend\.common\.AudioMetaData has been moved.*",
category=UserWarning,
)
from df.enhance import enhance as _enhance, init_df as _init_df, load_audio as _load_audio, save_audio as _save_audio
enhance = _enhance
init_df = _init_df
load_audio = _load_audio
save_audio = _save_audio
DEEPFILTER_AVAILABLE = True
except ImportError:
DEEPFILTER_AVAILABLE = False
return DEEPFILTER_AVAILABLE
def _init_deepfilter():
global _df_model, _df_state
if not _ensure_deepfilter_loaded():
raise RuntimeError("DeepFilterNet is not available")
if _df_model is None:
logger.info("Initializing DeepFilterNet model")
_df_model, _df_state, _ = init_df()
@ -75,7 +46,7 @@ def clean_audio(
if not output_path:
output_path = str(input_path.with_stem(input_path.stem + "_clean"))
if is_deepfilter_available():
if DEEPFILTER_AVAILABLE:
return _clean_with_deepfilter(str(input_path), output_path)
else:
return _clean_with_ffmpeg(str(input_path), output_path)
@ -106,7 +77,7 @@ def _clean_with_ffmpeg(input_path: str, output_path: str) -> str:
def is_deepfilter_available() -> bool:
return _ensure_deepfilter_loaded()
return DEEPFILTER_AVAILABLE
def detect_silence_ranges(input_path: str, min_silence_ms: int, silence_db: float):
@ -158,125 +129,3 @@ def detect_silence_ranges(input_path: str, min_silence_ms: int, silence_db: floa
silence_db,
)
return ranges
def normalize_audio(
    input_path: str,
    output_path: str = "",
    target_lufs: float = -14.0,
) -> str:
    """
    Normalize audio loudness to a target LUFS level using FFmpeg's loudnorm filter.

    Runs two-pass loudnorm (measure, then apply in linear mode). If the
    measurement pass produces no parseable JSON, falls back to single-pass
    normalization.

    Args:
        input_path: Path to the input audio/video file.
        output_path: Path for the normalized output. Auto-generated if empty.
        target_lufs: Target integrated loudness in LUFS.
            Common targets: -14 (YouTube), -16 (Spotify), -23 (broadcast).

    Returns: path to the normalized audio file.

    Raises:
        RuntimeError: if FFmpeg exits non-zero while writing the output.
    """
    inp = Path(input_path)
    if not output_path:
        output_path = str(inp.with_stem(inp.stem + "_normalized"))

    # Two-pass loudnorm: first pass measures loudness, second pass applies correction.
    # First pass: measure only (print_format=json); the decoded output is discarded.
    measure_cmd = [
        "ffmpeg", "-y",
        "-i", str(inp),
        "-af", f"loudnorm=I={target_lufs}:LRA=7:TP=-1.5:print_format=json",
        "-f", "null",
        "-",
    ]
    logger.info("Running loudnorm first pass (measurement): %s", " ".join(measure_cmd))
    measure_result = subprocess.run(measure_cmd, capture_output=True, text=True)

    # Parse measured parameters from stderr (loudnorm outputs JSON to stderr)
    measured = _parse_loudnorm_measurement(measure_result.stderr)
    if not measured:
        logger.warning(
            "loudnorm measurement failed or produced no output; "
            "falling back to single-pass normalization"
        )
        # Single-pass fallback: dynamic-mode loudnorm, less precise but robust.
        cmd = [
            "ffmpeg", "-y",
            "-i", str(inp),
            "-af", f"loudnorm=I={target_lufs}:LRA=7:TP=-1.5",
            "-c:v", "copy",
            output_path,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Audio normalization failed: {result.stderr[-300:]}")
        logger.info("Single-pass normalized audio saved to %s", output_path)
        return output_path

    # Second pass: apply normalization using measured values. linear=true keeps
    # a constant gain instead of time-varying correction.
    input_i = measured.get("input_i", target_lufs)
    input_lra = measured.get("input_lra", 7.0)
    input_tp = measured.get("input_tp", -1.5)
    input_thresh = measured.get("input_thresh", -30.0)
    offset = measured.get("target_offset", 0.0)
    apply_cmd = [
        "ffmpeg", "-y",
        "-i", str(inp),
        "-af",
        (
            f"loudnorm=I={target_lufs}:LRA=7:TP=-1.5:"
            f"measured_I={input_i}:measured_LRA={input_lra}:"
            f"measured_TP={input_tp}:measured_thresh={input_thresh}:"
            f"offset={offset}:linear=true:print_format=summary"
        ),
        "-c:v", "copy",
        output_path,
    ]
    logger.info("Running loudnorm second pass (apply): %s", " ".join(apply_cmd))
    result = subprocess.run(apply_cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"Audio normalization (apply) failed: {result.stderr[-300:]}")
    logger.info(
        "Normalized audio saved to %s (target=%s LUFS, measured_I=%s)",
        output_path,
        target_lufs,
        input_i,
    )
    return output_path
def _parse_loudnorm_measurement(stderr_output: str) -> dict:
"""Parse loudnorm JSON measurement output from FFmpeg stderr."""
import json
# loudnorm prints JSON block between "Parsed_loudnorm" lines
lines = stderr_output.split("\n")
json_lines = []
in_json = False
for line in lines:
if "Parsed_loudnorm" in line and "}" in line:
# Single-line JSON
try:
start = line.index("{")
end = line.rindex("}") + 1
return json.loads(line[start:end])
except (ValueError, json.JSONDecodeError):
continue
if "{" in line and not in_json:
in_json = True
if in_json:
json_lines.append(line)
if in_json and "}" in line:
in_json = False
break
if json_lines:
try:
return json.loads("".join(json_lines))
except json.JSONDecodeError:
pass
return {}

View File

@ -1,17 +1,18 @@
"""
AI background removal using MediaPipe for person segmentation.
Applied during export as a post-processing step — no real-time preview.
AI background removal (Phase 5 - future).
Uses MediaPipe or Robust Video Matting for person segmentation.
Export-only -- no real-time preview.
"""
import logging
import subprocess
import tempfile
import os
from pathlib import Path
logger = logging.getLogger(__name__)
# Placeholder for Phase 5 implementation
# Will use mediapipe or rvm for segmentation at export time
MEDIAPIPE_AVAILABLE = False
RVM_AVAILABLE = False
try:
import mediapipe as mp
@ -19,9 +20,14 @@ try:
except ImportError:
pass
try:
pass # rvm import would go here
except ImportError:
pass
def is_available() -> bool:
return MEDIAPIPE_AVAILABLE
return MEDIAPIPE_AVAILABLE or RVM_AVAILABLE
def remove_background_on_export(
@ -31,202 +37,23 @@ def remove_background_on_export(
replacement_value: str = "",
) -> str:
"""
Process video frame-by-frame using FFmpeg chromakey fallback,
or MediaPipe-based segmentation if available.
Process video frame-by-frame to remove/replace background.
Only runs during export (not real-time).
Args:
input_path: source video
output_path: destination
replacement: 'blur', 'color', or 'image'
replacement_value: hex color or image path (for color/image modes)
replacement: 'blur', 'color', 'image', or 'video'
replacement_value: hex color, image path, or video path
Returns:
output_path
"""
input_path = str(Path(input_path).resolve())
output_path = str(Path(output_path).resolve())
if MEDIAPIPE_AVAILABLE:
return _remove_with_mediapipe(input_path, output_path, replacement, replacement_value)
else:
return _remove_with_ffmpeg_portrait(input_path, output_path, replacement, replacement_value)
def _remove_with_mediapipe(
    input_path: str,
    output_path: str,
    replacement: str = "blur",
    replacement_value: str = "",
) -> str:
    """Use MediaPipe Selfie Segmentation + FFmpeg for background removal.
    Extracts frames, applies segmentation, composites replacement background.

    Args:
        input_path: source video file.
        output_path: destination video file (re-encoded H.264/AAC).
        replacement: 'blur' (default), 'color', or 'image'.
        replacement_value: hex color ('color' mode) or image path ('image' mode).

    Returns:
        output_path on success.

    Raises:
        RuntimeError: if segmentation or the FFmpeg frame re-encode fails.
    """
    try:
        import cv2
        import numpy as np
        import mediapipe as mp
        mp_selfie_segmentation = mp.solutions.selfie_segmentation
        # Determine background color/image
        if replacement == "color":
            color_hex = replacement_value or "#00FF00"
            color_hex = color_hex.lstrip("#")
            bg_color = tuple(int(color_hex[i:i+2], 16) for i in (0, 2, 4))
            bg_color = bg_color[::-1]  # RGB -> BGR (OpenCV frames are BGR)
        elif replacement == "image":
            bg_image = cv2.imread(replacement_value) if replacement_value else None
            if bg_image is None:
                # Missing/unreadable image: fall back to a solid green background.
                bg_color = (0, 255, 0)
                bg_image = None
        else:
            # Blur background (default)
            bg_color = None
        # Open video
        cap = cv2.VideoCapture(input_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Temp directory for processed frames
        temp_dir = tempfile.mkdtemp(prefix="aive_bgrem_")
        frame_dir = os.path.join(temp_dir, "frames")
        os.makedirs(frame_dir, exist_ok=True)
        # model_selection=0 is MediaPipe's "general" selfie model.
        with mp_selfie_segmentation.SelfieSegmentation(model_selection=0) as segmenter:
            frame_idx = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                # Convert to RGB for MediaPipe
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = segmenter.process(rgb)
                mask = result.segmentation_mask
                # Threshold the mask: True = foreground (person) pixels.
                condition = mask > 0.5
                if replacement == "blur":
                    # Apply strong blur to background
                    blurred = cv2.GaussianBlur(frame, (99, 99), 0)
                    output_frame = np.where(condition[..., None], frame, blurred)
                elif replacement == "color":
                    bg = np.full(frame.shape, bg_color, dtype=np.uint8)
                    output_frame = np.where(condition[..., None], frame, bg)
                elif replacement == "image" and bg_image is not None:
                    bg_resized = cv2.resize(bg_image, (width, height))
                    output_frame = np.where(condition[..., None], frame, bg_resized)
                else:
                    output_frame = frame
                out_path = os.path.join(frame_dir, f"frame_{frame_idx:06d}.png")
                cv2.imwrite(out_path, output_frame)
                frame_idx += 1
                if frame_idx % 100 == 0:
                    logger.info("Background removal: %d/%d frames", frame_idx, total_frames)
        cap.release()
        # Encode frames back to video using FFmpeg; audio is taken from the
        # original input ("-map 1:a:0?", optional so audio-less inputs still work).
        import subprocess as _sp
        ffmpeg = "ffmpeg"
        cmd = [
            ffmpeg, "-y",
            "-framerate", str(fps),
            "-i", os.path.join(frame_dir, "frame_%06d.png"),
            "-i", input_path,
            "-map", "0:v:0",
            "-map", "1:a:0?",
            "-c:v", "libx264", "-preset", "medium", "-crf", "18",
            "-c:a", "aac", "-b:a", "192k",
            "-shortest",
            "-pix_fmt", "yuv420p",
            output_path,
        ]
        result = _sp.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"FFmpeg frame encode failed: {result.stderr[-500:]}")
        # Cleanup
        for f in os.listdir(frame_dir):
            try:
                os.remove(os.path.join(frame_dir, f))
            except OSError:
                pass
        try:
            os.rmdir(frame_dir)
            os.rmdir(temp_dir)
        except OSError:
            pass
        logger.info("MediaPipe background removal completed -> %s", output_path)
        return output_path
    except ImportError:
        # Optional dependencies absent: degrade to the FFmpeg-only fallback.
        logger.warning("mediapipe/cv2 not available, falling back to FFmpeg portrait mode")
        return _remove_with_ffmpeg_portrait(input_path, output_path, replacement, replacement_value)
    except Exception as e:
        raise RuntimeError(f"MediaPipe background removal failed: {e}")
def _remove_with_ffmpeg_portrait(
input_path: str,
output_path: str,
replacement: str = "blur",
replacement_value: str = "",
) -> str:
"""Fallback: basic FFmpeg-only background blur.
Uses a strong gaussian blur as a crude background replacement.
For proper person segmentation (color/image replacement), install:
pip install mediapipe opencv-python
"""
ffmpeg = "ffmpeg"
if replacement == "blur":
filter_complex = "gblur=sigma=30"
elif replacement == "color":
color = replacement_value or "00FF00"
filter_complex = (
f"split[fg][bg];"
f"[bg]colorkey=0x{color}:0.3:0.1[bg_key];"
f"[fg][bg_key]overlay"
if not is_available():
raise RuntimeError(
"Background removal requires mediapipe or robust-video-matting. "
"Install with: pip install mediapipe"
)
elif replacement == "image" and replacement_value:
escaped = replacement_value.replace("\\", "/").replace(":", "\\:")
filter_complex = (
f"movie='{escaped}':loop=0,scale=iw:ih[bg];"
f"[0:v][bg]overlay=0:0:shortest=1"
)
else:
filter_complex = "null"
if filter_complex == "null":
cmd = [ffmpeg, "-y", "-i", input_path, "-c", "copy", output_path]
else:
cmd = [
ffmpeg, "-y",
"-i", input_path,
"-vf", filter_complex,
"-c:v", "libx264", "-preset", "medium", "-crf", "18",
"-c:a", "aac", "-b:a", "192k",
"-movflags", "+faststart",
output_path,
]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"FFmpeg background removal failed: {result.stderr[-500:]}")
logger.warning(
"FFmpeg fallback background removal used (no MediaPipe). "
"Install 'mediapipe' and 'opencv-python' for proper person segmentation."
)
return output_path
# Phase 5 implementation will go here
raise NotImplementedError("Background removal is planned for Phase 5")

View File

@ -48,11 +48,11 @@ def _load_model(model_name: str, device: torch.device):
compute_type = "float16" if device.type == "cuda" else "int8"
model = whisperx.load_model(
model_name,
device=device.type, # use "cuda" not "cuda:0" — some WhisperX versions don't support device ordinal
device=str(device),
compute_type=compute_type,
)
else:
model = whisper.load_model(model_name, device=str(device))
model = whisper.load_model(model_name, device=device)
_model_cache[cache_key] = model
return model
@ -112,7 +112,7 @@ def _transcribe_whisperx(model, audio_path: str, device: torch.device, language:
align_model, align_metadata = whisperx.load_align_model(
language_code=detected_language,
device=device.type,
device=str(device),
)
aligned = whisperx.align(
result["segments"],

View File

@ -13,281 +13,6 @@ from typing import List
logger = logging.getLogger(__name__)
def _get_codec_args(format_hint: str, has_video: bool = True) -> list:
"""Return FFmpeg codec arguments for the given format."""
if format_hint == "wav":
return ["-c:a", "pcm_s16le"]
if format_hint == "webm":
if has_video:
return ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
return ["-c:a", "libopus", "-b:a", "160k"]
# Default: MP4
if has_video:
return ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
return ["-c:a", "aac", "-b:a", "192k"]
def _input_has_video_stream(ffmpeg_cmd: str, input_path: str) -> bool:
"""Return True if the input contains at least one video stream."""
ffprobe = ffmpeg_cmd.replace("ffmpeg", "ffprobe")
cmd = [
ffprobe,
"-v", "error",
"-select_streams", "v:0",
"-show_entries", "stream=index",
"-of", "csv=p=0",
str(input_path),
]
try:
result = subprocess.run(cmd, capture_output=True, text=True)
return result.returncode == 0 and bool(result.stdout.strip())
except Exception:
return False
def _input_has_audio_stream(ffmpeg_cmd: str, input_path: str) -> bool:
"""Return True if the input contains at least one audio stream."""
ffprobe = ffmpeg_cmd.replace("ffmpeg", "ffprobe")
cmd = [
ffprobe,
"-v", "error",
"-select_streams", "a:0",
"-show_entries", "stream=index",
"-of", "csv=p=0",
str(input_path),
]
try:
result = subprocess.run(cmd, capture_output=True, text=True)
return result.returncode == 0 and bool(result.stdout.strip())
except Exception:
return False
def _clamp_speed(speed: float) -> float:
return max(0.25, min(4.0, float(speed)))
def _build_atempo_chain(speed: float) -> str:
    """Compose an FFmpeg atempo filter chain for an arbitrary speed factor.

    A single atempo node only accepts factors in [0.5, 2.0], so factors
    outside that range are decomposed into a product of in-range stages.
    The input is clamped via _clamp_speed first.
    """
    remaining = _clamp_speed(speed)
    stages = []
    # Peel off 2x stages until the residue fits in one node.
    while remaining > 2.0:
        stages.append("atempo=2.0")
        remaining /= 2.0
    # Peel off 0.5x stages for slow-downs below the node minimum.
    while remaining < 0.5:
        stages.append("atempo=0.5")
        remaining /= 0.5
    stages.append(f"atempo={remaining:.6f}")
    return ",".join(stages)
def _split_keep_segments_by_speed(
keep_segments: List[dict],
speed_ranges: List[dict] = None,
) -> List[dict]:
"""Split keep segments by speed ranges, attaching speed multiplier per piece."""
if not keep_segments:
return []
normalized_ranges = []
for r in speed_ranges or []:
start = float(r.get("start", 0.0))
end = float(r.get("end", 0.0))
if end <= start:
continue
normalized_ranges.append({
"start": start,
"end": end,
"speed": _clamp_speed(float(r.get("speed", 1.0))),
})
normalized_ranges.sort(key=lambda x: x["start"])
result = []
for keep in keep_segments:
k_start = float(keep["start"])
k_end = float(keep["end"])
if k_end <= k_start:
continue
cuts = {k_start, k_end}
for sr in normalized_ranges:
overlap_start = max(k_start, sr["start"])
overlap_end = min(k_end, sr["end"])
if overlap_end > overlap_start:
cuts.add(overlap_start)
cuts.add(overlap_end)
points = sorted(cuts)
for i in range(len(points) - 1):
seg_start = points[i]
seg_end = points[i + 1]
if seg_end - seg_start < 1e-6:
continue
speed = 1.0
for sr in normalized_ranges:
if seg_start >= sr["start"] and seg_end <= sr["end"]:
speed = sr["speed"]
break
result.append({"start": seg_start, "end": seg_end, "speed": speed})
return result
def _build_zoom_filter(zoom_config: dict = None) -> str:
"""Build FFmpeg video filter snippet for zoom/punch-in effect.
zoom_config: {enabled, zoomFactor, panX, panY}
Returns empty string if disabled. Should be prepended to the video filter chain.
"""
if not zoom_config or not zoom_config.get("enabled"):
return ""
factor = float(zoom_config.get("zoomFactor", 1.0))
if abs(factor - 1.0) < 0.01:
return ""
pan_x = float(zoom_config.get("panX", 0.0))
pan_y = float(zoom_config.get("panY", 0.0))
return f"crop=iw/{factor}:ih/{factor}:((iw-iw/{factor})/2)+({pan_x}*(iw-iw/{factor})/2):((ih-ih/{factor})/2)+({pan_y}*(ih-ih/{factor})/2),scale=iw:ih"
def mix_background_music(
    video_path: str,
    music_path: str,
    output_path: str,
    volume_db: float = 0.0,
    ducking_enabled: bool = False,
    ducking_db: float = 6.0,
    ducking_attack_ms: float = 10.0,
    ducking_release_ms: float = 200.0,
) -> str:
    """Mix background music into a video, optionally ducking it under speech.

    Uses FFmpeg amix (+ sidechaincompress when ducking is enabled). If the
    input video has no audio stream, the music becomes the sole audio track.
    Video is always stream-copied; output is written to output_path.

    Args:
        video_path: input video file.
        music_path: music file to mix in.
        output_path: destination file.
        volume_db: gain applied to the music track, in dB.
        ducking_enabled: lower the music while the main audio is active.
        ducking_db: passed to FFmpeg's level_sc.
            NOTE(review): level_sc is a linear sidechain-gain multiplier in
            FFmpeg, not dB — kept as-is for caller compatibility; verify tuning.
        ducking_attack_ms: compressor attack time in milliseconds.
        ducking_release_ms: compressor release time in milliseconds.

    Returns:
        output_path on success.

    Raises:
        RuntimeError: if FFmpeg exits non-zero.
    """
    ffmpeg = _find_ffmpeg()
    # amovie needs ':' escaped inside the quoted path; backslashes normalized.
    escaped_music = music_path.replace("\\", "/").replace(":", "\\:")
    has_audio_result = _input_has_audio_stream(ffmpeg, video_path)
    if not has_audio_result:
        # No original audio: map the music straight in as the only audio track.
        cmd = [
            ffmpeg, "-y",
            "-i", video_path,
            "-i", music_path,
            "-map", "0:v",
            "-map", "1:a",
            "-c:v", "copy",
            "-c:a", "aac", "-b:a", "192k",
            "-shortest",
            "-movflags", "+faststart",
            output_path,
        ]
    elif ducking_enabled:
        music_source = f"amovie='{escaped_music}',volume={volume_db}dB[music]"
        # FIX: duck the MUSIC (compressor keyed by the voice track), then mix.
        # The previous graph mixed first and sidechain-compressed the combined
        # signal, which attenuated the voice itself whenever it was loud.
        filter_complex = (
            f"[0:a]asplit[voice][sc];"
            f"{music_source};"
            f"[music][sc]sidechaincompress="
            f"threshold=-30dB:ratio=20:attack={ducking_attack_ms / 1000}:"
            f"release={ducking_release_ms / 1000}:makeup=1:level_sc={ducking_db}[ducked];"
            f"[voice][ducked]amix=inputs=2:duration=first:dropout_transition=2[outa]"
        )
        cmd = [
            ffmpeg, "-y",
            "-i", video_path,
            "-filter_complex", filter_complex,
            "-map", "0:v",
            "-map", "[outa]",
            "-c:v", "copy",
            "-c:a", "aac", "-b:a", "192k",
            "-shortest",
            output_path,
        ]
    else:
        # Plain mix: music under the original audio, no ducking.
        music_source = f"amovie='{escaped_music}',volume={volume_db}dB[music]"
        filter_complex = (
            f"{music_source};"
            f"[0:a][music]amix=inputs=2:duration=first:dropout_transition=2[outa]"
        )
        cmd = [
            ffmpeg, "-y",
            "-i", video_path,
            "-filter_complex", filter_complex,
            "-map", "0:v",
            "-map", "[outa]",
            "-c:v", "copy",
            "-c:a", "aac", "-b:a", "192k",
            "-shortest",
            output_path,
        ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"Background music mix failed: {result.stderr[-500:]}")
    return output_path
def concat_clips(
    main_path: str,
    append_paths: list,
    output_path: str,
) -> str:
    """Concatenate video clips with the FFmpeg concat demuxer (stream copy).

    main_path comes first, followed by each entry of append_paths in order.
    No re-encode is performed ("-c copy"), so all inputs must share
    compatible codecs/parameters.

    Args:
        main_path: first clip in the output.
        append_paths: clips appended after main_path.
        output_path: destination file; may equal one of the inputs.

    Returns:
        output_path on success.

    Raises:
        ValueError: if append_paths is empty.
        RuntimeError: if FFmpeg exits non-zero.
    """
    if not append_paths:
        raise ValueError("No clips to concatenate")
    ffmpeg = _find_ffmpeg()
    all_inputs = [str(Path(main_path).resolve())] + [
        str(Path(p).resolve()) for p in append_paths
    ]
    # FFmpeg cannot read and write the same file: if the output collides with
    # an input, write to a temp name first and atomically replace at the end.
    needs_rename = str(Path(output_path).resolve()) in all_inputs
    final_output = output_path + ".concat_tmp.mp4" if needs_rename else output_path
    temp_dir = tempfile.mkdtemp(prefix="aive_concat_")
    try:
        concat_file = os.path.join(temp_dir, "concat.txt")
        with open(concat_file, "w") as f:
            for path in all_inputs:
                # FIX: concat-demuxer quoting — a literal single quote inside
                # a quoted string must be written as '\'' or paths containing
                # an apostrophe break the list file.
                escaped = path.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")
        cmd = [
            ffmpeg, "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", concat_file,
            "-c", "copy",
            "-movflags", "+faststart",
            final_output,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Clip concat failed: {result.stderr[-500:]}")
        if needs_rename:
            os.replace(final_output, output_path)
        return output_path
    finally:
        # Best-effort cleanup of the temp list directory.
        for name in os.listdir(temp_dir):
            try:
                os.remove(os.path.join(temp_dir, name))
            except OSError:
                pass
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass
def _find_ffmpeg() -> str:
"""Locate ffmpeg binary."""
for cmd in ["ffmpeg", "ffmpeg.exe"]:
@ -320,13 +45,9 @@ def export_stream_copy(
output_path on success
"""
if mute_ranges:
# Mute ranges require audio filtering, so fall back to re-encode
# Mute ranges require audio filtering, so fall back to re-encoding
return export_reencode(input_path, output_path, keep_segments, "1080p", "mp4", mute_ranges)
ffmpeg = _find_ffmpeg()
if not _input_has_video_stream(ffmpeg, input_path):
# Audio-only inputs cannot use TS segment stream-copy concat reliably.
return export_reencode(input_path, output_path, keep_segments)
input_path = str(Path(input_path).resolve())
output_path = str(Path(output_path).resolve())
@ -384,29 +105,6 @@ def export_stream_copy(
pass
def _apply_zoom_post(input_path: str, output_path: str, zoom_config: dict) -> str:
    """Apply a zoom/punch-in crop as a separate re-encode pass.

    Returns input_path untouched when the zoom config produces no filter;
    otherwise re-encodes the video stream (audio is stream-copied) into
    output_path and returns that. Raises RuntimeError on FFmpeg failure.
    """
    ffmpeg = _find_ffmpeg()
    zoom_filter = _build_zoom_filter(zoom_config)
    if not zoom_filter:
        # Nothing to do — hand back the untouched input.
        return input_path
    zoom_cmd = [
        ffmpeg, "-y",
        "-i", input_path,
        "-filter_complex", f"[0:v]{zoom_filter}[v]",
        "-map", "[v]",
        "-map", "0:a?",
        "-c:a", "copy",
        "-movflags", "+faststart",
        output_path,
    ]
    proc = subprocess.run(zoom_cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        raise RuntimeError(f"Zoom post-process failed: {proc.stderr[-500:]}")
    return output_path
def export_reencode(
input_path: str,
output_path: str,
@ -414,12 +112,6 @@ def export_reencode(
resolution: str = "1080p",
format_hint: str = "mp4",
mute_ranges: List[dict] = None,
gain_ranges: List[dict] = None,
speed_ranges: List[dict] = None,
global_gain_db: float = 0.0,
normalize_loudness: bool = False,
normalize_target_lufs: float = -14.0,
zoom_config: dict = None,
) -> str:
"""
Export video with full re-encode. Slower but supports resolution changes,
@ -436,89 +128,21 @@ def export_reencode(
"4k": "scale=-2:2160",
}
def build_audio_filter() -> str:
filters = []
if abs(float(global_gain_db)) > 1e-6:
filters.append(f"volume={float(global_gain_db)}dB")
for gain_range in gain_ranges or []:
start = gain_range['start']
end = gain_range['end']
gain_db = gain_range.get('gain_db', 0.0)
filters.append(f"volume={float(gain_db)}dB:enable='between(t,{start},{end})'")
for mute_range in mute_ranges or []:
# Handle muting case - keep full video but silence audio ranges
if mute_ranges and len(mute_ranges) > 0:
# Build volume filter for muting
volume_filters = []
for i, mute_range in enumerate(mute_ranges):
start = mute_range['start']
end = mute_range['end']
filters.append(f"volume=0:enable='between(t,{start},{end})'")
# Use volume=0 to mute, enable to specify time range
volume_filters.append(f"volume=0:enable='between(t,{start},{end})'")
if normalize_loudness:
filters.append(f"loudnorm=I={normalize_target_lufs}:LRA=7:TP=-1.5")
return ",".join(filters) if filters else "anull"
has_audio_filters = bool(mute_ranges) or bool(gain_ranges) or abs(float(global_gain_db)) > 1e-6
has_video = _input_has_video_stream(ffmpeg, input_path)
speed_segments = _split_keep_segments_by_speed(keep_segments, speed_ranges)
has_speed = any(abs(seg.get("speed", 1.0) - 1.0) > 1e-6 for seg in speed_segments)
if not has_video:
if not keep_segments:
raise ValueError("No segments to export")
segments_for_concat = speed_segments if speed_segments else _split_keep_segments_by_speed(keep_segments, None)
if not segments_for_concat:
raise ValueError("No segments to export")
filter_parts = []
for i, seg in enumerate(segments_for_concat):
speed = _clamp_speed(seg.get("speed", 1.0))
a_chain = f"atrim=start={seg['start']}:end={seg['end']},asetpts=PTS-STARTPTS"
if abs(speed - 1.0) > 1e-6:
a_chain += f",{_build_atempo_chain(speed)}"
filter_parts.append(f"[0:a]{a_chain}[a{i}];")
n = len(segments_for_concat)
concat_inputs = "".join(f"[a{i}]" for i in range(n))
filter_parts.append(f"{concat_inputs}concat=n={n}:v=0:a=1[outa_raw]")
audio_filter = build_audio_filter()
if audio_filter != "anull":
filter_parts.append(f";[outa_raw]{audio_filter}[outa]")
audio_map = "[outa]"
# Combine all volume filters
if volume_filters:
audio_filter = ",".join(volume_filters)
else:
audio_map = "[outa_raw]"
filter_complex = "".join(filter_parts)
codec_args = _get_codec_args(format_hint, has_video=False)
cmd = [
ffmpeg, "-y",
"-i", input_path,
"-filter_complex", filter_complex,
"-map", audio_map,
*codec_args,
output_path,
]
logger.info(
"Re-encoding audio-only input (%s segments, speed-adjusted=%s) -> %s",
n,
has_speed,
output_path,
)
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"FFmpeg audio-only export failed: {result.stderr[-500:]}")
return output_path
# Handle filtered full-timeline audio case (mute/gain/global gain) when no speed warping is needed
if has_audio_filters and not has_speed:
audio_filter = build_audio_filter()
audio_filter = "anull" # No muting needed
# Video filter - just scaling if needed
scale = scale_map.get(resolution, "")
@ -531,7 +155,9 @@ def export_reencode(
filter_complex = f"[0:a]{audio_filter}[a];[0:v]{video_filter}{video_map}"
codec_args = _get_codec_args(format_hint, has_video)
codec_args = ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
if format_hint == "webm":
codec_args = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
cmd = [
ffmpeg, "-y",
@ -544,45 +170,25 @@ def export_reencode(
output_path,
]
logger.info(
"Re-encoding with audio filters (mute=%s gain=%s global=%s) -> %s (%s)",
len(mute_ranges or []),
len(gain_ranges or []),
global_gain_db,
output_path,
resolution,
)
logger.info(f"Re-encoding with {len(mute_ranges)} mute ranges -> {output_path} ({resolution})")
else:
# Cutting logic with optional per-segment speed changes
# Original cutting logic
if not keep_segments:
raise ValueError("No segments to export")
segments_for_concat = speed_segments if speed_segments else _split_keep_segments_by_speed(keep_segments, None)
if not segments_for_concat:
raise ValueError("No segments to export")
filter_parts = []
for i, seg in enumerate(segments_for_concat):
speed = _clamp_speed(seg.get("speed", 1.0))
v_chain = f"trim=start={seg['start']}:end={seg['end']},setpts=PTS-STARTPTS"
a_chain = f"atrim=start={seg['start']}:end={seg['end']},asetpts=PTS-STARTPTS"
if abs(speed - 1.0) > 1e-6:
v_chain += f",setpts=PTS/{speed:.6f}"
a_chain += f",{_build_atempo_chain(speed)}"
filter_parts.append(f"[0:v]{v_chain}[v{i}];[0:a]{a_chain}[a{i}];")
for i, seg in enumerate(keep_segments):
filter_parts.append(
f"[0:v]trim=start={seg['start']}:end={seg['end']},setpts=PTS-STARTPTS[v{i}];"
f"[0:a]atrim=start={seg['start']}:end={seg['end']},asetpts=PTS-STARTPTS[a{i}];"
)
n = len(segments_for_concat)
n = len(keep_segments)
concat_inputs = "".join(f"[v{i}][a{i}]" for i in range(n))
filter_parts.append(f"{concat_inputs}concat=n={n}:v=1:a=1[outv][outa]")
filter_complex = "".join(filter_parts)
# Add loudnorm to the cutting path audio chain if enabled
audio_map_label = "[outa]"
if normalize_loudness:
filter_complex += f";{audio_map_label}loudnorm=I={normalize_target_lufs}:LRA=7:TP=-1.5[outa_norm]"
audio_map_label = "[outa_norm]"
scale = scale_map.get(resolution, "")
if scale:
filter_complex += f";[outv]{scale}[outv_scaled]"
@ -590,39 +196,27 @@ def export_reencode(
else:
video_map = "[outv]"
codec_args = _get_codec_args(format_hint, has_video)
codec_args = ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
if format_hint == "webm":
codec_args = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
cmd = [
ffmpeg, "-y",
"-i", input_path,
"-filter_complex", filter_complex,
"-map", video_map,
"-map", audio_map_label,
"-map", "[outa]",
*codec_args,
"-movflags", "+faststart",
output_path,
]
logger.info(
"Re-encoding %s segments (speed-adjusted=%s, normalize=%s) -> %s (%s)",
n,
has_speed,
normalize_loudness,
output_path,
resolution,
)
logger.info(f"Re-encoding {n} segments -> {output_path} ({resolution})")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"FFmpeg re-encode failed: {result.stderr[-500:]}")
# Apply zoom post-processing if configured
if zoom_config and zoom_config.get("enabled") and has_video:
zoomed_path = output_path + ".zoomed.mp4"
_apply_zoom_post(output_path, zoomed_path, zoom_config)
os.replace(zoomed_path, output_path)
logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0))
return output_path
@ -634,12 +228,6 @@ def export_reencode_with_subs(
resolution: str = "1080p",
format_hint: str = "mp4",
mute_ranges: List[dict] = None,
gain_ranges: List[dict] = None,
speed_ranges: List[dict] = None,
global_gain_db: float = 0.0,
normalize_loudness: bool = False,
normalize_target_lufs: float = -14.0,
zoom_config: dict = None,
) -> str:
"""
Export video with re-encode and burn-in subtitles (ASS format).
@ -647,9 +235,6 @@ def export_reencode_with_subs(
If mute_ranges are provided, applies audio muting instead of cutting.
"""
ffmpeg = _find_ffmpeg()
if not _input_has_video_stream(ffmpeg, input_path):
raise ValueError("Burn-in captions require a video track")
input_path = str(Path(input_path).resolve())
output_path = str(Path(output_path).resolve())
subtitle_path = str(Path(subtitle_path).resolve())
@ -660,35 +245,19 @@ def export_reencode_with_subs(
"4k": "scale=-2:2160",
}
def build_audio_filter() -> str:
filters = []
if abs(float(global_gain_db)) > 1e-6:
filters.append(f"volume={float(global_gain_db)}dB")
for gain_range in gain_ranges or []:
start = gain_range['start']
end = gain_range['end']
gain_db = gain_range.get('gain_db', 0.0)
filters.append(f"volume={float(gain_db)}dB:enable='between(t,{start},{end})'")
for mute_range in mute_ranges or []:
# Handle muting case - keep full video but silence audio ranges
if mute_ranges and len(mute_ranges) > 0:
# Build volume filter for muting
volume_filters = []
for i, mute_range in enumerate(mute_ranges):
start = mute_range['start']
end = mute_range['end']
filters.append(f"volume=0:enable='between(t,{start},{end})'")
volume_filters.append(f"volume=0:enable='between(t,{start},{end})'")
if normalize_loudness:
filters.append(f"loudnorm=I={normalize_target_lufs}:LRA=7:TP=-1.5")
return ",".join(filters) if filters else "anull"
has_audio_filters = bool(mute_ranges) or bool(gain_ranges) or abs(float(global_gain_db)) > 1e-6
speed_segments = _split_keep_segments_by_speed(keep_segments, speed_ranges)
has_speed = any(abs(seg.get("speed", 1.0) - 1.0) > 1e-6 for seg in speed_segments)
# Handle filtered full-timeline audio case (mute/gain/global gain) when no speed warping is needed
if has_audio_filters and not has_speed:
audio_filter = build_audio_filter()
if volume_filters:
audio_filter = ",".join(volume_filters)
else:
audio_filter = "anull"
# Video filter with subtitles
escaped_sub = subtitle_path.replace("\\", "/").replace(":", "\\:")
@ -700,7 +269,9 @@ def export_reencode_with_subs(
filter_complex = f"[0:a]{audio_filter}[a];[0:v]{video_filter}[v]"
codec_args = _get_codec_args(format_hint, has_video=True)
codec_args = ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
if format_hint == "webm":
codec_args = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
cmd = [
ffmpeg, "-y",
@ -713,34 +284,20 @@ def export_reencode_with_subs(
output_path,
]
logger.info(
"Re-encoding with subtitles and audio filters (mute=%s gain=%s global=%s) -> %s (%s)",
len(mute_ranges or []),
len(gain_ranges or []),
global_gain_db,
output_path,
resolution,
)
logger.info(f"Re-encoding with subtitles and {len(mute_ranges)} mute ranges -> {output_path} ({resolution})")
else:
# Cutting logic with subtitles and optional speed changes
# Original cutting logic with subtitles
if not keep_segments:
raise ValueError("No segments to export")
segments_for_concat = speed_segments if speed_segments else _split_keep_segments_by_speed(keep_segments, None)
if not segments_for_concat:
raise ValueError("No segments to export")
filter_parts = []
for i, seg in enumerate(segments_for_concat):
speed = _clamp_speed(seg.get("speed", 1.0))
v_chain = f"trim=start={seg['start']}:end={seg['end']},setpts=PTS-STARTPTS"
a_chain = f"atrim=start={seg['start']}:end={seg['end']},asetpts=PTS-STARTPTS"
if abs(speed - 1.0) > 1e-6:
v_chain += f",setpts=PTS/{speed:.6f}"
a_chain += f",{_build_atempo_chain(speed)}"
filter_parts.append(f"[0:v]{v_chain}[v{i}];[0:a]{a_chain}[a{i}];")
for i, seg in enumerate(keep_segments):
filter_parts.append(
f"[0:v]trim=start={seg['start']}:end={seg['end']},setpts=PTS-STARTPTS[v{i}];"
f"[0:a]atrim=start={seg['start']}:end={seg['end']},asetpts=PTS-STARTPTS[a{i}];"
)
n = len(segments_for_concat)
n = len(keep_segments)
concat_inputs = "".join(f"[v{i}][a{i}]" for i in range(n))
filter_parts.append(f"{concat_inputs}concat=n={n}:v=1:a=1[outv][outa]")
@ -756,7 +313,9 @@ def export_reencode_with_subs(
filter_complex += f";[outv]ass='{escaped_sub}'[outv_final]"
video_map = "[outv_final]"
codec_args = _get_codec_args(format_hint, has_video=True)
codec_args = ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
if format_hint == "webm":
codec_args = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
cmd = [
ffmpeg, "-y",
@ -769,25 +328,12 @@ def export_reencode_with_subs(
output_path,
]
logger.info(
"Re-encoding %s segments with subtitles (speed-adjusted=%s) -> %s (%s)",
n,
has_speed,
output_path,
resolution,
)
logger.info(f"Re-encoding {n} segments with subtitles -> {output_path} ({resolution})")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"FFmpeg re-encode with subs failed: {result.stderr[-500:]}")
# Apply zoom post-processing if configured
if zoom_config and zoom_config.get("enabled"):
zoomed_path = output_path + ".zoomed.mp4"
_apply_zoom_post(output_path, zoomed_path, zoom_config)
os.replace(zoomed_path, output_path)
logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0))
return output_path

View File

@ -1,57 +0,0 @@
import tempfile
import time
import unittest
from pathlib import Path
from backend.utils import cache as cache_utils
class CacheUtilsTests(unittest.TestCase):
    """Tests for the backend.utils.cache helpers (hash, save/load, clear).

    setUp points cache_utils.CACHE_DIR at a temporary directory so the real
    on-disk cache is never touched; tearDown restores the original value.
    """

    def setUp(self) -> None:
        self._tmp_dir = tempfile.TemporaryDirectory()
        # Swap the module-level cache dir for a per-test temp location.
        self._old_cache_dir = cache_utils.CACHE_DIR
        cache_utils.CACHE_DIR = Path(self._tmp_dir.name) / "cache"
        self._work_dir = Path(self._tmp_dir.name) / "work"
        self._work_dir.mkdir(parents=True, exist_ok=True)
        self._src_file = self._work_dir / "sample.txt"
        self._src_file.write_text("hello", encoding="utf-8")

    def tearDown(self) -> None:
        # Restore the real cache dir before dropping the temp tree.
        cache_utils.CACHE_DIR = self._old_cache_dir
        self._tmp_dir.cleanup()

    def test_get_file_hash_returns_none_for_missing_file(self) -> None:
        """Hashing a nonexistent path yields None rather than raising."""
        missing = self._work_dir / "missing.txt"
        self.assertIsNone(cache_utils.get_file_hash(missing))

    def test_save_and_load_round_trip(self) -> None:
        """A payload saved for (file, model, operation) loads back equal."""
        payload = {"value": 123, "ok": True}
        saved = cache_utils.save_to_cache(self._src_file, payload, model="m1", operation="transcribe")
        self.assertTrue(saved)
        loaded = cache_utils.load_from_cache(self._src_file, model="m1", operation="transcribe")
        self.assertEqual(payload, loaded)

    def test_load_from_cache_respects_max_age(self) -> None:
        """Entries older than max_age are treated as missing (returns None)."""
        payload = {"value": 999}
        self.assertTrue(cache_utils.save_to_cache(self._src_file, payload, operation="transcribe"))
        time.sleep(0.02)  # let the entry age past the tiny max_age below
        expired = cache_utils.load_from_cache(self._src_file, operation="transcribe", max_age=0.001)
        self.assertIsNone(expired)

    def test_clear_cache_deletes_files(self) -> None:
        """clear_cache removes all entries and get_cache_size reports zero."""
        self.assertTrue(cache_utils.save_to_cache(self._src_file, {"a": 1}, operation="transcribe"))
        self.assertTrue(cache_utils.save_to_cache(self._src_file, {"a": 2}, operation="summarize"))
        deleted_count = cache_utils.clear_cache()
        self.assertGreaterEqual(deleted_count, 1)
        size_bytes, file_count = cache_utils.get_cache_size()
        self.assertEqual(size_bytes, 0)
        self.assertEqual(file_count, 0)
# Allow running this test module directly (python <file>) as well as via a runner.
if __name__ == "__main__":
    unittest.main()

View File

@ -1,451 +0,0 @@
import unittest
from unittest.mock import patch
from pathlib import Path
from tempfile import TemporaryDirectory
import os
from types import SimpleNamespace
from fastapi.testclient import TestClient
from backend.main import app
from routers import audio as audio_router
class RouterContractTests(unittest.TestCase):
    """Contract tests for the FastAPI routers (/health, /file, /audio/*, /ai/*,
    /transcribe, /captions, /export).

    Service-layer functions are patched at their router import sites (e.g.
    ``routers.audio.subprocess.run``), so these tests pin the HTTP contract —
    status codes, response shapes, and error pass-through — without running
    ffmpeg, Whisper, or any AI provider.
    """

    @classmethod
    def setUpClass(cls) -> None:
        # One shared client for the whole class; individual tests carry no
        # per-client state.
        cls.client = TestClient(app)

    def setUp(self) -> None:
        # The waveform cache is module-global in the audio router; clear it so
        # cache hit/miss tests do not depend on execution order.
        audio_router._waveform_cache.clear()

    # --- health and raw file serving ------------------------------------

    def test_health_endpoint(self) -> None:
        res = self.client.get("/health")
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {"status": "ok"})

    def test_file_endpoint_full_content(self) -> None:
        with TemporaryDirectory() as tmp:
            file_path = Path(tmp) / "sample.wav"
            file_path.write_bytes(b"abcdefghij")
            res = self.client.get("/file", params={"path": str(file_path)})
            self.assertEqual(res.status_code, 200)
            self.assertEqual(res.content, b"abcdefghij")
            # Full responses must still advertise byte-range support.
            self.assertEqual(res.headers.get("accept-ranges"), "bytes")

    def test_file_endpoint_range_request(self) -> None:
        with TemporaryDirectory() as tmp:
            file_path = Path(tmp) / "sample.wav"
            file_path.write_bytes(b"abcdefghij")
            res = self.client.get(
                "/file",
                params={"path": str(file_path)},
                headers={"Range": "bytes=2-5"},
            )
            # 206 Partial Content with an inclusive byte range.
            self.assertEqual(res.status_code, 206)
            self.assertEqual(res.content, b"cdef")
            self.assertEqual(res.headers.get("content-range"), "bytes 2-5/10")

    def test_file_endpoint_missing_file(self) -> None:
        res = self.client.get("/file", params={"path": "/tmp/does-not-exist.wav"})
        self.assertEqual(res.status_code, 404)
        self.assertIn("File not found", res.json()["detail"])

    # --- audio waveform -------------------------------------------------

    @patch("routers.audio.subprocess.run")
    def test_audio_waveform_cache_miss_then_hit(self, mock_subprocess_run) -> None:
        with TemporaryDirectory() as tmp:
            media_file = Path(tmp) / "input.mp4"
            media_file.write_bytes(b"fake-media")

            # Stand-in for ffmpeg: writes a fake wav at the output path the
            # router passes as the last command argument.
            def fake_ffmpeg(cmd, capture_output, text):
                out_path = Path(cmd[-1])
                out_path.parent.mkdir(parents=True, exist_ok=True)
                out_path.write_bytes(b"fake-wav")
                return SimpleNamespace(returncode=0, stderr="")

            mock_subprocess_run.side_effect = fake_ffmpeg
            res1 = self.client.get("/audio/waveform", params={"path": str(media_file)})
            self.assertEqual(res1.status_code, 200)
            self.assertTrue(res1.headers.get("content-type", "").startswith("audio/wav"))
            res2 = self.client.get("/audio/waveform", params={"path": str(media_file)})
            self.assertEqual(res2.status_code, 200)
            self.assertTrue(res2.headers.get("content-type", "").startswith("audio/wav"))
            # ffmpeg ran exactly once: the second request hit the cache.
            self.assertEqual(mock_subprocess_run.call_count, 1)

    @patch("routers.audio.subprocess.run")
    def test_audio_waveform_ffmpeg_failure_returns_500(self, mock_subprocess_run) -> None:
        with TemporaryDirectory() as tmp:
            media_file = Path(tmp) / "input.mp4"
            media_file.write_bytes(b"fake-media")
            mock_subprocess_run.return_value = SimpleNamespace(returncode=1, stderr="ffmpeg failed")
            res = self.client.get("/audio/waveform", params={"path": str(media_file)})
            self.assertEqual(res.status_code, 500)
            self.assertIn("Failed to extract audio", res.json()["detail"])

    # --- AI endpoints ---------------------------------------------------

    @patch("routers.ai.detect_filler_words")
    def test_ai_filler_removal_contract(self, mock_detect_filler_words) -> None:
        mock_detect_filler_words.return_value = {
            "wordIndices": [2, 5],
            "fillerWords": [
                {"index": 2, "word": "um", "reason": "filler"},
                {"index": 5, "word": "uh", "reason": "filler"},
            ],
        }
        payload = {
            "transcript": "Hello um world uh",
            "words": [
                {"index": 0, "word": "Hello"},
                {"index": 1, "word": "um"},
                {"index": 2, "word": "world"},
            ],
            "provider": "ollama",
            "model": "llama3",
        }
        res = self.client.post("/ai/filler-removal", json=payload)
        self.assertEqual(res.status_code, 200)
        self.assertIn("wordIndices", res.json())
        mock_detect_filler_words.assert_called_once()

    @patch("routers.ai.detect_filler_words")
    def test_ai_filler_removal_error_returns_500(self, mock_detect_filler_words) -> None:
        # The service error message is passed through as the HTTP detail.
        mock_detect_filler_words.side_effect = RuntimeError("ai-filler-fail")
        payload = {
            "transcript": "Hello world",
            "words": [{"index": 0, "word": "Hello"}],
            "provider": "ollama",
        }
        res = self.client.post("/ai/filler-removal", json=payload)
        self.assertEqual(res.status_code, 500)
        self.assertEqual(res.json()["detail"], "ai-filler-fail")

    @patch("routers.ai.create_clip_suggestion")
    def test_ai_create_clip_contract(self, mock_create_clip_suggestion) -> None:
        mock_create_clip_suggestion.return_value = {
            "title": "Best Moment",
            "startWordIndex": 10,
            "endWordIndex": 40,
            "startTime": 12.3,
            "endTime": 48.8,
            "reason": "Strong hook",
        }
        payload = {
            "transcript": "Long transcript...",
            "words": [{"index": 0, "word": "hello"}],
            "provider": "ollama",
            "target_duration": 45,
        }
        res = self.client.post("/ai/create-clip", json=payload)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json()["title"], "Best Moment")
        mock_create_clip_suggestion.assert_called_once()

    @patch("routers.ai.create_clip_suggestion")
    def test_ai_create_clip_error_returns_500(self, mock_create_clip_suggestion) -> None:
        mock_create_clip_suggestion.side_effect = RuntimeError("ai-clip-fail")
        payload = {
            "transcript": "Hello world",
            "words": [{"index": 0, "word": "hello"}],
            "provider": "ollama",
        }
        res = self.client.post("/ai/create-clip", json=payload)
        self.assertEqual(res.status_code, 500)
        self.assertEqual(res.json()["detail"], "ai-clip-fail")

    @patch("routers.ai.AIProvider.list_ollama_models")
    def test_ai_ollama_models_contract(self, mock_list_ollama_models) -> None:
        mock_list_ollama_models.return_value = ["llama3", "qwen2.5"]
        res = self.client.get("/ai/ollama-models?base_url=http://localhost:11434")
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {"models": ["llama3", "qwen2.5"]})
        # The base_url query parameter must be forwarded verbatim.
        mock_list_ollama_models.assert_called_once_with("http://localhost:11434")

    @patch("routers.ai.AIProvider.list_ollama_models")
    def test_ai_ollama_models_unhandled_error_returns_500(self, mock_list_ollama_models) -> None:
        mock_list_ollama_models.side_effect = RuntimeError("ollama-unreachable")
        # Separate client that surfaces server errors as 500 responses
        # instead of re-raising them into the test.
        local_client = TestClient(app, raise_server_exceptions=False)
        res = local_client.get("/ai/ollama-models")
        self.assertEqual(res.status_code, 500)

    # --- transcription --------------------------------------------------

    @patch("routers.transcribe.transcribe_audio")
    def test_transcribe_success(self, mock_transcribe) -> None:
        mock_transcribe.return_value = {"words": [], "segments": [], "language": "en"}
        payload = {
            "file_path": "/tmp/input.wav",
            "model": "base",
            "use_gpu": False,
            "use_cache": True,
        }
        res = self.client.post("/transcribe", json=payload)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {"words": [], "segments": [], "language": "en"})
        mock_transcribe.assert_called_once()

    @patch("routers.transcribe.diarize_and_label")
    @patch("routers.transcribe.transcribe_audio")
    def test_transcribe_with_diarization(self, mock_transcribe, mock_diarize) -> None:
        mock_transcribe.return_value = {"words": [{"word": "hi", "start": 0.0, "end": 0.2}], "segments": []}
        mock_diarize.return_value = {"words": [{"word": "hi", "start": 0.0, "end": 0.2, "speaker": "SPEAKER_00"}], "segments": []}
        payload = {
            "file_path": "/tmp/input.wav",
            "model": "base",
            "diarize": True,
            "hf_token": "hf_xxx",
            "num_speakers": 2,
        }
        res = self.client.post("/transcribe", json=payload)
        self.assertEqual(res.status_code, 200)
        self.assertIn("words", res.json())
        # Both stages must run when diarize=True.
        mock_transcribe.assert_called_once()
        mock_diarize.assert_called_once()

    @patch("routers.transcribe.transcribe_audio")
    def test_transcribe_file_not_found_returns_404(self, mock_transcribe) -> None:
        mock_transcribe.side_effect = FileNotFoundError("missing")
        payload = {
            "file_path": "/tmp/missing.wav",
            "model": "base",
        }
        res = self.client.post("/transcribe", json=payload)
        self.assertEqual(res.status_code, 404)
        self.assertIn("File not found", res.json()["detail"])

    @patch("routers.transcribe.transcribe_audio")
    def test_transcribe_runtime_failure_returns_500(self, mock_transcribe) -> None:
        mock_transcribe.side_effect = RuntimeError("boom")
        payload = {
            "file_path": "/tmp/in.wav",
            "model": "base",
        }
        res = self.client.post("/transcribe", json=payload)
        self.assertEqual(res.status_code, 500)
        self.assertEqual(res.json()["detail"], "boom")

    # --- captions -------------------------------------------------------

    @patch("routers.captions.generate_srt")
    def test_captions_plain_response(self, mock_generate_srt) -> None:
        mock_generate_srt.return_value = "1\n00:00:00,000 --> 00:00:01,000\nHello\n"
        payload = {
            "words": [{"word": "Hello", "start": 0.0, "end": 1.0}],
            "format": "srt",
        }
        res = self.client.post("/captions", json=payload)
        self.assertEqual(res.status_code, 200)
        # Without output_path the captions come back in the response body.
        self.assertIn("Hello", res.text)
        mock_generate_srt.assert_called_once()

    @patch("routers.captions.save_captions")
    @patch("routers.captions.generate_srt")
    def test_captions_save_output_path(self, mock_generate_srt, mock_save) -> None:
        mock_generate_srt.return_value = "1\n00:00:00,000 --> 00:00:01,000\nHello\n"
        mock_save.return_value = "/tmp/out.srt"
        payload = {
            "words": [{"word": "Hello", "start": 0.0, "end": 1.0}],
            "format": "srt",
            "output_path": "/tmp/out.srt",
        }
        res = self.client.post("/captions", json=payload)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {"status": "ok", "output_path": "/tmp/out.srt"})
        mock_save.assert_called_once()

    def test_captions_unknown_format_returns_400(self) -> None:
        payload = {
            "words": [{"word": "Hello", "start": 0.0, "end": 1.0}],
            "format": "txt",
        }
        res = self.client.post("/captions", json=payload)
        self.assertEqual(res.status_code, 400)
        self.assertIn("Unknown format", res.json()["detail"])

    @patch("routers.captions.generate_srt")
    def test_captions_internal_error_returns_500(self, mock_generate_srt) -> None:
        mock_generate_srt.side_effect = RuntimeError("caption-fail")
        payload = {
            "words": [{"word": "Hello", "start": 0.0, "end": 1.0}],
            "format": "srt",
        }
        res = self.client.post("/captions", json=payload)
        self.assertEqual(res.status_code, 500)
        self.assertEqual(res.json()["detail"], "caption-fail")

    # --- audio cleaning / silence / capabilities ------------------------

    @patch("routers.audio.is_deepfilter_available")
    @patch("routers.audio.clean_audio")
    def test_audio_clean_contract(self, mock_clean_audio, mock_is_deepfilter_available) -> None:
        mock_clean_audio.return_value = "/tmp/cleaned.wav"
        mock_is_deepfilter_available.return_value = True
        payload = {
            "input_path": "/tmp/in.wav",
            "output_path": "/tmp/cleaned.wav",
        }
        res = self.client.post("/audio/clean", json=payload)
        self.assertEqual(res.status_code, 200)
        body = res.json()
        self.assertEqual(body["status"], "ok")
        self.assertEqual(body["output_path"], "/tmp/cleaned.wav")
        # Engine reported as deepfilternet when the capability probe is True.
        self.assertEqual(body["engine"], "deepfilternet")

    @patch("routers.audio.clean_audio")
    def test_audio_clean_error_returns_500(self, mock_clean_audio) -> None:
        mock_clean_audio.side_effect = RuntimeError("clean-fail")
        payload = {
            "input_path": "/tmp/in.wav",
            "output_path": "/tmp/cleaned.wav",
        }
        res = self.client.post("/audio/clean", json=payload)
        self.assertEqual(res.status_code, 500)
        self.assertEqual(res.json()["detail"], "clean-fail")

    @patch("routers.audio.detect_silence_ranges")
    def test_audio_detect_silence_contract(self, mock_detect_silence_ranges) -> None:
        mock_detect_silence_ranges.return_value = [{"start": 1.2, "end": 2.1, "duration": 0.9}]
        payload = {
            "input_path": "/tmp/in.wav",
            "min_silence_ms": 500,
            "silence_db": -35.0,
        }
        res = self.client.post("/audio/detect-silence", json=payload)
        self.assertEqual(res.status_code, 200)
        body = res.json()
        self.assertEqual(body["status"], "ok")
        self.assertEqual(body["count"], 1)
        self.assertEqual(len(body["ranges"]), 1)

    @patch("routers.audio.detect_silence_ranges")
    def test_audio_detect_silence_error_returns_500(self, mock_detect_silence_ranges) -> None:
        mock_detect_silence_ranges.side_effect = RuntimeError("silence-fail")
        payload = {
            "input_path": "/tmp/in.wav",
            "min_silence_ms": 500,
            "silence_db": -35.0,
        }
        res = self.client.post("/audio/detect-silence", json=payload)
        self.assertEqual(res.status_code, 500)
        self.assertEqual(res.json()["detail"], "silence-fail")

    @patch("routers.audio.is_deepfilter_available")
    def test_audio_capabilities_contract(self, mock_is_deepfilter_available) -> None:
        mock_is_deepfilter_available.return_value = False
        res = self.client.get("/audio/capabilities")
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {"deepfilternet_available": False})

    # --- export ---------------------------------------------------------

    @patch("routers.export.export_stream_copy")
    def test_export_fast_contract(self, mock_export_stream_copy) -> None:
        mock_export_stream_copy.return_value = "/tmp/out.mp4"
        payload = {
            "input_path": "/tmp/in.mp4",
            "output_path": "/tmp/out.mp4",
            "keep_segments": [{"start": 0.0, "end": 2.0}],
            "mode": "fast",
            "captions": "none",
        }
        res = self.client.post("/export", json=payload)
        self.assertEqual(res.status_code, 200)
        self.assertEqual(res.json(), {"status": "ok", "output_path": "/tmp/out.mp4"})
        mock_export_stream_copy.assert_called_once()

    @patch("routers.export.save_captions")
    @patch("routers.export.generate_srt")
    @patch("routers.export.export_stream_copy")
    def test_export_sidecar_caption_contract(self, mock_export_stream_copy, mock_generate_srt, mock_save_captions) -> None:
        mock_export_stream_copy.return_value = "/tmp/out.mp4"
        mock_generate_srt.return_value = "1\n00:00:00,000 --> 00:00:01,000\nHello\n"
        payload = {
            "input_path": "/tmp/in.mp4",
            "output_path": "/tmp/out.mp4",
            "keep_segments": [{"start": 0.0, "end": 2.0}],
            "mode": "fast",
            "captions": "sidecar",
            "words": [{"word": "Hello", "start": 0.0, "end": 1.0}],
            "deleted_indices": [],
        }
        res = self.client.post("/export", json=payload)
        self.assertEqual(res.status_code, 200)
        body = res.json()
        self.assertEqual(body["status"], "ok")
        self.assertEqual(body["output_path"], "/tmp/out.mp4")
        # Sidecar captions land next to the video with a .srt extension.
        self.assertEqual(body["srt_path"], "/tmp/out.srt")
        mock_save_captions.assert_called_once()

    def test_export_missing_segments_returns_400(self) -> None:
        payload = {
            "input_path": "/tmp/in.mp4",
            "output_path": "/tmp/out.mp4",
            "keep_segments": [],
            "mode": "fast",
            "captions": "none",
        }
        res = self.client.post("/export", json=payload)
        self.assertEqual(res.status_code, 400)
        self.assertIn("No segments to export", res.json()["detail"])

    @patch("routers.export.export_stream_copy")
    def test_export_runtime_error_returns_500(self, mock_export_stream_copy) -> None:
        mock_export_stream_copy.side_effect = RuntimeError("export-fail")
        payload = {
            "input_path": "/tmp/in.mp4",
            "output_path": "/tmp/out.mp4",
            "keep_segments": [{"start": 0.0, "end": 2.0}],
            "mode": "fast",
            "captions": "none",
        }
        res = self.client.post("/export", json=payload)
        self.assertEqual(res.status_code, 500)
        self.assertEqual(res.json()["detail"], "export-fail")
# Allow running this test module directly with the unittest runner.
if __name__ == "__main__":
    unittest.main()

File diff suppressed because it is too large Load Diff

70
close
View File

@ -2,46 +2,8 @@
# Close TalkEdit processes (Tauri dev and Python backend)
KILLED_ANY=0
# kill_pids LABEL PID...
# Terminate the given PIDs gracefully (SIGTERM), wait briefly, then SIGKILL
# any survivors. Sets KILLED_ANY=1 whenever at least one PID was targeted.
kill_pids() {
    local label=$1
    shift
    local pids=("$@")
    # Nothing to do when no PIDs were passed.
    [[ ${#pids[@]} -eq 0 ]] && return
    echo "Stopping $label (PID(s): ${pids[*]})..."
    kill -TERM "${pids[@]}" 2>/dev/null || true
    # Give processes a moment to exit cleanly before escalating.
    sleep 0.7
    local survivors=()
    local pid
    for pid in "${pids[@]}"; do
        # kill -0 probes whether the process still exists.
        if kill -0 "$pid" 2>/dev/null; then
            survivors+=("$pid")
        fi
    done
    if [[ ${#survivors[@]} -gt 0 ]]; then
        echo "Force killing stubborn $label PID(s): ${survivors[*]}"
        kill -KILL "${survivors[@]}" 2>/dev/null || true
    fi
    KILLED_ANY=1
}
# kill_tree PID
# Recursively kill a process and all of its descendants, deepest children
# first, so watcher subprocesses cannot outlive their parent.
kill_tree() {
    local pid=$1
    local children
    children=$(pgrep -P "$pid" 2>/dev/null || true)
    if [[ -n "$children" ]]; then
        local child
        for child in $children; do
            kill_tree "$child"
        done
    fi
    kill_pids "process tree" "$pid"
}
BACKEND_PORT="${BACKEND_PORT:-8000}"
FRONTEND_PORT="${FRONTEND_PORT:-5173}"
kill_port() {
local port=$1
@ -49,12 +11,9 @@ kill_port() {
local pids
pids=$(lsof -ti tcp:"$port" 2>/dev/null)
if [[ -n "$pids" ]]; then
# Kill any children first so watcher subprocesses do not survive.
local pid
for pid in $pids; do
kill_tree "$pid"
done
kill_pids "$name listener on port $port" $pids
echo "Stopping $name (port $port, PID $pids)..."
kill "$pids" 2>/dev/null
KILLED_ANY=1
fi
}
@ -64,20 +23,21 @@ kill_pattern() {
local pids
pids=$(pgrep -f "$pattern" 2>/dev/null)
if [[ -n "$pids" ]]; then
kill_pids "$label" $pids
echo "Stopping $label..."
kill $pids 2>/dev/null
KILLED_ANY=1
fi
}
# --- TalkEdit (Tauri, port 8000) ---
kill_port 8000 "TalkEdit"
kill_port 5173 "TalkEdit frontend"
# --- TalkEdit (Tauri) ---
kill_port "$BACKEND_PORT" "TalkEdit"
kill_pattern "tauri.*TalkEdit\|TalkEdit.*tauri\|cargo.*tauri dev\|/TalkEdit/target/debug" "TalkEdit (Tauri dev)"
# Vite dev server for TalkEdit (fallback when not bound to 5173 yet)
kill_pattern "[/ ]vite([[:space:]]|$)\|[/ ]rsbuild([[:space:]]|$)" "TalkEdit frontend dev server"
# Frontend dev server: first kill by listening port, then by known process patterns.
kill_port "$FRONTEND_PORT" "TalkEdit frontend"
kill_pattern "vite\|rsbuild\|npm.*run dev\|pnpm.*dev\|yarn.*dev" "TalkEdit frontend dev server"
# --- Orphaned uvicorn workers ---
kill_pattern "uvicorn.*main:app.*--port 8000" "leftover uvicorn workers (TalkEdit)"
kill_pattern "uvicorn.*main:app.*--port 8642" "leftover uvicorn workers"
# --- Orphaned uvicorn workers for TalkEdit ---
kill_pattern "uvicorn.*main:app.*--port ${BACKEND_PORT}" "leftover uvicorn workers (TalkEdit)"
if [[ $KILLED_ANY -eq 0 ]]; then
echo "Nothing to close — no TalkEdit processes found."

View File

@ -1,73 +0,0 @@
# AI Execution Policy
Purpose: define what autonomous AI can do in this repository without explicit human approval.
## Default Mode
- AI may implement and debug within approved scope.
- AI must run validation commands after code changes.
- AI must stop and escalate when blocked by policy or ambiguity.
## Allowed Autonomous Actions
1. Edit frontend, backend, shared schema, docs, and scripts.
2. Add/modify tests related to the task.
3. Run non-destructive validation commands.
4. Update project docs and Copilot instructions when behavior changes.
## Restricted Actions (Require Approval)
1. Security/privacy-sensitive logic changes.
2. Data migrations or destructive file operations.
3. Credential handling changes or secrets management changes.
4. Breaking API/schema changes.
5. Build/release signing, packaging, and deployment automation changes.
## Prohibited Actions
1. Destructive git commands (`git reset --hard`, force pushing protected branches).
2. Deleting user project/media data.
3. Bypassing required checks in CI.
## Required Validation Workflow
For each autonomous task:
1. Implement smallest safe change set.
2. Run lint/type/test/build checks for impacted scope.
3. Inspect errors and fix with bounded retries.
4. Re-run checks until green or escalated.
5. Produce concise summary with risks and assumptions.
## Escalation Triggers
AI must ask a human when:
1. Requirements are ambiguous and affect user-visible behavior.
2. Multiple product choices are plausible with no clear preference.
3. Potential legal, security, or compliance impact exists.
4. CI remains failing after 3 repair attempts in the same area.
5. A requested operation conflicts with this policy.
## Required Artifacts In AI PR/Change Summary
1. What changed.
2. Why it changed.
3. Validation commands and outcome.
4. Residual risks.
5. Follow-up tasks.
## Risk Levels
- Low: docs, styling, isolated refactors, non-critical bugfixes.
- Medium: feature additions with contract-stable behavior.
- High: API/schema/security/export pipeline/transcription pipeline changes.
High-risk changes require explicit human review before merge.
## TalkEdit-Specific Rules
1. Preserve compatibility for desktop bridge contracts unless explicitly approved.
2. Keep routers thin and business logic in backend services.
3. Export/transcription pipeline changes must include regression tests.
4. Linux WebKit startup behavior and media URL consistency are mandatory regression targets.

View File

@ -1,113 +0,0 @@
# Error Codes Runbook
Purpose: provide consistent, AI-readable error categories for faster autonomous debugging.
## Format
Use codes in this format: `<SUBSYSTEM>-<CATEGORY>-<ID>`
Examples:
- `BE-EXPORT-001`
- `FE-WAVEFORM-002`
- `HOST-BRIDGE-003`
## Backend (FastAPI / Services)
### Export
- `BE-EXPORT-001`: Export request validation failed.
- Symptoms: HTTP 400, missing/invalid ranges.
- Likely causes: malformed payload, empty segments.
- First checks: request body shape, keep/mute/gain ranges.
- `BE-EXPORT-002`: FFmpeg command failed.
- Symptoms: HTTP 500, stderr contains filter/codec error.
- Likely causes: invalid filter chain, unsupported codec/container.
- First checks: generated FFmpeg args, source media codec, target format.
- `BE-EXPORT-003`: Caption burn-in/subtitle generation failed.
- Symptoms: burn-in export fails while plain export works.
- Likely causes: ASS generation issue, subtitle path/temp file cleanup race.
- First checks: ASS file generation, temp file lifecycle.
### Transcription
- `BE-TRANSCRIBE-001`: Model unavailable or download failure.
- Symptoms: transcription never starts or exits early.
- Likely causes: missing model, network/cache issue.
- First checks: model cache path, ensure-model logs.
- `BE-TRANSCRIBE-002`: Inference pipeline runtime failure.
- Symptoms: mid-run crash, partial output.
- Likely causes: CUDA/CPU mismatch, unsupported media, resource exhaustion.
- First checks: environment, GPU availability, media decoding logs.
### Audio / Waveform
- `BE-AUDIO-001`: Waveform endpoint failed.
- Symptoms: waveform panel shows unavailable/error.
- Likely causes: decode error, invalid file path, unsupported media input.
- First checks: `audio/waveform` response body, file existence, FFmpeg decode path.
## Frontend (React)
### Timeline / Zones
- `FE-TIMELINE-001`: Zone interaction state inconsistency.
- Symptoms: cannot drag/select/delete zones predictably.
- Likely causes: stale selection/editing state, hidden/selected mismatch.
- First checks: zone mode flags, selectedZone state transitions.
- `FE-TIMELINE-002`: Visibility filter mismatch.
- Symptoms: hidden zones still interactive or selected.
- Likely causes: hit-testing ignores visibility flags.
- First checks: hit-test filters and selected-zone reset logic.
### Media UI
- `FE-WAVEFORM-001`: Waveform fetch failed.
- Symptoms: warning banner with URL/error.
- Likely causes: backend unavailable, bad path encoding, CORS/proxy issue.
- First checks: backend health endpoint, waveform URL, network tab logs.
- `FE-PROJECT-001`: Project load mismatch.
- Symptoms: loaded media/transcript differs from saved data.
- Likely causes: schema drift, fallback URL mismatch.
- First checks: project schema fields, loadVideo/loadProject URL parity.
## Host / Bridge (Tauri)
- `HOST-BRIDGE-001`: Desktop API bridge unavailable.
- Symptoms: open/save/transcribe actions no-op or throw.
- Likely causes: bridge init error, host command mismatch.
- First checks: bridge initialization, command names, runtime environment.
- `HOST-WEBKIT-001`: Linux WebKit startup/render regression.
- Symptoms: noisy startup errors, UI load issues.
- Likely causes: CSP/font regressions, unsupported protocol calls.
- First checks: CSP config, remote font usage, bridge fallback behavior.
## Logging Guidance
When raising errors, include:
1. Error code.
2. Human message.
3. Correlation/request id.
4. Relevant paths/ids (sanitized).
5. Suggested first-check hints.
Example structured payload:
```json
{
"code": "BE-EXPORT-002",
"message": "FFmpeg export failed",
"requestId": "exp_20260415_001",
"context": {
"format": "mp4",
"mode": "reencode"
}
}
```

View File

@ -1,113 +0,0 @@
# Feature Spec Template
Use this template for every net-new feature and major behavior change.
## Metadata
- Spec ID: SPEC-YYYYMMDD-<short-name>
- Owner:
- Date:
- Status: draft | approved | in-progress | done
- Related issue/PR:
## Problem Statement
Describe the user problem in 2-5 sentences.
## User Story
As a <user type>, I want <capability>, so that <outcome>.
## Scope
### In Scope
1.
2.
3.
### Out of Scope
1.
2.
## Functional Requirements
1.
2.
3.
## Acceptance Criteria
1. Given <state>, when <action>, then <result>.
2. Given <state>, when <action>, then <result>.
3. Failure handling is deterministic and user-visible.
## UX Notes
- Entry points (toolbar/panel/command):
- Empty/loading/error states:
- Keyboard shortcuts / accessibility expectations:
## API And Data Contracts
- Endpoints impacted:
- Request/response changes:
- Backward compatibility plan:
- Project schema impact (`shared/project-schema.json`):
## Architecture Impact
- Frontend files/components likely affected:
- Backend routers/services likely affected:
- Tauri/bridge changes required:
## Risks
1.
2.
## Test Plan
### Unit Tests
1.
2.
### Integration Tests
1.
2.
### E2E / Smoke Tests
1.
2.
### Regression Tests
List known regressions this spec must prevent.
## Observability
- New logs/error codes:
- Metrics/traces needed:
- Diagnostics artifacts expected on failure:
## Rollout Plan
1. Development and internal validation.
2. Staged rollout or feature flag (if applicable).
3. Rollback path.
## Open Questions
1.
2.
## Definition Of Done
1. Acceptance criteria pass.
2. Tests added and green.
3. Docs/instructions updated.
4. Risks and assumptions recorded in PR summary.

View File

@ -1,18 +0,0 @@
# Feature Specs
Place one feature spec document in this folder for each feature or major behavior change.
Use [docs/spec-template.md](../spec-template.md) as the canonical template.
Recommended naming format:
- `YYYY-MM-DD-short-feature-name.md`
Examples:
- `2026-04-15-gain-zones-and-visibility-filters.md`
- `2026-04-16-speed-adjustment.md`
CI policy:
- Pull requests that change app code are expected to include at least one changed spec file in this folder.

View File

@ -1,26 +0,0 @@
import js from '@eslint/js';
import globals from 'globals';
import reactHooks from 'eslint-plugin-react-hooks';
import reactRefresh from 'eslint-plugin-react-refresh';
import tseslint from 'typescript-eslint';
// Flat ESLint config for the TypeScript + React frontend.
export default tseslint.config(
  // Never lint build output or installed dependencies.
  { ignores: ['dist', 'node_modules'] },
  {
    extends: [js.configs.recommended, ...tseslint.configs.recommended],
    files: ['**/*.{ts,tsx}'],
    languageOptions: {
      ecmaVersion: 2020,
      globals: globals.browser,
    },
    plugins: {
      'react-hooks': reactHooks,
      'react-refresh': reactRefresh,
    },
    rules: {
      ...reactHooks.configs.recommended.rules,
      // Warn (not error) when a module exports non-components, but allow
      // constant exports alongside components for HMR compatibility.
      'react-refresh/only-export-components': ['warn', { allowConstantExport: true }],
      '@typescript-eslint/no-explicit-any': 'off',
    },
  },
);

View File

@ -3,10 +3,7 @@
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; font-src 'self' data: https://fonts.gstatic.com; connect-src 'self' ipc: http://ipc.localhost http://localhost:* http://127.0.0.1:* ws://localhost:* ws://127.0.0.1:*; media-src 'self' file: blob: http://localhost:* http://127.0.0.1:*; img-src 'self' data: blob:;" />
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet" />
<meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; font-src 'self' data:; connect-src 'self' ipc: http://ipc.localhost http://localhost:* http://127.0.0.1:* ws://localhost:* ws://127.0.0.1:*; media-src 'self' file: blob: http://localhost:* http://127.0.0.1:*; img-src 'self' data: blob:;" />
<title>TalkEdit</title>
</head>
<body class="bg-editor-bg text-editor-text antialiased">

File diff suppressed because it is too large Load Diff

View File

@ -7,10 +7,11 @@
"dev": "vite",
"build": "tsc -b && vite build",
"lint": "eslint .",
"test": "vitest run",
"preview": "vite preview"
},
"dependencies": {
"@fontsource/inter": "^5.2.8",
"@fontsource/jetbrains-mono": "^5.2.8",
"@tauri-apps/api": "^2",
"@tauri-apps/plugin-dialog": "^2",
"@tauri-apps/plugin-fs": "^2",
@ -23,22 +24,14 @@
"zustand": "^5.0.0"
},
"devDependencies": {
"@eslint/js": "^9.39.4",
"@tauri-apps/cli": "^2",
"@types/react": "^19.0.0",
"@types/react-dom": "^19.0.0",
"@vitejs/plugin-react": "^4.3.0",
"autoprefixer": "^10.4.20",
"eslint": "^9.39.4",
"eslint-plugin-react-hooks": "^7.0.1",
"eslint-plugin-react-refresh": "^0.5.2",
"globals": "^17.5.0",
"jsdom": "^29.1.1",
"postcss": "^8.4.49",
"tailwindcss": "^3.4.0",
"typescript": "^5.7.0",
"typescript-eslint": "^8.58.2",
"vite": "^6.0.0",
"vitest": "^4.1.4"
"vite": "^6.0.0"
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +1,11 @@
import { useCallback, useState } from 'react';
import { useEditorStore } from '../store/editorStore';
import { useAIStore } from '../store/aiStore';
import { useLicenseStore } from '../store/licenseStore';
import { Sparkles, Scissors, Film, Loader2, Check, X, Play, Download, RotateCcw, RefreshCw, Lock } from 'lucide-react';
import { Sparkles, Scissors, Film, Loader2, Check, X, Play, Download } from 'lucide-react';
import type { ClipSuggestion } from '../types/project';
interface AIPanelProps {
onReprocess: () => void;
whisperModel: string;
setWhisperModel: (model: string) => void;
}
export default function AIPanel({ onReprocess, whisperModel, setWhisperModel }: AIPanelProps) {
export default function AIPanel() {
const { words, videoPath, backendUrl, deleteWordRange, setCurrentTime } = useEditorStore();
const canUseAI = useLicenseStore((s) => s.canUseAI);
const setShowLicenseDialog = useLicenseStore((s) => s.setShowDialog);
const {
defaultProvider,
providers,
@ -29,12 +20,10 @@ export default function AIPanel({ onReprocess, whisperModel, setWhisperModel }:
setProcessing,
} = useAIStore();
const [activeTab, setActiveTab] = useState<'filler' | 'clips' | 'reprocess'>('filler');
const [error, setError] = useState<string | null>(null);
const [activeTab, setActiveTab] = useState<'filler' | 'clips'>('filler');
const detectFillers = useCallback(async () => {
if (words.length === 0) return;
setError(null);
setProcessing(true, 'Detecting filler words...');
try {
const config = providers[defaultProvider];
@ -52,15 +41,11 @@ export default function AIPanel({ onReprocess, whisperModel, setWhisperModel }:
custom_filler_words: customFillerWords || undefined,
}),
});
if (!res.ok) {
const errData = await res.json().catch(() => ({}));
throw new Error(errData.error || `Filler detection failed (${res.status})`);
}
if (!res.ok) throw new Error('Filler detection failed');
const data = await res.json();
setFillerResult(data);
} catch (err) {
console.error(err);
setError(err instanceof Error ? err.message : 'Filler detection failed');
} finally {
setProcessing(false);
}
@ -68,7 +53,6 @@ export default function AIPanel({ onReprocess, whisperModel, setWhisperModel }:
const createClips = useCallback(async () => {
if (words.length === 0) return;
setError(null);
setProcessing(true, 'Finding best clip segments...');
try {
const config = providers[defaultProvider];
@ -91,15 +75,11 @@ export default function AIPanel({ onReprocess, whisperModel, setWhisperModel }:
target_duration: 60,
}),
});
if (!res.ok) {
const errData = await res.json().catch(() => ({}));
throw new Error(errData.error || `Clip creation failed (${res.status})`);
}
if (!res.ok) throw new Error('Clip creation failed');
const data = await res.json();
setClipSuggestions(data.clips || []);
} catch (err) {
console.error(err);
setError(err instanceof Error ? err.message : 'Clip creation failed');
} finally {
setProcessing(false);
}
@ -170,273 +150,154 @@ export default function AIPanel({ onReprocess, whisperModel, setWhisperModel }:
onClick={() => setActiveTab('filler')}
icon={<Scissors className="w-3.5 h-3.5" />}
label="Filler Words"
title="Detect and remove filler words from transcript"
/>
<TabButton
active={activeTab === 'clips'}
onClick={() => setActiveTab('clips')}
icon={<Film className="w-3.5 h-3.5" />}
label="Create Clips"
title="Find the best segments for social media clips"
/>
<TabButton
active={activeTab === 'reprocess'}
onClick={() => setActiveTab('reprocess')}
icon={<RefreshCw className="w-3.5 h-3.5" />}
label="Reprocess"
title="Re-run transcription with a different Whisper model"
/>
</div>
<div className="flex-1 overflow-y-auto p-4">
{activeTab === 'filler' && (
<div className="space-y-4">
{!canUseAI ? (
<div className="text-center py-8 px-4">
<Lock className="w-8 h-8 text-editor-text-muted mx-auto mb-3" />
<p className="text-sm font-medium mb-1">AI editing requires Business</p>
<p className="text-xs text-editor-text-muted mb-4">
Upgrade to Business to unlock filler word removal, clip suggestions, and more.
</p>
<button
onClick={() => setShowLicenseDialog(true)}
className="px-4 py-2 bg-editor-accent hover:bg-editor-accent-hover text-white rounded-lg text-sm font-medium transition-colors"
>
Upgrade Now
</button>
</div>
) : (
<>
<p className="text-xs text-editor-text-muted">
Use AI to detect and remove filler words like "um", "uh", "like", "you know" from
your transcript.
</p>
<div className="space-y-1.5">
<label className="text-[11px] text-editor-text-muted font-medium">
Custom filler words (comma-separated)
</label>
<input
type="text"
value={customFillerWords}
onChange={(e) => setCustomFillerWords(e.target.value)}
placeholder="e.g. okay, alright, anyway"
className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
/>
</div>
<button
onClick={detectFillers}
disabled={isProcessing || words.length === 0}
title="Scan the entire transcript for filler words (um, uh, like, you know) and mark for removal"
className="w-full flex items-center justify-center gap-2 px-4 py-2.5 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-40 rounded-lg text-sm font-medium transition-colors"
>
{isProcessing ? (
<>
<Loader2 className="w-4 h-4 animate-spin" />
{processingMessage}
</>
) : (
<>
<Sparkles className="w-4 h-4" />
Detect Filler Words
</>
)}
</button>
<p className="text-xs text-editor-text-muted">
Use AI to detect and remove filler words like "um", "uh", "like", "you know" from
your transcript.
</p>
<div className="space-y-1.5">
<label className="text-[11px] text-editor-text-muted font-medium">
Custom filler words (comma-separated)
</label>
<input
type="text"
value={customFillerWords}
onChange={(e) => setCustomFillerWords(e.target.value)}
placeholder="e.g. okay, alright, anyway"
className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
/>
</div>
<button
onClick={detectFillers}
disabled={isProcessing || words.length === 0}
className="w-full flex items-center justify-center gap-2 px-4 py-2.5 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-50 rounded-lg text-sm font-medium transition-colors"
>
{isProcessing ? (
<>
<Loader2 className="w-4 h-4 animate-spin" />
{processingMessage}
</>
) : (
<>
<Sparkles className="w-4 h-4" />
Detect Filler Words
</>
)}
</button>
{error && (
<div className="bg-red-500/10 border border-red-500/40 rounded text-xs text-red-300 p-2 flex items-center justify-between">
<span>{error}</span>
{fillerResult && fillerResult.fillerWords.length > 0 && (
<div className="space-y-3">
<div className="flex items-center justify-between">
<span className="text-xs font-medium">
Found {fillerResult.fillerWords.length} filler words
</span>
<div className="flex gap-1">
<button
onClick={detectFillers}
className="flex items-center gap-1 px-2 py-1 text-xs bg-red-500/20 hover:bg-red-500/30 rounded transition-colors shrink-0 ml-2"
onClick={applyFillerDeletions}
className="flex items-center gap-1 px-2 py-1 text-xs bg-editor-success/20 text-editor-success rounded hover:bg-editor-success/30"
>
<RotateCcw className="w-3 h-3" /> Retry
<Check className="w-3 h-3" /> Apply All
</button>
<button
onClick={() => setFillerResult(null)}
className="flex items-center gap-1 px-2 py-1 text-xs bg-editor-border text-editor-text-muted rounded hover:bg-editor-surface"
>
<X className="w-3 h-3" /> Dismiss
</button>
</div>
)}
{fillerResult && fillerResult.fillerWords.length > 0 && (
<div className="space-y-3">
<div className="flex items-center justify-between">
<span className="text-xs font-medium">
Found {fillerResult.fillerWords.length} filler words
</div>
<div className="space-y-1 max-h-64 overflow-y-auto">
{fillerResult.fillerWords.map((fw) => (
<div
key={fw.index}
className="flex items-center justify-between px-2 py-1.5 bg-editor-word-filler rounded text-xs"
>
<span>
<strong>"{fw.word}"</strong>
<span className="text-editor-text-muted ml-1"> {fw.reason}</span>
</span>
<div className="flex gap-1">
<button
onClick={applyFillerDeletions}
title="Create cut ranges for all detected filler words at once"
className="flex items-center gap-1 px-2 py-1 text-xs bg-editor-success/20 text-editor-success rounded hover:bg-editor-success/30"
>
<Check className="w-3 h-3" /> Apply All
</button>
<button
onClick={() => { setFillerResult(null); setError(null); }}
title="Clear detected filler word results without applying"
className="flex items-center gap-1 px-2 py-1 text-xs bg-editor-border text-editor-text-muted rounded hover:bg-editor-surface"
>
<X className="w-3 h-3" /> Dismiss
</button>
</div>
</div>
<div className="space-y-1 max-h-64 overflow-y-auto">
{fillerResult.fillerWords.map((fw) => (
<div
key={fw.index}
className="flex items-center justify-between px-2 py-1.5 bg-editor-word-filler rounded text-xs"
>
<span>
<strong>"{fw.word}"</strong>
<span className="text-editor-text-muted ml-1"> {fw.reason}</span>
</span>
</div>
))}
</div>
</div>
)}
))}
</div>
</div>
)}
{fillerResult && fillerResult.fillerWords.length === 0 && (
<p className="text-xs text-editor-success">No filler words detected.</p>
)}
</>
{fillerResult && fillerResult.fillerWords.length === 0 && (
<p className="text-xs text-editor-success">No filler words detected.</p>
)}
</div>
)}
{activeTab === 'clips' && (
<div className="space-y-4">
{!canUseAI ? (
<div className="text-center py-8 px-4">
<Lock className="w-8 h-8 text-editor-text-muted mx-auto mb-3" />
<p className="text-sm font-medium mb-1">AI clip suggestions require Business</p>
<p className="text-xs text-editor-text-muted mb-4">
Upgrade to Business to find the best segments for social media clips.
</p>
<button
onClick={() => setShowLicenseDialog(true)}
className="px-4 py-2 bg-editor-accent hover:bg-editor-accent-hover text-white rounded-lg text-sm font-medium transition-colors"
>
Upgrade Now
</button>
</div>
) : (
<>
<p className="text-xs text-editor-text-muted">
AI analyzes your transcript and suggests the most engaging segments for a
YouTube Short or social media clip.
</p>
<button
onClick={createClips}
disabled={isProcessing || words.length === 0}
title="Analyze transcript to find the most engaging 20-60 second segments for social media"
className="w-full flex items-center justify-center gap-2 px-4 py-2.5 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-40 rounded-lg text-sm font-medium transition-colors"
>
{isProcessing ? (
<>
<Loader2 className="w-4 h-4 animate-spin" />
{processingMessage}
</>
) : (
<>
<Film className="w-4 h-4" />
Find Best Clips
</>
)}
</button>
{error && (
<div className="bg-red-500/10 border border-red-500/40 rounded text-xs text-red-300 p-2 flex items-center justify-between">
<span>{error}</span>
<button
onClick={createClips}
className="flex items-center gap-1 px-2 py-1 text-xs bg-red-500/20 hover:bg-red-500/30 rounded transition-colors shrink-0 ml-2"
>
<RotateCcw className="w-3 h-3" /> Retry
</button>
</div>
)}
{clipSuggestions.length > 0 && (
<div className="space-y-3">
{clipSuggestions.map((clip, i) => (
<div key={i} className="p-3 bg-editor-surface rounded-lg space-y-2">
<div className="flex items-center justify-between">
<span className="text-xs font-semibold">{clip.title}</span>
<span className="text-[10px] text-editor-text-muted">
{Math.round(clip.endTime - clip.startTime)}s
</span>
</div>
<p className="text-[11px] text-editor-text-muted">{clip.reason}</p>
<div className="flex gap-2">
<button
onClick={() => handlePreviewClip(clip)}
title="Seek to this clip's position and play a preview"
className="flex-1 flex items-center justify-center gap-1 px-2 py-1.5 text-xs bg-editor-accent/20 text-editor-accent rounded hover:bg-editor-accent/30 transition-colors"
>
<Play className="w-3 h-3" /> Preview
</button>
<button
onClick={() => handleExportClip(clip, i)}
disabled={exportingClipIndex === i}
title="Export just this segment as a standalone video file"
className="flex-1 flex items-center justify-center gap-1 px-2 py-1.5 text-xs bg-editor-success/20 text-editor-success rounded hover:bg-editor-success/30 disabled:opacity-40 transition-colors"
>
{exportingClipIndex === i ? (
<Loader2 className="w-3 h-3 animate-spin" />
) : (
<Download className="w-3 h-3" />
)}
Export
</button>
</div>
</div>
))}
</div>
)}
</>
)}
</div>
)}
{activeTab === 'reprocess' && (
<div className="space-y-4">
<p className="text-xs text-editor-text-muted">
Re-run transcription with a different model replaces the current transcript entirely.
AI analyzes your transcript and suggests the most engaging segments for a
YouTube Short or social media clip.
</p>
<div className="space-y-1.5">
<label className="text-[11px] text-editor-text-muted font-medium">
Whisper Model
</label>
<select
value={whisperModel}
onChange={(e) => setWhisperModel(e.target.value)}
className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
>
<optgroup label="Multilingual (any language)">
<option value="tiny">tiny ~75 MB · fastest, low accuracy</option>
<option value="base">base ~140 MB · fast, decent accuracy</option>
<option value="small">small ~460 MB · good balance</option>
<option value="medium">medium ~1.5 GB · better accuracy</option>
<option value="large-v2">large-v2 ~2.9 GB · high accuracy</option>
<option value="large-v3">large-v3 ~2.9 GB · best overall </option>
<option value="large-v3-turbo">large-v3-turbo ~1.6 GB · fast + accurate </option>
<option value="distil-large-v3">distil-large-v3 ~1.5 GB · fast, near large-v3 quality</option>
</optgroup>
<optgroup label="English-only (faster &amp; more accurate for English)">
<option value="tiny.en">tiny.en ~75 MB · fastest English</option>
<option value="base.en">base.en ~140 MB · fast English</option>
<option value="small.en">small.en ~460 MB · good English</option>
<option value="medium.en">medium.en ~1.5 GB · great English</option>
<option value="distil-small.en">distil-small.en ~190 MB · fast English </option>
<option value="distil-medium.en">distil-medium.en ~750 MB · best fast English </option>
</optgroup>
</select>
</div>
<button
onClick={onReprocess}
onClick={createClips}
disabled={isProcessing || words.length === 0}
title="Re-run transcription with the selected model — this will replace all current words"
className="w-full flex items-center justify-center gap-2 px-4 py-2.5 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-40 rounded-lg text-sm font-medium transition-colors"
className="w-full flex items-center justify-center gap-2 px-4 py-2.5 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-50 rounded-lg text-sm font-medium transition-colors"
>
<RefreshCw className="w-4 h-4" />
Reprocess Transcript
{isProcessing ? (
<>
<Loader2 className="w-4 h-4 animate-spin" />
{processingMessage}
</>
) : (
<>
<Film className="w-4 h-4" />
Find Best Clips
</>
)}
</button>
{clipSuggestions.length > 0 && (
<div className="space-y-3">
{clipSuggestions.map((clip, i) => (
<div key={i} className="p-3 bg-editor-surface rounded-lg space-y-2">
<div className="flex items-center justify-between">
<span className="text-xs font-semibold">{clip.title}</span>
<span className="text-[10px] text-editor-text-muted">
{Math.round(clip.endTime - clip.startTime)}s
</span>
</div>
<p className="text-[11px] text-editor-text-muted">{clip.reason}</p>
<div className="flex gap-2">
<button
onClick={() => handlePreviewClip(clip)}
className="flex-1 flex items-center justify-center gap-1 px-2 py-1.5 text-xs bg-editor-accent/20 text-editor-accent rounded hover:bg-editor-accent/30 transition-colors"
>
<Play className="w-3 h-3" /> Preview
</button>
<button
onClick={() => handleExportClip(clip, i)}
disabled={exportingClipIndex === i}
className="flex-1 flex items-center justify-center gap-1 px-2 py-1.5 text-xs bg-editor-success/20 text-editor-success rounded hover:bg-editor-success/30 disabled:opacity-50 transition-colors"
>
{exportingClipIndex === i ? (
<Loader2 className="w-3 h-3 animate-spin" />
) : (
<Download className="w-3 h-3" />
)}
Export
</button>
</div>
</div>
))}
</div>
)}
</div>
)}
</div>
@ -449,18 +310,15 @@ function TabButton({
onClick,
icon,
label,
title,
}: {
active: boolean;
onClick: () => void;
icon: React.ReactNode;
label: string;
title?: string;
}) {
return (
<button
onClick={onClick}
title={title}
className={`flex-1 flex items-center justify-center gap-1.5 px-3 py-2.5 text-xs font-medium transition-colors border-b-2 ${
active
? 'border-editor-accent text-editor-accent'

View File

@ -1,84 +0,0 @@
import { useEditorStore } from '../store/editorStore';
import { Video, Plus, Trash2, ChevronUp, ChevronDown } from 'lucide-react';
export default function AppendClipPanel() {
const { additionalClips, addAdditionalClip, removeAdditionalClip, reorderAdditionalClip, videoPath } = useEditorStore();
const handleAddClip = async () => {
const path = await window.electronAPI?.openFile({
filters: [
{ name: 'Video Files', extensions: ['mp4', 'mkv', 'mov', 'avi', 'webm'] },
{ name: 'All Files', extensions: ['*'] },
],
});
if (path) {
addAdditionalClip(path);
}
};
return (
<div className="p-4 space-y-3">
<h3 className="text-sm font-semibold flex items-center gap-1.5">
<Video className="w-4 h-4" />
Append Clips
</h3>
<p className="text-[10px] text-editor-text-muted leading-relaxed">
Load additional video clips to append after the main video. Clips are concatenated in order during export.
</p>
{additionalClips.length === 0 ? (
<div className="text-[11px] text-editor-text-muted text-center py-3">
No additional clips loaded
</div>
) : (
<div className="space-y-1 max-h-60 overflow-y-auto">
{additionalClips.map((clip, idx) => (
<div
key={clip.id}
className="flex items-center gap-2 p-2 rounded bg-editor-surface border border-editor-border text-xs"
>
<Video className="w-3 h-3 text-editor-accent shrink-0" />
<span className="flex-1 truncate text-editor-text">{clip.label}</span>
<span className="text-[10px] text-editor-text-muted shrink-0">#{idx + 1}</span>
<div className="flex items-center gap-0.5 shrink-0">
<button
onClick={() => reorderAdditionalClip(clip.id, -1)}
disabled={idx === 0}
className="p-0.5 rounded hover:bg-editor-bg disabled:opacity-30 text-editor-text-muted hover:text-editor-text"
title="Move up"
>
<ChevronUp className="w-3 h-3" />
</button>
<button
onClick={() => reorderAdditionalClip(clip.id, 1)}
disabled={idx === additionalClips.length - 1}
className="p-0.5 rounded hover:bg-editor-bg disabled:opacity-30 text-editor-text-muted hover:text-editor-text"
title="Move down"
>
<ChevronDown className="w-3 h-3" />
</button>
</div>
<button
onClick={() => removeAdditionalClip(clip.id)}
className="p-0.5 rounded hover:bg-red-500/20 text-red-400"
title="Remove clip"
>
<Trash2 className="w-3 h-3" />
</button>
</div>
))}
</div>
)}
<button
onClick={handleAddClip}
disabled={!videoPath}
className="w-full flex items-center justify-center gap-2 px-3 py-2 rounded-lg border-2 border-dashed border-editor-border text-xs text-editor-text-muted hover:text-editor-text hover:border-editor-text-muted disabled:opacity-40 transition-colors"
title="Select a video or audio file to append during export"
>
<Plus className="w-3.5 h-3.5" />
Add Clip
</button>
</div>
);
}

View File

@ -1,150 +0,0 @@
import { useEditorStore } from '../store/editorStore';
import { Music, Trash2, Volume2, Disc3 } from 'lucide-react';
export default function BackgroundMusicPanel() {
const { backgroundMusic, setBackgroundMusic, updateBackgroundMusic } = useEditorStore();
const handleLoadMusic = async () => {
const path = await window.electronAPI?.openFile({
filters: [
{ name: 'Audio Files', extensions: ['mp3', 'wav', 'm4a', 'flac', 'ogg', 'aac', 'wma'] },
{ name: 'All Files', extensions: ['*'] },
],
});
if (path) {
setBackgroundMusic({
path,
volumeDb: -10,
duckingEnabled: true,
duckingDb: 6,
duckingAttackMs: 10,
duckingReleaseMs: 200,
});
}
};
const handleRemoveMusic = () => {
setBackgroundMusic(null);
};
return (
<div className="p-4 space-y-4">
<h3 className="text-sm font-semibold flex items-center gap-1.5">
<Music className="w-4 h-4" />
Background Music
</h3>
{!backgroundMusic ? (
<button
onClick={handleLoadMusic}
className="w-full flex items-center justify-center gap-2 px-4 py-3 rounded-lg border-2 border-dashed border-editor-border text-xs text-editor-text-muted hover:text-editor-text hover:border-editor-text-muted transition-colors"
title="Select an audio file to use as background music"
>
<Disc3 className="w-4 h-4" />
Load Music File
</button>
) : (
<div className="space-y-3">
<div className="flex items-center gap-2 p-2 rounded bg-editor-surface border border-editor-border">
<Music className="w-4 h-4 text-editor-accent shrink-0" />
<span className="flex-1 text-xs truncate">
{backgroundMusic.path.split(/[/\\]/).pop()}
</span>
<button
onClick={handleRemoveMusic}
className="p-1 rounded hover:bg-red-500/20 text-red-400 transition-colors"
title="Remove music"
>
<Trash2 className="w-3 h-3" />
</button>
</div>
<div className="space-y-2">
<div className="flex items-center gap-2">
<Volume2 className="w-3 h-3 text-editor-text-muted shrink-0" />
<span className="text-[10px] text-editor-text-muted w-16">Volume:</span>
<input
type="range"
min={-30}
max={12}
step={1}
value={backgroundMusic.volumeDb}
onChange={(e) => updateBackgroundMusic({ volumeDb: Number(e.target.value) })}
className="flex-1 h-1.5"
title="Background music volume relative to main audio — positive boosts, negative reduces"
/>
<span className="text-xs text-editor-text w-10 text-right">{backgroundMusic.volumeDb} dB</span>
</div>
</div>
<label className="flex items-center gap-2 cursor-pointer">
<input
type="checkbox"
checked={backgroundMusic.duckingEnabled}
onChange={(e) => updateBackgroundMusic({ duckingEnabled: e.target.checked })}
className="w-4 h-4 rounded bg-editor-surface border-editor-border accent-editor-accent"
title="Automatically lower music volume when speech is detected"
/>
<div>
<span className="text-xs font-medium">Auto-ducking</span>
<p className="text-[10px] text-editor-text-muted">
Lower music volume when speech is detected
</p>
</div>
</label>
{backgroundMusic.duckingEnabled && (
<div className="pl-6 space-y-2">
<div className="flex items-center gap-2">
<span className="text-[10px] text-editor-text-muted w-20">Duck amount:</span>
<input
type="range"
min={1}
max={20}
step={1}
value={backgroundMusic.duckingDb}
onChange={(e) => updateBackgroundMusic({ duckingDb: Number(e.target.value) })}
className="flex-1 h-1.5"
title="How much to reduce music volume during speech (1-20 dB)"
/>
<span className="text-xs text-editor-text w-10 text-right">{backgroundMusic.duckingDb} dB</span>
</div>
<div className="flex items-center gap-2">
<span className="text-[10px] text-editor-text-muted w-20">Attack:</span>
<input
type="range"
min={1}
max={100}
step={1}
value={backgroundMusic.duckingAttackMs}
onChange={(e) => updateBackgroundMusic({ duckingAttackMs: Number(e.target.value) })}
className="flex-1 h-1.5"
title="How quickly the ducking effect engages when speech starts"
/>
<span className="text-xs text-editor-text w-10 text-right">{backgroundMusic.duckingAttackMs}ms</span>
</div>
<div className="flex items-center gap-2">
<span className="text-[10px] text-editor-text-muted w-20">Release:</span>
<input
type="range"
min={10}
max={1000}
step={10}
value={backgroundMusic.duckingReleaseMs}
onChange={(e) => updateBackgroundMusic({ duckingReleaseMs: Number(e.target.value) })}
className="flex-1 h-1.5"
title="How quickly the ducking effect fades when speech ends"
/>
<span className="text-xs text-editor-text w-10 text-right">{backgroundMusic.duckingReleaseMs}ms</span>
</div>
</div>
)}
<p className="text-[10px] text-editor-text-muted leading-relaxed">
The music will be mixed during export. Enable auto-ducking to lower music volume whenever speech is active.
</p>
</div>
)}
</div>
);
}

View File

@ -1,13 +1,12 @@
import { useState, useCallback } from 'react';
import { useEditorStore } from '../store/editorStore';
import { Terminal, ChevronDown, ChevronUp, Play, Wifi, AlertTriangle } from 'lucide-react';
import { Terminal, ChevronDown, ChevronUp, Play, Wifi } from 'lucide-react';
export default function DevPanel() {
const [open, setOpen] = useState(false);
const [pathInput, setPathInput] = useState('');
const [testResult, setTestResult] = useState<string | null>(null);
const [testing, setTesting] = useState(false);
const [showResetConfirm, setShowResetConfirm] = useState(false);
const { backendUrl, videoPath, loadVideo } = useEditorStore();
@ -122,37 +121,6 @@ export default function DevPanel() {
{testResult}
</pre>
)}
{/* Danger Zone */}
<div className="space-y-1">
<div className="text-[#ef4444] uppercase tracking-wider text-[9px]">Danger Zone</div>
{!showResetConfirm ? (
<button
onClick={() => setShowResetConfirm(true)}
className="w-full px-2 py-1.5 rounded border border-red-500/40 text-red-400 hover:bg-red-500/10 text-xs flex items-center justify-center gap-1.5"
>
<AlertTriangle className="w-3 h-3" />
Reset Editor State
</button>
) : (
<div className="bg-[#1e1020] border border-red-500/40 rounded p-2 space-y-1.5">
<p className="text-[#fca5a5] text-[10px]">This will clear all editor data and reload the page. Unsaved changes will be lost.</p>
<div className="flex gap-1">
<button
onClick={() => setShowResetConfirm(false)}
className="flex-1 px-2 py-1 rounded text-[10px] text-[#6b7280] hover:text-white hover:bg-[#2a2d3e]"
>
Cancel
</button>
<button
onClick={() => { useEditorStore.getState().reset(); window.location.reload(); }}
className="flex-1 px-2 py-1 rounded text-[10px] border border-red-500/40 text-red-400 hover:bg-red-500/10"
>
Confirm Reset
</button>
</div>
</div>
)}
</div>
</div>
)}
</div>

View File

@ -1,90 +0,0 @@
import { Component, type ReactNode } from 'react';
interface Props {
children: ReactNode;
}
interface State {
hasError: boolean;
error: Error | null;
}
export default class ErrorBoundary extends Component<Props, State> {
constructor(props: Props) {
super(props);
this.state = { hasError: false, error: null };
}
static getDerivedStateFromError(error: Error): State {
return { hasError: true, error };
}
componentDidCatch(error: Error, info: React.ErrorInfo) {
console.error('ErrorBoundary caught:', error, info.componentStack);
try {
window.electronAPI?.logError?.(error.message, error.stack || '', info.componentStack || '');
} catch {}
}
handleReload = () => {
window.location.reload();
};
handleReset = () => {
try {
localStorage.clear();
sessionStorage.clear();
} catch {}
window.location.reload();
};
render() {
if (this.state.hasError) {
return (
<div className="h-screen flex flex-col items-center justify-center gap-6 bg-editor-bg px-6">
<div className="flex flex-col items-center gap-3 max-w-md text-center">
<div className="w-12 h-12 rounded-full bg-red-500/20 flex items-center justify-center">
<svg className="w-6 h-6 text-red-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-2.5L13.732 4c-.77-.833-1.964-.833-2.732 0L4.082 16.5c-.77.833.192 2.5 1.732 2.5z" />
</svg>
</div>
<h2 className="text-lg font-semibold text-editor-text">Something went wrong</h2>
<p className="text-xs text-editor-text-muted leading-relaxed">
An unexpected error occurred. Your work may still be recoverable.
</p>
</div>
{this.state.error && (
<details className="max-w-md w-full">
<summary className="text-xs text-editor-text-muted cursor-pointer hover:text-editor-text">
Error details
</summary>
<pre className="mt-2 p-3 rounded bg-editor-surface border border-editor-border text-[10px] text-red-300 overflow-auto max-h-32 whitespace-pre-wrap">
{this.state.error.message}
{'\n'}
{this.state.error.stack}
</pre>
</details>
)}
<div className="flex flex-col items-center gap-2">
<button
onClick={this.handleReload}
className="px-4 py-2 bg-editor-accent hover:bg-editor-accent-hover rounded-lg text-sm font-medium transition-colors"
>
Reload App
</button>
<button
onClick={this.handleReset}
className="text-xs text-editor-text-muted hover:text-editor-text underline transition-colors"
>
Reset & Clear All Data
</button>
</div>
</div>
);
}
return this.props.children;
}
}

View File

@ -1,220 +1,64 @@
import { useState, useCallback } from 'react';
import { useState, useCallback, useMemo } from 'react';
import { useEditorStore } from '../store/editorStore';
import { Download, Loader2, Zap, Cog, Info, Volume2, FileText, ZoomIn, Video, Music } from 'lucide-react';
import { Download, Loader2, Zap, Cog, Info } from 'lucide-react';
import type { ExportOptions } from '../types/project';
import { assert } from '../lib/assert';
export default function ExportDialog() {
const { videoPath, words, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, isExporting, exportProgress, backendUrl, setExporting, getKeepSegments, additionalClips, backgroundMusic } =
const { videoPath, words, deletedRanges, cutRanges, muteRanges, isExporting, exportProgress, backendUrl, setExporting, getKeepSegments } =
useEditorStore();
const hasCuts = cutRanges.length > 0;
const hasCuts = deletedRanges.length > 0;
// Compute set of deleted word indices from cutRanges
const getDeletedSet = useCallback(() => {
const deletedSet = new Set<number>();
for (const range of cutRanges) {
for (let i = 0; i < words.length; i++) {
if (words[i].start >= range.start && words[i].end <= range.end) {
deletedSet.add(i);
}
}
}
return deletedSet;
}, [cutRanges, words]);
// Detect if input is audio-only by its extension
const audioExtensions = new Set(['.wav', '.mp3', '.flac', '.m4a', '.ogg', '.aac', '.wma']);
const inputExt = videoPath ? '.' + videoPath.split('.').pop()?.toLowerCase() : '';
const isAudioOnly = videoPath ? audioExtensions.has(inputExt) : false;
const [options, setOptions] = useState<Omit<ExportOptions, 'outputPath'> & { normalizeAudio: boolean; normalizeTarget: number }>({
mode: isAudioOnly ? 'reencode' : 'fast',
const [options, setOptions] = useState<Omit<ExportOptions, 'outputPath'>>({
mode: 'fast',
resolution: '1080p',
format: isAudioOnly ? 'wav' : 'mp4',
format: 'mp4',
enhanceAudio: false,
captions: 'none',
normalizeAudio: false,
normalizeTarget: -14,
zoom: { enabled: false, zoomFactor: 1.25, panX: 0, panY: 0 },
removeBackground: false,
backgroundReplacement: 'blur',
backgroundReplacementValue: '',
});
const [exportError, setExportError] = useState<string | null>(null);
const [transcriptFormat, setTranscriptFormat] = useState<'txt' | 'srt'>('txt');
const [isTranscribingTranscript, setIsTranscribingTranscript] = useState(false);
const handleTranscriptExport = useCallback(async () => {
if (!videoPath || words.length === 0) return;
const defaultExt = transcriptFormat === 'srt' ? 'srt' : 'txt';
const outputPath = await window.electronAPI?.saveFile({
defaultPath: videoPath.replace(/\.[^.]+$/, `_transcript.${defaultExt}`),
filters: transcriptFormat === 'srt'
? [{ name: 'SRT Subtitles', extensions: ['srt'] }]
: [{ name: 'Text File', extensions: ['txt'] }],
});
if (!outputPath) return;
setIsTranscribingTranscript(true);
try {
// Compute deleted word set
const deletedSet = getDeletedSet();
// Generate content entirely on the frontend — no backend needed
let content: string;
if (transcriptFormat === 'srt') {
const lines: string[] = [];
let counter = 1;
const activeWords: Array<[number, typeof words[0]]> = [];
for (let i = 0; i < words.length; i++) {
if (!deletedSet.has(i)) activeWords.push([i, words[i]]);
}
const wordsPerLine = 8;
for (let ci = 0; ci < activeWords.length; ci += wordsPerLine) {
const chunk = activeWords.slice(ci, ci + wordsPerLine);
if (chunk.length === 0) continue;
const startTime = chunk[0][1].start;
const endTime = chunk[chunk.length - 1][1].end;
const fmt = (s: number) => {
const h = Math.floor(s / 3600);
const m = Math.floor((s % 3600) / 60);
const sec = Math.floor(s % 60);
const ms = Math.floor((s % 1) * 1000);
return `${String(h).padStart(2, '0')}:${String(m).padStart(2, '0')}:${String(sec).padStart(2, '0')},${String(ms).padStart(3, '0')}`;
};
lines.push(String(counter));
lines.push(`${fmt(startTime)} --> ${fmt(endTime)}`);
lines.push(chunk.map(([, w]) => w.word).join(' '));
lines.push('');
counter++;
}
content = lines.join('\n');
} else {
// Plain text
const activeWords: string[] = [];
for (let i = 0; i < words.length; i++) {
if (!deletedSet.has(i)) activeWords.push(words[i].word);
}
content = activeWords.join(' ');
}
// Write directly via Tauri — instant, no backend round-trip
await window.electronAPI?.writeFile(outputPath, content);
} catch (err) {
console.error('Transcript export error:', err);
setExportError(err instanceof Error ? err.message : 'Transcript export failed');
} finally {
setIsTranscribingTranscript(false);
}
}, [videoPath, words, getDeletedSet, transcriptFormat]);
const HANDLE_EXPORT_filters = useCallback(() => {
const ext = options.format;
const nameMap: Record<string, string> = {
mp4: 'MP4',
mov: 'MOV',
webm: 'WebM',
wav: 'WAV Audio',
};
return [{ name: nameMap[ext] || 'File', extensions: [ext] }];
}, [options.format]);
const handleExport = useCallback(async () => {
if (!videoPath) return;
const defaultExt = options.format === 'wav' ? 'wav' : 'mp4';
const outputPath = await window.electronAPI?.saveFile({
defaultPath: videoPath.replace(/\.[^.]+$/, `_edited.${defaultExt}`),
filters: HANDLE_EXPORT_filters(),
const outputPath = await window.desktopAPI?.saveFile({
defaultPath: videoPath.replace(/\.[^.]+$/, '_edited.mp4'),
filters: [
{ name: 'MP4', extensions: ['mp4'] },
{ name: 'MOV', extensions: ['mov'] },
{ name: 'WebM', extensions: ['webm'] },
],
});
if (!outputPath) return;
setExporting(true, 0);
setExportError(null);
try {
const keepSegments = getKeepSegments();
assert(words.length > 0, 'handleExport: words is empty before building keep segments');
const deletedSet = getDeletedSet();
// Map frontend camelCase gain/speed fields to backend snake_case
const backendGainRanges = gainRanges.map((r) => ({
start: r.start,
end: r.end,
gain_db: r.gainDb,
}));
const backendSpeedRanges = speedRanges.map((r) => ({
start: r.start,
end: r.end,
speed: r.speed,
}));
const body: Record<string, any> = {
input_path: videoPath,
output_path: outputPath,
keep_segments: keepSegments,
mute_ranges: muteRanges.length > 0 ? muteRanges.map((r) => ({ start: r.start, end: r.end })) : undefined,
gain_ranges: backendGainRanges.length > 0 ? backendGainRanges : undefined,
speed_ranges: backendSpeedRanges.length > 0 ? backendSpeedRanges : undefined,
global_gain_db: globalGainDb,
words: options.captions !== 'none' ? words : undefined,
deleted_indices: options.captions !== 'none' ? [...deletedSet] : undefined,
mode: options.mode,
resolution: options.resolution,
format: options.format,
enhanceAudio: options.enhanceAudio,
normalize_loudness: options.normalizeAudio,
normalize_target_lufs: options.normalizeTarget,
captions: options.captions,
};
// Zoom
if (options.zoom?.enabled) {
body.zoom = options.zoom;
}
// Additional clips
if (additionalClips.length > 0) {
body.additional_clips = additionalClips.map((c) => c.path);
}
// Background music
if (backgroundMusic) {
body.background_music = backgroundMusic;
}
// Background removal
if (options.removeBackground) {
body.remove_background = true;
body.background_replacement = options.backgroundReplacement || 'blur';
body.background_replacement_value = options.backgroundReplacementValue || '';
const deletedSet = new Set<number>();
for (const range of deletedRanges) {
for (const idx of range.wordIndices) deletedSet.add(idx);
}
const res = await fetch(`${backendUrl}/export`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
body: JSON.stringify({
input_path: videoPath,
output_path: outputPath,
keep_segments: keepSegments,
mute_ranges: muteRanges,
words: options.captions !== 'none' ? words : undefined,
deleted_indices: options.captions !== 'none' ? [...deletedSet] : undefined,
...options,
}),
});
if (!res.ok) {
let detail = res.statusText;
try {
const body = await res.json();
if (body?.detail) detail = String(body.detail);
} catch {
// Keep statusText fallback when response body is not JSON.
}
throw new Error(`Export failed: ${detail}`);
}
if (!res.ok) throw new Error(`Export failed: ${res.statusText}`);
setExporting(false, 100);
} catch (err) {
console.error('Export error:', err);
setExportError(err instanceof Error ? err.message : 'Export failed');
setExporting(false);
}
}, [videoPath, options, backendUrl, setExporting, getKeepSegments, getDeletedSet, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters, additionalClips, backgroundMusic]);
}, [videoPath, options, backendUrl, setExporting, getKeepSegments]);
return (
<div className="p-4 space-y-5">
@ -230,7 +74,6 @@ export default function ExportDialog() {
icon={<Zap className="w-4 h-4" />}
title="Fast"
desc="Stream copy, seconds"
tooltip="Stream copy — fast, no quality loss, but does not apply cuts or effects"
/>
<ModeCard
active={options.mode === 'reencode'}
@ -238,7 +81,6 @@ export default function ExportDialog() {
icon={<Cog className="w-4 h-4" />}
title="Re-encode"
desc="Custom quality, slower"
tooltip="Full re-encode — applies cuts, gain, speed, zoom, captions, and effects"
/>
</div>
</fieldset>
@ -254,7 +96,6 @@ export default function ExportDialog() {
{ value: '1080p', label: '1080p (Full HD)' },
{ value: '4k', label: '4K (Ultra HD)' },
]}
title="Output video resolution — higher resolution = larger file"
/>
)}
@ -267,185 +108,9 @@ export default function ExportDialog() {
{ value: 'mp4', label: 'MP4 (H.264)' },
{ value: 'mov', label: 'MOV (QuickTime)' },
{ value: 'webm', label: 'WebM (VP9)' },
...(isAudioOnly ? [{ value: 'wav' as const, label: 'WAV (Uncompressed)' }] : []),
]}
title="Output container format — MP4 is most compatible"
/>
{/* Video zoom / punch-in */}
<div className="space-y-2 pt-1 border-t border-editor-border">
<label className="flex items-center gap-2 cursor-pointer">
<input
type="checkbox"
checked={options.zoom?.enabled || false}
onChange={(e) => setOptions((o) => ({ ...o, zoom: { ...o.zoom!, enabled: e.target.checked } }))}
className="w-4 h-4 rounded bg-editor-surface border-editor-border accent-editor-accent"
title="Crop and reposition the video frame — useful for removing black bars or reframing"
/>
<div>
<span className="text-xs font-medium flex items-center gap-1">
<ZoomIn className="w-3 h-3" />
Video zoom / punch-in
</span>
<p className="text-[10px] text-editor-text-muted">
Crop and zoom into the center of the video. Requires re-encode.
</p>
</div>
</label>
{options.zoom?.enabled && (
<div className="pl-6 space-y-2">
<div className="flex items-center gap-2">
<span className="text-[10px] text-editor-text-muted w-16">Zoom:</span>
<input
type="range"
min={1}
max={3}
step={0.05}
value={options.zoom?.zoomFactor || 1}
onChange={(e) => setOptions((o) => ({ ...o, zoom: { ...o.zoom!, zoomFactor: Number(e.target.value) } }))}
className="flex-1 h-1.5"
title="Magnification level — 1.0x is original, higher values zoom in"
/>
<span className="text-xs text-editor-text w-10 text-right">{options.zoom?.zoomFactor?.toFixed(2)}x</span>
</div>
<div className="flex items-center gap-2">
<span className="text-[10px] text-editor-text-muted w-16">Pan X:</span>
<input
type="range"
min={-1}
max={1}
step={0.05}
value={options.zoom?.panX || 0}
onChange={(e) => setOptions((o) => ({ ...o, zoom: { ...o.zoom!, panX: Number(e.target.value) } }))}
className="flex-1 h-1.5"
title="Horizontal position of the crop window — negative moves left, positive moves right"
/>
<span className="text-xs text-editor-text w-10 text-right">{((options.zoom?.panX || 0) * 100).toFixed(0)}%</span>
</div>
<div className="flex items-center gap-2">
<span className="text-[10px] text-editor-text-muted w-16">Pan Y:</span>
<input
type="range"
min={-1}
max={1}
step={0.05}
value={options.zoom?.panY || 0}
onChange={(e) => setOptions((o) => ({ ...o, zoom: { ...o.zoom!, panY: Number(e.target.value) } }))}
className="flex-1 h-1.5"
title="Vertical position of the crop window — negative moves up, positive moves down"
/>
<span className="text-xs text-editor-text w-10 text-right">{((options.zoom?.panY || 0) * 100).toFixed(0)}%</span>
</div>
</div>
)}
</div>
{/* Background removal */}
{!isAudioOnly && (
<div className="space-y-2 pt-1 border-t border-editor-border">
<label className="flex items-center gap-2 cursor-pointer">
<input
type="checkbox"
checked={options.removeBackground || false}
onChange={(e) => setOptions((o) => ({ ...o, removeBackground: e.target.checked }))}
className="w-4 h-4 rounded bg-editor-surface border-editor-border accent-editor-accent"
title="Remove or replace the background behind the speaker"
/>
<div>
<span className="text-xs font-medium flex items-center gap-1">
<Video className="w-3 h-3" />
Remove background
</span>
<p className="text-[10px] text-editor-text-muted">
Replace or blur the background. Uses MediaPipe if available.
</p>
</div>
</label>
{options.removeBackground && (
<div className="pl-6 space-y-2">
<SelectField
label="Background replacement"
value={options.backgroundReplacement || 'blur'}
onChange={(v) => setOptions((o) => ({ ...o, backgroundReplacement: v as 'blur' | 'color' | 'image' }))}
options={[
{ value: 'blur', label: 'Blur background' },
{ value: 'color', label: 'Solid color' },
{ value: 'image', label: 'Custom image' },
]}
/>
{options.backgroundReplacement === 'color' && (
<input
type="text"
value={options.backgroundReplacementValue || '#00FF00'}
onChange={(e) => setOptions((o) => ({ ...o, backgroundReplacementValue: e.target.value }))}
placeholder="#00FF00"
className="w-full px-2 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:outline-none focus:border-editor-accent [color-scheme:dark]"
/>
)}
{options.backgroundReplacement === 'image' && (
<p className="text-[10px] text-editor-text-muted">Place a background image file path above.</p>
)}
</div>
)}
</div>
)}
{/* Background music track info */}
{backgroundMusic && (
<div className="pt-1 border-t border-editor-border">
<div className="flex items-center gap-1.5 text-xs text-editor-accent">
<Music className="w-3 h-3" />
Background music: {backgroundMusic.path.split(/[/\\]/).pop()}
</div>
</div>
)}
{/* Append clips info */}
{additionalClips.length > 0 && (
<div className="pt-1 border-t border-editor-border">
<div className="flex items-center gap-1.5 text-xs text-editor-accent">
<Video className="w-3 h-3" />
{additionalClips.length} additional clip{additionalClips.length > 1 ? 's' : ''} appended
</div>
</div>
)}
{/* Audio normalization — integrated into export */}
<div className="space-y-2 pt-1 border-t border-editor-border">
<label className="flex items-center gap-2 cursor-pointer">
<input
type="checkbox"
checked={options.normalizeAudio}
onChange={(e) => setOptions((o) => ({ ...o, normalizeAudio: e.target.checked }))}
className="w-4 h-4 rounded bg-editor-surface border-editor-border accent-editor-accent"
title="Normalize audio to a consistent loudness target"
/>
<div>
<span className="text-xs font-medium">Normalize loudness</span>
<p className="text-[10px] text-editor-text-muted">
Apply LUFS normalization during export. Requires re-encode.
</p>
</div>
</label>
{options.normalizeAudio && (
<div className="flex items-center gap-2 pl-6">
<Volume2 className="w-3 h-3 text-editor-text-muted shrink-0" />
<select
value={options.normalizeTarget}
onChange={(e) => setOptions((o) => ({ ...o, normalizeTarget: Number(e.target.value) }))}
className="flex-1 px-2 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:outline-none focus:border-editor-accent [color-scheme:dark]"
title="Loudness target — YouTube (-14), Spotify (-16), Broadcast (-23)"
>
<option value={-14}>YouTube (-14 LUFS)</option>
<option value={-16}>Spotify (-16 LUFS)</option>
<option value={-23}>Broadcast (-23 LUFS)</option>
<option value={-11}>Loud (-11 LUFS)</option>
<option value={-9}>Very Loud (-9 LUFS)</option>
</select>
</div>
)}
</div>
{/* Audio enhancement */}
<label className="flex items-center gap-2 cursor-pointer">
<input
@ -453,7 +118,6 @@ export default function ExportDialog() {
checked={options.enhanceAudio}
onChange={(e) => setOptions((o) => ({ ...o, enhanceAudio: e.target.checked }))}
className="w-4 h-4 rounded bg-editor-surface border-editor-border accent-editor-accent"
title="Apply noise reduction and speech enhancement"
/>
<span className="text-xs">Enhance audio (Studio Sound)</span>
</label>
@ -468,78 +132,27 @@ export default function ExportDialog() {
{ value: 'burn-in', label: 'Burn-in (permanent)' },
{ value: 'sidecar', label: 'Sidecar SRT file' },
]}
title="Burn captions into video, export as separate SRT/VTT file, or none"
/>
{/* Transcript-only export */}
<div className="space-y-2 pt-1 border-t border-editor-border">
<h4 className="text-xs font-semibold flex items-center gap-1.5">
<FileText className="w-3.5 h-3.5" />
Export Transcript Only
</h4>
<p className="text-[10px] text-editor-text-muted leading-relaxed">
Export the edited transcript as plain text or SRT without rendering video.
</p>
<div className="flex items-center gap-2">
<select
value={transcriptFormat}
onChange={(e) => setTranscriptFormat(e.target.value as 'txt' | 'srt')}
className="flex-1 px-2 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:outline-none focus:border-editor-accent [color-scheme:dark]"
>
<option value="txt">Plain Text (.txt)</option>
<option value="srt">Subtitles (.srt)</option>
</select>
<button
onClick={handleTranscriptExport}
disabled={isTranscribingTranscript || words.length === 0}
className="flex items-center gap-1.5 px-3 py-1.5 text-xs rounded bg-editor-accent/20 text-editor-accent hover:bg-editor-accent/30 disabled:opacity-40 transition-colors"
title="Export just the transcript text or subtitles without the video"
>
{isTranscribingTranscript ? (
<Loader2 className="w-3 h-3 animate-spin" />
) : (
<FileText className="w-3 h-3" />
)}
Export
</button>
</div>
</div>
{/* Export video button */}
{/* Export button */}
<button
onClick={handleExport}
disabled={isExporting || !videoPath}
className="w-full flex items-center justify-center gap-2 px-4 py-3 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-40 rounded-lg text-sm font-semibold transition-colors"
title="Start export with current settings"
className="w-full flex items-center justify-center gap-2 px-4 py-3 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-50 rounded-lg text-sm font-semibold transition-colors"
>
<Download className="w-4 h-4" />
Export Video
{isExporting ? (
<>
<Loader2 className="w-4 h-4 animate-spin" />
Exporting... {Math.round(exportProgress)}%
</>
) : (
<>
<Download className="w-4 h-4" />
Export
</>
)}
</button>
{/* Export progress */}
{isExporting && (
<div className="space-y-2">
<div className="flex items-center gap-2">
<Loader2 className="w-4 h-4 animate-spin text-editor-accent" />
<span className="text-xs font-medium">Exporting...</span>
<span className="text-xs text-editor-text-muted">{Math.round(exportProgress)}%</span>
</div>
<div className="w-full h-2 bg-editor-border rounded-full overflow-hidden">
<div
className="h-full bg-editor-accent rounded-full transition-all duration-300"
style={{ width: `${exportProgress}%` }}
/>
</div>
<p className="text-xs text-editor-text-muted">Export in progress...</p>
</div>
)}
{exportError && (
<div className="rounded border border-red-500/40 bg-red-500/10 px-3 py-2 text-xs text-red-300">
{exportError}
</div>
)}
{options.mode === 'fast' && !hasCuts && (
<p className="text-[10px] text-editor-text-muted text-center">
Fast mode uses stream copy &mdash; no quality loss, exports in seconds.
@ -564,19 +177,16 @@ function ModeCard({
icon,
title,
desc,
tooltip,
}: {
active: boolean;
onClick: () => void;
icon: React.ReactNode;
title: string;
desc: string;
tooltip?: string;
}) {
return (
<button
onClick={onClick}
title={tooltip}
className={`flex flex-col items-center gap-1 p-3 rounded-lg border-2 transition-colors ${
active
? 'border-editor-accent bg-editor-accent/10'
@ -595,22 +205,19 @@ function SelectField({
value,
onChange,
options,
title,
}: {
label: string;
value: string;
onChange: (value: string) => void;
options: Array<{ value: string; label: string }>;
title?: string;
}) {
return (
<div className="space-y-1">
<label className="text-xs text-editor-text-muted font-medium">{label}</label>
<select
title={title}
value={value}
onChange={(e) => onChange(e.target.value)}
className="w-full px-3 py-2 bg-editor-surface border border-editor-border rounded-lg text-xs text-editor-text focus:outline-none focus:border-editor-accent [color-scheme:dark]"
className="w-full px-3 py-2 bg-editor-surface border border-editor-border rounded-lg text-xs text-editor-text focus:outline-none focus:border-editor-accent"
>
{options.map((opt) => (
<option key={opt.value} value={opt.value}>

View File

@ -1,156 +0,0 @@
import { HelpCircle, Scissors, VolumeX, SlidersHorizontal, Gauge, Film, Search, FileText, Download, Music, MapPin, ListVideo, Sparkles, Keyboard } from 'lucide-react';
// Static help & reference panel shown in the sidebar: walks the user through
// getting started, each editing tool, the waveform timeline, AI features,
// export options, and keyboard shortcuts. Purely presentational — no props,
// no state; the only interaction dispatches a synthetic "?" keydown to open
// the shortcut cheatsheet overlay.
export default function HelpContent() {
  return (
    <div className="p-4 space-y-5 overflow-y-auto">
      <h3 className="text-sm font-semibold flex items-center gap-1.5">
        <HelpCircle className="w-4 h-4" />
        Help &amp; Reference
      </h3>
      <Section title="Getting Started" icon={<Film className="w-3.5 h-3.5" />}>
        <Step num={1}>Open a video file click <strong>File &gt; Open File</strong> or press <kbd>Ctrl+O</kbd></Step>
        <Step num={2}>Wait for transcription Whisper processes your audio and creates a word-level transcript</Step>
        <Step num={3}>Edit by selecting words choose <strong>Cut</strong>, <strong>Mute</strong>, <strong>Sound Gain</strong>, or <strong>Speed Adjust</strong> from the toolbar</Step>
        <Step num={4}>Use AI tools detect filler words, find clips, re-transcribe with a different model</Step>
        <Step num={5}>Export apply all edits and save your final video</Step>
        <Step>Press <kbd>?</kbd> anytime to see all keyboard shortcuts</Step>
      </Section>
      <Section title="Cut / Mute / Sound Gain / Speed Adjust" icon={<Scissors className="w-3.5 h-3.5" />}>
        <P>These are time-range edits applied during export. You create them in three ways:</P>
        <Bullet>Select words in the transcript the toolbar buttons create a zone from the selected word range</Bullet>
        <Bullet>Use <strong>Mark In</strong> (<kbd>I</kbd>) and <strong>Mark Out</strong> (<kbd>O</kbd>) on the timeline, then clicking the toolbar button</Bullet>
        <Bullet>Click a toolbar button to enter <strong>zone mode</strong>, then drag on the waveform timeline to draw a zone</Bullet>
        <P className="mt-2">
          <strong>Cut</strong> removes the segment from the output entirely<br />
          <strong>Mute</strong> silences the audio but keeps the video<br />
          <strong>Sound Gain</strong> adjusts volume (positive = louder, negative = quieter)<br />
          <strong>Speed Adjust</strong> changes playback speed (1.0x = normal, 2.0x = double)
        </P>
        <P>View and manage all zones in the <strong>Edit Zones</strong> panel. Click a zone on the waveform to select it drag edges to resize, drag the body to move.</P>
      </Section>
      <Section title="Waveform Timeline" icon={<Film className="w-3.5 h-3.5" />}>
        <Bullet>Click to seek, drag to scrub through the video</Bullet>
        <Bullet>Enter zone mode from the toolbar, then drag on the waveform to create a zone</Bullet>
        <Bullet>Click an existing zone to select it drag edges to resize, drag body to move</Bullet>
        <Bullet><kbd>Delete</kbd> or <kbd>Backspace</kbd> removes the selected zone (with confirmation)</Bullet>
        <Bullet><kbd>Ctrl+Scroll</kbd> to zoom in/out, scroll to pan horizontally</Bullet>
        <Bullet>Toggle individual zone types on/off with the colored buttons above the waveform</Bullet>
        <Bullet>"Show adjusted timeline" compresses cut regions to preview the output</Bullet>
      </Section>
      <Section title="Transcript Editing" icon={<FileText className="w-3.5 h-3.5" />}>
        <Bullet>Click a word to select it, <kbd>Shift+Click</kbd> to extend the selection</Bullet>
        <Bullet><kbd>Ctrl+Click</kbd> any word to seek the video to that exact timestamp</Bullet>
        <Bullet>Double-click any word to edit its text directly</Bullet>
        <Bullet>Words with low confidence get an orange dotted underline adjust the threshold in Settings</Bullet>
        <Bullet><kbd>Ctrl+F</kbd> to search the transcript navigate matches with <kbd>Enter</kbd> / <kbd>Shift+Enter</kbd></Bullet>
        <Bullet>Select a word range and click <strong>Re-transcribe</strong> to re-run Whisper on just that segment</Bullet>
      </Section>
      <Section title="Chapter Marks" icon={<MapPin className="w-3.5 h-3.5" />}>
        <Bullet>Add markers at the current playhead position with a label and color</Bullet>
        <Bullet>Use <kbd>I</kbd> / <kbd>O</kbd> keys to set mark in/out points on the timeline</Bullet>
        <Bullet>Markers auto-sort as chapters click <strong>Copy as YouTube timestamps</strong> to get chapter text</Bullet>
      </Section>
      <Section title="AI Tools" icon={<Sparkles className="w-3.5 h-3.5" />}>
        <P><strong>Filler Words</strong> detects "um", "uh", "like", "you know" and similar words. Add custom fillers (e.g. "okay", "alright"). <strong>Apply All</strong> creates cut ranges for every detection at once.</P>
        <P><strong>Create Clips</strong> analyzes your transcript to find the best 20-60 second segments for TikTok, YouTube Shorts, or Instagram Reels.</P>
        <P><strong>Reprocess</strong> re-run transcription with a different Whisper model. Larger models are more accurate but slower. English-only models are faster for English content.</P>
        <P>AI features work with the bundled local model (no setup needed), or via Ollama/OpenAI/Claude configure in Settings.</P>
      </Section>
      <Section title="Export" icon={<Download className="w-3.5 h-3.5" />}>
        <Bullet><strong>Fast mode</strong> (stream copy): instant, no quality loss but doesn't apply cuts or effects</Bullet>
        <Bullet><strong>Re-encode mode</strong>: applies all edits — cuts, gain, speed, zoom, captions, background music</Bullet>
        <Bullet>Captions can be burned into the video or exported as separate SRT/VTT files</Bullet>
        <Bullet>Loudness normalization targets: YouTube (-14 LUFS), Spotify (-16), Broadcast (-23)</Bullet>
        <Bullet>Audio enhancement: noise reduction and speech clarity</Bullet>
        <Bullet>Export Transcript Only — get SRT or plain text without the video</Bullet>
      </Section>
      <Section title="Background Music + Add Clips" icon={<Music className="w-3.5 h-3.5" />}>
        <Bullet><strong>Bkg. Music</strong> — add a music track with auto-ducking: the music automatically lowers when someone speaks. Adjust volume, duck amount, attack, and release times.</Bullet>
        <Bullet><strong>Add Clips</strong> — load additional video files to concatenate during export. Drag to reorder.</Bullet>
        <Bullet>Both are applied during re-encode export only</Bullet>
      </Section>
      <Section title="Keyboard Shortcuts" icon={<Keyboard className="w-3.5 h-3.5" />}>
        <P>Press <kbd>?</kbd> anytime to see the full cheatsheet overlay. Remap any shortcut in Settings.</P>
        <div className="grid grid-cols-2 gap-1 mt-2">
          <Shortcut keys="Space" desc="Play / Pause" />
          <Shortcut keys="J K L" desc="Slow / Pause / Speed" />
          <Shortcut keys="← →" desc="Skip 5s back / forward" />
          <Shortcut keys="I / O" desc="Mark In / Out points" />
          <Shortcut keys="Delete" desc="Cut selected / marked range" />
          <Shortcut keys="Ctrl+Z" desc="Undo" />
          <Shortcut keys="Ctrl+Shift+Z" desc="Redo" />
          <Shortcut keys="Ctrl+S" desc="Save project" />
          <Shortcut keys="Ctrl+E" desc="Export" />
          <Shortcut keys="Ctrl+F" desc="Find in transcript" />
          <Shortcut keys="?" desc="Toggle cheatsheet" />
        </div>
        <button
          onClick={() => window.dispatchEvent(new KeyboardEvent('keydown', { key: '?' }))}
          className="text-editor-accent hover:underline text-xs mt-2"
        >
          View full keyboard shortcut reference
        </button>
      </Section>
      <div className="text-[10px] text-editor-text-muted leading-relaxed border-t border-editor-border pt-4">
        TalkEdit is 100% offline. No account required. No data leaves your machine. No subscription — buy once, own forever.
      </div>
    </div>
  );
}
// Card-style grouping for one help topic: icon + title header above the body.
function Section({ title, icon, children }: { title: string; icon: React.ReactNode; children: React.ReactNode }) {
  return (
    <div className="space-y-2 p-3 bg-editor-surface rounded-lg">
      <h4 className="text-xs font-semibold flex items-center gap-1.5 text-editor-text">
        {icon}
        {title}
      </h4>
      <div className="space-y-1.5">
        {children}
      </div>
    </div>
  );
}
// Muted body paragraph; callers may append extra utility classes via `className`.
function P({ children, className = '' }: { children: React.ReactNode; className?: string }) {
  const paragraphClasses = `text-xs text-editor-text-muted leading-relaxed ${className}`;
  return <p className={paragraphClasses}>{children}</p>;
}
// Bullet row: small accent dot followed by muted description text.
function Bullet({ children }: { children: React.ReactNode }) {
  return (
    <div className="flex items-start gap-1.5">
      <span className="text-editor-accent mt-1.5 w-1 h-1 rounded-full bg-editor-accent shrink-0" />
      <span className="text-xs text-editor-text-muted leading-relaxed">{children}</span>
    </div>
  );
}
// Numbered step row for the "Getting Started" list.
// NOTE(review): `num` is optional — when omitted the badge circle renders
// empty (see the final un-numbered Step in HelpContent); confirm that is the
// intended look rather than hiding the badge.
function Step({ num, children }: { num?: number; children: React.ReactNode }) {
  return (
    <div className="flex items-start gap-2">
      <span className="w-5 h-5 rounded-full bg-editor-accent/20 text-editor-accent text-[10px] font-semibold flex items-center justify-center shrink-0 mt-0.5">
        {num}
      </span>
      <span className="text-xs text-editor-text-muted leading-relaxed">{children}</span>
    </div>
  );
}
// Single keyboard-shortcut row: key combo in a <kbd> chip plus its description.
function Shortcut({ keys, desc }: { keys: string; desc: string }) {
  return (
    <div className="flex items-center gap-2 text-xs">
      <kbd className="px-1.5 py-0.5 text-[10px] font-mono bg-editor-bg border border-editor-border rounded text-editor-text min-w-[72px] text-center">{keys}</kbd>
      <span className="text-editor-text-muted">{desc}</span>
    </div>
  );
}

View File

@ -1,296 +0,0 @@
import { useState } from 'react';
import { useLicenseStore } from '../store/licenseStore';
import { Key, Check, X, Loader2, Shield, Clock, AlertTriangle } from 'lucide-react';
// License status UI. Renders one of three states from the license store:
//   Licensed → small persistent badge (bottom-right corner)
//   Trial    → countdown chip that opens the activation dialog on click
//   Expired  → persistent top banner plus the (dismissible) activation dialog
// Activation is a two-step flow: first verify the key to fetch its registered
// email, then activate only after the user confirms that email.
export default function LicenseDialog() {
  const { status, showDialog, setShowDialog, activateLicense } = useLicenseStore();
  const [key, setKey] = useState('');
  const [error, setError] = useState<string | null>(null);
  const [activating, setActivating] = useState(false);
  // Email registered to the entered key; non-null once the verify step passed.
  const [confirmedEmail, setConfirmedEmail] = useState<string | null>(null);
  const [verifying, setVerifying] = useState(false);
  // Step 2 when confirmedEmail is set (complete activation); otherwise step 1
  // (verify the key via the desktop bridge to fetch the registered email).
  const handleActivate = async () => {
    if (!key.trim()) return;
    setError(null);
    // If we already verified and the user confirmed, complete activation
    if (confirmedEmail) {
      setActivating(true);
      const ok = await activateLicense(key.trim());
      if (!ok) {
        setError('Invalid license key. Check it was entered correctly.');
      }
      setActivating(false);
      return;
    }
    // Step 1: Verify the key (don't cache yet) to get the email
    setVerifying(true);
    try {
      const payload = await window.electronAPI?.verifyLicense(key.trim());
      if (payload?.customer_email) {
        setConfirmedEmail(payload.customer_email);
      } else {
        setError('Invalid license key. Check it was entered correctly.');
      }
    } catch {
      setError('Invalid license key. Check it was entered correctly.');
    }
    setVerifying(false);
  };
  // Reset the dialog back to the blank key-entry state.
  const handleDeny = () => {
    setConfirmedEmail(null);
    setKey('');
    setError(null);
  };
  // Unix-seconds timestamp → "Jan 1, 2026"-style date string.
  const formatDate = (ts: number) => {
    const d = new Date(ts * 1000);
    return d.toLocaleDateString('en-US', { year: 'numeric', month: 'short', day: 'numeric' });
  };
  // License store not initialized yet — render nothing.
  if (!status) return null;
  // Licensed: passive badge only, no dialog.
  if (status.tag === 'Licensed') {
    return (
      <div className="fixed bottom-4 right-4 z-50">
        <div className="flex items-center gap-2 px-3 py-2 rounded-lg bg-editor-surface border border-editor-border shadow-lg text-xs">
          <Shield className="w-3.5 h-3.5 text-editor-success" />
          <span className="text-editor-text-muted">
            {status.license.tier === 'business' ? 'Business' : 'Pro'} {status.license.customer_email}
          </span>
          <span className="text-editor-text-muted/50">
            expires {formatDate(status.license.expires_at)}
          </span>
        </div>
      </div>
    );
  }
  // Trial: countdown chip; clicking it opens the activation dialog.
  if (status.tag === 'Trial') {
    return (
      <>
        <div className="fixed bottom-4 right-4 z-50">
          <button
            onClick={() => setShowDialog(true)}
            className="flex items-center gap-2 px-3 py-2 rounded-lg bg-editor-surface border border-editor-border shadow-lg text-xs hover:bg-editor-bg transition-colors"
          >
            <Clock className="w-3.5 h-3.5 text-editor-accent" />
            <span className="text-editor-text-muted">
              Trial {status.days_remaining} day{status.days_remaining !== 1 ? 's' : ''} left
            </span>
          </button>
        </div>
        {showDialog && (
          <LicenseActivateDialog
            onClose={() => { setShowDialog(false); handleDeny(); }}
            onActivate={handleActivate}
            onDeny={handleDeny}
            keyValue={key}
            setKeyValue={setKey}
            error={error}
            activating={activating}
            verifying={verifying}
            confirmedEmail={confirmedEmail}
            trialEnding={status.days_remaining <= 3}
          />
        )}
      </>
    );
  }
  // Expired — show banner + activation dialog (both dismissible)
  return (
    <>
      <ExpiredBanner onActivate={() => setShowDialog(true)} />
      {showDialog && (
        <LicenseActivateDialog
          onClose={() => { setShowDialog(false); handleDeny(); }}
          onActivate={handleActivate}
          onDeny={handleDeny}
          keyValue={key}
          setKeyValue={setKey}
          error={error}
          activating={activating}
          verifying={verifying}
          confirmedEmail={confirmedEmail}
          expired
        />
      )}
    </>
  );
}
/**
 * Persistent top banner shown when the trial has expired.
 * Export and project loading remain available; the inline button invokes
 * `onActivate` (the parent opens the license-activation dialog).
 */
function ExpiredBanner({ onActivate }: { onActivate: () => void }) {
  return (
    <div className="h-9 flex items-center justify-center gap-3 px-4 bg-red-500/15 border-b border-red-500/30 shrink-0">
      <AlertTriangle className="w-3.5 h-3.5 text-red-400 shrink-0" />
      <span className="text-xs text-red-300">
        Trial expired export and project loading still work.&nbsp;
        <button onClick={onActivate} className="underline font-medium hover:text-red-200">
          Activate license
        </button>
        &nbsp;to restore editing.
      </span>
    </div>
  );
}
// Modal for entering and activating a license key.
// Two-phase flow driven by `confirmedEmail`:
//   phase 1 (null) — key-entry form; "Verify Key" triggers `onActivate`,
//     which verifies the key and sets `confirmedEmail` in the parent;
//   phase 2 (set)  — confirmation card showing the registered email;
//     "Activate" calls `onActivate` again to complete activation.
// `trialEnding` / `expired` only change the explanatory copy in phase 1.
function LicenseActivateDialog({
  onClose,
  onActivate,
  onDeny,
  keyValue,
  setKeyValue,
  error,
  activating,
  verifying,
  confirmedEmail,
  trialEnding,
  expired,
}: {
  onClose: () => void;
  onActivate: () => void;
  onDeny: () => void;
  keyValue: string;
  setKeyValue: (v: string) => void;
  error: string | null;
  activating: boolean;
  verifying: boolean;
  confirmedEmail: string | null;
  trialEnding?: boolean;
  expired?: boolean;
}) {
  // Disable the submit button while verification or activation is in flight.
  const isProcessing = activating || verifying;
  // Phase 2: key verified — ask the user to confirm the registered email.
  if (confirmedEmail) {
    return (
      <div className="fixed inset-0 z-[80] flex items-center justify-center bg-black/60 px-4">
        <div
          className="w-full max-w-md rounded-xl border border-editor-border bg-editor-bg p-6 space-y-4"
          onClick={(e) => e.stopPropagation()}
        >
          <div className="flex items-center gap-2">
            <Shield className="w-5 h-5 text-editor-accent" />
            <h3 className="text-sm font-semibold">Confirm License</h3>
          </div>
          <div className="p-3 rounded-lg bg-editor-surface border border-editor-border space-y-1">
            <p className="text-xs text-editor-text-muted">
              This license key is registered to:
            </p>
            <p className="text-sm font-medium text-editor-text">{confirmedEmail}</p>
          </div>
          <p className="text-xs text-editor-text-muted leading-relaxed">
            License keys are tied to your email. Sharing this key may result in deactivation.
          </p>
          <div className="flex items-center justify-end gap-2 pt-1">
            <button
              onClick={onDeny}
              className="px-3 py-1.5 rounded-md text-xs text-editor-text-muted hover:text-editor-text hover:bg-editor-surface"
            >
              Cancel
            </button>
            <button
              onClick={onActivate}
              disabled={activating}
              className="px-4 py-2 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-40 rounded-lg text-sm font-medium transition-colors flex items-center gap-2"
            >
              {activating ? (
                <Loader2 className="w-4 h-4 animate-spin" />
              ) : (
                <Check className="w-4 h-4" />
              )}
              Activate
            </button>
          </div>
        </div>
      </div>
    );
  }
  // Phase 1: key-entry form with context-dependent messaging.
  return (
    <div className="fixed inset-0 z-[80] flex items-center justify-center bg-black/60 px-4">
      <div
        className="w-full max-w-md rounded-xl border border-editor-border bg-editor-bg p-6 space-y-4"
        onClick={(e) => e.stopPropagation()}
      >
        <div className="flex items-center justify-between">
          <div className="flex items-center gap-2">
            <Key className="w-5 h-5 text-editor-accent" />
            <h3 className="text-sm font-semibold">
              {expired ? 'Trial Expired' : 'Activate TalkEdit'}
            </h3>
          </div>
          <button
            onClick={onClose}
            className="p-1 rounded hover:bg-editor-surface text-editor-text-muted"
            title="Close dialog"
          >
            <X className="w-4 h-4" />
          </button>
        </div>
        {expired && (
          <div className="text-xs text-editor-text-muted leading-relaxed space-y-1">
            <p className="font-medium text-red-300">Your 30-day trial has ended.</p>
            <p>
              You can still <strong>export videos</strong> and <strong>load projects</strong>.
              Enter a license key to restore editing, AI tools, and all other features.
            </p>
          </div>
        )}
        {trialEnding && !expired && (
          <div className="flex items-start gap-2 p-3 rounded-lg bg-amber-500/10 border border-amber-500/30">
            <AlertTriangle className="w-4 h-4 text-amber-400 shrink-0 mt-0.5" />
            <p className="text-xs text-amber-300">Your trial ends soon. Activate now to keep using all features.</p>
          </div>
        )}
        {!expired && !trialEnding && (
          <p className="text-xs text-editor-text-muted leading-relaxed">
            Enter your license key to activate TalkEdit Pro or Business.
            You received this key by email after purchase.
          </p>
        )}
        <div className="space-y-1.5">
          <label className="text-xs text-editor-text-muted font-medium">License Key</label>
          <textarea
            value={keyValue}
            onChange={(e) => { setKeyValue(e.target.value); }}
            placeholder="talkedit_v1_..."
            rows={3}
            className="w-full px-3 py-2 text-xs font-mono bg-editor-surface border border-editor-border rounded-lg text-editor-text placeholder:text-editor-text-muted/50 focus:outline-none focus:border-editor-accent resize-none"
          />
          {error && <p className="text-xs text-red-400">{error}</p>}
        </div>
        <button
          onClick={onActivate}
          disabled={isProcessing || !keyValue.trim()}
          className="w-full flex items-center justify-center gap-2 px-4 py-2.5 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-40 rounded-lg text-sm font-medium transition-colors"
        >
          {isProcessing ? (
            <Loader2 className="w-4 h-4 animate-spin" />
          ) : (
            <Key className="w-4 h-4" />
          )}
          {verifying ? 'Verifying...' : 'Verify Key'}
        </button>
        <p className="text-[10px] text-editor-text-muted text-center">
          No license? <a href="#" className="text-editor-accent hover:underline">Purchase at talked.it</a>
        </p>
      </div>
    </div>
  );
}

View File

@ -1,171 +0,0 @@
import { useState } from 'react';
import { useEditorStore } from '../store/editorStore';
import { MapPin, Trash2, PencilLine, Check, X, Copy } from 'lucide-react';
// Human-readable names for the marker color palette, keyed by hex value.
const COLOR_NAMES: Record<string, string> = {
  '#6366f1': 'Indigo',
  '#ef4444': 'Red',
  '#22c55e': 'Green',
  '#f59e0b': 'Amber',
  '#3b82f6': 'Blue',
  '#ec4899': 'Pink',
  '#8b5cf6': 'Purple',
  '#14b8a6': 'Teal',
};
// Palette order mirrors COLOR_NAMES insertion order (string keys preserve it),
// so deriving the array keeps the two in sync by construction.
const COLORS = Object.keys(COLOR_NAMES);
export default function MarkersPanel() {
const { timelineMarkers, addTimelineMarker, updateTimelineMarker, removeTimelineMarker, getChapters } =
useEditorStore();
const currentTime = useEditorStore((s) => s.currentTime);
const [editingId, setEditingId] = useState<string | null>(null);
const [editLabel, setEditLabel] = useState('');
const [newLabel, setNewLabel] = useState('');
const [newColor, setNewColor] = useState(COLORS[0]);
const [showChapters, setShowChapters] = useState(false);
const chapters = getChapters();
const addAtCurrentTime = () => {
addTimelineMarker(currentTime, newLabel || undefined, newColor);
setNewLabel('');
};
const startEdit = (id: string, label: string) => {
setEditingId(id);
setEditLabel(label);
};
const commitEdit = (id: string) => {
if (editLabel.trim()) {
updateTimelineMarker(id, { label: editLabel.trim() });
}
setEditingId(null);
};
const exportChapters = () => {
const lines = chapters.map((ch) => {
const h = Math.floor(ch.startTime / 3600);
const m = Math.floor((ch.startTime % 3600) / 60);
const s = Math.floor(ch.startTime % 60);
const timeStr = `${h > 0 ? `${h}:` : ''}${String(m).padStart(2, '0')}:${String(s).padStart(2, '0')}`;
return `${timeStr} ${ch.label}`;
});
const text = lines.join('\n');
navigator.clipboard.writeText(text).catch(() => {});
};
return (
<div className="p-4 space-y-4">
<div className="space-y-1">
<h3 className="text-sm font-semibold flex items-center gap-1.5">
<MapPin className="w-4 h-4" />
Timeline Markers
</h3>
<p className="text-xs text-editor-text-muted">
Drop markers at key points. Markers become YouTube-compatible chapters.
</p>
</div>
{/* Add marker at current time */}
<div className="space-y-2">
<div className="flex items-center gap-2">
<input
value={newLabel}
onChange={(e) => setNewLabel(e.target.value)}
placeholder={`${currentTime.toFixed(2)}s`}
className="flex-1 px-2 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:outline-none focus:border-editor-accent"
/>
<div className="flex gap-0.5">
{COLORS.map((c) => (
<button
key={c}
onClick={() => setNewColor(c)}
className={`w-4 h-4 rounded-full border ${newColor === c ? 'border-white ring-1 ring-white' : 'border-transparent'}`}
style={{ backgroundColor: c }}
title={COLOR_NAMES[c]}
/>
))}
</div>
</div>
<button
onClick={addAtCurrentTime}
className="w-full flex items-center justify-center gap-1 px-2 py-1.5 text-xs bg-editor-accent/20 text-editor-accent hover:bg-editor-accent/30 rounded"
title="Add a marker at the current playhead position"
>
<MapPin className="w-3 h-3" />
Add
</button>
</div>
{/* Marker list */}
{timelineMarkers.length > 0 ? (
<div className="space-y-1 max-h-60 overflow-y-auto">
{timelineMarkers.map((m) => (
<div
key={m.id}
className="flex items-center gap-2 px-2 py-1.5 rounded bg-editor-surface border border-editor-border text-xs"
>
<div className="w-2.5 h-2.5 rounded-full shrink-0" style={{ backgroundColor: m.color }} />
<span className="text-[10px] text-editor-text-muted w-14 shrink-0">{m.time.toFixed(2)}s</span>
{editingId === m.id ? (
<>
<input
value={editLabel}
onChange={(e) => setEditLabel(e.target.value)}
autoFocus
className="flex-1 px-1.5 py-0.5 text-xs bg-editor-bg border border-editor-border rounded focus:outline-none focus:border-editor-accent"
/>
<button onClick={() => commitEdit(m.id)} className="p-0.5 text-editor-success"><Check className="w-3 h-3" /></button>
<button onClick={() => setEditingId(null)} className="p-0.5 text-editor-text-muted"><X className="w-3 h-3" /></button>
</>
) : (
<>
<span className="flex-1 truncate">{m.label}</span>
<button onClick={() => startEdit(m.id, m.label)} className="p-0.5 hover:text-editor-accent" title="Edit marker label and color"><PencilLine className="w-3 h-3" /></button>
<button onClick={() => { if (window.confirm("Delete this marker?")) removeTimelineMarker(m.id); }} className="p-0.5 hover:text-editor-danger" title="Delete this marker"><Trash2 className="w-3 h-3" /></button>
</>
)}
</div>
))}
</div>
) : (
<div className="p-4 rounded border border-dashed border-editor-border text-center">
<p className="text-xs text-editor-text-muted">
No markers yet. Press I and O on the timeline to set mark in/out points, then add a marker here.
</p>
</div>
)}
{/* Chapters */}
{chapters.length > 0 && (
<div className="space-y-2 pt-1 border-t border-editor-border">
<button
onClick={() => setShowChapters(!showChapters)}
className="flex items-center gap-1 text-xs font-medium text-editor-text-muted hover:text-editor-text"
>
{showChapters ? '▼' : '▶'} Chapters ({chapters.length})
</button>
{showChapters && (
<div className="space-y-1">
{chapters.map((ch) => (
<div key={ch.markerId} className="flex items-center gap-2 text-[10px] text-editor-text-muted">
<span className="font-mono">{ch.label}</span>
</div>
))}
<button
onClick={exportChapters}
className="flex items-center gap-1 text-[10px] text-editor-accent hover:underline"
title="Copy chapter timestamps to clipboard in YouTube format"
>
<Copy className="w-2.5 h-2.5" />
Copy as YouTube timestamps
</button>
</div>
)}
</div>
)}
</div>
);
}

View File

@ -1,115 +1,17 @@
import { useAIStore } from '../store/aiStore';
import { useState, useEffect, useCallback } from 'react';
import type { AIProvider, KeyBinding, HotkeyPreset } from '../types/project';
import { useState, useEffect } from 'react';
import type { AIProvider } from '../types/project';
import { useEditorStore } from '../store/editorStore';
import { Bot, Cloud, Brain, RefreshCw, Keyboard, Trash2, HardDrive } from 'lucide-react';
import { loadBindings, saveBindings, applyPreset as applyKeyPreset, DEFAULT_PRESETS, detectConflicts as detectKeyConflicts } from '../lib/keybindings';
import { Bot, Cloud, Brain, RefreshCw } from 'lucide-react';
export default function SettingsPanel() {
const { providers, defaultProvider, setProviderConfig, setDefaultProvider } = useAIStore();
const { backendUrl, zonePreviewPaddingSeconds, setZonePreviewPaddingSeconds } = useEditorStore();
const CONFIDENCE_THRESHOLD_KEY = 'talkedit:confidenceThreshold';
const [confidenceThreshold, setConfidenceThresholdState] = useState(() => {
const stored = typeof window !== 'undefined' ? Number(window.localStorage.getItem(CONFIDENCE_THRESHOLD_KEY)) : 0;
return Number.isFinite(stored) ? stored : 0.6;
});
const setConfidenceThreshold = (value: number) => {
const clamped = Math.max(0, Math.min(1, value));
setConfidenceThresholdState(clamped);
if (typeof window !== 'undefined') {
window.localStorage.setItem(CONFIDENCE_THRESHOLD_KEY, String(clamped));
}
};
// Keyboard shortcuts state
const [bindings, setBindings] = useState<KeyBinding[]>(() => {
try { return loadBindings(); } catch { return DEFAULT_PRESETS['standard']; }
});
const [editingKey, setEditingKey] = useState<string | null>(null);
const [editKeyValue, setEditKeyValue] = useState('');
const conflicts = detectKeyConflicts(bindings);
const persistBindings = (newB: KeyBinding[]) => {
saveBindings(newB);
setBindings(newB);
};
const applyPresetAction = (preset: HotkeyPreset) => {
persistBindings(applyKeyPreset(preset));
};
const startKeyEdit = (idx: number) => {
setEditingKey(bindings[idx].id);
setEditKeyValue(bindings[idx].keys);
};
const handleKeyCapture = (e: React.KeyboardEvent, idx: number) => {
e.preventDefault();
const parts: string[] = [];
if (e.ctrlKey || e.metaKey) parts.push('Ctrl');
if (e.shiftKey) parts.push('Shift');
if (e.altKey) parts.push('Alt');
const key = e.key === ' ' ? 'Space' : e.key.length === 1 ? e.key.toUpperCase() : e.key;
if (!['Control', 'Shift', 'Alt', 'Meta'].includes(key)) parts.push(key);
if (parts.length === 0) return;
const combo = parts.join('+');
const newBindings = bindings.map((b, i) => (i === idx ? { ...b, keys: combo } : b));
setEditKeyValue(combo);
setEditingKey(null);
persistBindings(newBindings);
};
const handleReset = (idx: number) => {
const standard = DEFAULT_PRESETS['standard'];
const existing = standard.find((b: KeyBinding) => b.id === bindings[idx].id);
if (!existing) return;
persistBindings(bindings.map((b, i) => (i === idx ? { ...existing } : b)));
};
const [models, setModels] = useState<ModelInfo[]>([]);
const [loadingModels, setLoadingModels] = useState(false);
const [deleting, setDeleting] = useState<string | null>(null);
const fetchModels = useCallback(async () => {
setLoadingModels(true);
try {
const list = await window.electronAPI.listModels();
setModels(list);
} catch {
setModels([]);
} finally {
setLoadingModels(false);
}
}, []);
useEffect(() => {
fetchModels();
}, [fetchModels]);
const handleDeleteModel = useCallback(async (model: ModelInfo) => {
if (deleting) return;
setDeleting(model.path);
try {
await window.electronAPI.deleteModel(model.path);
setModels((prev) => prev.filter((m) => m.path !== model.path));
} catch {
// Model deletion failed silently
} finally {
setDeleting(null);
}
}, [deleting]);
const formatBytes = (bytes: number) => {
if (bytes < 1024) return `${bytes} B`;
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
};
const { backendUrl } = useEditorStore();
const [ollamaModels, setOllamaModels] = useState<string[]>([]);
const [loadingOllamaModels, setLoadingOllamaModels] = useState(false);
const [loadingModels, setLoadingModels] = useState(false);
const fetchOllamaModels = useCallback(async () => {
setLoadingOllamaModels(true);
const fetchOllamaModels = async () => {
setLoadingModels(true);
try {
const res = await fetch(`${backendUrl}/ai/ollama-models`);
if (res.ok) {
@ -119,13 +21,13 @@ export default function SettingsPanel() {
} catch {
setOllamaModels([]);
} finally {
setLoadingOllamaModels(false);
setLoadingModels(false);
}
}, [backendUrl]);
};
useEffect(() => {
fetchOllamaModels();
}, [fetchOllamaModels]);
}, [backendUrl]);
const providerIcons: Record<AIProvider, React.ReactNode> = {
ollama: <Bot className="w-4 h-4" />,
@ -133,132 +35,15 @@ export default function SettingsPanel() {
claude: <Brain className="w-4 h-4" />,
};
const providerLabels: Record<AIProvider, string> = {
ollama: 'Ollama (Local)',
openai: 'OpenAI',
claude: 'Claude (Anthropic)',
};
return (
<div className="p-4 space-y-6">
<h3 className="text-sm font-semibold">Settings</h3>
<ProviderSection title="Playback" icon={<RefreshCw className="w-4 h-4" />}>
<div className="space-y-1">
<label className="text-xs text-editor-text-muted">Zone preview padding (seconds before and after)</label>
<div className="flex items-center gap-2">
<input
type="range"
min={0}
max={10}
step={0.25}
value={zonePreviewPaddingSeconds}
onChange={(e) => setZonePreviewPaddingSeconds(Number(e.target.value) || 0)}
className="flex-1 h-1.5"
title="Extra time in seconds to show before and after each zone during preview"
/>
<input
type="number"
min={0}
max={10}
step={0.25}
value={zonePreviewPaddingSeconds}
onChange={(e) => setZonePreviewPaddingSeconds(Number(e.target.value) || 0)}
className="w-16 px-2 py-1 bg-editor-bg border border-editor-border rounded-lg text-xs text-editor-text focus:outline-none focus:border-editor-accent"
title="Extra time in seconds to show before and after each zone during preview"
/>
<span className="text-xs text-editor-text-muted w-6">s</span>
</div>
</div>
</ProviderSection>
{/* Confidence threshold */}
<div className="space-y-2">
<label className="text-xs text-editor-text-muted font-medium">Low-Confidence Word Threshold</label>
<p className="text-[10px] text-editor-text-muted leading-relaxed">
Words with confidence below this value are highlighted with an orange dotted underline.
Whisper often gets homophones and proper nouns wrong at low confidence.
</p>
<div className="flex items-center gap-2">
<input
type="range"
min={0}
max={1}
step={0.05}
value={confidenceThreshold}
onChange={(e) => setConfidenceThreshold(Number(e.target.value))}
className="flex-1 h-1.5"
title="Words below this confidence get an orange underline — lower values show fewer warnings"
/>
<input
type="number"
min={0}
max={1}
step={0.05}
value={confidenceThreshold}
onChange={(e) => setConfidenceThreshold(Math.max(0, Math.min(1, Number(e.target.value) || 0)))}
className="w-16 px-2 py-1 bg-editor-bg border border-editor-border rounded-lg text-xs text-editor-text focus:outline-none focus:border-editor-accent"
title="Words below this confidence get an orange underline — lower values show fewer warnings"
/>
</div>
<div className="flex items-center justify-between text-[10px]">
<span className="text-editor-text-muted">Show all</span>
<span className="font-medium text-editor-text">{confidenceThreshold.toFixed(2)}</span>
<span className="text-editor-text-muted">Strict</span>
</div>
</div>
{/* Keyboard shortcuts */}
<div className="space-y-2 pt-1 border-t border-editor-border">
<h4 className="text-xs font-semibold flex items-center gap-1.5">
<Keyboard className="w-3.5 h-3.5" />
Keyboard Shortcuts
</h4>
<div className="flex items-center gap-2">
<button
onClick={() => applyPresetAction('standard')}
className="flex-1 px-2 py-1.5 text-xs rounded bg-editor-accent/20 text-editor-accent hover:bg-editor-accent/30"
title="Reset all shortcuts to the Standard preset"
>
Standard Preset
</button>
<button
onClick={() => applyPresetAction('left-hand')}
className="flex-1 px-2 py-1.5 text-xs rounded bg-editor-accent/20 text-editor-accent hover:bg-editor-accent/30"
title="Reset all shortcuts to the Left-Hand preset"
>
Left-Hand Preset
</button>
</div>
{conflicts.length > 0 && (
<div className="px-2 py-1 rounded border border-red-500/40 bg-red-500/10 text-[10px] text-red-300">
{conflicts.join('; ')}
</div>
)}
<div className="max-h-52 overflow-y-auto space-y-1 pr-1">
{bindings.map((b, i) => (
<div key={b.id} className="flex items-center gap-2 text-[11px]">
<span className="flex-1 truncate text-editor-text-muted">{b.label}</span>
<input
value={editingKey === b.id ? editKeyValue : b.keys}
onFocus={() => startKeyEdit(i)}
onChange={(e) => {
setEditingKey(b.id);
setEditKeyValue(e.target.value);
}}
onKeyDown={(e) => handleKeyCapture(e, i)}
className="w-28 px-2 py-1 text-[10px] font-mono bg-editor-bg border border-editor-border rounded text-center focus:outline-none focus:border-editor-accent"
placeholder="Type shortcut"
title="Click then press the desired key combination"
/>
<button
onClick={() => handleReset(i)}
className="text-[10px] text-editor-text-muted hover:text-editor-text px-1"
title="Reset this shortcut to default"
>
</button>
</div>
))}
</div>
<p className="text-[10px] text-editor-text-muted">
Press <kbd>?</kbd> anytime to view shortcuts. Changes apply immediately.
</p>
</div>
<h3 className="text-sm font-semibold">AI Settings</h3>
{/* Default provider selector */}
<div className="space-y-2">
@ -268,11 +53,6 @@ export default function SettingsPanel() {
<button
key={p}
onClick={() => setDefaultProvider(p)}
title={`Use ${p.charAt(0).toUpperCase() + p.slice(1)} for AI features — ${
p === 'ollama' ? 'Use a local Ollama instance' :
p === 'openai' ? "Use OpenAI's API (requires API key)" :
"Use Anthropic's Claude API (requires API key)"
}`}
className={`flex flex-col items-center gap-1 p-2 rounded-lg border transition-colors text-[10px] ${
defaultProvider === p
? 'border-editor-accent bg-editor-accent/10 text-editor-accent'
@ -286,52 +66,6 @@ export default function SettingsPanel() {
</div>
</div>
{/* Manage downloaded models */}
<div className="space-y-2 pt-1 border-t border-editor-border">
<h4 className="text-xs font-semibold flex items-center gap-1.5">
<HardDrive className="w-3.5 h-3.5" />
Manage Models
</h4>
<p className="text-[10px] text-editor-text-muted leading-relaxed">
Downloaded Whisper transcription models and bundled LLM files.
</p>
{models.length === 0 ? (
<p className="text-xs text-editor-text-muted">No downloaded models found.</p>
) : (
<div className="space-y-1.5">
{models.map((m) => (
<div key={m.path} className="flex items-center gap-2 p-2 rounded bg-editor-bg border border-editor-border">
<div className="flex-1 min-w-0">
<p className="text-xs text-editor-text truncate">{m.name}</p>
<p className="text-[10px] text-editor-text-muted">
{formatBytes(m.size_bytes)} &middot; {m.kind === 'whisper' ? 'Whisper' : 'LLM'}
</p>
</div>
<button
onClick={() => handleDeleteModel(m)}
disabled={deleting === m.path}
className="p-1.5 rounded text-editor-text-muted hover:text-red-400 hover:bg-red-500/10 transition-colors disabled:opacity-40"
title="Delete model"
>
<Trash2 className="w-3.5 h-3.5" />
</button>
</div>
))}
</div>
)}
<button
onClick={fetchModels}
disabled={loadingModels}
className="text-[10px] text-editor-accent hover:underline flex items-center gap-0.5"
title="Refresh list of downloaded models"
>
<RefreshCw className={`w-2.5 h-2.5 ${loadingModels ? 'animate-spin' : ''}`} />
Refresh
</button>
</div>
<h4 className="text-xs font-semibold uppercase tracking-wide text-editor-text-muted">AI Settings</h4>
{/* Ollama settings */}
<ProviderSection title="Ollama (Local)" icon={providerIcons.ollama}>
<InputField
@ -339,18 +73,16 @@ export default function SettingsPanel() {
value={providers.ollama.baseUrl || ''}
onChange={(v) => setProviderConfig('ollama', { baseUrl: v })}
placeholder="http://localhost:11434"
title="URL of your Ollama instance — http://localhost:11434 by default"
/>
<div className="space-y-1">
<div className="flex items-center justify-between">
<label className="text-xs text-editor-text-muted">Model</label>
<button
onClick={fetchOllamaModels}
disabled={loadingOllamaModels}
disabled={loadingModels}
className="text-[10px] text-editor-accent hover:underline flex items-center gap-0.5"
title="Refresh available Ollama models"
>
<RefreshCw className={`w-2.5 h-2.5 ${loadingOllamaModels ? 'animate-spin' : ''}`} />
<RefreshCw className={`w-2.5 h-2.5 ${loadingModels ? 'animate-spin' : ''}`} />
Refresh
</button>
</div>
@ -359,7 +91,6 @@ export default function SettingsPanel() {
value={providers.ollama.model}
onChange={(e) => setProviderConfig('ollama', { model: e.target.value })}
className="w-full px-3 py-2 bg-editor-surface border border-editor-border rounded-lg text-xs text-white focus:outline-none focus:border-editor-accent"
title="Which Ollama model to use for AI features"
>
{ollamaModels.map((m) => (
<option key={m} value={m}>{m}</option>
@ -371,7 +102,6 @@ export default function SettingsPanel() {
value={providers.ollama.model}
onChange={(v) => setProviderConfig('ollama', { model: v })}
placeholder="llama3"
title="Which Ollama model to use for AI features"
/>
)}
</div>
@ -385,14 +115,12 @@ export default function SettingsPanel() {
onChange={(v) => setProviderConfig('openai', { apiKey: v })}
placeholder="sk-..."
type="password"
title="Your OpenAI API key — stored encrypted on your machine"
/>
<InputField
label="Model"
value={providers.openai.model}
onChange={(v) => setProviderConfig('openai', { model: v })}
placeholder="gpt-4o"
title="OpenAI model to use (e.g. gpt-4o, gpt-4o-mini)"
/>
</ProviderSection>
@ -404,14 +132,12 @@ export default function SettingsPanel() {
onChange={(v) => setProviderConfig('claude', { apiKey: v })}
placeholder="sk-ant-..."
type="password"
title="Your Anthropic Claude API key — stored encrypted on your machine"
/>
<InputField
label="Model"
value={providers.claude.model}
onChange={(v) => setProviderConfig('claude', { model: v })}
placeholder="claude-sonnet-4-20250514"
title="Claude model to use (e.g. claude-sonnet-4-20250514)"
/>
</ProviderSection>
</div>
@ -444,14 +170,12 @@ function InputField({
onChange,
placeholder,
type = 'text',
title,
}: {
label: string;
value: string;
onChange: (value: string) => void;
placeholder: string;
type?: string;
title?: string;
}) {
return (
<div className="space-y-1">
@ -461,7 +185,6 @@ function InputField({
value={value}
onChange={(e) => onChange(e.target.value)}
placeholder={placeholder}
title={title}
className="w-full px-3 py-2 bg-editor-bg border border-editor-border rounded-lg text-xs text-editor-text placeholder:text-editor-text-muted/50 focus:outline-none focus:border-editor-accent"
/>
</div>

View File

@ -1,36 +1,38 @@
import { useState } from 'react';
import { useState, useEffect } from 'react';
import { useEditorStore } from '../store/editorStore';
import { Loader2, Scissors, Trash2, RotateCcw, PencilLine, Layers } from 'lucide-react';
import type { SilenceDetectionRange, SilenceTrimSettings } from '../types/project';
import { Loader2, Scissors, Trash2, Play, Pause } from 'lucide-react';
type SilenceRange = {
start: number;
end: number;
duration: number;
};
type TrimAction = 'cut' | 'mute';
export default function SilenceTrimmerPanel() {
const {
videoPath,
backendUrl,
silenceTrimGroups,
const {
videoPath,
backendUrl,
addCutRange,
addMuteRange,
removeCutRange,
removeMuteRange,
cutRanges,
applySilenceTrimGroup,
removeSilenceTrimGroup,
muteRanges,
duration,
pauseUndo,
resumeUndo
} = useEditorStore();
const [minSilenceMs, setMinSilenceMs] = useState(500);
const [silenceDb, setSilenceDb] = useState(-35);
const [preBufferMs, setPreBufferMs] = useState(80);
const [postBufferMs, setPostBufferMs] = useState(120);
const [isDetecting, setIsDetecting] = useState(false);
const [ranges, setRanges] = useState<SilenceDetectionRange[]>([]);
const [selectedGroupId, setSelectedGroupId] = useState<string | null>(null);
const [status, setStatus] = useState<string | null>(null);
const selectedGroup = selectedGroupId
? silenceTrimGroups.find((group) => group.id === selectedGroupId) ?? null
: null;
const buildSettings = (): SilenceTrimSettings => ({
minSilenceMs,
silenceDb,
preBufferMs,
postBufferMs,
});
const [ranges, setRanges] = useState<SilenceRange[]>([]);
const [trimAction, setTrimAction] = useState<TrimAction>('cut');
const [isActive, setIsActive] = useState(false);
const detectSilence = async () => {
if (!videoPath) return;
@ -63,7 +65,6 @@ export default function SilenceTrimmerPanel() {
const data = await res.json();
setRanges(data.ranges || []);
setStatus(`Detected ${(data.ranges || []).length} pause ranges.`);
} catch (err) {
console.error(err);
const message = err instanceof Error ? err.message : 'Unknown error';
@ -73,46 +74,84 @@ export default function SilenceTrimmerPanel() {
}
};
const applyAsNewGroup = () => {
if (ranges.length === 0) return;
const result = applySilenceTrimGroup({
sourceRanges: ranges,
settings: buildSettings(),
});
setSelectedGroupId(result.groupId);
setStatus(`Applied ${result.appliedCount} cut ranges as ${result.groupId}. Undo will revert this pass in one step.`);
};
const applyAsCuts = () => {
// Pause undo tracking to group all cuts into a single undo operation
pauseUndo();
const loadGroupForEditing = (groupId: string) => {
const group = silenceTrimGroups.find((entry) => entry.id === groupId);
if (!group) return;
setSelectedGroupId(groupId);
setRanges(group.sourceRanges);
setMinSilenceMs(group.settings.minSilenceMs);
setSilenceDb(group.settings.silenceDb);
setPreBufferMs(group.settings.preBufferMs);
setPostBufferMs(group.settings.postBufferMs);
setStatus(`Loaded ${group.id} for editing. Adjust settings and reapply.`);
};
const preBufferSeconds = preBufferMs / 1000;
const postBufferSeconds = postBufferMs / 1000;
const maxEnd = duration > 0 ? duration : Number.POSITIVE_INFINITY;
const reapplySelectedGroup = () => {
if (!selectedGroupId || ranges.length === 0) return;
const result = applySilenceTrimGroup({
groupId: selectedGroupId,
sourceRanges: ranges,
settings: buildSettings(),
});
setStatus(`Reapplied ${result.groupId} with ${result.appliedCount} cut ranges.`);
};
const removeGroup = (groupId: string) => {
removeSilenceTrimGroup(groupId);
if (selectedGroupId === groupId) {
setSelectedGroupId(null);
for (const r of ranges) {
// Positive buffers shrink the cut, negative buffers expand it.
const start = Math.max(0, r.start + preBufferSeconds);
const end = Math.min(maxEnd, r.end - postBufferSeconds);
if (end - start >= 0.01) {
if (trimAction === 'cut') {
addCutRange(start, end);
} else {
addMuteRange(start, end);
}
}
}
setStatus(`Removed all cut ranges from ${groupId}.`);
// Resume undo tracking - this creates a single undo entry for the entire batch
resumeUndo();
};
const removeExistingTrims = () => {
pauseUndo();
try {
// Remove all cut ranges that match detected silence ranges
cutRanges.forEach(range => {
ranges.forEach(silenceRange => {
if (Math.abs(range.start - silenceRange.start) < 0.1 &&
Math.abs(range.end - silenceRange.end) < 0.1) {
removeCutRange(range.id);
}
});
});
// Remove all mute ranges that match detected silence ranges
muteRanges.forEach(range => {
ranges.forEach(silenceRange => {
if (Math.abs(range.start - silenceRange.start) < 0.1 &&
Math.abs(range.end - silenceRange.end) < 0.1) {
removeMuteRange(range.id);
}
});
});
} finally {
resumeUndo();
}
};
const toggleActive = () => {
setIsActive(!isActive);
if (!isActive) {
// When activating, detect silence and apply
detectSilence().then(() => {
if (ranges.length > 0) {
applyAsCuts();
}
});
} else {
// When deactivating, remove existing trims
removeExistingTrims();
}
};
// Auto-detect when video changes
useEffect(() => {
if (videoPath && isActive) {
detectSilence().then(() => {
if (ranges.length > 0) {
applyAsCuts();
}
});
}
}, [videoPath]);
return (
<div className="p-4 space-y-4">
<div className="space-y-1">
@ -134,7 +173,6 @@ export default function SilenceTrimmerPanel() {
value={minSilenceMs}
onChange={(e) => setMinSilenceMs(Number(e.target.value) || 500)}
className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
title="Minimum duration of silence to detect in milliseconds"
/>
</div>
@ -150,7 +188,6 @@ export default function SilenceTrimmerPanel() {
value={silenceDb}
onChange={(e) => setSilenceDb(Number(e.target.value) || -35)}
className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
title="Volume threshold in dB — lower values detect quieter sounds as silence"
/>
</div>
@ -167,7 +204,6 @@ export default function SilenceTrimmerPanel() {
value={preBufferMs}
onChange={(e) => setPreBufferMs(Number(e.target.value) || 0)}
className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
title="Extra time to add before each detected silence"
/>
</div>
<div className="space-y-1.5">
@ -182,114 +218,99 @@ export default function SilenceTrimmerPanel() {
value={postBufferMs}
onChange={(e) => setPostBufferMs(Number(e.target.value) || 0)}
className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
title="Extra time to add after each detected silence"
/>
</div>
</div>
<button
onClick={detectSilence}
disabled={isDetecting || !videoPath}
className="w-full flex items-center justify-center gap-2 px-4 py-2.5 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-40 rounded-lg text-sm font-medium transition-colors"
title="Scan the entire audio track for silent pauses"
>
{isDetecting ? (
<>
<Loader2 className="w-4 h-4 animate-spin" />
Detecting pauses...
</>
) : (
'Detect Pauses'
)}
</button>
</div>
{status && (
<div className="text-[11px] text-editor-text-muted bg-editor-surface border border-editor-border rounded px-2.5 py-2">
{status}
<div className="space-y-1.5">
<label className="text-[11px] text-editor-text-muted font-medium">
Trim Action
</label>
<select
value={trimAction}
onChange={(e) => setTrimAction(e.target.value as TrimAction)}
className="w-full px-2.5 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
>
<option value="cut">Cut (remove silence)</option>
<option value="mute">Mute (silence audio)</option>
</select>
</div>
)}
{ranges.length > 0 && (
<div className="space-y-2">
<div className="flex items-center justify-between">
<span className="text-xs font-medium">Detected {ranges.length} pause ranges</span>
<div className="flex items-center gap-1">
{selectedGroup && (
<button
onClick={reapplySelectedGroup}
className="flex items-center gap-1 px-2 py-1 text-xs bg-editor-warning/20 text-editor-warning rounded hover:bg-editor-warning/30"
title="Re-apply this silence trim group with current settings"
>
<RotateCcw className="w-3 h-3" />
Reapply Group
</button>
)}
<div className="flex items-center justify-between">
<label className="text-[11px] text-editor-text-muted font-medium">
Active Mode
</label>
<button
onClick={toggleActive}
className={`flex items-center gap-2 px-3 py-1.5 text-xs rounded transition-colors ${
isActive
? 'bg-green-600 hover:bg-green-700 text-white'
: 'bg-editor-surface border border-editor-border hover:bg-editor-surface-hover'
}`}
>
{isActive ? (
<>
<Pause className="w-3 h-3" />
Active
</>
) : (
<>
<Play className="w-3 h-3" />
Inactive
</>
)}
</button>
</div>
<div className="flex gap-2">
<button
onClick={detectSilence}
disabled={isDetecting || !videoPath}
className="flex-1 flex items-center justify-center gap-2 px-3 py-2 bg-editor-accent hover:bg-editor-accent-hover disabled:opacity-50 rounded text-xs font-medium transition-colors"
>
{isDetecting ? (
<>
<Loader2 className="w-3 h-3 animate-spin" />
Detecting...
</>
) : (
'Detect Pauses'
)}
</button>
{ranges.length > 0 && (
<button
onClick={removeExistingTrims}
className="flex items-center gap-1 px-3 py-2 text-xs bg-red-600 hover:bg-red-700 text-white rounded transition-colors"
>
<Trash2 className="w-3 h-3" />
Remove Trims
</button>
)}
</div>
{ranges.length > 0 && (
<div className="space-y-2">
<div className="flex items-center justify-between">
<span className="text-xs font-medium">Detected {ranges.length} pause ranges</span>
<button
onClick={applyAsNewGroup}
onClick={applyAsCuts}
className="flex items-center gap-1 px-2 py-1 text-xs bg-editor-accent/20 text-editor-accent rounded hover:bg-editor-accent/30"
title="Create a new silence trim group from detected pauses"
>
<Scissors className="w-3 h-3" />
Apply As New Group
Apply As {trimAction === 'cut' ? 'Cuts' : 'Mutes'}
</button>
</div>
</div>
<div className="max-h-56 overflow-y-auto space-y-1 pr-1">
{ranges.slice(0, 50).map((r, i) => (
<div key={`${r.start}-${r.end}-${i}`} className="px-2 py-1.5 rounded bg-editor-surface border border-editor-border text-xs">
{r.start.toFixed(2)}s - {r.end.toFixed(2)}s ({r.duration.toFixed(2)}s)
</div>
))}
</div>
</div>
)}
{silenceTrimGroups.length > 0 && (
<div className="space-y-2 pt-1">
<div className="text-xs font-medium flex items-center gap-1">
<Layers className="w-3 h-3" />
Silence Trim Groups
</div>
<div className="max-h-48 overflow-y-auto space-y-1 pr-1">
{silenceTrimGroups.map((group) => {
const groupCutCount = cutRanges.filter((range) => range.trimGroupId === group.id).length;
const isActive = selectedGroupId === group.id;
return (
<div
key={group.id}
className={`rounded border px-2 py-1.5 text-xs ${isActive ? 'border-editor-accent bg-editor-accent/10' : 'border-editor-border bg-editor-surface'}`}
>
<div className="flex items-center justify-between gap-2">
<div className="min-w-0">
<div className="font-medium truncate">{group.id}</div>
<div className="text-[10px] text-editor-text-muted">
{groupCutCount} cuts · {group.sourceRanges.length} source pauses
</div>
</div>
<div className="flex items-center gap-1 shrink-0">
<button
onClick={() => loadGroupForEditing(group.id)}
className="px-1.5 py-1 rounded hover:bg-editor-accent/20 text-editor-accent"
title="Edit and reapply this group"
>
<PencilLine className="w-3 h-3" />
</button>
<button
onClick={() => removeGroup(group.id)}
className="px-1.5 py-1 rounded hover:bg-editor-danger/20 text-editor-danger"
title="Delete all cuts from this group"
>
<Trash2 className="w-3 h-3" />
</button>
</div>
</div>
<div className="max-h-56 overflow-y-auto space-y-1 pr-1">
{ranges.slice(0, 50).map((r, i) => (
<div key={`${r.start}-${r.end}-${i}`} className="px-2 py-1.5 rounded bg-editor-surface border border-editor-border text-xs">
{r.start.toFixed(2)}s - {r.end.toFixed(2)}s ({r.duration.toFixed(2)}s)
</div>
);
})}
))}
</div>
</div>
</div>
)}
)}
</div>
</div>
);
}

View File

@ -1,126 +1,38 @@
import { useCallback, useRef, useEffect, useMemo, useState } from 'react';
import { useEditorStore } from '../store/editorStore';
import { useLicenseStore } from '../store/licenseStore';
import { Virtuoso } from 'react-virtuoso';
import { Scissors, VolumeX, SlidersHorizontal, Gauge, RotateCcw, Search, ChevronUp, ChevronDown, X, RefreshCw } from 'lucide-react';
import { assert } from '../lib/assert';
import { Trash2, RotateCcw } from 'lucide-react';
interface TranscriptEditorProps {
cutMode: boolean;
muteMode: boolean;
gainMode: boolean;
gainModeDb: number;
speedMode: boolean;
speedModeValue: number;
}
export default function TranscriptEditor({
cutMode,
muteMode,
gainMode,
gainModeDb,
speedMode,
speedModeValue,
}: TranscriptEditorProps) {
export default function TranscriptEditor() {
const words = useEditorStore((s) => s.words);
const segments = useEditorStore((s) => s.segments);
const deletedRanges = useEditorStore((s) => s.deletedRanges);
const cutRanges = useEditorStore((s) => s.cutRanges);
const muteRanges = useEditorStore((s) => s.muteRanges);
const gainRanges = useEditorStore((s) => s.gainRanges);
const speedRanges = useEditorStore((s) => s.speedRanges);
const selectedWordIndices = useEditorStore((s) => s.selectedWordIndices);
const hoveredWordIndex = useEditorStore((s) => s.hoveredWordIndex);
const setSelectedWordIndices = useEditorStore((s) => s.setSelectedWordIndices);
const setHoveredWordIndex = useEditorStore((s) => s.setHoveredWordIndex);
const videoPath = useEditorStore((s) => s.videoPath);
const backendUrl = useEditorStore((s) => s.backendUrl);
const replaceWordRange = useEditorStore((s) => s.replaceWordRange);
const deleteSelectedWords = useEditorStore((s) => s.deleteSelectedWords);
const restoreRange = useEditorStore((s) => s.restoreRange);
const removeCutRange = useEditorStore((s) => s.removeCutRange);
const removeMuteRange = useEditorStore((s) => s.removeMuteRange);
const removeGainRange = useEditorStore((s) => s.removeGainRange);
const removeSpeedRange = useEditorStore((s) => s.removeSpeedRange);
const addCutRange = useEditorStore((s) => s.addCutRange);
const addMuteRange = useEditorStore((s) => s.addMuteRange);
const addGainRange = useEditorStore((s) => s.addGainRange);
const addSpeedRange = useEditorStore((s) => s.addSpeedRange);
const getWordAtTime = useEditorStore((s) => s.getWordAtTime);
const canEdit = useLicenseStore((s) => s.canEdit);
const selectionStart = useRef<number | null>(null);
const wasDragging = useRef(false);
const virtuosoRef = useRef<any>(null);
const zoneDragStart = useRef<number | null>(null);
const [zoneDragRange, setZoneDragRange] = useState<{ start: number; end: number } | null>(null);
const [searchOpen, setSearchOpen] = useState(false);
const [searchQuery, setSearchQuery] = useState('');
const [activeMatchIdx, setActiveMatchIdx] = useState(0);
const searchInputRef = useRef<HTMLInputElement | null>(null);
const updateWordText = useEditorStore((s) => s.updateWordText);
const [editingWordIndex, setEditingWordIndex] = useState<number | null>(null);
const [editText, setEditText] = useState('');
const editInputRef = useRef<HTMLInputElement | null>(null);
const deletedSet = useMemo(() => {
const s = new Set<number>();
for (const range of deletedRanges) {
for (const idx of range.wordIndices) s.add(idx);
}
return s;
}, [deletedRanges]);
const selectedSet = useMemo(() => new Set(selectedWordIndices), [selectedWordIndices]);
const matchIndices = useMemo(() => {
const q = searchQuery.trim().toLowerCase();
if (!q) return [] as number[];
const matches: number[] = [];
for (let i = 0; i < words.length; i++) {
if (words[i].word.toLowerCase().includes(q)) matches.push(i);
}
return matches;
}, [searchQuery, words]);
const matchSet = useMemo(() => new Set(matchIndices), [matchIndices]);
const safeActiveMatchIdx = matchIndices.length === 0
? 0
: Math.min(activeMatchIdx, matchIndices.length - 1);
const jumpToMatch = useCallback((idx: number) => {
if (matchIndices.length === 0) return;
const nextIdx = ((idx % matchIndices.length) + matchIndices.length) % matchIndices.length;
setActiveMatchIdx(nextIdx);
const wordIndex = matchIndices[nextIdx];
const el = document.getElementById(`word-${wordIndex}`);
if (el) {
el.scrollIntoView({ behavior: 'smooth', block: 'center', inline: 'nearest' });
}
}, [matchIndices]);
useEffect(() => {
const onKeyDown = (e: KeyboardEvent) => {
const target = e.target as HTMLElement | null;
const isInInput = !!target && (target.tagName === 'INPUT' || target.tagName === 'TEXTAREA' || target.tagName === 'SELECT');
if ((e.ctrlKey || e.metaKey) && e.key.toLowerCase() === 'f') {
e.preventDefault();
setSearchOpen(true);
requestAnimationFrame(() => searchInputRef.current?.focus());
return;
}
if (!searchOpen) return;
if (e.key === 'Escape') {
e.preventDefault();
setSearchOpen(false);
return;
}
if (e.key === 'Enter' && !isInInput) {
e.preventDefault();
jumpToMatch(safeActiveMatchIdx + (e.shiftKey ? -1 : 1));
return;
}
if (e.key === 'Enter' && isInInput && target === searchInputRef.current) {
e.preventDefault();
jumpToMatch(safeActiveMatchIdx + (e.shiftKey ? -1 : 1));
}
};
window.addEventListener('keydown', onKeyDown);
return () => window.removeEventListener('keydown', onKeyDown);
}, [jumpToMatch, searchOpen, safeActiveMatchIdx]);
const [activeWordIndex, setActiveWordIndex] = useState(-1);
@ -159,14 +71,6 @@ export default function TranscriptEditor({
}
return;
}
if (cutMode || muteMode || gainMode || speedMode) {
zoneDragStart.current = index;
setZoneDragRange({ start: index, end: index });
selectionStart.current = null;
return;
}
wasDragging.current = false;
if (e.shiftKey && selectedWordIndices.length > 0) {
const first = selectedWordIndices[0];
@ -180,19 +84,12 @@ export default function TranscriptEditor({
setSelectedWordIndices([index]);
}
},
[words, selectedWordIndices, setSelectedWordIndices, cutMode, muteMode, gainMode, speedMode],
[words, selectedWordIndices, setSelectedWordIndices],
);
const handleWordMouseEnter = useCallback(
(index: number) => {
setHoveredWordIndex(index);
if (zoneDragStart.current !== null) {
setZoneDragRange({
start: Math.min(zoneDragStart.current, index),
end: Math.max(zoneDragStart.current, index),
});
return;
}
if (selectionStart.current !== null) {
wasDragging.current = true;
const start = Math.min(selectionStart.current, index);
@ -206,22 +103,8 @@ export default function TranscriptEditor({
);
const handleMouseUp = useCallback(() => {
if (zoneDragStart.current !== null && zoneDragRange) {
assert(zoneDragRange.start >= 0 && zoneDragRange.start < words.length, 'handleMouseUp: zoneDragRange.start out of bounds');
assert(zoneDragRange.end >= 0 && zoneDragRange.end < words.length, 'handleMouseUp: zoneDragRange.end out of bounds');
const startWord = words[zoneDragRange.start];
const endWord = words[zoneDragRange.end];
if (startWord && endWord && canEdit) {
if (cutMode) addCutRange(startWord.start, endWord.end);
if (muteMode) addMuteRange(startWord.start, endWord.end);
if (gainMode) addGainRange(startWord.start, endWord.end, gainModeDb);
if (speedMode) addSpeedRange(startWord.start, endWord.end, speedModeValue);
}
}
zoneDragStart.current = null;
setZoneDragRange(null);
selectionStart.current = null;
}, [zoneDragRange, words, cutMode, muteMode, gainMode, gainModeDb, speedMode, speedModeValue, addCutRange, addMuteRange, addGainRange, addSpeedRange, canEdit]);
}, []);
const handleClickOutside = useCallback(
(e: React.MouseEvent) => {
@ -236,141 +119,19 @@ export default function TranscriptEditor({
[setSelectedWordIndices],
);
const startEditing = useCallback((index: number) => {
const word = words[index];
if (!word) return;
setEditingWordIndex(index);
setEditText(word.word);
requestAnimationFrame(() => {
editInputRef.current?.focus();
editInputRef.current?.select();
});
}, [words]);
const commitEdit = useCallback(() => {
if (editingWordIndex === null) return;
const trimmed = editText.trim();
if (trimmed && trimmed !== words[editingWordIndex]?.word) {
updateWordText(editingWordIndex, trimmed);
}
setEditingWordIndex(null);
setEditText('');
}, [editingWordIndex, editText, words, updateWordText]);
const cancelEdit = useCallback(() => {
setEditingWordIndex(null);
setEditText('');
}, []);
const [isReTranscribing, setIsReTranscribing] = useState(false);
const reTranscribeGuard = useRef(false);
const handleReTranscribe = useCallback(async () => {
if (!videoPath || selectedWordIndices.length === 0 || reTranscribeGuard.current) return;
reTranscribeGuard.current = true;
setIsReTranscribing(true);
// Snapshot indices and word timings before the async gap
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
assert(sorted[0] >= 0 && sorted[sorted.length - 1] < words.length, 'handleReTranscribe: sorted indices out of bounds');
const startWord = words[sorted[0]];
const endWord = words[sorted[sorted.length - 1]];
if (!startWord || !endWord) {
reTranscribeGuard.current = false;
setIsReTranscribing(false);
return;
}
try {
const res = await fetch(`${backendUrl}/transcribe/segment`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
file_path: videoPath,
start: startWord.start,
end: endWord.end,
}),
});
if (!res.ok) {
let detail = res.statusText;
try { const body = await res.json(); if (body?.detail) detail = String(body.detail); } catch { /* keep statusText fallback */ }
throw new Error(`Re-transcribe failed: ${detail}`);
}
const data = await res.json();
replaceWordRange(sorted[0], sorted[sorted.length - 1], data.words);
} catch (err) {
console.error('Re-transcribe error:', err);
alert(err instanceof Error ? err.message : 'Re-transcribe failed');
} finally {
reTranscribeGuard.current = false;
setIsReTranscribing(false);
}
}, [videoPath, selectedWordIndices, words, backendUrl, replaceWordRange]);
const handleWordDoubleClick = useCallback((index: number) => {
if (cutMode || muteMode || gainMode || speedMode) return;
if (!canEdit) return;
startEditing(index);
}, [cutMode, muteMode, gainMode, speedMode, startEditing, canEdit]);
// Focus edit input when it appears
useEffect(() => {
if (editingWordIndex !== null && editInputRef.current) {
editInputRef.current.focus();
editInputRef.current.select();
}
}, [editingWordIndex]);
// Global key handler for edit mode
useEffect(() => {
const onKeyDown = (e: KeyboardEvent) => {
if (editingWordIndex === null) return;
if (e.key === 'Enter') {
e.preventDefault();
commitEdit();
} else if (e.key === 'Escape') {
e.preventDefault();
cancelEdit();
}
};
window.addEventListener('keydown', onKeyDown);
return () => window.removeEventListener('keydown', onKeyDown);
}, [editingWordIndex, commitEdit, cancelEdit]);
const getRangeForWord = useCallback(
(wordIndex: number) => deletedRanges.find((r) => r.wordIndices.includes(wordIndex)),
[deletedRanges],
);
const cutSelectedWords = useCallback(() => {
if (selectedWordIndices.length === 0) return;
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
assert(sorted[0] >= 0 && sorted[0] < words.length, 'cutSelectedWords: sorted[0] out of bounds');
assert(sorted[sorted.length - 1] >= 0 && sorted[sorted.length - 1] < words.length, 'cutSelectedWords: sorted[last] out of bounds');
const startTime = words[sorted[0]].start;
const endTime = words[sorted[sorted.length - 1]].end;
addCutRange(startTime, endTime);
}, [selectedWordIndices, words, addCutRange]);
const muteSelectedWords = useCallback(() => {
if (selectedWordIndices.length === 0) return;
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
const startTime = words[sorted[0]].start;
const endTime = words[sorted[sorted.length - 1]].end;
addMuteRange(startTime, endTime);
}, [selectedWordIndices, words, addMuteRange]);
const gainSelectedWords = useCallback(() => {
if (selectedWordIndices.length === 0) return;
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
const startTime = words[sorted[0]].start;
const endTime = words[sorted[sorted.length - 1]].end;
addGainRange(startTime, endTime, gainModeDb);
}, [selectedWordIndices, words, addGainRange, gainModeDb]);
const speedSelectedWords = useCallback(() => {
if (selectedWordIndices.length === 0) return;
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
const startTime = words[sorted[0]].start;
const endTime = words[sorted[sorted.length - 1]].end;
addSpeedRange(startTime, endTime, speedModeValue);
}, [selectedWordIndices, words, addSpeedRange, speedModeValue]);
const getCutRangeForWord = useCallback(
(wordIndex: number) => {
const word = words[wordIndex];
@ -389,24 +150,6 @@ export default function TranscriptEditor({
[words, muteRanges],
);
const getGainRangeForWord = useCallback(
(wordIndex: number) => {
const word = words[wordIndex];
if (!word) return null;
return gainRanges.find((r) => word.start >= r.start && word.end <= r.end);
},
[words, gainRanges],
);
const getSpeedRangeForWord = useCallback(
(wordIndex: number) => {
const word = words[wordIndex];
if (!word) return null;
return speedRanges.find((r) => word.start >= r.start && word.end <= r.end);
},
[words, speedRanges],
);
const renderSegment = useCallback(
(index: number) => {
const segment = segments[index];
@ -421,76 +164,51 @@ export default function TranscriptEditor({
<p className="text-sm leading-relaxed flex flex-wrap">
{segment.words.map((word, localIndex) => {
const globalIndex = (segment.globalStartIndex ?? 0) + localIndex;
const isDeleted = deletedSet.has(globalIndex);
const isSelected = selectedSet.has(globalIndex);
const isActive = globalIndex === activeWordIndex;
const isHovered = globalIndex === hoveredWordIndex;
const isZoneDragSelected = zoneDragRange
? globalIndex >= zoneDragRange.start && globalIndex <= zoneDragRange.end
: false;
const deletedRange = isDeleted ? getRangeForWord(globalIndex) : null;
const cutRange = getCutRangeForWord(globalIndex);
const muteRange = getMuteRangeForWord(globalIndex);
const gainRange = getGainRangeForWord(globalIndex);
const speedRange = getSpeedRangeForWord(globalIndex);
const isSearchMatch = matchSet.has(globalIndex);
const isActiveSearchMatch = matchIndices.length > 0 && matchIndices[safeActiveMatchIdx] === globalIndex;
const isEditing = globalIndex === editingWordIndex;
// Low-confidence highlighting
const CONFIDENCE_THRESHOLD_KEY = 'talkedit:confidenceThreshold';
const storedThreshold = typeof window !== 'undefined' ? Number(window.localStorage.getItem(CONFIDENCE_THRESHOLD_KEY)) : 0;
const confidenceThreshold = Number.isFinite(storedThreshold) ? storedThreshold : 0.6;
const isLowConfidence = word.confidence > 0 && word.confidence < confidenceThreshold && !cutRange && !muteRange && !gainRange && !speedRange;
const confidencePct = word.confidence > 0 ? Math.round(word.confidence * 100) : null;
return (
<span
key={globalIndex}
id={`word-${globalIndex}`}
data-word-index={globalIndex}
title={`${word.start.toFixed(2)}s — confidence: ${confidencePct !== null ? confidencePct + '%' : 'N/A'}${isLowConfidence ? ' ⚠️ Low confidence' : ''} — Ctrl+click to seek, double-click to edit`}
title={`${word.start.toFixed(2)}s — Ctrl+click to seek`}
onMouseDown={(e) => handleWordMouseDown(globalIndex, e)}
onMouseEnter={() => handleWordMouseEnter(globalIndex)}
onMouseLeave={() => setHoveredWordIndex(null)}
onDoubleClick={() => handleWordDoubleClick(globalIndex)}
className={`
relative px-[2px] py-[1px] rounded cursor-pointer transition-colors
${isDeleted ? 'line-through text-editor-text-muted/40 bg-editor-word-deleted' : ''}
${cutRange ? 'bg-red-500/20 text-red-100' : ''}
${muteRange ? 'bg-blue-500/20 text-blue-100' : ''}
${gainRange ? 'bg-amber-500/20 text-amber-100' : ''}
${speedRange ? 'bg-emerald-500/20 text-emerald-100' : ''}
${isZoneDragSelected && cutMode ? 'bg-red-500/30 ring-1 ring-red-400/60' : ''}
${isZoneDragSelected && muteMode ? 'bg-blue-500/30 ring-1 ring-blue-400/60' : ''}
${isZoneDragSelected && gainMode ? 'bg-amber-500/30 ring-1 ring-amber-400/60' : ''}
${isZoneDragSelected && speedMode ? 'bg-emerald-500/30 ring-1 ring-emerald-400/60' : ''}
${isSearchMatch && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-accent/15 ring-2 ring-editor-accent/50' : ''}
${isActiveSearchMatch && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-accent/35 ring-2 ring-editor-accent text-white font-medium' : ''}
${isSelected && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-word-selected text-white' : ''}
${isActive && !isSelected && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-accent/20 text-editor-accent' : ''}
${isHovered && !isSelected && !isActive && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-word-hover' : ''}
${isLowConfidence ? 'border-b border-dashed border-orange-400/60' : ''}
${isSelected && !isDeleted && !cutRange && !muteRange ? 'bg-editor-word-selected text-white' : ''}
${isActive && !isDeleted && !isSelected && !cutRange && !muteRange ? 'bg-editor-accent/20 text-editor-accent' : ''}
${isHovered && !isDeleted && !isSelected && !isActive && !cutRange && !muteRange ? 'bg-editor-word-hover' : ''}
`}
>
{isEditing ? (
<input
ref={editInputRef}
value={editText}
onChange={(e) => setEditText(e.target.value)}
onBlur={commitEdit}
className="w-24 px-1 py-0 text-xs bg-editor-bg border border-editor-accent rounded text-editor-text focus:outline-none"
style={{ minWidth: `${Math.max(word.word.length * 8, 48)}px` }}
/>
) : (
<>{word.word}{' '}</>
{word.word}{' '}
{isDeleted && isHovered && deletedRange && (
<button
onClick={(e) => {
e.stopPropagation();
restoreRange(deletedRange.id);
}}
className="absolute -top-5 left-1/2 -translate-x-1/2 flex items-center gap-0.5 px-1.5 py-0.5 bg-editor-surface border border-editor-border rounded text-[10px] text-editor-success whitespace-nowrap z-10"
>
<RotateCcw className="w-2.5 h-2.5" /> Restore
</button>
)}
{(cutRange || muteRange || gainRange || speedRange) && isHovered && (
{(cutRange || muteRange) && isHovered && (
<button
onClick={(e) => {
e.stopPropagation();
if (cutRange) removeCutRange(cutRange.id);
if (muteRange) removeMuteRange(muteRange.id);
if (gainRange) removeGainRange(gainRange.id);
if (speedRange) removeSpeedRange(speedRange.id);
}}
className="absolute -top-5 left-1/2 -translate-x-1/2 flex items-center gap-0.5 px-1.5 py-0.5 bg-editor-surface border border-editor-border rounded text-[10px] text-editor-success whitespace-nowrap z-10"
>
@ -504,111 +222,23 @@ export default function TranscriptEditor({
</div>
);
},
[segments, selectedSet, matchSet, matchIndices, safeActiveMatchIdx, activeWordIndex, hoveredWordIndex, handleWordMouseDown, handleWordMouseEnter, setHoveredWordIndex, getCutRangeForWord, getMuteRangeForWord, getGainRangeForWord, getSpeedRangeForWord, removeCutRange, removeMuteRange, removeGainRange, removeSpeedRange, zoneDragRange, cutMode, muteMode, gainMode, speedMode, editingWordIndex, editText, editInputRef, handleWordDoubleClick, commitEdit, setEditText],
[segments, deletedSet, selectedSet, activeWordIndex, hoveredWordIndex, handleWordMouseDown, handleWordMouseEnter, setHoveredWordIndex, getRangeForWord, getCutRangeForWord, getMuteRangeForWord, restoreRange, removeCutRange, removeMuteRange],
);
return (
<div className="flex-1 flex flex-col min-h-0">
<div className="flex items-center justify-between gap-2 px-4 py-2 border-b border-editor-border shrink-0">
<div className="flex items-center gap-1.5">
<button
onClick={() => {
setSearchOpen(true);
requestAnimationFrame(() => searchInputRef.current?.focus());
}}
className="flex items-center gap-1 px-2 py-1 text-xs text-editor-text-muted hover:text-editor-text hover:bg-editor-surface rounded"
title="Find (Ctrl+F)"
>
<Search className="w-3 h-3" />
Find
</button>
{searchOpen && (
<div className="flex items-center gap-1.5 px-2 py-1 rounded border border-editor-border bg-editor-surface">
<input
ref={searchInputRef}
value={searchQuery}
onChange={(e) => {
setSearchQuery(e.target.value);
setActiveMatchIdx(0);
}}
placeholder="Search transcript"
className="w-40 bg-transparent text-xs text-editor-text focus:outline-none"
/>
<span className="text-[10px] text-editor-text-muted min-w-[52px] text-right">
{matchIndices.length === 0 ? '0/0' : `${safeActiveMatchIdx + 1}/${matchIndices.length}`}
</span>
<button
onClick={() => jumpToMatch(safeActiveMatchIdx - 1)}
className="p-0.5 rounded hover:bg-editor-bg text-editor-text-muted hover:text-editor-text"
title="Previous match (Shift+Enter)"
>
<ChevronUp className="w-3 h-3" />
</button>
<button
onClick={() => jumpToMatch(safeActiveMatchIdx + 1)}
className="p-0.5 rounded hover:bg-editor-bg text-editor-text-muted hover:text-editor-text"
title="Next match (Enter)"
>
<ChevronDown className="w-3 h-3" />
</button>
<button
onClick={() => setSearchOpen(false)}
className="p-0.5 rounded hover:bg-editor-bg text-editor-text-muted hover:text-editor-text"
title="Close search (Esc)"
>
<X className="w-3 h-3" />
</button>
</div>
)}
</div>
<div className="flex items-center gap-2 px-4 py-2 border-b border-editor-border shrink-0">
<span className="text-xs text-editor-text-muted flex-1">
{words.length} words &middot; {deletedRanges.length} cuts &middot; {cutRanges.length} cut ranges &middot; {muteRanges.length} mute ranges
</span>
{selectedWordIndices.length > 0 && (
<div className="flex items-center gap-1">
<button
onClick={cutSelectedWords}
disabled={!canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-red-500/20 text-red-300 rounded hover:bg-red-500/30 transition-colors disabled:opacity-40"
title="Remove this word range from the output"
>
<Scissors className="w-3 h-3" />
Cut
</button>
<button
onClick={muteSelectedWords}
disabled={!canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-blue-500/20 text-blue-300 rounded hover:bg-blue-500/30 transition-colors disabled:opacity-40"
title="Silence audio for this word range"
>
<VolumeX className="w-3 h-3" />
Mute
</button>
<button
onClick={gainSelectedWords}
disabled={!canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-amber-500/20 text-amber-300 rounded hover:bg-amber-500/30 transition-colors disabled:opacity-40"
title="Adjust volume for this word range — positive boosts, negative reduces"
>
<SlidersHorizontal className="w-3 h-3" />
Gain ({gainModeDb > 0 ? '+' : ''}{gainModeDb.toFixed(1)} dB)
</button>
<button
onClick={speedSelectedWords}
disabled={!canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-emerald-500/20 text-emerald-300 rounded hover:bg-emerald-500/30 transition-colors disabled:opacity-40"
title="Change playback speed for this word range — lower is slower, higher is faster"
>
<Gauge className="w-3 h-3" />
Speed {speedModeValue.toFixed(2)}x
</button>
<button
onClick={handleReTranscribe}
disabled={isReTranscribing || !canEdit}
className="flex items-center gap-1 px-2 py-1 text-xs bg-purple-500/20 text-purple-300 rounded hover:bg-purple-500/30 disabled:opacity-40 transition-colors"
title="Re-run Whisper transcription on this segment"
>
<RefreshCw className={`w-3 h-3 ${isReTranscribing ? 'animate-spin' : ''}`} />
{isReTranscribing ? 'Re-transcribing...' : 'Re-transcribe'}
</button>
</div>
<button
onClick={cutSelectedWords}
className="flex items-center gap-1 px-2 py-1 text-xs bg-editor-danger/20 text-editor-danger rounded hover:bg-editor-danger/30 transition-colors"
>
<Trash2 className="w-3 h-3" />
Cut {selectedWordIndices.length} words
</button>
)}
</div>

View File

@ -5,24 +5,31 @@ import { Play, Pause, SkipBack, SkipForward, Volume2 } from 'lucide-react';
export default function VideoPlayer() {
const videoRef = useRef<HTMLVideoElement>(null);
const audioRef = useRef<HTMLAudioElement>(null);
const videoUrl = useEditorStore((s) => s.videoUrl);
const isPlaying = useEditorStore((s) => s.isPlaying);
const duration = useEditorStore((s) => s.duration);
const { seekTo, togglePlay } = useVideoSync(videoRef);
// Determine if this is an audio file based on the URL
const isAudioFile = videoUrl && (videoUrl.includes('.wav') || videoUrl.includes('.mp3') || videoUrl.includes('.m4a') || videoUrl.includes('.aac'));
const mediaRef = isAudioFile ? audioRef : videoRef;
const { seekTo, togglePlay } = useVideoSync(mediaRef as React.RefObject<HTMLVideoElement | HTMLAudioElement | null>);
const [displayTime, setDisplayTime] = useState(0);
useEffect(() => {
const video = videoRef.current;
if (!video) return;
const media = mediaRef.current;
if (!media) return;
let raf = 0;
const tick = () => {
setDisplayTime(video.currentTime);
setDisplayTime(media.currentTime);
raf = requestAnimationFrame(tick);
};
raf = requestAnimationFrame(tick);
return () => cancelAnimationFrame(raf);
}, [videoUrl]);
}, [videoUrl, mediaRef]);
const formatTime = (seconds: number) => {
const m = Math.floor(seconds / 60);
@ -41,11 +48,11 @@ export default function VideoPlayer() {
const skip = useCallback(
(delta: number) => {
const video = videoRef.current;
if (!video) return;
seekTo(Math.max(0, Math.min(duration, video.currentTime + delta)));
const media = mediaRef.current;
if (!media) return;
seekTo(Math.max(0, Math.min(duration, media.currentTime + delta)));
},
[seekTo, duration],
[seekTo, duration, mediaRef],
);
if (!videoUrl) {
@ -59,13 +66,45 @@ export default function VideoPlayer() {
return (
<div className="w-full h-full flex flex-col">
<div className="flex-1 flex items-center justify-center bg-black rounded-lg overflow-hidden min-h-0">
<video
ref={videoRef}
src={videoUrl}
className="max-w-full max-h-full object-contain"
playsInline
onClick={togglePlay}
/>
{isAudioFile ? (
<audio
ref={audioRef}
src={videoUrl}
className="max-w-full max-h-full"
controls={false}
preload="none"
onClick={togglePlay}
onError={(e) => {
console.error('Audio load error:', e);
console.error('Audio src:', videoUrl);
}}
onLoadStart={() => console.log('Audio load start:', videoUrl)}
onLoadedData={() => console.log('Audio loaded data')}
onCanPlay={() => console.log('Audio can play')}
onProgress={() => console.log('Audio progress event')}
onStalled={() => console.log('Audio stalled')}
onSuspend={() => console.log('Audio suspended')}
/>
) : (
<video
ref={videoRef}
src={videoUrl}
className="max-w-full max-h-full object-contain"
playsInline
preload="none"
onClick={togglePlay}
onError={(e) => {
console.error('Video load error:', e);
console.error('Video src:', videoUrl);
}}
onLoadStart={() => console.log('Video load start:', videoUrl)}
onLoadedData={() => console.log('Video loaded data')}
onCanPlay={() => console.log('Video can play')}
onProgress={() => console.log('Video progress event')}
onStalled={() => console.log('Video stalled')}
onSuspend={() => console.log('Video suspended')}
/>
)}
</div>
<div className="pt-2 space-y-1.5 shrink-0">

File diff suppressed because it is too large Load Diff

View File

@ -1,459 +0,0 @@
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { useEditorStore } from '../store/editorStore';
import { Trash2, Scissors, Volume2, SlidersHorizontal, Gauge, Play } from 'lucide-react';
function formatTimelineLikeTime(secs: number): string {
const m = Math.floor(secs / 60);
const s = secs % 60;
if (m > 0) return `${m}:${String(Math.floor(s)).padStart(2, '0')}.${Math.floor((s % 1) * 10)}`;
return `${s.toFixed(1)}s`;
}
export default function ZoneEditor() {
const [viewMode, setViewMode] = useState<'all' | 'cut' | 'mute' | 'gain' | 'speed'>('all');
const [focusedZone, setFocusedZone] = useState<{ type: 'cut' | 'mute' | 'gain' | 'speed'; id: string } | null>(null);
const previewFrameRef = useRef<number | null>(null);
const {
cutRanges,
muteRanges,
gainRanges,
speedRanges,
duration,
setCurrentTime,
zonePreviewPaddingSeconds,
setZonePreviewPaddingSeconds,
globalGainDb,
setGlobalGainDb,
removeCutRange,
removeMuteRange,
removeGainRange,
removeSpeedRange,
updateGainRange,
updateSpeedRange,
} = useEditorStore();
const stopPreviewLoop = useCallback(() => {
if (previewFrameRef.current !== null) {
cancelAnimationFrame(previewFrameRef.current);
previewFrameRef.current = null;
}
}, []);
useEffect(() => stopPreviewLoop, [stopPreviewLoop]);
const previewZone = useCallback((start: number, end: number) => {
const video = document.querySelector('video');
if (!(video instanceof HTMLVideoElement)) return;
stopPreviewLoop();
const previewStart = Math.max(0, start - zonePreviewPaddingSeconds);
const maxDuration = Number.isFinite(duration) && duration > 0 ? duration : video.duration;
const previewEnd = Math.min(maxDuration || end + zonePreviewPaddingSeconds, end + zonePreviewPaddingSeconds);
video.currentTime = previewStart;
setCurrentTime(previewStart);
const tick = () => {
if (video.paused || video.ended) {
previewFrameRef.current = null;
return;
}
if (video.currentTime >= previewEnd) {
video.pause();
video.currentTime = previewEnd;
setCurrentTime(previewEnd);
previewFrameRef.current = null;
return;
}
previewFrameRef.current = requestAnimationFrame(tick);
};
void video.play();
previewFrameRef.current = requestAnimationFrame(tick);
}, [duration, setCurrentTime, stopPreviewLoop, zonePreviewPaddingSeconds]);
const renderPreviewButton = (start: number, end: number, accentClass: string) => (
<button
onClick={(e) => {
e.stopPropagation();
previewZone(start, end);
}}
className={`p-1 rounded opacity-0 group-hover:opacity-100 transition-opacity ${accentClass}`}
title={`Play ${zonePreviewPaddingSeconds.toFixed(2)}s before and after zone`}
>
<Play className="w-3.5 h-3.5" />
</button>
);
const totalZones = cutRanges.length + muteRanges.length + gainRanges.length + speedRanges.length;
const getZoneTypeColor = (type: 'cut' | 'mute' | 'gain' | 'speed') => {
switch (type) {
case 'cut':
return 'border-red-500/40 bg-red-500/5';
case 'mute':
return 'border-blue-500/40 bg-blue-500/20';
case 'gain':
return 'border-amber-500/40 bg-amber-500/5';
case 'speed':
return 'border-emerald-500/40 bg-emerald-500/5';
}
};
const activeFocusedZone = useMemo(() => {
if (!focusedZone) return null;
const exists = focusedZone.type === 'cut'
? cutRanges.some((range) => range.id === focusedZone.id)
: focusedZone.type === 'mute'
? muteRanges.some((range) => range.id === focusedZone.id)
: focusedZone.type === 'gain'
? gainRanges.some((range) => range.id === focusedZone.id)
: speedRanges.some((range) => range.id === focusedZone.id);
return exists ? focusedZone : null;
}, [cutRanges, focusedZone, gainRanges, muteRanges, speedRanges]);
const isZoneFocused = useCallback(
(type: 'cut' | 'mute' | 'gain' | 'speed', id: string) => activeFocusedZone?.type === type && activeFocusedZone.id === id,
[activeFocusedZone],
);
const removeZone = useCallback((type: 'cut' | 'mute' | 'gain' | 'speed', id: string) => {
if (!window.confirm("Delete this zone?")) return;
if (type === 'cut') removeCutRange(id);
else if (type === 'mute') removeMuteRange(id);
else if (type === 'gain') removeGainRange(id);
else removeSpeedRange(id);
setFocusedZone((current) => (current?.type === type && current.id === id ? null : current));
}, [removeCutRange, removeGainRange, removeMuteRange, removeSpeedRange]);
useEffect(() => {
const handleKeyDown = (e: KeyboardEvent) => {
const target = e.target as HTMLElement | null;
if (target && (target.tagName === 'INPUT' || target.tagName === 'TEXTAREA' || target.tagName === 'SELECT')) {
return;
}
if (e.key === 'Escape') {
setFocusedZone(null);
return;
}
if ((e.key === 'Delete' || e.key === 'Backspace') && activeFocusedZone) {
e.preventDefault();
removeZone(activeFocusedZone.type, activeFocusedZone.id);
}
};
window.addEventListener('keydown', handleKeyDown, { capture: true });
return () => window.removeEventListener('keydown', handleKeyDown, { capture: true });
}, [activeFocusedZone, removeZone]);
  // Render: header with preview-padding control, a view-mode filter bar, and
  // one collapsible list per zone type (cut / mute / gain / speed).
  return (
    <div className="p-4 space-y-4">
      <div className="space-y-2">
        <div className="space-y-1">
          <div className="flex items-start justify-between gap-3">
            <div>
              <h3 className="text-sm font-semibold flex items-center gap-2">
                Zone Editor
              </h3>
              <p className="text-xs text-editor-text-muted">
                Manage all timeline zones ({totalZones} total)
              </p>
            </div>
            {/* Preview padding: seconds of context played before and after a zone */}
            <div className="min-w-[160px] rounded border border-editor-border bg-editor-surface px-2 py-1.5">
              <div className="flex items-center justify-between gap-2">
                <span className="text-[10px] uppercase tracking-wide text-editor-text-muted">Preview</span>
                <span className="text-[10px] text-editor-text-muted">before/after</span>
              </div>
              <div className="mt-1 flex items-center gap-1.5">
                <input
                  type="number"
                  min={0}
                  max={10}
                  step={0.25}
                  value={zonePreviewPaddingSeconds}
                  onChange={(e) => setZonePreviewPaddingSeconds(Number(e.target.value) || 0)}
                  className="w-16 px-2 py-1 bg-editor-bg border border-editor-border rounded text-xs text-editor-text focus:outline-none focus:border-editor-accent"
                  title="Preview time before and after each zone"
                />
                <span className="text-xs text-editor-text-muted">sec</span>
              </div>
            </div>
          </div>
        </div>
        {/* View Mode Toggle */}
        <div className="flex items-center gap-1 rounded bg-editor-surface border border-editor-border p-1">
          <button
            onClick={() => setViewMode('all')}
            className={`px-2 py-1 text-xs rounded transition-colors ${
              viewMode === 'all'
                ? 'bg-editor-accent text-white'
                : 'text-editor-text-muted hover:text-editor-text'
            }`}
            title="Show all zones"
          >
            All
          </button>
          <button
            onClick={() => setViewMode('cut')}
            className={`px-2 py-1 text-xs rounded transition-colors ${
              viewMode === 'cut'
                ? 'bg-red-500/30 text-red-500'
                : 'text-editor-text-muted hover:text-editor-text'
            }`}
            title="Show only Cut zones"
          >
            Cut
          </button>
          <button
            onClick={() => setViewMode('mute')}
            className={`px-2 py-1 text-xs rounded transition-colors ${
              viewMode === 'mute'
                ? 'bg-blue-500/20 text-blue-400'
                : 'text-editor-text-muted hover:text-editor-text'
            }`}
            title="Show only Mute zones"
          >
            Mute
          </button>
          <button
            onClick={() => setViewMode('gain')}
            className={`px-2 py-1 text-xs rounded transition-colors ${
              viewMode === 'gain'
                ? 'bg-amber-500/30 text-amber-500'
                : 'text-editor-text-muted hover:text-editor-text'
            }`}
            title="Show only Gain zones"
          >
            Gain
          </button>
          <button
            onClick={() => setViewMode('speed')}
            className={`px-2 py-1 text-xs rounded transition-colors ${
              viewMode === 'speed'
                ? 'bg-emerald-500/30 text-emerald-500'
                : 'text-editor-text-muted hover:text-editor-text'
            }`}
            title="Show only Speed zones"
          >
            Speed
          </button>
        </div>
      </div>
      {/* Empty state when no zones of any type exist */}
      {totalZones === 0 ? (
        <div className="p-4 rounded-lg border border-dashed border-editor-border text-center">
          <p className="text-xs text-editor-text-muted">
            No zones yet. Create zones from the toolbar or by highlighting words.
          </p>
        </div>
      ) : (
        <div className="space-y-3">
          {/* Cut Zones */}
          {(viewMode === 'all' || viewMode === 'cut') && cutRanges.length > 0 && (
            <div className="space-y-2">
              <div className="text-xs font-semibold text-red-500/80 flex items-center gap-2">
                <Scissors className="w-3.5 h-3.5" />
                Cut Zones ({cutRanges.length})
              </div>
              <div className="space-y-1">
                {cutRanges.map((range) => (
                  <div
                    key={range.id}
                    onClick={() => setFocusedZone({ type: 'cut', id: range.id })}
                    className={`px-2 py-1.5 rounded-lg border text-xs flex items-center gap-2 group cursor-pointer transition-colors ${getZoneTypeColor('cut')} ${isZoneFocused('cut', range.id) ? 'ring-1 ring-red-400 border-red-400/80 bg-red-500/12' : ''}`}
                  >
                    <div className="flex-1 min-w-0">
                      <div className="font-medium truncate">
                        {formatTimelineLikeTime(range.start)} - {formatTimelineLikeTime(range.end)}
                      </div>
                    </div>
                    {renderPreviewButton(range.start, range.end, 'hover:bg-red-500/20 text-red-500/70 hover:text-red-500')}
                    <button
                      onClick={(e) => {
                        e.stopPropagation();
                        removeZone('cut', range.id);
                      }}
                      className="p-1 rounded hover:bg-red-500/20 text-red-500/70 hover:text-red-500 opacity-0 group-hover:opacity-100 transition-opacity"
                      title="Delete cut zone"
                    >
                      <Trash2 className="w-3.5 h-3.5" />
                    </button>
                  </div>
                ))}
              </div>
            </div>
          )}
          {/* Mute Zones */}
          {(viewMode === 'all' || viewMode === 'mute') && muteRanges.length > 0 && (
            <div className="space-y-2">
              <div className="text-xs font-semibold text-blue-400 flex items-center gap-2">
                <Volume2 className="w-3.5 h-3.5" />
                Mute Zones ({muteRanges.length})
              </div>
              <div className="space-y-1">
                {muteRanges.map((range) => (
                  <div
                    key={range.id}
                    onClick={() => setFocusedZone({ type: 'mute', id: range.id })}
                    className={`px-2 py-1.5 rounded-lg border text-xs flex items-center gap-2 group cursor-pointer transition-colors ${getZoneTypeColor('mute')} ${isZoneFocused('mute', range.id) ? 'ring-1 ring-blue-400 border-blue-400/80 bg-blue-500/20' : ''}`}
                  >
                    <div className="flex-1 min-w-0">
                      <div className="font-medium truncate">
                        {formatTimelineLikeTime(range.start)} - {formatTimelineLikeTime(range.end)}
                      </div>
                    </div>
                    {renderPreviewButton(range.start, range.end, 'hover:bg-blue-500/20 text-blue-400 hover:text-blue-400')}
                    <button
                      onClick={(e) => {
                        e.stopPropagation();
                        removeZone('mute', range.id);
                      }}
                      className="p-1 rounded hover:bg-blue-500/20 text-blue-400 hover:text-blue-400 opacity-0 group-hover:opacity-100 transition-opacity"
                      title="Delete mute zone"
                    >
                      <Trash2 className="w-3.5 h-3.5" />
                    </button>
                  </div>
                ))}
              </div>
            </div>
          )}
          {/* Sound Gain */}
          {(viewMode === 'all' || viewMode === 'gain') && gainRanges.length > 0 && (
            <div className="space-y-2">
              <div className="text-xs font-semibold text-amber-500/80 flex items-center gap-2">
                <SlidersHorizontal className="w-3.5 h-3.5" />
                Sound Gain ({gainRanges.length})
              </div>
              {/* Global Gain Slider */}
              <div className="px-2 py-2 rounded border border-amber-500/20 bg-amber-500/5 space-y-2">
                <label className="text-xs text-editor-text-muted font-medium">Global Gain</label>
                <div className="flex items-center gap-2">
                  <input
                    type="range"
                    min={-24}
                    max={24}
                    step={0.5}
                    value={globalGainDb}
                    onChange={(e) => setGlobalGainDb(Number(e.target.value))}
                    className="flex-1 h-1.5"
                  />
                  <input
                    type="number"
                    min={-24}
                    max={24}
                    step={0.5}
                    value={globalGainDb}
                    onChange={(e) => setGlobalGainDb(Math.max(-24, Math.min(24, Number(e.target.value) || 0)))}
                    className="w-14 px-1.5 py-0.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
                    title="Volume adjustment in decibels — +6 dB doubles volume, -6 dB halves it"
                  />
                  <span className="text-xs text-amber-500/80 font-medium w-6 text-right">dB</span>
                </div>
              </div>
              <div className="space-y-1">
                {gainRanges.map((range) => (
                  <div
                    key={range.id}
                    onClick={() => setFocusedZone({ type: 'gain', id: range.id })}
                    className={`px-2 py-1.5 rounded-lg border text-xs flex items-center gap-2 group cursor-pointer transition-colors ${getZoneTypeColor('gain')} ${isZoneFocused('gain', range.id) ? 'ring-1 ring-amber-400 border-amber-400/80 bg-amber-500/12' : ''}`}
                  >
                    <div className="flex-1 min-w-0">
                      <div className="font-medium truncate">
                        {formatTimelineLikeTime(range.start)} - {formatTimelineLikeTime(range.end)}
                      </div>
                      <div className="text-editor-text-muted text-[10px]">
                        {range.gainDb > 0 ? '+' : ''}{range.gainDb.toFixed(1)} dB
                      </div>
                    </div>
                    <input
                      type="number"
                      min={-24}
                      max={24}
                      step={0.5}
                      value={range.gainDb}
                      onClick={(e) => e.stopPropagation()}
                      onChange={(e) => updateGainRange(range.id, Number(e.target.value) || 0)}
                      className="w-16 px-1.5 py-0.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
                      title="Volume adjustment in decibels — +6 dB doubles volume, -6 dB halves it"
                    />
                    {renderPreviewButton(range.start, range.end, 'hover:bg-amber-500/20 text-amber-500/70 hover:text-amber-500')}
                    <button
                      onClick={(e) => {
                        e.stopPropagation();
                        removeZone('gain', range.id);
                      }}
                      className="p-1 rounded hover:bg-amber-500/20 text-amber-500/70 hover:text-amber-500 opacity-0 group-hover:opacity-100 transition-opacity"
                      title="Delete gain zone"
                    >
                      <Trash2 className="w-3.5 h-3.5" />
                    </button>
                  </div>
                ))}
              </div>
            </div>
          )}
          {/* Speed Adjust */}
          {(viewMode === 'all' || viewMode === 'speed') && speedRanges.length > 0 && (
            <div className="space-y-2">
              <div className="text-xs font-semibold text-emerald-500/80 flex items-center gap-2">
                <Gauge className="w-3.5 h-3.5" />
                Speed Adjust ({speedRanges.length})
              </div>
              <div className="space-y-1">
                {speedRanges.map((range) => (
                  <div
                    key={range.id}
                    onClick={() => setFocusedZone({ type: 'speed', id: range.id })}
                    className={`px-2 py-1.5 rounded-lg border text-xs flex items-center gap-2 group cursor-pointer transition-colors ${getZoneTypeColor('speed')} ${isZoneFocused('speed', range.id) ? 'ring-1 ring-emerald-400 border-emerald-400/80 bg-emerald-500/12' : ''}`}
                  >
                    <div className="flex-1 min-w-0">
                      <div className="font-medium truncate">
                        {formatTimelineLikeTime(range.start)} - {formatTimelineLikeTime(range.end)}
                      </div>
                      <div className="text-editor-text-muted text-[10px]">
                        {range.speed.toFixed(2)}x
                      </div>
                    </div>
                    <input
                      type="number"
                      min={0.25}
                      max={4}
                      step={0.05}
                      value={range.speed}
                      onClick={(e) => e.stopPropagation()}
                      onChange={(e) => updateSpeedRange(range.id, Number(e.target.value) || 1)}
                      className="w-16 px-1.5 py-0.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none"
                      title="Playback speed multiplier — 1.0x is normal, 2.0x is twice as fast"
                    />
                    {renderPreviewButton(range.start, range.end, 'hover:bg-emerald-500/20 text-emerald-500/70 hover:text-emerald-500')}
                    <button
                      onClick={(e) => {
                        e.stopPropagation();
                        removeZone('speed', range.id);
                      }}
                      className="p-1 rounded hover:bg-emerald-500/20 text-emerald-500/70 hover:text-emerald-500 opacity-0 group-hover:opacity-100 transition-opacity"
                      title="Delete speed zone"
                    >
                      <Trash2 className="w-3.5 h-3.5" />
                    </button>
                  </div>
                ))}
              </div>
            </div>
          )}
        </div>
      )}
    </div>
  );
}

View File

@ -1,23 +1,13 @@
import { useEffect, useRef } from 'react';
import { useEditorStore } from '../store/editorStore';
import { loadBindings, DEFAULT_PRESETS } from '../lib/keybindings';
import type { KeyBinding } from '../types/project';
export function useKeyboardShortcuts() {
const deleteSelectedWords = useEditorStore((s) => s.deleteSelectedWords);
const addCutRange = useEditorStore((s) => s.addCutRange);
const markInTime = useEditorStore((s) => s.markInTime);
const markOutTime = useEditorStore((s) => s.markOutTime);
const setMarkInTime = useEditorStore((s) => s.setMarkInTime);
const setMarkOutTime = useEditorStore((s) => s.setMarkOutTime);
const clearMarkRange = useEditorStore((s) => s.clearMarkRange);
const selectedWordIndices = useEditorStore((s) => s.selectedWordIndices);
const words = useEditorStore((s) => s.words);
const playbackRateRef = useRef(1);
// Read bindings fresh from localStorage on every call to avoid stale closures
const getBindings = (): KeyBinding[] => {
try { return loadBindings(); } catch { return []; }
};
const playbackRateRef = useRef(1);
useEffect(() => {
const getVideo = (): HTMLVideoElement | null => document.querySelector('video');
@ -28,58 +18,70 @@ export function useKeyboardShortcuts() {
const video = getVideo();
// Build a key string from the event for matching
const parts: string[] = [];
if (e.ctrlKey || e.metaKey) parts.push('Ctrl');
if (e.shiftKey && !['Shift'].includes(e.key)) parts.push('Shift');
if (e.altKey) parts.push('Alt');
const keyStr = e.key === ' ' ? 'Space' : e.key.length === 1 ? e.key.toUpperCase() : e.key;
parts.push(keyStr);
const combo = parts.join('+');
// Look up binding — fresh read every keystroke so Settings changes take effect immediately
const currentBindings = getBindings();
const binding = currentBindings.find((b) => b.keys === combo);
if (!binding) return; // Unbound key — ignore
e.preventDefault();
switch (binding.id) {
case 'undo':
useEditorStore.temporal.getState().undo();
return;
case 'redo':
switch (true) {
// --- Undo / Redo ---
case e.key === 'z' && (e.ctrlKey || e.metaKey) && e.shiftKey: {
e.preventDefault();
useEditorStore.temporal.getState().redo();
return;
case 'cut': {
}
case e.key === 'z' && (e.ctrlKey || e.metaKey): {
e.preventDefault();
useEditorStore.temporal.getState().undo();
return;
}
// --- Delete / Backspace: cut selected words ---
case e.key === 'Delete' || e.key === 'Backspace': {
if (selectedWordIndices.length > 0) {
e.preventDefault();
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
addCutRange(words[sorted[0]].start, words[sorted[sorted.length - 1]].end);
return;
}
if (markInTime !== null && markOutTime !== null) {
const start = Math.min(markInTime, markOutTime);
const end = Math.max(markInTime, markOutTime);
if (end - start >= 0.01) addCutRange(start, end);
clearMarkRange();
const startTime = words[sorted[0]].start;
const endTime = words[sorted[sorted.length - 1]].end;
addCutRange(startTime, endTime);
}
return;
}
case 'play-pause':
if (video) { if (video.paused) video.play(); else video.pause(); }
// --- Space: play / pause ---
case e.key === ' ' && !e.ctrlKey: {
e.preventDefault();
if (video) {
if (video.paused) video.play();
else video.pause();
}
return;
case 'slow-down': {
}
// --- J: reverse / slow down ---
case e.key === 'j' || e.key === 'J': {
e.preventDefault();
if (video) {
playbackRateRef.current = Math.max(-2, playbackRateRef.current - 0.5);
if (playbackRateRef.current < 0) video.currentTime = Math.max(0, video.currentTime - 2);
else { video.playbackRate = playbackRateRef.current; if (video.paused) video.play(); }
if (playbackRateRef.current < 0) {
// HTML5 video doesn't support negative rates natively; step back
video.currentTime = Math.max(0, video.currentTime - 2);
} else {
video.playbackRate = playbackRateRef.current;
if (video.paused) video.play();
}
}
return;
}
case 'pause':
if (video) { video.pause(); playbackRateRef.current = 1; }
// --- K: pause ---
case e.key === 'k' || e.key === 'K': {
e.preventDefault();
if (video) {
video.pause();
playbackRateRef.current = 1;
}
return;
case 'speed-up': {
}
// --- L: forward / speed up ---
case e.key === 'l' || e.key === 'L': {
e.preventDefault();
if (video) {
playbackRateRef.current = Math.min(4, playbackRateRef.current + 0.5);
video.playbackRate = Math.max(0.25, playbackRateRef.current);
@ -87,37 +89,58 @@ export function useKeyboardShortcuts() {
}
return;
}
case 'rewind':
// --- Arrow Left: seek back 5s ---
case e.key === 'ArrowLeft' && !e.ctrlKey: {
e.preventDefault();
if (video) video.currentTime = Math.max(0, video.currentTime - 5);
return;
case 'forward':
}
// --- Arrow Right: seek forward 5s ---
case e.key === 'ArrowRight' && !e.ctrlKey: {
e.preventDefault();
if (video) video.currentTime = Math.min(video.duration, video.currentTime + 5);
return;
case 'mark-in':
if (video) setMarkInTime(video.currentTime);
return;
case 'mark-out':
if (video) setMarkOutTime(video.currentTime);
return;
case 'save': {
const saveBtn = document.querySelector('[title="Save"]') as HTMLButtonElement | null;
if (saveBtn) saveBtn.click();
else saveProject();
}
// --- [ mark in-point (home) ---
case e.key === '[': {
e.preventDefault();
if (video) video.currentTime = 0;
return;
}
case 'export': {
// --- ] mark out-point (end) ---
case e.key === ']': {
e.preventDefault();
if (video) video.currentTime = video.duration;
return;
}
// --- Ctrl+S: save project ---
case e.key === 's' && (e.ctrlKey || e.metaKey): {
e.preventDefault();
saveProject();
return;
}
// --- Ctrl+E: export ---
case e.key === 'e' && (e.ctrlKey || e.metaKey): {
e.preventDefault();
// Trigger export panel via DOM click
const exportBtn = document.querySelector('[title="Export"]') as HTMLButtonElement;
if (exportBtn) exportBtn.click();
return;
}
case 'search': {
const findBtn = document.querySelector('[title="Find (Ctrl+F)"]') as HTMLButtonElement;
if (findBtn) findBtn.click();
// --- ?: show shortcut cheatsheet ---
case e.key === '?' || (e.key === '/' && e.shiftKey): {
e.preventDefault();
toggleCheatsheet();
return;
}
case 'help':
toggleCheatsheet(currentBindings);
return;
default:
break;
}
@ -125,7 +148,7 @@ export function useKeyboardShortcuts() {
window.addEventListener('keydown', handler);
return () => window.removeEventListener('keydown', handler);
}, [addCutRange, markInTime, markOutTime, setMarkInTime, setMarkOutTime, clearMarkRange, selectedWordIndices, words]);
}, [deleteSelectedWords, selectedWordIndices]);
}
async function saveProject() {
@ -133,80 +156,86 @@ async function saveProject() {
if (!state.videoPath || state.words.length === 0) return;
try {
const projectData = state.saveProject();
let outputPath = state.projectFilePath;
const projectData = {
version: 1,
videoPath: state.videoPath,
words: state.words,
segments: state.segments,
deletedRanges: state.deletedRanges,
language: state.language,
createdAt: new Date().toISOString(),
modifiedAt: new Date().toISOString(),
};
if (!outputPath) {
outputPath = await window.electronAPI?.saveFile({
defaultPath: state.videoPath.replace(/\.[^.]+$/, '.aive'),
filters: [{ name: 'TalkEdit Project', extensions: ['aive'] }],
});
}
const outputPath = await window.desktopAPI?.saveFile({
defaultPath: state.videoPath.replace(/\.[^.]+$/, '.aive'),
filters: [{ name: 'TalkEdit Project', extensions: ['aive'] }],
});
if (!outputPath) return;
const resolvedPath = outputPath.endsWith('.aive') ? outputPath : `${outputPath}.aive`;
if (window.electronAPI?.writeFile) {
await window.electronAPI.writeFile(resolvedPath, JSON.stringify(projectData, null, 2));
useEditorStore.getState().setProjectFilePath(resolvedPath);
} else {
const blob = new Blob([JSON.stringify(projectData, null, 2)], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = resolvedPath.split(/[\\/]/).pop() || 'project.aive';
a.click();
URL.revokeObjectURL(url);
useEditorStore.getState().setProjectFilePath(resolvedPath);
if (outputPath) {
if (window.desktopAPI?.writeFile) {
await window.desktopAPI.writeFile(outputPath, JSON.stringify(projectData, null, 2));
} else {
const blob = new Blob([JSON.stringify(projectData, null, 2)], { type: 'application/json' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = outputPath.split(/[\\/]/).pop() || 'project.aive';
a.click();
URL.revokeObjectURL(url);
}
}
} catch (err) {
console.error('Failed to save project:', err);
}
}
function toggleCheatsheet(bindings: KeyBinding[]) {
let cheatsheetVisible = false;
function toggleCheatsheet() {
const existing = document.getElementById('keyboard-cheatsheet');
if (existing) {
existing.remove();
cheatsheetVisible = false;
return;
}
cheatsheetVisible = true;
const overlay = document.createElement('div');
overlay.id = 'keyboard-cheatsheet';
overlay.style.cssText =
'position:fixed;inset:0;z-index:9999;display:flex;align-items:center;justify-content:center;background:rgba(0,0,0,0.7);';
overlay.onclick = () => {
overlay.remove();
cheatsheetVisible = false;
};
const presetName = JSON.stringify(bindings) === JSON.stringify(DEFAULT_PRESETS['left-hand']) ? 'Left-Hand Preset' : 'Standard Preset';
const shortcuts = [
['Space', 'Play / Pause'],
['J', 'Reverse / Slow down'],
['K', 'Pause'],
['L', 'Forward / Speed up'],
['\u2190 / \u2192', 'Seek \u00b15 seconds'],
['Delete', 'Cut selected words'],
['Ctrl+Z', 'Undo'],
['Ctrl+Shift+Z', 'Redo'],
['Ctrl+S', 'Save project'],
['Ctrl+E', 'Export'],
['?', 'This cheatsheet'],
];
const rows = bindings
const rows = shortcuts
.map(
(b) =>
`<tr><td style="padding:6px 16px 6px 0;font-family:monospace;color:#818cf8;font-weight:600;white-space:nowrap">${b.keys}</td><td style="padding:6px 0;color:#e2e8f0">${b.label}</td><td style="padding:6px 0 6px 12px;font-size:10px;color:#94a3b8">${b.category}</td></tr>`,
([key, desc]) =>
`<tr><td style="padding:6px 16px 6px 0;font-family:monospace;color:#818cf8;font-weight:600">${key}</td><td style="padding:6px 0;color:#e2e8f0">${desc}</td></tr>`,
)
.join('');
overlay.innerHTML = `<div style="background:#1a1d27;border:1px solid #2a2d3a;border-radius:12px;padding:24px 32px;max-width:450px;position:relative;" onclick="event.stopPropagation()">
<div style="font-size:11px;color:#94a3b8;margin-bottom:12px">Active preset: <span style="color:#818cf8;font-weight:500">${presetName}</span></div>
overlay.innerHTML = `<div style="background:#1a1d27;border:1px solid #2a2d3a;border-radius:12px;padding:24px 32px;max-width:400px;" onclick="event.stopPropagation()">
<h3 style="margin:0 0 16px;font-size:14px;font-weight:600;color:#e2e8f0">Keyboard Shortcuts</h3>
<table style="font-size:13px">${rows}</table>
<p style="margin:16px 0 0;font-size:11px;color:#94a3b8;text-align:center">Customize in Settings &bull; Press ? to close</p>
<button id="cheatsheet-close" style="position:absolute;top:12px;right:16px;background:none;border:none;color:#94a3b8;font-size:18px;cursor:pointer;line-height:1;padding:4px;">&times;</button>
<p style="margin:16px 0 0;font-size:11px;color:#94a3b8;text-align:center">Press ? or click outside to close</p>
</div>`;
document.body.appendChild(overlay);
const closeBtn = overlay.querySelector('#cheatsheet-close') as HTMLButtonElement;
if (closeBtn) closeBtn.onclick = () => overlay.remove();
const escHandler = (e: KeyboardEvent) => {
if (e.key === 'Escape') {
overlay.remove();
document.removeEventListener('keydown', escHandler);
}
};
document.addEventListener('keydown', escHandler);
}

View File

@ -1,81 +1,24 @@
import { useCallback, useRef, useEffect } from 'react';
import { useEditorStore } from '../store/editorStore';
export function useVideoSync(videoRef: React.RefObject<HTMLVideoElement | null>) {
export function useVideoSync(videoRef: React.RefObject<HTMLVideoElement | HTMLAudioElement | null>) {
const rafRef = useRef<number>(0);
const {
setCurrentTime,
setDuration,
setIsPlaying,
deletedRanges,
cutRanges,
muteRanges,
speedRanges,
} = useEditorStore();
const getPlaybackRateAtTime = useCallback(
(time: number) => {
for (const range of speedRanges) {
if (time >= range.start && time < range.end) {
return range.speed;
}
}
return 1;
},
[speedRanges],
);
const applyVideoEffects = useCallback(
(video: HTMLVideoElement) => {
let t = video.currentTime;
const allSkipRanges = [...cutRanges];
let skipCount = 0;
const maxSkips = 10;
while (skipCount < maxSkips) {
let shouldSkip = false;
for (const range of allSkipRanges) {
if (t >= range.start && t < range.end) {
t = range.end;
shouldSkip = true;
skipCount++;
break;
}
}
if (!shouldSkip) break;
}
if (skipCount > 0 && video.currentTime !== t) {
video.currentTime = t;
}
let shouldMute = false;
for (const range of muteRanges) {
if (t >= range.start && t < range.end) {
shouldMute = true;
break;
}
}
video.muted = shouldMute;
const playbackRate = getPlaybackRateAtTime(t);
if (video.playbackRate !== playbackRate) {
video.playbackRate = playbackRate;
}
setCurrentTime(t);
return t;
},
[cutRanges, muteRanges, getPlaybackRateAtTime, setCurrentTime],
);
const seekTo = useCallback(
(time: number) => {
if (videoRef.current) {
let targetTime = time;
// If seeking into cut or deleted ranges, skip to the end (handle overlapping/chained ranges)
const allSkipRanges = [...cutRanges];
const allSkipRanges = [...deletedRanges, ...cutRanges];
let skipCount = 0;
const maxSkips = 10; // Prevent infinite loops
@ -93,11 +36,10 @@ export function useVideoSync(videoRef: React.RefObject<HTMLVideoElement | null>)
}
videoRef.current.currentTime = targetTime;
videoRef.current.playbackRate = getPlaybackRateAtTime(targetTime);
setCurrentTime(targetTime);
}
},
[videoRef, cutRanges, getPlaybackRateAtTime, setCurrentTime],
[videoRef, deletedRanges, cutRanges, setCurrentTime],
);
const togglePlay = useCallback(() => {
@ -110,55 +52,70 @@ export function useVideoSync(videoRef: React.RefObject<HTMLVideoElement | null>)
}, [videoRef]);
useEffect(() => {
const video = videoRef.current;
if (!video) return;
const updateWhilePlaying = () => {
applyVideoEffects(video);
if (!video.paused && !video.ended) {
rafRef.current = requestAnimationFrame(updateWhilePlaying);
}
};
const media = videoRef.current;
if (!media) return;
const onTimeUpdate = () => {
cancelAnimationFrame(rafRef.current);
rafRef.current = requestAnimationFrame(() => {
applyVideoEffects(video);
let t = media.currentTime;
// Skip over deleted ranges and cut ranges (handle overlapping/chained ranges)
const allSkipRanges = [...deletedRanges, ...cutRanges];
let skipCount = 0;
const maxSkips = 10; // Prevent infinite loops
while (skipCount < maxSkips) {
let shouldSkip = false;
for (const range of allSkipRanges) {
if (t >= range.start && t < range.end) {
t = range.end;
shouldSkip = true;
skipCount++;
break;
}
}
if (!shouldSkip) break;
}
if (skipCount > 0) {
media.currentTime = t;
return;
}
// Mute/unmute based on mute ranges (only for video elements)
if ('muted' in media) {
let shouldMute = false;
for (const range of muteRanges) {
if (t >= range.start && t < range.end) {
shouldMute = true;
break;
}
}
media.muted = shouldMute;
}
setCurrentTime(t);
});
};
const onPlay = () => {
setIsPlaying(true);
cancelAnimationFrame(rafRef.current);
rafRef.current = requestAnimationFrame(updateWhilePlaying);
};
const onPause = () => {
setIsPlaying(false);
cancelAnimationFrame(rafRef.current);
applyVideoEffects(video);
};
const onLoadedMetadata = () => {
setDuration(video.duration);
applyVideoEffects(video);
};
const onSeeked = () => applyVideoEffects(video);
const onPlay = () => setIsPlaying(true);
const onPause = () => setIsPlaying(false);
const onLoadedMetadata = () => setDuration(media.duration);
video.addEventListener('timeupdate', onTimeUpdate);
video.addEventListener('play', onPlay);
video.addEventListener('pause', onPause);
video.addEventListener('loadedmetadata', onLoadedMetadata);
video.addEventListener('seeked', onSeeked);
media.addEventListener('timeupdate', onTimeUpdate);
media.addEventListener('play', onPlay);
media.addEventListener('pause', onPause);
media.addEventListener('loadedmetadata', onLoadedMetadata);
return () => {
video.removeEventListener('timeupdate', onTimeUpdate);
video.removeEventListener('play', onPlay);
video.removeEventListener('pause', onPause);
video.removeEventListener('loadedmetadata', onLoadedMetadata);
video.removeEventListener('seeked', onSeeked);
media.removeEventListener('timeupdate', onTimeUpdate);
media.removeEventListener('play', onPlay);
media.removeEventListener('pause', onPause);
media.removeEventListener('loadedmetadata', onLoadedMetadata);
cancelAnimationFrame(rafRef.current);
video.playbackRate = 1;
};
}, [videoRef, applyVideoEffects, setIsPlaying, setDuration]);
}, [videoRef, deletedRanges, cutRanges, muteRanges, setCurrentTime, setIsPlaying, setDuration]);
return { seekTo, togglePlay };
}

View File

@ -1,3 +1,11 @@
@import '@fontsource/inter/300.css';
@import '@fontsource/inter/400.css';
@import '@fontsource/inter/500.css';
@import '@fontsource/inter/600.css';
@import '@fontsource/inter/700.css';
@import '@fontsource/jetbrains-mono/400.css';
@import '@fontsource/jetbrains-mono/500.css';
@tailwind base;
@tailwind components;
@tailwind utilities;
@ -8,12 +16,6 @@
100% { transform: scaleY(0.3); opacity: 0.5; }
}
@keyframes audioBounce {
0% { height: 12px; }
50% { height: var(--bar-peak); }
100% { height: 12px; }
}
.wave-bar {
animation: waveBar 0.9s ease-in-out infinite;
transform-origin: bottom;
@ -52,7 +54,3 @@ body {
video::-webkit-media-controls {
display: none !important;
}

View File

@ -1,26 +0,0 @@
import { describe, expect, test } from 'vitest';
import { assert } from './assert';
describe('assert', () => {
test('does not throw for true condition', () => {
expect(() => assert(true, 'should not throw')).not.toThrow();
});
test('throws in dev mode for false condition', () => {
expect(() => assert(false, 'should throw')).toThrow('Assertion failed: should throw');
});
test('includes message in error', () => {
try {
assert(false, 'custom message here');
} catch (e: any) {
expect(e.message).toContain('custom message here');
}
});
test('does not throw for truthy values', () => {
expect(() => assert(1 === 1, 'math works')).not.toThrow();
expect(() => assert('hello' === 'hello', 'strings work')).not.toThrow();
});
});

View File

@ -1,11 +0,0 @@
export function assert(condition: boolean, message: string): asserts condition {
if (!condition) {
const error = new Error(`Assertion failed: ${message}`);
if (import.meta.env.DEV) {
console.error('[Assertion]', message, error.stack);
throw error;
} else {
console.warn('[Assertion] (prod silenced):', message);
}
}
}

View File

@ -1,13 +1,29 @@
/**
* Dev-only console interceptor.
* Forwards console.error / console.warn to the backend /dev/log endpoint,
* which appends them to webview.log so the agent can read it.
* which appends them to a backend-managed dev log file.
*/
if (import.meta.env.DEV) {
const BACKEND = 'http://127.0.0.1:8000';
type ConsoleFn = (...args: unknown[]) => void;
const serialize = (value: unknown): string => {
if (typeof value === 'string') return value;
if (value instanceof Error) {
return JSON.stringify({
name: value.name,
message: value.message,
stack: value.stack,
});
}
try {
return JSON.stringify(value);
} catch {
return String(value);
}
};
const forward = (level: string, orig: ConsoleFn): ConsoleFn =>
(...args: unknown[]) => {
orig(...args);
@ -15,7 +31,7 @@ if (import.meta.env.DEV) {
fetch(`${BACKEND}/dev/log`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ level, message: String(first ?? ''), args: rest.map(String) }),
body: JSON.stringify({ level, message: serialize(first ?? ''), args: rest.map(serialize) }),
}).catch(() => {/* backend not running yet */});
};

View File

@ -1,83 +0,0 @@
/**
* Configurable keyboard shortcuts system.
* Stores bindings in localStorage under 'talkedit:keybindings'.
* Provides default presets and conflict detection.
*/
import type { KeyBinding, HotkeyPreset } from '../types/project';
const STORAGE_KEY = 'talkedit:keybindings';
export const DEFAULT_PRESETS: Record<HotkeyPreset, KeyBinding[]> = {
'left-hand': [
{ id: 'play-pause', label: 'Play / Pause', keys: 'Space', category: 'transport' },
{ id: 'rewind', label: 'Rewind 5s', keys: 'Q', category: 'transport' },
{ id: 'forward', label: 'Forward 5s', keys: 'E', category: 'transport' },
{ id: 'speed-up', label: 'Speed Up', keys: 'W', category: 'transport' },
{ id: 'slow-down', label: 'Slow Down', keys: 'S', category: 'transport' },
{ id: 'pause', label: 'Pause', keys: 'D', category: 'transport' },
{ id: 'mark-in', label: 'Mark In Point', keys: 'A', category: 'edit' },
{ id: 'mark-out', label: 'Mark Out Point', keys: 'F', category: 'edit' },
{ id: 'cut', label: 'Cut Selection', keys: 'X', category: 'edit' },
{ id: 'undo', label: 'Undo', keys: 'Ctrl+Z', category: 'edit' },
{ id: 'redo', label: 'Redo', keys: 'Ctrl+Shift+Z', category: 'edit' },
{ id: 'save', label: 'Save', keys: 'Ctrl+S', category: 'file' },
{ id: 'export', label: 'Export', keys: 'Ctrl+E', category: 'file' },
{ id: 'search', label: 'Find in Transcript', keys: 'Ctrl+F', category: 'edit' },
{ id: 'help', label: 'Shortcut Help', keys: '?', category: 'view' },
],
'standard': [
{ id: 'play-pause', label: 'Play / Pause', keys: 'Space', category: 'transport' },
{ id: 'rewind', label: 'Rewind 5s', keys: 'ArrowLeft', category: 'transport' },
{ id: 'forward', label: 'Forward 5s', keys: 'ArrowRight', category: 'transport' },
{ id: 'speed-up', label: 'Speed Up', keys: 'L', category: 'transport' },
{ id: 'slow-down', label: 'Slow Down', keys: 'J', category: 'transport' },
{ id: 'pause', label: 'Pause', keys: 'K', category: 'transport' },
{ id: 'mark-in', label: 'Mark In Point', keys: 'I', category: 'edit' },
{ id: 'mark-out', label: 'Mark Out Point', keys: 'O', category: 'edit' },
{ id: 'cut', label: 'Cut Selection', keys: 'Delete', category: 'edit' },
{ id: 'undo', label: 'Undo', keys: 'Ctrl+Z', category: 'edit' },
{ id: 'redo', label: 'Redo', keys: 'Ctrl+Shift+Z', category: 'edit' },
{ id: 'save', label: 'Save', keys: 'Ctrl+S', category: 'file' },
{ id: 'export', label: 'Export', keys: 'Ctrl+E', category: 'file' },
{ id: 'search', label: 'Find in Transcript', keys: 'Ctrl+F', category: 'edit' },
{ id: 'help', label: 'Shortcut Help', keys: '?', category: 'view' },
],
};
/**
 * Load the user's key bindings from localStorage.
 *
 * Falls back to the 'standard' preset when nothing is stored, when the
 * stored JSON is unparseable, or when it parses to something other than
 * an array (bug fix: previously any parseable value — e.g. an object from
 * a legacy/corrupt write — was returned as-is and broke array callers).
 *
 * @returns The persisted bindings, or the standard preset defaults.
 */
export function loadBindings(): KeyBinding[] {
  try {
    const stored = localStorage.getItem(STORAGE_KEY);
    if (stored) {
      const parsed = JSON.parse(stored);
      // Callers iterate the result as an array; reject any other shape.
      if (Array.isArray(parsed)) return parsed;
    }
  } catch { /* use defaults */ }
  return DEFAULT_PRESETS['standard'];
}
/** Persist the given key bindings to localStorage under the shared key. */
export function saveBindings(bindings: KeyBinding[]) {
  const serialized = JSON.stringify(bindings);
  localStorage.setItem(STORAGE_KEY, serialized);
}
export function applyPreset(preset: HotkeyPreset): KeyBinding[] {
const bindings = DEFAULT_PRESETS[preset];
saveBindings(bindings);
return bindings;
}
/**
 * Report duplicate key assignments among the given bindings.
 *
 * Returns one human-readable message per duplicated occurrence; when a key
 * appears more than twice, each later occurrence is reported against the
 * binding that most recently used that key.
 */
export function detectConflicts(bindings: KeyBinding[]): string[] {
  const messages: string[] = [];
  const lastLabelForKey = new Map<string, string>();
  bindings.forEach((binding) => {
    if (lastLabelForKey.has(binding.keys)) {
      const previousLabel = lastLabelForKey.get(binding.keys);
      messages.push(`"${binding.keys}" is used by both "${previousLabel}" and "${binding.label}"`);
    }
    lastLabelForKey.set(binding.keys, binding.label);
  });
  return messages;
}
/** Look up a binding by its action id; undefined when no binding matches. */
export function findBinding(bindings: KeyBinding[], id: string): KeyBinding | undefined {
  for (const binding of bindings) {
    if (binding.id === id) return binding;
  }
  return undefined;
}
/** Return the key combo bound to the given action id, or '' when unbound. */
export function getBoundKey(bindings: KeyBinding[], id: string): string {
  const match = findBinding(bindings, id);
  return match?.keys ? match.keys : '';
}

View File

@ -1,8 +1,8 @@
/**
* tauri-bridge.ts
*
* Polyfills window.electronAPI with Tauri equivalents so all existing
* call-sites in App.tsx, hooks, and stores continue to work unchanged.
* Exposes window.desktopAPI using Tauri equivalents so UI code can stay
* desktop-runtime agnostic.
*
* Imported once at the top of main.tsx.
*/
@ -11,9 +11,6 @@ import { invoke } from '@tauri-apps/api/core';
import { open, save } from '@tauri-apps/plugin-dialog';
import { readTextFile, writeTextFile } from '@tauri-apps/plugin-fs';
const backendPort = import.meta.env.VITE_BACKEND_PORT || '8000';
const backendUrl = `http://127.0.0.1:${backendPort}`;
const VIDEO_FILTERS = [
{ name: 'Audio and Video Files', extensions: ['mp4', 'avi', 'mov', 'mkv', 'webm', 'm4a', 'wav', 'mp3', 'flac'] },
{ name: 'All Files', extensions: ['*'] },
@ -28,48 +25,56 @@ const EXPORT_FILTERS = [
{ name: 'Project Files', extensions: ['aive'] },
];
window.electronAPI = {
const BACKEND_PORT = import.meta.env.VITE_BACKEND_PORT || '8000';
const BACKEND_URL = `http://127.0.0.1:${BACKEND_PORT}`;
const debugBridge = (event: string, details?: Record<string, unknown>) => {
if (!import.meta.env.DEV) return;
console.log('[tauri-bridge]', event, details ?? {});
};
window.desktopAPI = {
openFile: async (_options?: Record<string, unknown>): Promise<string | null> => {
void _options;
debugBridge('openFile:dialogOpen');
const result = await open({
multiple: false,
filters: VIDEO_FILTERS,
});
return typeof result === 'string' ? result : null;
const path = typeof result === 'string' ? result : null;
debugBridge('openFile:dialogResult', { path });
return path;
},
saveFile: async (options?: Record<string, unknown>): Promise<string | null> => {
const result = await save({
defaultPath: typeof options?.defaultPath === 'string' ? options.defaultPath : undefined,
filters: Array.isArray(options?.filters)
? (options.filters as Array<{ name: string; extensions: string[] }>)
: EXPORT_FILTERS,
});
return result ?? null;
saveFile: async (_options?: Record<string, unknown>): Promise<string | null> => {
debugBridge('saveFile:dialogOpen');
const result = await save({ filters: EXPORT_FILTERS });
const path = result ?? null;
debugBridge('saveFile:dialogResult', { path });
return path;
},
openProject: async (): Promise<string | null> => {
const projectDir = await invoke<string>('get_projects_directory');
debugBridge('openProject:dialogOpen');
const result = await open({
multiple: false,
defaultPath: projectDir,
filters: PROJECT_FILTERS,
});
return typeof result === 'string' ? result : null;
const path = typeof result === 'string' ? result : null;
debugBridge('openProject:dialogResult', { path });
return path;
},
saveProject: async (): Promise<string | null> => {
const projectDir = await invoke<string>('get_projects_directory');
const result = await save({
defaultPath: `${projectDir}/project.aive`,
filters: PROJECT_FILTERS,
});
return result ?? null;
debugBridge('saveProject:dialogOpen');
const result = await save({ filters: PROJECT_FILTERS });
const path = result ?? null;
debugBridge('saveProject:dialogResult', { path });
return path;
},
getBackendUrl: (): Promise<string> => {
// Avoid invoke() here because Linux/WebKit2GTK can log noisy ipc:// CSP warnings.
return Promise.resolve(backendUrl);
// Use env-driven backend URL and avoid invoke() to bypass ipc:// noise on Linux/WebKit2GTK.
return Promise.resolve(BACKEND_URL);
},
encryptString: (data: string): Promise<string> => {
@ -89,55 +94,13 @@ window.electronAPI = {
},
readFile: (path: string): Promise<string> => {
debugBridge('readFile', { path });
return readTextFile(path);
},
writeFile: async (path: string, content: string): Promise<boolean> => {
debugBridge('writeFile', { path, size: content.length });
await writeTextFile(path, content);
return true;
},
activateLicense: (key: string): Promise<any> => {
return invoke('activate_license', { licenseKey: key });
},
verifyLicense: (key: string): Promise<any> => {
return invoke('verify_license', { licenseKey: key });
},
getAppStatus: (): Promise<any> => {
return invoke('get_app_status');
},
deactivateLicense: (): Promise<void> => {
return invoke('deactivate_license');
},
hasLicenseFeature: (feature: string): Promise<boolean> => {
return invoke('has_license_feature', { feature });
},
listModels: (): Promise<ModelInfo[]> => {
return invoke('list_models');
},
deleteModel: (path: string): Promise<void> => {
return invoke('delete_model', { path });
},
logError: (message: string, stack: string, componentStack: string): Promise<void> => {
return invoke('log_error', { message, stack, componentStack });
},
writeAutosave: (data: string): Promise<void> => {
return invoke('write_autosave', { data });
},
readAutosave: (): Promise<string | null> => {
return invoke('read_autosave');
},
deleteAutosave: (): Promise<void> => {
return invoke('delete_autosave');
},
};

View File

@ -1,81 +0,0 @@
/**
* Frontend-side video thumbnail extraction.
* Captures frames from the <video> element using canvas.
*/
// Module-level cache of rendered thumbnails:
// key `${video.src}_${time}_${width}x${height}` -> JPEG data URL.
const THUMBNAIL_CACHE = new Map<string, string>();
/**
 * Synchronously capture a thumbnail from a <video> element.
 *
 * NOTE(review): because this is synchronous it cannot wait for the seek
 * triggered by `video.currentTime = time`, so it draws whatever frame is
 * currently decoded — the captured (and permanently cached) image may not
 * correspond to the requested `time`. Prefer extractThumbnails() below,
 * which awaits the 'seeked' event, when accuracy matters.
 *
 * @param video  Source video element (must have a decodable frame ready).
 * @param time   Requested timestamp in seconds (only used in the cache key).
 * @param width  Thumbnail width in pixels.
 * @param height Thumbnail height in pixels.
 * @returns JPEG data URL, or null when a 2D canvas context is unavailable.
 */
export function extractThumbnail(video: HTMLVideoElement, time: number, width = 160, height = 90): string | null {
  const cacheKey = `${video.src}_${time.toFixed(3)}_${width}x${height}`;
  const cached = THUMBNAIL_CACHE.get(cacheKey);
  if (cached) return cached;
  // Seek to the time, wait for seeked, then capture
  // Since this is synchronous, we use the current ready frame
  const canvas = document.createElement('canvas');
  canvas.width = width;
  canvas.height = height;
  const ctx = canvas.getContext('2d');
  if (!ctx) return null;
  // Try to draw the current frame at the requested time
  const originalTime = video.currentTime;
  video.currentTime = time;
  // We can't synchronously wait for seek, so catch the 'seeked' event externally
  // For now, draw whatever video frame is available
  ctx.drawImage(video, 0, 0, width, height);
  // Return to original time (best-effort)
  video.currentTime = originalTime;
  const dataUrl = canvas.toDataURL('image/jpeg', 0.6);
  THUMBNAIL_CACHE.set(cacheKey, dataUrl);
  return dataUrl;
}
/**
 * Capture thumbnails for several timestamps from a <video> element.
 *
 * For each uncached time, seeks the video and waits for the 'seeked' event
 * (with a 500 ms fallback) before drawing, so frames actually correspond to
 * the requested timestamps. Results are stored in THUMBNAIL_CACHE and the
 * playhead is restored afterwards (best-effort).
 *
 * Bug fix vs. the previous version: when the 500 ms fallback fired first,
 * the 'seeked' listener was left attached and could later resolve/interfere
 * with a subsequent seek; and when 'seeked' fired, the fallback timer was
 * never cancelled. Both paths now fully clean up listener and timer.
 *
 * @param video  Source video element.
 * @param times  Timestamps (seconds) to capture.
 * @param width  Thumbnail width in pixels.
 * @param height Thumbnail height in pixels.
 * @returns Map from timestamp to JPEG data URL; times for which no 2D
 *          canvas context could be obtained are omitted.
 */
export async function extractThumbnails(
  video: HTMLVideoElement,
  times: number[],
  width = 160,
  height = 90,
): Promise<Map<number, string>> {
  const results = new Map<number, string>();
  const originalTime = video.currentTime;
  for (const time of times) {
    const cacheKey = `${video.src}_${time.toFixed(3)}_${width}x${height}`;
    const cached = THUMBNAIL_CACHE.get(cacheKey);
    if (cached) {
      results.set(time, cached);
      continue;
    }
    // Seek and wait for the frame to be available.
    video.currentTime = time;
    await new Promise<void>((resolve) => {
      let timeoutId: ReturnType<typeof setTimeout> | undefined;
      const handler = () => {
        // Seek completed: cancel the fallback timer so it cannot fire later.
        if (timeoutId !== undefined) clearTimeout(timeoutId);
        video.removeEventListener('seeked', handler);
        resolve();
      };
      video.addEventListener('seeked', handler);
      // Fallback: resolve after 500 ms if 'seeked' never fires, and detach
      // the handler so it cannot linger into the next iteration's seek.
      timeoutId = setTimeout(() => {
        video.removeEventListener('seeked', handler);
        resolve();
      }, 500);
    });
    const canvas = document.createElement('canvas');
    canvas.width = width;
    canvas.height = height;
    const ctx = canvas.getContext('2d');
    if (ctx) {
      ctx.drawImage(video, 0, 0, width, height);
      const dataUrl = canvas.toDataURL('image/jpeg', 0.5);
      THUMBNAIL_CACHE.set(cacheKey, dataUrl);
      results.set(time, dataUrl);
    }
  }
  // Restore original position
  video.currentTime = originalTime;
  return results;
}

View File

@ -1,34 +1,14 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
// Forward console.error/warn/log to backend in dev mode so we can tail webview.log
// Forward console.error/warn/log to backend in dev mode so we can tail the backend dev log.
import './lib/dev-logger';
// Tauri bridge polyfill: must be imported before App so window.electronAPI is available to all components
// Must be imported before App so window.desktopAPI is patched before any component runs.
import './lib/tauri-bridge';
import App from './App';
import ErrorBoundary from './components/ErrorBoundary';
import './index.css';
window.addEventListener('error', (e) => {
if (e.error) {
try {
console.error('[GlobalError]', e.error.message, e.error.stack);
window.electronAPI?.logError?.(e.error.message, e.error.stack || '', '');
} catch {}
}
});
window.addEventListener('unhandledrejection', (e) => {
const reason = e.reason instanceof Error ? e.reason : new Error(String(e.reason));
try {
console.error('[UnhandledRejection]', reason.message, reason.stack);
window.electronAPI?.logError?.(reason.message, reason.stack || '', '');
} catch {}
});
ReactDOM.createRoot(document.getElementById('root')!).render(
<React.StrictMode>
<ErrorBoundary>
<App />
</ErrorBoundary>
<App />
</React.StrictMode>,
);

View File

@ -1,127 +0,0 @@
import { beforeEach, describe, expect, test, vi } from 'vitest';
import { useAIStore } from './aiStore';
/** Install a stubbed window.electronAPI whose encrypt/decrypt resolve to fixed values. */
function mockElectronAPI() {
  const stub = {
    encryptString: vi.fn().mockResolvedValue('encrypted-value'),
    decryptString: vi.fn().mockResolvedValue('decrypted-key'),
  };
  (window as any).electronAPI = stub;
}
// Unit tests for useAIStore: provider configuration, default-provider
// selection, custom filler words, filler-detection results, processing
// flags, and AI clip suggestions.
describe('aiStore', () => {
  beforeEach(() => {
    // Stub window.electronAPI and reset the store to a known baseline
    // so every test starts from identical state.
    mockElectronAPI();
    useAIStore.setState({
      providers: {
        ollama: { provider: 'ollama', baseUrl: 'http://localhost:11434', model: 'llama3' },
        openai: { provider: 'openai', apiKey: '', model: 'gpt-4o' },
        claude: { provider: 'claude', apiKey: '', model: 'claude-sonnet-4-20250514' },
      },
      defaultProvider: 'ollama',
      customFillerWords: '',
      fillerResult: null,
      clipSuggestions: [],
      isProcessing: false,
      processingMessage: '',
      _keysHydrated: false,
    });
  });
  // Per-provider config updates, including API-key encryption through electronAPI.
  describe('setProviderConfig', () => {
    test('updates Ollama base URL', () => {
      useAIStore.getState().setProviderConfig('ollama', { baseUrl: 'http://custom:11434' });
      expect(useAIStore.getState().providers.ollama.baseUrl).toBe('http://custom:11434');
    });
    test('updates Ollama model', () => {
      useAIStore.getState().setProviderConfig('ollama', { model: 'llama3.2' });
      expect(useAIStore.getState().providers.ollama.model).toBe('llama3.2');
    });
    test('updates OpenAI apiKey and encrypts', async () => {
      useAIStore.getState().setProviderConfig('openai', { apiKey: 'sk-test123' });
      expect(useAIStore.getState().providers.openai.apiKey).toBe('sk-test123');
      // Key must be round-tripped through the desktop encryption API.
      expect((window as any).electronAPI.encryptString).toHaveBeenCalledWith('sk-test123');
    });
    test('updates Claude model', () => {
      useAIStore.getState().setProviderConfig('claude', { model: 'claude-opus-4-20250514' });
      expect(useAIStore.getState().providers.claude.model).toBe('claude-opus-4-20250514');
    });
    test('preserves existing config when updating partial fields', () => {
      useAIStore.getState().setProviderConfig('openai', { apiKey: 'sk-new', model: 'gpt-4o-mini' });
      expect(useAIStore.getState().providers.openai.apiKey).toBe('sk-new');
      expect(useAIStore.getState().providers.openai.model).toBe('gpt-4o-mini');
    });
  });
  describe('setDefaultProvider', () => {
    test('changes default provider', () => {
      useAIStore.getState().setDefaultProvider('openai');
      expect(useAIStore.getState().defaultProvider).toBe('openai');
    });
    test('can switch to claude', () => {
      useAIStore.getState().setDefaultProvider('claude');
      expect(useAIStore.getState().defaultProvider).toBe('claude');
    });
  });
  describe('setCustomFillerWords', () => {
    test('sets custom filler words', () => {
      useAIStore.getState().setCustomFillerWords('okay, alright, anyway');
      expect(useAIStore.getState().customFillerWords).toBe('okay, alright, anyway');
    });
    test('clears custom filler words', () => {
      useAIStore.getState().setCustomFillerWords('test');
      useAIStore.getState().setCustomFillerWords('');
      expect(useAIStore.getState().customFillerWords).toBe('');
    });
  });
  describe('setFillerResult', () => {
    test('sets filler result', () => {
      const result = { fillers: [{ word: 'um', start: 1.0, end: 1.3 }], totalCount: 1 };
      useAIStore.getState().setFillerResult(result as any);
      expect(useAIStore.getState().fillerResult).toEqual(result);
    });
    test('clears filler result', () => {
      useAIStore.getState().setFillerResult({ fillers: [], totalCount: 0 } as any);
      useAIStore.getState().setFillerResult(null);
      expect(useAIStore.getState().fillerResult).toBeNull();
    });
  });
  describe('setProcessing', () => {
    test('sets processing true with message', () => {
      useAIStore.getState().setProcessing(true, 'Analyzing transcript...');
      expect(useAIStore.getState().isProcessing).toBe(true);
      expect(useAIStore.getState().processingMessage).toBe('Analyzing transcript...');
    });
    test('sets processing false', () => {
      useAIStore.getState().setProcessing(true, 'Working...');
      useAIStore.getState().setProcessing(false);
      expect(useAIStore.getState().isProcessing).toBe(false);
    });
  });
  describe('setClipSuggestions', () => {
    test('sets clip suggestions', () => {
      const clips = [{ title: 'Best moment', start: 10, end: 40, reason: 'Engaging' }];
      useAIStore.getState().setClipSuggestions(clips as any);
      expect(useAIStore.getState().clipSuggestions).toEqual(clips);
    });
    test('clears clip suggestions', () => {
      useAIStore.getState().setClipSuggestions([{ title: 'x', start: 0, end: 10, reason: 'y' }] as any);
      useAIStore.getState().setClipSuggestions([]);
      expect(useAIStore.getState().clipSuggestions).toEqual([]);
    });
  });
});

View File

@ -30,15 +30,26 @@ async function encryptAndStore(key: string, value: string): Promise<void> {
localStorage.removeItem(ENCRYPTED_KEY_PREFIX + key);
return;
}
const encrypted = await window.electronAPI.encryptString(value);
localStorage.setItem(ENCRYPTED_KEY_PREFIX + key, encrypted);
if (window.desktopAPI) {
const encrypted = await window.desktopAPI.encryptString(value);
localStorage.setItem(ENCRYPTED_KEY_PREFIX + key, encrypted);
} else {
localStorage.setItem(ENCRYPTED_KEY_PREFIX + key, btoa(value));
}
}
async function loadAndDecrypt(key: string): Promise<string> {
const stored = localStorage.getItem(ENCRYPTED_KEY_PREFIX + key);
if (!stored) return '';
if (window.desktopAPI) {
try {
return await window.desktopAPI.decryptString(stored);
} catch {
return '';
}
}
try {
return await window.electronAPI.decryptString(stored);
return atob(stored);
} catch {
return '';
}

View File

@ -1,407 +0,0 @@
import { beforeEach, describe, expect, test } from 'vitest';
import { useEditorStore } from './editorStore';
/**
 * Populate the editor store with `count` synthetic words on a 0.5 s cadence
 * (each 0.4 s long), wrapped in a single segment covering the whole range.
 */
function seedWords(count: number) {
  const words = Array.from({ length: count }, (_, i) => ({
    word: `word${i}`,
    start: i * 0.5,
    end: i * 0.5 + 0.4,
    confidence: 0.95,
  }));
  const segments = [{
    id: 0,
    start: 0,
    end: count * 0.5,
    text: words.map((w) => w.word).join(' '),
    words,
    globalStartIndex: 0,
  }];
  useEditorStore.getState().setTranscription({ words, segments, language: 'en' });
}
// Unit tests for useEditorStore: gain clamping, zone ranges (cut/mute/gain/
// speed), word selection and editing, markers, transcription state, project
// save/load, playback time, and zone-preview padding.
describe('editorStore', () => {
  beforeEach(() => {
    // Reset the store so every test starts from the initial state.
    useEditorStore.getState().reset();
  });
  // Global gain must clamp to [-24, 24] dB and reject non-finite input.
  describe('global gain', () => {
    test('clamps to upper bound', () => {
      useEditorStore.getState().setGlobalGainDb(100);
      expect(useEditorStore.getState().globalGainDb).toBe(24);
    });
    test('clamps to lower bound', () => {
      useEditorStore.getState().setGlobalGainDb(-100);
      expect(useEditorStore.getState().globalGainDb).toBe(-24);
    });
    test('rejects NaN by falling back to 0', () => {
      useEditorStore.getState().setGlobalGainDb(NaN);
      expect(useEditorStore.getState().globalGainDb).toBe(0);
    });
    test('rejects Infinity', () => {
      useEditorStore.getState().setGlobalGainDb(Infinity);
      expect(useEditorStore.getState().globalGainDb).toBe(0);
    });
    test('accepts value in range', () => {
      useEditorStore.getState().setGlobalGainDb(6);
      expect(useEditorStore.getState().globalGainDb).toBe(6);
    });
  });
  // Zone ranges: creation validation (bounds, minimum duration, finiteness),
  // id uniqueness, update/remove, and duration capping.
  describe('zone ranges', () => {
    beforeEach(() => {
      useEditorStore.getState().setDuration(100);
    });
    test('addCutRange creates a zone with correct times', () => {
      useEditorStore.getState().addCutRange(1, 5);
      const ranges = useEditorStore.getState().cutRanges;
      expect(ranges.length).toBe(1);
      expect(ranges[0].start).toBe(1);
      expect(ranges[0].end).toBe(5);
    });
    test('addCutRange generates unique ids', () => {
      useEditorStore.getState().addCutRange(1, 2);
      useEditorStore.getState().addCutRange(3, 4);
      const ranges = useEditorStore.getState().cutRanges;
      expect(ranges[0].id).not.toBe(ranges[1].id);
    });
    test('addCutRange rejects start >= end', () => {
      useEditorStore.getState().addCutRange(5, 5);
      expect(useEditorStore.getState().cutRanges.length).toBe(0);
    });
    test('addCutRange rejects start > end', () => {
      useEditorStore.getState().addCutRange(5, 1);
      expect(useEditorStore.getState().cutRanges.length).toBe(0);
    });
    test('addCutRange rejects duration < 0.01s', () => {
      useEditorStore.getState().addCutRange(0, 0.005);
      expect(useEditorStore.getState().cutRanges.length).toBe(0);
    });
    test('addCutRange rejects negative start', () => {
      useEditorStore.getState().addCutRange(-1, 5);
      expect(useEditorStore.getState().cutRanges.length).toBe(0);
    });
    test('addCutRange rejects NaN values', () => {
      useEditorStore.getState().addCutRange(NaN, 5);
      expect(useEditorStore.getState().cutRanges.length).toBe(0);
    });
    test('addMuteRange creates a zone', () => {
      useEditorStore.getState().addMuteRange(2, 6);
      const ranges = useEditorStore.getState().muteRanges;
      expect(ranges.length).toBe(1);
      expect(ranges[0].start).toBe(2);
      expect(ranges[0].end).toBe(6);
    });
    test('addGainRange creates a zone with gain value', () => {
      useEditorStore.getState().addGainRange(1, 4, 3.5);
      const ranges = useEditorStore.getState().gainRanges;
      expect(ranges.length).toBe(1);
      expect(ranges[0].gainDb).toBe(3.5);
    });
    test('addSpeedRange creates a zone with speed value', () => {
      useEditorStore.getState().addSpeedRange(0, 10, 1.5);
      const ranges = useEditorStore.getState().speedRanges;
      expect(ranges.length).toBe(1);
      expect(ranges[0].speed).toBe(1.5);
    });
    test('removeCutRange removes by id', () => {
      useEditorStore.getState().addCutRange(1, 2);
      const id = useEditorStore.getState().cutRanges[0].id;
      useEditorStore.getState().removeCutRange(id);
      expect(useEditorStore.getState().cutRanges.length).toBe(0);
    });
    test('removeCutRange does nothing for missing id', () => {
      useEditorStore.getState().addCutRange(1, 2);
      useEditorStore.getState().removeCutRange('nonexistent');
      expect(useEditorStore.getState().cutRanges.length).toBe(1);
    });
    test('updateCutRange updates bounds', () => {
      useEditorStore.getState().addCutRange(1, 5);
      const id = useEditorStore.getState().cutRanges[0].id;
      useEditorStore.getState().updateCutRange(id, 2, 8);
      const range = useEditorStore.getState().cutRanges[0];
      expect(range.start).toBe(2);
      expect(range.end).toBe(8);
    });
    test('removeMuteRange, removeGainRange, removeSpeedRange work', () => {
      useEditorStore.getState().addMuteRange(1, 2);
      useEditorStore.getState().addGainRange(2, 4, 3);
      useEditorStore.getState().addSpeedRange(3, 6, 1.2);
      useEditorStore.getState().removeMuteRange(useEditorStore.getState().muteRanges[0].id);
      useEditorStore.getState().removeGainRange(useEditorStore.getState().gainRanges[0].id);
      useEditorStore.getState().removeSpeedRange(useEditorStore.getState().speedRanges[0].id);
      expect(useEditorStore.getState().muteRanges.length).toBe(0);
      expect(useEditorStore.getState().gainRanges.length).toBe(0);
      expect(useEditorStore.getState().speedRanges.length).toBe(0);
    });
    test('rejects zones beyond duration', () => {
      useEditorStore.getState().setDuration(10);
      useEditorStore.getState().addCutRange(5, 20);
      expect(useEditorStore.getState().cutRanges.length).toBe(0);
    });
    test('rejects zone with end beyond duration', () => {
      useEditorStore.getState().setDuration(5);
      useEditorStore.getState().addCutRange(1, 10);
      expect(useEditorStore.getState().cutRanges.length).toBe(0);
    });
  });
  // Word selection/editing against a 10-word seeded transcript.
  describe('word selection', () => {
    beforeEach(() => { seedWords(10); });
    test('setSelectedWordIndices updates selection', () => {
      useEditorStore.getState().setSelectedWordIndices([0, 1, 2]);
      expect(useEditorStore.getState().selectedWordIndices).toEqual([0, 1, 2]);
    });
    test('setSelectedWordIndices handles empty', () => {
      useEditorStore.getState().setSelectedWordIndices([0]);
      useEditorStore.getState().setSelectedWordIndices([]);
      expect(useEditorStore.getState().selectedWordIndices).toEqual([]);
    });
    test('updateWordText updates the word at index', () => {
      useEditorStore.getState().updateWordText(0, 'hello');
      expect(useEditorStore.getState().words[0].word).toBe('hello');
    });
    test('updateWordText preserves timing', () => {
      const origStart = useEditorStore.getState().words[3].start;
      useEditorStore.getState().updateWordText(3, 'changed');
      expect(useEditorStore.getState().words[3].start).toBe(origStart);
    });
    test('updateWordText rejects out-of-bounds index', () => {
      useEditorStore.getState().updateWordText(999, 'oops');
      expect(useEditorStore.getState().words.length).toBe(10);
    });
    test('updateWordText rejects empty string', () => {
      useEditorStore.getState().updateWordText(0, '');
      expect(useEditorStore.getState().words[0].word).toBe('word0');
    });
    test('replaceWordRange replaces words in middle', () => {
      const newWords = [
        { word: 'new1', start: 1.5, end: 1.9, confidence: 0.99 },
        { word: 'new2', start: 2.0, end: 2.4, confidence: 0.99 },
      ];
      useEditorStore.getState().replaceWordRange(3, 5, newWords);
      const words = useEditorStore.getState().words;
      // 10 originals minus the 3 replaced (indices 3..5) plus 2 new.
      expect(words.length).toBe(10 - (5 - 3 + 1) + 2);
      expect(words[3].word).toBe('new1');
      expect(words[4].word).toBe('new2');
    });
    test('getWordAtTime returns correct index', () => {
      // Words start every 0.5 s, so t=1.0 falls on word index 2.
      const idx = useEditorStore.getState().getWordAtTime(1.0);
      expect(idx).toBe(2);
    });
    test('getWordAtTime returns 0 for time before first word', () => {
      const idx = useEditorStore.getState().getWordAtTime(-1);
      expect(idx).toBe(0);
    });
    test('getWordAtTime returns -1 for no words', () => {
      useEditorStore.getState().reset();
      expect(useEditorStore.getState().getWordAtTime(0)).toBe(-1);
    });
  });
  // Mark in/out points and labelled timeline markers.
  describe('markers', () => {
    beforeEach(() => {
      useEditorStore.getState().setDuration(120);
    });
    test('setMarkInTime sets and clears', () => {
      useEditorStore.getState().setMarkInTime(10);
      expect(useEditorStore.getState().markInTime).toBe(10);
      useEditorStore.getState().setMarkInTime(null);
      expect(useEditorStore.getState().markInTime).toBeNull();
    });
    test('setMarkInTime rejects NaN', () => {
      useEditorStore.getState().setMarkInTime(NaN);
      expect(useEditorStore.getState().markInTime).toBeNull();
    });
    test('clearMarkRange clears both', () => {
      useEditorStore.getState().setMarkInTime(5);
      useEditorStore.getState().setMarkOutTime(10);
      useEditorStore.getState().clearMarkRange();
      expect(useEditorStore.getState().markInTime).toBeNull();
      expect(useEditorStore.getState().markOutTime).toBeNull();
    });
    test('addTimelineMarker adds with correct data', () => {
      useEditorStore.getState().addTimelineMarker(5, 'Intro', '#ef4444');
      const markers = useEditorStore.getState().timelineMarkers;
      expect(markers.length).toBe(1);
      expect(markers[0].time).toBe(5);
      expect(markers[0].label).toBe('Intro');
      expect(markers[0].color).toBe('#ef4444');
    });
    test('addTimelineMarker defaults empty label to Marker', () => {
      useEditorStore.getState().addTimelineMarker(10, '', '#6366f1');
      expect(useEditorStore.getState().timelineMarkers[0].label).toBe('Marker');
    });
    test('addTimelineMarker rejects NaN time', () => {
      useEditorStore.getState().addTimelineMarker(NaN, 'test', '#6366f1');
      expect(useEditorStore.getState().timelineMarkers.length).toBe(0);
    });
    test('removeTimelineMarker removes by id', () => {
      useEditorStore.getState().addTimelineMarker(5, 'Intro', '#ef4444');
      const id = useEditorStore.getState().timelineMarkers[0].id;
      useEditorStore.getState().removeTimelineMarker(id);
      expect(useEditorStore.getState().timelineMarkers.length).toBe(0);
    });
    test('updateTimelineMarker updates label and color', () => {
      useEditorStore.getState().addTimelineMarker(5, 'Intro', '#ef4444');
      const id = useEditorStore.getState().timelineMarkers[0].id;
      useEditorStore.getState().updateTimelineMarker(id, { label: 'Chapter 1', color: '#22c55e' });
      const m = useEditorStore.getState().timelineMarkers[0];
      expect(m.label).toBe('Chapter 1');
      expect(m.color).toBe('#22c55e');
    });
  });
  // Transcription results, model selection, and progress state.
  describe('transcription', () => {
    test('setTranscription sets words and segments', () => {
      seedWords(5);
      expect(useEditorStore.getState().words.length).toBe(5);
      expect(useEditorStore.getState().segments.length).toBe(1);
    });
    test('setTranscription clears segments when words are empty', () => {
      useEditorStore.getState().setTranscription({ words: [], segments: [], language: 'en' });
      expect(useEditorStore.getState().segments.length).toBe(0);
    });
    test('setTranscriptionModel ignores null', () => {
      useEditorStore.getState().setTranscriptionModel('base');
      useEditorStore.getState().setTranscriptionModel(null);
      expect(useEditorStore.getState().transcriptionModel).toBe('base');
    });
    test('setTranscriptionModel ignores empty string', () => {
      useEditorStore.getState().setTranscriptionModel('base');
      useEditorStore.getState().setTranscriptionModel('');
      expect(useEditorStore.getState().transcriptionModel).toBe('base');
    });
    test('setTranscribing toggles state and status', () => {
      useEditorStore.getState().setTranscribing(true, 50, 'Loading...');
      expect(useEditorStore.getState().isTranscribing).toBe(true);
      expect(useEditorStore.getState().transcriptionProgress).toBe(50);
      expect(useEditorStore.getState().transcriptionStatus).toBe('Loading...');
    });
  });
  // Serialization: saveProject must persist every zone type.
  describe('project file', () => {
    test('saveProject includes all zone types', () => {
      useEditorStore.getState().loadVideo('test.mp4');
      useEditorStore.getState().setDuration(100);
      useEditorStore.getState().addCutRange(1, 2);
      useEditorStore.getState().addMuteRange(2, 3);
      useEditorStore.getState().addGainRange(3, 4, 3);
      useEditorStore.getState().addSpeedRange(4, 5, 1.5);
      const project = useEditorStore.getState().saveProject();
      expect(project.cutRanges).toBeDefined();
      expect(project.cutRanges!.length).toBe(1);
      expect(project.muteRanges).toBeDefined();
      expect(project.muteRanges!.length).toBe(1);
      expect(project.gainRanges).toBeDefined();
      expect(project.gainRanges!.length).toBe(1);
      expect(project.speedRanges).toBeDefined();
      expect(project.speedRanges!.length).toBe(1);
    });
    test('setProjectFilePath sets and reads back', () => {
      useEditorStore.getState().setProjectFilePath('/path/to/project.aive');
      expect(useEditorStore.getState().projectFilePath).toBe('/path/to/project.aive');
    });
  });
  // Playback time is intentionally NOT clamped by the store.
  describe('duration and current time', () => {
    test('setDuration sets duration value', () => {
      useEditorStore.getState().setDuration(120);
      expect(useEditorStore.getState().duration).toBe(120);
    });
    test('setCurrentTime sets time without clamping', () => {
      useEditorStore.getState().setDuration(60);
      useEditorStore.getState().setCurrentTime(120);
      expect(useEditorStore.getState().currentTime).toBe(120);
    });
    test('setCurrentTime accepts negative values', () => {
      useEditorStore.getState().setCurrentTime(-10);
      expect(useEditorStore.getState().currentTime).toBe(-10);
    });
    test('setIsPlaying toggles', () => {
      useEditorStore.getState().setIsPlaying(true);
      expect(useEditorStore.getState().isPlaying).toBe(true);
      useEditorStore.getState().setIsPlaying(false);
      expect(useEditorStore.getState().isPlaying).toBe(false);
    });
  });
  // Loading a video rejects empty paths and resets editing state.
  describe('loadVideo', () => {
    test('loadVideo rejects empty path', () => {
      useEditorStore.getState().loadVideo('');
      expect(useEditorStore.getState().videoUrl).toBeNull();
    });
    test('loadVideo resets state', () => {
      seedWords(5);
      useEditorStore.getState().addCutRange(1, 2);
      useEditorStore.getState().loadVideo('new-video.mp4');
      expect(useEditorStore.getState().words.length).toBe(0);
      expect(useEditorStore.getState().cutRanges.length).toBe(0);
    });
  });
  // Zone preview padding clamps to [0, 10] seconds and ignores NaN.
  describe('zone preview padding', () => {
    test('sets padding value', () => {
      useEditorStore.getState().setZonePreviewPaddingSeconds(3);
      expect(useEditorStore.getState().zonePreviewPaddingSeconds).toBe(3);
    });
    test('rejects NaN', () => {
      useEditorStore.getState().setZonePreviewPaddingSeconds(2);
      useEditorStore.getState().setZonePreviewPaddingSeconds(NaN);
      expect(useEditorStore.getState().zonePreviewPaddingSeconds).toBe(2);
    });
    test('clamps to upper bound', () => {
      useEditorStore.getState().setZonePreviewPaddingSeconds(20);
      expect(useEditorStore.getState().zonePreviewPaddingSeconds).toBe(10);
    });
  });
});

View File

@ -1,47 +1,22 @@
import { create } from 'zustand';
import { persist } from 'zustand/middleware';
import { temporal } from 'zundo';
import { assert } from '../lib/assert';
import type {
Word,
Segment,
CutRange,
MuteRange,
GainRange,
SpeedRange,
TranscriptionResult,
ProjectFile,
SilenceDetectionRange,
SilenceTrimSettings,
SilenceTrimGroup,
TimelineMarker,
Chapter,
ZoomConfig,
ClipInfo,
BackgroundMusicConfig,
} from '../types/project';
import type { Word, Segment, DeletedRange, CutRange, MuteRange, TranscriptionResult, ProjectFile } from '../types/project';
interface EditorState {
projectFilePath: string | null;
videoPath: string | null;
videoUrl: string | null;
exportedAudioPath: string | null; // path to modified audio from a previous export
words: Word[];
segments: Segment[];
deletedRanges: DeletedRange[];
cutRanges: CutRange[];
muteRanges: MuteRange[];
gainRanges: GainRange[];
speedRanges: SpeedRange[];
globalGainDb: number;
silenceTrimGroups: SilenceTrimGroup[];
timelineMarkers: TimelineMarker[];
transcriptionModel: string | null;
language: string;
currentTime: number;
duration: number;
isPlaying: boolean;
markInTime: number | null;
markOutTime: number | null;
selectedWordIndices: number[];
hoveredWordIndex: number | null;
@ -53,104 +28,51 @@ interface EditorState {
exportProgress: number;
backendUrl: string;
zonePreviewPaddingSeconds: number;
zoomConfig: ZoomConfig;
additionalClips: ClipInfo[];
backgroundMusic: BackgroundMusicConfig | null;
}
interface EditorActions {
setBackendUrl: (url: string) => void;
setProjectFilePath: (path: string | null) => void;
loadVideo: (path: string) => void;
setExportedAudioPath: (path: string | null) => void;
setTranscriptionModel: (model: string | null) => void;
saveProject: () => ProjectFile;
setTranscription: (result: TranscriptionResult) => void;
setCurrentTime: (time: number) => void;
setDuration: (duration: number) => void;
setIsPlaying: (playing: boolean) => void;
setMarkInTime: (time: number | null) => void;
setMarkOutTime: (time: number | null) => void;
clearMarkRange: () => void;
setSelectedWordIndices: (indices: number[]) => void;
setHoveredWordIndex: (index: number | null) => void;
deleteSelectedWords: () => void;
deleteWordRange: (startIndex: number, endIndex: number) => void;
updateWordText: (index: number, text: string) => void;
addCutRange: (start: number, end: number, trimGroupId?: string) => void;
restoreRange: (rangeId: string) => void;
addCutRange: (start: number, end: number) => void;
addMuteRange: (start: number, end: number) => void;
addGainRange: (start: number, end: number, gainDb: number) => void;
addSpeedRange: (start: number, end: number, speed: number) => void;
updateCutRange: (id: string, start: number, end: number) => void;
updateMuteRange: (id: string, start: number, end: number) => void;
updateGainRangeBounds: (id: string, start: number, end: number) => void;
updateGainRange: (id: string, gainDb: number) => void;
updateSpeedRangeBounds: (id: string, start: number, end: number) => void;
updateSpeedRange: (id: string, speed: number) => void;
removeCutRange: (id: string) => void;
removeMuteRange: (id: string) => void;
removeGainRange: (id: string) => void;
removeSpeedRange: (id: string) => void;
setGlobalGainDb: (gainDb: number) => void;
applySilenceTrimGroup: (args: {
groupId?: string;
sourceRanges: SilenceDetectionRange[];
settings: SilenceTrimSettings;
}) => { groupId: string; appliedCount: number };
removeSilenceTrimGroup: (groupId: string) => void;
addTimelineMarker: (time: number, label?: string, color?: string) => void;
updateTimelineMarker: (id: string, updates: Partial<TimelineMarker>) => void;
removeTimelineMarker: (id: string) => void;
getChapters: () => Chapter[];
setTranscribing: (active: boolean, progress?: number, status?: string) => void;
setExporting: (active: boolean, progress?: number) => void;
setZonePreviewPaddingSeconds: (seconds: number) => void;
replaceWordRange: (startIndex: number, endIndex: number, newWords: Word[]) => void;
getKeepSegments: () => Array<{ start: number; end: number }>;
getWordAtTime: (time: number) => number;
loadProject: (projectData: any) => number;
loadProject: (projectData: any) => void;
reset: () => void;
setZoomConfig: (config: Partial<ZoomConfig>) => void;
addAdditionalClip: (path: string, label?: string) => void;
removeAdditionalClip: (id: string) => void;
reorderAdditionalClip: (id: string, direction: -1 | 1) => void;
setBackgroundMusic: (config: BackgroundMusicConfig | null) => void;
updateBackgroundMusic: (updates: Partial<BackgroundMusicConfig>) => void;
}
const ZONE_PREVIEW_PADDING_KEY = 'talkedit-zone-preview-padding-seconds';
function getStoredZonePreviewPaddingSeconds() {
if (typeof window === 'undefined') return 1;
const stored = window.localStorage.getItem(ZONE_PREVIEW_PADDING_KEY);
const parsed = stored ? Number(stored) : 1;
if (!Number.isFinite(parsed)) return 1;
return Math.max(0, Math.min(10, parsed));
pauseUndo: () => void;
resumeUndo: () => void;
}
const initialState: EditorState = {
projectFilePath: null,
videoPath: null,
videoUrl: null,
exportedAudioPath: null,
words: [],
segments: [],
deletedRanges: [],
cutRanges: [],
muteRanges: [],
gainRanges: [],
speedRanges: [],
globalGainDb: 0,
silenceTrimGroups: [],
timelineMarkers: [],
transcriptionModel: null,
language: '',
currentTime: 0,
duration: 0,
isPlaying: false,
markInTime: null,
markOutTime: null,
selectedWordIndices: [],
hoveredWordIndex: null,
isTranscribing: false,
@ -159,118 +81,90 @@ const initialState: EditorState = {
isExporting: false,
exportProgress: 0,
backendUrl: 'http://127.0.0.1:8000',
zonePreviewPaddingSeconds: getStoredZonePreviewPaddingSeconds(),
zoomConfig: { enabled: false, zoomFactor: 1, panX: 0, panY: 0 },
additionalClips: [],
backgroundMusic: null,
};
let nextRangeId = 1;
let nextTrimGroupId = 1;
function buildTrimCutRanges(
sourceRanges: SilenceDetectionRange[],
settings: SilenceTrimSettings,
maxEnd: number,
trimGroupId: string,
): CutRange[] {
const preBufferSeconds = settings.preBufferMs / 1000;
const postBufferSeconds = settings.postBufferMs / 1000;
const capEnd = maxEnd > 0 ? maxEnd : Number.POSITIVE_INFINITY;
const built: CutRange[] = [];
for (const range of sourceRanges) {
const start = Math.max(0, range.start + preBufferSeconds);
const end = Math.min(capEnd, range.end - postBufferSeconds);
if (end - start < 0.01) continue;
built.push({
id: `cut_${nextRangeId++}`,
start,
end,
trimGroupId,
});
}
return built;
}
const debugEditorStore = (event: string, details?: Record<string, unknown>) => {
if (!import.meta.env.DEV) return;
console.log('[editor-store]', event, details ?? {});
};
export const useEditorStore = create<EditorState & EditorActions>()(
temporal(
(set, get) => ({
persist(
temporal(
(set, get) => ({
...initialState,
setBackendUrl: (url) => set({ backendUrl: url }),
setProjectFilePath: (path) => set({ projectFilePath: path }),
setBackendUrl: (url) => {
debugEditorStore('setBackendUrl', { url });
set({ backendUrl: url });
},
setExportedAudioPath: (path) => set({ exportedAudioPath: path }),
setTranscriptionModel: (model) => {
if (model === null || model === '') return;
set({ transcriptionModel: model });
},
saveProject: (): ProjectFile => {
const { videoPath, words, segments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, silenceTrimGroups, timelineMarkers, transcriptionModel, language, exportedAudioPath, zoomConfig, additionalClips, backgroundMusic } = get();
const { videoPath, words, segments, deletedRanges, cutRanges, muteRanges, language, exportedAudioPath } = get();
if (!videoPath) throw new Error('No video loaded');
const now = new Date().toISOString();
// Strip globalStartIndex (runtime-only field) before persisting.
const persistSegments = segments.map((seg) => {
const rest = { ...seg };
delete (rest as Partial<Segment>).globalStartIndex;
return rest;
});
// Strip globalStartIndex (runtime-only field) before persisting
const persistSegments = segments.map(({ globalStartIndex: _drop, ...rest }) => rest);
return {
version: 1,
videoPath,
exportedAudioPath: exportedAudioPath ?? undefined,
transcriptionModel: transcriptionModel ?? undefined,
words,
segments: persistSegments as unknown as Segment[],
deletedRanges,
cutRanges,
muteRanges,
gainRanges,
speedRanges,
globalGainDb,
silenceTrimGroups,
timelineMarkers,
language,
createdAt: now,
createdAt: now, // will be overwritten if we track original creation time later
modifiedAt: now,
zoomConfig,
additionalClips,
backgroundMusic: backgroundMusic ?? undefined,
};
},
loadVideo: (path) => {
if (!path) return;
const { backendUrl, zonePreviewPaddingSeconds } = get();
const url = `${backendUrl}/file?path=${encodeURIComponent(path)}`;
const backend = get().backendUrl;
const buildMediaUrl = (filePath: string) => {
const isWav = filePath.toLowerCase().endsWith('.wav');
return isWav
? `${backend}/file?path=${encodeURIComponent(filePath)}&format=mp3`
: `${backend}/file?path=${encodeURIComponent(filePath)}`;
};
const url = buildMediaUrl(path);
debugEditorStore('loadVideo:start', {
path,
backend,
previousVideoPath: get().videoPath,
});
set({
...initialState,
backendUrl,
zonePreviewPaddingSeconds,
projectFilePath: null,
backendUrl: backend,
videoPath: path,
videoUrl: url,
});
debugEditorStore('loadVideo:done', { path, url });
},
setTranscription: (result) => {
if (!result.words || result.words.length === 0) {
set({ words: [], segments: [], selectedWordIndices: [] });
return;
}
let globalIdx = 0;
const annotatedSegments = result.segments.map((seg) => {
const annotated = { ...seg, globalStartIndex: globalIdx };
globalIdx += seg.words.length;
return annotated;
});
debugEditorStore('setTranscription', {
wordCount: result.words?.length ?? 0,
segmentCount: result.segments?.length ?? 0,
language: result.language,
currentVideoPath: get().videoPath,
});
set({
words: result.words,
segments: annotatedSegments,
language: result.language,
deletedRanges: [],
selectedWordIndices: [],
});
},
@ -278,77 +172,62 @@ export const useEditorStore = create<EditorState & EditorActions>()(
setCurrentTime: (time) => set({ currentTime: time }),
setDuration: (duration) => set({ duration }),
setIsPlaying: (playing) => set({ isPlaying: playing }),
setMarkInTime: (time) => {
if (time !== null && !isFinite(time)) return;
set({ markInTime: time });
},
setMarkOutTime: (time) => {
if (time !== null && !isFinite(time)) return;
set({ markOutTime: time });
},
clearMarkRange: () => set({ markInTime: null, markOutTime: null }),
setSelectedWordIndices: (indices) => set({ selectedWordIndices: indices }),
setHoveredWordIndex: (index) => set({ hoveredWordIndex: index }),
deleteSelectedWords: () => {
const { selectedWordIndices, words } = get();
const { selectedWordIndices, words, deletedRanges } = get();
if (selectedWordIndices.length === 0) return;
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
const startWord = words[sorted[0]];
const endWord = words[sorted[sorted.length - 1]];
get().addCutRange(startWord.start, endWord.end);
set({ selectedWordIndices: [] });
const newRange: DeletedRange = {
id: `dr_${nextRangeId++}`,
start: startWord.start,
end: endWord.end,
wordIndices: sorted,
};
set({
deletedRanges: [...deletedRanges, newRange],
selectedWordIndices: [],
});
},
deleteWordRange: (startIndex, endIndex) => {
const { words } = get();
get().addCutRange(words[startIndex].start, words[endIndex].end);
const { words, deletedRanges } = get();
const indices = [];
for (let i = startIndex; i <= endIndex; i++) indices.push(i);
const newRange: DeletedRange = {
id: `dr_${nextRangeId++}`,
start: words[startIndex].start,
end: words[endIndex].end,
wordIndices: indices,
};
set({ deletedRanges: [...deletedRanges, newRange] });
},
updateWordText: (index, text) => {
const { words, segments } = get();
if (index < 0 || index >= words.length || !text) return;
const newWords = words.map((w, i) =>
i === index ? { ...w, word: text } : w
);
// Also update the corresponding segment's words and text
let globalIdx = 0;
const newSegments = segments.map((seg) => {
const start = globalIdx;
globalIdx += seg.words.length;
if (index >= start && index < start + seg.words.length) {
const localIdx = index - start;
const updatedSegWords = seg.words.map((w, i) =>
i === localIdx ? { ...w, word: text } : w
);
return {
...seg,
words: updatedSegWords,
text: updatedSegWords.map((w) => w.word).join(' '),
};
}
return seg;
});
set({ words: newWords, segments: newSegments });
restoreRange: (rangeId) => {
const { deletedRanges } = get();
set({ deletedRanges: deletedRanges.filter((r) => r.id !== rangeId) });
},
addCutRange: (start, end, trimGroupId) => {
const { cutRanges, duration } = get();
if (!isFinite(start) || !isFinite(end) || start < 0 || end - start < 0.01 || end > duration) return;
addCutRange: (start, end) => {
const { cutRanges } = get();
const newRange: CutRange = {
id: `cut_${nextRangeId++}`,
start,
end,
trimGroupId,
};
set({ cutRanges: [...cutRanges, newRange] });
},
addMuteRange: (start, end) => {
const { muteRanges, duration } = get();
if (!isFinite(start) || !isFinite(end) || start < 0 || end - start < 0.01 || end > duration) return;
const { muteRanges } = get();
const newRange: MuteRange = {
id: `mute_${nextRangeId++}`,
start,
@ -357,30 +236,6 @@ export const useEditorStore = create<EditorState & EditorActions>()(
set({ muteRanges: [...muteRanges, newRange] });
},
addGainRange: (start, end, gainDb) => {
const { gainRanges, duration } = get();
if (!isFinite(start) || !isFinite(end) || start < 0 || end - start < 0.01 || end > duration) return;
const newRange: GainRange = {
id: `gain_${nextRangeId++}`,
start,
end,
gainDb,
};
set({ gainRanges: [...gainRanges, newRange] });
},
addSpeedRange: (start, end, speed) => {
const { speedRanges, duration } = get();
if (!isFinite(start) || !isFinite(end) || start < 0 || end - start < 0.01 || end > duration) return;
const newRange: SpeedRange = {
id: `speed_${nextRangeId++}`,
start,
end,
speed: Math.max(0.25, Math.min(4, speed)),
};
set({ speedRanges: [...speedRanges, newRange] });
},
updateCutRange: (id, start, end) => {
const { cutRanges } = get();
set({
@ -399,42 +254,6 @@ export const useEditorStore = create<EditorState & EditorActions>()(
});
},
updateGainRangeBounds: (id, start, end) => {
const { gainRanges } = get();
set({
gainRanges: gainRanges.map((r) =>
r.id === id ? { ...r, start, end } : r
),
});
},
updateGainRange: (id, gainDb) => {
const { gainRanges } = get();
set({
gainRanges: gainRanges.map((r) =>
r.id === id ? { ...r, gainDb } : r
),
});
},
updateSpeedRangeBounds: (id, start, end) => {
const { speedRanges } = get();
set({
speedRanges: speedRanges.map((r) =>
r.id === id ? { ...r, start, end } : r
),
});
},
updateSpeedRange: (id, speed) => {
const { speedRanges } = get();
set({
speedRanges: speedRanges.map((r) =>
r.id === id ? { ...r, speed: Math.max(0.25, Math.min(4, speed)) } : r
),
});
},
removeCutRange: (id) => {
const { cutRanges } = get();
set({ cutRanges: cutRanges.filter((r) => r.id !== id) });
@ -445,101 +264,6 @@ export const useEditorStore = create<EditorState & EditorActions>()(
set({ muteRanges: muteRanges.filter((r) => r.id !== id) });
},
removeGainRange: (id) => {
const { gainRanges } = get();
set({ gainRanges: gainRanges.filter((r) => r.id !== id) });
},
removeSpeedRange: (id) => {
const { speedRanges } = get();
set({ speedRanges: speedRanges.filter((r) => r.id !== id) });
},
setGlobalGainDb: (gainDb) => {
if (!isFinite(gainDb)) {
set({ globalGainDb: 0 });
return;
}
set({ globalGainDb: Math.max(-24, Math.min(24, gainDb)) });
},
applySilenceTrimGroup: ({ groupId, sourceRanges, settings }) => {
const { duration, cutRanges, silenceTrimGroups } = get();
const now = new Date().toISOString();
const existingGroup = groupId
? silenceTrimGroups.find((group) => group.id === groupId)
: null;
const resolvedGroupId = existingGroup?.id ?? `trimgrp_${nextTrimGroupId++}`;
const nextGroup: SilenceTrimGroup = {
id: resolvedGroupId,
sourceRanges,
settings,
createdAt: existingGroup?.createdAt ?? now,
updatedAt: now,
};
const withoutGroupCuts = cutRanges.filter((range) => range.trimGroupId !== resolvedGroupId);
const generatedCuts = buildTrimCutRanges(sourceRanges, settings, duration, resolvedGroupId);
const nextGroups = existingGroup
? silenceTrimGroups.map((group) => (group.id === resolvedGroupId ? nextGroup : group))
: [...silenceTrimGroups, nextGroup];
set({
cutRanges: [...withoutGroupCuts, ...generatedCuts],
silenceTrimGroups: nextGroups,
});
return {
groupId: resolvedGroupId,
appliedCount: generatedCuts.length,
};
},
removeSilenceTrimGroup: (groupId) => {
const { cutRanges, silenceTrimGroups } = get();
set({
cutRanges: cutRanges.filter((range) => range.trimGroupId !== groupId),
silenceTrimGroups: silenceTrimGroups.filter((group) => group.id !== groupId),
});
},
addTimelineMarker: (time, label, color) => {
if (!isFinite(time) || time < 0) return;
const { timelineMarkers, duration } = get();
if (time > duration) return;
const newMarker: TimelineMarker = {
id: `marker_${nextRangeId++}`,
time,
label: label || 'Marker',
color: color || '#6366f1',
};
set({ timelineMarkers: [...timelineMarkers, newMarker].sort((a, b) => a.time - b.time) });
},
updateTimelineMarker: (id, updates) => {
const { timelineMarkers } = get();
set({
timelineMarkers: timelineMarkers
.map((m) => (m.id === id ? { ...m, ...updates } : m))
.sort((a, b) => a.time - b.time),
});
},
removeTimelineMarker: (id) => {
const { timelineMarkers } = get();
set({ timelineMarkers: timelineMarkers.filter((m) => m.id !== id) });
},
getChapters: () => {
const { timelineMarkers } = get();
return timelineMarkers.map((m) => ({
markerId: m.id,
label: m.label,
startTime: m.time,
}));
},
setTranscribing: (active, progress, status) =>
set({
isTranscribing: active,
@ -553,58 +277,16 @@ export const useEditorStore = create<EditorState & EditorActions>()(
exportProgress: progress ?? (active ? 0 : 100),
}),
setZonePreviewPaddingSeconds: (seconds) => {
if (!isFinite(seconds)) return;
const nextSeconds = Math.max(0, Math.min(10, seconds));
if (typeof window !== 'undefined') {
window.localStorage.setItem(ZONE_PREVIEW_PADDING_KEY, String(nextSeconds));
}
set({ zonePreviewPaddingSeconds: nextSeconds });
},
replaceWordRange: (startIndex, endIndex, newWords) => {
const { words } = get();
assert(startIndex >= 0 && startIndex < words.length, 'replaceWordRange: startIndex out of bounds');
assert(endIndex >= 0 && endIndex < words.length, 'replaceWordRange: endIndex out of bounds');
assert(startIndex <= endIndex, 'replaceWordRange: startIndex > endIndex');
if (startIndex < 0 || endIndex >= words.length || startIndex > endIndex) return;
// Replace words in the range with new words
const before = words.slice(0, startIndex);
const after = words.slice(endIndex + 1);
const updatedWords = [...before, ...newWords, ...after];
// Rebuild segments from updated words, grouping by speaker
const rebuiltSegments: Segment[] = [];
let wordIdx = 0;
let cumIdx = 0;
while (wordIdx < updatedWords.length) {
const currentSpeaker = updatedWords[wordIdx].speaker;
const groupWords: Word[] = [];
while (wordIdx < updatedWords.length && updatedWords[wordIdx].speaker === currentSpeaker) {
groupWords.push(updatedWords[wordIdx]);
wordIdx++;
}
rebuiltSegments.push({
id: rebuiltSegments.length,
start: groupWords[0].start,
end: groupWords[groupWords.length - 1].end,
text: groupWords.map((w) => w.word).join(' '),
words: groupWords,
speaker: currentSpeaker,
globalStartIndex: cumIdx,
});
cumIdx += groupWords.length;
}
set({ words: updatedWords, segments: rebuiltSegments, selectedWordIndices: [] });
},
getKeepSegments: () => {
const { words, cutRanges, duration } = get();
const { words, deletedRanges, cutRanges, duration } = get();
if (words.length === 0) return [{ start: 0, end: duration }];
const deletedSet = new Set<number>();
for (const range of deletedRanges) {
for (const idx of range.wordIndices) deletedSet.add(idx);
}
// Also exclude words that fall within cut ranges
for (const cutRange of cutRanges) {
for (let i = 0; i < words.length; i++) {
const word = words[i];
@ -648,69 +330,20 @@ export const useEditorStore = create<EditorState & EditorActions>()(
return lo < words.length ? lo : words.length - 1;
},
setZoomConfig: (config) => {
const { zoomConfig } = get();
set({ zoomConfig: { ...zoomConfig, ...config } });
},
addAdditionalClip: (path, label) => {
const { additionalClips } = get();
const id = `clip_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`;
set({ additionalClips: [...additionalClips, { id, path, label: label || path.split(/[/\\]/).pop() || 'Clip' }] });
},
removeAdditionalClip: (id) => {
const { additionalClips } = get();
set({ additionalClips: additionalClips.filter((c) => c.id !== id) });
},
reorderAdditionalClip: (id, direction) => {
const { additionalClips } = get();
const idx = additionalClips.findIndex((c) => c.id === id);
if (idx === -1) return;
const target = idx + direction;
if (target < 0 || target >= additionalClips.length) return;
const reordered = [...additionalClips];
[reordered[idx], reordered[target]] = [reordered[target], reordered[idx]];
set({ additionalClips: reordered });
},
setBackgroundMusic: (config) => {
if (!config || !config.path) {
set({ backgroundMusic: null });
return;
}
set({ backgroundMusic: config });
},
updateBackgroundMusic: (updates) => {
const { backgroundMusic } = get();
if (!backgroundMusic) return;
set({ backgroundMusic: { ...backgroundMusic, ...updates } });
},
loadProject: (data) => {
const { backendUrl, zonePreviewPaddingSeconds, projectFilePath, duration } = get();
const url = `${backendUrl}/file?path=${encodeURIComponent(data.videoPath)}`;
const isValidZone = (r: { start: number; end: number }) =>
isFinite(r.start) && isFinite(r.end) && r.start >= 0 && r.end - r.start >= 0.01 && (duration <= 0 || r.end <= duration);
let removed = 0;
const filterZones = <T extends { start: number; end: number }>(ranges: T[]): T[] => {
const result: T[] = [];
for (const r of ranges) {
if (isValidZone(r)) { result.push(r); } else { removed++; }
}
return result;
};
// Backward compat: merge legacy deletedRanges into cutRanges as time-range cuts
const legacyCuts = (data.deletedRanges || []).map((r: any) => ({ id: r.id, start: r.start, end: r.end }));
const cleanedCutRanges = filterZones<CutRange>([...(data.cutRanges || []), ...legacyCuts]);
const cleanedMuteRanges = filterZones<MuteRange>(data.muteRanges || []);
const cleanedGainRanges = filterZones<GainRange>(data.gainRanges || []);
const cleanedSpeedRanges = filterZones<SpeedRange>(data.speedRanges || []);
const backend = get().backendUrl;
const resolvedVideoPath = typeof data?.videoPath === 'string' ? data.videoPath : null;
if (!resolvedVideoPath) {
debugEditorStore('loadProject:invalidVideoPath', {
videoPathType: typeof data?.videoPath,
hasKeys: data && typeof data === 'object' ? Object.keys(data as Record<string, unknown>) : [],
});
throw new Error('Project file missing required videoPath string');
}
const isWav = resolvedVideoPath.toLowerCase().endsWith('.wav');
const url = isWav
? `${backend}/file?path=${encodeURIComponent(resolvedVideoPath)}&format=mp3`
: `${backend}/file?path=${encodeURIComponent(resolvedVideoPath)}`;
let globalIdx = 0;
const annotatedSegments = (data.segments || []).map((seg: Segment) => {
@ -719,38 +352,93 @@ export const useEditorStore = create<EditorState & EditorActions>()(
return annotated;
});
debugEditorStore('loadProject:start', {
videoPath: resolvedVideoPath,
words: Array.isArray(data?.words) ? data.words.length : null,
segments: Array.isArray(data?.segments) ? data.segments.length : null,
cutRanges: Array.isArray(data?.cutRanges) ? data.cutRanges.length : null,
muteRanges: Array.isArray(data?.muteRanges) ? data.muteRanges.length : null,
deletedRanges: Array.isArray(data?.deletedRanges) ? data.deletedRanges.length : null,
previousVideoPath: get().videoPath,
});
set({
...initialState,
backendUrl,
zonePreviewPaddingSeconds,
projectFilePath,
videoPath: data.videoPath,
backendUrl: backend,
videoPath: resolvedVideoPath,
videoUrl: url,
words: data.words || [],
segments: annotatedSegments,
cutRanges: cleanedCutRanges,
muteRanges: cleanedMuteRanges,
gainRanges: cleanedGainRanges,
speedRanges: cleanedSpeedRanges,
globalGainDb: typeof data.globalGainDb === 'number' ? data.globalGainDb : 0,
silenceTrimGroups: data.silenceTrimGroups || [],
timelineMarkers: data.timelineMarkers || [],
transcriptionModel: data.transcriptionModel ?? null,
deletedRanges: data.deletedRanges || [],
cutRanges: data.cutRanges || [],
muteRanges: data.muteRanges || [],
language: data.language || '',
exportedAudioPath: data.exportedAudioPath ?? null,
zoomConfig: data.zoomConfig || { enabled: false, zoomFactor: 1, panX: 0, panY: 0 },
additionalClips: data.additionalClips || [],
backgroundMusic: data.backgroundMusic || null,
});
return removed;
debugEditorStore('loadProject:done', {
videoPath: resolvedVideoPath,
url,
});
},
reset: () => {
const { zonePreviewPaddingSeconds } = get();
set({ ...initialState, zonePreviewPaddingSeconds, projectFilePath: null });
const stack = new Error().stack?.split('\n').slice(1, 6).join(' | ');
debugEditorStore('reset', {
previousVideoPath: get().videoPath,
stack,
});
set(initialState);
},
}),
{ limit: 100 },
pauseUndo: () => {
// Access the temporal store through the useEditorStore
const temporalStore = (useEditorStore as any).temporal;
if (temporalStore) {
temporalStore.getState().pause();
}
},
resumeUndo: () => {
// Access the temporal store through the useEditorStore
const temporalStore = (useEditorStore as any).temporal;
if (temporalStore) {
temporalStore.getState().resume();
}
},
}),
{ limit: 100 },
),
{
name: 'talkedit-editor-session',
version: 1,
partialize: (state) => ({
videoPath: state.videoPath,
videoUrl: state.videoUrl,
exportedAudioPath: state.exportedAudioPath,
words: state.words,
segments: state.segments,
deletedRanges: state.deletedRanges,
cutRanges: state.cutRanges,
muteRanges: state.muteRanges,
language: state.language,
backendUrl: state.backendUrl,
currentTime: state.currentTime,
duration: state.duration,
}),
onRehydrateStorage: () => (state, error) => {
if (error) {
debugEditorStore('persist:rehydrate:error', { error: String(error) });
return;
}
debugEditorStore('persist:rehydrate:done', {
videoPath: state?.videoPath ?? null,
words: state?.words?.length ?? 0,
segments: state?.segments?.length ?? 0,
cutRanges: state?.cutRanges?.length ?? 0,
muteRanges: state?.muteRanges?.length ?? 0,
});
},
},
),
);

View File

@ -1,213 +0,0 @@
import { beforeEach, describe, expect, test, vi } from 'vitest';
import { useLicenseStore } from './licenseStore';
// Install a stubbed Electron IPC bridge on `window` for the tests below.
// The defaults simulate an app whose trial has expired (every call resolves
// to its "nothing available" value); callers pass `overrides` to swap in
// scenario-specific handlers for individual bridge methods.
function mockElectronAPI(overrides: Record<string, any> = {}) {
  const defaults = {
    getAppStatus: vi.fn().mockResolvedValue({ tag: 'Expired' }),
    activateLicense: vi.fn().mockResolvedValue(null),
    deactivateLicense: vi.fn().mockResolvedValue(undefined),
    hasLicenseFeature: vi.fn().mockResolvedValue(false),
  };
  (window as any).electronAPI = { ...defaults, ...overrides };
}
// Test suite for the Zustand license store. Each test stubs the Electron
// bridge via mockElectronAPI() and drives the store's public actions.
describe('licenseStore', () => {
beforeEach(() => {
// Reset the bridge to "Expired" defaults and the store to a known state
// (canEdit deliberately starts true so tests prove it gets recomputed).
mockElectronAPI();
useLicenseStore.setState({ status: null, isLoaded: false, canEdit: true, canUseAI: false, showDialog: false });
});
// canEdit/canUseAI derivation: first four tests go through checkStatus()
// (async bridge path), the rest call setStatus() directly (sync path).
describe('canEdit', () => {
test('is true for Licensed status', async () => {
mockElectronAPI({
getAppStatus: vi.fn().mockResolvedValue({ tag: 'Licensed', license: { license_id: 'x', tier: 'pro' } }),
});
await useLicenseStore.getState().checkStatus();
expect(useLicenseStore.getState().canEdit).toBe(true);
});
test('is true for Trial status', async () => {
mockElectronAPI({
getAppStatus: vi.fn().mockResolvedValue({ tag: 'Trial', days_remaining: 20, started_at: Date.now() }),
});
await useLicenseStore.getState().checkStatus();
expect(useLicenseStore.getState().canEdit).toBe(true);
});
test('is false for Expired status', async () => {
mockElectronAPI({
getAppStatus: vi.fn().mockResolvedValue({ tag: 'Expired' }),
});
await useLicenseStore.getState().checkStatus();
expect(useLicenseStore.getState().canEdit).toBe(false);
});
test('is false when status is null', () => {
useLicenseStore.setState({ status: null, canEdit: true, canUseAI: false });
useLicenseStore.getState().setStatus(null);
expect(useLicenseStore.getState().canEdit).toBe(false);
});
// NOTE(review): duplicate title — "is true for Licensed status" already
// exists above; vitest allows it but reports become ambiguous.
test('is true for Licensed status', () => {
useLicenseStore.getState().setStatus({ tag: 'Licensed', license: { license_id: 'x', tier: 'pro', customer_email: 'a@b.com', expires_at: 9999999999, features: [], issued_at: 1, max_activations: 1 } });
expect(useLicenseStore.getState().canEdit).toBe(true);
expect(useLicenseStore.getState().canUseAI).toBe(false);
});
test('is true for Licensed Business status', () => {
useLicenseStore.getState().setStatus({ tag: 'Licensed', license: { license_id: 'x', tier: 'business', customer_email: 'a@b.com', expires_at: 9999999999, features: [], issued_at: 1, max_activations: 5 } });
expect(useLicenseStore.getState().canEdit).toBe(true);
expect(useLicenseStore.getState().canUseAI).toBe(true);
});
// NOTE(review): title says "is false for Trial status" but the assertion
// expects canEdit === true (Trial is editable) — the name looks stale and
// probably refers to canUseAI; rename when this file is next touched.
test('is false for Trial status', () => {
useLicenseStore.getState().setStatus({ tag: 'Trial', days_remaining: 5, started_at: Date.now() });
expect(useLicenseStore.getState().canEdit).toBe(true);
expect(useLicenseStore.getState().canUseAI).toBe(false);
});
test('is false for Expired status', () => {
useLicenseStore.getState().setStatus({ tag: 'Expired' });
expect(useLicenseStore.getState().canEdit).toBe(false);
expect(useLicenseStore.getState().canUseAI).toBe(false);
});
});
// checkStatus(): mirrors whatever the bridge reports, falling back to
// Expired on error or when the bridge is missing entirely.
describe('checkStatus', () => {
test('sets status to Licensed when backend returns Licensed', async () => {
const license = { license_id: 'l1', tier: 'pro', customer_email: 'a@b.com', expires_at: 9999999999, features: [], issued_at: 1, max_activations: 1 };
mockElectronAPI({ getAppStatus: vi.fn().mockResolvedValue({ tag: 'Licensed', license }) });
await useLicenseStore.getState().checkStatus();
expect(useLicenseStore.getState().status?.tag).toBe('Licensed');
});
test('sets status to Trial when backend returns Trial', async () => {
mockElectronAPI({ getAppStatus: vi.fn().mockResolvedValue({ tag: 'Trial', days_remaining: 15, started_at: Date.now() }) });
await useLicenseStore.getState().checkStatus();
expect(useLicenseStore.getState().status?.tag).toBe('Trial');
});
test('sets status to Expired when backend returns Expired', async () => {
mockElectronAPI({ getAppStatus: vi.fn().mockResolvedValue({ tag: 'Expired' }) });
await useLicenseStore.getState().checkStatus();
expect(useLicenseStore.getState().status?.tag).toBe('Expired');
});
test('handles API error gracefully', async () => {
mockElectronAPI({ getAppStatus: vi.fn().mockRejectedValue(new Error('network error')) });
await useLicenseStore.getState().checkStatus();
expect(useLicenseStore.getState().status?.tag).toBe('Expired');
expect(useLicenseStore.getState().canEdit).toBe(false);
expect(useLicenseStore.getState().canUseAI).toBe(false);
});
test('handles missing electronAPI', async () => {
// Simulates running outside Electron (e.g. plain browser dev server).
delete (window as any).electronAPI;
await useLicenseStore.getState().checkStatus();
expect(useLicenseStore.getState().status?.tag).toBe('Expired');
expect(useLicenseStore.getState().canEdit).toBe(false);
expect(useLicenseStore.getState().canUseAI).toBe(false);
});
test('sets isLoaded to true after check', async () => {
await useLicenseStore.getState().checkStatus();
expect(useLicenseStore.getState().isLoaded).toBe(true);
});
});
// activateLicense(): success path stores the license and closes the dialog;
// null/rejection from the bridge both surface as a false return.
describe('activateLicense', () => {
test('sets Licensed on valid key', async () => {
const license = { license_id: 'l2', tier: 'pro', customer_email: 'x@y.com', expires_at: 9999999999, features: ['bg_removal'], issued_at: 1, max_activations: 1 };
mockElectronAPI({ activateLicense: vi.fn().mockResolvedValue(license) });
const result = await useLicenseStore.getState().activateLicense('talkedit_v1_validKey');
expect(result).toBe(true);
expect(useLicenseStore.getState().status?.tag).toBe('Licensed');
expect(useLicenseStore.getState().canEdit).toBe(true);
expect(useLicenseStore.getState().canUseAI).toBe(false);
});
test('returns false on invalid key', async () => {
mockElectronAPI({ activateLicense: vi.fn().mockResolvedValue(null) });
const result = await useLicenseStore.getState().activateLicense('invalid-key');
expect(result).toBe(false);
});
test('returns false on API error', async () => {
mockElectronAPI({ activateLicense: vi.fn().mockRejectedValue(new Error('bad key')) });
const result = await useLicenseStore.getState().activateLicense('bad-key');
expect(result).toBe(false);
});
test('closes dialog on success', async () => {
useLicenseStore.setState({ showDialog: true });
const license = { license_id: 'l3', tier: 'business', customer_email: 'z@z.com', expires_at: 9999999999, features: [], issued_at: 1, max_activations: 5 };
mockElectronAPI({ activateLicense: vi.fn().mockResolvedValue(license) });
await useLicenseStore.getState().activateLicense('talkedit_v1_key');
expect(useLicenseStore.getState().showDialog).toBe(false);
});
});
// deactivateLicense(): after deactivation the store re-queries the bridge,
// so the resulting status depends on whether a trial is still running.
describe('deactivateLicense', () => {
test('sets Expired when trial is over', async () => {
mockElectronAPI({
deactivateLicense: vi.fn().mockResolvedValue(undefined),
getAppStatus: vi.fn().mockResolvedValue({ tag: 'Expired' }),
});
await useLicenseStore.getState().deactivateLicense();
expect(useLicenseStore.getState().status?.tag).toBe('Expired');
expect(useLicenseStore.getState().canEdit).toBe(false);
expect(useLicenseStore.getState().canUseAI).toBe(false);
});
test('restores Trial when trial is still valid', async () => {
mockElectronAPI({
deactivateLicense: vi.fn().mockResolvedValue(undefined),
getAppStatus: vi.fn().mockResolvedValue({ tag: 'Trial', days_remaining: 5, started_at: Date.now() }),
});
await useLicenseStore.getState().deactivateLicense();
expect(useLicenseStore.getState().status?.tag).toBe('Trial');
expect(useLicenseStore.getState().canEdit).toBe(true);
expect(useLicenseStore.getState().canUseAI).toBe(false);
});
test('handles API error', async () => {
// Even if the bridge call rejects, the store fails closed to Expired.
mockElectronAPI({ deactivateLicense: vi.fn().mockRejectedValue(new Error('fail')) });
useLicenseStore.setState({ status: { tag: 'Licensed', license: { license_id: 'x', tier: 'pro', customer_email: 'a@b.com', expires_at: 9999999999, features: [], issued_at: 1, max_activations: 1 } }, canEdit: true, canUseAI: false });
await useLicenseStore.getState().deactivateLicense();
expect(useLicenseStore.getState().status?.tag).toBe('Expired');
expect(useLicenseStore.getState().canEdit).toBe(false);
expect(useLicenseStore.getState().canUseAI).toBe(false);
});
});
// hasFeature(): thin async passthrough to the bridge; errors coerce to false.
describe('hasFeature', () => {
test('returns true when feature exists', async () => {
mockElectronAPI({ hasLicenseFeature: vi.fn().mockResolvedValue(true) });
const result = await useLicenseStore.getState().hasFeature('bg_removal');
expect(result).toBe(true);
});
test('returns false when feature missing', async () => {
mockElectronAPI({ hasLicenseFeature: vi.fn().mockResolvedValue(false) });
const result = await useLicenseStore.getState().hasFeature('nonexistent');
expect(result).toBe(false);
});
test('returns false on API error', async () => {
mockElectronAPI({ hasLicenseFeature: vi.fn().mockRejectedValue(new Error('fail')) });
const result = await useLicenseStore.getState().hasFeature('bg_removal');
expect(result).toBe(false);
});
});
describe('setShowDialog', () => {
test('toggles dialog', () => {
useLicenseStore.getState().setShowDialog(true);
expect(useLicenseStore.getState().showDialog).toBe(true);
useLicenseStore.getState().setShowDialog(false);
expect(useLicenseStore.getState().showDialog).toBe(false);
});
});
});

View File

@ -1,110 +0,0 @@
import { create } from 'zustand';
import { persist } from 'zustand/middleware';
// Decoded license payload returned by the Electron bridge on activation.
export interface LicensePayload {
license_id: string;
customer_email: string;
// Tier gates features: 'business' additionally unlocks AI (see canUseAI).
tier: 'pro' | 'business';
// Feature flags granted by this license (e.g. 'bg_removal').
features: string[];
// Unix timestamps — assumed seconds; TODO confirm against the backend.
issued_at: number;
expires_at: number;
max_activations: number;
}
// Persisted marker for when the free trial began.
export interface TrialState {
// Trial start time — presumably epoch milliseconds (tests use Date.now()).
started_at: number;
}
// Discriminated union of the app's licensing state, tagged on `tag`.
export type AppStatus =
| { tag: 'Licensed'; license: LicensePayload }
| { tag: 'Trial'; days_remaining: number; started_at: number }
| { tag: 'Expired' };
// Reactive state slice held by the license store.
interface LicenseState {
// Last status reported by the Electron bridge; null until first check.
status: AppStatus | null;
// True once any status check (success or failure) has completed.
isLoaded: boolean;
// Whether the license-entry dialog is visible.
showDialog: boolean;
// Derived: editing allowed for Licensed and Trial statuses.
canEdit: boolean;
// Derived: AI features allowed only for Licensed 'business' tier.
canUseAI: boolean;
}
// Actions exposed by the license store; async ones call the Electron bridge.
interface LicenseActions {
setStatus: (status: AppStatus | null) => void;
setShowDialog: (show: boolean) => void;
checkStatus: () => Promise<void>;
// Resolves true when the key activates; false on rejection or error.
activateLicense: (key: string) => Promise<boolean>;
deactivateLicense: () => Promise<void>;
hasFeature: (feature: string) => Promise<boolean>;
}
// Zustand store tracking license/trial status. Wrapped in `persist` so a
// Licensed status survives restarts (see partialize below); every bridge
// failure fails closed to Expired with editing disabled.
export const useLicenseStore = create<LicenseState & LicenseActions>()(
persist(
(set) => ({
status: null,
isLoaded: false,
showDialog: false,
canEdit: false,
canUseAI: false,
// Recompute the derived flags whenever the status is set directly.
setStatus: (status) => {
const canEdit = status?.tag === 'Licensed' || status?.tag === 'Trial';
const canUseAI = status?.tag === 'Licensed' && status.license.tier === 'business';
set({ status, isLoaded: true, canEdit, canUseAI });
},
setShowDialog: (show) => set({ showDialog: show }),
// Query the Electron bridge for the current status. A missing bridge or
// a rejected call both resolve to Expired with flags cleared.
checkStatus: async () => {
try {
const status = await window.electronAPI?.getAppStatus();
const canEdit = status?.tag === 'Licensed' || status?.tag === 'Trial';
const canUseAI = status?.tag === 'Licensed' && status.license.tier === 'business';
set({ status: status || { tag: 'Expired' }, isLoaded: true, canEdit, canUseAI });
} catch {
set({ status: { tag: 'Expired' }, isLoaded: true, canEdit: false, canUseAI: false });
}
},
// Activate a license key via the bridge. Returns true and closes the
// dialog on success; a null payload or thrown error yields false with
// the existing status left untouched.
activateLicense: async (key: string): Promise<boolean> => {
try {
const license = await window.electronAPI?.activateLicense(key);
if (!license) return false;
set({ status: { tag: 'Licensed', license }, showDialog: false, canEdit: true, canUseAI: license.tier === 'business' });
return true;
} catch {
return false;
}
},
// Deactivate, then re-query the bridge so the UI falls back to whatever
// remains (an active Trial, or Expired). Errors fail closed to Expired.
deactivateLicense: async () => {
try {
await window.electronAPI?.deactivateLicense();
const s = await window.electronAPI?.getAppStatus();
const canEdit = s?.tag === 'Licensed' || s?.tag === 'Trial';
const canUseAI = s?.tag === 'Licensed' && s.license.tier === 'business';
set({ status: s || { tag: 'Expired' }, isLoaded: true, canEdit, canUseAI });
} catch {
set({ status: { tag: 'Expired' }, isLoaded: true, canEdit: false, canUseAI: false });
}
},
// Passthrough feature check; undefined and errors both coerce to false.
hasFeature: async (feature: string): Promise<boolean> => {
try {
return await window.electronAPI?.hasLicenseFeature(feature) || false;
} catch {
return false;
}
},
}),
{
// localStorage key used by the persist middleware.
name: 'talkedit-license',
partialize: (state) => {
// Only persist Licensed status (trial is ephemeral)
if (state.status?.tag === 'Licensed') {
return { status: state.status };
}
return {};
},
},
),
);

View File

@ -21,64 +21,31 @@ export interface TimeRange {
end: number;
}
export interface DeletedRange extends TimeRange {
id: string;
wordIndices: number[];
}
export interface CutRange extends TimeRange {
id: string;
trimGroupId?: string;
}
export interface MuteRange extends TimeRange {
id: string;
}
export interface GainRange extends TimeRange {
id: string;
gainDb: number;
}
export interface SpeedRange extends TimeRange {
id: string;
speed: number;
}
export interface SilenceDetectionRange extends TimeRange {
duration: number;
}
// Tunable thresholds for automatic silence trimming.
export interface SilenceTrimSettings {
// minimum silence length to act on, in milliseconds (per the Ms suffix)
minSilenceMs: number;
// level below which audio counts as silence, in dB
silenceDb: number;
// pre/post buffers in ms — presumably padding retained around each trimmed
// silence; confirm against the trimmer implementation.
preBufferMs: number;
postBufferMs: number;
}
export interface SilenceTrimGroup {
id: string;
settings: SilenceTrimSettings;
sourceRanges: SilenceDetectionRange[];
createdAt: string;
updatedAt: string;
}
// Serialized project state saved to disk. Optional fields may be absent in
// files written by older builds — presumably added after v1; TODO confirm.
export interface ProjectFile {
version: 1;
videoPath: string;
exportedAudioPath?: string; // path to modified/processed audio if it exists
transcriptionModel?: string;
words: Word[];
segments: Segment[];
deletedRanges: DeletedRange[];
cutRanges: CutRange[];
muteRanges: MuteRange[];
gainRanges?: GainRange[];
speedRanges?: SpeedRange[];
globalGainDb?: number;
silenceTrimGroups?: SilenceTrimGroup[];
timelineMarkers?: TimelineMarker[];
language: string;
// createdAt/modifiedAt are strings — presumably ISO timestamps; confirm.
createdAt: string;
modifiedAt: string;
zoomConfig?: ZoomConfig;
additionalClips?: ClipInfo[];
backgroundMusic?: BackgroundMusicConfig;
}
export interface TranscriptionResult {
@ -87,64 +54,16 @@ export interface TranscriptionResult {
language: string;
}
export interface ZoomConfig {
enabled: boolean;
zoomFactor: number; // 1.0 = no zoom, 2.0 = 2x zoom
panX: number; // -1 to 1, normalized pan offset
panY: number;
}
export interface ClipInfo {
id: string;
path: string;
label: string;
}
// Background-music mix settings, including ducking envelope parameters.
export interface BackgroundMusicConfig {
// audio file to mix under the main track — presumably an absolute path; confirm
path: string;
volumeDb: number; // gain in dB for music track
duckingEnabled: boolean;
duckingDb: number; // how much to duck (dB reduction)
duckingAttackMs: number;
duckingReleaseMs: number;
}
export interface ExportOptions {
outputPath: string;
mode: 'fast' | 'reencode';
resolution: '720p' | '1080p' | '4k';
format: 'mp4' | 'mov' | 'webm' | 'wav';
format: 'mp4' | 'mov' | 'webm';
enhanceAudio: boolean;
captions: 'none' | 'burn-in' | 'sidecar';
captionStyle?: CaptionStyle;
zoom?: ZoomConfig;
removeBackground?: boolean;
backgroundReplacement?: 'blur' | 'color' | 'image';
backgroundReplacementValue?: string;
}
export interface TimelineMarker {
id: string;
time: number;
label: string;
color: string;
}
export interface Chapter {
markerId: string;
label: string;
startTime: number;
}
export interface KeyBinding {
id: string;
label: string;
keys: string; // e.g. "Ctrl+Z"
category: string; // "transport", "edit", "file", "view"
}
export type HotkeyPreset = 'left-hand' | 'standard';
export interface CaptionStyle {
fontName: string;
fontSize: number;

View File

@ -4,17 +4,6 @@ interface ImportMetaEnv {
readonly VITE_BACKEND_PORT?: string;
}
interface ImportMeta {
readonly env: ImportMetaEnv;
}
interface ModelInfo {
name: string;
path: string;
size_bytes: number;
kind: string;
}
interface DesktopAPI {
openFile: (options?: Record<string, unknown>) => Promise<string | null>;
saveFile: (options?: Record<string, unknown>) => Promise<string | null>;
@ -27,19 +16,8 @@ interface DesktopAPI {
transcribe: (filePath: string, modelName: string, language?: string) => Promise<any>;
readFile: (path: string) => Promise<string>;
writeFile: (path: string, content: string) => Promise<boolean>;
activateLicense: (key: string) => Promise<any>;
getAppStatus: () => Promise<any>;
verifyLicense: (key: string) => Promise<any>;
deactivateLicense: () => Promise<void>;
hasLicenseFeature: (feature: string) => Promise<boolean>;
listModels: () => Promise<ModelInfo[]>;
deleteModel: (path: string) => Promise<void>;
logError: (message: string, stack: string, componentStack: string) => Promise<void>;
writeAutosave: (data: string) => Promise<void>;
readAutosave: () => Promise<string | null>;
deleteAutosave: () => Promise<void>;
}
interface Window {
electronAPI: DesktopAPI;
desktopAPI?: DesktopAPI;
}

View File

@ -1 +1 @@
{"root":["./src/App.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/AIPanel.tsx","./src/components/AppendClipPanel.tsx","./src/components/BackgroundMusicPanel.tsx","./src/components/DevPanel.tsx","./src/components/ErrorBoundary.tsx","./src/components/ExportDialog.tsx","./src/components/HelpContent.tsx","./src/components/LicenseDialog.tsx","./src/components/MarkersPanel.tsx","./src/components/SettingsPanel.tsx","./src/components/SilenceTrimmerPanel.tsx","./src/components/TranscriptEditor.tsx","./src/components/VideoPlayer.tsx","./src/components/WaveformTimeline.tsx","./src/components/ZoneEditor.tsx","./src/hooks/useKeyboardShortcuts.ts","./src/hooks/useVideoSync.ts","./src/lib/assert.test.ts","./src/lib/assert.ts","./src/lib/dev-logger.ts","./src/lib/keybindings.ts","./src/lib/tauri-bridge.ts","./src/lib/thumbnails.ts","./src/store/aiStore.test.ts","./src/store/aiStore.ts","./src/store/editorStore.test.ts","./src/store/editorStore.ts","./src/store/licenseStore.test.ts","./src/store/licenseStore.ts","./src/types/project.ts"],"version":"5.9.3"}
{"root":["./src/App.tsx","./src/main.tsx","./src/vite-env.d.ts","./src/components/AIPanel.tsx","./src/components/DevPanel.tsx","./src/components/ExportDialog.tsx","./src/components/SettingsPanel.tsx","./src/components/SilenceTrimmerPanel.tsx","./src/components/TranscriptEditor.tsx","./src/components/VideoPlayer.tsx","./src/components/WaveformTimeline.tsx","./src/hooks/useKeyboardShortcuts.ts","./src/hooks/useVideoSync.ts","./src/lib/dev-logger.ts","./src/lib/tauri-bridge.ts","./src/store/aiStore.ts","./src/store/editorStore.ts","./src/types/project.ts"],"version":"5.9.3"}

View File

@ -1,8 +0,0 @@
import { defineConfig } from 'vitest/config';
// Vitest configuration: jsdom environment for DOM-dependent store tests,
// globals so describe/test/expect are available without explicit imports.
export default defineConfig({
test: {
environment: 'jsdom',
globals: true,
},
});

View File

@ -1,8 +1,8 @@
Here's a clear, actionable **summary** of what you (as a solo developer using AI tools heavily) should do to build and monetize this product, based on current market demand in 2026.
### What You Should Do (Step-by-Step Plan)
1. **Fork an existing open-source base** (don't start from scratch)
- Best choice: **CutScript** (newest, explicitly built as "offline Descript alternative" with text-based editing) or **Audapolis** (more mature, ~1.8k stars, wordprocessor-like experience for spoken-word video/audio).
1. **Build from the existing TalkEdit base** (don't start from scratch)
- Keep TalkEdit as the primary codebase and borrow ideas from mature open-source editors like **Audapolis** where useful.
- Reason: The hard parts (local Whisper transcription with word-level timestamps, syncing text deletions to video cuts, FFmpeg handling) are already solved. You save 4–8 weeks and focus on polish.
2. **Migrate/refactor to Tauri 2.0** (Rust backend + React/Vite + Tailwind + shadcn-ui frontend)
@ -48,13 +48,13 @@ That's it. No multi-track timelines, no voice cloning, no collaboration, no fanc
### Why This Will Work
- **Market demand is real**: Creators love text-based editing because it feels revolutionary for dialogue-heavy videos. They want it faster, cheaper, and private/offline. Existing alternatives are either cloud-based with subscriptions or clunky open-source tools.
- **Competition gap**: CutScript and Audapolis prove interest but lack slick UX and the "one magic button" polish. You can own the "delightful local Descript killer" niche.
- **Competition gap**: Existing local editors prove interest but often lack slick UX and the "one magic button" polish. You can own the "delightful local Descript killer" niche.
- **Solo-dev friendly**: Forking + AI code generation makes this realistic without a team.
Once you ship the MVP and get initial users, you can add nice-to-haves (e.g., custom filler lists, better subtitle export, optional cloud boost) based on real feedback.
**Next immediate actions**:
- Clone CutScript or Audapolis today and run it locally to see the current state.
- Continue from TalkEdit and benchmark against Audapolis today to compare current UX quality.
- Set up a new Tauri project and start refactoring the UI/transcript editor.
If you want, I can give you the exact Git commands, first AI prompts for refactoring, folder structure, or even sample code for the "Clean it" button + FFmpeg polish chain.

46
open
View File

@ -3,51 +3,36 @@
cd "$(dirname "$0")"
PROJECT_DIR="$PWD"
BACKEND_PORT=8000
export BACKEND_PORT="${BACKEND_PORT:-8000}"
export VITE_BACKEND_PORT="${VITE_BACKEND_PORT:-$BACKEND_PORT}"
export TALKEDIT_DEV_LOG_PATH="${TALKEDIT_DEV_LOG_PATH:-/tmp/talkedit-webview.log}"
BACKEND_URL="http://127.0.0.1:${BACKEND_PORT}/health"
FRONTEND_URL="http://127.0.0.1:5173"
# Check if backend is already running
if curl -sf "$BACKEND_URL" > /dev/null 2>&1; then
echo "Backend already running on port ${BACKEND_PORT}."
else
echo "Backend not running — starting it in a new terminal..."
VENV_PYTHON=""
PYTHON_CANDIDATES=(
"${PROJECT_DIR}/.venv312/bin/python3.12"
"${PROJECT_DIR}/.venv312/bin/python"
"${PROJECT_DIR}/.venv/bin/python3"
"${PROJECT_DIR}/.venv/bin/python"
"${PROJECT_DIR}/venv/bin/python3"
"${PROJECT_DIR}/venv/bin/python"
)
for candidate in "${PYTHON_CANDIDATES[@]}"; do
if [[ -x "${candidate}" ]]; then
VENV_PYTHON="${candidate}"
break
fi
done
if [[ -z "${VENV_PYTHON}" ]]; then
echo "No project virtualenv Python found. Checked: .venv312, .venv, venv"
exit 1
fi
VENV_PYTHON="${PROJECT_DIR}/.venv312/bin/python"
BACKEND_DIR="${PROJECT_DIR}/backend"
# Try common terminal emulators in order
if command -v ghostty &>/dev/null; then
ghostty -e bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
ghostty -e bash -c "cd '${BACKEND_DIR}' && TALKEDIT_DEV_LOG_PATH='${TALKEDIT_DEV_LOG_PATH}' '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
elif command -v kitty &>/dev/null; then
kitty --title "TalkEdit Backend" -- bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
kitty --title "TalkEdit Backend" -- bash -c "cd '${BACKEND_DIR}' && TALKEDIT_DEV_LOG_PATH='${TALKEDIT_DEV_LOG_PATH}' '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
elif command -v alacritty &>/dev/null; then
alacritty --title "TalkEdit Backend" -e bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
alacritty --title "TalkEdit Backend" -e bash -c "cd '${BACKEND_DIR}' && TALKEDIT_DEV_LOG_PATH='${TALKEDIT_DEV_LOG_PATH}' '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
elif command -v konsole &>/dev/null; then
konsole --new-tab -e bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
konsole --new-tab -e bash -c "cd '${BACKEND_DIR}' && TALKEDIT_DEV_LOG_PATH='${TALKEDIT_DEV_LOG_PATH}' '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
elif command -v gnome-terminal &>/dev/null; then
gnome-terminal --title "TalkEdit Backend" -- bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
gnome-terminal --title "TalkEdit Backend" -- bash -c "cd '${BACKEND_DIR}' && TALKEDIT_DEV_LOG_PATH='${TALKEDIT_DEV_LOG_PATH}' '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
elif command -v xterm &>/dev/null; then
xterm -T "TalkEdit Backend" -e bash -c "cd '${BACKEND_DIR}' && '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
xterm -T "TalkEdit Backend" -e bash -c "cd '${BACKEND_DIR}' && TALKEDIT_DEV_LOG_PATH='${TALKEDIT_DEV_LOG_PATH}' '${VENV_PYTHON}' -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT}; exec bash" &
else
echo "No supported terminal emulator found. Starting backend in background..."
cd "${BACKEND_DIR}" && "${VENV_PYTHON}" -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT} &
cd "${BACKEND_DIR}" && TALKEDIT_DEV_LOG_PATH="${TALKEDIT_DEV_LOG_PATH}" "${VENV_PYTHON}" -m uvicorn main:app --host 127.0.0.1 --port ${BACKEND_PORT} &
fi
# Wait up to 15s for backend to become ready
@ -65,4 +50,11 @@ else
done
fi
# Check if frontend is already running
if curl -sf "$FRONTEND_URL" > /dev/null 2>&1; then
echo "Frontend already running on port 5173."
else
echo "Frontend not running — Tauri will start it automatically."
fi
npx tauri dev

View File

@ -5,18 +5,13 @@
"description": "TalkEdit — Open-source AI-powered text-based video editor",
"scripts": {
"tauri": "tauri",
"dev": "cd frontend && npm run dev -- --host",
"dev:tauri": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; (cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642) & cd frontend && cargo tauri dev'",
"dev": "VITE_BACKEND_PORT=${VITE_BACKEND_PORT:-${BACKEND_PORT:-8000}}; cd frontend && VITE_BACKEND_PORT=$VITE_BACKEND_PORT npm run dev -- --host",
"dev:tauri": "BACKEND_PORT=${BACKEND_PORT:-8000}; VITE_BACKEND_PORT=${VITE_BACKEND_PORT:-$BACKEND_PORT}; cd backend && python -m uvicorn main:app --reload --port $BACKEND_PORT & cd frontend && VITE_BACKEND_PORT=$VITE_BACKEND_PORT cargo tauri dev",
"build:tauri": "cd frontend && cargo tauri build",
"dev:frontend": "cd frontend && npm run dev",
"dev:backend": "bash -lc 'set -e; PY=; for p in ./.venv312/bin/python3.12 ./.venv312/bin/python ./.venv/bin/python3 ./.venv/bin/python ./venv/bin/python3 ./venv/bin/python; do if [ -x \"$p\" ]; then PY=\"$PWD/${p#./}\"; break; fi; done; if [ -z \"$PY\" ]; then echo \"No project virtualenv Python found (checked .venv312, .venv, venv)\"; exit 1; fi; cd backend && \"$PY\" -m uvicorn main:app --reload --port 8642'",
"dev:frontend": "VITE_BACKEND_PORT=${VITE_BACKEND_PORT:-${BACKEND_PORT:-8000}}; cd frontend && VITE_BACKEND_PORT=$VITE_BACKEND_PORT npm run dev",
"dev:backend": "BACKEND_PORT=${BACKEND_PORT:-8000}; cd backend && python -m uvicorn main:app --reload --port $BACKEND_PORT",
"lint": "cd frontend && npm run lint"
},
"devDependencies": {
"concurrently": "^9.1.0",
"wait-on": "^8.0.0"
},
"dependencies": {
"python-shell": "^5.0.0"
}
"devDependencies": {},
"dependencies": {}
}

197
plan.md
View File

@ -1,146 +1,81 @@
# TalkEdit — Launch Plan
# Plan for Building TalkEdit (Whisper.cpp + Tauri)
## Niche: "Descript for long-form content"
Based on your original idea summary and our discussions, here's a detailed plan to build a standalone, local audio/video editor app. We'll continue evolving the existing TalkEdit codebase on **Tauri 2.0** (Rust backend + React frontend) for tiny, dependency-free installers, and use **Whisper.cpp** for fast, accurate transcription. This keeps the scope minimal, focuses on text-based editing for spoken content, and targets podcasters/YouTubers.
TalkEdit's defensible position: **works on hour+ files without degrading**, fully offline, one-time payment. No competitor owns this — Descript chokes on long content, CapCut limits mobile uploads, and both require accounts.
## 1. Overview
- **Goal**: Create an offline Descript alternative with word-level editing, transcription, and export. Users download one file (~10–20MB), install, and run—no Python, FFmpeg, or external deps.
- **Why This Stack**: Tauri bundles everything into a native app; Whisper.cpp (C++ lib) integrates seamlessly with Rust for CPU-efficient transcription. Faster than rebuilding from scratch.
- **Target Users**: Creators editing podcasts/videos; free core + Pro upgrades.
- **Key Differentiators**: Fully local, text-based editing like Google Docs, smart cuts with fades.
**Current status (May 2026):** All core editing features are built and stable. Polish pass completed. 107 automated tests (95 frontend + 12 Rust). Ready for beta testing.
## 2. Tech Stack
- **Frontend**: React + Vite + Tailwind CSS + shadcn/ui.
- **Backend**: Tauri 2.0 (Rust) handles file I/O, FFmpeg calls, Whisper.cpp integration.
- **Transcription**: Whisper.cpp (via Rust bindings like `whisper-cpp-sys` or `whisper-rs`).
- **Audio/Video Processing**: FFmpeg (bundled or called via Rust wrappers like `ffmpeg-next`).
- **State Management**: Zustand.
- **Packaging**: Tauri's `tauri build` for cross-platform installers.
- **AI Features**: Local models only (no APIs); optional Ollama for fillers.
---
## 3. Step-by-Step Development Plan
1. **Set Up Tauri in TalkEdit** (1–2 weeks):
- Install `tauri-cli` globally.
- In TalkEdit root: `npx tauri init` (choose Rust backend, link to existing React frontend).
- Implement Tauri `src/main.rs` host flow (window lifecycle, file dialogs, backend coordination).
- Update `tauri.conf.json` for app metadata, bundle settings.
## Phase 1: Polish ✅ COMPLETED
2. **Integrate Whisper.cpp in Rust** (2–3 weeks):
- Add `whisper-cpp` as a dependency in `Cargo.toml`.
- Create a Rust module for transcription: Load models, process audio, return word-level timestamps.
- Replace Python backend calls with Tauri commands (e.g., `invoke` from frontend to Rust for transcription).
- Handle model downloads on first run (store in app data dir).
### Reliability & error handling ✅
- [x] Backend health check — polls `/health` every 30s, shows reconnecting banner
- [x] Export failure reporting — surfaces FFmpeg stderr with copy-to-clipboard
- [x] React ErrorBoundary catches render crashes, shows fallback with reload
- [x] Global JS error logging — `window.onerror` + `onunhandledrejection` logged to Rust backend
3. **Migrate Audio/Video Logic** (2 weeks):
- Port FFmpeg calls to Rust (use `ffmpeg-next` for cutting/export).
- Implement segment calculation: From edited transcript, build keep_segments with padding/fades.
- Add audio cleaning (noise reduction via bundled tools or Rust libs).
### UX polish ✅
- [x] Tooltips on every button/control across all panels
- [x] Loading spinners for waveform, waveform retry button
- [x] Export progress bar (visual, not just text)
- [x] Help panel with full feature documentation
- [x] Keyboard cheatsheet overlay with close button and preset indicator
- [x] First-run welcome overlay with 3-step guide
- [x] `?` keyboard shortcut opens cheatsheet (accessible from Help panel)
- [x] Empty states: MarkersPanel, AIPanel, WaveformTimeline
- [x] Error states: AIPanel with retry, WaveformTimeline with retry
- [x] Auto-save crash recovery every 60s, restore prompt on next launch
- [x] Confirmation dialogs for zone/marker deletion
- [x] Disabled state for all buttons during export/transcription
- [x] Export button disabled when no video loaded
4. **Frontend Polish** (1–2 weeks):
- Update UI for Tauri (file dialogs via `tauri-plugin-dialog`).
- Refine transcript editor: Better timestamp syncing, manual adjustments.
- Add export options (MP4 with subs, audio-only).
### Consistency ✅
- [x] Mute zone color unified (blue everywhere)
- [x] Disabled opacity unified (40% everywhere)
- [x] Zone list items border radius unified (`rounded-lg`)
- [x] Toolbar button groups separated with visual dividers
- [x] Labels simplified: "Sound Gain", "Speed Adjust", "Trim Silence", "Chapter Marks", "Edit Zones", "Add Clips", "Bkg. Music", "AI Tools"
- [x] Model selector moved to AIPanel reprocess tab
- [x] Orphaned VolumePanel.tsx removed
5. **Testing & Packaging** (1 week):
- Test on Windows/macOS/Linux; ensure Whisper runs offline.
- Bundle with `tauri build`; verify no external deps.
- Add auto-updater for Pro features.
### Trial & licensing
- [x] Trial duration: 7 days
- [x] Trial bar on welcome screen with days remaining
- [x] Sentinel file prevents deleting trial.json to reset trial
- [x] XOR integrity check prevents editing trial.json timestamp
- [x] `canEdit` defaults to `false` (locked until status check confirms)
- [x] Email confirmation step before license activation (deters key sharing)
- [x] `verify_license` command (verify without caching)
- [x] Expired banner explains what still works (export, loading)
6. **Launch & Iterate** (Ongoing):
- Open-source core on GitHub.
- Market on Product Hunt, Reddit; gather feedback.
### Robustness ✅
- [x] React ErrorBoundary
- [x] Store-level input validation (reject NaN, clamp bounds, enforce min zone duration)
- [x] Runtime assertions in critical paths (TranscriptEditor, WaveformTimeline, ExportDialog)
- [x] Auto-save crash recovery
- [x] CI pipeline (GitHub Actions: Rust + Frontend + Python)
- [x] Bad project state recovery (auto-prunes invalid zones on load, Dev Panel reset button)
- [x] 95 frontend tests (editorStore, licenseStore, aiStore, assert)
- [x] 12 Rust tests (licensing, models)
- [x] Canvas zone handles enlarged (r=6), hit area increased
- [x] Search match contrast improved
- [x] Split panes keyboard-accessible (arrow keys, tabIndex, ARIA)
## 4. MVP Features (Minimal but Useful)
Focus on what creators need for spoken content:
- **Drag-and-drop import**: Audio/video files; auto-extract audio.
- **One-click transcription**: Whisper.cpp with model choice (Fast - less accurate: tiny/base; Slow - more accurate: small/medium/large).
- **Text-based editing**: Scrollable transcript; click word → jump to video; select/delete words → auto-cut audio with 150ms fades.
- **Smart cleanup**: Remove fillers ("um", pauses >0.8s) via local AI.
- **Preview & Export**: Synced preview; export MP4/audio with optional SRT subs.
- **Undo/Redo**: Full edit history.
---
No multi-track, voice cloning, or collaboration—keep it simple.
## Phase 2: Beta Launch (🚧 next — 24 weeks)
## 4. Notes
- Consider adding Parakeet TDT as a transcription option in the future for users who want alternatives to Whisper.
**Goal:** Get working builds into real podcasters' hands. Validate the core promise (long-form, offline) before investing in edge-case features.
## 5. Monetization Model
- **Free Forever**: Core editing/transcription (unlimited local use).
- **Pro License** ($29–49 one-time): Batch processing, high-quality voices (if adding TTS), custom presets, priority support.
- **Optional Add-Ons**: Cloud credits for long videos (rarely needed).
### Must-have for beta
## 6. Timeline & Milestones
- **Weeks 1–4**: Tauri setup + Whisper integration.
- **Weeks 5–6**: Audio logic migration + frontend tweaks.
- **Weeks 7–8**: Testing, packaging, launch prep.
- **Total**: 6–10 weeks to MVP (solo dev + AI).
- [ ] **Smart chunking for transcription** — files >2hr. Without this the niche promise is unproven. Breaks transcription into overlapping chunks, reassembles with correct timestamps.
- [ ] **Hardware detection & model selection** — detect CUDA/ROCm/MPS at startup; expose model backend choice in Settings so beta users can configure their system.
- [ ] **GitHub v1.0.0 release** — tag, binary builds (AppImage + .deb), release notes.
### Sales & distribution
- [ ] **Stripe integration** — payment processing for one-time purchases (Pro $39, Business $79). License key generation + email delivery on payment success.
- [ ] **Landing page + download site** — simple site with: feature overview, pricing tiers, download links (AppImage/.deb), license activation flow. No auth system needed — Stripe handles payments, license keys unlock the app.
### Beta program
- [ ] **Free licenses to 20 podcasters** — in exchange for feedback + permission to quote. Target: r/podcasting regulars, small-to-medium shows (30min–2hr episodes).
- [ ] **Bug/feedback pipeline** — GitHub Issues template for beta testers. Weekly triage.
- [ ] **Messaging for beta landing page:**
1. "The offline video editor that doesn't slow down on long files"
2. "No subscription. One price, owned forever."
3. "AI-powered editing — bring your own API key (Ollama, OpenAI, Claude)"
---
## Phase 3: Post-Beta Enhancements (user-driven priority)
**Goal:** Build what beta testers actually ask for. Deferred items below are ordered by likely demand, not engineering convenience.
### Bundled local LLM
- [ ] Integrate llama.cpp Rust bindings
- [ ] Auto-download Qwen3 on first AI use (4B: 2.5GB / 1.7B: 1GB)
- [ ] Hardware detection at runtime, model selection in Settings
### Long-form content
- [ ] Project stitching — load multiple `.aive` projects, combine into one export
### Export
- [ ] Batch export — multiple projects/cuts in sequence
### AI features
- [ ] Smart Shorts finder — scan transcript for 10–90s segments
- [ ] AI auto-chapters — topic detection from transcript
- [ ] AI show notes — title, description, key moments
- [ ] AI dead-air finder — content-based silence detection
---
## Phase 4: Public Launch
**Goal:** Convert beta momentum + testimonials into a public release.
### Messaging pillars (updated)
1. "The offline video editor that doesn't slow down on long files"
2. "No subscription. One price, owned forever."
3. "Zero-setup AI" — bundled Qwen3, no API keys *(activate when Phase 3 ships)*
4. "Your podcast → 10 TikToks in one click" — Smart Shorts finder *(activate when Phase 3 ships)*
### Channels
- [ ] r/podcasting, r/VideoEditing, r/selfhosted — anchored by beta tester testimonials
- [ ] Product Hunt, Hacker News "Show HN"
- [ ] YouTube demo (3-5 min walkthrough) — feature the beta tester stories
- [ ] Pricing goes live publicly
### Pricing
- 7-day free trial (no CC, no account)
- Pro: $39 one-time
- Business: $79 one-time (priority support, volume licensing)
---
## Non-goals (explicitly deferred)
- Cloud sync / collaboration
- Voice cloning / TTS
- Full multi-track NLE timeline
- Mobile app
- Subscription model
- Image/video generation models
## 7. Risks & Tips
- **Risks**: Whisper.cpp compilation issues; Rust learning curve if new to it.
- **Tips**: Start with small models (base ~70MB); test timestamp accuracy early. Use Tauri's docs for migration. If stuck, fall back to bundling Python for Whisper (but avoid for true standalone).
- **Resources**: Tauri docs, Whisper.cpp GitHub, Rust audio crates.

View File

@ -1,342 +0,0 @@
# TalkEdit — Testing & Robustness Plan
Tests are critical before launch to prevent regressions and ensure the app is stable. Below is every function in the codebase that needs test coverage, organized by module.
---
## 1. Rust backend (`src-tauri/src/`)
### licensing.rs — already has partial coverage (4 tests)
- [ ] `verify_license_key` — valid key, invalid format, invalid signature, expired key ✅ (exists)
- [ ] `load_cached_license` — file exists, file missing, malformed file
- [ ] `cache_license` — write and read back
- [ ] `remove_license` — removes file, no-ops if missing
- [ ] `get_or_start_trial` — creates new trial file, loads existing, handles corrupt file
- [ ] `get_trial_info` — active trial (29 days), expired trial (0 days), exactly at boundary
- [ ] `get_trial_days_remaining` — active returns Some, expired returns None
- [ ] `clear_trial` — removes file, no-ops if missing
- [ ] `get_app_status` — licensed (cached license), trial active, expired (no license, no trial)
- [ ] `has_feature` — feature exists, feature missing, empty features list
### models.rs — no existing tests
- [ ] `list_models` — whisper models found, llm models found, mixed, empty dirs
- [ ] `delete_model` — deletes file, deletes directory, path doesn't exist
- [ ] `huggingface_cache_dir` — HF_HOME set, XDG_CACHE_HOME set, defaults to ~/.cache
### transcription.rs — no existing tests
- [ ] `ensure_model_downloaded` — returns success (stub function)
### paths.rs — no existing tests
- [ ] `project_root` — dev layout, packaged (TAURI_RESOURCE_DIR), fallback
- [ ] `python_exe` — bundled path, venv paths, fallback to .venv312
- [ ] `backend_script` — joins project_root/backend
- [ ] `root_script` — joins project_root
---
## 2. Frontend store (`frontend/src/store/`)
### editorStore.ts (Zustand) — partially covered (2 tests)
- [ ] `reset` ✅ (in beforeEach)
- [ ] `setGlobalGainDb` — clamps to -24/+24 ✅ (exists)
- [ ] `addGainRange` — adds with correct start/end/gain ✅ (exists)
- [ ] `addCutRange` — adds with correct start/end, handles overlapping ranges
- [ ] `addMuteRange` — adds with correct start/end
- [ ] `addSpeedRange` — adds with correct start/end/speed
- [ ] `removeCutRange` — removes existing, no-ops on missing
- [ ] `removeMuteRange` — removes existing, no-ops on missing
- [ ] `removeGainRange` — removes existing, no-ops on missing
- [ ] `removeSpeedRange` — removes existing, no-ops on missing
- [ ] `updateCutRange` — updates bounds, prevents negative duration
- [ ] `updateMuteRange` — updates bounds
- [ ] `updateGainRangeBounds` — updates bounds
- [ ] `updateSpeedRangeBounds` — updates bounds
- [ ] `updateGainRange` — updates gain value
- [ ] `updateSpeedRange` — updates speed value
- [ ] `setSelectedWordIndices` — single, multiple, empty, out of range handled
- [ ] `replaceWordRange` — replaces words in middle, replaces at start, handles invalid indices
- [ ] `updateWordText` — updates word, preserves timing, no-ops on bad index
- [ ] `getWordAtTime` — exact match, between words, before first word, after last word, no words
- [ ] `loadVideo` — sets videoUrl, resets state, handles missing file
- [ ] `setCurrentTime` — sets time, clamps to 0-duration
- [ ] `setTranscribing` — toggles flag, sets status
- [ ] `setTranscription` — sets words and segments, handles empty arrays
- [ ] `setMarkInTime` / `setMarkOutTime` — sets and clears
- [ ] `clearMarkRange` — clears both marks
- [ ] `addTimelineMarker` — adds with label/color/time
- [ ] `removeTimelineMarker` — removes by id
- [ ] `updateTimelineMarker` — updates label/color
- [ ] `setZonePreviewPaddingSeconds` — sets and clamps
- [ ] `setBackgroundMusic` / `updateBackgroundMusic` — sets and updates
- [ ] `setAdditionalClips` — adds, removes, reorders
- [ ] `setSilenceTrimGroups` — sets groups
### licenseStore.ts — no existing tests
- [ ] `canEdit` — true for Licensed, true for Trial, false for Expired, false for null
- [ ] `checkStatus` — calls getAppStatus, sets correct state, handles error (falls to Expired)
- [ ] `activateLicense` — valid key sets Licensed, invalid key returns false
- [ ] `deactivateLicense` — reverts to trial if valid, falls to Expired otherwise
- [ ] `hasFeature` — returns true for matching, false for missing
- [ ] `setShowDialog` — toggles dialog visibility
- [ ] Persist middleware — Licensed status persists, Trial/Expired does not
### aiStore.ts — no existing tests
- [ ] `setProviderConfig` — updates provider, encrypts API keys
- [ ] `setDefaultProvider` — changes default
- [ ] `setCustomFillerWords` — sets and clears
- [ ] `setFillerResult` — sets and clears
- [ ] `setProcessing` — toggles with message
---
## 3. Frontend hooks (`frontend/src/hooks/`)
### useKeyboardShortcuts.ts — no existing tests
- [ ] Keyboard event dispatch — space plays/pauses, J/K/L speed controls, I/O marks, Delete cuts, Ctrl+Z undo
- [ ] `toggleCheatsheet` — creates overlay with correct content, toggles off
- [ ] Skip logic — skips correctly forward and back from current playhead
### useVideoSync.ts — no existing tests
- [ ] Synchronization of store `isPlaying` with video element, audio element
- [ ] `togglePlay` — starts playing, pauses
- [ ] `seekTo` — seeks video to correct time, seeks audio to correct time
- [ ] Handles video element ref being null (doesn't crash)
---
## 4. Frontend lib (`frontend/src/lib/`)
### keybindings.ts — no existing tests
- [ ] `loadBindings` — loads from localStorage, falls back to standard preset when missing
- [ ] `saveBindings` — persists and reloads correctly
- [ ] `applyPreset` — 'standard' and 'left-hand' both apply all required bindings
- [ ] `detectConflicts` — detects duplicate keys, returns empty when no conflicts
---
## 5. Backend services (`backend/services/`)
### video_editor.py — no existing tests
- [ ] `apply_cut_segments` — keeps correct segments from transcript word list, FFmpeg concat cmd
- [ ] `apply_mute_ranges` — cuts audio for muted ranges
- [ ] `apply_gain_ranges` — adjusts volume (positive and negative) for FFmpeg filter chains
- [ ] `apply_speed_ranges` — time-stretches or compresses segments
- [ ] `mix_background_music` — mixes with ducking enabled, mixes without ducking, handles no music
- [ ] `build_export_filters` — stitches together all zone types into correct filter order
### audio_cleaner.py — no existing tests
- [ ] `detect_silence` — detects pauses above threshold, returns correct time ranges
- [ ] `remove_silence` — splits by silence, re-concatenates keep segments
### ai_provider.py — no existing tests
- [ ] `complete` — Ollama completion succeeds, OpenAI completion succeeds, Claude completion succeeds
- [ ] `complete` — handles missing provider, timeout, bad JSON response
- [ ] `list_ollama_models` — returns models list, handles connection error
### transcription.py — no existing tests
- [ ] `transcribe_file` — returns words with correct format (word, start, end, confidence)
- [ ] `transcribe_segment` — re-transcribes a range with offset-adjusted timestamps
- [ ] `_load_model` — caches model, returns existing from cache, handles GPU/CPU
### caption_generator.py — no existing tests
- [ ] `generate_srt` — correct SRT format, sequential numbering, proper timestamps
- [ ] `generate_vtt` — correct VTT format with header, chroma key tags
- [ ] `generate_ass` — correct ASS subtitle format
### gpu_utils.py — no existing tests
- [ ] `get_optimal_device` — returns CUDA when available, returns CPU otherwise
### audio_processing.py — no existing tests
- [ ] `extract_audio` — extracts wav from video, handles audio-only input, temp file cleanup
---
## 6. Frontend components — integration tests
### TranscriptEditor.tsx
- [ ] Word selection: click, shift+click range, drag select
- [ ] Ctrl+click seeks video to correct time
- [ ] Double-click enters edit mode, Enter commits, Escape cancels
- [ ] Zone mode drag creates correct zone type
- [ ] Search finds matches, navigates with Enter/Shift+Enter
- [ ] "Restore" button appears on hover over words in a zone, removes the zone
- [ ] Re-transcribe calls backend and updates words
- [ ] Selection toolbar buttons create correct zone types
- [ ] When `canEdit` is false, buttons are disabled and zone creation is blocked
### WaveformTimeline.tsx
- [ ] Canvas renders waveform when audio data loads
- [ ] Click seeks to correct time
- [ ] Zone drag creates zone on mouse up
- [ ] Zone selection and resize with handles
- [ ] Delete key removes selected zone
- [ ] Zoom and scroll work correctly
- [ ] Zone toggle buttons show/hide overlay layers
- [ ] Loading spinner shows when no waveform data
- [ ] Error message with retry button on load failure
### ExportDialog.tsx
- [ ] Fast mode card and re-encode card toggle correctly
- [ ] Resolution selector only visible in re-encode mode
- [ ] Format selector disables WAV for video files
- [ ] Export button triggers export with correct parameters
- [ ] Progress bar updates during export
- [ ] Loudness normalization checkbox shows LUFS target selector
### AIPanel.tsx
- [ ] Filler words tab: detect button sends request, displays results
- [ ] Apply All creates cut ranges for all fillers
- [ ] Clips tab: find clips shows suggestions
- [ ] Reprocess tab: model selector + reprocess button
- [ ] Error state with retry on API failure
### App.tsx — layout and toolbar
- [ ] Welcome screen shows when no video loaded
- [ ] Trial bar shows on welcome screen for Trial/Expired states
- [ ] Toolbar buttons toggle modes correctly (Cut, Mute, Gain, Speed)
- [ ] Toolbar buttons open correct panels (Zones, Silence, Markers, Music, Append, AI, Export, Settings, Help)
- [ ] File menu opens/closes, items work
- [ ] Split pane dividers are draggable and keyboard-accessible
- [ ] First-run welcome overlay shows once
- [ ] Hotkeys work: Escape clears modes, ? opens cheatsheet
---
## 7. Error handling regression tests
- [ ] Backend crash: show reconnect banner, not broken UI
- [ ] Transcription failure: show error, allow retry with different model
- [ ] Export failure: show FFmpeg stderr, allow copy
- [ ] Model download timeout: show error, allow retry
- [ ] File not found: handled gracefully on open/load
- [ ] Permission denied: handled gracefully on save/export
- [ ] Concurrent operations: block export during transcription, block transcription during export
---
## 8. Licensing & trial flow tests
- [ ] Fresh install: shows 30-day trial
- [ ] Day 29: still allows editing
- [ ] Day 31: shows expired, editing locked, export still works
- [ ] Activate valid license: switches to Licensed, clears trial
- [ ] Activate invalid license: shows error, stays on trial
- [ ] Deactivate license: returns to trial if valid, expires if trial over
- [ ] Expired banner shows correct message and activate link
- [ ] `canEdit` prop correctly gates all editing controls across all components
---
## Test infrastructure
| Layer | Framework | Run command |
|-------|-----------|-------------|
| Rust | `cargo test` (built-in) | `cd src-tauri && cargo test` |
| Frontend (Vitest) | Vitest + jsdom | `cd frontend && npx vitest run` |
| Frontend (components) | Playwright or Vitest + testing-library | `cd frontend && npx vitest run` |
| Python backend | pytest | `cd backend && python -m pytest` |
### Setup needed
- [ ] Frontend: `npm install -D vitest @testing-library/react @testing-library/jest-dom jsdom` (vitest likely already installed)
- [ ] Frontend: add `"test": "vitest run"` to `package.json` if not present
- [ ] Python: ensure pytest is installed (`pip install pytest pytest-asyncio`)
- [ ] CI: add GitHub Actions workflow for `cargo test && vitest run && pytest`
---
## Priority order
1. **store tests** (editorStore, licenseStore, aiStore) — core data integrity
2. **Rust licensing tests** — payment/trial logic must never break
3. **Rust models tests** — filesystem operations must be safe
4. **Backend service unit tests** — export pipeline, transcription, AI
5. **Component integration tests** — user-facing behavior
6. **Error handling regression tests** — robustness
---
## Robustness beyond tests
### React Error Boundary
The app has no error boundary — a single JS error in any component crashes the entire UI to a white screen. Wrap the app in a `<ErrorBoundary>` that catches render errors and shows a fallback with "Something went wrong" + a reload button.
- [ ] Create `ErrorBoundary.tsx` component (`componentDidCatch` pattern)
- [ ] Wrap the entire `<App />` in `main.tsx`
- [ ] Fallback shows: error message, stack trace (collapsed), "Reload" button, "Reset & Clear State" button
### Global JS error logging
Uncaught errors in async code and event handlers silently break the app. Add a `window.onerror` and `window.onunhandledrejection` handler that logs to the Tauri backend and shows a toast notification.
- [ ] Add global error handler in `main.tsx` that intercepts all uncaught errors
- [ ] Log to Rust backend via `invoke('log_error', { message, stack })`
- [ ] Show a non-blocking toast notification (bottom-right) for non-fatal errors
- [ ] Fatal errors still go to the ErrorBoundary
### Input validation layer
The app trusts user input too much. Invalid values in number inputs, empty file paths, or negative durations can cause crashes or silent failures. Add validation at the store level.
- [ ] `editorStore.ts` — validate all setters: clamp numbers, reject empty strings for paths, enforce min/max on dB and speed values
- [ ] `licenseStore.ts` — validate license key format before sending to Rust (prefix check, base64 pattern)
- [ ] `aiStore.ts` — validate API key formatting, model name not empty
- [ ] Export options — validate resolution, format, loudness target against allowed values before sending to backend
### Frontend runtime assertions
The app makes assumptions about data shapes (e.g. `words[sorted[0]].start` assumes the index exists). Add assertion checks in critical paths that log a clear error instead of silently producing NaN or undefined.
- [ ] Add an `assert` utility function: `assert(condition, message)` that throws a descriptive error in dev, warns in prod
- [ ] Guard all array index access in TranscriptEditor, WaveformTimeline, ExportDialog
- [ ] Guard null/undefined checks on store actions that expect existing data
### Auto-save crash recovery
If the app crashes or the system loses power during editing, current work is lost. Add periodic auto-save to a temp file that gets restored on next launch.
- [ ] Every 60 seconds, save the full editor state to `app_data_dir/autosave.json`
- [ ] On launch, check if autosave.json exists and is newer than the last manual save
- [ ] Show a "Recover unsaved work?" prompt with date/time of autosave
- [ ] Clean up autosave after a manual save or after recovery is accepted/dismissed
### Backend health check & self-diagnostics
When the Python backend dies mid-session, the app doesn't know until a request fails. Add periodic health checks and a diagnostics panel.
- [ ] Poll `GET /health` every 15 seconds from the frontend
- [ ] If backend is unreachable: show a non-blocking banner "Backend disconnected — retrying..."
- [ ] When backend comes back online, dismiss the banner automatically
- [ ] Add a `/diagnostics` endpoint that reports: Python version, available FFmpeg, GPU detection, model cache sizes, disk space
- [ ] Wire to a "System Info" section in Settings or DevPanel
### CI pipeline with automated checks
Currently no CI exists. A GitHub Actions workflow would catch regressions on every push.
- [ ] Add `.github/workflows/ci.yml`:
- `cargo test` — all Rust tests
- `cargo clippy -- -D warnings` — enforce Rust lint rules
- `npx vitest run` — all frontend tests
- `npx tsc --noEmit` — TypeScript type check
- `python -m pytest backend/tests/` — Python backend tests
- `cargo build --release` — verify release build succeeds
- [ ] Run on push to `main` and on PRs
- [ ] Add Rust `#[deny(clippy::all)]` to catch common mistakes at build time
### Fuzz testing for Rust deserialization
The app deserializes user-provided JSON (project files, API responses) — any malformed input could crash the Rust backend. Add fuzz tests for the critical deserialization paths.
- [ ] Fuzz `TranscriptionResult` deserialization — malformed word/segment JSON
- [ ] Fuzz `.aive` project file loading — corrupted JSON, missing fields, wrong types
- [ ] Fuzz `LicensePayload` deserialization — tampered license payloads
- [ ] Use `cargo-fuzz` or `proptest` crate for property-based testing
### Performance telemetry (opt-in)
Without real-world data, you can't know where the app is slow. Add lightweight timing around operations that users complain about.
- [ ] Log timing for: transcription time, export time, AI completion time, model download time
- [ ] Store in localStorage (not sent anywhere — privacy-first)
- [ ] Show in DevPanel: last N operation timings
- [ ] Use this data to identify slow paths before users report them
### Recovery from bad project state
A corrupted `.aive` file or a partially-loaded project can leave the app in an unusable state. Add recovery paths.
- [ ] On project load failure: show error, offer "Load anyway with partial data" or "Cancel"
- [ ] Add "Reset Editor State" button in DevPanel (clears all state back to empty)
- [ ] Store action: validate all zone ranges are within video duration, auto-remove invalid ones on save

View File

@ -1,63 +0,0 @@
#!/usr/bin/env bash
# Gate: any app-code change between BASE_SHA and HEAD must be accompanied by
# at least one spec file change under docs/specs/. Exits 0 (skip) when the
# base commit cannot be determined.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

log() {
  printf '[check-feature-spec] %s\n' "$1"
}

# Resolve the comparison base: an explicit BASE_SHA wins; otherwise use the
# merge-base with origin/main. If neither is available, skip the check.
BASE_SHA="${BASE_SHA:-}"
if [[ -z "$BASE_SHA" ]]; then
  if git rev-parse --verify origin/main >/dev/null 2>&1; then
    BASE_SHA="$(git merge-base origin/main HEAD)"
  else
    log "No BASE_SHA and origin/main unavailable; skipping check."
    exit 0
  fi
fi

if ! git rev-parse --verify "$BASE_SHA" >/dev/null 2>&1; then
  log "BASE_SHA '$BASE_SHA' not found; skipping check."
  exit 0
fi

changed_files="$(git diff --name-only "$BASE_SHA"...HEAD)"
if [[ -z "$changed_files" ]]; then
  log "No changed files; nothing to enforce."
  exit 0
fi

# Classify each changed path: app code vs. spec documents.
code_changed=0
spec_changed=0
while IFS= read -r changed_path; do
  [[ -z "$changed_path" ]] && continue
  case "$changed_path" in
    frontend/src/*|backend/*|src-tauri/src/*|shared/project-schema.json)
      code_changed=1
      ;;
  esac
  case "$changed_path" in
    docs/specs/*.md)
      spec_changed=1
      ;;
  esac
done <<< "$changed_files"

if [[ "$code_changed" -eq 0 ]]; then
  log "No app code changes detected; spec file not required."
  exit 0
fi

if [[ "$spec_changed" -eq 1 ]]; then
  log "Spec requirement satisfied."
  exit 0
fi

log "Code changes detected without spec update in docs/specs/."
log "Add or update at least one spec file using docs/spec-template.md."
exit 1

View File

@ -1,72 +0,0 @@
#!/usr/bin/env bash
# Collect a timestamped diagnostics bundle under .diagnostics/: environment
# fingerprint, runtime logs, frontend lint/build output, and a backend import
# smoke test. The bundle is archived as diag_<timestamp>.tar.gz at the end.
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
OUT_BASE="$ROOT_DIR/.diagnostics"
TS="$(date +%Y%m%d_%H%M%S)"
OUT_DIR="$OUT_BASE/diag_$TS"
mkdir -p "$OUT_DIR"
log() {
printf '[collect-diagnostics] %s\n' "$1"
}
# Run a command and capture a header plus its stdout/stderr into
# $OUT_DIR/<name>.txt. Collection is best-effort: `|| true` keeps a failing
# probe from aborting the whole bundle (set -e is active).
capture_cmd() {
local name="$1"
shift
{
echo "# $name"
echo "# cmd: $*"
"$@"
} >"$OUT_DIR/$name.txt" 2>&1 || true
}
log "output: $OUT_DIR"
# Basic environment fingerprint.
capture_cmd "env_uname" uname -a
capture_cmd "env_node_version" node --version
capture_cmd "env_npm_version" npm --version
capture_cmd "env_git_status" git -C "$ROOT_DIR" status --short
capture_cmd "env_git_head" git -C "$ROOT_DIR" rev-parse --short HEAD
# Copy runtime logs when present (best-effort).
if [[ -f "$ROOT_DIR/webview.log" ]]; then
cp "$ROOT_DIR/webview.log" "$OUT_DIR/webview.log" || true
fi
if [[ -f "$ROOT_DIR/backend.log" ]]; then
cp "$ROOT_DIR/backend.log" "$OUT_DIR/backend.log" || true
fi
# Frontend lint and build output, when a frontend package exists.
if [[ -f "$ROOT_DIR/frontend/package.json" ]]; then
capture_cmd "frontend_lint" bash -lc "cd '$ROOT_DIR/frontend' && npm run -s lint"
capture_cmd "frontend_build" bash -lc "cd '$ROOT_DIR/frontend' && npm run -s build"
fi
# Locate the project's Python interpreter; the first existing candidate wins.
PY=""
for p in \
"$ROOT_DIR/.venv312/bin/python3.12" \
"$ROOT_DIR/.venv312/bin/python" \
"$ROOT_DIR/.venv/bin/python3" \
"$ROOT_DIR/.venv/bin/python" \
"$ROOT_DIR/venv/bin/python3" \
"$ROOT_DIR/venv/bin/python"; do
if [[ -x "$p" ]]; then
PY="$p"
break
fi
done
if [[ -n "$PY" ]]; then
capture_cmd "backend_python_version" "$PY" --version
# Import smoke: proves backend.main is importable with the project PYTHONPATH.
capture_cmd "backend_health_check" env PYTHONPATH="$ROOT_DIR/backend:$ROOT_DIR" "$PY" -c "import importlib; importlib.import_module('backend.main'); print('backend import OK')"
fi
capture_cmd "list_recent_files" find "$ROOT_DIR" -maxdepth 2 -type f | head -n 200
# Archive the bundle for easy sharing.
if command -v tar >/dev/null 2>&1; then
tar -czf "$OUT_DIR.tar.gz" -C "$OUT_BASE" "diag_$TS"
log "archive: $OUT_DIR.tar.gz"
fi
log "done"

View File

@ -1,128 +0,0 @@
#!/usr/bin/env bash
# Run the full local validation suite in seven steps: frontend dependency
# check, lint, build, tests; backend syntax check, unit tests, and an import
# smoke test. `set -e` makes the script fail fast on the first failing step.
#
# Note: the previously defined `run_if_present` helper was removed — it was
# never called and contained an unquoted `${cmd%% *}` expansion fed to `eval`
# (word-splitting / injection hazard flagged by shellcheck).
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

log() {
  printf '[validate-all] %s\n' "$1"
}

log "root: $ROOT_DIR"
cd "$ROOT_DIR"

log "Step 1/7: frontend dependency check"
# Missing node_modules is only a warning; later npm steps surface hard errors.
if [[ ! -d "frontend/node_modules" ]]; then
  log "frontend/node_modules missing; install with: cd frontend && npm install"
fi

log "Step 2/7: frontend lint"
if [[ -f "frontend/package.json" ]]; then
  (
    cd frontend
    if npm run -s lint; then
      log "frontend lint: OK"
    else
      log "frontend lint failed"
      exit 1
    fi
  )
else
  log "frontend/package.json not found; skipping"
fi

log "Step 3/7: frontend build"
if [[ -f "frontend/package.json" ]]; then
  (
    cd frontend
    if npm run -s build; then
      log "frontend build: OK"
    else
      log "frontend build failed"
      exit 1
    fi
  )
fi

log "Step 4/7: frontend tests"
if [[ -f "frontend/package.json" ]]; then
  (
    cd frontend
    if npm run -s test; then
      log "frontend tests: OK"
    else
      log "frontend tests failed"
      exit 1
    fi
  )
fi

log "Step 5/7: backend syntax check"
# Locate the project's Python interpreter; the first existing candidate wins.
PY=""
for p in \
  "$ROOT_DIR/.venv312/bin/python3.12" \
  "$ROOT_DIR/.venv312/bin/python" \
  "$ROOT_DIR/.venv/bin/python3" \
  "$ROOT_DIR/.venv/bin/python" \
  "$ROOT_DIR/venv/bin/python3" \
  "$ROOT_DIR/venv/bin/python"; do
  if [[ -x "$p" ]]; then
    PY="$p"
    break
  fi
done
# Fall back to any system python when no project virtualenv exists.
if [[ -z "$PY" ]]; then
  if command -v python3 >/dev/null 2>&1; then
    PY="$(command -v python3)"
  elif command -v python >/dev/null 2>&1; then
    PY="$(command -v python)"
  fi
fi
if [[ -n "$PY" ]]; then
  log "using python: $PY"
  "$PY" -m py_compile "$ROOT_DIR/backend/main.py" "$ROOT_DIR/backend/routers/export.py"
  log "backend syntax check: OK"
else
  log "no project python found (.venv312/.venv/venv); skipping backend syntax check"
fi

log "Step 6/7: backend unit tests"
if [[ -n "$PY" ]]; then
  # -print -quit | grep -q .  => "does at least one test file exist?"
  if find "$ROOT_DIR/backend/tests" -type f -name 'test_*.py' -print -quit 2>/dev/null | grep -q .; then
    PYTHONPATH="$ROOT_DIR/backend:$ROOT_DIR" "$PY" -m unittest discover -s "$ROOT_DIR/backend/tests" -p 'test_*.py' -v
    log "backend unit tests: OK"
  else
    log "backend unit tests: skipped (no tests found)"
  fi
fi

log "Step 7/7: backend health import smoke"
if [[ -n "$PY" ]]; then
  if [[ "${SKIP_BACKEND_IMPORT_SMOKE:-0}" == "1" ]]; then
    log "backend import smoke: skipped (SKIP_BACKEND_IMPORT_SMOKE=1)"
  else
    PYTHONPATH="$ROOT_DIR/backend:$ROOT_DIR" "$PY" - <<'PYCODE'
import importlib

mods = [
    "backend.main",
    "backend.routers.export",
    "backend.services.video_editor",
]
for m in mods:
    importlib.import_module(m)
print("backend import smoke: OK")
PYCODE
  fi
fi

log "Validation complete"

110
src-tauri/Cargo.lock generated
View File

@ -79,9 +79,7 @@ checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
name = "app"
version = "0.1.0"
dependencies = [
"base64 0.22.1",
"dirs 5.0.1",
"ed25519-dalek",
"hound",
"log",
"serde",
@ -148,12 +146,6 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "base64ct"
version = "1.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
[[package]]
name = "bit-set"
version = "0.8.0"
@ -458,12 +450,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "const-oid"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "convert_case"
version = "0.4.0"
@ -613,33 +599,6 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "curve25519-dalek"
version = "4.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [
"cfg-if",
"cpufeatures",
"curve25519-dalek-derive",
"digest",
"fiat-crypto",
"rustc_version",
"subtle",
"zeroize",
]
[[package]]
name = "curve25519-dalek-derive"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "darling"
version = "0.23.0"
@ -674,16 +633,6 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "der"
version = "0.7.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
dependencies = [
"const-oid",
"zeroize",
]
[[package]]
name = "deranged"
version = "0.5.8"
@ -877,30 +826,6 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
[[package]]
name = "ed25519"
version = "2.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
dependencies = [
"pkcs8",
"signature",
]
[[package]]
name = "ed25519-dalek"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70e796c081cee67dc755e1a36a0a172b897fab85fc3f6bc48307991f64e4eca9"
dependencies = [
"curve25519-dalek",
"ed25519",
"serde",
"sha2",
"subtle",
"zeroize",
]
[[package]]
name = "embed-resource"
version = "3.0.8"
@ -982,12 +907,6 @@ dependencies = [
"log",
]
[[package]]
name = "fiat-crypto"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d"
[[package]]
name = "field-offset"
version = "0.3.6"
@ -2617,16 +2536,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkcs8"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
dependencies = [
"der",
"spki",
]
[[package]]
name = "pkg-config"
version = "0.3.32"
@ -3488,15 +3397,6 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signature"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "simd-adler32"
version = "0.3.8"
@ -3591,16 +3491,6 @@ dependencies = [
"system-deps",
]
[[package]]
name = "spki"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
dependencies = [
"base64ct",
"der",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.1"

View File

@ -1,13 +1,15 @@
[package]
name = "app"
version = "0.1.0"
description = "TalkEdit - AI-powered video editor"
description = "A Tauri App"
authors = ["you"]
license = ""
repository = ""
edition = "2021"
rust-version = "1.77.2"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "app_lib"
crate-type = ["staticlib", "cdylib", "rlib"]
@ -27,5 +29,3 @@ dirs = "5.0"
ureq = "2.9"
hound = "3.5"
tempfile = "3.10"
ed25519-dalek = "2"
base64 = "0.22"

View File

@ -11,8 +11,8 @@
"dialog:allow-open",
"dialog:allow-save",
"fs:default",
{ "identifier": "fs:allow-read-text-file", "allow": [{ "path": "$HOME/**" }, { "path": "**" }] },
{ "identifier": "fs:allow-write-text-file", "allow": [{ "path": "$HOME/**" }, { "path": "**" }] },
{ "identifier": "fs:allow-read-text-file", "allow": [{ "path": "$HOME/**" }] },
{ "identifier": "fs:allow-write-text-file", "allow": [{ "path": "$HOME/**" }] },
"fs:allow-app-read-recursive",
"fs:allow-app-write-recursive"
]

View File

@ -1,7 +1,5 @@
// --- Commands ---
use tauri::Manager;
mod paths;
mod transcription;
mod video_editor;
@ -10,16 +8,6 @@ mod diarization;
mod ai_provider;
mod caption_generator;
mod background_removal;
mod licensing;
mod models;
/// Ensure the `Projects` directory exists under the project root and return
/// its absolute path as a string. Errors if the directory cannot be created.
#[tauri::command]
fn get_projects_directory() -> Result<String, String> {
let dir = paths::project_root().join("Projects");
std::fs::create_dir_all(&dir)
.map_err(|e| format!("Failed to create Projects directory: {e}"))?;
Ok(dir.to_string_lossy().to_string())
}
/// Returns the backend URL.
#[tauri::command]
@ -208,106 +196,6 @@ async fn save_captions(content: String, output_path: String) -> Result<String, S
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Log an error reported by the frontend, including its JS stack trace and
/// React component stack, into the Rust log output.
/// (Previous doc comment was a stale copy-paste from `list_models`.)
#[tauri::command]
fn log_error(message: String, stack: String, component_stack: String) {
log::error!(
"[Frontend Error] {} — Stack: {} — Component: {}",
message,
stack,
component_stack,
);
}
/// List downloaded models (Whisper + LLM) with sizes, scanned from the
/// app data directory.
#[tauri::command]
fn list_models(app_handle: tauri::AppHandle) -> Result<Vec<models::ModelInfo>, String> {
// Resolve the per-app data directory; fails if the platform has none.
let data_dir = app_handle
.path()
.app_data_dir()
.map_err(|e| format!("No app data directory: {e}"))?;
Ok(models::list_models(&data_dir))
}
/// Delete a downloaded model by its filesystem path (delegates to
/// `models::delete_model`).
#[tauri::command]
fn delete_model(path: String) -> Result<(), String> {
models::delete_model(&path)
}
/// Get the combined app status — licensed, trial, or expired — derived from
/// state stored in the app data directory.
#[tauri::command]
fn get_app_status(app_handle: tauri::AppHandle) -> Result<licensing::AppStatus, String> {
let data_dir = app_handle
.path()
.app_data_dir()
.map_err(|e| format!("No app data directory: {e}"))?;
Ok(licensing::get_app_status(&data_dir))
}
/// Verify a license key signature without caching it. Returns the decoded
/// payload on success, or the verification error as a string.
#[tauri::command]
fn verify_license(license_key: String) -> Result<licensing::LicensePayload, String> {
licensing::verify_license_key(&license_key)
.map_err(|e| e.to_string())
}
/// Verify a license key and, on success, cache it in the app data directory.
/// Existing trial state is cleared, since a valid license supersedes it.
#[tauri::command]
fn activate_license(app_handle: tauri::AppHandle, license_key: String) -> Result<licensing::LicensePayload, String> {
let data_dir = app_handle
.path()
.app_data_dir()
.map_err(|e| format!("No app data directory: {e}"))?;
// Verify first; only cache keys that pass signature verification.
let payload = licensing::verify_license_key(&license_key)
.map_err(|e| e.to_string())?;
licensing::cache_license(&data_dir, &license_key)
.map_err(|e| e.to_string())?;
// Clear trial state since user has a valid license
licensing::clear_trial(&data_dir);
Ok(payload)
}
/// Remove the cached license (deactivate). Trial will resume if still valid.
/// Removal is best-effort: `remove_license` returns nothing, so this always
/// succeeds once the data directory resolves.
#[tauri::command]
fn deactivate_license(app_handle: tauri::AppHandle) -> Result<(), String> {
let data_dir = app_handle
.path()
.app_data_dir()
.map_err(|e| format!("No app data directory: {e}"))?;
licensing::remove_license(&data_dir);
Ok(())
}
/// Start the free trial if not already started; otherwise return the existing
/// trial state (delegates to `licensing::get_or_start_trial`).
#[tauri::command]
fn start_trial(app_handle: tauri::AppHandle) -> Result<licensing::TrialState, String> {
let data_dir = app_handle
.path()
.app_data_dir()
.map_err(|e| format!("No app data directory: {e}"))?;
let trial = licensing::get_or_start_trial(&data_dir);
Ok(trial)
}
/// Check whether a specific feature is enabled by the cached license.
/// Returns `Ok(false)` — not an error — when no valid cached license exists.
#[tauri::command]
fn has_license_feature(app_handle: tauri::AppHandle, feature: String) -> Result<bool, String> {
let data_dir = app_handle
.path()
.app_data_dir()
.map_err(|e| format!("No app data directory: {e}"))?;
match licensing::load_cached_license(&data_dir) {
Ok(payload) => Ok(licensing::has_feature(&payload, &feature)),
Err(_) => Ok(false),
}
}
/// Check if background removal is available
#[tauri::command]
async fn is_background_removal_available() -> Result<bool, String> {
@ -328,39 +216,6 @@ async fn remove_background_on_export(input_path: String, output_path: String, re
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Write autosave data to the app data directory
#[tauri::command]
fn write_autosave(app_handle: tauri::AppHandle, data: String) -> Result<(), String> {
    let dir = app_handle
        .path()
        .app_data_dir()
        .map_err(|e| format!("No app data directory: {e}"))?;
    let target = dir.join("autosave.json");
    std::fs::write(&target, data).map_err(|e| format!("Failed to write autosave: {e}"))?;
    Ok(())
}
/// Read autosave data if it exists
#[tauri::command]
fn read_autosave(app_handle: tauri::AppHandle) -> Result<Option<String>, String> {
    let dir = app_handle
        .path()
        .app_data_dir()
        .map_err(|e| format!("No app data directory: {e}"))?;
    let target = dir.join("autosave.json");
    // A missing file is not an error — it just means no autosave yet.
    match std::fs::read_to_string(&target) {
        Ok(contents) => Ok(Some(contents)),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
        Err(err) => Err(format!("Failed to read autosave: {err}")),
    }
}
/// Delete the autosave file
#[tauri::command]
fn delete_autosave(app_handle: tauri::AppHandle) -> Result<(), String> {
    let dir = app_handle
        .path()
        .app_data_dir()
        .map_err(|e| format!("No app data directory: {e}"))?;
    let target = dir.join("autosave.json");
    // Deleting a file that is already gone counts as success.
    match std::fs::remove_file(&target) {
        Ok(()) => Ok(()),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(err) => Err(format!("Failed to delete autosave: {err}")),
    }
}
// --- App entry point ---
#[cfg_attr(mobile, tauri::mobile_entry_point)]
@ -376,31 +231,9 @@ pub fn run() {
.build(),
)?;
}
// Check for cached license or trial at startup
if let Ok(data_dir) = app.path().app_data_dir() {
match licensing::get_app_status(&data_dir) {
licensing::AppStatus::Licensed { license: payload } => {
log::info!(
"License: {} ({}), expires {}",
payload.customer_email,
payload.tier,
payload.expires_at,
);
}
licensing::AppStatus::Trial { days_remaining, .. } => {
log::info!("Trial active: {days_remaining} days remaining");
}
licensing::AppStatus::Expired => {
log::info!("Trial expired — license activation required");
}
}
}
Ok(())
})
.invoke_handler(tauri::generate_handler![
get_projects_directory,
get_backend_url,
encrypt_string,
decrypt_string,
@ -421,18 +254,6 @@ pub fn run() {
save_captions,
is_background_removal_available,
remove_background_on_export,
get_app_status,
activate_license,
deactivate_license,
verify_license,
start_trial,
has_license_feature,
list_models,
delete_model,
log_error,
write_autosave,
read_autosave,
delete_autosave,
])
.run(tauri::generate_context!())
.expect("error while running tauri application");

View File

@ -1,293 +0,0 @@
use base64::engine::general_purpose::STANDARD_NO_PAD as BASE64;
use base64::Engine;
use ed25519_dalek::{Signature, VerifyingKey};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use std::time::{SystemTime, UNIX_EPOCH};
pub const TALKEDIT_PUBLIC_KEY: [u8; 32] = [228, 216, 102, 187, 61, 187, 236, 140, 37, 32, 158, 153, 35, 80, 20, 129, 172, 167, 96, 115, 141, 56, 244, 123, 237, 7, 255, 18, 92, 114, 152, 31];
pub const TRIAL_DURATION_SECS: u64 = 7 * 86400;
/// The signed payload embedded in a license key, decoded from JSON after
/// signature verification (see `verify_license_key`).
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct LicensePayload {
    /// Unique identifier for this license issuance.
    pub license_id: String,
    /// Email of the purchasing customer (logged at startup).
    pub customer_email: String,
    /// Pricing tier name, e.g. "pro".
    pub tier: String,
    /// Feature flags this license unlocks (checked by `has_feature`).
    pub features: Vec<String>,
    /// Unix timestamp (seconds) when the license was issued.
    pub issued_at: u64,
    /// Unix timestamp (seconds) after which verification fails with `Expired`.
    pub expires_at: u64,
    /// Maximum device activations — not enforced anywhere in this module;
    /// presumably checked server-side. TODO confirm.
    pub max_activations: u32,
}
/// In-memory trial state returned to callers and serialized to the frontend.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TrialState {
    /// Unix timestamp (seconds) when the trial began. `0` is used as a
    /// sentinel for a tampered/reset trial (see `get_or_start_trial`),
    /// which is always expired.
    pub started_at: u64,
}
/// On-disk format with integrity seed to deter tampering.
#[derive(Debug, Serialize, Deserialize)]
struct TrialFile {
    /// Same value as `TrialState::started_at`.
    started_at: u64,
    /// `started_at ^ TRIAL_SEED`; a mismatch means the file was hand-edited.
    seed: u64,
}
/// Overall licensing state, serialized with a `"tag"` discriminator so the
/// frontend can switch on the variant name.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "tag")]
pub enum AppStatus {
    /// A valid, unexpired license key is cached on disk.
    Licensed { license: LicensePayload },
    /// No license, but the free trial window is still open.
    Trial { days_remaining: u32, started_at: u64 },
    /// No license and the trial has ended.
    Expired,
}
/// Errors from license parsing, verification, and persistence. Converted to
/// `String` via `Display` at the Tauri command boundary.
#[derive(Debug)]
pub enum LicenseError {
    /// Key lacks the `talkedit_v1_` prefix or the `payload.sig` shape.
    InvalidFormat,
    /// Ed25519 signature did not verify against the compiled-in public key.
    InvalidSignature,
    /// The payload's `expires_at` is in the past.
    Expired,
    /// Base64/JSON/signature-bytes decoding failed. NOTE: also reused for
    /// file I/O failures in `cache_license`, where the message is slightly
    /// misleading.
    DecodeError(String),
    /// Building the verifying key from the embedded bytes failed.
    CryptoError(String),
}
impl std::fmt::Display for LicenseError {
    // One-line, user-presentable messages (commands forward them as-is).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidFormat => write!(f, "Invalid license key format"),
            Self::InvalidSignature => write!(f, "License signature is invalid"),
            Self::Expired => write!(f, "License has expired"),
            Self::DecodeError(e) => write!(f, "License decode error: {e}"),
            Self::CryptoError(e) => write!(f, "Crypto error: {e}"),
        }
    }
}
// --- License key verification ---
/// Parse and cryptographically verify a license key.
///
/// Expected format: `talkedit_v1_<base64(payload JSON)>.<base64(signature)>`,
/// both parts unpadded standard base64. The signature is Ed25519 over the raw
/// payload bytes, checked against the compiled-in `TALKEDIT_PUBLIC_KEY`.
///
/// Returns the decoded `LicensePayload` on success. Errors:
/// - `InvalidFormat`: missing prefix, missing `.`, or an empty part
/// - `DecodeError`: bad base64, bad signature length, or bad payload JSON
/// - `CryptoError`: the embedded public key bytes are malformed
/// - `InvalidSignature`: signature does not match the payload
/// - `Expired`: `expires_at` is earlier than the current wall clock
pub fn verify_license_key(license_key: &str) -> Result<LicensePayload, LicenseError> {
    let stripped = license_key
        .strip_prefix("talkedit_v1_")
        .ok_or(LicenseError::InvalidFormat)?;
    // Split payload/signature on the last '.'.
    let dot_pos = stripped.rfind('.').ok_or(LicenseError::InvalidFormat)?;
    let payload_b64 = &stripped[..dot_pos];
    let sig_b64 = &stripped[dot_pos + 1..];
    if payload_b64.is_empty() || sig_b64.is_empty() {
        return Err(LicenseError::InvalidFormat);
    }
    let payload_bytes = BASE64
        .decode(payload_b64)
        .map_err(|e| LicenseError::DecodeError(e.to_string()))?;
    let sig_bytes = BASE64
        .decode(sig_b64)
        .map_err(|e| LicenseError::DecodeError(e.to_string()))?;
    let verifying_key = VerifyingKey::from_bytes(&TALKEDIT_PUBLIC_KEY)
        .map_err(|e| LicenseError::CryptoError(e.to_string()))?;
    let signature = Signature::from_slice(&sig_bytes)
        .map_err(|e| LicenseError::DecodeError(e.to_string()))?;
    // verify_strict rejects non-canonical/malleable signatures outright.
    verifying_key
        .verify_strict(&payload_bytes, &signature)
        .map_err(|_| LicenseError::InvalidSignature)?;
    // Only parse (and trust) the payload after the signature checks out.
    let payload: LicensePayload = serde_json::from_slice(&payload_bytes)
        .map_err(|e| LicenseError::DecodeError(e.to_string()))?;
    let now = now_secs();
    if now > payload.expires_at {
        return Err(LicenseError::Expired);
    }
    Ok(payload)
}
// --- License file I/O ---
/// Current Unix time in whole seconds (0 if the clock is before the epoch).
fn now_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
/// `<app_data>/license.key` — where the activated license key is cached.
pub fn license_file_path(app_data_dir: &PathBuf) -> PathBuf {
    let mut p = app_data_dir.clone();
    p.push("license.key");
    p
}
/// `<app_data>/trial.json` — persisted trial start time plus integrity seed.
pub fn trial_file_path(app_data_dir: &PathBuf) -> PathBuf {
    let mut p = app_data_dir.clone();
    p.push("trial.json");
    p
}
/// `<app_data>/.trial_lock` — sentinel marking that a trial was ever started.
pub fn trial_sentinel_path(app_data_dir: &PathBuf) -> PathBuf {
    let mut p = app_data_dir.clone();
    p.push(".trial_lock");
    p
}
// Simple integrity check constant — not crypto-grade, but deters casual editing.
const TRIAL_SEED: u64 = 0x9F3A_2E7D_C1B8_5604;
/// Load the persisted trial state, or start a new trial on first run.
///
/// Tamper resistance (best-effort, not crypto-grade):
/// - `trial.json` stores `started_at` plus `started_at ^ TRIAL_SEED`; if the
///   XOR check fails the file was hand-edited and is ignored.
/// - A sentinel file (`.trial_lock`) is written alongside it. If the sentinel
///   exists but the trial file is missing or invalid, the trial was deleted
///   to reset the clock — return `started_at: 0` (always expired) instead of
///   starting a fresh trial.
pub fn get_or_start_trial(app_data_dir: &PathBuf) -> TrialState {
    let path = trial_file_path(app_data_dir);
    let sentinel = trial_sentinel_path(app_data_dir);
    let now = now_secs();
    // If sentinel exists but trial was deleted, refuse to create a new one.
    let sentinel_exists = sentinel.exists();
    if let Ok(content) = std::fs::read_to_string(&path) {
        if let Ok(wrapped) = serde_json::from_str::<TrialFile>(&content) {
            // Verify integrity
            if (wrapped.started_at ^ TRIAL_SEED) == wrapped.seed {
                return TrialState { started_at: wrapped.started_at };
            }
        }
    }
    if sentinel_exists {
        // Trial was tampered with — return an expired trial
        return TrialState { started_at: 0 };
    }
    // Start new trial
    let trial = TrialState { started_at: now };
    if let Some(parent) = path.parent() {
        let _ = std::fs::create_dir_all(parent);
    }
    // Write failures are deliberately ignored: worst case the trial simply
    // restarts on the next launch.
    let _ = std::fs::write(&sentinel, "1");
    let _ = std::fs::write(
        &path,
        serde_json::to_string(&TrialFile {
            started_at: trial.started_at,
            seed: trial.started_at ^ TRIAL_SEED,
        })
        .unwrap(),
    );
    trial
}
/// Read `license.key` from disk and re-verify it. Any read failure maps to
/// `InvalidFormat`, which callers treat the same as "no license".
pub fn load_cached_license(app_data_dir: &PathBuf) -> Result<LicensePayload, LicenseError> {
    let content = std::fs::read_to_string(license_file_path(app_data_dir))
        .map_err(|_| LicenseError::InvalidFormat)?;
    verify_license_key(content.trim())
}
/// Persist a license key to `license.key` (callers verify it first — see
/// `activate_license`).
///
/// NOTE(review): I/O failures are surfaced as `LicenseError::DecodeError`
/// because the enum has no dedicated I/O variant — the "decode error" message
/// is slightly misleading but the error still reaches the user.
pub fn cache_license(app_data_dir: &PathBuf, license_key: &str) -> Result<(), LicenseError> {
    let path = license_file_path(app_data_dir);
    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)
            .map_err(|e| LicenseError::DecodeError(e.to_string()))?;
    }
    std::fs::write(&path, license_key)
        .map_err(|e| LicenseError::DecodeError(e.to_string()))
}
/// Best-effort delete of the cached license file; a missing file is fine.
pub fn remove_license(app_data_dir: &PathBuf) {
    let _ = std::fs::remove_file(license_file_path(app_data_dir));
}
// --- Trial logic ---
/// Compute `(days_remaining, secs_remaining, is_active)` for a trial.
///
/// Fix: days are now rounded *up*, so an active trial with less than a full
/// day left reports 1 day instead of the previous floor-division result of
/// "0 days remaining" while still active. At trial start this still yields
/// exactly 7 (matching `test_trial_dates`).
pub fn get_trial_info(trial: &TrialState) -> (u64, u64, bool) {
    let now = now_secs();
    // saturating_sub guards against a clock set before started_at and
    // against an elapsed time past the trial window.
    let elapsed = now.saturating_sub(trial.started_at);
    let remaining = TRIAL_DURATION_SECS.saturating_sub(elapsed);
    // Ceiling division: count a partial day as a full remaining day.
    // (remaining <= TRIAL_DURATION_SECS, so the addition cannot overflow.)
    let days_remaining = (remaining + 86400 - 1) / 86400;
    let is_active = remaining > 0;
    (days_remaining, remaining, is_active)
}
/// Check if trial is still valid and return remaining days.
pub fn get_trial_days_remaining(trial: &TrialState) -> Option<u32> {
    let (days, _, active) = get_trial_info(trial);
    // None once the window has closed; Some(days) while it is open.
    active.then(|| days as u32)
}
/// Best-effort removal of both trial artifacts (called once a license is
/// activated).
pub fn clear_trial(app_data_dir: &PathBuf) {
    for target in [trial_file_path(app_data_dir), trial_sentinel_path(app_data_dir)] {
        let _ = std::fs::remove_file(target);
    }
}
/// Get the overall app status: Licensed > Trial > Expired.
pub fn get_app_status(app_data_dir: &PathBuf) -> AppStatus {
    // A valid cached license wins over everything else.
    match load_cached_license(app_data_dir) {
        Ok(license) => AppStatus::Licensed { license },
        Err(_) => {
            // No license: fall back to the trial, starting one if needed.
            let trial = get_or_start_trial(app_data_dir);
            match get_trial_days_remaining(&trial) {
                Some(days_remaining) => AppStatus::Trial {
                    days_remaining,
                    started_at: trial.started_at,
                },
                None => AppStatus::Expired,
            }
        }
    }
}
/// True if the license's feature list contains `feature` exactly.
pub fn has_feature(payload: &LicensePayload, feature: &str) -> bool {
    payload
        .features
        .iter()
        .map(String::as_str)
        .any(|name| name == feature)
}
#[cfg(test)]
mod tests {
    use super::*;
    // A key signed by the keypair whose public half is TALKEDIT_PUBLIC_KEY;
    // payload: pro tier, three features, expires_at in 2027.
    const TEST_KEY: &str = "talkedit_v1_eyJsaWNlbnNlX2lkIjoidGVzdF8wMDEiLCJjdXN0b21lcl9lbWFpbCI6InRlc3RAZXhhbXBsZS5jb20iLCJ0aWVyIjoicHJvIiwiZmVhdHVyZXMiOlsiYnVuZGxlZF9kZXBzIiwiYXV0b191cGRhdGVzIiwiYmdfcmVtb3ZhbCJdLCJpc3N1ZWRfYXQiOjE3NzgwMDAwMDAsImV4cGlyZXNfYXQiOjE4MDk1MzYwMDAsIm1heF9hY3RpdmF0aW9ucyI6M30.1Hw9FT6USo+05lB0NSJmTvCgAby9ep/BFWv95CvACn0wNpJl5Z6uGDIuIEe077t+CszqGJ8Lci+ZlZyb41foDQ";
    #[test]
    fn test_verify_valid_license() {
        // Happy path: signature verifies and the payload fields round-trip.
        let result = verify_license_key(TEST_KEY);
        assert!(result.is_ok(), "Expected OK, got: {:?}", result);
        let payload = result.unwrap();
        assert_eq!(payload.customer_email, "test@example.com");
        assert_eq!(payload.tier, "pro");
        assert!(payload.features.contains(&"bg_removal".to_string()));
    }
    #[test]
    fn test_verify_invalid_signature() {
        // Keep the valid payload but swap in an all-zero signature.
        let dot = TEST_KEY.rfind('.').unwrap();
        let prefix = &TEST_KEY[..dot + 1];
        let bad_sig = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
        let tampered = format!("{prefix}{bad_sig}");
        let result = verify_license_key(&tampered);
        assert!(matches!(result, Err(LicenseError::InvalidSignature)));
    }
    #[test]
    fn test_verify_bad_format() {
        // No talkedit_v1_ prefix at all.
        let result = verify_license_key("not-a-license-key");
        assert!(matches!(result, Err(LicenseError::InvalidFormat)));
    }
    #[test]
    fn test_has_feature() {
        let payload = verify_license_key(TEST_KEY).unwrap();
        assert!(has_feature(&payload, "bg_removal"));
        assert!(!has_feature(&payload, "nonexistent_feature"));
    }
    #[test]
    fn test_trial_dates() {
        // A trial that starts "now" has the full 7 days remaining.
        let trial = TrialState { started_at: now_secs() };
        let days = get_trial_days_remaining(&trial);
        assert!(days.is_some());
        assert_eq!(days.unwrap(), 7);
    }
    #[test]
    fn test_expired_trial() {
        let trial = TrialState { started_at: 0 }; // epoch = 1970, definitely expired
        let days = get_trial_days_remaining(&trial);
        assert!(days.is_none());
    }
}

View File

@ -1,190 +0,0 @@
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
/// A downloaded model discovered on disk (serialized to the frontend).
#[derive(Debug, Serialize, Deserialize)]
pub struct ModelInfo {
    /// Display name: the whisper size suffix (e.g. "base") or a .gguf filename.
    pub name: String,
    /// Absolute path — the same string `delete_model` accepts.
    pub path: String,
    /// Total size on disk in bytes (0 if it could not be determined).
    pub size_bytes: u64,
    /// Model family: "whisper" or "llm".
    pub kind: String,
}
/// Resolve the HuggingFace hub cache directory, mirroring huggingface_hub's
/// own resolution order: HF_HOME, then XDG_CACHE_HOME, then ~/.cache.
fn huggingface_cache_dir() -> PathBuf {
    if let Ok(hf_home) = std::env::var("HF_HOME") {
        PathBuf::from(hf_home).join("hub")
    } else if let Ok(xdg_cache) = std::env::var("XDG_CACHE_HOME") {
        PathBuf::from(xdg_cache).join("huggingface").join("hub")
    } else {
        dirs::home_dir()
            .unwrap_or_default()
            .join(".cache")
            .join("huggingface")
            .join("hub")
    }
}
/// Scan the HuggingFace hub cache for downloaded faster-whisper models.
///
/// Matches directories named `models--Systran--faster-whisper-<size>` and
/// reports each with its on-disk size, summed from snapshots/ (falling back
/// to blobs/ when snapshots/ is absent or empty).
fn scan_whisper_models() -> Vec<ModelInfo> {
    let cache_dir = huggingface_cache_dir();
    if !cache_dir.exists() {
        return vec![];
    }
    let mut models = vec![];
    let pattern = "models--Systran--faster-whisper-";
    let Ok(entries) = std::fs::read_dir(&cache_dir) else {
        return vec![];
    };
    for entry in entries.flatten() {
        let name = entry.file_name();
        let name_str = name.to_string_lossy();
        if !name_str.starts_with(pattern) {
            continue;
        }
        // The size suffix ("base", "small", ...) becomes the display name.
        let model_name = name_str
            .strip_prefix(pattern)
            .unwrap_or(&name_str)
            .to_string();
        // The actual model files are in snapshots/ subdirectory
        let snapshots_dir = entry.path().join("snapshots");
        let mut total_size = 0u64;
        if let Ok(snap_entries) = std::fs::read_dir(&snapshots_dir) {
            for snap in snap_entries.flatten() {
                total_size += dir_size(&snap.path());
            }
        }
        // If no snapshots dir, try blobs/
        if total_size == 0 {
            let blobs_dir = entry.path().join("blobs");
            if blobs_dir.exists() {
                total_size = dir_size(&blobs_dir);
            }
        }
        models.push(ModelInfo {
            name: model_name,
            path: entry.path().to_string_lossy().to_string(),
            size_bytes: total_size,
            kind: "whisper".to_string(),
        });
    }
    models
}
/// List local LLM weights: flat `*.gguf` files under `<app_data>/models`.
/// A missing or unreadable directory yields an empty list.
fn scan_llm_models(app_data_dir: &PathBuf) -> Vec<ModelInfo> {
    let models_dir = app_data_dir.join("models");
    if !models_dir.exists() {
        return vec![];
    }
    let entries = match std::fs::read_dir(&models_dir) {
        Ok(e) => e,
        Err(_) => return vec![],
    };
    entries
        .flatten()
        .filter_map(|entry| {
            let file_path = entry.path();
            let is_gguf = file_path.extension().map(|e| e == "gguf").unwrap_or(false);
            if !is_gguf {
                return None;
            }
            // Size falls back to 0 when metadata can't be read.
            let size_bytes = std::fs::metadata(&file_path).map(|m| m.len()).unwrap_or(0);
            Some(ModelInfo {
                name: entry.file_name().to_string_lossy().to_string(),
                path: file_path.to_string_lossy().to_string(),
                size_bytes,
                kind: "llm".to_string(),
            })
        })
        .collect()
}
/// Recursively sum the sizes in bytes of all files under `path`.
/// Unreadable entries are silently skipped; a missing path yields 0.
///
/// NOTE(review): `is_dir()` and `fs::metadata` both follow symlinks, so a
/// symlink cycle would recurse without bound. Following links is required
/// here (HF snapshot entries are symlinks into blobs/ — see
/// `scan_whisper_models`), but confirm before reusing on arbitrary trees.
fn dir_size(path: &std::path::Path) -> u64 {
    let mut total = 0u64;
    if let Ok(entries) = std::fs::read_dir(path) {
        for entry in entries.flatten() {
            let path = entry.path();
            if path.is_dir() {
                total += dir_size(&path);
            } else if let Ok(meta) = std::fs::metadata(&path) {
                total += meta.len();
            }
        }
    }
    total
}
/// All locally downloaded models: whisper (HF cache) plus LLM (.gguf files).
pub fn list_models(app_data_dir: &PathBuf) -> Vec<ModelInfo> {
    let mut all = scan_whisper_models();
    let llms = scan_llm_models(app_data_dir);
    all.extend(llms);
    all
}
/// Delete a model from disk. Directories (whisper cache entries) are removed
/// recursively; single files (.gguf) are unlinked. Errors if `path` does not
/// exist or the removal fails.
pub fn delete_model(path: &str) -> Result<(), String> {
    let target = std::path::Path::new(path);
    if !target.exists() {
        return Err("Model path not found".to_string());
    }
    let removal = if target.is_dir() {
        std::fs::remove_dir_all(target)
    } else {
        std::fs::remove_file(target)
    };
    removal.map_err(|e| format!("Failed to delete model: {e}"))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    #[test]
    fn test_dir_size_empty() {
        // A missing/unreadable path contributes zero bytes.
        let size = dir_size(&PathBuf::from("/nonexistent/path/12345"));
        assert_eq!(size, 0);
    }
    #[test]
    fn test_scan_whisper_models_empty() {
        // In CI there won't be any whisper models. The old assertion
        // `models.len() >= 0` was always true for usize (unused_comparisons
        // warning); instead verify the scan doesn't panic and that anything
        // it does find is tagged as a whisper model.
        let models = scan_whisper_models();
        assert!(models.iter().all(|m| m.kind == "whisper"));
    }
    #[test]
    fn test_scan_llm_models_empty() {
        let models = scan_llm_models(&PathBuf::from("/nonexistent/app_data"));
        assert!(models.is_empty());
    }
    #[test]
    fn test_list_models_empty() {
        // No LLM models can come from a non-existent app data directory;
        // whisper models may or may not exist on a dev machine, so only the
        // llm count is pinned (the old `whisper_models >= 0` check was a
        // no-op on usize).
        let models = list_models(&PathBuf::from("/nonexistent/app_data"));
        let llm_models = models.iter().filter(|m| m.kind == "llm").count();
        assert_eq!(llm_models, 0);
    }
    #[test]
    fn test_delete_model_nonexistent() {
        let result = delete_model("/nonexistent/model/path.gguf");
        assert!(result.is_err());
    }
    #[test]
    fn test_delete_model_empty_path() {
        let result = delete_model("");
        assert!(result.is_err());
    }
}

View File

@ -29,7 +29,7 @@ pub fn project_root() -> PathBuf {
}
/// Absolute path to the bundled Python interpreter.
/// Tries project virtualenvs in a fixed order so all runtime paths agree.
/// Tries .venv312 first (new), falls back to .venv (legacy).
pub fn python_exe() -> PathBuf {
let root = project_root();
// Packaged layout: resources/python/bin/python3
@ -37,24 +37,12 @@ pub fn python_exe() -> PathBuf {
if bundled.exists() {
return bundled;
}
let candidates = [
root.join(".venv312").join("bin").join("python3.12"),
root.join(".venv312").join("bin").join("python"),
root.join(".venv").join("bin").join("python3"),
root.join(".venv").join("bin").join("python"),
root.join("venv").join("bin").join("python3"),
root.join("venv").join("bin").join("python"),
];
for candidate in candidates {
if candidate.exists() {
return candidate;
}
// Dev: prefer .venv312 (Python 3.12), fall back to .venv
let venv312 = root.join(".venv312").join("bin").join("python3.12");
if venv312.exists() {
return venv312;
}
// Last-resort path if no environment is present.
root.join(".venv312").join("bin").join("python3.12")
root.join(".venv").join("bin").join("python3")
}
/// Absolute path to a script in the backend directory.

View File

@ -46,6 +46,7 @@ pub fn transcribe_audio(
// Run Python script with timeout
let output = Command::new(python_exe)
.args(&args)
.env("PYTHONPATH", crate::paths::project_root().join(".venv312").join("lib").join("python3.12").join("site-packages"))
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;

View File

@ -6,7 +6,7 @@
"build": {
"frontendDist": "../frontend/dist",
"devUrl": "http://localhost:5173",
"beforeDevCommand": "lsof -ti:5173 | xargs kill -9 2>/dev/null; cd frontend && npm run dev",
"beforeDevCommand": "cd frontend && (lsof -i :5173 >/dev/null 2>&1 && echo 'Frontend dev server already running on port 5173' || npm run dev)",
"beforeBuildCommand": "cd frontend && npm run build"
},
"app": {
@ -23,7 +23,7 @@
}
],
"security": {
"csp": "default-src 'self'; connect-src 'self' http://127.0.0.1:8000; media-src 'self' http://127.0.0.1:8000; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:"
"csp": "default-src 'self'; connect-src 'self' http://127.0.0.1:* http://localhost:* ws://127.0.0.1:* ws://localhost:*; media-src 'self' http://127.0.0.1:* http://localhost:* file: blob:; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; font-src 'self' data:; img-src 'self' data: blob:"
}
},
"bundle": {

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/home/dillon/_code/TalkEdit/.venv312/bin/python3.12
"""
Test script for the TalkEdit API.
This script tests the new Tauri commands that expose all backend functions.

View File

@ -45,16 +45,7 @@ def main():
device = "cpu"
compute_type = "int8"
try:
model = WhisperModel(model_name, device=device, compute_type=compute_type)
except RuntimeError as e:
if "out of memory" in str(e).lower() and device == "cuda":
print(f"CUDA OOM, falling back to CPU (int8)", file=sys.stderr)
device = "cpu"
compute_type = "int8"
model = WhisperModel(model_name, device=device, compute_type=compute_type)
else:
raise
model = WhisperModel(model_name, device=device, compute_type=compute_type)
# Transcribe with progress reporting
print(f"Starting transcription of {wav_path} with model {model_name}", file=sys.stderr)