added API for AI; got backend working

2026-03-26 23:39:31 -06:00
parent 164b2f87d4
commit 4a857d8cbf
20 changed files with 1436 additions and 280 deletions

54
backend/ai_provider.py Normal file

@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""
AI provider interface for Ollama, OpenAI, and Claude.
"""
import json
import sys
from pathlib import Path
# Add backend to path
sys.path.insert(0, str(Path(__file__).parent))
from services.ai_provider import AIProvider
def main():
if len(sys.argv) < 2:
print("Usage: python ai_provider.py <command> [args...]", file=sys.stderr)
sys.exit(1)
command = sys.argv[1]
try:
if command == "complete":
if len(sys.argv) < 4:
print("Usage: python ai_provider.py complete <prompt> <provider> [model] [api_key] [base_url] [system_prompt] [temperature]", file=sys.stderr)
sys.exit(1)
prompt = sys.argv[2]
provider = sys.argv[3]
model = sys.argv[4] if len(sys.argv) > 4 and sys.argv[4] != "null" else None
api_key = sys.argv[5] if len(sys.argv) > 5 and sys.argv[5] != "null" else None
base_url = sys.argv[6] if len(sys.argv) > 6 and sys.argv[6] != "null" else None
system_prompt = sys.argv[7] if len(sys.argv) > 7 and sys.argv[7] != "null" else None
temperature = float(sys.argv[8]) if len(sys.argv) > 8 else 0.3
result = AIProvider.complete(prompt, provider, model, api_key, base_url, system_prompt, temperature)
print(json.dumps({"response": result}))
elif command == "list_ollama_models":
base_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:11434"
result = AIProvider.list_ollama_models(base_url)
print(json.dumps({"models": result}))
else:
print(f"Unknown command: {command}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(json.dumps({"error": str(e)}), file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()
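
For reference, every backend CLI added in this commit follows the same contract: arguments on argv, one JSON object on stdout, and errors as a JSON object on stderr with a non-zero exit code. A minimal sketch of exercising ai_provider.py directly from Python; the interpreter/script paths, the model name, and a locally running Ollama are assumptions:

import json
import subprocess

# Assumed paths; mirror whatever crate::paths resolves to on your machine.
cmd = [
    ".venv/bin/python3.10", "backend/ai_provider.py", "complete",
    "Summarize this transcript in one sentence.",   # prompt
    "ollama",                                       # provider
    "llama3", "null",                               # model (placeholder), api_key ("null" = not set)
    "http://localhost:11434", "null", "0.3",        # base_url, system_prompt, temperature
]
proc = subprocess.run(cmd, capture_output=True, text=True)
if proc.returncode != 0:
    raise RuntimeError(proc.stderr)         # stderr carries {"error": ...}
print(json.loads(proc.stdout)["response"])  # stdout carries {"response": ...}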

47
backend/audio_cleaner.py Normal file

@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""
Audio cleaning operations using DeepFilterNet or FFmpeg fallback.
"""
import json
import sys
from pathlib import Path
# Add backend to path
sys.path.insert(0, str(Path(__file__).parent))
from services.audio_cleaner import clean_audio, is_deepfilter_available
def main():
if len(sys.argv) < 2:
print("Usage: python audio_cleaner.py <command> [args...]", file=sys.stderr)
sys.exit(1)
command = sys.argv[1]
try:
if command == "clean_audio":
if len(sys.argv) != 4:
print("Usage: python audio_cleaner.py clean_audio <input_path> <output_path>", file=sys.stderr)
sys.exit(1)
input_path = sys.argv[2]
output_path = sys.argv[3]
result = clean_audio(input_path, output_path)
print(json.dumps({"output_path": result}))
elif command == "is_deepfilter_available":
result = is_deepfilter_available()
print(json.dumps({"available": result}))
else:
print(f"Unknown command: {command}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(json.dumps({"error": str(e)}), file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()

50
backend/background_removal.py Normal file

@ -0,0 +1,50 @@
#!/usr/bin/env python3
"""
Background removal operations (placeholder for Phase 5).
"""
import json
import sys
from pathlib import Path
# Add backend to path
sys.path.insert(0, str(Path(__file__).parent))
from services.background_removal import is_available, remove_background_on_export
def main():
if len(sys.argv) < 2:
print("Usage: python background_removal.py <command> [args...]", file=sys.stderr)
sys.exit(1)
command = sys.argv[1]
try:
if command == "is_available":
result = is_available()
print(json.dumps({"available": result}))
elif command == "remove_background_on_export":
if len(sys.argv) != 6:
print("Usage: python background_removal.py remove_background_on_export <input_path> <output_path> <replacement> <replacement_value>", file=sys.stderr)
sys.exit(1)
input_path = sys.argv[2]
output_path = sys.argv[3]
replacement = sys.argv[4]
replacement_value = sys.argv[5]
result = remove_background_on_export(input_path, output_path, replacement, replacement_value)
print(json.dumps({"output_path": result}))
else:
print(f"Unknown command: {command}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(json.dumps({"error": str(e)}), file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()

78
backend/caption_generator.py Normal file

@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""
Generate caption files from word-level timestamps.
"""
import json
import sys
from pathlib import Path
# Add backend to path
sys.path.insert(0, str(Path(__file__).parent))
from services.caption_generator import generate_srt, generate_vtt, generate_ass, save_captions
def main():
if len(sys.argv) < 2:
print("Usage: python caption_generator.py <command> [args...]", file=sys.stderr)
sys.exit(1)
command = sys.argv[1]
try:
if command == "generate_srt":
if len(sys.argv) < 4:
print("Usage: python caption_generator.py generate_srt <words_json> [deleted_indices_json] [words_per_line]", file=sys.stderr)
sys.exit(1)
words = json.loads(sys.argv[2])
deleted_indices = set(json.loads(sys.argv[3])) if len(sys.argv) > 3 and sys.argv[3] != "null" else None
words_per_line = int(sys.argv[4]) if len(sys.argv) > 4 else 8
result = generate_srt(words, deleted_indices, words_per_line)
print(json.dumps({"content": result}))
elif command == "generate_vtt":
if len(sys.argv) < 4:
print("Usage: python caption_generator.py generate_vtt <words_json> [deleted_indices_json] [words_per_line]", file=sys.stderr)
sys.exit(1)
words = json.loads(sys.argv[2])
deleted_indices = set(json.loads(sys.argv[3])) if len(sys.argv) > 3 and sys.argv[3] != "null" else None
words_per_line = int(sys.argv[4]) if len(sys.argv) > 4 else 8
result = generate_vtt(words, deleted_indices, words_per_line)
print(json.dumps({"content": result}))
elif command == "generate_ass":
if len(sys.argv) < 4:
print("Usage: python caption_generator.py generate_ass <words_json> [deleted_indices_json] [words_per_line] [style_json]", file=sys.stderr)
sys.exit(1)
words = json.loads(sys.argv[2])
deleted_indices = set(json.loads(sys.argv[3])) if len(sys.argv) > 3 and sys.argv[3] != "null" else None
words_per_line = int(sys.argv[4]) if len(sys.argv) > 4 else 8
style = json.loads(sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] != "null" else None
result = generate_ass(words, deleted_indices, words_per_line, style)
print(json.dumps({"content": result}))
elif command == "save_captions":
if len(sys.argv) != 4:
print("Usage: python caption_generator.py save_captions <content> <output_path>", file=sys.stderr)
sys.exit(1)
content = sys.argv[2]
output_path = sys.argv[3]
result = save_captions(content, output_path)
print(json.dumps({"output_path": result}))
else:
print(f"Unknown command: {command}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(json.dumps({"error": str(e)}), file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()
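
The words, deleted-indices, and style arguments are JSON strings; the field names below mirror the Word and CaptionStyle structs defined on the Rust side. A hedged sketch of a generate_ass call (the interpreter/script paths and font values are assumptions):

import json
import subprocess

words = json.dumps([
    {"word": "Hello", "start": 0.0, "end": 0.5, "confidence": 0.9},
    {"word": "world", "start": 0.5, "end": 1.0, "confidence": 0.95},
])
style = json.dumps({"font_name": "Arial", "font_size": 36, "bold": True})
cmd = [
    ".venv/bin/python3.10", "backend/caption_generator.py", "generate_ass",
    words, "null", "8", style,   # "null" = no deleted indices, 8 words per line
]
proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
print(json.loads(proc.stdout)["content"])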

47
backend/diarization.py Normal file

@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""
Speaker diarization using pyannote.audio.
"""
import json
import sys
from pathlib import Path
# Add backend to path
sys.path.insert(0, str(Path(__file__).parent))
from services.diarization import diarize_and_label
def main():
if len(sys.argv) < 2:
print("Usage: python diarization.py <command> [args...]", file=sys.stderr)
sys.exit(1)
command = sys.argv[1]
try:
if command == "diarize_and_label":
if len(sys.argv) < 4:
print("Usage: python diarization.py diarize_and_label <transcription_result_json> <audio_path> [hf_token] [num_speakers] [use_gpu]", file=sys.stderr)
sys.exit(1)
transcription_result = json.loads(sys.argv[2])
audio_path = sys.argv[3]
hf_token = sys.argv[4] if len(sys.argv) > 4 and sys.argv[4] != "null" else None
num_speakers = int(sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] != "null" else None
use_gpu = sys.argv[6].lower() == "true" if len(sys.argv) > 6 else True
result = diarize_and_label(transcription_result, audio_path, hf_token, num_speakers, use_gpu)
print(json.dumps(result))
else:
print(f"Unknown command: {command}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(json.dumps({"error": str(e)}), file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()
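
The transcription_result_json argument has the same shape that transcribe.py emits ({words, segments, language}), and the script returns that structure with speaker labels attached. A sketch of a call with an explicit speaker count; the paths and the Hugging Face token below are placeholders:

import json
import subprocess

word = {"word": "Hello", "start": 0.0, "end": 0.5, "confidence": 0.9}
transcription = json.dumps({
    "words": [word],
    "segments": [{"id": 0, "start": 0.0, "end": 0.5, "text": "Hello", "words": [word]}],
    "language": "en",
})
cmd = [
    ".venv/bin/python3.10", "backend/diarization.py", "diarize_and_label",
    transcription, "talk.wav",   # audio the transcript came from
    "hf_XXXX", "2", "true",      # hf_token placeholder, num_speakers, use_gpu
]
proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
print(json.loads(proc.stdout))   # same shape, now with "speaker" fields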

77
backend/video_editor.py Normal file

@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""
Video editing operations using FFmpeg.
"""
import json
import sys
from pathlib import Path
# Add backend to path
sys.path.insert(0, str(Path(__file__).parent))
from services.video_editor import export_stream_copy, export_reencode, export_reencode_with_subs, get_video_info
def main():
if len(sys.argv) < 2:
print("Usage: python video_editor.py <command> [args...]", file=sys.stderr)
sys.exit(1)
command = sys.argv[1]
try:
if command == "export_stream_copy":
if len(sys.argv) != 5:
print("Usage: python video_editor.py export_stream_copy <input_path> <output_path> <keep_segments_json>", file=sys.stderr)
sys.exit(1)
input_path = sys.argv[2]
output_path = sys.argv[3]
keep_segments = json.loads(sys.argv[4])
result = export_stream_copy(input_path, output_path, keep_segments)
print(json.dumps({"output_path": result}))
elif command == "export_reencode":
if len(sys.argv) != 7:
print("Usage: python video_editor.py export_reencode <input_path> <output_path> <keep_segments_json> <resolution> <format_hint>", file=sys.stderr)
sys.exit(1)
input_path = sys.argv[2]
output_path = sys.argv[3]
keep_segments = json.loads(sys.argv[4])
resolution = sys.argv[5]
format_hint = sys.argv[6]
result = export_reencode(input_path, output_path, keep_segments, resolution, format_hint)
print(json.dumps({"output_path": result}))
elif command == "export_reencode_with_subs":
if len(sys.argv) != 8:
print("Usage: python video_editor.py export_reencode_with_subs <input_path> <output_path> <keep_segments_json> <subtitle_path> <resolution> <format_hint>", file=sys.stderr)
sys.exit(1)
input_path = sys.argv[2]
output_path = sys.argv[3]
keep_segments = json.loads(sys.argv[4])
subtitle_path = sys.argv[5]
resolution = sys.argv[6]
format_hint = sys.argv[7]
result = export_reencode_with_subs(input_path, output_path, keep_segments, subtitle_path, resolution, format_hint)
print(json.dumps({"output_path": result}))
elif command == "get_video_info":
if len(sys.argv) != 3:
print("Usage: python video_editor.py get_video_info <input_path>", file=sys.stderr)
sys.exit(1)
input_path = sys.argv[2]
result = get_video_info(input_path)
print(json.dumps(result))
else:
print(f"Unknown command: {command}", file=sys.stderr)
sys.exit(1)
except Exception as e:
print(json.dumps({"error": str(e)}), file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()
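
keep_segments is passed through opaquely from the Rust side (a serde_json::Value), so its exact schema lives in services.video_editor; assuming it is a list of {start, end} objects in seconds, a stream-copy export could be driven like this (paths and the segment shape are assumptions):

import json
import subprocess

keep = json.dumps([{"start": 0.0, "end": 12.5}, {"start": 20.0, "end": 45.0}])
cmd = [
    ".venv/bin/python3.10", "backend/video_editor.py",
    "export_stream_copy", "input.mp4", "edited.mp4", keep,
]
proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
print(json.loads(proc.stdout)["output_path"])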


@ -60,6 +60,9 @@ Focus on what creators need for spoken content:
No multi-track, voice cloning, or collaboration—keep it simple.
## 4. Notes
- Consider adding Parakeet TDT as a transcription option in the future for users who want alternatives to Whisper.
## 5. Monetization Model
- **Free Forever**: Core editing/transcription (unlimited local use).
- **Pro License** ($2949 one-time): Batch processing, high-quality voices (if adding TTS), custom presets, priority support.

121
src-tauri/Cargo.lock generated

@ -91,7 +91,6 @@ dependencies = [
"tauri-plugin-log",
"tempfile",
"ureq",
"whisper-rs",
]
[[package]]
@ -147,26 +146,6 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bindgen"
version = "0.72.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
dependencies = [
"bitflags 2.11.0",
"cexpr",
"clang-sys",
"itertools",
"log",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn 2.0.117",
]
[[package]]
name = "bit-set"
version = "0.8.0"
@ -416,15 +395,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfb"
version = "0.7.3"
@ -470,26 +440,6 @@ dependencies = [
"windows-link 0.2.1",
]
[[package]]
name = "clang-sys"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
dependencies = [
"glob",
"libc",
"libloading 0.8.9",
]
[[package]]
name = "cmake"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
dependencies = [
"cc",
]
[[package]]
name = "combine"
version = "4.6.7"
@ -876,12 +826,6 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "embed-resource"
version = "3.0.8"
@ -1043,12 +987,6 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fs_extra"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "funty"
version = "2.0.0"
@ -1798,15 +1736,6 @@ dependencies = [
"serde",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.18"
@ -1961,7 +1890,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf"
dependencies = [
"gtk-sys",
"libloading 0.7.4",
"libloading",
"once_cell",
]
@ -1981,16 +1910,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "libloading"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
"cfg-if",
"windows-link 0.2.1",
]
[[package]]
name = "libredox"
version = "0.1.15"
@ -2099,12 +2018,6 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.8.9"
@ -2189,16 +2102,6 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "num-conv"
version = "0.2.1"
@ -4892,28 +4795,6 @@ dependencies = [
"windows-core 0.61.2",
]
[[package]]
name = "whisper-rs"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2088172d00f936c348d6a72f488dc2660ab3f507263a195df308a3c2383229f6"
dependencies = [
"whisper-rs-sys",
]
[[package]]
name = "whisper-rs-sys"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6986c0fe081241d391f09b9a071fbcbb59720c3563628c3c829057cf69f2a56f"
dependencies = [
"bindgen",
"cfg-if",
"cmake",
"fs_extra",
"semver",
]
[[package]]
name = "winapi"
version = "0.3.9"

src-tauri/Cargo.toml

@ -27,6 +27,5 @@ tauri-plugin-fs = "2"
tauri-plugin-log = "2"
dirs = "5.0"
ureq = "2.9"
whisper-rs = "0.16.0"
hound = "3.5"
tempfile = "3.10"

98
src-tauri/src/ai_provider.rs Normal file

@ -0,0 +1,98 @@
use std::process::Command;
use serde_json;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct AICompleteResult {
pub response: String,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct OllamaModelsResult {
pub models: Vec<String>,
}
/// Complete text using AI provider
pub fn complete(
prompt: &str,
provider: &str,
model: Option<&str>,
api_key: Option<&str>,
base_url: Option<&str>,
system_prompt: Option<&str>,
temperature: f64,
) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("ai_provider.py");
let script_path = script_path.to_str().unwrap_or_default();
let mut args = vec![script_path, "complete", prompt, provider];
if let Some(m) = model {
args.push(m);
} else {
args.push("null");
}
if let Some(key) = api_key {
args.push(key);
} else {
args.push("null");
}
if let Some(url) = base_url {
args.push(url);
} else {
args.push("null");
}
if let Some(sys) = system_prompt {
args.push(sys);
} else {
args.push("null");
}
let temp_str = temperature.to_string();
args.push(&temp_str);
let output = Command::new(python_exe)
.args(&args)
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: AICompleteResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.response)
}
/// List available Ollama models
pub fn list_ollama_models(base_url: &str) -> Result<Vec<String>, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("ai_provider.py");
let script_path = script_path.to_str().unwrap_or_default();
let output = Command::new(python_exe)
.args(&[script_path, "list_ollama_models", base_url])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: OllamaModelsResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.models)
}

61
src-tauri/src/audio_cleaner.rs Normal file

@ -0,0 +1,61 @@
use std::process::Command;
use serde_json;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct CleanAudioResult {
pub output_path: String,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct DeepFilterStatus {
pub available: bool,
}
/// Clean audio using DeepFilterNet or FFmpeg fallback
pub fn clean_audio(input_path: &str, output_path: &str) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("audio_cleaner.py");
let script_path = script_path.to_str().unwrap_or_default();
let output = Command::new(python_exe)
.args(&[script_path, "clean_audio", input_path, output_path])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: CleanAudioResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.output_path)
}
/// Check if DeepFilterNet is available
pub fn is_deepfilter_available() -> Result<bool, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("audio_cleaner.py");
let script_path = script_path.to_str().unwrap_or_default();
let output = Command::new(python_exe)
.args(&[script_path, "is_deepfilter_available"])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: DeepFilterStatus = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.available)
}

66
src-tauri/src/background_removal.rs Normal file

@ -0,0 +1,66 @@
use std::process::Command;
use serde_json;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct BackgroundRemovalStatus {
pub available: bool,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct BackgroundRemovalResult {
pub output_path: String,
}
/// Check if background removal is available
pub fn is_available() -> Result<bool, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("background_removal.py");
let script_path = script_path.to_str().unwrap_or_default();
let output = Command::new(python_exe)
.args(&[script_path, "is_available"])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: BackgroundRemovalStatus = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.available)
}
/// Remove background on export (placeholder for Phase 5)
pub fn remove_background_on_export(
input_path: &str,
output_path: &str,
replacement: &str,
replacement_value: &str,
) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("background_removal.py");
let script_path = script_path.to_str().unwrap_or_default();
let output = Command::new(python_exe)
.args(&[script_path, "remove_background_on_export", input_path, output_path, replacement, replacement_value])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: BackgroundRemovalResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.output_path)
}

177
src-tauri/src/caption_generator.rs Normal file

@ -0,0 +1,177 @@
use std::process::Command;
use serde_json;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Word {
pub word: String,
pub start: f64,
pub end: f64,
pub confidence: f64,
#[serde(skip_serializing_if = "Option::is_none")]
pub speaker: Option<String>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct CaptionStyle {
#[serde(skip_serializing_if = "Option::is_none")]
pub font_name: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub font_size: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub font_color: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub bold: Option<bool>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct CaptionContent {
pub content: String,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SaveCaptionsResult {
pub output_path: String,
}
/// Generate SRT caption content
pub fn generate_srt(
words: &[Word],
deleted_indices: Option<&std::collections::HashSet<usize>>,
words_per_line: usize,
) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("caption_generator.py");
let script_path = script_path.to_str().unwrap_or_default();
let words_json = serde_json::to_string(words)
.map_err(|e| format!("Failed to serialize words: {}", e))?;
let deleted_json = match deleted_indices {
Some(indices) => serde_json::to_string(indices)
.map_err(|e| format!("Failed to serialize deleted indices: {}", e))?,
None => "null".to_string(),
};
let output = Command::new(python_exe)
.args(&[script_path, "generate_srt", &words_json, &deleted_json, &words_per_line.to_string()])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: CaptionContent = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.content)
}
/// Generate VTT caption content
pub fn generate_vtt(
words: &[Word],
deleted_indices: Option<&std::collections::HashSet<usize>>,
words_per_line: usize,
) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("caption_generator.py");
let script_path = script_path.to_str().unwrap_or_default();
let words_json = serde_json::to_string(words)
.map_err(|e| format!("Failed to serialize words: {}", e))?;
let deleted_json = match deleted_indices {
Some(indices) => serde_json::to_string(indices)
.map_err(|e| format!("Failed to serialize deleted indices: {}", e))?,
None => "null".to_string(),
};
let output = Command::new(python_exe)
.args(&[script_path, "generate_vtt", &words_json, &deleted_json, &words_per_line.to_string()])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: CaptionContent = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.content)
}
/// Generate ASS subtitle content
pub fn generate_ass(
words: &[Word],
deleted_indices: Option<&std::collections::HashSet<usize>>,
words_per_line: usize,
style: Option<&CaptionStyle>,
) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("caption_generator.py");
let script_path = script_path.to_str().unwrap_or_default();
let words_json = serde_json::to_string(words)
.map_err(|e| format!("Failed to serialize words: {}", e))?;
let deleted_json = match deleted_indices {
Some(indices) => serde_json::to_string(indices)
.map_err(|e| format!("Failed to serialize deleted indices: {}", e))?,
None => "null".to_string(),
};
let style_json = match style {
Some(s) => serde_json::to_string(s)
.map_err(|e| format!("Failed to serialize style: {}", e))?,
None => "null".to_string(),
};
let output = Command::new(python_exe)
.args(&[script_path, "generate_ass", &words_json, &deleted_json, &words_per_line.to_string(), &style_json])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: CaptionContent = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.content)
}
/// Save caption content to file
pub fn save_captions(content: &str, output_path: &str) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("caption_generator.py");
let script_path = script_path.to_str().unwrap_or_default();
let output = Command::new(python_exe)
.args(&[script_path, "save_captions", content, output_path])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: SaveCaptionsResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.output_path)
}

82
src-tauri/src/diarization.rs Normal file

@ -0,0 +1,82 @@
use std::process::Command;
use serde_json;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Word {
pub word: String,
pub start: f64,
pub end: f64,
pub confidence: f64,
#[serde(skip_serializing_if = "Option::is_none")]
pub speaker: Option<String>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Segment {
pub id: usize,
pub start: f64,
pub end: f64,
pub text: String,
pub words: Vec<Word>,
#[serde(skip_serializing_if = "Option::is_none")]
pub speaker: Option<String>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct TranscriptionResult {
pub words: Vec<Word>,
pub segments: Vec<Segment>,
pub language: String,
}
/// Apply speaker diarization to transcription result
pub fn diarize_and_label(
transcription_result: &TranscriptionResult,
audio_path: &str,
hf_token: Option<&str>,
num_speakers: Option<u32>,
use_gpu: bool,
) -> Result<TranscriptionResult, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("diarization.py");
let script_path = script_path.to_str().unwrap_or_default();
let transcription_json = serde_json::to_string(transcription_result)
.map_err(|e| format!("Failed to serialize transcription: {}", e))?;
let mut args = vec![script_path, "diarize_and_label", &transcription_json, audio_path];
if let Some(token) = hf_token {
args.push(token);
} else {
args.push("null");
}
let speakers_str;
if let Some(speakers) = num_speakers {
speakers_str = speakers.to_string();
args.push(&speakers_str);
} else {
args.push("null");
}
args.push(if use_gpu { "true" } else { "false" });
let output = Command::new(python_exe)
.args(&args)
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: TranscriptionResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result)
}

src-tauri/src/lib.rs

@ -1,6 +1,13 @@
// --- Commands ---
mod paths;
mod transcription;
mod video_editor;
mod audio_cleaner;
mod diarization;
mod ai_provider;
mod caption_generator;
mod background_removal;
/// Returns the backend URL. Stubbed for now; will be replaced once the
/// Python/Rust backend is fully wired up.
@ -56,6 +63,162 @@ async fn transcribe_audio(file_path: String, model_name: String, language: Optio
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Export video using stream copy (fast, lossless)
#[tauri::command]
async fn export_stream_copy(input_path: String, output_path: String, keep_segments: serde_json::Value) -> Result<String, String> {
tauri::async_runtime::spawn_blocking(move || {
video_editor::export_stream_copy(&input_path, &output_path, &keep_segments)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Export video with re-encoding
#[tauri::command]
async fn export_reencode(input_path: String, output_path: String, keep_segments: serde_json::Value, resolution: String, format_hint: String) -> Result<String, String> {
tauri::async_runtime::spawn_blocking(move || {
video_editor::export_reencode(&input_path, &output_path, &keep_segments, &resolution, &format_hint)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Export video with re-encoding and subtitles
#[tauri::command]
async fn export_reencode_with_subs(input_path: String, output_path: String, keep_segments: serde_json::Value, subtitle_path: String, resolution: String, format_hint: String) -> Result<String, String> {
tauri::async_runtime::spawn_blocking(move || {
video_editor::export_reencode_with_subs(&input_path, &output_path, &keep_segments, &subtitle_path, &resolution, &format_hint)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Get video information
#[tauri::command]
async fn get_video_info(input_path: String) -> Result<video_editor::VideoInfo, String> {
tauri::async_runtime::spawn_blocking(move || {
video_editor::get_video_info(&input_path)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Clean audio using DeepFilterNet or FFmpeg fallback
#[tauri::command]
async fn clean_audio(input_path: String, output_path: String) -> Result<String, String> {
tauri::async_runtime::spawn_blocking(move || {
audio_cleaner::clean_audio(&input_path, &output_path)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Check if DeepFilterNet is available
#[tauri::command]
async fn is_deepfilter_available() -> Result<bool, String> {
tauri::async_runtime::spawn_blocking(move || {
audio_cleaner::is_deepfilter_available()
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Apply speaker diarization to transcription result
#[tauri::command]
async fn diarize_and_label(transcription_result: diarization::TranscriptionResult, audio_path: String, hf_token: Option<String>, num_speakers: Option<u32>, use_gpu: Option<bool>) -> Result<diarization::TranscriptionResult, String> {
let use_gpu = use_gpu.unwrap_or(true);
tauri::async_runtime::spawn_blocking(move || {
diarization::diarize_and_label(&transcription_result, &audio_path, hf_token.as_deref(), num_speakers, use_gpu)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Complete text using AI provider
#[tauri::command]
async fn ai_complete(prompt: String, provider: String, model: Option<String>, api_key: Option<String>, base_url: Option<String>, system_prompt: Option<String>, temperature: Option<f64>) -> Result<String, String> {
let temperature = temperature.unwrap_or(0.3);
tauri::async_runtime::spawn_blocking(move || {
ai_provider::complete(&prompt, &provider, model.as_deref(), api_key.as_deref(), base_url.as_deref(), system_prompt.as_deref(), temperature)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// List available Ollama models
#[tauri::command]
async fn list_ollama_models(base_url: Option<String>) -> Result<Vec<String>, String> {
let base_url = base_url.unwrap_or_else(|| "http://localhost:11434".to_string());
tauri::async_runtime::spawn_blocking(move || {
ai_provider::list_ollama_models(&base_url)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Generate SRT caption content
#[tauri::command]
async fn generate_srt(words: Vec<caption_generator::Word>, deleted_indices: Option<std::collections::HashSet<usize>>, words_per_line: Option<usize>) -> Result<String, String> {
let words_per_line = words_per_line.unwrap_or(8);
tauri::async_runtime::spawn_blocking(move || {
caption_generator::generate_srt(&words, deleted_indices.as_ref(), words_per_line)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Generate VTT caption content
#[tauri::command]
async fn generate_vtt(words: Vec<caption_generator::Word>, deleted_indices: Option<std::collections::HashSet<usize>>, words_per_line: Option<usize>) -> Result<String, String> {
let words_per_line = words_per_line.unwrap_or(8);
tauri::async_runtime::spawn_blocking(move || {
caption_generator::generate_vtt(&words, deleted_indices.as_ref(), words_per_line)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Generate ASS subtitle content
#[tauri::command]
async fn generate_ass(words: Vec<caption_generator::Word>, deleted_indices: Option<std::collections::HashSet<usize>>, words_per_line: Option<usize>, style: Option<caption_generator::CaptionStyle>) -> Result<String, String> {
let words_per_line = words_per_line.unwrap_or(8);
tauri::async_runtime::spawn_blocking(move || {
caption_generator::generate_ass(&words, deleted_indices.as_ref(), words_per_line, style.as_ref())
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Save caption content to file
#[tauri::command]
async fn save_captions(content: String, output_path: String) -> Result<String, String> {
tauri::async_runtime::spawn_blocking(move || {
caption_generator::save_captions(&content, &output_path)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Check if background removal is available
#[tauri::command]
async fn is_background_removal_available() -> Result<bool, String> {
tauri::async_runtime::spawn_blocking(move || {
background_removal::is_available()
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
/// Remove background on export (placeholder for Phase 5)
#[tauri::command]
async fn remove_background_on_export(input_path: String, output_path: String, replacement: String, replacement_value: String) -> Result<String, String> {
tauri::async_runtime::spawn_blocking(move || {
background_removal::remove_background_on_export(&input_path, &output_path, &replacement, &replacement_value)
})
.await
.map_err(|e| format!("Task error: {:?}", e))?
}
// --- App entry point ---
#[cfg_attr(mobile, tauri::mobile_entry_point)]
@ -79,6 +242,21 @@ pub fn run() {
decrypt_string,
ensure_model,
transcribe_audio,
export_stream_copy,
export_reencode,
export_reencode_with_subs,
get_video_info,
clean_audio,
is_deepfilter_available,
diarize_and_label,
ai_complete,
list_ollama_models,
generate_srt,
generate_vtt,
generate_ass,
save_captions,
is_background_removal_available,
remove_background_on_export,
])
.run(tauri::generate_context!())
.expect("error while running tauri application");

30
src-tauri/src/paths.rs Normal file

@ -0,0 +1,30 @@
use std::path::PathBuf;
/// Resolve the project root from the executable path.
/// In dev mode, the binary lives at: <root>/src-tauri/target/debug/<bin>
/// So the project root is 4 levels above the binary.
pub fn project_root() -> PathBuf {
let exe = std::env::current_exe().expect("Failed to get executable path");
// exe -> debug/ -> target/ -> src-tauri/ -> root
exe.parent()
.and_then(|p| p.parent())
.and_then(|p| p.parent())
.and_then(|p| p.parent())
.map(|p| p.to_path_buf())
.unwrap_or_else(|| PathBuf::from("."))
}
/// Absolute path to the venv Python 3.10 interpreter.
pub fn python_exe() -> PathBuf {
project_root().join(".venv/bin/python3.10")
}
/// Absolute path to a script in the backend directory.
pub fn backend_script(name: &str) -> PathBuf {
project_root().join("backend").join(name)
}
/// Absolute path to a script at the project root.
pub fn root_script(name: &str) -> PathBuf {
project_root().join(name)
}

src-tauri/src/transcription.rs

@ -1,6 +1,5 @@
use std::fs;
use std::process::Command;
use whisper_rs::{WhisperContext, WhisperContextParameters, FullParams, SamplingStrategy};
use serde_json;
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
pub struct TranscriptionResult {
@ -26,176 +25,46 @@ pub struct Segment {
pub words: Vec<Word>,
}
/// Extract audio from a video/audio file to a 16kHz mono WAV using ffmpeg
fn extract_to_wav(input_path: &str, output_path: &str) -> Result<(), String> {
let status = Command::new("ffmpeg")
.args(["-y", "-i", input_path, "-vn", "-ar", "16000", "-ac", "1", "-f", "wav", output_path])
.status()
.map_err(|e| format!("Failed to run ffmpeg: {}", e))?;
if !status.success() {
return Err(format!("ffmpeg exited with code: {:?}", status.code()));
}
Ok(())
}
/// Transcribe audio file using whisper-rs (real Whisper.cpp inference)
/// Transcribe audio file using Python faster-whisper
pub fn transcribe_audio(
file_path: &str,
model_name: &str,
language: Option<&str>,
) -> Result<TranscriptionResult, String> {
// Ensure model is downloaded
let model_path = ensure_model_downloaded(model_name)?;
// Path to Python venv and script
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::root_script("transcribe.py");
let script_path = script_path.to_str().unwrap_or_default();
// Extract audio to temp 16kHz mono WAV
let tmp_wav = tempfile::Builder::new()
.suffix(".wav")
.tempfile()
.map_err(|e| format!("Failed to create temp file: {}", e))?;
let wav_path = tmp_wav.path().to_string_lossy().to_string();
extract_to_wav(file_path, &wav_path)?;
// Read WAV as f32 samples
let mut reader = hound::WavReader::open(&wav_path)
.map_err(|e| format!("Failed to read WAV: {}", e))?;
let spec = reader.spec();
let samples: Vec<f32> = match spec.sample_format {
hound::SampleFormat::Int => reader
.samples::<i16>()
.map(|s| s.map(|v| v as f32 / 32768.0).map_err(|e| format!("{}", e)))
.collect::<Result<Vec<f32>, _>>()?,
hound::SampleFormat::Float => reader
.samples::<f32>()
.map(|s| s.map_err(|e| format!("{}", e)))
.collect::<Result<Vec<f32>, _>>()?,
};
// Load Whisper model and transcribe
let ctx_params = WhisperContextParameters::default();
let ctx = WhisperContext::new_with_params(&model_path, ctx_params)
.map_err(|e| format!("Failed to load model: {:?}", e))?;
let mut state = ctx.create_state()
.map_err(|e| format!("Failed to create state: {:?}", e))?;
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
params.set_print_special(false);
params.set_print_progress(false);
params.set_print_realtime(false);
params.set_print_timestamps(false);
params.set_token_timestamps(true);
params.set_single_segment(false);
// Build command args
let mut args = vec![script_path, file_path, model_name];
if let Some(lang) = language {
params.set_language(Some(lang));
args.push(lang);
}
state.full(params, &samples)
.map_err(|e| format!("Transcription failed: {:?}", e))?;
// Run Python script
let output = Command::new(python_exe)
.args(&args)
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
// Extract word-level results using the 0.16.0 iterator API
let mut all_words: Vec<Word> = Vec::new();
let mut segments: Vec<Segment> = Vec::new();
let detected_language = language.unwrap_or("en").to_string();
for (seg_idx, segment) in state.as_iter().enumerate() {
let seg_text = segment.to_str_lossy()
.map_err(|e| format!("Segment text error: {:?}", e))?;
let seg_t0 = segment.start_timestamp() as f64 / 100.0;
let seg_t1 = segment.end_timestamp() as f64 / 100.0;
let mut seg_words: Vec<Word> = Vec::new();
for tok_i in 0..segment.n_tokens() {
if let Some(token) = segment.get_token(tok_i) {
let token_text = match token.to_str_lossy() {
Ok(t) => t.into_owned(),
Err(_) => continue,
};
let token_data = token.token_data();
// Skip special tokens
let trimmed = token_text.trim();
if trimmed.is_empty() || trimmed.starts_with('[') || trimmed.starts_with('<') {
continue;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let word = Word {
word: trimmed.to_string(),
start: token_data.t0 as f64 / 100.0,
end: token_data.t1 as f64 / 100.0,
confidence: token_data.p as f64,
};
all_words.push(word.clone());
seg_words.push(word);
}
// Parse JSON output
let stdout = String::from_utf8_lossy(&output.stdout);
let result: TranscriptionResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result)
}
segments.push(Segment {
id: seg_idx,
start: seg_t0,
end: seg_t1,
text: seg_text.trim().to_string(),
words: seg_words,
});
}
Ok(TranscriptionResult {
words: all_words,
segments,
language: detected_language,
})
}
/// Download and cache Whisper model
pub fn ensure_model_downloaded(model_name: &str) -> Result<String, String> {
// Get app data directory for storing models
let app_data_dir = dirs::data_dir()
.ok_or("Could not find app data directory")?
.join("TalkEdit")
.join("models");
// Create directory if it doesn't exist
fs::create_dir_all(&app_data_dir)
.map_err(|e| format!("Failed to create models directory: {}", e))?;
let model_path = app_data_dir.join(format!("ggml-{}.bin", model_name));
// Check if model already exists
if model_path.exists() {
return Ok(model_path.to_string_lossy().to_string());
}
// Only download smaller models automatically
let allowed_models = ["tiny", "base", "small"];
if !allowed_models.contains(&model_name) {
return Err(format!("Model '{}' is not available for automatic download. Only tiny, base, and small models are supported.", model_name));
}
println!("Downloading Whisper model: {}...", model_name);
// Download the model from ggerganov's whisper.cpp repo
let url = format!("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-{}.bin", model_name);
let response = ureq::get(&url)
.call()
.map_err(|e| format!("Failed to download model: {}", e))?;
let len = response
.header("content-length")
.and_then(|s| s.parse::<usize>().ok())
.unwrap_or(0);
println!("Model size: {} bytes", len);
let mut reader = response.into_reader();
let mut file = fs::File::create(&model_path)
.map_err(|e| format!("Failed to create model file: {}", e))?;
std::io::copy(&mut reader, &mut file)
.map_err(|e| format!("Failed to write model file: {}", e))?;
println!("Model downloaded successfully: {}", model_path.display());
Ok(model_path.to_string_lossy().to_string())
/// Ensure model is available (faster-whisper handles this automatically)
pub fn ensure_model_downloaded(_model_name: &str) -> Result<String, String> {
// faster-whisper downloads models on first use, so just return success
Ok("Model ready".to_string())
}

138
src-tauri/src/video_editor.rs Normal file

@ -0,0 +1,138 @@
use std::process::Command;
use serde_json;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct VideoInfo {
pub duration: f64,
pub size: u64,
pub format: String,
pub width: u32,
pub height: u32,
pub codec: String,
pub fps: f64,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ExportResult {
pub output_path: String,
}
/// Export video using stream copy (fast, lossless)
pub fn export_stream_copy(
input_path: &str,
output_path: &str,
keep_segments: &serde_json::Value,
) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("video_editor.py");
let script_path = script_path.to_str().unwrap_or_default();
let keep_segments_str = keep_segments.to_string();
let output = Command::new(python_exe)
.args(&[script_path, "export_stream_copy", input_path, output_path, &keep_segments_str])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: ExportResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.output_path)
}
/// Export video with re-encoding
pub fn export_reencode(
input_path: &str,
output_path: &str,
keep_segments: &serde_json::Value,
resolution: &str,
format_hint: &str,
) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("video_editor.py");
let script_path = script_path.to_str().unwrap_or_default();
let keep_segments_str = keep_segments.to_string();
let output = Command::new(python_exe)
.args(&[script_path, "export_reencode", input_path, output_path, &keep_segments_str, resolution, format_hint])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: ExportResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.output_path)
}
/// Export video with re-encoding and subtitles
pub fn export_reencode_with_subs(
input_path: &str,
output_path: &str,
keep_segments: &serde_json::Value,
subtitle_path: &str,
resolution: &str,
format_hint: &str,
) -> Result<String, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("video_editor.py");
let script_path = script_path.to_str().unwrap_or_default();
let keep_segments_str = keep_segments.to_string();
let output = Command::new(python_exe)
.args(&[script_path, "export_reencode_with_subs", input_path, output_path, &keep_segments_str, subtitle_path, resolution, format_hint])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: ExportResult = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result.output_path)
}
/// Get video information
pub fn get_video_info(input_path: &str) -> Result<VideoInfo, String> {
let python_exe = crate::paths::python_exe();
let python_exe = python_exe.to_str().unwrap_or_default();
let script_path = crate::paths::backend_script("video_editor.py");
let script_path = script_path.to_str().unwrap_or_default();
let output = Command::new(python_exe)
.args(&[script_path, "get_video_info", input_path])
.output()
.map_err(|e| format!("Failed to run Python script: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(format!("Python script failed: {}", stderr));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let result: VideoInfo = serde_json::from_str(&stdout.trim())
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
Ok(result)
}

130
test_api.py Executable file

@ -0,0 +1,130 @@
#!/home/dillon/_code/TalkEdit/.venv/bin/python3.10
"""
Test script for the TalkEdit API.
This script exercises the backend service functions that the new Tauri commands expose.
"""
import json
import sys
import os
from pathlib import Path
# Add backend to path for direct testing
sys.path.insert(0, str(Path(__file__).parent / "backend"))
def test_video_info():
"""Test get_video_info function"""
from services.video_editor import get_video_info
# Use a test video file if available
test_video = "/path/to/test/video.mp4" # Replace with actual test file
if os.path.exists(test_video):
try:
info = get_video_info(test_video)
print("✓ Video info test passed")
print(f" Duration: {info['duration']}")
print(f" Resolution: {info['width']}x{info['height']}")
return True
except Exception as e:
print(f"✗ Video info test failed: {e}")
return False
else:
print("⚠ Video info test skipped (no test file)")
return True
def test_caption_generation():
"""Test caption generation functions"""
from services.caption_generator import generate_srt, generate_vtt
# Sample word data
words = [
{"word": "Hello", "start": 0.0, "end": 0.5, "confidence": 0.9},
{"word": "world", "start": 0.5, "end": 1.0, "confidence": 0.95},
{"word": "this", "start": 1.0, "end": 1.3, "confidence": 0.8},
{"word": "is", "start": 1.3, "end": 1.5, "confidence": 0.9},
{"word": "a", "start": 1.5, "end": 1.6, "confidence": 0.85},
{"word": "test", "start": 1.6, "end": 2.0, "confidence": 0.95},
]
try:
srt_content = generate_srt(words)
vtt_content = generate_vtt(words)
if "Hello world" in srt_content and "WEBVTT" in vtt_content:
print("✓ Caption generation test passed")
return True
else:
print("✗ Caption generation test failed: unexpected content")
return False
except Exception as e:
print(f"✗ Caption generation test failed: {e}")
return False
def test_ai_provider():
"""Test AI provider functions"""
from services.ai_provider import AIProvider
try:
# Test listing Ollama models (may fail if Ollama not running)
models = AIProvider.list_ollama_models()
print(f"✓ AI provider test passed (found {len(models)} models)")
return True
except Exception as e:
print(f"⚠ AI provider test skipped: {e}")
return True
def test_deepfilter_status():
"""Test DeepFilterNet availability check"""
from services.audio_cleaner import is_deepfilter_available
try:
available = is_deepfilter_available()
print(f"✓ DeepFilter status test passed (available: {available})")
return True
except Exception as e:
print(f"✗ DeepFilter status test failed: {e}")
return False
def main():
print("Testing TalkEdit API functions...")
print("=" * 50)
tests = [
("Video Info", test_video_info),
("Caption Generation", test_caption_generation),
("AI Provider", test_ai_provider),
("DeepFilter Status", test_deepfilter_status),
]
passed = 0
total = len(tests)
for name, test_func in tests:
print(f"\nTesting {name}:")
if test_func():
passed += 1
print("\n" + "=" * 50)
print(f"Results: {passed}/{total} tests passed")
if passed == total:
print("🎉 All tests passed! The API is ready for AI testing.")
else:
print("⚠️ Some tests failed. Check the output above.")
print("\nAvailable Tauri Commands:")
commands = [
"transcribe_audio",
"export_stream_copy", "export_reencode", "export_reencode_with_subs", "get_video_info",
"clean_audio", "is_deepfilter_available",
"diarize_and_label",
"ai_complete", "list_ollama_models",
"generate_srt", "generate_vtt", "generate_ass", "save_captions",
"is_background_removal_available", "remove_background_on_export",
]
for cmd in commands:
print(f" - {cmd}")
if __name__ == "__main__":
main()

91
transcribe.py Normal file

@ -0,0 +1,91 @@
#!/usr/bin/env python3
import sys
import json
import tempfile
import subprocess
from faster_whisper import WhisperModel
def extract_audio(input_path, output_path):
"""Extract audio from video/audio file to 16kHz mono WAV"""
cmd = [
'ffmpeg', '-y', '-i', input_path, '-vn', '-ar', '16000', '-ac', '1', '-f', 'wav', output_path
]
subprocess.run(cmd, check=True)
def main():
if len(sys.argv) < 3:
print("Usage: python transcribe.py <audio_file> <model_name> [language]", file=sys.stderr)
sys.exit(1)
audio_file = sys.argv[1]
model_name = sys.argv[2]
language = sys.argv[3] if len(sys.argv) > 3 else None
# Extract audio to temp WAV if needed
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
wav_path = tmp.name
try:
extract_audio(audio_file, wav_path)
# Load model - use GPU if CUDA is available, else CPU with int8
import ctypes
try:
ctypes.CDLL("libcublas.so.12")
device = "cuda"
compute_type = "float16"
except OSError:
device = "cpu"
compute_type = "int8"
model = WhisperModel(model_name, device=device, compute_type=compute_type)
# Transcribe
segments, info = model.transcribe(
wav_path,
language=language,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters=dict(threshold=0.5, min_speech_duration_ms=250),
without_timestamps=False
)
# Convert to our format
words = []
segments_list = []
for segment in segments:
seg_words = []
for word in segment.words:
w = {
"word": word.word,
"start": word.start,
"end": word.end,
"confidence": word.probability
}
words.append(w)
seg_words.append(w)
segments_list.append({
"id": len(segments_list),
"start": segment.start,
"end": segment.end,
"text": segment.text,
"words": seg_words
})
result = {
"words": words,
"segments": segments_list,
"language": info.language
}
print(json.dumps(result))
finally:
import os
os.unlink(wav_path)
if __name__ == "__main__":
main()
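
A quick way to sanity-check this script outside of Tauri is to run it directly and inspect the JSON it prints; the input file name and the "base" model choice below are just examples:

import json
import subprocess

proc = subprocess.run(
    [".venv/bin/python3.10", "transcribe.py", "talk.mp4", "base", "en"],
    capture_output=True, text=True, check=True,
)
result = json.loads(proc.stdout)
print(result["language"], len(result["words"]), "words")
print(result["segments"][0]["text"] if result["segments"] else "(no speech detected)")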