added api for ai; got backend working
This commit is contained in:
54
backend/ai_provider.py
Normal file
54
backend/ai_provider.py
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
AI provider interface for Ollama, OpenAI, and Claude.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add backend to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from services.ai_provider import AIProvider
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
print("Usage: python ai_provider.py <command> [args...]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
command = sys.argv[1]
|
||||||
|
|
||||||
|
try:
|
||||||
|
if command == "complete":
|
||||||
|
if len(sys.argv) < 4:
|
||||||
|
print("Usage: python ai_provider.py complete <prompt> <provider> [model] [api_key] [base_url] [system_prompt] [temperature]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
prompt = sys.argv[2]
|
||||||
|
provider = sys.argv[3]
|
||||||
|
model = sys.argv[4] if len(sys.argv) > 4 else None
|
||||||
|
api_key = sys.argv[5] if len(sys.argv) > 5 else None
|
||||||
|
base_url = sys.argv[6] if len(sys.argv) > 6 else None
|
||||||
|
system_prompt = sys.argv[7] if len(sys.argv) > 7 else None
|
||||||
|
temperature = float(sys.argv[8]) if len(sys.argv) > 8 else 0.3
|
||||||
|
|
||||||
|
result = AIProvider.complete(prompt, provider, model, api_key, base_url, system_prompt, temperature)
|
||||||
|
print(json.dumps({"response": result}))
|
||||||
|
|
||||||
|
elif command == "list_ollama_models":
|
||||||
|
base_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:11434"
|
||||||
|
result = AIProvider.list_ollama_models(base_url)
|
||||||
|
print(json.dumps({"models": result}))
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"Unknown command: {command}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(json.dumps({"error": str(e)}), file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
47
backend/audio_cleaner.py
Normal file
47
backend/audio_cleaner.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Audio cleaning operations using DeepFilterNet or FFmpeg fallback.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add backend to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from services.audio_cleaner import clean_audio, is_deepfilter_available
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
print("Usage: python audio_cleaner.py <command> [args...]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
command = sys.argv[1]
|
||||||
|
|
||||||
|
try:
|
||||||
|
if command == "clean_audio":
|
||||||
|
if len(sys.argv) != 4:
|
||||||
|
print("Usage: python audio_cleaner.py clean_audio <input_path> <output_path>", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
input_path = sys.argv[2]
|
||||||
|
output_path = sys.argv[3]
|
||||||
|
result = clean_audio(input_path, output_path)
|
||||||
|
print(json.dumps({"output_path": result}))
|
||||||
|
|
||||||
|
elif command == "is_deepfilter_available":
|
||||||
|
result = is_deepfilter_available()
|
||||||
|
print(json.dumps({"available": result}))
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"Unknown command: {command}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(json.dumps({"error": str(e)}), file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
50
backend/background_removal.py
Normal file
50
backend/background_removal.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Background removal operations (placeholder for Phase 5).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add backend to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from services.background_removal import is_available, remove_background_on_export
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
print("Usage: python background_removal.py <command> [args...]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
command = sys.argv[1]
|
||||||
|
|
||||||
|
try:
|
||||||
|
if command == "is_available":
|
||||||
|
result = is_available()
|
||||||
|
print(json.dumps({"available": result}))
|
||||||
|
|
||||||
|
elif command == "remove_background_on_export":
|
||||||
|
if len(sys.argv) != 6:
|
||||||
|
print("Usage: python background_removal.py remove_background_on_export <input_path> <output_path> <replacement> <replacement_value>", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
input_path = sys.argv[2]
|
||||||
|
output_path = sys.argv[3]
|
||||||
|
replacement = sys.argv[4]
|
||||||
|
replacement_value = sys.argv[5]
|
||||||
|
|
||||||
|
result = remove_background_on_export(input_path, output_path, replacement, replacement_value)
|
||||||
|
print(json.dumps({"output_path": result}))
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"Unknown command: {command}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(json.dumps({"error": str(e)}), file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
78
backend/caption_generator.py
Normal file
78
backend/caption_generator.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Generate caption files from word-level timestamps.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add backend to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from services.caption_generator import generate_srt, generate_vtt, generate_ass, save_captions
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
print("Usage: python caption_generator.py <command> [args...]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
command = sys.argv[1]
|
||||||
|
|
||||||
|
try:
|
||||||
|
if command == "generate_srt":
|
||||||
|
if len(sys.argv) < 4:
|
||||||
|
print("Usage: python caption_generator.py generate_srt <words_json> [deleted_indices_json] [words_per_line]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
words = json.loads(sys.argv[2])
|
||||||
|
deleted_indices = set(json.loads(sys.argv[3])) if len(sys.argv) > 3 and sys.argv[3] != "null" else None
|
||||||
|
words_per_line = int(sys.argv[4]) if len(sys.argv) > 4 else 8
|
||||||
|
|
||||||
|
result = generate_srt(words, deleted_indices, words_per_line)
|
||||||
|
print(json.dumps({"content": result}))
|
||||||
|
|
||||||
|
elif command == "generate_vtt":
|
||||||
|
if len(sys.argv) < 4:
|
||||||
|
print("Usage: python caption_generator.py generate_vtt <words_json> [deleted_indices_json] [words_per_line]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
words = json.loads(sys.argv[2])
|
||||||
|
deleted_indices = set(json.loads(sys.argv[3])) if len(sys.argv) > 3 and sys.argv[3] != "null" else None
|
||||||
|
words_per_line = int(sys.argv[4]) if len(sys.argv) > 4 else 8
|
||||||
|
|
||||||
|
result = generate_vtt(words, deleted_indices, words_per_line)
|
||||||
|
print(json.dumps({"content": result}))
|
||||||
|
|
||||||
|
elif command == "generate_ass":
|
||||||
|
if len(sys.argv) < 4:
|
||||||
|
print("Usage: python caption_generator.py generate_ass <words_json> [deleted_indices_json] [words_per_line] [style_json]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
words = json.loads(sys.argv[2])
|
||||||
|
deleted_indices = set(json.loads(sys.argv[3])) if len(sys.argv) > 3 and sys.argv[3] != "null" else None
|
||||||
|
words_per_line = int(sys.argv[4]) if len(sys.argv) > 4 else 8
|
||||||
|
style = json.loads(sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] != "null" else None
|
||||||
|
|
||||||
|
result = generate_ass(words, deleted_indices, words_per_line, style)
|
||||||
|
print(json.dumps({"content": result}))
|
||||||
|
|
||||||
|
elif command == "save_captions":
|
||||||
|
if len(sys.argv) != 4:
|
||||||
|
print("Usage: python caption_generator.py save_captions <content> <output_path>", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
content = sys.argv[2]
|
||||||
|
output_path = sys.argv[3]
|
||||||
|
|
||||||
|
result = save_captions(content, output_path)
|
||||||
|
print(json.dumps({"output_path": result}))
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"Unknown command: {command}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(json.dumps({"error": str(e)}), file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
47
backend/diarization.py
Normal file
47
backend/diarization.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Speaker diarization using pyannote.audio.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add backend to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from services.diarization import diarize_and_label
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
print("Usage: python diarization.py <command> [args...]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
command = sys.argv[1]
|
||||||
|
|
||||||
|
try:
|
||||||
|
if command == "diarize_and_label":
|
||||||
|
if len(sys.argv) < 4:
|
||||||
|
print("Usage: python diarization.py diarize_and_label <transcription_result_json> <audio_path> [hf_token] [num_speakers] [use_gpu]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
transcription_result = json.loads(sys.argv[2])
|
||||||
|
audio_path = sys.argv[3]
|
||||||
|
hf_token = sys.argv[4] if len(sys.argv) > 4 else None
|
||||||
|
num_speakers = int(sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] != "null" else None
|
||||||
|
use_gpu = sys.argv[6].lower() == "true" if len(sys.argv) > 6 else True
|
||||||
|
|
||||||
|
result = diarize_and_label(transcription_result, audio_path, hf_token, num_speakers, use_gpu)
|
||||||
|
print(json.dumps(result))
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"Unknown command: {command}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(json.dumps({"error": str(e)}), file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
77
backend/video_editor.py
Normal file
77
backend/video_editor.py
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Video editing operations using FFmpeg.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add backend to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from services.video_editor import export_stream_copy, export_reencode, export_reencode_with_subs, get_video_info
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
if len(sys.argv) < 2:
|
||||||
|
print("Usage: python video_editor.py <command> [args...]", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
command = sys.argv[1]
|
||||||
|
|
||||||
|
try:
|
||||||
|
if command == "export_stream_copy":
|
||||||
|
if len(sys.argv) != 5:
|
||||||
|
print("Usage: python video_editor.py export_stream_copy <input_path> <output_path> <keep_segments_json>", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
input_path = sys.argv[2]
|
||||||
|
output_path = sys.argv[3]
|
||||||
|
keep_segments = json.loads(sys.argv[4])
|
||||||
|
result = export_stream_copy(input_path, output_path, keep_segments)
|
||||||
|
print(json.dumps({"output_path": result}))
|
||||||
|
|
||||||
|
elif command == "export_reencode":
|
||||||
|
if len(sys.argv) != 7:
|
||||||
|
print("Usage: python video_editor.py export_reencode <input_path> <output_path> <keep_segments_json> <resolution> <format_hint>", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
input_path = sys.argv[2]
|
||||||
|
output_path = sys.argv[3]
|
||||||
|
keep_segments = json.loads(sys.argv[4])
|
||||||
|
resolution = sys.argv[5]
|
||||||
|
format_hint = sys.argv[6]
|
||||||
|
result = export_reencode(input_path, output_path, keep_segments, resolution, format_hint)
|
||||||
|
print(json.dumps({"output_path": result}))
|
||||||
|
|
||||||
|
elif command == "export_reencode_with_subs":
|
||||||
|
if len(sys.argv) != 8:
|
||||||
|
print("Usage: python video_editor.py export_reencode_with_subs <input_path> <output_path> <keep_segments_json> <subtitle_path> <resolution> <format_hint>", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
input_path = sys.argv[2]
|
||||||
|
output_path = sys.argv[3]
|
||||||
|
keep_segments = json.loads(sys.argv[4])
|
||||||
|
subtitle_path = sys.argv[5]
|
||||||
|
resolution = sys.argv[6]
|
||||||
|
format_hint = sys.argv[7]
|
||||||
|
result = export_reencode_with_subs(input_path, output_path, keep_segments, subtitle_path, resolution, format_hint)
|
||||||
|
print(json.dumps({"output_path": result}))
|
||||||
|
|
||||||
|
elif command == "get_video_info":
|
||||||
|
if len(sys.argv) != 3:
|
||||||
|
print("Usage: python video_editor.py get_video_info <input_path>", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
input_path = sys.argv[2]
|
||||||
|
result = get_video_info(input_path)
|
||||||
|
print(json.dumps(result))
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"Unknown command: {command}", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(json.dumps({"error": str(e)}), file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
3
plan.md
3
plan.md
@ -60,6 +60,9 @@ Focus on what creators need for spoken content:
|
|||||||
|
|
||||||
No multi-track, voice cloning, or collaboration—keep it simple.
|
No multi-track, voice cloning, or collaboration—keep it simple.
|
||||||
|
|
||||||
|
## 4. Notes
|
||||||
|
- Consider adding Parakeet TDT as a transcription option in the future for users who want alternatives to Whisper.
|
||||||
|
|
||||||
## 5. Monetization Model
|
## 5. Monetization Model
|
||||||
- **Free Forever**: Core editing/transcription (unlimited local use).
|
- **Free Forever**: Core editing/transcription (unlimited local use).
|
||||||
- **Pro License** ($29–49 one-time): Batch processing, high-quality voices (if adding TTS), custom presets, priority support.
|
- **Pro License** ($29–49 one-time): Batch processing, high-quality voices (if adding TTS), custom presets, priority support.
|
||||||
|
|||||||
121
src-tauri/Cargo.lock
generated
121
src-tauri/Cargo.lock
generated
@ -91,7 +91,6 @@ dependencies = [
|
|||||||
"tauri-plugin-log",
|
"tauri-plugin-log",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"ureq",
|
"ureq",
|
||||||
"whisper-rs",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -147,26 +146,6 @@ version = "0.22.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "bindgen"
|
|
||||||
version = "0.72.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
|
||||||
dependencies = [
|
|
||||||
"bitflags 2.11.0",
|
|
||||||
"cexpr",
|
|
||||||
"clang-sys",
|
|
||||||
"itertools",
|
|
||||||
"log",
|
|
||||||
"prettyplease",
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"regex",
|
|
||||||
"rustc-hash",
|
|
||||||
"shlex",
|
|
||||||
"syn 2.0.117",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bit-set"
|
name = "bit-set"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
@ -416,15 +395,6 @@ version = "1.1.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
|
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cexpr"
|
|
||||||
version = "0.6.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
|
||||||
dependencies = [
|
|
||||||
"nom",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfb"
|
name = "cfb"
|
||||||
version = "0.7.3"
|
version = "0.7.3"
|
||||||
@ -470,26 +440,6 @@ dependencies = [
|
|||||||
"windows-link 0.2.1",
|
"windows-link 0.2.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "clang-sys"
|
|
||||||
version = "1.8.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
|
||||||
dependencies = [
|
|
||||||
"glob",
|
|
||||||
"libc",
|
|
||||||
"libloading 0.8.9",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cmake"
|
|
||||||
version = "0.1.57"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "combine"
|
name = "combine"
|
||||||
version = "4.6.7"
|
version = "4.6.7"
|
||||||
@ -876,12 +826,6 @@ version = "1.0.20"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
|
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "either"
|
|
||||||
version = "1.15.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "embed-resource"
|
name = "embed-resource"
|
||||||
version = "3.0.8"
|
version = "3.0.8"
|
||||||
@ -1043,12 +987,6 @@ dependencies = [
|
|||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "fs_extra"
|
|
||||||
version = "1.3.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "funty"
|
name = "funty"
|
||||||
version = "2.0.0"
|
version = "2.0.0"
|
||||||
@ -1798,15 +1736,6 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "itertools"
|
|
||||||
version = "0.13.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
|
||||||
dependencies = [
|
|
||||||
"either",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itoa"
|
name = "itoa"
|
||||||
version = "1.0.18"
|
version = "1.0.18"
|
||||||
@ -1961,7 +1890,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf"
|
checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"gtk-sys",
|
"gtk-sys",
|
||||||
"libloading 0.7.4",
|
"libloading",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -1981,16 +1910,6 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "libloading"
|
|
||||||
version = "0.8.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"windows-link 0.2.1",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libredox"
|
name = "libredox"
|
||||||
version = "0.1.15"
|
version = "0.1.15"
|
||||||
@ -2099,12 +2018,6 @@ version = "0.3.17"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "minimal-lexical"
|
|
||||||
version = "0.2.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "miniz_oxide"
|
name = "miniz_oxide"
|
||||||
version = "0.8.9"
|
version = "0.8.9"
|
||||||
@ -2189,16 +2102,6 @@ version = "0.1.14"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "nom"
|
|
||||||
version = "7.1.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
|
||||||
dependencies = [
|
|
||||||
"memchr",
|
|
||||||
"minimal-lexical",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-conv"
|
name = "num-conv"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
@ -4892,28 +4795,6 @@ dependencies = [
|
|||||||
"windows-core 0.61.2",
|
"windows-core 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "whisper-rs"
|
|
||||||
version = "0.16.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2088172d00f936c348d6a72f488dc2660ab3f507263a195df308a3c2383229f6"
|
|
||||||
dependencies = [
|
|
||||||
"whisper-rs-sys",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "whisper-rs-sys"
|
|
||||||
version = "0.15.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6986c0fe081241d391f09b9a071fbcbb59720c3563628c3c829057cf69f2a56f"
|
|
||||||
dependencies = [
|
|
||||||
"bindgen",
|
|
||||||
"cfg-if",
|
|
||||||
"cmake",
|
|
||||||
"fs_extra",
|
|
||||||
"semver",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi"
|
name = "winapi"
|
||||||
version = "0.3.9"
|
version = "0.3.9"
|
||||||
|
|||||||
@ -27,6 +27,5 @@ tauri-plugin-fs = "2"
|
|||||||
tauri-plugin-log = "2"
|
tauri-plugin-log = "2"
|
||||||
dirs = "5.0"
|
dirs = "5.0"
|
||||||
ureq = "2.9"
|
ureq = "2.9"
|
||||||
whisper-rs = "0.16.0"
|
|
||||||
hound = "3.5"
|
hound = "3.5"
|
||||||
tempfile = "3.10"
|
tempfile = "3.10"
|
||||||
|
|||||||
98
src-tauri/src/ai_provider.rs
Normal file
98
src-tauri/src/ai_provider.rs
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
use std::process::Command;
|
||||||
|
use serde_json;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct AICompleteResult {
|
||||||
|
pub response: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct OllamaModelsResult {
|
||||||
|
pub models: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Complete text using AI provider
|
||||||
|
pub fn complete(
|
||||||
|
prompt: &str,
|
||||||
|
provider: &str,
|
||||||
|
model: Option<&str>,
|
||||||
|
api_key: Option<&str>,
|
||||||
|
base_url: Option<&str>,
|
||||||
|
system_prompt: Option<&str>,
|
||||||
|
temperature: f64,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("ai_provider.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let mut args = vec![script_path, "complete", prompt, provider];
|
||||||
|
|
||||||
|
if let Some(m) = model {
|
||||||
|
args.push(m);
|
||||||
|
} else {
|
||||||
|
args.push("null");
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(key) = api_key {
|
||||||
|
args.push(key);
|
||||||
|
} else {
|
||||||
|
args.push("null");
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(url) = base_url {
|
||||||
|
args.push(url);
|
||||||
|
} else {
|
||||||
|
args.push("null");
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(sys) = system_prompt {
|
||||||
|
args.push(sys);
|
||||||
|
} else {
|
||||||
|
args.push("null");
|
||||||
|
}
|
||||||
|
|
||||||
|
let temp_str = temperature.to_string();
|
||||||
|
args.push(&temp_str);
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&args)
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: AICompleteResult = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.response)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// List available Ollama models
|
||||||
|
pub fn list_ollama_models(base_url: &str) -> Result<Vec<String>, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("ai_provider.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "list_ollama_models", base_url])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: OllamaModelsResult = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.models)
|
||||||
|
}
|
||||||
61
src-tauri/src/audio_cleaner.rs
Normal file
61
src-tauri/src/audio_cleaner.rs
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
use std::process::Command;
|
||||||
|
use serde_json;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct CleanAudioResult {
|
||||||
|
pub output_path: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct DeepFilterStatus {
|
||||||
|
pub available: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clean audio using DeepFilterNet or FFmpeg fallback
|
||||||
|
pub fn clean_audio(input_path: &str, output_path: &str) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("audio_cleaner.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "clean_audio", input_path, output_path])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: CleanAudioResult = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.output_path)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if DeepFilterNet is available
|
||||||
|
pub fn is_deepfilter_available() -> Result<bool, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("audio_cleaner.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "is_deepfilter_available"])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: DeepFilterStatus = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.available)
|
||||||
|
}
|
||||||
66
src-tauri/src/background_removal.rs
Normal file
66
src-tauri/src/background_removal.rs
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
use std::process::Command;
|
||||||
|
use serde_json;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct BackgroundRemovalStatus {
|
||||||
|
pub available: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct BackgroundRemovalResult {
|
||||||
|
pub output_path: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if background removal is available
|
||||||
|
pub fn is_available() -> Result<bool, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("background_removal.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "is_available"])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: BackgroundRemovalStatus = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.available)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove background on export (placeholder for Phase 5)
|
||||||
|
pub fn remove_background_on_export(
|
||||||
|
input_path: &str,
|
||||||
|
output_path: &str,
|
||||||
|
replacement: &str,
|
||||||
|
replacement_value: &str,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("background_removal.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "remove_background_on_export", input_path, output_path, replacement, replacement_value])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: BackgroundRemovalResult = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.output_path)
|
||||||
|
}
|
||||||
177
src-tauri/src/caption_generator.rs
Normal file
177
src-tauri/src/caption_generator.rs
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
use std::process::Command;
|
||||||
|
use serde_json;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct Word {
|
||||||
|
pub word: String,
|
||||||
|
pub start: f64,
|
||||||
|
pub end: f64,
|
||||||
|
pub confidence: f64,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub speaker: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct CaptionStyle {
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub font_name: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub font_size: Option<u32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub font_color: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub bold: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct CaptionContent {
|
||||||
|
pub content: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct SaveCaptionsResult {
|
||||||
|
pub output_path: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate SRT caption content
|
||||||
|
pub fn generate_srt(
|
||||||
|
words: &[Word],
|
||||||
|
deleted_indices: Option<&std::collections::HashSet<usize>>,
|
||||||
|
words_per_line: usize,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("caption_generator.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let words_json = serde_json::to_string(words)
|
||||||
|
.map_err(|e| format!("Failed to serialize words: {}", e))?;
|
||||||
|
|
||||||
|
let deleted_json = match deleted_indices {
|
||||||
|
Some(indices) => serde_json::to_string(indices)
|
||||||
|
.map_err(|e| format!("Failed to serialize deleted indices: {}", e))?,
|
||||||
|
None => "null".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "generate_srt", &words_json, &deleted_json, &words_per_line.to_string()])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: CaptionContent = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.content)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate VTT caption content
|
||||||
|
pub fn generate_vtt(
|
||||||
|
words: &[Word],
|
||||||
|
deleted_indices: Option<&std::collections::HashSet<usize>>,
|
||||||
|
words_per_line: usize,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("caption_generator.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let words_json = serde_json::to_string(words)
|
||||||
|
.map_err(|e| format!("Failed to serialize words: {}", e))?;
|
||||||
|
|
||||||
|
let deleted_json = match deleted_indices {
|
||||||
|
Some(indices) => serde_json::to_string(indices)
|
||||||
|
.map_err(|e| format!("Failed to serialize deleted indices: {}", e))?,
|
||||||
|
None => "null".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "generate_vtt", &words_json, &deleted_json, &words_per_line.to_string()])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: CaptionContent = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.content)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate ASS subtitle content
|
||||||
|
pub fn generate_ass(
|
||||||
|
words: &[Word],
|
||||||
|
deleted_indices: Option<&std::collections::HashSet<usize>>,
|
||||||
|
words_per_line: usize,
|
||||||
|
style: Option<&CaptionStyle>,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("caption_generator.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let words_json = serde_json::to_string(words)
|
||||||
|
.map_err(|e| format!("Failed to serialize words: {}", e))?;
|
||||||
|
|
||||||
|
let deleted_json = match deleted_indices {
|
||||||
|
Some(indices) => serde_json::to_string(indices)
|
||||||
|
.map_err(|e| format!("Failed to serialize deleted indices: {}", e))?,
|
||||||
|
None => "null".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let style_json = match style {
|
||||||
|
Some(s) => serde_json::to_string(s)
|
||||||
|
.map_err(|e| format!("Failed to serialize style: {}", e))?,
|
||||||
|
None => "null".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "generate_ass", &words_json, &deleted_json, &words_per_line.to_string(), &style_json])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: CaptionContent = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.content)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Save caption content to file
|
||||||
|
pub fn save_captions(content: &str, output_path: &str) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("caption_generator.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "save_captions", content, output_path])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: SaveCaptionsResult = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.output_path)
|
||||||
|
}
|
||||||
82
src-tauri/src/diarization.rs
Normal file
82
src-tauri/src/diarization.rs
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
use std::process::Command;
|
||||||
|
use serde_json;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct Word {
|
||||||
|
pub word: String,
|
||||||
|
pub start: f64,
|
||||||
|
pub end: f64,
|
||||||
|
pub confidence: f64,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub speaker: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct Segment {
|
||||||
|
pub id: usize,
|
||||||
|
pub start: f64,
|
||||||
|
pub end: f64,
|
||||||
|
pub text: String,
|
||||||
|
pub words: Vec<Word>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub speaker: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
|
pub struct TranscriptionResult {
|
||||||
|
pub words: Vec<Word>,
|
||||||
|
pub segments: Vec<Segment>,
|
||||||
|
pub language: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Apply speaker diarization to transcription result
|
||||||
|
pub fn diarize_and_label(
|
||||||
|
transcription_result: &TranscriptionResult,
|
||||||
|
audio_path: &str,
|
||||||
|
hf_token: Option<&str>,
|
||||||
|
num_speakers: Option<u32>,
|
||||||
|
use_gpu: bool,
|
||||||
|
) -> Result<TranscriptionResult, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("diarization.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let transcription_json = serde_json::to_string(transcription_result)
|
||||||
|
.map_err(|e| format!("Failed to serialize transcription: {}", e))?;
|
||||||
|
|
||||||
|
let mut args = vec![script_path, "diarize_and_label", &transcription_json, audio_path];
|
||||||
|
|
||||||
|
if let Some(token) = hf_token {
|
||||||
|
args.push(token);
|
||||||
|
} else {
|
||||||
|
args.push("null");
|
||||||
|
}
|
||||||
|
|
||||||
|
let speakers_str;
|
||||||
|
if let Some(speakers) = num_speakers {
|
||||||
|
speakers_str = speakers.to_string();
|
||||||
|
args.push(&speakers_str);
|
||||||
|
} else {
|
||||||
|
args.push("null");
|
||||||
|
}
|
||||||
|
|
||||||
|
args.push(if use_gpu { "true" } else { "false" });
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&args)
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: TranscriptionResult = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
@ -1,6 +1,13 @@
|
|||||||
// --- Commands ---
|
// --- Commands ---
|
||||||
|
|
||||||
|
mod paths;
|
||||||
mod transcription;
|
mod transcription;
|
||||||
|
mod video_editor;
|
||||||
|
mod audio_cleaner;
|
||||||
|
mod diarization;
|
||||||
|
mod ai_provider;
|
||||||
|
mod caption_generator;
|
||||||
|
mod background_removal;
|
||||||
|
|
||||||
/// Returns the backend URL. Stubbed for now; will be replaced once the
|
/// Returns the backend URL. Stubbed for now; will be replaced once the
|
||||||
/// Python/Rust backend is fully wired up.
|
/// Python/Rust backend is fully wired up.
|
||||||
@ -56,6 +63,162 @@ async fn transcribe_audio(file_path: String, model_name: String, language: Optio
|
|||||||
.map_err(|e| format!("Task error: {:?}", e))?
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Export video using stream copy (fast, lossless)
|
||||||
|
#[tauri::command]
|
||||||
|
async fn export_stream_copy(input_path: String, output_path: String, keep_segments: serde_json::Value) -> Result<String, String> {
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
video_editor::export_stream_copy(&input_path, &output_path, &keep_segments)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Export video with re-encoding
|
||||||
|
#[tauri::command]
|
||||||
|
async fn export_reencode(input_path: String, output_path: String, keep_segments: serde_json::Value, resolution: String, format_hint: String) -> Result<String, String> {
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
video_editor::export_reencode(&input_path, &output_path, &keep_segments, &resolution, &format_hint)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Export video with re-encoding and subtitles
|
||||||
|
#[tauri::command]
|
||||||
|
async fn export_reencode_with_subs(input_path: String, output_path: String, keep_segments: serde_json::Value, subtitle_path: String, resolution: String, format_hint: String) -> Result<String, String> {
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
video_editor::export_reencode_with_subs(&input_path, &output_path, &keep_segments, &subtitle_path, &resolution, &format_hint)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get video information
|
||||||
|
#[tauri::command]
|
||||||
|
async fn get_video_info(input_path: String) -> Result<video_editor::VideoInfo, String> {
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
video_editor::get_video_info(&input_path)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clean audio using DeepFilterNet or FFmpeg fallback
|
||||||
|
#[tauri::command]
|
||||||
|
async fn clean_audio(input_path: String, output_path: String) -> Result<String, String> {
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
audio_cleaner::clean_audio(&input_path, &output_path)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if DeepFilterNet is available
|
||||||
|
#[tauri::command]
|
||||||
|
async fn is_deepfilter_available() -> Result<bool, String> {
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
audio_cleaner::is_deepfilter_available()
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Apply speaker diarization to transcription result
|
||||||
|
#[tauri::command]
|
||||||
|
async fn diarize_and_label(transcription_result: diarization::TranscriptionResult, audio_path: String, hf_token: Option<String>, num_speakers: Option<u32>, use_gpu: Option<bool>) -> Result<diarization::TranscriptionResult, String> {
|
||||||
|
let use_gpu = use_gpu.unwrap_or(true);
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
diarization::diarize_and_label(&transcription_result, &audio_path, hf_token.as_deref(), num_speakers, use_gpu)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Complete text using AI provider
|
||||||
|
#[tauri::command]
|
||||||
|
async fn ai_complete(prompt: String, provider: String, model: Option<String>, api_key: Option<String>, base_url: Option<String>, system_prompt: Option<String>, temperature: Option<f64>) -> Result<String, String> {
|
||||||
|
let temperature = temperature.unwrap_or(0.3);
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
ai_provider::complete(&prompt, &provider, model.as_deref(), api_key.as_deref(), base_url.as_deref(), system_prompt.as_deref(), temperature)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// List available Ollama models
|
||||||
|
#[tauri::command]
|
||||||
|
async fn list_ollama_models(base_url: Option<String>) -> Result<Vec<String>, String> {
|
||||||
|
let base_url = base_url.unwrap_or_else(|| "http://localhost:11434".to_string());
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
ai_provider::list_ollama_models(&base_url)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate SRT caption content
|
||||||
|
#[tauri::command]
|
||||||
|
async fn generate_srt(words: Vec<caption_generator::Word>, deleted_indices: Option<std::collections::HashSet<usize>>, words_per_line: Option<usize>) -> Result<String, String> {
|
||||||
|
let words_per_line = words_per_line.unwrap_or(8);
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
caption_generator::generate_srt(&words, deleted_indices.as_ref(), words_per_line)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate VTT caption content
|
||||||
|
#[tauri::command]
|
||||||
|
async fn generate_vtt(words: Vec<caption_generator::Word>, deleted_indices: Option<std::collections::HashSet<usize>>, words_per_line: Option<usize>) -> Result<String, String> {
|
||||||
|
let words_per_line = words_per_line.unwrap_or(8);
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
caption_generator::generate_vtt(&words, deleted_indices.as_ref(), words_per_line)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate ASS subtitle content
|
||||||
|
#[tauri::command]
|
||||||
|
async fn generate_ass(words: Vec<caption_generator::Word>, deleted_indices: Option<std::collections::HashSet<usize>>, words_per_line: Option<usize>, style: Option<caption_generator::CaptionStyle>) -> Result<String, String> {
|
||||||
|
let words_per_line = words_per_line.unwrap_or(8);
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
caption_generator::generate_ass(&words, deleted_indices.as_ref(), words_per_line, style.as_ref())
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Save caption content to file
|
||||||
|
#[tauri::command]
|
||||||
|
async fn save_captions(content: String, output_path: String) -> Result<String, String> {
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
caption_generator::save_captions(&content, &output_path)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if background removal is available
|
||||||
|
#[tauri::command]
|
||||||
|
async fn is_background_removal_available() -> Result<bool, String> {
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
background_removal::is_available()
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove background on export (placeholder for Phase 5)
|
||||||
|
#[tauri::command]
|
||||||
|
async fn remove_background_on_export(input_path: String, output_path: String, replacement: String, replacement_value: String) -> Result<String, String> {
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
background_removal::remove_background_on_export(&input_path, &output_path, &replacement, &replacement_value)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task error: {:?}", e))?
|
||||||
|
}
|
||||||
|
|
||||||
// --- App entry point ---
|
// --- App entry point ---
|
||||||
|
|
||||||
#[cfg_attr(mobile, tauri::mobile_entry_point)]
|
#[cfg_attr(mobile, tauri::mobile_entry_point)]
|
||||||
@ -79,6 +242,21 @@ pub fn run() {
|
|||||||
decrypt_string,
|
decrypt_string,
|
||||||
ensure_model,
|
ensure_model,
|
||||||
transcribe_audio,
|
transcribe_audio,
|
||||||
|
export_stream_copy,
|
||||||
|
export_reencode,
|
||||||
|
export_reencode_with_subs,
|
||||||
|
get_video_info,
|
||||||
|
clean_audio,
|
||||||
|
is_deepfilter_available,
|
||||||
|
diarize_and_label,
|
||||||
|
ai_complete,
|
||||||
|
list_ollama_models,
|
||||||
|
generate_srt,
|
||||||
|
generate_vtt,
|
||||||
|
generate_ass,
|
||||||
|
save_captions,
|
||||||
|
is_background_removal_available,
|
||||||
|
remove_background_on_export,
|
||||||
])
|
])
|
||||||
.run(tauri::generate_context!())
|
.run(tauri::generate_context!())
|
||||||
.expect("error while running tauri application");
|
.expect("error while running tauri application");
|
||||||
|
|||||||
30
src-tauri/src/paths.rs
Normal file
30
src-tauri/src/paths.rs
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
/// Resolve the project root from the executable path.
|
||||||
|
/// In dev mode, the binary lives at: <root>/src-tauri/target/debug/<bin>
|
||||||
|
/// So the project root is 4 levels above the binary.
|
||||||
|
pub fn project_root() -> PathBuf {
|
||||||
|
let exe = std::env::current_exe().expect("Failed to get executable path");
|
||||||
|
// exe -> debug/ -> target/ -> src-tauri/ -> root
|
||||||
|
exe.parent()
|
||||||
|
.and_then(|p| p.parent())
|
||||||
|
.and_then(|p| p.parent())
|
||||||
|
.and_then(|p| p.parent())
|
||||||
|
.map(|p| p.to_path_buf())
|
||||||
|
.unwrap_or_else(|| PathBuf::from("."))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Absolute path to the venv Python 3.10 interpreter.
|
||||||
|
pub fn python_exe() -> PathBuf {
|
||||||
|
project_root().join(".venv/bin/python3.10")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Absolute path to a script in the backend directory.
|
||||||
|
pub fn backend_script(name: &str) -> PathBuf {
|
||||||
|
project_root().join("backend").join(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Absolute path to a script at the project root.
|
||||||
|
pub fn root_script(name: &str) -> PathBuf {
|
||||||
|
project_root().join(name)
|
||||||
|
}
|
||||||
@ -1,6 +1,5 @@
|
|||||||
use std::fs;
|
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
use whisper_rs::{WhisperContext, WhisperContextParameters, FullParams, SamplingStrategy};
|
use serde_json;
|
||||||
|
|
||||||
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
|
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
|
||||||
pub struct TranscriptionResult {
|
pub struct TranscriptionResult {
|
||||||
@ -26,176 +25,46 @@ pub struct Segment {
|
|||||||
pub words: Vec<Word>,
|
pub words: Vec<Word>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract audio from a video/audio file to a 16kHz mono WAV using ffmpeg
|
/// Transcribe audio file using Python faster-whisper
|
||||||
fn extract_to_wav(input_path: &str, output_path: &str) -> Result<(), String> {
|
|
||||||
let status = Command::new("ffmpeg")
|
|
||||||
.args(["-y", "-i", input_path, "-vn", "-ar", "16000", "-ac", "1", "-f", "wav", output_path])
|
|
||||||
.status()
|
|
||||||
.map_err(|e| format!("Failed to run ffmpeg: {}", e))?;
|
|
||||||
|
|
||||||
if !status.success() {
|
|
||||||
return Err(format!("ffmpeg exited with code: {:?}", status.code()));
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Transcribe audio file using whisper-rs (real Whisper.cpp inference)
|
|
||||||
pub fn transcribe_audio(
|
pub fn transcribe_audio(
|
||||||
file_path: &str,
|
file_path: &str,
|
||||||
model_name: &str,
|
model_name: &str,
|
||||||
language: Option<&str>,
|
language: Option<&str>,
|
||||||
) -> Result<TranscriptionResult, String> {
|
) -> Result<TranscriptionResult, String> {
|
||||||
// Ensure model is downloaded
|
// Path to Python venv and script
|
||||||
let model_path = ensure_model_downloaded(model_name)?;
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::root_script("transcribe.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
// Extract audio to temp 16kHz mono WAV
|
// Build command args
|
||||||
let tmp_wav = tempfile::Builder::new()
|
let mut args = vec![script_path, file_path, model_name];
|
||||||
.suffix(".wav")
|
|
||||||
.tempfile()
|
|
||||||
.map_err(|e| format!("Failed to create temp file: {}", e))?;
|
|
||||||
let wav_path = tmp_wav.path().to_string_lossy().to_string();
|
|
||||||
|
|
||||||
extract_to_wav(file_path, &wav_path)?;
|
|
||||||
|
|
||||||
// Read WAV as f32 samples
|
|
||||||
let mut reader = hound::WavReader::open(&wav_path)
|
|
||||||
.map_err(|e| format!("Failed to read WAV: {}", e))?;
|
|
||||||
let spec = reader.spec();
|
|
||||||
let samples: Vec<f32> = match spec.sample_format {
|
|
||||||
hound::SampleFormat::Int => reader
|
|
||||||
.samples::<i16>()
|
|
||||||
.map(|s| s.map(|v| v as f32 / 32768.0).map_err(|e| format!("{}", e)))
|
|
||||||
.collect::<Result<Vec<f32>, _>>()?,
|
|
||||||
hound::SampleFormat::Float => reader
|
|
||||||
.samples::<f32>()
|
|
||||||
.map(|s| s.map_err(|e| format!("{}", e)))
|
|
||||||
.collect::<Result<Vec<f32>, _>>()?,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Load Whisper model and transcribe
|
|
||||||
let ctx_params = WhisperContextParameters::default();
|
|
||||||
let ctx = WhisperContext::new_with_params(&model_path, ctx_params)
|
|
||||||
.map_err(|e| format!("Failed to load model: {:?}", e))?;
|
|
||||||
let mut state = ctx.create_state()
|
|
||||||
.map_err(|e| format!("Failed to create state: {:?}", e))?;
|
|
||||||
|
|
||||||
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
|
|
||||||
params.set_print_special(false);
|
|
||||||
params.set_print_progress(false);
|
|
||||||
params.set_print_realtime(false);
|
|
||||||
params.set_print_timestamps(false);
|
|
||||||
params.set_token_timestamps(true);
|
|
||||||
params.set_single_segment(false);
|
|
||||||
if let Some(lang) = language {
|
if let Some(lang) = language {
|
||||||
params.set_language(Some(lang));
|
args.push(lang);
|
||||||
}
|
}
|
||||||
|
|
||||||
state.full(params, &samples)
|
// Run Python script
|
||||||
.map_err(|e| format!("Transcription failed: {:?}", e))?;
|
let output = Command::new(python_exe)
|
||||||
|
.args(&args)
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
// Extract word-level results using the 0.16.0 iterator API
|
if !output.status.success() {
|
||||||
let mut all_words: Vec<Word> = Vec::new();
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
let mut segments: Vec<Segment> = Vec::new();
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
let detected_language = language.unwrap_or("en").to_string();
|
|
||||||
|
|
||||||
for (seg_idx, segment) in state.as_iter().enumerate() {
|
|
||||||
let seg_text = segment.to_str_lossy()
|
|
||||||
.map_err(|e| format!("Segment text error: {:?}", e))?;
|
|
||||||
let seg_t0 = segment.start_timestamp() as f64 / 100.0;
|
|
||||||
let seg_t1 = segment.end_timestamp() as f64 / 100.0;
|
|
||||||
|
|
||||||
let mut seg_words: Vec<Word> = Vec::new();
|
|
||||||
|
|
||||||
for tok_i in 0..segment.n_tokens() {
|
|
||||||
if let Some(token) = segment.get_token(tok_i) {
|
|
||||||
let token_text = match token.to_str_lossy() {
|
|
||||||
Ok(t) => t.into_owned(),
|
|
||||||
Err(_) => continue,
|
|
||||||
};
|
|
||||||
let token_data = token.token_data();
|
|
||||||
|
|
||||||
// Skip special tokens
|
|
||||||
let trimmed = token_text.trim();
|
|
||||||
if trimmed.is_empty() || trimmed.starts_with('[') || trimmed.starts_with('<') {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let word = Word {
|
// Parse JSON output
|
||||||
word: trimmed.to_string(),
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
start: token_data.t0 as f64 / 100.0,
|
let result: TranscriptionResult = serde_json::from_str(&stdout.trim())
|
||||||
end: token_data.t1 as f64 / 100.0,
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
confidence: token_data.p as f64,
|
|
||||||
};
|
Ok(result)
|
||||||
all_words.push(word.clone());
|
|
||||||
seg_words.push(word);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
segments.push(Segment {
|
/// Ensure model is available (faster-whisper handles this automatically)
|
||||||
id: seg_idx,
|
pub fn ensure_model_downloaded(_model_name: &str) -> Result<String, String> {
|
||||||
start: seg_t0,
|
// faster-whisper downloads models on first use, so just return success
|
||||||
end: seg_t1,
|
Ok("Model ready".to_string())
|
||||||
text: seg_text.trim().to_string(),
|
|
||||||
words: seg_words,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(TranscriptionResult {
|
|
||||||
words: all_words,
|
|
||||||
segments,
|
|
||||||
language: detected_language,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Download and cache Whisper model
|
|
||||||
pub fn ensure_model_downloaded(model_name: &str) -> Result<String, String> {
|
|
||||||
// Get app data directory for storing models
|
|
||||||
let app_data_dir = dirs::data_dir()
|
|
||||||
.ok_or("Could not find app data directory")?
|
|
||||||
.join("TalkEdit")
|
|
||||||
.join("models");
|
|
||||||
|
|
||||||
// Create directory if it doesn't exist
|
|
||||||
fs::create_dir_all(&app_data_dir)
|
|
||||||
.map_err(|e| format!("Failed to create models directory: {}", e))?;
|
|
||||||
|
|
||||||
let model_path = app_data_dir.join(format!("ggml-{}.bin", model_name));
|
|
||||||
|
|
||||||
// Check if model already exists
|
|
||||||
if model_path.exists() {
|
|
||||||
return Ok(model_path.to_string_lossy().to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only download smaller models automatically
|
|
||||||
let allowed_models = ["tiny", "base", "small"];
|
|
||||||
if !allowed_models.contains(&model_name) {
|
|
||||||
return Err(format!("Model '{}' is not available for automatic download. Only tiny, base, and small models are supported.", model_name));
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("Downloading Whisper model: {}...", model_name);
|
|
||||||
|
|
||||||
// Download the model from ggerganov's whisper.cpp repo
|
|
||||||
let url = format!("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-{}.bin", model_name);
|
|
||||||
let response = ureq::get(&url)
|
|
||||||
.call()
|
|
||||||
.map_err(|e| format!("Failed to download model: {}", e))?;
|
|
||||||
|
|
||||||
let len = response
|
|
||||||
.header("content-length")
|
|
||||||
.and_then(|s| s.parse::<usize>().ok())
|
|
||||||
.unwrap_or(0);
|
|
||||||
|
|
||||||
println!("Model size: {} bytes", len);
|
|
||||||
|
|
||||||
let mut reader = response.into_reader();
|
|
||||||
let mut file = fs::File::create(&model_path)
|
|
||||||
.map_err(|e| format!("Failed to create model file: {}", e))?;
|
|
||||||
|
|
||||||
std::io::copy(&mut reader, &mut file)
|
|
||||||
.map_err(|e| format!("Failed to write model file: {}", e))?;
|
|
||||||
|
|
||||||
println!("Model downloaded successfully: {}", model_path.display());
|
|
||||||
|
|
||||||
Ok(model_path.to_string_lossy().to_string())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
138
src-tauri/src/video_editor.rs
Normal file
138
src-tauri/src/video_editor.rs
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
use std::process::Command;
|
||||||
|
use serde_json;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
/// Metadata about a video file, deserialized from the JSON printed by the
/// Python backend's `get_video_info` command.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct VideoInfo {
    /// Playback duration (presumably seconds, per ffprobe convention — confirm against backend).
    pub duration: f64,
    /// File size in bytes.
    pub size: u64,
    /// Container/format name as reported by the backend (e.g. "mp4" — confirm).
    pub format: String,
    /// Frame width in pixels.
    pub width: u32,
    /// Frame height in pixels.
    pub height: u32,
    /// Video codec name as reported by the backend.
    pub codec: String,
    /// Frame rate (frames per second).
    pub fps: f64,
}
|
||||||
|
|
||||||
|
/// JSON payload printed on stdout by the backend's export commands;
/// carries the path of the file that was written.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct ExportResult {
    /// Absolute or backend-relative path of the exported file.
    pub output_path: String,
}
|
||||||
|
|
||||||
|
/// Export video using stream copy (fast, lossless)
|
||||||
|
pub fn export_stream_copy(
|
||||||
|
input_path: &str,
|
||||||
|
output_path: &str,
|
||||||
|
keep_segments: &serde_json::Value,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("video_editor.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let keep_segments_str = keep_segments.to_string();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "export_stream_copy", input_path, output_path, &keep_segments_str])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: ExportResult = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.output_path)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Export video with re-encoding
|
||||||
|
pub fn export_reencode(
|
||||||
|
input_path: &str,
|
||||||
|
output_path: &str,
|
||||||
|
keep_segments: &serde_json::Value,
|
||||||
|
resolution: &str,
|
||||||
|
format_hint: &str,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("video_editor.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let keep_segments_str = keep_segments.to_string();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "export_reencode", input_path, output_path, &keep_segments_str, resolution, format_hint])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: ExportResult = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.output_path)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Export video with re-encoding and subtitles
|
||||||
|
pub fn export_reencode_with_subs(
|
||||||
|
input_path: &str,
|
||||||
|
output_path: &str,
|
||||||
|
keep_segments: &serde_json::Value,
|
||||||
|
subtitle_path: &str,
|
||||||
|
resolution: &str,
|
||||||
|
format_hint: &str,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("video_editor.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let keep_segments_str = keep_segments.to_string();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "export_reencode_with_subs", input_path, output_path, &keep_segments_str, subtitle_path, resolution, format_hint])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: ExportResult = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result.output_path)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get video information
|
||||||
|
pub fn get_video_info(input_path: &str) -> Result<VideoInfo, String> {
|
||||||
|
let python_exe = crate::paths::python_exe();
|
||||||
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
||||||
|
let script_path = crate::paths::backend_script("video_editor.py");
|
||||||
|
let script_path = script_path.to_str().unwrap_or_default();
|
||||||
|
|
||||||
|
let output = Command::new(python_exe)
|
||||||
|
.args(&[script_path, "get_video_info", input_path])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||||
|
return Err(format!("Python script failed: {}", stderr));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: VideoInfo = serde_json::from_str(&stdout.trim())
|
||||||
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
130
test_api.py
Executable file
130
test_api.py
Executable file
@ -0,0 +1,130 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script for the TalkEdit API.
|
||||||
|
This script tests the new Tauri commands that expose all backend functions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add backend to path for direct testing
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent / "backend"))
|
||||||
|
|
||||||
|
def test_video_info():
    """Test get_video_info function"""
    from services.video_editor import get_video_info

    # Optional local fixture; without it the test is a counted-as-pass skip.
    test_video = "/path/to/test/video.mp4"  # Replace with actual test file
    if not os.path.exists(test_video):
        print("⚠ Video info test skipped (no test file)")
        return True

    try:
        info = get_video_info(test_video)
        print("✓ Video info test passed")
        print(f"  Duration: {info['duration']}")
        print(f"  Resolution: {info['width']}x{info['height']}")
        return True
    except Exception as e:
        print(f"✗ Video info test failed: {e}")
        return False
|
||||||
|
|
||||||
|
def test_caption_generation():
    """Test caption generation functions"""
    from services.caption_generator import generate_srt, generate_vtt

    # Six-word sample transcript: (text, start, end, confidence) per word.
    sample = [
        ("Hello", 0.0, 0.5, 0.9),
        ("world", 0.5, 1.0, 0.95),
        ("this", 1.0, 1.3, 0.8),
        ("is", 1.3, 1.5, 0.9),
        ("a", 1.5, 1.6, 0.85),
        ("test", 1.6, 2.0, 0.95),
    ]
    words = [
        {"word": w, "start": s, "end": e, "confidence": c}
        for w, s, e, c in sample
    ]

    try:
        srt_content = generate_srt(words)
        vtt_content = generate_vtt(words)

        # Spot-check both formats rather than comparing full output.
        looks_right = "Hello world" in srt_content and "WEBVTT" in vtt_content
        if looks_right:
            print("✓ Caption generation test passed")
            return True
        print("✗ Caption generation test failed: unexpected content")
        return False
    except Exception as e:
        print(f"✗ Caption generation test failed: {e}")
        return False
|
||||||
|
|
||||||
|
def test_ai_provider():
    """Test AI provider functions"""
    from services.ai_provider import AIProvider

    # This test never fails outright: an unreachable Ollama instance is an
    # expected environment condition, so both outcomes return True.
    try:
        # Test listing Ollama models (may fail if Ollama not running)
        models = AIProvider.list_ollama_models()
        print(f"✓ AI provider test passed (found {len(models)} models)")
    except Exception as e:
        print(f"⚠ AI provider test skipped: {e}")
    return True
|
||||||
|
|
||||||
|
def test_deepfilter_status():
    """Test DeepFilterNet availability check"""
    from services.audio_cleaner import is_deepfilter_available

    try:
        status = is_deepfilter_available()
    except Exception as e:
        print(f"✗ DeepFilter status test failed: {e}")
        return False
    # The check completing without raising is the pass condition; the
    # returned boolean is informational only.
    print(f"✓ DeepFilter status test passed (available: {status})")
    return True
|
||||||
|
|
||||||
|
def main():
    """Run every API smoke test and summarize the results.

    Each test prints its own pass/fail line; this driver tallies them and
    then lists the Tauri commands the backend exposes.

    Returns:
        True when every test passed, False otherwise.
    """
    print("Testing TalkEdit API functions...")
    print("=" * 50)

    tests = [
        ("Video Info", test_video_info),
        ("Caption Generation", test_caption_generation),
        ("AI Provider", test_ai_provider),
        ("DeepFilter Status", test_deepfilter_status),
    ]

    passed = 0
    total = len(tests)

    for name, test_func in tests:
        print(f"\nTesting {name}:")
        if test_func():
            passed += 1

    print("\n" + "=" * 50)
    print(f"Results: {passed}/{total} tests passed")

    if passed == total:
        print("🎉 All tests passed! The API is ready for AI testing.")
    else:
        print("⚠️  Some tests failed. Check the output above.")

    print("\nAvailable Tauri Commands:")
    commands = [
        "transcribe_audio",
        "export_stream_copy", "export_reencode", "export_reencode_with_subs", "get_video_info",
        "clean_audio", "is_deepfilter_available",
        "diarize_and_label",
        "ai_complete", "list_ollama_models",
        "generate_srt", "generate_vtt", "generate_ass", "save_captions",
        "is_background_removal_available", "remove_background_on_export",
    ]

    for cmd in commands:
        print(f"  - {cmd}")

    return passed == total


if __name__ == "__main__":
    # Propagate failure to the shell: previously the script always exited 0,
    # so CI could not detect broken tests.
    sys.exit(0 if main() else 1)
|
||||||
91
transcribe.py
Normal file
91
transcribe.py
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import tempfile
|
||||||
|
import subprocess
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
def extract_audio(input_path, output_path):
    """Extract audio from video/audio file to 16kHz mono WAV"""
    # -vn drops any video stream; 16 kHz mono WAV is the input format the
    # transcription step expects. check=True raises on a non-zero exit.
    cmd = ['ffmpeg', '-y', '-i', input_path]
    cmd += ['-vn', '-ar', '16000', '-ac', '1', '-f', 'wav', output_path]
    subprocess.run(cmd, check=True)
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: transcribe an audio/video file with faster-whisper.

    Usage: python transcribe.py <audio_file> <model_name> [language]

    Prints a JSON object with word-level timestamps, the segment list, and
    the detected language to stdout. Exits with status 1 on bad arguments.
    """
    # Imported here (not inside `finally` as before) so the cleanup path
    # cannot itself fail on the import machinery.
    import os

    if len(sys.argv) < 3:
        print("Usage: python transcribe.py <audio_file> <model_name> [language]", file=sys.stderr)
        sys.exit(1)

    audio_file = sys.argv[1]
    model_name = sys.argv[2]
    language = sys.argv[3] if len(sys.argv) > 3 else None

    # Extract audio to temp WAV if needed
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
        wav_path = tmp.name

    try:
        extract_audio(audio_file, wav_path)

        # Load model - use GPU if CUDA is available, else CPU with int8.
        # Probing for libcublas is used as a cheap proxy for a usable CUDA
        # runtime — NOTE(review): confirm this matches the deployed CUDA version.
        import ctypes
        try:
            ctypes.CDLL("libcublas.so.12")
            device = "cuda"
            compute_type = "float16"
        except OSError:
            device = "cpu"
            compute_type = "int8"

        model = WhisperModel(model_name, device=device, compute_type=compute_type)

        # Transcribe
        segments, info = model.transcribe(
            wav_path,
            language=language,
            beam_size=5,
            word_timestamps=True,
            vad_filter=True,
            vad_parameters=dict(threshold=0.5, min_speech_duration_ms=250),
            without_timestamps=False
        )

        # Convert to our format
        words = []
        segments_list = []

        for segment in segments:
            seg_words = []
            # segment.words can be None for segments without word timing
            # info; guard so one such segment doesn't crash the whole run.
            for word in segment.words or []:
                w = {
                    "word": word.word,
                    "start": word.start,
                    "end": word.end,
                    "confidence": word.probability
                }
                words.append(w)
                seg_words.append(w)

            segments_list.append({
                "id": len(segments_list),
                "start": segment.start,
                "end": segment.end,
                "text": segment.text,
                "words": seg_words
            })

        result = {
            "words": words,
            "segments": segments_list,
            "language": info.language
        }

        print(json.dumps(result))

    finally:
        # Best-effort cleanup: a failed unlink must not mask an earlier
        # transcription error raised in the try block.
        try:
            os.unlink(wav_path)
        except OSError:
            pass


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user