diff --git a/backend/ai_provider.py b/backend/ai_provider.py new file mode 100644 index 0000000..24e2c57 --- /dev/null +++ b/backend/ai_provider.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +""" +AI provider interface for Ollama, OpenAI, and Claude. +""" + +import json +import sys +from pathlib import Path + +# Add backend to path +sys.path.insert(0, str(Path(__file__).parent)) + +from services.ai_provider import AIProvider + + +def main(): + if len(sys.argv) < 2: + print("Usage: python ai_provider.py <command> [args...]", file=sys.stderr) + sys.exit(1) + + command = sys.argv[1] + + try: + if command == "complete": + if len(sys.argv) < 4: + print("Usage: python ai_provider.py complete <prompt> <provider> [model] [api_key] [base_url] [system_prompt] [temperature]", file=sys.stderr) + sys.exit(1) + prompt = sys.argv[2] + provider = sys.argv[3] + model = sys.argv[4] if len(sys.argv) > 4 and sys.argv[4] != "null" else None + api_key = sys.argv[5] if len(sys.argv) > 5 and sys.argv[5] != "null" else None + base_url = sys.argv[6] if len(sys.argv) > 6 and sys.argv[6] != "null" else None + system_prompt = sys.argv[7] if len(sys.argv) > 7 and sys.argv[7] != "null" else None + temperature = float(sys.argv[8]) if len(sys.argv) > 8 else 0.3 + + result = AIProvider.complete(prompt, provider, model, api_key, base_url, system_prompt, temperature) + print(json.dumps({"response": result})) + + elif command == "list_ollama_models": + base_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:11434" + result = AIProvider.list_ollama_models(base_url) + print(json.dumps({"models": result})) + + else: + print(f"Unknown command: {command}", file=sys.stderr) + sys.exit(1) + + except Exception as e: + print(json.dumps({"error": str(e)}), file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/audio_cleaner.py b/backend/audio_cleaner.py new file mode 100644 index 0000000..451bbb9 --- /dev/null +++ b/backend/audio_cleaner.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +""" +Audio cleaning operations using DeepFilterNet or FFmpeg fallback. +""" + +import json +import sys +from pathlib import Path + +# Add backend to path +sys.path.insert(0, str(Path(__file__).parent)) + +from services.audio_cleaner import clean_audio, is_deepfilter_available + + +def main(): + if len(sys.argv) < 2: + print("Usage: python audio_cleaner.py <command> [args...]", file=sys.stderr) + sys.exit(1) + + command = sys.argv[1] + + try: + if command == "clean_audio": + if len(sys.argv) != 4: + print("Usage: python audio_cleaner.py clean_audio <input_path> <output_path>", file=sys.stderr) + sys.exit(1) + input_path = sys.argv[2] + output_path = sys.argv[3] + result = clean_audio(input_path, output_path) + print(json.dumps({"output_path": result})) + + elif command == "is_deepfilter_available": + result = is_deepfilter_available() + print(json.dumps({"available": result})) + + else: + print(f"Unknown command: {command}", file=sys.stderr) + sys.exit(1) + + except Exception as e: + print(json.dumps({"error": str(e)}), file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/background_removal.py b/backend/background_removal.py new file mode 100644 index 0000000..f3263c2 --- /dev/null +++ b/backend/background_removal.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +""" +Background removal operations (placeholder for Phase 5). 
+""" + +import json +import sys +from pathlib import Path + +# Add backend to path +sys.path.insert(0, str(Path(__file__).parent)) + +from services.background_removal import is_available, remove_background_on_export + + +def main(): + if len(sys.argv) < 2: + print("Usage: python background_removal.py [args...]", file=sys.stderr) + sys.exit(1) + + command = sys.argv[1] + + try: + if command == "is_available": + result = is_available() + print(json.dumps({"available": result})) + + elif command == "remove_background_on_export": + if len(sys.argv) != 6: + print("Usage: python background_removal.py remove_background_on_export ", file=sys.stderr) + sys.exit(1) + input_path = sys.argv[2] + output_path = sys.argv[3] + replacement = sys.argv[4] + replacement_value = sys.argv[5] + + result = remove_background_on_export(input_path, output_path, replacement, replacement_value) + print(json.dumps({"output_path": result})) + + else: + print(f"Unknown command: {command}", file=sys.stderr) + sys.exit(1) + + except Exception as e: + print(json.dumps({"error": str(e)}), file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/caption_generator.py b/backend/caption_generator.py new file mode 100644 index 0000000..d3b4b83 --- /dev/null +++ b/backend/caption_generator.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +""" +Generate caption files from word-level timestamps. +""" + +import json +import sys +from pathlib import Path + +# Add backend to path +sys.path.insert(0, str(Path(__file__).parent)) + +from services.caption_generator import generate_srt, generate_vtt, generate_ass, save_captions + + +def main(): + if len(sys.argv) < 2: + print("Usage: python caption_generator.py [args...]", file=sys.stderr) + sys.exit(1) + + command = sys.argv[1] + + try: + if command == "generate_srt": + if len(sys.argv) < 4: + print("Usage: python caption_generator.py generate_srt [deleted_indices_json] [words_per_line]", file=sys.stderr) + sys.exit(1) + words = json.loads(sys.argv[2]) + deleted_indices = set(json.loads(sys.argv[3])) if len(sys.argv) > 3 and sys.argv[3] != "null" else None + words_per_line = int(sys.argv[4]) if len(sys.argv) > 4 else 8 + + result = generate_srt(words, deleted_indices, words_per_line) + print(json.dumps({"content": result})) + + elif command == "generate_vtt": + if len(sys.argv) < 4: + print("Usage: python caption_generator.py generate_vtt [deleted_indices_json] [words_per_line]", file=sys.stderr) + sys.exit(1) + words = json.loads(sys.argv[2]) + deleted_indices = set(json.loads(sys.argv[3])) if len(sys.argv) > 3 and sys.argv[3] != "null" else None + words_per_line = int(sys.argv[4]) if len(sys.argv) > 4 else 8 + + result = generate_vtt(words, deleted_indices, words_per_line) + print(json.dumps({"content": result})) + + elif command == "generate_ass": + if len(sys.argv) < 4: + print("Usage: python caption_generator.py generate_ass [deleted_indices_json] [words_per_line] [style_json]", file=sys.stderr) + sys.exit(1) + words = json.loads(sys.argv[2]) + deleted_indices = set(json.loads(sys.argv[3])) if len(sys.argv) > 3 and sys.argv[3] != "null" else None + words_per_line = int(sys.argv[4]) if len(sys.argv) > 4 else 8 + style = json.loads(sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] != "null" else None + + result = generate_ass(words, deleted_indices, words_per_line, style) + print(json.dumps({"content": result})) + + elif command == "save_captions": + if len(sys.argv) != 4: + print("Usage: python caption_generator.py 
save_captions ", file=sys.stderr) + sys.exit(1) + content = sys.argv[2] + output_path = sys.argv[3] + + result = save_captions(content, output_path) + print(json.dumps({"output_path": result})) + + else: + print(f"Unknown command: {command}", file=sys.stderr) + sys.exit(1) + + except Exception as e: + print(json.dumps({"error": str(e)}), file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/diarization.py b/backend/diarization.py new file mode 100644 index 0000000..5541908 --- /dev/null +++ b/backend/diarization.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +""" +Speaker diarization using pyannote.audio. +""" + +import json +import sys +from pathlib import Path + +# Add backend to path +sys.path.insert(0, str(Path(__file__).parent)) + +from services.diarization import diarize_and_label + + +def main(): + if len(sys.argv) < 2: + print("Usage: python diarization.py [args...]", file=sys.stderr) + sys.exit(1) + + command = sys.argv[1] + + try: + if command == "diarize_and_label": + if len(sys.argv) < 4: + print("Usage: python diarization.py diarize_and_label [hf_token] [num_speakers] [use_gpu]", file=sys.stderr) + sys.exit(1) + transcription_result = json.loads(sys.argv[2]) + audio_path = sys.argv[3] + hf_token = sys.argv[4] if len(sys.argv) > 4 else None + num_speakers = int(sys.argv[5]) if len(sys.argv) > 5 and sys.argv[5] != "null" else None + use_gpu = sys.argv[6].lower() == "true" if len(sys.argv) > 6 else True + + result = diarize_and_label(transcription_result, audio_path, hf_token, num_speakers, use_gpu) + print(json.dumps(result)) + + else: + print(f"Unknown command: {command}", file=sys.stderr) + sys.exit(1) + + except Exception as e: + print(json.dumps({"error": str(e)}), file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/video_editor.py b/backend/video_editor.py new file mode 100644 index 0000000..aa84617 --- /dev/null +++ b/backend/video_editor.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +""" +Video editing operations using FFmpeg. 
+""" + +import json +import sys +from pathlib import Path + +# Add backend to path +sys.path.insert(0, str(Path(__file__).parent)) + +from services.video_editor import export_stream_copy, export_reencode, export_reencode_with_subs, get_video_info + + +def main(): + if len(sys.argv) < 2: + print("Usage: python video_editor.py [args...]", file=sys.stderr) + sys.exit(1) + + command = sys.argv[1] + + try: + if command == "export_stream_copy": + if len(sys.argv) != 5: + print("Usage: python video_editor.py export_stream_copy ", file=sys.stderr) + sys.exit(1) + input_path = sys.argv[2] + output_path = sys.argv[3] + keep_segments = json.loads(sys.argv[4]) + result = export_stream_copy(input_path, output_path, keep_segments) + print(json.dumps({"output_path": result})) + + elif command == "export_reencode": + if len(sys.argv) != 7: + print("Usage: python video_editor.py export_reencode ", file=sys.stderr) + sys.exit(1) + input_path = sys.argv[2] + output_path = sys.argv[3] + keep_segments = json.loads(sys.argv[4]) + resolution = sys.argv[5] + format_hint = sys.argv[6] + result = export_reencode(input_path, output_path, keep_segments, resolution, format_hint) + print(json.dumps({"output_path": result})) + + elif command == "export_reencode_with_subs": + if len(sys.argv) != 8: + print("Usage: python video_editor.py export_reencode_with_subs ", file=sys.stderr) + sys.exit(1) + input_path = sys.argv[2] + output_path = sys.argv[3] + keep_segments = json.loads(sys.argv[4]) + subtitle_path = sys.argv[5] + resolution = sys.argv[6] + format_hint = sys.argv[7] + result = export_reencode_with_subs(input_path, output_path, keep_segments, subtitle_path, resolution, format_hint) + print(json.dumps({"output_path": result})) + + elif command == "get_video_info": + if len(sys.argv) != 3: + print("Usage: python video_editor.py get_video_info ", file=sys.stderr) + sys.exit(1) + input_path = sys.argv[2] + result = get_video_info(input_path) + print(json.dumps(result)) + + else: + print(f"Unknown command: {command}", file=sys.stderr) + sys.exit(1) + + except Exception as e: + print(json.dumps({"error": str(e)}), file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/plan.md b/plan.md index 6b1b6d5..4f0d3b1 100644 --- a/plan.md +++ b/plan.md @@ -60,6 +60,9 @@ Focus on what creators need for spoken content: No multi-track, voice cloning, or collaboration—keep it simple. +## 4. Notes +- Consider adding Parakeet TDT as a transcription option in the future for users who want alternatives to Whisper. + ## 5. Monetization Model - **Free Forever**: Core editing/transcription (unlimited local use). - **Pro License** ($29–49 one-time): Batch processing, high-quality voices (if adding TTS), custom presets, priority support. 
diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index d9226fb..38f25ab 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -91,7 +91,6 @@ dependencies = [ "tauri-plugin-log", "tempfile", "ureq", - "whisper-rs", ] [[package]] @@ -147,26 +146,6 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "bindgen" -version = "0.72.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" -dependencies = [ - "bitflags 2.11.0", - "cexpr", - "clang-sys", - "itertools", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn 2.0.117", -] - [[package]] name = "bit-set" version = "0.8.0" @@ -416,15 +395,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfb" version = "0.7.3" @@ -470,26 +440,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading 0.8.9", -] - -[[package]] -name = "cmake" -version = "0.1.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" -dependencies = [ - "cc", -] - [[package]] name = "combine" version = "4.6.7" @@ -876,12 +826,6 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - [[package]] name = "embed-resource" version = "3.0.8" @@ -1043,12 +987,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - [[package]] name = "funty" version = "2.0.0" @@ -1798,15 +1736,6 @@ dependencies = [ "serde", ] -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - [[package]] name = "itoa" version = "1.0.18" @@ -1961,7 +1890,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf" dependencies = [ "gtk-sys", - "libloading 0.7.4", + "libloading", "once_cell", ] @@ -1981,16 +1910,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "libloading" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" -dependencies = [ - "cfg-if", - "windows-link 0.2.1", -] - [[package]] name = 
"libredox" version = "0.1.15" @@ -2099,12 +2018,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.8.9" @@ -2189,16 +2102,6 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "num-conv" version = "0.2.1" @@ -4892,28 +4795,6 @@ dependencies = [ "windows-core 0.61.2", ] -[[package]] -name = "whisper-rs" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2088172d00f936c348d6a72f488dc2660ab3f507263a195df308a3c2383229f6" -dependencies = [ - "whisper-rs-sys", -] - -[[package]] -name = "whisper-rs-sys" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6986c0fe081241d391f09b9a071fbcbb59720c3563628c3c829057cf69f2a56f" -dependencies = [ - "bindgen", - "cfg-if", - "cmake", - "fs_extra", - "semver", -] - [[package]] name = "winapi" version = "0.3.9" diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index f5fd11a..abe40d9 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -27,6 +27,5 @@ tauri-plugin-fs = "2" tauri-plugin-log = "2" dirs = "5.0" ureq = "2.9" -whisper-rs = "0.16.0" hound = "3.5" tempfile = "3.10" diff --git a/src-tauri/src/ai_provider.rs b/src-tauri/src/ai_provider.rs new file mode 100644 index 0000000..1371493 --- /dev/null +++ b/src-tauri/src/ai_provider.rs @@ -0,0 +1,98 @@ +use std::process::Command; +use serde_json; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct AICompleteResult { + pub response: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct OllamaModelsResult { + pub models: Vec, +} + +/// Complete text using AI provider +pub fn complete( + prompt: &str, + provider: &str, + model: Option<&str>, + api_key: Option<&str>, + base_url: Option<&str>, + system_prompt: Option<&str>, + temperature: f64, +) -> Result { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("ai_provider.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let mut args = vec![script_path, "complete", prompt, provider]; + + if let Some(m) = model { + args.push(m); + } else { + args.push("null"); + } + + if let Some(key) = api_key { + args.push(key); + } else { + args.push("null"); + } + + if let Some(url) = base_url { + args.push(url); + } else { + args.push("null"); + } + + if let Some(sys) = system_prompt { + args.push(sys); + } else { + args.push("null"); + } + + let temp_str = temperature.to_string(); + args.push(&temp_str); + + let output = Command::new(python_exe) + .args(&args) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return 
Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: AICompleteResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.response) +} + +/// List available Ollama models +pub fn list_ollama_models(base_url: &str) -> Result, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("ai_provider.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let output = Command::new(python_exe) + .args(&[script_path, "list_ollama_models", base_url]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: OllamaModelsResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.models) +} \ No newline at end of file diff --git a/src-tauri/src/audio_cleaner.rs b/src-tauri/src/audio_cleaner.rs new file mode 100644 index 0000000..6a60d69 --- /dev/null +++ b/src-tauri/src/audio_cleaner.rs @@ -0,0 +1,61 @@ +use std::process::Command; +use serde_json; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct CleanAudioResult { + pub output_path: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct DeepFilterStatus { + pub available: bool, +} + +/// Clean audio using DeepFilterNet or FFmpeg fallback +pub fn clean_audio(input_path: &str, output_path: &str) -> Result { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("audio_cleaner.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let output = Command::new(python_exe) + .args(&[script_path, "clean_audio", input_path, output_path]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: CleanAudioResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.output_path) +} + +/// Check if DeepFilterNet is available +pub fn is_deepfilter_available() -> Result { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("audio_cleaner.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let output = Command::new(python_exe) + .args(&[script_path, "is_deepfilter_available"]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: DeepFilterStatus = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.available) +} \ No newline at end of file diff --git a/src-tauri/src/background_removal.rs 
b/src-tauri/src/background_removal.rs new file mode 100644 index 0000000..c970820 --- /dev/null +++ b/src-tauri/src/background_removal.rs @@ -0,0 +1,66 @@ +use std::process::Command; +use serde_json; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct BackgroundRemovalStatus { + pub available: bool, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct BackgroundRemovalResult { + pub output_path: String, +} + +/// Check if background removal is available +pub fn is_available() -> Result<bool, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("background_removal.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let output = Command::new(python_exe) + .args(&[script_path, "is_available"]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: BackgroundRemovalStatus = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.available) +} + +/// Remove background on export (placeholder for Phase 5) +pub fn remove_background_on_export( + input_path: &str, + output_path: &str, + replacement: &str, + replacement_value: &str, +) -> Result<String, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("background_removal.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let output = Command::new(python_exe) + .args(&[script_path, "remove_background_on_export", input_path, output_path, replacement, replacement_value]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: BackgroundRemovalResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.output_path) +} \ No newline at end of file diff --git a/src-tauri/src/caption_generator.rs b/src-tauri/src/caption_generator.rs new file mode 100644 index 0000000..3e07a2a --- /dev/null +++ b/src-tauri/src/caption_generator.rs @@ -0,0 +1,177 @@ +use std::process::Command; +use serde_json; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct Word { + pub word: String, + pub start: f64, + pub end: f64, + pub confidence: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub speaker: Option<String>, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct CaptionStyle { + #[serde(skip_serializing_if = "Option::is_none")] + pub font_name: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub font_size: Option<u32>, + #[serde(skip_serializing_if = "Option::is_none")] + pub font_color: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub bold: Option<bool>, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct CaptionContent { + pub content: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct SaveCaptionsResult { + pub output_path: String, +} + +/// Generate SRT caption 
content +pub fn generate_srt( + words: &[Word], + deleted_indices: Option<&std::collections::HashSet<usize>>, + words_per_line: usize, +) -> Result<String, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("caption_generator.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let words_json = serde_json::to_string(words) + .map_err(|e| format!("Failed to serialize words: {}", e))?; + + let deleted_json = match deleted_indices { + Some(indices) => serde_json::to_string(indices) + .map_err(|e| format!("Failed to serialize deleted indices: {}", e))?, + None => "null".to_string(), + }; + + let output = Command::new(python_exe) + .args(&[script_path, "generate_srt", &words_json, &deleted_json, &words_per_line.to_string()]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: CaptionContent = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.content) +} + +/// Generate VTT caption content +pub fn generate_vtt( + words: &[Word], + deleted_indices: Option<&std::collections::HashSet<usize>>, + words_per_line: usize, +) -> Result<String, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("caption_generator.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let words_json = serde_json::to_string(words) + .map_err(|e| format!("Failed to serialize words: {}", e))?; + + let deleted_json = match deleted_indices { + Some(indices) => serde_json::to_string(indices) + .map_err(|e| format!("Failed to serialize deleted indices: {}", e))?, + None => "null".to_string(), + }; + + let output = Command::new(python_exe) + .args(&[script_path, "generate_vtt", &words_json, &deleted_json, &words_per_line.to_string()]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: CaptionContent = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.content) +} + +/// Generate ASS subtitle content +pub fn generate_ass( + words: &[Word], + deleted_indices: Option<&std::collections::HashSet<usize>>, + words_per_line: usize, + style: Option<&CaptionStyle>, +) -> Result<String, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("caption_generator.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let words_json = serde_json::to_string(words) + .map_err(|e| format!("Failed to serialize words: {}", e))?; + + let deleted_json = match deleted_indices { + Some(indices) => serde_json::to_string(indices) + .map_err(|e| format!("Failed to serialize deleted indices: {}", e))?, + None => "null".to_string(), + }; + + let style_json = match style { + Some(s) => serde_json::to_string(s) + .map_err(|e| format!("Failed to serialize style: {}", e))?, + None => "null".to_string(), + }; + + let output = 
Command::new(python_exe) + .args(&[script_path, "generate_ass", &words_json, &deleted_json, &words_per_line.to_string(), &style_json]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: CaptionContent = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.content) +} + +/// Save caption content to file +pub fn save_captions(content: &str, output_path: &str) -> Result<String, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("caption_generator.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let output = Command::new(python_exe) + .args(&[script_path, "save_captions", content, output_path]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: SaveCaptionsResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.output_path) +} \ No newline at end of file diff --git a/src-tauri/src/diarization.rs b/src-tauri/src/diarization.rs new file mode 100644 index 0000000..4afada1 --- /dev/null +++ b/src-tauri/src/diarization.rs @@ -0,0 +1,82 @@ +use std::process::Command; +use serde_json; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct Word { + pub word: String, + pub start: f64, + pub end: f64, + pub confidence: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub speaker: Option<String>, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct Segment { + pub id: usize, + pub start: f64, + pub end: f64, + pub text: String, + pub words: Vec<Word>, + #[serde(skip_serializing_if = "Option::is_none")] + pub speaker: Option<String>, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct TranscriptionResult { + pub words: Vec<Word>, + pub segments: Vec<Segment>, + pub language: String, +} + +/// Apply speaker diarization to transcription result +pub fn diarize_and_label( + transcription_result: &TranscriptionResult, + audio_path: &str, + hf_token: Option<&str>, + num_speakers: Option<usize>, + use_gpu: bool, +) -> Result<TranscriptionResult, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("diarization.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let transcription_json = serde_json::to_string(transcription_result) + .map_err(|e| format!("Failed to serialize transcription: {}", e))?; + + let mut args = vec![script_path, "diarize_and_label", &transcription_json, audio_path]; + + if let Some(token) = hf_token { + args.push(token); + } else { + args.push("null"); + } + + let speakers_str; + if let Some(speakers) = num_speakers { + speakers_str = speakers.to_string(); + args.push(&speakers_str); + } else { + args.push("null"); + } + + args.push(if use_gpu { "true" } else { "false" }); + + let output = Command::new(python_exe) + .args(&args) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if 
!output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: TranscriptionResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result) +} \ No newline at end of file diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index b904236..d7a9a20 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -1,6 +1,13 @@ // --- Commands --- +mod paths; mod transcription; +mod video_editor; +mod audio_cleaner; +mod diarization; +mod ai_provider; +mod caption_generator; +mod background_removal; /// Returns the backend URL. Stubbed for now; will be replaced once the /// Python/Rust backend is fully wired up. @@ -56,6 +63,162 @@ async fn transcribe_audio(file_path: String, model_name: String, language: Optio .map_err(|e| format!("Task error: {:?}", e))? } +/// Export video using stream copy (fast, lossless) +#[tauri::command] +async fn export_stream_copy(input_path: String, output_path: String, keep_segments: serde_json::Value) -> Result<String, String> { + tauri::async_runtime::spawn_blocking(move || { + video_editor::export_stream_copy(&input_path, &output_path, &keep_segments) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Export video with re-encoding +#[tauri::command] +async fn export_reencode(input_path: String, output_path: String, keep_segments: serde_json::Value, resolution: String, format_hint: String) -> Result<String, String> { + tauri::async_runtime::spawn_blocking(move || { + video_editor::export_reencode(&input_path, &output_path, &keep_segments, &resolution, &format_hint) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Export video with re-encoding and subtitles +#[tauri::command] +async fn export_reencode_with_subs(input_path: String, output_path: String, keep_segments: serde_json::Value, subtitle_path: String, resolution: String, format_hint: String) -> Result<String, String> { + tauri::async_runtime::spawn_blocking(move || { + video_editor::export_reencode_with_subs(&input_path, &output_path, &keep_segments, &subtitle_path, &resolution, &format_hint) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Get video information +#[tauri::command] +async fn get_video_info(input_path: String) -> Result<video_editor::VideoInfo, String> { + tauri::async_runtime::spawn_blocking(move || { + video_editor::get_video_info(&input_path) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Clean audio using DeepFilterNet or FFmpeg fallback +#[tauri::command] +async fn clean_audio(input_path: String, output_path: String) -> Result<String, String> { + tauri::async_runtime::spawn_blocking(move || { + audio_cleaner::clean_audio(&input_path, &output_path) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Check if DeepFilterNet is available +#[tauri::command] +async fn is_deepfilter_available() -> Result<bool, String> { + tauri::async_runtime::spawn_blocking(move || { + audio_cleaner::is_deepfilter_available() + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? 
+} + +/// Apply speaker diarization to transcription result +#[tauri::command] +async fn diarize_and_label(transcription_result: diarization::TranscriptionResult, audio_path: String, hf_token: Option<String>, num_speakers: Option<usize>, use_gpu: Option<bool>) -> Result<diarization::TranscriptionResult, String> { + let use_gpu = use_gpu.unwrap_or(true); + tauri::async_runtime::spawn_blocking(move || { + diarization::diarize_and_label(&transcription_result, &audio_path, hf_token.as_deref(), num_speakers, use_gpu) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Complete text using AI provider +#[tauri::command] +async fn ai_complete(prompt: String, provider: String, model: Option<String>, api_key: Option<String>, base_url: Option<String>, system_prompt: Option<String>, temperature: Option<f64>) -> Result<String, String> { + let temperature = temperature.unwrap_or(0.3); + tauri::async_runtime::spawn_blocking(move || { + ai_provider::complete(&prompt, &provider, model.as_deref(), api_key.as_deref(), base_url.as_deref(), system_prompt.as_deref(), temperature) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// List available Ollama models +#[tauri::command] +async fn list_ollama_models(base_url: Option<String>) -> Result<Vec<String>, String> { + let base_url = base_url.unwrap_or_else(|| "http://localhost:11434".to_string()); + tauri::async_runtime::spawn_blocking(move || { + ai_provider::list_ollama_models(&base_url) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Generate SRT caption content +#[tauri::command] +async fn generate_srt(words: Vec<caption_generator::Word>, deleted_indices: Option<std::collections::HashSet<usize>>, words_per_line: Option<usize>) -> Result<String, String> { + let words_per_line = words_per_line.unwrap_or(8); + tauri::async_runtime::spawn_blocking(move || { + caption_generator::generate_srt(&words, deleted_indices.as_ref(), words_per_line) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Generate VTT caption content +#[tauri::command] +async fn generate_vtt(words: Vec<caption_generator::Word>, deleted_indices: Option<std::collections::HashSet<usize>>, words_per_line: Option<usize>) -> Result<String, String> { + let words_per_line = words_per_line.unwrap_or(8); + tauri::async_runtime::spawn_blocking(move || { + caption_generator::generate_vtt(&words, deleted_indices.as_ref(), words_per_line) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Generate ASS subtitle content +#[tauri::command] +async fn generate_ass(words: Vec<caption_generator::Word>, deleted_indices: Option<std::collections::HashSet<usize>>, words_per_line: Option<usize>, style: Option<caption_generator::CaptionStyle>) -> Result<String, String> { + let words_per_line = words_per_line.unwrap_or(8); + tauri::async_runtime::spawn_blocking(move || { + caption_generator::generate_ass(&words, deleted_indices.as_ref(), words_per_line, style.as_ref()) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Save caption content to file +#[tauri::command] +async fn save_captions(content: String, output_path: String) -> Result<String, String> { + tauri::async_runtime::spawn_blocking(move || { + caption_generator::save_captions(&content, &output_path) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Check if background removal is available +#[tauri::command] +async fn is_background_removal_available() -> Result<bool, String> { + tauri::async_runtime::spawn_blocking(move || { + background_removal::is_available() + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? 
+} + +/// Remove background on export (placeholder for Phase 5) +#[tauri::command] +async fn remove_background_on_export(input_path: String, output_path: String, replacement: String, replacement_value: String) -> Result<String, String> { + tauri::async_runtime::spawn_blocking(move || { + background_removal::remove_background_on_export(&input_path, &output_path, &replacement, &replacement_value) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + // --- App entry point --- #[cfg_attr(mobile, tauri::mobile_entry_point)] @@ -79,6 +242,21 @@ pub fn run() { decrypt_string, ensure_model, transcribe_audio, + export_stream_copy, + export_reencode, + export_reencode_with_subs, + get_video_info, + clean_audio, + is_deepfilter_available, + diarize_and_label, + ai_complete, + list_ollama_models, + generate_srt, + generate_vtt, + generate_ass, + save_captions, + is_background_removal_available, + remove_background_on_export, ]) .run(tauri::generate_context!()) .expect("error while running tauri application"); diff --git a/src-tauri/src/paths.rs b/src-tauri/src/paths.rs new file mode 100644 index 0000000..1f518c2 --- /dev/null +++ b/src-tauri/src/paths.rs @@ -0,0 +1,30 @@ +use std::path::PathBuf; + +/// Resolve the project root from the executable path. +/// In dev mode, the binary lives at: <project-root>/src-tauri/target/debug/<binary> +/// So the project root is 4 levels above the binary. +pub fn project_root() -> PathBuf { + let exe = std::env::current_exe().expect("Failed to get executable path"); + // exe -> debug/ -> target/ -> src-tauri/ -> root + exe.parent() + .and_then(|p| p.parent()) + .and_then(|p| p.parent()) + .and_then(|p| p.parent()) + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| PathBuf::from(".")) +} + +/// Absolute path to the venv Python 3.10 interpreter. +pub fn python_exe() -> PathBuf { + project_root().join(".venv/bin/python3.10") +} + +/// Absolute path to a script in the backend directory. +pub fn backend_script(name: &str) -> PathBuf { + project_root().join("backend").join(name) +} + +/// Absolute path to a script at the project root. 
+pub fn root_script(name: &str) -> PathBuf { + project_root().join(name) +} diff --git a/src-tauri/src/transcription.rs b/src-tauri/src/transcription.rs index 2a23674..9fece24 100644 --- a/src-tauri/src/transcription.rs +++ b/src-tauri/src/transcription.rs @@ -1,6 +1,5 @@ -use std::fs; use std::process::Command; -use whisper_rs::{WhisperContext, WhisperContextParameters, FullParams, SamplingStrategy}; +use serde_json; #[derive(serde::Serialize, serde::Deserialize, Clone, Debug)] pub struct TranscriptionResult { @@ -26,176 +25,46 @@ pub struct Segment { pub words: Vec<Word>, } -/// Extract audio from a video/audio file to a 16kHz mono WAV using ffmpeg -fn extract_to_wav(input_path: &str, output_path: &str) -> Result<(), String> { - let status = Command::new("ffmpeg") - .args(["-y", "-i", input_path, "-vn", "-ar", "16000", "-ac", "1", "-f", "wav", output_path]) - .status() - .map_err(|e| format!("Failed to run ffmpeg: {}", e))?; - - if !status.success() { - return Err(format!("ffmpeg exited with code: {:?}", status.code())); - } - Ok(()) -} - -/// Transcribe audio file using whisper-rs (real Whisper.cpp inference) +/// Transcribe audio file using Python faster-whisper pub fn transcribe_audio( file_path: &str, model_name: &str, language: Option<&str>, ) -> Result<TranscriptionResult, String> { - // Ensure model is downloaded - let model_path = ensure_model_downloaded(model_name)?; + // Path to Python venv and script + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::root_script("transcribe.py"); + let script_path = script_path.to_str().unwrap_or_default(); - // Extract audio to temp 16kHz mono WAV - let tmp_wav = tempfile::Builder::new() - .suffix(".wav") - .tempfile() - .map_err(|e| format!("Failed to create temp file: {}", e))?; - let wav_path = tmp_wav.path().to_string_lossy().to_string(); - - extract_to_wav(file_path, &wav_path)?; - - // Read WAV as f32 samples - let mut reader = hound::WavReader::open(&wav_path) - .map_err(|e| format!("Failed to read WAV: {}", e))?; - let spec = reader.spec(); - let samples: Vec<f32> = match spec.sample_format { - hound::SampleFormat::Int => reader - .samples::<i16>() - .map(|s| s.map(|v| v as f32 / 32768.0).map_err(|e| format!("{}", e))) - .collect::<Result<Vec<f32>, _>>()?, - hound::SampleFormat::Float => reader - .samples::<f32>() - .map(|s| s.map_err(|e| format!("{}", e))) - .collect::<Result<Vec<f32>, _>>()?, - }; - - // Load Whisper model and transcribe - let ctx_params = WhisperContextParameters::default(); - let ctx = WhisperContext::new_with_params(&model_path, ctx_params) - .map_err(|e| format!("Failed to load model: {:?}", e))?; - let mut state = ctx.create_state() - .map_err(|e| format!("Failed to create state: {:?}", e))?; - - let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); - params.set_print_special(false); - params.set_print_progress(false); - params.set_print_realtime(false); - params.set_print_timestamps(false); - params.set_token_timestamps(true); - params.set_single_segment(false); + // Build command args + let mut args = vec![script_path, file_path, model_name]; if let Some(lang) = language { - params.set_language(Some(lang)); + args.push(lang); } - state.full(params, &samples) - .map_err(|e| format!("Transcription failed: {:?}", e))?; + // Run Python script + let output = Command::new(python_exe) + .args(&args) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; - // Extract word-level results using the 0.16.0 iterator API - let mut all_words: Vec<Word> = Vec::new(); - let mut 
segments: Vec<Segment> = Vec::new(); - let detected_language = language.unwrap_or("en").to_string(); - - for (seg_idx, segment) in state.as_iter().enumerate() { - let seg_text = segment.to_str_lossy() - .map_err(|e| format!("Segment text error: {:?}", e))?; - let seg_t0 = segment.start_timestamp() as f64 / 100.0; - let seg_t1 = segment.end_timestamp() as f64 / 100.0; - - let mut seg_words: Vec<Word> = Vec::new(); - - for tok_i in 0..segment.n_tokens() { - if let Some(token) = segment.get_token(tok_i) { - let token_text = match token.to_str_lossy() { - Ok(t) => t.into_owned(), - Err(_) => continue, - }; - let token_data = token.token_data(); - - // Skip special tokens - let trimmed = token_text.trim(); - if trimmed.is_empty() || trimmed.starts_with('[') || trimmed.starts_with('<') { - continue; - } - - let word = Word { - word: trimmed.to_string(), - start: token_data.t0 as f64 / 100.0, - end: token_data.t1 as f64 / 100.0, - confidence: token_data.p as f64, - }; - all_words.push(word.clone()); - seg_words.push(word); - } - } - - segments.push(Segment { - id: seg_idx, - start: seg_t0, - end: seg_t1, - text: seg_text.trim().to_string(), - words: seg_words, - }); + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); } - Ok(TranscriptionResult { - words: all_words, - segments, - language: detected_language, - }) + // Parse JSON output + let stdout = String::from_utf8_lossy(&output.stdout); + let result: TranscriptionResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result) } -/// Download and cache Whisper model -pub fn ensure_model_downloaded(model_name: &str) -> Result<String, String> { - // Get app data directory for storing models - let app_data_dir = dirs::data_dir() - .ok_or("Could not find app data directory")? - .join("TalkEdit") - .join("models"); - - // Create directory if it doesn't exist - fs::create_dir_all(&app_data_dir) - .map_err(|e| format!("Failed to create models directory: {}", e))?; - - let model_path = app_data_dir.join(format!("ggml-{}.bin", model_name)); - - // Check if model already exists - if model_path.exists() { - return Ok(model_path.to_string_lossy().to_string()); - } - - // Only download smaller models automatically - let allowed_models = ["tiny", "base", "small"]; - if !allowed_models.contains(&model_name) { - return Err(format!("Model '{}' is not available for automatic download. 
Only tiny, base, and small models are supported.", model_name)); - } - - println!("Downloading Whisper model: {}...", model_name); - - // Download the model from ggerganov's whisper.cpp repo - let url = format!("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-{}.bin", model_name); - let response = ureq::get(&url) - .call() - .map_err(|e| format!("Failed to download model: {}", e))?; - - let len = response - .header("content-length") - .and_then(|s| s.parse::<u64>().ok()) - .unwrap_or(0); - - println!("Model size: {} bytes", len); - - let mut reader = response.into_reader(); - let mut file = fs::File::create(&model_path) - .map_err(|e| format!("Failed to create model file: {}", e))?; - - std::io::copy(&mut reader, &mut file) - .map_err(|e| format!("Failed to write model file: {}", e))?; - - println!("Model downloaded successfully: {}", model_path.display()); - - Ok(model_path.to_string_lossy().to_string()) +/// Ensure model is available (faster-whisper handles this automatically) +pub fn ensure_model_downloaded(_model_name: &str) -> Result<String, String> { + // faster-whisper downloads models on first use, so just return success + Ok("Model ready".to_string()) } diff --git a/src-tauri/src/video_editor.rs b/src-tauri/src/video_editor.rs new file mode 100644 index 0000000..89c44e9 --- /dev/null +++ b/src-tauri/src/video_editor.rs @@ -0,0 +1,138 @@ +use std::process::Command; +use serde_json; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct VideoInfo { + pub duration: f64, + pub size: u64, + pub format: String, + pub width: u32, + pub height: u32, + pub codec: String, + pub fps: f64, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct ExportResult { + pub output_path: String, +} + +/// Export video using stream copy (fast, lossless) +pub fn export_stream_copy( + input_path: &str, + output_path: &str, + keep_segments: &serde_json::Value, +) -> Result<String, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("video_editor.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let keep_segments_str = keep_segments.to_string(); + + let output = Command::new(python_exe) + .args(&[script_path, "export_stream_copy", input_path, output_path, &keep_segments_str]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: ExportResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.output_path) +} + +/// Export video with re-encoding +pub fn export_reencode( + input_path: &str, + output_path: &str, + keep_segments: &serde_json::Value, + resolution: &str, + format_hint: &str, +) -> Result<String, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("video_editor.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let keep_segments_str = keep_segments.to_string(); + + let output = Command::new(python_exe) + .args(&[script_path, "export_reencode", input_path, output_path, &keep_segments_str, resolution, format_hint]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if 
!output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: ExportResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.output_path) +} + +/// Export video with re-encoding and subtitles +pub fn export_reencode_with_subs( + input_path: &str, + output_path: &str, + keep_segments: &serde_json::Value, + subtitle_path: &str, + resolution: &str, + format_hint: &str, +) -> Result<String, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("video_editor.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let keep_segments_str = keep_segments.to_string(); + + let output = Command::new(python_exe) + .args(&[script_path, "export_reencode_with_subs", input_path, output_path, &keep_segments_str, subtitle_path, resolution, format_hint]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: ExportResult = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result.output_path) +} + +/// Get video information +pub fn get_video_info(input_path: &str) -> Result<VideoInfo, String> { + let python_exe = crate::paths::python_exe(); + let python_exe = python_exe.to_str().unwrap_or_default(); + let script_path = crate::paths::backend_script("video_editor.py"); + let script_path = script_path.to_str().unwrap_or_default(); + + let output = Command::new(python_exe) + .args(&[script_path, "get_video_info", input_path]) + .output() + .map_err(|e| format!("Failed to run Python script: {}", e))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(format!("Python script failed: {}", stderr)); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let result: VideoInfo = serde_json::from_str(&stdout.trim()) + .map_err(|e| format!("Failed to parse JSON: {}", e))?; + + Ok(result) +} \ No newline at end of file diff --git a/test_api.py b/test_api.py new file mode 100755 index 0000000..b2c0622 --- /dev/null +++ b/test_api.py @@ -0,0 +1,130 @@ +#!/home/dillon/_code/TalkEdit/.venv/bin/python3.10 +""" +Test script for the TalkEdit API. +This script tests the new Tauri commands that expose all backend functions. 
+""" + +import json +import sys +import os +from pathlib import Path + +# Add backend to path for direct testing +sys.path.insert(0, str(Path(__file__).parent / "backend")) + +def test_video_info(): + """Test get_video_info function""" + from services.video_editor import get_video_info + + # Use a test video file if available + test_video = "/path/to/test/video.mp4" # Replace with actual test file + if os.path.exists(test_video): + try: + info = get_video_info(test_video) + print("✓ Video info test passed") + print(f" Duration: {info['duration']}") + print(f" Resolution: {info['width']}x{info['height']}") + return True + except Exception as e: + print(f"✗ Video info test failed: {e}") + return False + else: + print("⚠ Video info test skipped (no test file)") + return True + +def test_caption_generation(): + """Test caption generation functions""" + from services.caption_generator import generate_srt, generate_vtt + + # Sample word data + words = [ + {"word": "Hello", "start": 0.0, "end": 0.5, "confidence": 0.9}, + {"word": "world", "start": 0.5, "end": 1.0, "confidence": 0.95}, + {"word": "this", "start": 1.0, "end": 1.3, "confidence": 0.8}, + {"word": "is", "start": 1.3, "end": 1.5, "confidence": 0.9}, + {"word": "a", "start": 1.5, "end": 1.6, "confidence": 0.85}, + {"word": "test", "start": 1.6, "end": 2.0, "confidence": 0.95}, + ] + + try: + srt_content = generate_srt(words) + vtt_content = generate_vtt(words) + + if "Hello world" in srt_content and "WEBVTT" in vtt_content: + print("✓ Caption generation test passed") + return True + else: + print("✗ Caption generation test failed: unexpected content") + return False + except Exception as e: + print(f"✗ Caption generation test failed: {e}") + return False + +def test_ai_provider(): + """Test AI provider functions""" + from services.ai_provider import AIProvider + + try: + # Test listing Ollama models (may fail if Ollama not running) + models = AIProvider.list_ollama_models() + print(f"✓ AI provider test passed (found {len(models)} models)") + return True + except Exception as e: + print(f"⚠ AI provider test skipped: {e}") + return True + +def test_deepfilter_status(): + """Test DeepFilterNet availability check""" + from services.audio_cleaner import is_deepfilter_available + + try: + available = is_deepfilter_available() + print(f"✓ DeepFilter status test passed (available: {available})") + return True + except Exception as e: + print(f"✗ DeepFilter status test failed: {e}") + return False + +def main(): + print("Testing TalkEdit API functions...") + print("=" * 50) + + tests = [ + ("Video Info", test_video_info), + ("Caption Generation", test_caption_generation), + ("AI Provider", test_ai_provider), + ("DeepFilter Status", test_deepfilter_status), + ] + + passed = 0 + total = len(tests) + + for name, test_func in tests: + print(f"\nTesting {name}:") + if test_func(): + passed += 1 + + print("\n" + "=" * 50) + print(f"Results: {passed}/{total} tests passed") + + if passed == total: + print("🎉 All tests passed! The API is ready for AI testing.") + else: + print("⚠️ Some tests failed. 
Check the output above.") + + print("\nAvailable Tauri Commands:") + commands = [ + "transcribe_audio", + "export_stream_copy", "export_reencode", "export_reencode_with_subs", "get_video_info", + "clean_audio", "is_deepfilter_available", + "diarize_and_label", + "ai_complete", "list_ollama_models", + "generate_srt", "generate_vtt", "generate_ass", "save_captions", + "is_background_removal_available", "remove_background_on_export", + ] + + for cmd in commands: + print(f" - {cmd}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/transcribe.py b/transcribe.py new file mode 100644 index 0000000..ac82a27 --- /dev/null +++ b/transcribe.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +import sys +import json +import tempfile +import subprocess +from faster_whisper import WhisperModel + +def extract_audio(input_path, output_path): + """Extract audio from video/audio file to 16kHz mono WAV""" + cmd = [ + 'ffmpeg', '-y', '-i', input_path, '-vn', '-ar', '16000', '-ac', '1', '-f', 'wav', output_path + ] + subprocess.run(cmd, check=True) + +def main(): + if len(sys.argv) < 3: + print("Usage: python transcribe.py [language]", file=sys.stderr) + sys.exit(1) + + audio_file = sys.argv[1] + model_name = sys.argv[2] + language = sys.argv[3] if len(sys.argv) > 3 else None + + # Extract audio to temp WAV if needed + with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp: + wav_path = tmp.name + + try: + extract_audio(audio_file, wav_path) + + # Load model - use GPU if CUDA is available, else CPU with int8 + import ctypes + try: + ctypes.CDLL("libcublas.so.12") + device = "cuda" + compute_type = "float16" + except OSError: + device = "cpu" + compute_type = "int8" + + model = WhisperModel(model_name, device=device, compute_type=compute_type) + + # Transcribe + segments, info = model.transcribe( + wav_path, + language=language, + beam_size=5, + word_timestamps=True, + vad_filter=True, + vad_parameters=dict(threshold=0.5, min_speech_duration_ms=250), + without_timestamps=False + ) + + # Convert to our format + words = [] + segments_list = [] + + for segment in segments: + seg_words = [] + for word in segment.words: + w = { + "word": word.word, + "start": word.start, + "end": word.end, + "confidence": word.probability + } + words.append(w) + seg_words.append(w) + + segments_list.append({ + "id": len(segments_list), + "start": segment.start, + "end": segment.end, + "text": segment.text, + "words": seg_words + }) + + result = { + "words": words, + "segments": segments_list, + "language": info.language + } + + print(json.dumps(result)) + + finally: + import os + os.unlink(wav_path) + +if __name__ == "__main__": + main() \ No newline at end of file