2026-03-26 00:58:57 -06:00
|
|
|
use std::process::Command;
|
2026-03-26 23:39:31 -06:00
|
|
|
use serde_json;
|
2026-03-26 00:58:57 -06:00
|
|
|
|
|
|
|
|
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
|
|
|
|
|
pub struct TranscriptionResult {
|
|
|
|
|
pub words: Vec<Word>,
|
|
|
|
|
pub segments: Vec<Segment>,
|
|
|
|
|
pub language: String,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
|
|
|
|
|
pub struct Word {
|
|
|
|
|
pub word: String,
|
|
|
|
|
pub start: f64,
|
|
|
|
|
pub end: f64,
|
|
|
|
|
pub confidence: f64,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
|
|
|
|
|
pub struct Segment {
|
|
|
|
|
pub id: usize,
|
|
|
|
|
pub start: f64,
|
|
|
|
|
pub end: f64,
|
|
|
|
|
pub text: String,
|
|
|
|
|
pub words: Vec<Word>,
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-26 23:39:31 -06:00
|
|
|
/// Transcribe audio file using Python faster-whisper
|
2026-03-26 00:58:57 -06:00
|
|
|
pub fn transcribe_audio(
|
|
|
|
|
file_path: &str,
|
|
|
|
|
model_name: &str,
|
|
|
|
|
language: Option<&str>,
|
|
|
|
|
) -> Result<TranscriptionResult, String> {
|
2026-03-26 23:39:31 -06:00
|
|
|
// Path to Python venv and script
|
|
|
|
|
let python_exe = crate::paths::python_exe();
|
|
|
|
|
let python_exe = python_exe.to_str().unwrap_or_default();
|
|
|
|
|
let script_path = crate::paths::root_script("transcribe.py");
|
|
|
|
|
let script_path = script_path.to_str().unwrap_or_default();
|
|
|
|
|
|
|
|
|
|
// Build command args
|
|
|
|
|
let mut args = vec![script_path, file_path, model_name];
|
2026-03-26 00:58:57 -06:00
|
|
|
if let Some(lang) = language {
|
2026-03-26 23:39:31 -06:00
|
|
|
args.push(lang);
|
2026-03-26 00:58:57 -06:00
|
|
|
}
|
|
|
|
|
|
2026-04-03 10:46:26 -06:00
|
|
|
// Run Python script with timeout
|
2026-03-26 23:39:31 -06:00
|
|
|
let output = Command::new(python_exe)
|
|
|
|
|
.args(&args)
|
|
|
|
|
.output()
|
|
|
|
|
.map_err(|e| format!("Failed to run Python script: {}", e))?;
|
2026-03-26 00:58:57 -06:00
|
|
|
|
2026-04-03 10:46:26 -06:00
|
|
|
// Check for timeout or other errors
|
2026-03-26 23:39:31 -06:00
|
|
|
if !output.status.success() {
|
|
|
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
2026-04-03 10:46:26 -06:00
|
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
|
|
|
return Err(format!("Python script failed: {}\nStdout: {}\nStderr: {}",
|
|
|
|
|
output.status, stdout, stderr));
|
2026-03-26 00:58:57 -06:00
|
|
|
}
|
|
|
|
|
|
2026-03-26 23:39:31 -06:00
|
|
|
// Parse JSON output
|
|
|
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
|
|
|
let result: TranscriptionResult = serde_json::from_str(&stdout.trim())
|
|
|
|
|
.map_err(|e| format!("Failed to parse JSON: {}", e))?;
|
2026-03-26 00:58:57 -06:00
|
|
|
|
2026-03-26 23:39:31 -06:00
|
|
|
Ok(result)
|
|
|
|
|
}
|
2026-03-26 00:58:57 -06:00
|
|
|
|
2026-03-26 23:39:31 -06:00
|
|
|
/// Report that the requested model is ready for use.
///
/// No download is performed here: faster-whisper fetches models on first use,
/// so this function ignores `_model_name` and always returns
/// `Ok("Model ready")`.
pub fn ensure_model_downloaded(_model_name: &str) -> Result<String, String> {
    let status = String::from("Model ready");
    Ok(status)
}
|
|
|
|
|
|