diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index d160236..0fffdb9 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -13,7 +13,6 @@ import { Settings, Sparkles, Download, - Loader2, FolderSearch, FileInput, } from 'lucide-react'; @@ -28,6 +27,7 @@ export default function App() { words, isTranscribing, transcriptionProgress, + transcriptionStatus, loadVideo, setBackendUrl, setTranscription, @@ -88,15 +88,19 @@ export default function App() { }; const transcribeVideo = async (path: string) => { - setTranscribing(true, 0); + setTranscribing(true, 0, 'Checking model...'); try { - const res = await fetch(`${backendUrl}/transcribe`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ file_path: path, model: whisperModel }), - }); - if (!res.ok) throw new Error(`Transcription failed: ${res.statusText}`); - const data = await res.json(); + if (!window.electronAPI?.transcribe) { + throw new Error('Transcription not available'); + } + // Step 1: ensure model is downloaded (may take a while on first run) + const modelLabel = whisperModel === 'tiny' ? '~75 MB' : whisperModel === 'base' ? '~140 MB' : '~460 MB'; + setTranscribing(true, 5, `Downloading ${whisperModel} model (${modelLabel})...`); + await window.electronAPI.ensureModel(whisperModel); + + // Step 2: run transcription + setTranscribing(true, 20, 'Transcribing audio...'); + const data = await window.electronAPI.transcribe(path, whisperModel); setTranscription(data); } catch (err) { console.error('Transcription error:', err); @@ -244,11 +248,24 @@ export default function App() { {/* Transcript */}
{isTranscribing ? ( -
- -

- Transcribing... {Math.round(transcriptionProgress)}% -

+
+ {/* Animated waveform */} +
+ {[35, 60, 45, 80, 55, 70, 40, 65, 50, 75, 40, 58].map((h, i) => ( +
+ ))} +
+
+

Processing audio

+

{transcriptionStatus || 'Please wait...'}

+
) : words.length > 0 ? ( diff --git a/frontend/src/index.css b/frontend/src/index.css index 80a0377..e8b1087 100644 --- a/frontend/src/index.css +++ b/frontend/src/index.css @@ -2,6 +2,17 @@ @tailwind components; @tailwind utilities; +@keyframes waveBar { + 0% { transform: scaleY(0.3); opacity: 0.5; } + 50% { transform: scaleY(1); opacity: 1; } + 100% { transform: scaleY(0.3); opacity: 0.5; } +} + +.wave-bar { + animation: waveBar 0.9s ease-in-out infinite; + transform-origin: bottom; +} + * { margin: 0; padding: 0; diff --git a/frontend/src/lib/tauri-bridge.ts b/frontend/src/lib/tauri-bridge.ts index 9690f71..a94bdf5 100644 --- a/frontend/src/lib/tauri-bridge.ts +++ b/frontend/src/lib/tauri-bridge.ts @@ -59,6 +59,14 @@ window.electronAPI = { return invoke('decrypt_string', { encrypted }); }, + ensureModel: (modelName: string): Promise => { + return invoke('ensure_model', { modelName }); + }, + + transcribe: (filePath: string, modelName: string, language?: string): Promise => { + return invoke('transcribe_audio', { filePath, modelName, language }); + }, + readFile: (path: string): Promise => { return readTextFile(path); }, diff --git a/frontend/src/store/editorStore.ts b/frontend/src/store/editorStore.ts index ec9448d..ffb7bb4 100644 --- a/frontend/src/store/editorStore.ts +++ b/frontend/src/store/editorStore.ts @@ -19,6 +19,7 @@ interface EditorState { isTranscribing: boolean; transcriptionProgress: number; + transcriptionStatus: string; isExporting: boolean; exportProgress: number; @@ -37,7 +38,7 @@ interface EditorActions { deleteSelectedWords: () => void; deleteWordRange: (startIndex: number, endIndex: number) => void; restoreRange: (rangeId: string) => void; - setTranscribing: (active: boolean, progress?: number) => void; + setTranscribing: (active: boolean, progress?: number, status?: string) => void; setExporting: (active: boolean, progress?: number) => void; getKeepSegments: () => Array<{ start: number; end: number }>; getWordAtTime: (time: number) => number; @@ -59,6 +60,7 @@ const initialState: EditorState = { hoveredWordIndex: null, isTranscribing: false, transcriptionProgress: 0, + transcriptionStatus: '', isExporting: false, exportProgress: 0, backendUrl: 'http://localhost:8642', @@ -147,10 +149,11 @@ export const useEditorStore = create()( set({ deletedRanges: deletedRanges.filter((r) => r.id !== rangeId) }); }, - setTranscribing: (active, progress) => + setTranscribing: (active, progress, status) => set({ isTranscribing: active, transcriptionProgress: progress ?? (active ? 0 : 100), + transcriptionStatus: status ?? (active ? '' : ''), }), setExporting: (active, progress) => diff --git a/frontend/src/vite-env.d.ts b/frontend/src/vite-env.d.ts index 089ea01..06d4ad8 100644 --- a/frontend/src/vite-env.d.ts +++ b/frontend/src/vite-env.d.ts @@ -7,6 +7,8 @@ interface ElectronAPI { getBackendUrl: () => Promise; encryptString: (data: string) => Promise; decryptString: (encrypted: string) => Promise; + ensureModel: (modelName: string) => Promise; + transcribe: (filePath: string, modelName: string, language?: string) => Promise; readFile: (path: string) => Promise; writeFile: (path: string, content: string) => Promise; } diff --git a/open b/open new file mode 100755 index 0000000..48a801a --- /dev/null +++ b/open @@ -0,0 +1,4 @@ +#!/bin/bash +# Open TalkEdit app (Tauri dev mode) +cd "$(dirname "$0")" +npx tauri dev diff --git a/open-cutscript b/open-cutscript new file mode 100755 index 0000000..d8fad28 --- /dev/null +++ b/open-cutscript @@ -0,0 +1,11 @@ +#!/bin/bash +# To run CutScript in dev mode: +# 1. In a separate terminal, run: +# cd CutScript/frontend +# npm install # only needed once +# npm run dev -- --host +# 2. Then run this script to launch Electron: +# ./open-cutscript.sh + +cd "$(dirname "$0")/CutScript" +npx electron . diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 375ae8f..d9226fb 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -79,6 +79,8 @@ checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" name = "app" version = "0.1.0" dependencies = [ + "dirs 5.0.1", + "hound", "log", "serde", "serde_json", @@ -87,6 +89,9 @@ dependencies = [ "tauri-plugin-dialog", "tauri-plugin-fs", "tauri-plugin-log", + "tempfile", + "ureq", + "whisper-rs", ] [[package]] @@ -142,6 +147,26 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags 2.11.0", + "cexpr", + "clang-sys", + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.117", +] + [[package]] name = "bit-set" version = "0.8.0" @@ -391,6 +416,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfb" version = "0.7.3" @@ -436,6 +470,26 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading 0.8.9", +] + +[[package]] +name = "cmake" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +dependencies = [ + "cc", +] + [[package]] name = "combine" version = "4.6.7" @@ -683,13 +737,34 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + [[package]] name = "dirs" version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" dependencies = [ - "dirs-sys", + "dirs-sys 0.5.0", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.4.6", + "windows-sys 0.48.0", ] [[package]] @@ -700,7 +775,7 @@ checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.5.2", "windows-sys 0.61.2", ] @@ -801,6 +876,12 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "embed-resource" version = "3.0.8" @@ -848,6 +929,16 @@ dependencies = [ "typeid", ] +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -952,6 +1043,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "funty" version = "2.0.0" @@ -1394,6 +1491,12 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hound" +version = "3.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f" + [[package]] name = "html5ever" version = "0.29.1" @@ -1695,6 +1798,15 @@ dependencies = [ "serde", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.18" @@ -1849,7 +1961,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e9ec52138abedcc58dc17a7c6c0c00a2bdb4f3427c7f63fa97fd0d859155caf" dependencies = [ "gtk-sys", - "libloading", + "libloading 0.7.4", "once_cell", ] @@ -1869,6 +1981,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] + [[package]] name = "libredox" version = "0.1.15" @@ -1878,6 +2000,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + [[package]] name = "litemap" version = "0.8.1" @@ -1971,6 +2099,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -2055,6 +2189,16 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-conv" version = "0.2.1" @@ -2781,6 +2925,17 @@ dependencies = [ "bitflags 2.11.0", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 1.0.69", +] + [[package]] name = "redox_users" version = "0.5.2" @@ -2908,6 +3063,20 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rkyv" version = "0.7.46" @@ -2968,6 +3137,54 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags 2.11.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -3432,6 +3649,12 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "swift-rs" version = "1.0.7" @@ -3568,7 +3791,7 @@ dependencies = [ "anyhow", "bytes", "cookie", - "dirs", + "dirs 6.0.0", "dunce", "embed_plist", "getrandom 0.3.4", @@ -3618,7 +3841,7 @@ checksum = "4bbc990d1dbf57a8e1c7fa2327f2a614d8b757805603c1b9ba5c81bade09fd4d" dependencies = [ "anyhow", "cargo_toml", - "dirs", + "dirs 6.0.0", "glob", "heck 0.5.0", "json-patch", @@ -3852,6 +4075,19 @@ dependencies = [ "toml 0.9.12+spec-1.1.0", ] +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + [[package]] name = "tendril" version = "0.4.3" @@ -4174,7 +4410,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5e85aa143ceb072062fc4d6356c1b520a51d636e7bc8e77ec94be3608e5e80c" dependencies = [ "crossbeam-channel", - "dirs", + "dirs 6.0.0", "libappindicator", "muda", "objc2", @@ -4266,6 +4502,28 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "url", + "webpki-roots 0.26.11", +] + [[package]] name = "url" version = "2.5.8" @@ -4580,6 +4838,24 @@ dependencies = [ "system-deps", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.6", +] + +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webview2-com" version = "0.38.2" @@ -4616,6 +4892,28 @@ dependencies = [ "windows-core 0.61.2", ] +[[package]] +name = "whisper-rs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2088172d00f936c348d6a72f488dc2660ab3f507263a195df308a3c2383229f6" +dependencies = [ + "whisper-rs-sys", +] + +[[package]] +name = "whisper-rs-sys" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6986c0fe081241d391f09b9a071fbcbb59720c3563628c3c829057cf69f2a56f" +dependencies = [ + "bindgen", + "cfg-if", + "cmake", + "fs_extra", + "semver", +] + [[package]] name = "winapi" version = "0.3.9" @@ -4810,6 +5108,24 @@ dependencies = [ "windows-targets 0.42.2", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.59.0" @@ -4852,6 +5168,21 @@ dependencies = [ "windows_x86_64_msvc 0.42.2", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -4909,6 +5240,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -4927,6 +5264,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -4945,6 +5288,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -4975,6 +5324,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -4993,6 +5348,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -5011,6 +5372,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -5029,6 +5396,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -5179,7 +5552,7 @@ dependencies = [ "block2", "cookie", "crossbeam-channel", - "dirs", + "dirs 6.0.0", "dom_query", "dpi", "dunce", @@ -5307,6 +5680,12 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + [[package]] name = "zerotrie" version = "0.2.3" diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 72273fb..f5fd11a 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -25,3 +25,8 @@ tauri = { version = "2.10.3", features = [] } tauri-plugin-dialog = "2" tauri-plugin-fs = "2" tauri-plugin-log = "2" +dirs = "5.0" +ureq = "2.9" +whisper-rs = "0.16.0" +hound = "3.5" +tempfile = "3.10" diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index c6566eb..b904236 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -1,5 +1,7 @@ // --- Commands --- +mod transcription; + /// Returns the backend URL. Stubbed for now; will be replaced once the /// Python/Rust backend is fully wired up. #[tauri::command] @@ -34,6 +36,26 @@ fn decrypt_string(encrypted: String) -> Result { .and_then(|b| String::from_utf8(b).map_err(|e| format!("utf8 error: {e}"))) } +/// Ensure a Whisper model is downloaded, downloading it if not present. +#[tauri::command] +async fn ensure_model(model_name: String) -> Result { + tauri::async_runtime::spawn_blocking(move || { + transcription::ensure_model_downloaded(&model_name) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + +/// Transcribe audio file using Whisper.cpp (runs on a background thread) +#[tauri::command] +async fn transcribe_audio(file_path: String, model_name: String, language: Option) -> Result { + tauri::async_runtime::spawn_blocking(move || { + transcription::transcribe_audio(&file_path, &model_name, language.as_deref()) + }) + .await + .map_err(|e| format!("Task error: {:?}", e))? +} + // --- App entry point --- #[cfg_attr(mobile, tauri::mobile_entry_point)] @@ -55,6 +77,8 @@ pub fn run() { get_backend_url, encrypt_string, decrypt_string, + ensure_model, + transcribe_audio, ]) .run(tauri::generate_context!()) .expect("error while running tauri application"); diff --git a/src-tauri/src/transcription.rs b/src-tauri/src/transcription.rs new file mode 100644 index 0000000..2a23674 --- /dev/null +++ b/src-tauri/src/transcription.rs @@ -0,0 +1,201 @@ +use std::fs; +use std::process::Command; +use whisper_rs::{WhisperContext, WhisperContextParameters, FullParams, SamplingStrategy}; + +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)] +pub struct TranscriptionResult { + pub words: Vec, + pub segments: Vec, + pub language: String, +} + +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)] +pub struct Word { + pub word: String, + pub start: f64, + pub end: f64, + pub confidence: f64, +} + +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)] +pub struct Segment { + pub id: usize, + pub start: f64, + pub end: f64, + pub text: String, + pub words: Vec, +} + +/// Extract audio from a video/audio file to a 16kHz mono WAV using ffmpeg +fn extract_to_wav(input_path: &str, output_path: &str) -> Result<(), String> { + let status = Command::new("ffmpeg") + .args(["-y", "-i", input_path, "-vn", "-ar", "16000", "-ac", "1", "-f", "wav", output_path]) + .status() + .map_err(|e| format!("Failed to run ffmpeg: {}", e))?; + + if !status.success() { + return Err(format!("ffmpeg exited with code: {:?}", status.code())); + } + Ok(()) +} + +/// Transcribe audio file using whisper-rs (real Whisper.cpp inference) +pub fn transcribe_audio( + file_path: &str, + model_name: &str, + language: Option<&str>, +) -> Result { + // Ensure model is downloaded + let model_path = ensure_model_downloaded(model_name)?; + + // Extract audio to temp 16kHz mono WAV + let tmp_wav = tempfile::Builder::new() + .suffix(".wav") + .tempfile() + .map_err(|e| format!("Failed to create temp file: {}", e))?; + let wav_path = tmp_wav.path().to_string_lossy().to_string(); + + extract_to_wav(file_path, &wav_path)?; + + // Read WAV as f32 samples + let mut reader = hound::WavReader::open(&wav_path) + .map_err(|e| format!("Failed to read WAV: {}", e))?; + let spec = reader.spec(); + let samples: Vec = match spec.sample_format { + hound::SampleFormat::Int => reader + .samples::() + .map(|s| s.map(|v| v as f32 / 32768.0).map_err(|e| format!("{}", e))) + .collect::, _>>()?, + hound::SampleFormat::Float => reader + .samples::() + .map(|s| s.map_err(|e| format!("{}", e))) + .collect::, _>>()?, + }; + + // Load Whisper model and transcribe + let ctx_params = WhisperContextParameters::default(); + let ctx = WhisperContext::new_with_params(&model_path, ctx_params) + .map_err(|e| format!("Failed to load model: {:?}", e))?; + let mut state = ctx.create_state() + .map_err(|e| format!("Failed to create state: {:?}", e))?; + + let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); + params.set_print_special(false); + params.set_print_progress(false); + params.set_print_realtime(false); + params.set_print_timestamps(false); + params.set_token_timestamps(true); + params.set_single_segment(false); + if let Some(lang) = language { + params.set_language(Some(lang)); + } + + state.full(params, &samples) + .map_err(|e| format!("Transcription failed: {:?}", e))?; + + // Extract word-level results using the 0.16.0 iterator API + let mut all_words: Vec = Vec::new(); + let mut segments: Vec = Vec::new(); + let detected_language = language.unwrap_or("en").to_string(); + + for (seg_idx, segment) in state.as_iter().enumerate() { + let seg_text = segment.to_str_lossy() + .map_err(|e| format!("Segment text error: {:?}", e))?; + let seg_t0 = segment.start_timestamp() as f64 / 100.0; + let seg_t1 = segment.end_timestamp() as f64 / 100.0; + + let mut seg_words: Vec = Vec::new(); + + for tok_i in 0..segment.n_tokens() { + if let Some(token) = segment.get_token(tok_i) { + let token_text = match token.to_str_lossy() { + Ok(t) => t.into_owned(), + Err(_) => continue, + }; + let token_data = token.token_data(); + + // Skip special tokens + let trimmed = token_text.trim(); + if trimmed.is_empty() || trimmed.starts_with('[') || trimmed.starts_with('<') { + continue; + } + + let word = Word { + word: trimmed.to_string(), + start: token_data.t0 as f64 / 100.0, + end: token_data.t1 as f64 / 100.0, + confidence: token_data.p as f64, + }; + all_words.push(word.clone()); + seg_words.push(word); + } + } + + segments.push(Segment { + id: seg_idx, + start: seg_t0, + end: seg_t1, + text: seg_text.trim().to_string(), + words: seg_words, + }); + } + + Ok(TranscriptionResult { + words: all_words, + segments, + language: detected_language, + }) +} + +/// Download and cache Whisper model +pub fn ensure_model_downloaded(model_name: &str) -> Result { + // Get app data directory for storing models + let app_data_dir = dirs::data_dir() + .ok_or("Could not find app data directory")? + .join("TalkEdit") + .join("models"); + + // Create directory if it doesn't exist + fs::create_dir_all(&app_data_dir) + .map_err(|e| format!("Failed to create models directory: {}", e))?; + + let model_path = app_data_dir.join(format!("ggml-{}.bin", model_name)); + + // Check if model already exists + if model_path.exists() { + return Ok(model_path.to_string_lossy().to_string()); + } + + // Only download smaller models automatically + let allowed_models = ["tiny", "base", "small"]; + if !allowed_models.contains(&model_name) { + return Err(format!("Model '{}' is not available for automatic download. Only tiny, base, and small models are supported.", model_name)); + } + + println!("Downloading Whisper model: {}...", model_name); + + // Download the model from ggerganov's whisper.cpp repo + let url = format!("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-{}.bin", model_name); + let response = ureq::get(&url) + .call() + .map_err(|e| format!("Failed to download model: {}", e))?; + + let len = response + .header("content-length") + .and_then(|s| s.parse::().ok()) + .unwrap_or(0); + + println!("Model size: {} bytes", len); + + let mut reader = response.into_reader(); + let mut file = fs::File::create(&model_path) + .map_err(|e| format!("Failed to create model file: {}", e))?; + + std::io::copy(&mut reader, &mut file) + .map_err(|e| format!("Failed to write model file: {}", e))?; + + println!("Model downloaded successfully: {}", model_path.display()); + + Ok(model_path.to_string_lossy().to_string()) +} +