changed to python312

This commit is contained in:
2026-03-28 12:26:45 -06:00
parent 4a857d8cbf
commit 2ffc406b10
9 changed files with 443 additions and 64 deletions

30
.gitignore vendored
View File

@ -6,10 +6,15 @@ frontend/dist/
# Python
venv/
.venv312/
__pycache__/
*.pyc
*.pyo
*.egg-info/
.pytest_cache/
.mypy_cache/
.coverage
htmlcov/
# IDE / Editor
.vscode/
@ -18,16 +23,33 @@ __pycache__/
# OS files
.env
.env.local
.env.*.local
.DS_Store
Thumbs.db
*.swp
*.tmp
# Logs
*.log
logs/
# Lock files (root only — frontend lock is committed)
/package-lock.json
# Electron build output
# Build output
frontend/dist/
dist/
build/
*.asar
target/
src-tauri/target/
# Node.js
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Rust
Cargo.lock
# Lock files (root only — frontend lock is committed)
/package-lock.json

View File

@ -61,18 +61,37 @@ async def serve_local_file(request: Request, path: str = Query(...)):
"""Stream a local file with HTTP Range support (required for video seeking)."""
file_path = Path(path)
if not file_path.is_file():
logger.warning(f"[serve_file] File not found: {path}")
raise HTTPException(status_code=404, detail=f"File not found: {path}")
file_size = file_path.stat().st_size
content_type = MIME_MAP.get(file_path.suffix.lower(), "application/octet-stream")
range_header = request.headers.get("range")
logger.info(
f"[serve_file] {file_path.name} | size={file_size} | "
f"type={content_type} | range={range_header or 'none'}"
)
if content_type == "application/octet-stream":
    # The extension is not in MIME_MAP, so the generic fallback type is sent.
    # The two f-strings are adjacent literals and are concatenated verbatim,
    # so the first one must end with an explicit separator.
    logger.warning(
        f"[serve_file] Unknown MIME type for extension '{file_path.suffix}' — "
        f"browser may fail to decode audio/video for '{file_path.name}'"
    )
if file_size == 0:
logger.error(f"[serve_file] File is empty: {path}")
raise HTTPException(status_code=422, detail=f"File is empty: {path}")
if range_header:
try:
range_spec = range_header.replace("bytes=", "")
range_start_str, range_end_str = range_spec.split("-")
range_start = int(range_start_str) if range_start_str else 0
range_end = int(range_end_str) if range_end_str else file_size - 1
range_end = min(range_end, file_size - 1)
except (ValueError, TypeError) as e:
logger.error(f"[serve_file] Malformed Range header '{range_header}': {e}")
raise HTTPException(status_code=416, detail=f"Invalid Range header: {range_header}")
content_length = range_end - range_start + 1
def iter_range():

View File

@ -1,33 +1,164 @@
# FastAPI backend
fastapi>=0.115.0
uvicorn[standard]>=0.32.0
websockets>=14.0
python-multipart>=0.0.12
# Transcription (WhisperX for word-level alignment)
whisperx>=3.1.0
faster-whisper>=1.0.0
# Audio / Video processing
moviepy>=1.0.3
ffmpeg-python>=0.2.0
soundfile>=0.10.3
# ML / GPU
torch>=2.0.0
torchaudio>=2.0.0
numpy>=1.24.0
# Speaker diarization
pyannote.audio>=3.1.1
# AI providers
openai>=1.50.0
anthropic>=0.39.0
requests>=2.28.0
# Audio cleanup
deepfilternet>=0.5.0
# Utilities
pydantic>=2.0.0
aiohappyeyeballs==2.6.1
aiohttp==3.13.4
aiosignal==1.4.0
alembic==1.18.4
annotated-doc==0.0.4
annotated-types==0.7.0
anthropic==0.86.0
antlr4-python3-runtime==4.9.3
anyio==4.13.0
appdirs==1.4.4
asteroid-filterbanks==0.4.0
attrs==26.1.0
av==17.0.0
certifi==2026.2.25
cffi==2.0.0
charset-normalizer==3.4.6
click==8.3.1
colorlog==6.10.1
contourpy==1.3.3
ctranslate2==4.7.1
cuda-bindings==12.9.4
cuda-pathfinder==1.2.2
cuda-toolkit==12.6.3
cycler==0.12.1
Cython==0.29.37
decorator==5.2.1
DeepFilterLib==0.5.6
DeepFilterNet==0.5.6
distro==1.9.0
docstring_parser==0.17.0
einops==0.8.2
fastapi==0.135.2
faster-whisper==1.2.1
ffmpeg-python==0.2.0
filelock==3.25.2
flatbuffers==25.12.19
fonttools==4.62.1
frozenlist==1.8.0
fsspec==2026.2.0
future==1.0.0
googleapis-common-protos==1.73.1
greenlet==3.3.2
grpcio==1.78.0
h11==0.16.0
hf-xet==1.4.2
httpcore==1.0.9
httptools==0.7.1
httpx==0.28.1
huggingface_hub==0.36.2
HyperPyYAML==1.2.3
idna==3.11
ImageIO==2.37.3
imageio-ffmpeg==0.6.0
importlib_metadata==8.7.1
Jinja2==3.1.6
jiter==0.13.0
joblib==1.5.3
julius==0.2.7
kiwisolver==1.5.0
lightning==2.6.1
lightning-utilities==0.15.3
loguru==0.7.3
Mako==1.3.10
markdown-it-py==4.0.0
MarkupSafe==3.0.3
matplotlib==3.10.8
maturin==1.12.6
mdurl==0.1.2
moviepy==2.2.1
mpmath==1.3.0
multidict==6.7.1
networkx==3.6.1
nltk==3.9.4
numpy==2.4.3
nvidia-cublas-cu12==12.8.4.1
nvidia-cuda-cupti-cu12==12.8.90
nvidia-cuda-nvrtc-cu12==12.8.93
nvidia-cuda-runtime-cu12==12.8.90
nvidia-cudnn-cu12==9.10.2.21
nvidia-cufft-cu12==11.3.3.83
nvidia-cufile-cu12==1.13.1.3
nvidia-curand-cu12==10.3.9.90
nvidia-cusolver-cu12==11.7.3.90
nvidia-cusparse-cu12==12.5.8.93
nvidia-cusparselt-cu12==0.7.1
nvidia-nccl-cu12==2.27.3
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvshmem-cu12==3.4.5
nvidia-nvtx-cu12==12.8.90
omegaconf==2.3.0
onnxruntime==1.24.4
openai==2.30.0
opentelemetry-api==1.40.0
opentelemetry-exporter-otlp==1.40.0
opentelemetry-exporter-otlp-proto-common==1.40.0
opentelemetry-exporter-otlp-proto-grpc==1.40.0
opentelemetry-exporter-otlp-proto-http==1.40.0
opentelemetry-proto==1.40.0
opentelemetry-sdk==1.40.0
opentelemetry-semantic-conventions==0.61b0
optuna==4.8.0
packaging==23.2
pandas==3.0.1
pillow==11.3.0
primePy==1.3
proglog==0.1.12
propcache==0.4.1
protobuf==6.33.6
pyannote-audio==4.0.4
pyannote-core==6.0.1
pyannote-database==6.1.1
pyannote-metrics==4.0.0
pyannote-pipeline==4.0.0
pyannoteai-sdk==0.4.0
pycparser==3.0
pydantic==2.12.5
pydantic_core==2.41.5
Pygments==2.19.2
pyparsing==3.3.2
python-dateutil==2.9.0.post0
python-dotenv==1.2.2
python-multipart==0.0.22
pytorch-lightning==2.6.1
pytorch-metric-learning==2.9.0
PyYAML==6.0.3
regex==2026.2.28
requests==2.33.0
rich==14.3.3
ruamel.yaml==0.18.17
ruamel.yaml.clib==0.2.15
safetensors==0.7.0
scikit-learn==1.8.0
scipy==1.17.1
setuptools==70.2.0
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
sortedcontainers==2.4.0
soundfile==0.13.1
SQLAlchemy==2.0.48
starlette==1.0.0
sympy==1.14.0
threadpoolctl==3.6.0
tokenizers==0.22.2
torch==2.8.0
torch-audiomentations==0.12.0
torch_pitch_shift==1.2.5
torchaudio==2.8.0
torchmetrics==1.9.0
tqdm==4.67.3
transformers==4.57.6
triton==3.4.0
typer==0.24.1
typing-inspection==0.4.2
typing_extensions==4.15.0
urllib3==2.6.3
uvicorn==0.42.0
uvloop==0.22.1
watchfiles==1.1.1
websockets==16.0
wheel==0.46.3
whisperx==3.8.4
yarl==1.23.0
zipp==3.23.0

View File

@ -1,9 +1,14 @@
"""Audio processing endpoint (noise reduction / Studio Sound)."""
import hashlib
import logging
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
from fastapi import APIRouter, HTTPException
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel
from services.audio_cleaner import clean_audio, is_deepfilter_available
@ -11,6 +16,9 @@ from services.audio_cleaner import clean_audio, is_deepfilter_available
logger = logging.getLogger(__name__)
router = APIRouter()
# Simple in-process cache: video path → extracted WAV path
_waveform_cache: dict[str, str] = {}
class AudioCleanRequest(BaseModel):
input_path: str
@ -36,3 +44,58 @@ async def audio_capabilities():
return {
"deepfilternet_available": is_deepfilter_available(),
}
@router.get("/audio/waveform")
async def get_waveform_audio(path: str = Query(...)):
    """
    Extract audio from any video/audio file and return it as a WAV.

    The WAV is cached on disk for subsequent requests.
    Uses FFmpeg directly so it works with MKV, MOV, AVI, MP4, etc.

    Args:
        path: Filesystem path of the source media file (query parameter).

    Returns:
        A FileResponse serving the extracted mono 22050 Hz 16-bit PCM WAV.

    Raises:
        HTTPException: 404 if ``path`` is not a file; 500 if FFmpeg cannot be
            started, exits non-zero, or produces an empty/missing output.
    """
    # Function-scope imports: only this endpoint needs them.
    import asyncio
    import shutil

    file_path = Path(path)
    if not file_path.is_file():
        logger.warning(f"[waveform] File not found: {path}")
        raise HTTPException(status_code=404, detail=f"File not found: {path}")

    # Cache key based on path + mtime so stale cache is auto-invalidated
    mtime = file_path.stat().st_mtime
    cache_key = hashlib.md5(f"{path}:{mtime}".encode()).hexdigest()

    cached_path = _waveform_cache.get(cache_key)
    if cached_path is not None:
        if Path(cached_path).exists():
            logger.info(f"[waveform] Cache hit for {file_path.name}")
            return FileResponse(cached_path, media_type="audio/wav")
        # The cached WAV disappeared from disk; forget it and re-extract.
        _waveform_cache.pop(cache_key, None)

    logger.info(f"[waveform] Extracting audio from: {file_path.name}")

    tmp_dir = tempfile.mkdtemp(prefix="talkedit_waveform_")
    out_wav = Path(tmp_dir) / f"{cache_key}.wav"

    # Downsample to mono 22050 Hz — enough for waveform drawing, small file
    cmd = [
        "ffmpeg", "-y",
        "-i", str(file_path),
        "-vn",                    # drop video
        "-ac", "1",               # mono
        "-ar", "22050",           # 22 kHz sample rate
        "-acodec", "pcm_s16le",   # 16-bit PCM WAV
        str(out_wav),
    ]

    try:
        # subprocess.run blocks until FFmpeg exits; run it in a worker thread
        # so the event loop keeps serving other requests in the meantime.
        # errors="replace" keeps non-UTF-8 FFmpeg output from raising while
        # stderr is being decoded.
        result = await asyncio.to_thread(
            subprocess.run, cmd, capture_output=True, text=True, errors="replace"
        )
    except OSError as e:
        # FFmpeg could not be started at all (e.g. the binary is not installed).
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.error(f"[waveform] Could not run FFmpeg for {file_path.name}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to extract audio: {e}")

    if result.returncode != 0:
        shutil.rmtree(tmp_dir, ignore_errors=True)  # don't leak the temp dir on failure
        logger.error(f"[waveform] FFmpeg failed for {file_path.name}: {result.stderr[-500:]}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to extract audio: {result.stderr[-300:]}"
        )

    if not out_wav.exists() or out_wav.stat().st_size == 0:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        logger.error(f"[waveform] FFmpeg produced empty WAV for {file_path.name}")
        raise HTTPException(status_code=500, detail="Audio extraction produced empty file")

    logger.info(f"[waveform] Extracted {out_wav.stat().st_size} bytes for {file_path.name}")
    _waveform_cache[cache_key] = str(out_wav)
    return FileResponse(str(out_wav), media_type="audio/wav")

View File

@ -15,8 +15,13 @@ _temp_audio_files = []
def extract_audio(video_path: Path):
"""Extract audio from a video file into a temp directory for automatic cleanup."""
logger.info(f"[extract_audio] Extracting audio from: {video_path}")
try:
audio = AudioFileClip(str(video_path))
if audio.duration is None or audio.duration == 0:
logger.error(f"[extract_audio] File has no audio track or zero duration: {video_path}")
raise RuntimeError(f"File has no audio track: {video_path}")
logger.info(f"[extract_audio] Duration: {audio.duration:.2f}s, fps: {audio.fps}")
temp_dir = tempfile.mkdtemp(prefix="videotranscriber_")
audio_path = Path(temp_dir) / f"{video_path.stem}_audio.wav"
try:
@ -25,9 +30,16 @@ def extract_audio(video_path: Path):
# moviepy 1.x uses verbose parameter; moviepy 2.x removed it
audio.write_audiofile(str(audio_path), verbose=False, logger=None)
audio.close()
if not audio_path.exists() or audio_path.stat().st_size == 0:
logger.error(f"[extract_audio] Output WAV is empty or missing: {audio_path}")
raise RuntimeError(f"Audio extraction produced empty file: {audio_path}")
logger.info(f"[extract_audio] Extracted to: {audio_path} ({audio_path.stat().st_size} bytes)")
_temp_audio_files.append(str(audio_path))
return audio_path
except RuntimeError:
raise
except Exception as e:
logger.error(f"[extract_audio] Failed for '{video_path}': {e}", exc_info=True)
raise RuntimeError(f"Audio extraction failed: {e}")
@ -54,6 +66,9 @@ def get_video_duration(video_path: Path):
clip = AudioFileClip(str(video_path))
duration = clip.duration
clip.close()
if duration is None or duration == 0:
logger.warning(f"[get_video_duration] Zero or null duration for: {video_path}")
return duration
except Exception:
except Exception as e:
logger.error(f"[get_video_duration] Failed for '{video_path}': {e}", exc_info=True)
return None

View File

@ -10,6 +10,7 @@ export default function WaveformTimeline() {
const videoUrl = useEditorStore((s) => s.videoUrl);
const videoPath = useEditorStore((s) => s.videoPath);
const backendUrl = useEditorStore((s) => s.backendUrl);
const duration = useEditorStore((s) => s.duration);
const deletedRanges = useEditorStore((s) => s.deletedRanges);
const setCurrentTime = useEditorStore((s) => s.setCurrentTime);
@ -25,18 +26,62 @@ export default function WaveformTimeline() {
const loadAudio = async () => {
try {
const waveformUrl = `${backendUrl}/audio/waveform?path=${encodeURIComponent(videoPath!)}`;
console.log('[WaveformTimeline] Loading audio from waveform endpoint:', waveformUrl);
const ctx = new AudioContext();
audioContextRef.current = ctx;
const response = await fetch(videoUrl);
if (!response.ok) throw new Error(`HTTP ${response.status}`);
const response = await fetch(waveformUrl);
if (!response.ok) {
const body = await response.text().catch(() => '');
console.error(
`[WaveformTimeline] Fetch failed — HTTP ${response.status} ${response.statusText}`,
{ url: waveformUrl, body }
);
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const contentType = response.headers.get('content-type') ?? 'unknown';
const contentLength = response.headers.get('content-length');
console.log(
`[WaveformTimeline] Fetch OK — content-type: ${contentType}, size: ${contentLength ?? 'unknown'} bytes`
);
const arrayBuffer = await response.arrayBuffer();
const audioBuffer = await ctx.decodeAudioData(arrayBuffer);
console.log(`[WaveformTimeline] ArrayBuffer size: ${arrayBuffer.byteLength} bytes`);
if (arrayBuffer.byteLength === 0) {
throw new Error('Server returned an empty file');
}
let audioBuffer: AudioBuffer;
try {
audioBuffer = await ctx.decodeAudioData(arrayBuffer);
} catch (decodeErr) {
console.error(
'[WaveformTimeline] decodeAudioData failed — browser cannot decode this format.',
{
contentType,
byteLength: arrayBuffer.byteLength,
videoPath,
error: decodeErr,
}
);
throw new Error(
`Browser could not decode audio (${contentType}). ` +
`For best compatibility use MP4/AAC or WebM/Opus. Raw error: ${decodeErr}`
);
}
console.log(
`[WaveformTimeline] Decoded OK — duration: ${audioBuffer.duration.toFixed(2)}s, ` +
`channels: ${audioBuffer.numberOfChannels}, sampleRate: ${audioBuffer.sampleRate}Hz`
);
audioBufferRef.current = audioBuffer;
drawStaticWaveform();
} catch (err) {
console.warn('Could not decode audio for waveform:', err);
setAudioError('Waveform unavailable — audio could not be decoded');
console.error('[WaveformTimeline] Waveform load failed:', err);
setAudioError(`Waveform unavailable — ${err instanceof Error ? err.message : 'audio could not be decoded'}`);
}
};
@ -45,7 +90,7 @@ export default function WaveformTimeline() {
return () => {
audioContextRef.current?.close();
};
}, [videoUrl, videoPath]);
}, [videoUrl, videoPath, backendUrl]);
const drawStaticWaveform = useCallback(() => {
const canvas = waveCanvasRef.current;

46
open
View File

@ -1,4 +1,50 @@
#!/bin/bash
# Open TalkEdit app (Tauri dev mode)
#
# Makes sure the backend answers on its /health endpoint before launching
# `npx tauri dev`. If it is not running, it is started in a new terminal
# window (first supported emulator found), or in the background if none exists.

# Every path below is derived from the script's directory, so bail out if we
# cannot enter it.
cd "$(dirname "$0")" || exit 1
PROJECT_DIR="$PWD"

BACKEND_PORT=8000
BACKEND_URL="http://127.0.0.1:${BACKEND_PORT}/health"
WAIT_SECONDS=15

# Succeeds when the backend health endpoint responds.
backend_ready() {
    curl -sf "$BACKEND_URL" > /dev/null 2>&1
}

# Check if backend is already running
if backend_ready; then
    echo "Backend already running on port ${BACKEND_PORT}."
else
    echo "Backend not running — starting it in a new terminal..."

    VENV_PYTHON="${PROJECT_DIR}/.venv312/bin/python"
    BACKEND_DIR="${PROJECT_DIR}/backend"

    # Command line run inside the spawned terminal. %q shell-quotes each value
    # so paths containing spaces or quotes survive the extra `bash -c` parse.
    # `exec bash` keeps the window open after uvicorn exits.
    printf -v BACKEND_CMD \
        'cd %q && %q -m uvicorn main:app --host 127.0.0.1 --port %q; exec bash' \
        "$BACKEND_DIR" "$VENV_PYTHON" "$BACKEND_PORT"

    # Try common terminal emulators in order
    if command -v ghostty &>/dev/null; then
        ghostty -e bash -c "$BACKEND_CMD" &
    elif command -v kitty &>/dev/null; then
        kitty --title "TalkEdit Backend" -- bash -c "$BACKEND_CMD" &
    elif command -v alacritty &>/dev/null; then
        alacritty --title "TalkEdit Backend" -e bash -c "$BACKEND_CMD" &
    elif command -v konsole &>/dev/null; then
        konsole --new-tab -e bash -c "$BACKEND_CMD" &
    elif command -v gnome-terminal &>/dev/null; then
        gnome-terminal --title "TalkEdit Backend" -- bash -c "$BACKEND_CMD" &
    elif command -v xterm &>/dev/null; then
        xterm -T "TalkEdit Backend" -e bash -c "$BACKEND_CMD" &
    else
        echo "No supported terminal emulator found. Starting backend in background..."
        # Subshell so the directory change does not leak into this script.
        (cd "$BACKEND_DIR" && exec "$VENV_PYTHON" -m uvicorn main:app --host 127.0.0.1 --port "$BACKEND_PORT") &
    fi

    # Wait up to WAIT_SECONDS for backend to become ready
    echo -n "Waiting for backend"
    for ((i = 1; i <= WAIT_SECONDS; i++)); do
        sleep 1
        echo -n "."
        if backend_ready; then
            echo " ready!"
            break
        fi
        if (( i == WAIT_SECONDS )); then
            echo " timed out. Check the backend terminal for errors."
        fi
    done
fi

npx tauri dev

View File

@ -23,8 +23,8 @@
"python-shell": "^5.0.0"
},
"build": {
"appId": "com.dataants.cutscript",
"productName": "CutScript",
"appId": "com.talkedit.app",
"productName": "TalkEdit",
"files": [
"electron/**/*",
"frontend/dist/**/*",

View File

@ -1,22 +1,60 @@
use std::path::PathBuf;
/// Resolve the project root from the executable path.
/// In dev mode, the binary lives at: <root>/src-tauri/target/debug/<bin>
/// So the project root is 4 levels above the binary.
/// Resolve the project root at runtime.
///
/// Dev layout: <root>/src-tauri/target/debug/<bin> → walk up 4 levels
/// Packaged: use TAURI_RESOURCE_DIR env var set by the Tauri runtime,
/// falling back to a sibling `resources/` directory next to the exe.
pub fn project_root() -> PathBuf {
// Tauri sets this env var when running packaged; prefer it.
if let Ok(res) = std::env::var("TAURI_RESOURCE_DIR") {
return PathBuf::from(res);
}
let exe = std::env::current_exe().expect("Failed to get executable path");
// exe -> debug/ -> target/ -> src-tauri/ -> root
// Dev: exe is at <root>/src-tauri/target/debug/<bin>, walk up 4 levels.
if let Some(root) = exe
.parent() // debug/
.and_then(|p| p.parent()) // target/
.and_then(|p| p.parent()) // src-tauri/
.and_then(|p| p.parent()) // project root
{
if root.join("backend").exists() {
return root.to_path_buf();
}
}
// Packaged fallback: resources/ lives next to the exe.
exe.parent()
.and_then(|p| p.parent())
.and_then(|p| p.parent())
.and_then(|p| p.parent())
.map(|p| p.to_path_buf())
.unwrap_or_else(|| PathBuf::from("."))
.map(|p| p.join("resources"))
.unwrap_or_else(|| PathBuf::from("resources"))
}
/// Absolute path to the venv Python 3.10 interpreter.
/// Path to the Python interpreter.
/// Tries the bundled `python/bin/python3` first, then `.venv312` (Python 3.12), and finally falls back to `.venv` (legacy).
pub fn python_exe() -> PathBuf {
project_root().join(".venv/bin/python3.10")
let root = project_root();
// Packaged layout: resources/python/bin/python3
let bundled = root.join("python").join("bin").join("python3");
if bundled.exists() {
return bundled;
}
// Dev: prefer .venv312 (Python 3.12), fall back to .venv
let venv312 = root.join(".venv312").join("bin").join("python3.12");
if venv312.exists() {
return venv312;
}
root.join(".venv").join("bin").join("python3")
}
/// Locate the ffmpeg binary to invoke.
///
/// Returns `<project_root>/bin/ffmpeg` when that path exists (the packaged
/// sidecar layout). Otherwise returns the bare name `ffmpeg`, leaving
/// resolution to the system `PATH`, as during development.
pub fn ffmpeg_exe() -> PathBuf {
    let sidecar = project_root().join("bin").join("ffmpeg");
    if sidecar.exists() {
        sidecar
    } else {
        // No bundled copy: defer to whatever `ffmpeg` is on PATH.
        PathBuf::from("ffmpeg")
    }
}
/// Absolute path to a script in the backend directory.