2025-01-28 17:00:03 -05:00
|
|
|
from pathlib import Path
|
2026-02-18 10:26:09 -05:00
|
|
|
import tempfile
|
|
|
|
|
import os
|
|
|
|
|
import logging
|
2025-01-28 17:00:03 -05:00
|
|
|
|
2026-02-13 00:15:07 -05:00
|
|
|
try:
|
|
|
|
|
from moviepy import AudioFileClip
|
|
|
|
|
except ImportError:
|
|
|
|
|
from moviepy.editor import AudioFileClip
|
|
|
|
|
|
2026-02-18 10:26:09 -05:00
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
# Registry of temporary WAV paths (stored as strings). extract_audio() appends
# each file it writes; cleanup_temp_audio() deletes them and empties the list.
_temp_audio_files = []
|
|
|
|
|
|
|
|
|
|
|
2025-01-28 17:00:03 -05:00
|
|
|
def extract_audio(video_path: Path) -> Path:
    """Extract the audio track of a media file into a temporary WAV file.

    The WAV is written to a fresh ``videotranscriber_*`` temp directory and
    its path is recorded in ``_temp_audio_files`` so that
    ``cleanup_temp_audio()`` can delete it later.

    Args:
        video_path: Location of the source media file. A plain string is
            accepted as well and converted to a ``Path``.

    Returns:
        Path of the extracted ``<stem>_audio.wav`` file.

    Raises:
        RuntimeError: If the file has no audio track / zero duration, if the
            written WAV is missing or empty, or if any other error occurs
            during extraction (the original exception is chained as the
            cause).
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import shutil

    video_path = Path(video_path)
    logger.info(f"[extract_audio] Extracting audio from: {video_path}")

    audio = None
    temp_dir = None
    succeeded = False
    try:
        audio = AudioFileClip(str(video_path))
        if audio.duration is None or audio.duration == 0:
            logger.error(f"[extract_audio] File has no audio track or zero duration: {video_path}")
            raise RuntimeError(f"File has no audio track: {video_path}")
        logger.info(f"[extract_audio] Duration: {audio.duration:.2f}s, fps: {audio.fps}")

        temp_dir = tempfile.mkdtemp(prefix="videotranscriber_")
        audio_path = Path(temp_dir) / f"{video_path.stem}_audio.wav"

        try:
            audio.write_audiofile(str(audio_path), logger=None)
        except TypeError:
            # moviepy 1.x uses verbose parameter; moviepy 2.x removed it
            audio.write_audiofile(str(audio_path), verbose=False, logger=None)

        # Close before inspecting the output so the reader is released first;
        # clearing the reference keeps the finally block from closing twice.
        audio.close()
        audio = None

        if not audio_path.exists() or audio_path.stat().st_size == 0:
            logger.error(f"[extract_audio] Output WAV is empty or missing: {audio_path}")
            raise RuntimeError(f"Audio extraction produced empty file: {audio_path}")

        logger.info(f"[extract_audio] Extracted to: {audio_path} ({audio_path.stat().st_size} bytes)")
        _temp_audio_files.append(str(audio_path))
        succeeded = True
        return audio_path
    except RuntimeError:
        # Already carries a caller-facing message; propagate unchanged.
        raise
    except Exception as e:
        logger.error(f"[extract_audio] Failed for '{video_path}': {e}", exc_info=True)
        raise RuntimeError(f"Audio extraction failed: {e}") from e
    finally:
        # Release the clip on every failure path (it is None after a normal close).
        if audio is not None:
            try:
                audio.close()
            except Exception as close_err:
                # Never let a close failure mask the real error.
                logger.debug(f"[extract_audio] Could not close clip for '{video_path}': {close_err}")
        # A failed extraction never registers its file for cleanup, so the
        # temp directory would otherwise be leaked.
        if not succeeded and temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
2026-02-18 10:26:09 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def cleanup_temp_audio() -> int:
    """Remove all temporary audio files created during processing.

    Every path registered in ``_temp_audio_files`` is deleted; the parent
    directory is removed as well when it is left empty. Failures are logged
    as warnings and never raised, and the registry is emptied afterwards
    regardless of individual failures.

    Returns:
        Number of files that were actually deleted.
    """
    cleaned = 0
    for fpath in _temp_audio_files:
        try:
            # EAFP: try the delete instead of exists()-then-remove(), which
            # can race with another process removing the file.
            try:
                os.remove(fpath)
            except FileNotFoundError:
                pass  # Already gone; still try to tidy its directory below.
            else:
                cleaned += 1

            # Drop the containing temp directory, but only once it is empty,
            # so unrelated files are never touched.
            parent = os.path.dirname(fpath)
            if os.path.isdir(parent) and not os.listdir(parent):
                os.rmdir(parent)
        except Exception as e:
            # Cleanup is best-effort: report the problem and keep going.
            logger.warning(f"Could not remove temp file {fpath}: {e}")

    _temp_audio_files.clear()
    return cleaned
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_video_duration(video_path: Path):
    """Return the duration of a video/audio file in seconds.

    The file is opened as an audio clip, its ``duration`` is read, and the
    clip is closed again. A missing or zero duration is logged as a warning
    but still returned as-is. Any exception is logged and yields ``None``.
    """
    try:
        media = AudioFileClip(str(video_path))
        seconds = media.duration
        media.close()

        has_length = seconds is not None and seconds != 0
        if not has_length:
            logger.warning(f"[get_video_duration] Zero or null duration for: {video_path}")
    except Exception as exc:
        logger.error(f"[get_video_duration] Failed for '{video_path}': {exc}", exc_info=True)
        return None
    else:
        return seconds
|