307 lines
10 KiB
Python
307 lines
10 KiB
Python
|
|
#!/usr/bin/env python
|
||
|
|
"""
|
||
|
|
Installation script for OBS Recording Transcriber.
|
||
|
|
This script helps install all required dependencies and checks for common issues.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
import platform
|
||
|
|
import subprocess
|
||
|
|
import shutil
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
def print_header(text):
    """Display ``text`` as a banner framed by two 80-character rules.

    A blank line precedes the top rule and the title line is indented by
    one space.
    """
    rule = "=" * 80
    # One print call, newline-separated: blank line + rule, title, rule.
    print("\n" + rule, f" {text}", rule, sep="\n")
|
||
|
|
|
||
|
|
def print_step(text):
    """Announce an installation step: a blank line, then ``>> <text>``."""
    print("", f">> {text}", sep="\n")
|
||
|
|
|
||
|
|
def run_command(command, check=True):
    """Run a command, capture its output, and return the completed process.

    Args:
        command: Either a shell command line (``str``), executed through the
            system shell, or a sequence of program arguments, executed
            directly without a shell.
        check: When true, a non-zero exit status is treated as a failure.

    Returns:
        The ``subprocess.CompletedProcess`` (with text ``stdout`` and
        ``stderr``), or ``None`` when the command could not be started or,
        with ``check`` true, exited with a non-zero status. Failures are
        reported on stdout rather than raised.
    """
    try:
        return subprocess.run(
            command,
            # Only a string needs shell parsing; argument lists run directly.
            shell=isinstance(command, str),
            check=check,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {command}")
        print(f"Error message: {e.stderr}")
    except OSError as e:
        # Raised when the program (or the shell itself) cannot be launched.
        print(f"Error executing command: {command}")
        print(f"Error message: {e}")
    return None
|
||
|
|
|
||
|
|
def check_python_version():
    """Report whether the running interpreter is Python 3.8 or newer.

    The interpreter version is printed either way; an upgrade hint is added
    when the version is too old.

    Returns:
        bool: True when the version requirement is met, otherwise False.
    """
    print_step("Checking Python version")

    # Compare (major, minor) as a tuple instead of field by field.
    if sys.version_info[:2] >= (3, 8):
        print(f"Python version: {sys.version}")
        return True

    print(f"Python 3.8 or higher is required. You have {sys.version}")
    print("Please upgrade your Python installation.")
    return False
|
||
|
|
|
||
|
|
def check_ffmpeg():
    """Check that an ``ffmpeg`` executable is on PATH and can report its version.

    When FFmpeg is missing, platform-specific installation hints are printed.

    Returns:
        bool: True when ``ffmpeg -version`` ran successfully, otherwise False.
    """
    print_step("Checking FFmpeg installation")

    if shutil.which("ffmpeg") is not None:
        # The binary exists; make sure it actually runs and show its banner.
        probe = run_command("ffmpeg -version")
        if not probe:
            return False
        banner = probe.stdout.splitlines()[0]
        print(f"FFmpeg is installed: {banner}")
        return True

    print("FFmpeg not found in PATH.")
    print("Please install FFmpeg:")
    os_name = platform.system()
    if os_name == "Windows":
        print(" - Download from: https://www.gyan.dev/ffmpeg/builds/")
        print(" - Extract and add the bin folder to your PATH")
    elif os_name == "Darwin":  # macOS
        print(" - Install with Homebrew: brew install ffmpeg")
    else:  # Linux
        print(" - Install with apt: sudo apt update && sudo apt install ffmpeg")
    return False
|
||
|
|
|
||
|
|
def check_gpu():
    """Detect which accelerator the PyTorch installation should target.

    Detection order:
      1. NVIDIA: ``nvidia-smi`` is on PATH and exits successfully.
      2. Apple Silicon: macOS running on an ``arm64`` machine.
      3. Otherwise fall back to the CPU.

    Returns:
        str: ``"nvidia"``, ``"apple"``, or ``"cpu"``.
    """
    print_step("Checking GPU availability")

    # Probe for nvidia-smi on every platform. Restricting the probe to
    # Windows made Linux machines with an NVIDIA GPU fall through to "cpu".
    if shutil.which("nvidia-smi"):
        result = run_command("nvidia-smi", check=False)
        if result and result.returncode == 0:
            print("NVIDIA GPU detected:")
            # Only the first lines of the report are shown.
            for line in result.stdout.splitlines()[:10]:
                print(f" {line}")
            return "nvidia"

    # Check for Apple Silicon
    if platform.system() == "Darwin" and platform.machine() == "arm64":
        print("Apple Silicon (M1/M2) detected")
        return "apple"

    print("No GPU detected or GPU drivers not installed. CPU will be used for processing.")
    return "cpu"
|
||
|
|
|
||
|
|
def setup_virtual_env():
    """Ensure a ``venv`` directory exists, creating the environment if needed.

    After the environment is found or created, the activation instructions
    are printed via ``activate_venv``.

    Returns:
        bool: True when the environment already existed or was created;
        False when the ``venv`` module is unavailable or creation failed.
    """
    print_step("Setting up virtual environment")

    # Importing the module is only an availability probe; it is not used.
    try:
        import venv  # noqa: F401
    except ImportError:
        print("Python venv module is not available. Please install it.")
        return False
    print("Python venv module is available")

    env_dir = Path("venv")
    if not env_dir.exists():
        print(f"Creating virtual environment at {env_dir}")
        try:
            subprocess.run([sys.executable, "-m", "venv", "venv"], check=True)
        except subprocess.CalledProcessError as err:
            print(f"Error creating virtual environment: {err}")
            return False
        print("Virtual environment created successfully")
    else:
        print(f"Virtual environment already exists at {env_dir}")

    # Both the "existing" and the "freshly created" paths end the same way.
    activate_venv()
    return True
|
||
|
|
|
||
|
|
def activate_venv():
    """Print the shell command that activates the ``venv`` environment.

    Nothing is actually activated: activation performed from this script
    would only affect a subprocess, not the user's parent shell, so the
    command is shown for the user to run.

    Returns:
        bool: False when the ``venv`` directory does not exist, else True.
    """
    print_step("Activating virtual environment")

    env_dir = Path("venv")
    if not env_dir.exists():
        print("Virtual environment not found")
        return False

    # Pick the activation script and the shell builtin that loads it.
    if platform.system() == "Windows":
        launcher, script = "call", env_dir / "Scripts" / "activate.bat"
    else:
        launcher, script = "source", env_dir / "bin" / "activate"

    print("To activate the virtual environment, run:")
    print(f" {launcher} {script}")
    return True
|
||
|
|
|
||
|
|
def install_pytorch(gpu_type):
    """Install PyTorch, torchvision and torchaudio for the given accelerator.

    Args:
        gpu_type: ``"nvidia"`` selects the CUDA 11.8 wheel index, ``"apple"``
            the default index, and any other value the CPU-only wheel index.

    Returns:
        bool: True when pip exited successfully, otherwise False.
    """
    print_step("Installing PyTorch")

    # Run pip through the interpreter executing this script, so packages go
    # into the same environment that check_installation() imports from. A
    # bare "pip" on PATH may belong to a different Python installation.
    pip = f'"{sys.executable}" -m pip'
    packages = "torch torchvision torchaudio"

    if gpu_type == "nvidia":
        print("Installing PyTorch with CUDA support")
        cmd = f"{pip} install {packages} --index-url https://download.pytorch.org/whl/cu118"
    elif gpu_type == "apple":
        print("Installing PyTorch with MPS support")
        cmd = f"{pip} install {packages}"
    else:
        print("Installing PyTorch (CPU version)")
        cmd = f"{pip} install {packages} --index-url https://download.pytorch.org/whl/cpu"

    result = run_command(cmd)
    if result and result.returncode == 0:
        print("PyTorch installed successfully")
        return True

    print("Failed to install PyTorch")
    return False
|
||
|
|
|
||
|
|
def install_dependencies():
    """Install the packages listed in ``requirements.txt``.

    The file is looked up relative to the current working directory.

    Returns:
        bool: True when pip exited successfully; False when the file is
        missing or the installation failed.
    """
    print_step("Installing dependencies from requirements.txt")

    requirements_path = Path("requirements.txt")
    if not requirements_path.exists():
        print("requirements.txt not found")
        return False

    # Run pip through the interpreter executing this script, so packages go
    # into the same environment that check_installation() imports from. A
    # bare "pip" on PATH may belong to a different Python installation.
    result = run_command(f'"{sys.executable}" -m pip install -r requirements.txt')
    if result and result.returncode == 0:
        print("Dependencies installed successfully")
        return True

    print("Some dependencies failed to install. See error messages above.")
    return False
|
||
|
|
|
||
|
|
def install_tokenizers():
    """Install the ``tokenizers`` package, falling back to a source build.

    A regular install is attempted first. If it fails, the install is
    retried with ``--no-binary tokenizers``. When both attempts fail,
    hints about the required build tools are printed.

    Returns:
        bool: True when either attempt succeeded, otherwise False.
    """
    print_step("Installing tokenizers package")

    # Run pip through the interpreter executing this script, so the package
    # goes into the same environment that check_installation() imports from.
    # A bare "pip" on PATH may belong to a different Python installation.
    pip = f'"{sys.executable}" -m pip'

    # First try the normal installation
    result = run_command(f"{pip} install tokenizers", check=False)
    if result and result.returncode == 0:
        print("Tokenizers installed successfully")
        return True

    # If that fails, try the no-binary option
    print("Standard installation failed, trying alternative method...")
    result = run_command(f"{pip} install tokenizers --no-binary tokenizers", check=False)
    if result and result.returncode == 0:
        print("Tokenizers installed successfully with alternative method")
        return True

    print("Failed to install tokenizers. You may need to install Rust or Visual C++ Build Tools.")
    if platform.system() == "Windows":
        print("Download Visual C++ Build Tools: https://visualstudio.microsoft.com/visual-cpp-build-tools/")
    print("Install Rust: https://rustup.rs/")
    return False
|
||
|
|
|
||
|
|
def check_installation():
    """Verify the installation by importing the key packages.

    Required packages that fail to import make the check fail. Optional
    packages only produce a warning.

    Returns:
        bool: True when every required package imported, otherwise False.
    """
    print_step("Verifying installation")

    required_packages = [
        "streamlit",
        "torch",
        "transformers",
        "whisper",
        "numpy",
        "sklearn",
    ]
    # Display name -> module actually imported. For pyannote.audio only the
    # top-level "pyannote" package is imported.
    optional_packages = {
        "pyannote.audio": "pyannote",
        "iso639": "iso639",
    }

    all_successful = True
    for package in required_packages:
        failure = _import_failure(package)
        if failure is None:
            print(f"✓ {package} imported successfully")
        else:
            print(f"✗ Failed to import {package} ({type(failure).__name__}: {failure})")
            all_successful = False

    # Check optional packages
    print("\nChecking optional packages:")
    for package, module_name in optional_packages.items():
        if _import_failure(module_name) is None:
            print(f"✓ {package} imported successfully")
        else:
            print(f"⚠ {package} not available (required for some advanced features)")

    return all_successful


def _import_failure(module_name):
    """Try to import ``module_name``; return the exception raised, or None."""
    import importlib

    try:
        importlib.import_module(module_name)
    except Exception as exc:
        # Deliberately broad: a broken install can fail with something other
        # than ImportError (e.g. OSError when a native library cannot be
        # loaded), and one bad package must not abort the verification.
        return exc
    return None
|
||
|
|
|
||
|
|
def main():
    """Run the full installation: checks, environment setup, installs, summary.

    Stops early when the Python version is too old. Every other failure is
    reported and the run continues, so the summary at the end reflects what
    worked and what did not.
    """
    print_header("OBS Recording Transcriber - Installation Script")

    # Check prerequisites
    python_ok = check_python_version()
    if not python_ok:
        return

    ffmpeg_available = check_ffmpeg()
    gpu_type = check_gpu()

    # Setup environment
    if not setup_virtual_env():
        print("Failed to set up virtual environment. Continuing with system Python...")

    # Install packages
    print("\nReady to install packages. Make sure your virtual environment is activated.")
    input("Press Enter to continue...")

    install_pytorch(gpu_type)
    install_dependencies()
    install_tokenizers()

    # Verify installation
    success = check_installation()

    print_header("Installation Summary")
    # Reuse the earlier result: calling check_python_version() again here
    # would print its step header and version line inside the summary.
    print(f"Python: {'✓ OK' if python_ok else '✗ Needs upgrade'}")
    print(f"FFmpeg: {'✓ Installed' if ffmpeg_available else '✗ Not found'}")
    print(f"GPU Support: {gpu_type.upper()}")
    print(f"Dependencies: {'✓ Installed' if success else '⚠ Some issues'}")

    print("\nNext steps:")
    # Number the steps with a counter so the list stays sequential whether
    # or not the FFmpeg step is shown.
    step = 1
    if not ffmpeg_available:
        print(f"{step}. Install FFmpeg (required for audio processing)")
        step += 1

    print(f"{step}. Activate your virtual environment:")
    if platform.system() == "Windows":
        print(" venv\\Scripts\\activate")
    else:
        print(" source venv/bin/activate")
    step += 1

    print(f"{step}. Run the application:")
    print(" streamlit run app.py")

    print("\nFor advanced features like speaker diarization:")
    print("1. Get a HuggingFace token: https://huggingface.co/settings/tokens")
    print("2. Request access to pyannote models: https://huggingface.co/pyannote/speaker-diarization-3.0")

    print("\nSee INSTALLATION.md for more details and troubleshooting.")
|
||
|
|
|
||
|
|
# Run the installer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|