""" GPU utilities for the OBS Recording Transcriber. Provides functions to detect and configure GPU acceleration. """ import logging import os import platform import subprocess import torch # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def get_gpu_info(): """ Get information about available GPUs. Returns: dict: Information about available GPUs """ gpu_info = { "cuda_available": torch.cuda.is_available(), "cuda_device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0, "cuda_devices": [], "mps_available": hasattr(torch.backends, "mps") and torch.backends.mps.is_available() } # Get CUDA device information if gpu_info["cuda_available"]: for i in range(gpu_info["cuda_device_count"]): device_props = torch.cuda.get_device_properties(i) gpu_info["cuda_devices"].append({ "index": i, "name": device_props.name, "total_memory": device_props.total_memory, "compute_capability": f"{device_props.major}.{device_props.minor}" }) return gpu_info def get_optimal_device(): """ Get the optimal device for computation. Returns: torch.device: The optimal device (cuda, mps, or cpu) """ if torch.cuda.is_available(): # If multiple GPUs are available, select the one with the most memory if torch.cuda.device_count() > 1: max_memory = 0 best_device = 0 for i in range(torch.cuda.device_count()): device_props = torch.cuda.get_device_properties(i) if device_props.total_memory > max_memory: max_memory = device_props.total_memory best_device = i return torch.device(f"cuda:{best_device}") return torch.device("cuda:0") elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): return torch.device("mps") else: return torch.device("cpu") def set_memory_limits(memory_fraction=0.8): """ Set memory limits for GPU usage. Args: memory_fraction (float): Fraction of GPU memory to use (0.0 to 1.0) Returns: bool: True if successful, False otherwise """ if not torch.cuda.is_available(): return False try: # Import only if CUDA is available import torch.cuda # Set memory fraction for each device for i in range(torch.cuda.device_count()): torch.cuda.set_per_process_memory_fraction(memory_fraction, i) return True except Exception as e: logger.error(f"Error setting memory limits: {e}") return False def optimize_for_inference(): """ Apply optimizations for inference. Returns: bool: True if successful, False otherwise """ try: # Set deterministic algorithms for reproducibility torch.backends.cudnn.deterministic = True # Enable cuDNN benchmark mode for optimized performance torch.backends.cudnn.benchmark = True # Disable gradient calculation for inference torch.set_grad_enabled(False) return True except Exception as e: logger.error(f"Error optimizing for inference: {e}") return False def get_recommended_batch_size(model_size="base"): """ Get recommended batch size based on available GPU memory. 
    Args:
        model_size (str): Size of the model (tiny, base, small, medium, large)

    Returns:
        int: Recommended batch size
    """
    # Default batch sizes for CPU
    default_batch_sizes = {
        "tiny": 16,
        "base": 8,
        "small": 4,
        "medium": 2,
        "large": 1
    }

    # If CUDA is not available, return the default CPU batch size
    if not torch.cuda.is_available():
        return default_batch_sizes.get(model_size, 1)

    # Approximate memory requirements in GB for each model size
    memory_requirements = {
        "tiny": 1,
        "base": 2,
        "small": 4,
        "medium": 8,
        "large": 16
    }

    # Get total memory on the selected GPU
    device = get_optimal_device()
    if device.type == "cuda":
        device_props = torch.cuda.get_device_properties(device.index)
        available_memory_gb = device_props.total_memory / (1024 ** 3)

        # Batch size scales with how many model footprints fit in memory
        model_memory = memory_requirements.get(model_size, 2)
        max_batch_size = int(available_memory_gb / model_memory)

        # Ensure batch size is at least 1
        return max(1, max_batch_size)

    # For MPS or other devices, fall back to the defaults
    return default_batch_sizes.get(model_size, 1)


def configure_gpu(model_size="base", memory_fraction=0.8):
    """
    Configure GPU settings for optimal performance.

    Args:
        model_size (str): Size of the model (tiny, base, small, medium, large)
        memory_fraction (float): Fraction of GPU memory to use (0.0 to 1.0)

    Returns:
        dict: Configuration information
    """
    gpu_info = get_gpu_info()
    device = get_optimal_device()

    # Set memory limits if using CUDA
    if device.type == "cuda":
        set_memory_limits(memory_fraction)

    # Apply inference optimizations
    optimize_for_inference()

    # Get recommended batch size
    batch_size = get_recommended_batch_size(model_size)

    config = {
        "device": device,
        "batch_size": batch_size,
        "gpu_info": gpu_info,
        "memory_fraction": memory_fraction if device.type == "cuda" else None
    }

    logger.info(f"GPU configuration: Using {device} with batch size {batch_size}")

    return config
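

# A minimal usage sketch (assumption: the module is run directly, e.g.
# `python gpu_utils.py`; that filename is hypothetical). It probes the
# hardware and prints the configuration configure_gpu() would hand to
# the transcription pipeline.
if __name__ == "__main__":
    config = configure_gpu(model_size="base", memory_fraction=0.8)
    print(f"Selected device: {config['device']}")
    print(f"Recommended batch size: {config['batch_size']}")
    print(f"CUDA devices detected: {config['gpu_info']['cuda_device_count']}")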