Refactor Dockerfile and requirements.txt for improved dependency management; update README.md with clearer installation options and instructions for prebuilt images.
.github/workflows/docker-build.yml (new file, 73 lines)
@@ -0,0 +1,73 @@
name: Build and Push Docker Images

on:
  push:
    branches: [ main, develop ]
    tags: [ 'v*' ]
  pull_request:
    branches: [ main ]
  release:
    types: [published]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Build and push GPU-enabled image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile.gpu
          platforms: linux/amd64
          push: true
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest-gpu
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}-gpu
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
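Once this workflow has published images, they can be pulled instead of built locally. A minimal sketch, assuming the repository name resolves to the lowercase `ghcr.io/dataants-ai/videotranscriber` used in docker-compose.prebuilt.yml later in this commit:

```bash
# Pull the CPU image published by the workflow (GHCR image names are lowercased)
docker pull ghcr.io/dataants-ai/videotranscriber:latest

# Run it standalone, exposing the Streamlit port the image listens on
docker run --rm -p 8501:8501 ghcr.io/dataants-ai/videotranscriber:latest
```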
Dockerfile
@@ -15,11 +15,12 @@ RUN apt-get update && apt-get install -y \
 # Copy requirements first for better Docker layer caching
 COPY requirements.txt .
 
-# Install Python dependencies
+# Install Python dependencies with pinned versions
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Install PyTorch with CUDA support (adjust based on your needs)
-RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+# Optional: Install CUDA-specific PyTorch if GPU support needed
+# Uncomment and modify for your CUDA version:
+# RUN pip install --force-reinstall torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 
 # Copy application code
 COPY . .
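With the CUDA install line commented out, the CPU wheels pinned in requirements.txt are what end up in the default image. A quick sketch for verifying that after a local build (the `videotranscriber` tag is only an example):

```bash
# Build the default (CPU) image and print the torch build it contains
docker build -t videotranscriber .
docker run --rm videotranscriber python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
```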
Dockerfile.gpu (new file, 53 lines)
@@ -0,0 +1,53 @@
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies including CUDA-related packages
RUN apt-get update && apt-get install -y \
    ffmpeg \
    git \
    wget \
    curl \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better Docker layer caching
COPY requirements.txt .

# Install CPU versions from requirements.txt first
RUN pip install --no-cache-dir -r requirements.txt

# Install CUDA-optimized PyTorch (overwrites CPU versions)
RUN pip install --force-reinstall \
    torch==2.0.1+cu118 \
    torchvision==0.15.2+cu118 \
    torchaudio==2.0.2+cu118 \
    --index-url https://download.pytorch.org/whl/cu118

# Copy application code
COPY . .

# Create directories for mounted volumes
RUN mkdir -p /app/data/videos /app/data/outputs /app/data/cache

# Set environment variables
ENV STREAMLIT_SERVER_PORT=8501
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
ENV STREAMLIT_SERVER_HEADLESS=true
ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false

# GPU-specific environment variables
ENV CUDA_VISIBLE_DEVICES=0
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Expose Streamlit port
EXPOSE 8501

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8501/_stcore/health || exit 1

# Start the application
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
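The GPU image only helps if the container can actually see the GPU, which requires the NVIDIA container toolkit on the host. A usage sketch with standard Docker flags (the tag name is illustrative):

```bash
# Build the GPU variant and run it with GPU access (requires nvidia-container-toolkit)
docker build -f Dockerfile.gpu -t videotranscriber:gpu .
docker run --rm --gpus all -p 8501:8501 videotranscriber:gpu
```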
QUICK-FIX.md (new file, 60 lines)
@@ -0,0 +1,60 @@
# 🚨 Quick Fix for PyTorch Compatibility Error

If you're seeing the `torch.compiler.disable` error, here's how to fix it:

## Immediate Fix

```bash
# Stop the current container
docker-compose down

# Remove the old image to force rebuild with fixed versions
docker rmi $(docker images | grep videotranscriber | awk '{print $3}')

# Rebuild with fixed dependencies
docker-compose up -d --build
```

## Better Solution: Use Prebuilt Images

Once available, use the prebuilt images instead:

```bash
# Stop current container
docker-compose down

# Use prebuilt image (no build required)
docker-compose -f docker-compose.prebuilt.yml up -d
```

## What Was Fixed

1. **Version Pinning**: Updated `requirements.txt` with compatible versions:
   - `torch==2.0.1` (was `>=1.7.0`)
   - `pytorch-lightning==2.0.6` (compatible with torch 2.0.1)
   - `pyannote.audio==3.1.1` (updated to compatible version)

2. **Build Process**: Removed duplicate PyTorch installation that could cause conflicts

3. **Prebuilt Images**: Created GitHub Actions to build reliable, tested images

## Verification

After fixing, you should see the Streamlit app load without errors at `http://localhost:8501`

## If Still Having Issues

1. **Clear Docker cache**:
   ```bash
   docker system prune -a
   ```

2. **Check logs**:
   ```bash
   docker-compose logs -f
   ```

3. **Manual rebuild**:
   ```bash
   docker build --no-cache -t videotranscriber .
   ```
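If the error persists after rebuilding, it is worth confirming that the pinned versions actually made it into the running container. A small sketch, assuming the compose service is named `videotranscriber` as in the compose files in this commit:

```bash
# Show the torch / lightning / pyannote versions installed in the running container
docker-compose exec videotranscriber pip show torch pytorch-lightning pyannote.audio
```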
README.md (13 lines changed)
@@ -17,8 +17,9 @@ https://github.com/user-attachments/assets/990e63fc-232e-46a0-afdf-ca8836d46a13
 
 **Benefits**: Isolated environment, no dependency conflicts, easy deployment
 
+#### Option A: Prebuilt Images (Fastest & Most Reliable)
 ```bash
-# 1. Clone repository
+# 1. Clone repository for config files
 git clone https://github.com/DataAnts-AI/VideoTranscriber.git
 cd VideoTranscriber
 
@@ -30,13 +31,19 @@ cp docker.env.example .env
 ollama serve # In separate terminal
 ollama pull llama3
 
-# 4. Start with Docker Compose
-docker-compose up -d
+# 4. Start with prebuilt image
+docker-compose -f docker-compose.prebuilt.yml up -d
 
 # 5. Access application
 # Open browser to: http://localhost:8501
 ```
 
+#### Option B: Build from Source (Development)
+```bash
+# Use the local build approach
+docker-compose up -d
+```
+
 See [DOCKER.md](DOCKER.md) for complete Docker setup guide.
 
 ### Easy Installation (Recommended)
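Step 2 of the README flow (`cp docker.env.example .env`, visible in the hunk header above) is where the variables read by the prebuilt compose file can be set. An illustrative `.env`, based on the variable names in docker-compose.prebuilt.yml rather than the actual docker.env.example contents, which this commit does not show:

```bash
# Illustrative values only; adjust paths and token for your setup
VIDEO_PATH=./videos
OUTPUT_PATH=./outputs
CACHE_PATH=./cache
CONFIG_PATH=./config
OLLAMA_API_URL=http://host.docker.internal:11434/api
HF_TOKEN=
```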
docker-compose.prebuilt.yml (new file, 70 lines)
@@ -0,0 +1,70 @@
version: '3.8'

services:
  videotranscriber:
    # Use prebuilt image from GitHub Container Registry
    image: ghcr.io/dataants-ai/videotranscriber:latest
    container_name: videotranscriber
    ports:
      - "8501:8501"
    volumes:
      # Mount your video files directory (change the left path to your actual videos folder)
      - "${VIDEO_PATH:-./videos}:/app/data/videos"
      # Mount output directory for transcripts and summaries
      - "${OUTPUT_PATH:-./outputs}:/app/data/outputs"
      # Mount cache directory for model caching (optional, improves performance)
      - "${CACHE_PATH:-./cache}:/app/data/cache"
      # Mount a config directory if needed
      - "${CONFIG_PATH:-./config}:/app/config"
    environment:
      # Ollama configuration for host access
      - OLLAMA_API_URL=${OLLAMA_API_URL:-http://host.docker.internal:11434/api}
      # Optional: HuggingFace token for advanced features
      - HF_TOKEN=${HF_TOKEN:-}
      # GPU configuration
      - CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}
      # Cache settings
      - TRANSFORMERS_CACHE=/app/data/cache/transformers
      - WHISPER_CACHE=/app/data/cache/whisper
    restart: unless-stopped
    # Use bridge networking for Windows/Mac with host.docker.internal
    networks:
      - videotranscriber-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # Alternative GPU-enabled service (uncomment to use)
  # videotranscriber-gpu:
  #   image: ghcr.io/dataants-ai/videotranscriber:latest-gpu
  #   container_name: videotranscriber-gpu
  #   ports:
  #     - "8501:8501"
  #   volumes:
  #     - "${VIDEO_PATH:-./videos}:/app/data/videos"
  #     - "${OUTPUT_PATH:-./outputs}:/app/data/outputs"
  #     - "${CACHE_PATH:-./cache}:/app/data/cache"
  #     - "${CONFIG_PATH:-./config}:/app/config"
  #   environment:
  #     - OLLAMA_API_URL=${OLLAMA_API_URL:-http://host.docker.internal:11434/api}
  #     - HF_TOKEN=${HF_TOKEN:-}
  #     - CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}
  #     - TRANSFORMERS_CACHE=/app/data/cache/transformers
  #     - WHISPER_CACHE=/app/data/cache/whisper
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   restart: unless-stopped
  #   networks:
  #     - videotranscriber-network

networks:
  videotranscriber-network:
    driver: bridge
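The healthcheck block above is what marks the container healthy once Streamlit responds. One way to watch it after starting the stack, using standard Docker commands and the `videotranscriber` container name set above:

```bash
# Start the prebuilt stack and poll the health status reported by the healthcheck
docker-compose -f docker-compose.prebuilt.yml up -d
docker inspect --format '{{.State.Health.Status}}' videotranscriber
```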
requirements.txt
@@ -1,24 +1,32 @@
 # OBS Recording Transcriber Dependencies
-# Core dependencies
+# Core dependencies with pinned compatible versions
 streamlit==1.26.0
 moviepy==1.0.3
 openai-whisper==20231117
-transformers>=4.21.1
-torch>=1.7.0
-torchaudio>=0.7.0
 requests>=2.28.0
 humanize>=4.6.0
 
-# Phase 2 dependencies
-scikit-learn>=1.0.0
-numpy>=1.20.0
+# PyTorch ecosystem - pinned for compatibility
+torch==2.0.1
+torchaudio==2.0.2
+torchvision==0.15.2
 
-# Phase 3 dependencies
-pyannote.audio>=2.1.1
+# Transformers ecosystem - compatible versions
+transformers==4.30.2
+tokenizers==0.13.3
+
+# ML dependencies with compatible versions
+numpy==1.24.3
+scipy==1.10.1
+scikit-learn==1.3.0
+
+# Audio processing and ML models
+pyannote.audio==3.1.1
+pytorch-lightning==2.0.6
+
+# Other dependencies
 iso639>=0.1.4
 protobuf>=3.20.0,<4.0.0
-tokenizers>=0.13.2
-scipy>=1.7.0
 matplotlib>=3.5.0
 soundfile>=0.10.3
 ffmpeg-python>=0.2.0
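For non-Docker installs, the same pinned set can be sanity-checked locally before relying on it. A minimal sketch assuming a POSIX shell and a compatible Python (the Dockerfiles in this commit use 3.11):

```bash
# Install the pinned requirements into a throwaway venv and check for conflicts
python -m venv .venv && . .venv/bin/activate
pip install -r requirements.txt
pip check  # reports any packages with incompatible requirements
```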