Refactor Dockerfile and requirements.txt for improved dependency management; update README.md with clearer installation options and instructions for prebuilt images.
This commit is contained in:
73
.github/workflows/docker-build.yml
vendored
Normal file
73
.github/workflows/docker-build.yml
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
name: Build and Push Docker Images

on:
  push:
    branches: [ main, develop ]
    tags: [ 'v*' ]
  pull_request:
    branches: [ main ]
  release:
    types: [published]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Only authenticate when we will actually push: on pull_request
      # (especially from forks) the GITHUB_TOKEN has no packages:write
      # access, and an unconditional login/push fails the job.
      - name: Log in to Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          # Build-only on PRs (validates the Dockerfile); push on
          # branch/tag/release events where credentials are available.
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Build and push GPU-enabled image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile.gpu
          platforms: linux/amd64
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest-gpu
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}-gpu
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
|
||||
@ -15,11 +15,12 @@ RUN apt-get update && apt-get install -y \
|
||||
# Copy requirements first for better Docker layer caching
|
||||
COPY requirements.txt .
|
||||
|
||||
# Install Python dependencies
|
||||
# Install Python dependencies with pinned versions
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install PyTorch with CUDA support (adjust based on your needs)
|
||||
RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
||||
# Optional: Install CUDA-specific PyTorch if GPU support needed
|
||||
# Uncomment and modify for your CUDA version:
|
||||
# RUN pip install --force-reinstall torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
||||
|
||||
# Copy application code
|
||||
COPY . .
|
||||
|
||||
53
Dockerfile.gpu
Normal file
53
Dockerfile.gpu
Normal file
@ -0,0 +1,53 @@
|
||||
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system build/media dependencies.
# NOTE: no CUDA system packages are needed here — the cu118 PyTorch wheels
# bundle the CUDA runtime; the host only needs the NVIDIA driver plus
# nvidia-container-toolkit.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    ffmpeg \
    git \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better Docker layer caching
COPY requirements.txt .

# Install CPU versions from requirements.txt first
RUN pip install --no-cache-dir -r requirements.txt

# Install CUDA-optimized PyTorch (overwrites the CPU builds above).
# --no-cache-dir keeps the multi-GB wheel cache out of the image layer.
RUN pip install --no-cache-dir --force-reinstall \
    torch==2.0.1+cu118 \
    torchvision==0.15.2+cu118 \
    torchaudio==2.0.2+cu118 \
    --index-url https://download.pytorch.org/whl/cu118

# Copy application code
COPY . .

# Create directories for mounted volumes
RUN mkdir -p /app/data/videos /app/data/outputs /app/data/cache

# Streamlit runtime configuration
ENV STREAMLIT_SERVER_PORT=8501 \
    STREAMLIT_SERVER_ADDRESS=0.0.0.0 \
    STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_BROWSER_GATHER_USAGE_STATS=false

# GPU-specific environment variables
ENV CUDA_VISIBLE_DEVICES=0 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Expose Streamlit port (documentation only; publish via -p or compose)
EXPOSE 8501

# Health check against Streamlit's built-in health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8501/_stcore/health || exit 1

# Start the application
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
||||
60
QUICK-FIX.md
Normal file
60
QUICK-FIX.md
Normal file
@ -0,0 +1,60 @@
|
||||
# 🚨 Quick Fix for PyTorch Compatibility Error
|
||||
|
||||
If you're seeing the `torch.compiler.disable` error, here's how to fix it:
|
||||
|
||||
## Immediate Fix
|
||||
|
||||
```bash
|
||||
# Stop the current container
|
||||
docker-compose down
|
||||
|
||||
# Remove the old image to force rebuild with fixed versions
|
||||
docker rmi $(docker images | grep videotranscriber | awk '{print $3}')
|
||||
|
||||
# Rebuild with fixed dependencies
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
## Better Solution: Use Prebuilt Images
|
||||
|
||||
Once available, use the prebuilt images instead:
|
||||
|
||||
```bash
|
||||
# Stop current container
|
||||
docker-compose down
|
||||
|
||||
# Use prebuilt image (no build required)
|
||||
docker-compose -f docker-compose.prebuilt.yml up -d
|
||||
```
|
||||
|
||||
## What Was Fixed
|
||||
|
||||
1. **Version Pinning**: Updated `requirements.txt` with compatible versions:
|
||||
- `torch==2.0.1` (was `>=1.7.0`)
|
||||
- `pytorch-lightning==2.0.6` (compatible with torch 2.0.1)
|
||||
- `pyannote.audio==3.1.1` (updated to compatible version)
|
||||
|
||||
2. **Build Process**: Removed duplicate PyTorch installation that could cause conflicts
|
||||
|
||||
3. **Prebuilt Images**: Created GitHub Actions to build reliable, tested images
|
||||
|
||||
## Verification
|
||||
|
||||
After fixing, you should see the Streamlit app load without errors at `http://localhost:8501`
|
||||
|
||||
## If Still Having Issues
|
||||
|
||||
1. **Clear Docker cache**:
|
||||
```bash
|
||||
docker system prune -a
|
||||
```
|
||||
|
||||
2. **Check logs**:
|
||||
```bash
|
||||
docker-compose logs -f
|
||||
```
|
||||
|
||||
3. **Manual rebuild**:
|
||||
```bash
|
||||
docker build --no-cache -t videotranscriber .
|
||||
```
|
||||
13
README.md
13
README.md
@ -17,8 +17,9 @@ https://github.com/user-attachments/assets/990e63fc-232e-46a0-afdf-ca8836d46a13
|
||||
|
||||
**Benefits**: Isolated environment, no dependency conflicts, easy deployment
|
||||
|
||||
#### Option A: Prebuilt Images (Fastest & Most Reliable)
|
||||
```bash
|
||||
# 1. Clone repository
|
||||
# 1. Clone repository for config files
|
||||
git clone https://github.com/DataAnts-AI/VideoTranscriber.git
|
||||
cd VideoTranscriber
|
||||
|
||||
@ -30,13 +31,19 @@ cp docker.env.example .env
|
||||
ollama serve # In separate terminal
|
||||
ollama pull llama3
|
||||
|
||||
# 4. Start with Docker Compose
|
||||
docker-compose up -d
|
||||
# 4. Start with prebuilt image
|
||||
docker-compose -f docker-compose.prebuilt.yml up -d
|
||||
|
||||
# 5. Access application
|
||||
# Open browser to: http://localhost:8501
|
||||
```
|
||||
|
||||
#### Option B: Build from Source (Development)
|
||||
```bash
|
||||
# Use the local build approach
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
See [DOCKER.md](DOCKER.md) for complete Docker setup guide.
|
||||
|
||||
### Easy Installation (Recommended)
|
||||
|
||||
70
docker-compose.prebuilt.yml
Normal file
70
docker-compose.prebuilt.yml
Normal file
@ -0,0 +1,70 @@
|
||||
version: '3.8'

services:
  videotranscriber:
    # Use prebuilt image from GitHub Container Registry
    image: ghcr.io/dataants-ai/videotranscriber:latest
    container_name: videotranscriber
    ports:
      - "8501:8501"
    volumes:
      # Mount your video files directory (change the left path to your actual videos folder)
      - "${VIDEO_PATH:-./videos}:/app/data/videos"
      # Mount output directory for transcripts and summaries
      - "${OUTPUT_PATH:-./outputs}:/app/data/outputs"
      # Mount cache directory for model caching (optional, improves performance)
      - "${CACHE_PATH:-./cache}:/app/data/cache"
      # Mount a config directory if needed
      - "${CONFIG_PATH:-./config}:/app/config"
    environment:
      # Ollama configuration for host access
      - OLLAMA_API_URL=${OLLAMA_API_URL:-http://host.docker.internal:11434/api}
      # Optional: HuggingFace token for advanced features
      - HF_TOKEN=${HF_TOKEN:-}
      # GPU configuration
      - CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}
      # Cache settings
      - TRANSFORMERS_CACHE=/app/data/cache/transformers
      - WHISPER_CACHE=/app/data/cache/whisper
    # host.docker.internal exists natively only on Docker Desktop
    # (Windows/Mac); host-gateway makes the default OLLAMA_API_URL
    # resolve on Linux Docker Engine too.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    restart: unless-stopped
    networks:
      - videotranscriber-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # Alternative GPU-enabled service (uncomment to use)
  # videotranscriber-gpu:
  #   image: ghcr.io/dataants-ai/videotranscriber:latest-gpu
  #   container_name: videotranscriber-gpu
  #   ports:
  #     - "8501:8501"
  #   volumes:
  #     - "${VIDEO_PATH:-./videos}:/app/data/videos"
  #     - "${OUTPUT_PATH:-./outputs}:/app/data/outputs"
  #     - "${CACHE_PATH:-./cache}:/app/data/cache"
  #     - "${CONFIG_PATH:-./config}:/app/config"
  #   environment:
  #     - OLLAMA_API_URL=${OLLAMA_API_URL:-http://host.docker.internal:11434/api}
  #     - HF_TOKEN=${HF_TOKEN:-}
  #     - CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}
  #     - TRANSFORMERS_CACHE=/app/data/cache/transformers
  #     - WHISPER_CACHE=/app/data/cache/whisper
  #   extra_hosts:
  #     - "host.docker.internal:host-gateway"
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   restart: unless-stopped
  #   networks:
  #     - videotranscriber-network

networks:
  videotranscriber-network:
    driver: bridge
|
||||
@ -1,24 +1,32 @@
|
||||
# OBS Recording Transcriber Dependencies
|
||||
# Core dependencies
|
||||
# Core dependencies with pinned compatible versions
|
||||
streamlit==1.26.0
|
||||
moviepy==1.0.3
|
||||
openai-whisper==20231117
|
||||
transformers>=4.21.1
|
||||
torch>=1.7.0
|
||||
torchaudio>=0.7.0
|
||||
requests>=2.28.0
|
||||
humanize>=4.6.0
|
||||
|
||||
# Phase 2 dependencies
|
||||
scikit-learn>=1.0.0
|
||||
numpy>=1.20.0
|
||||
# PyTorch ecosystem - pinned for compatibility
|
||||
torch==2.0.1
|
||||
torchaudio==2.0.2
|
||||
torchvision==0.15.2
|
||||
|
||||
# Phase 3 dependencies
|
||||
pyannote.audio>=2.1.1
|
||||
# Transformers ecosystem - compatible versions
|
||||
transformers==4.30.2
|
||||
tokenizers==0.13.3
|
||||
|
||||
# ML dependencies with compatible versions
|
||||
numpy==1.24.3
|
||||
scipy==1.10.1
|
||||
scikit-learn==1.3.0
|
||||
|
||||
# Audio processing and ML models
|
||||
pyannote.audio==3.1.1
|
||||
pytorch-lightning==2.0.6
|
||||
|
||||
# Other dependencies
|
||||
iso639>=0.1.4
|
||||
protobuf>=3.20.0,<4.0.0
|
||||
tokenizers>=0.13.2
|
||||
scipy>=1.7.0
|
||||
matplotlib>=3.5.0
|
||||
soundfile>=0.10.3
|
||||
ffmpeg-python>=0.2.0
|
||||
|
||||
Reference in New Issue
Block a user