Refactor Dockerfile and requirements.txt for improved dependency management; update README.md with clearer installation options and instructions for prebuilt images.
.github/workflows/docker-build.yml (new file, 73 lines)
@@ -0,0 +1,73 @@
name: Build and Push Docker Images

on:
  push:
    branches: [ main, develop ]
    tags: [ 'v*' ]
  pull_request:
    branches: [ main ]
  release:
    types: [published]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Build and push GPU-enabled image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: Dockerfile.gpu
          platforms: linux/amd64
          push: true
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest-gpu
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}-gpu
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
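Once this workflow has published images, they can be pulled instead of built locally. A minimal sketch, assuming the repository name resolves to the lowercase `ghcr.io/dataants-ai/videotranscriber` used in docker-compose.prebuilt.yml later in this commit:

```bash
# Pull the CPU image published by the workflow (GHCR image names are lowercased)
docker pull ghcr.io/dataants-ai/videotranscriber:latest

# Run it standalone, exposing the Streamlit port the image listens on
docker run --rm -p 8501:8501 ghcr.io/dataants-ai/videotranscriber:latest
```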
Dockerfile
@@ -15,11 +15,12 @@ RUN apt-get update && apt-get install -y \
 # Copy requirements first for better Docker layer caching
 COPY requirements.txt .
 
-# Install Python dependencies
+# Install Python dependencies with pinned versions
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Install PyTorch with CUDA support (adjust based on your needs)
-RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+# Optional: Install CUDA-specific PyTorch if GPU support needed
+# Uncomment and modify for your CUDA version:
+# RUN pip install --force-reinstall torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 
 # Copy application code
 COPY . .
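With the CUDA install line commented out, the CPU wheels pinned in requirements.txt are what end up in the default image. A quick sketch for verifying that after a local build (the `videotranscriber` tag is only an example):

```bash
# Build the default (CPU) image and print the torch build it contains
docker build -t videotranscriber .
docker run --rm videotranscriber python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
```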
Dockerfile.gpu (new file, 53 lines)
@@ -0,0 +1,53 @@
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies including CUDA-related packages
RUN apt-get update && apt-get install -y \
    ffmpeg \
    git \
    wget \
    curl \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better Docker layer caching
COPY requirements.txt .

# Install CPU versions from requirements.txt first
RUN pip install --no-cache-dir -r requirements.txt

# Install CUDA-optimized PyTorch (overwrites CPU versions)
RUN pip install --force-reinstall \
    torch==2.0.1+cu118 \
    torchvision==0.15.2+cu118 \
    torchaudio==2.0.2+cu118 \
    --index-url https://download.pytorch.org/whl/cu118

# Copy application code
COPY . .

# Create directories for mounted volumes
RUN mkdir -p /app/data/videos /app/data/outputs /app/data/cache

# Set environment variables
ENV STREAMLIT_SERVER_PORT=8501
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
ENV STREAMLIT_SERVER_HEADLESS=true
ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false

# GPU-specific environment variables
ENV CUDA_VISIBLE_DEVICES=0
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Expose Streamlit port
EXPOSE 8501

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8501/_stcore/health || exit 1

# Start the application
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
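The GPU image only helps if the container can actually see the GPU, which requires the NVIDIA container toolkit on the host. A usage sketch with standard Docker flags (the tag name is illustrative):

```bash
# Build the GPU variant and run it with GPU access (requires nvidia-container-toolkit)
docker build -f Dockerfile.gpu -t videotranscriber:gpu .
docker run --rm --gpus all -p 8501:8501 videotranscriber:gpu
```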
QUICK-FIX.md (new file, 60 lines)
@@ -0,0 +1,60 @@
# 🚨 Quick Fix for PyTorch Compatibility Error

If you're seeing the `torch.compiler.disable` error, here's how to fix it:

## Immediate Fix

```bash
# Stop the current container
docker-compose down

# Remove the old image to force rebuild with fixed versions
docker rmi $(docker images | grep videotranscriber | awk '{print $3}')

# Rebuild with fixed dependencies
docker-compose up -d --build
```

## Better Solution: Use Prebuilt Images

Once available, use the prebuilt images instead:

```bash
# Stop current container
docker-compose down

# Use prebuilt image (no build required)
docker-compose -f docker-compose.prebuilt.yml up -d
```

## What Was Fixed

1. **Version Pinning**: Updated `requirements.txt` with compatible versions:
   - `torch==2.0.1` (was `>=1.7.0`)
   - `pytorch-lightning==2.0.6` (compatible with torch 2.0.1)
   - `pyannote.audio==3.1.1` (updated to compatible version)

2. **Build Process**: Removed duplicate PyTorch installation that could cause conflicts

3. **Prebuilt Images**: Created GitHub Actions to build reliable, tested images

## Verification

After fixing, you should see the Streamlit app load without errors at `http://localhost:8501`

## If Still Having Issues

1. **Clear Docker cache**:
   ```bash
   docker system prune -a
   ```

2. **Check logs**:
   ```bash
   docker-compose logs -f
   ```

3. **Manual rebuild**:
   ```bash
   docker build --no-cache -t videotranscriber .
   ```
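If the error persists after rebuilding, it is worth confirming that the pinned versions actually made it into the running container. A small sketch, assuming the compose service is named `videotranscriber` as in the compose files in this commit:

```bash
# Show the torch / lightning / pyannote versions installed in the running container
docker-compose exec videotranscriber pip show torch pytorch-lightning pyannote.audio
```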
README.md (13 lines changed)
@@ -17,8 +17,9 @@ https://github.com/user-attachments/assets/990e63fc-232e-46a0-afdf-ca8836d46a13
 
 **Benefits**: Isolated environment, no dependency conflicts, easy deployment
 
+#### Option A: Prebuilt Images (Fastest & Most Reliable)
 ```bash
-# 1. Clone repository
+# 1. Clone repository for config files
 git clone https://github.com/DataAnts-AI/VideoTranscriber.git
 cd VideoTranscriber
 
@@ -30,13 +31,19 @@ cp docker.env.example .env
 ollama serve # In separate terminal
 ollama pull llama3
 
-# 4. Start with Docker Compose
-docker-compose up -d
+# 4. Start with prebuilt image
+docker-compose -f docker-compose.prebuilt.yml up -d
 
 # 5. Access application
 # Open browser to: http://localhost:8501
 ```
 
+#### Option B: Build from Source (Development)
+```bash
+# Use the local build approach
+docker-compose up -d
+```
+
 See [DOCKER.md](DOCKER.md) for complete Docker setup guide.
 
 ### Easy Installation (Recommended)
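Step 2 of the README flow (`cp docker.env.example .env`, visible in the hunk header above) is where the variables read by the prebuilt compose file can be set. An illustrative `.env`, based on the variable names in docker-compose.prebuilt.yml rather than the actual docker.env.example contents, which this commit does not show:

```bash
# Illustrative values only; adjust paths and token for your setup
VIDEO_PATH=./videos
OUTPUT_PATH=./outputs
CACHE_PATH=./cache
CONFIG_PATH=./config
OLLAMA_API_URL=http://host.docker.internal:11434/api
HF_TOKEN=
```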
docker-compose.prebuilt.yml (new file, 70 lines)
@@ -0,0 +1,70 @@
version: '3.8'

services:
  videotranscriber:
    # Use prebuilt image from GitHub Container Registry
    image: ghcr.io/dataants-ai/videotranscriber:latest
    container_name: videotranscriber
    ports:
      - "8501:8501"
    volumes:
      # Mount your video files directory (change the left path to your actual videos folder)
      - "${VIDEO_PATH:-./videos}:/app/data/videos"
      # Mount output directory for transcripts and summaries
      - "${OUTPUT_PATH:-./outputs}:/app/data/outputs"
      # Mount cache directory for model caching (optional, improves performance)
      - "${CACHE_PATH:-./cache}:/app/data/cache"
      # Mount a config directory if needed
      - "${CONFIG_PATH:-./config}:/app/config"
    environment:
      # Ollama configuration for host access
      - OLLAMA_API_URL=${OLLAMA_API_URL:-http://host.docker.internal:11434/api}
      # Optional: HuggingFace token for advanced features
      - HF_TOKEN=${HF_TOKEN:-}
      # GPU configuration
      - CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}
      # Cache settings
      - TRANSFORMERS_CACHE=/app/data/cache/transformers
      - WHISPER_CACHE=/app/data/cache/whisper
    restart: unless-stopped
    # Use bridge networking for Windows/Mac with host.docker.internal
    networks:
      - videotranscriber-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # Alternative GPU-enabled service (uncomment to use)
  # videotranscriber-gpu:
  #   image: ghcr.io/dataants-ai/videotranscriber:latest-gpu
  #   container_name: videotranscriber-gpu
  #   ports:
  #     - "8501:8501"
  #   volumes:
  #     - "${VIDEO_PATH:-./videos}:/app/data/videos"
  #     - "${OUTPUT_PATH:-./outputs}:/app/data/outputs"
  #     - "${CACHE_PATH:-./cache}:/app/data/cache"
  #     - "${CONFIG_PATH:-./config}:/app/config"
  #   environment:
  #     - OLLAMA_API_URL=${OLLAMA_API_URL:-http://host.docker.internal:11434/api}
  #     - HF_TOKEN=${HF_TOKEN:-}
  #     - CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}
  #     - TRANSFORMERS_CACHE=/app/data/cache/transformers
  #     - WHISPER_CACHE=/app/data/cache/whisper
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   restart: unless-stopped
  #   networks:
  #     - videotranscriber-network

networks:
  videotranscriber-network:
    driver: bridge
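The healthcheck block above is what marks the container healthy once Streamlit responds. One way to watch it after starting the stack, using standard Docker commands and the `videotranscriber` container name set above:

```bash
# Start the prebuilt stack and poll the health status reported by the healthcheck
docker-compose -f docker-compose.prebuilt.yml up -d
docker inspect --format '{{.State.Health.Status}}' videotranscriber
```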
requirements.txt
@@ -1,24 +1,32 @@
 # OBS Recording Transcriber Dependencies
-# Core dependencies
+# Core dependencies with pinned compatible versions
 streamlit==1.26.0
 moviepy==1.0.3
 openai-whisper==20231117
-transformers>=4.21.1
-torch>=1.7.0
-torchaudio>=0.7.0
 requests>=2.28.0
 humanize>=4.6.0
 
-# Phase 2 dependencies
-scikit-learn>=1.0.0
-numpy>=1.20.0
+# PyTorch ecosystem - pinned for compatibility
+torch==2.0.1
+torchaudio==2.0.2
+torchvision==0.15.2
 
-# Phase 3 dependencies
-pyannote.audio>=2.1.1
+# Transformers ecosystem - compatible versions
+transformers==4.30.2
+tokenizers==0.13.3
+
+# ML dependencies with compatible versions
+numpy==1.24.3
+scipy==1.10.1
+scikit-learn==1.3.0
+
+# Audio processing and ML models
+pyannote.audio==3.1.1
+pytorch-lightning==2.0.6
+
+# Other dependencies
 iso639>=0.1.4
 protobuf>=3.20.0,<4.0.0
-tokenizers>=0.13.2
-scipy>=1.7.0
 matplotlib>=3.5.0
 soundfile>=0.10.3
 ffmpeg-python>=0.2.0
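For non-Docker installs, the same pinned set can be sanity-checked locally before relying on it. A minimal sketch assuming a POSIX shell and a compatible Python (the Dockerfiles in this commit use 3.11):

```bash
# Install the pinned requirements into a throwaway venv and check for conflicts
python -m venv .venv && . .venv/bin/activate
pip install -r requirements.txt
pip check  # reports any packages with incompatible requirements
```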