Add installation scripts and update documentation for Phase 3 features

2025-03-01 20:37:52 -05:00
parent a653ac7f28
commit 7ea098bd05
16 changed files with 3023 additions and 43 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,42 @@
+# OBS Recording Transcriber Dependencies
+# Core dependencies
 streamlit==1.26.0
 moviepy==1.0.3
-whisper
-transformers==4.21.1
+openai-whisper>=20230314
+transformers>=4.21.1
 torch>=1.7.0
+torchaudio>=0.7.0
+requests>=2.28.0
+humanize>=4.6.0
+
+# Phase 2 dependencies
+scikit-learn>=1.0.0
+numpy>=1.20.0
+
+# Phase 3 dependencies
+pyannote.audio>=2.1.1
+iso639>=0.1.4
+protobuf>=3.20.0,<4.0.0
+tokenizers>=0.13.2
+scipy>=1.7.0
+matplotlib>=3.5.0
+soundfile>=0.10.3
+ffmpeg-python>=0.2.0
+
+# Optional: Ollama Python client (uncomment to install)
+# ollama
+
+# Installation notes:
+# 1. For Windows users, you may need to install PyTorch separately:
+#    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+#
+# 2. For tokenizers issues, try installing Visual C++ Build Tools:
+#    https://visualstudio.microsoft.com/visual-cpp-build-tools/
+#
+# 3. For pyannote.audio, you'll need a HuggingFace token with access to:
+#    https://huggingface.co/pyannote/speaker-diarization-3.0
+#
+# 4. FFmpeg is required for audio processing:
+#    Windows: https://www.gyan.dev/ffmpeg/builds/
+#    Mac: brew install ffmpeg
+#    Linux: apt-get install ffmpeg