feat: Add streaming Ollama support, model caching, and UI improvements

- Add streaming summarization via Ollama API (stream_summarize_with_ollama)

- Cache ML models with @st.cache_resource (diarization, NER, translation, Whisper)

- Add temp file cleanup for extracted audio

- Add system capabilities detection (FFmpeg, GPU info)

- Add get_video_duration utility

- Improve validation with FFmpeg check

- Rewrite app.py with streaming support and UI enhancements

- Clean up redundant comments and unused imports across all utils
This commit is contained in:
Your Name
2026-02-18 10:26:09 -05:00
parent ce398ae1d4
commit 70c5d32413
10 changed files with 998 additions and 707 deletions

View File

@ -1,5 +1,5 @@
"""
Keyword extraction utilities for the OBS Recording Transcriber.
Keyword extraction utilities for the Video Transcriber.
Provides functions to extract keywords and link them to timestamps.
"""
@ -8,25 +8,30 @@ import re
import torch
import numpy as np
from pathlib import Path
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import Counter
import streamlit as st
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Try to import GPU utilities, but don't fail if not available
try:
from utils.gpu_utils import get_optimal_device
GPU_UTILS_AVAILABLE = True
except ImportError:
GPU_UTILS_AVAILABLE = False
# Default models
NER_MODEL = "dslim/bert-base-NER"
@st.cache_resource
def _load_ner_pipeline(model_name, device_int):
    """Load and cache a Hugging Face NER pipeline.

    Cached via ``st.cache_resource`` so the model is loaded once per
    (model_name, device_int) pair for the lifetime of the Streamlit app.

    Args:
        model_name: Hugging Face model identifier (e.g. ``dslim/bert-base-NER``).
        device_int: Device index passed to ``pipeline`` (-1 for CPU,
            >= 0 for a CUDA device).

    Returns:
        A ``transformers`` token-classification pipeline using
        ``aggregation_strategy="simple"`` to merge sub-word entity spans.
    """
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info("Loading NER model: %s", model_name)
    return pipeline("ner", model=model_name, device=device_int, aggregation_strategy="simple")
def extract_keywords_tfidf(text, max_keywords=10, ngram_range=(1, 2)):
"""
Extract keywords using TF-IDF.
@ -107,8 +112,7 @@ def extract_named_entities(text, model=NER_MODEL, use_gpu=True):
device_arg = -1
try:
# Initialize the pipeline
ner_pipeline = pipeline("ner", model=model, device=device_arg, aggregation_strategy="simple")
ner_pipeline = _load_ner_pipeline(model, device_arg)
# Split text into manageable chunks if too long
max_length = 512