# TalkEdit/utils/ollama_integration.py
"""
Ollama integration for local AI model inference.
Provides functions to use Ollama's API for text summarization.
"""
import logging
import os

import requests

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Default Ollama API endpoint
OLLAMA_API_URL = "http://localhost:11434/api"


def check_ollama_available():
    """
    Check if the Ollama service is available.

    Returns:
        bool: True if Ollama is reachable, False otherwise.
    """
    try:
        # /api/tags is cheap to call, so a short timeout is enough
        response = requests.get(f"{OLLAMA_API_URL}/tags", timeout=2)
        return response.status_code == 200
    except requests.exceptions.RequestException:
        return False


def list_available_models():
    """
    List the models available in Ollama.

    Returns:
        list: List of available model names.
    """
    try:
        response = requests.get(f"{OLLAMA_API_URL}/tags", timeout=5)
        if response.status_code == 200:
            # The /api/tags response has the shape {"models": [{"name": ...}, ...]}
            models = response.json().get('models', [])
            return [model['name'] for model in models]
        return []
    except requests.exceptions.RequestException as e:
        logger.error(f"Error listing Ollama models: {e}")
        return []


def summarize_with_ollama(text, model="llama3", max_length=150):
    """
    Summarize text using Ollama's local API.

    Args:
        text (str): Text to summarize.
        model (str): Ollama model to use.
        max_length (int): Approximate target length of the summary in words.

    Returns:
        str: Summarized text, or None if the request failed.
    """
    if not check_ollama_available():
        logger.warning("Ollama service is not available")
        return None

    # Check that the model is available. Ollama reports names with a tag
    # suffix (e.g. "llama3:latest"), so match on the base name as well.
    available_models = list_available_models()
    base_names = {name.split(':')[0] for name in available_models}
    if model not in available_models and model not in base_names:
        logger.warning(
            f"Model {model} not available in Ollama. "
            f"Available models: {available_models}"
        )
        return None

    # Prepare the prompt for summarization
    prompt = f"Summarize the following text in about {max_length} words:\n\n{text}"

    try:
        # Make the API request; stream=False returns one JSON object
        response = requests.post(
            f"{OLLAMA_API_URL}/generate",
            json={
                "model": model,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": 0.3,
                    "top_p": 0.9,
                    # Ollama caps generation with num_predict
                    # (it has no max_tokens option)
                    "num_predict": max_length * 2  # rough word-to-token margin
                }
            },
            timeout=120  # local generation can be slow, especially on CPU
        )
        if response.status_code == 200:
            result = response.json()
            return result.get('response', '').strip()
        logger.error(f"Ollama API error: {response.status_code} - {response.text}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"Error communicating with Ollama: {e}")
        return None


def chunk_and_summarize(text, model="llama3", chunk_size=4000, max_length=150):
    """
    Chunk long text, summarize each chunk, then combine the summaries.

    Args:
        text (str): Text to summarize.
        model (str): Ollama model to use.
        chunk_size (int): Maximum size of each chunk in characters.
        max_length (int): Approximate target length of the final summary in words.

    Returns:
        str: Combined summary, or None if summarization failed.
    """
    if len(text) <= chunk_size:
        return summarize_with_ollama(text, model, max_length)

    # Split text into word-aligned chunks of at most chunk_size characters
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0
    for word in words:
        if current_length + len(word) + 1 <= chunk_size:
            current_chunk.append(word)
            current_length += len(word) + 1
        else:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word) + 1
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    # Summarize each chunk. Keep a floor on the per-chunk budget so that
    # max_length // len(chunks) cannot round down to a uselessly small value.
    per_chunk_length = max(max_length // len(chunks), 30)
    chunk_summaries = []
    for i, chunk in enumerate(chunks):
        logger.info(f"Summarizing chunk {i + 1}/{len(chunks)}")
        summary = summarize_with_ollama(chunk, model, per_chunk_length)
        if summary:
            chunk_summaries.append(summary)

    if not chunk_summaries:
        return None

    # If there's only one chunk summary, return it as-is
    if len(chunk_summaries) == 1:
        return chunk_summaries[0]

    # Otherwise, combine the summaries and summarize once more
    combined_summary = " ".join(chunk_summaries)
    return summarize_with_ollama(combined_summary, model, max_length)
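

if __name__ == "__main__":
    # Usage sketch, not part of the module's API: assumes an Ollama server
    # is running locally with the "llama3" model pulled, and that the
    # sample text below is illustrative only.
    if not check_ollama_available():
        print(f"Ollama is not reachable at {OLLAMA_API_URL}")
    else:
        print("Available models:", list_available_models())
        sample = (
            "Ollama serves local large language models over a simple HTTP "
            "API. This module wraps the /api/generate endpoint to produce "
            "summaries, chunking long inputs so each request stays within "
            "a manageable size."
        )
        print(chunk_and_summarize(sample, model="llama3", max_length=50))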