# TalkEdit/utils/ollama_integration.py
"""
Ollama integration for local AI model inference.
Provides functions to use Ollama's API for text summarization.
"""
import logging
import os

import requests

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Default Ollama API endpoint
OLLAMA_API_URL = "http://localhost:11434/api"


def check_ollama_available():
    """
    Check if the Ollama service is available.

    Returns:
        bool: True if Ollama is reachable, False otherwise.
    """
    try:
        # /api/tags is cheap to call, so a short timeout is enough
        response = requests.get(f"{OLLAMA_API_URL}/tags", timeout=2)
        return response.status_code == 200
    except requests.exceptions.RequestException:
        return False


def list_available_models():
    """
    List the models available in Ollama.

    Returns:
        list: List of available model names.
    """
    try:
        response = requests.get(f"{OLLAMA_API_URL}/tags", timeout=5)
        if response.status_code == 200:
            # The /api/tags response has the shape {"models": [{"name": ...}, ...]}
            models = response.json().get('models', [])
            return [model['name'] for model in models]
        return []
    except requests.exceptions.RequestException as e:
        logger.error(f"Error listing Ollama models: {e}")
        return []


def summarize_with_ollama(text, model="llama3", max_length=150):
    """
    Summarize text using Ollama's local API.

    Args:
        text (str): Text to summarize.
        model (str): Ollama model to use.
        max_length (int): Approximate target length of the summary in words.

    Returns:
        str: Summarized text, or None if the request failed.
    """
    if not check_ollama_available():
        logger.warning("Ollama service is not available")
        return None

    # Check that the model is available. Ollama reports names with a tag
    # suffix (e.g. "llama3:latest"), so match on the base name as well.
    available_models = list_available_models()
    base_names = {name.split(':')[0] for name in available_models}
    if model not in available_models and model not in base_names:
        logger.warning(
            f"Model {model} not available in Ollama. "
            f"Available models: {available_models}"
        )
        return None

    # Prepare the prompt for summarization
    prompt = f"Summarize the following text in about {max_length} words:\n\n{text}"

    try:
        # Make the API request; stream=False returns one JSON object
        response = requests.post(
            f"{OLLAMA_API_URL}/generate",
            json={
                "model": model,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": 0.3,
                    "top_p": 0.9,
                    # Ollama caps generation with num_predict
                    # (it has no max_tokens option)
                    "num_predict": max_length * 2  # rough word-to-token margin
                }
            },
            timeout=120  # local generation can be slow, especially on CPU
        )
        if response.status_code == 200:
            result = response.json()
            return result.get('response', '').strip()
        logger.error(f"Ollama API error: {response.status_code} - {response.text}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"Error communicating with Ollama: {e}")
        return None


def chunk_and_summarize(text, model="llama3", chunk_size=4000, max_length=150):
    """
    Chunk long text, summarize each chunk, then combine the summaries.

    Args:
        text (str): Text to summarize.
        model (str): Ollama model to use.
        chunk_size (int): Maximum size of each chunk in characters.
        max_length (int): Approximate target length of the final summary in words.

    Returns:
        str: Combined summary, or None if summarization failed.
    """
    if len(text) <= chunk_size:
        return summarize_with_ollama(text, model, max_length)

    # Split text into word-aligned chunks of at most chunk_size characters
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0
    for word in words:
        if current_length + len(word) + 1 <= chunk_size:
            current_chunk.append(word)
            current_length += len(word) + 1
        else:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word) + 1
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    # Summarize each chunk. Keep a floor on the per-chunk budget so that
    # max_length // len(chunks) cannot round down to a uselessly small value.
    per_chunk_length = max(max_length // len(chunks), 30)
    chunk_summaries = []
    for i, chunk in enumerate(chunks):
        logger.info(f"Summarizing chunk {i + 1}/{len(chunks)}")
        summary = summarize_with_ollama(chunk, model, per_chunk_length)
        if summary:
            chunk_summaries.append(summary)

    if not chunk_summaries:
        return None

    # If there's only one chunk summary, return it as-is
    if len(chunk_summaries) == 1:
        return chunk_summaries[0]

    # Otherwise, combine the summaries and summarize once more
    combined_summary = " ".join(chunk_summaries)
    return summarize_with_ollama(combined_summary, model, max_length)
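

if __name__ == "__main__":
    # Usage sketch, not part of the module's API: assumes an Ollama server
    # is running locally with the "llama3" model pulled, and that the
    # sample text below is illustrative only.
    if not check_ollama_available():
        print(f"Ollama is not reachable at {OLLAMA_API_URL}")
    else:
        print("Available models:", list_available_models())
        sample = (
            "Ollama serves local large language models over a simple HTTP "
            "API. This module wraps the /api/generate endpoint to produce "
            "summaries, chunking long inputs so each request stays within "
            "a manageable size."
        )
        print(chunk_and_summarize(sample, model="llama3", max_length=50))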