Files
audiobook_creator/create_audiobook_nem.py
2026-02-25 11:37:35 -07:00

177 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
audiobook_nem.py
────────────────
Generate the Book of the Nem audiobook — one unique voice per book/section.
Usage:
python audiobook_nem.py
To skip a section, comment out its entry in BOOKS below.
Output .wav files are written to OUTPUT_DIR (created automatically).
"""
import re
import numpy as np
import soundfile as sf
import torch
from pathlib import Path
from kokoro import KPipeline
# ── Config ─────────────────────────────────────────────────────────────────────
_FIXED_FILE = Path("Audio Master Nem Full (TTS Fixed).txt")
_ORIG_FILE = Path("Audio Master Nem Full.txt")
SOURCE_FILE = _FIXED_FILE if _FIXED_FILE.exists() else _ORIG_FILE
OUTPUT_DIR = Path("output_audiobook")
SAMPLE_RATE = 24000
SPEED = 1.0
LANG_CODE = "a" # 'a' = American English
# ── Available Kokoro voices (American English, lang_code='a') ──────────────────
# af_heart warm American female [downloaded]
# af_nicole American female [downloaded]
# am_adam American male (deep) [downloaded]
# am_echo American male [downloaded]
# am_eric American male [downloaded]
# am_fenrir American male [downloaded]
# am_liam American male [downloaded]
# am_michael American male (clear) [downloaded]
# am_onyx American male [downloaded]
# am_puck American male [downloaded]
# am_santa American male [downloaded] (not used)
# ── Book definitions ───────────────────────────────────────────────────────────
# Format: (label, start_marker, voice, output_wav)
# start_marker exact text of the FIRST line of the section header in the source
# (leading/trailing whitespace is ignored when matching)
# voice Kokoro voice name
# output_wav filename saved inside OUTPUT_DIR
#
# Comment out any line to skip that section entirely.
BOOKS = [
# label start_marker voice output_wav
("Introduction", "Introduction", "af_heart", "00_introduction.wav"),
("Book of Hagoth", "THE BOOK OF HAGOTH", "am_fenrir", "01_hagoth.wav"),
("Shi-Tugo I", "THE FIRST BOOK OF SHI-TUGO", "am_eric", "02_shi_tugo_1.wav"),
("Sanempet", "THE BOOK OF SANEMPET", "am_liam", "03_sanempet.wav"),
("Oug", "THE BOOK OF OUG", "am_michael", "04_oug.wav"),
("Temple Writings of Oug", "THE BOOK OF", "am_michael", "05_temple_writings_oug.wav"),
("Sacred Temple Writings", "THE SACRED", "am_michael", "06_sacred_temple_writings.wav"),
("Samuel the Lamanite I", "THE FIRST BOOK", "am_echo", "07_samuel_lamanite_1.wav"),
("Samuel the Lamanite II", "THE SECOND BOOK", "am_echo", "08_samuel_lamanite_2.wav"),
("Manti", "THE BOOK OF MANTI", "am_onyx", "09_manti.wav"),
("Pa Nat I", "THE FIRST BOOK OF PA NAT", "af_nicole", "10_pa_nat_1.wav"),
("Moroni I", "THE FIRST BOOK OF MORONI", "am_adam", "11_moroni_1.wav"),
("Moroni II", "THE SECOND BOOK OF MORONI", "am_adam", "12_moroni_2.wav"),
("Moroni III", "THE THIRD BOOK OF MORONI", "am_adam", "13_moroni_3.wav"),
("Shioni", "THE BOOK OF SHIONI", "am_puck", "14_shioni.wav"),
]
# ── Helpers ────────────────────────────────────────────────────────────────────
def load_and_split(source: Path, books: list) -> dict[str, str]:
"""
Read the source file and split it into sections keyed by label.
Each section starts at its start_marker line and ends just before the
next section's start_marker.
"""
raw_lines = source.read_text(encoding="utf-8").splitlines()
# Build a mapping: marker_text → index in BOOKS
markers = [(label, marker.strip()) for label, marker, _, _ in books]
# Find the line index of each marker's first occurrence
marker_positions: list[tuple[int, int]] = [] # (line_idx, books_idx)
for book_idx, (label, marker) in enumerate(markers):
for line_idx, line in enumerate(raw_lines):
if line.strip() == marker:
marker_positions.append((line_idx, book_idx))
break
else:
print(f" ⚠ Marker not found for '{label}': '{marker}' — skipping")
marker_positions.sort(key=lambda x: x[0])
sections: dict[str, str] = {}
for rank, (line_idx, book_idx) in enumerate(marker_positions):
label = markers[book_idx][0]
if rank + 1 < len(marker_positions):
end_line = marker_positions[rank + 1][0]
else:
end_line = len(raw_lines)
text = "\n".join(raw_lines[line_idx:end_line]).strip()
sections[label] = text
return sections
def clean_text(text: str) -> str:
"""
Strip formatting artifacts, underscores, and normalise whitespace
so the TTS receives clean prose.
"""
# Remove lines that are pure underscores (horizontal rules)
text = re.sub(r"^_{3,}\s*$", "", text, flags=re.MULTILINE)
# Remove leading chapter headers that are all-caps lines
# (keep them as natural spoken title for context)
# Collapse excess blank lines
text = re.sub(r"\n{3,}", "\n\n", text)
return text.strip()
def generate_audio(pipeline: KPipeline, text: str, voice: str,
output_path: Path) -> None:
chunks = []
for _, _, chunk_audio in pipeline(text, voice=voice, speed=SPEED):
if hasattr(chunk_audio, "numpy"):
chunk_audio = chunk_audio.cpu().numpy()
chunk_audio = np.atleast_1d(chunk_audio.squeeze())
if chunk_audio.size > 0:
chunks.append(chunk_audio)
if chunks:
audio = np.concatenate(chunks, axis=0)
sf.write(str(output_path), audio, SAMPLE_RATE)
duration = len(audio) / SAMPLE_RATE
print(f" ✓ Saved '{output_path.name}' ({duration:.1f}s)")
else:
print(f" ✗ No audio produced for voice='{voice}'")
# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")
if device == "cuda":
print(f"GPU: {torch.cuda.get_device_name(0)}")
OUTPUT_DIR.mkdir(exist_ok=True)
print(f"\nSource: '{SOURCE_FILE}'"
+ (" ✓ (TTS fixed)" if SOURCE_FILE == _FIXED_FILE else
" ⚠ (original — run 'Apply Fixes to Text' in the GUI to use phonetic fixes)"))
sections = load_and_split(SOURCE_FILE, BOOKS)
print(f" Found {len(sections)} sections.\n")
print("Initialising Kokoro pipeline …")
pipeline = KPipeline(lang_code=LANG_CODE)
for label, marker, voice, wav_name in BOOKS:
if label not in sections:
continue # marker was not found; warning already printed
print(f"\n[{label}] voice={voice}{wav_name}")
text = clean_text(sections[label])
if not text:
print(" ⚠ Empty text — skipping")
continue
out_path = OUTPUT_DIR / wav_name
generate_audio(pipeline, text, voice, out_path)
print("\nDone.")
if __name__ == "__main__":
main()