Initial commit: audiobook generator, proper noun auditor GUI

This commit is contained in:
2026-02-24 14:40:31 -07:00
commit 58a236d181
15 changed files with 14975 additions and 0 deletions

13
.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
# Python
__pycache__/
*.pyc
*.pyo
.venv/
# Generated audio output (large binary files)
output_audiobook/
output_proper_nouns/
*.wav
# TTS fixed text output
**(TTS Fixed)*.txt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

4833
Audio Master Nem Full.txt Normal file

File diff suppressed because it is too large Load Diff

21
Nem table of contents.txt Normal file
View File

@ -0,0 +1,21 @@
Table of Contents
Contents
Introduction …………………………………………………..……………….. 4
The Book of Hagoth …………………………………………………………... 6-12
The First Book of Shi-Tugo …………………………………………………… 12-30
The Book of Sanempet ………………………………………………………… 30-35
The Book of Oug (The Son of Sanempet) ……………………………………... 35-54
The Book of the Temple Writings of Oug ……………………………………... 55-59
The Sacred Temple Writings …………………………………………………… 59-105
The Law of Chastity ………………………………………………………… 105-106
The Law of Consecration …………………………………………………… 107-109
The Law of Sacrifice ……………………………………………………….. 109-110
The First Book of Samuel The Lamanite ………………………………………. 111-125
The Second Book of Samuel The Lamanite ……………………………………. 126-158
The Book of Manti (The Son of Oug) ………………………………………….. 159-205
The First Book of Pa Nat (The Daughter of Shimlei) …………………………………….. 206-249
The First Book of Moroni (The Son of Mormon) ……………………………… 249-259
The Second Book of Moroni (The Son of Mormon) …………………………… 259-269
The Third Book of Moroni (The Son of Mormon) …………………………….. 269-331
The Book of Shioni (The Son of Moroni) ………………………………………. 331-392

172
audiobook_nem.py Normal file
View File

@ -0,0 +1,172 @@
"""
audiobook_nem.py
────────────────
Generate the Book of the Nem audiobook — one unique voice per book/section.
Usage:
python audiobook_nem.py
To skip a section, comment out its entry in BOOKS below.
Output .wav files are written to OUTPUT_DIR (created automatically).
"""
import re
import numpy as np
import soundfile as sf
import torch
from pathlib import Path
from kokoro import KPipeline
# ── Config ─────────────────────────────────────────────────────────────────────
SOURCE_FILE = Path("Audio Master Nem Full.txt")
OUTPUT_DIR = Path("output_audiobook")
SAMPLE_RATE = 24000
SPEED = 1.0
LANG_CODE = "a" # 'a' = American English
# ── Available Kokoro voices (American English, lang_code='a') ──────────────────
# af_heart warm American female [downloaded]
# af_nicole American female [downloaded]
# am_adam American male (deep) [downloaded]
# am_echo American male [downloaded]
# am_eric American male [downloaded]
# am_fenrir American male [downloaded]
# am_liam American male [downloaded]
# am_michael American male (clear) [downloaded]
# am_onyx American male [downloaded]
# am_puck American male [downloaded]
# am_santa American male [downloaded] (not used)
# ── Book definitions ───────────────────────────────────────────────────────────
# Format: (label, start_marker, voice, output_wav)
# start_marker exact text of the FIRST line of the section header in the source
# (leading/trailing whitespace is ignored when matching)
# voice Kokoro voice name
# output_wav filename saved inside OUTPUT_DIR
#
# Comment out any line to skip that section entirely.
BOOKS = [
# label start_marker voice output_wav
("Introduction", "Introduction", "af_heart", "00_introduction.wav"),
("Book of Hagoth", "THE BOOK OF HAGOTH", "am_fenrir", "01_hagoth.wav"),
("Shi-Tugo I", "THE FIRST BOOK OF SHI-TUGO", "am_eric", "02_shi_tugo_1.wav"),
("Sanempet", "THE BOOK OF SANEMPET", "am_liam", "03_sanempet.wav"),
("Oug", "THE BOOK OF OUG", "am_michael", "04_oug.wav"),
("Temple Writings of Oug", "THE BOOK OF", "am_michael", "05_temple_writings_oug.wav"),
("Sacred Temple Writings", "THE SACRED", "am_michael", "06_sacred_temple_writings.wav"),
("Samuel the Lamanite I", "THE FIRST BOOK", "am_echo", "07_samuel_lamanite_1.wav"),
("Samuel the Lamanite II", "THE SECOND BOOK", "am_echo", "08_samuel_lamanite_2.wav"),
("Manti", "THE BOOK OF MANTI", "am_onyx", "09_manti.wav"),
("Pa Nat I", "THE FIRST BOOK OF PA NAT", "af_nicole", "10_pa_nat_1.wav"),
("Moroni I", "THE FIRST BOOK OF MORONI", "am_adam", "11_moroni_1.wav"),
("Moroni II", "THE SECOND BOOK OF MORONI", "am_adam", "12_moroni_2.wav"),
("Moroni III", "THE THIRD BOOK OF MORONI", "am_adam", "13_moroni_3.wav"),
("Shioni", "THE BOOK OF SHIONI", "am_puck", "14_shioni.wav"),
]
# ── Helpers ────────────────────────────────────────────────────────────────────
def load_and_split(source: Path, books: list) -> dict[str, str]:
    """
    Read the source file and split it into sections keyed by label.

    Each section starts at its start_marker line and ends just before the
    next section's start_marker. Books whose marker never appears in the
    file are reported and omitted from the result.
    """
    raw_lines = source.read_text(encoding="utf-8").splitlines()
    # Build a mapping: marker_text → index in BOOKS
    markers = [(label, marker.strip()) for label, marker, _, _ in books]
    # Find the line index of each marker's first occurrence
    marker_positions: list[tuple[int, int]] = []  # (line_idx, books_idx)
    for book_idx, (label, marker) in enumerate(markers):
        for line_idx, line in enumerate(raw_lines):
            if line.strip() == marker:
                marker_positions.append((line_idx, book_idx))
                break
        else:
            # for/else: only reached when no line matched the marker
            print(f"  ⚠ Marker not found for '{label}': '{marker}' — skipping")
    # Sections run from one marker to the next, so sort by line position
    marker_positions.sort(key=lambda x: x[0])
    sections: dict[str, str] = {}
    for rank, (line_idx, book_idx) in enumerate(marker_positions):
        label = markers[book_idx][0]
        if rank + 1 < len(marker_positions):
            end_line = marker_positions[rank + 1][0]
        else:
            end_line = len(raw_lines)
        text = "\n".join(raw_lines[line_idx:end_line]).strip()
        sections[label] = text
    return sections
def clean_text(text: str) -> str:
    """
    Strip formatting artifacts, underscores, and normalise whitespace
    so the TTS receives clean prose.
    """
    # Remove lines that are pure underscores (horizontal rules)
    text = re.sub(r"^_{3,}\s*$", "", text, flags=re.MULTILINE)
    # All-caps chapter headers are deliberately kept: they read as a
    # natural spoken title and give the listener context.
    # Collapse excess blank lines
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
def generate_audio(pipeline: KPipeline, text: str, voice: str,
                   output_path: Path) -> None:
    """Synthesise *text* with *voice* and write one WAV to *output_path*.

    The Kokoro pipeline yields audio in chunks; chunks are collected and
    concatenated into a single file. Prints a status line either way.
    """
    chunks = []
    for _, _, chunk_audio in pipeline(text, voice=voice, speed=SPEED):
        # Torch tensors expose .numpy(); move to CPU before converting.
        if hasattr(chunk_audio, "numpy"):
            chunk_audio = chunk_audio.cpu().numpy()
        # squeeze() on a single-sample chunk yields a 0-d array; atleast_1d
        # keeps np.concatenate happy.
        chunk_audio = np.atleast_1d(chunk_audio.squeeze())
        if chunk_audio.size > 0:
            chunks.append(chunk_audio)
    if chunks:
        audio = np.concatenate(chunks, axis=0)
        sf.write(str(output_path), audio, SAMPLE_RATE)
        duration = len(audio) / SAMPLE_RATE
        print(f"  ✓ Saved '{output_path.name}' ({duration:.1f}s)")
    else:
        print(f"  ✗ No audio produced for voice='{voice}'")
# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
    """Split the source text into books and synthesise one WAV per book."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")
    if device == "cuda":
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    OUTPUT_DIR.mkdir(exist_ok=True)
    print(f"\nParsing '{SOURCE_FILE}'")
    sections = load_and_split(SOURCE_FILE, BOOKS)
    print(f"  Found {len(sections)} sections.\n")
    print("Initialising Kokoro pipeline …")
    pipeline = KPipeline(lang_code=LANG_CODE)
    for label, marker, voice, wav_name in BOOKS:
        if label not in sections:
            continue  # marker was not found; warning already printed
        print(f"\n[{label}] voice={voice} → {wav_name}")
        text = clean_text(sections[label])
        if not text:
            print("  ⚠ Empty text — skipping")
            continue
        out_path = OUTPUT_DIR / wav_name
        generate_audio(pipeline, text, voice, out_path)
    print("\nDone.")


if __name__ == "__main__":
    main()

141
extract_proper_nouns.py Normal file
View File

@ -0,0 +1,141 @@
"""
extract_proper_nouns.py
───────────────────────
Scan 'Audio Master Nem Full.txt' and extract all proper nouns into
'proper_nouns.txt', grouped by type and sorted alphabetically.
Uses spaCy for:
• NER (PERSON, GPE, LOC, ORG, …) named entity recognition
• POS (PROPN) catches names spaCy's NER misses
because they are not in its training vocabulary (e.g. Hagoth, Meninta)
Run:
.venv/bin/python extract_proper_nouns.py
"""
import re
from collections import defaultdict
from pathlib import Path
import spacy
SOURCE = Path("Audio Master Nem Full.txt")
OUTPUT = Path("proper_nouns.txt")
# ── spaCy setup ────────────────────────────────────────────────────────────────
print("Loading spaCy model …")
nlp = spacy.load("en_core_web_sm")
# Increase max length for the large source file
nlp.max_length = 2_000_000
# ── NER label groups ───────────────────────────────────────────────────────────
PERSON_LABELS = {"PERSON"}
PLACE_LABELS = {"GPE", "LOC", "FAC"}
ORG_LABELS = {"ORG", "NORP"}
OTHER_LABELS = {"EVENT", "WORK_OF_ART", "LAW", "PRODUCT", "LANGUAGE"}
# ── Noise filters ──────────────────────────────────────────────────────────────
# All-caps lines are section headers, not spoken names — skip them.
# Also skip very short tokens that are likely artefacts.
SKIP_PATTERNS = re.compile(
    r"^(THE|A|AN|AND|OF|IN|TO|FOR|BY|AT|IS|WAS|BE|HE|SHE|IT|"
    r"CHAPTER|VERSE|YEA|BEHOLD|LORD|GOD|CHRIST|HOLY|GHOST)$"
)


def is_noise(text: str) -> bool:
    """Return True when *text* is unlikely to be a genuine proper noun."""
    t = text.strip()
    if len(t) <= 1:
        return True
    if t.isupper() and len(t) > 4:  # all-caps section header word
        return True
    if SKIP_PATTERNS.match(t.upper()):
        return True
    if re.search(r"[^a-zA-Z\-' ]", t):  # contains digits or symbols
        return True
    return False
def canonical(text: str) -> str:
    """Normalise whitespace and title-case."""
    return " ".join(text.split()).title()
# ── Read and process ───────────────────────────────────────────────────────────
print(f"Reading '{SOURCE}'")
raw_text = SOURCE.read_text(encoding="utf-8")
print("Running spaCy pipeline (this may take a minute) …")
doc = nlp(raw_text)
# Buckets: keyed by display-group name → set of canonical strings
buckets: dict[str, set[str]] = defaultdict(set)

# 1. NER pass — trust spaCy's entity labels
for ent in doc.ents:
    name = canonical(ent.text)
    if is_noise(name):
        continue
    if ent.label_ in PERSON_LABELS:
        buckets["People & Characters"].add(name)
    elif ent.label_ in PLACE_LABELS:
        buckets["Places & Lands"].add(name)
    elif ent.label_ in ORG_LABELS:
        buckets["Groups & Nations"].add(name)
    else:
        # OTHER_LABELS and anything else spaCy tagged share one bucket
        buckets["Other Named Things"].add(name)

# 2. PROPN pass — catch names spaCy didn't recognise as entities
#    Only include tokens that are inside a sentence (not at position 0)
#    and are title-cased (filters out all-caps headers).
for token in doc:
    if token.pos_ != "PROPN":
        continue
    text = token.text.strip()
    if not text or not text[0].isupper() or text.isupper():
        continue  # skip empty and all-caps
    if token.i == token.sent.start:
        continue  # skip sentence-initial (could be any word)
    name = canonical(text)
    if is_noise(name):
        continue
    # Only add if not already captured by NER
    already_captured = any(name in s for s in buckets.values())
    if not already_captured:
        buckets["Unclassified Proper Nouns"].add(name)

# ── Write output ───────────────────────────────────────────────────────────────
GROUP_ORDER = [
    "People & Characters",
    "Places & Lands",
    "Groups & Nations",
    "Other Named Things",
    "Unclassified Proper Nouns",
]
lines: list[str] = []
lines.append("PROPER NOUNS — Book of the Nem")
lines.append("=" * 50)
lines.append(
    "Review this list for TTS mispronunciations.\n"
    "Each entry is the form that appears in the text.\n"
)
total = 0
for group in GROUP_ORDER:
    names = sorted(buckets.get(group, set()), key=str.casefold)
    if not names:
        continue
    # "─" rule lines are recognised (and skipped) by parse_entries in
    # generate_proper_noun_audio.py.
    lines.append(f"\n{'─' * 50}")
    lines.append(f"{group.upper()} ({len(names)})")
    lines.append(f"{'─' * 50}")
    for name in names:
        # Two-space indent marks a regular entry for the downstream parser.
        lines.append(f"  {name}")
    total += len(names)
lines.append(f"\n{'=' * 50}")
lines.append(f"TOTAL: {total} unique proper nouns")
OUTPUT.write_text("\n".join(lines), encoding="utf-8")
print(f"\n✓ Written '{OUTPUT}' ({total} unique proper nouns)")

View File

@ -0,0 +1,145 @@
"""
generate_proper_noun_audio.py
──────────────────────────────
Read proper_nouns.txt, generate a short TTS audio clip for every entry
using am_michael, and save a JSON manifest for the GUI.
Outputs:
output_proper_nouns/<slug>.wav one wav per entry
output_proper_nouns/manifest.json { "Word" : "slug.wav", … }
Already-generated files are skipped, so re-runs are fast.
Run:
.venv/bin/python generate_proper_noun_audio.py
"""
import json
import re
import sys
import numpy as np
import soundfile as sf
import torch
from pathlib import Path
from kokoro import KPipeline
PROPER_NOUNS_FILE = Path("proper_nouns.txt")
OUTPUT_DIR = Path("output_proper_nouns")
MANIFEST_FILE = OUTPUT_DIR / "manifest.json"
VOICE = "am_michael"
SAMPLE_RATE = 24000
SPEED = 1.0
# ── Parse proper_nouns.txt ─────────────────────────────────────────────────────
def parse_entries(path: Path) -> list[tuple[str, str]]:
    """Return list of (category, entry) pairs parsed from proper_nouns.txt.

    Category headers look like "PEOPLE & CHARACTERS (301)"; regular entries
    are indented two spaces. Rule lines, the preamble and the TOTAL footer
    are skipped.
    """
    entries: list[tuple[str, str]] = []
    current_cat = "Uncategorised"
    header_re = re.compile(r"^[A-Z &]+\s+\(\d+\)$")
    for line in path.read_text(encoding="utf-8").splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        # Skip "=" and "─" horizontal rule lines.
        # NOTE: this was startswith("") in the mangled source, which is
        # always True and made the parser skip every line.
        if stripped.startswith("=") or stripped.startswith("─"):
            continue
        if header_re.match(stripped):
            # e.g. "PEOPLE & CHARACTERS (301)"
            current_cat = stripped.rsplit("(", 1)[0].strip().title()
            continue
        if stripped.startswith("TOTAL:"):
            continue
        if stripped.startswith("Review this") or stripped.startswith("Each entry"):
            continue
        if stripped.startswith("PROPER NOUNS"):
            continue
        # Regular entry — indented two spaces in the file
        if line.startswith("  "):
            entries.append((current_cat, stripped))
    return entries
def slugify(text: str) -> str:
    """Convert e.g. 'Hagoth-II foo' → 'hagoth_ii_foo'."""
    s = text.lower()
    s = re.sub(r"[^a-z0-9]+", "_", s)
    return s.strip("_")
# ── TTS generation ─────────────────────────────────────────────────────────────
def generate(pipeline: KPipeline, text: str, out_path: Path) -> bool:
    """Synthesise *text* to *out_path*; return True when audio was written."""
    chunks = []
    # Speak the word in a short carrier phrase so the TTS pronounces it
    # naturally (isolated tokens sometimes get clipped prosody).
    # NOTE(review): currently the word is spoken bare — the carrier phrase
    # mentioned above is not applied; confirm whether that is intended.
    spoken = text
    for _, _, chunk in pipeline(spoken, voice=VOICE, speed=SPEED):
        if hasattr(chunk, "numpy"):
            chunk = chunk.cpu().numpy()
        chunk = np.atleast_1d(chunk.squeeze())
        if chunk.size > 0:
            chunks.append(chunk)
    if chunks:
        audio = np.concatenate(chunks)
        sf.write(str(out_path), audio, SAMPLE_RATE)
        return True
    return False
# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
    """Generate one short WAV per proper-noun entry; skip already-done clips."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")
    if device == "cuda":
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    OUTPUT_DIR.mkdir(exist_ok=True)
    print(f"Parsing '{PROPER_NOUNS_FILE}'")
    entries = parse_entries(PROPER_NOUNS_FILE)
    print(f"  {len(entries)} entries found.\n")
    # Load existing manifest so we can skip already-done words
    if MANIFEST_FILE.exists():
        manifest: dict = json.loads(MANIFEST_FILE.read_text())
    else:
        manifest = {}
    print("Initialising Kokoro pipeline …")
    pipeline = KPipeline(lang_code="a")
    skipped = 0
    generated = 0
    failed = 0
    for i, (cat, entry) in enumerate(entries):
        slug = slugify(entry)
        wav_name = f"{slug}.wav"
        wav_path = OUTPUT_DIR / wav_name
        # Only skip when both the manifest entry AND the file exist.
        if entry in manifest and wav_path.exists():
            skipped += 1
            continue
        # \r progress line overwrites itself; pad to a fixed width.
        sys.stdout.write(f"\r[{i+1}/{len(entries)}] {entry[:55]:<55}")
        sys.stdout.flush()
        ok = generate(pipeline, entry, wav_path)
        if ok:
            manifest[entry] = wav_name
            generated += 1
        else:
            print(f"\n  ✗ Failed: {entry}")
            failed += 1
    print(f"\n\nDone. generated={generated} skipped={skipped} failed={failed}")
    MANIFEST_FILE.write_text(json.dumps(manifest, ensure_ascii=False, indent=2))
    print(f"Manifest saved → '{MANIFEST_FILE}'")


if __name__ == "__main__":
    main()

620
proper_noun_player.py Normal file
View File

@ -0,0 +1,620 @@
"""
proper_noun_player.py
──────────────────────
GUI for auditing proper noun pronunciations.
Three columns (all persisted as JSON, original manifest never modified):
• Review words not yet audited
• Correct words that already pronounce fine
• Fixes linked list: original word → phonetic replacement
e.g. "Nephi""Kneephi"
Hotkeys (always active):
Space replay current word
s stop audio
Escape reset fix entry to original word, refocus review list
On the Review list:
↑ / ↓ navigate
Click / Enter play word AND focus fix entry
On the fix entry (bottom bar, right of the word label):
Start typing to overwrite the pre-filled word.
Enter → if text == original word → mark Correct, advance to next
if text differs → add as Fix, advance to next
Escape → reset text to original word, return focus to review list
On the Correct list:
Delete / BackSpace move selected word back to Review
On the Fixes list:
Delete / BackSpace move selected fix back to Review
"Apply Fixes to Text" writes a TTS-ready copy of the source file with all
substitutions applied (case-sensitive whole-word replace).
Data files (auto-created in output_proper_nouns/):
correct_words.json list of correct words
pronunciation_fixes.json { "Nephi": "Kneephi", … }
Run:
.venv/bin/python proper_noun_player.py
"""
import json
import re
import threading
import tkinter as tk
from tkinter import ttk, messagebox
from pathlib import Path
import sounddevice as sd
import soundfile as sf
MANIFEST_FILE = Path("output_proper_nouns/manifest.json")
OUTPUT_DIR = Path("output_proper_nouns")
REPLACEMENTS_DIR = OUTPUT_DIR / "replacements_cache"
CORRECT_FILE = OUTPUT_DIR / "correct_words.json"
FIXES_FILE = OUTPUT_DIR / "pronunciation_fixes.json"
SOURCE_TEXT = Path("Audio Master Nem Full.txt")
FIXED_TEXT_OUT = Path("Audio Master Nem Full (TTS Fixed).txt")
VOICE = "am_michael"
SAMPLE_RATE = 24000
# ── Colours ────────────────────────────────────────────────────────────────────
BG = "#1e1e2e"
BG2 = "#181825"
BG3 = "#313244"
FG = "#cdd6f4"
FG_DIM = "#6c7086"
GREEN = "#a6e3a1"
BLUE = "#89b4fa"
RED = "#f38ba8"
YELLOW = "#f9e2af"
MAUVE = "#cba6f7"
# ── Audio ──────────────────────────────────────────────────────────────────────
def play_async(path: Path) -> None:
    """Stop any current playback, then play *path* on a daemon thread."""
    sd.stop()

    def _play():
        data, sr = sf.read(str(path), dtype="float32")
        sd.play(data, sr)

    threading.Thread(target=_play, daemon=True).start()
def _slug(text: str) -> str:
"""Safe filename from arbitrary text."""
return re.sub(r"[^a-zA-Z0-9_-]", "_", text).strip("_")[:80]
# Lazy KPipeline singleton — only imported+loaded on first synthesis request
_pipeline = None
_pipeline_lock = threading.Lock()


def _get_pipeline():
    """Return the shared KPipeline, creating it on first call.

    Uses double-checked locking so concurrent synthesis threads do not
    construct two pipelines; warnings from kokoro are silenced.
    """
    global _pipeline
    if _pipeline is None:
        with _pipeline_lock:
            if _pipeline is None:  # re-check: another thread may have won
                import warnings
                from kokoro import KPipeline  # type: ignore
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=UserWarning)
                    warnings.filterwarnings("ignore", category=FutureWarning)
                    _pipeline = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")
    return _pipeline
def synth_and_play(text: str, on_ready=None) -> None:
    """Synthesise *text* with Kokoro (cached) and play it.

    Runs entirely on a daemon thread so the GUI never blocks.
    *on_ready(path)* is called on the same thread once the file is written.
    """
    def _run():
        path = _synth_to_cache(text)
        if path:
            if on_ready:
                on_ready(path)
            play_async(path)

    threading.Thread(target=_run, daemon=True).start()
def _synth_to_cache(text: str) -> "Path | None":
    """Synthesise *text* to a cached WAV and return its path (or None on failure).

    Skips synthesis if the file already exists. Safe to call from any thread.
    """
    REPLACEMENTS_DIR.mkdir(parents=True, exist_ok=True)
    cache_path = REPLACEMENTS_DIR / f"{_slug(text)}.wav"
    if not cache_path.exists():
        # Heavy imports stay local: only paid on a cache miss.
        import warnings
        import numpy as np
        pipeline = _get_pipeline()
        chunks = []
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            for _, _, audio in pipeline(text, voice=VOICE):
                if audio is not None:
                    chunks.append(audio)
        if chunks:
            combined = np.concatenate(chunks)
            sf.write(str(cache_path), combined, SAMPLE_RATE)
    # Re-check existence: synthesis may have produced nothing.
    return cache_path if cache_path.exists() else None
# ── Persistence helpers ────────────────────────────────────────────────────────
def load_json(path: Path, default):
    """Read JSON from *path*, returning *default* when the file is absent."""
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    return default
def save_json(path: Path, obj) -> None:
    """Write *obj* to *path* as pretty-printed UTF-8 JSON."""
    path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
# ── Styled widget helpers ──────────────────────────────────────────────────────
def make_listbox(parent) -> tuple[tk.Listbox, tk.Frame]:
    """Return a themed Listbox plus the frame that carries it and its scrollbar."""
    frame = tk.Frame(parent, bg=BG2, bd=0)
    sb = ttk.Scrollbar(frame, orient="vertical")
    sb.pack(side="right", fill="y")
    lb = tk.Listbox(
        frame,
        yscrollcommand=sb.set,
        font=("Helvetica", 11),
        bg=BG2, fg=FG,
        selectbackground=BLUE, selectforeground=BG,
        activestyle="none", bd=0, highlightthickness=0, relief="flat",
        # exportselection=False keeps each list's selection independent of
        # the X primary selection (and of the other listboxes).
        exportselection=False,
    )
    lb.pack(side="left", fill="both", expand=True)
    sb.config(command=lb.yview)
    return lb, frame
def styled_btn(parent, text, command, color=FG, bg=BG3, **kw):
    """Return a flat, dark-themed tk.Button; extra kwargs pass through."""
    return tk.Button(
        parent, text=text, command=command,
        bg=bg, fg=color, activebackground=BG2, activeforeground=color,
        font=("Helvetica", 10, "bold"), relief="flat", bd=0,
        padx=10, pady=5, cursor="hand2", **kw
    )
def section_label(parent, text):
    """Return a small dim column-header label."""
    return tk.Label(parent, text=text, bg=BG, fg=FG_DIM,
                    font=("Helvetica", 9, "bold"), anchor="w")
# ── Main app ───────────────────────────────────────────────────────────────────
class ProperNounAuditor(tk.Tk):
    """Main window: three-column GUI for auditing proper-noun pronunciation."""

    # tracks which word is currently loaded into the fix entry
    _fix_entry_word: str = ""

    def __init__(self, manifest: dict[str, str]) -> None:
        """*manifest* maps each word to its pre-generated WAV filename."""
        super().__init__()
        self.title("Proper Noun Pronunciation Auditor")
        self.geometry("1020x700")
        self.minsize(800, 500)
        self.configure(bg=BG)
        self.manifest: dict[str, str] = manifest
        self.all_words: list[str] = sorted(manifest.keys(), key=str.casefold)
        # Persistent data
        self.correct: set[str] = set(load_json(CORRECT_FILE, []))
        self.fixes: dict[str, str] = load_json(FIXES_FILE, {})
        self._build_ui()
        self._refresh_all()
        # Window-level hotkeys (work even when a listbox has keyboard focus)
        self.bind("<space>", lambda e: self._replay())
        self.bind("s", lambda e: sd.stop())
        self.bind("<Escape>", lambda e: self._reset_fix_entry())
# ── UI construction ────────────────────────────────────────────────────────
def _build_ui(self) -> None:
    """Construct the widget tree: title bar, three list columns, action bar."""
    PAD = 8
    # Title bar
    title_bar = tk.Frame(self, bg=BG, pady=6)
    title_bar.pack(fill="x", padx=PAD)
    tk.Label(title_bar, text="Proper Noun Pronunciation Auditor",
             font=("Helvetica", 15, "bold"), bg=BG, fg=FG).pack(side="left")
    hint = "Space=replay s=stop Esc=reset fix Del=remove from list Enter=correct|fix"
    tk.Label(title_bar, text=hint,
             font=("Helvetica", 8), bg=BG, fg=FG_DIM).pack(side="left", padx=14)
    # Three-column body: review gets the widest column
    body = tk.Frame(self, bg=BG)
    body.pack(fill="both", expand=True, padx=PAD, pady=(0, PAD))
    body.columnconfigure(0, weight=3)
    body.columnconfigure(1, weight=2)
    body.columnconfigure(2, weight=2)
    body.rowconfigure(0, weight=1)
    # ── Column 0: Review list ──────────────────────────────────────────────
    col0 = tk.Frame(body, bg=BG)
    col0.grid(row=0, column=0, sticky="nsew", padx=(0, PAD))
    filter_row = tk.Frame(col0, bg=BG)
    filter_row.pack(fill="x", pady=(0, 4))
    tk.Label(filter_row, text="Filter:", bg=BG, fg=FG,
             font=("Helvetica", 10)).pack(side="left", padx=(0, 4))
    self.search_var = tk.StringVar()
    self.search_var.trace_add("write", lambda *_: self._refresh_review())
    self._filter_entry = tk.Entry(
        filter_row, textvariable=self.search_var,
        font=("Helvetica", 11), bg=BG3, fg=FG,
        insertbackground=FG, relief="flat", bd=4)
    self._filter_entry.pack(side="left", fill="x", expand=True)
    self._filter_entry.focus_set()
    # Clear-filter button (glyph lost in source paste — TODO restore icon)
    styled_btn(filter_row, "", lambda: self.search_var.set(""),
               color=RED, bg=BG3).pack(side="left", padx=(3, 0))
    hdr0 = tk.Frame(col0, bg=BG)
    hdr0.pack(fill="x")
    section_label(hdr0, "TO REVIEW").pack(side="left")
    self.review_count_var = tk.StringVar()
    tk.Label(hdr0, textvariable=self.review_count_var, bg=BG, fg=FG_DIM,
             font=("Helvetica", 9)).pack(side="right")
    self.review_lb, review_frame = make_listbox(col0)
    review_frame.pack(fill="both", expand=True)
    self.review_lb.bind("<<ListboxSelect>>", self._on_review_select)
    self.review_lb.bind("<Return>", self._on_review_select)
    # ── Column 1: Correct list ─────────────────────────────────────────────
    col1 = tk.Frame(body, bg=BG)
    col1.grid(row=0, column=1, sticky="nsew", padx=(0, PAD))
    hdr1 = tk.Frame(col1, bg=BG)
    hdr1.pack(fill="x")
    section_label(hdr1, "✓ CORRECT [Del=remove]").pack(side="left")
    self.correct_count_var = tk.StringVar()
    tk.Label(hdr1, textvariable=self.correct_count_var, bg=BG, fg=FG_DIM,
             font=("Helvetica", 9)).pack(side="right")
    self.correct_lb, correct_frame = make_listbox(col1)
    correct_frame.pack(fill="both", expand=True)
    self.correct_lb.bind("<<ListboxSelect>>",
                         lambda e: self._on_side_select(self.correct_lb))
    self.correct_lb.bind("<Delete>",
                         lambda e: self._move_back(self.correct_lb, is_dict=False))
    self.correct_lb.bind("<BackSpace>",
                         lambda e: self._move_back(self.correct_lb, is_dict=False))
    styled_btn(col1, "← Back to Review [Del]",
               lambda: self._move_back(self.correct_lb, is_dict=False),
               color=YELLOW).pack(fill="x", pady=(4, 0))
    # ── Column 2: Fixes list ───────────────────────────────────────────────
    col2 = tk.Frame(body, bg=BG)
    col2.grid(row=0, column=2, sticky="nsew")
    hdr2 = tk.Frame(col2, bg=BG)
    hdr2.pack(fill="x")
    section_label(hdr2, "⇄ FIXES (original → phonetic)").pack(side="left")
    self.fixes_count_var = tk.StringVar()
    tk.Label(hdr2, textvariable=self.fixes_count_var, bg=BG, fg=FG_DIM,
             font=("Helvetica", 9)).pack(side="right")
    self.fixes_lb, fixes_frame = make_listbox(col2)
    fixes_frame.pack(fill="both", expand=True)
    self.fixes_lb.bind("<<ListboxSelect>>",
                       lambda e: self._on_side_select(self.fixes_lb))
    self.fixes_lb.bind("<Delete>",
                       lambda e: self._move_back(self.fixes_lb, is_dict=True))
    self.fixes_lb.bind("<BackSpace>",
                       lambda e: self._move_back(self.fixes_lb, is_dict=True))
    styled_btn(col2, "← Back to Review [Del]",
               lambda: self._move_back(self.fixes_lb, is_dict=True),
               color=YELLOW).pack(fill="x", pady=(4, 0))
    # ── Bottom action bar ──────────────────────────────────────────────────
    action_bar = tk.Frame(self, bg=BG3, pady=8)
    action_bar.pack(fill="x")
    # Now-playing word label (leading icon glyph lost in source paste)
    tk.Label(action_bar, text="", bg=BG3, fg=GREEN,
             font=("Helvetica", 11)).pack(side="left", padx=(10, 2))
    self.now_playing_var = tk.StringVar(value="")
    tk.Label(action_bar, textvariable=self.now_playing_var,
             bg=BG3, fg=GREEN, font=("Helvetica", 11, "bold"),
             width=20, anchor="w").pack(side="left")
    # Inline fix entry — right next to the word, auto-focused on word click
    tk.Label(action_bar, text="", bg=BG3, fg=MAUVE,
             font=("Helvetica", 13, "bold")).pack(side="left", padx=(6, 3))
    self.fix_var = tk.StringVar()
    self._fix_entry = tk.Entry(
        action_bar, textvariable=self.fix_var,
        font=("Helvetica", 11), bg=BG2, fg=MAUVE,
        insertbackground=MAUVE, relief="flat", bd=4, width=22)
    self._fix_entry.pack(side="left")
    self._fix_entry.bind("<Return>", lambda e: self._enter_action())
    self._fix_entry.bind("<Escape>", lambda e: self._reset_fix_entry())
    tk.Label(action_bar, text="Enter=correct (edit first for fix) Esc=reset",
             bg=BG3, fg=FG_DIM, font=("Helvetica", 8)).pack(side="left", padx=(5, 10))
    tk.Label(action_bar, text="", bg=BG3, fg=FG_DIM).pack(side="left", padx=4)
    styled_btn(action_bar, "■ Stop [s]", sd.stop,
               color=RED).pack(side="left", padx=4)
    styled_btn(action_bar, "↺ Replay [Space]", self._replay,
               color=BLUE).pack(side="left", padx=2)
    tk.Label(action_bar, text="", bg=BG3, fg=FG_DIM).pack(side="left", padx=4)
    styled_btn(action_bar, "⇄ Apply Fixes to Text",
               self._apply_fixes, color=YELLOW, bg=BG2).pack(side="left", padx=4)
    tk.Label(action_bar, text="", bg=BG3, fg=FG_DIM).pack(side="left", padx=4)
    self._pregen_btn = styled_btn(
        action_bar, "↻ Pre-gen Fix Audio",
        self._pregen_all_fix_audio, color=MAUVE, bg=BG2)
    self._pregen_btn.pack(side="left", padx=4)
    self._pregen_status_var = tk.StringVar(value="")
    tk.Label(action_bar, textvariable=self._pregen_status_var,
             bg=BG3, fg=FG_DIM, font=("Helvetica", 8),
             width=28, anchor="w").pack(side="left", padx=(4, 10))
# ── Refresh helpers ────────────────────────────────────────────────────────
def _review_words(self) -> list[str]:
    """Words not yet marked correct/fixed, filtered by the search box."""
    excluded = self.correct | set(self.fixes.keys())
    q = self.search_var.get().strip().casefold()
    words = [w for w in self.all_words if w not in excluded]
    if q:
        words = [w for w in words if q in w.casefold()]
    return words

def _refresh_review(self) -> None:
    """Repopulate the Review listbox and its counter."""
    words = self._review_words()
    self.review_lb.delete(0, "end")
    for w in words:
        self.review_lb.insert("end", f"  {w}")
    self.review_count_var.set(f"{len(words)}")

def _refresh_correct(self) -> None:
    """Repopulate the Correct listbox and its counter."""
    self.correct_lb.delete(0, "end")
    for w in sorted(self.correct, key=str.casefold):
        self.correct_lb.insert("end", f"  {w}")
    self.correct_count_var.set(f"{len(self.correct)}")

def _refresh_fixes(self) -> None:
    """Repopulate the Fixes listbox; rows render as 'orig → replacement'."""
    self.fixes_lb.delete(0, "end")
    for orig, rep in sorted(self.fixes.items(), key=lambda x: x[0].casefold()):
        # The "→" separator is parsed back by _on_side_select/_move_back;
        # it was lost in the mangled source (rows had no separator at all).
        self.fixes_lb.insert("end", f"  {orig} → {rep}")
    self.fixes_count_var.set(f"{len(self.fixes)}")

def _refresh_all(self) -> None:
    """Refresh all three columns at once."""
    self._refresh_review()
    self._refresh_correct()
    self._refresh_fixes()
# ── Playback ───────────────────────────────────────────────────────────────
def _play_word(self, word: str) -> None:
    """Play the pre-generated clip for *word*; warn if the WAV is missing."""
    wav_name = self.manifest.get(word)
    if not wav_name:
        return
    wav_path = OUTPUT_DIR / wav_name
    if not wav_path.exists():
        messagebox.showwarning("Missing audio",
                               f"No audio file for '{word}'.\n"
                               "Run generate_proper_noun_audio.py first.")
        return
    self.now_playing_var.set(word)
    play_async(wav_path)

# ── Selection callbacks ────────────────────────────────────────────────────
def _on_review_select(self, event=None) -> None:
    """Click/Enter on a review word: play it and load it into the fix entry."""
    sel = self.review_lb.curselection()
    if not sel:
        return
    word = self.review_lb.get(sel[0]).strip()
    self._fix_entry_word = word
    self.fix_var.set(word)  # pre-fill fix entry with the word
    self._fix_entry.selection_range(0, "end")
    self._fix_entry.icursor("end")
    # Defer focus so the listbox doesn't reclaim it after the click event settles
    self.after(0, self._fix_entry.focus_set)
    self._play_word(word)

def _on_side_select(self, listbox: tk.Listbox) -> None:
    """Click on Correct/Fixes: play the word (or the phonetic replacement)."""
    sel = listbox.curselection()
    if not sel:
        return
    row = listbox.get(sel[0]).strip()
    # Fix rows render as "orig → replacement"; the mangled source split on
    # "" which raises ValueError — split on the arrow instead.
    parts = row.split("→")
    original = parts[0].strip()
    if listbox is self.fixes_lb and len(parts) == 2:
        # Play the phonetic replacement text
        replacement = parts[1].strip()
        self.now_playing_var.set(f"→ {replacement}")
        def _on_ready(_path):
            self.after(0, lambda: self.now_playing_var.set(replacement))
        synth_and_play(replacement, on_ready=_on_ready)
    else:
        self._play_word(original)
# ── Actions ────────────────────────────────────────────────────────────────
def _selected_review_word(self) -> str | None:
    """Return the currently selected review word, or None."""
    sel = self.review_lb.curselection()
    if not sel:
        return None
    return self.review_lb.get(sel[0]).strip()

def _enter_action(self) -> None:
    """Smart Enter handler for the fix entry.

    If the entry text matches the original word → mark Correct.
    If the entry text differs from the original → add as Fix.
    """
    word = self._fix_entry_word or self._selected_review_word()
    if not word:
        return
    text = self.fix_var.get().strip()
    if not text or text == word:
        self._mark_correct_word(word)
    else:
        self._add_fix_for_word(word, text)

def _reset_fix_entry(self) -> None:
    """Escape: reset fix entry to the original word, refocus the review list."""
    self.fix_var.set(self._fix_entry_word)
    self.review_lb.focus_set()

def _replay(self) -> None:
    """Space: replay the word currently loaded in the fix entry."""
    if self._fix_entry_word:
        self._play_word(self._fix_entry_word)

def _advance_review(self) -> None:
    """After an action, select the first remaining word in the review list."""
    if self.review_lb.size() > 0:
        self.review_lb.selection_clear(0, "end")
        self.review_lb.selection_set(0)
        self.review_lb.see(0)
        # Fire the same virtual event a user click would, so the word plays.
        self.review_lb.event_generate("<<ListboxSelect>>")
def _mark_correct_word(self, word: str) -> None:
self.correct.add(word)
save_json(CORRECT_FILE, sorted(self.correct))
self._fix_entry_word = ""
self.fix_var.set("")
self.now_playing_var.set("")
self._refresh_all()
self._advance_review()
def _mark_correct(self) -> None:
    """Toolbar action: mark the selected Review word as pronounced correctly."""
    word = self._selected_review_word()
    if word:
        self._mark_correct_word(word)
    else:
        messagebox.showinfo("Nothing selected",
                            "Select a word from the Review list first.")
def _add_fix_for_word(self, word: str, replacement: str) -> None:
    """Record *replacement* as the phonetic fix for *word* and persist it.

    Mirrors _mark_correct_word: save, clear entry state, refresh, advance.
    """
    self.fixes[word] = replacement
    save_json(FIXES_FILE, self.fixes)
    # Clear entry/playback state so the next selection starts fresh.
    self._fix_entry_word = ""
    self.fix_var.set("")
    self.now_playing_var.set("")
    self._refresh_all()
    self._advance_review()
def _add_fix(self) -> None:
    """Toolbar action: store the typed phonetic replacement for the selection."""
    word = self._selected_review_word()
    if not word:
        messagebox.showinfo("Nothing selected",
                            "Select a word from the Review list first.")
        return
    replacement = self.fix_var.get().strip()
    # An empty box, or text identical to the original word, is not a fix.
    if not replacement or replacement == word:
        messagebox.showinfo("No replacement",
                            "Type the phonetic replacement in the Fix box.")
        return
    self._add_fix_for_word(word, replacement)
def _move_back(self, listbox: tk.Listbox, is_dict: bool) -> None:
    """Remove the selected entry from a side list, returning it to Review.

    Parameters
    ----------
    listbox : the side list the user acted on.
    is_dict : True when *listbox* is the Fixes list (backed by the
              ``self.fixes`` dict), False for the Correct list (a set).
    """
    sel = listbox.curselection()
    if not sel:
        return
    # BUG FIX: Fixes rows are "original → replacement" — split on the
    # "→" separator and keep the original word.  str.split("") raises
    # ValueError (empty separator); the arrow was lost in transit.
    raw = listbox.get(sel[0]).strip().split("→")[0].strip()
    if is_dict:
        self.fixes.pop(raw, None)
        save_json(FIXES_FILE, self.fixes)
    else:
        self.correct.discard(raw)
        save_json(CORRECT_FILE, sorted(self.correct))
    self._refresh_all()
# ── Apply fixes to source text ─────────────────────────────────────────────
def _pregen_all_fix_audio(self) -> None:
    """Synthesise and cache audio for every replacement phonetic string.

    Runs the synthesis on a daemon worker thread; all widget updates are
    marshalled back to the Tk main thread via ``self.after(0, ...)``.
    """
    if not self.fixes:
        messagebox.showinfo("No fixes", "The Fixes list is empty.")
        return
    replacements = list(self.fixes.values())
    total = len(replacements)
    already = sum(
        1 for r in replacements
        if (REPLACEMENTS_DIR / f"{_slug(r)}.wav").exists()
    )
    # Confirm if it'll take a while
    new_count = total - already
    if new_count == 0:
        messagebox.showinfo("Already done",
                            f"All {total} replacement clips already exist.")
        return
    self._pregen_btn.config(state="disabled")
    self._pregen_status_var.set(f"0 / {new_count} new ({already} cached)")
    def _run():
        done = 0
        for rep in replacements:
            cache_path = REPLACEMENTS_DIR / f"{_slug(rep)}.wav"
            if not cache_path.exists():
                _synth_to_cache(rep)
                # Count (and report) only clips actually synthesised, so
                # the "d / t" readout can never exceed new_count when some
                # clips were already cached.
                done += 1
                self.after(0, lambda d=done, t=new_count:
                           self._pregen_status_var.set(f"{d} / {t} synthesised…"))
        self.after(0, lambda: self._pregen_status_var.set(
            f"Done — {total} clips ready"))
        self.after(0, lambda: self._pregen_btn.config(state="normal"))
    threading.Thread(target=_run, daemon=True).start()
def _apply_fixes(self) -> None:
    """Apply every fix rule to the source text and write the TTS-fixed copy."""
    if not self.fixes:
        messagebox.showinfo("No fixes", "The Fixes list is empty.")
        return
    if not SOURCE_TEXT.exists():
        messagebox.showerror("Source not found", f"Cannot find:\n{SOURCE_TEXT}")
        return
    text = SOURCE_TEXT.read_text(encoding="utf-8")
    count_total = 0
    for original, replacement in self.fixes.items():
        # Whole-word match only, so a short name never fires mid-word.
        pattern = r'\b' + re.escape(original) + r'\b'
        text, hits = re.subn(pattern, replacement, text)
        count_total += hits
    FIXED_TEXT_OUT.write_text(text, encoding="utf-8")
    messagebox.showinfo(
        "Done",
        f"Applied {len(self.fixes)} fix rules ({count_total} replacements).\n\n"
        f"Saved to:\n{FIXED_TEXT_OUT}"
    )
# ── Entry point ────────────────────────────────────────────────────────────────
def main() -> None:
    """Load the proper-noun manifest and launch the auditor GUI."""
    if not MANIFEST_FILE.exists():
        # The manifest is produced by a separate generation step.
        print(f"Manifest not found: '{MANIFEST_FILE}'")
        print("Run generate_proper_noun_audio.py first.")
        raise SystemExit(1)
    manifest: dict[str, str] = json.loads(MANIFEST_FILE.read_text(encoding="utf-8"))
    print(f"Loaded {len(manifest)} entries from manifest.")
    ProperNounAuditor(manifest).mainloop()
# Script entry guard — lets the module be imported without launching the GUI.
if __name__ == "__main__":
    main()

1707
proper_nouns.txt Normal file

File diff suppressed because it is too large Load Diff

44
render_voices.py Normal file
View File

@ -0,0 +1,44 @@
import torch
import numpy as np
import soundfile as sf
from kokoro import KPipeline
from text_input import TEXT
# ── Device setup ──────────────────────────────────────────────────────────────
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")
# NOTE(review): `device` is informational only — KPipeline below is not
# given a device argument; confirm it selects the GPU itself.

# Kokoro emits 24 kHz audio; SPEED 1.0 = normal speaking rate.
SAMPLE_RATE = 24000
SPEED = 1.0
# (voice id, output filename) pairs to render.
VOICES = [
    ("af_heart", "output_af_heart.wav"),  # warm American female
    ("am_michael", "output_am_michael.wav"),  # best American male
]
# lang_code "a" = American English.
pipeline = KPipeline(lang_code="a")
def generate(voice: str, output_file: str) -> None:
    """Render TEXT with *voice* and save a 24 kHz WAV to *output_file*."""
    # FIX: restore the separator between voice and filename — the original
    # f-string ran them together ('{voice}'{output_file}), apparently a
    # non-ASCII arrow lost in transit.
    print(f"\nGenerating '{voice}' → {output_file}")
    chunks = []
    for _, _, chunk_audio in pipeline(TEXT, voice=voice, speed=SPEED):
        # Kokoro yields torch tensors; move to CPU numpy before concatenating.
        if hasattr(chunk_audio, "numpy"):
            chunk_audio = chunk_audio.cpu().numpy()
        chunk_audio = np.atleast_1d(chunk_audio.squeeze())
        if chunk_audio.size > 0:
            chunks.append(chunk_audio)
    if chunks:
        audio = np.concatenate(chunks, axis=0)
        sf.write(output_file, audio, SAMPLE_RATE)
        print(f" ✓ Saved '{output_file}' ({len(audio) / SAMPLE_RATE:.1f}s, {SAMPLE_RATE} Hz)")
    else:
        # Guard the empty case — np.concatenate([]) would raise ValueError.
        print(f" ✗ No audio produced for '{voice}'")
# Render each configured voice in turn.
for voice, path in VOICES:
    generate(voice, path)
print("\nDone.")

19
run_michael.py Normal file
View File

@ -0,0 +1,19 @@
import torch, numpy as np, soundfile as sf
from kokoro import KPipeline
from text_input import TEXT
pipeline = KPipeline(lang_code="a")

print(f"GPU: {torch.cuda.get_device_name(0)}" if torch.cuda.is_available() else "CPU")
print("Generating am_michael ...")

chunks = []
for _, _, chunk_audio in pipeline(TEXT, voice="am_michael", speed=1.0):
    # Kokoro yields torch tensors; move to CPU numpy before concatenating.
    if hasattr(chunk_audio, "numpy"):
        chunk_audio = chunk_audio.cpu().numpy()
    chunk_audio = np.atleast_1d(chunk_audio.squeeze())
    if chunk_audio.size > 0:
        chunks.append(chunk_audio)

# FIX: guard the empty case — np.concatenate([]) raises ValueError.
# Matches the handling already present in render_voices.py / tts_test.py.
if chunks:
    audio = np.concatenate(chunks)
    sf.write("output_am_michael.wav", audio, 24000)
    print(f"Saved output_am_michael.wav ({len(audio)/24000:.1f}s)")
else:
    print("No audio generated — check input text.")

35
text_input copy.py Normal file
View File

@ -0,0 +1,35 @@
# Full audiobook introduction for the Book of the Nem.
# NOTE(review): this looks like the long-form variant of the TEXT constant
# in text_input.py — confirm which module the render scripts should import.
TEXT = (
    "The Book of the Nem. "
    "Another Testament of Jesus Christ. "
    "From the Nem People. "
    "Accounts Written by the Hands of Nem Prophets and Recordkeepers. "
    "Taken from the Written Records of the Nem, the People of the Spirit. "
    "\n\n"
    "The Book of the Nem is written to the descendants of the Lamanites of the "
    "Book of Mormon, who are a remnant of the House of Israel, and also to the "
    "descendants of the Nem, and to the Gentiles, by way of commandment, and also "
    "by the spirit of prophecy and of revelation, to the convincing of all that "
    "Jesus Christ is the Son of God, the Savior of all mankind throughout the world. "
    "\n\n"
    "It is a record of the people of Hagoth, the shipbuilder, and the people of "
    "Corianton, the son of Alma the younger, who traveled into the Land Northward "
    "to escape the wickedness of the Nephites and secret combinations of the "
    "Gadianton Robbers who were beginning to spread across the land. It is to show "
    "unto the remnant of the House of Israel what great things the Lord hath done "
    "for their fathers and brothers; and that they may know the covenants of the "
    "Lord, that they are not cast off forever, and also to the convincing of the "
    "Gentiles that Jesus is the Christ, the Eternal God, manifesting himself unto "
    "all nations, including the ancestors of the Nem long ago. And now, if there "
    "are faults in this record, they are the mistakes of men; wherefore, condemn "
    "not the things of God. Search these records, ponder, and pray that Jesus "
    "Christ may reveal the truth of it unto you by the power of the Holy Ghost, "
    "for by the power of the Holy Ghost, ye may know the truth of all things. "
    "See Moroni chapter ten, verses three through five. "
    "\n\n"
    "The purpose of this record is to assist in the gathering of the House of "
    "Israel, to bring to light the words of Christ given to His other sheep, to "
    "prepare the Remnant of Joseph to rise up, and the Remnant of Jacob to return; "
    "that they may fulfill their covenants, build the New Jerusalem, and establish "
    "Zion in these last days before the return of our Lord and Savior, Jesus "
    "Christ, in glory."
)

12
text_input.py Normal file
View File

@ -0,0 +1,12 @@
# Short test text imported by the render scripts (render_voices.py, etc.).
# NOTE(review): "Kneephites" looks like a deliberate phonetic respelling
# (to coax the TTS into pronouncing "Nephites") — confirm before "fixing" it.
TEXT = (
    "The Book of the Nem. "
    "Another Testament of Jesus Christ. "
    "From the Kneephites People. "
    "The purpose of this record is to assist in the gathering of the House of "
    "Israel, to bring to light the words of Christ given to His other sheep, to "
    "prepare the Remnant of Joseph to rise up, and the Remnant of Jacob to return; "
    "that they may fulfill their covenants, build the New Jerusalem, and establish "
    "Zion in these last days before the return of our Lord and Savior, Jesus "
    "Christ, in glory."
)

49
tts_test.py Normal file
View File

@ -0,0 +1,49 @@
import numpy as np
import soundfile as sf
import torch

from kokoro import KPipeline
# ── Device setup ──────────────────────────────────────────────────────────────
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")
# NOTE(review): `device` is informational only — KPipeline below is not
# given a device argument; confirm it selects the GPU itself.

# ── Test paragraph ─────────────────────────────────────────────────────────────
# Fixed smoke-test text; the real book text lives in text_input.py.
TEXT = (
    "The world of artificial intelligence is evolving at a remarkable pace. "
    "Modern language models can now read, write, and even speak with surprising "
    "clarity and nuance. This audio was generated entirely on a local machine "
    "using the Kokoro text-to-speech model, running on an NVIDIA RTX 3060 GPU. "
    "No cloud, no API keys — just raw local compute turning words into sound."
)

# ── Build pipeline ─────────────────────────────────────────────────────────────
# lang_code: 'a' = American English, 'b' = British English
# voices: af_heart, af_bella, af_nova, am_adam, am_michael, bf_emma, bm_george …
pipeline = KPipeline(lang_code="a")

# Output / voice settings for the smoke test.
OUTPUT_FILE = "output.wav"
VOICE = "af_heart"  # warm American female voice
SPEED = 1.0  # 1.0 = normal speed
# ── Generate audio ─────────────────────────────────────────────────────────────
print(f"Generating speech with voice '{VOICE}'")

# (The mid-script `import numpy as np` was hoisted to the top-of-file
# import block per PEP 8.)
audio_chunks = []
for _, _, chunk_audio in pipeline(TEXT, voice=VOICE, speed=SPEED):
    # chunk_audio is a torch.Tensor of shape [N], dtype float32
    if hasattr(chunk_audio, "numpy"):
        chunk_audio = chunk_audio.cpu().numpy()
    chunk_audio = np.atleast_1d(chunk_audio.squeeze())
    if chunk_audio.size > 0:
        audio_chunks.append(chunk_audio)

if audio_chunks:
    audio = np.concatenate(audio_chunks, axis=0)
    sf.write(OUTPUT_FILE, audio, 24000)
    duration = len(audio) / 24000
    print(f"✓ Saved '{OUTPUT_FILE}' ({duration:.1f}s, 24 kHz)")
else:
    print("No audio generated — check input text.")