Initial commit: audiobook generator, proper noun auditor GUI

This commit is contained in:
2026-02-24 14:40:31 -07:00
commit 58a236d181
15 changed files with 14975 additions and 0 deletions

13
.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
# Python
__pycache__/
*.pyc
*.pyo
.venv/
# Generated audio output (large binary files)
output_audiobook/
output_proper_nouns/
*.wav
# TTS fixed text output
**(TTS Fixed)*.txt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

4833
Audio Master Nem Full.txt Normal file

File diff suppressed because it is too large Load Diff

21
Nem table of contents.txt Normal file
View File

@ -0,0 +1,21 @@
Table of Contents
Contents
Introduction …………………………………………………..……………….. 4
The Book of Hagoth …………………………………………………………... 6-12
The First Book of Shi-Tugo …………………………………………………… 12-30
The Book of Sanempet ………………………………………………………… 30-35
The Book of Oug (The Son of Sanempet) ……………………………………... 35-54
The Book of the Temple Writings of Oug ……………………………………... 55-59
The Sacred Temple Writings …………………………………………………… 59-105
The Law of Chastity ………………………………………………………… 105-106
The Law of Consecration …………………………………………………… 107-109
The Law of Sacrifice ……………………………………………………….. 109-110
The First Book of Samuel The Lamanite ………………………………………. 111-125
The Second Book of Samuel The Lamanite ……………………………………. 126-158
The Book of Manti (The Son of Oug) ………………………………………….. 159-205
The First Book of Pa Nat (The Daughter of Shimlei) …………………………………….. 206-249
The First Book of Moroni (The Son of Mormon) ……………………………… 249-259
The Second Book of Moroni (The Son of Mormon) …………………………… 259-269
The Third Book of Moroni (The Son of Mormon) …………………………….. 269-331
The Book of Shioni (The Son of Moroni) ………………………………………. 331-392

172
audiobook_nem.py Normal file
View File

@ -0,0 +1,172 @@
"""
audiobook_nem.py
────────────────
Generate the Book of the Nem audiobook — one unique voice per book/section.
Usage:
python audiobook_nem.py
To skip a section, comment out its entry in BOOKS below.
Output .wav files are written to OUTPUT_DIR (created automatically).
"""
import re
import numpy as np
import soundfile as sf
import torch
from pathlib import Path
from kokoro import KPipeline
# ── Config ─────────────────────────────────────────────────────────────────────
SOURCE_FILE = Path("Audio Master Nem Full.txt")
OUTPUT_DIR = Path("output_audiobook")
SAMPLE_RATE = 24000
SPEED = 1.0
LANG_CODE = "a" # 'a' = American English
# ── Available Kokoro voices (American English, lang_code='a') ──────────────────
# af_heart warm American female [downloaded]
# af_nicole American female [downloaded]
# am_adam American male (deep) [downloaded]
# am_echo American male [downloaded]
# am_eric American male [downloaded]
# am_fenrir American male [downloaded]
# am_liam American male [downloaded]
# am_michael American male (clear) [downloaded]
# am_onyx American male [downloaded]
# am_puck American male [downloaded]
# am_santa American male [downloaded] (not used)
# ── Book definitions ───────────────────────────────────────────────────────────
# Format: (label, start_marker, voice, output_wav)
# start_marker exact text of the FIRST line of the section header in the source
# (leading/trailing whitespace is ignored when matching)
# voice Kokoro voice name
# output_wav filename saved inside OUTPUT_DIR
#
# Comment out any line to skip that section entirely.
BOOKS = [
# label start_marker voice output_wav
("Introduction", "Introduction", "af_heart", "00_introduction.wav"),
("Book of Hagoth", "THE BOOK OF HAGOTH", "am_fenrir", "01_hagoth.wav"),
("Shi-Tugo I", "THE FIRST BOOK OF SHI-TUGO", "am_eric", "02_shi_tugo_1.wav"),
("Sanempet", "THE BOOK OF SANEMPET", "am_liam", "03_sanempet.wav"),
("Oug", "THE BOOK OF OUG", "am_michael", "04_oug.wav"),
("Temple Writings of Oug", "THE BOOK OF", "am_michael", "05_temple_writings_oug.wav"),
("Sacred Temple Writings", "THE SACRED", "am_michael", "06_sacred_temple_writings.wav"),
("Samuel the Lamanite I", "THE FIRST BOOK", "am_echo", "07_samuel_lamanite_1.wav"),
("Samuel the Lamanite II", "THE SECOND BOOK", "am_echo", "08_samuel_lamanite_2.wav"),
("Manti", "THE BOOK OF MANTI", "am_onyx", "09_manti.wav"),
("Pa Nat I", "THE FIRST BOOK OF PA NAT", "af_nicole", "10_pa_nat_1.wav"),
("Moroni I", "THE FIRST BOOK OF MORONI", "am_adam", "11_moroni_1.wav"),
("Moroni II", "THE SECOND BOOK OF MORONI", "am_adam", "12_moroni_2.wav"),
("Moroni III", "THE THIRD BOOK OF MORONI", "am_adam", "13_moroni_3.wav"),
("Shioni", "THE BOOK OF SHIONI", "am_puck", "14_shioni.wav"),
]
# ── Helpers ────────────────────────────────────────────────────────────────────
def load_and_split(source: Path, books: list) -> dict[str, str]:
    """
    Read the source file and split it into sections keyed by label.

    Each section starts at its start_marker line and ends just before the
    next section's start_marker. Books whose marker never appears in the
    file are reported and omitted from the result.
    """
    raw_lines = source.read_text(encoding="utf-8").splitlines()
    # Build a mapping: marker_text → index in BOOKS
    markers = [(label, marker.strip()) for label, marker, _, _ in books]
    # Find the line index of each marker's first occurrence
    marker_positions: list[tuple[int, int]] = []  # (line_idx, books_idx)
    for book_idx, (label, marker) in enumerate(markers):
        for line_idx, line in enumerate(raw_lines):
            if line.strip() == marker:
                marker_positions.append((line_idx, book_idx))
                break
        else:
            # for/else: only reached when no line matched the marker
            print(f"  ⚠ Marker not found for '{label}': '{marker}' — skipping")
    # Sections run from one marker to the next, so sort by line position
    marker_positions.sort(key=lambda x: x[0])
    sections: dict[str, str] = {}
    for rank, (line_idx, book_idx) in enumerate(marker_positions):
        label = markers[book_idx][0]
        if rank + 1 < len(marker_positions):
            end_line = marker_positions[rank + 1][0]
        else:
            end_line = len(raw_lines)
        text = "\n".join(raw_lines[line_idx:end_line]).strip()
        sections[label] = text
    return sections
def clean_text(text: str) -> str:
    """
    Strip formatting artifacts, underscores, and normalise whitespace
    so the TTS receives clean prose.
    """
    # Remove lines that are pure underscores (horizontal rules)
    text = re.sub(r"^_{3,}\s*$", "", text, flags=re.MULTILINE)
    # All-caps chapter headers are deliberately kept: they read as a
    # natural spoken title and give the listener context.
    # Collapse excess blank lines
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
def generate_audio(pipeline: KPipeline, text: str, voice: str,
                   output_path: Path) -> None:
    """Synthesise *text* with *voice* and write one WAV to *output_path*.

    The Kokoro pipeline yields audio in chunks; chunks are collected and
    concatenated into a single file. Prints a status line either way.
    """
    chunks = []
    for _, _, chunk_audio in pipeline(text, voice=voice, speed=SPEED):
        # Torch tensors expose .numpy(); move to CPU before converting.
        if hasattr(chunk_audio, "numpy"):
            chunk_audio = chunk_audio.cpu().numpy()
        # squeeze() on a single-sample chunk yields a 0-d array; atleast_1d
        # keeps np.concatenate happy.
        chunk_audio = np.atleast_1d(chunk_audio.squeeze())
        if chunk_audio.size > 0:
            chunks.append(chunk_audio)
    if chunks:
        audio = np.concatenate(chunks, axis=0)
        sf.write(str(output_path), audio, SAMPLE_RATE)
        duration = len(audio) / SAMPLE_RATE
        print(f"  ✓ Saved '{output_path.name}' ({duration:.1f}s)")
    else:
        print(f"  ✗ No audio produced for voice='{voice}'")
# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
    """Split the source text into books and synthesise one WAV per book."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")
    if device == "cuda":
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    OUTPUT_DIR.mkdir(exist_ok=True)
    print(f"\nParsing '{SOURCE_FILE}'")
    sections = load_and_split(SOURCE_FILE, BOOKS)
    print(f"  Found {len(sections)} sections.\n")
    print("Initialising Kokoro pipeline …")
    pipeline = KPipeline(lang_code=LANG_CODE)
    for label, marker, voice, wav_name in BOOKS:
        if label not in sections:
            continue  # marker was not found; warning already printed
        print(f"\n[{label}] voice={voice} → {wav_name}")
        text = clean_text(sections[label])
        if not text:
            print("  ⚠ Empty text — skipping")
            continue
        out_path = OUTPUT_DIR / wav_name
        generate_audio(pipeline, text, voice, out_path)
    print("\nDone.")


if __name__ == "__main__":
    main()

141
extract_proper_nouns.py Normal file
View File

@ -0,0 +1,141 @@
"""
extract_proper_nouns.py
───────────────────────
Scan 'Audio Master Nem Full.txt' and extract all proper nouns into
'proper_nouns.txt', grouped by type and sorted alphabetically.
Uses spaCy for:
• NER (PERSON, GPE, LOC, ORG, …) named entity recognition
• POS (PROPN) catches names spaCy's NER misses
because they are not in its training vocabulary (e.g. Hagoth, Meninta)
Run:
.venv/bin/python extract_proper_nouns.py
"""
import re
from collections import defaultdict
from pathlib import Path
import spacy
SOURCE = Path("Audio Master Nem Full.txt")
OUTPUT = Path("proper_nouns.txt")
# ── spaCy setup ────────────────────────────────────────────────────────────────
print("Loading spaCy model …")
nlp = spacy.load("en_core_web_sm")
# Increase max length for the large source file
nlp.max_length = 2_000_000
# ── NER label groups ───────────────────────────────────────────────────────────
PERSON_LABELS = {"PERSON"}
PLACE_LABELS = {"GPE", "LOC", "FAC"}
ORG_LABELS = {"ORG", "NORP"}
OTHER_LABELS = {"EVENT", "WORK_OF_ART", "LAW", "PRODUCT", "LANGUAGE"}
# ── Noise filters ──────────────────────────────────────────────────────────────
# All-caps lines are section headers, not spoken names — skip them.
# Also skip very short tokens that are likely artefacts.
SKIP_PATTERNS = re.compile(
    r"^(THE|A|AN|AND|OF|IN|TO|FOR|BY|AT|IS|WAS|BE|HE|SHE|IT|"
    r"CHAPTER|VERSE|YEA|BEHOLD|LORD|GOD|CHRIST|HOLY|GHOST)$"
)


def is_noise(text: str) -> bool:
    """Return True when *text* is unlikely to be a genuine proper noun."""
    t = text.strip()
    if len(t) <= 1:
        return True
    if t.isupper() and len(t) > 4:  # all-caps section header word
        return True
    if SKIP_PATTERNS.match(t.upper()):
        return True
    if re.search(r"[^a-zA-Z\-' ]", t):  # contains digits or symbols
        return True
    return False
def canonical(text: str) -> str:
    """Normalise whitespace and title-case."""
    return " ".join(text.split()).title()
# ── Read and process ───────────────────────────────────────────────────────────
print(f"Reading '{SOURCE}'")
raw_text = SOURCE.read_text(encoding="utf-8")
print("Running spaCy pipeline (this may take a minute) …")
doc = nlp(raw_text)
# Buckets: keyed by display-group name → set of canonical strings
buckets: dict[str, set[str]] = defaultdict(set)

# 1. NER pass — trust spaCy's entity labels
for ent in doc.ents:
    name = canonical(ent.text)
    if is_noise(name):
        continue
    if ent.label_ in PERSON_LABELS:
        buckets["People & Characters"].add(name)
    elif ent.label_ in PLACE_LABELS:
        buckets["Places & Lands"].add(name)
    elif ent.label_ in ORG_LABELS:
        buckets["Groups & Nations"].add(name)
    else:
        # OTHER_LABELS and anything else spaCy tagged share one bucket
        buckets["Other Named Things"].add(name)

# 2. PROPN pass — catch names spaCy didn't recognise as entities
#    Only include tokens that are inside a sentence (not at position 0)
#    and are title-cased (filters out all-caps headers).
for token in doc:
    if token.pos_ != "PROPN":
        continue
    text = token.text.strip()
    if not text or not text[0].isupper() or text.isupper():
        continue  # skip empty and all-caps
    if token.i == token.sent.start:
        continue  # skip sentence-initial (could be any word)
    name = canonical(text)
    if is_noise(name):
        continue
    # Only add if not already captured by NER
    already_captured = any(name in s for s in buckets.values())
    if not already_captured:
        buckets["Unclassified Proper Nouns"].add(name)

# ── Write output ───────────────────────────────────────────────────────────────
GROUP_ORDER = [
    "People & Characters",
    "Places & Lands",
    "Groups & Nations",
    "Other Named Things",
    "Unclassified Proper Nouns",
]
lines: list[str] = []
lines.append("PROPER NOUNS — Book of the Nem")
lines.append("=" * 50)
lines.append(
    "Review this list for TTS mispronunciations.\n"
    "Each entry is the form that appears in the text.\n"
)
total = 0
for group in GROUP_ORDER:
    names = sorted(buckets.get(group, set()), key=str.casefold)
    if not names:
        continue
    # "─" rule lines are recognised (and skipped) by parse_entries in
    # generate_proper_noun_audio.py.
    lines.append(f"\n{'─' * 50}")
    lines.append(f"{group.upper()} ({len(names)})")
    lines.append(f"{'─' * 50}")
    for name in names:
        # Two-space indent marks a regular entry for the downstream parser.
        lines.append(f"  {name}")
    total += len(names)
lines.append(f"\n{'=' * 50}")
lines.append(f"TOTAL: {total} unique proper nouns")
OUTPUT.write_text("\n".join(lines), encoding="utf-8")
print(f"\n✓ Written '{OUTPUT}' ({total} unique proper nouns)")

View File

@ -0,0 +1,145 @@
"""
generate_proper_noun_audio.py
──────────────────────────────
Read proper_nouns.txt, generate a short TTS audio clip for every entry
using am_michael, and save a JSON manifest for the GUI.
Outputs:
output_proper_nouns/<slug>.wav one wav per entry
output_proper_nouns/manifest.json { "Word" : "slug.wav", … }
Already-generated files are skipped, so re-runs are fast.
Run:
.venv/bin/python generate_proper_noun_audio.py
"""
import json
import re
import sys
import numpy as np
import soundfile as sf
import torch
from pathlib import Path
from kokoro import KPipeline
PROPER_NOUNS_FILE = Path("proper_nouns.txt")
OUTPUT_DIR = Path("output_proper_nouns")
MANIFEST_FILE = OUTPUT_DIR / "manifest.json"
VOICE = "am_michael"
SAMPLE_RATE = 24000
SPEED = 1.0
# ── Parse proper_nouns.txt ─────────────────────────────────────────────────────
def parse_entries(path: Path) -> list[tuple[str, str]]:
    """Return list of (category, entry) pairs parsed from proper_nouns.txt.

    Category headers look like "PEOPLE & CHARACTERS (301)"; regular entries
    are indented two spaces. Rule lines, the preamble and the TOTAL footer
    are skipped.
    """
    entries: list[tuple[str, str]] = []
    current_cat = "Uncategorised"
    header_re = re.compile(r"^[A-Z &]+\s+\(\d+\)$")
    for line in path.read_text(encoding="utf-8").splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        # Skip "=" and "─" horizontal rule lines.
        # NOTE: this was startswith("") in the mangled source, which is
        # always True and made the parser skip every line.
        if stripped.startswith("=") or stripped.startswith("─"):
            continue
        if header_re.match(stripped):
            # e.g. "PEOPLE & CHARACTERS (301)"
            current_cat = stripped.rsplit("(", 1)[0].strip().title()
            continue
        if stripped.startswith("TOTAL:"):
            continue
        if stripped.startswith("Review this") or stripped.startswith("Each entry"):
            continue
        if stripped.startswith("PROPER NOUNS"):
            continue
        # Regular entry — indented two spaces in the file
        if line.startswith("  "):
            entries.append((current_cat, stripped))
    return entries
def slugify(text: str) -> str:
    """Convert e.g. 'Hagoth-II foo' → 'hagoth_ii_foo'."""
    s = text.lower()
    s = re.sub(r"[^a-z0-9]+", "_", s)
    return s.strip("_")
# ── TTS generation ─────────────────────────────────────────────────────────────
def generate(pipeline: KPipeline, text: str, out_path: Path) -> bool:
    """Synthesise *text* to *out_path*; return True when audio was written."""
    chunks = []
    # Speak the word in a short carrier phrase so the TTS pronounces it
    # naturally (isolated tokens sometimes get clipped prosody).
    # NOTE(review): currently the word is spoken bare — the carrier phrase
    # mentioned above is not applied; confirm whether that is intended.
    spoken = text
    for _, _, chunk in pipeline(spoken, voice=VOICE, speed=SPEED):
        if hasattr(chunk, "numpy"):
            chunk = chunk.cpu().numpy()
        chunk = np.atleast_1d(chunk.squeeze())
        if chunk.size > 0:
            chunks.append(chunk)
    if chunks:
        audio = np.concatenate(chunks)
        sf.write(str(out_path), audio, SAMPLE_RATE)
        return True
    return False
# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
    """Generate one short WAV per proper-noun entry; skip already-done clips."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")
    if device == "cuda":
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    OUTPUT_DIR.mkdir(exist_ok=True)
    print(f"Parsing '{PROPER_NOUNS_FILE}'")
    entries = parse_entries(PROPER_NOUNS_FILE)
    print(f"  {len(entries)} entries found.\n")
    # Load existing manifest so we can skip already-done words
    if MANIFEST_FILE.exists():
        manifest: dict = json.loads(MANIFEST_FILE.read_text())
    else:
        manifest = {}
    print("Initialising Kokoro pipeline …")
    pipeline = KPipeline(lang_code="a")
    skipped = 0
    generated = 0
    failed = 0
    for i, (cat, entry) in enumerate(entries):
        slug = slugify(entry)
        wav_name = f"{slug}.wav"
        wav_path = OUTPUT_DIR / wav_name
        # Only skip when both the manifest entry AND the file exist.
        if entry in manifest and wav_path.exists():
            skipped += 1
            continue
        # \r progress line overwrites itself; pad to a fixed width.
        sys.stdout.write(f"\r[{i+1}/{len(entries)}] {entry[:55]:<55}")
        sys.stdout.flush()
        ok = generate(pipeline, entry, wav_path)
        if ok:
            manifest[entry] = wav_name
            generated += 1
        else:
            print(f"\n  ✗ Failed: {entry}")
            failed += 1
    print(f"\n\nDone. generated={generated} skipped={skipped} failed={failed}")
    MANIFEST_FILE.write_text(json.dumps(manifest, ensure_ascii=False, indent=2))
    print(f"Manifest saved → '{MANIFEST_FILE}'")


if __name__ == "__main__":
    main()

620
proper_noun_player.py Normal file
View File

@ -0,0 +1,620 @@
"""
proper_noun_player.py
──────────────────────
GUI for auditing proper noun pronunciations.
Three columns (all persisted as JSON, original manifest never modified):
• Review words not yet audited
• Correct words that already pronounce fine
• Fixes linked list: original word → phonetic replacement
e.g. "Nephi""Kneephi"
Hotkeys (always active):
Space replay current word
s stop audio
Escape reset fix entry to original word, refocus review list
On the Review list:
↑ / ↓ navigate
Click / Enter play word AND focus fix entry
On the fix entry (bottom bar, right of the word label):
Start typing to overwrite the pre-filled word.
Enter → if text == original word → mark Correct, advance to next
if text differs → add as Fix, advance to next
Escape → reset text to original word, return focus to review list
On the Correct list:
Delete / BackSpace move selected word back to Review
On the Fixes list:
Delete / BackSpace move selected fix back to Review
"Apply Fixes to Text" writes a TTS-ready copy of the source file with all
substitutions applied (case-sensitive whole-word replace).
Data files (auto-created in output_proper_nouns/):
correct_words.json list of correct words
pronunciation_fixes.json { "Nephi": "Kneephi", … }
Run:
.venv/bin/python proper_noun_player.py
"""
import json
import re
import threading
import tkinter as tk
from tkinter import ttk, messagebox
from pathlib import Path
import sounddevice as sd
import soundfile as sf
MANIFEST_FILE = Path("output_proper_nouns/manifest.json")
OUTPUT_DIR = Path("output_proper_nouns")
REPLACEMENTS_DIR = OUTPUT_DIR / "replacements_cache"
CORRECT_FILE = OUTPUT_DIR / "correct_words.json"
FIXES_FILE = OUTPUT_DIR / "pronunciation_fixes.json"
SOURCE_TEXT = Path("Audio Master Nem Full.txt")
FIXED_TEXT_OUT = Path("Audio Master Nem Full (TTS Fixed).txt")
VOICE = "am_michael"
SAMPLE_RATE = 24000
# ── Colours ────────────────────────────────────────────────────────────────────
BG = "#1e1e2e"
BG2 = "#181825"
BG3 = "#313244"
FG = "#cdd6f4"
FG_DIM = "#6c7086"
GREEN = "#a6e3a1"
BLUE = "#89b4fa"
RED = "#f38ba8"
YELLOW = "#f9e2af"
MAUVE = "#cba6f7"
# ── Audio ──────────────────────────────────────────────────────────────────────
def play_async(path: Path) -> None:
    """Stop any current playback, then play *path* on a daemon thread."""
    sd.stop()

    def _play():
        data, sr = sf.read(str(path), dtype="float32")
        sd.play(data, sr)

    threading.Thread(target=_play, daemon=True).start()
def _slug(text: str) -> str:
"""Safe filename from arbitrary text."""
return re.sub(r"[^a-zA-Z0-9_-]", "_", text).strip("_")[:80]
# Lazy KPipeline singleton — only imported+loaded on first synthesis request
_pipeline = None
_pipeline_lock = threading.Lock()


def _get_pipeline():
    """Return the shared KPipeline, creating it on first call.

    Uses double-checked locking so concurrent synthesis threads do not
    construct two pipelines; warnings from kokoro are silenced.
    """
    global _pipeline
    if _pipeline is None:
        with _pipeline_lock:
            if _pipeline is None:  # re-check: another thread may have won
                import warnings
                from kokoro import KPipeline  # type: ignore
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=UserWarning)
                    warnings.filterwarnings("ignore", category=FutureWarning)
                    _pipeline = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")
    return _pipeline
def synth_and_play(text: str, on_ready=None) -> None:
    """Synthesise *text* with Kokoro (cached) and play it.

    Runs entirely on a daemon thread so the GUI never blocks.
    *on_ready(path)* is called on the same thread once the file is written.
    """
    def _run():
        path = _synth_to_cache(text)
        if path:
            if on_ready:
                on_ready(path)
            play_async(path)

    threading.Thread(target=_run, daemon=True).start()
def _synth_to_cache(text: str) -> "Path | None":
    """Synthesise *text* to a cached WAV and return its path (or None on failure).

    Skips synthesis if the file already exists. Safe to call from any thread.
    """
    REPLACEMENTS_DIR.mkdir(parents=True, exist_ok=True)
    cache_path = REPLACEMENTS_DIR / f"{_slug(text)}.wav"
    if not cache_path.exists():
        # Heavy imports stay local: only paid on a cache miss.
        import warnings
        import numpy as np
        pipeline = _get_pipeline()
        chunks = []
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            for _, _, audio in pipeline(text, voice=VOICE):
                if audio is not None:
                    chunks.append(audio)
        if chunks:
            combined = np.concatenate(chunks)
            sf.write(str(cache_path), combined, SAMPLE_RATE)
    # Re-check existence: synthesis may have produced nothing.
    return cache_path if cache_path.exists() else None
# ── Persistence helpers ────────────────────────────────────────────────────────
def load_json(path: Path, default):
    """Read JSON from *path*, returning *default* when the file is absent."""
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    return default
def save_json(path: Path, obj) -> None:
    """Write *obj* to *path* as pretty-printed UTF-8 JSON."""
    path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
# ── Styled widget helpers ──────────────────────────────────────────────────────
def make_listbox(parent) -> tuple[tk.Listbox, tk.Frame]:
    """Return a themed Listbox plus the frame that carries it and its scrollbar."""
    frame = tk.Frame(parent, bg=BG2, bd=0)
    sb = ttk.Scrollbar(frame, orient="vertical")
    sb.pack(side="right", fill="y")
    lb = tk.Listbox(
        frame,
        yscrollcommand=sb.set,
        font=("Helvetica", 11),
        bg=BG2, fg=FG,
        selectbackground=BLUE, selectforeground=BG,
        activestyle="none", bd=0, highlightthickness=0, relief="flat",
        # exportselection=False keeps each list's selection independent of
        # the X primary selection (and of the other listboxes).
        exportselection=False,
    )
    lb.pack(side="left", fill="both", expand=True)
    sb.config(command=lb.yview)
    return lb, frame
def styled_btn(parent, text, command, color=FG, bg=BG3, **kw):
    """Return a flat, dark-themed tk.Button; extra kwargs pass through."""
    return tk.Button(
        parent, text=text, command=command,
        bg=bg, fg=color, activebackground=BG2, activeforeground=color,
        font=("Helvetica", 10, "bold"), relief="flat", bd=0,
        padx=10, pady=5, cursor="hand2", **kw
    )
def section_label(parent, text):
    """Return a small dim column-header label."""
    return tk.Label(parent, text=text, bg=BG, fg=FG_DIM,
                    font=("Helvetica", 9, "bold"), anchor="w")
# ── Main app ───────────────────────────────────────────────────────────────────
class ProperNounAuditor(tk.Tk):
    """Main window: three-column GUI for auditing proper-noun pronunciation."""

    # tracks which word is currently loaded into the fix entry
    _fix_entry_word: str = ""

    def __init__(self, manifest: dict[str, str]) -> None:
        """*manifest* maps each word to its pre-generated WAV filename."""
        super().__init__()
        self.title("Proper Noun Pronunciation Auditor")
        self.geometry("1020x700")
        self.minsize(800, 500)
        self.configure(bg=BG)
        self.manifest: dict[str, str] = manifest
        self.all_words: list[str] = sorted(manifest.keys(), key=str.casefold)
        # Persistent data
        self.correct: set[str] = set(load_json(CORRECT_FILE, []))
        self.fixes: dict[str, str] = load_json(FIXES_FILE, {})
        self._build_ui()
        self._refresh_all()
        # Window-level hotkeys (work even when a listbox has keyboard focus)
        self.bind("<space>", lambda e: self._replay())
        self.bind("s", lambda e: sd.stop())
        self.bind("<Escape>", lambda e: self._reset_fix_entry())
# ── UI construction ────────────────────────────────────────────────────────
def _build_ui(self) -> None:
    """Construct the widget tree: title bar, three list columns, action bar."""
    PAD = 8
    # Title bar
    title_bar = tk.Frame(self, bg=BG, pady=6)
    title_bar.pack(fill="x", padx=PAD)
    tk.Label(title_bar, text="Proper Noun Pronunciation Auditor",
             font=("Helvetica", 15, "bold"), bg=BG, fg=FG).pack(side="left")
    hint = "Space=replay s=stop Esc=reset fix Del=remove from list Enter=correct|fix"
    tk.Label(title_bar, text=hint,
             font=("Helvetica", 8), bg=BG, fg=FG_DIM).pack(side="left", padx=14)
    # Three-column body: review gets the widest column
    body = tk.Frame(self, bg=BG)
    body.pack(fill="both", expand=True, padx=PAD, pady=(0, PAD))
    body.columnconfigure(0, weight=3)
    body.columnconfigure(1, weight=2)
    body.columnconfigure(2, weight=2)
    body.rowconfigure(0, weight=1)
    # ── Column 0: Review list ──────────────────────────────────────────────
    col0 = tk.Frame(body, bg=BG)
    col0.grid(row=0, column=0, sticky="nsew", padx=(0, PAD))
    filter_row = tk.Frame(col0, bg=BG)
    filter_row.pack(fill="x", pady=(0, 4))
    tk.Label(filter_row, text="Filter:", bg=BG, fg=FG,
             font=("Helvetica", 10)).pack(side="left", padx=(0, 4))
    self.search_var = tk.StringVar()
    self.search_var.trace_add("write", lambda *_: self._refresh_review())
    self._filter_entry = tk.Entry(
        filter_row, textvariable=self.search_var,
        font=("Helvetica", 11), bg=BG3, fg=FG,
        insertbackground=FG, relief="flat", bd=4)
    self._filter_entry.pack(side="left", fill="x", expand=True)
    self._filter_entry.focus_set()
    # Clear-filter button (glyph lost in source paste — TODO restore icon)
    styled_btn(filter_row, "", lambda: self.search_var.set(""),
               color=RED, bg=BG3).pack(side="left", padx=(3, 0))
    hdr0 = tk.Frame(col0, bg=BG)
    hdr0.pack(fill="x")
    section_label(hdr0, "TO REVIEW").pack(side="left")
    self.review_count_var = tk.StringVar()
    tk.Label(hdr0, textvariable=self.review_count_var, bg=BG, fg=FG_DIM,
             font=("Helvetica", 9)).pack(side="right")
    self.review_lb, review_frame = make_listbox(col0)
    review_frame.pack(fill="both", expand=True)
    self.review_lb.bind("<<ListboxSelect>>", self._on_review_select)
    self.review_lb.bind("<Return>", self._on_review_select)
    # ── Column 1: Correct list ─────────────────────────────────────────────
    col1 = tk.Frame(body, bg=BG)
    col1.grid(row=0, column=1, sticky="nsew", padx=(0, PAD))
    hdr1 = tk.Frame(col1, bg=BG)
    hdr1.pack(fill="x")
    section_label(hdr1, "✓ CORRECT [Del=remove]").pack(side="left")
    self.correct_count_var = tk.StringVar()
    tk.Label(hdr1, textvariable=self.correct_count_var, bg=BG, fg=FG_DIM,
             font=("Helvetica", 9)).pack(side="right")
    self.correct_lb, correct_frame = make_listbox(col1)
    correct_frame.pack(fill="both", expand=True)
    self.correct_lb.bind("<<ListboxSelect>>",
                         lambda e: self._on_side_select(self.correct_lb))
    self.correct_lb.bind("<Delete>",
                         lambda e: self._move_back(self.correct_lb, is_dict=False))
    self.correct_lb.bind("<BackSpace>",
                         lambda e: self._move_back(self.correct_lb, is_dict=False))
    styled_btn(col1, "← Back to Review [Del]",
               lambda: self._move_back(self.correct_lb, is_dict=False),
               color=YELLOW).pack(fill="x", pady=(4, 0))
    # ── Column 2: Fixes list ───────────────────────────────────────────────
    col2 = tk.Frame(body, bg=BG)
    col2.grid(row=0, column=2, sticky="nsew")
    hdr2 = tk.Frame(col2, bg=BG)
    hdr2.pack(fill="x")
    section_label(hdr2, "⇄ FIXES (original → phonetic)").pack(side="left")
    self.fixes_count_var = tk.StringVar()
    tk.Label(hdr2, textvariable=self.fixes_count_var, bg=BG, fg=FG_DIM,
             font=("Helvetica", 9)).pack(side="right")
    self.fixes_lb, fixes_frame = make_listbox(col2)
    fixes_frame.pack(fill="both", expand=True)
    self.fixes_lb.bind("<<ListboxSelect>>",
                       lambda e: self._on_side_select(self.fixes_lb))
    self.fixes_lb.bind("<Delete>",
                       lambda e: self._move_back(self.fixes_lb, is_dict=True))
    self.fixes_lb.bind("<BackSpace>",
                       lambda e: self._move_back(self.fixes_lb, is_dict=True))
    styled_btn(col2, "← Back to Review [Del]",
               lambda: self._move_back(self.fixes_lb, is_dict=True),
               color=YELLOW).pack(fill="x", pady=(4, 0))
    # ── Bottom action bar ──────────────────────────────────────────────────
    action_bar = tk.Frame(self, bg=BG3, pady=8)
    action_bar.pack(fill="x")
    # Now-playing word label (leading icon glyph lost in source paste)
    tk.Label(action_bar, text="", bg=BG3, fg=GREEN,
             font=("Helvetica", 11)).pack(side="left", padx=(10, 2))
    self.now_playing_var = tk.StringVar(value="")
    tk.Label(action_bar, textvariable=self.now_playing_var,
             bg=BG3, fg=GREEN, font=("Helvetica", 11, "bold"),
             width=20, anchor="w").pack(side="left")
    # Inline fix entry — right next to the word, auto-focused on word click
    tk.Label(action_bar, text="", bg=BG3, fg=MAUVE,
             font=("Helvetica", 13, "bold")).pack(side="left", padx=(6, 3))
    self.fix_var = tk.StringVar()
    self._fix_entry = tk.Entry(
        action_bar, textvariable=self.fix_var,
        font=("Helvetica", 11), bg=BG2, fg=MAUVE,
        insertbackground=MAUVE, relief="flat", bd=4, width=22)
    self._fix_entry.pack(side="left")
    self._fix_entry.bind("<Return>", lambda e: self._enter_action())
    self._fix_entry.bind("<Escape>", lambda e: self._reset_fix_entry())
    tk.Label(action_bar, text="Enter=correct (edit first for fix) Esc=reset",
             bg=BG3, fg=FG_DIM, font=("Helvetica", 8)).pack(side="left", padx=(5, 10))
    tk.Label(action_bar, text="", bg=BG3, fg=FG_DIM).pack(side="left", padx=4)
    styled_btn(action_bar, "■ Stop [s]", sd.stop,
               color=RED).pack(side="left", padx=4)
    styled_btn(action_bar, "↺ Replay [Space]", self._replay,
               color=BLUE).pack(side="left", padx=2)
    tk.Label(action_bar, text="", bg=BG3, fg=FG_DIM).pack(side="left", padx=4)
    styled_btn(action_bar, "⇄ Apply Fixes to Text",
               self._apply_fixes, color=YELLOW, bg=BG2).pack(side="left", padx=4)
    tk.Label(action_bar, text="", bg=BG3, fg=FG_DIM).pack(side="left", padx=4)
    self._pregen_btn = styled_btn(
        action_bar, "↻ Pre-gen Fix Audio",
        self._pregen_all_fix_audio, color=MAUVE, bg=BG2)
    self._pregen_btn.pack(side="left", padx=4)
    self._pregen_status_var = tk.StringVar(value="")
    tk.Label(action_bar, textvariable=self._pregen_status_var,
             bg=BG3, fg=FG_DIM, font=("Helvetica", 8),
             width=28, anchor="w").pack(side="left", padx=(4, 10))
# ── Refresh helpers ────────────────────────────────────────────────────────
def _review_words(self) -> list[str]:
    """Words not yet marked correct/fixed, filtered by the search box."""
    excluded = self.correct | set(self.fixes.keys())
    q = self.search_var.get().strip().casefold()
    words = [w for w in self.all_words if w not in excluded]
    if q:
        words = [w for w in words if q in w.casefold()]
    return words

def _refresh_review(self) -> None:
    """Repopulate the Review listbox and its counter."""
    words = self._review_words()
    self.review_lb.delete(0, "end")
    for w in words:
        self.review_lb.insert("end", f"  {w}")
    self.review_count_var.set(f"{len(words)}")

def _refresh_correct(self) -> None:
    """Repopulate the Correct listbox and its counter."""
    self.correct_lb.delete(0, "end")
    for w in sorted(self.correct, key=str.casefold):
        self.correct_lb.insert("end", f"  {w}")
    self.correct_count_var.set(f"{len(self.correct)}")

def _refresh_fixes(self) -> None:
    """Repopulate the Fixes listbox; rows render as 'orig → replacement'."""
    self.fixes_lb.delete(0, "end")
    for orig, rep in sorted(self.fixes.items(), key=lambda x: x[0].casefold()):
        # The "→" separator is parsed back by _on_side_select/_move_back;
        # it was lost in the mangled source (rows had no separator at all).
        self.fixes_lb.insert("end", f"  {orig} → {rep}")
    self.fixes_count_var.set(f"{len(self.fixes)}")

def _refresh_all(self) -> None:
    """Refresh all three columns at once."""
    self._refresh_review()
    self._refresh_correct()
    self._refresh_fixes()
# ── Playback ───────────────────────────────────────────────────────────────
def _play_word(self, word: str) -> None:
    """Play the pre-generated clip for *word*; warn if the WAV is missing."""
    wav_name = self.manifest.get(word)
    if not wav_name:
        return
    wav_path = OUTPUT_DIR / wav_name
    if not wav_path.exists():
        messagebox.showwarning("Missing audio",
                               f"No audio file for '{word}'.\n"
                               "Run generate_proper_noun_audio.py first.")
        return
    self.now_playing_var.set(word)
    play_async(wav_path)

# ── Selection callbacks ────────────────────────────────────────────────────
def _on_review_select(self, event=None) -> None:
    """Click/Enter on a review word: play it and load it into the fix entry."""
    sel = self.review_lb.curselection()
    if not sel:
        return
    word = self.review_lb.get(sel[0]).strip()
    self._fix_entry_word = word
    self.fix_var.set(word)  # pre-fill fix entry with the word
    self._fix_entry.selection_range(0, "end")
    self._fix_entry.icursor("end")
    # Defer focus so the listbox doesn't reclaim it after the click event settles
    self.after(0, self._fix_entry.focus_set)
    self._play_word(word)

def _on_side_select(self, listbox: tk.Listbox) -> None:
    """Click on Correct/Fixes: play the word (or the phonetic replacement)."""
    sel = listbox.curselection()
    if not sel:
        return
    row = listbox.get(sel[0]).strip()
    # Fix rows render as "orig → replacement"; the mangled source split on
    # "" which raises ValueError — split on the arrow instead.
    parts = row.split("→")
    original = parts[0].strip()
    if listbox is self.fixes_lb and len(parts) == 2:
        # Play the phonetic replacement text
        replacement = parts[1].strip()
        self.now_playing_var.set(f"→ {replacement}")
        def _on_ready(_path):
            self.after(0, lambda: self.now_playing_var.set(replacement))
        synth_and_play(replacement, on_ready=_on_ready)
    else:
        self._play_word(original)
# ── Actions ────────────────────────────────────────────────────────────────
def _selected_review_word(self) -> str | None:
    """Return the currently selected review word, or None."""
    sel = self.review_lb.curselection()
    if not sel:
        return None
    return self.review_lb.get(sel[0]).strip()

def _enter_action(self) -> None:
    """Smart Enter handler for the fix entry.

    If the entry text matches the original word → mark Correct.
    If the entry text differs from the original → add as Fix.
    """
    word = self._fix_entry_word or self._selected_review_word()
    if not word:
        return
    text = self.fix_var.get().strip()
    if not text or text == word:
        self._mark_correct_word(word)
    else:
        self._add_fix_for_word(word, text)

def _reset_fix_entry(self) -> None:
    """Escape: reset fix entry to the original word, refocus the review list."""
    self.fix_var.set(self._fix_entry_word)
    self.review_lb.focus_set()

def _replay(self) -> None:
    """Space: replay the word currently loaded in the fix entry."""
    if self._fix_entry_word:
        self._play_word(self._fix_entry_word)

def _advance_review(self) -> None:
    """After an action, select the first remaining word in the review list."""
    if self.review_lb.size() > 0:
        self.review_lb.selection_clear(0, "end")
        self.review_lb.selection_set(0)
        self.review_lb.see(0)
        # Fire the same virtual event a user click would, so the word plays.
        self.review_lb.event_generate("<<ListboxSelect>>")
def _mark_correct_word(self, word: str) -> None:
self.correct.add(word)
save_json(CORRECT_FILE, sorted(self.correct))
self._fix_entry_word = ""
self.fix_var.set("")
self.now_playing_var.set("")
self._refresh_all()
self._advance_review()
def _mark_correct(self) -> None:
    """Toolbar action: mark the selected Review word as pronounced correctly."""
    word = self._selected_review_word()
    if word:
        self._mark_correct_word(word)
    else:
        messagebox.showinfo("Nothing selected",
                            "Select a word from the Review list first.")
def _add_fix_for_word(self, word: str, replacement: str) -> None:
    """Record *replacement* as the phonetic fix for *word* and persist it.

    Mirrors _mark_correct_word: save, clear entry state, refresh, advance.
    """
    self.fixes[word] = replacement
    save_json(FIXES_FILE, self.fixes)
    # Clear entry/playback state so the next selection starts fresh.
    self._fix_entry_word = ""
    self.fix_var.set("")
    self.now_playing_var.set("")
    self._refresh_all()
    self._advance_review()
def _add_fix(self) -> None:
    """Toolbar action: store the typed phonetic replacement for the selection."""
    word = self._selected_review_word()
    if not word:
        messagebox.showinfo("Nothing selected",
                            "Select a word from the Review list first.")
        return
    replacement = self.fix_var.get().strip()
    # An empty box, or text identical to the original word, is not a fix.
    if not replacement or replacement == word:
        messagebox.showinfo("No replacement",
                            "Type the phonetic replacement in the Fix box.")
        return
    self._add_fix_for_word(word, replacement)
def _move_back(self, listbox: tk.Listbox, is_dict: bool) -> None:
    """Remove the selected entry from a side list, returning it to Review.

    Parameters
    ----------
    listbox : the side list the user acted on.
    is_dict : True when *listbox* is the Fixes list (backed by the
              ``self.fixes`` dict), False for the Correct list (a set).
    """
    sel = listbox.curselection()
    if not sel:
        return
    # BUG FIX: Fixes rows are "original → replacement" — split on the
    # "→" separator and keep the original word.  str.split("") raises
    # ValueError (empty separator); the arrow was lost in transit.
    raw = listbox.get(sel[0]).strip().split("→")[0].strip()
    if is_dict:
        self.fixes.pop(raw, None)
        save_json(FIXES_FILE, self.fixes)
    else:
        self.correct.discard(raw)
        save_json(CORRECT_FILE, sorted(self.correct))
    self._refresh_all()
# ── Apply fixes to source text ─────────────────────────────────────────────
def _pregen_all_fix_audio(self) -> None:
    """Synthesise and cache audio for every replacement phonetic string.

    Runs the synthesis on a daemon worker thread; all widget updates are
    marshalled back to the Tk main thread via ``self.after(0, ...)``.
    """
    if not self.fixes:
        messagebox.showinfo("No fixes", "The Fixes list is empty.")
        return
    replacements = list(self.fixes.values())
    total = len(replacements)
    already = sum(
        1 for r in replacements
        if (REPLACEMENTS_DIR / f"{_slug(r)}.wav").exists()
    )
    # Confirm if it'll take a while
    new_count = total - already
    if new_count == 0:
        messagebox.showinfo("Already done",
                            f"All {total} replacement clips already exist.")
        return
    self._pregen_btn.config(state="disabled")
    self._pregen_status_var.set(f"0 / {new_count} new ({already} cached)")
    def _run():
        done = 0
        for rep in replacements:
            cache_path = REPLACEMENTS_DIR / f"{_slug(rep)}.wav"
            if not cache_path.exists():
                _synth_to_cache(rep)
                # Count (and report) only clips actually synthesised, so
                # the "d / t" readout can never exceed new_count when some
                # clips were already cached.
                done += 1
                self.after(0, lambda d=done, t=new_count:
                           self._pregen_status_var.set(f"{d} / {t} synthesised…"))
        self.after(0, lambda: self._pregen_status_var.set(
            f"Done — {total} clips ready"))
        self.after(0, lambda: self._pregen_btn.config(state="normal"))
    threading.Thread(target=_run, daemon=True).start()
def _apply_fixes(self) -> None:
    """Apply every fix rule to the source text and write the TTS-fixed copy."""
    if not self.fixes:
        messagebox.showinfo("No fixes", "The Fixes list is empty.")
        return
    if not SOURCE_TEXT.exists():
        messagebox.showerror("Source not found", f"Cannot find:\n{SOURCE_TEXT}")
        return
    text = SOURCE_TEXT.read_text(encoding="utf-8")
    count_total = 0
    for original, replacement in self.fixes.items():
        # Whole-word match only, so a short name never fires mid-word.
        pattern = r'\b' + re.escape(original) + r'\b'
        text, hits = re.subn(pattern, replacement, text)
        count_total += hits
    FIXED_TEXT_OUT.write_text(text, encoding="utf-8")
    messagebox.showinfo(
        "Done",
        f"Applied {len(self.fixes)} fix rules ({count_total} replacements).\n\n"
        f"Saved to:\n{FIXED_TEXT_OUT}"
    )
# ── Entry point ────────────────────────────────────────────────────────────────
def main() -> None:
    """Load the proper-noun manifest and launch the auditor GUI."""
    if not MANIFEST_FILE.exists():
        # The manifest is produced by a separate generation step.
        print(f"Manifest not found: '{MANIFEST_FILE}'")
        print("Run generate_proper_noun_audio.py first.")
        raise SystemExit(1)
    manifest: dict[str, str] = json.loads(MANIFEST_FILE.read_text(encoding="utf-8"))
    print(f"Loaded {len(manifest)} entries from manifest.")
    ProperNounAuditor(manifest).mainloop()
# Script entry guard — lets the module be imported without launching the GUI.
if __name__ == "__main__":
    main()

1707
proper_nouns.txt Normal file

File diff suppressed because it is too large Load Diff

44
render_voices.py Normal file
View File

@ -0,0 +1,44 @@
import torch
import numpy as np
import soundfile as sf
from kokoro import KPipeline
from text_input import TEXT
# ── Device setup ──────────────────────────────────────────────────────────────
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")
# NOTE(review): `device` is informational only — KPipeline below is not
# given a device argument; confirm it selects the GPU itself.

# Kokoro emits 24 kHz audio; SPEED 1.0 = normal speaking rate.
SAMPLE_RATE = 24000
SPEED = 1.0
# (voice id, output filename) pairs to render.
VOICES = [
    ("af_heart", "output_af_heart.wav"),  # warm American female
    ("am_michael", "output_am_michael.wav"),  # best American male
]
# lang_code "a" = American English.
pipeline = KPipeline(lang_code="a")
def generate(voice: str, output_file: str) -> None:
    """Render TEXT with *voice* and save a 24 kHz WAV to *output_file*."""
    # FIX: restore the separator between voice and filename — the original
    # f-string ran them together ('{voice}'{output_file}), apparently a
    # non-ASCII arrow lost in transit.
    print(f"\nGenerating '{voice}' → {output_file}")
    chunks = []
    for _, _, chunk_audio in pipeline(TEXT, voice=voice, speed=SPEED):
        # Kokoro yields torch tensors; move to CPU numpy before concatenating.
        if hasattr(chunk_audio, "numpy"):
            chunk_audio = chunk_audio.cpu().numpy()
        chunk_audio = np.atleast_1d(chunk_audio.squeeze())
        if chunk_audio.size > 0:
            chunks.append(chunk_audio)
    if chunks:
        audio = np.concatenate(chunks, axis=0)
        sf.write(output_file, audio, SAMPLE_RATE)
        print(f" ✓ Saved '{output_file}' ({len(audio) / SAMPLE_RATE:.1f}s, {SAMPLE_RATE} Hz)")
    else:
        # Guard the empty case — np.concatenate([]) would raise ValueError.
        print(f" ✗ No audio produced for '{voice}'")
# Render each configured voice in turn.
for voice, path in VOICES:
    generate(voice, path)
print("\nDone.")

19
run_michael.py Normal file
View File

@ -0,0 +1,19 @@
import torch, numpy as np, soundfile as sf
from kokoro import KPipeline
from text_input import TEXT
pipeline = KPipeline(lang_code="a")

print(f"GPU: {torch.cuda.get_device_name(0)}" if torch.cuda.is_available() else "CPU")
print("Generating am_michael ...")

chunks = []
for _, _, chunk_audio in pipeline(TEXT, voice="am_michael", speed=1.0):
    # Kokoro yields torch tensors; move to CPU numpy before concatenating.
    if hasattr(chunk_audio, "numpy"):
        chunk_audio = chunk_audio.cpu().numpy()
    chunk_audio = np.atleast_1d(chunk_audio.squeeze())
    if chunk_audio.size > 0:
        chunks.append(chunk_audio)

# FIX: guard the empty case — np.concatenate([]) raises ValueError.
# Matches the handling already present in render_voices.py / tts_test.py.
if chunks:
    audio = np.concatenate(chunks)
    sf.write("output_am_michael.wav", audio, 24000)
    print(f"Saved output_am_michael.wav ({len(audio)/24000:.1f}s)")
else:
    print("No audio generated — check input text.")

35
text_input copy.py Normal file
View File

@ -0,0 +1,35 @@
# Full audiobook introduction for the Book of the Nem.
# NOTE(review): this looks like the long-form variant of the TEXT constant
# in text_input.py — confirm which module the render scripts should import.
TEXT = (
    "The Book of the Nem. "
    "Another Testament of Jesus Christ. "
    "From the Nem People. "
    "Accounts Written by the Hands of Nem Prophets and Recordkeepers. "
    "Taken from the Written Records of the Nem, the People of the Spirit. "
    "\n\n"
    "The Book of the Nem is written to the descendants of the Lamanites of the "
    "Book of Mormon, who are a remnant of the House of Israel, and also to the "
    "descendants of the Nem, and to the Gentiles, by way of commandment, and also "
    "by the spirit of prophecy and of revelation, to the convincing of all that "
    "Jesus Christ is the Son of God, the Savior of all mankind throughout the world. "
    "\n\n"
    "It is a record of the people of Hagoth, the shipbuilder, and the people of "
    "Corianton, the son of Alma the younger, who traveled into the Land Northward "
    "to escape the wickedness of the Nephites and secret combinations of the "
    "Gadianton Robbers who were beginning to spread across the land. It is to show "
    "unto the remnant of the House of Israel what great things the Lord hath done "
    "for their fathers and brothers; and that they may know the covenants of the "
    "Lord, that they are not cast off forever, and also to the convincing of the "
    "Gentiles that Jesus is the Christ, the Eternal God, manifesting himself unto "
    "all nations, including the ancestors of the Nem long ago. And now, if there "
    "are faults in this record, they are the mistakes of men; wherefore, condemn "
    "not the things of God. Search these records, ponder, and pray that Jesus "
    "Christ may reveal the truth of it unto you by the power of the Holy Ghost, "
    "for by the power of the Holy Ghost, ye may know the truth of all things. "
    "See Moroni chapter ten, verses three through five. "
    "\n\n"
    "The purpose of this record is to assist in the gathering of the House of "
    "Israel, to bring to light the words of Christ given to His other sheep, to "
    "prepare the Remnant of Joseph to rise up, and the Remnant of Jacob to return; "
    "that they may fulfill their covenants, build the New Jerusalem, and establish "
    "Zion in these last days before the return of our Lord and Savior, Jesus "
    "Christ, in glory."
)

12
text_input.py Normal file
View File

@ -0,0 +1,12 @@
# Short test text imported by the render scripts (render_voices.py, etc.).
# NOTE(review): "Kneephites" looks like a deliberate phonetic respelling
# (to coax the TTS into pronouncing "Nephites") — confirm before "fixing" it.
TEXT = (
    "The Book of the Nem. "
    "Another Testament of Jesus Christ. "
    "From the Kneephites People. "
    "The purpose of this record is to assist in the gathering of the House of "
    "Israel, to bring to light the words of Christ given to His other sheep, to "
    "prepare the Remnant of Joseph to rise up, and the Remnant of Jacob to return; "
    "that they may fulfill their covenants, build the New Jerusalem, and establish "
    "Zion in these last days before the return of our Lord and Savior, Jesus "
    "Christ, in glory."
)

49
tts_test.py Normal file
View File

@ -0,0 +1,49 @@
import numpy as np
import soundfile as sf
import torch

from kokoro import KPipeline
# ── Device setup ──────────────────────────────────────────────────────────────
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")
# NOTE(review): `device` is informational only — KPipeline below is not
# given a device argument; confirm it selects the GPU itself.

# ── Test paragraph ─────────────────────────────────────────────────────────────
# Fixed smoke-test text; the real book text lives in text_input.py.
TEXT = (
    "The world of artificial intelligence is evolving at a remarkable pace. "
    "Modern language models can now read, write, and even speak with surprising "
    "clarity and nuance. This audio was generated entirely on a local machine "
    "using the Kokoro text-to-speech model, running on an NVIDIA RTX 3060 GPU. "
    "No cloud, no API keys — just raw local compute turning words into sound."
)

# ── Build pipeline ─────────────────────────────────────────────────────────────
# lang_code: 'a' = American English, 'b' = British English
# voices: af_heart, af_bella, af_nova, am_adam, am_michael, bf_emma, bm_george …
pipeline = KPipeline(lang_code="a")

# Output / voice settings for the smoke test.
OUTPUT_FILE = "output.wav"
VOICE = "af_heart"  # warm American female voice
SPEED = 1.0  # 1.0 = normal speed
# ── Generate audio ─────────────────────────────────────────────────────────────
print(f"Generating speech with voice '{VOICE}'")

# (The mid-script `import numpy as np` was hoisted to the top-of-file
# import block per PEP 8.)
audio_chunks = []
for _, _, chunk_audio in pipeline(TEXT, voice=VOICE, speed=SPEED):
    # chunk_audio is a torch.Tensor of shape [N], dtype float32
    if hasattr(chunk_audio, "numpy"):
        chunk_audio = chunk_audio.cpu().numpy()
    chunk_audio = np.atleast_1d(chunk_audio.squeeze())
    if chunk_audio.size > 0:
        audio_chunks.append(chunk_audio)

if audio_chunks:
    audio = np.concatenate(audio_chunks, axis=0)
    sf.write(OUTPUT_FILE, audio, 24000)
    duration = len(audio) / 24000
    print(f"✓ Saved '{OUTPUT_FILE}' ({duration:.1f}s, 24 kHz)")
else:
    print("No audio generated — check input text.")