Initial commit: audiobook generator, proper noun auditor GUI
This commit is contained in:
13
.gitignore
vendored
Normal file
13
.gitignore
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
.venv/
|
||||
|
||||
# Generated audio output (large binary files)
|
||||
output_audiobook/
|
||||
output_proper_nouns/
|
||||
*.wav
|
||||
|
||||
# TTS fixed text output
|
||||
**(TTS Fixed)*.txt
|
||||
4833
Audio Master Nem (Columns Pdf) - Front Matter to Pa Nat 1_8.txt
Normal file
4833
Audio Master Nem (Columns Pdf) - Front Matter to Pa Nat 1_8.txt
Normal file
File diff suppressed because it is too large
Load Diff
2331
Audio Master Nem (Columns Pdf) - Pa Nat 1_9 to Shioni.txt
Normal file
2331
Audio Master Nem (Columns Pdf) - Pa Nat 1_9 to Shioni.txt
Normal file
File diff suppressed because it is too large
Load Diff
4833
Audio Master Nem Full.txt
Normal file
4833
Audio Master Nem Full.txt
Normal file
File diff suppressed because it is too large
Load Diff
21
Nem table of contents.txt
Normal file
21
Nem table of contents.txt
Normal file
@ -0,0 +1,21 @@
|
||||
Table of Contents
|
||||
|
||||
Contents
|
||||
Introduction …………………………………………………..……………….. 4
|
||||
The Book of Hagoth …………………………………………………………... 6-12
|
||||
The First Book of Shi-Tugo …………………………………………………… 12-30
|
||||
The Book of Sanempet ………………………………………………………… 30-35
|
||||
The Book of Oug (The Son of Sanempet) ……………………………………... 35-54
|
||||
The Book of the Temple Writings of Oug ……………………………………... 55-59
|
||||
The Sacred Temple Writings …………………………………………………… 59-105
|
||||
The Law of Chastity ………………………………………………………… 105-106
|
||||
The Law of Consecration …………………………………………………… 107-109
|
||||
The Law of Sacrifice ……………………………………………………….. 109-110
|
||||
The First Book of Samuel The Lamanite ………………………………………. 111-125
|
||||
The Second Book of Samuel The Lamanite ……………………………………. 126-158
|
||||
The Book of Manti (The Son of Oug) ………………………………………….. 159-205
|
||||
The First Book of Pa Nat (The Daughter of Shimlei) …………………………………….. 206-249
|
||||
The First Book of Moroni (The Son of Mormon) ……………………………… 249-259
|
||||
The Second Book of Moroni (The Son of Mormon) …………………………… 259-269
|
||||
The Third Book of Moroni (The Son of Mormon) …………………………….. 269-331
|
||||
The Book of Shioni (The Son of Moroni) ………………………………………. 331-392
|
||||
172
audiobook_nem.py
Normal file
172
audiobook_nem.py
Normal file
@ -0,0 +1,172 @@
|
||||
"""
|
||||
audiobook_nem.py
|
||||
────────────────
|
||||
Generate the Book of the Nem audiobook — one unique voice per book/section.
|
||||
|
||||
Usage:
|
||||
python audiobook_nem.py
|
||||
|
||||
To skip a section, comment out its entry in BOOKS below.
|
||||
Output .wav files are written to OUTPUT_DIR (created automatically).
|
||||
"""
|
||||
|
||||
import re
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
from pathlib import Path
|
||||
from kokoro import KPipeline
|
||||
|
||||
# ── Config ─────────────────────────────────────────────────────────────────────
|
||||
SOURCE_FILE = Path("Audio Master Nem Full.txt")
|
||||
OUTPUT_DIR = Path("output_audiobook")
|
||||
SAMPLE_RATE = 24000
|
||||
SPEED = 1.0
|
||||
LANG_CODE = "a" # 'a' = American English
|
||||
|
||||
# ── Available Kokoro voices (American English, lang_code='a') ──────────────────
|
||||
# af_heart – warm American female [downloaded]
|
||||
# af_nicole – American female [downloaded]
|
||||
# am_adam – American male (deep) [downloaded]
|
||||
# am_echo – American male [downloaded]
|
||||
# am_eric – American male [downloaded]
|
||||
# am_fenrir – American male [downloaded]
|
||||
# am_liam – American male [downloaded]
|
||||
# am_michael – American male (clear) [downloaded]
|
||||
# am_onyx – American male [downloaded]
|
||||
# am_puck – American male [downloaded]
|
||||
# am_santa – American male [downloaded] (not used)
|
||||
|
||||
# ── Book definitions ───────────────────────────────────────────────────────────
|
||||
# Format: (label, start_marker, voice, output_wav)
|
||||
# start_marker – exact text of the FIRST line of the section header in the source
|
||||
# (leading/trailing whitespace is ignored when matching)
|
||||
# voice – Kokoro voice name
|
||||
# output_wav – filename saved inside OUTPUT_DIR
|
||||
#
|
||||
# Comment out any line to skip that section entirely.
|
||||
BOOKS = [
|
||||
# label start_marker voice output_wav
|
||||
("Introduction", "Introduction", "af_heart", "00_introduction.wav"),
|
||||
("Book of Hagoth", "THE BOOK OF HAGOTH", "am_fenrir", "01_hagoth.wav"),
|
||||
("Shi-Tugo I", "THE FIRST BOOK OF SHI-TUGO", "am_eric", "02_shi_tugo_1.wav"),
|
||||
("Sanempet", "THE BOOK OF SANEMPET", "am_liam", "03_sanempet.wav"),
|
||||
("Oug", "THE BOOK OF OUG", "am_michael", "04_oug.wav"),
|
||||
("Temple Writings of Oug", "THE BOOK OF", "am_michael", "05_temple_writings_oug.wav"),
|
||||
("Sacred Temple Writings", "THE SACRED", "am_michael", "06_sacred_temple_writings.wav"),
|
||||
("Samuel the Lamanite I", "THE FIRST BOOK", "am_echo", "07_samuel_lamanite_1.wav"),
|
||||
("Samuel the Lamanite II", "THE SECOND BOOK", "am_echo", "08_samuel_lamanite_2.wav"),
|
||||
("Manti", "THE BOOK OF MANTI", "am_onyx", "09_manti.wav"),
|
||||
("Pa Nat I", "THE FIRST BOOK OF PA NAT", "af_nicole", "10_pa_nat_1.wav"),
|
||||
("Moroni I", "THE FIRST BOOK OF MORONI", "am_adam", "11_moroni_1.wav"),
|
||||
("Moroni II", "THE SECOND BOOK OF MORONI", "am_adam", "12_moroni_2.wav"),
|
||||
("Moroni III", "THE THIRD BOOK OF MORONI", "am_adam", "13_moroni_3.wav"),
|
||||
("Shioni", "THE BOOK OF SHIONI", "am_puck", "14_shioni.wav"),
|
||||
]
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def load_and_split(source: Path, books: list) -> dict[str, str]:
    """
    Read *source* and split it into sections keyed by book label.

    Each section starts at the line whose stripped text exactly equals the
    book's start_marker and ends just before the next section's marker line;
    the final section runs to end-of-file. Books whose marker never appears
    are reported with a warning and omitted from the result.

    Improvement over the original: the file is scanned once (O(lines))
    instead of once per book (O(books × lines)).
    """
    raw_lines = source.read_text(encoding="utf-8").splitlines()

    # marker text → index into *books*. Markers are assumed unique; a
    # duplicate marker would keep only the last book that declares it.
    wanted = {marker.strip(): idx for idx, (_, marker, _, _) in enumerate(books)}

    # Single pass: record the FIRST occurrence of each marker.
    marker_positions: list[tuple[int, int]] = []  # (line_idx, books_idx)
    found: set[str] = set()
    for line_idx, line in enumerate(raw_lines):
        key = line.strip()
        if key in wanted and key not in found:
            found.add(key)
            marker_positions.append((line_idx, wanted[key]))

    # Warn (in BOOKS order, matching the original behaviour) about misses.
    for label, marker, _, _ in books:
        if marker.strip() not in found:
            print(f" ⚠ Marker not found for '{label}': '{marker}' — skipping")

    marker_positions.sort(key=lambda x: x[0])

    sections: dict[str, str] = {}
    for rank, (line_idx, book_idx) in enumerate(marker_positions):
        label = books[book_idx][0]
        if rank + 1 < len(marker_positions):
            end_line = marker_positions[rank + 1][0]
        else:
            end_line = len(raw_lines)
        sections[label] = "\n".join(raw_lines[line_idx:end_line]).strip()

    return sections
|
||||
|
||||
|
||||
def clean_text(text: str) -> str:
    """
    Prepare a section for TTS: drop underscore "horizontal rule" lines and
    collapse runs of three or more newlines, returning stripped prose.
    """
    without_rules = re.sub(r"^_{3,}\s*$", "", text, flags=re.MULTILINE)
    # All-caps chapter headers are deliberately kept — spoken, they act as
    # natural section titles and give the listener context.
    collapsed = re.sub(r"\n{3,}", "\n\n", without_rules)
    return collapsed.strip()
|
||||
|
||||
|
||||
def generate_audio(pipeline: KPipeline, text: str, voice: str,
                   output_path: Path) -> None:
    """Synthesise *text* with *voice* and write a WAV file to *output_path*.

    Each pipeline chunk is flattened to 1-D and concatenated; the result is
    written at SAMPLE_RATE. Prints a one-line status either way.
    """
    pieces = []
    for _, _, piece in pipeline(text, voice=voice, speed=SPEED):
        # Torch tensors expose .numpy(); move to CPU before converting.
        if hasattr(piece, "numpy"):
            piece = piece.cpu().numpy()
        piece = np.atleast_1d(piece.squeeze())
        if piece.size > 0:
            pieces.append(piece)

    if not pieces:
        print(f" ✗ No audio produced for voice='{voice}'")
        return

    audio = np.concatenate(pieces, axis=0)
    sf.write(str(output_path), audio, SAMPLE_RATE)
    duration = len(audio) / SAMPLE_RATE
    print(f" ✓ Saved '{output_path.name}' ({duration:.1f}s)")
|
||||
|
||||
|
||||
# ── Main ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main() -> None:
    """Parse the source text into sections and synthesise every book in BOOKS."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")
    if device == "cuda":
        print(f"GPU: {torch.cuda.get_device_name(0)}")

    OUTPUT_DIR.mkdir(exist_ok=True)

    print(f"\nParsing '{SOURCE_FILE}' …")
    sections = load_and_split(SOURCE_FILE, BOOKS)
    print(f" Found {len(sections)} sections.\n")

    print("Initialising Kokoro pipeline …")
    pipeline = KPipeline(lang_code=LANG_CODE)

    for label, marker, voice, wav_name in BOOKS:
        if label not in sections:
            # load_and_split already printed a warning for this marker.
            continue

        print(f"\n[{label}] voice={voice} → {wav_name}")
        text = clean_text(sections[label])
        if not text:
            print(" ⚠ Empty text — skipping")
            continue

        generate_audio(pipeline, text, voice, OUTPUT_DIR / wav_name)

    print("\nDone.")


if __name__ == "__main__":
    main()
|
||||
141
extract_proper_nouns.py
Normal file
141
extract_proper_nouns.py
Normal file
@ -0,0 +1,141 @@
|
||||
"""
|
||||
extract_proper_nouns.py
|
||||
───────────────────────
|
||||
Scan 'Audio Master Nem Full.txt' and extract all proper nouns into
|
||||
'proper_nouns.txt', grouped by type and sorted alphabetically.
|
||||
|
||||
Uses spaCy for:
|
||||
• NER (PERSON, GPE, LOC, ORG, …) – named entity recognition
|
||||
• POS (PROPN) – catches names spaCy's NER misses
|
||||
because they are not in its training vocabulary (e.g. Hagoth, Meninta)
|
||||
|
||||
Run:
|
||||
.venv/bin/python extract_proper_nouns.py
|
||||
"""
|
||||
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import spacy
|
||||
|
||||
SOURCE = Path("Audio Master Nem Full.txt")
|
||||
OUTPUT = Path("proper_nouns.txt")
|
||||
|
||||
# ── spaCy setup ────────────────────────────────────────────────────────────────
|
||||
print("Loading spaCy model …")
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
# Increase max length for the large source file
|
||||
nlp.max_length = 2_000_000
|
||||
|
||||
# ── NER label groups ───────────────────────────────────────────────────────────
|
||||
PERSON_LABELS = {"PERSON"}
|
||||
PLACE_LABELS = {"GPE", "LOC", "FAC"}
|
||||
ORG_LABELS = {"ORG", "NORP"}
|
||||
OTHER_LABELS = {"EVENT", "WORK_OF_ART", "LAW", "PRODUCT", "LANGUAGE"}
|
||||
|
||||
# ── Noise filters ──────────────────────────────────────────────────────────────
|
||||
# All-caps lines are section headers, not spoken names — skip them.
|
||||
# Also skip very short tokens that are likely artefacts.
|
||||
# Common English function words and liturgical terms that spaCy sometimes
# tags as proper nouns; matched against the upper-cased candidate.
SKIP_PATTERNS = re.compile(
    r"^(THE|A|AN|AND|OF|IN|TO|FOR|BY|AT|IS|WAS|BE|HE|SHE|IT|"
    r"CHAPTER|VERSE|YEA|BEHOLD|LORD|GOD|CHRIST|HOLY|GHOST)$"
)


def is_noise(text: str) -> bool:
    """Return True when *text* is an artefact rather than a usable name."""
    candidate = text.strip()
    if len(candidate) <= 1:
        return True  # single characters are never useful names
    if candidate.isupper() and len(candidate) > 4:
        return True  # long all-caps word → section-header fragment
    if SKIP_PATTERNS.match(candidate.upper()):
        return True  # stop word / liturgical term
    # Digits or unexpected punctuation indicate extraction junk.
    return bool(re.search(r"[^a-zA-Z\-' ]", candidate))
|
||||
|
||||
|
||||
def canonical(text: str) -> str:
    """Collapse internal whitespace and convert *text* to Title Case."""
    words = text.split()
    return " ".join(words).title()
|
||||
|
||||
|
||||
# ── Read and process ───────────────────────────────────────────────────────────
|
||||
# Read the whole corpus once; spaCy processes it as a single Doc
# (nlp.max_length was raised above to allow this).
print(f"Reading '{SOURCE}' …")
raw_text = SOURCE.read_text(encoding="utf-8")

print("Running spaCy pipeline (this may take a minute) …")
doc = nlp(raw_text)

# Buckets: keyed by display-group name → set of canonical strings
buckets: dict[str, set[str]] = defaultdict(set)

# 1. NER pass — trust spaCy's entity labels
for ent in doc.ents:
    name = canonical(ent.text)
    if is_noise(name):
        continue
    if ent.label_ in PERSON_LABELS:
        buckets["People & Characters"].add(name)
    elif ent.label_ in PLACE_LABELS:
        buckets["Places & Lands"].add(name)
    elif ent.label_ in ORG_LABELS:
        buckets["Groups & Nations"].add(name)
    elif ent.label_ in OTHER_LABELS:
        buckets["Other Named Things"].add(name)
    else:
        # Any remaining label (DATE, CARDINAL, …) still lands in the
        # catch-all bucket so nothing spaCy found is silently dropped.
        buckets["Other Named Things"].add(name)

# 2. PROPN pass — catch names spaCy didn't recognise as entities
# Only include tokens that are inside a sentence (not at position 0)
# and are title-cased (filters out all-caps headers).
# NOTE(review): text[0] assumes the stripped token text is non-empty —
# presumably whitespace tokens are tagged SPACE rather than PROPN; confirm.
for token in doc:
    if token.pos_ != "PROPN":
        continue
    text = token.text.strip()
    if not text[0].isupper() or text.isupper():
        continue  # skip all-caps
    if token.i == token.sent.start:
        continue  # skip sentence-initial (could be any word)
    name = canonical(text)
    if is_noise(name):
        continue
    # Only add if not already captured by NER — exact-name membership
    # checked against every bucket set.
    already_captured = any(name in s for s in buckets.values())
    if not already_captured:
        buckets["Unclassified Proper Nouns"].add(name)

# ── Write output ───────────────────────────────────────────────────────────────
# Fixed display order of report sections; empty groups are omitted below.
GROUP_ORDER = [
    "People & Characters",
    "Places & Lands",
    "Groups & Nations",
    "Other Named Things",
    "Unclassified Proper Nouns",
]

lines: list[str] = []
lines.append("PROPER NOUNS — Book of the Nem")
lines.append("=" * 50)
lines.append(
    "Review this list for TTS mispronunciations.\n"
    "Each entry is the form that appears in the text.\n"
)

total = 0
for group in GROUP_ORDER:
    # Case-insensitive alphabetical order within each group.
    names = sorted(buckets.get(group, set()), key=str.casefold)
    if not names:
        continue
    lines.append(f"\n{'─' * 50}")
    lines.append(f"{group.upper()} ({len(names)})")
    lines.append(f"{'─' * 50}")
    for name in names:
        lines.append(f"  {name}")
    total += len(names)

lines.append(f"\n{'=' * 50}")
lines.append(f"TOTAL: {total} unique proper nouns")

OUTPUT.write_text("\n".join(lines), encoding="utf-8")
print(f"\n✓ Written '{OUTPUT}' ({total} unique proper nouns)")
|
||||
145
generate_proper_noun_audio.py
Normal file
145
generate_proper_noun_audio.py
Normal file
@ -0,0 +1,145 @@
|
||||
"""
|
||||
generate_proper_noun_audio.py
|
||||
──────────────────────────────
|
||||
Read proper_nouns.txt, generate a short TTS audio clip for every entry
|
||||
using am_michael, and save a JSON manifest for the GUI.
|
||||
|
||||
Outputs:
|
||||
output_proper_nouns/<slug>.wav – one wav per entry
|
||||
output_proper_nouns/manifest.json – { "Word" : "slug.wav", … }
|
||||
|
||||
Already-generated files are skipped, so re-runs are fast.
|
||||
|
||||
Run:
|
||||
.venv/bin/python generate_proper_noun_audio.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
from pathlib import Path
|
||||
from kokoro import KPipeline
|
||||
|
||||
PROPER_NOUNS_FILE = Path("proper_nouns.txt")
|
||||
OUTPUT_DIR = Path("output_proper_nouns")
|
||||
MANIFEST_FILE = OUTPUT_DIR / "manifest.json"
|
||||
VOICE = "am_michael"
|
||||
SAMPLE_RATE = 24000
|
||||
SPEED = 1.0
|
||||
|
||||
# ── Parse proper_nouns.txt ─────────────────────────────────────────────────────
|
||||
|
||||
def parse_entries(path: Path) -> list[tuple[str, str]]:
    """Parse proper_nouns.txt into a list of (category, entry) pairs.

    Category headers look like "PEOPLE & CHARACTERS (301)"; entries sit
    beneath them, indented two spaces. Banner, rule, summary, and blurb
    lines are ignored.
    """
    header_re = re.compile(r"^[A-Z &]+\s+\(\d+\)$")
    results: list[tuple[str, str]] = []
    category = "Uncategorised"

    for raw in path.read_text(encoding="utf-8").splitlines():
        text = raw.strip()
        if not text:
            continue
        # Rule lines ("=====", "─────") carry no data.
        if text.startswith(("=", "─")):
            continue
        if header_re.match(text):
            # "PEOPLE & CHARACTERS (301)" → "People & Characters"
            category = text.rsplit("(", 1)[0].strip().title()
            continue
        # Summary line, intro blurb, and the file banner.
        if text.startswith(("TOTAL:", "Review this", "Each entry", "PROPER NOUNS")):
            continue
        # Real entries are indented two spaces in the file.
        if raw.startswith("  "):
            results.append((category, text))

    return results
|
||||
|
||||
|
||||
def slugify(text: str) -> str:
    """Lower-case *text*, turning every non-alphanumeric run into one '_'.

    e.g. 'Hagoth-II foo' → 'hagoth_ii_foo'
    """
    slug = re.sub(r"[^a-z0-9]+", "_", text.lower())
    return slug.strip("_")
|
||||
|
||||
|
||||
# ── TTS generation ─────────────────────────────────────────────────────────────
|
||||
|
||||
def generate(pipeline: KPipeline, text: str, out_path: Path) -> bool:
    """Synthesise *text* with VOICE and write a WAV to *out_path*.

    Returns True when at least one non-empty audio chunk was produced and
    the file was written, False otherwise.

    Note: the entry is spoken in isolation. A carrier phrase ("The word
    is X.") was once considered to improve prosody but is NOT applied —
    the previous code had a dead `spoken = text` local whose comment
    claimed otherwise; it has been removed to avoid misleading readers.
    """
    chunks = []
    for _, _, chunk in pipeline(text, voice=VOICE, speed=SPEED):
        # Torch tensors need an explicit CPU round-trip before numpy.
        if hasattr(chunk, "numpy"):
            chunk = chunk.cpu().numpy()
        chunk = np.atleast_1d(chunk.squeeze())
        if chunk.size > 0:
            chunks.append(chunk)
    if chunks:
        audio = np.concatenate(chunks)
        sf.write(str(out_path), audio, SAMPLE_RATE)
        return True
    return False
|
||||
|
||||
|
||||
# ── Main ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main() -> None:
    """Generate one WAV per proper-noun entry and maintain the manifest.

    Already-generated entries (present in the manifest AND on disk) are
    skipped, so re-runs are fast. The manifest is flushed periodically so
    an interrupted run keeps its progress — previously it was only written
    at the very end, losing everything on a crash or Ctrl-C.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")
    if device == "cuda":
        print(f"GPU: {torch.cuda.get_device_name(0)}")

    OUTPUT_DIR.mkdir(exist_ok=True)

    print(f"Parsing '{PROPER_NOUNS_FILE}' …")
    entries = parse_entries(PROPER_NOUNS_FILE)
    print(f" {len(entries)} entries found.\n")

    # Load existing manifest so we can skip already-done words.
    if MANIFEST_FILE.exists():
        manifest: dict = json.loads(MANIFEST_FILE.read_text())
    else:
        manifest = {}

    print("Initialising Kokoro pipeline …")
    pipeline = KPipeline(lang_code="a")

    skipped = 0
    generated = 0
    failed = 0

    for i, (cat, entry) in enumerate(entries):
        wav_name = f"{slugify(entry)}.wav"
        wav_path = OUTPUT_DIR / wav_name

        if entry in manifest and wav_path.exists():
            skipped += 1
            continue

        sys.stdout.write(f"\r[{i+1}/{len(entries)}] {entry[:55]:<55}")
        sys.stdout.flush()

        if generate(pipeline, entry, wav_path):
            manifest[entry] = wav_name
            generated += 1
            # Flush every 25 new clips — generation is the expensive part,
            # so a crash never discards more than a handful of entries.
            if generated % 25 == 0:
                MANIFEST_FILE.write_text(
                    json.dumps(manifest, ensure_ascii=False, indent=2))
        else:
            print(f"\n ✗ Failed: {entry}")
            failed += 1

    print(f"\n\nDone. generated={generated} skipped={skipped} failed={failed}")

    MANIFEST_FILE.write_text(json.dumps(manifest, ensure_ascii=False, indent=2))
    print(f"Manifest saved → '{MANIFEST_FILE}'")


if __name__ == "__main__":
    main()
|
||||
620
proper_noun_player.py
Normal file
620
proper_noun_player.py
Normal file
@ -0,0 +1,620 @@
|
||||
"""
|
||||
proper_noun_player.py
|
||||
──────────────────────
|
||||
GUI for auditing proper noun pronunciations.
|
||||
|
||||
Three columns (all persisted as JSON, original manifest never modified):
|
||||
• Review – words not yet audited
|
||||
• Correct – words that already pronounce fine
|
||||
• Fixes – linked list: original word → phonetic replacement
|
||||
e.g. "Nephi" → "Kneephi"
|
||||
|
||||
Hotkeys (always active):
|
||||
Space – replay current word
|
||||
s – stop audio
|
||||
Escape – reset fix entry to original word, refocus review list
|
||||
|
||||
On the Review list:
|
||||
↑ / ↓ – navigate
|
||||
Click / Enter – play word AND focus fix entry
|
||||
|
||||
On the fix entry (bottom bar, right of the word label):
|
||||
Start typing to overwrite the pre-filled word.
|
||||
Enter → if text == original word → mark Correct, advance to next
|
||||
if text differs → add as Fix, advance to next
|
||||
Escape → reset text to original word, return focus to review list
|
||||
|
||||
On the Correct list:
|
||||
Delete / BackSpace – move selected word back to Review
|
||||
|
||||
On the Fixes list:
|
||||
Delete / BackSpace – move selected fix back to Review
|
||||
|
||||
"Apply Fixes to Text" writes a TTS-ready copy of the source file with all
|
||||
substitutions applied (case-sensitive whole-word replace).
|
||||
|
||||
Data files (auto-created in output_proper_nouns/):
|
||||
correct_words.json – list of correct words
|
||||
pronunciation_fixes.json – { "Nephi": "Kneephi", … }
|
||||
|
||||
Run:
|
||||
.venv/bin/python proper_noun_player.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import threading
|
||||
import tkinter as tk
|
||||
from tkinter import ttk, messagebox
|
||||
from pathlib import Path
|
||||
|
||||
import sounddevice as sd
|
||||
import soundfile as sf
|
||||
|
||||
MANIFEST_FILE = Path("output_proper_nouns/manifest.json")
|
||||
OUTPUT_DIR = Path("output_proper_nouns")
|
||||
REPLACEMENTS_DIR = OUTPUT_DIR / "replacements_cache"
|
||||
CORRECT_FILE = OUTPUT_DIR / "correct_words.json"
|
||||
FIXES_FILE = OUTPUT_DIR / "pronunciation_fixes.json"
|
||||
SOURCE_TEXT = Path("Audio Master Nem Full.txt")
|
||||
FIXED_TEXT_OUT = Path("Audio Master Nem Full (TTS Fixed).txt")
|
||||
|
||||
VOICE = "am_michael"
|
||||
SAMPLE_RATE = 24000
|
||||
|
||||
# ── Colours ────────────────────────────────────────────────────────────────────
|
||||
BG = "#1e1e2e"
|
||||
BG2 = "#181825"
|
||||
BG3 = "#313244"
|
||||
FG = "#cdd6f4"
|
||||
FG_DIM = "#6c7086"
|
||||
GREEN = "#a6e3a1"
|
||||
BLUE = "#89b4fa"
|
||||
RED = "#f38ba8"
|
||||
YELLOW = "#f9e2af"
|
||||
MAUVE = "#cba6f7"
|
||||
|
||||
# ── Audio ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def play_async(path: Path) -> None:
    """Stop any current playback, then play *path* on a daemon thread."""
    sd.stop()

    def _worker():
        samples, rate = sf.read(str(path), dtype="float32")
        sd.play(samples, rate)

    threading.Thread(target=_worker, daemon=True).start()
|
||||
|
||||
|
||||
def _slug(text: str) -> str:
|
||||
"""Safe filename from arbitrary text."""
|
||||
return re.sub(r"[^a-zA-Z0-9_-]", "_", text).strip("_")[:80]
|
||||
|
||||
|
||||
# Lazy KPipeline singleton — importing and loading kokoro is expensive,
# so it only happens on the first synthesis request, guarded by a lock.
_pipeline = None
_pipeline_lock = threading.Lock()


def _get_pipeline():
    """Return the shared KPipeline, creating it on first use (thread-safe)."""
    global _pipeline
    if _pipeline is not None:
        return _pipeline
    with _pipeline_lock:
        # Re-check under the lock: another thread may have won the race.
        if _pipeline is None:
            import warnings
            from kokoro import KPipeline  # type: ignore
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=UserWarning)
                warnings.filterwarnings("ignore", category=FutureWarning)
                _pipeline = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")
    return _pipeline
|
||||
|
||||
|
||||
def synth_and_play(text: str, on_ready=None) -> None:
    """Synthesise *text* with Kokoro (cached) and play it.

    All work happens on a daemon thread so the GUI never blocks;
    *on_ready(path)* fires on that thread once the WAV file exists.
    """
    def _task():
        cached = _synth_to_cache(text)
        if cached:
            if on_ready:
                on_ready(cached)
            play_async(cached)

    threading.Thread(target=_task, daemon=True).start()
|
||||
|
||||
|
||||
def _synth_to_cache(text: str) -> "Path | None":
    """Synthesise *text* to a cached WAV, returning its path (None on failure).

    An existing cache file is reused without re-synthesising. Safe to call
    from any thread.
    """
    REPLACEMENTS_DIR.mkdir(parents=True, exist_ok=True)
    target = REPLACEMENTS_DIR / f"{_slug(text)}.wav"
    if not target.exists():
        import warnings
        import numpy as np

        pipeline = _get_pipeline()
        pieces = []
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            for _, _, audio in pipeline(text, voice=VOICE):
                if audio is not None:
                    pieces.append(audio)
        if pieces:
            sf.write(str(target), np.concatenate(pieces), SAMPLE_RATE)
    return target if target.exists() else None
|
||||
|
||||
|
||||
# ── Persistence helpers ────────────────────────────────────────────────────────
|
||||
|
||||
def load_json(path: Path, default):
    """Return parsed JSON from *path*, or *default* when the file is absent."""
    if not path.exists():
        return default
    return json.loads(path.read_text(encoding="utf-8"))
|
||||
|
||||
def save_json(path: Path, obj) -> None:
    """Write *obj* to *path* as pretty-printed UTF-8 JSON."""
    serialized = json.dumps(obj, ensure_ascii=False, indent=2)
    path.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
# ── Styled widget helpers ──────────────────────────────────────────────────────
|
||||
|
||||
def make_listbox(parent) -> tuple[tk.Listbox, tk.Frame]:
    """Create a dark-themed listbox with an attached vertical scrollbar.

    Returns (listbox, containing_frame): pack/grid the frame into the
    layout, then populate the listbox.
    """
    container = tk.Frame(parent, bg=BG2, bd=0)

    scrollbar = ttk.Scrollbar(container, orient="vertical")
    scrollbar.pack(side="right", fill="y")

    listbox = tk.Listbox(
        container,
        yscrollcommand=scrollbar.set,
        font=("Helvetica", 11),
        bg=BG2, fg=FG,
        selectbackground=BLUE, selectforeground=BG,
        activestyle="none", bd=0, highlightthickness=0, relief="flat",
        exportselection=False,
    )
    listbox.pack(side="left", fill="both", expand=True)
    scrollbar.config(command=listbox.yview)

    return listbox, container
|
||||
|
||||
def styled_btn(parent, text, command, color=FG, bg=BG3, **kw):
    """Create a flat dark-theme button; extra tk.Button options via **kw."""
    return tk.Button(
        parent,
        text=text,
        command=command,
        bg=bg,
        fg=color,
        activebackground=BG2,
        activeforeground=color,
        font=("Helvetica", 10, "bold"),
        relief="flat",
        bd=0,
        padx=10,
        pady=5,
        cursor="hand2",
        **kw,
    )
|
||||
|
||||
def section_label(parent, text):
    """Small dimmed bold caption used as a column header."""
    return tk.Label(
        parent, text=text, bg=BG, fg=FG_DIM,
        font=("Helvetica", 9, "bold"), anchor="w",
    )
|
||||
|
||||
|
||||
# ── Main app ───────────────────────────────────────────────────────────────────
|
||||
|
||||
class ProperNounAuditor(tk.Tk):
|
||||
|
||||
# tracks which word is currently loaded into the fix entry
|
||||
_fix_entry_word: str = ""
|
||||
|
||||
def __init__(self, manifest: dict[str, str]) -> None:
|
||||
super().__init__()
|
||||
self.title("Proper Noun Pronunciation Auditor")
|
||||
self.geometry("1020x700")
|
||||
self.minsize(800, 500)
|
||||
self.configure(bg=BG)
|
||||
|
||||
self.manifest: dict[str, str] = manifest
|
||||
self.all_words: list[str] = sorted(manifest.keys(), key=str.casefold)
|
||||
|
||||
# Persistent data
|
||||
self.correct: set[str] = set(load_json(CORRECT_FILE, []))
|
||||
self.fixes: dict[str, str] = load_json(FIXES_FILE, {})
|
||||
|
||||
self._build_ui()
|
||||
self._refresh_all()
|
||||
|
||||
# Window-level hotkeys (work even when a listbox has keyboard focus)
|
||||
self.bind("<space>", lambda e: self._replay())
|
||||
self.bind("s", lambda e: sd.stop())
|
||||
self.bind("<Escape>", lambda e: self._reset_fix_entry())
|
||||
|
||||
# ── UI construction ────────────────────────────────────────────────────────
|
||||
|
||||
def _build_ui(self) -> None:
|
||||
PAD = 8
|
||||
|
||||
# Title bar
|
||||
title_bar = tk.Frame(self, bg=BG, pady=6)
|
||||
title_bar.pack(fill="x", padx=PAD)
|
||||
tk.Label(title_bar, text="Proper Noun Pronunciation Auditor",
|
||||
font=("Helvetica", 15, "bold"), bg=BG, fg=FG).pack(side="left")
|
||||
hint = "Space=replay s=stop Esc=reset fix Del=remove from list Enter=correct|fix"
|
||||
tk.Label(title_bar, text=hint,
|
||||
font=("Helvetica", 8), bg=BG, fg=FG_DIM).pack(side="left", padx=14)
|
||||
|
||||
# Three-column body
|
||||
body = tk.Frame(self, bg=BG)
|
||||
body.pack(fill="both", expand=True, padx=PAD, pady=(0, PAD))
|
||||
body.columnconfigure(0, weight=3)
|
||||
body.columnconfigure(1, weight=2)
|
||||
body.columnconfigure(2, weight=2)
|
||||
body.rowconfigure(0, weight=1)
|
||||
|
||||
# ── Column 0: Review list ──────────────────────────────────────────────
|
||||
col0 = tk.Frame(body, bg=BG)
|
||||
col0.grid(row=0, column=0, sticky="nsew", padx=(0, PAD))
|
||||
|
||||
filter_row = tk.Frame(col0, bg=BG)
|
||||
filter_row.pack(fill="x", pady=(0, 4))
|
||||
tk.Label(filter_row, text="Filter:", bg=BG, fg=FG,
|
||||
font=("Helvetica", 10)).pack(side="left", padx=(0, 4))
|
||||
self.search_var = tk.StringVar()
|
||||
self.search_var.trace_add("write", lambda *_: self._refresh_review())
|
||||
self._filter_entry = tk.Entry(
|
||||
filter_row, textvariable=self.search_var,
|
||||
font=("Helvetica", 11), bg=BG3, fg=FG,
|
||||
insertbackground=FG, relief="flat", bd=4)
|
||||
self._filter_entry.pack(side="left", fill="x", expand=True)
|
||||
self._filter_entry.focus_set()
|
||||
styled_btn(filter_row, "✕", lambda: self.search_var.set(""),
|
||||
color=RED, bg=BG3).pack(side="left", padx=(3, 0))
|
||||
|
||||
hdr0 = tk.Frame(col0, bg=BG)
|
||||
hdr0.pack(fill="x")
|
||||
section_label(hdr0, "TO REVIEW").pack(side="left")
|
||||
self.review_count_var = tk.StringVar()
|
||||
tk.Label(hdr0, textvariable=self.review_count_var, bg=BG, fg=FG_DIM,
|
||||
font=("Helvetica", 9)).pack(side="right")
|
||||
|
||||
self.review_lb, review_frame = make_listbox(col0)
|
||||
review_frame.pack(fill="both", expand=True)
|
||||
self.review_lb.bind("<<ListboxSelect>>", self._on_review_select)
|
||||
self.review_lb.bind("<Return>", self._on_review_select)
|
||||
|
||||
# ── Column 1: Correct list ─────────────────────────────────────────────
|
||||
col1 = tk.Frame(body, bg=BG)
|
||||
col1.grid(row=0, column=1, sticky="nsew", padx=(0, PAD))
|
||||
|
||||
hdr1 = tk.Frame(col1, bg=BG)
|
||||
hdr1.pack(fill="x")
|
||||
section_label(hdr1, "✓ CORRECT [Del=remove]").pack(side="left")
|
||||
self.correct_count_var = tk.StringVar()
|
||||
tk.Label(hdr1, textvariable=self.correct_count_var, bg=BG, fg=FG_DIM,
|
||||
font=("Helvetica", 9)).pack(side="right")
|
||||
|
||||
self.correct_lb, correct_frame = make_listbox(col1)
|
||||
correct_frame.pack(fill="both", expand=True)
|
||||
self.correct_lb.bind("<<ListboxSelect>>",
|
||||
lambda e: self._on_side_select(self.correct_lb))
|
||||
self.correct_lb.bind("<Delete>",
|
||||
lambda e: self._move_back(self.correct_lb, is_dict=False))
|
||||
self.correct_lb.bind("<BackSpace>",
|
||||
lambda e: self._move_back(self.correct_lb, is_dict=False))
|
||||
|
||||
styled_btn(col1, "← Back to Review [Del]",
|
||||
lambda: self._move_back(self.correct_lb, is_dict=False),
|
||||
color=YELLOW).pack(fill="x", pady=(4, 0))
|
||||
|
||||
# ── Column 2: Fixes list ───────────────────────────────────────────────
|
||||
col2 = tk.Frame(body, bg=BG)
|
||||
col2.grid(row=0, column=2, sticky="nsew")
|
||||
|
||||
hdr2 = tk.Frame(col2, bg=BG)
|
||||
hdr2.pack(fill="x")
|
||||
section_label(hdr2, "⇄ FIXES (original → phonetic)").pack(side="left")
|
||||
self.fixes_count_var = tk.StringVar()
|
||||
tk.Label(hdr2, textvariable=self.fixes_count_var, bg=BG, fg=FG_DIM,
|
||||
font=("Helvetica", 9)).pack(side="right")
|
||||
|
||||
self.fixes_lb, fixes_frame = make_listbox(col2)
|
||||
fixes_frame.pack(fill="both", expand=True)
|
||||
self.fixes_lb.bind("<<ListboxSelect>>",
|
||||
lambda e: self._on_side_select(self.fixes_lb))
|
||||
self.fixes_lb.bind("<Delete>",
|
||||
lambda e: self._move_back(self.fixes_lb, is_dict=True))
|
||||
self.fixes_lb.bind("<BackSpace>",
|
||||
lambda e: self._move_back(self.fixes_lb, is_dict=True))
|
||||
|
||||
styled_btn(col2, "← Back to Review [Del]",
|
||||
lambda: self._move_back(self.fixes_lb, is_dict=True),
|
||||
color=YELLOW).pack(fill="x", pady=(4, 0))
|
||||
|
||||
# ── Bottom action bar ──────────────────────────────────────────────────
|
||||
action_bar = tk.Frame(self, bg=BG3, pady=8)
|
||||
action_bar.pack(fill="x")
|
||||
|
||||
# Now-playing word label
|
||||
tk.Label(action_bar, text="▶", bg=BG3, fg=GREEN,
|
||||
font=("Helvetica", 11)).pack(side="left", padx=(10, 2))
|
||||
self.now_playing_var = tk.StringVar(value="—")
|
||||
tk.Label(action_bar, textvariable=self.now_playing_var,
|
||||
bg=BG3, fg=GREEN, font=("Helvetica", 11, "bold"),
|
||||
width=20, anchor="w").pack(side="left")
|
||||
|
||||
# Inline fix entry — right next to the word, auto-focused on word click
|
||||
tk.Label(action_bar, text="→", bg=BG3, fg=MAUVE,
|
||||
font=("Helvetica", 13, "bold")).pack(side="left", padx=(6, 3))
|
||||
self.fix_var = tk.StringVar()
|
||||
self._fix_entry = tk.Entry(
|
||||
action_bar, textvariable=self.fix_var,
|
||||
font=("Helvetica", 11), bg=BG2, fg=MAUVE,
|
||||
insertbackground=MAUVE, relief="flat", bd=4, width=22)
|
||||
self._fix_entry.pack(side="left")
|
||||
self._fix_entry.bind("<Return>", lambda e: self._enter_action())
|
||||
self._fix_entry.bind("<Escape>", lambda e: self._reset_fix_entry())
|
||||
|
||||
tk.Label(action_bar, text="Enter=correct (edit first for fix) Esc=reset",
|
||||
bg=BG3, fg=FG_DIM, font=("Helvetica", 8)).pack(side="left", padx=(5, 10))
|
||||
|
||||
tk.Label(action_bar, text="│", bg=BG3, fg=FG_DIM).pack(side="left", padx=4)
|
||||
styled_btn(action_bar, "■ Stop [s]", sd.stop,
|
||||
color=RED).pack(side="left", padx=4)
|
||||
styled_btn(action_bar, "↺ Replay [Space]", self._replay,
|
||||
color=BLUE).pack(side="left", padx=2)
|
||||
|
||||
tk.Label(action_bar, text="│", bg=BG3, fg=FG_DIM).pack(side="left", padx=4)
|
||||
styled_btn(action_bar, "⇄ Apply Fixes to Text",
|
||||
self._apply_fixes, color=YELLOW, bg=BG2).pack(side="left", padx=4)
|
||||
|
||||
tk.Label(action_bar, text="│", bg=BG3, fg=FG_DIM).pack(side="left", padx=4)
|
||||
self._pregen_btn = styled_btn(
|
||||
action_bar, "↻ Pre-gen Fix Audio",
|
||||
self._pregen_all_fix_audio, color=MAUVE, bg=BG2)
|
||||
self._pregen_btn.pack(side="left", padx=4)
|
||||
self._pregen_status_var = tk.StringVar(value="")
|
||||
tk.Label(action_bar, textvariable=self._pregen_status_var,
|
||||
bg=BG3, fg=FG_DIM, font=("Helvetica", 8),
|
||||
width=28, anchor="w").pack(side="left", padx=(4, 10))
|
||||
|
||||
# ── Refresh helpers ────────────────────────────────────────────────────────
|
||||
|
||||
def _review_words(self) -> list[str]:
    """Words still awaiting review.

    Everything in the master list that has been neither confirmed correct
    nor given a fix, optionally narrowed by the case-insensitive search box.
    """
    done = set(self.fixes) | self.correct
    remaining = [w for w in self.all_words if w not in done]
    query = self.search_var.get().strip().casefold()
    if not query:
        return remaining
    return [w for w in remaining if query in w.casefold()]
|
||||
|
||||
def _refresh_review(self) -> None:
    """Rebuild the review listbox from the current word set and update its count."""
    remaining = self._review_words()
    self.review_lb.delete(0, "end")
    for word in remaining:
        self.review_lb.insert("end", f"  {word}")
    self.review_count_var.set(str(len(remaining)))
|
||||
|
||||
def _refresh_correct(self) -> None:
    """Rebuild the confirmed-correct listbox (case-insensitive alphabetical)."""
    confirmed = sorted(self.correct, key=str.casefold)
    self.correct_lb.delete(0, "end")
    for word in confirmed:
        self.correct_lb.insert("end", f"  {word}")
    self.correct_count_var.set(str(len(confirmed)))
|
||||
|
||||
def _refresh_fixes(self) -> None:
    """Rebuild the fixes listbox ('original → replacement' rows) and its count."""
    self.fixes_lb.delete(0, "end")
    ordered = sorted(self.fixes.items(), key=lambda item: item[0].casefold())
    for orig, rep in ordered:
        self.fixes_lb.insert("end", f"  {orig} → {rep}")
    self.fixes_count_var.set(str(len(self.fixes)))
|
||||
|
||||
def _refresh_all(self) -> None:
    """Refresh all three list panes in one call."""
    for refresh in (self._refresh_review, self._refresh_correct, self._refresh_fixes):
        refresh()
|
||||
|
||||
# ── Playback ───────────────────────────────────────────────────────────────
|
||||
|
||||
def _play_word(self, word: str) -> None:
    """Play the cached pronunciation clip for *word*, warning if it is missing."""
    wav_name = self.manifest.get(word)
    if not wav_name:
        return  # word was never synthesised; nothing to play
    clip = OUTPUT_DIR / wav_name
    if clip.exists():
        self.now_playing_var.set(word)
        play_async(clip)
    else:
        messagebox.showwarning("Missing audio",
                               f"No audio file for '{word}'.\n"
                               "Run generate_proper_noun_audio.py first.")
|
||||
|
||||
# ── Selection callbacks ────────────────────────────────────────────────────
|
||||
|
||||
def _on_review_select(self, event=None) -> None:
    """Review-list selection handler: prime the fix entry and play the word.

    *event* is unused; it is supplied by Tk when this runs as a
    <<ListboxSelect>> binding (and is None when invoked programmatically).
    """
    sel = self.review_lb.curselection()
    if not sel:
        return
    word = self.review_lb.get(sel[0]).strip()
    # Remember which word the entry refers to; Enter/Escape/replay use this.
    self._fix_entry_word = word
    self.fix_var.set(word)  # pre-fill fix entry with the word
    # Select the whole pre-filled text so typing immediately replaces it.
    self._fix_entry.selection_range(0, "end")
    self._fix_entry.icursor("end")
    # Defer focus so the listbox doesn't reclaim it after the click event settles
    self.after(0, self._fix_entry.focus_set)
    self._play_word(word)
|
||||
|
||||
def _on_side_select(self, listbox: tk.Listbox) -> None:
    """Play audio for a row clicked in one of the side lists.

    Rows in the fixes list look like 'original → replacement'; clicking one
    synthesises and speaks the replacement text. Any other row plays the
    original word's cached clip.
    """
    selection = listbox.curselection()
    if not selection:
        return
    pieces = listbox.get(selection[0]).strip().split(" → ")
    word = pieces[0].strip()

    if not (listbox is self.fixes_lb and len(pieces) == 2):
        self._play_word(word)
        return

    # Fixes row: speak the phonetic replacement text instead of the word.
    replacement = pieces[1].strip()
    self.now_playing_var.set(f"… {replacement}")

    def _on_ready(_path):
        # Synthesis finished on a worker; update the label on the Tk thread.
        self.after(0, lambda: self.now_playing_var.set(replacement))

    synth_and_play(replacement, on_ready=_on_ready)
|
||||
|
||||
# ── Actions ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _selected_review_word(self) -> str | None:
    """Return the word currently highlighted in the review list, or None."""
    selection = self.review_lb.curselection()
    return self.review_lb.get(selection[0]).strip() if selection else None
|
||||
|
||||
def _enter_action(self) -> None:
    """Handle Enter in the fix entry.

    Unedited (or emptied) text confirms the word as correct; edited text
    records a pronunciation fix mapping the word to the typed replacement.
    """
    word = self._fix_entry_word or self._selected_review_word()
    if not word:
        return
    typed = self.fix_var.get().strip()
    if typed and typed != word:
        self._add_fix_for_word(word, typed)
    else:
        self._mark_correct_word(word)
|
||||
|
||||
def _reset_fix_entry(self) -> None:
    """Escape handler: undo any edits in the fix entry, hand focus back to the list."""
    self.fix_var.set(self._fix_entry_word)
    self.review_lb.focus_set()
|
||||
|
||||
def _replay(self) -> None:
    """Replay the clip for the word currently loaded in the fix entry."""
    word = self._fix_entry_word
    if word:
        self._play_word(word)
|
||||
|
||||
def _advance_review(self) -> None:
    """After an action, select the first remaining word in the review list."""
    if self.review_lb.size() > 0:
        self.review_lb.selection_clear(0, "end")
        self.review_lb.selection_set(0)
        self.review_lb.see(0)
        # Fire the selection callback manually so the next word auto-plays
        # and the fix entry is re-primed, exactly as if the user clicked it.
        self.review_lb.event_generate("<<ListboxSelect>>")
|
||||
|
||||
def _mark_correct_word(self, word: str) -> None:
    """Accept *word* as pronounced correctly, persist, and advance the queue."""
    self.correct.add(word)
    save_json(CORRECT_FILE, sorted(self.correct))
    # Clear per-word UI state before moving on to the next word.
    self.now_playing_var.set("—")
    self.fix_var.set("")
    self._fix_entry_word = ""
    self._refresh_all()
    self._advance_review()
|
||||
|
||||
def _mark_correct(self) -> None:
    """Toolbar action: mark the currently selected review word as correct."""
    word = self._selected_review_word()
    if word:
        self._mark_correct_word(word)
    else:
        messagebox.showinfo("Nothing selected",
                            "Select a word from the Review list first.")
|
||||
|
||||
def _add_fix_for_word(self, word: str, replacement: str) -> None:
    """Record a pronunciation fix (word → phonetic replacement) and persist it."""
    self.fixes[word] = replacement
    save_json(FIXES_FILE, self.fixes)
    # Clear per-word UI state before moving on to the next word.
    self.now_playing_var.set("—")
    self.fix_var.set("")
    self._fix_entry_word = ""
    self._refresh_all()
    self._advance_review()
|
||||
|
||||
def _add_fix(self) -> None:
    """Toolbar action: record the typed replacement as a fix for the selected word."""
    word = self._selected_review_word()
    if not word:
        messagebox.showinfo("Nothing selected",
                            "Select a word from the Review list first.")
        return
    replacement = self.fix_var.get().strip()
    # An empty or unchanged entry is not a usable replacement.
    if not replacement or replacement == word:
        messagebox.showinfo("No replacement",
                            "Type the phonetic replacement in the Fix box.")
        return
    self._add_fix_for_word(word, replacement)
|
||||
|
||||
def _move_back(self, listbox: tk.Listbox, is_dict: bool) -> None:
    """Return a word from a side list to the review queue and persist the change.

    *is_dict* selects the backing store the row came from: True for the
    fixes mapping, False for the correct set.
    """
    selection = listbox.curselection()
    if not selection:
        return
    # Rows may be 'word' or 'word → replacement'; keep only the word part.
    word = listbox.get(selection[0]).strip().split(" → ")[0].strip()
    if is_dict:
        self.fixes.pop(word, None)
        save_json(FIXES_FILE, self.fixes)
    else:
        self.correct.discard(word)
        save_json(CORRECT_FILE, sorted(self.correct))
    self._refresh_all()
|
||||
|
||||
# ── Apply fixes to source text ─────────────────────────────────────────────
|
||||
|
||||
def _pregen_all_fix_audio(self) -> None:
    """Synthesise and cache audio for every replacement phonetic string."""
    if not self.fixes:
        messagebox.showinfo("No fixes", "The Fixes list is empty.")
        return

    replacements = list(self.fixes.values())
    total = len(replacements)
    # Count clips already cached on disk so the status line can report
    # progress over the genuinely new work only.
    already = sum(
        1 for r in replacements
        if (REPLACEMENTS_DIR / f"{_slug(r)}.wav").exists()
    )

    # Confirm if it'll take a while
    new_count = total - already
    if new_count == 0:
        messagebox.showinfo("Already done",
                            f"All {total} replacement clips already exist.")
        return

    # Disable the button while the background job runs to prevent re-entry.
    self._pregen_btn.config(state="disabled")
    self._pregen_status_var.set(f"0 / {new_count} new ({already} cached)")

    def _run():
        # Worker thread: synthesis is slow and must not block the UI.
        # Every widget update is marshalled back to the Tk thread via self.after.
        done = 0
        for rep in replacements:
            cache_path = REPLACEMENTS_DIR / f"{_slug(rep)}.wav"
            if not cache_path.exists():
                _synth_to_cache(rep)
                done += 1
                # Bind loop values as lambda defaults so each scheduled callback
                # reports its own progress, not the final values (late binding).
                self.after(0, lambda d=done, t=new_count:
                           self._pregen_status_var.set(f"{d} / {t} synthesised…"))
        self.after(0, lambda: self._pregen_status_var.set(
            f"Done — {total} clips ready"))
        self.after(0, lambda: self._pregen_btn.config(state="normal"))

    # Daemon thread: don't keep the process alive if the window is closed mid-run.
    threading.Thread(target=_run, daemon=True).start()
|
||||
|
||||
def _apply_fixes(self) -> None:
    """Apply every recorded fix to the source text and write the result.

    Each original word is replaced on whole-word boundaries, case-sensitively.
    The source file is left untouched; the rewritten text goes to
    FIXED_TEXT_OUT. A summary dialog reports rule and replacement counts.
    """
    if not self.fixes:
        messagebox.showinfo("No fixes", "The Fixes list is empty.")
        return
    if not SOURCE_TEXT.exists():
        messagebox.showerror("Source not found", f"Cannot find:\n{SOURCE_TEXT}")
        return
    text = SOURCE_TEXT.read_text(encoding="utf-8")
    count_total = 0
    for original, replacement in self.fixes.items():
        pattern = r'\b' + re.escape(original) + r'\b'
        # Use a callable replacement so backslashes in the user-typed phonetic
        # text are inserted literally; a plain string repl would interpret
        # them as regex escapes (e.g. a stray '\d' raises re.error, '\1'
        # injects a group reference).
        new_text, n = re.subn(pattern, lambda _m, _rep=replacement: _rep, text)
        if n:
            text = new_text
            count_total += n
    FIXED_TEXT_OUT.write_text(text, encoding="utf-8")
    messagebox.showinfo(
        "Done",
        f"Applied {len(self.fixes)} fix rules ({count_total} replacements).\n\n"
        f"Saved to:\n{FIXED_TEXT_OUT}"
    )
|
||||
|
||||
|
||||
# ── Entry point ────────────────────────────────────────────────────────────────
|
||||
|
||||
def main() -> None:
    """Load the pronunciation manifest and launch the auditor GUI."""
    if not MANIFEST_FILE.exists():
        print(f"Manifest not found: '{MANIFEST_FILE}'")
        print("Run generate_proper_noun_audio.py first.")
        raise SystemExit(1)

    manifest: dict[str, str] = json.loads(MANIFEST_FILE.read_text(encoding="utf-8"))
    print(f"Loaded {len(manifest)} entries from manifest.")

    ProperNounAuditor(manifest).mainloop()
|
||||
|
||||
|
||||
# Launch the GUI only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
1707
proper_nouns.txt
Normal file
1707
proper_nouns.txt
Normal file
File diff suppressed because it is too large
Load Diff
44
render_voices.py
Normal file
44
render_voices.py
Normal file
@ -0,0 +1,44 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
from kokoro import KPipeline
|
||||
from text_input import TEXT
|
||||
|
||||
# ── Device setup ──────────────────────────────────────────────────────────────
# Prefer the GPU when available; Kokoro falls back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Output sample rate in Hz — used when writing the WAV files below.
SAMPLE_RATE = 24000
# Playback speed multiplier (1.0 = normal).
SPEED = 1.0
# (voice id, output filename) pairs rendered by the loop at the bottom.
VOICES = [
    ("af_heart", "output_af_heart.wav"),  # warm American female
    ("am_michael", "output_am_michael.wav"),  # best American male
]

# lang_code "a" selects American English (see tts_test.py for the legend).
pipeline = KPipeline(lang_code="a")
|
||||
|
||||
|
||||
def generate(voice: str, output_file: str) -> None:
    """Render TEXT with the given Kokoro voice and write it to *output_file*."""
    print(f"\nGenerating '{voice}' → {output_file} …")

    def _as_array(chunk):
        # Kokoro may yield torch tensors; normalise each chunk to 1-D numpy.
        if hasattr(chunk, "numpy"):
            chunk = chunk.cpu().numpy()
        return np.atleast_1d(chunk.squeeze())

    chunks = []
    for _, _, raw_chunk in pipeline(TEXT, voice=voice, speed=SPEED):
        arr = _as_array(raw_chunk)
        if arr.size > 0:
            chunks.append(arr)

    if not chunks:
        print(f"  ✗ No audio produced for '{voice}'")
        return

    audio = np.concatenate(chunks, axis=0)
    sf.write(output_file, audio, SAMPLE_RATE)
    print(f"  ✓ Saved '{output_file}' ({len(audio) / SAMPLE_RATE:.1f}s, {SAMPLE_RATE} Hz)")
|
||||
|
||||
|
||||
# Render every configured voice in turn.
for voice, path in VOICES:
    generate(voice, path)

print("\nDone.")
|
||||
19
run_michael.py
Normal file
19
run_michael.py
Normal file
@ -0,0 +1,19 @@
|
||||
import torch, numpy as np, soundfile as sf
|
||||
from kokoro import KPipeline
|
||||
from text_input import TEXT
|
||||
|
||||
# One-off render of TEXT with the am_michael voice to output_am_michael.wav.
pipeline = KPipeline(lang_code="a")
print(f"GPU: {torch.cuda.get_device_name(0)}" if torch.cuda.is_available() else "CPU")
print("Generating am_michael ...")

chunks = []
for _, _, chunk_audio in pipeline(TEXT, voice="am_michael", speed=1.0):
    # Kokoro may yield torch tensors; normalise each chunk to a 1-D numpy array.
    if hasattr(chunk_audio, "numpy"):
        chunk_audio = chunk_audio.cpu().numpy()
    chunk_audio = np.atleast_1d(chunk_audio.squeeze())
    if chunk_audio.size > 0:
        chunks.append(chunk_audio)

# Guard the empty case: np.concatenate raises ValueError on an empty list,
# which would crash the script if the pipeline produced no audio.
if chunks:
    audio = np.concatenate(chunks)
    sf.write("output_am_michael.wav", audio, 24000)
    print(f"Saved output_am_michael.wav ({len(audio)/24000:.1f}s)")
else:
    print("No audio generated — check input text.")
|
||||
35
text_input copy.py
Normal file
35
text_input copy.py
Normal file
@ -0,0 +1,35 @@
|
||||
# Narration text: title page and introduction of "The Book of the Nem".
# NOTE(review): this module ("text_input copy.py") is not imported by the
# render scripts in view (they import TEXT from text_input) — it appears to
# be a preserved fuller draft of that text; confirm before deleting.
TEXT = (
    "The Book of the Nem. "
    "Another Testament of Jesus Christ. "
    "From the Nem People. "
    "Accounts Written by the Hands of Nem Prophets and Recordkeepers. "
    "Taken from the Written Records of the Nem, the People of the Spirit. "
    "\n\n"
    "The Book of the Nem is written to the descendants of the Lamanites of the "
    "Book of Mormon, who are a remnant of the House of Israel, and also to the "
    "descendants of the Nem, and to the Gentiles, by way of commandment, and also "
    "by the spirit of prophecy and of revelation, to the convincing of all that "
    "Jesus Christ is the Son of God, the Savior of all mankind throughout the world. "
    "\n\n"
    "It is a record of the people of Hagoth, the shipbuilder, and the people of "
    "Corianton, the son of Alma the younger, who traveled into the Land Northward "
    "to escape the wickedness of the Nephites and secret combinations of the "
    "Gadianton Robbers who were beginning to spread across the land. It is to show "
    "unto the remnant of the House of Israel what great things the Lord hath done "
    "for their fathers and brothers; and that they may know the covenants of the "
    "Lord, that they are not cast off forever, and also to the convincing of the "
    "Gentiles that Jesus is the Christ, the Eternal God, manifesting himself unto "
    "all nations, including the ancestors of the Nem long ago. And now, if there "
    "are faults in this record, they are the mistakes of men; wherefore, condemn "
    "not the things of God. Search these records, ponder, and pray that Jesus "
    "Christ may reveal the truth of it unto you by the power of the Holy Ghost, "
    "for by the power of the Holy Ghost, ye may know the truth of all things. "
    "See Moroni chapter ten, verses three through five. "
    "\n\n"
    "The purpose of this record is to assist in the gathering of the House of "
    "Israel, to bring to light the words of Christ given to His other sheep, to "
    "prepare the Remnant of Joseph to rise up, and the Remnant of Jacob to return; "
    "that they may fulfill their covenants, build the New Jerusalem, and establish "
    "Zion in these last days before the return of our Lord and Savior, Jesus "
    "Christ, in glory."
)
|
||||
12
text_input.py
Normal file
12
text_input.py
Normal file
@ -0,0 +1,12 @@
|
||||
# Short narration text consumed by the render scripts
# (render_voices.py and run_michael.py do `from text_input import TEXT`).
# NOTE(review): "Kneephites" is presumably a deliberate phonetic respelling
# (of "Nephites") to steer the TTS pronunciation — confirm before "fixing".
TEXT = (
    "The Book of the Nem. "
    "Another Testament of Jesus Christ. "
    "From the Kneephites People. "

    "The purpose of this record is to assist in the gathering of the House of "
    "Israel, to bring to light the words of Christ given to His other sheep, to "
    "prepare the Remnant of Joseph to rise up, and the Remnant of Jacob to return; "
    "that they may fulfill their covenants, build the New Jerusalem, and establish "
    "Zion in these last days before the return of our Lord and Savior, Jesus "
    "Christ, in glory."
)
|
||||
49
tts_test.py
Normal file
49
tts_test.py
Normal file
@ -0,0 +1,49 @@
|
||||
import numpy as np
import soundfile as sf
import torch

from kokoro import KPipeline
|
||||
|
||||
# ── Device setup ──────────────────────────────────────────────────────────────
# Prefer the GPU when available; Kokoro falls back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# ── Test paragraph ─────────────────────────────────────────────────────────────
TEXT = (
    "The world of artificial intelligence is evolving at a remarkable pace. "
    "Modern language models can now read, write, and even speak with surprising "
    "clarity and nuance. This audio was generated entirely on a local machine "
    "using the Kokoro text-to-speech model, running on an NVIDIA RTX 3060 GPU. "
    "No cloud, no API keys — just raw local compute turning words into sound."
)

# ── Build pipeline ─────────────────────────────────────────────────────────────
# lang_code: 'a' = American English, 'b' = British English
# voices: af_heart, af_bella, af_nova, am_adam, am_michael, bf_emma, bm_george …
pipeline = KPipeline(lang_code="a")

OUTPUT_FILE = "output.wav"
VOICE = "af_heart"  # warm American female voice
SPEED = 1.0  # 1.0 = normal speed

# ── Generate audio ─────────────────────────────────────────────────────────────
# (The numpy import previously sat here, mid-file; it now lives at the top
# with the other imports per PEP 8.)
print(f"Generating speech with voice '{VOICE}' …")

audio_chunks = []
for _, _, chunk_audio in pipeline(TEXT, voice=VOICE, speed=SPEED):
    # chunk_audio is a torch.Tensor of shape [N], dtype float32
    if hasattr(chunk_audio, "numpy"):
        chunk_audio = chunk_audio.cpu().numpy()
    chunk_audio = np.atleast_1d(chunk_audio.squeeze())
    if chunk_audio.size > 0:
        audio_chunks.append(chunk_audio)

if audio_chunks:
    audio = np.concatenate(audio_chunks, axis=0)
    sf.write(OUTPUT_FILE, audio, 24000)
    duration = len(audio) / 24000
    print(f"✓ Saved '{OUTPUT_FILE}' ({duration:.1f}s, 24 kHz)")
else:
    print("No audio generated — check input text.")
|
||||
Reference in New Issue
Block a user