From 224f97d0c68478be96be72f4e3c9b98267b08a2c Mon Sep 17 00:00:00 2001 From: dillonj Date: Mon, 9 Mar 2026 23:36:50 -0600 Subject: [PATCH] prep for win 11 --- README.md | 112 ++++++ create_audiobook_lightbringer.py | 306 ++++++++++++++++ create_audiobook_nem.py | 30 +- create_temple_voices.py | 352 +++++++++++++++++++ output_proper_nouns/correct_words.json | 4 +- output_proper_nouns/pronunciation_fixes.json | 29 +- run_audiobook.bat | 42 +++ run_gui.bat | 21 ++ setup_windows.bat | 86 +++++ 9 files changed, 966 insertions(+), 16 deletions(-) create mode 100644 create_audiobook_lightbringer.py create mode 100644 create_temple_voices.py create mode 100644 run_audiobook.bat create mode 100644 run_gui.bat create mode 100644 setup_windows.bat diff --git a/README.md b/README.md index e69de29..a1106c0 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,112 @@ +# Audiobook Generator — Windows 11 Setup Guide + +This guide is written for someone who has never used Python or the command line. +Follow the steps in order and you'll be generating audiobook chapters with a gaming GPU. + +--- + +## What you'll need + +| Requirement | Why | +|---|---| +| Windows 11 PC with a modern NVIDIA GPU | Fast audio generation using CUDA | +| ~5 GB free disk space | Python, PyTorch, and the TTS model | +| Internet connection (first-time only) | Downloads packages and the AI voice model | + +--- + +## Step 1 — Install Python + +1. Go to **https://www.python.org/downloads/** +2. Click the big yellow **"Download Python 3.11.x"** button +3. Run the installer +4. **IMPORTANT:** On the first screen, tick the box that says **"Add Python to PATH"** before you click Install Now + +If you skipped that checkbox, uninstall Python and reinstall with the box ticked. + +--- + +## Step 2 — Get the project files + +You should have a folder (e.g. `voice_model`) containing the project. 
Make sure it contains: + +``` +setup_windows.bat +run_gui.bat +run_audiobook.bat +requirements.txt +gui_proper_noun_player.py +create_audiobook_lightbringer.py +Audio Text for Novel Lightbringer\ ← your text files go here +``` + +--- + +## Step 3 — Run Setup (one time only) + +1. Open the `voice_model` folder in File Explorer +2. Double-click **`setup_windows.bat`** +3. A black terminal window will open and run through 5 steps: + - Checks Python is installed + - Creates a private Python environment + - Downloads PyTorch with GPU (CUDA) support — **~2.5 GB, be patient** + - Installs the remaining packages + - Downloads the Kokoro AI voice model — **~330 MB** +4. When it says **"Setup complete!"**, press any key to close + +You only need to do this once. + +--- + +## Step 4 — Launch the GUI (Proper Noun Player) + +1. Double-click **`run_gui.bat`** +2. The Proper Noun Player window opens +3. Use it to review and fix how proper nouns are pronounced before generating audio + +**Controls:** +- Click a word in the Review list to hear it +- Type a phonetic spelling in the box at the bottom and press Enter to save a fix +- Press Enter without changing anything to mark the word as Correct +- Press Space to replay the current word +- Click "Apply Fixes to Text" when done to save a pronunciation-corrected text file + +--- + +## Step 5 — Create the Audiobook + +1. Double-click **`run_audiobook.bat`** +2. A menu appears: + - **1** — Generate ALL chapters (this can take many hours — leave it running overnight) + - **2** — Just list what chapters were detected (safe, instant) + - **3** — Generate a short preview clip of each chapter (quick test) + - **4** — Generate specific chapter numbers only +3. Choose an option and press Enter +4. When finished, the `.wav` files will be in the `output_audiobook_lightbringer` folder + +--- + +## Troubleshooting + +**"Python was not found"** +→ Python is not installed, or you forgot to tick "Add Python to PATH". Reinstall Python. 
+ +**The window opens and immediately closes** +→ Right-click the `.bat` file → "Run as administrator", or open a new terminal window first: +press `Win + R`, type `cmd`, press Enter, then drag the `.bat` file into that window and press Enter. + +**Audio generation is very slow** +→ The GPU (CUDA) version of PyTorch may not have installed correctly. Re-run `setup_windows.bat`. + +**"No .txt files found in Audio Text for Novel Lightbringer"** +→ Make sure your chapter text files are placed in the `Audio Text for Novel Lightbringer` subfolder. + +--- + +## Output files + +| Folder | Contents | +|---|---| +| `output_audiobook_lightbringer\` | One `.wav` file per chapter | +| `output_proper_nouns\` | Pronunciation fix data (JSON) | +| `proper_nouns_audio\` | Cached audio for each proper noun | diff --git a/create_audiobook_lightbringer.py b/create_audiobook_lightbringer.py new file mode 100644 index 0000000..81952fe --- /dev/null +++ b/create_audiobook_lightbringer.py @@ -0,0 +1,306 @@ +""" +create_audiobook_lightbringer.py +───────────────────────────────── +Generate the "A Darkness Rising" audiobook — one file per chapter/prologue. + +Reads all .txt files from NOVEL_DIR, detects Prologue + Chapter headings, +and writes one .wav per chapter into OUTPUT_DIR. + +Usage: + python create_audiobook_lightbringer.py # all chapters + python create_audiobook_lightbringer.py --list # list detected chapters + python create_audiobook_lightbringer.py 0 1 2 # prologue + ch1 + ch2 + python create_audiobook_lightbringer.py --preview # short preview clips + +Output filenames: + chapter_00_prologue.wav + chapter_01_homecoming.wav + chapter_02_the_anhuil_ehlar.wav + ... 
+""" + +import argparse +import re +import time +import numpy as np +import soundfile as sf +import torch +from pathlib import Path +from kokoro import KPipeline + +# ── Config ───────────────────────────────────────────────────────────────────── +NOVEL_DIR = Path("Audio Text for Novel Lightbringer") +OUTPUT_DIR = Path("output_audiobook_lightbringer") +SAMPLE_RATE = 24000 +SPEED = 1.0 +LANG_CODE = "a" # American English +VOICE = "am_onyx" # default narrator voice + +# Regex that matches a chapter/prologue heading line (case-insensitive). +# Group 1 captures the chapter number (or None for Prologue). +# Group 2 captures the optional subtitle after " - ". +_HEADING_RE = re.compile( + r"^(?:Chapter\s+(\d+)\s*(?:-\s*(.+))?|(Prologue))\s*$", + re.IGNORECASE, +) + + +# ── Helpers ──────────────────────────────────────────────────────────────────── + +def _slug(text: str) -> str: + """Convert title text to a filesystem-safe slug.""" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "_", text) + return text.strip("_") + + +def load_all_chapters(novel_dir: Path) -> list[dict]: + """ + Read all .txt files in *novel_dir* in sorted order, detect Prologue / + Chapter headings, and return a list of chapter dicts: + { + "num": int, # 0 = Prologue + "title": str, # subtitle portion, e.g. "Homecoming" + "label": str, # human label, e.g. "Chapter 1 - Homecoming" + "slug": str, # e.g. "chapter_01_homecoming" + "text": str, # full body text of the chapter + } + Chapters from multiple files are concatenated in sorted-filename order. 
+ """ + txt_files = sorted(novel_dir.glob("*.txt")) + if not txt_files: + raise FileNotFoundError(f"No .txt files found in '{novel_dir}'") + + # Collect (chapter_num, title_line, body_lines) across all files + raw: list[tuple[int, str, list[str]]] = [] # (num, heading_text, body) + current_num: int | None = None + current_heading: str = "" + current_body: list[str] = [] + + def _flush(): + if current_num is not None: + raw.append((current_num, current_heading, list(current_body))) + + for fpath in txt_files: + lines = fpath.read_text(encoding="utf-8").splitlines() + for line in lines: + m = _HEADING_RE.match(line.strip()) + if m: + _flush() + if m.group(3): # Prologue + current_num = 0 + current_heading = "Prologue" + else: # Chapter N + current_num = int(m.group(1)) + subtitle = (m.group(2) or "").strip() + current_heading = f"Chapter {current_num}" + (f" - {subtitle}" if subtitle else "") + current_body = [line] # keep heading inside text + else: + if current_num is not None: + current_body.append(line) + _flush() + + # Build chapter dicts, deduplicated and sorted by number + seen: set[int] = set() + chapters: list[dict] = [] + for num, heading, body in sorted(raw, key=lambda x: x[0]): + if num in seen: + continue + seen.add(num) + # Derive subtitle / slug + subtitle = "" + sm = re.match(r"Chapter\s+\d+\s*-\s*(.+)", heading, re.IGNORECASE) + if sm: + subtitle = sm.group(1).strip() + elif heading.lower() == "prologue": + subtitle = "Prologue" + + num_str = f"{num:02d}" + if subtitle: + slug = f"chapter_{num_str}_{_slug(subtitle)}" + else: + slug = f"chapter_{num_str}" + + chapters.append({ + "num": num, + "title": subtitle or heading, + "label": heading, + "slug": slug, + "text": "\n".join(body), + }) + + return chapters + + +def clean_text(text: str) -> str: + """Strip formatting artifacts and normalise whitespace for TTS.""" + # Remove horizontal-rule lines (underscores / asterisks / dashes) + text = re.sub(r"^[_\-\*\s]{3,}\s*$", "", text, flags=re.MULTILINE) + 
# Collapse 3+ blank lines to 2 + text = re.sub(r"\n{3,}", "\n\n", text) + return text.strip() + + +def _fmt_duration(seconds: float) -> str: + if seconds >= 60: + m, s = divmod(int(seconds), 60) + return f"{m}m {s:02d}s" + return f"{seconds:.0f}s" + + +def generate_audio(pipeline: KPipeline, text: str, voice: str, + output_path: Path) -> float: + """Generate audio and return wall-clock seconds elapsed.""" + t0 = time.monotonic() + chunks = [] + for _, _, chunk_audio in pipeline(text, voice=voice, speed=SPEED): + if hasattr(chunk_audio, "numpy"): + chunk_audio = chunk_audio.cpu().numpy() + chunk_audio = np.atleast_1d(chunk_audio.squeeze()) + if chunk_audio.size > 0: + chunks.append(chunk_audio) + + elapsed = time.monotonic() - t0 + if chunks: + audio = np.concatenate(chunks, axis=0) + sf.write(str(output_path), audio, SAMPLE_RATE) + duration = len(audio) / SAMPLE_RATE + print(f" ✓ Saved '{output_path.name}' " + f"({duration:.1f}s audio | {elapsed:.1f}s wall-clock)") + else: + print(f" ✗ No audio produced for voice='{voice}'") + return elapsed + + +# ── Main ─────────────────────────────────────────────────────────────────────── + +def main() -> None: + parser = argparse.ArgumentParser( + description="Generate 'A Darkness Rising' audiobook, one file per chapter." + ) + parser.add_argument( + "chapters", nargs="*", type=int, + help="Chapter numbers to generate (0 = Prologue). Default: all.", + ) + parser.add_argument( + "--list", action="store_true", + help="Print detected chapters and exit.", + ) + parser.add_argument( + "--voice", default=VOICE, + help=f"Kokoro voice to use (default: {VOICE}).", + ) + parser.add_argument( + "--preview", nargs="?", const=3000, type=int, metavar="CHARS", + help="Generate short preview clips (default: 3000 chars). 
" + "Output filenames get a _preview suffix.", + ) + args = parser.parse_args() + + print("Loading chapters …") + all_chapters = load_all_chapters(NOVEL_DIR) + + if args.list: + print(f"\nDetected {len(all_chapters)} chapters:\n") + print(f" {'#':>4} {'Label':<45} {'Chars':>8} {'Output filename'}") + print(f" {'─'*4} {'─'*45} {'─'*8} {'─'*30}") + for ch in all_chapters: + chars = len(clean_text(ch["text"])) + print(f" {ch['num']:>4} {ch['label']:<45} {chars:>8,} {ch['slug']}.wav") + return + + # Filter to requested subset + if args.chapters: + requested = set(args.chapters) + run_chapters = [ch for ch in all_chapters if ch["num"] in requested] + missing = requested - {ch["num"] for ch in run_chapters} + if missing: + print(f"⚠ Chapter(s) not found: {sorted(missing)}") + else: + run_chapters = all_chapters + + if not run_chapters: + print("No chapters selected. Use --list to see available chapters.") + return + + voice = args.voice + device = "cuda" if torch.cuda.is_available() else "cpu" + print(f"Device: {device}") + if device == "cuda": + print(f"GPU: {torch.cuda.get_device_name(0)}") + print(f"Voice: {voice}") + + OUTPUT_DIR.mkdir(exist_ok=True) + + # Pre-compute char counts + chapter_chars = {ch["num"]: len(clean_text(ch["text"])) for ch in run_chapters} + + preview_note = (f" ⚡ PREVIEW MODE — capped at {args.preview:,} chars/chapter\n" + if args.preview else "") + print(f"\n{preview_note}{'─'*65}") + print(f" {'#':>4} {'Label':<40} {'Chars':>8}") + print(f" {'─'*4} {'─'*40} {'─'*8}") + for ch in run_chapters: + print(f" {ch['num']:>4} {ch['label']:<40} {chapter_chars[ch['num']]:>8,}") + print(f" {'─'*55}") + total_chars = sum(chapter_chars.values()) + print(f" {'TOTAL':<45} {total_chars:>8,}\n") + + print("Initialising Kokoro pipeline …") + pipeline = KPipeline(lang_code=LANG_CODE) + + chars_per_sec: float | None = None + timing_rows: list[tuple[str, int, float]] = [] + + for ch in run_chapters: + text = clean_text(ch["text"]) + if not text: + 
print(f"\n[{ch['label']}] ⚠ Empty text — skipping") + continue + + preview_chars = args.preview + if preview_chars and len(text) > preview_chars: + cut = text.rfind(" ", 0, preview_chars) + text = text[: cut if cut > 0 else preview_chars] + + chars = len(text) + preview_tag = "_preview" if args.preview else "" + out_path = OUTPUT_DIR / f"{ch['slug']}{preview_tag}.wav" + + if chars_per_sec is not None: + eta_str = _fmt_duration(chars / chars_per_sec) + print(f"\n[{ch['label']}] voice={voice} → {out_path.name} (est. {eta_str})") + else: + print(f"\n[{ch['label']}] voice={voice} → {out_path.name} (calibration run)") + + elapsed = generate_audio(pipeline, text, voice, out_path) + timing_rows.append((ch["label"], chars, elapsed)) + + total_done = sum(c for _, c, _ in timing_rows) + total_elapsed_done = sum(e for _, _, e in timing_rows) + if total_elapsed_done > 0: + chars_per_sec = total_done / total_elapsed_done + print(f" ⏱ Calibration: {chars_per_sec:.0f} chars/sec") + + # Summary + print("\n" + "─" * 65) + print(f" {'Chapter':<35} {'Chars':>7} {'Actual':>8} {'Est':>8}") + print("─" * 65) + for i, (label, chars, elapsed) in enumerate(timing_rows): + actual_str = _fmt_duration(elapsed) + prior_chars = sum(c for _, c, _ in timing_rows[:i]) + prior_elapsed = sum(e for _, _, e in timing_rows[:i]) + if prior_elapsed > 0: + est_str = _fmt_duration(chars / (prior_chars / prior_elapsed)) + else: + est_str = "(first)" + print(f" {label:<35} {chars:>7,} {actual_str:>8} {est_str:>8}") + total_elapsed = sum(e for _, _, e in timing_rows) + print("─" * 65) + print(f" {'TOTAL':<35} {sum(c for _,c,_ in timing_rows):>7,} " + f"{_fmt_duration(total_elapsed):>8}") + print("\nDone.") + + +if __name__ == "__main__": + main() diff --git a/create_audiobook_nem.py b/create_audiobook_nem.py index 41fa1ea..c74f81d 100644 --- a/create_audiobook_nem.py +++ b/create_audiobook_nem.py @@ -33,8 +33,12 @@ SPEED = 1.0 LANG_CODE = "a" # 'a' = American English # ── Available Kokoro voices (American 
English, lang_code='a') ────────────────── -# af_heart – warm American female [downloaded] +# af_bella – American female [downloaded] +# af_heart – warm American female [downloaded] # af_nicole – American female [downloaded] +# af_river – American female [downloaded] +# af_sarah – American female [downloaded] +# af_sky – American female [downloaded] # am_adam – American male (deep) [downloaded] # am_echo – American male [downloaded] # am_eric – American male [downloaded] @@ -56,7 +60,7 @@ LANG_CODE = "a" # 'a' = American English BOOKS = [ # label (start_line1, start_line2) voice output_wav ("Introduction", ("Introduction", "The Book of the Nem"), "af_heart", "00_introduction.wav"), - ("Book of Hagoth", ("THE BOOK OF HAGOTH", "THE SON OF HAGMENI,"), "am_fenrir", "01_hagoth.wav"), + ("Book of Hagoth", ("THE BOOK OF HAGOTH", "THE SON OF HAGMENI,"), "am_santa", "01_hagoth.wav"), ("Shi-Tugo I", ("THE FIRST BOOK OF SHI-TUGO", "FORMER WARRIOR, AMMONITE"), "am_eric", "02_shi_tugo_1.wav"), ("Sanempet", ("THE BOOK OF SANEMPET", "THE SON OF HAGMENI,"), "am_liam", "03_sanempet.wav"), ("Oug", ("THE BOOK OF OUG", "THE SON OF SANEMPET"), "am_michael", "04_oug.wav"), @@ -65,7 +69,7 @@ BOOKS = [ ("Samuel the Lamanite I", ("THE FIRST BOOK", "OF SAMUEL THE LAMANITE"), "am_echo", "07_samuel_lamanite_1.wav"), ("Samuel the Lamanite II", ("THE SECOND BOOK", "OF SAMUEL THE LAMANITE"), "am_echo", "08_samuel_lamanite_2.wav"), ("Manti", ("THE BOOK OF MANTI", "THE SON OF OUG"), "am_onyx", "09_manti.wav"), - ("Pa Nat I", ("THE FIRST BOOK OF PA NAT", "THE DAUGHTER OF SHIMLEI"), "af_nicole", "10_pa_nat_1.wav"), + ("Pa Nat I", ("THE FIRST BOOK OF PA NAT", "THE DAUGHTER OF SHIMLEI"), "af_bella", "10_pa_nat_1.wav"), ("Moroni I", ("THE FIRST BOOK OF MORONI", "THE SON OF MORMON,"), "am_adam", "11_moroni_1.wav"), ("Moroni II", ("THE SECOND BOOK OF MORONI", "THE SON OF MORMON,"), "am_adam", "12_moroni_2.wav"), ("Moroni III", ("THE THIRD BOOK OF MORONI", "THE SON OF MORMON,"), "am_adam", 
"13_moroni_3.wav"), @@ -183,6 +187,11 @@ def main() -> None: "--list", action="store_true", help="Print all enabled book labels and exit." ) + parser.add_argument( + "--preview", nargs="?", const=3000, type=int, metavar="CHARS", + help="Generate a short preview clip per book (default: 3000 chars). " + "Output filenames get a _preview suffix." + ) args = parser.parse_args() enabled_labels = [label for label, _, _, _ in BOOKS] @@ -230,7 +239,8 @@ def main() -> None: } # Print char count summary before starting - print(f"\n{'─' * 52}") + preview_note = f" ⚡ PREVIEW MODE — capped at {args.preview:,} chars/book\n" if args.preview else "" + print(f"\n{preview_note}{'─' * 52}") print(f" {'Section':<30} {'Chars':>8}") print(f"{'─' * 52}") for label, _, _, wav_name in run_books: @@ -253,7 +263,14 @@ def main() -> None: print(f"\n[{label}] ⚠ Empty text — skipping") continue - chars = section_chars[label] + # Preview mode: truncate to requested char limit at a word boundary + preview_chars = args.preview + if preview_chars: + if len(text) > preview_chars: + cut = text.rfind(" ", 0, preview_chars) + text = text[: cut if cut > 0 else preview_chars] + + chars = len(text) # Print ETA once we have a calibration rate if chars_per_sec is not None: @@ -264,7 +281,8 @@ def main() -> None: print(f"\n[{label}] voice={voice} → {wav_name} (timing calibration run)") stem, ext = wav_name.rsplit(".", 1) - out_path = OUTPUT_DIR / f"{stem}_{voice}.{ext}" + preview_tag = "_preview" if preview_chars else "" + out_path = OUTPUT_DIR / f"{stem}_{voice}{preview_tag}.{ext}" elapsed = generate_audio(pipeline, text, voice, out_path) timing_rows.append((label, chars, elapsed)) diff --git a/create_temple_voices.py b/create_temple_voices.py new file mode 100644 index 0000000..957e838 --- /dev/null +++ b/create_temple_voices.py @@ -0,0 +1,352 @@ +""" +create_temple_voices.py +──────────────────────── +Generate the "Sacred Temple Writings" section of the Nem audiobook using one +distinct Microsoft Edge 
neural TTS voice per character (NOT Kokoro). + +Uses the free edge-tts library which streams Microsoft Azure neural voices. +Audio is stitched into a single WAV and saved to OUTPUT_DIR. + +Usage: + python create_temple_voices.py # full render + python create_temple_voices.py --preview 40 # first 40 segments only + python create_temple_voices.py --print-segments # inspect parsed segments + python create_temple_voices.py --list-voices # list available en voices + +Voice assignments live in CHARACTER_VOICES below — easy to customise. +Run --list-voices to discover all available edge-tts voice names. +""" + +import argparse +import asyncio +import re +import subprocess +import time +from collections import Counter +from pathlib import Path + +import numpy as np +import soundfile as sf +import edge_tts + +# ── File / output config ─────────────────────────────────────────────────────── +_FIXED_FILE = Path("Audio Master Nem Full (TTS Fixed).txt") +_ORIG_FILE = Path("Audio Master Nem Full.txt") +SOURCE_FILE = _FIXED_FILE if _FIXED_FILE.exists() else _ORIG_FILE + +OUTPUT_DIR = Path("output_temple_voices") +OUTPUT_FILE = "sacred_temple_writings_multivoice.wav" + +SAMPLE_RATE = 24_000 # Hz — final WAV sample rate +PAUSE_SAME = 350 # ms silence between same-speaker segments +PAUSE_CHANGE = 650 # ms silence between different-speaker segments + +# ── Section boundary markers (match create_audiobook_nem.py BOOKS order) ────── +# Sacred Temple Writings starts at "THE SACRED" / "TEMPLE WRITINGS" +# and ends just before "THE FIRST BOOK" / "OF SAMUEL THE LAMANITE" +_SEC_START_L1 = "THE SACRED" +_SEC_START_L2 = "TEMPLE WRITINGS" +_SEC_END_L1 = "THE FIRST BOOK" +_SEC_END_L2 = "OF SAMUEL THE LAMANITE" + +# ── Character → edge-tts voice ──────────────────────────────────────────────── +# Run python create_temple_voices.py --list-voices to see all available voices. +# Keys must match the speaker labels exactly as they appear in the source file. 
+CHARACTER_VOICES: dict[str, str] = { + # ── Celestial beings ─────────────────────────────────────────────────────── + "Narrator": "en-US-GuyNeural", # calm neutral narrator + "Elohim Heavenly Mother": "en-US-JennyNeural", # warm, wise matriarch + "Elohim Heavenly Father": "en-US-AndrewMultilingualNeural", # expressive, authoritative + "Jehovah": "en-US-AndrewNeural", # clear, gentle divine + "Angel of the Lord": "en-US-BrianNeural", # ethereal divine messenger + "Holy Ghost": "en-US-EricNeural", # quiet, inward, spiritual + "Holy Ghost Elders": "en-US-BrianNeural", # measured elder council + + # ── Dark beings ──────────────────────────────────────────────────────────── + "Lucifer": "en-CA-LiamNeural", # smooth, persuasive tempter + "Satan": "en-US-SteffanNeural", # cold, commanding adversary + + # ── Mortal / earth characters ────────────────────────────────────────────── + "Michael": "en-US-RogerNeural", # noble warrior archangel + "Adam": "en-US-ChristopherNeural", # earnest first man + "Eve": "en-US-AriaNeural", # curious, warm first woman + + # ── Apostles ─────────────────────────────────────────────────────────────── + "Peter": "en-GB-RyanNeural", # firm British apostle + "James": "en-AU-WilliamMultilingualNeural", # steady Australian voice + "John": "en-IE-ConnorNeural", # gentle Irish apostle + + # ── Other roles ──────────────────────────────────────────────────────────── + "Preacher": "en-US-AvaNeural", # bold emphatic preacher + "Mob": "en-US-MichelleNeural", # crowd / multitude voice + "The Voice of the Mob": "en-US-MichelleNeural", # alias used in some editions +} + +# Voice used when a speaker label isn't found in CHARACTER_VOICES +FALLBACK_VOICE = "en-US-GuyNeural" + +# Lines/patterns that are ceremony stage-directions → read by Narrator +_STAGE_NARRATOR = re.compile( + r"^(Break for Instruction|Resume Session|All\s+arise|" + r"CHAPTER\s*\d*|________________+|────+)", + re.IGNORECASE, +) + +# Lines to skip entirely (decorative / empty) +_SKIP_RE = 
re.compile(r"^[—\-_\s\u2014\u2013]*$") + + +# ── Section extraction ───────────────────────────────────────────────────────── + +def extract_section(source: Path) -> str: + """Return text of the Sacred Temple Writings section.""" + lines = source.read_text(encoding="utf-8").splitlines() + in_sec = False + out: list[str] = [] + + for i, line in enumerate(lines): + s = line.strip() + if not in_sec: + if (s.upper() == _SEC_START_L1 and + i + 1 < len(lines) and + lines[i + 1].strip().upper().startswith(_SEC_START_L2)): + in_sec = True + else: + # End just before the next section + if (s.upper() == _SEC_END_L1 and + i + 1 < len(lines) and + lines[i + 1].strip().upper().startswith(_SEC_END_L2)): + break + out.append(line) + + if not out: + raise RuntimeError( + f"Could not locate 'Sacred Temple Writings' in '{source}'.\n" + "Ensure the source file has a line exactly matching " + f"'{_SEC_START_L1}' followed by '{_SEC_START_L2}'." + ) + return "\n".join(out) + + +# ── Segment parser ───────────────────────────────────────────────────────────── + +def _speaker_regex(characters: list[str]) -> re.Pattern: + """Regex matching [optional-number] CharacterName: text""" + # Sort longest-first so "Holy Ghost Elders" matches before "Holy Ghost" + names = sorted(characters, key=len, reverse=True) + pat = "|".join(re.escape(n) for n in names) + return re.compile(r"^\d*\s*(" + pat + r")\s*:\s*(.*)", re.IGNORECASE) + + +def parse_segments(text: str) -> list[tuple[str, str]]: + """ + Convert section text into a list of (normalised_speaker, spoken_text) tuples. + Non-attributed prose becomes Narrator lines. 
+ """ + char_re = _speaker_regex(list(CHARACTER_VOICES.keys())) + + # Build a quick lowercase→canonical lookup for speaker name normalisation + canon: dict[str, str] = {k.lower(): k for k in CHARACTER_VOICES} + + segments: list[tuple[str, str]] = [] + cur_speaker = "Narrator" + buf: list[str] = [] + + def flush() -> None: + combined = " ".join(l.strip() for l in buf if l.strip()) + if combined: + segments.append((cur_speaker, combined)) + buf.clear() + + for raw in text.splitlines(): + line = raw.strip() + + if not line or _SKIP_RE.match(line): + continue + + # Stage direction → Narrator reads it + if _STAGE_NARRATOR.match(line): + flush() + cur_speaker = "Narrator" + buf.append(line) + continue + + # "The words of Jehovah … are in blue." — formatting note, skip + if re.search(r"are in blue|words of jehovah", line, re.IGNORECASE): + continue + + m = char_re.match(line) + if m: + flush() + raw_name = m.group(1) + cur_speaker = canon.get(raw_name.lower(), raw_name) + spoken = m.group(2).strip() + if spoken: + buf.append(spoken) + else: + # Continuation of current speaker (or unattributed narrator prose) + buf.append(line) + + flush() + return segments + + +# ── Audio generation ─────────────────────────────────────────────────────────── + +async def _tts_bytes(text: str, voice: str) -> bytes: + """Stream edge-tts and return raw MP3 bytes.""" + communicate = edge_tts.Communicate(text, voice) + data = bytearray() + async for chunk in communicate.stream(): + if chunk["type"] == "audio": + data.extend(chunk["data"]) + return bytes(data) + + +def _mp3_to_numpy(mp3: bytes) -> np.ndarray: + """Decode MP3 bytes → mono float32 numpy array at SAMPLE_RATE using ffmpeg.""" + cmd = [ + "ffmpeg", "-hide_banner", "-loglevel", "error", + "-i", "pipe:0", # read MP3 from stdin + "-f", "f32le", # raw 32-bit little-endian float PCM + "-acodec", "pcm_f32le", + "-ac", "1", # mono + "-ar", str(SAMPLE_RATE), # resample to target rate + "pipe:1", # write PCM to stdout + ] + result = 
subprocess.run(cmd, input=mp3, capture_output=True, check=True) + return np.frombuffer(result.stdout, dtype=np.float32).copy() + + +def _silence(ms: int) -> np.ndarray: + return np.zeros(int(SAMPLE_RATE * ms / 1000), dtype=np.float32) + + +async def render( + segments: list[tuple[str, str]], + preview: int | None = None, +) -> np.ndarray: + """Generate and stitch all segment audio; return concatenated float32 array.""" + if preview is not None: + segments = segments[:preview] + + parts: list[np.ndarray] = [] + last_speaker: str | None = None + t0 = time.monotonic() + + for idx, (speaker, text) in enumerate(segments, 1): + voice = CHARACTER_VOICES.get(speaker, FALLBACK_VOICE) + marker = "⚠" if speaker not in CHARACTER_VOICES else " " + print(f" {marker}[{idx:>4}/{len(segments)}] {speaker:<28} {voice}") + + try: + mp3 = await _tts_bytes(text, voice) + except Exception as exc: + print(f" ↳ ERROR with '{voice}': {exc} — falling back to {FALLBACK_VOICE}") + mp3 = await _tts_bytes(text, FALLBACK_VOICE) + + audio = _mp3_to_numpy(mp3) + + if parts: + gap = PAUSE_SAME if speaker == last_speaker else PAUSE_CHANGE + parts.append(_silence(gap)) + parts.append(audio) + last_speaker = speaker + + elapsed = time.monotonic() - t0 + print(f"\n ✓ {len(segments)} segments in {elapsed:.0f}s") + return np.concatenate(parts) if parts else np.array([], dtype=np.float32) + + +# ── Voice listing ────────────────────────────────────────────────────────────── + +async def _list_voices_async() -> None: + voices = await edge_tts.list_voices() + english = sorted( + (v for v in voices if v["Locale"].startswith("en-")), + key=lambda v: (v["Locale"], v["ShortName"]), + ) + print(f"\n {'Locale':<12} {'Short Name':<45} Gender") + print(" " + "─" * 68) + for v in english: + print(f" {v['Locale']:<12} {v['ShortName']:<45} {v['Gender']}") + print(f"\n {len(english)} English voices total.") + + +# ── CLI / main ───────────────────────────────────────────────────────────────── + +def main() -> None: + ap 
= argparse.ArgumentParser( + description="Render Sacred Temple Writings with per-character edge-tts voices." + ) + ap.add_argument("--list-voices", action="store_true", + help="Print all available English edge-tts voices and exit.") + ap.add_argument("--print-segments", action="store_true", + help="Print parsed (speaker, text) segments and exit.") + ap.add_argument("--preview", type=int, metavar="N", + help="Render only the first N segments (quick test).") + args = ap.parse_args() + + if args.list_voices: + asyncio.run(_list_voices_async()) + return + + # ── Extract & parse ──────────────────────────────────────────────────────── + print(f"Source : {SOURCE_FILE}") + text = extract_section(SOURCE_FILE) + print(f"Section: {len(text):,} chars extracted\n") + + segments = parse_segments(text) + + if args.print_segments: + print(f"Parsed {len(segments)} segments:\n") + for i, (spkr, txt) in enumerate(segments, 1): + snippet = txt[:90] + ("…" if len(txt) > 90 else "") + voice = CHARACTER_VOICES.get(spkr, f"{FALLBACK_VOICE} ⚠") + print(f" {i:>4}. 
[{spkr}] ({voice})\n {snippet}\n") + return + + # ── Summary table ────────────────────────────────────────────────────────── + counts = Counter(s for s, _ in segments) + unrecognised = {s for s in counts if s not in CHARACTER_VOICES} + + print(f"Parsed {len(segments)} segments across {len(counts)} speakers:\n") + print(f" {'Speaker':<28} {'Segs':>5} {'Voice'}") + print(f" {'─'*28} {'─'*5} {'─'*45}") + for spkr, voice in CHARACTER_VOICES.items(): + if counts[spkr]: + print(f" {spkr:<28} {counts[spkr]:>5} {voice}") + for spkr in sorted(unrecognised): + print(f" {spkr:<28} {counts[spkr]:>5} {FALLBACK_VOICE} ⚠ unrecognised") + + total_chars = sum(len(t) for _, t in segments) + print(f"\n Total chars: {total_chars:,}") + if args.preview: + print(f" ⚡ PREVIEW MODE — rendering first {args.preview} segments only") + + # ── GPU note ─────────────────────────────────────────────────────────────── + # edge-tts is cloud-based (Microsoft Azure neural, free) — GPU not used. + print("\nNote: edge-tts uses Microsoft's servers (free, no API key needed).\n" + " Render speed depends on your internet connection.\n") + + # ── Render ───────────────────────────────────────────────────────────────── + OUTPUT_DIR.mkdir(exist_ok=True) + out_path = OUTPUT_DIR / ( + f"sacred_temple_writings_preview{args.preview}.wav" + if args.preview else OUTPUT_FILE + ) + + print("Rendering segments …\n") + audio = asyncio.run(render(segments, args.preview)) + + if audio.size > 0: + sf.write(str(out_path), audio, SAMPLE_RATE) + dur = len(audio) / SAMPLE_RATE + m, s = divmod(int(dur), 60) + print(f"\n✓ Saved '{out_path}' ({m}m {s:02d}s audio | {SAMPLE_RATE} Hz)") + else: + print("✗ No audio produced — check parsing with --print-segments") + + +if __name__ == "__main__": + main() diff --git a/output_proper_nouns/correct_words.json b/output_proper_nouns/correct_words.json index 4a6f93f..174005b 100644 --- a/output_proper_nouns/correct_words.json +++ b/output_proper_nouns/correct_words.json @@ -1,4 +1,7 @@ [ 
+ "Hagar", + "Ammonite", + "Seth", "Ninety-Two", "Gilgal", "Nat", @@ -107,7 +110,6 @@ "Ninety", "Nemenha", "Nem", - "Lord'S", "Levitical", "Obedience", "Consecration", diff --git a/output_proper_nouns/pronunciation_fixes.json b/output_proper_nouns/pronunciation_fixes.json index a405c87..a2be557 100644 --- a/output_proper_nouns/pronunciation_fixes.json +++ b/output_proper_nouns/pronunciation_fixes.json @@ -6,19 +6,30 @@ "Lehis": "Leehis", "Lehies": "Leehis", "Liahona": "Leeahona", - "Alma": "Al-ma", "Gadiantons": "Gadeeantuns", "Laban": "Layban", "Mosiah": "Moziah", "Nehors": "Kneehores", "Tarry": "Tarery", - "Nephihah": "Kneefihah", - "Nephihet": "Kneefihet", - "Nephite": "Kneefite", "Nephites": "Kneefites", - "Nephi-Im": "Kneefi-Im", - "Nephitish": "Kneefitish", - "Zenephi": "Zekneefi", - "Moroni": "Mor-oh-nye", - "Nephi": "Knee-fye" + "Anti-Nephi-Lehies": "Anti-Kneef-eye-Leehis", + "Lamanite": "Laymanite", + "Lamanites": "Laymanites", + "Lamb'S": "Lamb's", + "Sarai": "Sa-rye", + "Telestial": "Tea-lestial", + "Lord'S": "Lord's", + "Helaman": "He-la-mun", + "Alma": "Al-ma", + "Nephihah": "Kneef-eyehah", + "Nephihet": "Kneef-eyehet", + "Nephite": "Kneefight", + "Nephi-Im": "Kneef-eye-Im", + "Zenephi": "Ze-kneef-eye", + "Nephitish": "Kneefight-ish", + "Moroni": "Moh-roh-nye", + "Nephi": "Knee-fye", + "Hagar": "Hag-ar", + "Oug": "Ohg", + "Ougan": "Ohgan" } \ No newline at end of file diff --git a/run_audiobook.bat b/run_audiobook.bat new file mode 100644 index 0000000..ce938fc --- /dev/null +++ b/run_audiobook.bat @@ -0,0 +1,42 @@ +@echo off +title Create Audiobook + +:: Change to the folder this .bat file lives in +cd /d "%~dp0" + +:: Check setup has been run +if not exist .venv\Scripts\python.exe ( + echo ERROR: Setup has not been run yet. + echo Please double-click setup_windows.bat first. 
+    pause
+    exit /b 1
+)
+
+echo ============================================================
+echo   Audiobook Creator
+echo ============================================================
+echo.
+echo Options:
+echo   1 - Generate ALL chapters (may take many hours)
+echo   2 - List detected chapters only
+echo   3 - Generate a short PREVIEW of each chapter
+echo   4 - Generate specific chapters (enter numbers next)
+echo.
+set /p CHOICE="Enter choice (1/2/3/4): "
+
+if "%CHOICE%"=="1" (
+    .venv\Scripts\python create_audiobook_lightbringer.py
+) else if "%CHOICE%"=="2" (
+    .venv\Scripts\python create_audiobook_lightbringer.py --list
+) else if "%CHOICE%"=="3" (
+    .venv\Scripts\python create_audiobook_lightbringer.py --preview
+) else if "%CHOICE%"=="4" (
+    set /p CHAPTERS="Enter chapter numbers separated by spaces (e.g. 0 1 2): "
+    call .venv\Scripts\python create_audiobook_lightbringer.py %%CHAPTERS%%
+) else (
+    echo Invalid choice.
+)
+
+echo.
+echo Done. Output files are in the output_audiobook_lightbringer folder.
+pause
diff --git a/run_gui.bat b/run_gui.bat
new file mode 100644
index 0000000..b9060b3
--- /dev/null
+++ b/run_gui.bat
@@ -0,0 +1,21 @@
+@echo off
+title Proper Noun GUI
+
+:: Change to the folder this .bat file lives in
+cd /d "%~dp0"
+
+:: Check setup has been run
+if not exist .venv\Scripts\python.exe (
+    echo ERROR: Setup has not been run yet.
+    echo Please double-click setup_windows.bat first.
+    pause
+    exit /b 1
+)
+
+echo Starting Proper Noun Player GUI...
+.venv\Scripts\python gui_proper_noun_player.py
+if errorlevel 1 (
+    echo.
+    echo The application closed with an error. See message above.
+ pause +) diff --git a/setup_windows.bat b/setup_windows.bat new file mode 100644 index 0000000..d18e324 --- /dev/null +++ b/setup_windows.bat @@ -0,0 +1,86 @@ +@echo off +setlocal EnableDelayedExpansion +title Audiobook Setup + +echo ============================================================ +echo Audiobook Setup for Windows 11 +echo ============================================================ +echo. + +:: ── 1. Check Python ────────────────────────────────────────────────────────── +echo [1/5] Checking Python installation... +python --version >nul 2>&1 +if errorlevel 1 ( + echo. + echo ERROR: Python was not found. + echo. + echo Please install Python 3.11 from https://www.python.org/downloads/ + echo IMPORTANT: On the installer, tick "Add Python to PATH" before clicking Install. + echo. + echo After installing, close this window and double-click setup_windows.bat again. + pause + exit /b 1 +) + +for /f "tokens=2 delims= " %%v in ('python --version 2^>^&1') do set PY_VER=%%v +echo Found Python %PY_VER% +echo. + +:: ── 2. Create virtual environment ──────────────────────────────────────────── +echo [2/5] Creating virtual environment (.venv)... +if exist .venv ( + echo .venv already exists, skipping creation. +) else ( + python -m venv .venv + if errorlevel 1 ( + echo ERROR: Failed to create virtual environment. + pause + exit /b 1 + ) + echo Virtual environment created. +) +echo. + +:: ── 3. Install PyTorch with CUDA (for gaming GPU) ──────────────────────────── +echo [3/5] Installing PyTorch with CUDA 12.4 support (this may take a while)... +echo Downloading ~2.5 GB — please be patient. +echo. +.venv\Scripts\pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 +if errorlevel 1 ( + echo. + echo WARNING: CUDA build failed. Falling back to CPU-only PyTorch. + echo Audio generation will be slower but will still work. + .venv\Scripts\pip install torch +) +echo. + +:: ── 4. 
Install remaining packages ──────────────────────────────────────────── +echo [4/5] Installing remaining packages (kokoro, soundfile, sounddevice)... +.venv\Scripts\pip install -r requirements.txt +if errorlevel 1 ( + echo ERROR: Package installation failed. Check your internet connection. + pause + exit /b 1 +) +echo. + +:: ── 5. Download the Kokoro TTS model ───────────────────────────────────────── +echo [5/5] Downloading the Kokoro TTS model (hexgrad/Kokoro-82M, ~330 MB)... +echo This only happens once. +echo. +.venv\Scripts\python -c "from kokoro import KPipeline; KPipeline(lang_code='a', repo_id='hexgrad/Kokoro-82M'); print('Model ready.')" +if errorlevel 1 ( + echo. + echo WARNING: Model download failed. It will retry the first time you run the app. + echo Make sure you have an internet connection on first launch. +) + +echo. +echo ============================================================ +echo Setup complete! +echo. +echo To launch the GUI: double-click run_gui.bat +echo To create the audiobook: double-click run_audiobook.bat +echo ============================================================ +echo. +pause