""" create_audiobook_lightbringer.py ───────────────────────────────── Generate the "A Darkness Rising" audiobook — one file per chapter/prologue. Reads all .txt files from NOVEL_DIR, detects Prologue + Chapter headings, and writes one .wav per chapter into OUTPUT_DIR. Usage: python create_audiobook_lightbringer.py # all chapters python create_audiobook_lightbringer.py --list # list detected chapters python create_audiobook_lightbringer.py 0 1 2 # prologue + ch1 + ch2 python create_audiobook_lightbringer.py --preview # short preview clips Output filenames: chapter_00_prologue.wav chapter_01_homecoming.wav chapter_02_the_anhuil_ehlar.wav ... """ import argparse import re import time import numpy as np import soundfile as sf import torch from pathlib import Path from kokoro import KPipeline # ── Config ───────────────────────────────────────────────────────────────────── NOVEL_DIR = Path("Audio Text for Novel Lightbringer") OUTPUT_DIR = Path("output_audiobook_lightbringer") SAMPLE_RATE = 24000 SPEED = 1.0 LANG_CODE = "a" # American English VOICE = "am_onyx" # default narrator voice # Regex that matches a chapter/prologue heading line (case-insensitive). # Group 1 captures the chapter number (or None for Prologue). # Group 2 captures the optional subtitle after " - ". _HEADING_RE = re.compile( r"^(?:Chapter\s+(\d+)\s*(?:-\s*(.+))?|(Prologue))\s*$", re.IGNORECASE, ) # ── Helpers ──────────────────────────────────────────────────────────────────── def _slug(text: str) -> str: """Convert title text to a filesystem-safe slug.""" text = text.lower() text = re.sub(r"[^a-z0-9]+", "_", text) return text.strip("_") def load_all_chapters(novel_dir: Path) -> list[dict]: """ Read all .txt files in *novel_dir* in sorted order, detect Prologue / Chapter headings, and return a list of chapter dicts: { "num": int, # 0 = Prologue "title": str, # subtitle portion, e.g. "Homecoming" "label": str, # human label, e.g. "Chapter 1 - Homecoming" "slug": str, # e.g. "chapter_01_homecoming" "text": str, # full body text of the chapter } Chapters from multiple files are concatenated in sorted-filename order. """ txt_files = sorted(novel_dir.glob("*.txt")) if not txt_files: raise FileNotFoundError(f"No .txt files found in '{novel_dir}'") # Collect (chapter_num, title_line, body_lines) across all files raw: list[tuple[int, str, list[str]]] = [] # (num, heading_text, body) current_num: int | None = None current_heading: str = "" current_body: list[str] = [] def _flush(): if current_num is not None: raw.append((current_num, current_heading, list(current_body))) for fpath in txt_files: lines = fpath.read_text(encoding="utf-8").splitlines() for line in lines: m = _HEADING_RE.match(line.strip()) if m: _flush() if m.group(3): # Prologue current_num = 0 current_heading = "Prologue" else: # Chapter N current_num = int(m.group(1)) subtitle = (m.group(2) or "").strip() current_heading = f"Chapter {current_num}" + (f" - {subtitle}" if subtitle else "") current_body = [line] # keep heading inside text else: if current_num is not None: current_body.append(line) _flush() # Build chapter dicts, deduplicated and sorted by number seen: set[int] = set() chapters: list[dict] = [] for num, heading, body in sorted(raw, key=lambda x: x[0]): if num in seen: continue seen.add(num) # Derive subtitle / slug subtitle = "" sm = re.match(r"Chapter\s+\d+\s*-\s*(.+)", heading, re.IGNORECASE) if sm: subtitle = sm.group(1).strip() elif heading.lower() == "prologue": subtitle = "Prologue" num_str = f"{num:02d}" if subtitle: slug = f"chapter_{num_str}_{_slug(subtitle)}" else: slug = f"chapter_{num_str}" chapters.append({ "num": num, "title": subtitle or heading, "label": heading, "slug": slug, "text": "\n".join(body), }) return chapters def clean_text(text: str) -> str: """Strip formatting artifacts and normalise whitespace for TTS.""" # Remove horizontal-rule lines (underscores / asterisks / dashes) text = re.sub(r"^[_\-\*\s]{3,}\s*$", "", text, flags=re.MULTILINE) # Collapse 3+ blank lines to 2 text = re.sub(r"\n{3,}", "\n\n", text) return text.strip() def _fmt_duration(seconds: float) -> str: if seconds >= 60: m, s = divmod(int(seconds), 60) return f"{m}m {s:02d}s" return f"{seconds:.0f}s" def generate_audio(pipeline: KPipeline, text: str, voice: str, output_path: Path) -> float: """Generate audio and return wall-clock seconds elapsed.""" t0 = time.monotonic() chunks = [] for _, _, chunk_audio in pipeline(text, voice=voice, speed=SPEED): if hasattr(chunk_audio, "numpy"): chunk_audio = chunk_audio.cpu().numpy() chunk_audio = np.atleast_1d(chunk_audio.squeeze()) if chunk_audio.size > 0: chunks.append(chunk_audio) elapsed = time.monotonic() - t0 if chunks: audio = np.concatenate(chunks, axis=0) sf.write(str(output_path), audio, SAMPLE_RATE) duration = len(audio) / SAMPLE_RATE print(f" ✓ Saved '{output_path.name}' " f"({duration:.1f}s audio | {elapsed:.1f}s wall-clock)") else: print(f" ✗ No audio produced for voice='{voice}'") return elapsed # ── Main ─────────────────────────────────────────────────────────────────────── def main() -> None: parser = argparse.ArgumentParser( description="Generate 'A Darkness Rising' audiobook, one file per chapter." ) parser.add_argument( "chapters", nargs="*", type=int, help="Chapter numbers to generate (0 = Prologue). Default: all.", ) parser.add_argument( "--list", action="store_true", help="Print detected chapters and exit.", ) parser.add_argument( "--voice", default=VOICE, help=f"Kokoro voice to use (default: {VOICE}).", ) parser.add_argument( "--preview", nargs="?", const=3000, type=int, metavar="CHARS", help="Generate short preview clips (default: 3000 chars). " "Output filenames get a _preview suffix.", ) args = parser.parse_args() print("Loading chapters …") all_chapters = load_all_chapters(NOVEL_DIR) if args.list: print(f"\nDetected {len(all_chapters)} chapters:\n") print(f" {'#':>4} {'Label':<45} {'Chars':>8} {'Output filename'}") print(f" {'─'*4} {'─'*45} {'─'*8} {'─'*30}") for ch in all_chapters: chars = len(clean_text(ch["text"])) print(f" {ch['num']:>4} {ch['label']:<45} {chars:>8,} {ch['slug']}.wav") return # Filter to requested subset if args.chapters: requested = set(args.chapters) run_chapters = [ch for ch in all_chapters if ch["num"] in requested] missing = requested - {ch["num"] for ch in run_chapters} if missing: print(f"⚠ Chapter(s) not found: {sorted(missing)}") else: run_chapters = all_chapters if not run_chapters: print("No chapters selected. Use --list to see available chapters.") return voice = args.voice device = "cuda" if torch.cuda.is_available() else "cpu" print(f"Device: {device}") if device == "cuda": print(f"GPU: {torch.cuda.get_device_name(0)}") print(f"Voice: {voice}") OUTPUT_DIR.mkdir(exist_ok=True) # Pre-compute char counts chapter_chars = {ch["num"]: len(clean_text(ch["text"])) for ch in run_chapters} preview_note = (f" ⚡ PREVIEW MODE — capped at {args.preview:,} chars/chapter\n" if args.preview else "") print(f"\n{preview_note}{'─'*65}") print(f" {'#':>4} {'Label':<40} {'Chars':>8}") print(f" {'─'*4} {'─'*40} {'─'*8}") for ch in run_chapters: print(f" {ch['num']:>4} {ch['label']:<40} {chapter_chars[ch['num']]:>8,}") print(f" {'─'*55}") total_chars = sum(chapter_chars.values()) print(f" {'TOTAL':<45} {total_chars:>8,}\n") print("Initialising Kokoro pipeline …") pipeline = KPipeline(lang_code=LANG_CODE) chars_per_sec: float | None = None timing_rows: list[tuple[str, int, float]] = [] for ch in run_chapters: text = clean_text(ch["text"]) if not text: print(f"\n[{ch['label']}] ⚠ Empty text — skipping") continue preview_chars = args.preview if preview_chars and len(text) > preview_chars: cut = text.rfind(" ", 0, preview_chars) text = text[: cut if cut > 0 else preview_chars] chars = len(text) preview_tag = "_preview" if args.preview else "" out_path = OUTPUT_DIR / f"{ch['slug']}{preview_tag}.wav" if chars_per_sec is not None: eta_str = _fmt_duration(chars / chars_per_sec) print(f"\n[{ch['label']}] voice={voice} → {out_path.name} (est. {eta_str})") else: print(f"\n[{ch['label']}] voice={voice} → {out_path.name} (calibration run)") elapsed = generate_audio(pipeline, text, voice, out_path) timing_rows.append((ch["label"], chars, elapsed)) total_done = sum(c for _, c, _ in timing_rows) total_elapsed_done = sum(e for _, _, e in timing_rows) if total_elapsed_done > 0: chars_per_sec = total_done / total_elapsed_done print(f" ⏱ Calibration: {chars_per_sec:.0f} chars/sec") # Summary print("\n" + "─" * 65) print(f" {'Chapter':<35} {'Chars':>7} {'Actual':>8} {'Est':>8}") print("─" * 65) for i, (label, chars, elapsed) in enumerate(timing_rows): actual_str = _fmt_duration(elapsed) prior_chars = sum(c for _, c, _ in timing_rows[:i]) prior_elapsed = sum(e for _, _, e in timing_rows[:i]) if prior_elapsed > 0: est_str = _fmt_duration(chars / (prior_chars / prior_elapsed)) else: est_str = "(first)" print(f" {label:<35} {chars:>7,} {actual_str:>8} {est_str:>8}") total_elapsed = sum(e for _, _, e in timing_rows) print("─" * 65) print(f" {'TOTAL':<35} {sum(c for _,c,_ in timing_rows):>7,} " f"{_fmt_duration(total_elapsed):>8}") print("\nDone.") if __name__ == "__main__": main()