audio gen in gui

This commit is contained in:
2026-04-08 01:42:29 -06:00
parent 69639342e3
commit 894144c84a
10 changed files with 1123 additions and 371 deletions

2
.envrc Normal file
View File

@ -0,0 +1,2 @@
export VIRTUAL_ENV="$PWD/.venv"
export PATH="$VIRTUAL_ENV/bin:$PATH"

3
.gitignore vendored
View File

@ -3,6 +3,9 @@ __pycache__/
*.pyc *.pyc
*.pyo *.pyo
.venv/ .venv/
build/
dist/
*.spec
# Audio files # Audio files
*.wav *.wav

4
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,4 @@
{
"python.defaultInterpreterPath": ".venv/bin/python",
"python.terminal.activateEnvironment": true
}

View File

@ -34,14 +34,24 @@ requirements.txt
setup_windows.bat ← one-click Windows setup setup_windows.bat ← one-click Windows setup
run_gui.bat ← launch GUI on Windows run_gui.bat ← launch GUI on Windows
run_audiobook.bat ← generate audiobook on Windows run_audiobook.bat ← generate audiobook on Windows
``` ---
## Setup (Windows - Easiest for Non-Tech Users)
1. **Download** the project as a ZIP file from GitHub
2. **Extract** the ZIP to a folder on your computer (e.g., `C:\audiobook-creator`)
3. **Double-click** `setup_windows.bat` and wait for it to finish installing everything (may take 10-20 minutes)
4. **Double-click** `run_gui.bat` to launch the Proper Noun Player GUI
5. **Double-click** `run_audiobook.bat` to generate audiobook chapters
That's it! Once Python is installed, the setup script creates the virtual environment and installs all dependencies automatically. If Python is missing, the script stops and tells you how to install it.
--- ---
## Setup (Linux / Mac) ## Setup (Linux / Mac)
```bash ```bash
python3.11 -m venv .venv python3.12 -m venv .venv
source .venv/bin/activate source .venv/bin/activate
pip install torch --index-url https://download.pytorch.org/whl/cu124 # CUDA 12.4 pip install torch --index-url https://download.pytorch.org/whl/cu124 # CUDA 12.4
pip install -r requirements.txt pip install -r requirements.txt

View File

@ -18,7 +18,7 @@ Follow the steps in order and you will be generating audiobook chapters with you
## Step 1 — Install Python ## Step 1 — Install Python
1. Go to **https://www.python.org/downloads/** 1. Go to **https://www.python.org/downloads/**
2. Click the big yellow **"Download Python 3.11.x"** button 2. Click the big yellow **"Download Python 3.12.x"** button
3. Run the installer 3. Run the installer
4. **IMPORTANT:** On the very first screen of the installer, tick the checkbox that says **"Add Python to PATH"** before clicking Install Now 4. **IMPORTANT:** On the very first screen of the installer, tick the checkbox that says **"Add Python to PATH"** before clicking Install Now

402
create_audiobook.py Normal file
View File

@ -0,0 +1,402 @@
"""
create_audiobook.py
------------------
Generic audiobook generator for text files that contain chapter headings.
Supported heading formats (single-line headings):
- Prologue
- Chapter 12
- Chapter 12 - Chapter Name
- Chapter - 12
- Chapter - 12 - Chapter Name
Features:
- Parses chapters from one or more input files/directories
- Caches parsed chapter data for faster re-runs when source files are unchanged
- Warns about missing chapter numbers (example: found 1,2,4 -> warns about 3)
- Generates one .wav per chapter with Kokoro
Examples:
python create_audiobook.py --input "Audio Text for Novel Lightbringer"
python create_audiobook.py --input novel.txt --list
python create_audiobook.py --input novel.txt 0 1 2 --voice am_michael
python create_audiobook.py --input novel.txt --preview 3000
"""
from __future__ import annotations
import argparse
import hashlib
import json
import re
import time
from pathlib import Path
import numpy as np
import soundfile as sf
import torch
from kokoro import KPipeline
# Sample rate (Hz) used when writing chapter audio and when converting a
# sample count into a duration in seconds.
SAMPLE_RATE = 24000
# Speed value forwarded to the Kokoro pipeline on every synthesis call.
SPEED = 1.0
# Language code handed to KPipeline(lang_code=...).
LANG_CODE = "a"
# Default for the --voice command-line option.
VOICE = "am_onyx"
# Schema version stored in the parse cache; a cache file carrying a different
# version is ignored.
CACHE_VERSION = 1
# Heading patterns. All are case-insensitive and anchored with ^...$.
# "Prologue" on its own.
PROLOGUE_RE = re.compile(r"^\s*Prologue\s*$", re.IGNORECASE)
# "Chapter - 12" or "Chapter - 12 - Title" (group 1: number, group 2: optional title).
CHAPTER_RE_1 = re.compile(r"^\s*Chapter\s*-\s*(\d+)(?:\s*-\s*(.+))?\s*$", re.IGNORECASE)
# "Chapter 12" or "Chapter 12 - Title" (group 1: number, group 2: optional title).
CHAPTER_RE_2 = re.compile(r"^\s*Chapter\s+(\d+)(?:\s*-\s*(.+))?\s*$", re.IGNORECASE)
# A string made up solely of underscores, hyphens, asterisks and whitespace
# (three or more characters), i.e. a horizontal rule. Compiled without
# re.MULTILINE, so ^ and $ anchor to the whole string being matched.
RULE_RE = re.compile(r"^[_\-*\s]{3,}\s*$")
def _slug(text: str) -> str:
    """Return *text* as a lowercase, underscore-separated identifier.

    After lowercasing, every run of characters outside ``a-z0-9`` collapses
    into a single underscore; leading and trailing underscores are dropped.
    """
    collapsed = re.sub(r"[^a-z0-9]+", "_", text.lower())
    return collapsed.strip("_")
def _clean_text(text: str) -> str:
    """Strip horizontal-rule lines and surplus blank lines from chapter text.

    Args:
        text: Raw chapter text, possibly spanning many lines.

    Returns:
        The text with every line that matches ``RULE_RE`` blanked out, runs
        of three or more newlines collapsed to exactly two, and leading and
        trailing whitespace removed.
    """
    # RULE_RE is anchored with ^...$ and compiled without re.MULTILINE, so it
    # has to be tested one line at a time. Substituting it over the whole
    # chapter only ever matches when the entire text is a rule, which left
    # separator lines such as "---" or "* * *" inside chapters untouched.
    kept = ["" if RULE_RE.match(line) else line for line in text.split("\n")]
    text = "\n".join(kept)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
def _fmt_duration(seconds: float) -> str:
    """Render a duration as ``"1h 02m 03s"``, ``"2m 03s"`` or ``"3s"``.

    The value is truncated to whole seconds first. Hours are shown only when
    positive; minutes are shown when hours are shown or when they are
    positive themselves.
    """
    whole = int(seconds)
    hours = whole // 3600
    minutes = (whole % 3600) // 60
    secs = whole % 60
    if hours > 0:
        return f"{hours}h {minutes:02d}m {secs:02d}s"
    return f"{minutes}m {secs:02d}s" if minutes > 0 else f"{secs}s"
def _chapter_heading(line: str) -> tuple[int, str, str] | None:
    """Classify *line* as a chapter heading.

    Returns:
        ``(number, title, label)`` when the stripped line matches one of the
        heading patterns, otherwise ``None``. A prologue is reported as
        ``(0, "Prologue", "Prologue")``. For numbered chapters the title is
        ``""`` when absent, and the label is ``"Chapter N"`` or
        ``"Chapter N - Title"``.
    """
    candidate = line.strip()
    if PROLOGUE_RE.match(candidate):
        return (0, "Prologue", "Prologue")

    # Try the "Chapter - N" form first, then the "Chapter N" form.
    match = CHAPTER_RE_1.match(candidate) or CHAPTER_RE_2.match(candidate)
    if match is None:
        return None

    number = int(match.group(1))
    title = (match.group(2) or "").strip()
    label = f"Chapter {number} - {title}" if title else f"Chapter {number}"
    return (number, title, label)
def _resolve_txt_files(inputs: list[str]) -> list[Path]:
    """Expand input paths into a sorted, de-duplicated list of ``.txt`` files.

    Args:
        inputs: File and/or directory paths. A file is kept only when its
            suffix is ``.txt`` (compared case-insensitively). A directory
            contributes the ``.txt`` files directly inside it; it is not
            searched recursively. Paths that do not exist are ignored.

    Returns:
        Resolved paths in sorted order, each appearing once.
    """

    def is_txt_file(candidate: Path) -> bool:
        return candidate.is_file() and candidate.suffix.lower() == ".txt"

    found: set[Path] = set()
    for raw in inputs:
        path = Path(raw)
        if path.is_dir():
            # Apply the same test as for explicitly named files. A plain
            # glob("*.txt") is case-sensitive on POSIX (skipping "BOOK.TXT")
            # and also yields sub-directories whose name ends in ".txt".
            found.update(entry.resolve() for entry in path.iterdir() if is_txt_file(entry))
        elif is_txt_file(path):
            found.add(path.resolve())
    return sorted(found)
def _signature_for_files(files: list[Path]) -> list[dict]:
    """Describe each file by path, size and modification time.

    Returns one dict per input file, in input order, with the keys ``path``
    (string form of the path), ``size`` (``st_size``) and ``mtime_ns``
    (``st_mtime_ns``). The result is compared against the signature stored
    in the parse cache.
    """

    def describe(path: Path) -> dict:
        info = path.stat()
        return {
            "path": str(path),
            "size": info.st_size,
            "mtime_ns": info.st_mtime_ns,
        }

    return [describe(path) for path in files]
def _cache_path(output_dir: Path, files: list[Path]) -> Path:
    """Return the parse-cache file location for this set of input files.

    The file lives in ``<output_dir>/.cache`` and is named after the first
    12 hex digits of the SHA-256 of the newline-joined file paths, so each
    distinct list of inputs gets its own cache file.
    """
    joined = "\n".join(map(str, files))
    short_hash = hashlib.sha256(joined.encode("utf-8")).hexdigest()[:12]
    return output_dir / ".cache" / f"parse_{short_hash}.json"
def _load_cached_chapters(cache_file: Path, file_sig: list[dict]) -> list[dict] | None:
    """Return the cached chapter list, or ``None`` when the cache is unusable.

    Args:
        cache_file: JSON cache file to read.
        file_sig: Signature of the current input files; it must equal the
            ``file_signature`` stored in the cache.

    Returns:
        The cached ``chapters`` list when the file exists, parses as a JSON
        object, carries the current ``CACHE_VERSION`` and a matching file
        signature, and ``chapters`` is a list. ``None`` in every other case,
        so callers fall back to re-parsing.
    """
    try:
        data = json.loads(cache_file.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: invalid JSON or
        # undecodable bytes (JSONDecodeError and UnicodeDecodeError both
        # derive from it). Either way the cache is simply a miss.
        return None

    # Valid JSON is not necessarily an object ("[]", "null", ...); without
    # this guard the .get() calls below would raise AttributeError.
    if not isinstance(data, dict):
        return None
    if data.get("version") != CACHE_VERSION:
        return None
    if data.get("file_signature") != file_sig:
        return None

    chapters = data.get("chapters")
    return chapters if isinstance(chapters, list) else None
def _save_cached_chapters(cache_file: Path, file_sig: list[dict], chapters: list[dict]) -> None:
    """Write the parsed chapters and their file signature to *cache_file*.

    The parent directory is created if needed. The payload is a JSON object
    with the keys ``version``, ``file_signature`` and ``chapters``, written
    as UTF-8 with non-ASCII characters left unescaped.
    """
    cache_file.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(
        dict(version=CACHE_VERSION, file_signature=file_sig, chapters=chapters),
        ensure_ascii=False,
    )
    cache_file.write_text(serialized, encoding="utf-8")
def _parse_chapters(files: list[Path]) -> tuple[list[dict], set[int]]:
    """Split the given text files into chapters at each recognised heading.

    The files are read in the order given and treated as one continuous
    stream: lines before the first heading are discarded, and a chapter runs
    until the next heading, even when that heading is in a later file.

    Args:
        files: Text files to read.

    Returns:
        ``(chapters, duplicates)``. ``chapters`` is sorted by chapter number;
        each entry is a dict with the keys ``num``, ``title``, ``label``,
        ``slug`` and ``text`` (the text starts with the heading line itself).
        ``duplicates`` holds every chapter number that occurred more than
        once; only the first occurrence of such a chapter is kept.
    """
    chapters: list[dict] = []
    duplicates: set[int] = set()
    seen: set[int] = set()
    current: dict | None = None

    def flush_current() -> None:
        """Finalise the chapter being collected, if there is one."""
        if current is None:
            return
        current["text"] = "".join(current.pop("lines"))
        num = current["num"]
        if num in seen:
            # First occurrence wins; later ones are only reported.
            duplicates.add(num)
            return
        seen.add(num)
        chapters.append(current)

    for fpath in files:
        # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading byte
        # order mark. With plain "utf-8" the BOM stays glued to the first
        # line, so a heading on line 1 is not recognised and the opening
        # chapter is lost.
        # NOTE(review): parse caches written before this change keep the old
        # result until the source file changes or --reparse is used.
        with fpath.open("r", encoding="utf-8-sig") as fh:
            for line in fh:
                info = _chapter_heading(line)
                if info is None:
                    if current is not None:
                        current["lines"].append(line)
                    continue

                flush_current()
                num, title, label = info
                if num == 0:
                    slug = "chapter_00_prologue"
                elif title:
                    slug = f"chapter_{num:02d}_{_slug(title)}"
                else:
                    slug = f"chapter_{num:02d}"
                current = {
                    "num": num,
                    "title": title,
                    "label": label,
                    "slug": slug,
                    "lines": [line],
                }

    # Close the chapter still open after the last file.
    flush_current()
    chapters.sort(key=lambda c: c["num"])
    return chapters, duplicates
def load_all_chapters_with_cache(inputs: list[str], output_dir: Path, force_reparse: bool = False) -> tuple[list[dict], bool, set[int], list[Path]]:
    """Load chapters for the given inputs, reusing the parse cache when valid.

    Args:
        inputs: File and/or directory paths to scan for ``.txt`` files.
        output_dir: Directory under which the parse cache is stored.
        force_reparse: Skip the cache lookup and parse the sources again.

    Returns:
        ``(chapters, used_cache, duplicates, files)``. On a cache hit
        ``used_cache`` is ``True`` and ``duplicates`` is an empty set; after
        a fresh parse the result is written back to the cache.

    Raises:
        FileNotFoundError: If the inputs yield no ``.txt`` files.
    """
    files = _resolve_txt_files(inputs)
    if not files:
        raise FileNotFoundError("No .txt files found in --input paths")

    file_sig = _signature_for_files(files)
    cache_file = _cache_path(output_dir, files)

    cached = None if force_reparse else _load_cached_chapters(cache_file, file_sig)
    if cached is not None:
        return cached, True, set(), files

    parsed, duplicates = _parse_chapters(files)
    _save_cached_chapters(cache_file, file_sig, parsed)
    return parsed, False, duplicates, files
def warn_missing_chapters(chapters: list[dict]) -> None:
    """Print a warning listing gaps in the chapter numbering.

    Only chapter numbers greater than zero are considered (the prologue is
    chapter 0). Every number between the smallest and largest one present
    that has no chapter is reported; nothing is printed when there are no
    gaps or no numbered chapters.

    Args:
        chapters: Chapter dicts, each with an integer ``num`` key.
    """
    # Build the lookup set once; rebuilding it for every candidate number
    # made the gap scan quadratic in the chapter count.
    present = {ch["num"] for ch in chapters if ch["num"] > 0}
    if not present:
        return
    missing = [n for n in range(min(present), max(present) + 1) if n not in present]
    if missing:
        print(f"WARNING: missing chapter numbers detected: {missing}")
def generate_audio(pipeline: KPipeline, text: str, voice: str, output_path: Path) -> float:
    """Synthesise *text* with *pipeline* and write the audio to *output_path*.

    Each item yielded by the pipeline is unpacked as a triple whose third
    element is the audio chunk. Chunks exposing a ``numpy`` attribute are
    moved to the CPU and converted; every chunk is squeezed to at least one
    dimension and empty chunks are dropped. The remaining chunks are
    concatenated and written at ``SAMPLE_RATE``. When nothing was produced,
    an error line is printed and no file is written.

    Returns:
        Wall-clock seconds spent in synthesis (writing the file is not
        included).
    """
    started = time.monotonic()
    pieces = []
    for _, _, samples in pipeline(text, voice=voice, speed=SPEED):
        if hasattr(samples, "numpy"):
            samples = samples.cpu().numpy()
        samples = np.atleast_1d(samples.squeeze())
        if samples.size:
            pieces.append(samples)
    elapsed = time.monotonic() - started

    if not pieces:
        print(f" ERROR no audio produced for voice='{voice}'")
        return elapsed

    audio = np.concatenate(pieces, axis=0)
    sf.write(str(output_path), audio, SAMPLE_RATE)
    duration = len(audio) / SAMPLE_RATE
    print(
        f" OK saved '{output_path.name}' "
        f"({_fmt_duration(duration)} audio | {_fmt_duration(elapsed)} wall-clock)"
    )
    return elapsed
def _apply_preview_cap(text: str, limit: int | None) -> str:
    """Return *text* cut to at most *limit* characters, preferably at a space.

    A falsy *limit* (``None`` or ``0``), or a text no longer than *limit*,
    returns the text unchanged.
    """
    if not limit or len(text) <= limit:
        return text
    cut = text.rfind(" ", 0, limit)
    return text[: cut if cut > 0 else limit]


def main() -> None:
    """Command-line entry point: parse chapters and synthesise one file each.

    Reads the arguments, loads chapters (using the parse cache unless
    ``--reparse`` is given), prints duplicate/missing-chapter warnings, and
    then either lists the chapters (``--list``) or generates one ``.wav``
    per selected chapter in the output directory, printing a running time
    estimate and a final timing summary.
    """
    parser = argparse.ArgumentParser(description="Generate an audiobook from chapterized text files.")
    parser.add_argument(
        "chapters",
        nargs="*",
        type=int,
        help="Chapter numbers to generate (0 = Prologue). Default: all.",
    )
    parser.add_argument(
        "--input",
        nargs="+",
        required=True,
        help="One or more .txt files and/or directories containing .txt files.",
    )
    parser.add_argument(
        "--output",
        default="output_audiobook",
        help="Output directory for generated chapter audio.",
    )
    parser.add_argument("--list", action="store_true", help="Print detected chapters and exit.")
    parser.add_argument("--voice", default=VOICE, help=f"Kokoro voice to use (default: {VOICE}).")
    parser.add_argument(
        "--preview",
        nargs="?",
        const=3000,
        type=int,
        metavar="CHARS",
        help="Generate short preview clips capped at CHARS (default: 3000).",
    )
    parser.add_argument(
        "--reparse",
        action="store_true",
        help="Ignore cache and re-parse chapters from source files.",
    )
    args = parser.parse_args()

    # The output directory also hosts the parse cache, so it is created
    # before any chapters are loaded.
    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    print("Loading chapters...")
    chapters, used_cache, duplicates, files = load_all_chapters_with_cache(
        args.input, output_dir, force_reparse=args.reparse
    )
    print(f"Input files: {len(files)}")
    print(f"Parse cache: {'HIT' if used_cache else 'MISS'}")
    if duplicates:
        print(f"WARNING: duplicate chapter numbers were found and ignored: {sorted(duplicates)}")
    if not chapters:
        print("WARNING: no chapters found.")
        print("Expected headings like: 'Prologue' or 'Chapter 12 - Name' or 'Chapter - 12'")
        return
    warn_missing_chapters(chapters)

    if args.list:
        print(f"\nDetected {len(chapters)} chapters:\n")
        print(f" {'#':>4} {'Label':<45} {'Chars':>8} {'Output filename'}")
        print(f" {'-' * 4} {'-' * 45} {'-' * 8} {'-' * 30}")
        for ch in chapters:
            chars = len(_clean_text(ch["text"]))
            print(f" {ch['num']:>4} {ch['label']:<45} {chars:>8,} {ch['slug']}.wav")
        return

    if args.chapters:
        requested = set(args.chapters)
        run_chapters = [ch for ch in chapters if ch["num"] in requested]
        missing_req = sorted(requested - {ch["num"] for ch in run_chapters})
        if missing_req:
            print(f"WARNING: requested chapter(s) not found: {missing_req}")
    else:
        run_chapters = chapters
    if not run_chapters:
        print("No chapters selected. Use --list to see available chapters.")
        return

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")
    if device == "cuda":
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Voice: {args.voice}")

    # Clean every selected chapter once; the plan and the synthesis loop
    # both work from these strings.
    cleaned = {ch["num"]: _clean_text(ch["text"]) for ch in run_chapters}
    chapter_chars = {num: len(text) for num, text in cleaned.items()}
    total_chars = sum(chapter_chars.values())

    # What will actually be synthesised. In preview mode this is much less
    # than total_chars, so the remaining-time estimate must be based on
    # these lengths rather than on the full chapter sizes.
    jobs = [(ch, _apply_preview_cap(cleaned[ch["num"]], args.preview)) for ch in run_chapters]
    planned_chars = sum(len(text) for _, text in jobs)

    preview_note = f"PREVIEW MODE: capped at {args.preview:,} chars/chapter" if args.preview else ""
    if preview_note:
        print(preview_note)
    print("\nPlan:")
    for ch in run_chapters:
        print(f" {ch['num']:>3} {ch['label']} ({chapter_chars[ch['num']]:,} chars)")
    print(f" TOTAL: {total_chars:,} chars\n")

    print("Initializing Kokoro pipeline...")
    pipeline = KPipeline(lang_code=LANG_CODE)

    # Throughput measured so far; None until the first chapter has finished.
    chars_per_sec: float | None = None
    timing_rows: list[tuple[str, int, float]] = []
    preview_tag = "_preview" if args.preview else ""
    for ch, text in jobs:
        if not text:
            print(f"[{ch['label']}] WARNING empty text, skipping")
            continue
        chars = len(text)
        out_path = output_dir / f"{ch['slug']}{preview_tag}.wav"
        if chars_per_sec is not None:
            eta = _fmt_duration(chars / chars_per_sec)
            print(f"\n[{ch['label']}] -> {out_path.name} (est. {eta})")
        else:
            print(f"\n[{ch['label']}] -> {out_path.name} (calibration run)")
        elapsed = generate_audio(pipeline, text, args.voice, out_path)
        timing_rows.append((ch["label"], chars, elapsed))

        done_chars = sum(c for _, c, _ in timing_rows)
        done_elapsed = sum(e for _, _, e in timing_rows)
        if done_elapsed > 0:
            chars_per_sec = done_chars / done_elapsed
            remaining = planned_chars - done_chars
            eta_total = _fmt_duration(remaining / chars_per_sec) if remaining > 0 else "0s"
            print(f" Speed: {chars_per_sec:.0f} chars/sec | Estimated remaining: {eta_total}")

    print("\nSummary:")
    print(f" {'Chapter':<35} {'Chars':>7} {'Actual':>8} {'Est':>8}")
    print(" " + "-" * 65)
    for i, (label, chars, elapsed) in enumerate(timing_rows):
        actual_str = _fmt_duration(elapsed)
        # The estimate shown is what the throughput of all earlier chapters
        # would have predicted for this one.
        prior_chars = sum(c for _, c, _ in timing_rows[:i])
        prior_elapsed = sum(e for _, _, e in timing_rows[:i])
        est_str = _fmt_duration(chars / (prior_chars / prior_elapsed)) if prior_elapsed > 0 else "(first)"
        print(f" {label:<35} {chars:>7,} {actual_str:>8} {est_str:>8}")
    total_elapsed = sum(e for _, _, e in timing_rows)
    total_done_chars = sum(c for _, c, _ in timing_rows)
    print(" " + "-" * 65)
    print(f" {'TOTAL':<35} {total_done_chars:>7,} {_fmt_duration(total_elapsed):>8}")
    print("\nDone.")


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
{
"Adam": "adam.wav",
"Adam-Ondi-Ahman": "adam_ondi_ahman.wav",
"Ahman": "ahman.wav",
"Alma": "alma.wav",
"Apostles": "apostles.wav",
"Brethren": "brethren.wav",
"Cardston": "cardston.wav",
"Ephraim": "ephraim.wav",
"Evolving": "evolving.wav",
"Holies": "holies.wav",
"Israel": "israel.wav",
"Joseph": "joseph.wav",
"Knelt": "knelt.wav",
"Lehi": "lehi.wav",
"Liahona": "liahona.wav",
"Millennium": "millennium.wav",
"Mormon": "mormon.wav",
"Moroni": "moroni.wav",
"Mosiah": "mosiah.wav",
"Nauvoo": "nauvoo.wav",
"Quorum": "quorum.wav",
"Rachael": "rachael.wav",
"Savior": "savior.wav",
"Thummim": "thummim.wav",
"Urim": "urim.wav",
"Vignette": "vignette.wav",
"Zachary": "zachary.wav",
"Zion": "zion.wav"
}

14
projects.json Normal file
View File

@ -0,0 +1,14 @@
[
{
"name": "Audio Text for Novel Lightbringer",
"source_paths": [
"/home/dillon/_code/voice_model/Audio Text for Novel Lightbringer/Audio Text for Novel Lightbringer.txt"
]
},
{
"name": "visions glory canada",
"source_paths": [
"/home/dillon/_code/voice_model/Visions of Glory_ Zion in Canada pg 162-193.txt"
]
}
]

View File

@ -14,7 +14,7 @@ if errorlevel 1 (
echo. echo.
echo ERROR: Python was not found. echo ERROR: Python was not found.
echo. echo.
echo Please install Python 3.11 from https://www.python.org/downloads/ echo Please install Python 3.12 from https://www.python.org/downloads/
echo IMPORTANT: On the installer, tick "Add Python to PATH" before clicking Install. echo IMPORTANT: On the installer, tick "Add Python to PATH" before clicking Install.
echo. echo.
echo After installing, close this window and double-click setup_windows.bat again. echo After installing, close this window and double-click setup_windows.bat again.