improved proper noun parsing
This commit is contained in:
@ -11,6 +11,7 @@ Output .wav files are written to OUTPUT_DIR (created automatically).
|
||||
"""
|
||||
|
||||
import re
|
||||
import time
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
@ -51,19 +52,19 @@ BOOKS = [
|
||||
# label start_marker voice output_wav
|
||||
("Introduction", "Introduction", "af_heart", "00_introduction.wav"),
|
||||
("Book of Hagoth", "THE BOOK OF HAGOTH", "am_fenrir", "01_hagoth.wav"),
|
||||
("Shi-Tugo I", "THE FIRST BOOK OF SHI-TUGO", "am_eric", "02_shi_tugo_1.wav"),
|
||||
("Sanempet", "THE BOOK OF SANEMPET", "am_liam", "03_sanempet.wav"),
|
||||
("Oug", "THE BOOK OF OUG", "am_michael", "04_oug.wav"),
|
||||
("Temple Writings of Oug", "THE BOOK OF", "am_michael", "05_temple_writings_oug.wav"),
|
||||
("Sacred Temple Writings", "THE SACRED", "am_michael", "06_sacred_temple_writings.wav"),
|
||||
("Samuel the Lamanite I", "THE FIRST BOOK", "am_echo", "07_samuel_lamanite_1.wav"),
|
||||
("Samuel the Lamanite II", "THE SECOND BOOK", "am_echo", "08_samuel_lamanite_2.wav"),
|
||||
("Manti", "THE BOOK OF MANTI", "am_onyx", "09_manti.wav"),
|
||||
("Pa Nat I", "THE FIRST BOOK OF PA NAT", "af_nicole", "10_pa_nat_1.wav"),
|
||||
("Moroni I", "THE FIRST BOOK OF MORONI", "am_adam", "11_moroni_1.wav"),
|
||||
("Moroni II", "THE SECOND BOOK OF MORONI", "am_adam", "12_moroni_2.wav"),
|
||||
("Moroni III", "THE THIRD BOOK OF MORONI", "am_adam", "13_moroni_3.wav"),
|
||||
("Shioni", "THE BOOK OF SHIONI", "am_puck", "14_shioni.wav"),
|
||||
# ("Shi-Tugo I", "THE FIRST BOOK OF SHI-TUGO", "am_eric", "02_shi_tugo_1.wav"),
|
||||
# ("Sanempet", "THE BOOK OF SANEMPET", "am_liam", "03_sanempet.wav"),
|
||||
# ("Oug", "THE BOOK OF OUG", "am_michael", "04_oug.wav"),
|
||||
# ("Temple Writings of Oug", "THE BOOK OF", "am_michael", "05_temple_writings_oug.wav"),
|
||||
# ("Sacred Temple Writings", "THE SACRED", "am_michael", "06_sacred_temple_writings.wav"),
|
||||
# ("Samuel the Lamanite I", "THE FIRST BOOK", "am_echo", "07_samuel_lamanite_1.wav"),
|
||||
# ("Samuel the Lamanite II", "THE SECOND BOOK", "am_echo", "08_samuel_lamanite_2.wav"),
|
||||
# ("Manti", "THE BOOK OF MANTI", "am_onyx", "09_manti.wav"),
|
||||
# ("Pa Nat I", "THE FIRST BOOK OF PA NAT", "af_nicole", "10_pa_nat_1.wav"),
|
||||
# ("Moroni I", "THE FIRST BOOK OF MORONI", "am_adam", "11_moroni_1.wav"),
|
||||
# ("Moroni II", "THE SECOND BOOK OF MORONI", "am_adam", "12_moroni_2.wav"),
|
||||
# ("Moroni III", "THE THIRD BOOK OF MORONI", "am_adam", "13_moroni_3.wav"),
|
||||
# ("Shioni", "THE BOOK OF SHIONI", "am_puck", "14_shioni.wav"),
|
||||
]
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────────────────
|
||||
@ -118,8 +119,18 @@ def clean_text(text: str) -> str:
|
||||
return text.strip()
|
||||
|
||||
|
||||
def _fmt_duration(seconds: float) -> str:
|
||||
"""Format seconds as 'Xm Ys' or 'Xs'."""
|
||||
if seconds >= 60:
|
||||
m, s = divmod(int(seconds), 60)
|
||||
return f"{m}m {s:02d}s"
|
||||
return f"{seconds:.0f}s"
|
||||
|
||||
|
||||
def generate_audio(pipeline: KPipeline, text: str, voice: str,
|
||||
output_path: Path) -> None:
|
||||
output_path: Path) -> float:
|
||||
"""Generate audio and return wall-clock seconds elapsed."""
|
||||
t0 = time.monotonic()
|
||||
chunks = []
|
||||
for _, _, chunk_audio in pipeline(text, voice=voice, speed=SPEED):
|
||||
if hasattr(chunk_audio, "numpy"):
|
||||
@ -131,10 +142,13 @@ def generate_audio(pipeline: KPipeline, text: str, voice: str,
|
||||
if chunks:
|
||||
audio = np.concatenate(chunks, axis=0)
|
||||
sf.write(str(output_path), audio, SAMPLE_RATE)
|
||||
elapsed = time.monotonic() - t0
|
||||
duration = len(audio) / SAMPLE_RATE
|
||||
print(f" ✓ Saved '{output_path.name}' ({duration:.1f}s)")
|
||||
print(f" ✓ Saved '{output_path.name}' ({duration:.1f}s audio | {elapsed:.1f}s wall-clock)")
|
||||
else:
|
||||
elapsed = time.monotonic() - t0
|
||||
print(f" ✗ No audio produced for voice='{voice}'")
|
||||
return elapsed
|
||||
|
||||
|
||||
# ── Main ───────────────────────────────────────────────────────────────────────
|
||||
@ -156,19 +170,59 @@ def main() -> None:
|
||||
print("Initialising Kokoro pipeline …")
|
||||
pipeline = KPipeline(lang_code=LANG_CODE)
|
||||
|
||||
# Pre-compute char counts for all sections so we can estimate ETAs
|
||||
section_chars: dict[str, int] = {
|
||||
label: len(clean_text(sections[label]))
|
||||
for label, _, _, _ in BOOKS
|
||||
if label in sections
|
||||
}
|
||||
|
||||
chars_per_sec: float | None = None # derived from the first book that finishes
|
||||
timing_rows: list[tuple[str, int, float]] = [] # (label, chars, elapsed)
|
||||
|
||||
for label, marker, voice, wav_name in BOOKS:
|
||||
if label not in sections:
|
||||
continue # marker was not found; warning already printed
|
||||
|
||||
print(f"\n[{label}] voice={voice} → {wav_name}")
|
||||
text = clean_text(sections[label])
|
||||
if not text:
|
||||
print(" ⚠ Empty text — skipping")
|
||||
continue
|
||||
|
||||
out_path = OUTPUT_DIR / wav_name
|
||||
generate_audio(pipeline, text, voice, out_path)
|
||||
text = clean_text(sections[label])
|
||||
if not text:
|
||||
print(f"\n[{label}] ⚠ Empty text — skipping")
|
||||
continue
|
||||
|
||||
chars = section_chars[label]
|
||||
|
||||
# Print ETA once we have a calibration rate
|
||||
if chars_per_sec is not None:
|
||||
eta_sec = chars / chars_per_sec
|
||||
eta_str = _fmt_duration(eta_sec)
|
||||
print(f"\n[{label}] voice={voice} → {wav_name} (est. {eta_str})")
|
||||
else:
|
||||
print(f"\n[{label}] voice={voice} → {wav_name} (timing calibration run)")
|
||||
|
||||
stem, ext = wav_name.rsplit(".", 1)
|
||||
out_path = OUTPUT_DIR / f"{stem}_{voice}.{ext}"
|
||||
elapsed = generate_audio(pipeline, text, voice, out_path)
|
||||
timing_rows.append((label, chars, elapsed))
|
||||
|
||||
# Calibrate from first completed book
|
||||
if chars_per_sec is None and elapsed > 0:
|
||||
chars_per_sec = chars / elapsed
|
||||
print(f" ⏱ Calibrated: {chars_per_sec:.0f} chars/sec")
|
||||
|
||||
# ── Summary ────────────────────────────────────────────────────────────────
|
||||
print("\n" + "─" * 60)
|
||||
print(f" {'Section':<30} {'Chars':>7} {'Actual':>8} {'Est':>8}")
|
||||
print("─" * 60)
|
||||
for i, (label, chars, elapsed) in enumerate(timing_rows):
|
||||
actual_str = _fmt_duration(elapsed)
|
||||
if i == 0 or chars_per_sec is None:
|
||||
est_str = "(calibration)"
|
||||
else:
|
||||
est_str = _fmt_duration(chars / chars_per_sec)
|
||||
print(f" {label:<30} {chars:>7,} {actual_str:>8} {est_str:>8}")
|
||||
total_elapsed = sum(e for _, _, e in timing_rows)
|
||||
print("─" * 60)
|
||||
print(f" {'TOTAL':<30} {sum(c for _,c,_ in timing_rows):>7,} {_fmt_duration(total_elapsed):>8}")
|
||||
print("\nDone.")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user