Files
audiobook_creator/render_voices.py

45 lines
1.4 KiB
Python
Raw Permalink Normal View History

import torch
import numpy as np
import soundfile as sf
from kokoro import KPipeline
from text_input import TEXT
# ── Device setup ──────────────────────────────────────────────────────────────
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
print(f"GPU: {torch.cuda.get_device_name(0)}")
SAMPLE_RATE = 24000
SPEED = 1.0
VOICES = [
("af_heart", "output_af_heart.wav"), # warm American female
("am_michael", "output_am_michael.wav"), # best American male
]
pipeline = KPipeline(lang_code="a")
def generate(voice: str, output_file: str) -> None:
print(f"\nGenerating '{voice}'{output_file}")
chunks = []
for _, _, chunk_audio in pipeline(TEXT, voice=voice, speed=SPEED):
if hasattr(chunk_audio, "numpy"):
chunk_audio = chunk_audio.cpu().numpy()
chunk_audio = np.atleast_1d(chunk_audio.squeeze())
if chunk_audio.size > 0:
chunks.append(chunk_audio)
if chunks:
audio = np.concatenate(chunks, axis=0)
sf.write(output_file, audio, SAMPLE_RATE)
print(f" ✓ Saved '{output_file}' ({len(audio) / SAMPLE_RATE:.1f}s, {SAMPLE_RATE} Hz)")
else:
print(f" ✗ No audio produced for '{voice}'")
for voice, path in VOICES:
generate(voice, path)
print("\nDone.")