#!/usr/bin/env python3
"""
format_scripture.py
═══════════════════
Convert the Book of the Nem plain-text file into two scripture-style PDFs:

  nem_kindle.pdf  – single-column, sized for e-readers (4.5" × 6.5")
  nem_paper.pdf   – two-column, Book of Mormon style  (5.5" × 8.5")

Requirements (Debian/Ubuntu):
  sudo apt-get install texlive-latex-extra texlive-fonts-recommended

  The key packages used are:
    extsizes  – for 9 pt document class (paper format)
    tgpagella – TeX Gyre Pagella (Palatino-clone) font
    multicol  – two-column layout without hard page breaks
    microtype – improved text justification and hyphenation
    fancyhdr  – running headers and footers
    needspace – prevent orphaned headings

Usage:
  python format_scripture.py
  python format_scripture.py --input "Audio Master Nem Full.txt"
  python format_scripture.py --kindle-only
  python format_scripture.py --paper-only
  python format_scripture.py --output-dir ./pdfs
  python format_scripture.py --keep-tex     # keep .tex files for debugging
  python format_scripture.py --max-books 2  # only the first two sections
  python format_scripture.py --tex-only     # write .tex, skip compilation
"""

import argparse
import re
import shutil
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

# ── Default paths ─────────────────────────────────────────────────────────────
INPUT_FILE = Path("Audio Master Nem Full.txt")
OUTPUT_DIR = Path("output_pdf")

# File names of the two generated documents (see the module docstring).
KINDLE_PDF_NAME = "nem_kindle.pdf"
PAPER_PDF_NAME = "nem_paper.pdf"

# ══════════════════════════════════════════════════════════════════════════════
# LaTeX helper
# ══════════════════════════════════════════════════════════════════════════════

_LATEX_TRANS = str.maketrans({
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
    "\u2014": "---",            # em dash
    "\u2013": "--",             # en dash
    "\u2018": "`",              # left single quote
    "\u2019": "'",              # right single quote
    "\u201c": "``",             # left double quote
    "\u201d": "''",             # right double quote
    "\u2026": r"\ldots{}",      # ellipsis
    "\u00e9": r"\'e",
    "\u00e8": r"\`e",
    "\u00ea": r"\^e",
    "\u00e0": r"\`a",
    "\u00e2": r"\^a",
    "\u00f3": r"\'o",
    "\u00ed": r"\'{\i}",
    # U+FFFD is what read_text(errors="replace") substitutes for undecodable
    # bytes; inputenc has no mapping for it and pdflatex would halt.
    "\ufffd": "?",
})


def esc(text: str) -> str:
    """Escape special LaTeX characters in *text*.

    The translation is a single pass over the string, so the backslashes
    introduced by one replacement are never re-escaped by another.
    """
    return text.translate(_LATEX_TRANS)


# ══════════════════════════════════════════════════════════════════════════════
# Document element types
# ══════════════════════════════════════════════════════════════════════════════

@dataclass
class TitlePage:
    """Short lines found before the body of the text."""
    lines: list  # list of str, already stripped


@dataclass
class BookHeader:
    """One or more heading lines that introduce a new book/section."""
    lines: list  # list of str; never empty when produced by parse()


@dataclass
class Chapter:
    """A ``CHAPTER n`` heading with an optional one-line subtitle."""
    num: int
    subtitle: Optional[str] = None


@dataclass
class SectionHeading:
    """Short heading within a chapter (e.g. MARRIAGE, BAPTISM)."""
    text: str


@dataclass
class Verse:
    """A numbered verse: the leading number and the remaining text."""
    num: int
    text: str


@dataclass
class Paragraph:
    """Any other non-blank line of body text."""
    text: str


# ══════════════════════════════════════════════════════════════════════════════
# Parser
# ══════════════════════════════════════════════════════════════════════════════

_RE_VERSE = re.compile(r"^\s*(\d+)\s+(.*)")
_RE_CHAPTER = re.compile(r"^\s*CHAPTER\s+(\d+)\s*$", re.IGNORECASE)
_RE_DIVIDER = re.compile(r"^_{4,}")

# Lines longer than this are treated as body paragraphs rather than headings
MAX_HEADING_LEN = 120

# Front-matter lines longer than this are prose, not part of the title
MAX_TITLE_LINE_LEN = 80

# The first this-many title lines are set in the larger, bold title style
TITLE_HEAD_LINES = 3


def _is_verse(line: str) -> bool:
    """Line starts with a positive verse number followed by whitespace."""
    m = _RE_VERSE.match(line)
    return bool(m) and int(m.group(1)) > 0


def _is_chapter(line: str) -> bool:
    """Line is exactly ``CHAPTER <number>`` (case-insensitive)."""
    return bool(_RE_CHAPTER.match(line.strip()))


def _is_divider(line: str) -> bool:
    """Line starts (after stripping) with four or more underscores."""
    return bool(_RE_DIVIDER.match(line.strip()))


def _is_allcaps(line: str) -> bool:
    """Line contains at least one letter and no lowercase letters."""
    s = line.strip()
    return bool(s) and s == s.upper() and any(c.isalpha() for c in s)


def _front_matter_end(lines: list) -> int:
    """Return the index at which the title/front matter stops.

    Normally that is the first divider line.  When the text has no divider
    at all, fall back to the first chapter heading or verse so that the body
    is still parsed instead of being swallowed into the title page.
    """
    for idx, ln in enumerate(lines):
        if _is_divider(ln):
            return idx
    for idx, ln in enumerate(lines):
        if _is_chapter(ln) or _is_verse(ln):
            return idx
    return len(lines)


def parse(text: str) -> list:
    """Parse the scripture text into a list of element objects.

    Returns a flat list of TitlePage, BookHeader, Chapter, SectionHeading,
    Verse and Paragraph instances in document order.
    """
    lines = text.splitlines()
    elements = []
    n = len(lines)

    # ── Title page: short lines of the front matter ──────────────────────────
    # Short lines are the actual title.  Long prose in the front matter is
    # ignored so it does not duplicate the later labeled Introduction.
    i = _front_matter_end(lines)
    actual_title = [
        s for s in (ln.strip() for ln in lines[:i])
        if s and len(s) <= MAX_TITLE_LINE_LEN
    ]
    if actual_title:
        elements.append(TitlePage(lines=actual_title))

    # ── Main pass ────────────────────────────────────────────────────────────
    after_divider = False

    while i < n:
        raw = lines[i]
        line = raw.strip()

        # ── Divider ──────────────────────────────────────────────────────────
        if _is_divider(raw):
            after_divider = True
            i += 1
            continue

        # ── Blank line ───────────────────────────────────────────────────────
        if not line:
            i += 1
            continue

        # ── After a divider: collect section/book header ─────────────────────
        # Collect all short non-verse non-chapter lines immediately following
        # the divider.  Stop as soon as we hit a long prose line, body content
        # or another divider.
        if after_divider:
            after_divider = False
            header_lines = []
            j = i
            while j < n:
                s = lines[j].strip()
                if not s:                       # blank: keep scanning
                    j += 1
                    continue
                if _is_verse(lines[j]) or _is_chapter(lines[j]):
                    break                       # reached verse/chapter body
                if _is_divider(lines[j]):
                    break                       # closing rule, not header text
                if len(s) > MAX_HEADING_LEN:
                    break                       # long prose line: stop here
                header_lines.append(s)
                j += 1
            if header_lines:
                elements.append(BookHeader(lines=header_lines))
            i = j
            continue

        # ── Chapter heading ──────────────────────────────────────────────────
        m = _RE_CHAPTER.match(line)
        if m:
            num = int(m.group(1))
            # Look ahead for an optional subtitle (short non-verse line)
            j = i + 1
            subtitle = None
            while j < n and not lines[j].strip():
                j += 1
            if j < n:
                ns = lines[j].strip()
                if (ns
                        and not _is_verse(lines[j])
                        and not _is_chapter(lines[j])
                        and not _is_divider(lines[j])
                        and len(ns) <= MAX_HEADING_LEN):
                    subtitle = ns
                    i = j + 1
                else:
                    i += 1
            else:
                i += 1
            elements.append(Chapter(num=num, subtitle=subtitle))
            continue

        # ── All-caps lines: a BookHeader cluster or a SectionHeading ─────────
        # If the cluster of consecutive all-caps lines is followed (after any
        # blanks) by a CHAPTER heading, treat the whole cluster as a
        # BookHeader.  Otherwise treat only the first line as a SectionHeading.
        if _is_allcaps(line) and len(line) <= MAX_HEADING_LEN and not _is_verse(raw):
            # Gather consecutive all-caps lines (blanks skipped)
            j = i
            caps_block = []
            while j < n:
                s = lines[j].strip()
                if not s:
                    j += 1
                    continue
                if (_is_allcaps(s)
                        and len(s) <= MAX_HEADING_LEN
                        and not _is_verse(lines[j])
                        and not _is_chapter(lines[j])
                        and not _is_divider(lines[j])):
                    caps_block.append(s)
                    j += 1
                else:
                    break
            # The gather loop already skipped blanks, so lines[j] is the next
            # non-blank line (if any).
            if j < n and _is_chapter(lines[j]):
                # This cluster is a book/section header
                elements.append(BookHeader(lines=caps_block))
                i = j
            else:
                # Single inline section subheading (MARRIAGE, BAPTISM, etc.)
                elements.append(SectionHeading(text=line))
                i += 1
            continue

        # ── Verse ────────────────────────────────────────────────────────────
        mfull = _RE_VERSE.match(raw)
        if mfull and int(mfull.group(1)) > 0:
            elements.append(
                Verse(num=int(mfull.group(1)), text=mfull.group(2).strip())
            )
            i += 1
            continue

        # ── Paragraph ────────────────────────────────────────────────────────
        elements.append(Paragraph(text=line))
        i += 1

    return elements


# ══════════════════════════════════════════════════════════════════════════════
# LaTeX generation
# ══════════════════════════════════════════════════════════════════════════════

_PREAMBLE_SHARED = r"""
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{tgpagella}
\usepackage{microtype}
\usepackage{fancyhdr}
\usepackage{needspace}
\setlength{\headheight}{14pt}
\addtolength{\topmargin}{-2pt}
\usepackage[hidelinks]{hyperref}
"""


def _hrule() -> str:
    """Return a thin full-width horizontal rule."""
    return r"\noindent\rule{\linewidth}{0.3pt}"


def _title_lines(lines: list, head_style: str, head_skip: str,
                 rest_style: str, rest_skip: str) -> list:
    """Return one centred LaTeX row per non-blank title line.

    The first TITLE_HEAD_LINES positions use *head_style*/*head_skip*, the
    remainder *rest_style*/*rest_skip*.
    """
    rows = []
    for idx, raw in enumerate(lines):
        s = raw.strip()
        if not s:
            continue
        if idx < TITLE_HEAD_LINES:
            style, skip = head_style, head_skip
        else:
            style, skip = rest_style, rest_skip
        rows.append("{" + style + " " + esc(s) + r"} \\[" + skip + "]")
    return rows


def _kindle_title_page(lines: list) -> list:
    """Return the LaTeX lines for a stand-alone e-reader title page."""
    page = [
        r"\clearpage",
        r"\thispagestyle{empty}",
        r"\vspace*{1.3in}",
        r"\begin{center}",
    ]
    page.extend(_title_lines(lines, r"\LARGE\bfseries", "8pt", r"\large", "4pt"))
    page.append(r"\end{center}")
    page.append(r"\clearpage")
    return page


def _first_real_book_index(elements: list) -> int:
    """Return the index of the first BookHeader that owns a Chapter.

    A header "owns" a chapter when a Chapter appears before the next
    BookHeader.  Returns ``len(elements)`` when there is no such header.
    """
    total = len(elements)
    for idx, el in enumerate(elements):
        if not isinstance(el, BookHeader):
            continue
        for k in range(idx + 1, total):
            if isinstance(elements[k], Chapter):
                return idx
            if isinstance(elements[k], BookHeader):
                break
    return total


# ── Kindle (single-column, e-reader sized) ───────────────────────────────────

def build_kindle_latex(elements: list) -> str:
    """Build a single-column LaTeX document sized for e-readers."""
    out = []
    # extarticle (from extsizes) gives us 11pt; plain article also supports it
    out.append(r"\documentclass[11pt]{extarticle}")
    out.append(r"""
\usepackage[paperwidth=4.5in,paperheight=6.5in,
            top=0.08in,bottom=0.5in,
            inner=0.42in,outer=0.38in,
            headheight=12pt,headsep=6pt,
            includehead]{geometry}""")
    out.append(_PREAMBLE_SHARED)
    out.append(r"""
\pagestyle{fancy}
\fancyhf{}
\fancyhead[C]{\small\itshape\nouppercase{\leftmark}}
\fancyfoot[C]{\small\thepage}
\renewcommand{\headrulewidth}{0.3pt}

\setlength{\parindent}{0pt}
\setlength{\parskip}{3pt plus 1pt minus 1pt}

\begin{document}
""")
    # Handle title page separately so we can insert TOC after it
    title_els = [e for e in elements if isinstance(e, TitlePage)]
    body_els = [e for e in elements if not isinstance(e, TitlePage)]
    if title_els:
        out.extend(_kindle_title_page(title_els[0].lines))
    out.append(r"\renewcommand{\contentsname}{Table of Contents}")
    out.append(r"\tableofcontents")
    out.append(r"\clearpage")
    _emit_elements(out, body_els, kindle=True)
    out.append(r"\end{document}")
    return "\n".join(out)


# ── Paper / BOM style (two-column) ───────────────────────────────────────────

def build_paper_latex(elements: list) -> str:
    """Build a two-column, Book of Mormon-style LaTeX document."""
    out = []
    # extarticle (from extsizes) for 9pt support
    out.append(r"\documentclass[9pt,twoside]{extarticle}")
    out.append(r"""
\usepackage[paperwidth=5.5in,paperheight=8.5in,
            top=0.08in,bottom=0.55in,
            inner=0.5in,outer=0.42in,
            headheight=10pt,headsep=5pt,
            includehead]{geometry}""")
    out.append(_PREAMBLE_SHARED)
    out.append(r"""
\usepackage{multicol}
\setlength{\columnsep}{0.22in}
\setlength{\columnseprule}{0.3pt}

\pagestyle{fancy}
\fancyhf{}
\fancyhead[LE]{\footnotesize\itshape\nouppercase{\leftmark}}
\fancyhead[RO]{\footnotesize\itshape\nouppercase{\rightmark}}
\fancyfoot[C]{\scriptsize\thepage}
\renewcommand{\headrulewidth}{0.3pt}

\setlength{\parindent}{0pt}
\setlength{\parskip}{1pt}

\begin{document}
""")

    # Emit the title page outside multicols (single-column block)
    title_els = [e for e in elements if isinstance(e, TitlePage)]
    body_els = [e for e in elements if not isinstance(e, TitlePage)]

    if title_els:
        out.append(r"\begin{center}")
        out.extend(_title_lines(title_els[0].lines,
                                r"\large\bfseries", "3pt", r"\small", "1pt"))
        out.append(r"\end{center}")
        out.append(r"\medskip")

    out.append(r"\renewcommand{\contentsname}{Table of Contents}")
    out.append(r"\tableofcontents")
    out.append(r"\clearpage")

    # Skip any leading front-matter paragraphs before the first section header.
    # For paper output, the intro should begin at the labeled "Introduction"
    # section rather than repeating the pre-divider prose block.
    first_section = next(
        (idx for idx, el in enumerate(body_els) if isinstance(el, BookHeader)),
        len(body_els),
    )
    paper_body_els = body_els[first_section:]

    # Split intro (before first real book) from main body.  "Introduction"
    # and similar preamble sections are BookHeaders too but have no chapters,
    # so they stay in the single-column intro.
    first_book = _first_real_book_index(paper_body_els)
    intro_els = paper_body_els[:first_book]
    main_els = paper_body_els[first_book:]

    if intro_els:
        _emit_elements(out, intro_els, kindle=True, compact_headers=True)
        out.append(r"\clearpage")

    out.append(r"\begin{multicols}{2}")
    _emit_elements(out, main_els, kindle=False)
    out.append(r"\end{multicols}")
    out.append(r"\end{document}")
    return "\n".join(out)


# ── Body emitter ─────────────────────────────────────────────────────────────

def _emit_elements(
    out: list,
    elements: list,
    kindle: bool,
    indent: bool = False,
    compact_headers: bool = False,
) -> None:
    """Translate parsed element objects into LaTeX markup appended to *out*.

    kindle          -- True for the single-column style, False for the
                       two-column (multicols) style.
    indent          -- two-column only: let paragraphs keep their paragraph
                       indent and follow them with a medskip.
    compact_headers -- single-column only: drop the extra space above a
                       book header.
    """
    for el in elements:

        # ── Title page (kindle only; paper handles it before multicols) ──────
        if isinstance(el, TitlePage):
            if kindle:
                out.extend(_kindle_title_page(el.lines))

        # ── Book / section header ────────────────────────────────────────────
        elif isinstance(el, BookHeader):
            lines = el.lines
            head = esc(lines[0])
            toc_entry = r"\phantomsection\addcontentsline{toc}{section}{" + head + r"}"
            marks = r"\markboth{" + head + r"}{" + head + r"}"

            if kindle:
                # Start a new page for each major book
                out.append(r"\clearpage")
                out.append(toc_entry)
                out.append(r"\vspace*{0pt}" if compact_headers else r"\vspace*{0.1in}")
                out.append(r"\begin{center}")
                out.append(_hrule())
                out.append(r"\\[6pt]")
                out.append(r"{\bfseries\large " + head + r"}")
                for ln in lines[1:]:
                    out.append(r"\\ [3pt]{\normalsize\itshape " + esc(ln) + r"}")
                out.append(r"\\[6pt]")
                out.append(_hrule())
                out.append(r"\end{center}")
                out.append(marks)
                out.append(r"\vspace{5pt}")

            else:
                # Inline heading within the two-column flow
                # Refuse to start a new book in the bottom half of a column
                out.append(r"\needspace{0.5\textheight}")
                out.append(toc_entry)
                out.append(r"\begin{center}")
                out.append(_hrule())
                out.append(r"\\[2pt]")
                out.append(r"{\bfseries " + head + r"}")
                for ln in lines[1:]:
                    out.append(r"\\ {\small\itshape " + esc(ln) + r"}")
                out.append(r"\\[2pt]")
                out.append(_hrule())
                out.append(r"\end{center}")
                out.append(marks)
                out.append(r"\vspace{2pt}")

        # ── Chapter heading ──────────────────────────────────────────────────
        elif isinstance(el, Chapter):
            label = esc(f"CHAPTER {el.num}")
            toc_entry = r"\phantomsection\addcontentsline{toc}{subsection}{" + label + r"}"

            # \needspace comes first: if it forces a page break, the TOC
            # entry and link anchor must land on the new page with the
            # heading, not on the page being left.
            if kindle:
                out.append(r"\needspace{4\baselineskip}")
                out.append(toc_entry)
                out.append(r"\vspace{14pt}")
                out.append(r"\begin{center}")
                out.append(r"{\bfseries\large " + label + r"}")
                if el.subtitle:
                    out.append(r"\\ [3pt]{\normalsize\itshape " + esc(el.subtitle) + r"}")
                out.append(r"\end{center}")
                out.append(r"\markright{" + label + r"}")
                out.append(r"\vspace{6pt}")

            else:
                out.append(r"\needspace{2\baselineskip}")
                out.append(toc_entry)
                out.append(r"\vspace{3pt}")
                out.append(r"\begin{center}")
                out.append(r"{\bfseries " + label + r"}")
                if el.subtitle:
                    out.append(r"\\ {\small\itshape " + esc(el.subtitle) + r"}")
                out.append(r"\end{center}")
                out.append(r"\markright{" + label + r"}")
                out.append(r"\vspace{1pt}")

        # ── Section subheading (MARRIAGE, BAPTISM, etc.) ─────────────────────
        elif isinstance(el, SectionHeading):
            if kindle:
                out.append(r"\vspace{8pt}")
                out.append(r"\begin{center}{\bfseries " + esc(el.text) + r"}\end{center}")
                out.append(r"\vspace{4pt}")
            else:
                out.append(r"\vspace{3pt}")
                out.append(
                    r"\begin{center}{\bfseries\small " + esc(el.text) + r"}\end{center}"
                )
                out.append(r"\vspace{1pt}")

        # ── Verse ────────────────────────────────────────────────────────────
        elif isinstance(el, Verse):
            body = esc(el.text)
            # Bold inline number (not superscript) for readability
            vnum = r"\textbf{" + str(el.num) + r"}"
            if kindle:
                out.append(r"\noindent " + vnum + r"~" + body)
                out.append(r"\par\smallskip")
            else:
                out.append(r"\noindent " + vnum + r"~" + body + r"\par")

        # ── Paragraph (prose intro, commentary, etc.) ────────────────────────
        elif isinstance(el, Paragraph):
            body = esc(el.text)
            if kindle:
                out.append(r"\noindent " + body)
                out.append(r"\par\smallskip")
            elif indent:
                out.append(body + r"\par\medskip")
            else:
                out.append(r"\noindent " + body + r"\par")


# ══════════════════════════════════════════════════════════════════════════════
# Utility: book limiter
# ══════════════════════════════════════════════════════════════════════════════

def truncate_to_books(elements: list, max_books: int) -> list:
    """Return only the first *max_books* BookHeader sections (and their content).

    Title-page and front-matter elements before the first BookHeader are
    always kept.  A *max_books* of zero or less means "no limit" and returns
    *elements* unchanged.
    """
    if max_books <= 0:
        return elements
    count = 0
    result = []
    for el in elements:
        if isinstance(el, BookHeader):
            count += 1
            if count > max_books:
                break
        result.append(el)
    return result


# ══════════════════════════════════════════════════════════════════════════════
# PDF compilation
# ══════════════════════════════════════════════════════════════════════════════

# pdflatex runs: pass 1 writes the .toc, pass 2 typesets the table of
# contents (which pushes the body onto later pages), pass 3 picks up the
# page numbers and running heads that moved in pass 2.
PDFLATEX_PASSES = 3


def _find_compiler() -> tuple:
    """Return (compiler_path, compiler_type) or (None, None) if none found."""
    # Also probe common absolute paths in case the dir isn't on $PATH
    candidates = {
        "pdflatex": ["/usr/bin/pdflatex", "/usr/local/bin/pdflatex"],
        "tectonic": ["/usr/bin/tectonic", "/usr/local/bin/tectonic"],
    }
    for cmd, extra_paths in candidates.items():
        found = shutil.which(cmd)
        if found:
            return found, cmd
        for p in extra_paths:
            if Path(p).exists():
                return p, cmd
    return None, None


def _save_tex(tex_src: str, output_pdf: Path) -> Path:
    """Write *tex_src* next to *output_pdf* with a ``.tex`` suffix."""
    dest = output_pdf.with_suffix(".tex")
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(tex_src, encoding="utf-8")
    return dest


def compile_pdf(tex_src: str, output_pdf: Path,
                keep_tex: bool = False,
                compiler_path: str = "/usr/bin/pdflatex",
                compiler_type: str = "pdflatex") -> bool:
    """
    Write *tex_src* into a temp directory, run the LaTeX compiler, and copy
    the resulting PDF to *output_pdf*.  Supports ``pdflatex`` and ``tectonic``.

    When *keep_tex* is true the source is also saved beside *output_pdf*,
    whether or not compilation succeeds.

    Returns True on success, False if the compiler cannot be started, exits
    with an error, or produces no PDF.
    """
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        tex_file = tmp_path / "document.tex"
        tex_file.write_text(tex_src, encoding="utf-8")

        if compiler_type == "tectonic":
            # Tectonic reruns internally and downloads missing packages.
            passes = 1
            cmd_base = [compiler_path, "document.tex"]
        else:
            passes = PDFLATEX_PASSES
            cmd_base = [compiler_path, "-interaction=nonstopmode",
                        "-halt-on-error", "document.tex"]

        for pass_num in range(1, passes + 1):
            try:
                # errors="replace": compiler logs are not guaranteed to be
                # valid UTF-8 (lines may be wrapped mid-character).
                result = subprocess.run(
                    cmd_base, cwd=tmp, capture_output=True,
                    text=True, errors="replace",
                )
            except OSError as exc:
                print(f"  [could not run {compiler_path}: {exc}]", file=sys.stderr)
                if keep_tex:
                    dest = _save_tex(tex_src, output_pdf)
                    print(f"  TeX source saved to: {dest}", file=sys.stderr)
                return False
            if result.returncode != 0:
                print(f"  [compiler error on pass {pass_num}]", file=sys.stderr)
                print(result.stdout[-3000:], file=sys.stderr)
                if result.stderr:
                    print(result.stderr[-1000:], file=sys.stderr)
                if keep_tex:
                    dest = _save_tex(tex_src, output_pdf)
                    print(f"  TeX source saved to: {dest}", file=sys.stderr)
                return False

        pdf_out = tmp_path / "document.pdf"
        if pdf_out.exists():
            output_pdf.parent.mkdir(parents=True, exist_ok=True)
            output_pdf.write_bytes(pdf_out.read_bytes())
            if keep_tex:
                _save_tex(tex_src, output_pdf)
            return True

        print("  [compiler ran but document.pdf was not produced]", file=sys.stderr)
        return False


# ══════════════════════════════════════════════════════════════════════════════
# Main
# ══════════════════════════════════════════════════════════════════════════════

_INSTALL_INSTRUCTIONS = """
No LaTeX compiler found. Install one of the following:

  Arch / CachyOS / Manjaro:
    sudo pacman -S texlive-basic texlive-latex texlive-latexrecommended \\
                   texlive-latexextra texlive-fontsrecommended

  Debian / Ubuntu:
    sudo apt-get install texlive-latex-extra texlive-fonts-recommended

  --- OR --- (self-contained, downloads packages on first use)
    sudo pacman -S tectonic
    # or: cargo install tectonic
"""


def main() -> int:
    """Command-line entry point.

    Returns the process exit status: 0 when every requested output was
    produced, 1 when at least one PDF failed to compile.  A missing input
    file terminates the process with an error message.
    """
    parser = argparse.ArgumentParser(
        description="Generate scripture-style PDFs from the Book of the Nem text.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--input", type=Path, default=INPUT_FILE,
        help=f"Input plain-text file (default: {INPUT_FILE})",
    )
    parser.add_argument(
        "--output-dir", type=Path, default=OUTPUT_DIR,
        help=f"Output directory (default: {OUTPUT_DIR})",
    )
    # Passing both "only" flags would select nothing at all, so reject it.
    only = parser.add_mutually_exclusive_group()
    only.add_argument(
        "--kindle-only", action="store_true",
        help="Generate only the Kindle (single-column) PDF.",
    )
    only.add_argument(
        "--paper-only", action="store_true",
        help="Generate only the paper (two-column) PDF.",
    )
    parser.add_argument(
        "--keep-tex", action="store_true",
        help="Save the intermediate .tex files alongside each PDF.",
    )
    parser.add_argument(
        "--max-books", type=int, default=0, metavar="N",
        help="Limit output to the first N book sections (0 = no limit).",
    )
    parser.add_argument(
        "--tex-only", action="store_true",
        help="Write .tex files only — do not attempt PDF compilation. "
             "Useful when a LaTeX compiler is not available.",
    )
    args = parser.parse_args()

    src_path: Path = args.input
    if not src_path.exists():
        sys.exit(f"ERROR: Input file not found: {src_path}")

    print(f"Reading: {src_path}")
    # utf-8-sig also reads plain UTF-8 and drops a leading byte-order mark.
    text = src_path.read_text(encoding="utf-8-sig", errors="replace")

    elements = parse(text)
    if args.max_books > 0:
        elements = truncate_to_books(elements, args.max_books)
        print(f"  Limiting to first {args.max_books} book(s).")
    books = sum(1 for e in elements if isinstance(e, BookHeader))
    chapters = sum(1 for e in elements if isinstance(e, Chapter))
    verses = sum(1 for e in elements if isinstance(e, Verse))
    print(f"  Parsed: {books} books/sections, {chapters} chapters, {verses} verses")

    out_dir: Path = args.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    # Locate compiler (unless --tex-only)
    compiler_path, compiler_type = None, None
    if not args.tex_only:
        compiler_path, compiler_type = _find_compiler()
        if not compiler_path:
            print(_INSTALL_INSTRUCTIONS, file=sys.stderr)
            print("Falling back to --tex-only mode: .tex files will be written "
                  "but not compiled.", file=sys.stderr)
            args.tex_only = True
        else:
            print(f"  Using compiler: {compiler_path}")

    def _write_or_compile(tex: str, pdf_path: Path, label: str) -> bool:
        """Save and/or compile one document; return False on compile failure."""
        if args.tex_only or args.keep_tex:
            tex_path = pdf_path.with_suffix(".tex")
            tex_path.write_text(tex, encoding="utf-8")
            print(f"  ✓ TeX saved: {tex_path}")
        if args.tex_only:
            return True
        print(f"  Compiling {label} PDF …")
        # keep_tex=False: the source has already been saved just above.
        ok = compile_pdf(tex, pdf_path, keep_tex=False,
                         compiler_path=compiler_path,
                         compiler_type=compiler_type)
        if ok:
            print(f"  ✓ {pdf_path}")
        else:
            print(f"  ✗ {label} PDF failed — see errors above.")
        return ok

    jobs = []
    if not args.paper_only:
        jobs.append(('Kindle PDF (single-column, 4.5"×6.5")',
                     build_kindle_latex, KINDLE_PDF_NAME, "Kindle"))
    if not args.kindle_only:
        jobs.append(('Paper PDF (two-column BOM style, 5.5"×8.5")',
                     build_paper_latex, PAPER_PDF_NAME, "Paper"))

    all_ok = True
    for banner, builder, filename, label in jobs:
        print(f"\n{banner} …")
        all_ok = _write_or_compile(builder(elements), out_dir / filename, label) and all_ok

    return 0 if all_ok else 1


if __name__ == "__main__":
    sys.exit(main())