#!/usr/bin/env python3
"""
create_scripture_pdf.py
════════════════════════
Convert the Book of the Nem plain-text file into two scripture-style PDFs:

    nem_phone.pdf  – single-column, sized for e-readers  (4.5" × 6.5")
    nem_paper.pdf  – two-column, Book of Mormon style    (5.5" × 8.5")

Requirements (Debian/Ubuntu):
    sudo apt-get install texlive-latex-extra texlive-fonts-recommended

The key packages used are:
    extsizes   – for 9 pt document class (paper format)
    tgpagella  – TeX Gyre Pagella (Palatino-clone) font
    multicol   – two-column layout without hard page breaks
    microtype  – improved text justification and hyphenation
    fancyhdr   – running headers and footers
    needspace  – prevent orphaned headings

Usage:
    python create_scripture_pdf.py
    python create_scripture_pdf.py --input "Audio Master Nem Full.txt"
    python create_scripture_pdf.py --kindle-only
    python create_scripture_pdf.py --paper-only
    python create_scripture_pdf.py --output-dir ./pdfs
    python create_scripture_pdf.py --keep-tex     # keep .tex files for debugging
    python create_scripture_pdf.py --tex-only     # write .tex, skip compilation
    python create_scripture_pdf.py --max-books 2  # only the first two books
"""

import argparse
import re
import shutil
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

# ── Default paths ──────────────────────────────────────────────────────────────
INPUT_FILE = Path("Audio Master Nem Full.txt")
OUTPUT_DIR = Path("output_pdf")

# Output file names.  NOTE: the single-column ("Kindle") edition has always
# been written as nem_phone.pdf; the name is kept so existing consumers of the
# output directory keep working.
KINDLE_PDF_NAME = "nem_phone.pdf"
PAPER_PDF_NAME = "nem_paper.pdf"


# ══════════════════════════════════════════════════════════════════════════════
# LaTeX helper
# ══════════════════════════════════════════════════════════════════════════════

# Single-pass character translation table.  Because str.translate visits each
# input character exactly once, the backslashes introduced by a replacement
# are never re-escaped.
_LATEX_TRANS = str.maketrans({
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
    "\u2014": "---",           # em dash
    "\u2013": "--",            # en dash
    "\u2018": "`",             # left single quote
    "\u2019": "'",             # right single quote
    "\u201c": "``",            # left double quote
    "\u201d": "''",            # right double quote
    "\u2026": r"\ldots{}",     # ellipsis
    "\u00e9": r"\'e",
    "\u00e8": r"\`e",
    "\u00ea": r"\^e",
    "\u00e0": r"\`a",
    "\u00e2": r"\^a",
    "\u00f3": r"\'o",
    "\u00ed": r"\'{\i}",
})


def esc(text: str) -> str:
    """Return *text* with LaTeX-special and typographic characters escaped."""
    return text.translate(_LATEX_TRANS)


# ══════════════════════════════════════════════════════════════════════════════
# Document element types
# ══════════════════════════════════════════════════════════════════════════════

@dataclass
class TitlePage:
    """Short lines found before the first divider of the input."""
    lines: list  # list of str


@dataclass
class BookHeader:
    """One or more heading lines that introduce a new book/section."""
    lines: list  # list of str; never empty when produced by parse()


@dataclass
class Chapter:
    """A ``CHAPTER n`` heading with an optional one-line subtitle."""
    num: int
    subtitle: Optional[str] = None


@dataclass
class SectionHeading:
    """Short heading within a chapter (e.g. MARRIAGE, BAPTISM)."""
    text: str


@dataclass
class Verse:
    """A numbered verse: leading integer followed by the verse text."""
    num: int
    text: str


@dataclass
class Paragraph:
    """Any non-blank line that matched none of the other element kinds."""
    text: str


# ══════════════════════════════════════════════════════════════════════════════
# Parser
# ══════════════════════════════════════════════════════════════════════════════

_RE_VERSE = re.compile(r"^\s*(\d+)\s+(.*)")
_RE_CHAPTER = re.compile(r"^\s*CHAPTER\s+(\d+)\s*$", re.IGNORECASE)
_RE_DIVIDER = re.compile(r"^_{4,}")

# Lines longer than this are treated as body paragraphs rather than headings
MAX_HEADING_LEN = 120

# Pre-divider lines longer than this are prose, not part of the title page
MAX_TITLE_LEN = 80


def _is_verse(line: str) -> bool:
    """Line starts with a positive verse number followed by text."""
    m = _RE_VERSE.match(line)
    return bool(m) and int(m.group(1)) > 0


def _is_chapter(line: str) -> bool:
    """Line consists solely of ``CHAPTER <number>`` (case-insensitive)."""
    return bool(_RE_CHAPTER.match(line.strip()))


def _is_divider(line: str) -> bool:
    """Line begins (after stripping) with four or more underscores."""
    return bool(_RE_DIVIDER.match(line.strip()))


def _is_allcaps(line: str) -> bool:
    """Line has at least one letter and no lowercase letters."""
    s = line.strip()
    return bool(s) and s == s.upper() and any(c.isalpha() for c in s)


def _is_caps_heading(line: str) -> bool:
    """Line is a short all-caps heading and not a verse/chapter/divider."""
    s = line.strip()
    return (
        _is_allcaps(s)
        and len(s) <= MAX_HEADING_LEN
        and not _is_verse(line)
        and not _is_chapter(line)
        and not _is_divider(line)
    )


def parse(text: str) -> list:
    """Parse the scripture text into a list of element objects.

    The result contains TitlePage, BookHeader, Chapter, SectionHeading,
    Verse and Paragraph instances in document order.

    Title-page handling: short lines (≤ MAX_TITLE_LEN chars) before the first
    divider form the TitlePage; long prose before the first divider is
    dropped so it does not duplicate the later labeled Introduction.  When
    the input contains no divider at all there is no title block to speak
    of, so the whole text is parsed as body instead of being swallowed by
    the title scan.
    """
    lines = text.splitlines()
    n = len(lines)
    elements = []

    # ── Title page: short lines before the first divider ──────────────────────
    first_divider = next(
        (idx for idx, ln in enumerate(lines) if _is_divider(ln)), None
    )
    if first_divider is None:
        i = 0  # no divider: nothing is front matter, parse everything
    else:
        title = [
            s
            for s in (ln.strip() for ln in lines[:first_divider])
            if s and len(s) <= MAX_TITLE_LEN
        ]
        if title:
            elements.append(TitlePage(lines=title))
        i = first_divider

    # ── Main pass ─────────────────────────────────────────────────────────────
    after_divider = False
    while i < n:
        raw = lines[i]
        line = raw.strip()

        # ── Divider ───────────────────────────────────────────────────────────
        if _is_divider(raw):
            after_divider = True
            i += 1
            continue

        # ── Blank line ────────────────────────────────────────────────────────
        if not line:
            i += 1
            continue

        # ── After a divider: collect section/book header ──────────────────────
        # Collect all short non-verse non-chapter lines immediately following
        # the divider.  Stop at body content, at a long prose line, or at the
        # next divider (which starts a new header of its own and must not be
        # absorbed into this one).
        if after_divider:
            after_divider = False
            header_lines = []
            j = i
            while j < n:
                s = lines[j].strip()
                if not s:                       # blank: keep scanning
                    j += 1
                    continue
                if (_is_verse(lines[j]) or _is_chapter(lines[j])
                        or _is_divider(lines[j])):
                    break
                if len(s) > MAX_HEADING_LEN:    # long prose line: stop here
                    break
                header_lines.append(s)
                j += 1
            if header_lines:
                elements.append(BookHeader(lines=header_lines))
            # If nothing was consumed (j == i) the flag is already cleared, so
            # the next iteration classifies this same line normally.
            i = j
            continue

        # ── Chapter heading ───────────────────────────────────────────────────
        m = _RE_CHAPTER.match(line)
        if m:
            # Look ahead for an optional subtitle (short non-verse line)
            j = i + 1
            while j < n and not lines[j].strip():
                j += 1
            subtitle = None
            if j < n:
                candidate = lines[j].strip()
                if (not _is_verse(lines[j])
                        and not _is_chapter(lines[j])
                        and not _is_divider(lines[j])
                        and len(candidate) <= MAX_HEADING_LEN):
                    subtitle = candidate
            # Consume through the subtitle when one was taken; otherwise only
            # the chapter line itself.
            i = j + 1 if subtitle is not None else i + 1
            elements.append(Chapter(num=int(m.group(1)), subtitle=subtitle))
            continue

        # ── All-caps lines: either a BookHeader cluster or a SectionHeading ───
        # If the cluster of consecutive all-caps lines is followed (after any
        # blanks) by a CHAPTER heading, treat the whole cluster as a
        # BookHeader.  Otherwise treat only the first line as a
        # SectionHeading and let the following lines be classified on their
        # own.
        if _is_allcaps(line) and len(line) <= MAX_HEADING_LEN and not _is_verse(raw):
            j = i
            caps_block = []
            while j < n:
                if not lines[j].strip():
                    j += 1
                    continue
                if not _is_caps_heading(lines[j]):
                    break
                caps_block.append(lines[j].strip())
                j += 1

            # j already sits on the first non-blank, non-caps line (or EOF)
            if j < n and _is_chapter(lines[j]):
                elements.append(BookHeader(lines=caps_block))
                i = j
            else:
                elements.append(
                    SectionHeading(text=caps_block[0] if caps_block else line)
                )
                i += 1
            continue

        # ── Verse ─────────────────────────────────────────────────────────────
        if _is_verse(raw):
            mfull = _RE_VERSE.match(raw)
            elements.append(
                Verse(num=int(mfull.group(1)), text=mfull.group(2).strip())
            )
            i += 1
            continue

        # ── Paragraph ─────────────────────────────────────────────────────────
        elements.append(Paragraph(text=line))
        i += 1

    return elements


# ══════════════════════════════════════════════════════════════════════════════
# LaTeX generation
# ══════════════════════════════════════════════════════════════════════════════

_PREAMBLE_SHARED = r"""
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{tgpagella}
\usepackage{microtype}
\usepackage{fancyhdr}
\usepackage{needspace}
\setlength{\headheight}{14pt}
\addtolength{\topmargin}{-2pt}
\usepackage[hidelinks]{hyperref}
"""

_TOC_LINES = (
    r"\renewcommand{\contentsname}{Table of Contents}",
    r"\tableofcontents",
    r"\clearpage",
)

# (prefix, suffix) wrappers for title lines: the first three lines use the
# "lead" style, every later line the "rest" style.
_KINDLE_TITLE_LEAD = (r"{\LARGE\bfseries ", r"} \\[8pt]")
_KINDLE_TITLE_REST = (r"{\large ", r"} \\[4pt]")
_PAPER_TITLE_LEAD = (r"{\large\bfseries ", r"} \\[3pt]")
_PAPER_TITLE_REST = (r"{\small ", r"} \\[1pt]")


def _hrule() -> str:
    """Return LaTeX for a thin full-width horizontal rule."""
    return r"\noindent\rule{\linewidth}{0.3pt}"


def _title_block(lines: list, lead: tuple, rest: tuple) -> list:
    """Return a centered LaTeX block for the title *lines*.

    The style is chosen by a line's position in *lines* (index < 3 → *lead*),
    so a blank entry still counts toward that position even though it emits
    nothing.
    """
    block = [r"\begin{center}"]
    for idx, raw in enumerate(lines):
        s = raw.strip()
        if not s:
            continue
        prefix, suffix = lead if idx < 3 else rest
        block.append(prefix + esc(s) + suffix)
    block.append(r"\end{center}")
    return block


def _kindle_title_page(lines: list) -> list:
    """Return LaTeX for a stand-alone, header-less Kindle title page."""
    return [
        r"\clearpage",
        r"\thispagestyle{empty}",
        r"\vspace*{1.3in}",
        *_title_block(lines, _KINDLE_TITLE_LEAD, _KINDLE_TITLE_REST),
        r"\clearpage",
    ]


def _split_title(elements: list) -> tuple:
    """Split *elements* into (TitlePage elements, everything else)."""
    title_els = [e for e in elements if isinstance(e, TitlePage)]
    body_els = [e for e in elements if not isinstance(e, TitlePage)]
    return title_els, body_els


# ── Kindle (single-column, e-reader sized) ────────────────────────────────────

def build_kindle_latex(elements: list) -> str:
    """Build a single-column LaTeX document sized for e-readers.

    Layout: title page (from the first TitlePage element, if any), table of
    contents, then every non-title element in order.  Returns the complete
    LaTeX source as one string.
    """
    out = []
    # extarticle (from extsizes) gives us 11pt; plain article also supports it
    out.append(r"\documentclass[11pt]{extarticle}")
    out.append(r"""
\usepackage[paperwidth=4.5in,paperheight=6.5in,
            top=0.08in,bottom=0.5in,
            inner=0.42in,outer=0.38in,
            headheight=12pt,headsep=6pt,
            includehead]{geometry}""")
    out.append(_PREAMBLE_SHARED)
    out.append(r"""
\pagestyle{fancy}
\fancyhf{}
\fancyhead[C]{\small\itshape\nouppercase{\leftmark}}
\fancyfoot[C]{\small\thepage}
\renewcommand{\headrulewidth}{0.3pt}
\setlength{\parindent}{0pt}
\setlength{\parskip}{3pt plus 1pt minus 1pt}
\begin{document}
""")

    # Handle title page separately so we can insert the TOC after it
    title_els, body_els = _split_title(elements)
    if title_els:
        out.extend(_kindle_title_page(title_els[0].lines))

    out.extend(_TOC_LINES)

    _emit_elements(out, body_els, kindle=True)
    out.append(r"\end{document}")
    return "\n".join(out)


# ── Paper / BOM style (two-column) ────────────────────────────────────────────

def _first_real_book(elements: list) -> int:
    """Return the index of the first BookHeader that owns a Chapter.

    A BookHeader "owns" a Chapter when a Chapter occurs after it and before
    the next BookHeader.  Returns ``len(elements)`` when there is none.
    """
    total = len(elements)
    for i, el in enumerate(elements):
        if not isinstance(el, BookHeader):
            continue
        for j in range(i + 1, total):
            if isinstance(elements[j], Chapter):
                return i
            if isinstance(elements[j], BookHeader):
                break
    return total


def build_paper_latex(elements: list) -> str:
    """Build a two-column, Book of Mormon-style LaTeX document.

    Layout: inline title block, table of contents, single-column intro
    sections (BookHeaders without chapters), then the main body inside a
    two-column ``multicols`` environment.  Elements that precede the first
    BookHeader are not emitted.  Returns the complete LaTeX source.
    """
    out = []
    # extarticle (from extsizes) for 9pt support
    out.append(r"\documentclass[9pt,twoside]{extarticle}")
    out.append(r"""
\usepackage[paperwidth=5.5in,paperheight=8.5in,
            top=0.08in,bottom=0.55in,
            inner=0.5in,outer=0.42in,
            headheight=10pt,headsep=5pt,
            includehead]{geometry}""")
    out.append(_PREAMBLE_SHARED)
    out.append(r"""
\usepackage{multicol}
\setlength{\columnsep}{0.22in}
\setlength{\columnseprule}{0.3pt}
\pagestyle{fancy}
\fancyhf{}
\fancyhead[LE]{\footnotesize\itshape\nouppercase{\leftmark}}
\fancyhead[RO]{\footnotesize\itshape\nouppercase{\rightmark}}
\fancyfoot[C]{\scriptsize\thepage}
\renewcommand{\headrulewidth}{0.3pt}
\setlength{\parindent}{0pt}
\setlength{\parskip}{1pt}
\begin{document}
""")

    # Emit the title page outside multicols (single-column block)
    title_els, body_els = _split_title(elements)
    if title_els:
        out.extend(
            _title_block(title_els[0].lines, _PAPER_TITLE_LEAD, _PAPER_TITLE_REST)
        )
        out.append(r"\medskip")

    out.extend(_TOC_LINES)

    # Skip any leading front-matter paragraphs before the first section header.
    # For paper output, the intro should begin at the labeled "Introduction"
    # section rather than repeating the pre-divider prose block.
    first_section = next(
        (i for i, el in enumerate(body_els) if isinstance(el, BookHeader)),
        len(body_els),
    )
    paper_body_els = body_els[first_section:]

    # Split intro (before first real book) from main body.  "Introduction" and
    # similar preamble sections are BookHeaders too but have no chapters, so
    # they stay in the single-column intro.
    first_book = _first_real_book(paper_body_els)
    intro_els = paper_body_els[:first_book]
    main_els = paper_body_els[first_book:]

    if intro_els:
        _emit_elements(out, intro_els, kindle=True, compact_headers=True)
        out.append(r"\clearpage")

    out.append(r"\begin{multicols}{2}")
    _emit_elements(out, main_els, kindle=False)
    out.append(r"\end{multicols}")
    out.append(r"\end{document}")
    return "\n".join(out)


# ── Body emitter ──────────────────────────────────────────────────────────────

def _toc_entry(level: str, title: str) -> str:
    """Return a hyperref anchor plus a TOC line for already-escaped *title*."""
    return r"\phantomsection\addcontentsline{toc}{" + level + "}{" + title + "}"


def _emit_book_header(out: list, el: BookHeader, kindle: bool,
                      compact_headers: bool) -> None:
    """Append the ruled, centered heading that opens a book/section."""
    head = esc(el.lines[0])
    if kindle:
        # Start a new page for each major book
        out.append(r"\clearpage")
        out.append(_toc_entry("section", head))
        out.append(r"\vspace*{0pt}" if compact_headers else r"\vspace*{0.1in}")
        out.append(r"\begin{center}")
        out.append(_hrule())
        out.append(r"\\[6pt]")
        out.append(r"{\bfseries\large " + head + r"}")
        for ln in el.lines[1:]:
            out.append(r"\\ [3pt]{\normalsize\itshape " + esc(ln) + r"}")
        out.append(r"\\[6pt]")
        out.append(_hrule())
        out.append(r"\end{center}")
        out.append(r"\markboth{" + head + r"}{" + head + r"}")
        out.append(r"\vspace{5pt}")
    else:
        # Inline heading within the two-column flow.
        # Refuse to start a new book in the bottom half of a column.
        out.append(r"\needspace{0.5\textheight}")
        out.append(_toc_entry("section", head))
        out.append(r"\begin{center}")
        out.append(_hrule())
        out.append(r"\\[2pt]")
        out.append(r"{\bfseries " + head + r"}")
        for ln in el.lines[1:]:
            out.append(r"\\ {\small\itshape " + esc(ln) + r"}")
        out.append(r"\\[2pt]")
        out.append(_hrule())
        out.append(r"\end{center}")
        out.append(r"\markboth{" + head + r"}{" + head + r"}")
        out.append(r"\vspace{2pt}")


def _emit_chapter(out: list, el: Chapter, kindle: bool) -> None:
    """Append a centered chapter heading with optional italic subtitle.

    ``\\needspace`` is emitted *before* the TOC entry: the entry records the
    page on which it lands, so it must come after any page/column break that
    ``\\needspace`` may force (same ordering as the book header).
    """
    label = esc(f"CHAPTER {el.num}")
    if kindle:
        out.append(r"\needspace{4\baselineskip}")
        out.append(_toc_entry("subsection", label))
        out.append(r"\vspace{14pt}")
        out.append(r"\begin{center}")
        out.append(r"{\bfseries\large " + label + r"}")
        if el.subtitle:
            out.append(r"\\ [3pt]{\normalsize\itshape " + esc(el.subtitle) + r"}")
        out.append(r"\end{center}")
        out.append(r"\markright{" + label + r"}")
        out.append(r"\vspace{6pt}")
    else:
        out.append(r"\needspace{2\baselineskip}")
        out.append(_toc_entry("subsection", label))
        out.append(r"\vspace{3pt}")
        out.append(r"\begin{center}")
        out.append(r"{\bfseries " + label + r"}")
        if el.subtitle:
            out.append(r"\\ {\small\itshape " + esc(el.subtitle) + r"}")
        out.append(r"\end{center}")
        out.append(r"\markright{" + label + r"}")
        out.append(r"\vspace{1pt}")


def _emit_elements(
    out: list,
    elements: list,
    kindle: bool,
    indent: bool = False,
    compact_headers: bool = False,
) -> None:
    """Translate parsed element objects into LaTeX markup appended to *out*.

    Args:
        out: list of LaTeX source lines, extended in place.
        elements: parsed elements in document order.
        kindle: True for single-column styling, False for the compact
            two-column styling.
        indent: paper mode only; emit paragraphs without ``\\noindent`` and
            with a ``\\medskip`` after them.
        compact_headers: kindle mode only; drop the extra space above book
            headers.
    """
    for el in elements:
        # ── Title page (kindle only; paper handles it before multicols) ──────
        if isinstance(el, TitlePage):
            if kindle:
                out.extend(_kindle_title_page(el.lines))

        # ── Book / section header ────────────────────────────────────────────
        elif isinstance(el, BookHeader):
            _emit_book_header(out, el, kindle, compact_headers)

        # ── Chapter heading ──────────────────────────────────────────────────
        elif isinstance(el, Chapter):
            _emit_chapter(out, el, kindle)

        # ── Section subheading (MARRIAGE, BAPTISM, etc.) ─────────────────────
        elif isinstance(el, SectionHeading):
            if kindle:
                out.append(r"\vspace{8pt}")
                out.append(
                    r"\begin{center}{\bfseries " + esc(el.text) + r"}\end{center}"
                )
                out.append(r"\vspace{4pt}")
            else:
                out.append(r"\vspace{3pt}")
                out.append(
                    r"\begin{center}{\bfseries\small "
                    + esc(el.text)
                    + r"}\end{center}"
                )
                out.append(r"\vspace{1pt}")

        # ── Verse ────────────────────────────────────────────────────────────
        elif isinstance(el, Verse):
            # Bold inline number (not superscript) for readability on screen
            numbered = (
                r"\noindent " + r"\textbf{" + str(el.num) + r"}" + r"~" + esc(el.text)
            )
            if kindle:
                out.append(numbered)
                out.append(r"\par\smallskip")
            else:
                out.append(numbered + r"\par")

        # ── Paragraph (prose intro, commentary, etc.) ────────────────────────
        elif isinstance(el, Paragraph):
            body = esc(el.text)
            if kindle:
                out.append(r"\noindent " + body)
                out.append(r"\par\smallskip")
            elif indent:
                out.append(body + r"\par\medskip")
            else:
                out.append(r"\noindent " + body + r"\par")


# ══════════════════════════════════════════════════════════════════════════════
# Utility: book limiter
# ══════════════════════════════════════════════════════════════════════════════

def truncate_to_books(elements: list, max_books: int) -> list:
    """Return only the first *max_books* BookHeader sections (and their content).

    Title-page and front-matter elements before the first BookHeader are
    always kept.  A non-positive *max_books* means "no limit" and returns
    *elements* itself unchanged.
    """
    if max_books <= 0:
        return elements
    seen = 0
    kept = []
    for el in elements:
        if isinstance(el, BookHeader):
            seen += 1
            if seen > max_books:
                break
        kept.append(el)
    return kept


# ══════════════════════════════════════════════════════════════════════════════
# PDF compilation
# ══════════════════════════════════════════════════════════════════════════════

# pdflatex passes: 1 writes the .toc/.aux, 2 typesets the TOC (whose pages
# push the body back), 3 picks up the page numbers that shifted in pass 2.
_PDFLATEX_PASSES = 3


def _find_compiler() -> tuple:
    """Return (compiler_path, compiler_type) or (None, None) if none found.

    pdflatex is preferred over tectonic.  Each is looked up on ``$PATH``
    first, then at a couple of common absolute locations.
    """
    # Also probe common absolute paths in case the dir isn't on $PATH
    candidates = {
        "pdflatex": ["/usr/bin/pdflatex", "/usr/local/bin/pdflatex"],
        "tectonic": ["/usr/bin/tectonic", "/usr/local/bin/tectonic"],
    }
    for cmd, extra_paths in candidates.items():
        found = shutil.which(cmd)
        if found:
            return found, cmd
        for p in extra_paths:
            if Path(p).exists():
                return p, cmd
    return None, None


def _save_tex(tex_src: str, output_pdf: Path) -> Path:
    """Write *tex_src* next to *output_pdf* (suffix ``.tex``); return its path."""
    dest = output_pdf.with_suffix(".tex")
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(tex_src, encoding="utf-8")
    return dest


def compile_pdf(tex_src: str, output_pdf: Path, keep_tex: bool = False,
                compiler_path: str = "/usr/bin/pdflatex",
                compiler_type: str = "pdflatex") -> bool:
    """
    Write *tex_src* into a temp directory, run the LaTeX compiler, and copy
    the resulting PDF to *output_pdf*.  Supports ``pdflatex`` and ``tectonic``.

    When *keep_tex* is true the LaTeX source is also saved next to
    *output_pdf*, on success and on compiler failure alike.  Compiler
    diagnostics are printed to stderr.  Returns True on success.
    """
    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        tex_file = tmp_path / "document.tex"
        tex_file.write_text(tex_src, encoding="utf-8")

        if compiler_type == "tectonic":
            # Tectonic reruns as needed itself and downloads missing packages.
            passes = 1
            cmd_base = [compiler_path, "document.tex"]
        else:
            passes = _PDFLATEX_PASSES
            cmd_base = [compiler_path, "-interaction=nonstopmode",
                        "-halt-on-error", "document.tex"]

        for pass_num in range(1, passes + 1):
            # errors="replace": TeX logs may contain bytes that are not valid
            # UTF-8; a decode error must not mask the real compiler result.
            result = subprocess.run(
                cmd_base,
                cwd=tmp,
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
            )
            if result.returncode != 0:
                print(f" [compiler error on pass {pass_num}]", file=sys.stderr)
                print(result.stdout[-3000:], file=sys.stderr)
                if result.stderr:
                    print(result.stderr[-1000:], file=sys.stderr)
                if keep_tex:
                    dest = _save_tex(tex_src, output_pdf)
                    print(f" TeX source saved to: {dest}", file=sys.stderr)
                return False

        pdf_out = tmp_path / "document.pdf"
        if pdf_out.exists():
            output_pdf.parent.mkdir(parents=True, exist_ok=True)
            output_pdf.write_bytes(pdf_out.read_bytes())
            if keep_tex:
                _save_tex(tex_src, output_pdf)
            return True

        print(" [compiler ran but document.pdf was not produced]",
              file=sys.stderr)
        return False


# ══════════════════════════════════════════════════════════════════════════════
# Main
# ══════════════════════════════════════════════════════════════════════════════

_INSTALL_INSTRUCTIONS = """
No LaTeX compiler found. Install one of the following:

  Arch / CachyOS / Manjaro:
    sudo pacman -S texlive-basic texlive-latex texlive-latexrecommended \\
                   texlive-latexextra texlive-fontsrecommended

  Debian / Ubuntu:
    sudo apt-get install texlive-latex-extra texlive-fonts-recommended

  --- OR --- (self-contained, downloads packages on first use)
    sudo pacman -S tectonic    # or: cargo install tectonic
"""


def main():
    """Command-line entry point: parse arguments, build and compile the PDFs.

    Exits with an error message when the input file is missing, and with
    status 1 when a requested PDF failed to compile.  When no LaTeX compiler
    is found the script falls back to writing ``.tex`` files only.
    """
    parser = argparse.ArgumentParser(
        description="Generate scripture-style PDFs from the Book of the Nem text.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--input", type=Path, default=INPUT_FILE,
        help=f"Input plain-text file (default: {INPUT_FILE})",
    )
    parser.add_argument(
        "--output-dir", type=Path, default=OUTPUT_DIR,
        help=f"Output directory (default: {OUTPUT_DIR})",
    )
    # The two "-only" switches contradict each other; together they used to
    # produce no output at all without any diagnostic.
    only = parser.add_mutually_exclusive_group()
    only.add_argument(
        "--kindle-only", action="store_true",
        help="Generate only the Kindle (single-column) PDF.",
    )
    only.add_argument(
        "--paper-only", action="store_true",
        help="Generate only the paper (two-column) PDF.",
    )
    parser.add_argument(
        "--keep-tex", action="store_true",
        help="Save the intermediate .tex files alongside each PDF.",
    )
    parser.add_argument(
        "--max-books", type=int, default=0, metavar="N",
        help="Limit output to the first N book sections (0 = no limit).",
    )
    parser.add_argument(
        "--tex-only", action="store_true",
        help="Write .tex files only — do not attempt PDF compilation. "
             "Useful when a LaTeX compiler is not available.",
    )
    args = parser.parse_args()

    src_path: Path = args.input
    if not src_path.exists():
        sys.exit(f"ERROR: Input file not found: {src_path}")

    print(f"Reading: {src_path}")
    text = src_path.read_text(encoding="utf-8", errors="replace")
    elements = parse(text)

    if args.max_books > 0:
        elements = truncate_to_books(elements, args.max_books)
        print(f" Limiting to first {args.max_books} book(s).")

    books = sum(1 for e in elements if isinstance(e, BookHeader))
    chapters = sum(1 for e in elements if isinstance(e, Chapter))
    verses = sum(1 for e in elements if isinstance(e, Verse))
    print(f" Parsed: {books} books/sections, {chapters} chapters, {verses} verses")

    out_dir: Path = args.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    # Locate compiler (unless --tex-only)
    compiler_path, compiler_type = None, None
    if not args.tex_only:
        compiler_path, compiler_type = _find_compiler()
        if not compiler_path:
            print(_INSTALL_INSTRUCTIONS, file=sys.stderr)
            print("Falling back to --tex-only mode: .tex files will be written "
                  "but not compiled.", file=sys.stderr)
            args.tex_only = True
        else:
            print(f" Using compiler: {compiler_path}")

    def _write_or_compile(tex: str, pdf_path: Path, label: str) -> bool:
        """Save and/or compile one document; return False on compile failure."""
        if args.tex_only or args.keep_tex:
            tex_path = pdf_path.with_suffix(".tex")
            tex_path.write_text(tex, encoding="utf-8")
            print(f" ✓ TeX saved: {tex_path}")
        if args.tex_only:
            return True
        print(f" Compiling {label} PDF …")
        ok = compile_pdf(tex, pdf_path, keep_tex=args.keep_tex,
                         compiler_path=compiler_path,
                         compiler_type=compiler_type)
        if ok:
            print(f" ✓ {pdf_path}")
        else:
            print(f" ✗ {label} PDF failed — see errors above.")
        return ok

    all_ok = True

    # ── Kindle PDF ────────────────────────────────────────────────────────────
    if not args.paper_only:
        print("\nKindle PDF (single-column, 4.5\"×6.5\") …")
        tex = build_kindle_latex(elements)
        all_ok &= _write_or_compile(tex, out_dir / KINDLE_PDF_NAME, "Kindle")

    # ── Paper / BOM-style PDF ─────────────────────────────────────────────────
    if not args.kindle_only:
        print("\nPaper PDF (two-column BOM style, 5.5\"×8.5\") …")
        tex = build_paper_latex(elements)
        all_ok &= _write_or_compile(tex, out_dir / PAPER_PDF_NAME, "Paper")

    if not all_ok:
        # Let shell scripts / Makefiles detect a failed build.
        sys.exit(1)


if __name__ == "__main__":
    main()