format doc script
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@ -14,6 +14,10 @@ proper_nouns_audio/
|
|||||||
# Generated data (JSON files in output_proper_nouns/ are tracked)
|
# Generated data (JSON files in output_proper_nouns/ are tracked)
|
||||||
output_proper_nouns/remaining_review.txt
|
output_proper_nouns/remaining_review.txt
|
||||||
|
|
||||||
|
# Generated PDFs and LaTeX files
|
||||||
|
*.pdf
|
||||||
|
*.tex
|
||||||
|
|
||||||
# Text files (except proper_nouns.txt)
|
# Text files (except proper_nouns.txt)
|
||||||
*.txt
|
*.txt
|
||||||
!proper_nouns.txt
|
!proper_nouns.txt
|
||||||
|
|||||||
801
format_scripture.py
Normal file
801
format_scripture.py
Normal file
@ -0,0 +1,801 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
format_scripture.py
|
||||||
|
════════════════════════
|
||||||
|
Convert the Book of the Nem plain-text file into two scripture-style PDFs:
|
||||||
|
|
||||||
|
nem_phone.pdf – single-column, sized for e-readers (4.5" × 6.5")
|
||||||
|
nem_paper.pdf – two-column, Book of Mormon style (5.5" × 8.5")
|
||||||
|
|
||||||
|
Requirements (Debian/Ubuntu):
|
||||||
|
sudo apt-get install texlive-latex-extra texlive-fonts-recommended
|
||||||
|
|
||||||
|
The key packages used are:
|
||||||
|
extsizes – for 9 pt document class (paper format)
|
||||||
|
tgpagella – TeX Gyre Pagella (Palatino-clone) font
|
||||||
|
multicol – two-column layout without hard page breaks
|
||||||
|
microtype – improved text justification and hyphenation
|
||||||
|
fancyhdr – running headers and footers
|
||||||
|
needspace – prevent orphaned headings
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python format_scripture.py
|
||||||
|
python format_scripture.py --input "Audio Master Nem Full.txt"
|
||||||
|
python format_scripture.py --kindle-only
|
||||||
|
python format_scripture.py --paper-only
|
||||||
|
python format_scripture.py --output-dir ./pdfs
|
||||||
|
python format_scripture.py --keep-tex # keep .tex files for debugging
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# ── Default paths ──────────────────────────────────────────────────────────────
|
||||||
|
INPUT_FILE = Path("Audio Master Nem Full.txt")
|
||||||
|
OUTPUT_DIR = Path("output_pdf")
|
||||||
|
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
# LaTeX helper
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
_LATEX_TRANS = str.maketrans({
|
||||||
|
"\\": r"\textbackslash{}",
|
||||||
|
"&": r"\&",
|
||||||
|
"%": r"\%",
|
||||||
|
"$": r"\$",
|
||||||
|
"#": r"\#",
|
||||||
|
"_": r"\_",
|
||||||
|
"{": r"\{",
|
||||||
|
"}": r"\}",
|
||||||
|
"~": r"\textasciitilde{}",
|
||||||
|
"^": r"\textasciicircum{}",
|
||||||
|
"\u2014": "---", # em dash
|
||||||
|
"\u2013": "--", # en dash
|
||||||
|
"\u2018": "`", # left single quote
|
||||||
|
"\u2019": "'", # right single quote
|
||||||
|
"\u201c": "``", # left double quote
|
||||||
|
"\u201d": "''", # right double quote
|
||||||
|
"\u2026": r"\ldots{}", # ellipsis
|
||||||
|
"\u00e9": r"\'e",
|
||||||
|
"\u00e8": r"\`e",
|
||||||
|
"\u00ea": r"\^e",
|
||||||
|
"\u00e0": r"\`a",
|
||||||
|
"\u00e2": r"\^a",
|
||||||
|
"\u00f3": r"\'o",
|
||||||
|
"\u00ed": r"\'{\i}",
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def esc(text: str) -> str:
    """Return *text* with LaTeX-sensitive characters replaced by safe markup.

    The substitutions come from the module-level ``_LATEX_TRANS`` table and
    are applied in a single ``str.translate`` pass.
    """
    escaped = str.translate(text, _LATEX_TRANS)
    return escaped
|
||||||
|
|
||||||
|
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
# Document element types
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
@dataclass
class TitlePage:
    """Title-page element: the lines shown on the opening page."""

    # Title lines in display order; the builders style the first three
    # more prominently than the rest.
    lines: list
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class BookHeader:
    """Heading block (one or more lines) that opens a new book or section."""

    # Heading lines as str; lines[0] is the main title used for the TOC entry
    # and running header, any further lines are rendered as sub-lines.
    lines: list
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Chapter:
    """Chapter heading: its number plus an optional one-line subtitle."""

    # Number taken from the "CHAPTER <n>" line.
    num: int
    # Short line found directly after the chapter line, if there was one.
    subtitle: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SectionHeading:
    """Short in-chapter heading such as MARRIAGE or BAPTISM."""

    # Heading text exactly as it should be typeset.
    text: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Verse:
    """A single numbered verse."""

    # Verse number parsed from the start of the line.
    num: int
    # Verse text with the leading number removed.
    text: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Paragraph:
    """Un-numbered prose (introductions, commentary and similar)."""

    # Paragraph text for one source line.
    text: str
|
||||||
|
|
||||||
|
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
# Parser
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
_RE_VERSE = re.compile(r"^\s*(\d+)\s+(.*)")
|
||||||
|
_RE_CHAPTER = re.compile(r"^\s*CHAPTER\s+(\d+)\s*$", re.IGNORECASE)
|
||||||
|
_RE_DIVIDER = re.compile(r"^_{4,}")
|
||||||
|
|
||||||
|
# Lines longer than this are treated as body paragraphs rather than headings
|
||||||
|
MAX_HEADING_LEN = 120
|
||||||
|
|
||||||
|
|
||||||
|
def _is_verse(line: str) -> bool:
    """Tell whether *line* opens with a positive verse number and whitespace."""
    found = _RE_VERSE.match(line)
    if not found:
        return False
    # A leading "0" does not count as a verse number.
    return int(found.group(1)) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def _is_chapter(line: str) -> bool:
    """Tell whether *line*, ignoring surrounding whitespace, is a chapter line."""
    trimmed = line.strip()
    return _RE_CHAPTER.match(trimmed) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def _is_divider(line: str) -> bool:
    """Tell whether *line*, once stripped, starts with a divider rule."""
    trimmed = line.strip()
    return _RE_DIVIDER.match(trimmed) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def _is_allcaps(line: str) -> bool:
|
||||||
|
s = line.strip()
|
||||||
|
return bool(s) and s == s.upper() and any(c.isalpha() for c in s)
|
||||||
|
|
||||||
|
|
||||||
|
def parse(text: str) -> list:
    """Parse the scripture text into a flat list of element objects.

    The text before the first divider line supplies the title page: its short
    lines (at most 80 characters) become a ``TitlePage`` and longer prose there
    is ignored.  Everything from that divider onwards is classified line by
    line into ``BookHeader``, ``Chapter``, ``SectionHeading``, ``Verse`` and
    ``Paragraph`` elements.

    If the text contains no divider at all, no title page is produced and the
    whole text goes through the line classifier, so chapters and verses are
    not lost.

    Args:
        text: Full contents of the plain-text source.

    Returns:
        The parsed elements in document order.
    """
    lines = text.splitlines()
    n = len(lines)
    elements = []

    # ── Title page: short lines before the first divider ──────────────────────
    first_divider = next(
        (idx for idx, candidate in enumerate(lines) if _is_divider(candidate)),
        None,
    )
    if first_divider is None:
        # Without a divider there is no way to tell title from body; treating
        # the whole file as a title page would silently drop every verse.
        i = 0
    else:
        stripped_front = (front.strip() for front in lines[:first_divider])
        actual_title = [s for s in stripped_front if s and len(s) <= 80]
        if actual_title:
            elements.append(TitlePage(lines=actual_title))
        i = first_divider

    # ── Main pass ─────────────────────────────────────────────────────────────
    after_divider = False

    while i < n:
        raw = lines[i]
        line = raw.strip()

        # ── Divider ───────────────────────────────────────────────────────────
        if _is_divider(raw):
            after_divider = True
            i += 1
            continue

        # ── Blank line ────────────────────────────────────────────────────────
        if not line:
            i += 1
            continue

        # ── After a divider: collect section/book header ──────────────────────
        # Collect the short lines that directly follow the divider.  Stop at
        # verse/chapter content, at a long prose line, or at the next divider
        # (which starts a section of its own).
        if after_divider:
            after_divider = False
            header_lines = []
            j = i
            while j < n:
                s = lines[j].strip()
                if not s:  # blank: keep scanning
                    j += 1
                    continue
                if _is_divider(lines[j]):
                    break  # next section begins; leave it for the main loop
                if _is_verse(lines[j]) or _is_chapter(lines[j]):
                    break  # reached verse/chapter body
                if len(s) > MAX_HEADING_LEN:
                    break  # long prose line: stop here
                header_lines.append(s)
                j += 1
            if header_lines:
                elements.append(BookHeader(lines=header_lines))
            # When nothing was collected j == i, so the same line is simply
            # re-examined below with after_divider cleared.
            i = j
            continue

        # ── Chapter heading ───────────────────────────────────────────────────
        m = _RE_CHAPTER.match(line)
        if m:
            num = int(m.group(1))
            # Look ahead for an optional subtitle (short non-verse line)
            j = i + 1
            subtitle = None
            while j < n and not lines[j].strip():
                j += 1
            if j < n:
                ns = lines[j].strip()
                if (ns
                        and not _is_verse(lines[j])
                        and not _is_chapter(lines[j])
                        and not _is_divider(lines[j])
                        and len(ns) <= MAX_HEADING_LEN):
                    subtitle = ns
                    i = j + 1  # consume the subtitle line as well
                else:
                    i += 1
            else:
                i += 1
            elements.append(Chapter(num=num, subtitle=subtitle))
            continue

        # ── All-caps lines: either a BookHeader cluster or a SectionHeading ───
        # If the cluster of consecutive all-caps lines is followed (after any
        # blanks) by a CHAPTER heading, treat the whole cluster as a BookHeader.
        # Otherwise treat only the first line as a SectionHeading.
        if _is_allcaps(line) and len(line) <= MAX_HEADING_LEN and not _is_verse(raw):
            # Gather consecutive all-caps lines (blanks skipped)
            j = i
            caps_block = []
            while j < n:
                s = lines[j].strip()
                if not s:
                    j += 1
                    continue
                if (_is_allcaps(s)
                        and len(s) <= MAX_HEADING_LEN
                        and not _is_verse(lines[j])
                        and not _is_chapter(lines[j])
                        and not _is_divider(lines[j])):
                    caps_block.append(s)
                    j += 1
                else:
                    break
            # Look past any blanks to see if a chapter heading follows
            k = j
            while k < n and not lines[k].strip():
                k += 1
            if k < n and _is_chapter(lines[k]):
                # This cluster is a book/section header
                elements.append(BookHeader(lines=caps_block))
                i = j
            else:
                # Single inline section subheading (MARRIAGE, BAPTISM, etc.);
                # only this one line is consumed.
                elements.append(SectionHeading(text=caps_block[0] if caps_block else line))
                i += 1
            continue

        # ── Verse ─────────────────────────────────────────────────────────────
        if _is_verse(raw):
            mfull = _RE_VERSE.match(raw)
            elements.append(Verse(num=int(mfull.group(1)), text=mfull.group(2).strip()))
            i += 1
            continue

        # ── Paragraph ─────────────────────────────────────────────────────────
        elements.append(Paragraph(text=line))
        i += 1

    return elements
|
||||||
|
|
||||||
|
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
# LaTeX generation
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
_PREAMBLE_SHARED = r"""
|
||||||
|
\usepackage[T1]{fontenc}
|
||||||
|
\usepackage[utf8]{inputenc}
|
||||||
|
\usepackage{tgpagella}
|
||||||
|
\usepackage{microtype}
|
||||||
|
\usepackage{fancyhdr}
|
||||||
|
\usepackage{needspace}
|
||||||
|
\setlength{\headheight}{14pt}
|
||||||
|
\addtolength{\topmargin}{-2pt}
|
||||||
|
\usepackage[hidelinks]{hyperref}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def _hrule() -> str:
|
||||||
|
return r"\noindent\rule{\linewidth}{0.3pt}"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Kindle (single-column, e-reader sized) ────────────────────────────────────
|
||||||
|
|
||||||
|
def build_kindle_latex(elements: list) -> str:
    """Build the single-column LaTeX document sized for e-readers.

    Args:
        elements: Parsed elements; the first ``TitlePage`` (if any) is set on
            its own page ahead of the table of contents, everything else is
            passed to ``_emit_elements`` in kindle mode.

    Returns:
        The complete LaTeX source as one newline-joined string.
    """
    title_page = next((el for el in elements if isinstance(el, TitlePage)), None)
    body = [el for el in elements if not isinstance(el, TitlePage)]

    geometry = "\n".join((
        "",
        r"\usepackage[paperwidth=4.5in,paperheight=6.5in,",
        r"top=0.08in,bottom=0.5in,",
        r"inner=0.42in,outer=0.38in,",
        r"headheight=12pt,headsep=6pt,",
        r"includehead]{geometry}",
    ))
    page_setup = "\n".join((
        "",
        r"\pagestyle{fancy}",
        r"\fancyhf{}",
        r"\fancyhead[C]{\small\itshape\nouppercase{\leftmark}}",
        r"\fancyfoot[C]{\small\thepage}",
        r"\renewcommand{\headrulewidth}{0.3pt}",
        "",
        r"\setlength{\parindent}{0pt}",
        r"\setlength{\parskip}{3pt plus 1pt minus 1pt}",
        "",
        r"\begin{document}",
        "",
    ))

    # extarticle (from extsizes) is the class used for the 11pt body size.
    doc = [
        r"\documentclass[11pt]{extarticle}",
        geometry,
        _PREAMBLE_SHARED,
        page_setup,
    ]

    # The title page is emitted here rather than by _emit_elements so the
    # table of contents can follow it directly.
    if title_page is not None:
        doc += [
            r"\clearpage",
            r"\thispagestyle{empty}",
            r"\vspace*{1.3in}",
            r"\begin{center}",
        ]
        for position, title_line in enumerate(title_page.lines):
            shown = title_line.strip()
            if not shown:
                continue
            if position < 3:
                # The first three title lines are set large and bold.
                doc.append(r"{\LARGE\bfseries " + esc(shown) + r"} \\[8pt]")
            else:
                doc.append(r"{\large " + esc(shown) + r"} \\[4pt]")
        doc += [r"\end{center}", r"\clearpage"]

    doc += [
        r"\renewcommand{\contentsname}{Table of Contents}",
        r"\tableofcontents",
        r"\clearpage",
    ]

    _emit_elements(doc, body, kindle=True)
    doc.append(r"\end{document}")
    return "\n".join(doc)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Paper / BOM style (two-column) ────────────────────────────────────────────
|
||||||
|
|
||||||
|
def build_paper_latex(elements: list) -> str:
    """Build the two-column, Book of Mormon-style LaTeX document.

    Layout, in order: title block and table of contents, then the
    introductory sections in a single column, then every book inside a
    two-column ``multicols`` environment.

    Args:
        elements: Parsed elements from ``parse``.

    Returns:
        The complete LaTeX source as one newline-joined string.
    """
    title_page = next((el for el in elements if isinstance(el, TitlePage)), None)
    body = [el for el in elements if not isinstance(el, TitlePage)]

    geometry = "\n".join((
        "",
        r"\usepackage[paperwidth=5.5in,paperheight=8.5in,",
        r"top=0.08in,bottom=0.55in,",
        r"inner=0.5in,outer=0.42in,",
        r"headheight=10pt,headsep=5pt,",
        r"includehead]{geometry}",
    ))
    page_setup = "\n".join((
        "",
        r"\usepackage{multicol}",
        r"\setlength{\columnsep}{0.22in}",
        r"\setlength{\columnseprule}{0.3pt}",
        "",
        r"\pagestyle{fancy}",
        r"\fancyhf{}",
        r"\fancyhead[LE]{\footnotesize\itshape\nouppercase{\leftmark}}",
        r"\fancyhead[RO]{\footnotesize\itshape\nouppercase{\rightmark}}",
        r"\fancyfoot[C]{\scriptsize\thepage}",
        r"\renewcommand{\headrulewidth}{0.3pt}",
        "",
        r"\setlength{\parindent}{0pt}",
        r"\setlength{\parskip}{1pt}",
        "",
        r"\begin{document}",
        "",
    ))

    # extarticle (from extsizes) provides the 9pt size option.
    doc = [
        r"\documentclass[9pt,twoside]{extarticle}",
        geometry,
        _PREAMBLE_SHARED,
        page_setup,
    ]

    # The title block sits outside multicols, as a single-column block.
    if title_page is not None:
        doc.append(r"\begin{center}")
        for position, title_line in enumerate(title_page.lines):
            shown = title_line.strip()
            if not shown:
                continue
            if position < 3:
                doc.append(r"{\large\bfseries " + esc(shown) + r"} \\[3pt]")
            else:
                doc.append(r"{\small " + esc(shown) + r"} \\[1pt]")
        doc += [r"\end{center}", r"\medskip"]

    doc += [
        r"\renewcommand{\contentsname}{Table of Contents}",
        r"\tableofcontents",
        r"\clearpage",
    ]

    # Drop anything ahead of the first section header so the paper edition
    # starts at the labelled introduction instead of loose front matter.
    header_positions = [
        pos for pos, el in enumerate(body) if isinstance(el, BookHeader)
    ]
    paper_body = body[header_positions[0]:] if header_positions else []

    # A "real book" is a BookHeader that reaches a Chapter before the next
    # BookHeader.  Chapter-less headers ahead of the first real book (such as
    # an introduction) stay in the single-column intro part.
    first_book = len(paper_body)
    for pos, el in enumerate(paper_body):
        if not isinstance(el, BookHeader):
            continue
        next_marker = next(
            (later for later in paper_body[pos + 1:]
             if isinstance(later, (Chapter, BookHeader))),
            None,
        )
        if isinstance(next_marker, Chapter):
            first_book = pos
            break

    intro_part = paper_body[:first_book]
    main_part = paper_body[first_book:]

    if intro_part:
        _emit_elements(doc, intro_part, kindle=True, compact_headers=True)
        doc.append(r"\clearpage")

    doc.append(r"\begin{multicols}{2}")
    _emit_elements(doc, main_part, kindle=False)
    doc.append(r"\end{multicols}")
    doc.append(r"\end{document}")
    return "\n".join(doc)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Body emitter ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _title_page_tex(el) -> list:
    """Return the LaTeX lines for a stand-alone, single-column title page."""
    tex = [
        r"\clearpage",
        r"\thispagestyle{empty}",
        r"\vspace*{1.3in}",
        r"\begin{center}",
    ]
    for position, title_line in enumerate(el.lines):
        shown = title_line.strip()
        if not shown:
            continue
        if position < 3:
            tex.append(r"{\LARGE\bfseries " + esc(shown) + r"} \\[8pt]")
        else:
            tex.append(r"{\large " + esc(shown) + r"} \\[4pt]")
    tex += [r"\end{center}", r"\clearpage"]
    return tex


def _book_header_tex(el, kindle: bool, compact_headers: bool) -> list:
    """Return the LaTeX lines for a ruled book/section header."""
    title = esc(el.lines[0])
    toc_entry = r"\phantomsection\addcontentsline{toc}{section}{" + title + r"}"
    if kindle:
        # Start a new page for each major book
        opening = [
            r"\clearpage",
            toc_entry,
            r"\vspace*{0pt}" if compact_headers else r"\vspace*{0.1in}",
        ]
        title_font = r"\bfseries\large "
        sub_open = r"\\ [3pt]{\normalsize\itshape "
        rule_gap = r"\\[6pt]"
        closing_space = r"\vspace{5pt}"
    else:
        # Inline heading within the two-column flow.
        # Refuse to start a new book in the bottom half of a column
        opening = [r"\needspace{0.5\textheight}", toc_entry]
        title_font = r"\bfseries "
        sub_open = r"\\ {\small\itshape "
        rule_gap = r"\\[2pt]"
        closing_space = r"\vspace{2pt}"

    tex = opening + [
        r"\begin{center}",
        _hrule(),
        rule_gap,
        "{" + title_font + title + "}",
    ]
    tex += [sub_open + esc(extra) + "}" for extra in el.lines[1:]]
    tex += [
        rule_gap,
        _hrule(),
        r"\end{center}",
        r"\markboth{" + title + "}{" + title + "}",
        closing_space,
    ]
    return tex


def _chapter_tex(el, kindle: bool) -> list:
    """Return the LaTeX lines for a chapter heading.

    ``\\needspace`` is emitted before the TOC anchor: if it forces a break,
    the anchor and its recorded page number must move with the heading
    instead of staying behind on the previous page.
    """
    label = esc(f"CHAPTER {el.num}")
    toc_entry = r"\phantomsection\addcontentsline{toc}{subsection}{" + label + r"}"
    if kindle:
        room = r"\needspace{4\baselineskip}"
        lead = r"\vspace{14pt}"
        label_font = r"\bfseries\large "
        sub_open = r"\\ [3pt]{\normalsize\itshape "
        tail = r"\vspace{6pt}"
    else:
        room = r"\needspace{2\baselineskip}"
        lead = r"\vspace{3pt}"
        label_font = r"\bfseries "
        sub_open = r"\\ {\small\itshape "
        tail = r"\vspace{1pt}"

    tex = [room, toc_entry, lead, r"\begin{center}", "{" + label_font + label + "}"]
    if el.subtitle:
        tex.append(sub_open + esc(el.subtitle) + "}")
    tex += [r"\end{center}", r"\markright{" + label + "}", tail]
    return tex


def _section_heading_tex(el, kindle: bool) -> list:
    """Return the LaTeX lines for a centred in-chapter subheading."""
    if kindle:
        before, font, after = r"\vspace{8pt}", r"\bfseries ", r"\vspace{4pt}"
    else:
        before, font, after = r"\vspace{3pt}", r"\bfseries\small ", r"\vspace{1pt}"
    heading = r"\begin{center}{" + font + esc(el.text) + r"}\end{center}"
    return [before, heading, after]


def _verse_tex(el, kindle: bool) -> list:
    """Return the LaTeX lines for one verse with a bold inline number."""
    # Bold inline number (not superscript) for readability on screen
    numbered = r"\noindent \textbf{" + str(el.num) + r"}~" + esc(el.text)
    if kindle:
        return [numbered, r"\par\smallskip"]
    return [numbered + r"\par"]


def _paragraph_tex(el, kindle: bool, indent: bool) -> list:
    """Return the LaTeX lines for a prose paragraph."""
    body = esc(el.text)
    if kindle:
        return [r"\noindent " + body, r"\par\smallskip"]
    if indent:
        return [body + r"\par\medskip"]
    return [r"\noindent " + body + r"\par"]


def _emit_elements(
    out: list,
    elements: list,
    kindle: bool,
    indent: bool = False,
    compact_headers: bool = False,
) -> None:
    """Translate parsed element objects into LaTeX markup appended to *out*.

    Args:
        out: List of LaTeX source lines; extended in place.
        elements: Parsed elements to render, in order.
        kindle: True for the single-column styling (larger type, each book
            header on a fresh page); False for the compact two-column styling.
        indent: Two-column mode only: emit paragraphs without ``\\noindent``
            and follow them with ``\\medskip``.
        compact_headers: Kindle mode only: use zero top space above book
            headers instead of 0.1in.

    Elements of an unrecognised type are skipped.
    """
    for el in elements:
        if isinstance(el, TitlePage):
            # Kindle styling only; the paper builder sets its own title block
            # before entering multicols.
            if kindle:
                out.extend(_title_page_tex(el))
        elif isinstance(el, BookHeader):
            out.extend(_book_header_tex(el, kindle, compact_headers))
        elif isinstance(el, Chapter):
            out.extend(_chapter_tex(el, kindle))
        elif isinstance(el, SectionHeading):
            out.extend(_section_heading_tex(el, kindle))
        elif isinstance(el, Verse):
            out.extend(_verse_tex(el, kindle))
        elif isinstance(el, Paragraph):
            out.extend(_paragraph_tex(el, kindle, indent))
|
||||||
|
|
||||||
|
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
# Utility: book limiter
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def truncate_to_books(elements: list, max_books: int) -> list:
    """Keep only the content of the first *max_books* BookHeader sections.

    Everything ahead of the first BookHeader (title page, front matter) is
    always retained.  A non-positive *max_books* means "no limit" and returns
    *elements* itself, unchanged.
    """
    if max_books <= 0:
        return elements

    kept = []
    headers_seen = 0
    for element in elements:
        if isinstance(element, BookHeader):
            headers_seen += 1
        # The count only moves on a BookHeader, so this cuts exactly at the
        # header that would open section number max_books + 1.
        if headers_seen > max_books:
            break
        kept.append(element)
    return kept
|
||||||
|
|
||||||
|
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
# PDF compilation
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
def _find_compiler() -> tuple:
|
||||||
|
"""Return (compiler_path, compiler_type) or (None, None) if none found."""
|
||||||
|
import shutil
|
||||||
|
# Also probe common absolute paths in case the dir isn't on $PATH
|
||||||
|
candidates = {
|
||||||
|
"pdflatex": ["/usr/bin/pdflatex", "/usr/local/bin/pdflatex"],
|
||||||
|
"tectonic": ["/usr/bin/tectonic", "/usr/local/bin/tectonic"],
|
||||||
|
}
|
||||||
|
for cmd, extra_paths in candidates.items():
|
||||||
|
found = shutil.which(cmd)
|
||||||
|
if found:
|
||||||
|
return found, cmd
|
||||||
|
for p in extra_paths:
|
||||||
|
if Path(p).exists():
|
||||||
|
return p, cmd
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def compile_pdf(tex_src: str, output_pdf: Path,
                keep_tex: bool = False,
                compiler_path: str = "/usr/bin/pdflatex",
                compiler_type: str = "pdflatex") -> bool:
    """Compile *tex_src* in a temporary directory and copy the PDF out.

    Supports ``pdflatex`` and ``tectonic``.

    Args:
        tex_src: Complete LaTeX source.
        output_pdf: Destination of the PDF; parent directories are created.
        keep_tex: Also write the source next to *output_pdf* with a ``.tex``
            suffix, on failure as well as on success.
        compiler_path: Executable to run.
        compiler_type: ``"tectonic"`` for a single self-contained run; any
            other value is driven like pdflatex.

    Returns:
        True if the PDF was produced and copied, False otherwise.  Failures,
        including a compiler that cannot be started, are reported on stderr
        rather than raised.
    """
    def _save_tex() -> Path:
        # The output directory may not exist yet on the failure paths.
        dest = output_pdf.with_suffix(".tex")
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(tex_src, encoding="utf-8")
        return dest

    if compiler_type == "tectonic":
        # Tectonic compiles in one pass and downloads missing packages.
        passes = 1
        cmd_base = [compiler_path, "document.tex"]
    else:
        # pdflatex needs three passes: the first writes the .toc, the second
        # typesets the table of contents (which shifts every later page once
        # it grows past one page), the third records the final page numbers.
        passes = 3
        cmd_base = [compiler_path, "-interaction=nonstopmode",
                    "-halt-on-error", "document.tex"]

    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        (tmp_path / "document.tex").write_text(tex_src, encoding="utf-8")

        for pass_num in range(1, passes + 1):
            try:
                result = subprocess.run(
                    cmd_base, cwd=tmp, capture_output=True, text=True,
                    # Compiler logs are not guaranteed to be valid UTF-8.
                    errors="replace",
                )
            except OSError as exc:
                # Missing or non-executable compiler: report, don't crash.
                print(f"  [could not run compiler {compiler_path!r}: {exc}]",
                      file=sys.stderr)
                if keep_tex:
                    print(f"  TeX source saved to: {_save_tex()}", file=sys.stderr)
                return False
            if result.returncode != 0:
                print(f"  [compiler error on pass {pass_num}]", file=sys.stderr)
                # Only the tail of the log is shown; that is where the error is.
                print(result.stdout[-3000:], file=sys.stderr)
                if result.stderr:
                    print(result.stderr[-1000:], file=sys.stderr)
                if keep_tex:
                    print(f"  TeX source saved to: {_save_tex()}", file=sys.stderr)
                return False

        pdf_out = tmp_path / "document.pdf"
        if pdf_out.exists():
            output_pdf.parent.mkdir(parents=True, exist_ok=True)
            output_pdf.write_bytes(pdf_out.read_bytes())
            if keep_tex:
                _save_tex()
            return True

    print("  [compiler ran but document.pdf was not produced]", file=sys.stderr)
    return False
|
||||||
|
|
||||||
|
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
# Main
|
||||||
|
# ══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
_INSTALL_INSTRUCTIONS = """
|
||||||
|
No LaTeX compiler found. Install one of the following:
|
||||||
|
|
||||||
|
Arch / CachyOS / Manjaro:
|
||||||
|
sudo pacman -S texlive-basic texlive-latex texlive-latexrecommended \\
|
||||||
|
texlive-latexextra texlive-fontsrecommended
|
||||||
|
|
||||||
|
Debian / Ubuntu:
|
||||||
|
sudo apt-get install texlive-latex-extra texlive-fonts-recommended
|
||||||
|
|
||||||
|
--- OR --- (self-contained, downloads packages on first use)
|
||||||
|
sudo pacman -S tectonic
|
||||||
|
# or: cargo install tectonic
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: parse the text and produce the outputs.

    Reads the input file, parses it, optionally limits it to the first N
    books, then writes ``nem_phone.pdf`` (single-column) and/or
    ``nem_paper.pdf`` (two-column) into the output directory.  With
    ``--tex-only``, or when no compiler is found, only ``.tex`` files are
    written.

    Exits with an error message if the input file is missing, and with
    status 1 if any requested PDF failed to compile.
    """
    parser = argparse.ArgumentParser(
        description="Generate scripture-style PDFs from the Book of the Nem text.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--input", type=Path, default=INPUT_FILE,
        help=f"Input plain-text file (default: {INPUT_FILE})",
    )
    parser.add_argument(
        "--output-dir", type=Path, default=OUTPUT_DIR,
        help=f"Output directory (default: {OUTPUT_DIR})",
    )
    # Passing both "only" flags used to skip both outputs without a word;
    # argparse now rejects the combination.
    only_one = parser.add_mutually_exclusive_group()
    only_one.add_argument(
        "--kindle-only", action="store_true",
        help="Generate only the Kindle (single-column) PDF.",
    )
    only_one.add_argument(
        "--paper-only", action="store_true",
        help="Generate only the paper (two-column) PDF.",
    )
    parser.add_argument(
        "--keep-tex", action="store_true",
        help="Save the intermediate .tex files alongside each PDF.",
    )
    parser.add_argument(
        "--max-books", type=int, default=0, metavar="N",
        help="Limit output to the first N book sections (0 = no limit).",
    )
    parser.add_argument(
        "--tex-only", action="store_true",
        help="Write .tex files only — do not attempt PDF compilation. "
             "Useful when a LaTeX compiler is not available.",
    )
    args = parser.parse_args()

    src_path: Path = args.input
    if not src_path.exists():
        sys.exit(f"ERROR: Input file not found: {src_path}")

    print(f"Reading: {src_path}")
    text = src_path.read_text(encoding="utf-8", errors="replace")

    elements = parse(text)
    if args.max_books > 0:
        elements = truncate_to_books(elements, args.max_books)
        print(f"  Limiting to first {args.max_books} book(s).")
    books = sum(1 for e in elements if isinstance(e, BookHeader))
    chapters = sum(1 for e in elements if isinstance(e, Chapter))
    verses = sum(1 for e in elements if isinstance(e, Verse))
    print(f"  Parsed: {books} books/sections, {chapters} chapters, {verses} verses")

    out_dir: Path = args.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    # Locate compiler (unless --tex-only)
    compiler_path, compiler_type = None, None
    if not args.tex_only:
        compiler_path, compiler_type = _find_compiler()
        if not compiler_path:
            print(_INSTALL_INSTRUCTIONS, file=sys.stderr)
            print("Falling back to --tex-only mode: .tex files will be written "
                  "but not compiled.", file=sys.stderr)
            args.tex_only = True
        else:
            print(f"  Using compiler: {compiler_path}")

    def _write_or_compile(tex: str, pdf_path: Path, label: str) -> bool:
        """Write the .tex and/or compile it; return False only on a failed compile."""
        if args.tex_only or args.keep_tex:
            tex_path = pdf_path.with_suffix(".tex")
            tex_path.write_text(tex, encoding="utf-8")
            print(f"  ✓ TeX saved: {tex_path}")
        if args.tex_only:
            return True
        print(f"  Compiling {label} PDF …")
        # The source was already saved above when requested, so the compiler
        # wrapper need not write it a second time.
        ok = compile_pdf(tex, pdf_path, keep_tex=False,
                         compiler_path=compiler_path,
                         compiler_type=compiler_type)
        if ok:
            print(f"  ✓ {pdf_path}")
        else:
            print(f"  ✗ {label} PDF failed — see errors above.")
        return ok

    all_ok = True

    # ── Kindle PDF ────────────────────────────────────────────────────────────
    if not args.paper_only:
        print("\nKindle PDF (single-column, 4.5\"×6.5\") …")
        tex = build_kindle_latex(elements)
        all_ok = _write_or_compile(tex, out_dir / "nem_phone.pdf", "Kindle") and all_ok

    # ── Paper / BOM-style PDF ─────────────────────────────────────────────────
    if not args.kindle_only:
        print("\nPaper PDF (two-column BOM style, 5.5\"×8.5\") …")
        tex = build_paper_latex(elements)
        all_ok = _write_or_compile(tex, out_dir / "nem_paper.pdf", "Paper") and all_ok

    if not all_ok:
        # Let shell scripts and CI notice that an output is missing.
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
Reference in New Issue
Block a user