format doc script
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@ -14,6 +14,10 @@ proper_nouns_audio/
|
||||
# Generated data (JSON files in output_proper_nouns/ are tracked)
|
||||
output_proper_nouns/remaining_review.txt
|
||||
|
||||
# Generated PDFs and LaTeX files
|
||||
*.pdf
|
||||
*.tex
|
||||
|
||||
# Text files (except proper_nouns.txt)
|
||||
*.txt
|
||||
!proper_nouns.txt
|
||||
|
||||
801
format_scripture.py
Normal file
801
format_scripture.py
Normal file
@ -0,0 +1,801 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
format_scripture.py
═══════════════════
|
||||
Convert the Book of the Nem plain-text file into two scripture-style PDFs:
|
||||
|
||||
nem_kindle.pdf – single-column, sized for e-readers (4.5" × 6.5")
|
||||
nem_paper.pdf – two-column, Book of Mormon style (5.5" × 8.5")
|
||||
|
||||
Requirements (Debian/Ubuntu):
|
||||
sudo apt-get install texlive-latex-extra texlive-fonts-recommended
|
||||
|
||||
The key packages used are:
|
||||
extsizes – for 9 pt document class (paper format)
|
||||
tgpagella – TeX Gyre Pagella (Palatino-clone) font
|
||||
multicol – two-column layout without hard page breaks
|
||||
microtype – improved text justification and hyphenation
|
||||
fancyhdr – running headers and footers
|
||||
needspace – prevent orphaned headings
|
||||
|
||||
Usage:
|
||||
python format_scripture.py
python format_scripture.py --input "Audio Master Nem Full.txt"
python format_scripture.py --kindle-only
python format_scripture.py --paper-only
python format_scripture.py --output-dir ./pdfs
python format_scripture.py --keep-tex # keep .tex files for debugging
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
# ── Default paths ──────────────────────────────────────────────────────────────
|
||||
# Plain-text source read when --input is not given (relative path, so it is
# resolved against the current working directory).
INPUT_FILE = Path("Audio Master Nem Full.txt")
# Directory that receives the generated files when --output-dir is not given.
OUTPUT_DIR = Path("output_pdf")
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# LaTeX helper
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
# Characters that LaTeX treats as markup and that must be escaped in body text.
_LATEX_SPECIALS = {
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
}

# Unicode punctuation rewritten to the classic TeX input conventions.
_LATEX_PUNCTUATION = {
    "\u2014": "---",        # em dash
    "\u2013": "--",         # en dash
    "\u2018": "`",          # left single quote
    "\u2019": "'",          # right single quote
    "\u201c": "``",         # left double quote
    "\u201d": "''",         # right double quote
    "\u2026": r"\ldots{}",  # ellipsis
}

# Accented letters spelled out as LaTeX accent commands.
_LATEX_ACCENTS = {
    "\u00e9": r"\'e",
    "\u00e8": r"\`e",
    "\u00ea": r"\^e",
    "\u00e0": r"\`a",
    "\u00e2": r"\^a",
    "\u00f3": r"\'o",
    "\u00ed": r"\'{\i}",
}

# Single-pass translation table: every key is one character, so a replacement
# is never re-scanned and cannot be escaped a second time.
_LATEX_TRANS = str.maketrans(
    {**_LATEX_SPECIALS, **_LATEX_PUNCTUATION, **_LATEX_ACCENTS}
)


def esc(text: str) -> str:
    """Return *text* with LaTeX-special and typographic characters replaced."""
    return str.translate(text, _LATEX_TRANS)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Document element types
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
@dataclass
class TitlePage:
    """Title lines taken from the text that precedes the first divider."""
    lines: list  # stripped, non-empty title strings, in source order
|
||||
|
||||
|
||||
@dataclass
class BookHeader:
    """One or more heading lines that introduce a new book/section."""
    lines: list  # list of str; the first entry is used as the main title
|
||||
|
||||
|
||||
@dataclass
class Chapter:
    """A ``CHAPTER n`` heading, optionally followed by a subtitle line."""
    num: int  # chapter number parsed from the heading
    subtitle: Optional[str] = None  # short line after the heading, if any
|
||||
|
||||
|
||||
@dataclass
class SectionHeading:
    """Short heading within a chapter (e.g. MARRIAGE, BAPTISM)."""
    text: str  # heading text with surrounding whitespace removed
|
||||
|
||||
|
||||
@dataclass
class Verse:
    """A numbered verse."""
    num: int  # verse number taken from the start of the line
    text: str  # verse body without the leading number
|
||||
|
||||
|
||||
@dataclass
class Paragraph:
    """A line of running prose that is neither a heading nor a verse."""
    text: str  # paragraph text with surrounding whitespace removed
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Parser
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
# Verse line: optional leading whitespace, a number, whitespace, then the text.
_RE_VERSE = re.compile(r"^\s*(\d+)\s+(.*)")
# Chapter heading: the word CHAPTER (any case) and a number, alone on the line.
_RE_CHAPTER = re.compile(r"^\s*CHAPTER\s+(\d+)\s*$", re.IGNORECASE)
# Divider: a line that begins with four or more underscores.
_RE_DIVIDER = re.compile(r"^_{4,}")

# Lines longer than this are treated as body paragraphs rather than headings
MAX_HEADING_LEN = 120
|
||||
|
||||
|
||||
def _is_verse(line: str) -> bool:
    """Return True when *line* opens with a positive verse number and text."""
    match = _RE_VERSE.match(line)
    if not match:
        return False
    # A leading "0" is not a valid verse number.
    return int(match.group(1)) > 0
|
||||
|
||||
|
||||
def _is_chapter(line: str) -> bool:
    """Return True when *line* is a ``CHAPTER n`` heading."""
    stripped = line.strip()
    return _RE_CHAPTER.match(stripped) is not None
|
||||
|
||||
|
||||
def _is_divider(line: str) -> bool:
    """Return True when *line* is a section divider (a run of underscores)."""
    stripped = line.strip()
    return _RE_DIVIDER.match(stripped) is not None
|
||||
|
||||
|
||||
def _is_allcaps(line: str) -> bool:
|
||||
s = line.strip()
|
||||
return bool(s) and s == s.upper() and any(c.isalpha() for c in s)
|
||||
|
||||
|
||||
def parse(text: str) -> list:
    """Parse the scripture text into a flat list of document elements.

    The text before the first divider line supplies the title page.  Everything
    from that divider on is classified line by line into ``BookHeader``,
    ``Chapter``, ``SectionHeading``, ``Verse`` and ``Paragraph`` objects, kept
    in source order.

    Args:
        text: Full contents of the plain-text scripture file.

    Returns:
        The parsed elements.  A ``TitlePage`` comes first when at least one
        short (80 characters or fewer) line precedes the first divider.
    """
    lines = text.splitlines()
    elements = []
    n = len(lines)
    i = 0

    # ── Title page: short lines before the first divider ──────────────────────
    # Short lines (≤80 chars) are the actual title.  Long prose before the first
    # divider is ignored so it does not duplicate the later labeled Introduction.
    while i < n and not _is_divider(lines[i]):
        i += 1
    front_matter = (candidate.strip() for candidate in lines[:i])
    actual_title = [s for s in front_matter if s and len(s) <= 80]
    if actual_title:
        elements.append(TitlePage(lines=actual_title))

    # ── Main pass ─────────────────────────────────────────────────────────────
    after_divider = False

    while i < n:
        raw = lines[i]
        line = raw.strip()

        # ── Divider ───────────────────────────────────────────────────────────
        if _is_divider(raw):
            after_divider = True
            i += 1
            continue

        # ── Blank line ────────────────────────────────────────────────────────
        if not line:
            i += 1
            continue

        # ── After a divider: collect section/book header ───────────────────
        # Collect all short non-verse non-chapter lines immediately following
        # the divider.  Stop as soon as we hit a long prose line or body content.
        if after_divider:
            after_divider = False
            header_lines = []
            j = i
            while j < n:
                s = lines[j].strip()
                if not s:                               # blank: keep scanning
                    j += 1
                    continue
                if _is_verse(lines[j]) or _is_chapter(lines[j]):
                    break                               # reached verse/chapter body
                if _is_divider(lines[j]):
                    # A second divider starts the next section.  Without this
                    # check its underscores would be collected as heading text
                    # and the following header would be merged into this one.
                    break
                if len(s) > MAX_HEADING_LEN:
                    break                               # long prose line: stop here
                header_lines.append(s)
                j += 1
            if header_lines:
                elements.append(BookHeader(lines=header_lines))
            # When nothing was collected j == i, so the same line is simply
            # re-examined below on the next iteration with the flag cleared.
            i = j
            continue

        # ── Chapter heading ────────────────────────────────────────────────
        m = _RE_CHAPTER.match(line)
        if m:
            num = int(m.group(1))
            # Look ahead for an optional subtitle (short non-verse line)
            j = i + 1
            subtitle = None
            while j < n and not lines[j].strip():
                j += 1
            if j < n:
                ns = lines[j].strip()
                if (ns
                        and not _is_verse(lines[j])
                        and not _is_chapter(lines[j])
                        and not _is_divider(lines[j])
                        and len(ns) <= MAX_HEADING_LEN):
                    subtitle = ns
                    i = j + 1       # consume the subtitle line as well
                else:
                    i += 1
            else:
                i += 1
            elements.append(Chapter(num=num, subtitle=subtitle))
            continue

        # ── All-caps lines: either a BookHeader cluster or a SectionHeading ─
        # If the cluster of consecutive all-caps lines is followed (after any
        # blanks) by a CHAPTER heading, treat the whole cluster as a BookHeader.
        # Otherwise treat only the first line as a SectionHeading.
        if _is_allcaps(line) and len(line) <= MAX_HEADING_LEN and not _is_verse(raw):
            # Gather consecutive all-caps lines (blanks skipped)
            j = i
            caps_block = []
            while j < n:
                s = lines[j].strip()
                if not s:
                    j += 1
                    continue
                if (_is_allcaps(s)
                        and len(s) <= MAX_HEADING_LEN
                        and not _is_verse(lines[j])
                        and not _is_chapter(lines[j])
                        and not _is_divider(lines[j])):
                    caps_block.append(s)
                    j += 1
                else:
                    break
            # Look past any blanks to see if a chapter heading follows
            k = j
            while k < n and not lines[k].strip():
                k += 1
            if k < n and _is_chapter(lines[k]):
                # This cluster is a book/section header
                elements.append(BookHeader(lines=caps_block))
                i = j
            else:
                # Single inline section subheading (MARRIAGE, BAPTISM, etc.);
                # only this one line is consumed, later lines are re-examined.
                elements.append(SectionHeading(text=caps_block[0] if caps_block else line))
                i = i + 1
            continue

        # ── Verse ─────────────────────────────────────────────────────────
        if _is_verse(raw):
            mfull = _RE_VERSE.match(raw)
            elements.append(Verse(num=int(mfull.group(1)), text=mfull.group(2).strip()))
            i += 1
            continue

        # ── Paragraph ─────────────────────────────────────────────────────
        elements.append(Paragraph(text=line))
        i += 1

    return elements
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# LaTeX generation
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
_PREAMBLE_SHARED = r"""
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage{tgpagella}
|
||||
\usepackage{microtype}
|
||||
\usepackage{fancyhdr}
|
||||
\usepackage{needspace}
|
||||
\setlength{\headheight}{14pt}
|
||||
\addtolength{\topmargin}{-2pt}
|
||||
\usepackage[hidelinks]{hyperref}
|
||||
"""
|
||||
|
||||
|
||||
def _hrule() -> str:
|
||||
return r"\noindent\rule{\linewidth}{0.3pt}"
|
||||
|
||||
|
||||
# ── Kindle (single-column, e-reader sized) ────────────────────────────────────
|
||||
|
||||
def build_kindle_latex(elements: list) -> str:
    """Build a single-column LaTeX document sized for e-readers.

    Args:
        elements: Parsed document elements.

    Returns:
        The complete LaTeX source: preamble, title page (when a ``TitlePage``
        element is present), table of contents, then the body.
    """
    # The title page is pulled out so the table of contents can follow it.
    title_pages = [el for el in elements if isinstance(el, TitlePage)]
    content = [el for el in elements if not isinstance(el, TitlePage)]

    parts = [
        # extarticle (from extsizes) gives us 11pt; plain article also supports it
        r"\documentclass[11pt]{extarticle}",
        r"""
\usepackage[paperwidth=4.5in,paperheight=6.5in,
top=0.08in,bottom=0.5in,
inner=0.42in,outer=0.38in,
headheight=12pt,headsep=6pt,
includehead]{geometry}""",
        _PREAMBLE_SHARED,
        r"""
\pagestyle{fancy}
\fancyhf{}
\fancyhead[C]{\small\itshape\nouppercase{\leftmark}}
\fancyfoot[C]{\small\thepage}
\renewcommand{\headrulewidth}{0.3pt}

\setlength{\parindent}{0pt}
\setlength{\parskip}{3pt plus 1pt minus 1pt}

\begin{document}
""",
    ]

    # Only the first title page is rendered; the single-column TitlePage
    # branch of the body emitter produces the full page, page breaks included.
    _emit_elements(parts, title_pages[:1], kindle=True)

    parts.extend([
        r"\renewcommand{\contentsname}{Table of Contents}",
        r"\tableofcontents",
        r"\clearpage",
    ])
    _emit_elements(parts, content, kindle=True)
    parts.append(r"\end{document}")
    return "\n".join(parts)
|
||||
|
||||
|
||||
# ── Paper / BOM style (two-column) ────────────────────────────────────────────
|
||||
|
||||
def build_paper_latex(elements: list) -> str:
    """Build a two-column, Book of Mormon-style LaTeX document.

    Args:
        elements: Parsed document elements.

    Returns:
        The complete LaTeX source: preamble, a centered title block, table of
        contents, single-column introductory sections, then the two-column
        main body.
    """
    parts = [
        # extarticle (from extsizes) for 9pt support
        r"\documentclass[9pt,twoside]{extarticle}",
        r"""
\usepackage[paperwidth=5.5in,paperheight=8.5in,
top=0.08in,bottom=0.55in,
inner=0.5in,outer=0.42in,
headheight=10pt,headsep=5pt,
includehead]{geometry}""",
        _PREAMBLE_SHARED,
        r"""
\usepackage{multicol}
\setlength{\columnsep}{0.22in}
\setlength{\columnseprule}{0.3pt}

\pagestyle{fancy}
\fancyhf{}
\fancyhead[LE]{\footnotesize\itshape\nouppercase{\leftmark}}
\fancyhead[RO]{\footnotesize\itshape\nouppercase{\rightmark}}
\fancyfoot[C]{\scriptsize\thepage}
\renewcommand{\headrulewidth}{0.3pt}

\setlength{\parindent}{0pt}
\setlength{\parskip}{1pt}

\begin{document}
""",
    ]

    # The title block is emitted outside multicols (single-column block).
    title_pages = []
    content = []
    for el in elements:
        (title_pages if isinstance(el, TitlePage) else content).append(el)

    if title_pages:
        parts.append(r"\begin{center}")
        for position, title_line in enumerate(title_pages[0].lines):
            shown = title_line.strip()
            if not shown:
                continue
            # The first three title lines are set larger and bold.
            if position < 3:
                parts.append(r"{\large\bfseries " + esc(shown) + r"} \\[3pt]")
            else:
                parts.append(r"{\small " + esc(shown) + r"} \\[1pt]")
        parts.append(r"\end{center}")
        parts.append(r"\medskip")

    parts.append(r"\renewcommand{\contentsname}{Table of Contents}")
    parts.append(r"\tableofcontents")
    parts.append(r"\clearpage")

    # Drop any front-matter paragraphs that precede the first section header:
    # the paper edition starts at the labeled "Introduction" section rather
    # than repeating the pre-divider prose block.
    start = len(content)
    for position, el in enumerate(content):
        if isinstance(el, BookHeader):
            start = position
            break
    paper_content = content[start:]

    def _opens_real_book(header_index: int) -> bool:
        """True when a Chapter appears before the next BookHeader."""
        for later in paper_content[header_index + 1:]:
            if isinstance(later, Chapter):
                return True
            if isinstance(later, BookHeader):
                return False
        return False

    # "Introduction" and similar preamble sections are BookHeaders without
    # chapters; they stay in the single-column intro.  The main body starts at
    # the first header that actually has chapters.
    first_book = next(
        (
            position
            for position, el in enumerate(paper_content)
            if isinstance(el, BookHeader) and _opens_real_book(position)
        ),
        len(paper_content),
    )
    intro = paper_content[:first_book]
    body = paper_content[first_book:]

    if intro:
        _emit_elements(parts, intro, kindle=True, compact_headers=True)
        parts.append(r"\clearpage")

    parts.append(r"\begin{multicols}{2}")
    _emit_elements(parts, body, kindle=False)
    parts.append(r"\end{multicols}")
    parts.append(r"\end{document}")
    return "\n".join(parts)
|
||||
|
||||
|
||||
# ── Body emitter ──────────────────────────────────────────────────────────────
|
||||
|
||||
def _emit_elements(
    out: list,
    elements: list,
    kindle: bool,
    indent: bool = False,
    compact_headers: bool = False,
) -> None:
    """Append the LaTeX markup for *elements* to *out*, in order.

    Args:
        out: List of LaTeX source fragments; extended in place.
        elements: Parsed document elements to render.
        kindle: True for the single-column layout, False for the two-column
            layout.
        indent: Two-column layout only: emit paragraphs without the noindent
            prefix and follow each with a medskip.
        compact_headers: Single-column layout only: put no extra space above
            a book header instead of 0.1in.
    """
    for el in elements:

        # ── Title page (kindle only; paper handles it before multicols) ──────
        if isinstance(el, TitlePage):
            if not kindle:
                continue
            out.extend([
                r"\clearpage",
                r"\thispagestyle{empty}",
                r"\vspace*{1.3in}",
                r"\begin{center}",
            ])
            for position, title_line in enumerate(el.lines):
                shown = title_line.strip()
                if not shown:
                    continue
                # The first three title lines are set larger and bold.
                if position < 3:
                    out.append(r"{\LARGE\bfseries " + esc(shown) + r"} \\[8pt]")
                else:
                    out.append(r"{\large " + esc(shown) + r"} \\[4pt]")
            out.extend([r"\end{center}", r"\clearpage"])
            continue

        # ── Book / section header ────────────────────────────────────────────
        if isinstance(el, BookHeader):
            title = esc(el.lines[0])
            extra_lines = el.lines[1:]
            toc_entry = r"\phantomsection\addcontentsline{toc}{section}{" + title + r"}"
            running_head = r"\markboth{" + title + r"}{" + title + r"}"

            if kindle:
                # Start a new page for each major book
                top_gap = r"\vspace*{0pt}" if compact_headers else r"\vspace*{0.1in}"
                out.extend([
                    r"\clearpage",
                    toc_entry,
                    top_gap,
                    r"\begin{center}",
                    _hrule(),
                    r"\\[6pt]",
                    r"{\bfseries\large " + title + r"}",
                ])
                out.extend(
                    r"\\ [3pt]{\normalsize\itshape " + esc(extra) + r"}"
                    for extra in extra_lines
                )
                out.extend([
                    r"\\[6pt]",
                    _hrule(),
                    r"\end{center}",
                    running_head,
                    r"\vspace{5pt}",
                ])
            else:
                # Inline heading within the two-column flow.
                # Refuse to start a new book in the bottom half of a column.
                out.extend([
                    r"\needspace{0.5\textheight}",
                    toc_entry,
                    r"\begin{center}",
                    _hrule(),
                    r"\\[2pt]",
                    r"{\bfseries " + title + r"}",
                ])
                out.extend(
                    r"\\ {\small\itshape " + esc(extra) + r"}"
                    for extra in extra_lines
                )
                out.extend([
                    r"\\[2pt]",
                    _hrule(),
                    r"\end{center}",
                    running_head,
                    r"\vspace{2pt}",
                ])
            continue

        # ── Chapter heading ──────────────────────────────────────────────────
        if isinstance(el, Chapter):
            label = esc(f"CHAPTER {el.num}")
            toc_entry = r"\phantomsection\addcontentsline{toc}{subsection}{" + label + r"}"
            running_head = r"\markright{" + label + r"}"

            if kindle:
                out.extend([
                    toc_entry,
                    r"\needspace{4\baselineskip}",
                    r"\vspace{14pt}",
                    r"\begin{center}",
                    r"{\bfseries\large " + label + r"}",
                ])
                if el.subtitle:
                    out.append(r"\\ [3pt]{\normalsize\itshape " + esc(el.subtitle) + r"}")
                out.extend([r"\end{center}", running_head, r"\vspace{6pt}"])
            else:
                out.extend([
                    toc_entry,
                    r"\needspace{2\baselineskip}",
                    r"\vspace{3pt}",
                    r"\begin{center}",
                    r"{\bfseries " + label + r"}",
                ])
                if el.subtitle:
                    out.append(r"\\ {\small\itshape " + esc(el.subtitle) + r"}")
                out.extend([r"\end{center}", running_head, r"\vspace{1pt}"])
            continue

        # ── Section subheading (MARRIAGE, BAPTISM, etc.) ────────────────────
        if isinstance(el, SectionHeading):
            heading = esc(el.text)
            if kindle:
                out.extend([
                    r"\vspace{8pt}",
                    r"\begin{center}{\bfseries " + heading + r"}\end{center}",
                    r"\vspace{4pt}",
                ])
            else:
                out.extend([
                    r"\vspace{3pt}",
                    r"\begin{center}{\bfseries\small " + heading + r"}\end{center}",
                    r"\vspace{1pt}",
                ])
            continue

        # ── Verse ────────────────────────────────────────────────────────────
        if isinstance(el, Verse):
            # Bold inline number (not superscript) for readability on screen
            numbered = r"\noindent " + r"\textbf{" + str(el.num) + r"}" + r"~" + esc(el.text)
            if kindle:
                out.append(numbered)
                out.append(r"\par\smallskip")
            else:
                out.append(numbered + r"\par")
            continue

        # ── Paragraph (prose intro, commentary, etc.) ───────────────────────
        if isinstance(el, Paragraph):
            prose = esc(el.text)
            if kindle:
                out.append(r"\noindent " + prose)
                out.append(r"\par\smallskip")
            elif indent:
                out.append(prose + r"\par\medskip")
            else:
                out.append(r"\noindent " + prose + r"\par")
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Utility: book limiter
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def truncate_to_books(elements: list, max_books: int) -> list:
    """Keep only the first *max_books* BookHeader sections and their content.

    Everything that precedes the first BookHeader (title page, front-matter
    paragraphs) is always kept.  A non-positive *max_books* means "no limit"
    and returns *elements* itself.
    """
    if max_books <= 0:
        return elements

    header_positions = [
        position
        for position, el in enumerate(elements)
        if isinstance(el, BookHeader)
    ]
    if len(header_positions) <= max_books:
        # Fewer sections than the limit: nothing to cut.
        return list(elements)
    # Cut just before the first header that exceeds the limit.
    cutoff = header_positions[max_books]
    return list(elements[:cutoff])
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# PDF compilation
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def _find_compiler() -> tuple:
|
||||
"""Return (compiler_path, compiler_type) or (None, None) if none found."""
|
||||
import shutil
|
||||
# Also probe common absolute paths in case the dir isn't on $PATH
|
||||
candidates = {
|
||||
"pdflatex": ["/usr/bin/pdflatex", "/usr/local/bin/pdflatex"],
|
||||
"tectonic": ["/usr/bin/tectonic", "/usr/local/bin/tectonic"],
|
||||
}
|
||||
for cmd, extra_paths in candidates.items():
|
||||
found = shutil.which(cmd)
|
||||
if found:
|
||||
return found, cmd
|
||||
for p in extra_paths:
|
||||
if Path(p).exists():
|
||||
return p, cmd
|
||||
return None, None
|
||||
|
||||
|
||||
def compile_pdf(tex_src: str, output_pdf: Path,
                keep_tex: bool = False,
                compiler_path: str = "/usr/bin/pdflatex",
                compiler_type: str = "pdflatex") -> bool:
    """Compile *tex_src* in a temporary directory and copy the PDF out.

    Supports ``pdflatex`` and ``tectonic``.

    Args:
        tex_src: Complete LaTeX source.
        output_pdf: Destination of the finished PDF; parent directories are
            created as needed.
        keep_tex: Also save the source next to *output_pdf* with a ``.tex``
            suffix, on success and on compiler failure.
        compiler_path: Executable to run.
        compiler_type: ``"tectonic"`` selects the tectonic command line; any
            other value selects the pdflatex command line.

    Returns:
        True when the PDF was produced and copied, False otherwise.
    """
    def _save_tex() -> Path:
        """Write the LaTeX source beside the requested PDF and return its path."""
        dest = output_pdf.with_suffix(".tex")
        # The failure path can be reached before the output directory exists.
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(tex_src, encoding="utf-8")
        return dest

    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        tex_file = tmp_path / "document.tex"
        tex_file.write_text(tex_src, encoding="utf-8")

        if compiler_type == "tectonic":
            # Tectonic reruns as needed by itself and downloads missing packages.
            passes = 1
            cmd_base = [compiler_path, "document.tex"]
        else:
            # pdflatex needs three passes: the first writes the .toc file, the
            # second typesets the table of contents (which can push every
            # later page back), and the third picks up the corrected page
            # numbers.  Two passes leave stale numbers in a multi-page TOC.
            passes = 3
            cmd_base = [compiler_path, "-interaction=nonstopmode",
                        "-halt-on-error", "document.tex"]

        for pass_num in range(1, passes + 1):
            # errors="replace": compiler logs can contain bytes that are not
            # valid in the locale encoding (for example a multi-byte character
            # split by log line wrapping); strict decoding would raise
            # UnicodeDecodeError and hide the real outcome.
            result = subprocess.run(
                cmd_base, cwd=tmp, capture_output=True,
                text=True, errors="replace",
            )
            if result.returncode != 0:
                print(f"  [compiler error on pass {pass_num}]", file=sys.stderr)
                # Only the tail of the log is shown; that is where the error is.
                print(result.stdout[-3000:], file=sys.stderr)
                if result.stderr:
                    print(result.stderr[-1000:], file=sys.stderr)
                if keep_tex:
                    dest = _save_tex()
                    print(f"  TeX source saved to: {dest}", file=sys.stderr)
                return False

        pdf_out = tmp_path / "document.pdf"
        if pdf_out.exists():
            output_pdf.parent.mkdir(parents=True, exist_ok=True)
            output_pdf.write_bytes(pdf_out.read_bytes())
            if keep_tex:
                _save_tex()
            return True

    print("  [compiler ran but document.pdf was not produced]", file=sys.stderr)
    return False
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# Main
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
_INSTALL_INSTRUCTIONS = """
|
||||
No LaTeX compiler found. Install one of the following:
|
||||
|
||||
Arch / CachyOS / Manjaro:
|
||||
sudo pacman -S texlive-basic texlive-latex texlive-latexrecommended \\
|
||||
texlive-latexextra texlive-fontsrecommended
|
||||
|
||||
Debian / Ubuntu:
|
||||
sudo apt-get install texlive-latex-extra texlive-fonts-recommended
|
||||
|
||||
--- OR --- (self-contained, downloads packages on first use)
|
||||
sudo pacman -S tectonic
|
||||
# or: cargo install tectonic
|
||||
"""
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the arguments, reads and parses the input text, then writes the
    requested LaTeX sources and/or compiles them to PDF in the output
    directory.  When no compiler is found the run falls back to writing
    ``.tex`` files only.

    Exits with an error message when the input file is missing, and with
    status 1 when any requested PDF fails to compile.
    """
    parser = argparse.ArgumentParser(
        description="Generate scripture-style PDFs from the Book of the Nem text.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--input", type=Path, default=INPUT_FILE,
        help=f"Input plain-text file (default: {INPUT_FILE})",
    )
    parser.add_argument(
        "--output-dir", type=Path, default=OUTPUT_DIR,
        help=f"Output directory (default: {OUTPUT_DIR})",
    )
    # Giving both "only" flags used to skip both outputs silently; argparse
    # now rejects the combination with a usage error.
    only_group = parser.add_mutually_exclusive_group()
    only_group.add_argument(
        "--kindle-only", action="store_true",
        help="Generate only the Kindle (single-column) PDF.",
    )
    only_group.add_argument(
        "--paper-only", action="store_true",
        help="Generate only the paper (two-column) PDF.",
    )
    parser.add_argument(
        "--keep-tex", action="store_true",
        help="Save the intermediate .tex files alongside each PDF.",
    )
    parser.add_argument(
        "--max-books", type=int, default=0, metavar="N",
        help="Limit output to the first N book sections (0 = no limit).",
    )
    parser.add_argument(
        "--tex-only", action="store_true",
        help="Write .tex files only — do not attempt PDF compilation.  "
             "Useful when a LaTeX compiler is not available.",
    )
    args = parser.parse_args()

    src_path: Path = args.input
    if not src_path.exists():
        sys.exit(f"ERROR: Input file not found: {src_path}")

    print(f"Reading: {src_path}")
    text = src_path.read_text(encoding="utf-8", errors="replace")

    elements = parse(text)
    if args.max_books > 0:
        elements = truncate_to_books(elements, args.max_books)
        print(f"  Limiting to first {args.max_books} book(s).")
    books = sum(1 for e in elements if isinstance(e, BookHeader))
    chapters = sum(1 for e in elements if isinstance(e, Chapter))
    verses = sum(1 for e in elements if isinstance(e, Verse))
    print(f"  Parsed: {books} books/sections, {chapters} chapters, {verses} verses")

    out_dir: Path = args.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    # Locate compiler (unless --tex-only)
    compiler_path, compiler_type = None, None
    if not args.tex_only:
        compiler_path, compiler_type = _find_compiler()
        if not compiler_path:
            print(_INSTALL_INSTRUCTIONS, file=sys.stderr)
            print("Falling back to --tex-only mode: .tex files will be written "
                  "but not compiled.", file=sys.stderr)
            args.tex_only = True
        else:
            print(f"  Using compiler: {compiler_path}")

    def _write_or_compile(tex: str, pdf_path: Path, label: str) -> bool:
        """Save and/or compile one document; return False on compile failure."""
        if args.tex_only or args.keep_tex:
            tex_path = pdf_path.with_suffix(".tex")
            tex_path.write_text(tex, encoding="utf-8")
            print(f"  ✓ TeX saved: {tex_path}")
        if args.tex_only:
            return True
        print(f"  Compiling {label} PDF …")
        # The source was already saved above when --keep-tex is set, so the
        # compiler step is not asked to write it a second time.
        ok = compile_pdf(tex, pdf_path, keep_tex=False,
                         compiler_path=compiler_path,
                         compiler_type=compiler_type)
        if ok:
            print(f"  ✓ {pdf_path}")
        else:
            print(f"  ✗ {label} PDF failed — see errors above.")
        return ok

    failed = []

    # ── Kindle PDF ────────────────────────────────────────────────────────────
    if not args.paper_only:
        print("\nKindle PDF  (single-column, 4.5\"×6.5\") …")
        tex = build_kindle_latex(elements)
        # File name matches the one documented in the module docstring.
        if not _write_or_compile(tex, out_dir / "nem_kindle.pdf", "Kindle"):
            failed.append("Kindle")

    # ── Paper / BOM-style PDF ────────────────────────────────────────────────
    if not args.kindle_only:
        print("\nPaper PDF  (two-column BOM style, 5.5\"×8.5\") …")
        tex = build_paper_latex(elements)
        if not _write_or_compile(tex, out_dir / "nem_paper.pdf", "Paper"):
            failed.append("Paper")

    # Report failure through the exit status so scripts and CI can detect it.
    if failed:
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user