From 42d2587704f6f4aeb49e6143aea80d4295919543 Mon Sep 17 00:00:00 2001 From: Jonathan Bernard Date: Sun, 14 Jun 2026 08:14:27 -0500 Subject: [PATCH] Add private MEV embedded support --- src/bibleref.nim | 9 +- src/embedded_bible.nim | 112 ++++++++++++ src/kjv.nim | 106 +----------- src/mev.nim | 13 ++ src/offline_data.nim | 16 +- tools/generate_mev_data.nim | 330 ++++++++++++++++++++++++++++++++++++ 6 files changed, 473 insertions(+), 113 deletions(-) create mode 100644 src/embedded_bible.nim create mode 100644 src/mev.nim create mode 100644 tools/generate_mev_data.nim diff --git a/src/bibleref.nim b/src/bibleref.nim index 4fcdb5c..a77e59b 100644 --- a/src/bibleref.nim +++ b/src/bibleref.nim @@ -9,6 +9,7 @@ import cliutils, docopt, zero_functional import ./api_bible import ./esv import ./kjv +import ./mev proc formatMarkdown(raw, translation: string): string = var reference = "" @@ -80,6 +81,8 @@ proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[str cfg.getVal("esv-api-root", "https://api.esv.org")) of "akjv", "kjv": kjv.fetchPassages(reference) + of "mev": + mev.fetchPassages(reference) of "amp", "nkjv", "niv": api_bible.fetchPassages( reference, @@ -92,7 +95,7 @@ proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[str else: raise newException(ValueError, "unsupported translation '" & translation & - "'; supported translations: akjv, amp, esv, kjv, nkjv, niv") + "'; supported translations: akjv, amp, esv, kjv, mev, nkjv, niv") when isMainModule: const USAGE = """Usage: @@ -110,8 +113,8 @@ Options: -t, --translation Select a specific translation. Supported values - are 'akjv', 'amp', 'esv', 'kjv', 'nkjv', and - 'niv'. Defaults to 'esv'. + are 'akjv', 'amp', 'esv', 'kjv', 'mev', + 'nkjv', and 'niv'. Defaults to 'esv'. --esv-api-token Provide the API token on the command line. By default this will be read either from the diff --git a/src/embedded_bible.nim b/src/embedded_bible.nim new file mode 100644 index 0000000..5f45b17 --- /dev/null +++ b/src/embedded_bible.nim @@ -0,0 +1,112 @@ +import std/[strutils, tables] + +import ./reference_parser + +type BibleIndex = object + verses: Table[string, string] + lastVerseByChapter: Table[string, int] + lastChapterByBook: Table[string, int] + translationName: string + +proc verseKey(code: string, chapter, verse: int): string = + code & "\t" & $chapter & "\t" & $verse + +proc chapterKey(code: string, chapter: int): string = + code & "\t" & $chapter + +proc loadBibleIndex(rows, translationName: string): BibleIndex = + result.translationName = translationName + + for line in rows.splitLines: + if line.strip.len == 0: + continue + + let parts = line.split('\t', maxsplit = 3) + if parts.len != 4: + raise newException(ValueError, + "invalid embedded " & translationName & " row: " & line) + + let code = parts[0] + let chapter = parseInt(parts[1]) + let verse = parseInt(parts[2]) + let text = parts[3] + + result.verses[verseKey(code, chapter, verse)] = text + + let cKey = chapterKey(code, chapter) + if not result.lastVerseByChapter.hasKey(cKey) or + verse > result.lastVerseByChapter[cKey]: + result.lastVerseByChapter[cKey] = verse + + if not result.lastChapterByBook.hasKey(code) or + chapter > result.lastChapterByBook[code]: + result.lastChapterByBook[code] = chapter + +proc requireLastChapter(index: BibleIndex, code: string): int = + if not index.lastChapterByBook.hasKey(code): + raise newException(ValueError, + "no embedded " & index.translationName & " data for " & code) + index.lastChapterByBook[code] + +proc requireLastVerse(index: BibleIndex, code: string, chapter: int): int = + let cKey = chapterKey(code, chapter) + if not index.lastVerseByChapter.hasKey(cKey): + raise newException(ValueError, + "no embedded " & index.translationName & " data for " & + bookInfo(code).name & " " & $chapter) + index.lastVerseByChapter[cKey] + +proc requireVerse(index: BibleIndex, code: string, chapter, verse: int): string = + let vKey = verseKey(code, chapter, verse) + if not index.verses.hasKey(vKey): + raise newException(ValueError, + "no embedded " & index.translationName & " data for " & + bookInfo(code).name & " " & $chapter & ":" & $verse) + index.verses[vKey] + +proc addVerseLines( + lines: var seq[string], + index: BibleIndex, + reference: PassageReference, + range: RefRange) = + + let code = reference.book.code + discard index.requireLastChapter(code) + + for chapter in range.start.chapter .. range.finish.chapter: + let startVerse = + if chapter == range.start.chapter and range.start.verse > 0: + range.start.verse + else: + 1 + + let endVerse = + if chapter == range.finish.chapter and range.finish.verse > 0: + range.finish.verse + else: + index.requireLastVerse(code, chapter) + + if startVerse > endVerse: + raise newException(ValueError, "reference range starts after it ends") + + for verse in startVerse .. endVerse: + lines.add(" [" & $verse & "] " & index.requireVerse(code, chapter, verse)) + +proc fetchReference(index: BibleIndex, reference: PassageReference): string = + var lines = @[$reference] + let code = reference.book.code + + if reference.ranges.len == 0: + for chapter in 1 .. index.requireLastChapter(code): + for verse in 1 .. index.requireLastVerse(code, chapter): + lines.add(" [" & $verse & "] " & index.requireVerse(code, chapter, verse)) + else: + for range in reference.ranges: + lines.addVerseLines(index, reference, range) + + lines.join("\n") + +proc fetchPassages*(rows, reference, translationName: string): seq[string] = + let index = loadBibleIndex(rows, translationName) + for parsedReference in parseReferences(reference): + result.add(fetchReference(index, parsedReference)) diff --git a/src/kjv.nim b/src/kjv.nim index 14f5448..57a8438 100644 --- a/src/kjv.nim +++ b/src/kjv.nim @@ -1,109 +1,7 @@ -import std/[strutils, tables] - +import ./embedded_bible import ./offline_data -import ./reference_parser const kjvRows = embeddedTranslationData("kjv") -type BibleIndex = object - verses: Table[string, string] - lastVerseByChapter: Table[string, int] - lastChapterByBook: Table[string, int] - -proc verseKey(code: string, chapter, verse: int): string = - code & "\t" & $chapter & "\t" & $verse - -proc chapterKey(code: string, chapter: int): string = - code & "\t" & $chapter - -proc loadBibleIndex(): BibleIndex = - for line in kjvRows.splitLines: - if line.strip.len == 0: - continue - - let parts = line.split('\t', maxsplit = 3) - if parts.len != 4: - raise newException(ValueError, "invalid embedded KJV row: " & line) - - let code = parts[0] - let chapter = parseInt(parts[1]) - let verse = parseInt(parts[2]) - let text = parts[3] - - result.verses[verseKey(code, chapter, verse)] = text - - let cKey = chapterKey(code, chapter) - if not result.lastVerseByChapter.hasKey(cKey) or - verse > result.lastVerseByChapter[cKey]: - result.lastVerseByChapter[cKey] = verse - - if not result.lastChapterByBook.hasKey(code) or - chapter > result.lastChapterByBook[code]: - result.lastChapterByBook[code] = chapter - -proc requireLastChapter(index: BibleIndex, code: string): int = - if not index.lastChapterByBook.hasKey(code): - raise newException(ValueError, "no embedded KJV data for " & code) - index.lastChapterByBook[code] - -proc requireLastVerse(index: BibleIndex, code: string, chapter: int): int = - let cKey = chapterKey(code, chapter) - if not index.lastVerseByChapter.hasKey(cKey): - raise newException(ValueError, - "no embedded KJV data for " & bookInfo(code).name & " " & $chapter) - index.lastVerseByChapter[cKey] - -proc requireVerse(index: BibleIndex, code: string, chapter, verse: int): string = - let vKey = verseKey(code, chapter, verse) - if not index.verses.hasKey(vKey): - raise newException(ValueError, - "no embedded KJV data for " & bookInfo(code).name & " " & - $chapter & ":" & $verse) - index.verses[vKey] - -proc addVerseLines( - lines: var seq[string], - index: BibleIndex, - reference: PassageReference, - range: RefRange) = - - let code = reference.book.code - discard index.requireLastChapter(code) - - for chapter in range.start.chapter .. range.finish.chapter: - let startVerse = - if chapter == range.start.chapter and range.start.verse > 0: - range.start.verse - else: - 1 - - let endVerse = - if chapter == range.finish.chapter and range.finish.verse > 0: - range.finish.verse - else: - index.requireLastVerse(code, chapter) - - if startVerse > endVerse: - raise newException(ValueError, "reference range starts after it ends") - - for verse in startVerse .. endVerse: - lines.add(" [" & $verse & "] " & index.requireVerse(code, chapter, verse)) - -proc fetchReference(index: BibleIndex, reference: PassageReference): string = - var lines = @[$reference] - let code = reference.book.code - - if reference.ranges.len == 0: - for chapter in 1 .. index.requireLastChapter(code): - for verse in 1 .. index.requireLastVerse(code, chapter): - lines.add(" [" & $verse & "] " & index.requireVerse(code, chapter, verse)) - else: - for range in reference.ranges: - lines.addVerseLines(index, reference, range) - - lines.join("\n") - proc fetchPassages*(reference: string): seq[string] = - let index = loadBibleIndex() - for parsedReference in parseReferences(reference): - result.add(fetchReference(index, parsedReference)) + embedded_bible.fetchPassages(kjvRows, reference, "KJV") diff --git a/src/mev.nim b/src/mev.nim new file mode 100644 index 0000000..56c5b8c --- /dev/null +++ b/src/mev.nim @@ -0,0 +1,13 @@ +import ./offline_data + +when hasEmbeddedTranslationData("mev"): + import ./embedded_bible + + const mevRows = embeddedTranslationData("mev") + +proc fetchPassages*(reference: string): seq[string] = + when hasEmbeddedTranslationData("mev"): + embedded_bible.fetchPassages(mevRows, reference, "MEV") + else: + raise newException(ValueError, + "MEV data is not embedded; generate data/private/mev.tsv and rebuild") diff --git a/src/offline_data.nim b/src/offline_data.nim index abb7805..67321bb 100644 --- a/src/offline_data.nim +++ b/src/offline_data.nim @@ -1,11 +1,15 @@ import std/os -template embeddedTranslationData*(name: static[string]): string = +template translationDataPath(name: static[string], visibility: static[string]): string = const dataRoot = currentSourcePath().parentDir.parentDir / "data" - const privatePath = dataRoot / "private" / (name & ".tsv") - const publicPath = dataRoot / "public" / (name & ".tsv") + dataRoot / visibility / (name & ".tsv") - when fileExists(privatePath): - staticRead(privatePath) +template hasEmbeddedTranslationData*(name: static[string]): bool = + fileExists(translationDataPath(name, "private")) or + fileExists(translationDataPath(name, "public")) + +template embeddedTranslationData*(name: static[string]): string = + when fileExists(translationDataPath(name, "private")): + staticRead(translationDataPath(name, "private")) else: - staticRead(publicPath) + staticRead(translationDataPath(name, "public")) diff --git a/tools/generate_mev_data.nim b/tools/generate_mev_data.nim new file mode 100644 index 0000000..afb4ae7 --- /dev/null +++ b/tools/generate_mev_data.nim @@ -0,0 +1,330 @@ +import std/[ + htmlparser, + os, + osproc, + streams, + strutils, + xmlparser, + xmltree +] + +import ../src/reference_parser + +type + TocEntry = object + label: string + code: string + fileIndex: int + + BookSource = object + code: string + startIndex: int + endIndex: int + + ParseState = object + code: string + chapter: int + verse: int + verseText: string + rows: seq[string] + +proc normalizeWhitespace(s: string): string = + var lastWasSpace = false + for ch in s.replace("\xC2\xA0", " "): + if ch.isSpaceAscii: + if not lastWasSpace: + result.add(' ') + lastWasSpace = true + else: + result.add(ch) + lastWasSpace = false + result = result.strip + +proc markerText(s: string): string = + normalizeWhitespace(s).replace(" ", "") + +proc numberAfterPrefix(s, prefix: string): int = + let text = normalizeWhitespace(s).toUpperAscii + if not text.startsWith(prefix): + return 0 + + var digits = "" + for ch in text[prefix.len .. ^1].strip: + if ch.isDigit: + digits.add(ch) + elif digits.len > 0: + break + elif not ch.isSpaceAscii: + break + + if digits.len > 0: + result = parseInt(digits) + +proc isPositiveIntText(s: string): bool = + let text = markerText(s) + text.len > 0 and text.allCharsInSet({'0'..'9'}) and parseInt(text) > 0 + +proc readEpubEntry(epubPath, entryPath: string): string = + let process = startProcess( + "unzip", + args = ["-p", epubPath, entryPath], + options = {poUsePath, poStdErrToStdOut}) + result = process.outputStream.readAll() + let exitCode = process.waitForExit() + process.close() + + if exitCode != 0: + raise newException(IOError, + "could not read " & entryPath & " from " & epubPath & ": " & result) + +proc textContent(node: XmlNode): string = + case node.kind + of xnText: + result = node.text + of xnElement: + for child in node.items: + result.add(textContent(child)) + else: + discard + +proc firstDescendant(node: XmlNode, tag: string): XmlNode = + if node.kind == xnElement: + if node.tag == tag: + return node + + for child in node.items: + let found = firstDescendant(child, tag) + if not found.isNil: + return found + +proc descendantText(node: XmlNode, tag: string): string = + let found = firstDescendant(node, tag) + if found.isNil: "" + else: normalizeWhitespace(textContent(found)) + +proc descendantAttr(node: XmlNode, tag, attrName: string): string = + let found = firstDescendant(node, tag) + if found.isNil: "" + else: found.attr(attrName) + +proc bookCodeForLabel(label: string): string = + let bookName = label.split("(", maxsplit = 1)[0].strip + if bookName == "Solomon": + return "SNG" + + for book in CanonBooks: + if book.name == bookName: + return book.code + +proc indexFromSplitFile(path: string): int = + let filename = path.split('#', maxsplit = 1)[0].extractFilename + if not filename.startsWith("index_split_") or not filename.endsWith(".html"): + return 0 + + parseInt(filename["index_split_".len ..< filename.len - ".html".len]) + +proc parseTocEntries(epubPath: string): seq[TocEntry] = + let toc = parseXml(newStringStream(readEpubEntry(epubPath, "toc.ncx"))) + var entries: seq[TocEntry] = @[] + + proc walk(node: XmlNode) = + if node.kind == xnElement and node.tag == "navPoint": + let label = node.descendantText("text") + let src = node.descendantAttr("content", "src") + let fileIndex = indexFromSplitFile(src) + if fileIndex > 0: + entries.add(TocEntry( + label: label, + code: bookCodeForLabel(label), + fileIndex: fileIndex)) + + if node.kind == xnElement: + for child in node.items: + walk(child) + + walk(toc) + entries + +proc bookSources(entries: seq[TocEntry]): seq[BookSource] = + for idx, entry in entries: + if entry.code.len == 0: + continue + + let endIndex = + if idx + 1 < entries.len: + entries[idx + 1].fileIndex - 1 + else: + entry.fileIndex + + result.add(BookSource( + code: entry.code, + startIndex: entry.fileIndex, + endIndex: endIndex)) + + if result.len != CanonBooks.len: + raise newException(ValueError, + "expected " & $CanonBooks.len & " canonical books in EPUB TOC, found " & + $result.len) + + for idx, book in CanonBooks: + if result[idx].code != book.code: + raise newException(ValueError, + "expected " & book.code & " at position " & $idx & ", found " & + result[idx].code) + +proc hasClass(node: XmlNode, className: string): bool = + if node.kind != xnElement: + return false + + for value in node.attr("class").splitWhitespace: + if value == className: + return true + +proc shouldSkipElement(node: XmlNode): bool = + node.hasClass("calibre_29") or # section headings + node.hasClass("calibre_6") or # parallel/cross-reference paragraphs + node.hasClass("calibre_26") # Psalm superscriptions/cross-references + +proc hasHref(node: XmlNode): bool = + if node.kind == xnElement: + if node.attr("href").len > 0: + return true + + for child in node.items: + if hasHref(child): + return true + +proc isBlockElement(node: XmlNode): bool = + node.kind == xnElement and + node.tag in ["blockquote", "br", "div", "h1", "h2", "h3", "li", "p"] + +proc chapterMarker(node: XmlNode): int = + if node.kind == xnElement and node.tag == "span" and node.hasClass("calibre1"): + let text = markerText(textContent(node)) + if text.isPositiveIntText: + return parseInt(text) + +proc headingChapterMarker(node: XmlNode, code: string): int = + if node.kind != xnElement or node.tag != "p": + return 0 + + let text = textContent(node) + result = numberAfterPrefix(text, "CHAPTER ") + if result > 0: + return + + if code == "PSA": + result = numberAfterPrefix(text, "PSALM ") + +proc verseMarker(node: XmlNode): int = + if node.kind == xnElement and node.tag == "sup" and not node.hasHref: + let text = markerText(textContent(node)) + if text.isPositiveIntText: + return parseInt(text) + +proc leadingVerseText(s: string): tuple[verse: int, rest: string] = + let text = s.replace("\xC2\xA0", " ") + var idx = 0 + while idx < text.len and text[idx].isSpaceAscii: + inc idx + + let digitStart = idx + while idx < text.len and text[idx].isDigit: + inc idx + + if idx == digitStart: + return + + let numberText = text[digitStart ..< idx] + while idx < text.len and text[idx].isSpaceAscii: + inc idx + + result.verse = parseInt(numberText) + if idx < text.len: + result.rest = text[idx .. ^1] + +proc flushVerse(state: var ParseState) = + if state.chapter > 0 and state.verse > 0: + let text = normalizeWhitespace(state.verseText).replace("\t", " ") + if text.len > 0: + state.rows.add([state.code, $state.chapter, $state.verse, text].join("\t")) + + state.verseText = "" + +proc walkPassageText(node: XmlNode, state: var ParseState) = + case node.kind + of xnText: + if state.chapter > 0: + if state.verse == 0: + let leading = leadingVerseText(node.text) + if leading.verse > 0: + state.verse = leading.verse + state.verseText.add(leading.rest) + elif state.verse > 0: + state.verseText.add(node.text) + of xnElement: + let headingChapter = headingChapterMarker(node, state.code) + if headingChapter > 0: + state.flushVerse() + state.chapter = headingChapter + state.verse = 0 + return + + if node.shouldSkipElement: + return + + let chapter = chapterMarker(node) + if chapter > 0: + state.flushVerse() + state.chapter = chapter + state.verse = 1 + return + + let verse = verseMarker(node) + if verse > 0: + state.flushVerse() + state.verse = verse + return + + if node.tag == "sup": + return + + for child in node.items: + walkPassageText(child, state) + + if node.isBlockElement and state.chapter > 0 and state.verse > 0: + state.verseText.add(' ') + else: + discard + +proc indexSplitFile(index: int): string = + "index_split_" & align($index, 3, '0') & ".html" + +proc parseBook(epubPath: string, source: BookSource): seq[string] = + var state = ParseState(code: source.code) + if bookInfo(source.code).singleChapter: + state.chapter = 1 + + for index in source.startIndex .. source.endIndex: + let html = readEpubEntry(epubPath, indexSplitFile(index)) + let doc = parseHtml(newStringStream(html)) + walkPassageText(doc, state) + + state.flushVerse() + state.rows + +proc generate(epubPath, outputPath: string) = + let sources = bookSources(parseTocEntries(epubPath)) + var rows: seq[string] = @[] + + for source in sources: + rows.add(parseBook(epubPath, source)) + + createDir(outputPath.parentDir) + writeFile(outputPath, rows.join("\n") & "\n") + +when isMainModule: + if paramCount() != 2: + quit("Usage: generate_mev_data ", QuitFailure) + + generate(paramStr(1), paramStr(2))