13 Commits

19 changed files with 32643 additions and 103 deletions
+4
View File
@@ -1,2 +1,6 @@
esv_api esv_api
bibleref
tests/test_offline_kjv
tests/test_passage_query
data/private/
*.sw? *.sw?
+19
View File
@@ -0,0 +1,19 @@
# Package
version = "1.1.0"
author = "Jonathan Bernard"
description = "Simple Nim CLI for retrieving Biblical passages"
license = "MIT"
srcDir = "src"
bin = @["bibleref"]
# Dependencies
requires "nim >= 1.6.10"
requires "docopt"
requires "nimquery"
requires "zero_functional"
# dependencies from git.jdb-software.com/jdb/nim-packages.git
requires "cliutils"
+31102
View File
File diff suppressed because it is too large Load Diff
-17
View File
@@ -1,17 +0,0 @@
# Package
version = "0.2.1"
author = "Jonathan Bernard"
description = "Simple Nim CLI wrapper around the ESV API (api.esv.org)"
license = "MIT"
srcDir = "src"
bin = @["esv_api"]
# Dependencies
requires "nim >= 1.6.10"
requires @["docopt", "zero_functional"]
# dependencies from git.jdb-software.com/jdb/nim-packages.git
requires @["cliutils"]
+2
View File
@@ -0,0 +1,2 @@
[tools]
nim = "latest"
+103
View File
@@ -0,0 +1,103 @@
import std/[httpclient, json, logging, strutils, uri]
const apiBibleRoot* = "https://rest.api.bible/v1"
proc configBibleIdKey(translation: string): string =
  ## Returns the configuration key name reported to the user when no Bible ID
  ## can be resolved for `translation`, e.g. "niv" -> "apiBibleNivBibleId".
  ##
  ## The translation is lower-cased and then capitalised. An empty
  ## translation yields "apiBibleBibleId" instead of failing on an
  ## out-of-range index.
  "apiBible" & translation.toLowerAscii.capitalizeAscii & "BibleId"
proc defaultBibleId*(translation: string): string =
  ## Built-in API.Bible Bible ID for `translation` (matched without regard to
  ## ASCII case). Returns "" when no default is known.
  result = ""
  if translation.toLowerAscii == "niv":
    result = "78a9f6124f344018-01"
proc apiGet(apiRoot, path, query, apiKey: string): JsonNode =
  ## Performs a GET request against `apiRoot & path` (with `?query` appended
  ## when `query` is non-empty), sending `apiKey` in the "api-key" header,
  ## and returns the response body parsed as JSON.
  ##
  ## Trailing slashes on `apiRoot` are removed before `path` is appended.
  ## The HTTP client is closed before returning, including on failure.
  var urlPath = apiRoot.strip(leading = false, chars = {'/'}) & path
  if query.len > 0:
    urlPath &= "?" & query
  debug "requesting " & urlPath
  let http = newHttpClient()
  defer: http.close()  # release the socket even if the request or parse fails
  http.headers = newHttpHeaders({"api-key": apiKey})
  result = parseJson(http.getContent(urlPath))
proc resolveBibleId(translation, apiKey, apiRoot, configuredBibleId: string): string =
  ## Determines the API.Bible Bible ID to use for `translation`.
  ##
  ## Resolution order:
  ## 1. `configuredBibleId`, when non-blank (returned stripped);
  ## 2. the built-in default from `defaultBibleId`;
  ## 3. a `/bibles` lookup: the first entry whose `abbreviation` or
  ##    `abbreviationLocal` equals the translation (ASCII case-insensitive),
  ##    otherwise the first entry returned.
  ##
  ## Raises `ValueError` when the lookup returns no entries.
  let explicitId = configuredBibleId.strip
  if explicitId.len > 0:
    return explicitId

  let wanted = translation.toLowerAscii
  let builtinId = defaultBibleId(wanted)
  if builtinId.len > 0:
    return builtinId

  let query =
    "language=eng&abbreviation=" & encodeUrl(wanted.toUpperAscii) &
    "&include-full-details=false"
  let candidates = apiGet(apiRoot, "/bibles", query, apiKey)["data"].getElems

  for candidate in candidates:
    let abbreviation =
      if candidate.hasKey("abbreviation"): candidate["abbreviation"].getStr
      else: ""
    let localAbbreviation =
      if candidate.hasKey("abbreviationLocal"):
        candidate["abbreviationLocal"].getStr
      else: ""
    if wanted in [abbreviation.toLowerAscii, localAbbreviation.toLowerAscii]:
      return candidate["id"].getStr

  if candidates.len == 0:
    raise newException(ValueError,
      "could not find an API.Bible Bible ID for '" & translation &
      "'; configure " & configBibleIdKey(wanted))
  # No exact abbreviation match: fall back to the first search result.
  candidates[0]["id"].getStr
proc resolvePassageId(reference, bibleId, apiKey, apiRoot: string): string =
  ## Resolves a free-text `reference` to a passage ID using the
  ## `/bibles/<bibleId>/search` endpoint.
  ##
  ## Returns the ID of the first entry in `data.passages` when there is one;
  ## otherwise the ID of the only entry in `data.verses` when it holds exactly
  ## one verse. Raises `ValueError` if neither applies.
  let searchPath = "/bibles/" & encodeUrl(bibleId) & "/search"
  let searchQuery = "query=" & encodeUrl(reference) & "&limit=1&sort=canonical"
  let data = apiGet(apiRoot, searchPath, searchQuery, apiKey)["data"]

  if data.hasKey("passages"):
    let passages = data["passages"].getElems
    if passages.len > 0:
      return passages[0]["id"].getStr

  if data.hasKey("verses"):
    let verses = data["verses"].getElems
    if verses.len == 1:
      return verses[0]["id"].getStr

  raise newException(ValueError,
    "could not resolve passage reference '" & reference & "' using API.Bible")
proc fetchPassages*(
    reference,
    translation,
    apiKey,
    apiRoot,
    configuredBibleId: string): seq[string] =
  ## Fetches `reference` from API.Bible as plain text.
  ##
  ## Resolves the Bible ID, then the passage ID, then requests the passage
  ## content. Returns a single-element sequence holding the passage's
  ## reference, a newline, and its content.
  let bibleId = resolveBibleId(translation, apiKey, apiRoot, configuredBibleId)
  let passageId = resolvePassageId(reference, bibleId, apiKey, apiRoot)
  let passagePath =
    "/bibles/" & encodeUrl(bibleId) & "/passages/" & encodeUrl(passageId)
  let passageQuery =
    "content-type=text&include-notes=false&include-titles=true" &
    "&include-chapter-numbers=false&include-verse-numbers=true" &
    "&include-verse-spans=false"
  let passage = apiGet(apiRoot, passagePath, passageQuery, apiKey)["data"]
  result = @[passage["reference"].getStr & "\n" & passage["content"].getStr]
+188
View File
@@ -0,0 +1,188 @@
# Nim CLI for retrieving Biblical passages
# © 2023 Jonathan Bernard
## Simple command-line tool for retrieving Biblical passages.
import std/[json, logging, os, re, strutils, wordwrap]
import cliutils, docopt, zero_functional
import ./api_bible
import ./esv
import ./kjv
import ./mev
import ./passage_query
proc formatMarkdown(raw, translation: string): string =
  ## Formats a raw passage as a Markdown block quote.
  ##
  ## The first non-blank line of `raw` is used as the reference. Verse text
  ## starts at the first line matching `<whitespace>[<number>]` and stops at a
  ## line starting with "Footnotes". Within the verse text, `(n)` markers are
  ## removed, `[n]` verse numbers become `**n**`, and lines are wrapped at 74
  ## columns. Every output line is prefixed with "> " and an attribution line
  ## with the reference and upper-cased `translation` is appended.
  var reference = ""
  var inVerse = false
  var verseLines = newSeq[string]()
  for line in raw.splitLines:
    if reference.len == 0: reference = line.strip
    if inVerse:
      if line.startsWith("Footnotes"): inVerse = false
      elif line.isEmptyOrWhitespace and verseLines[^1] != "":
        # Keep at most one blank line between stanzas/paragraphs.
        verseLines.add("")
      elif not line.match(re"^\s+[^\s]"): continue
      elif line.endsWith("(ESV)"):
        # Drop the trailing "(ESV)" marker (5 characters). The previous
        # pattern began with `$` and therefore could never match.
        verseLines.add(line[0 ..< ^5])
      else: verseLines.add(line)
    elif line.match(re"^\s+\[\d+\]"):
      inVerse = true
      verseLines.add(line)

  var wrappedLines = newSeqOfCap[string](verseLines.len)
  for verseLine in verseLines:
    let padded =
      if verseLine.len > 90: verseLine.strip
      else: verseLine & " "
    let marked = padded.multiReplace(
      [(re"\((\d+)\)", ""), (re"\[(\d+)\]", "**$1**")])
    wrappedLines.add(wrapWords(marked, maxLineWidth = 74, newLine = "\p"))
  let wrapped = wrappedLines.join("\p")

  var quoted = newSeq[string]()
  for wrappedLine in wrapped.splitLines:
    quoted.add("> " & wrappedLine)
  result = quoted.join("\p") & "\p> -- *" & reference & " (" &
    translation.toUpperAscii & ")*"
proc formatPlain(
    raw,
    translation: string,
    keepVerseNumbers = true): string =
  ## Formats a raw passage as plain wrapped text.
  ##
  ## The first non-blank line of `raw` is used as the reference. Verse text
  ## starts at the first line matching `<whitespace>[<number>]` and stops at a
  ## line starting with "Footnotes". `(n)` markers are removed; `[n]` verse
  ## numbers are rendered as `n` when `keepVerseNumbers` is true and removed
  ## otherwise. Lines are wrapped at 74 columns and followed by a line with
  ## the reference and upper-cased `translation`.
  var reference = ""
  var inVerse = false
  var verseLines = newSeq[string]()
  for line in raw.splitLines:
    if reference.len == 0: reference = line.strip
    if inVerse:
      if line.startsWith("Footnotes"): inVerse = false
      elif line.isEmptyOrWhitespace and verseLines[^1] != "":
        # Keep at most one blank line between stanzas/paragraphs.
        verseLines.add("")
      elif not line.match(re"^\s+[^\s]"): continue
      elif line.endsWith("(ESV)"):
        # Drop the trailing "(ESV)" marker (5 characters). The previous
        # pattern began with `$` and therefore could never match.
        verseLines.add(line[0 ..< ^5])
      else: verseLines.add(line)
    elif line.match(re"^\s+\[\d+\]"):
      inVerse = true
      verseLines.add(line)

  # "$1" keeps the captured verse number; "" removes it entirely.
  let verseNumberRepl = if keepVerseNumbers: "$1" else: ""

  var wrappedLines = newSeqOfCap[string](verseLines.len)
  for verseLine in verseLines:
    let padded =
      if verseLine.len > 90: verseLine.strip
      else: verseLine & " "
    let cleaned = padded.multiReplace(
      [(re"\((\d+)\)", ""), (re"\[(\d+)\]", verseNumberRepl)])
    wrappedLines.add(wrapWords(cleaned, maxLineWidth = 74, newLine = "\p"))
  let wrapped = wrappedLines.join("\p")

  result = wrapped.splitLines.join("\p") & "\p " & reference & " (" &
    translation.toUpperAscii & ")"
proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[string] =
  ## Dispatches a passage lookup to the backend that serves `translation`:
  ## the ESV API, the embedded KJV/MEV data, or API.Bible. Backend settings
  ## are read from `cfg`. Raises `ValueError` for any other translation.
  if translation == "esv":
    result = esv.fetchPassages(
      reference,
      cfg.getVal("esv-api-token"),
      cfg.getVal("esv-api-root", "https://api.esv.org"))
  elif translation in ["akjv", "kjv"]:
    result = kjv.fetchPassages(reference)
  elif translation == "mev":
    result = mev.fetchPassages(reference)
  elif translation in ["amp", "nkjv", "niv"]:
    result = api_bible.fetchPassages(
      reference,
      translation,
      cfg.getVal("api-bible-api-key"),
      cfg.getVal("api-bible-root", api_bible.apiBibleRoot),
      cfg.getVal(
        "api-bible-" & translation & "-bible-id",
        api_bible.defaultBibleId(translation)))
  else:
    raise newException(ValueError,
      "unsupported translation '" & translation &
      "'; supported translations: " & supportedTranslationsList())
when isMainModule:
  # Command-line entry point: parses arguments, loads the optional JSON config
  # file from the home directory, fetches every requested passage, and prints
  # the passages in the selected output format.
  const USAGE = """Usage:
  bibleref <reference> [options]

Options:

  --debug                         Log debug information.

  --echo-args                     Echo back the arguments that were passed on the
                                  command line for debugging purposes.

  -f, --output-format <format>    Select a specific output format. Valid values
                                  are 'raw', 'text', 'markdown', 'plain', and
                                  'reading'. Defaults to 'markdown'.

  -t, --translation <translation>
                                  Select a specific translation. Supported values
                                  are 'akjv', 'amp', 'esv', 'kjv', 'mev',
                                  'nkjv', and 'niv'. Defaults to 'esv'.
                                  Individual references may override this with a
                                  trailing marker, for example:
                                  'John 3:16 (KJV); John 3:16 (ESV)'.

  --esv-api-token <token>         Provide the API token on the command line. By
                                  default this will be read either from the
                                  .bibleref.cfg.json file or the ESV_API_TOKEN
                                  environment variable.

  --api-bible-api-key <key>       Provide the API.Bible API key for translations
                                  backed by api.bible.

  --api-bible-root <url>          Override the API.Bible API root. Defaults to
                                  https://rest.api.bible/v1.

  --api-bible-amp-bible-id <id>   Override the API.Bible Bible ID for AMP.

  --api-bible-niv-bible-id <id>   Override the API.Bible Bible ID for NIV.

  --api-bible-nkjv-bible-id <id>
                                  Override the API.Bible Bible ID for NKJV.
"""

  let consoleLogger = newConsoleLogger(
    levelThreshold=lvlInfo,
    fmtStr="bibleref - $levelname: ")
  logging.addHandler(consoleLogger)

  try:
    # Parse arguments
    let args = docopt(USAGE, version = "1.1.0")

    if args["--debug"]:
      consoleLogger.levelThreshold = lvlDebug

    if args["--echo-args"]: stderr.writeLine($args)

    # Values from the config file are combined with command-line options.
    let cfgFilePath = getEnv("HOME") / ".bibleref.cfg.json"
    var cfgFileJson = newJObject()
    if fileExists(cfgFilePath):
      debug "Loading config from " & cfgFilePath
      cfgFileJson = parseFile(cfgFilePath)

    let cfg = CombinedConfig(docopt: args, json: cfgFileJson)
    let defaultTranslation = cfg.getVal("translation", "esv")
    let reference = $args["<reference>"]
    let queries = parsePassageQueries(reference, defaultTranslation)

    var formattedPassages: seq[string] = @[]
    for query in queries:
      for passage in fetchPassages(query.referenceText, query.translation, cfg):
        formattedPassages.add(
          case $args["--output-format"]:
          of "plain":
            formatPlain(passage, query.translation)
          of "reading":
            formatPlain(passage, query.translation, keepVerseNumbers = false)
          of "text":
            passage.multiReplace([(re"\[(\d+)\]", "$1")])
          of "raw":
            passage
          else:
            # Any other value (including no value) selects Markdown.
            formatMarkdown(passage, query.translation))

    echo formattedPassages.join("\p\p")

  except CatchableError:
    fatal getCurrentExceptionMsg()
    debug getCurrentException().getStackTrace()
    quit(QuitFailure)
+112
View File
@@ -0,0 +1,112 @@
import std/[strutils, tables]
import ./reference_parser
type BibleIndex = object
  ## In-memory lookup tables for one embedded translation, filled in by
  ## `loadBibleIndex`.
  verses: Table[string, string]          ## verse text keyed by `verseKey`
  lastVerseByChapter: Table[string, int] ## highest verse seen, keyed by `chapterKey`
  lastChapterByBook: Table[string, int]  ## highest chapter seen, keyed by book code
  translationName: string                ## name used in error messages
proc verseKey(code: string, chapter, verse: int): string =
  ## Tab-separated lookup key "<code>\t<chapter>\t<verse>".
  [code, $chapter, $verse].join("\t")
proc chapterKey(code: string, chapter: int): string =
  ## Tab-separated lookup key "<code>\t<chapter>".
  result = code
  result.add('\t')
  result.add($chapter)
proc loadBibleIndex(rows, translationName: string): BibleIndex =
  ## Parses tab-separated `rows` ("code<TAB>chapter<TAB>verse<TAB>text", one
  ## verse per line) into a `BibleIndex`. Blank lines are skipped.
  ##
  ## Raises `ValueError` for a row without four fields or with a non-integer
  ## chapter/verse.
  result.translationName = translationName
  for row in rows.splitLines:
    if row.strip.len > 0:
      let fields = row.split('\t', maxsplit = 3)
      if fields.len != 4:
        raise newException(ValueError,
          "invalid embedded " & translationName & " row: " & row)
      let
        code = fields[0]
        chapter = parseInt(fields[1])
        verse = parseInt(fields[2])
      result.verses[verseKey(code, chapter, verse)] = fields[3]

      # Track the highest verse per chapter and the highest chapter per book.
      let cKey = chapterKey(code, chapter)
      result.lastVerseByChapter[cKey] =
        max(verse, result.lastVerseByChapter.getOrDefault(cKey, verse))
      result.lastChapterByBook[code] =
        max(chapter, result.lastChapterByBook.getOrDefault(code, chapter))
proc requireLastChapter(index: BibleIndex, code: string): int =
  ## Highest chapter number recorded for book `code`; raises `ValueError`
  ## when the index holds no data for that book.
  if code in index.lastChapterByBook:
    return index.lastChapterByBook[code]
  raise newException(ValueError,
    "no embedded " & index.translationName & " data for " & code)
proc requireLastVerse(index: BibleIndex, code: string, chapter: int): int =
  ## Highest verse number recorded for `chapter` of book `code`; raises
  ## `ValueError` when the index holds no data for that chapter.
  let cKey = chapterKey(code, chapter)
  if cKey in index.lastVerseByChapter:
    return index.lastVerseByChapter[cKey]
  raise newException(ValueError,
    "no embedded " & index.translationName & " data for " &
    bookInfo(code).name & " " & $chapter)
proc requireVerse(index: BibleIndex, code: string, chapter, verse: int): string =
  ## Text of the given verse; raises `ValueError` when the index holds no
  ## entry for it.
  let vKey = verseKey(code, chapter, verse)
  if vKey in index.verses:
    return index.verses[vKey]
  raise newException(ValueError,
    "no embedded " & index.translationName & " data for " &
    bookInfo(code).name & " " & $chapter & ":" & $verse)
proc addVerseLines(
    lines: var seq[string],
    index: BibleIndex,
    reference: PassageReference,
    range: RefRange) =
  ## Appends one " [verse] text" line to `lines` for every verse covered by
  ## `range` in the book of `reference`.
  ##
  ## A start verse of 0 means "from verse 1"; a finish verse of 0 means
  ## "through the last verse of the chapter". Raises `ValueError` when data is
  ## missing or a chapter's start verse lies after its end verse.
  let code = reference.book.code
  discard index.requireLastChapter(code)  # fails early for an unknown book

  for chapter in range.start.chapter .. range.finish.chapter:
    var firstVerse = 1
    if chapter == range.start.chapter and range.start.verse > 0:
      firstVerse = range.start.verse

    let lastVerse =
      if chapter != range.finish.chapter or range.finish.verse <= 0:
        index.requireLastVerse(code, chapter)
      else:
        range.finish.verse

    if firstVerse > lastVerse:
      raise newException(ValueError, "reference range starts after it ends")

    for verse in firstVerse .. lastVerse:
      let text = index.requireVerse(code, chapter, verse)
      lines.add(" [" & $verse & "] " & text)
proc fetchReference(index: BibleIndex, reference: PassageReference): string =
  ## Renders `reference` as text: the reference itself on the first line,
  ## followed by one " [verse] text" line per verse, joined with "\n".
  ## A reference without ranges yields the whole book.
  var lines = @[$reference]
  if reference.ranges.len > 0:
    for range in reference.ranges:
      lines.addVerseLines(index, reference, range)
  else:
    let code = reference.book.code
    for chapter in 1 .. index.requireLastChapter(code):
      for verse in 1 .. index.requireLastVerse(code, chapter):
        let text = index.requireVerse(code, chapter, verse)
        lines.add(" [" & $verse & "] " & text)
  result = lines.join("\n")
proc fetchPassages*(rows, reference, translationName: string): seq[string] =
  ## Builds an index from `rows` and returns one rendered passage per
  ## reference parsed out of `reference`.
  let index = loadBibleIndex(rows, translationName)
  let parsedReferences = parseReferences(reference)
  result = newSeqOfCap[string](parsedReferences.len)
  for parsedReference in parsedReferences:
    result.add(index.fetchReference(parsedReference))
+13
View File
@@ -0,0 +1,13 @@
import std/[httpclient, json, logging, uri]
proc fetchPassages*(reference, apiToken, apiRoot: string): seq[string] =
  ## Requests `reference` from the `/v3/passage/text/` endpoint under
  ## `apiRoot`, authenticating with `apiToken`, and returns each entry of the
  ## response's "passages" array as a string.
  ##
  ## The HTTP client is closed before returning, including on failure.
  let http = newHttpClient()
  defer: http.close()  # release the socket even if the request or parse fails
  http.headers = newHttpHeaders({"Authorization": "Token " & apiToken})

  let urlPath = apiRoot & "/v3/passage/text/?q=" & encodeUrl(reference)
  debug "requesting " & urlPath

  let respJson = parseJson(http.getContent(urlPath))
  result = @[]
  for passage in respJson["passages"].getElems:
    result.add(passage.getStr)
-86
View File
@@ -1,86 +0,0 @@
# Nim CLI Wrapper for the ESV API
# © 2023 Jonathan Bernard
## Simple command-line wrapper around the ESV API.
import std/[httpclient, json, logging, os, re, strutils, uri, wordwrap]
import cliutils, docopt, zero_functional
proc formatMarkdown(raw: string): string =
  ## Formats a raw ESV passage as a Markdown block quote.
  ##
  ## Every line is stripped; only lines beginning with a `[n]` verse marker
  ## are kept. `(n)` markers are removed, `[n]` becomes `**n**`, and lines are
  ## wrapped at 74 columns. The first stripped line is used as the reference
  ## in the closing attribution.
  var rawLines = newSeq[string]()
  for rawLine in raw.splitLines:
    rawLines.add(rawLine.strip)

  var wrappedLines = newSeq[string]()
  for stripped in rawLines:
    if isEmptyOrWhitespace(stripped.strip) or
        not match(stripped, re"^\[\d+\].*"):
      continue
    let marked = stripped.multiReplace(
      [(re"\((\d+)\)", ""), (re"\[(\d+)\]", "**$1**")])
    wrappedLines.add(wrapWords(marked, maxLineWidth = 74, newLine = "\p"))
  let wrapped = wrappedLines.join("\p")

  var quoted = newSeq[string]()
  for wrappedLine in wrapped.splitLines:
    quoted.add("> " & wrappedLine)
  result = quoted.join("\p") & "\p>\p> -- *" & rawLines[0] & " (ESV)*"
when isMainModule:
  # Command-line entry point: parses arguments, loads the optional JSON config
  # file from the home directory, requests the passage from the ESV API, and
  # prints it in the selected output format.
  const USAGE = """Usage:
  esv_api <reference> [options]

Options:

  --debug                       Log debug information.

  --echo-args                   Echo back the arguments that were passed on the
                                command line for debugging purposes.

  -f, --output-format <format>  Select a specific output format. Valid values
                                are 'raw', 'text', 'markdown'. Defaults to
                                'markdown'.

  -t, --esv-api-token <token>   Provide the API token on the command line. By
                                default this will be read either from the
                                .esv_api.cfg.json file or the ESV_API_TOKEN
                                environment variable.
"""

  let consoleLogger = newConsoleLogger(
    levelThreshold=lvlInfo,
    fmtStr="esv_api - $levelname: ")
  logging.addHandler(consoleLogger)

  try:
    # Parse arguments
    let args = docopt(USAGE, version = "0.2.1")

    if args["--debug"]:
      consoleLogger.levelThreshold = lvlDebug

    if args["--echo-args"]: stderr.writeLine($args)

    # Values from the config file are combined with command-line options.
    let cfgFilePath = getEnv("HOME") / ".esv_api.cfg.json"
    var cfgFileJson = newJObject()
    if fileExists(cfgFilePath):
      debug "Loading config from " & cfgFilePath
      cfgFileJson = parseFile(cfgFilePath)

    let cfg = CombinedConfig(docopt: args, json: cfgFileJson)
    let apiToken = cfg.getVal("esv-api-token")
    let apiRoot = cfg.getVal("esv-api-root", "https://api.esv.org")
    let reference = $args["<reference>"]

    let http = newHttpClient()
    http.headers = newHttpHeaders({"Authorization": "Token " & apiToken})

    let urlPath = apiRoot & "/v3/passage/text/?q=" & encodeUrl(reference)
    debug "requesting " & urlPath

    let respJson = parseJson(http.getContent(urlPath))

    let formattedPassages =
      case $args["--output-format"]:
      of "text":
        respJson["passages"].getElems -->
          map(it.getStr.multiReplace([(re"\[(\d+)\]", "$1")]))
      of "raw": respJson["passages"].getElems --> map(it.getStr)
      else:
        # Any other value (including no value) selects Markdown.
        respJson["passages"].getElems --> map(formatMarkdown(it.getStr))

    echo formattedPassages.join("\p\p")

  except CatchableError:
    fatal getCurrentExceptionMsg()
    debug getCurrentException().getStackTrace()
    quit(QuitFailure)
+7
View File
@@ -0,0 +1,7 @@
import ./embedded_bible
import ./offline_data
const kjvRows = embeddedTranslationData("kjv")
proc fetchPassages*(reference: string): seq[string] =
  ## Looks up `reference` in the embedded KJV rows.
  result = embedded_bible.fetchPassages(kjvRows, reference, "KJV")
+13
View File
@@ -0,0 +1,13 @@
import ./offline_data
when hasEmbeddedTranslationData("mev"):
import ./embedded_bible
const mevRows = embeddedTranslationData("mev")
proc fetchPassages*(reference: string): seq[string] =
  ## Looks up `reference` in the embedded MEV rows. When no MEV data was
  ## available at compile time, always raises `ValueError`.
  when not hasEmbeddedTranslationData("mev"):
    raise newException(ValueError,
      "MEV data is not embedded; generate data/private/mev.tsv and rebuild")
  else:
    result = embedded_bible.fetchPassages(mevRows, reference, "MEV")
+15
View File
@@ -0,0 +1,15 @@
import std/os
template translationDataPath(name: static[string], visibility: static[string]): string =
  ## Builds the path `<dataRoot>/<visibility>/<name>.tsv`, where `dataRoot` is
  ## the `data` directory two `parentDir` steps above `currentSourcePath()`.
  # NOTE(review): presumably `currentSourcePath()` resolves to this module's
  # file, making `dataRoot` the repository's `data` directory - confirm.
  const dataRoot = currentSourcePath().parentDir.parentDir / "data"
  dataRoot / visibility / (name & ".tsv")
template hasEmbeddedTranslationData*(name: static[string]): bool =
  ## True when a `<name>.tsv` file exists under either the "private" or the
  ## "public" data directory. Callers in this project use it in `when`
  ## conditions, i.e. at compile time.
  fileExists(translationDataPath(name, "private")) or
    fileExists(translationDataPath(name, "public"))
template embeddedTranslationData*(name: static[string]): string =
  ## Embeds the contents of `<name>.tsv` via `staticRead`, preferring the
  ## "private" copy when it exists and falling back to the "public" one.
  when fileExists(translationDataPath(name, "private")):
    staticRead(translationDataPath(name, "private"))
  else:
    staticRead(translationDataPath(name, "public"))
+64
View File
@@ -0,0 +1,64 @@
import std/strutils
import ./reference_parser
type PassageQuery* = object
  ## One parsed reference together with the translation it should be
  ## fetched from.
  reference*: PassageReference  ## parsed book and ranges
  translation*: string          ## translation code as returned by `normalizeTranslation`
const SupportedTranslations* = [
"akjv", "amp", "esv", "kjv", "mev", "niv", "nkjv"
]
proc supportedTranslationsList*(): string =
  ## Comma-separated list of the supported translation codes, for use in
  ## messages.
  result = join(SupportedTranslations, ", ")
proc normalizeTranslation*(translation: string): string =
  ## Returns `translation` stripped and lower-cased when it names a supported
  ## translation; raises `ValueError` otherwise.
  result = translation.strip.toLowerAscii
  if result notin SupportedTranslations:
    raise newException(ValueError,
      "unsupported translation '" & translation &
      "'; supported translations: " & supportedTranslationsList())
proc splitTrailingTranslationMarker(
    input: string): tuple[referenceText: string, translation: string] =
  ## Splits a trailing "(XYZ)" marker off a reference.
  ##
  ## "John 3:16 (KJV)" yields ("John 3:16", "KJV"). When the stripped input
  ## does not end in ")", has no "(", or either part would be empty, the
  ## stripped input is returned unchanged with an empty translation.
  let text = input.strip
  result = (text, "")
  if text.endsWith(")"):
    let openIdx = text.rfind("(")
    if openIdx >= 0:
      let head = text[0 ..< openIdx].strip
      let marker = text[openIdx + 1 ..< text.len - 1].strip
      if head.len > 0 and marker.len > 0:
        result = (head, marker)
proc parsePassageQuery*(input, defaultTranslation: string): PassageQuery =
  ## Parses one reference with an optional trailing "(translation)" marker.
  ## The marker, when present, overrides `defaultTranslation`; whichever is
  ## used is validated by `normalizeTranslation`.
  let (referenceText, marker) = splitTrailingTranslationMarker(input)
  result.reference = parseReference(referenceText)
  let chosen = if marker.len > 0: marker else: defaultTranslation
  result.translation = normalizeTranslation(chosen)
proc parsePassageQueries*(input, defaultTranslation: string): seq[PassageQuery] =
  ## Parses a ';'-separated list of references. Blank entries are ignored;
  ## raises `ValueError` when no entry remains.
  for piece in input.split(';'):
    let trimmed = piece.strip
    if trimmed.len == 0:
      continue
    result.add(parsePassageQuery(trimmed, defaultTranslation))
  if result.len == 0:
    raise newException(ValueError, "empty Bible reference")
proc referenceText*(query: PassageQuery): string =
  ## The query's reference rendered back to text via its `$` operator.
  result = $(query.reference)
+399
View File
@@ -0,0 +1,399 @@
import std/[strutils]
type
  BookInfo* = object
    ## Static description of one book.
    code*: string         ## three-character book code, e.g. "GEN"
    name*: string         ## display name, e.g. "Genesis"
    singleChapter*: bool  ## true for books with a single chapter (e.g. Jude)
  RefPoint* = object
    ## A position inside a book.
    chapter*: int
    verse*: int           ## 0 means "whole chapter" (no verse given)
  RefRange* = object
    ## An inclusive span; `start == finish` for a single point.
    start*: RefPoint
    finish*: RefPoint
  PassageReference* = object
    ## A book plus zero or more ranges within it.
    book*: BookInfo
    ranges*: seq[RefRange]  ## empty when only the book was given
const CanonBooks*: array[66, BookInfo] = [
BookInfo(code: "GEN", name: "Genesis"),
BookInfo(code: "EXO", name: "Exodus"),
BookInfo(code: "LEV", name: "Leviticus"),
BookInfo(code: "NUM", name: "Numbers"),
BookInfo(code: "DEU", name: "Deuteronomy"),
BookInfo(code: "JOS", name: "Joshua"),
BookInfo(code: "JDG", name: "Judges"),
BookInfo(code: "RUT", name: "Ruth"),
BookInfo(code: "1SA", name: "1 Samuel"),
BookInfo(code: "2SA", name: "2 Samuel"),
BookInfo(code: "1KI", name: "1 Kings"),
BookInfo(code: "2KI", name: "2 Kings"),
BookInfo(code: "1CH", name: "1 Chronicles"),
BookInfo(code: "2CH", name: "2 Chronicles"),
BookInfo(code: "EZR", name: "Ezra"),
BookInfo(code: "NEH", name: "Nehemiah"),
BookInfo(code: "EST", name: "Esther"),
BookInfo(code: "JOB", name: "Job"),
BookInfo(code: "PSA", name: "Psalms"),
BookInfo(code: "PRO", name: "Proverbs"),
BookInfo(code: "ECC", name: "Ecclesiastes"),
BookInfo(code: "SNG", name: "Song of Solomon"),
BookInfo(code: "ISA", name: "Isaiah"),
BookInfo(code: "JER", name: "Jeremiah"),
BookInfo(code: "LAM", name: "Lamentations"),
BookInfo(code: "EZK", name: "Ezekiel"),
BookInfo(code: "DAN", name: "Daniel"),
BookInfo(code: "HOS", name: "Hosea"),
BookInfo(code: "JOL", name: "Joel"),
BookInfo(code: "AMO", name: "Amos"),
BookInfo(code: "OBA", name: "Obadiah", singleChapter: true),
BookInfo(code: "JON", name: "Jonah"),
BookInfo(code: "MIC", name: "Micah"),
BookInfo(code: "NAM", name: "Nahum"),
BookInfo(code: "HAB", name: "Habakkuk"),
BookInfo(code: "ZEP", name: "Zephaniah"),
BookInfo(code: "HAG", name: "Haggai"),
BookInfo(code: "ZEC", name: "Zechariah"),
BookInfo(code: "MAL", name: "Malachi"),
BookInfo(code: "MAT", name: "Matthew"),
BookInfo(code: "MRK", name: "Mark"),
BookInfo(code: "LUK", name: "Luke"),
BookInfo(code: "JHN", name: "John"),
BookInfo(code: "ACT", name: "Acts"),
BookInfo(code: "ROM", name: "Romans"),
BookInfo(code: "1CO", name: "1 Corinthians"),
BookInfo(code: "2CO", name: "2 Corinthians"),
BookInfo(code: "GAL", name: "Galatians"),
BookInfo(code: "EPH", name: "Ephesians"),
BookInfo(code: "PHP", name: "Philippians"),
BookInfo(code: "COL", name: "Colossians"),
BookInfo(code: "1TH", name: "1 Thessalonians"),
BookInfo(code: "2TH", name: "2 Thessalonians"),
BookInfo(code: "1TI", name: "1 Timothy"),
BookInfo(code: "2TI", name: "2 Timothy"),
BookInfo(code: "TIT", name: "Titus"),
BookInfo(code: "PHM", name: "Philemon", singleChapter: true),
BookInfo(code: "HEB", name: "Hebrews"),
BookInfo(code: "JAS", name: "James"),
BookInfo(code: "1PE", name: "1 Peter"),
BookInfo(code: "2PE", name: "2 Peter"),
BookInfo(code: "1JN", name: "1 John"),
BookInfo(code: "2JN", name: "2 John", singleChapter: true),
BookInfo(code: "3JN", name: "3 John", singleChapter: true),
BookInfo(code: "JUD", name: "Jude", singleChapter: true),
BookInfo(code: "REV", name: "Revelation")
]
const bookAliases = [
("GEN", "genesis"), ("GEN", "gen"),
("EXO", "exodus"), ("EXO", "exod"), ("EXO", "exo"),
("LEV", "leviticus"), ("LEV", "lev"),
("NUM", "numbers"), ("NUM", "num"), ("NUM", "numb"),
("DEU", "deuteronomy"), ("DEU", "deut"), ("DEU", "deu"),
("JOS", "joshua"), ("JOS", "josh"), ("JOS", "jos"),
("JDG", "judges"), ("JDG", "judg"), ("JDG", "jdg"),
("RUT", "ruth"), ("RUT", "rut"),
("1SA", "1 samuel"), ("1SA", "1 sam"), ("1SA", "i samuel"), ("1SA", "first samuel"),
("2SA", "2 samuel"), ("2SA", "2 sam"), ("2SA", "ii samuel"), ("2SA", "second samuel"),
("1KI", "1 kings"), ("1KI", "1 kgs"), ("1KI", "1 kin"), ("1KI", "i kings"), ("1KI", "first kings"),
("2KI", "2 kings"), ("2KI", "2 kgs"), ("2KI", "2 kin"), ("2KI", "ii kings"), ("2KI", "second kings"),
("1CH", "1 chronicles"), ("1CH", "1 chron"), ("1CH", "1 chr"), ("1CH", "i chronicles"), ("1CH", "first chronicles"),
("2CH", "2 chronicles"), ("2CH", "2 chron"), ("2CH", "2 chr"), ("2CH", "ii chronicles"), ("2CH", "second chronicles"),
("EZR", "ezra"), ("EZR", "ezr"),
("NEH", "nehemiah"), ("NEH", "neh"),
("EST", "esther"), ("EST", "est"),
("JOB", "job"),
("PSA", "psalms"), ("PSA", "psalm"), ("PSA", "ps"), ("PSA", "psa"),
("PRO", "proverbs"), ("PRO", "prov"), ("PRO", "pro"),
("ECC", "ecclesiastes"), ("ECC", "eccl"), ("ECC", "ecc"),
("SNG", "song of solomon"), ("SNG", "song"), ("SNG", "songs"), ("SNG", "canticles"), ("SNG", "sng"),
("ISA", "isaiah"), ("ISA", "isa"),
("JER", "jeremiah"), ("JER", "jer"),
("LAM", "lamentations"), ("LAM", "lam"),
("EZK", "ezekiel"), ("EZK", "ezek"), ("EZK", "ezk"),
("DAN", "daniel"), ("DAN", "dan"),
("HOS", "hosea"), ("HOS", "hos"),
("JOL", "joel"), ("JOL", "jol"),
("AMO", "amos"), ("AMO", "amo"),
("OBA", "obadiah"), ("OBA", "obad"), ("OBA", "oba"),
("JON", "jonah"), ("JON", "jon"),
("MIC", "micah"), ("MIC", "mic"),
("NAM", "nahum"), ("NAM", "nah"),
("HAB", "habakkuk"), ("HAB", "hab"),
("ZEP", "zephaniah"), ("ZEP", "zeph"), ("ZEP", "zep"),
("HAG", "haggai"), ("HAG", "hag"),
("ZEC", "zechariah"), ("ZEC", "zech"), ("ZEC", "zec"),
("MAL", "malachi"), ("MAL", "mal"),
("MAT", "matthew"), ("MAT", "matt"), ("MAT", "mat"), ("MAT", "mt"),
("MRK", "mark"), ("MRK", "mrk"), ("MRK", "mk"),
("LUK", "luke"), ("LUK", "luk"), ("LUK", "lk"),
("JHN", "john"), ("JHN", "jhn"), ("JHN", "jn"),
("ACT", "acts"), ("ACT", "act"),
("ROM", "romans"), ("ROM", "rom"),
("1CO", "1 corinthians"), ("1CO", "1 cor"), ("1CO", "1 co"), ("1CO", "i corinthians"), ("1CO", "first corinthians"),
("2CO", "2 corinthians"), ("2CO", "2 cor"), ("2CO", "2 co"), ("2CO", "ii corinthians"), ("2CO", "second corinthians"),
("GAL", "galatians"), ("GAL", "gal"),
("EPH", "ephesians"), ("EPH", "eph"),
("PHP", "philippians"), ("PHP", "php"),
("COL", "colossians"), ("COL", "col"),
("1TH", "1 thessalonians"), ("1TH", "1 thess"), ("1TH", "1 thes"), ("1TH", "i thessalonians"), ("1TH", "first thessalonians"),
("2TH", "2 thessalonians"), ("2TH", "2 thess"), ("2TH", "2 thes"), ("2TH", "ii thessalonians"), ("2TH", "second thessalonians"),
("1TI", "1 timothy"), ("1TI", "1 tim"), ("1TI", "i timothy"), ("1TI", "first timothy"),
("2TI", "2 timothy"), ("2TI", "2 tim"), ("2TI", "ii timothy"), ("2TI", "second timothy"),
("TIT", "titus"), ("TIT", "tit"),
("PHM", "philemon"), ("PHM", "philem"), ("PHM", "phm"),
("HEB", "hebrews"), ("HEB", "heb"),
("JAS", "james"), ("JAS", "jas"), ("JAS", "jam"),
("1PE", "1 peter"), ("1PE", "1 pet"), ("1PE", "1 pe"), ("1PE", "i peter"), ("1PE", "first peter"),
("2PE", "2 peter"), ("2PE", "2 pet"), ("2PE", "2 pe"), ("2PE", "ii peter"), ("2PE", "second peter"),
("1JN", "1 john"), ("1JN", "1 jn"), ("1JN", "1 jhn"), ("1JN", "i john"), ("1JN", "first john"),
("2JN", "2 john"), ("2JN", "2 jn"), ("2JN", "2 jhn"), ("2JN", "ii john"), ("2JN", "second john"),
("3JN", "3 john"), ("3JN", "3 jn"), ("3JN", "3 jhn"), ("3JN", "iii john"), ("3JN", "third john"),
("JUD", "jude"), ("JUD", "jud"),
("REV", "revelation"), ("REV", "revelations"), ("REV", "rev"), ("REV", "apocalypse")
]
proc bookInfo*(code: string): BookInfo =
  ## Looks up the `CanonBooks` entry whose code equals `code` exactly;
  ## raises `ValueError` when there is none.
  for candidate in CanonBooks:
    if candidate.code != code:
      continue
    return candidate
  raise newException(ValueError, "unknown Bible book code '" & code & "'")
proc bookIndex*(code: string): int =
  ## Zero-based position in `CanonBooks` of the book whose code equals `code`;
  ## raises `ValueError` when there is none.
  for position in 0 ..< CanonBooks.len:
    if CanonBooks[position].code == code:
      return position
  raise newException(ValueError, "unknown Bible book code '" & code & "'")
proc normalizeReferenceInput(s: string): string =
  ## Replaces typographic dashes with an ASCII hyphen and strips surrounding
  ## whitespace, so ranges such as "3:16–18" parse like "3:16-18".
  ##
  ## The dash characters are written as `\u` escapes so they cannot be lost
  ## to encoding problems (the previous literals had degraded to empty
  ## strings, which match nothing useful).
  # NOTE(review): the original three characters are not recoverable from the
  # file; this covers the common Unicode dashes - confirm the intended set.
  s.multiReplace([
    ("\u2010", "-"),  # hyphen
    ("\u2011", "-"),  # non-breaking hyphen
    ("\u2012", "-"),  # figure dash
    ("\u2013", "-"),  # en dash
    ("\u2014", "-"),  # em dash
    ("\u2212", "-")   # minus sign
  ]).strip
proc normalizeBookPrefix(s: string): string =
  ## Keeps only ASCII letters (lower-cased) and digits from `s`, dropping
  ## spaces, punctuation, and everything else.
  result = ""
  for ch in s:
    if ch in Letters + Digits:
      # `toLowerAscii` leaves digits unchanged.
      result.add(ch.toLowerAscii)
proc canonicalNamePrefixMatches(prefix: string): seq[BookInfo] =
  ## All books, in `CanonBooks` order, whose normalized name starts with
  ## `prefix`.
  result = @[]
  for candidate in CanonBooks:
    let normalizedName = normalizeBookPrefix(candidate.name)
    if normalizedName.startsWith(prefix):
      result.add(candidate)
proc formatBookList(books: seq[BookInfo]): string =
  ## The display names of `books`, joined with ", ".
  result = ""
  for position, book in books:
    if position > 0:
      result.add(", ")
    result.add(book.name)
proc matchCanonicalBookPrefix(input: string): tuple[
    matched: bool,
    ambiguous: bool,
    book: BookInfo,
    consumed: int,
    prefix: string,
    matches: seq[BookInfo]] =
  ## Tries to recognise the leading part of `input` as a prefix of exactly one
  ## canonical book name.
  ##
  ## Every cut position that does not split a run of ASCII letters is tried,
  ## shortest first. A cut whose normalized prefix matches exactly one book
  ## sets `matched` (later unique cuts overwrite earlier ones). A cut matching
  ## several books is recorded as `ambiguous` only while no unique match has
  ## been found. `consumed` is the cut position and `prefix` the stripped
  ## text before it.
  for idx in 1 .. input.len:
    let atWordBoundary = idx >= input.len or not input[idx].isAlphaAscii
    if not atWordBoundary:
      continue
    let normalizedPrefix = normalizeBookPrefix(input[0 ..< idx])
    if normalizedPrefix.len == 0:
      continue
    let candidates = canonicalNamePrefixMatches(normalizedPrefix)
    case candidates.len
    of 0:
      discard
    of 1:
      result.matched = true
      result.ambiguous = false
      result.book = candidates[0]
      result.consumed = idx
      result.prefix = input[0 ..< idx].strip
      result.matches = candidates
    else:
      if not result.matched:
        result.ambiguous = true
        result.consumed = idx
        result.prefix = input[0 ..< idx].strip
        result.matches = candidates
proc matchAlias(input, alias: string): int =
  ## Matches `alias` against the start of `input`, ignoring ASCII case.
  ##
  ## A space or '.' in the alias matches any run (possibly empty) of spaces
  ## and dots in the input; dots in the input are otherwise skipped. Returns
  ## the number of input characters consumed, or -1 when the alias does not
  ## match or is immediately followed by another ASCII letter.
  var pos = 0
  for aliasCh in alias:
    if aliasCh.isSpaceAscii or aliasCh == '.':
      while pos < input.len and (input[pos] == '.' or input[pos].isSpaceAscii):
        inc pos
    else:
      while pos < input.len and input[pos] == '.':
        inc pos
      if pos >= input.len or
          input[pos].toLowerAscii != aliasCh.toLowerAscii:
        return -1
      inc pos
  # Swallow trailing abbreviation dots, e.g. "Gen." before the chapter.
  while pos < input.len and input[pos] == '.':
    inc pos
  result =
    if pos < input.len and input[pos].isAlphaAscii: -1
    else: pos
proc parseBook(input: string): tuple[book: BookInfo, rest: string] =
  ## Splits `input` into its book and the remaining (stripped) passage text.
  ##
  ## A unique canonical-name prefix wins; otherwise the alias consuming the
  ## most input is used. Raises `ValueError` when nothing matches, with a
  ## dedicated message when the canonical prefix was ambiguous.
  let canonical = matchCanonicalBookPrefix(input)
  if canonical.matched:
    return (canonical.book, input[canonical.consumed .. ^1].strip)

  var longest = -1
  var longestCode = ""
  for (code, alias) in bookAliases:
    let consumed = matchAlias(input, alias)
    if consumed > longest:
      longest = consumed
      longestCode = code

  if longest < 0:
    if canonical.ambiguous:
      raise newException(ValueError,
        "ambiguous Bible book prefix '" & canonical.prefix & "' in '" &
        input & "'; matches " & canonical.matches.formatBookList)
    raise newException(ValueError, "could not parse Bible book in '" & input & "'")

  result.book = bookInfo(longestCode)
  result.rest = input[longest .. ^1].strip
proc parsePositiveInt(s, label: string): int =
  ## Parses `s` as a strictly positive decimal integer. Raises `ValueError`
  ## (mentioning `label`) when `s` is empty, contains a non-digit, or is zero.
  var digitsOnly = s.len > 0
  for ch in s:
    if ch notin Digits:
      digitsOnly = false
      break
  if not digitsOnly:
    raise newException(ValueError, "invalid " & label & " '" & s & "'")
  result = parseInt(s)
  if result <= 0:
    raise newException(ValueError, label & " must be positive")
proc parsePoint(token: string, defaultChapter: int, singleChapter: bool): RefPoint =
  ## Parses one end of a range.
  ##
  ## "C:V" gives chapter C, verse V. A bare number is a verse of chapter 1 for
  ## single-chapter books, a verse of `defaultChapter` when that is positive,
  ## and otherwise a whole chapter (verse 0). Raises `ValueError` on empty or
  ## non-numeric input.
  let normalized = token.strip
  if normalized.len == 0:
    raise newException(ValueError, "empty reference point")

  let colonIdx = normalized.find(':')
  if colonIdx < 0:
    let value = parsePositiveInt(normalized, "reference number")
    if singleChapter:
      result = RefPoint(chapter: 1, verse: value)
    elif defaultChapter > 0:
      result = RefPoint(chapter: defaultChapter, verse: value)
    else:
      result = RefPoint(chapter: value, verse: 0)
  else:
    result = RefPoint(
      chapter: parsePositiveInt(normalized[0 ..< colonIdx], "chapter"),
      verse: parsePositiveInt(normalized[colonIdx + 1 .. ^1], "verse"))
proc parseRange(segment: string, defaultChapter: int, singleChapter: bool): RefRange =
  ## Parses "A" or "A-B" into a range (a single point gives
  ## `start == finish`).
  ##
  ## When the start carries a verse, a bare number after the dash is read as a
  ## verse of the start's chapter. Raises `ValueError` when the range ends
  ## before it starts.
  let normalized = segment.strip
  let dashIdx = normalized.find('-')
  if dashIdx < 0:
    result.start = parsePoint(normalized, defaultChapter, singleChapter)
    result.finish = result.start
  else:
    result.start = parsePoint(
      normalized[0 ..< dashIdx], defaultChapter, singleChapter)
    var endDefaultChapter = 0
    if result.start.verse > 0:
      endDefaultChapter = result.start.chapter
    result.finish = parsePoint(
      normalized[dashIdx + 1 .. ^1], endDefaultChapter, singleChapter)

  let first = result.start
  let last = result.finish
  let versesReversed =
    last.chapter == first.chapter and first.verse > 0 and
    last.verse > 0 and last.verse < first.verse
  if last.chapter < first.chapter or versesReversed:
    raise newException(ValueError, "range ends before it starts: '" & segment & "'")
proc parsePassageSpec(spec: string, book: BookInfo): seq[RefRange] =
  ## Parses a comma-separated list of ranges such as "3:16,20-21".
  ##
  ## After a segment that names verses, bare numbers in the next segment are
  ## read as verses of the chapter that segment ended in, so
  ## "3:16-4:2, 5" continues with 4:5. After a whole-chapter segment, bare
  ## numbers are read as chapters again. Raises `ValueError` for an empty
  ## segment.
  var currentChapter = 0
  for rawSegment in spec.split(','):
    let segment = rawSegment.strip
    if segment.len == 0:
      raise newException(ValueError, "empty passage range in '" & spec & "'")
    let range = parseRange(segment, currentChapter, book.singleChapter)
    result.add(range)
    if segment.contains(':') or (range.start.verse > 0 and range.finish.verse > 0):
      # Carry the chapter the range *ended* in; using the start chapter
      # mis-assigned verses following a cross-chapter range.
      currentChapter = range.finish.chapter
    else:
      currentChapter = 0
proc parseReference*(input: string): PassageReference =
  ## Parses a single reference such as "John 3:16-18". A reference that names
  ## only a book has no ranges.
  let (book, rest) = parseBook(normalizeReferenceInput(input))
  result.book = book
  if rest.len != 0:
    result.ranges = parsePassageSpec(rest, book)
proc parseReferences*(input: string): seq[PassageReference] =
  ## Parses a ';'-separated list of references. Blank entries are ignored;
  ## raises `ValueError` when no entry remains.
  for piece in input.split(';'):
    let trimmed = piece.strip
    if trimmed.len == 0:
      continue
    result.add(parseReference(trimmed))
  if result.len == 0:
    raise newException(ValueError, "empty Bible reference")
proc `$`*(point: RefPoint): string =
  ## Renders `chapter:verse`, or just `chapter` when no verse is set
  ## (verse of 0 or less).
  result = $point.chapter
  if point.verse > 0:
    result.add(":")
    result.add($point.verse)
proc `$`*(range: RefRange): string =
  ## Renders a range. A single point prints once, a verse range within one
  ## chapter prints the chapter once (`3:16-18`), and anything else prints
  ## both end points in full (`3:16-4:2`, `3-4`).
  let first = range.start
  let last = range.finish
  if first == last:
    $first
  elif first.chapter == last.chapter and first.verse > 0 and last.verse > 0:
    $first.chapter & ":" & $first.verse & "-" & $last.verse
  else:
    $first & "-" & $last
proc formatSingleChapterRange(range: RefRange): string =
  ## Renders a range for a single-chapter book, where only verse numbers
  ## are shown (`3`, `3-4`). Falls back to the full point rendering if the
  ## two ends are in different chapters.
  if range.start == range.finish:
    result = $range.start.verse
  elif range.start.chapter == range.finish.chapter:
    result = $range.start.verse
    result.add("-")
    result.add($range.finish.verse)
  else:
    result = $range.start
    result.add("-")
    result.add($range.finish)
proc `$`*(reference: PassageReference): string =
  ## Renders the book name followed by its ranges joined with `", "`.
  ## Single-chapter books print verse-only ranges; a reference with no
  ## ranges prints just the book name.
  result = reference.book.name
  if reference.ranges.len == 0:
    return
  var parts = newSeqOfCap[string](reference.ranges.len)
  for item in reference.ranges:
    if reference.book.singleChapter:
      parts.add(formatSingleChapterRange(item))
    else:
      parts.add($item)
  result.add(" ")
  result.add(parts.join(", "))
+84
View File
@@ -0,0 +1,84 @@
import std/[strutils, unittest]
import ../src/kjv
import ../src/reference_parser
suite "reference parser":
  # Covers book recognition, chapter/verse parsing and round-tripping via `$`.

  test "parses single verse references":
    let parsed = parseReference("John 3:16")
    check parsed.book.code == "JHN"
    check parsed.ranges.len == 1
    let only = parsed.ranges[0]
    check only.start.chapter == 3
    check only.start.verse == 16
    check only.finish == only.start
    check $parsed == "John 3:16"

  test "parses verse lists using the previous chapter":
    let parsed = parseReference("John 3:16,20-21")
    check parsed.ranges.len == 2
    # The second segment has no chapter of its own and inherits chapter 3.
    let second = parsed.ranges[1]
    check second.start.chapter == 3
    check second.start.verse == 20
    check second.finish.chapter == 3
    check second.finish.verse == 21
    check $parsed == "John 3:16, 3:20-21"

  test "parses chapter ranges":
    let parsed = parseReference("John 3-4")
    check parsed.ranges.len == 1
    let only = parsed.ranges[0]
    check only.start.chapter == 3
    check only.start.verse == 0
    check only.finish.chapter == 4
    check only.finish.verse == 0
    check $parsed == "John 3-4"

  test "parses abbreviated numbered books":
    let parsed = parseReference("1 Jn 1:9")
    check parsed.book.code == "1JN"
    let point = parsed.ranges[0].start
    check point.chapter == 1
    check point.verse == 9
    check $parsed == "1 John 1:9"

  test "parses unique canonical book prefixes":
    for (input, code) in [("Gene 1:1", "GEN"), ("Phile 3", "PHM"), ("Phili 1:6", "PHP")]:
      check parseReference(input).book.code == code

  test "rejects ambiguous canonical book prefixes":
    expect ValueError:
      discard parseReference("Phil 1")

  test "normalizes single-chapter book references":
    let parsed = parseReference("Jude 3-4")
    check parsed.book.code == "JUD"
    # In a single-chapter book the bare numbers are verses of chapter 1.
    let only = parsed.ranges[0]
    check only.start.chapter == 1
    check only.start.verse == 3
    check only.finish.chapter == 1
    check only.finish.verse == 4
    check $parsed == "Jude 3-4"

  test "parses semicolon-separated references":
    let parsedAll = parseReferences("Psalm 23; John 3:16")
    check parsedAll.len == 2
    check parsedAll[0].book.code == "PSA"
    check parsedAll[1].book.code == "JHN"
suite "offline KJV backend":
  # Exercises the embedded KJV text: each passage starts with its reference
  # on the first line and contains bracketed verse numbers.

  test "fetches a single embedded verse":
    let fetched = kjv.fetchPassages("John 3:16")
    check fetched.len == 1
    let passage = fetched[0]
    check passage.startsWith("John 3:16\n")
    check passage.contains(" [16] ")

  test "fetches a single-chapter embedded verse":
    let fetched = kjv.fetchPassages("Jude 3")
    check fetched.len == 1
    let passage = fetched[0]
    check passage.startsWith("Jude 3\n")
    check passage.contains(" [3] ")
+44
View File
@@ -0,0 +1,44 @@
import std/unittest
import ../src/passage_query
suite "passage query parser":
  # A query is a reference optionally followed by a "(TRANSLATION)" marker;
  # the second argument is the translation used when no marker is present.

  test "uses the default translation when no marker is present":
    let parsed = parsePassageQueries("John 3:16", "kjv")
    check parsed.len == 1
    let query = parsed[0]
    check query.referenceText == "John 3:16"
    check query.translation == "kjv"

  test "uses a trailing translation marker":
    let parsed = parsePassageQueries("2 John 5 (KJV)", "esv")
    check parsed.len == 1
    let query = parsed[0]
    check query.referenceText == "2 John 5"
    check query.translation == "kjv"

  test "parses mixed translation queries":
    let parsed = parsePassageQueries("2 John 5 (KJV); 2 John 5 (ESV)", "mev")
    check parsed.len == 2
    let first = parsed[0]
    check first.referenceText == "2 John 5"
    check first.translation == "kjv"
    let second = parsed[1]
    check second.referenceText == "2 John 5"
    check second.translation == "esv"

  test "uses the default translation per unmarked reference":
    let parsed = parsePassageQueries("John 3:16; Psalm 23 (MEV)", "nkjv")
    check parsed.len == 2
    let first = parsed[0]
    check first.referenceText == "John 3:16"
    check first.translation == "nkjv"
    let second = parsed[1]
    check second.referenceText == "Psalms 23"
    check second.translation == "mev"

  test "rejects unknown translation markers":
    expect ValueError:
      discard parsePassageQueries("John 3:16 (XYZ)", "esv")

  test "rejects unknown default translations":
    expect ValueError:
      discard parsePassageQueries("John 3:16", "xyz")
+144
View File
@@ -0,0 +1,144 @@
import std/[os, strutils, tables]
# Source archive: https://ebible.org/Scriptures/eng-kjv_usfm.zip
# Three-letter book codes in output order. A USFM file is only picked up if
# the code embedded in its filename appears in this list.
const canonBookCodes = [
  # Old Testament (39 books)
  "GEN", "EXO", "LEV", "NUM", "DEU", "JOS",
  "JDG", "RUT", "1SA", "2SA", "1KI", "2KI",
  "1CH", "2CH", "EZR", "NEH", "EST", "JOB",
  "PSA", "PRO", "ECC", "SNG", "ISA", "JER",
  "LAM", "EZK", "DAN", "HOS", "JOL", "AMO",
  "OBA", "JON", "MIC", "NAM", "HAB", "ZEP",
  "HAG", "ZEC", "MAL",
  # New Testament (27 books)
  "MAT", "MRK", "LUK", "JHN", "ACT", "ROM",
  "1CO", "2CO", "GAL", "EPH", "PHP", "COL",
  "1TH", "2TH", "1TI", "2TI", "TIT", "PHM",
  "HEB", "JAS", "1PE", "2PE", "1JN", "2JN",
  "3JN", "JUD", "REV"
]
proc normalizeWhitespace(s: string): string =
  ## Collapses every run of ASCII whitespace into a single space and drops
  ## leading and trailing whitespace.
  s.splitWhitespace.join(" ")
proc removeFootnotes(s: string): string =
  ## Removes every `\f ... \f*` span from `s`.
  ##
  ## An opening `\f ` with no matching `\f*` discards the rest of the
  ## string from that opener onwards.
  var pos = 0
  while pos < s.len:
    let openIdx = s.find("\\f ", pos)
    if openIdx < 0:
      # No further footnotes: keep the remainder unchanged.
      result.add(s[pos .. ^1])
      return
    result.add(s[pos ..< openIdx])
    let closeIdx = s.find("\\f*", openIdx + 2)
    if closeIdx < 0:
      return
    pos = closeIdx + 3
proc stripUsfmMarkup(s: string): string =
  ## Returns the plain text of a USFM fragment.
  ##
  ## Footnotes are removed first. Then every backslash marker is dropped:
  ## an optional `+`, a run of letters/digits/dashes, and either a closing
  ## `*` or the whitespace that follows an opening marker. A `|` discards
  ## everything up to the next backslash. Tabs become spaces and the result
  ## is whitespace-normalized.
  const markerChars = {'a'..'z', 'A'..'Z', '0'..'9', '-'}
  let text = removeFootnotes(s)
  var pos = 0
  while pos < text.len:
    let ch = text[pos]
    if ch == '\\':
      inc pos
      if pos < text.len and text[pos] == '+':
        inc pos
      while pos < text.len and text[pos] in markerChars:
        inc pos
      if pos < text.len and text[pos] == '*':
        # Closing marker: keep the whitespace after it.
        inc pos
      else:
        # Opening marker: swallow the separator whitespace as well.
        while pos < text.len and text[pos].isSpaceAscii:
          inc pos
    elif ch == '|':
      while pos < text.len and text[pos] != '\\':
        inc pos
    else:
      result.add(if ch == '\t': ' ' else: ch)
      inc pos
  result = normalizeWhitespace(result)
proc parseVerseLine(line: string): tuple[verse: int, text: string] =
  ## Splits a `\v <number> <text>` line into the verse number and its
  ## markup-free text. The first three characters of `line` are skipped
  ## without being inspected.
  ##
  ## Raises `ValueError` when nothing follows the verse number or when the
  ## number is not an integer.
  let body = line[3 .. ^1].strip
  let sep = body.find(' ')
  if sep < 0:
    raise newException(ValueError, "verse marker without text: " & line)
  result.verse = parseInt(body[0 ..< sep])
  result.text = stripUsfmMarkup(body[sep + 1 .. ^1])
proc findCanonFiles(inputDir: string): Table[string, string] =
  ## Maps each canonical book code to the path of its USFM file in
  ## `inputDir`.
  ##
  ## The code is the filename text between the first `-` and the
  ## `eng-kjv.usfm` suffix; files whose code is not in `canonBookCodes`
  ## are ignored.
  for path in walkFiles(inputDir / "*eng-kjv.usfm"):
    let name = path.extractFilename
    let dashAt = name.find('-')
    let suffixAt = name.find("eng-kjv.usfm")
    if dashAt < 0 or suffixAt <= dashAt:
      continue
    let code = name[dashAt + 1 ..< suffixAt]
    if code in canonBookCodes:
      result[code] = path
proc generate(inputDir, outputPath: string) =
  ## Converts the KJV USFM files in `inputDir` into a tab-separated file at
  ## `outputPath` with one `code<TAB>chapter<TAB>verse<TAB>text` row per
  ## verse, in `canonBookCodes` order.
  ##
  ## Raises `ValueError` if any canonical book has no USFM file.
  let usfmByCode = findCanonFiles(inputDir)
  var rows: seq[string] = @[]
  for code in canonBookCodes:
    if code notin usfmByCode:
      raise newException(ValueError, "missing USFM file for " & code)

    var
      chapter = 0
      verse = 0
      verseText = ""

    proc flushVerse() =
      # Emits the pending verse, if any, then clears the accumulator.
      if chapter > 0 and verse > 0:
        let text = normalizeWhitespace(verseText).replace("\t", " ")
        if text.len > 0:
          rows.add(join([code, $chapter, $verse, text], "\t"))
      verse = 0
      verseText = ""

    for rawLine in usfmByCode[code].lines:
      let line = rawLine.strip
      let isChapterLine = line.startsWith("\\c ")
      if isChapterLine or line.startsWith("\\v "):
        # Both markers end whatever verse was being collected.
        flushVerse()
        if isChapterLine:
          chapter = parseInt(line[3 .. ^1].strip)
        else:
          let parsed = parseVerseLine(line)
          verse = parsed.verse
          verseText = parsed.text
      elif verse > 0:
        # Any other line while a verse is open continues that verse.
        let continued = stripUsfmMarkup(line)
        if continued.len > 0:
          if verseText.len > 0:
            verseText.add(' ')
          verseText.add(continued)
    flushVerse()

  createDir(outputPath.parentDir)
  writeFile(outputPath, rows.join("\n") & "\n")
when isMainModule:
  # Command-line entry point: exactly two arguments are required.
  if paramCount() == 2:
    generate(paramStr(1), paramStr(2))
  else:
    quit("Usage: generate_kjv_data <usfm-dir> <output-tsv>", QuitFailure)
+330
View File
@@ -0,0 +1,330 @@
import std/[
htmlparser,
os,
osproc,
streams,
strutils,
xmlparser,
xmltree
]
import ../src/reference_parser
type
  # One `navPoint` of the EPUB's toc.ncx that points at a split HTML file.
  TocEntry = object
    label: string    # whitespace-normalized text of the navPoint's `text` element
    code: string     # book code looked up from the label; empty if no book matched
    fileIndex: int   # number taken from the `index_split_NNN.html` target

  # The run of split HTML files that holds one book.
  BookSource = object
    code: string     # book code
    startIndex: int  # first split-file index (inclusive)
    endIndex: int    # last split-file index (inclusive)

  # Mutable state threaded through the HTML walk of one book.
  ParseState = object
    code: string       # book code written into every emitted row
    chapter: int       # current chapter; 0 until one has been seen
    verse: int         # current verse; 0 while no verse is open
    verseText: string  # text collected so far for the current verse
    rows: seq[string]  # finished tab-separated rows
proc normalizeWhitespace(s: string): string =
  ## Treats UTF-8 no-break spaces as ordinary spaces, collapses every run
  ## of ASCII whitespace into a single space, and trims both ends.
  s.replace("\xC2\xA0", " ").splitWhitespace.join(" ")
proc markerText(s: string): string =
  ## Returns `s` with all whitespace removed (including no-break spaces,
  ## which `normalizeWhitespace` turns into spaces first).
  for ch in normalizeWhitespace(s):
    if ch != ' ':
      result.add(ch)
proc numberAfterPrefix(s, prefix: string): int =
  ## Returns the number that directly follows `prefix` in `s`, or 0 when
  ## `s` does not start with `prefix` or no digit follows it.
  ##
  ## `s` is whitespace-normalized and upper-cased before the comparison, so
  ## `prefix` is expected in upper case.
  let text = normalizeWhitespace(s).toUpperAscii
  if text.startsWith(prefix):
    let tail = text[prefix.len .. ^1].strip
    # Only the leading run of digits counts; anything else ends the number.
    var stop = 0
    while stop < tail.len and tail[stop].isDigit:
      inc stop
    if stop > 0:
      result = parseInt(tail[0 ..< stop])
proc isPositiveIntText(s: string): bool =
  ## True when `s`, with all whitespace removed, consists only of decimal
  ## digits and denotes a number greater than zero.
  let text = markerText(s)
  if text.len == 0 or not text.allCharsInSet({'0'..'9'}):
    return false
  parseInt(text) > 0
proc readEpubEntry(epubPath, entryPath: string): string =
  ## Returns the contents of `entryPath` inside the EPUB archive at
  ## `epubPath`, extracted by running `unzip -p` (found via `PATH`).
  ##
  ## The tool's stderr is merged into the captured output, so on failure
  ## the raised message carries whatever `unzip` printed.
  ##
  ## Raises `IOError` when `unzip` exits with a non-zero status.
  let process = startProcess(
    "unzip",
    args = ["-p", epubPath, entryPath],
    options = {poUsePath, poStdErrToStdOut})
  # Release the process handle and its pipes even if reading or waiting
  # raises.
  defer: process.close()
  result = process.outputStream.readAll()
  let exitCode = process.waitForExit()
  if exitCode != 0:
    raise newException(IOError,
      "could not read " & entryPath & " from " & epubPath & ": " & result)
proc textContent(node: XmlNode): string =
  ## Concatenates, in document order, the text of every text node at or
  ## below `node`. Node kinds other than text and element contribute
  ## nothing.
  if node.kind == xnText:
    return node.text
  if node.kind == xnElement:
    for child in node.items:
      result.add(textContent(child))
proc firstDescendant(node: XmlNode, tag: string): XmlNode =
  ## Depth-first search for the first element named `tag`, starting with
  ## `node` itself. Returns `nil` when nothing matches.
  if node.kind != xnElement:
    return nil
  if node.tag == tag:
    return node
  for child in node.items:
    result = firstDescendant(child, tag)
    if not result.isNil:
      return
proc descendantText(node: XmlNode, tag: string): string =
  ## Whitespace-normalized text of the first `tag` element at or below
  ## `node`, or `""` when there is none.
  let match = firstDescendant(node, tag)
  if not match.isNil:
    result = normalizeWhitespace(textContent(match))
proc descendantAttr(node: XmlNode, tag, attrName: string): string =
  ## Value of attribute `attrName` on the first `tag` element at or below
  ## `node`, or `""` when there is no such element.
  let match = firstDescendant(node, tag)
  if not match.isNil:
    result = match.attr(attrName)
proc bookCodeForLabel(label: string): string =
  ## Maps a TOC label to a book code, or `""` when the label names no
  ## canonical book. Only the text before the first `(` is considered.
  let name = label.split("(", maxsplit = 1)[0].strip
  if name == "Solomon":
    # Special-cased label; presumably it does not match the name stored in
    # CanonBooks -- verify against the EPUB's TOC.
    result = "SNG"
  else:
    for book in CanonBooks:
      if book.name == name:
        result = book.code
        break
proc indexFromSplitFile(path: string): int =
  ## Extracts `NNN` from a path whose filename is `index_split_NNN.html`.
  ## A trailing `#fragment` and any leading directories are ignored.
  ##
  ## Returns 0 when the filename does not have that shape, including when
  ## the part between prefix and suffix is empty or not purely decimal
  ## digits.
  const prefix = "index_split_"
  const suffix = ".html"
  let filename = path.split('#', maxsplit = 1)[0].extractFilename
  if not (filename.startsWith(prefix) and filename.endsWith(suffix)):
    return 0
  let digits = filename[prefix.len ..< filename.len - suffix.len]
  # Guard parseInt: a malformed name is "not a split file", not an error.
  if digits.len == 0 or not digits.allCharsInSet({'0'..'9'}):
    return 0
  parseInt(digits)
proc parseTocEntries(epubPath: string): seq[TocEntry] =
  ## Reads `toc.ncx` from the EPUB and returns, in document order, one
  ## entry per `navPoint` whose target is a split HTML file with a positive
  ## index. Nested navPoints are visited too.
  let ncx = readEpubEntry(epubPath, "toc.ncx")
  let root = parseXml(newStringStream(ncx))
  # Collected in a local because the nested proc cannot capture `result`.
  var found: seq[TocEntry] = @[]

  proc visit(node: XmlNode) =
    if node.kind != xnElement:
      return
    if node.tag == "navPoint":
      let label = node.descendantText("text")
      let target = node.descendantAttr("content", "src")
      let fileIndex = indexFromSplitFile(target)
      if fileIndex > 0:
        found.add(TocEntry(
          label: label,
          code: bookCodeForLabel(label),
          fileIndex: fileIndex))
    for child in node.items:
      visit(child)

  visit(root)
  found
proc bookSources(entries: seq[TocEntry]): seq[BookSource] =
  ## Turns TOC entries into per-book file ranges.
  ##
  ## Entries without a book code are skipped. A book runs from its own file
  ## up to the file before the next TOC entry (of any kind); the last entry
  ## covers only its own file.
  ##
  ## Raises `ValueError` unless the result lists exactly the books of
  ## `CanonBooks`, in the same order.
  for idx, entry in entries:
    if entry.code.len > 0:
      var lastFile = entry.fileIndex
      if idx + 1 < entries.len:
        lastFile = entries[idx + 1].fileIndex - 1
      result.add(BookSource(
        code: entry.code,
        startIndex: entry.fileIndex,
        endIndex: lastFile))

  if result.len != CanonBooks.len:
    raise newException(ValueError,
      "expected " & $CanonBooks.len & " canonical books in EPUB TOC, found " &
      $result.len)

  for idx, book in CanonBooks:
    let foundCode = result[idx].code
    if foundCode != book.code:
      raise newException(ValueError,
        "expected " & book.code & " at position " & $idx & ", found " &
        foundCode)
proc hasClass(node: XmlNode, className: string): bool =
  ## True when `node` is an element whose whitespace-separated `class`
  ## attribute contains exactly `className`.
  node.kind == xnElement and
    className in node.attr("class").splitWhitespace
proc shouldSkipElement(node: XmlNode): bool =
  ## True for elements whose content must not become verse text.
  const skippedClasses = [
    "calibre_29",  # section headings
    "calibre_6",   # parallel/cross-reference paragraphs
    "calibre_26"   # Psalm superscriptions/cross-references
  ]
  for cls in skippedClasses:
    if node.hasClass(cls):
      return true
proc hasHref(node: XmlNode): bool =
  ## True when `node` or any element below it has a non-empty `href`
  ## attribute.
  if node.kind != xnElement:
    return false
  if node.attr("href").len > 0:
    return true
  for child in node.items:
    if hasHref(child):
      return true
proc isBlockElement(node: XmlNode): bool =
  ## True for elements after which the walker inserts a separating space.
  if node.kind != xnElement:
    return false
  case node.tag
  of "blockquote", "br", "div", "h1", "h2", "h3", "li", "p": true
  else: false
proc chapterMarker(node: XmlNode): int =
  ## Chapter number carried by a `<span class="calibre1">` whose text is a
  ## positive integer; 0 for anything else.
  if node.kind != xnElement or node.tag != "span" or
      not node.hasClass("calibre1"):
    return 0
  let text = markerText(textContent(node))
  if text.isPositiveIntText:
    result = parseInt(text)
proc headingChapterMarker(node: XmlNode, code: string): int =
  ## Chapter number announced by a `<p>` whose text starts with
  ## `CHAPTER <n>` or, for the book code `PSA` only, `PSALM <n>`;
  ## 0 for anything else.
  if node.kind == xnElement and node.tag == "p":
    let text = textContent(node)
    result = numberAfterPrefix(text, "CHAPTER ")
    if result <= 0 and code == "PSA":
      result = numberAfterPrefix(text, "PSALM ")
proc verseMarker(node: XmlNode): int =
  ## Verse number carried by a `<sup>` that contains no link and whose text
  ## is a positive integer; 0 for anything else.
  if node.kind != xnElement or node.tag != "sup" or node.hasHref:
    return 0
  let text = markerText(textContent(node))
  if text.isPositiveIntText:
    result = parseInt(text)
proc leadingVerseText(s: string): tuple[verse: int, rest: string] =
  ## Splits text that begins with a verse number (after optional
  ## whitespace) into that number and the text following it, with the
  ## whitespace between the two removed. No-break spaces count as spaces.
  ##
  ## Returns `(0, "")` when the text does not start with a digit.
  let trimmed = s.replace("\xC2\xA0", " ").strip(trailing = false)
  var digitCount = 0
  while digitCount < trimmed.len and trimmed[digitCount].isDigit:
    inc digitCount
  if digitCount == 0:
    return
  result.verse = parseInt(trimmed[0 ..< digitCount])
  result.rest = trimmed[digitCount .. ^1].strip(trailing = false)
proc flushVerse(state: var ParseState) =
  ## Appends the pending verse to `state.rows` as
  ## `code<TAB>chapter<TAB>verse<TAB>text` when a chapter and verse are set
  ## and the normalized text is non-empty, then clears the collected text.
  ## The chapter and verse numbers are left untouched.
  let hasOpenVerse = state.chapter > 0 and state.verse > 0
  if hasOpenVerse:
    let text = normalizeWhitespace(state.verseText).replace("\t", " ")
    if text.len > 0:
      let fields = [state.code, $state.chapter, $state.verse, text]
      state.rows.add(fields.join("\t"))
  state.verseText = ""
proc walkPassageText(node: XmlNode, state: var ParseState) =
  ## Walks `node` depth-first, updating `state` with chapter and verse
  ## markers and collecting verse text between them.
  ##
  ## Element checks run in a fixed order and each match ends the visit of
  ## that element without descending into it: chapter heading paragraph,
  ## skipped classes, chapter-number span, verse-number `<sup>`, any other
  ## `<sup>`.
  case node.kind
  of xnText:
    # Text before the first chapter marker is ignored.
    if state.chapter > 0:
      if state.verse == 0:
        # No verse open: the text only counts if it starts with a verse
        # number of its own.
        let leading = leadingVerseText(node.text)
        if leading.verse > 0:
          state.verse = leading.verse
          state.verseText.add(leading.rest)
      elif state.verse > 0:
        state.verseText.add(node.text)
  of xnElement:
    let headingChapter = headingChapterMarker(node, state.code)
    if headingChapter > 0:
      # A "CHAPTER n" / "PSALM n" paragraph starts a chapter with no verse
      # open yet.
      state.flushVerse()
      state.chapter = headingChapter
      state.verse = 0
      return
    if node.shouldSkipElement:
      return
    let chapter = chapterMarker(node)
    if chapter > 0:
      # An inline chapter-number span starts the chapter at verse 1
      # (presumably it stands where verse 1's number would be -- TODO confirm).
      state.flushVerse()
      state.chapter = chapter
      state.verse = 1
      return
    let verse = verseMarker(node)
    if verse > 0:
      state.flushVerse()
      state.verse = verse
      return
    if node.tag == "sup":
      # Any other superscript (one with a link or without a positive
      # number) contributes no text.
      return
    for child in node.items:
      walkPassageText(child, state)
    if node.isBlockElement and state.chapter > 0 and state.verse > 0:
      # Keep text from adjacent blocks from running together.
      state.verseText.add(' ')
  else:
    discard
proc indexSplitFile(index: int): string =
  ## Builds the archive entry name for a split file: the index is
  ## left-padded with zeros to at least three characters.
  result = "index_split_"
  result.add(align($index, 3, '0'))
  result.add(".html")
proc parseBook(epubPath: string, source: BookSource): seq[string] =
  ## Extracts the verse rows of one book by walking every split HTML file
  ## from `source.startIndex` through `source.endIndex`.
  ##
  ## Single-chapter books start out in chapter 1, so they need no chapter
  ## marker in the text.
  let startChapter =
    if bookInfo(source.code).singleChapter: 1
    else: 0
  var state = ParseState(code: source.code, chapter: startChapter)
  for index in source.startIndex .. source.endIndex:
    let entry = readEpubEntry(epubPath, indexSplitFile(index))
    walkPassageText(parseHtml(newStringStream(entry)), state)
  # Emit the verse still open at the end of the book.
  state.flushVerse()
  state.rows
proc generate(epubPath, outputPath: string) =
  ## Converts the EPUB at `epubPath` into a tab-separated verse file at
  ## `outputPath`, creating the output directory if needed. Books appear in
  ## the order returned by `bookSources`.
  var rows: seq[string] = @[]
  for source in bookSources(parseTocEntries(epubPath)):
    rows.add(parseBook(epubPath, source))
  createDir(outputPath.parentDir)
  writeFile(outputPath, rows.join("\n") & "\n")
when isMainModule:
  # Command-line entry point: exactly two arguments are required.
  if paramCount() == 2:
    generate(paramStr(1), paramStr(2))
  else:
    quit("Usage: generate_mev_data <mev-epub> <output-tsv>", QuitFailure)