Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| dbc39480f7 | |||
| 176fa46816 | |||
| 42d2587704 | |||
| 544062902b | |||
| 2d78d8e5c0 | |||
| da6ba66e1c | |||
| 17f953882f | |||
| ffe3118ddf | |||
| 49594610dc | |||
| 8f83c07693 | |||
| 8422199d7b | |||
| a0c17bcad9 |
@@ -1,2 +1,6 @@
|
||||
esv_api
|
||||
bibleref
|
||||
tests/test_offline_kjv
|
||||
tests/test_passage_query
|
||||
data/private/
|
||||
*.sw?
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
# Package
|
||||
|
||||
version = "1.1.0"
|
||||
author = "Jonathan Bernard"
|
||||
description = "Simple Nim CLI for retrieving Biblical passages"
|
||||
license = "MIT"
|
||||
srcDir = "src"
|
||||
bin = @["bibleref"]
|
||||
|
||||
|
||||
# Dependencies
|
||||
|
||||
requires "nim >= 1.6.10"
|
||||
requires "docopt"
|
||||
requires "nimquery"
|
||||
requires "zero_functional"
|
||||
|
||||
# dependencies from git.jdb-software.com/jdb/nim-packages.git
|
||||
requires "cliutils"
|
||||
+31102
File diff suppressed because it is too large
Load Diff
@@ -1,17 +0,0 @@
|
||||
# Package
|
||||
|
||||
version = "0.2.2"
|
||||
author = "Jonathan Bernard"
|
||||
description = "Simple Nim CLI wrapper around the ESV API (api.esv.org)"
|
||||
license = "MIT"
|
||||
srcDir = "src"
|
||||
bin = @["esv_api"]
|
||||
|
||||
|
||||
# Dependencies
|
||||
|
||||
requires "nim >= 1.6.10"
|
||||
requires @["docopt", "zero_functional"]
|
||||
|
||||
# dependencies from git.jdb-software.com/jdb/nim-packages.git
|
||||
requires @["cliutils"]
|
||||
@@ -0,0 +1,103 @@
|
||||
import std/[httpclient, json, logging, strutils, uri]
|
||||
|
||||
const apiBibleRoot* = "https://rest.api.bible/v1"
|
||||
|
||||
proc configBibleIdKey(translation: string): string =
  ## Builds the camelCase configuration key that holds the API.Bible Bible ID
  ## for `translation`, e.g. `"NIV"` -> `"apiBibleNivBibleId"`.
  ##
  ## The translation is lower-cased and then capitalized, so the result does
  ## not depend on the casing of the input. An empty translation yields
  ## `"apiBibleBibleId"` rather than raising an `IndexDefect`.
  "apiBible" & translation.toLowerAscii.capitalizeAscii & "BibleId"
|
||||
|
||||
proc defaultBibleId*(translation: string): string =
  ## Returns the built-in API.Bible Bible ID for `translation` (compared
  ## case-insensitively), or an empty string when no default is known.
  if translation.toLowerAscii == "niv":
    result = "78a9f6124f344018-01"
  else:
    result = ""
|
||||
|
||||
proc apiGet(apiRoot, path, query, apiKey: string): JsonNode =
  ## Performs a GET request against `apiRoot & path` (plus `?query` when
  ## `query` is non-empty), sending `apiKey` in the `api-key` header, and
  ## returns the response body parsed as JSON.
  ##
  ## Trailing slashes on `apiRoot` are removed so that `path` (which callers
  ## pass with a leading slash) does not produce a double slash.
  ## Errors raised by the HTTP client or the JSON parser propagate to the
  ## caller.
  var urlPath = apiRoot.strip(leading = false, chars = {'/'}) & path
  if query.len > 0:
    urlPath &= "?" & query

  debug "requesting " & urlPath

  let http = newHttpClient()
  # Release the underlying socket whether the request succeeds or raises.
  defer: http.close()
  http.headers = newHttpHeaders({"api-key": apiKey})
  result = parseJson(http.getContent(urlPath))
|
||||
|
||||
proc resolveBibleId(translation, apiKey, apiRoot, configuredBibleId: string): string =
  ## Determines which API.Bible Bible ID to use for `translation`.
  ##
  ## Resolution order:
  ## 1. `configuredBibleId`, when it is non-blank (returned stripped);
  ## 2. the built-in default from `defaultBibleId`;
  ## 3. a `/bibles` lookup by upper-cased abbreviation, preferring an entry
  ##    whose `abbreviation` or `abbreviationLocal` equals the translation
  ##    (case-insensitively) and otherwise taking the first entry returned.
  ##
  ## Raises `ValueError` when the lookup returns no entries.
  let wanted = translation.toLowerAscii

  let configured = configuredBibleId.strip
  if configured.len > 0:
    return configured

  let builtIn = defaultBibleId(wanted)
  if builtIn.len > 0:
    return builtIn

  let lookupQuery = "language=eng&abbreviation=" &
    encodeUrl(wanted.toUpperAscii) & "&include-full-details=false"
  let candidates = apiGet(apiRoot, "/bibles", lookupQuery, apiKey)["data"].getElems

  for candidate in candidates:
    for field in ["abbreviation", "abbreviationLocal"]:
      # A missing field is treated as an empty string for the comparison.
      let value = if candidate.hasKey(field): candidate[field].getStr else: ""
      if value.toLowerAscii == wanted:
        return candidate["id"].getStr

  # No exact abbreviation match: fall back to the first search result.
  if candidates.len > 0:
    return candidates[0]["id"].getStr

  raise newException(ValueError,
    "could not find an API.Bible Bible ID for '" & translation &
    "'; configure " & configBibleIdKey(wanted))
|
||||
|
||||
proc resolvePassageId(reference, bibleId, apiKey, apiRoot: string): string =
  ## Uses the API.Bible search endpoint to turn a free-form `reference` into
  ## a passage ID for the Bible identified by `bibleId`.
  ##
  ## The first entry of `data.passages` is preferred; when there is none, a
  ## `data.verses` array with exactly one entry is accepted. Any other
  ## response raises `ValueError`.
  let searchPath = "/bibles/" & encodeUrl(bibleId) & "/search"
  let searchQuery = "query=" & encodeUrl(reference) & "&limit=1&sort=canonical"
  let data = apiGet(apiRoot, searchPath, searchQuery, apiKey)["data"]

  let passages =
    if data.hasKey("passages"): data["passages"].getElems
    else: @[]
  let verses =
    if data.hasKey("verses"): data["verses"].getElems
    else: @[]

  if passages.len > 0:
    result = passages[0]["id"].getStr
  elif verses.len == 1:
    result = verses[0]["id"].getStr
  else:
    raise newException(ValueError,
      "could not resolve passage reference '" & reference & "' using API.Bible")
|
||||
|
||||
proc fetchPassages*(
    reference,
    translation,
    apiKey,
    apiRoot,
    configuredBibleId: string): seq[string] =
  ## Fetches `reference` from API.Bible in the requested `translation`.
  ##
  ## Resolves the Bible ID and the passage ID first, then requests the
  ## passage as plain text (titles and verse numbers included; notes, chapter
  ## numbers and verse spans excluded). Returns a single-element sequence
  ## holding the passage reference, a newline, and the passage content.
  const contentQuery =
    "content-type=text&include-notes=false&include-titles=true" &
    "&include-chapter-numbers=false&include-verse-numbers=true" &
    "&include-verse-spans=false"

  let bibleId = resolveBibleId(translation, apiKey, apiRoot, configuredBibleId)
  let passageId = resolvePassageId(reference, bibleId, apiKey, apiRoot)
  let passagePath =
    "/bibles/" & encodeUrl(bibleId) & "/passages/" & encodeUrl(passageId)

  let passage = apiGet(apiRoot, passagePath, contentQuery, apiKey)["data"]
  result = @[passage["reference"].getStr & "\n" & passage["content"].getStr]
|
||||
@@ -0,0 +1,188 @@
|
||||
# Nim CLI for retrieving Biblical passages
|
||||
# © 2023 Jonathan Bernard
|
||||
|
||||
## Simple command-line tool for retrieving Biblical passages.
|
||||
|
||||
import std/[json, logging, os, re, strutils, wordwrap]
|
||||
import cliutils, docopt, zero_functional
|
||||
|
||||
import ./api_bible
|
||||
import ./esv
|
||||
import ./kjv
|
||||
import ./mev
|
||||
import ./passage_query
|
||||
|
||||
proc collectVerseLines(raw: string): tuple[reference: string, lines: seq[string]] =
  ## Splits a raw passage into its heading and its verse lines.
  ##
  ## The first non-blank line (stripped) becomes the reference. Verse
  ## collection starts at the first indented line that begins with a
  ## `[n]` verse marker and stops at a line starting with "Footnotes".
  ## While collecting, runs of blank lines collapse to one empty entry,
  ## lines that do not start with indentation are skipped, and a trailing
  ## "(ESV)" marker is removed from the line that carries it.
  var inVerse = false

  for line in raw.splitLines:
    if result.reference.len == 0: result.reference = line.strip
    if inVerse:
      if line.startsWith("Footnotes"): inVerse = false
      elif line.isEmptyOrWhitespace and result.lines[^1] != "":
        result.lines.add("")
      elif not line.match(re"^\s+[^\s]"): continue
      # The pattern must be anchored with `^`; a leading `$` can never match
      # a non-empty line, which left the "(ESV)" suffix in the output.
      elif line.match(re"^(.*)\(ESV\)$"):
        result.lines.add(line[0 ..< line.len - "(ESV)".len])
      else: result.lines.add(line)
    elif line.match(re"^\s+\[\d+\]"):
      inVerse = true
      result.lines.add(line)

proc wrapVerseLines(
    lines: seq[string],
    subs: openArray[tuple[pattern: Regex, repl: string]]): string =
  ## Applies `subs` to every verse line and word-wraps it to 74 columns.
  ##
  ## Lines longer than 90 characters are stripped first; shorter lines get a
  ## single trailing space appended instead. The wrapped lines are joined
  ## with the platform newline.
  var wrapped = newSeqOfCap[string](lines.len)
  for line in lines:
    let prepared = if line.len > 90: line.strip else: line & " "
    wrapped.add(
      wrapWords(prepared.multiReplace(subs), maxLineWidth = 74, newLine = "\p"))
  wrapped.join("\p")

proc formatMarkdown(raw, translation: string): string =
  ## Renders a raw passage as a Markdown block quote.
  ##
  ## Parenthesised numbers such as `(1)` are removed, `[n]` verse markers
  ## become bold (`**n**`), every output line is prefixed with "> ", and an
  ## attribution line `> -- *<reference> (<TRANSLATION>)*` is appended.
  let (reference, verseLines) = collectVerseLines(raw)
  let wrapped = wrapVerseLines(
    verseLines, [(re"\((\d+)\)", ""), (re"\[(\d+)\]", "**$1**")])

  var quoted = newSeq[string]()
  for line in wrapped.splitLines:
    quoted.add("> " & line)

  result = quoted.join("\p") & "\p> -- *" & reference & " (" &
    translation.toUpperAscii & ")*"

proc formatPlain(
    raw,
    translation: string,
    keepVerseNumbers = true): string =
  ## Renders a raw passage as plain wrapped text.
  ##
  ## Parenthesised numbers such as `(1)` are removed. `[n]` verse markers are
  ## reduced to the bare number when `keepVerseNumbers` is true and dropped
  ## entirely otherwise. An attribution line `– <reference> (<TRANSLATION>)`
  ## is appended.
  let (reference, verseLines) = collectVerseLines(raw)
  let verseNumberRepl = if keepVerseNumbers: "$1" else: ""
  let wrapped = wrapVerseLines(
    verseLines, [(re"\((\d+)\)", ""), (re"\[(\d+)\]", verseNumberRepl)])

  result = wrapped.splitLines.join("\p") & "\p– " & reference & " (" &
    translation.toUpperAscii & ")"
|
||||
|
||||
proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[string] =
  ## Routes a passage request to the backend that serves `translation`:
  ## the ESV API for "esv", the embedded data for "akjv"/"kjv" and "mev",
  ## and API.Bible for "amp", "nkjv" and "niv". Backend settings are read
  ## from `cfg`. Any other translation raises `ValueError`.
  if translation == "esv":
    let token = cfg.getVal("esv-api-token")
    let root = cfg.getVal("esv-api-root", "https://api.esv.org")
    result = esv.fetchPassages(reference, token, root)
  elif translation in ["akjv", "kjv"]:
    result = kjv.fetchPassages(reference)
  elif translation == "mev":
    result = mev.fetchPassages(reference)
  elif translation in ["amp", "nkjv", "niv"]:
    let apiKey = cfg.getVal("api-bible-api-key")
    let root = cfg.getVal("api-bible-root", api_bible.apiBibleRoot)
    # A per-translation Bible ID override falls back to the built-in default.
    let bibleId = cfg.getVal(
      "api-bible-" & translation & "-bible-id",
      api_bible.defaultBibleId(translation))
    result = api_bible.fetchPassages(reference, translation, apiKey, root, bibleId)
  else:
    raise newException(ValueError,
      "unsupported translation '" & translation &
      "'; supported translations: " & supportedTranslationsList())
|
||||
|
||||
when isMainModule:
  # Command-line entry point: parse the arguments, merge them with the
  # optional JSON config in $HOME/.bibleref.cfg.json, fetch every requested
  # passage and print the formatted results separated by blank lines.
  const USAGE = """Usage:
  bibleref <reference> [options]

Options:

  --debug                         Log debug information.

  --echo-args                     Echo back the arguments that were passed on the
                                  command line for debugging purposes.

  -f, --output-format <format>    Select a specific output format. Valid values
                                  are 'raw', 'text', 'markdown', 'plain',
                                  'reading'.

  -t, --translation <translation>
                                  Select a specific translation. Supported values
                                  are 'akjv', 'amp', 'esv', 'kjv', 'mev',
                                  'nkjv', and 'niv'. Defaults to 'esv'.
                                  Individual references may override this with a
                                  trailing marker, for example:
                                  'John 3:16 (KJV); John 3:16 (ESV)'.

  --esv-api-token <token>         Provide the API token on the command line. By
                                  default this will be read either from the
                                  .bibleref.cfg.json file or the ESV_API_TOKEN
                                  environment variable.

  --api-bible-api-key <key>       Provide the API.Bible API key for translations
                                  backed by api.bible.

  --api-bible-root <url>          Override the API.Bible API root. Defaults to
                                  https://rest.api.bible/v1.

  --api-bible-amp-bible-id <id>   Override the API.Bible Bible ID for AMP.

  --api-bible-niv-bible-id <id>   Override the API.Bible Bible ID for NIV.

  --api-bible-nkjv-bible-id <id>
                                  Override the API.Bible Bible ID for NKJV.
"""

  let consoleLogger = newConsoleLogger(
    levelThreshold=lvlInfo,
    fmtStr="bibleref - $levelname: ")
  logging.addHandler(consoleLogger)

  try:
    # Parse arguments
    let args = docopt(USAGE, version = "1.1.0")

    if args["--debug"]:
      consoleLogger.levelThreshold = lvlDebug

    if args["--echo-args"]: stderr.writeLine($args)

    # The config file is optional; an empty JSON object stands in for it.
    let cfgFilePath = getEnv("HOME") / ".bibleref.cfg.json"
    var cfgFileJson = newJObject()
    if fileExists(cfgFilePath):
      debug "Loading config from " & cfgFilePath
      cfgFileJson = parseFile(cfgFilePath)

    let cfg = CombinedConfig(docopt: args, json: cfgFileJson)
    let defaultTranslation = cfg.getVal("translation", "esv")
    let reference = $args["<reference>"]
    let queries = parsePassageQueries(reference, defaultTranslation)

    var formattedPassages: seq[string] = @[]
    for query in queries:
      for passage in fetchPassages(query.referenceText, query.translation, cfg):
        formattedPassages.add(
          case $args["--output-format"]:
          of "plain":
            formatPlain(passage, query.translation)
          of "reading":
            formatPlain(passage, query.translation, keepVerseNumbers = false)
          of "text":
            # Raw passage text with the brackets around verse numbers removed.
            passage.multiReplace([(re"\[(\d+)\]", "$1")])
          of "raw":
            passage
          else:
            # Markdown is the default for any other (or missing) value.
            formatMarkdown(passage, query.translation))

    echo formattedPassages.join("\p\p")

  except CatchableError:
    fatal getCurrentExceptionMsg()
    debug getCurrentException().getStackTrace()
    quit(QuitFailure)
|
||||
@@ -0,0 +1,112 @@
|
||||
import std/[strutils, tables]
|
||||
|
||||
import ./reference_parser
|
||||
|
||||
type BibleIndex = object
  ## In-memory lookup tables built by `loadBibleIndex` from the tab-separated
  ## rows of one translation.
  verses: Table[string, string]
    ## Verse text, keyed by `verseKey(code, chapter, verse)`.
  lastVerseByChapter: Table[string, int]
    ## Highest verse number seen, keyed by `chapterKey(code, chapter)`.
  lastChapterByBook: Table[string, int]
    ## Highest chapter number seen, keyed by book code.
  translationName: string
    ## Translation label used in error messages.
|
||||
|
||||
proc verseKey(code: string, chapter, verse: int): string =
  ## Table key for a single verse: book code, chapter and verse joined by
  ## tab characters.
  result = [code, $chapter, $verse].join("\t")
|
||||
|
||||
proc chapterKey(code: string, chapter: int): string =
  ## Table key for a chapter: book code and chapter joined by a tab.
  result = code
  result.add('\t')
  result.add($chapter)
|
||||
|
||||
proc loadBibleIndex(rows, translationName: string): BibleIndex =
  ## Parses tab-separated `rows` (`code<TAB>chapter<TAB>verse<TAB>text`, one
  ## verse per line) into a `BibleIndex`.
  ##
  ## Blank lines are ignored. A line with fewer than four fields raises
  ## `ValueError`; tabs inside the text field are kept because the split is
  ## limited to three separators. Non-numeric chapter or verse fields raise
  ## the `ValueError` produced by `parseInt`.
  result.translationName = translationName

  for row in rows.splitLines:
    if row.isEmptyOrWhitespace:
      continue

    let fields = row.split('\t', maxsplit = 3)
    if fields.len != 4:
      raise newException(ValueError,
        "invalid embedded " & translationName & " row: " & row)

    let
      code = fields[0]
      chapter = parseInt(fields[1])
      verse = parseInt(fields[2])

    result.verses[verseKey(code, chapter, verse)] = fields[3]

    # Track the running maximum verse per chapter and chapter per book; a
    # missing entry defaults to the current value so it is always stored.
    let cKey = chapterKey(code, chapter)
    result.lastVerseByChapter[cKey] =
      max(verse, result.lastVerseByChapter.getOrDefault(cKey, verse))
    result.lastChapterByBook[code] =
      max(chapter, result.lastChapterByBook.getOrDefault(code, chapter))
|
||||
|
||||
proc requireLastChapter(index: BibleIndex, code: string): int =
  ## Returns the highest chapter number indexed for book `code`; raises
  ## `ValueError` when the index holds no data for that book.
  if code in index.lastChapterByBook:
    return index.lastChapterByBook[code]

  raise newException(ValueError,
    "no embedded " & index.translationName & " data for " & code)
|
||||
|
||||
proc requireLastVerse(index: BibleIndex, code: string, chapter: int): int =
  ## Returns the highest verse number indexed for `chapter` of book `code`;
  ## raises `ValueError` naming the book and chapter when it is not indexed.
  let key = chapterKey(code, chapter)
  if key in index.lastVerseByChapter:
    return index.lastVerseByChapter[key]

  raise newException(ValueError,
    "no embedded " & index.translationName & " data for " &
    bookInfo(code).name & " " & $chapter)
|
||||
|
||||
proc requireVerse(index: BibleIndex, code: string, chapter, verse: int): string =
  ## Returns the text of a single verse; raises `ValueError` naming the book,
  ## chapter and verse when it is not present in the index.
  let key = verseKey(code, chapter, verse)
  if key in index.verses:
    return index.verses[key]

  raise newException(ValueError,
    "no embedded " & index.translationName & " data for " &
    bookInfo(code).name & " " & $chapter & ":" & $verse)
|
||||
|
||||
proc addVerseLines(
    lines: var seq[string],
    index: BibleIndex,
    reference: PassageReference,
    range: RefRange) =
  ## Appends one `  [n] text` line to `lines` for every verse covered by
  ## `range` in the book named by `reference`.
  ##
  ## A verse number of 0 (or less) at either end means "unbounded": the range
  ## then starts at verse 1 of its first chapter or runs to the last indexed
  ## verse of its final chapter. Chapters strictly between the two ends are
  ## included in full. Raises `ValueError` when the book, a chapter or a
  ## verse is missing from the index, or when the verse range within a
  ## chapter is inverted.
  let bookCode = reference.book.code
  # Fail early when the book is not part of this translation's data.
  discard index.requireLastChapter(bookCode)

  let first = range.start
  let last = range.finish

  for chapter in first.chapter .. last.chapter:
    var fromVerse = 1
    if chapter == first.chapter and first.verse > 0:
      fromVerse = first.verse

    var toVerse: int
    if chapter == last.chapter and last.verse > 0:
      toVerse = last.verse
    else:
      toVerse = index.requireLastVerse(bookCode, chapter)

    if fromVerse > toVerse:
      raise newException(ValueError, "reference range starts after it ends")

    for verse in fromVerse .. toVerse:
      lines.add("  [" & $verse & "] " & index.requireVerse(bookCode, chapter, verse))
|
||||
|
||||
proc fetchReference(index: BibleIndex, reference: PassageReference): string =
  ## Renders one parsed reference as text: a heading line (`$reference`)
  ## followed by one `  [n] text` line per verse, joined with "\n".
  ##
  ## A reference without ranges yields the entire book.
  var output = @[$reference]
  let bookCode = reference.book.code

  if reference.ranges.len > 0:
    for verseRange in reference.ranges:
      output.addVerseLines(index, reference, verseRange)
  else:
    let finalChapter = index.requireLastChapter(bookCode)
    for chapter in 1 .. finalChapter:
      let finalVerse = index.requireLastVerse(bookCode, chapter)
      for verse in 1 .. finalVerse:
        output.add("  [" & $verse & "] " & index.requireVerse(bookCode, chapter, verse))

  result = output.join("\n")
|
||||
|
||||
proc fetchPassages*(rows, reference, translationName: string): seq[string] =
  ## Builds an index from the tab-separated `rows` and returns one rendered
  ## passage per reference parsed out of `reference`. `translationName` is
  ## only used to label error messages.
  let bible = loadBibleIndex(rows, translationName)
  for parsed in parseReferences(reference):
    result.add(bible.fetchReference(parsed))
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
import std/[httpclient, json, logging, uri]
|
||||
|
||||
proc fetchPassages*(reference, apiToken, apiRoot: string): seq[string] =
  ## Requests `reference` from the ESV passage-text endpoint under `apiRoot`,
  ## authenticating with `apiToken`, and returns the strings found in the
  ## response's `passages` array.
  ##
  ## Errors raised by the HTTP client or the JSON parser propagate to the
  ## caller.
  let http = newHttpClient()
  # Release the underlying socket whether the request succeeds or raises.
  defer: http.close()
  http.headers = newHttpHeaders({"Authorization": "Token " & apiToken})

  let urlPath = apiRoot & "/v3/passage/text/?q=" & encodeUrl(reference)
  debug "requesting " & urlPath

  let respJson = parseJson(http.getContent(urlPath))
  result = @[]
  for passage in respJson["passages"].getElems:
    result.add(passage.getStr)
|
||||
@@ -1,86 +0,0 @@
|
||||
# Nim CLI Wrapper for the ESV API
|
||||
# © 2023 Jonathan Bernard
|
||||
|
||||
## Simple command-line wrapper around the ESV API.
|
||||
|
||||
import std/[httpclient, json, logging, os, re, strutils, uri, wordwrap]
|
||||
import cliutils, docopt, zero_functional
|
||||
|
||||
proc formatMarkdown(raw: string): string =
|
||||
let rawLines = raw.splitLines
|
||||
let wrapped = (raw.splitLines -->
|
||||
filter(match(it, re"^\s+(\[\d+\]|\w).*")).
|
||||
map(it.strip.multiReplace([(re"\((\d+)\)", ""), (re"\[(\d+)\]", "**$1**")])).
|
||||
map(wrapWords(it, maxLineWidth = 74, newLine = "\p"))).
|
||||
join("\p")
|
||||
|
||||
result = (wrapped.splitLines --> map("> " & it)).join("\p") &
|
||||
"\p>\p> -- *" & rawLines[0].strip & " (ESV)*"
|
||||
|
||||
when isMainModule:
|
||||
const USAGE = """Usage:
|
||||
esv_api <reference> [options]
|
||||
|
||||
Options:
|
||||
|
||||
--debug Log debug information.
|
||||
|
||||
--echo-args Echo back the arguments that were passed on the
|
||||
command line for debugging purposes.
|
||||
|
||||
-f, --output-format <format> Select a specific output format. Valid values
|
||||
are 'raw', 'markdown', 'plain'.
|
||||
|
||||
-t, --esv-api-token <token> Provide the API token on the command line. By
|
||||
default this will be read either from the
|
||||
.esv_api.cfg.json file or the ESV_API_TOKEN
|
||||
envionment variable.
|
||||
"""
|
||||
|
||||
let consoleLogger = newConsoleLogger(
|
||||
levelThreshold=lvlInfo,
|
||||
fmtStr="esv_api - $levelname: ")
|
||||
logging.addHandler(consoleLogger)
|
||||
|
||||
try:
|
||||
# Parse arguments
|
||||
let args = docopt(USAGE, version = "0.2.2")
|
||||
|
||||
if args["--debug"]:
|
||||
consoleLogger.levelThreshold = lvlDebug
|
||||
|
||||
if args["--echo-args"]: stderr.writeLine($args)
|
||||
|
||||
let cfgFilePath = getEnv("HOME") / ".esv_api.cfg.json"
|
||||
var cfgFileJson = newJObject()
|
||||
if fileExists(cfgFilePath):
|
||||
debug "Loading config from " & cfgFilePath
|
||||
cfgFileJson = parseFile(cfgFilePath)
|
||||
|
||||
let cfg = CombinedConfig(docopt: args, json: cfgFileJson)
|
||||
let apiToken = cfg.getVal("esv-api-token")
|
||||
let apiRoot = cfg.getVal("esv-api-root", "https://api.esv.org")
|
||||
let reference = $args["<reference>"]
|
||||
|
||||
let http = newHttpClient()
|
||||
http.headers = newHttpHeaders({"Authorization": "Token " & apiToken})
|
||||
|
||||
let urlPath = apiRoot & "/v3/passage/text/?q=" & encodeUrl(reference)
|
||||
debug "requesting " & urlPath
|
||||
let respJson = parseJson(http.getContent(urlPath))
|
||||
|
||||
let formattedPassages =
|
||||
case $args["--output-format"]:
|
||||
of "text":
|
||||
respJson["passages"].getElems -->
|
||||
map(it.getStr.multiReplace([(re"\[(\d+)\]", "$1")]))
|
||||
of "raw": respJson["passages"].getElems --> map(it.getStr)
|
||||
else:
|
||||
respJson["passages"].getElems --> map(formatMarkdown(it.getStr))
|
||||
|
||||
echo formattedPassages.join("\p\p")
|
||||
|
||||
except CatchableError:
|
||||
fatal getCurrentExceptionMsg()
|
||||
debug getCurrentException().getStackTrace()
|
||||
quit(QuitFailure)
|
||||
@@ -0,0 +1,7 @@
|
||||
import ./embedded_bible
|
||||
import ./offline_data
|
||||
|
||||
# KJV rows obtained through `embeddedTranslationData`; being a `const`, the
# value is computed at compile time.
const kjvRows = embeddedTranslationData("kjv")

proc fetchPassages*(reference: string): seq[string] =
  ## Looks up `reference` in the embedded KJV rows and returns the rendered
  ## passages produced by `embedded_bible.fetchPassages`.
  embedded_bible.fetchPassages(kjvRows, reference, "KJV")
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
import ./offline_data
|
||||
|
||||
# The embedded-Bible module and the MEV rows are only pulled in when an MEV
# data file was found at compile time.
when hasEmbeddedTranslationData("mev"):
  import ./embedded_bible

  const mevRows = embeddedTranslationData("mev")

proc fetchPassages*(reference: string): seq[string] =
  ## Looks up `reference` in the embedded MEV rows.
  ##
  ## When the binary was built without MEV data, every call raises
  ## `ValueError` explaining how to provide it.
  when hasEmbeddedTranslationData("mev"):
    embedded_bible.fetchPassages(mevRows, reference, "MEV")
  else:
    raise newException(ValueError,
      "MEV data is not embedded; generate data/private/mev.tsv and rebuild")
|
||||
@@ -0,0 +1,15 @@
|
||||
import std/os
|
||||
|
||||
template translationDataPath(name: static[string], visibility: static[string]): string =
  ## Expands to the path `<dataRoot>/<visibility>/<name>.tsv`, where
  ## `dataRoot` is the `data` directory located two levels above
  ## `currentSourcePath()`.
  const dataRoot = currentSourcePath().parentDir.parentDir / "data"
  dataRoot / visibility / (name & ".tsv")
|
||||
|
||||
template hasEmbeddedTranslationData*(name: static[string]): bool =
  ## True when a `<name>.tsv` data file exists in either the `private` or the
  ## `public` data directory. The private location is checked first.
  fileExists(translationDataPath(name, "private")) or
    fileExists(translationDataPath(name, "public"))
|
||||
|
||||
template embeddedTranslationData*(name: static[string]): string =
  ## Expands to the contents of `<name>.tsv`, read with `staticRead`.
  ##
  ## The file in the `private` data directory is used when it exists;
  ## otherwise the one in `public` is read without a prior existence check.
  when fileExists(translationDataPath(name, "private")):
    staticRead(translationDataPath(name, "private"))
  else:
    staticRead(translationDataPath(name, "public"))
|
||||
@@ -0,0 +1,64 @@
|
||||
import std/strutils
|
||||
|
||||
import ./reference_parser
|
||||
|
||||
type PassageQuery* = object
  ## One requested passage together with the translation to fetch it in.
  reference*: PassageReference
    ## Parsed reference, as returned by `parseReference`.
  translation*: string
    ## Translation code as returned by `normalizeTranslation`.
|
||||
|
||||
# Translation codes accepted by `normalizeTranslation`, in lower case.
const SupportedTranslations* = [
  "akjv", "amp", "esv", "kjv", "mev", "niv", "nkjv"
]

proc supportedTranslationsList*(): string =
  ## Returns the supported translation codes as one comma-separated string,
  ## in the order they are declared.
  for position, code in SupportedTranslations:
    if position > 0:
      result.add(", ")
    result.add(code)
|
||||
|
||||
proc normalizeTranslation*(translation: string): string =
  ## Strips and lower-cases `translation` and returns it when it is one of
  ## `SupportedTranslations`; raises `ValueError` (quoting the original
  ## input and listing the supported codes) otherwise.
  result = translation.strip.toLowerAscii

  if result notin SupportedTranslations:
    raise newException(ValueError,
      "unsupported translation '" & translation &
      "'; supported translations: " & supportedTranslationsList())
|
||||
|
||||
proc splitTrailingTranslationMarker(
    input: string): tuple[referenceText: string, translation: string] =
  ## Separates a trailing parenthesised marker from a reference, e.g.
  ## `"John 3:16 (KJV)"` -> `("John 3:16", "KJV")`.
  ##
  ## The stripped input is returned unchanged, with an empty translation,
  ## when it does not end in ")", has no "(", or when either the part before
  ## the last "(" or the text inside the parentheses is blank.
  let trimmed = input.strip
  result = (trimmed, "")

  if trimmed.len == 0 or trimmed[^1] != ')':
    return

  let parenAt = trimmed.rfind('(')
  if parenAt < 0:
    return

  let head = trimmed[0 ..< parenAt].strip
  let marker = trimmed[parenAt + 1 .. trimmed.high - 1].strip
  if head.len > 0 and marker.len > 0:
    result = (head, marker)
|
||||
|
||||
proc parsePassageQuery*(input, defaultTranslation: string): PassageQuery =
  ## Parses a single reference, optionally followed by a parenthesised
  ## translation marker. The marker wins over `defaultTranslation` when
  ## present; whichever is used is validated by `normalizeTranslation`.
  let (refText, marker) = splitTrailingTranslationMarker(input)
  result.reference = parseReference(refText)

  let requested = if marker.len > 0: marker else: defaultTranslation
  result.translation = normalizeTranslation(requested)
|
||||
|
||||
proc parsePassageQueries*(input, defaultTranslation: string): seq[PassageQuery] =
  ## Splits `input` on ";" and parses every non-blank segment with
  ## `parsePassageQuery`. Raises `ValueError` when no segment remains.
  for segment in input.split(';'):
    let trimmed = segment.strip
    if trimmed.len == 0:
      continue
    result.add(parsePassageQuery(trimmed, defaultTranslation))

  if result.len == 0:
    raise newException(ValueError, "empty Bible reference")
|
||||
|
||||
proc referenceText*(query: PassageQuery): string =
  ## String form of the query's parsed reference (its `$` conversion).
  result = $(query.reference)
|
||||
@@ -0,0 +1,399 @@
|
||||
import std/[strutils]
|
||||
|
||||
type
  BookInfo* = object
    ## Static description of one book of the canon.
    code*: string
      ## Three-character book code, e.g. "GEN".
    name*: string
      ## Display name, e.g. "Genesis".
    singleChapter*: bool
      ## Set for the books flagged `singleChapter: true` in `CanonBooks`.

  RefPoint* = object
    ## One end of a range: a chapter and a verse number.
    chapter*: int
    verse*: int

  RefRange* = object
    ## A span of text running from `start` to `finish`.
    start*: RefPoint
    finish*: RefPoint

  PassageReference* = object
    ## A book together with the ranges requested from it.
    book*: BookInfo
    ranges*: seq[RefRange]
|
||||
|
||||
# The 66 books in canonical order (Genesis through Revelation). `bookIndex`
# returns a book's position in this array; `bookInfo` looks entries up by
# code. Single-chapter books carry `singleChapter: true`.
const CanonBooks*: array[66, BookInfo] = [
  BookInfo(code: "GEN", name: "Genesis"),
  BookInfo(code: "EXO", name: "Exodus"),
  BookInfo(code: "LEV", name: "Leviticus"),
  BookInfo(code: "NUM", name: "Numbers"),
  BookInfo(code: "DEU", name: "Deuteronomy"),
  BookInfo(code: "JOS", name: "Joshua"),
  BookInfo(code: "JDG", name: "Judges"),
  BookInfo(code: "RUT", name: "Ruth"),
  BookInfo(code: "1SA", name: "1 Samuel"),
  BookInfo(code: "2SA", name: "2 Samuel"),
  BookInfo(code: "1KI", name: "1 Kings"),
  BookInfo(code: "2KI", name: "2 Kings"),
  BookInfo(code: "1CH", name: "1 Chronicles"),
  BookInfo(code: "2CH", name: "2 Chronicles"),
  BookInfo(code: "EZR", name: "Ezra"),
  BookInfo(code: "NEH", name: "Nehemiah"),
  BookInfo(code: "EST", name: "Esther"),
  BookInfo(code: "JOB", name: "Job"),
  BookInfo(code: "PSA", name: "Psalms"),
  BookInfo(code: "PRO", name: "Proverbs"),
  BookInfo(code: "ECC", name: "Ecclesiastes"),
  BookInfo(code: "SNG", name: "Song of Solomon"),
  BookInfo(code: "ISA", name: "Isaiah"),
  BookInfo(code: "JER", name: "Jeremiah"),
  BookInfo(code: "LAM", name: "Lamentations"),
  BookInfo(code: "EZK", name: "Ezekiel"),
  BookInfo(code: "DAN", name: "Daniel"),
  BookInfo(code: "HOS", name: "Hosea"),
  BookInfo(code: "JOL", name: "Joel"),
  BookInfo(code: "AMO", name: "Amos"),
  BookInfo(code: "OBA", name: "Obadiah", singleChapter: true),
  BookInfo(code: "JON", name: "Jonah"),
  BookInfo(code: "MIC", name: "Micah"),
  BookInfo(code: "NAM", name: "Nahum"),
  BookInfo(code: "HAB", name: "Habakkuk"),
  BookInfo(code: "ZEP", name: "Zephaniah"),
  BookInfo(code: "HAG", name: "Haggai"),
  BookInfo(code: "ZEC", name: "Zechariah"),
  BookInfo(code: "MAL", name: "Malachi"),
  BookInfo(code: "MAT", name: "Matthew"),
  BookInfo(code: "MRK", name: "Mark"),
  BookInfo(code: "LUK", name: "Luke"),
  BookInfo(code: "JHN", name: "John"),
  BookInfo(code: "ACT", name: "Acts"),
  BookInfo(code: "ROM", name: "Romans"),
  BookInfo(code: "1CO", name: "1 Corinthians"),
  BookInfo(code: "2CO", name: "2 Corinthians"),
  BookInfo(code: "GAL", name: "Galatians"),
  BookInfo(code: "EPH", name: "Ephesians"),
  BookInfo(code: "PHP", name: "Philippians"),
  BookInfo(code: "COL", name: "Colossians"),
  BookInfo(code: "1TH", name: "1 Thessalonians"),
  BookInfo(code: "2TH", name: "2 Thessalonians"),
  BookInfo(code: "1TI", name: "1 Timothy"),
  BookInfo(code: "2TI", name: "2 Timothy"),
  BookInfo(code: "TIT", name: "Titus"),
  BookInfo(code: "PHM", name: "Philemon", singleChapter: true),
  BookInfo(code: "HEB", name: "Hebrews"),
  BookInfo(code: "JAS", name: "James"),
  BookInfo(code: "1PE", name: "1 Peter"),
  BookInfo(code: "2PE", name: "2 Peter"),
  BookInfo(code: "1JN", name: "1 John"),
  BookInfo(code: "2JN", name: "2 John", singleChapter: true),
  BookInfo(code: "3JN", name: "3 John", singleChapter: true),
  BookInfo(code: "JUD", name: "Jude", singleChapter: true),
  BookInfo(code: "REV", name: "Revelation")
]
|
||||
|
||||
# (book code, alias) pairs. Aliases are written in lower case; numbered books
# use a space between the number and the name and also list roman-numeral and
# spelled-out ordinal forms.
# NOTE(review): presumably consumed via `matchAlias` by the book parser —
# confirm against the rest of this module.
const bookAliases = [
  ("GEN", "genesis"), ("GEN", "gen"),
  ("EXO", "exodus"), ("EXO", "exod"), ("EXO", "exo"),
  ("LEV", "leviticus"), ("LEV", "lev"),
  ("NUM", "numbers"), ("NUM", "num"), ("NUM", "numb"),
  ("DEU", "deuteronomy"), ("DEU", "deut"), ("DEU", "deu"),
  ("JOS", "joshua"), ("JOS", "josh"), ("JOS", "jos"),
  ("JDG", "judges"), ("JDG", "judg"), ("JDG", "jdg"),
  ("RUT", "ruth"), ("RUT", "rut"),
  ("1SA", "1 samuel"), ("1SA", "1 sam"), ("1SA", "i samuel"), ("1SA", "first samuel"),
  ("2SA", "2 samuel"), ("2SA", "2 sam"), ("2SA", "ii samuel"), ("2SA", "second samuel"),
  ("1KI", "1 kings"), ("1KI", "1 kgs"), ("1KI", "1 kin"), ("1KI", "i kings"), ("1KI", "first kings"),
  ("2KI", "2 kings"), ("2KI", "2 kgs"), ("2KI", "2 kin"), ("2KI", "ii kings"), ("2KI", "second kings"),
  ("1CH", "1 chronicles"), ("1CH", "1 chron"), ("1CH", "1 chr"), ("1CH", "i chronicles"), ("1CH", "first chronicles"),
  ("2CH", "2 chronicles"), ("2CH", "2 chron"), ("2CH", "2 chr"), ("2CH", "ii chronicles"), ("2CH", "second chronicles"),
  ("EZR", "ezra"), ("EZR", "ezr"),
  ("NEH", "nehemiah"), ("NEH", "neh"),
  ("EST", "esther"), ("EST", "est"),
  ("JOB", "job"),
  ("PSA", "psalms"), ("PSA", "psalm"), ("PSA", "ps"), ("PSA", "psa"),
  ("PRO", "proverbs"), ("PRO", "prov"), ("PRO", "pro"),
  ("ECC", "ecclesiastes"), ("ECC", "eccl"), ("ECC", "ecc"),
  ("SNG", "song of solomon"), ("SNG", "song"), ("SNG", "songs"), ("SNG", "canticles"), ("SNG", "sng"),
  ("ISA", "isaiah"), ("ISA", "isa"),
  ("JER", "jeremiah"), ("JER", "jer"),
  ("LAM", "lamentations"), ("LAM", "lam"),
  ("EZK", "ezekiel"), ("EZK", "ezek"), ("EZK", "ezk"),
  ("DAN", "daniel"), ("DAN", "dan"),
  ("HOS", "hosea"), ("HOS", "hos"),
  ("JOL", "joel"), ("JOL", "jol"),
  ("AMO", "amos"), ("AMO", "amo"),
  ("OBA", "obadiah"), ("OBA", "obad"), ("OBA", "oba"),
  ("JON", "jonah"), ("JON", "jon"),
  ("MIC", "micah"), ("MIC", "mic"),
  ("NAM", "nahum"), ("NAM", "nah"),
  ("HAB", "habakkuk"), ("HAB", "hab"),
  ("ZEP", "zephaniah"), ("ZEP", "zeph"), ("ZEP", "zep"),
  ("HAG", "haggai"), ("HAG", "hag"),
  ("ZEC", "zechariah"), ("ZEC", "zech"), ("ZEC", "zec"),
  ("MAL", "malachi"), ("MAL", "mal"),
  ("MAT", "matthew"), ("MAT", "matt"), ("MAT", "mat"), ("MAT", "mt"),
  ("MRK", "mark"), ("MRK", "mrk"), ("MRK", "mk"),
  ("LUK", "luke"), ("LUK", "luk"), ("LUK", "lk"),
  ("JHN", "john"), ("JHN", "jhn"), ("JHN", "jn"),
  ("ACT", "acts"), ("ACT", "act"),
  ("ROM", "romans"), ("ROM", "rom"),
  ("1CO", "1 corinthians"), ("1CO", "1 cor"), ("1CO", "1 co"), ("1CO", "i corinthians"), ("1CO", "first corinthians"),
  ("2CO", "2 corinthians"), ("2CO", "2 cor"), ("2CO", "2 co"), ("2CO", "ii corinthians"), ("2CO", "second corinthians"),
  ("GAL", "galatians"), ("GAL", "gal"),
  ("EPH", "ephesians"), ("EPH", "eph"),
  ("PHP", "philippians"), ("PHP", "php"),
  ("COL", "colossians"), ("COL", "col"),
  ("1TH", "1 thessalonians"), ("1TH", "1 thess"), ("1TH", "1 thes"), ("1TH", "i thessalonians"), ("1TH", "first thessalonians"),
  ("2TH", "2 thessalonians"), ("2TH", "2 thess"), ("2TH", "2 thes"), ("2TH", "ii thessalonians"), ("2TH", "second thessalonians"),
  ("1TI", "1 timothy"), ("1TI", "1 tim"), ("1TI", "i timothy"), ("1TI", "first timothy"),
  ("2TI", "2 timothy"), ("2TI", "2 tim"), ("2TI", "ii timothy"), ("2TI", "second timothy"),
  ("TIT", "titus"), ("TIT", "tit"),
  ("PHM", "philemon"), ("PHM", "philem"), ("PHM", "phm"),
  ("HEB", "hebrews"), ("HEB", "heb"),
  ("JAS", "james"), ("JAS", "jas"), ("JAS", "jam"),
  ("1PE", "1 peter"), ("1PE", "1 pet"), ("1PE", "1 pe"), ("1PE", "i peter"), ("1PE", "first peter"),
  ("2PE", "2 peter"), ("2PE", "2 pet"), ("2PE", "2 pe"), ("2PE", "ii peter"), ("2PE", "second peter"),
  ("1JN", "1 john"), ("1JN", "1 jn"), ("1JN", "1 jhn"), ("1JN", "i john"), ("1JN", "first john"),
  ("2JN", "2 john"), ("2JN", "2 jn"), ("2JN", "2 jhn"), ("2JN", "ii john"), ("2JN", "second john"),
  ("3JN", "3 john"), ("3JN", "3 jn"), ("3JN", "3 jhn"), ("3JN", "iii john"), ("3JN", "third john"),
  ("JUD", "jude"), ("JUD", "jud"),
  ("REV", "revelation"), ("REV", "revelations"), ("REV", "rev"), ("REV", "apocalypse")
]
|
||||
|
||||
proc bookInfo*(code: string): BookInfo =
  ## Returns the `CanonBooks` entry whose code equals `code` exactly
  ## (case-sensitive); raises `ValueError` for an unknown code.
  for idx in CanonBooks.low .. CanonBooks.high:
    if CanonBooks[idx].code == code:
      return CanonBooks[idx]

  raise newException(ValueError, "unknown Bible book code '" & code & "'")
|
||||
|
||||
proc bookIndex*(code: string): int =
  ## Returns the zero-based position in `CanonBooks` of the book whose code
  ## equals `code` exactly; raises `ValueError` for an unknown code.
  var position = 0
  for book in CanonBooks:
    if book.code == code:
      return position
    inc position

  raise newException(ValueError, "unknown Bible book code '" & code & "'")
|
||||
|
||||
proc normalizeReferenceInput(s: string): string =
  ## Replaces en dashes, em dashes and minus signs with an ASCII hyphen and
  ## strips surrounding whitespace.
  result = s
    .replace("–", "-")
    .replace("—", "-")
    .replace("−", "-")
    .strip
|
||||
|
||||
proc normalizeBookPrefix(s: string): string =
  ## Keeps only ASCII letters (lower-cased) and ASCII digits from `s`;
  ## spaces, punctuation and every other byte are dropped.
  for ch in s:
    case ch
    of 'a'..'z', '0'..'9':
      result.add(ch)
    of 'A'..'Z':
      result.add(ch.toLowerAscii)
    else:
      discard
|
||||
|
||||
proc canonicalNamePrefixMatches(prefix: string): seq[BookInfo] =
  ## Returns, in canonical order, every book whose normalized name (see
  ## `normalizeBookPrefix`) starts with `prefix`.
  for idx in 0 ..< CanonBooks.len:
    let normalizedName = normalizeBookPrefix(CanonBooks[idx].name)
    if not normalizedName.startsWith(prefix):
      continue
    result.add(CanonBooks[idx])
|
||||
|
||||
proc formatBookList(books: seq[BookInfo]): string =
  ## Joins the display names of `books` with ", ".
  for position, book in books:
    if position > 0:
      result.add(", ")
    result.add(book.name)
|
||||
|
||||
proc matchCanonicalBookPrefix(input: string): tuple[
    matched: bool,
    ambiguous: bool,
    book: BookInfo,
    consumed: int,
    prefix: string,
    matches: seq[BookInfo]] =
  ## Tries every word-boundary prefix of `input` against the canonical book
  ## names and reports the longest prefix that identifies exactly one book.
  ##
  ## * `matched` – some prefix resolved to exactly one book; `book` is that
  ##   book and `consumed` is the number of input characters in the prefix.
  ## * `ambiguous` – no prefix was unique, but at least one prefix matched
  ##   several books; `matches` lists the candidates for the longest such
  ##   prefix.
  ## * `prefix` – the stripped input text of the reported prefix.
  for boundary in 1 .. input.len:
    # A letter right after the cut means the cut would split a word.
    let splitsWord = boundary < input.len and input[boundary].isAlphaAscii
    if splitsWord:
      continue

    let rawPrefix = input[0 ..< boundary]
    let normalizedPrefix = normalizeBookPrefix(rawPrefix)
    if normalizedPrefix.len == 0:
      continue

    let candidates = canonicalNamePrefixMatches(normalizedPrefix)
    if candidates.len == 1:
      result.matched = true
      result.ambiguous = false
      result.book = candidates[0]
    elif candidates.len > 1 and not result.matched:
      # Only remember an ambiguity while no unique match has been seen.
      result.ambiguous = true
    else:
      continue

    result.consumed = boundary
    result.prefix = rawPrefix.strip
    result.matches = candidates
|
||||
|
||||
proc matchAlias(input, alias: string): int =
  ## Matches `alias` case-insensitively against the start of `input`.
  ##
  ## A whitespace or '.' character in `alias` matches any run (possibly
  ## empty) of whitespace and '.' in `input`; '.' characters in `input` are
  ## also skipped before every other alias character and after the last one.
  ## Returns the number of input characters consumed, or -1 when the alias
  ## does not match or is immediately followed by another ASCII letter.
  const separators = Whitespace + {'.'}
  var pos = 0

  template skipAll(chars: set[char]) =
    while pos < input.len and input[pos] in chars:
      inc pos

  for aliasCh in alias:
    if aliasCh in separators:
      skipAll(separators)
    else:
      skipAll({'.'})
      if pos >= input.len or input[pos].toLowerAscii != aliasCh.toLowerAscii:
        return -1
      inc pos

  skipAll({'.'})

  # Reject a match that stops in the middle of a longer word.
  if pos < input.len and input[pos].isAlphaAscii:
    return -1

  result = pos
|
||||
|
||||
proc parseBook(input: string): tuple[book: BookInfo, rest: string] =
  ## Splits `input` into the book it names and the remaining passage text.
  ##
  ## A unique canonical-name prefix wins; otherwise the alias from
  ## `bookAliases` that consumes the most input is used (the first such
  ## alias on ties). Raises `ValueError` when neither succeeds, with a
  ## dedicated message when the canonical prefix was ambiguous.
  let canonical = matchCanonicalBookPrefix(input)
  if canonical.matched:
    return (book: canonical.book, rest: input[canonical.consumed .. ^1].strip)

  var aliasCode = ""
  var aliasLen = -1
  for entry in bookAliases:
    let consumed = matchAlias(input, entry[1])
    if consumed > aliasLen:
      aliasCode = entry[0]
      aliasLen = consumed

  if aliasLen < 0:
    let message =
      if canonical.ambiguous:
        "ambiguous Bible book prefix '" & canonical.prefix & "' in '" &
          input & "'; matches " & canonical.matches.formatBookList
      else:
        "could not parse Bible book in '" & input & "'"
    raise newException(ValueError, message)

  result.book = bookInfo(aliasCode)
  result.rest = input[aliasLen .. ^1].strip
|
||||
|
||||
proc parsePositiveInt(s, label: string): int =
  ## Parses `s` as a strictly positive decimal integer.
  ##
  ## `label` names the value in error messages. Raises `ValueError` when `s`
  ## is empty, contains anything but ASCII digits, or evaluates to zero.
  let digitsOnly = s.len > 0 and s.allCharsInSet(Digits)
  if not digitsOnly:
    raise newException(ValueError, "invalid " & label & " '" & s & "'")

  let value = parseInt(s)
  if value < 1:
    raise newException(ValueError, label & " must be positive")

  value
|
||||
|
||||
proc parsePoint(token: string, defaultChapter: int, singleChapter: bool): RefPoint =
  ## Parses one end of a range: either "chapter:verse" or a bare number.
  ##
  ## A bare number is a verse of chapter 1 when `singleChapter` is set, a
  ## verse of `defaultChapter` when that is positive, and otherwise a whole
  ## chapter (`verse` = 0). Raises `ValueError` for empty or malformed input.
  let text = token.strip
  if text.len == 0:
    raise newException(ValueError, "empty reference point")

  let parts = text.split(':', maxsplit = 1)
  if parts.len == 2:
    return RefPoint(
      chapter: parsePositiveInt(parts[0], "chapter"),
      verse: parsePositiveInt(parts[1], "verse"))

  let number = parsePositiveInt(text, "reference number")
  let chapter =
    if singleChapter: 1
    elif defaultChapter > 0: defaultChapter
    else: number
  let verse =
    if singleChapter or defaultChapter > 0: number
    else: 0
  RefPoint(chapter: chapter, verse: verse)
|
||||
|
||||
proc parseRange(segment: string, defaultChapter: int, singleChapter: bool): RefRange =
  ## Parses "start" or "start-finish" into a `RefRange`.
  ##
  ## The text is split at the first '-'. When the start point carries a
  ## verse, a bare number on the finish side is read as a verse of the
  ## start's chapter; otherwise it is read as a chapter. Without a '-' the
  ## finish equals the start. Raises `ValueError` when the finish lies
  ## before the start.
  let text = segment.strip
  let dashAt = text.find('-')

  if dashAt < 0:
    result.start = parsePoint(text, defaultChapter, singleChapter)
    result.finish = result.start
  else:
    result.start = parsePoint(text[0 ..< dashAt], defaultChapter, singleChapter)
    let finishChapter = if result.start.verse > 0: result.start.chapter else: 0
    result.finish = parsePoint(text[dashAt + 1 .. ^1], finishChapter, singleChapter)

  let endsInEarlierChapter = result.finish.chapter < result.start.chapter
  let endsAtEarlierVerse =
    result.finish.chapter == result.start.chapter and
    result.start.verse > 0 and
    result.finish.verse > 0 and
    result.finish.verse < result.start.verse

  if endsInEarlierChapter or endsAtEarlierVerse:
    raise newException(ValueError, "range ends before it starts: '" & segment & "'")
|
||||
|
||||
proc parsePassageSpec(spec: string, book: BookInfo): seq[RefRange] =
  ## Parses the comma-separated passage part of a reference (everything
  ## after the book name) into ranges.
  ##
  ## A segment that names verses sets the chapter used for bare numbers in
  ## the next segment, so "3:16,20-21" yields 3:16 and 3:20-21. A segment
  ## that names only chapters resets it, so the next bare number is a
  ## chapter again. Raises `ValueError` on an empty segment or on any error
  ## from `parseRange`.
  var currentChapter = 0

  for rawSegment in spec.split(','):
    let segment = rawSegment.strip
    if segment.len == 0:
      raise newException(ValueError, "empty passage range in '" & spec & "'")

    let range = parseRange(segment, currentChapter, book.singleChapter)
    result.add(range)

    if segment.contains(':') or (range.start.verse > 0 and range.finish.verse > 0):
      # Carry the chapter the segment ended in, not the one it started in:
      # after "3:16-4:2" a following bare "5" means 4:5.
      currentChapter = range.finish.chapter
    else:
      currentChapter = 0
|
||||
|
||||
proc parseReference*(input: string): PassageReference =
  ## Parses a single reference such as "John 3:16,20-21".
  ##
  ## The input is dash-normalized and trimmed, the book is resolved with
  ## `parseBook`, and any remaining text is parsed as ranges. A reference
  ## that names only a book has no ranges. Raises `ValueError` on failure.
  let cleaned = normalizeReferenceInput(input)
  let (book, remainder) = parseBook(cleaned)

  result.book = book
  if remainder.len == 0:
    return
  result.ranges = parsePassageSpec(remainder, book)
|
||||
|
||||
proc parseReferences*(input: string): seq[PassageReference] =
  ## Parses a ';'-separated list of references, ignoring blank entries.
  ##
  ## Raises `ValueError` when no non-blank entry is present or when any
  ## entry fails to parse.
  for piece in input.split(';'):
    let trimmed = piece.strip
    if trimmed.len == 0:
      continue
    result.add(parseReference(trimmed))

  if result.len == 0:
    raise newException(ValueError, "empty Bible reference")
|
||||
|
||||
proc `$`*(point: RefPoint): string =
  ## Renders "chapter:verse", or just "chapter" when `verse` is not positive.
  result = $point.chapter
  if point.verse > 0:
    result.add(":")
    result.add($point.verse)
|
||||
|
||||
proc `$`*(range: RefRange): string =
  ## Renders a range: a single point when start and finish are equal,
  ## "chapter:first-last" for verses within one chapter, and
  ## "start-finish" otherwise.
  let first = range.start
  let last = range.finish
  let versesInOneChapter =
    first.chapter == last.chapter and first.verse > 0 and last.verse > 0

  if first == last:
    $first
  elif versesInOneChapter:
    $first.chapter & ":" & $first.verse & "-" & $last.verse
  else:
    $first & "-" & $last
|
||||
|
||||
proc formatSingleChapterRange(range: RefRange): string =
  ## Renders a range for a single-chapter book without the chapter number:
  ## "verse" for one point and "first-last" within one chapter. Ranges that
  ## span chapters fall back to "start-finish" with full points.
  let first = range.start
  let last = range.finish

  if first == last:
    $first.verse
  elif first.chapter == last.chapter:
    $first.verse & "-" & $last.verse
  else:
    $first & "-" & $last
|
||||
|
||||
proc `$`*(reference: PassageReference): string =
  ## Renders the book name followed by its ranges, e.g. "John 3:16, 3:20-21".
  ## Single-chapter books render ranges without the chapter number. A
  ## reference without ranges renders as the book name alone.
  result = reference.book.name

  for position, span in reference.ranges:
    result.add(if position == 0: " " else: ", ")
    if reference.book.singleChapter:
      result.add(formatSingleChapterRange(span))
    else:
      result.add($span)
|
||||
@@ -0,0 +1,84 @@
|
||||
import std/[strutils, unittest]
|
||||
|
||||
import ../src/kjv
|
||||
import ../src/reference_parser
|
||||
|
||||
suite "reference parser":
  test "parses single verse references":
    let parsed = parseReference("John 3:16")

    check parsed.book.code == "JHN"
    check parsed.ranges.len == 1
    check parsed.ranges[0].start.chapter == 3
    check parsed.ranges[0].start.verse == 16
    # A single point is stored as a range whose ends coincide.
    check parsed.ranges[0].finish == parsed.ranges[0].start
    check $parsed == "John 3:16"

  test "parses verse lists using the previous chapter":
    let parsed = parseReference("John 3:16,20-21")

    check parsed.ranges.len == 2
    check parsed.ranges[1].start.chapter == 3
    check parsed.ranges[1].start.verse == 20
    check parsed.ranges[1].finish.chapter == 3
    check parsed.ranges[1].finish.verse == 21
    check $parsed == "John 3:16, 3:20-21"

  test "parses chapter ranges":
    let parsed = parseReference("John 3-4")

    check parsed.ranges.len == 1
    check parsed.ranges[0].start.chapter == 3
    check parsed.ranges[0].start.verse == 0
    check parsed.ranges[0].finish.chapter == 4
    check parsed.ranges[0].finish.verse == 0
    check $parsed == "John 3-4"

  test "parses abbreviated numbered books":
    let parsed = parseReference("1 Jn 1:9")

    check parsed.book.code == "1JN"
    check parsed.ranges[0].start.chapter == 1
    check parsed.ranges[0].start.verse == 9
    check $parsed == "1 John 1:9"

  test "parses unique canonical book prefixes":
    check parseReference("Gene 1:1").book.code == "GEN"
    check parseReference("Phile 3").book.code == "PHM"
    check parseReference("Phili 1:6").book.code == "PHP"

  test "rejects ambiguous canonical book prefixes":
    expect ValueError:
      discard parseReference("Phil 1")

  test "normalizes single-chapter book references":
    let parsed = parseReference("Jude 3-4")

    check parsed.book.code == "JUD"
    # Bare numbers in a single-chapter book are verses of chapter 1.
    check parsed.ranges[0].start.chapter == 1
    check parsed.ranges[0].start.verse == 3
    check parsed.ranges[0].finish.chapter == 1
    check parsed.ranges[0].finish.verse == 4
    check $parsed == "Jude 3-4"

  test "parses semicolon-separated references":
    let parsedList = parseReferences("Psalm 23; John 3:16")

    check parsedList.len == 2
    check parsedList[0].book.code == "PSA"
    check parsedList[1].book.code == "JHN"
|
||||
|
||||
suite "offline KJV backend":
  test "fetches a single embedded verse":
    let fetched = kjv.fetchPassages("John 3:16")

    check fetched.len == 1
    # The passage starts with its reference line and tags the verse number.
    check fetched[0].startsWith("John 3:16\n")
    check fetched[0].contains(" [16] ")

  test "fetches a single-chapter embedded verse":
    let fetched = kjv.fetchPassages("Jude 3")

    check fetched.len == 1
    check fetched[0].startsWith("Jude 3\n")
    check fetched[0].contains(" [3] ")
|
||||
@@ -0,0 +1,44 @@
|
||||
import std/unittest
|
||||
|
||||
import ../src/passage_query
|
||||
|
||||
suite "passage query parser":
  test "uses the default translation when no marker is present":
    let parsed = parsePassageQueries("John 3:16", "kjv")

    check parsed.len == 1
    check parsed[0].referenceText == "John 3:16"
    check parsed[0].translation == "kjv"

  test "uses a trailing translation marker":
    let parsed = parsePassageQueries("2 John 5 (KJV)", "esv")

    check parsed.len == 1
    check parsed[0].referenceText == "2 John 5"
    check parsed[0].translation == "kjv"

  test "parses mixed translation queries":
    let parsed = parsePassageQueries("2 John 5 (KJV); 2 John 5 (ESV)", "mev")

    check parsed.len == 2
    check parsed[0].referenceText == "2 John 5"
    check parsed[0].translation == "kjv"
    check parsed[1].referenceText == "2 John 5"
    check parsed[1].translation == "esv"

  test "uses the default translation per unmarked reference":
    let parsed = parsePassageQueries("John 3:16; Psalm 23 (MEV)", "nkjv")

    check parsed.len == 2
    check parsed[0].referenceText == "John 3:16"
    check parsed[0].translation == "nkjv"
    # The reference text comes back with the canonical book name.
    check parsed[1].referenceText == "Psalms 23"
    check parsed[1].translation == "mev"

  test "rejects unknown translation markers":
    expect ValueError:
      discard parsePassageQueries("John 3:16 (XYZ)", "esv")

  test "rejects unknown default translations":
    expect ValueError:
      discard parsePassageQueries("John 3:16", "xyz")
|
||||
@@ -0,0 +1,144 @@
|
||||
import std/[os, strutils, tables]
|
||||
|
||||
# Source archive: https://ebible.org/Scriptures/eng-kjv_usfm.zip
|
||||
|
||||
# Three-letter book codes in the order the generated rows are written.
# `findCanonFiles` keeps only files whose code appears here, and `generate`
# iterates this list and fails if any code has no file.
const canonBookCodes = [
  "GEN", "EXO", "LEV", "NUM", "DEU", "JOS", "JDG", "RUT",
  "1SA", "2SA", "1KI", "2KI", "1CH", "2CH", "EZR", "NEH",
  "EST", "JOB", "PSA", "PRO", "ECC", "SNG", "ISA", "JER",
  "LAM", "EZK", "DAN", "HOS", "JOL", "AMO", "OBA", "JON",
  "MIC", "NAM", "HAB", "ZEP", "HAG", "ZEC", "MAL", "MAT",
  "MRK", "LUK", "JHN", "ACT", "ROM", "1CO", "2CO", "GAL",
  "EPH", "PHP", "COL", "1TH", "2TH", "1TI", "2TI", "TIT",
  "PHM", "HEB", "JAS", "1PE", "2PE", "1JN", "2JN", "3JN",
  "JUD", "REV"
]
|
||||
|
||||
proc normalizeWhitespace(s: string): string =
  ## Collapses every run of ASCII whitespace into a single space and removes
  ## leading and trailing whitespace.
  s.splitWhitespace.join(" ")
|
||||
|
||||
proc removeFootnotes(s: string): string =
  ## Removes every span from a `\f ` opener through the next `\f*`.
  ##
  ## If an opener has no matching `\f*`, everything from that opener to the
  ## end of the string is dropped.
  var cursor = 0
  while cursor < s.len:
    let openAt = s.find("\\f ", cursor)
    if openAt < 0:
      # No further footnotes: keep the remainder unchanged.
      result.add(s[cursor .. ^1])
      break

    result.add(s[cursor ..< openAt])

    let closeAt = s.find("\\f*", openAt + 2)
    if closeAt < 0:
      break
    cursor = closeAt + 3
|
||||
|
||||
proc stripUsfmMarkup(s: string): string =
  ## Returns the text of a USFM fragment with footnotes, backslash markers
  ## and `|attribute` tails removed and whitespace normalized.
  let text = removeFootnotes(s)
  var pos = 0

  while pos < text.len:
    let ch = text[pos]
    if ch == '\\':
      # Skip the marker: backslash, optional '+', then its name.
      inc pos
      if pos < text.len and text[pos] == '+':
        inc pos
      while pos < text.len and text[pos] in Letters + Digits + {'-'}:
        inc pos

      if pos < text.len and text[pos] == '*':
        # A closing marker leaves the whitespace after it in place.
        inc pos
      else:
        # An opening marker swallows the whitespace that separates it from
        # its content.
        while pos < text.len and text[pos] in Whitespace:
          inc pos
    elif ch == '|':
      # Attribute text runs up to the next marker.
      while pos < text.len and text[pos] != '\\':
        inc pos
    else:
      result.add(if ch == '\t': ' ' else: ch)
      inc pos

  result = normalizeWhitespace(result)
|
||||
|
||||
proc parseVerseLine(line: string): tuple[verse: int, text: string] =
  ## Splits a verse line into its number and its markup-free text.
  ##
  ## The first three characters are discarded without inspection (the
  ## caller passes lines starting with `\v `). Raises `ValueError` when no
  ## space follows the number or the number is not an integer.
  let payload = line[3 .. ^1].strip
  let gap = payload.find(' ')
  if gap < 0:
    raise newException(ValueError, "verse marker without text: " & line)

  (verse: parseInt(payload[0 ..< gap]),
   text: stripUsfmMarkup(payload[gap + 1 .. ^1]))
|
||||
|
||||
proc findCanonFiles(inputDir: string): Table[string, string] =
  ## Maps book code to file path for each `*eng-kjv.usfm` file in `inputDir`.
  ##
  ## The code is the text between the first '-' of the file name and the
  ## `eng-kjv.usfm` suffix; files whose code is not in `canonBookCodes`, or
  ## that have no '-' before the suffix, are ignored.
  const suffix = "eng-kjv.usfm"

  for path in walkFiles(inputDir / ("*" & suffix)):
    let name = path.extractFilename
    let dashAt = name.find('-')
    let suffixAt = name.find(suffix)
    if dashAt < 0 or suffixAt <= dashAt:
      continue

    let code = name[dashAt + 1 ..< suffixAt]
    if code in canonBookCodes:
      result[code] = path
|
||||
|
||||
proc generate(inputDir, outputPath: string) =
  ## Converts the USFM files in `inputDir` into a tab-separated file at
  ## `outputPath` with one `code<TAB>chapter<TAB>verse<TAB>text` row per
  ## verse, in `canonBookCodes` order.
  ##
  ## Raises `ValueError` when a book has no USFM file. The output's parent
  ## directory is created if needed.
  let usfmByCode = findCanonFiles(inputDir)
  var tsvRows: seq[string] = @[]

  for code in canonBookCodes:
    if code notin usfmByCode:
      raise newException(ValueError, "missing USFM file for " & code)

    var
      chapter = 0
      verse = 0
      pending = ""

    template emitPending() =
      # Write out the verse collected so far (if any) and start afresh.
      if chapter > 0 and verse > 0:
        let cleaned = normalizeWhitespace(pending).replace("\t", " ")
        if cleaned.len > 0:
          tsvRows.add([code, $chapter, $verse, cleaned].join("\t"))
      verse = 0
      pending = ""

    for rawLine in lines(usfmByCode[code]):
      let line = rawLine.strip

      if line.startsWith("\\c "):
        emitPending()
        chapter = parseInt(line[3 .. ^1].strip)
      elif line.startsWith("\\v "):
        emitPending()
        let parsed = parseVerseLine(line)
        verse = parsed.verse
        pending = parsed.text
      elif verse > 0:
        # Any other line inside a verse continues that verse's text.
        let continued = stripUsfmMarkup(line)
        if continued.len > 0:
          if pending.len > 0:
            pending.add(' ')
          pending.add(continued)

    emitPending()

  createDir(outputPath.parentDir)
  writeFile(outputPath, tsvRows.join("\n") & "\n")
|
||||
|
||||
when isMainModule:
  # Expects exactly two arguments: the USFM directory and the output path.
  if paramCount() == 2:
    generate(paramStr(1), paramStr(2))
  else:
    quit("Usage: generate_kjv_data <usfm-dir> <output-tsv>", QuitFailure)
|
||||
@@ -0,0 +1,330 @@
|
||||
import std/[
|
||||
htmlparser,
|
||||
os,
|
||||
osproc,
|
||||
streams,
|
||||
strutils,
|
||||
xmlparser,
|
||||
xmltree
|
||||
]
|
||||
|
||||
import ../src/reference_parser
|
||||
|
||||
type
  TocEntry = object
    ## One `navPoint` of the EPUB `toc.ncx` that targets an
    ## `index_split_NNN.html` file.
    label: string   ## normalized text of the navPoint's first `text` element
    code: string    ## `bookCodeForLabel(label)`; empty when it is not a book
    fileIndex: int  ## number taken from the `index_split_NNN.html` target

  BookSource = object
    ## The inclusive run of split files that `parseBook` reads for one book.
    code: string
    startIndex: int
    endIndex: int

  ParseState = object
    ## Accumulator threaded through `walkPassageText` while reading a book.
    code: string       ## book code written into every row
    chapter: int       ## current chapter; 0 until one is known
    verse: int         ## current verse; 0 when none is open
    verseText: string  ## text gathered for the current verse
    rows: seq[string]  ## finished tab-separated rows
|
||||
|
||||
proc normalizeWhitespace(s: string): string =
  ## Treats UTF-8 no-break spaces as ordinary spaces, collapses every run
  ## of ASCII whitespace into one space, and trims both ends.
  let withoutNbsp = s.replace("\xC2\xA0", " ")
  withoutNbsp.splitWhitespace.join(" ")
|
||||
|
||||
proc markerText(s: string): string =
  ## Returns `s` with all whitespace removed (after `normalizeWhitespace`).
  let collapsed = normalizeWhitespace(s)
  result = collapsed.replace(" ", "")
|
||||
|
||||
proc numberAfterPrefix(s, prefix: string): int =
  ## Returns the number that directly follows `prefix` in `s`, or 0.
  ##
  ## `s` is whitespace-normalized and upper-cased before comparison, so
  ## `prefix` must be upper case. Only whitespace may separate the prefix
  ## from the digits; anything after the first digit run is ignored.
  let text = normalizeWhitespace(s).toUpperAscii
  if not text.startsWith(prefix):
    return 0

  let tail = text[prefix.len .. ^1].strip
  var pos = 0
  while pos < tail.len and tail[pos].isSpaceAscii:
    inc pos

  let digitStart = pos
  while pos < tail.len and tail[pos].isDigit:
    inc pos

  if pos > digitStart:
    result = parseInt(tail[digitStart ..< pos])
|
||||
|
||||
proc isPositiveIntText(s: string): bool =
  ## True when `s`, with whitespace removed, consists only of ASCII digits
  ## and denotes a number greater than zero.
  let text = markerText(s)
  if text.len == 0 or not text.allCharsInSet(Digits):
    return false
  parseInt(text) > 0
|
||||
|
||||
proc readEpubEntry(epubPath, entryPath: string): string =
  ## Returns the contents of `entryPath` inside the archive `epubPath`, as
  ## printed by `unzip -p` (looked up on PATH).
  ##
  ## stderr is merged into the captured output so it can be reported.
  ## Raises `IOError` carrying that output when `unzip` exits non-zero.
  let process = startProcess(
    "unzip",
    args = ["-p", epubPath, entryPath],
    options = {poUsePath, poStdErrToStdOut})

  try:
    # Drain the output before waiting so a full pipe cannot stall `unzip`.
    result = process.outputStream.readAll()
    let exitCode = process.waitForExit()

    if exitCode != 0:
      raise newException(IOError,
        "could not read " & entryPath & " from " & epubPath & ": " & result)
  finally:
    # Release the process handles even when reading or waiting fails.
    process.close()
|
||||
|
||||
proc textContent(node: XmlNode): string =
  ## Concatenates the text of `node` and all of its descendants in document
  ## order. Only text nodes contribute; other non-element nodes are skipped.
  if node.kind == xnText:
    return node.text

  if node.kind == xnElement:
    for child in node:
      result.add(textContent(child))
|
||||
|
||||
proc firstDescendant(node: XmlNode, tag: string): XmlNode =
  ## Returns the first element named `tag` in a depth-first, document-order
  ## search that includes `node` itself, or nil when there is none.
  if node.kind != xnElement:
    return nil
  if node.tag == tag:
    return node

  for child in node:
    result = firstDescendant(child, tag)
    if not result.isNil:
      return
|
||||
|
||||
proc descendantText(node: XmlNode, tag: string): string =
  ## Whitespace-normalized text of the first `tag` element found by
  ## `firstDescendant`, or "" when there is none.
  let match = firstDescendant(node, tag)
  if not match.isNil:
    result = normalizeWhitespace(textContent(match))
|
||||
|
||||
proc descendantAttr(node: XmlNode, tag, attrName: string): string =
  ## Value of attribute `attrName` on the first `tag` element found by
  ## `firstDescendant`, or "" when there is no such element.
  let match = firstDescendant(node, tag)
  if not match.isNil:
    result = match.attr(attrName)
|
||||
|
||||
proc bookCodeForLabel(label: string): string =
  ## Maps a table-of-contents label to a book code, or "" when the label is
  ## not a book.
  ##
  ## Only the text before the first '(' is considered. The label "Solomon"
  ## maps to "SNG"; every other label must equal a `CanonBooks` name.
  let parenAt = label.find('(')
  let bookName = (if parenAt < 0: label else: label[0 ..< parenAt]).strip

  if bookName == "Solomon":
    return "SNG"

  for candidate in CanonBooks:
    if candidate.name != bookName:
      continue
    return candidate.code
|
||||
|
||||
proc indexFromSplitFile(path: string): int =
  ## Extracts NNN from a path whose file name is `index_split_NNN.html`,
  ## ignoring any `#fragment`.
  ##
  ## Returns 0 for every path that does not have that shape, including a
  ## missing or non-numeric index, so callers can use `> 0` as the test for
  ## a usable split file.
  const
    prefix = "index_split_"
    suffix = ".html"

  let filename = path.split('#', maxsplit = 1)[0].extractFilename
  if not filename.startsWith(prefix) or not filename.endsWith(suffix):
    return 0

  let digits = filename[prefix.len ..< filename.len - suffix.len]
  if digits.len == 0 or not digits.allCharsInSet(Digits):
    return 0

  try:
    result = parseInt(digits)
  except ValueError:
    # A digit run too long for `int` is not a usable index either.
    result = 0
|
||||
|
||||
proc parseTocEntries(epubPath: string): seq[TocEntry] =
  ## Reads `toc.ncx` from the EPUB and returns, in document order, one entry
  ## per `navPoint` whose content `src` is a split file with a positive
  ## index. Nested navPoints are visited too.
  let toc = parseXml(newStringStream(readEpubEntry(epubPath, "toc.ncx")))
  var collected: seq[TocEntry] = @[]

  proc visit(node: XmlNode) =
    if node.kind != xnElement:
      return

    if node.tag == "navPoint":
      let label = node.descendantText("text")
      let target = node.descendantAttr("content", "src")
      let fileIndex = indexFromSplitFile(target)
      if fileIndex > 0:
        collected.add(TocEntry(
          label: label,
          code: bookCodeForLabel(label),
          fileIndex: fileIndex))

    for child in node:
      visit(child)

  visit(toc)
  collected
|
||||
|
||||
proc bookSources(entries: seq[TocEntry]): seq[BookSource] =
  ## Turns table-of-contents entries into per-book file ranges.
  ##
  ## Entries without a book code are skipped. A book runs from its own file
  ## up to the file before the next entry (of any kind); the last entry
  ## covers only its own file. Raises `ValueError` unless the result lists
  ## exactly the `CanonBooks` codes in `CanonBooks` order.
  for position in 0 ..< entries.len:
    let entry = entries[position]
    if entry.code.len == 0:
      continue

    var lastFile = entry.fileIndex
    if position + 1 < entries.len:
      lastFile = entries[position + 1].fileIndex - 1

    result.add(BookSource(
      code: entry.code,
      startIndex: entry.fileIndex,
      endIndex: lastFile))

  if result.len != CanonBooks.len:
    raise newException(ValueError,
      "expected " & $CanonBooks.len & " canonical books in EPUB TOC, found " &
      $result.len)

  for position, book in CanonBooks:
    let found = result[position].code
    if found != book.code:
      raise newException(ValueError,
        "expected " & book.code & " at position " & $position & ", found " &
        found)
|
||||
|
||||
proc hasClass(node: XmlNode, className: string): bool =
  ## True when `node` is an element whose whitespace-separated `class`
  ## attribute contains `className` as a whole token.
  node.kind == xnElement and
    className in node.attr("class").splitWhitespace
|
||||
|
||||
proc shouldSkipElement(node: XmlNode): bool =
  ## True for elements whose content must not end up in verse text.
  const skippedClasses = [
    "calibre_29", # section headings
    "calibre_6",  # parallel/cross-reference paragraphs
    "calibre_26"  # Psalm superscriptions/cross-references
  ]

  for className in skippedClasses:
    if node.hasClass(className):
      return true
|
||||
|
||||
proc hasHref(node: XmlNode): bool =
  ## True when `node` or any of its descendants is an element with a
  ## non-empty `href` attribute.
  if node.kind != xnElement:
    return false
  if node.attr("href").len > 0:
    return true

  for child in node:
    if hasHref(child):
      return true
|
||||
|
||||
proc isBlockElement(node: XmlNode): bool =
  ## True when `node` is an element whose tag is one of the listed
  ## block-level tags.
  if node.kind != xnElement:
    return false

  case node.tag
  of "blockquote", "br", "div", "h1", "h2", "h3", "li", "p": true
  else: false
|
||||
|
||||
proc chapterMarker(node: XmlNode): int =
  ## Returns the chapter number shown by a `<span class="calibre1">` whose
  ## text is a positive integer, or 0 for any other node.
  let isCandidate =
    node.kind == xnElement and node.tag == "span" and node.hasClass("calibre1")
  if not isCandidate:
    return 0

  let digits = markerText(textContent(node))
  if digits.isPositiveIntText:
    result = parseInt(digits)
|
||||
|
||||
proc headingChapterMarker(node: XmlNode, code: string): int =
  ## Returns the chapter number announced by a `<p>` whose text starts with
  ## "CHAPTER n" or, for book code "PSA" only, "PSALM n". Returns 0 for any
  ## other node.
  let isParagraph = node.kind == xnElement and node.tag == "p"
  if not isParagraph:
    return 0

  let heading = textContent(node)
  let chapterNumber = numberAfterPrefix(heading, "CHAPTER ")

  if chapterNumber > 0:
    chapterNumber
  elif code == "PSA":
    numberAfterPrefix(heading, "PSALM ")
  else:
    0
|
||||
|
||||
proc verseMarker(node: XmlNode): int =
  ## Returns the verse number shown by a `<sup>` that contains no link and
  ## whose text is a positive integer, or 0 for any other node.
  let isCandidate =
    node.kind == xnElement and node.tag == "sup" and not node.hasHref
  if not isCandidate:
    return 0

  let digits = markerText(textContent(node))
  if digits.isPositiveIntText:
    result = parseInt(digits)
|
||||
|
||||
proc leadingVerseText(s: string): tuple[verse: int, rest: string] =
  ## Splits text that begins with a verse number into the number and the
  ## text after it.
  ##
  ## No-break spaces count as spaces; whitespace before the number and
  ## between the number and the text is dropped. Returns `(0, "")` when the
  ## first non-space character is not a digit.
  let body = s.replace("\xC2\xA0", " ").strip(trailing = false)

  var digitCount = 0
  while digitCount < body.len and body[digitCount].isDigit:
    inc digitCount

  if digitCount == 0:
    return

  result.verse = parseInt(body[0 ..< digitCount])
  result.rest = body[digitCount .. ^1].strip(trailing = false)
|
||||
|
||||
proc flushVerse(state: var ParseState) =
  ## Appends the verse collected so far to `state.rows` as a
  ## `code<TAB>chapter<TAB>verse<TAB>text` row, provided a chapter and verse
  ## are set and the text is not blank, then clears the collected text.
  ## `state.verse` is left unchanged.
  let hasOpenVerse = state.chapter > 0 and state.verse > 0
  if hasOpenVerse:
    let cleaned = normalizeWhitespace(state.verseText).replace("\t", " ")
    if cleaned.len > 0:
      state.rows.add(
        state.code & "\t" & $state.chapter & "\t" & $state.verse & "\t" & cleaned)

  state.verseText = ""
|
||||
|
||||
proc walkPassageText(node: XmlNode, state: var ParseState) =
  ## Walks an HTML tree in document order, feeding verse text into `state`
  ## and flushing a row whenever a chapter or verse marker is reached.
  if node.kind == xnText:
    # Text before the first chapter is ignored.
    if state.chapter <= 0:
      return

    if state.verse == 0:
      # No verse open: text counts only if it starts with a verse number.
      let leading = leadingVerseText(node.text)
      if leading.verse > 0:
        state.verse = leading.verse
        state.verseText.add(leading.rest)
    elif state.verse > 0:
      state.verseText.add(node.text)
    return

  if node.kind != xnElement:
    return

  # "CHAPTER n" / "PSALM n" paragraph: new chapter, no verse open yet.
  let headingChapter = headingChapterMarker(node, state.code)
  if headingChapter > 0:
    state.flushVerse()
    state.chapter = headingChapter
    state.verse = 0
    return

  if node.shouldSkipElement:
    return

  # Inline chapter number: new chapter, and verse 1 opens with it.
  let inlineChapter = chapterMarker(node)
  if inlineChapter > 0:
    state.flushVerse()
    state.chapter = inlineChapter
    state.verse = 1
    return

  let nextVerse = verseMarker(node)
  if nextVerse > 0:
    state.flushVerse()
    state.verse = nextVerse
    return

  # Any other superscript is dropped together with its content.
  if node.tag == "sup":
    return

  for child in node:
    walkPassageText(child, state)

  # Keep words on either side of a block boundary apart.
  if node.isBlockElement and state.chapter > 0 and state.verse > 0:
    state.verseText.add(' ')
|
||||
|
||||
proc indexSplitFile(index: int): string =
  ## Builds the archive entry name `index_split_NNN.html`, left-padding the
  ## index with zeros to at least three characters.
  result = "index_split_"
  result.add(align($index, 3, '0'))
  result.add(".html")
|
||||
|
||||
proc parseBook(epubPath: string, source: BookSource): seq[string] =
  ## Reads every split file of one book from the EPUB and returns its verse
  ## rows. Single-chapter books start in chapter 1; other books have no
  ## chapter until a marker is found.
  let startChapter = if bookInfo(source.code).singleChapter: 1 else: 0
  var state = ParseState(code: source.code, chapter: startChapter)

  for fileIndex in source.startIndex .. source.endIndex:
    let entryName = indexSplitFile(fileIndex)
    let document = parseHtml(newStringStream(readEpubEntry(epubPath, entryName)))
    walkPassageText(document, state)

  # The last verse of the book is still pending at this point.
  state.flushVerse()
  state.rows
|
||||
|
||||
proc generate(epubPath, outputPath: string) =
  ## Extracts every book listed in the EPUB's table of contents and writes
  ## the verse rows, one per line, to `outputPath`. The output's parent
  ## directory is created if needed.
  var allRows: seq[string] = @[]

  for source in bookSources(parseTocEntries(epubPath)):
    for row in parseBook(epubPath, source):
      allRows.add(row)

  createDir(outputPath.parentDir)
  writeFile(outputPath, allRows.join("\n") & "\n")
|
||||
|
||||
when isMainModule:
  # Expects exactly two arguments: the EPUB path and the output path.
  if paramCount() == 2:
    generate(paramStr(1), paramStr(2))
  else:
    quit("Usage: generate_mev_data <mev-epub> <output-tsv>", QuitFailure)
|
||||
Reference in New Issue
Block a user