Prepare 1.1.0 release

Add translation-aware passage queries
Add private MEV embedded support
2026-06-29 08:16:27 -05:00 · 2026-06-29 08:16:27 -05:00 · 2026-06-29 08:16:27 -05:00 · 2026-06-29 08:16:27 -05:00 · 2026-06-23 22:19:06 -05:00 · 2026-06-13 07:15:50 -05:00
19 changed files with 32643 additions and 103 deletions
@@ -1,2 +1,6 @@
 esv_api
+bibleref
+tests/test_offline_kjv
+tests/test_passage_query
+data/private/
 *.sw?
@@ -0,0 +1,19 @@
+# Package
+
+version       = "1.1.0"
+author        = "Jonathan Bernard"
+description   = "Simple Nim CLI for retrieving Biblical passages"
+license       = "MIT"
+srcDir        = "src"
+bin           = @["bibleref"]
+
+
+# Dependencies
+
+requires "nim >= 1.6.10"
+requires "docopt"
+requires "nimquery"
+requires "zero_functional"
+
+# dependencies from git.jdb-software.com/jdb/nim-packages.git
+requires "cliutils"
@@ -1,17 +0,0 @@
-# Package
-
-version       = "0.2.2"
-author        = "Jonathan Bernard"
-description   = "Simple Nim CLI wrapper around the ESV API (api.esv.org)"
-license       = "MIT"
-srcDir        = "src"
-bin           = @["esv_api"]
-
-
-# Dependencies
-
-requires "nim >= 1.6.10"
-requires @["docopt", "zero_functional"]
-
-# dependencies from git.jdb-software.com/jdb/nim-packages.git
-requires @["cliutils"]
@@ -0,0 +1,2 @@
+[tools]
+nim = "latest"
@@ -0,0 +1,103 @@
+import std/[httpclient, json, logging, strutils, uri]
+
+const apiBibleRoot* = "https://rest.api.bible/v1"
+
+proc configBibleIdKey(translation: string): string =
+  ## Returns the configuration key that holds the API.Bible Bible ID for
+  ## `translation`, e.g. `"niv"` -> `"apiBibleNivBibleId"`.
+  ##
+  ## The translation is lower-cased and then capitalised, so the result does
+  ## not depend on the caller's casing. An empty translation yields
+  ## `"apiBibleBibleId"` rather than failing on an out-of-range index.
+  "apiBible" & translation.toLowerAscii.capitalizeAscii & "BibleId"
+
+proc defaultBibleId*(translation: string): string =
+  ## Looks up the Bible ID built into this module for `translation`, matched
+  ## case-insensitively. Only NIV has one; every other translation yields an
+  ## empty string.
+  if translation.toLowerAscii == "niv":
+    result = "78a9f6124f344018-01"
+  else:
+    result = ""
+
+proc apiGet(apiRoot, path, query, apiKey: string): JsonNode =
+  ## Issues a GET request to `apiRoot & path` (plus `?query` when `query` is
+  ## non-empty), sending `apiKey` in the `api-key` header, and returns the
+  ## response body parsed as JSON.
+  ##
+  ## Trailing slashes on `apiRoot` are dropped before `path` is appended.
+  ## Errors raised by `getContent` or `parseJson` propagate to the caller.
+  var urlPath = apiRoot.strip(leading = false, chars = {'/'}) & path
+  if query.len > 0:
+    urlPath &= "?" & query
+
+  debug "requesting " & urlPath
+
+  let http = newHttpClient()
+  try:
+    http.headers = newHttpHeaders({"api-key": apiKey})
+    result = parseJson(http.getContent(urlPath))
+  finally:
+    # Release the connection even when the request or the parse fails.
+    http.close()
+
+proc resolveBibleId(translation, apiKey, apiRoot, configuredBibleId: string): string =
+  ## Picks the API.Bible Bible ID to use for `translation`.
+  ##
+  ## Sources are tried in this order:
+  ## 1. `configuredBibleId`, stripped, when it is not blank;
+  ## 2. the built-in ID from `defaultBibleId`;
+  ## 3. the `/bibles` listing (English, filtered by the upper-cased
+  ##    abbreviation): the first entry whose `abbreviation` or
+  ##    `abbreviationLocal` equals the translation case-insensitively,
+  ##    otherwise simply the first entry returned.
+  ##
+  ## Raises `ValueError` naming the configuration key to set when the listing
+  ## is empty.
+  let wanted = translation.toLowerAscii
+
+  let configured = configuredBibleId.strip
+  if configured.len > 0:
+    return configured
+
+  let builtIn = defaultBibleId(wanted)
+  if builtIn.len > 0:
+    return builtIn
+
+  let listingQuery =
+    "language=eng&abbreviation=" & encodeUrl(wanted.toUpperAscii) &
+      "&include-full-details=false"
+  let bibles = apiGet(apiRoot, "/bibles", listingQuery, apiKey)["data"].getElems
+
+  for bible in bibles:
+    for field in ["abbreviation", "abbreviationLocal"]:
+      # A missing field compares as the empty string.
+      let value = if bible.hasKey(field): bible[field].getStr else: ""
+      if value.toLowerAscii == wanted:
+        return bible["id"].getStr
+
+  if bibles.len > 0:
+    return bibles[0]["id"].getStr
+
+  raise newException(ValueError,
+    "could not find an API.Bible Bible ID for '" & translation &
+      "'; configure " & configBibleIdKey(wanted))
+
+proc resolvePassageId(reference, bibleId, apiKey, apiRoot: string): string =
+  ## Turns a free-text `reference` into an API.Bible passage ID by calling the
+  ## search endpoint of the Bible identified by `bibleId`.
+  ##
+  ## The first entry under `passages` wins; failing that, a `verses` list with
+  ## exactly one entry is accepted. Anything else raises `ValueError`.
+  let searchPath = "/bibles/" & encodeUrl(bibleId) & "/search"
+  let searchQuery = "query=" & encodeUrl(reference) & "&limit=1&sort=canonical"
+  let data = apiGet(apiRoot, searchPath, searchQuery, apiKey)["data"]
+
+  if data.hasKey("passages"):
+    let passages = data["passages"].getElems
+    if passages.len > 0:
+      return passages[0]["id"].getStr
+
+  if data.hasKey("verses"):
+    let verses = data["verses"].getElems
+    if verses.len == 1:
+      return verses[0]["id"].getStr
+
+  raise newException(ValueError,
+    "could not resolve passage reference '" & reference & "' using API.Bible")
+
+proc fetchPassages*(
+    reference,
+    translation,
+    apiKey,
+    apiRoot,
+    configuredBibleId: string): seq[string] =
+  ## Fetches `reference` from API.Bible and returns a one-element sequence
+  ## holding the passage's reference line, a newline, and its text content.
+  ##
+  ## The Bible ID and the passage ID are resolved first (see `resolveBibleId`
+  ## and `resolvePassageId`); the passage is then requested as plain text
+  ## with titles and verse numbers but without notes, chapter numbers or
+  ## verse spans.
+  const passageOptions =
+    "content-type=text&include-notes=false&include-titles=true" &
+      "&include-chapter-numbers=false&include-verse-numbers=true" &
+      "&include-verse-spans=false"
+
+  let bibleId = resolveBibleId(translation, apiKey, apiRoot, configuredBibleId)
+  let passageId = resolvePassageId(reference, bibleId, apiKey, apiRoot)
+  let passagePath =
+    "/bibles/" & encodeUrl(bibleId) & "/passages/" & encodeUrl(passageId)
+
+  let passage = apiGet(apiRoot, passagePath, passageOptions, apiKey)["data"]
+  result = @[passage["reference"].getStr & "\n" & passage["content"].getStr]
@@ -0,0 +1,188 @@
+# Nim CLI for retrieving Biblical passages
+# © 2023 Jonathan Bernard
+
+## Simple command-line tool for retrieving Biblical passages.
+
+import std/[json, logging, os, re, strutils, wordwrap]
+import cliutils, docopt, zero_functional
+
+import ./api_bible
+import ./esv
+import ./kjv
+import ./mev
+import ./passage_query
+
+proc collectVerseLines(
+    raw: string): tuple[reference: string, verseLines: seq[string]] =
+  ## Shared pre-processing for `formatMarkdown` and `formatPlain`: pulls the
+  ## passage heading and the verse lines out of `raw`.
+  ##
+  ## * `reference` is the first line of `raw` that is non-empty once stripped.
+  ## * Collection starts at the first indented line that begins with a `[n]`
+  ##   verse marker and pauses at a line starting with `Footnotes`.
+  ## * Consecutive blank lines collapse into one empty entry; other lines
+  ##   that are not indented text are dropped.
+  ## * A trailing `(ESV)` marker is removed from the line that carries it.
+  var inVerse = false
+
+  for line in raw.splitLines:
+    if result.reference.len == 0: result.reference = line.strip
+    if inVerse:
+      if line.startsWith("Footnotes"): inVerse = false
+      elif line.isEmptyOrWhitespace and result.verseLines[^1] != "":
+        result.verseLines.add("")
+      elif not line.match(re"^\s+[^\s]"): continue
+      elif line.endsWith("(ESV)"):
+        # Plain suffix test: a pattern that opens with `$` can never match
+        # an indented, non-empty line, so the marker would never be removed.
+        result.verseLines.add(line[0 ..< line.len - 5])
+      else: result.verseLines.add(line)
+    elif line.match(re"^\s+\[\d+\]"):
+      inVerse = true
+      result.verseLines.add(line)
+
+proc formatMarkdown(raw, translation: string): string =
+  ## Renders a raw passage as a Markdown block quote.
+  ##
+  ## Verse markers `[n]` become bold `**n**`, `(n)` markers are removed, text
+  ## is wrapped at 74 columns, every line is prefixed with `> `, and an
+  ## attribution line with the reference and the upper-cased `translation`
+  ## is appended.
+  let (reference, verseLines) = collectVerseLines(raw)
+
+  let wrapped = (verseLines -->
+    map(if it.len > 90: it.strip else: it & "  ").
+    map(it.multiReplace([(re"\((\d+)\)", ""), (re"\[(\d+)\]", "**$1**")])).
+    map(wrapWords(it, maxLineWidth = 74, newLine = "\p"))).join("\p")
+
+  result = (wrapped.splitLines --> map("> " & it)).
+    join("\p") & "\p> -- *" & reference & " (" &
+      translation.toUpperAscii & ")*"
+
+proc formatPlain(
+    raw,
+    translation: string,
+    keepVerseNumbers = true): string =
+  ## Renders a raw passage as plain text wrapped at 74 columns, followed by
+  ## an attribution line with the reference and the upper-cased
+  ## `translation`.
+  ##
+  ## `(n)` markers are always removed. Verse markers `[n]` are reduced to the
+  ## bare number when `keepVerseNumbers` is true and removed otherwise.
+  let (reference, verseLines) = collectVerseLines(raw)
+
+  let wrapped = (verseLines -->
+    map(if it.len > 90: it.strip else: it & "  ").
+    map(
+      if keepVerseNumbers:
+        it.multiReplace([(re"\((\d+)\)", ""), (re"\[(\d+)\]", "$1")])
+      else:
+        it.multiReplace([(re"\((\d+)\)", ""), (re"\[(\d+)\]", "")])).
+    map(wrapWords(it, maxLineWidth = 74, newLine = "\p"))).join("\p")
+
+  # Split and re-join so that every line break is the platform newline.
+  result = wrapped.splitLines.
+    join("\p") & "\p– " & reference & " (" & translation.toUpperAscii & ")"
+
+proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[string] =
+  ## Routes a passage request to the backend that serves `translation`.
+  ##
+  ## * `esv` uses the ESV API, with token and root taken from `cfg`.
+  ## * `akjv` and `kjv` use the `kjv` module.
+  ## * `mev` uses the `mev` module.
+  ## * `amp`, `nkjv` and `niv` use API.Bible, with key, root and an optional
+  ##   per-translation Bible ID taken from `cfg`.
+  ##
+  ## Any other value raises `ValueError` listing the supported translations.
+  if translation == "esv":
+    let token = cfg.getVal("esv-api-token")
+    let root = cfg.getVal("esv-api-root", "https://api.esv.org")
+    result = esv.fetchPassages(reference, token, root)
+  elif translation in ["akjv", "kjv"]:
+    result = kjv.fetchPassages(reference)
+  elif translation == "mev":
+    result = mev.fetchPassages(reference)
+  elif translation in ["amp", "nkjv", "niv"]:
+    let apiKey = cfg.getVal("api-bible-api-key")
+    let root = cfg.getVal("api-bible-root", api_bible.apiBibleRoot)
+    let bibleId = cfg.getVal(
+      "api-bible-" & translation & "-bible-id",
+      api_bible.defaultBibleId(translation))
+    result = api_bible.fetchPassages(
+      reference, translation, apiKey, root, bibleId)
+  else:
+    raise newException(ValueError,
+      "unsupported translation '" & translation &
+        "'; supported translations: " & supportedTranslationsList())
+
+when isMainModule:
+  # Command-line entry point: parse arguments, load the optional JSON config
+  # from $HOME, fetch every requested passage and print the formatted result.
+  const USAGE = """Usage:
+  bibleref <reference> [options]
+
+Options:
+
+  --debug                       Log debug information.
+
+  --echo-args                   Echo back the arguments that were passed on the
+                                command line for debugging purposes.
+
+  -f, --output-format <format>  Select a specific output format. Valid values
+                                are 'raw', 'markdown', 'plain', 'reading'.
+
+  -t, --translation <translation>
+                                Select a specific translation. Supported values
+                                are 'akjv', 'amp', 'esv', 'kjv', 'mev',
+                                'nkjv', and 'niv'. Defaults to 'esv'.
+                                Individual references may override this with a
+                                trailing marker, for example:
+                                'John 3:16 (KJV); John 3:16 (ESV)'.
+
+  --esv-api-token <token>       Provide the API token on the command line. By
+                                default this will be read either from the
+                                .bibleref.cfg.json file or the ESV_API_TOKEN
+                                environment variable.
+
+  --api-bible-api-key <key>     Provide the API.Bible API key for translations
+                                backed by api.bible.
+
+  --api-bible-root <url>        Override the API.Bible API root. Defaults to
+                                https://rest.api.bible/v1.
+
+  --api-bible-amp-bible-id <id> Override the API.Bible Bible ID for AMP.
+
+  --api-bible-niv-bible-id <id> Override the API.Bible Bible ID for NIV.
+
+  --api-bible-nkjv-bible-id <id>
+                                Override the API.Bible Bible ID for NKJV.
+"""
+
+  let consoleLogger = newConsoleLogger(
+    levelThreshold=lvlInfo,
+    fmtStr="bibleref - $levelname: ")
+  logging.addHandler(consoleLogger)
+
+  try:
+    # Parse arguments
+    let args = docopt(USAGE, version = "1.1.0")
+
+    if args["--debug"]:
+      consoleLogger.levelThreshold = lvlDebug
+
+    if args["--echo-args"]: stderr.writeLine($args)
+
+    # The config file is optional; an empty JSON object stands in when it is
+    # missing.
+    let cfgFilePath = getEnv("HOME") / ".bibleref.cfg.json"
+    var cfgFileJson = newJObject()
+    if fileExists(cfgFilePath):
+      debug "Loading config from " & cfgFilePath
+      cfgFileJson = parseFile(cfgFilePath)
+
+    let cfg = CombinedConfig(docopt: args, json: cfgFileJson)
+    let defaultTranslation = cfg.getVal("translation", "esv")
+    let reference = $args["<reference>"]
+    let queries = parsePassageQueries(reference, defaultTranslation)
+
+    var formattedPassages: seq[string] = @[]
+    for query in queries:
+      for passage in fetchPassages(query.referenceText, query.translation, cfg):
+        formattedPassages.add(
+          # Any format value not listed below, including an absent option,
+          # falls through to Markdown. `text` is accepted even though the
+          # usage text does not list it.
+          case $args["--output-format"]:
+          of "plain":
+            formatPlain(passage, query.translation)
+          of "reading":
+            formatPlain(passage, query.translation, keepVerseNumbers = false)
+          of "text":
+            passage.multiReplace([(re"\[(\d+)\]", "$1")])
+          of "raw":
+            passage
+          else:
+            formatMarkdown(passage, query.translation))
+
+    echo formattedPassages.join("\p\p")
+
+  except CatchableError:
+    # Report the failure, add the stack trace at debug level, exit non-zero.
+    fatal getCurrentExceptionMsg()
+    debug getCurrentException().getStackTrace()
+    quit(QuitFailure)
@@ -0,0 +1,112 @@
+import std/[strutils, tables]
+
+import ./reference_parser
+
+# Lookup tables built by `loadBibleIndex` from one translation's rows.
+type BibleIndex = object
+  # Verse text, keyed by `verseKey(code, chapter, verse)`.
+  verses: Table[string, string]
+  # Highest verse number seen per chapter, keyed by `chapterKey`.
+  lastVerseByChapter: Table[string, int]
+  # Highest chapter number seen per book, keyed by book code.
+  lastChapterByBook: Table[string, int]
+  # Translation label used in error messages.
+  translationName: string
+
+proc verseKey(code: string, chapter, verse: int): string =
+  ## Builds the tab-separated table key `code<TAB>chapter<TAB>verse`.
+  [code, $chapter, $verse].join("\t")
+
+proc chapterKey(code: string, chapter: int): string =
+  ## Builds the tab-separated table key `code<TAB>chapter`.
+  [code, $chapter].join("\t")
+
+proc loadBibleIndex(rows, translationName: string): BibleIndex =
+  ## Parses tab-separated `rows` (`code`, `chapter`, `verse`, `text`; one
+  ## verse per line) into a `BibleIndex`.
+  ##
+  ## Blank lines are ignored. A line with fewer than four fields raises
+  ## `ValueError`; tabs beyond the third stay part of the verse text. The
+  ## highest verse per chapter and the highest chapter per book are tracked
+  ## along the way.
+  result.translationName = translationName
+
+  for row in rows.splitLines:
+    if row.isEmptyOrWhitespace:
+      continue
+
+    let fields = row.split('\t', maxsplit = 3)
+    if fields.len != 4:
+      raise newException(ValueError,
+        "invalid embedded " & translationName & " row: " & row)
+
+    let
+      code = fields[0]
+      chapter = fields[1].parseInt
+      verse = fields[2].parseInt
+
+    result.verses[verseKey(code, chapter, verse)] = fields[3]
+
+    # A first sighting compares the value with itself and stores it as-is.
+    let cKey = chapterKey(code, chapter)
+    result.lastVerseByChapter[cKey] =
+      max(verse, result.lastVerseByChapter.getOrDefault(cKey, verse))
+    result.lastChapterByBook[code] =
+      max(chapter, result.lastChapterByBook.getOrDefault(code, chapter))
+
+proc requireLastChapter(index: BibleIndex, code: string): int =
+  ## Returns the highest chapter number recorded for book `code`, raising
+  ## `ValueError` when the index holds no data for that book.
+  if code in index.lastChapterByBook:
+    return index.lastChapterByBook[code]
+
+  raise newException(ValueError,
+    "no embedded " & index.translationName & " data for " & code)
+
+proc requireLastVerse(index: BibleIndex, code: string, chapter: int): int =
+  ## Returns the highest verse number recorded for `chapter` of book `code`,
+  ## raising `ValueError` when the index holds no data for that chapter.
+  let cKey = chapterKey(code, chapter)
+  if cKey in index.lastVerseByChapter:
+    return index.lastVerseByChapter[cKey]
+
+  raise newException(ValueError,
+    "no embedded " & index.translationName & " data for " &
+      bookInfo(code).name & " " & $chapter)
+
+proc requireVerse(index: BibleIndex, code: string, chapter, verse: int): string =
+  ## Returns the text stored for the given verse, raising `ValueError` when
+  ## the index holds no such verse.
+  let vKey = verseKey(code, chapter, verse)
+  if vKey in index.verses:
+    return index.verses[vKey]
+
+  raise newException(ValueError,
+    "no embedded " & index.translationName & " data for " &
+      bookInfo(code).name & " " & $chapter & ":" & $verse)
+
+proc addVerseLines(
+    lines: var seq[string],
+    index: BibleIndex,
+    reference: PassageReference,
+    range: RefRange) =
+  ## Appends one `  [verse] text` line to `lines` for every verse covered by
+  ## `range` in the book named by `reference`.
+  ##
+  ## A start verse of 0 or less means "from verse 1"; a finish verse of 0 or
+  ## less means "to the last verse of the finish chapter". Chapters between
+  ## the two ends are included in full. Raises `ValueError` when the book,
+  ## a chapter or a verse is missing from the index, or when a chapter's
+  ## first verse lies after its last.
+  let code = reference.book.code
+  # Fail early when the book itself has no data.
+  discard index.requireLastChapter(code)
+
+  let first = range.start
+  let last = range.finish
+
+  for chapter in first.chapter .. last.chapter:
+    var startVerse = 1
+    if chapter == first.chapter and first.verse > 0:
+      startVerse = first.verse
+
+    var endVerse: int
+    if chapter == last.chapter and last.verse > 0:
+      endVerse = last.verse
+    else:
+      endVerse = index.requireLastVerse(code, chapter)
+
+    if startVerse > endVerse:
+      raise newException(ValueError, "reference range starts after it ends")
+
+    for verse in startVerse .. endVerse:
+      let text = index.requireVerse(code, chapter, verse)
+      lines.add("  [" & $verse & "] " & text)
+
+proc fetchReference(index: BibleIndex, reference: PassageReference): string =
+  ## Renders `reference` as text: the stringified reference on the first
+  ## line, followed by one `  [verse] text` line per verse, joined by `\n`.
+  ##
+  ## A reference without ranges yields the whole book.
+  var lines = @[$reference]
+
+  if reference.ranges.len > 0:
+    for range in reference.ranges:
+      lines.addVerseLines(index, reference, range)
+  else:
+    let code = reference.book.code
+    for chapter in 1 .. index.requireLastChapter(code):
+      for verse in 1 .. index.requireLastVerse(code, chapter):
+        let text = index.requireVerse(code, chapter, verse)
+        lines.add("  [" & $verse & "] " & text)
+
+  result = lines.join("\n")
+
+proc fetchPassages*(rows, reference, translationName: string): seq[string] =
+  ## Builds an index from `rows`, parses `reference` with `parseReferences`,
+  ## and returns one rendered passage per parsed reference.
+  ## `translationName` is only used in error messages.
+  let bibleIndex = loadBibleIndex(rows, translationName)
+  for entry in parseReferences(reference):
+    result.add(bibleIndex.fetchReference(entry))
@@ -0,0 +1,13 @@
+import std/[httpclient, json, logging, uri]
+
+proc fetchPassages*(reference, apiToken, apiRoot: string): seq[string] =
+  ## Requests `reference` from the ESV passage-text endpoint under `apiRoot`,
+  ## authenticating with `apiToken`, and returns the strings found in the
+  ## response's `passages` array.
+  ##
+  ## Errors raised by `getContent` or `parseJson` propagate to the caller.
+  let urlPath = apiRoot & "/v3/passage/text/?q=" & encodeUrl(reference)
+  debug "requesting " & urlPath
+
+  let http = newHttpClient()
+  try:
+    http.headers = newHttpHeaders({"Authorization": "Token " & apiToken})
+    let respJson = parseJson(http.getContent(urlPath))
+    for passage in respJson["passages"].getElems:
+      result.add(passage.getStr)
+  finally:
+    # Release the connection even when the request or the parse fails.
+    http.close()
@@ -1,86 +0,0 @@
-# Nim CLI Wrapper for the ESV API
-# © 2023 Jonathan Bernard
-
-## Simple command-line wrapper around the ESV API.
-
-import std/[httpclient, json, logging, os, re, strutils, uri, wordwrap]
-import cliutils, docopt, zero_functional
-
-proc formatMarkdown(raw: string): string =
-  let rawLines = raw.splitLines
-  let wrapped = (raw.splitLines -->
-    filter(match(it, re"^\s+(\[\d+\]|\w).*")).
-    map(it.strip.multiReplace([(re"\((\d+)\)", ""), (re"\[(\d+)\]", "**$1**")])).
-    map(wrapWords(it, maxLineWidth = 74, newLine = "\p"))).
-    join("\p")
-
-  result = (wrapped.splitLines --> map("> " & it)).join("\p") &
-    "\p>\p> -- *" & rawLines[0].strip & " (ESV)*"
-
-when isMainModule:
-  const USAGE = """Usage:
-  esv_api <reference> [options]
-
-Options:
-
-  --debug                       Log debug information.
-
-  --echo-args                   Echo back the arguments that were passed on the
-                                command line for debugging purposes.
-
-  -f, --output-format <format>  Select a specific output format. Valid values
-                                are 'raw', 'markdown', 'plain'.
-
-  -t, --esv-api-token <token>   Provide the API token on the command line. By
-                                default this will be read either from the
-                                .esv_api.cfg.json file or the ESV_API_TOKEN
-                                envionment variable.
-"""
-
-  let consoleLogger = newConsoleLogger(
-    levelThreshold=lvlInfo,
-    fmtStr="esv_api - $levelname: ")
-  logging.addHandler(consoleLogger)
-
-  try:
-    # Parse arguments
-    let args = docopt(USAGE, version = "0.2.2")
-
-    if args["--debug"]:
-      consoleLogger.levelThreshold = lvlDebug
-
-    if args["--echo-args"]: stderr.writeLine($args)
-
-    let cfgFilePath = getEnv("HOME") / ".esv_api.cfg.json"
-    var cfgFileJson = newJObject()
-    if fileExists(cfgFilePath):
-      debug "Loading config from " & cfgFilePath
-      cfgFileJson = parseFile(cfgFilePath)
-
-    let cfg = CombinedConfig(docopt: args, json: cfgFileJson)
-    let apiToken = cfg.getVal("esv-api-token")
-    let apiRoot = cfg.getVal("esv-api-root", "https://api.esv.org")
-    let reference = $args["<reference>"]
-
-    let http = newHttpClient()
-    http.headers = newHttpHeaders({"Authorization": "Token " & apiToken})
-
-    let urlPath = apiRoot & "/v3/passage/text/?q=" & encodeUrl(reference)
-    debug "requesting " & urlPath
-    let respJson = parseJson(http.getContent(urlPath))
-
-    let formattedPassages =
-      case $args["--output-format"]:
-      of "text":
-        respJson["passages"].getElems -->
-          map(it.getStr.multiReplace([(re"\[(\d+)\]", "$1")]))
-      of "raw": respJson["passages"].getElems --> map(it.getStr)
-      else:
-        respJson["passages"].getElems --> map(formatMarkdown(it.getStr))
-
-    echo formattedPassages.join("\p\p")
-
-  except CatchableError:
-    fatal getCurrentExceptionMsg()
-    debug getCurrentException().getStackTrace()
-    quit(QuitFailure)
@@ -0,0 +1,7 @@
+import ./embedded_bible
+import ./offline_data
+
+# KJV verse rows, read at compile time through `embeddedTranslationData`.
+const kjvRows = embeddedTranslationData("kjv")
+
+proc fetchPassages*(reference: string): seq[string] =
+  ## Looks up `reference` in the embedded KJV rows and returns the rendered
+  ## passages.
+  result = embedded_bible.fetchPassages(
+    rows = kjvRows,
+    reference = reference,
+    translationName = "KJV")
@@ -0,0 +1,13 @@
+import ./offline_data
+
+# The MEV rows are optional: the lookup module and the data are only pulled
+# in when an MEV data file was present at compile time.
+when hasEmbeddedTranslationData("mev"):
+  import ./embedded_bible
+
+  const mevRows = embeddedTranslationData("mev")
+
+proc fetchPassages*(reference: string): seq[string] =
+  ## Looks up `reference` in the embedded MEV rows. In a build without MEV
+  ## data this always raises `ValueError` explaining how to add it.
+  when hasEmbeddedTranslationData("mev"):
+    embedded_bible.fetchPassages(mevRows, reference, "MEV")
+  else:
+    raise newException(ValueError,
+      "MEV data is not embedded; generate data/private/mev.tsv and rebuild")
@@ -0,0 +1,15 @@
+import std/os
+
+# Expands to `<data root>/<visibility>/<name>.tsv`. The data root is the
+# `data` directory next to the directory containing the file reported by
+# `currentSourcePath()`.
+template translationDataPath(name: static[string], visibility: static[string]): string =
+  const dataRoot = currentSourcePath().parentDir.parentDir / "data"
+  dataRoot / visibility / (name & ".tsv")
+
+# True when a `.tsv` file for `name` exists in either the `private` or the
+# `public` data directory.
+template hasEmbeddedTranslationData*(name: static[string]): bool =
+  fileExists(translationDataPath(name, "private")) or
+    fileExists(translationDataPath(name, "public"))
+
+# Reads the `.tsv` file for `name` with `staticRead`, preferring the
+# `private` copy and falling back to the `public` one when no private file
+# exists.
+template embeddedTranslationData*(name: static[string]): string =
+  when fileExists(translationDataPath(name, "private")):
+    staticRead(translationDataPath(name, "private"))
+  else:
+    staticRead(translationDataPath(name, "public"))
@@ -0,0 +1,64 @@
+import std/strutils
+
+import ./reference_parser
+
+# One requested passage: a parsed reference plus the translation to read it
+# in.
+type PassageQuery* = object
+  # Parsed book and ranges.
+  reference*: PassageReference
+  # Normalised (lower-case, supported) translation code.
+  translation*: string
+
+# Lower-case codes of every translation accepted by `normalizeTranslation`.
+const SupportedTranslations* = [
+  "akjv", "amp", "esv", "kjv", "mev", "niv", "nkjv"
+]
+
+proc supportedTranslationsList*(): string =
+  ## Returns the supported translation codes as one comma-separated string,
+  ## for use in messages.
+  for idx, code in SupportedTranslations:
+    if idx > 0:
+      result.add(", ")
+    result.add(code)
+
+proc normalizeTranslation*(translation: string): string =
+  ## Strips and lower-cases `translation` and returns it when it is one of
+  ## `SupportedTranslations`; raises `ValueError` otherwise.
+  result = translation.strip.toLowerAscii
+
+  if result notin SupportedTranslations:
+    raise newException(ValueError,
+      "unsupported translation '" & translation &
+        "'; supported translations: " & supportedTranslationsList())
+
+proc splitTrailingTranslationMarker(
+    input: string): tuple[referenceText: string, translation: string] =
+  ## Separates a trailing parenthesised marker, as in `John 3:16 (KJV)`, from
+  ## the reference text in front of it.
+  ##
+  ## When the stripped input does not end in `)`, has no `(`, or would leave
+  ## either part empty, the whole stripped input is returned as the reference
+  ## text with an empty translation.
+  let trimmed = input.strip
+  result = (trimmed, "")
+
+  if not trimmed.endsWith(")"):
+    return
+
+  let openAt = trimmed.rfind('(')
+  if openAt < 0:
+    return
+
+  let head = trimmed[0 ..< openAt].strip
+  let marker = trimmed[openAt + 1 .. ^2].strip
+  if head.len > 0 and marker.len > 0:
+    result = (head, marker)
+
+proc parsePassageQuery*(input, defaultTranslation: string): PassageQuery =
+  ## Parses one reference, honouring an optional trailing translation marker
+  ## such as `(KJV)`; `defaultTranslation` applies when no marker is present.
+  ## The chosen translation is validated by `normalizeTranslation`.
+  let (refText, marker) = splitTrailingTranslationMarker(input)
+  let chosen = if marker.len > 0: marker else: defaultTranslation
+
+  result = PassageQuery(
+    reference: parseReference(refText),
+    translation: normalizeTranslation(chosen))
+
+proc parsePassageQueries*(input, defaultTranslation: string): seq[PassageQuery] =
+  ## Splits `input` on `;` and parses every non-blank piece with
+  ## `parsePassageQuery`. Raises `ValueError` when no piece remains.
+  for piece in input.split(';'):
+    let trimmed = piece.strip
+    if trimmed.len == 0:
+      continue
+    result.add(parsePassageQuery(trimmed, defaultTranslation))
+
+  if result.len < 1:
+    raise newException(ValueError, "empty Bible reference")
+
+proc referenceText*(query: PassageQuery): string =
+  ## Returns the string form of the query's parsed reference.
+  result = $(query.reference)
@@ -0,0 +1,399 @@
+import std/[strutils]
+
+type
+  # One book of the canon.
+  BookInfo* = object
+    # Three-character book code, e.g. "GEN".
+    code*: string
+    # Display name, e.g. "Genesis".
+    name*: string
+    # True for books that consist of a single chapter.
+    singleChapter*: bool
+
+  # A position inside a book.
+  RefPoint* = object
+    chapter*: int
+    verse*: int
+
+  # A span from `start` to `finish` within one book.
+  RefRange* = object
+    start*: RefPoint
+    finish*: RefPoint
+
+  # A book together with the ranges requested from it.
+  PassageReference* = object
+    book*: BookInfo
+    ranges*: seq[RefRange]
+
+# The 66 books from Genesis to Revelation. `bookIndex` returns a book's
+# position in this array; `singleChapter` is set for Obadiah, Philemon,
+# 2 John, 3 John and Jude.
+const CanonBooks*: array[66, BookInfo] = [
+  BookInfo(code: "GEN", name: "Genesis"),
+  BookInfo(code: "EXO", name: "Exodus"),
+  BookInfo(code: "LEV", name: "Leviticus"),
+  BookInfo(code: "NUM", name: "Numbers"),
+  BookInfo(code: "DEU", name: "Deuteronomy"),
+  BookInfo(code: "JOS", name: "Joshua"),
+  BookInfo(code: "JDG", name: "Judges"),
+  BookInfo(code: "RUT", name: "Ruth"),
+  BookInfo(code: "1SA", name: "1 Samuel"),
+  BookInfo(code: "2SA", name: "2 Samuel"),
+  BookInfo(code: "1KI", name: "1 Kings"),
+  BookInfo(code: "2KI", name: "2 Kings"),
+  BookInfo(code: "1CH", name: "1 Chronicles"),
+  BookInfo(code: "2CH", name: "2 Chronicles"),
+  BookInfo(code: "EZR", name: "Ezra"),
+  BookInfo(code: "NEH", name: "Nehemiah"),
+  BookInfo(code: "EST", name: "Esther"),
+  BookInfo(code: "JOB", name: "Job"),
+  BookInfo(code: "PSA", name: "Psalms"),
+  BookInfo(code: "PRO", name: "Proverbs"),
+  BookInfo(code: "ECC", name: "Ecclesiastes"),
+  BookInfo(code: "SNG", name: "Song of Solomon"),
+  BookInfo(code: "ISA", name: "Isaiah"),
+  BookInfo(code: "JER", name: "Jeremiah"),
+  BookInfo(code: "LAM", name: "Lamentations"),
+  BookInfo(code: "EZK", name: "Ezekiel"),
+  BookInfo(code: "DAN", name: "Daniel"),
+  BookInfo(code: "HOS", name: "Hosea"),
+  BookInfo(code: "JOL", name: "Joel"),
+  BookInfo(code: "AMO", name: "Amos"),
+  BookInfo(code: "OBA", name: "Obadiah", singleChapter: true),
+  BookInfo(code: "JON", name: "Jonah"),
+  BookInfo(code: "MIC", name: "Micah"),
+  BookInfo(code: "NAM", name: "Nahum"),
+  BookInfo(code: "HAB", name: "Habakkuk"),
+  BookInfo(code: "ZEP", name: "Zephaniah"),
+  BookInfo(code: "HAG", name: "Haggai"),
+  BookInfo(code: "ZEC", name: "Zechariah"),
+  BookInfo(code: "MAL", name: "Malachi"),
+  BookInfo(code: "MAT", name: "Matthew"),
+  BookInfo(code: "MRK", name: "Mark"),
+  BookInfo(code: "LUK", name: "Luke"),
+  BookInfo(code: "JHN", name: "John"),
+  BookInfo(code: "ACT", name: "Acts"),
+  BookInfo(code: "ROM", name: "Romans"),
+  BookInfo(code: "1CO", name: "1 Corinthians"),
+  BookInfo(code: "2CO", name: "2 Corinthians"),
+  BookInfo(code: "GAL", name: "Galatians"),
+  BookInfo(code: "EPH", name: "Ephesians"),
+  BookInfo(code: "PHP", name: "Philippians"),
+  BookInfo(code: "COL", name: "Colossians"),
+  BookInfo(code: "1TH", name: "1 Thessalonians"),
+  BookInfo(code: "2TH", name: "2 Thessalonians"),
+  BookInfo(code: "1TI", name: "1 Timothy"),
+  BookInfo(code: "2TI", name: "2 Timothy"),
+  BookInfo(code: "TIT", name: "Titus"),
+  BookInfo(code: "PHM", name: "Philemon", singleChapter: true),
+  BookInfo(code: "HEB", name: "Hebrews"),
+  BookInfo(code: "JAS", name: "James"),
+  BookInfo(code: "1PE", name: "1 Peter"),
+  BookInfo(code: "2PE", name: "2 Peter"),
+  BookInfo(code: "1JN", name: "1 John"),
+  BookInfo(code: "2JN", name: "2 John", singleChapter: true),
+  BookInfo(code: "3JN", name: "3 John", singleChapter: true),
+  BookInfo(code: "JUD", name: "Jude", singleChapter: true),
+  BookInfo(code: "REV", name: "Revelation")
+]
+
+# (book code, alias) pairs tried by `parseBook` when the input does not
+# resolve to a unique canonical-name prefix. Aliases are written in lower
+# case; `matchAlias` compares them case-insensitively and tolerates dots and
+# flexible spacing.
+const bookAliases = [
+  ("GEN", "genesis"), ("GEN", "gen"),
+  ("EXO", "exodus"), ("EXO", "exod"), ("EXO", "exo"),
+  ("LEV", "leviticus"), ("LEV", "lev"),
+  ("NUM", "numbers"), ("NUM", "num"), ("NUM", "numb"),
+  ("DEU", "deuteronomy"), ("DEU", "deut"), ("DEU", "deu"),
+  ("JOS", "joshua"), ("JOS", "josh"), ("JOS", "jos"),
+  ("JDG", "judges"), ("JDG", "judg"), ("JDG", "jdg"),
+  ("RUT", "ruth"), ("RUT", "rut"),
+  ("1SA", "1 samuel"), ("1SA", "1 sam"), ("1SA", "i samuel"), ("1SA", "first samuel"),
+  ("2SA", "2 samuel"), ("2SA", "2 sam"), ("2SA", "ii samuel"), ("2SA", "second samuel"),
+  ("1KI", "1 kings"), ("1KI", "1 kgs"), ("1KI", "1 kin"), ("1KI", "i kings"), ("1KI", "first kings"),
+  ("2KI", "2 kings"), ("2KI", "2 kgs"), ("2KI", "2 kin"), ("2KI", "ii kings"), ("2KI", "second kings"),
+  ("1CH", "1 chronicles"), ("1CH", "1 chron"), ("1CH", "1 chr"), ("1CH", "i chronicles"), ("1CH", "first chronicles"),
+  ("2CH", "2 chronicles"), ("2CH", "2 chron"), ("2CH", "2 chr"), ("2CH", "ii chronicles"), ("2CH", "second chronicles"),
+  ("EZR", "ezra"), ("EZR", "ezr"),
+  ("NEH", "nehemiah"), ("NEH", "neh"),
+  ("EST", "esther"), ("EST", "est"),
+  ("JOB", "job"),
+  ("PSA", "psalms"), ("PSA", "psalm"), ("PSA", "ps"), ("PSA", "psa"),
+  ("PRO", "proverbs"), ("PRO", "prov"), ("PRO", "pro"),
+  ("ECC", "ecclesiastes"), ("ECC", "eccl"), ("ECC", "ecc"),
+  ("SNG", "song of solomon"), ("SNG", "song"), ("SNG", "songs"), ("SNG", "canticles"), ("SNG", "sng"),
+  ("ISA", "isaiah"), ("ISA", "isa"),
+  ("JER", "jeremiah"), ("JER", "jer"),
+  ("LAM", "lamentations"), ("LAM", "lam"),
+  ("EZK", "ezekiel"), ("EZK", "ezek"), ("EZK", "ezk"),
+  ("DAN", "daniel"), ("DAN", "dan"),
+  ("HOS", "hosea"), ("HOS", "hos"),
+  ("JOL", "joel"), ("JOL", "jol"),
+  ("AMO", "amos"), ("AMO", "amo"),
+  ("OBA", "obadiah"), ("OBA", "obad"), ("OBA", "oba"),
+  ("JON", "jonah"), ("JON", "jon"),
+  ("MIC", "micah"), ("MIC", "mic"),
+  ("NAM", "nahum"), ("NAM", "nah"),
+  ("HAB", "habakkuk"), ("HAB", "hab"),
+  ("ZEP", "zephaniah"), ("ZEP", "zeph"), ("ZEP", "zep"),
+  ("HAG", "haggai"), ("HAG", "hag"),
+  ("ZEC", "zechariah"), ("ZEC", "zech"), ("ZEC", "zec"),
+  ("MAL", "malachi"), ("MAL", "mal"),
+  ("MAT", "matthew"), ("MAT", "matt"), ("MAT", "mat"), ("MAT", "mt"),
+  ("MRK", "mark"), ("MRK", "mrk"), ("MRK", "mk"),
+  ("LUK", "luke"), ("LUK", "luk"), ("LUK", "lk"),
+  ("JHN", "john"), ("JHN", "jhn"), ("JHN", "jn"),
+  ("ACT", "acts"), ("ACT", "act"),
+  ("ROM", "romans"), ("ROM", "rom"),
+  ("1CO", "1 corinthians"), ("1CO", "1 cor"), ("1CO", "1 co"), ("1CO", "i corinthians"), ("1CO", "first corinthians"),
+  ("2CO", "2 corinthians"), ("2CO", "2 cor"), ("2CO", "2 co"), ("2CO", "ii corinthians"), ("2CO", "second corinthians"),
+  ("GAL", "galatians"), ("GAL", "gal"),
+  ("EPH", "ephesians"), ("EPH", "eph"),
+  ("PHP", "philippians"), ("PHP", "php"),
+  ("COL", "colossians"), ("COL", "col"),
+  ("1TH", "1 thessalonians"), ("1TH", "1 thess"), ("1TH", "1 thes"), ("1TH", "i thessalonians"), ("1TH", "first thessalonians"),
+  ("2TH", "2 thessalonians"), ("2TH", "2 thess"), ("2TH", "2 thes"), ("2TH", "ii thessalonians"), ("2TH", "second thessalonians"),
+  ("1TI", "1 timothy"), ("1TI", "1 tim"), ("1TI", "i timothy"), ("1TI", "first timothy"),
+  ("2TI", "2 timothy"), ("2TI", "2 tim"), ("2TI", "ii timothy"), ("2TI", "second timothy"),
+  ("TIT", "titus"), ("TIT", "tit"),
+  ("PHM", "philemon"), ("PHM", "philem"), ("PHM", "phm"),
+  ("HEB", "hebrews"), ("HEB", "heb"),
+  ("JAS", "james"), ("JAS", "jas"), ("JAS", "jam"),
+  ("1PE", "1 peter"), ("1PE", "1 pet"), ("1PE", "1 pe"), ("1PE", "i peter"), ("1PE", "first peter"),
+  ("2PE", "2 peter"), ("2PE", "2 pet"), ("2PE", "2 pe"), ("2PE", "ii peter"), ("2PE", "second peter"),
+  ("1JN", "1 john"), ("1JN", "1 jn"), ("1JN", "1 jhn"), ("1JN", "i john"), ("1JN", "first john"),
+  ("2JN", "2 john"), ("2JN", "2 jn"), ("2JN", "2 jhn"), ("2JN", "ii john"), ("2JN", "second john"),
+  ("3JN", "3 john"), ("3JN", "3 jn"), ("3JN", "3 jhn"), ("3JN", "iii john"), ("3JN", "third john"),
+  ("JUD", "jude"), ("JUD", "jud"),
+  ("REV", "revelation"), ("REV", "revelations"), ("REV", "rev"), ("REV", "apocalypse")
+]
+
+proc bookInfo*(code: string): BookInfo =
+  ## Returns the `CanonBooks` entry whose code equals `code` exactly; raises
+  ## `ValueError` for an unknown code.
+  for position in CanonBooks.low .. CanonBooks.high:
+    if CanonBooks[position].code == code:
+      return CanonBooks[position]
+
+  raise newException(ValueError, "unknown Bible book code '" & code & "'")
+
+proc bookIndex*(code: string): int =
+  ## Returns the zero-based position in `CanonBooks` of the book whose code
+  ## equals `code` exactly; raises `ValueError` for an unknown code.
+  for position in CanonBooks.low .. CanonBooks.high:
+    if CanonBooks[position].code == code:
+      return position
+
+  raise newException(ValueError, "unknown Bible book code '" & code & "'")
+
+proc normalizeReferenceInput(s: string): string =
+  ## Replaces en dashes, em dashes and minus signs with a plain hyphen and
+  ## strips surrounding whitespace.
+  result = s
+  for dash in ["–", "—", "−"]:
+    result = result.replace(dash, "-")
+  result = result.strip
+
+proc normalizeBookPrefix(s: string): string =
+  ## Keeps only ASCII letters (lower-cased) and digits from `s`; spaces,
+  ## dots and every other character are dropped.
+  for ch in s:
+    case ch
+    of 'A'..'Z', 'a'..'z':
+      result.add(ch.toLowerAscii)
+    of '0'..'9':
+      result.add(ch)
+    else:
+      discard
+
+proc canonicalNamePrefixMatches(prefix: string): seq[BookInfo] =
+  ## Returns every book in `CanonBooks` whose normalised name starts with
+  ## `prefix`. `prefix` is compared as given, so callers pass it already
+  ## normalised.
+  for book in CanonBooks:
+    let normalizedName = normalizeBookPrefix(book.name)
+    if not normalizedName.startsWith(prefix):
+      continue
+    result.add(book)
+
+proc formatBookList(books: seq[BookInfo]): string =
+  ## Joins the names of `books` with `", "`.
+  for position, book in books:
+    if position > 0:
+      result.add(", ")
+    result.add(book.name)
+
+proc matchCanonicalBookPrefix(input: string): tuple[
+    matched: bool,
+    ambiguous: bool,
+    book: BookInfo,
+    consumed: int,
+    prefix: string,
+    matches: seq[BookInfo]] =
+  ## Tries to read a book name from the start of `input` by treating leading
+  ## slices of it as prefixes of canonical book names.
+  ##
+  ## * `matched` is set when some slice identifies exactly one book; `book`,
+  ##   `consumed` (number of input characters used) and `prefix` then
+  ##   describe the last such slice.
+  ## * `ambiguous` is set when no unique match has been found yet and a slice
+  ##   fits several books; `matches` lists them.
+
+  for idx in 1 .. input.len:
+    # Never cut in the middle of a run of letters.
+    if idx < input.len and input[idx].isAlphaAscii:
+      continue
+
+    let prefix = normalizeBookPrefix(input[0 ..< idx])
+    if prefix.len == 0:
+      continue
+
+    let matches = canonicalNamePrefixMatches(prefix)
+    if matches.len == 1:
+      # A later unique match overwrites an earlier one.
+      result.matched = true
+      result.ambiguous = false
+      result.book = matches[0]
+      result.consumed = idx
+      result.prefix = input[0 ..< idx].strip
+      result.matches = matches
+    elif matches.len > 1 and not result.matched:
+      result.ambiguous = true
+      result.consumed = idx
+      result.prefix = input[0 ..< idx].strip
+      result.matches = matches
+
+proc matchAlias(input, alias: string): int =
+  ## Checks whether `input` starts with `alias` and returns the number of
+  ## input characters consumed, or -1 when it does not.
+  ##
+  ## Matching is case-insensitive. A space or dot in the alias stands for any
+  ## run (possibly empty) of spaces and dots in the input, and dots in the
+  ## input are skipped before each letter and after the alias. The match is
+  ## rejected when the input continues with another letter.
+  var i = 0  # cursor into `input`
+  var j = 0  # cursor into `alias`
+
+  while j < alias.len:
+    let aliasCh = alias[j]
+    if aliasCh.isSpaceAscii or aliasCh == '.':
+      while i < input.len and (input[i].isSpaceAscii or input[i] == '.'):
+        inc i
+      inc j
+    else:
+      while i < input.len and input[i] == '.':
+        inc i
+
+      if i >= input.len or input[i].toLowerAscii != aliasCh.toLowerAscii:
+        return -1
+
+      inc i
+      inc j
+
+  # Swallow abbreviation dots that follow the alias.
+  while i < input.len and input[i] == '.':
+    inc i
+
+  # The alias must not be the beginning of a longer word.
+  if i < input.len and input[i].isAlphaAscii:
+    return -1
+
+  i
+
+proc parseBook(input: string): tuple[book: BookInfo, rest: string] =
+  ## Splits `input` into its leading book and the remaining text.
+  ##
+  ## A unique canonical-name prefix wins outright; otherwise the alias in
+  ## `bookAliases` that consumes the most input is used. Raises `ValueError`
+  ## when neither approach identifies a book.
+  let canonical = matchCanonicalBookPrefix(input)
+  if canonical.matched:
+    return (book: canonical.book, rest: input[canonical.consumed .. ^1].strip)
+
+  var winningCode = ""
+  var winningLen = -1
+  for row in bookAliases:
+    let aliasLen = matchAlias(input, row[1])
+    if aliasLen > winningLen:
+      winningCode = row[0]
+      winningLen = aliasLen
+
+  if winningLen >= 0:
+    return (book: bookInfo(winningCode), rest: input[winningLen .. ^1].strip)
+
+  if canonical.ambiguous:
+    raise newException(ValueError,
+      "ambiguous Bible book prefix '" & canonical.prefix & "' in '" &
+        input & "'; matches " & canonical.matches.formatBookList)
+
+  raise newException(ValueError, "could not parse Bible book in '" & input & "'")
+
+proc parsePositiveInt(s, label: string): int =
+  ## Parses `s` as a strictly positive decimal integer. `label` names the
+  ## value in error messages. Raises `ValueError` for empty input, non-digit
+  ## characters, or a value that is not greater than zero.
+  let digitsOnly = s.len > 0 and s.allCharsInSet(Digits)
+  if not digitsOnly:
+    raise newException(ValueError, "invalid " & label & " '" & s & "'")
+
+  let parsed = parseInt(s)
+  if parsed <= 0:
+    raise newException(ValueError, label & " must be positive")
+
+  parsed
+
+proc parsePoint(token: string, defaultChapter: int, singleChapter: bool): RefPoint =
+  ## Parses one endpoint of a range.
+  ##
+  ## `chapter:verse` is taken literally. A bare number is a verse of chapter
+  ## 1 when `singleChapter` is set, a verse of `defaultChapter` when that is
+  ## positive, and otherwise a whole chapter (verse 0). Raises `ValueError`
+  ## for empty or non-numeric input.
+  let trimmed = token.strip
+  if trimmed.len == 0:
+    raise newException(ValueError, "empty reference point")
+
+  if ':' in trimmed:
+    # Split at the first colon only; anything odd after it fails validation.
+    let pieces = trimmed.split(':', maxsplit = 1)
+    let chapterNumber = parsePositiveInt(pieces[0], "chapter")
+    let verseNumber = parsePositiveInt(pieces[1], "verse")
+    return RefPoint(chapter: chapterNumber, verse: verseNumber)
+
+  let number = parsePositiveInt(trimmed, "reference number")
+  if singleChapter:
+    result = RefPoint(chapter: 1, verse: number)
+  elif defaultChapter > 0:
+    result = RefPoint(chapter: defaultChapter, verse: number)
+  else:
+    result = RefPoint(chapter: number, verse: 0)
+
+proc parseRange(segment: string, defaultChapter: int, singleChapter: bool): RefRange =
+  ## Parses `start` or `start-finish`. When the start names a verse, a bare
+  ## number after the dash is read as a verse of the same chapter. Raises
+  ## `ValueError` when the range runs backwards.
+  let trimmed = segment.strip
+  let dashAt = trimmed.find('-')
+
+  if dashAt < 0:
+    result.start = parsePoint(trimmed, defaultChapter, singleChapter)
+    result.finish = result.start
+  else:
+    result.start = parsePoint(trimmed[0 ..< dashAt], defaultChapter, singleChapter)
+    let inheritedChapter =
+      if result.start.verse > 0: result.start.chapter
+      else: 0
+    result.finish = parsePoint(trimmed[dashAt + 1 .. ^1], inheritedChapter, singleChapter)
+
+  let chapterBackwards = result.finish.chapter < result.start.chapter
+  let verseBackwards =
+    result.finish.chapter == result.start.chapter and
+    result.start.verse > 0 and
+    result.finish.verse > 0 and
+    result.finish.verse < result.start.verse
+
+  if chapterBackwards or verseBackwards:
+    raise newException(ValueError, "range ends before it starts: '" & segment & "'")
+
+proc parsePassageSpec(spec: string, book: BookInfo): seq[RefRange] =
+  ## Parses the comma-separated list of ranges that follows a book name.
+  ##
+  ## Once a segment has named a verse, later bare numbers are read as verses
+  ## of the chapter that segment ended in ("3:16,20-21" -> 3:16 and 3:20-21;
+  ## "3:16-4:2,5" -> 3:16-4:2 and 4:5). A chapter-only segment resets that
+  ## context, so the next bare number is a chapter again.
+  ##
+  ## Raises `ValueError` for an empty segment or an invalid range.
+  var currentChapter = 0
+
+  for rawSegment in spec.split(','):
+    let segment = rawSegment.strip
+    if segment.len == 0:
+      raise newException(ValueError, "empty passage range in '" & spec & "'")
+
+    let range = parseRange(segment, currentChapter, book.singleChapter)
+    result.add(range)
+
+    if segment.contains(':') or (range.start.verse > 0 and range.finish.verse > 0):
+      # Carry the chapter the range ENDS in. For a cross-chapter range the
+      # start chapter would attach following verses to the wrong chapter.
+      currentChapter = range.finish.chapter
+    else:
+      currentChapter = 0
+
+proc parseReference*(input: string): PassageReference =
+  ## Parses a single reference such as "John 3:16-18". `ranges` is left empty
+  ## when nothing follows the book name.
+  let (book, remainder) = parseBook(normalizeReferenceInput(input))
+
+  result.book = book
+  if remainder.len > 0:
+    result.ranges = parsePassageSpec(remainder, book)
+
+proc parseReferences*(input: string): seq[PassageReference] =
+  ## Parses a semicolon-separated list of references, skipping blank pieces.
+  ## Raises `ValueError` when no reference remains.
+  for piece in input.split(';'):
+    let candidate = piece.strip
+    if candidate.len == 0:
+      continue
+    result.add(parseReference(candidate))
+
+  if result.len == 0:
+    raise newException(ValueError, "empty Bible reference")
+
+proc `$`*(point: RefPoint): string =
+  ## Renders `chapter:verse`, or just the chapter when the verse is not
+  ## greater than zero.
+  result = $point.chapter
+  if point.verse > 0:
+    result.add(":")
+    result.add($point.verse)
+
+proc `$`*(range: RefRange): string =
+  ## Renders a range: a single point when both ends are equal,
+  ## `chapter:first-last` for a verse range within one chapter, and
+  ## `start-finish` otherwise.
+  let first = range.start
+  let last = range.finish
+
+  if first == last:
+    result = $first
+  elif first.chapter == last.chapter and first.verse > 0 and last.verse > 0:
+    result = $first.chapter & ":" & $first.verse & "-" & $last.verse
+  else:
+    result = $first & "-" & $last
+
+proc formatSingleChapterRange(range: RefRange): string =
+  ## Renders a range without the chapter number when both ends share a
+  ## chapter; falls back to `start-finish` when the chapters differ.
+  let first = range.start
+  let last = range.finish
+
+  if first == last:
+    $first.verse
+  elif first.chapter == last.chapter:
+    $first.verse & "-" & $last.verse
+  else:
+    $first & "-" & $last
+
+proc `$`*(reference: PassageReference): string =
+  ## Renders the book name followed by its ranges joined with ", ".
+  ## Single-chapter books use the chapter-less range form.
+  result = reference.book.name
+  if reference.ranges.len == 0:
+    return
+
+  let compact = reference.book.singleChapter
+  var pieces = newSeqOfCap[string](reference.ranges.len)
+  for item in reference.ranges:
+    pieces.add(if compact: formatSingleChapterRange(item) else: $item)
+  result.add(" " & pieces.join(", "))
@@ -0,0 +1,84 @@
+import std/[strutils, unittest]
+
+import ../src/kjv
+import ../src/reference_parser
+
+# Exercises `parseReference` / `parseReferences` and the `$` rendering of the
+# parsed result.
+suite "reference parser":
+  test "parses single verse references":
+    let reference = parseReference("John 3:16")
+
+    check reference.book.code == "JHN"
+    check reference.ranges.len == 1
+    check reference.ranges[0].start.chapter == 3
+    check reference.ranges[0].start.verse == 16
+    check reference.ranges[0].finish == reference.ranges[0].start
+    check $reference == "John 3:16"
+
+  test "parses verse lists using the previous chapter":
+    # The bare "20-21" inherits chapter 3 from the preceding "3:16".
+    let reference = parseReference("John 3:16,20-21")
+
+    check reference.ranges.len == 2
+    check reference.ranges[1].start.chapter == 3
+    check reference.ranges[1].start.verse == 20
+    check reference.ranges[1].finish.chapter == 3
+    check reference.ranges[1].finish.verse == 21
+    check $reference == "John 3:16, 3:20-21"
+
+  test "parses chapter ranges":
+    # Whole chapters are represented with verse 0.
+    let reference = parseReference("John 3-4")
+
+    check reference.ranges.len == 1
+    check reference.ranges[0].start.chapter == 3
+    check reference.ranges[0].start.verse == 0
+    check reference.ranges[0].finish.chapter == 4
+    check reference.ranges[0].finish.verse == 0
+    check $reference == "John 3-4"
+
+  test "parses abbreviated numbered books":
+    let reference = parseReference("1 Jn 1:9")
+
+    check reference.book.code == "1JN"
+    check reference.ranges[0].start.chapter == 1
+    check reference.ranges[0].start.verse == 9
+    check $reference == "1 John 1:9"
+
+  test "parses unique canonical book prefixes":
+    check parseReference("Gene 1:1").book.code == "GEN"
+    check parseReference("Phile 3").book.code == "PHM"
+    check parseReference("Phili 1:6").book.code == "PHP"
+
+  test "rejects ambiguous canonical book prefixes":
+    expect ValueError:
+      discard parseReference("Phil 1")
+
+  test "normalizes single-chapter book references":
+    # In a single-chapter book a bare number is a verse of chapter 1.
+    let reference = parseReference("Jude 3-4")
+
+    check reference.book.code == "JUD"
+    check reference.ranges[0].start.chapter == 1
+    check reference.ranges[0].start.verse == 3
+    check reference.ranges[0].finish.chapter == 1
+    check reference.ranges[0].finish.verse == 4
+    check $reference == "Jude 3-4"
+
+  test "parses semicolon-separated references":
+    let references = parseReferences("Psalm 23; John 3:16")
+
+    check references.len == 2
+    check references[0].book.code == "PSA"
+    check references[1].book.code == "JHN"
+
+# Checks the text returned by `kjv.fetchPassages`: a heading line with the
+# reference, then verse text carrying a bracketed verse number.
+suite "offline KJV backend":
+  test "fetches a single embedded verse":
+    let passages = kjv.fetchPassages("John 3:16")
+
+    check passages.len == 1
+    check passages[0].startsWith("John 3:16\n")
+    check passages[0].contains("  [16] ")
+
+  test "fetches a single-chapter embedded verse":
+    let passages = kjv.fetchPassages("Jude 3")
+
+    check passages.len == 1
+    check passages[0].startsWith("Jude 3\n")
+    check passages[0].contains("  [3] ")
@@ -0,0 +1,44 @@
+import std/unittest
+
+import ../src/passage_query
+
+# Exercises `parsePassageQueries(input, defaultTranslation)`: trailing
+# "(XXX)" markers select a translation per reference, and expected
+# translation values are lower-case.
+suite "passage query parser":
+  test "uses the default translation when no marker is present":
+    let queries = parsePassageQueries("John 3:16", "kjv")
+
+    check queries.len == 1
+    check queries[0].referenceText == "John 3:16"
+    check queries[0].translation == "kjv"
+
+  test "uses a trailing translation marker":
+    let queries = parsePassageQueries("2 John 5 (KJV)", "esv")
+
+    check queries.len == 1
+    check queries[0].referenceText == "2 John 5"
+    check queries[0].translation == "kjv"
+
+  test "parses mixed translation queries":
+    let queries = parsePassageQueries("2 John 5 (KJV); 2 John 5 (ESV)", "mev")
+
+    check queries.len == 2
+    check queries[0].referenceText == "2 John 5"
+    check queries[0].translation == "kjv"
+    check queries[1].referenceText == "2 John 5"
+    check queries[1].translation == "esv"
+
+  test "uses the default translation per unmarked reference":
+    let queries = parsePassageQueries("John 3:16; Psalm 23 (MEV)", "nkjv")
+
+    check queries.len == 2
+    check queries[0].referenceText == "John 3:16"
+    check queries[0].translation == "nkjv"
+    # The reference text is expected in canonical form ("Psalms").
+    check queries[1].referenceText == "Psalms 23"
+    check queries[1].translation == "mev"
+
+  test "rejects unknown translation markers":
+    expect ValueError:
+      discard parsePassageQueries("John 3:16 (XYZ)", "esv")
+
+  test "rejects unknown default translations":
+    expect ValueError:
+      discard parsePassageQueries("John 3:16", "xyz")
@@ -0,0 +1,144 @@
+import std/[os, strutils, tables]
+
+# Source archive: https://ebible.org/Scriptures/eng-kjv_usfm.zip
+
+# The 66 book codes in the order `generate` emits rows; `findCanonFiles`
+# also uses the list to ignore files for any other code.
+const canonBookCodes = [
+  "GEN", "EXO", "LEV", "NUM", "DEU", "JOS", "JDG", "RUT",
+  "1SA", "2SA", "1KI", "2KI", "1CH", "2CH", "EZR", "NEH",
+  "EST", "JOB", "PSA", "PRO", "ECC", "SNG", "ISA", "JER",
+  "LAM", "EZK", "DAN", "HOS", "JOL", "AMO", "OBA", "JON",
+  "MIC", "NAM", "HAB", "ZEP", "HAG", "ZEC", "MAL", "MAT",
+  "MRK", "LUK", "JHN", "ACT", "ROM", "1CO", "2CO", "GAL",
+  "EPH", "PHP", "COL", "1TH", "2TH", "1TI", "2TI", "TIT",
+  "PHM", "HEB", "JAS", "1PE", "2PE", "1JN", "2JN", "3JN",
+  "JUD", "REV"
+]
+
+proc normalizeWhitespace(s: string): string =
+  ## Collapses every run of ASCII whitespace into a single space and removes
+  ## leading and trailing whitespace.
+  s.splitWhitespace.join(" ")
+
+proc removeFootnotes(s: string): string =
+  ## Drops every `\f ...\f*` span from `s`. If a footnote is never closed,
+  ## everything from its opening marker onwards is discarded.
+  var cursor = 0
+  while cursor < s.len:
+    # "\f " also covers the "\f +" form, which begins with the same text.
+    if not s.continuesWith("\\f ", cursor):
+      result.add(s[cursor])
+      inc cursor
+      continue
+
+    let closing = s.find("\\f*", cursor + 2)
+    if closing < 0:
+      return
+    cursor = closing + 3
+
+proc stripUsfmMarkup(s: string): string =
+  ## Returns the plain text of a USFM fragment: footnotes are removed,
+  ## backslash markers are dropped, text from a `|` up to the next backslash
+  ## is dropped, and whitespace is normalized.
+  let withoutFootnotes = removeFootnotes(s)
+  var i = 0
+
+  while i < withoutFootnotes.len:
+    case withoutFootnotes[i]
+    of '\\':
+      # Skip the marker itself: backslash, optional '+', then the marker
+      # name made of letters, digits and '-'.
+      inc i
+      if i < withoutFootnotes.len and withoutFootnotes[i] == '+':
+        inc i
+
+      while i < withoutFootnotes.len and
+          (withoutFootnotes[i].isAlphaAscii or
+           withoutFootnotes[i].isDigit or
+           withoutFootnotes[i] == '-'):
+        inc i
+
+      # A trailing '*' marks a closing marker and is consumed with it.
+      let isClosingMarker = i < withoutFootnotes.len and withoutFootnotes[i] == '*'
+      if isClosingMarker:
+        inc i
+
+      # Only an opening marker also swallows the whitespace after it;
+      # whitespace after a closing marker is kept.
+      while not isClosingMarker and
+          i < withoutFootnotes.len and
+          withoutFootnotes[i].isSpaceAscii:
+        inc i
+    of '|':
+      # Discard everything from '|' up to (not including) the next backslash.
+      while i < withoutFootnotes.len and withoutFootnotes[i] != '\\':
+        inc i
+    of '\t':
+      result.add(' ')
+      inc i
+    else:
+      result.add(withoutFootnotes[i])
+      inc i
+
+  result = normalizeWhitespace(result)
+
+proc parseVerseLine(line: string): tuple[verse: int, text: string] =
+  ## Splits a verse line into its verse number and markup-free text. The
+  ## first three characters of `line` (the marker) are skipped. Raises
+  ## `ValueError` when no text follows the number.
+  let body = line[3..^1].strip
+  let gap = body.find(' ')
+  if gap < 0:
+    raise newException(ValueError, "verse marker without text: " & line)
+
+  (verse: parseInt(body[0 ..< gap]),
+   text: stripUsfmMarkup(body[gap + 1 .. ^1]))
+
+proc findCanonFiles(inputDir: string): Table[string, string] =
+  ## Maps each book code in `canonBookCodes` to the matching
+  ## `*eng-kjv.usfm` file in `inputDir`. The code is the text between the
+  ## first '-' of the file name and the `eng-kjv.usfm` suffix.
+  const suffix = "eng-kjv.usfm"
+
+  for path in walkFiles(inputDir / ("*" & suffix)):
+    let name = path.extractFilename
+    let dashIdx = name.find('-')
+    let suffixIdx = name.find(suffix)
+    if dashIdx < 0 or suffixIdx <= dashIdx:
+      continue
+
+    let code = name[dashIdx + 1 ..< suffixIdx]
+    if code in canonBookCodes:
+      result[code] = path
+
+proc generate(inputDir, outputPath: string) =
+  ## Reads the USFM file for every code in `canonBookCodes` from `inputDir`
+  ## and writes one tab-separated row per verse (code, chapter, verse, text)
+  ## to `outputPath`. Raises `ValueError` when a book's file is missing.
+  let canonFiles = findCanonFiles(inputDir)
+  var rows: seq[string] = @[]
+
+  for code in canonBookCodes:
+    if not canonFiles.hasKey(code):
+      raise newException(ValueError, "missing USFM file for " & code)
+
+    var chapter = 0
+    var verse = 0
+    var verseText = ""
+
+    # Emits the verse collected so far (if any) and resets the verse state.
+    proc flushVerse() =
+      if chapter > 0 and verse > 0:
+        let text = normalizeWhitespace(verseText).replace("\t", " ")
+        if text.len > 0:
+          rows.add([code, $chapter, $verse, text].join("\t"))
+      verse = 0
+      verseText = ""
+
+    for rawLine in canonFiles[code].lines:
+      let line = rawLine.strip
+
+      if line.startsWith("\\c "):
+        # Chapter marker: finish the open verse, then switch chapters.
+        flushVerse()
+        chapter = parseInt(line[3..^1].strip)
+      elif line.startsWith("\\v "):
+        # Verse marker: finish the previous verse and start a new one.
+        flushVerse()
+        let parsed = parseVerseLine(line)
+        verse = parsed.verse
+        verseText = parsed.text
+      elif verse > 0:
+        # Any other line while a verse is open continues that verse.
+        let continued = stripUsfmMarkup(line)
+        if continued.len > 0:
+          if verseText.len > 0:
+            verseText.add(' ')
+          verseText.add(continued)
+
+    # Emit the last verse of the book.
+    flushVerse()
+
+  createDir(outputPath.parentDir)
+  writeFile(outputPath, rows.join("\n") & "\n")
+
+when isMainModule:
+  # Command-line entry point: requires exactly two arguments, the directory
+  # holding the USFM files and the path of the TSV file to write.
+  if paramCount() != 2:
+    quit("Usage: generate_kjv_data <usfm-dir> <output-tsv>", QuitFailure)
+
+  generate(paramStr(1), paramStr(2))
@@ -0,0 +1,330 @@
+import std/[
+  htmlparser,
+  os,
+  osproc,
+  streams,
+  strutils,
+  xmlparser,
+  xmltree
+]
+
+import ../src/reference_parser
+
+type
+  TocEntry = object
+    ## One `navPoint` of the EPUB table of contents that points at an
+    ## `index_split_NNN.html` file.
+    label: string   ## text of the entry's first `text` descendant
+    code: string    ## book code for `label`; empty when it names no book
+    fileIndex: int  ## number taken from the split file name
+
+  BookSource = object
+    ## The run of split files that holds one book.
+    code: string
+    startIndex: int ## first split file index (inclusive)
+    endIndex: int   ## last split file index (inclusive)
+
+  ParseState = object
+    ## Mutable state carried through the walk of one book's HTML.
+    code: string
+    chapter: int      ## current chapter; 0 until one is known
+    verse: int        ## current verse; 0 when no verse is open
+    verseText: string ## text collected for the current verse
+    rows: seq[string] ## finished tab-separated rows
+
+proc normalizeWhitespace(s: string): string =
+  ## Treats UTF-8 no-break spaces as ordinary spaces, collapses every run of
+  ## whitespace into one space, and trims both ends.
+  let plainSpaces = s.replace("\xC2\xA0", " ")
+  plainSpaces.splitWhitespace.join(" ")
+
+proc markerText(s: string): string =
+  ## Returns `s` with all whitespace removed, no-break spaces included.
+  let collapsed = normalizeWhitespace(s)
+  result = collapsed.replace(" ", "")
+
+proc numberAfterPrefix(s, prefix: string): int =
+  ## Returns the number that directly follows `prefix` at the start of the
+  ## whitespace-normalized, upper-cased `s`. Yields 0 when the prefix is
+  ## absent or no digits follow it.
+  let text = normalizeWhitespace(s).toUpperAscii
+  if not text.startsWith(prefix):
+    return 0
+
+  # After stripping, the remainder starts with either a digit or a character
+  # that ends the scan, so only its leading digit run matters.
+  let remainder = text[prefix.len .. ^1].strip
+  var stop = 0
+  while stop < remainder.len and remainder[stop].isDigit:
+    inc stop
+
+  if stop > 0:
+    result = parseInt(remainder[0 ..< stop])
+
+proc isPositiveIntText(s: string): bool =
+  ## True when `s`, with whitespace removed, is a decimal number greater
+  ## than zero.
+  let compact = markerText(s)
+  if compact.len == 0 or not compact.allCharsInSet(Digits):
+    return false
+  parseInt(compact) > 0
+
+proc readEpubEntry(epubPath, entryPath: string): string =
+  ## Returns the content of `entryPath` inside the archive `epubPath`, read
+  ## by running `unzip -p`. Standard error is merged into the output so that
+  ## a failure message can include what `unzip` printed.
+  ##
+  ## Raises `IOError` when `unzip` exits with a non-zero status.
+  let process = startProcess(
+    "unzip",
+    args = ["-p", epubPath, entryPath],
+    options = {poUsePath, poStdErrToStdOut})
+
+  var exitCode = 0
+  try:
+    result = process.outputStream.readAll()
+    exitCode = process.waitForExit()
+  finally:
+    # Release the process handle even when reading or waiting raises.
+    process.close()
+
+  if exitCode != 0:
+    raise newException(IOError,
+      "could not read " & entryPath & " from " & epubPath & ": " & result)
+
+proc textContent(node: XmlNode): string =
+  ## Concatenates the text nodes found beneath `node` in document order.
+  ## Node kinds other than text and element contribute nothing.
+  if node.kind == xnText:
+    return node.text
+  if node.kind != xnElement:
+    return
+
+  for child in node.items:
+    result.add(textContent(child))
+
+proc firstDescendant(node: XmlNode, tag: string): XmlNode =
+  ## Depth-first search for the first element named `tag`, starting with
+  ## `node` itself. Returns nil when nothing matches.
+  if node.kind != xnElement:
+    return nil
+  if node.tag == tag:
+    return node
+
+  for child in node.items:
+    result = firstDescendant(child, tag)
+    if not result.isNil:
+      return
+
+proc descendantText(node: XmlNode, tag: string): string =
+  ## Whitespace-normalized text of the first `tag` element at or below
+  ## `node`; empty when there is none.
+  let target = firstDescendant(node, tag)
+  if not target.isNil:
+    result = normalizeWhitespace(textContent(target))
+
+proc descendantAttr(node: XmlNode, tag, attrName: string): string =
+  ## Value of `attrName` on the first `tag` element at or below `node`;
+  ## empty when there is no such element.
+  let target = firstDescendant(node, tag)
+  if not target.isNil:
+    result = target.attr(attrName)
+
+proc bookCodeForLabel(label: string): string =
+  ## Maps a table-of-contents label to a book code. Only the text before the
+  ## first "(" is considered; "Solomon" is mapped to "SNG" explicitly.
+  ## Returns an empty string when the label names no book in `CanonBooks`.
+  let name = label.split("(", maxsplit = 1)[0].strip
+  if name == "Solomon":
+    result = "SNG"
+  else:
+    for entry in CanonBooks:
+      if entry.name == name:
+        result = entry.code
+        break
+
+proc indexFromSplitFile(path: string): int =
+  ## Extracts NNN from a path whose file name is `index_split_NNN.html`,
+  ## ignoring any `#fragment`. Returns 0 for any other file name.
+  const leader = "index_split_"
+  const trailer = ".html"
+
+  let filename = path.split('#', maxsplit = 1)[0].extractFilename
+  if filename.startsWith(leader) and filename.endsWith(trailer):
+    result = parseInt(filename[leader.len ..< filename.len - trailer.len])
+
+proc parseTocEntries(epubPath: string): seq[TocEntry] =
+  ## Reads `toc.ncx` from the EPUB and returns, in document order, one entry
+  ## per `navPoint` whose target is an `index_split_NNN.html` file with a
+  ## positive index.
+  let tocXml = readEpubEntry(epubPath, "toc.ncx")
+  let toc = parseXml(newStringStream(tocXml))
+  var collected: seq[TocEntry] = @[]
+
+  proc visit(node: XmlNode) =
+    if node.kind != xnElement:
+      return
+
+    if node.tag == "navPoint":
+      let label = node.descendantText("text")
+      let fileIndex = indexFromSplitFile(node.descendantAttr("content", "src"))
+      if fileIndex > 0:
+        collected.add(TocEntry(
+          label: label,
+          code: bookCodeForLabel(label),
+          fileIndex: fileIndex))
+
+    # navPoints may nest, so children are always visited.
+    for child in node.items:
+      visit(child)
+
+  visit(toc)
+  collected
+
+proc bookSources(entries: seq[TocEntry]): seq[BookSource] =
+  ## Turns table-of-contents entries into per-book file ranges. A book ends
+  ## on the file before the next entry's file; the final entry covers only
+  ## its own file. Raises `ValueError` unless the books found match
+  ## `CanonBooks` exactly, in order.
+  for position, entry in entries:
+    if entry.code.len > 0:
+      let isLast = position + 1 >= entries.len
+      let lastFile =
+        if isLast: entry.fileIndex
+        else: entries[position + 1].fileIndex - 1
+
+      result.add(BookSource(
+        code: entry.code,
+        startIndex: entry.fileIndex,
+        endIndex: lastFile))
+
+  let expectedCount = CanonBooks.len
+  if result.len != expectedCount:
+    raise newException(ValueError,
+      "expected " & $expectedCount & " canonical books in EPUB TOC, found " &
+        $result.len)
+
+  for position, book in CanonBooks:
+    let foundCode = result[position].code
+    if foundCode != book.code:
+      raise newException(ValueError,
+        "expected " & book.code & " at position " & $position & ", found " &
+          foundCode)
+
+proc hasClass(node: XmlNode, className: string): bool =
+  ## True when `node` is an element whose whitespace-separated `class`
+  ## attribute contains `className`.
+  (node.kind == xnElement) and
+    (className in node.attr("class").splitWhitespace)
+
+proc shouldSkipElement(node: XmlNode): bool =
+  ## True for elements whose content must not end up in verse text.
+  const skippedClasses = [
+    "calibre_29", # section headings
+    "calibre_6",  # parallel/cross-reference paragraphs
+    "calibre_26"  # Psalm superscriptions/cross-references
+  ]
+
+  for className in skippedClasses:
+    if node.hasClass(className):
+      return true
+
+proc hasHref(node: XmlNode): bool =
+  ## True when `node` or any element beneath it has a non-empty `href`
+  ## attribute.
+  if node.kind != xnElement:
+    return false
+
+  result = node.attr("href").len > 0
+  if not result:
+    for child in node.items:
+      if hasHref(child):
+        return true
+
+proc isBlockElement(node: XmlNode): bool =
+  ## True for the elements treated as block-level when walking passage text.
+  if node.kind != xnElement:
+    return false
+
+  case node.tag
+  of "blockquote", "br", "div", "h1", "h2", "h3", "li", "p": true
+  else: false
+
+proc chapterMarker(node: XmlNode): int =
+  ## Returns the chapter number held by a `<span class="calibre1">` whose
+  ## text is a positive integer; 0 for anything else.
+  if node.kind != xnElement:
+    return 0
+  if node.tag != "span" or not node.hasClass("calibre1"):
+    return 0
+
+  let digits = markerText(textContent(node))
+  if digits.isPositiveIntText:
+    result = parseInt(digits)
+
+proc headingChapterMarker(node: XmlNode, code: string): int =
+  ## Returns the number from a `<p>` whose text starts with "CHAPTER n", or
+  ## with "PSALM n" when `code` is "PSA"; 0 when `node` is no such heading.
+  if node.kind == xnElement and node.tag == "p":
+    let heading = textContent(node)
+    result = numberAfterPrefix(heading, "CHAPTER ")
+    if result <= 0 and code == "PSA":
+      result = numberAfterPrefix(heading, "PSALM ")
+
+proc verseMarker(node: XmlNode): int =
+  ## Returns the verse number held by a `<sup>` that contains no link and
+  ## whose text is a positive integer; 0 for anything else.
+  if node.kind != xnElement:
+    return 0
+  if node.tag != "sup" or node.hasHref:
+    return 0
+
+  let digits = markerText(textContent(node))
+  if digits.isPositiveIntText:
+    result = parseInt(digits)
+
+proc leadingVerseText(s: string): tuple[verse: int, rest: string] =
+  ## Splits text that begins with a verse number into the number and the
+  ## text after it (leading whitespace removed). Returns verse 0 and an empty
+  ## rest when `s` does not start with digits. No-break spaces count as
+  ## whitespace.
+  let text = s.replace("\xC2\xA0", " ")
+
+  var pos = 0
+  while pos < text.len and text[pos] in Whitespace:
+    inc pos
+
+  let firstDigit = pos
+  while pos < text.len and text[pos] in Digits:
+    inc pos
+
+  if pos > firstDigit:
+    result.verse = parseInt(text[firstDigit ..< pos])
+
+    while pos < text.len and text[pos] in Whitespace:
+      inc pos
+    if pos < text.len:
+      result.rest = text[pos .. ^1]
+
+proc flushVerse(state: var ParseState) =
+  ## Appends the verse collected so far to `state.rows` as a tab-separated
+  ## row (code, chapter, verse, text) when a chapter and verse are set and
+  ## the text is not blank. The collected text is always cleared; the verse
+  ## number is left untouched.
+  let haveVerse = state.chapter > 0 and state.verse > 0
+  if haveVerse:
+    let cleaned = normalizeWhitespace(state.verseText).replace("\t", " ")
+    if cleaned.len > 0:
+      let columns = [state.code, $state.chapter, $state.verse, cleaned]
+      state.rows.add(columns.join("\t"))
+
+  state.verseText = ""
+
+proc walkPassageText(node: XmlNode, state: var ParseState) =
+  ## Walks `node` depth-first, updating `state` from chapter and verse
+  ## markers and collecting verse text; finished verses are flushed into
+  ## `state.rows`.
+  case node.kind
+  of xnText:
+    # Text is ignored until a chapter is known.
+    if state.chapter > 0:
+      if state.verse == 0:
+        # No verse is open: text is kept only when it begins with a number,
+        # which then becomes the current verse.
+        let leading = leadingVerseText(node.text)
+        if leading.verse > 0:
+          state.verse = leading.verse
+          state.verseText.add(leading.rest)
+      elif state.verse > 0:
+        state.verseText.add(node.text)
+  of xnElement:
+    # A chapter heading paragraph ends the open verse and leaves no verse
+    # open; its own text is not descended into.
+    let headingChapter = headingChapterMarker(node, state.code)
+    if headingChapter > 0:
+      state.flushVerse()
+      state.chapter = headingChapter
+      state.verse = 0
+      return
+
+    if node.shouldSkipElement:
+      return
+
+    # An inline chapter marker starts the chapter with verse 1 already open.
+    let chapter = chapterMarker(node)
+    if chapter > 0:
+      state.flushVerse()
+      state.chapter = chapter
+      state.verse = 1
+      return
+
+    let verse = verseMarker(node)
+    if verse > 0:
+      state.flushVerse()
+      state.verse = verse
+      return
+
+    # Any other <sup> is skipped entirely.
+    if node.tag == "sup":
+      return
+
+    for child in node.items:
+      walkPassageText(child, state)
+
+    # Separate the text of adjacent block elements within one verse.
+    if node.isBlockElement and state.chapter > 0 and state.verse > 0:
+      state.verseText.add(' ')
+  else:
+    discard
+
+proc indexSplitFile(index: int): string =
+  ## Builds the split file name for `index`, left-padding the number with
+  ## zeros to at least three characters.
+  result = "index_split_"
+  result.add(align($index, 3, '0'))
+  result.add(".html")
+
+proc parseBook(epubPath: string, source: BookSource): seq[string] =
+  ## Parses every split file of one book and returns its verse rows.
+  ## Single-chapter books start in chapter 1, so they need no chapter marker.
+  let firstChapter =
+    if bookInfo(source.code).singleChapter: 1
+    else: 0
+  var state = ParseState(code: source.code, chapter: firstChapter)
+
+  var fileNumber = source.startIndex
+  while fileNumber <= source.endIndex:
+    let markup = readEpubEntry(epubPath, indexSplitFile(fileNumber))
+    walkPassageText(parseHtml(newStringStream(markup)), state)
+    inc fileNumber
+
+  state.flushVerse()
+  state.rows
+
+proc generate(epubPath, outputPath: string) =
+  ## Extracts every canonical book from the EPUB and writes the verse rows,
+  ## one per line, to `outputPath`, creating its parent directory if needed.
+  var tsvLines: seq[string] = @[]
+
+  for bookSource in bookSources(parseTocEntries(epubPath)):
+    for row in parseBook(epubPath, bookSource):
+      tsvLines.add(row)
+
+  createDir(outputPath.parentDir)
+  writeFile(outputPath, tsvLines.join("\n") & "\n")
+
+when isMainModule:
+  # Command-line entry point: requires exactly two arguments, the EPUB file
+  # to read and the path of the TSV file to write.
+  if paramCount() != 2:
+    quit("Usage: generate_mev_data <mev-epub> <output-tsv>", QuitFailure)
+
+  generate(paramStr(1), paramStr(2))
Author	SHA1	Message	Date
jdb	dbc39480f7	Prepare 1.1.0 release	2026-06-29 08:16:27 -05:00
jdb	176fa46816	Add translation-aware passage queries	2026-06-29 08:16:27 -05:00
jdb	42d2587704	Add private MEV embedded support	2026-06-29 08:16:27 -05:00
jdb	544062902b	Add embedded KJV support	2026-06-29 08:16:27 -05:00
jdb	2d78d8e5c0	Update .nimble file for nimble v0.22+	2026-06-23 22:19:06 -05:00
jdb	da6ba66e1c	Prepare 1.0.0 release	2026-06-13 07:15:50 -05:00
jdb	17f953882f	Add API.Bible translation support	2026-06-13 07:12:14 -05:00
jdb	ffe3118ddf	Migrate tool versions to mise	2026-06-13 06:44:42 -05:00
jdb	49594610dc	Rename CLI to bibleref	2026-06-13 06:42:33 -05:00
jdb	8f83c07693	Pin to Nim 1.6 with asdf.	2024-08-05 08:01:55 -05:00
jdb	8422199d7b	Add reading format, rework plain format.	2023-09-17 06:57:52 -05:00
jdb	a0c17bcad9	Rework Markdown formatting to handle quoted passages and poem structure.	2023-06-25 18:47:57 -05:00