Prepare 1.1.0 release

Add translation-aware passage queries
Add private MEV embedded support
2026-06-29 08:16:27 -05:00 · 2026-06-29 08:16:27 -05:00 · 2026-06-29 08:16:27 -05:00 · 2026-06-29 08:16:27 -05:00 · 2026-06-23 22:19:06 -05:00 · 2026-06-13 07:15:50 -05:00
19 changed files with 32643 additions and 103 deletions
@@ -1,2 +1,6 @@
 esv_api
 bibleref
 tests/test_offline_kjv
 tests/test_passage_query
 data/private/
 *.sw?
@@ -0,0 +1,19 @@
 # Package
 version       = "1.1.0"
 author        = "Jonathan Bernard"
 description   = "Simple Nim CLI for retrieving Biblical passages"
 license       = "MIT"
 srcDir        = "src"
 bin           = @["bibleref"]
 # Dependencies
 requires "nim >= 1.6.10"
 requires "docopt"
 requires "nimquery"
 requires "zero_functional"
 # dependencies from git.jdb-software.com/jdb/nim-packages.git
 requires "cliutils"
@@ -1,17 +0,0 @@
 # Package
 version       = "0.2.1"
 author        = "Jonathan Bernard"
 description   = "Simple Nim CLI wrapper around the ESV API (api.esv.org)"
 license       = "MIT"
 srcDir        = "src"
 bin           = @["esv_api"]
 # Dependencies
 requires "nim >= 1.6.10"
 requires @["docopt", "zero_functional"]
 # dependencies from git.jdb-software.com/jdb/nim-packages.git
 requires @["cliutils"]
@@ -0,0 +1,2 @@
 [tools]
 nim = "latest"
@@ -0,0 +1,103 @@
 import std/[httpclient, json, logging, strutils, uri]
 const apiBibleRoot* = "https://rest.api.bible/v1"
 proc configBibleIdKey(translation: string): string =
  ## Builds the configuration key name shown in the "configure ..." hint for
  ## `translation`, e.g. "niv" -> "apiBibleNivBibleId".
  ##
  ## `capitalizeAscii` replaces manual indexing of the first character so an
  ## empty translation yields "apiBibleBibleId" instead of an IndexDefect.
  "apiBible" & translation.toLowerAscii.capitalizeAscii & "BibleId"
 proc defaultBibleId*(translation: string): string =
  ## Returns the built-in API.Bible Bible ID for `translation` (compared
  ## case-insensitively), or an empty string when no default is known.
  if translation.toLowerAscii == "niv":
    result = "78a9f6124f344018-01"
  else:
    result = ""
 proc apiGet(apiRoot, path, query, apiKey: string): JsonNode =
  ## Sends a GET request to `apiRoot & path` (plus `?query` when `query` is
  ## non-empty) with `apiKey` in the "api-key" header and returns the parsed
  ## JSON response body.
  ##
  ## Trailing slashes on `apiRoot` are dropped before `path` is appended.
  ## Errors raised by the HTTP request or by JSON parsing propagate to the
  ## caller.
  var urlPath = apiRoot.strip(leading = false, chars = {'/'}) & path
  if query.len > 0:
    urlPath &= "?" & query
  debug "requesting " & urlPath
  let http = newHttpClient()
  # Release the client's socket even when the request or parsing raises.
  defer: http.close()
  http.headers = newHttpHeaders({"api-key": apiKey})
  result = parseJson(http.getContent(urlPath))
 proc resolveBibleId(translation, apiKey, apiRoot, configuredBibleId: string): string =
  ## Picks the API.Bible Bible ID to use for `translation`.
  ##
  ## Order of preference: a non-blank `configuredBibleId`, then the built-in
  ## default from `defaultBibleId`, then a `/bibles` lookup filtered by the
  ## upper-cased abbreviation. From the lookup, the first entry whose
  ## `abbreviation` or `abbreviationLocal` equals the translation
  ## (case-insensitively) wins; otherwise the first entry returned is used.
  ## Raises `ValueError` when the lookup returns no entries.
  let explicitId = configuredBibleId.strip
  if explicitId.len > 0:
    return explicitId
  let wanted = translation.toLowerAscii
  let builtinId = defaultBibleId(wanted)
  if builtinId.len > 0:
    return builtinId
  let lookupQuery =
    "language=eng&abbreviation=" & encodeUrl(wanted.toUpperAscii) &
      "&include-full-details=false"
  let candidates = apiGet(apiRoot, "/bibles", lookupQuery, apiKey)["data"].getElems
  for candidate in candidates:
    let globalName =
      if candidate.hasKey("abbreviation"): candidate["abbreviation"].getStr
      else: ""
    let localName =
      if candidate.hasKey("abbreviationLocal"):
        candidate["abbreviationLocal"].getStr
      else:
        ""
    if wanted in [globalName.toLowerAscii, localName.toLowerAscii]:
      return candidate["id"].getStr
  # No exact abbreviation match: fall back to the first search result.
  if candidates.len > 0:
    return candidates[0]["id"].getStr
  raise newException(ValueError,
    "could not find an API.Bible Bible ID for '" & translation &
      "'; configure " & configBibleIdKey(wanted))
 proc resolvePassageId(reference, bibleId, apiKey, apiRoot: string): string =
  ## Resolves a free-text `reference` to an API.Bible passage or verse ID by
  ## calling the search endpoint of the given Bible.
  ##
  ## The first entry of `data.passages` is preferred; failing that, the ID of
  ## `data.verses` is used only when exactly one verse came back. Raises
  ## `ValueError` when neither yields an ID.
  let searchPath = "/bibles/" & encodeUrl(bibleId) & "/search"
  let searchQuery =
    "query=" & encodeUrl(reference) & "&limit=1&sort=canonical"
  let data = apiGet(apiRoot, searchPath, searchQuery, apiKey)["data"]
  if data.hasKey("passages") and data["passages"].getElems.len > 0:
    return data["passages"].getElems[0]["id"].getStr
  if data.hasKey("verses") and data["verses"].getElems.len == 1:
    return data["verses"].getElems[0]["id"].getStr
  raise newException(ValueError,
    "could not resolve passage reference '" & reference & "' using API.Bible")
 proc fetchPassages*(
    reference,
    translation,
    apiKey,
    apiRoot,
    configuredBibleId: string): seq[string] =
  ## Fetches the text of `reference` from API.Bible.
  ##
  ## Resolves the Bible ID and passage ID first, then requests the passage as
  ## plain text. Returns a single-element sequence holding the passage's
  ## reference, a newline, and its content.
  const passageOptions =
    "content-type=text&include-notes=false&include-titles=true" &
      "&include-chapter-numbers=false&include-verse-numbers=true" &
      "&include-verse-spans=false"
  let bibleId = resolveBibleId(translation, apiKey, apiRoot, configuredBibleId)
  let passageId = resolvePassageId(reference, bibleId, apiKey, apiRoot)
  let passagePath =
    "/bibles/" & encodeUrl(bibleId) & "/passages/" & encodeUrl(passageId)
  let passage = apiGet(apiRoot, passagePath, passageOptions, apiKey)["data"]
  result = @[passage["reference"].getStr & "\n" & passage["content"].getStr]
@@ -0,0 +1,188 @@
 # Nim CLI for retrieving Biblical passages
 # © 2023 Jonathan Bernard
 ## Simple command-line tool for retrieving Biblical passages.
 import std/[json, logging, os, re, strutils, wordwrap]
 import cliutils, docopt, zero_functional
 import ./api_bible
 import ./esv
 import ./kjv
 import ./mev
 import ./passage_query
 proc formatMarkdown(raw, translation: string): string =
  ## Renders one raw passage as a Markdown block quote.
  ##
  ## The first line of `raw` that is non-empty after stripping is used as the
  ## reference. Verse text starts at the first indented line that begins with
  ## a `[n]` marker and stops at a line starting with "Footnotes". `[n]`
  ## markers become `**n**`, `(n)` markers are removed, each line is wrapped
  ## to 74 columns and prefixed with "> ", and an attribution line with the
  ## reference and the upper-cased `translation` is appended.
  var reference = ""
  var inVerse = false
  var verseLines = newSeq[string]()
  for line in raw.splitLines:
    if reference.len == 0: reference = line.strip
    if not inVerse:
      # Verse text begins at the first indented "[n]" marker.
      if not line.match(re"^\s+\[\d+\]"): continue
      inVerse = true
    elif line.startsWith("Footnotes"):
      inVerse = false
      continue
    elif line.isEmptyOrWhitespace:
      # Keep at most one blank line between paragraphs.
      if verseLines[^1] != "": verseLines.add("")
      continue
    elif not line.match(re"^\s+[^\s]"):
      continue
    # Strip a trailing "(ESV)" marker (5 characters); the attribution line
    # already names the translation. The pattern must be anchored with `^`:
    # a leading `$` can never match a non-empty line. This also covers the
    # line that opens the verse block, so single-verse passages are handled.
    if line.match(re"^(.*)\(ESV\)$"): verseLines.add(line[0 ..< ^5])
    else: verseLines.add(line)
  var wrappedLines = newSeqOfCap[string](verseLines.len)
  for verseLine in verseLines:
    # Lines longer than 90 characters are stripped; shorter ones get two
    # spaces appended before wrapping.
    let spaced =
      if verseLine.len > 90: verseLine.strip
      else: verseLine & "  "
    let marked = spaced.multiReplace(
      [(re"\((\d+)\)", ""), (re"\[(\d+)\]", "**$1**")])
    wrappedLines.add(wrapWords(marked, maxLineWidth = 74, newLine = "\p"))
  var quoted = newSeq[string]()
  for wrappedLine in wrappedLines.join("\p").splitLines:
    quoted.add("> " & wrappedLine)
  result = quoted.join("\p") & "\p> -- *" & reference & " (" &
    translation.toUpperAscii & ")*"
 proc formatPlain(
    raw,
    translation: string,
    keepVerseNumbers = true): string =
  ## Renders one raw passage as wrapped plain text.
  ##
  ## The first line of `raw` that is non-empty after stripping is used as the
  ## reference. Verse text starts at the first indented line that begins with
  ## a `[n]` marker and stops at a line starting with "Footnotes". `(n)`
  ## markers are removed; `[n]` markers become `n` when `keepVerseNumbers` is
  ## true and are removed otherwise. Lines are wrapped to 74 columns and an
  ## attribution line with the reference and the upper-cased `translation`
  ## is appended.
  var reference = ""
  var inVerse = false
  var verseLines = newSeq[string]()
  for line in raw.splitLines:
    if reference.len == 0: reference = line.strip
    if not inVerse:
      # Verse text begins at the first indented "[n]" marker.
      if not line.match(re"^\s+\[\d+\]"): continue
      inVerse = true
    elif line.startsWith("Footnotes"):
      inVerse = false
      continue
    elif line.isEmptyOrWhitespace:
      # Keep at most one blank line between paragraphs.
      if verseLines[^1] != "": verseLines.add("")
      continue
    elif not line.match(re"^\s+[^\s]"):
      continue
    # Strip a trailing "(ESV)" marker (5 characters); the attribution line
    # already names the translation. The pattern must be anchored with `^`:
    # a leading `$` can never match a non-empty line. This also covers the
    # line that opens the verse block, so single-verse passages are handled.
    if line.match(re"^(.*)\(ESV\)$"): verseLines.add(line[0 ..< ^5])
    else: verseLines.add(line)
  # Replacement applied to "[n]" verse markers.
  let verseMarkerRepl = if keepVerseNumbers: "$1" else: ""
  var wrappedLines = newSeqOfCap[string](verseLines.len)
  for verseLine in verseLines:
    # Lines longer than 90 characters are stripped; shorter ones get two
    # spaces appended before wrapping.
    let spaced =
      if verseLine.len > 90: verseLine.strip
      else: verseLine & "  "
    let cleaned = spaced.multiReplace(
      [(re"\((\d+)\)", ""), (re"\[(\d+)\]", verseMarkerRepl)])
    wrappedLines.add(wrapWords(cleaned, maxLineWidth = 74, newLine = "\p"))
  result = wrappedLines.join("\p").splitLines.join("\p") &
    "\p– " & reference & " (" & translation.toUpperAscii & ")"
 proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[string] =
  ## Routes a passage request to the backend that serves `translation`:
  ## the ESV API, the embedded KJV or MEV data, or API.Bible. Settings for
  ## the remote backends are read from `cfg`. Raises `ValueError` for a
  ## translation that no backend handles.
  if translation == "esv":
    result = esv.fetchPassages(
      reference,
      cfg.getVal("esv-api-token"),
      cfg.getVal("esv-api-root", "https://api.esv.org"))
  elif translation in ["akjv", "kjv"]:
    result = kjv.fetchPassages(reference)
  elif translation == "mev":
    result = mev.fetchPassages(reference)
  elif translation in ["amp", "nkjv", "niv"]:
    result = api_bible.fetchPassages(
      reference,
      translation,
      cfg.getVal("api-bible-api-key"),
      cfg.getVal("api-bible-root", api_bible.apiBibleRoot),
      cfg.getVal(
        "api-bible-" & translation & "-bible-id",
        api_bible.defaultBibleId(translation)))
  else:
    raise newException(ValueError,
      "unsupported translation '" & translation &
        "'; supported translations: " & supportedTranslationsList())
 when isMainModule:
  # docopt usage text. The output-format help lists every value the
  # dispatch below recognises; anything else falls through to Markdown.
  const USAGE = """Usage:
  bibleref <reference> [options]
 Options:
  --debug                       Log debug information.
  --echo-args                   Echo back the arguments that were passed on the
                                command line for debugging purposes.
  -f, --output-format <format>  Select a specific output format. Valid values
                                are 'raw', 'text', 'markdown', 'plain', and
                                'reading'. Defaults to 'markdown'.
  -t, --translation <translation>
                                Select a specific translation. Supported values
                                are 'akjv', 'amp', 'esv', 'kjv', 'mev',
                                'nkjv', and 'niv'. Defaults to 'esv'.
                                Individual references may override this with a
                                trailing marker, for example:
                                'John 3:16 (KJV); John 3:16 (ESV)'.
  --esv-api-token <token>       Provide the API token on the command line. By
                                default this will be read either from the
                                .bibleref.cfg.json file or the ESV_API_TOKEN
                                environment variable.
  --api-bible-api-key <key>     Provide the API.Bible API key for translations
                                backed by api.bible.
  --api-bible-root <url>        Override the API.Bible API root. Defaults to
                                https://rest.api.bible/v1.
  --api-bible-amp-bible-id <id> Override the API.Bible Bible ID for AMP.
  --api-bible-niv-bible-id <id> Override the API.Bible Bible ID for NIV.
  --api-bible-nkjv-bible-id <id>
                                Override the API.Bible Bible ID for NKJV.
 """
  let consoleLogger = newConsoleLogger(
    levelThreshold=lvlInfo,
    fmtStr="bibleref - $levelname: ")
  logging.addHandler(consoleLogger)
  try:
    # Parse arguments
    let args = docopt(USAGE, version = "1.1.0")
    if args["--debug"]:
      consoleLogger.levelThreshold = lvlDebug
    if args["--echo-args"]: stderr.writeLine($args)
    # Optional JSON config file in the user's home directory.
    let cfgFilePath = getEnv("HOME") / ".bibleref.cfg.json"
    var cfgFileJson = newJObject()
    if fileExists(cfgFilePath):
      debug "Loading config from " & cfgFilePath
      cfgFileJson = parseFile(cfgFilePath)
    let cfg = CombinedConfig(docopt: args, json: cfgFileJson)
    let defaultTranslation = cfg.getVal("translation", "esv")
    let reference = $args["<reference>"]
    # Each query carries its own translation, so it is fetched and formatted
    # independently of the others.
    let queries = parsePassageQueries(reference, defaultTranslation)
    var formattedPassages: seq[string] = @[]
    for query in queries:
      for passage in fetchPassages(query.referenceText, query.translation, cfg):
        formattedPassages.add(
          case $args["--output-format"]:
          of "plain":
            formatPlain(passage, query.translation)
          of "reading":
            formatPlain(passage, query.translation, keepVerseNumbers = false)
          of "text":
            passage.multiReplace([(re"\[(\d+)\]", "$1")])
          of "raw":
            passage
          else:
            formatMarkdown(passage, query.translation))
    echo formattedPassages.join("\p\p")
  except CatchableError:
    fatal getCurrentExceptionMsg()
    debug getCurrentException().getStackTrace()
    quit(QuitFailure)
@@ -0,0 +1,112 @@
 import std/[strutils, tables]
 import ./reference_parser
 type BibleIndex = object
  ## Lookup tables built by `loadBibleIndex` from tab-separated rows of one
  ## translation.
  verses: Table[string, string]  ## verse text keyed by `verseKey`
  lastVerseByChapter: Table[string, int]  ## highest verse seen per `chapterKey`
  lastChapterByBook: Table[string, int]  ## highest chapter seen per book code
  translationName: string  ## label used in error messages
 proc verseKey(code: string, chapter, verse: int): string =
  ## Table key for a single verse: book code, chapter and verse joined by
  ## tab characters.
  [code, $chapter, $verse].join("\t")
 proc chapterKey(code: string, chapter: int): string =
  ## Table key for a chapter: book code and chapter number separated by a
  ## tab character.
  result = code
  result.add('\t')
  result.add($chapter)
 proc loadBibleIndex(rows, translationName: string): BibleIndex =
  ## Parses tab-separated `rows` (book code, chapter, verse, text) into a
  ## `BibleIndex`, recording the highest verse per chapter and the highest
  ## chapter per book along the way.
  ##
  ## Blank lines are skipped. Raises `ValueError` for a row that does not
  ## have four fields or whose chapter/verse is not an integer.
  result.translationName = translationName
  for row in rows.splitLines:
    if row.isEmptyOrWhitespace:
      continue
    # maxsplit = 3 keeps any tab characters inside the verse text intact.
    let fields = row.split('\t', maxsplit = 3)
    if fields.len != 4:
      raise newException(ValueError,
        "invalid embedded " & translationName & " row: " & row)
    let
      code = fields[0]
      chapter = parseInt(fields[1])
      verse = parseInt(fields[2])
    result.verses[verseKey(code, chapter, verse)] = fields[3]
    let cKey = chapterKey(code, chapter)
    result.lastVerseByChapter[cKey] =
      max(verse, result.lastVerseByChapter.getOrDefault(cKey, verse))
    result.lastChapterByBook[code] =
      max(chapter, result.lastChapterByBook.getOrDefault(code, chapter))
 proc requireLastChapter(index: BibleIndex, code: string): int =
  ## Returns the highest chapter number indexed for book `code`; raises
  ## `ValueError` when the book has no rows in the index.
  if code in index.lastChapterByBook:
    return index.lastChapterByBook[code]
  raise newException(ValueError,
    "no embedded " & index.translationName & " data for " & code)
 proc requireLastVerse(index: BibleIndex, code: string, chapter: int): int =
  ## Returns the highest verse number indexed for `chapter` of book `code`;
  ## raises `ValueError` when that chapter has no rows in the index.
  let cKey = chapterKey(code, chapter)
  if cKey in index.lastVerseByChapter:
    return index.lastVerseByChapter[cKey]
  raise newException(ValueError,
    "no embedded " & index.translationName & " data for " &
      bookInfo(code).name & " " & $chapter)
 proc requireVerse(index: BibleIndex, code: string, chapter, verse: int): string =
  ## Returns the text of one verse; raises `ValueError` when the verse is
  ## not present in the index.
  let vKey = verseKey(code, chapter, verse)
  if vKey in index.verses:
    return index.verses[vKey]
  raise newException(ValueError,
    "no embedded " & index.translationName & " data for " &
      bookInfo(code).name & " " & $chapter & ":" & $verse)
 proc addVerseLines(
    lines: var seq[string],
    index: BibleIndex,
    reference: PassageReference,
    range: RefRange) =
  ## Appends one "  [verse] text" line to `lines` for every verse that
  ## `range` covers in the book named by `reference`.
  ##
  ## A start verse of 0 means "from verse 1"; a finish verse of 0 means "to
  ## the last verse of that chapter". Chapters strictly between start and
  ## finish are included whole. Raises `ValueError` when the book, a chapter
  ## or a verse is missing from `index`, or when the first verse of a chapter
  ## would come after its last.
  let code = reference.book.code
  # Fails early when the book has no data at all.
  discard index.requireLastChapter(code)
  for chapter in range.start.chapter .. range.finish.chapter:
    var firstVerse = 1
    if chapter == range.start.chapter and range.start.verse > 0:
      firstVerse = range.start.verse
    var lastVerse = range.finish.verse
    if chapter != range.finish.chapter or lastVerse <= 0:
      lastVerse = index.requireLastVerse(code, chapter)
    if firstVerse > lastVerse:
      raise newException(ValueError, "reference range starts after it ends")
    for verse in firstVerse .. lastVerse:
      let verseText = index.requireVerse(code, chapter, verse)
      lines.add("  [" & $verse & "] " & verseText)
 proc fetchReference(index: BibleIndex, reference: PassageReference): string =
  ## Builds the text for one parsed reference: the reference itself on the
  ## first line, then one "  [verse] text" line per verse, joined with "\n".
  ## A reference without ranges yields every chapter and verse of the book.
  var lines = @[$reference]
  if reference.ranges.len > 0:
    for span in reference.ranges:
      lines.addVerseLines(index, reference, span)
  else:
    let code = reference.book.code
    for chapter in 1 .. index.requireLastChapter(code):
      for verse in 1 .. index.requireLastVerse(code, chapter):
        let verseText = index.requireVerse(code, chapter, verse)
        lines.add("  [" & $verse & "] " & verseText)
  result = lines.join("\n")
 proc fetchPassages*(rows, reference, translationName: string): seq[string] =
  ## Indexes `rows`, parses `reference` with `parseReferences`, and returns
  ## one formatted passage per parsed reference.
  let index = loadBibleIndex(rows, translationName)
  result = newSeq[string]()
  for parsed in parseReferences(reference):
    result.add(index.fetchReference(parsed))
@@ -0,0 +1,13 @@
 import std/[httpclient, json, logging, uri]
 proc fetchPassages*(reference, apiToken, apiRoot: string): seq[string] =
  ## Requests `reference` from the passage-text endpoint under `apiRoot`,
  ## authenticating with `apiToken`, and returns each entry of the response's
  ## "passages" array as a string.
  ##
  ## Errors raised by the HTTP request or by JSON parsing propagate to the
  ## caller.
  let http = newHttpClient()
  # Release the client's socket even when the request or parsing raises.
  defer: http.close()
  http.headers = newHttpHeaders({"Authorization": "Token " & apiToken})
  let urlPath = apiRoot & "/v3/passage/text/?q=" & encodeUrl(reference)
  debug "requesting " & urlPath
  let respJson = parseJson(http.getContent(urlPath))
  result = @[]
  for passage in respJson["passages"].getElems:
    result.add(passage.getStr)
@@ -1,86 +0,0 @@
 # Nim CLI Wrapper for the ESV API
 # © 2023 Jonathan Bernard
 ## Simple command-line wrapper around the ESV API.
 import std/[httpclient, json, logging, os, re, strutils, uri, wordwrap]
 import cliutils, docopt, zero_functional
 proc formatMarkdown(raw: string): string =
  ## Renders a raw ESV passage as a Markdown block quote.
  ##
  ## Every line is stripped; only lines that start with a `[n]` verse marker
  ## are kept. `[n]` becomes `**n**`, `(n)` markers are removed, lines are
  ## wrapped to 74 columns and prefixed with "> ". The first stripped line of
  ## `raw` is used as the reference in the closing attribution.
  let stripped = raw.splitLines --> map(it.strip)
  let verseLines = stripped -->
    filter(not isEmptyOrWhitespace(it.strip) and match(it, re"^\[\d+\].*"))
  let marked = verseLines -->
    map(it.multiReplace([(re"\((\d+)\)", ""), (re"\[(\d+)\]", "**$1**")]))
  let wrappedLines = marked -->
    map(wrapWords(it, maxLineWidth = 74, newLine = "\p"))
  let quoted = wrappedLines.join("\p").splitLines --> map("> " & it)
  result = quoted.join("\p") & "\p>\p> -- *" & stripped[0] & " (ESV)*"
 # Command-line entry point: parse arguments, load the optional JSON config,
 # request the passage from the ESV API, and print it in the chosen format.
 when isMainModule:
  const USAGE = """Usage:
  esv_api <reference> [options]
 Options:
  --debug                       Log debug information.
  --echo-args                   Echo back the arguments that were passed on the
                                command line for debugging purposes.
  -f, --output-format <format>  Select a specific output format. Valid values
                                are 'raw', 'markdown', 'plain'.
  -t, --esv-api-token <token>   Provide the API token on the command line. By
                                default this will be read either from the
                                .esv_api.cfg.json file or the ESV_API_TOKEN
                                envionment variable.
 """
  let consoleLogger = newConsoleLogger(
    levelThreshold=lvlInfo,
    fmtStr="esv_api - $levelname: ")
  logging.addHandler(consoleLogger)
  try:
    # Parse arguments
    let args = docopt(USAGE, version = "0.2.1")
    if args["--debug"]:
      consoleLogger.levelThreshold = lvlDebug
    if args["--echo-args"]: stderr.writeLine($args)
    # Optional JSON config file in the user's home directory.
    let cfgFilePath = getEnv("HOME") / ".esv_api.cfg.json"
    var cfgFileJson = newJObject()
    if fileExists(cfgFilePath):
      debug "Loading config from " & cfgFilePath
      cfgFileJson = parseFile(cfgFilePath)
    let cfg = CombinedConfig(docopt: args, json: cfgFileJson)
    let apiToken = cfg.getVal("esv-api-token")
    let apiRoot = cfg.getVal("esv-api-root", "https://api.esv.org")
    let reference = $args["<reference>"]
    let http = newHttpClient()
    http.headers = newHttpHeaders({"Authorization": "Token " & apiToken})
    let urlPath = apiRoot & "/v3/passage/text/?q=" & encodeUrl(reference)
    debug "requesting " & urlPath
    let respJson = parseJson(http.getContent(urlPath))
    # "text" drops the brackets around verse numbers, "raw" passes the API
    # text through, and any other value is rendered as Markdown.
    let formattedPassages =
      case $args["--output-format"]:
      of "text":
        respJson["passages"].getElems -->
          map(it.getStr.multiReplace([(re"\[(\d+)\]", "$1")]))
      of "raw": respJson["passages"].getElems --> map(it.getStr)
      else:
        respJson["passages"].getElems --> map(formatMarkdown(it.getStr))
    echo formattedPassages.join("\p\p")
  except CatchableError:
    # Report the failure; the stack trace is only shown at debug level.
    fatal getCurrentExceptionMsg()
    debug getCurrentException().getStackTrace()
    quit(QuitFailure)
@@ -0,0 +1,7 @@
 import ./embedded_bible
 import ./offline_data
 const kjvRows = embeddedTranslationData("kjv")
 proc fetchPassages*(reference: string): seq[string] =
  ## Looks up `reference` in the KJV rows embedded at compile time.
  result = embedded_bible.fetchPassages(
    rows = kjvRows,
    reference = reference,
    translationName = "KJV")
@@ -0,0 +1,13 @@
 import ./offline_data
 # The embedded reader and the MEV rows are only compiled in when an MEV data
 # file is found; without one, neither `embedded_bible` nor `mevRows` exists
 # in this module.
 when hasEmbeddedTranslationData("mev"):
  import ./embedded_bible
  const mevRows = embeddedTranslationData("mev")
 proc fetchPassages*(reference: string): seq[string] =
  ## Looks up `reference` in the embedded MEV rows. When the binary was built
  ## without MEV data, every call raises `ValueError` instead.
  when not hasEmbeddedTranslationData("mev"):
    raise newException(ValueError,
      "MEV data is not embedded; generate data/private/mev.tsv and rebuild")
  else:
    result = embedded_bible.fetchPassages(mevRows, reference, "MEV")
@@ -0,0 +1,15 @@
 import std/os
 # Expands to the path `<dataRoot>/<visibility>/<name>.tsv`, where `dataRoot`
 # is the "data" directory found two `parentDir` steps up from
 # `currentSourcePath()`.
 # NOTE(review): presumably `currentSourcePath()` resolves to this module's
 # file rather than the caller's - confirm.
 template translationDataPath(name: static[string], visibility: static[string]): string =
  const dataRoot = currentSourcePath().parentDir.parentDir / "data"
  dataRoot / visibility / (name & ".tsv")
 # True when a `<name>.tsv` file exists under either the "private" or the
 # "public" data directory. The modules in this project use it inside `when`,
 # i.e. evaluated while compiling.
 template hasEmbeddedTranslationData*(name: static[string]): bool =
  fileExists(translationDataPath(name, "private")) or
    fileExists(translationDataPath(name, "public"))
 # Expands to the contents of `<name>.tsv`, read with `staticRead`. The
 # "private" copy is preferred when it exists; otherwise the "public" copy is
 # read, with no existence check of its own on that branch.
 template embeddedTranslationData*(name: static[string]): string =
  when fileExists(translationDataPath(name, "private")):
    staticRead(translationDataPath(name, "private"))
  else:
    staticRead(translationDataPath(name, "public"))
@@ -0,0 +1,64 @@
 import std/strutils
 import ./reference_parser
 type PassageQuery* = object
  ## One parsed reference together with the translation it should be
  ## fetched from.
  reference*: PassageReference  ## parsed book and ranges
  translation*: string  ## normalized (lower-case, supported) translation code
 const SupportedTranslations* = [
  "akjv", "amp", "esv", "kjv", "mev", "niv", "nkjv"
 ]
 proc supportedTranslationsList*(): string =
  ## Returns the supported translation codes as one ", "-separated string,
  ## in the order they appear in `SupportedTranslations`.
  for position, code in SupportedTranslations:
    if position > 0:
      result.add(", ")
    result.add(code)
 proc normalizeTranslation*(translation: string): string =
  ## Strips and lower-cases `translation` and returns it when it is one of
  ## `SupportedTranslations`; raises `ValueError` otherwise.
  let candidate = translation.strip.toLowerAscii
  if candidate notin SupportedTranslations:
    raise newException(ValueError,
      "unsupported translation '" & translation &
        "'; supported translations: " & supportedTranslationsList())
  candidate
 proc splitTrailingTranslationMarker(
    input: string): tuple[referenceText: string, translation: string] =
  ## Splits a trailing "(XYZ)" translation marker off a reference.
  ##
  ## Returns the stripped text before the last "(" and the stripped text
  ## inside the parentheses. When the input does not end with ")", has no
  ## "(", or either part would be empty, the whole stripped input is returned
  ## with an empty translation.
  let trimmed = input.strip
  result = (trimmed, "")
  if trimmed.endsWith(")"):
    let parenAt = trimmed.rfind("(")
    if parenAt >= 0:
      let head = trimmed[0 ..< parenAt].strip
      let marker = trimmed[parenAt + 1 .. ^2].strip
      if head.len > 0 and marker.len > 0:
        result = (head, marker)
 proc parsePassageQuery*(input, defaultTranslation: string): PassageQuery =
  ## Parses one reference with an optional trailing "(XYZ)" translation
  ## marker. The marker, when present, selects the translation; otherwise
  ## `defaultTranslation` is used. Either way the translation goes through
  ## `normalizeTranslation`.
  let (refPart, marker) = splitTrailingTranslationMarker(input)
  result.reference = parseReference(refPart)
  let chosen = if marker.len > 0: marker else: defaultTranslation
  result.translation = normalizeTranslation(chosen)
 proc parsePassageQueries*(input, defaultTranslation: string): seq[PassageQuery] =
  ## Splits `input` on ';' and parses every non-blank piece with
  ## `parsePassageQuery`. Raises `ValueError` when no piece has any content.
  for piece in input.split(';'):
    if piece.isEmptyOrWhitespace:
      continue
    result.add(parsePassageQuery(piece.strip, defaultTranslation))
  if result.len == 0:
    raise newException(ValueError, "empty Bible reference")
 proc referenceText*(query: PassageQuery): string =
  ## String form of the query's parsed reference.
  result = $(query.reference)
@@ -0,0 +1,399 @@
 import std/[strutils]
 type
  BookInfo* = object
    ## One book of the canon table.
    code*: string  ## three-character book code, e.g. "GEN"
    name*: string  ## display name, e.g. "Genesis"
    singleChapter*: bool  ## set for the books flagged as single-chapter
  RefPoint* = object
    ## A chapter/verse position. `parsePoint` produces `verse: 0` for a
    ## chapter-only point.
    chapter*: int
    verse*: int
  RefRange* = object
    ## A span from `start` to `finish`; a single point has both equal.
    start*: RefPoint
    finish*: RefPoint
  PassageReference* = object
    ## A book plus the ranges requested within it.
    book*: BookInfo
    ranges*: seq[RefRange]
 # The 66 books with their three-character codes and display names.
 # `bookIndex` returns positions in this array, and `singleChapter` marks the
 # books for which `parsePoint` reads a bare number as a verse of chapter 1.
 const CanonBooks*: array[66, BookInfo] = [
  BookInfo(code: "GEN", name: "Genesis"),
  BookInfo(code: "EXO", name: "Exodus"),
  BookInfo(code: "LEV", name: "Leviticus"),
  BookInfo(code: "NUM", name: "Numbers"),
  BookInfo(code: "DEU", name: "Deuteronomy"),
  BookInfo(code: "JOS", name: "Joshua"),
  BookInfo(code: "JDG", name: "Judges"),
  BookInfo(code: "RUT", name: "Ruth"),
  BookInfo(code: "1SA", name: "1 Samuel"),
  BookInfo(code: "2SA", name: "2 Samuel"),
  BookInfo(code: "1KI", name: "1 Kings"),
  BookInfo(code: "2KI", name: "2 Kings"),
  BookInfo(code: "1CH", name: "1 Chronicles"),
  BookInfo(code: "2CH", name: "2 Chronicles"),
  BookInfo(code: "EZR", name: "Ezra"),
  BookInfo(code: "NEH", name: "Nehemiah"),
  BookInfo(code: "EST", name: "Esther"),
  BookInfo(code: "JOB", name: "Job"),
  BookInfo(code: "PSA", name: "Psalms"),
  BookInfo(code: "PRO", name: "Proverbs"),
  BookInfo(code: "ECC", name: "Ecclesiastes"),
  BookInfo(code: "SNG", name: "Song of Solomon"),
  BookInfo(code: "ISA", name: "Isaiah"),
  BookInfo(code: "JER", name: "Jeremiah"),
  BookInfo(code: "LAM", name: "Lamentations"),
  BookInfo(code: "EZK", name: "Ezekiel"),
  BookInfo(code: "DAN", name: "Daniel"),
  BookInfo(code: "HOS", name: "Hosea"),
  BookInfo(code: "JOL", name: "Joel"),
  BookInfo(code: "AMO", name: "Amos"),
  BookInfo(code: "OBA", name: "Obadiah", singleChapter: true),
  BookInfo(code: "JON", name: "Jonah"),
  BookInfo(code: "MIC", name: "Micah"),
  BookInfo(code: "NAM", name: "Nahum"),
  BookInfo(code: "HAB", name: "Habakkuk"),
  BookInfo(code: "ZEP", name: "Zephaniah"),
  BookInfo(code: "HAG", name: "Haggai"),
  BookInfo(code: "ZEC", name: "Zechariah"),
  BookInfo(code: "MAL", name: "Malachi"),
  BookInfo(code: "MAT", name: "Matthew"),
  BookInfo(code: "MRK", name: "Mark"),
  BookInfo(code: "LUK", name: "Luke"),
  BookInfo(code: "JHN", name: "John"),
  BookInfo(code: "ACT", name: "Acts"),
  BookInfo(code: "ROM", name: "Romans"),
  BookInfo(code: "1CO", name: "1 Corinthians"),
  BookInfo(code: "2CO", name: "2 Corinthians"),
  BookInfo(code: "GAL", name: "Galatians"),
  BookInfo(code: "EPH", name: "Ephesians"),
  BookInfo(code: "PHP", name: "Philippians"),
  BookInfo(code: "COL", name: "Colossians"),
  BookInfo(code: "1TH", name: "1 Thessalonians"),
  BookInfo(code: "2TH", name: "2 Thessalonians"),
  BookInfo(code: "1TI", name: "1 Timothy"),
  BookInfo(code: "2TI", name: "2 Timothy"),
  BookInfo(code: "TIT", name: "Titus"),
  BookInfo(code: "PHM", name: "Philemon", singleChapter: true),
  BookInfo(code: "HEB", name: "Hebrews"),
  BookInfo(code: "JAS", name: "James"),
  BookInfo(code: "1PE", name: "1 Peter"),
  BookInfo(code: "2PE", name: "2 Peter"),
  BookInfo(code: "1JN", name: "1 John"),
  BookInfo(code: "2JN", name: "2 John", singleChapter: true),
  BookInfo(code: "3JN", name: "3 John", singleChapter: true),
  BookInfo(code: "JUD", name: "Jude", singleChapter: true),
  BookInfo(code: "REV", name: "Revelation")
 ]
 # (book code, lower-case alias) pairs. `parseBook` tries every alias with
 # `matchAlias` when no unique canonical-name prefix matched, and keeps the
 # alias that consumes the most input.
 const bookAliases = [
  ("GEN", "genesis"), ("GEN", "gen"),
  ("EXO", "exodus"), ("EXO", "exod"), ("EXO", "exo"),
  ("LEV", "leviticus"), ("LEV", "lev"),
  ("NUM", "numbers"), ("NUM", "num"), ("NUM", "numb"),
  ("DEU", "deuteronomy"), ("DEU", "deut"), ("DEU", "deu"),
  ("JOS", "joshua"), ("JOS", "josh"), ("JOS", "jos"),
  ("JDG", "judges"), ("JDG", "judg"), ("JDG", "jdg"),
  ("RUT", "ruth"), ("RUT", "rut"),
  ("1SA", "1 samuel"), ("1SA", "1 sam"), ("1SA", "i samuel"), ("1SA", "first samuel"),
  ("2SA", "2 samuel"), ("2SA", "2 sam"), ("2SA", "ii samuel"), ("2SA", "second samuel"),
  ("1KI", "1 kings"), ("1KI", "1 kgs"), ("1KI", "1 kin"), ("1KI", "i kings"), ("1KI", "first kings"),
  ("2KI", "2 kings"), ("2KI", "2 kgs"), ("2KI", "2 kin"), ("2KI", "ii kings"), ("2KI", "second kings"),
  ("1CH", "1 chronicles"), ("1CH", "1 chron"), ("1CH", "1 chr"), ("1CH", "i chronicles"), ("1CH", "first chronicles"),
  ("2CH", "2 chronicles"), ("2CH", "2 chron"), ("2CH", "2 chr"), ("2CH", "ii chronicles"), ("2CH", "second chronicles"),
  ("EZR", "ezra"), ("EZR", "ezr"),
  ("NEH", "nehemiah"), ("NEH", "neh"),
  ("EST", "esther"), ("EST", "est"),
  ("JOB", "job"),
  ("PSA", "psalms"), ("PSA", "psalm"), ("PSA", "ps"), ("PSA", "psa"),
  ("PRO", "proverbs"), ("PRO", "prov"), ("PRO", "pro"),
  ("ECC", "ecclesiastes"), ("ECC", "eccl"), ("ECC", "ecc"),
  ("SNG", "song of solomon"), ("SNG", "song"), ("SNG", "songs"), ("SNG", "canticles"), ("SNG", "sng"),
  ("ISA", "isaiah"), ("ISA", "isa"),
  ("JER", "jeremiah"), ("JER", "jer"),
  ("LAM", "lamentations"), ("LAM", "lam"),
  ("EZK", "ezekiel"), ("EZK", "ezek"), ("EZK", "ezk"),
  ("DAN", "daniel"), ("DAN", "dan"),
  ("HOS", "hosea"), ("HOS", "hos"),
  ("JOL", "joel"), ("JOL", "jol"),
  ("AMO", "amos"), ("AMO", "amo"),
  ("OBA", "obadiah"), ("OBA", "obad"), ("OBA", "oba"),
  ("JON", "jonah"), ("JON", "jon"),
  ("MIC", "micah"), ("MIC", "mic"),
  ("NAM", "nahum"), ("NAM", "nah"),
  ("HAB", "habakkuk"), ("HAB", "hab"),
  ("ZEP", "zephaniah"), ("ZEP", "zeph"), ("ZEP", "zep"),
  ("HAG", "haggai"), ("HAG", "hag"),
  ("ZEC", "zechariah"), ("ZEC", "zech"), ("ZEC", "zec"),
  ("MAL", "malachi"), ("MAL", "mal"),
  ("MAT", "matthew"), ("MAT", "matt"), ("MAT", "mat"), ("MAT", "mt"),
  ("MRK", "mark"), ("MRK", "mrk"), ("MRK", "mk"),
  ("LUK", "luke"), ("LUK", "luk"), ("LUK", "lk"),
  ("JHN", "john"), ("JHN", "jhn"), ("JHN", "jn"),
  ("ACT", "acts"), ("ACT", "act"),
  ("ROM", "romans"), ("ROM", "rom"),
  ("1CO", "1 corinthians"), ("1CO", "1 cor"), ("1CO", "1 co"), ("1CO", "i corinthians"), ("1CO", "first corinthians"),
  ("2CO", "2 corinthians"), ("2CO", "2 cor"), ("2CO", "2 co"), ("2CO", "ii corinthians"), ("2CO", "second corinthians"),
  ("GAL", "galatians"), ("GAL", "gal"),
  ("EPH", "ephesians"), ("EPH", "eph"),
  ("PHP", "philippians"), ("PHP", "php"),
  ("COL", "colossians"), ("COL", "col"),
  ("1TH", "1 thessalonians"), ("1TH", "1 thess"), ("1TH", "1 thes"), ("1TH", "i thessalonians"), ("1TH", "first thessalonians"),
  ("2TH", "2 thessalonians"), ("2TH", "2 thess"), ("2TH", "2 thes"), ("2TH", "ii thessalonians"), ("2TH", "second thessalonians"),
  ("1TI", "1 timothy"), ("1TI", "1 tim"), ("1TI", "i timothy"), ("1TI", "first timothy"),
  ("2TI", "2 timothy"), ("2TI", "2 tim"), ("2TI", "ii timothy"), ("2TI", "second timothy"),
  ("TIT", "titus"), ("TIT", "tit"),
  ("PHM", "philemon"), ("PHM", "philem"), ("PHM", "phm"),
  ("HEB", "hebrews"), ("HEB", "heb"),
  ("JAS", "james"), ("JAS", "jas"), ("JAS", "jam"),
  ("1PE", "1 peter"), ("1PE", "1 pet"), ("1PE", "1 pe"), ("1PE", "i peter"), ("1PE", "first peter"),
  ("2PE", "2 peter"), ("2PE", "2 pet"), ("2PE", "2 pe"), ("2PE", "ii peter"), ("2PE", "second peter"),
  ("1JN", "1 john"), ("1JN", "1 jn"), ("1JN", "1 jhn"), ("1JN", "i john"), ("1JN", "first john"),
  ("2JN", "2 john"), ("2JN", "2 jn"), ("2JN", "2 jhn"), ("2JN", "ii john"), ("2JN", "second john"),
  ("3JN", "3 john"), ("3JN", "3 jn"), ("3JN", "3 jhn"), ("3JN", "iii john"), ("3JN", "third john"),
  ("JUD", "jude"), ("JUD", "jud"),
  ("REV", "revelation"), ("REV", "revelations"), ("REV", "rev"), ("REV", "apocalypse")
 ]
 proc bookInfo*(code: string): BookInfo =
  ## Returns the `CanonBooks` entry whose code equals `code` exactly; raises
  ## `ValueError` for an unknown code.
  for position in CanonBooks.low .. CanonBooks.high:
    if CanonBooks[position].code == code:
      return CanonBooks[position]
  raise newException(ValueError, "unknown Bible book code '" & code & "'")
 proc bookIndex*(code: string): int =
  ## Returns the zero-based position in `CanonBooks` of the book whose code
  ## equals `code` exactly; raises `ValueError` for an unknown code.
  var position = 0
  while position < CanonBooks.len:
    if CanonBooks[position].code == code:
      return position
    inc position
  raise newException(ValueError, "unknown Bible book code '" & code & "'")
 proc normalizeReferenceInput(s: string): string =
  ## Replaces en dashes, em dashes and minus signs with an ASCII hyphen and
  ## strips surrounding whitespace.
  result = s
  for dash in ["–", "—", "−"]:
    result = result.replace(dash, "-")
  result = result.strip
 proc normalizeBookPrefix(s: string): string =
  ## Keeps only ASCII letters (lower-cased) and digits from `s`; every other
  ## character, including spaces and punctuation, is dropped.
  for ch in s:
    case ch
    of 'a'..'z', 'A'..'Z':
      result.add(ch.toLowerAscii)
    of '0'..'9':
      result.add(ch)
    else:
      discard
 proc canonicalNamePrefixMatches(prefix: string): seq[BookInfo] =
  ## Returns, in canon order, every book whose normalized name starts with
  ## `prefix`. `prefix` is compared as given, so callers normalize it first.
  for candidate in CanonBooks:
    let normalizedName = normalizeBookPrefix(candidate.name)
    if normalizedName.startsWith(prefix):
      result.add(candidate)
 proc formatBookList(books: seq[BookInfo]): string =
  ## Joins the display names of `books` with ", ".
  for position, book in books:
    if position > 0:
      result.add(", ")
    result.add(book.name)
 proc matchCanonicalBookPrefix(input: string): tuple[
    matched: bool,
    ambiguous: bool,
    book: BookInfo,
    consumed: int,
    prefix: string,
    matches: seq[BookInfo]] =
  ## Finds the longest prefix of `input` that identifies exactly one book by
  ## the start of its canonical name.
  ##
  ## Returns `matched = true` with the book and the number of input characters
  ## consumed when some prefix is unique. When no prefix is unique but at
  ## least one matches several books, `ambiguous = true` and `matches` holds
  ## the candidates for the longest such prefix. All fields stay at their
  ## defaults when nothing matches.
  for idx in 1 .. input.len:
    # Only cut at word boundaries: skip positions that would split a run of
    # letters.
    if idx < input.len and input[idx].isAlphaAscii:
      continue
    let prefix = normalizeBookPrefix(input[0 ..< idx])
    if prefix.len == 0:
      continue
    let matches = canonicalNamePrefixMatches(prefix)
    if matches.len == 1:
      # A later (longer) unique match overwrites an earlier one, so the
      # longest unique prefix wins.
      result.matched = true
      result.ambiguous = false
      result.book = matches[0]
      result.consumed = idx
      result.prefix = input[0 ..< idx].strip
      result.matches = matches
    elif matches.len > 1 and not result.matched:
      # Ambiguity is only recorded while no unique match has been found.
      result.ambiguous = true
      result.consumed = idx
      result.prefix = input[0 ..< idx].strip
      result.matches = matches
 proc matchAlias(input, alias: string): int =
  ## Tries to match `alias` at the start of `input`, ignoring case.
  ##
  ## A space or '.' in the alias matches any run (possibly empty) of spaces
  ## and dots in the input, and dots in the input are skipped before every
  ## other alias character and after the last one. The match must end at a
  ## word boundary: it fails when the next input character is an ASCII
  ## letter. Returns the number of input characters consumed, or -1 when the
  ## alias does not match.
  var pos = 0
  for aliasCh in alias:
    if aliasCh.isSpaceAscii or aliasCh == '.':
      while pos < input.len and (input[pos].isSpaceAscii or input[pos] == '.'):
        inc pos
      continue
    while pos < input.len and input[pos] == '.':
      inc pos
    if pos >= input.len:
      return -1
    if input[pos].toLowerAscii != aliasCh.toLowerAscii:
      return -1
    inc pos
  while pos < input.len and input[pos] == '.':
    inc pos
  result =
    if pos < input.len and input[pos].isAlphaAscii: -1
    else: pos
 proc parseBook(input: string): tuple[book: BookInfo, rest: string] =
  ## Splits `input` into its leading book and the stripped remainder.
  ##
  ## A unique canonical-name prefix is preferred. Otherwise every entry of
  ## `bookAliases` is tried and the one consuming the most input wins (the
  ## first such entry on ties). Raises `ValueError` when nothing matches,
  ## with a dedicated message when the canonical prefix was ambiguous.
  let canonical = matchCanonicalBookPrefix(input)
  if canonical.matched:
    return (canonical.book, input[canonical.consumed .. ^1].strip)
  var winnerCode = ""
  var winnerLen = -1
  for (code, alias) in bookAliases:
    let consumed = matchAlias(input, alias)
    if consumed > winnerLen:
      winnerCode = code
      winnerLen = consumed
  if winnerLen >= 0:
    return (bookInfo(winnerCode), input[winnerLen .. ^1].strip)
  if canonical.ambiguous:
    raise newException(ValueError,
      "ambiguous Bible book prefix '" & canonical.prefix & "' in '" &
        input & "'; matches " & canonical.matches.formatBookList)
  raise newException(ValueError, "could not parse Bible book in '" & input & "'")
 proc parsePositiveInt(s, label: string): int =
  ## Parses `s` as a strictly positive decimal integer.
  ##
  ## `label` names the value (for example "chapter") in error messages.
  ## Raises `ValueError` when `s` is empty, contains anything other than
  ## ASCII digits, or evaluates to zero.
  let digitsOnly = s.len > 0 and s.allCharsInSet(Digits)
  if not digitsOnly:
    raise newException(ValueError, "invalid " & label & " '" & s & "'")
  let value = parseInt(s)
  if value < 1:
    raise newException(ValueError, label & " must be positive")
  value
 proc parsePoint(token: string, defaultChapter: int, singleChapter: bool): RefPoint =
  ## Parses one end of a range: either "chapter:verse" or a bare number.
  ##
  ## A bare number is a verse of chapter 1 when `singleChapter` is true, a
  ## verse of `defaultChapter` when that is positive, and otherwise a whole
  ## chapter (verse 0).
  ##
  ## Raises `ValueError` for an empty token or a non-positive/invalid number.
  let text = token.strip
  if text.len == 0:
    raise newException(ValueError, "empty reference point")
  let parts = text.split(':', maxsplit = 1)
  if parts.len == 2:
    let chapter = parsePositiveInt(parts[0], "chapter")
    let verse = parsePositiveInt(parts[1], "verse")
    return RefPoint(chapter: chapter, verse: verse)
  let number = parsePositiveInt(text, "reference number")
  # Chapter that a bare number belongs to as a verse; 0 means "none".
  let owningChapter =
    if singleChapter: 1
    else: defaultChapter
  if owningChapter > 0:
    RefPoint(chapter: owningChapter, verse: number)
  else:
    RefPoint(chapter: number, verse: 0)
 proc parseRange(segment: string, defaultChapter: int, singleChapter: bool): RefRange =
  ## Parses "start" or "start-finish" into a `RefRange`.
  ##
  ## The start point is parsed with `defaultChapter`. When the start carries
  ## a verse, a bare number after the dash is read as a verse of the start's
  ## chapter; otherwise it is read as a chapter. Without a dash the range
  ## starts and finishes on the same point.
  ##
  ## Raises `ValueError` when the finish lies before the start.
  let ends = segment.strip.split('-', maxsplit = 1)
  result.start = parsePoint(ends[0], defaultChapter, singleChapter)
  if ends.len == 2:
    let inheritedChapter =
      if result.start.verse > 0: result.start.chapter
      else: 0
    result.finish = parsePoint(ends[1], inheritedChapter, singleChapter)
  else:
    result.finish = result.start
  let first = result.start
  let last = result.finish
  # Verses are only compared when both ends actually name a verse.
  let backwards =
    last.chapter < first.chapter or
    (last.chapter == first.chapter and
      first.verse > 0 and
      last.verse > 0 and
      last.verse < first.verse)
  if backwards:
    raise newException(ValueError, "range ends before it starts: '" & segment & "'")
 proc parsePassageSpec(spec: string, book: BookInfo): seq[RefRange] =
  ## Parses the comma-separated chapter/verse part of a reference, for
  ## example "3:16,20-21", into one `RefRange` per segment.
  ##
  ## A bare number that follows a verse-level segment is read as a verse of
  ## the chapter in which that previous segment *ended*, so "3:16-4:2,5"
  ## yields 4:5 rather than 3:5. After a chapter-only segment the context is
  ## cleared and bare numbers are chapters again.
  ##
  ## Raises `ValueError` for an empty segment or any error from `parseRange`.
  var currentChapter = 0
  for rawSegment in spec.split(','):
    let segment = rawSegment.strip
    if segment.len == 0:
      raise newException(ValueError, "empty passage range in '" & spec & "'")
    let range = parseRange(segment, currentChapter, book.singleChapter)
    result.add(range)
    if segment.contains(':') or (range.start.verse > 0 and range.finish.verse > 0):
      # Carry forward the chapter where the range finished; for ranges within
      # one chapter this is the same as the start chapter.
      currentChapter = range.finish.chapter
    else:
      currentChapter = 0
 proc parseReference*(input: string): PassageReference =
  ## Parses a single reference such as "John 3:16,20-21".
  ##
  ## The input is normalized, the book is identified with `parseBook`, and
  ## any text left after the book name is parsed as passage ranges. A
  ## reference with no trailing text has an empty `ranges` list.
  let (book, rest) = parseBook(normalizeReferenceInput(input))
  result.book = book
  if rest.len != 0:
    result.ranges = parsePassageSpec(rest, book)
 proc parseReferences*(input: string): seq[PassageReference] =
  ## Parses a ';'-separated list of references, skipping blank entries.
  ##
  ## Raises `ValueError` when no non-blank reference is present.
  for piece in input.split(';'):
    let trimmed = piece.strip
    if trimmed.len == 0:
      continue
    result.add(parseReference(trimmed))
  if result.len == 0:
    raise newException(ValueError, "empty Bible reference")
 proc `$`*(point: RefPoint): string =
  ## Renders "chapter:verse", or just "chapter" when no verse is set.
  result = $point.chapter
  if point.verse > 0:
    result.add(':')
    result.add($point.verse)
 proc `$`*(range: RefRange): string =
  ## Renders a range. A single point prints once; a verse range inside one
  ## chapter prints as "chapter:verse-verse"; anything else prints as
  ## "start-finish".
  let first = range.start
  let last = range.finish
  if first == last:
    $first
  elif first.chapter == last.chapter and first.verse > 0 and last.verse > 0:
    $first.chapter & ":" & $first.verse & "-" & $last.verse
  else:
    $first & "-" & $last
 proc formatSingleChapterRange(range: RefRange): string =
  ## Renders a range without the chapter number: "verse" for a single point
  ## and "verse-verse" when both ends share a chapter. Ranges that span
  ## chapters fall back to the full "start-finish" form.
  let first = range.start
  let last = range.finish
  if first == last:
    $first.verse
  elif first.chapter == last.chapter:
    $first.verse & "-" & $last.verse
  else:
    $first & "-" & $last
 proc `$`*(reference: PassageReference): string =
  ## Renders the book name followed by its ranges joined with ", ".
  ## Single-chapter books use the verse-only range format.
  result = reference.book.name
  if reference.ranges.len == 0:
    return
  let verseOnly = reference.book.singleChapter
  var parts = newSeqOfCap[string](reference.ranges.len)
  for range in reference.ranges:
    parts.add(
      if verseOnly: formatSingleChapterRange(range)
      else: $range)
  result.add(" " & parts.join(", "))
@@ -0,0 +1,84 @@
 import std/[strutils, unittest]
 import ../src/kjv
 import ../src/reference_parser
 suite "reference parser":
  # Covers parseReference / parseReferences and the `$` rendering of the
  # parsed result.
  test "parses single verse references":
    let parsed = parseReference("John 3:16")
    check parsed.book.code == "JHN"
    check parsed.ranges.len == 1
    let only = parsed.ranges[0]
    check:
      only.start.chapter == 3
      only.start.verse == 16
      only.finish == only.start
      $parsed == "John 3:16"
  test "parses verse lists using the previous chapter":
    let parsed = parseReference("John 3:16,20-21")
    check parsed.ranges.len == 2
    let second = parsed.ranges[1]
    check:
      second.start.chapter == 3
      second.start.verse == 20
      second.finish.chapter == 3
      second.finish.verse == 21
      $parsed == "John 3:16, 3:20-21"
  test "parses chapter ranges":
    let parsed = parseReference("John 3-4")
    check parsed.ranges.len == 1
    let only = parsed.ranges[0]
    check:
      only.start.chapter == 3
      only.start.verse == 0
      only.finish.chapter == 4
      only.finish.verse == 0
      $parsed == "John 3-4"
  test "parses abbreviated numbered books":
    let parsed = parseReference("1 Jn 1:9")
    check parsed.book.code == "1JN"
    let first = parsed.ranges[0].start
    check:
      first.chapter == 1
      first.verse == 9
      $parsed == "1 John 1:9"
  test "parses unique canonical book prefixes":
    check:
      parseReference("Gene 1:1").book.code == "GEN"
      parseReference("Phile 3").book.code == "PHM"
      parseReference("Phili 1:6").book.code == "PHP"
  test "rejects ambiguous canonical book prefixes":
    expect ValueError:
      discard parseReference("Phil 1")
  test "normalizes single-chapter book references":
    let parsed = parseReference("Jude 3-4")
    check parsed.book.code == "JUD"
    let only = parsed.ranges[0]
    check:
      only.start.chapter == 1
      only.start.verse == 3
      only.finish.chapter == 1
      only.finish.verse == 4
      $parsed == "Jude 3-4"
  test "parses semicolon-separated references":
    let parsed = parseReferences("Psalm 23; John 3:16")
    check parsed.len == 2
    check:
      parsed[0].book.code == "PSA"
      parsed[1].book.code == "JHN"
 suite "offline KJV backend":
  # Each passage is expected to begin with its reference on a line of its
  # own and to contain a bracketed verse number.
  test "fetches a single embedded verse":
    let fetched = kjv.fetchPassages("John 3:16")
    check fetched.len == 1
    let text = fetched[0]
    check text.startsWith("John 3:16\n")
    check "  [16] " in text
  test "fetches a single-chapter embedded verse":
    let fetched = kjv.fetchPassages("Jude 3")
    check fetched.len == 1
    let text = fetched[0]
    check text.startsWith("Jude 3\n")
    check "  [3] " in text
@@ -0,0 +1,44 @@
 import std/unittest
 import ../src/passage_query
 suite "passage query parser":
  # parsePassageQueries(text, defaultTranslation) returns one query per
  # ';'-separated reference; a trailing "(XXX)" marker selects a translation.
  test "uses the default translation when no marker is present":
    let parsed = parsePassageQueries("John 3:16", "kjv")
    check parsed.len == 1
    check:
      parsed[0].referenceText == "John 3:16"
      parsed[0].translation == "kjv"
  test "uses a trailing translation marker":
    let parsed = parsePassageQueries("2 John 5 (KJV)", "esv")
    check parsed.len == 1
    check:
      parsed[0].referenceText == "2 John 5"
      parsed[0].translation == "kjv"
  test "parses mixed translation queries":
    let parsed = parsePassageQueries("2 John 5 (KJV); 2 John 5 (ESV)", "mev")
    check parsed.len == 2
    check:
      parsed[0].referenceText == "2 John 5"
      parsed[0].translation == "kjv"
      parsed[1].referenceText == "2 John 5"
      parsed[1].translation == "esv"
  test "uses the default translation per unmarked reference":
    let parsed = parsePassageQueries("John 3:16; Psalm 23 (MEV)", "nkjv")
    check parsed.len == 2
    check:
      parsed[0].referenceText == "John 3:16"
      parsed[0].translation == "nkjv"
      # The input says "Psalm" but the expected text is "Psalms".
      parsed[1].referenceText == "Psalms 23"
      parsed[1].translation == "mev"
  test "rejects unknown translation markers":
    expect ValueError:
      discard parsePassageQueries("John 3:16 (XYZ)", "esv")
  test "rejects unknown default translations":
    expect ValueError:
      discard parsePassageQueries("John 3:16", "xyz")
@@ -0,0 +1,144 @@
 import std/[os, strutils, tables]
 # Source archive: https://ebible.org/Scriptures/eng-kjv_usfm.zip
 # The 66 three-letter book codes (GEN .. REV) this generator emits.
 # `findCanonFiles` ignores any USFM file whose code is not listed here, and
 # `generate` walks the list in this order, so it also fixes the row order of
 # the output.
 const canonBookCodes = [
  "GEN", "EXO", "LEV", "NUM", "DEU", "JOS", "JDG", "RUT",
  "1SA", "2SA", "1KI", "2KI", "1CH", "2CH", "EZR", "NEH",
  "EST", "JOB", "PSA", "PRO", "ECC", "SNG", "ISA", "JER",
  "LAM", "EZK", "DAN", "HOS", "JOL", "AMO", "OBA", "JON",
  "MIC", "NAM", "HAB", "ZEP", "HAG", "ZEC", "MAL", "MAT",
  "MRK", "LUK", "JHN", "ACT", "ROM", "1CO", "2CO", "GAL",
  "EPH", "PHP", "COL", "1TH", "2TH", "1TI", "2TI", "TIT",
  "PHM", "HEB", "JAS", "1PE", "2PE", "1JN", "2JN", "3JN",
  "JUD", "REV"
 ]
 proc normalizeWhitespace(s: string): string =
  ## Collapses every run of ASCII whitespace in `s` to a single space and
  ## removes leading and trailing whitespace.
  s.splitWhitespace.join(" ")
 proc removeFootnotes(s: string): string =
  ## Returns `s` with every `\f ... \f*` footnote span removed.
  ##
  ## A footnote opens at the marker `\f` followed by a space and closes at
  ## the next `\f*`. If an opening marker has no closing marker, everything
  ## from that opening marker to the end of `s` is dropped.
  var pos = 0
  while pos < s.len:
    let openIdx = s.find("\\f ", pos)
    if openIdx < 0:
      # No further footnotes: keep the remainder verbatim.
      result.add(s[pos .. ^1])
      break
    result.add(s[pos ..< openIdx])
    let closeIdx = s.find("\\f*", openIdx + 2)
    if closeIdx < 0:
      break
    pos = closeIdx + 3
 proc stripUsfmMarkup(s: string): string =
  ## Reduces one line of USFM to plain text.
  ##
  ## Footnotes are removed first. Then each backslash marker (optionally
  ## prefixed with '+', made of letters, digits and '-') is dropped: a
  ## closing marker loses its '*', an opening marker loses the whitespace
  ## that follows it. Text from a '|' up to the next backslash is discarded,
  ## and the result is whitespace-normalized.
  const markerChars = Letters + Digits + {'-'}
  let src = removeFootnotes(s)
  var pos = 0
  while pos < src.len:
    let ch = src[pos]
    if ch == '\\':
      inc pos
      if pos < src.len and src[pos] == '+':
        inc pos
      while pos < src.len and src[pos] in markerChars:
        inc pos
      if pos < src.len and src[pos] == '*':
        # Closing marker: keep any whitespace that follows it.
        inc pos
      else:
        # Opening marker: the separating whitespace belongs to the marker.
        while pos < src.len and src[pos].isSpaceAscii:
          inc pos
    elif ch == '|':
      # Skip everything up to the next marker.
      while pos < src.len and src[pos] != '\\':
        inc pos
    else:
      result.add(if ch == '\t': ' ' else: ch)
      inc pos
  result = normalizeWhitespace(result)
 proc parseVerseLine(line: string): tuple[verse: int, text: string] =
  ## Splits a verse line into its verse number and plain text.
  ##
  ## The first three characters of `line` (the marker and its separator) are
  ## skipped; the number runs up to the first space and the remainder is
  ## passed through `stripUsfmMarkup`.
  ##
  ## Raises `ValueError` when nothing follows the number or when the number
  ## is not an integer.
  let body = line[3..^1].strip
  let sep = body.find(' ')
  if sep < 0:
    raise newException(ValueError, "verse marker without text: " & line)
  (verse: parseInt(body[0 ..< sep]),
   text: stripUsfmMarkup(body[sep + 1 .. ^1]))
 proc findCanonFiles(inputDir: string): Table[string, string] =
  ## Maps book code -> file path for the USFM files found in `inputDir`.
  ##
  ## Only files matching `*eng-kjv.usfm` are considered. The code is the text
  ## between the first '-' in the file name and the `eng-kjv.usfm` suffix;
  ## files whose code is not in `canonBookCodes` are ignored.
  for path in walkFiles(inputDir / "*eng-kjv.usfm"):
    let name = path.extractFilename
    let dashIdx = name.find('-')
    let suffixIdx = name.find("eng-kjv.usfm")
    if dashIdx < 0 or suffixIdx <= dashIdx:
      continue
    let code = name[dashIdx + 1 ..< suffixIdx]
    if code in canonBookCodes:
      result[code] = path
 proc generate(inputDir, outputPath: string) =
  ## Converts the USFM files in `inputDir` into a tab-separated file at
  ## `outputPath`, one row per verse: code, chapter, verse, text.
  ##
  ## Books are written in `canonBookCodes` order. Raises `ValueError` when a
  ## book has no matching USFM file.
  let canonFiles = findCanonFiles(inputDir)
  var rows: seq[string] = @[]
  for code in canonBookCodes:
    if not canonFiles.hasKey(code):
      raise newException(ValueError, "missing USFM file for " & code)
    # Per-book parser state; a verse is "open" while verse > 0.
    var chapter = 0
    var verse = 0
    var verseText = ""
    proc flushVerse() =
      # Emits the open verse (if any, and if it has text) and closes it.
      if chapter > 0 and verse > 0:
        let text = normalizeWhitespace(verseText).replace("\t", " ")
        if text.len > 0:
          rows.add([code, $chapter, $verse, text].join("\t"))
      verse = 0
      verseText = ""
    for rawLine in canonFiles[code].lines:
      let line = rawLine.strip
      if line.startsWith("\\c "):
        # Chapter marker: close the previous verse before switching chapter.
        flushVerse()
        chapter = parseInt(line[3..^1].strip)
      elif line.startsWith("\\v "):
        # Verse marker: close the previous verse and open the new one.
        flushVerse()
        let parsed = parseVerseLine(line)
        verse = parsed.verse
        verseText = parsed.text
      elif verse > 0:
        # Any other line while a verse is open continues that verse's text.
        let continued = stripUsfmMarkup(line)
        if continued.len > 0:
          if verseText.len > 0:
            verseText.add(' ')
          verseText.add(continued)
    # Emit the last verse of the book.
    flushVerse()
  createDir(outputPath.parentDir)
  writeFile(outputPath, rows.join("\n") & "\n")
 when isMainModule:
  # Command-line entry point: exactly two arguments are required.
  if paramCount() == 2:
    generate(paramStr(1), paramStr(2))
  else:
    quit("Usage: generate_kjv_data <usfm-dir> <output-tsv>", QuitFailure)
@@ -0,0 +1,330 @@
 import std/[
  htmlparser,
  os,
  osproc,
  streams,
  strutils,
  xmlparser,
  xmltree
 ]
 import ../src/reference_parser
 type
  # One navPoint of the EPUB table of contents that points at a split file.
  TocEntry = object
    label: string     # text of the navPoint label
    code: string      # book code for the label, "" when it is not a book
    fileIndex: int    # number taken from the "index_split_NNN.html" target
  # Inclusive range of split-file numbers that hold one book.
  BookSource = object
    code: string
    startIndex: int
    endIndex: int
  # Mutable state carried through the HTML walk of one book.
  ParseState = object
    code: string      # book code written into every row
    chapter: int      # current chapter, 0 until one has been seen
    verse: int        # current verse, 0 while no verse is open
    verseText: string # text gathered so far for the open verse
    rows: seq[string] # finished tab-separated rows
 proc normalizeWhitespace(s: string): string =
  ## Collapses runs of whitespace to a single space and trims both ends.
  ## UTF-8 no-break spaces (bytes C2 A0) are treated as ordinary spaces.
  s.replace("\xC2\xA0", " ").splitWhitespace.join(" ")
 proc markerText(s: string): string =
  ## Returns `s` with its whitespace normalized and then every remaining
  ## space removed, so "1 2" becomes "12".
  let collapsed = normalizeWhitespace(s)
  result = collapsed.replace(" ", "")
 proc numberAfterPrefix(s, prefix: string): int =
  ## Returns the number that directly follows `prefix` in `s`, or 0.
  ##
  ## `s` is whitespace-normalized and upper-cased before the comparison, so
  ## `prefix` must be given in upper case. Only a run of digits at the start
  ## of the text after the prefix counts; 0 is returned when the prefix is
  ## absent or no digit follows it.
  let text = normalizeWhitespace(s).toUpperAscii
  if not text.startsWith(prefix):
    return 0
  let tail = text[prefix.len .. ^1].strip
  # Measure the leading digit run; `tail` is stripped, so it cannot start
  # with whitespace.
  var stop = 0
  while stop < tail.len and tail[stop].isDigit:
    inc stop
  if stop > 0:
    result = parseInt(tail[0 ..< stop])
 proc isPositiveIntText(s: string): bool =
  ## True when `s`, with all whitespace removed, consists solely of ASCII
  ## digits and denotes a number greater than zero.
  let text = markerText(s)
  if text.len == 0 or not text.allCharsInSet(Digits):
    return false
  parseInt(text) > 0
 proc readEpubEntry(epubPath, entryPath: string): string =
  ## Returns the contents of `entryPath` inside the archive `epubPath` by
  ## running `unzip -p`, which is looked up on PATH.
  ##
  ## stderr is merged into stdout so that a failure message can be reported.
  ## Raises `IOError` when `unzip` exits with a non-zero status.
  let process = startProcess(
    "unzip",
    args = ["-p", epubPath, entryPath],
    options = {poUsePath, poStdErrToStdOut})
  # Release the process handles on every exit path, including when reading
  # the output or waiting for exit raises.
  defer: process.close()
  # Drain the output before waiting so the child cannot block on a full pipe.
  result = process.outputStream.readAll()
  let exitCode = process.waitForExit()
  if exitCode != 0:
    raise newException(IOError,
      "could not read " & entryPath & " from " & epubPath & ": " & result)
 proc textContent(node: XmlNode): string =
  ## Concatenates, in document order, the text of `node` and of all its
  ## descendants. Only text nodes contribute; other non-element kinds yield
  ## nothing.
  if node.kind == xnText:
    return node.text
  if node.kind == xnElement:
    for child in node.items:
      result.add(textContent(child))
 proc firstDescendant(node: XmlNode, tag: string): XmlNode =
  ## Depth-first search for the first element named `tag`, starting with
  ## `node` itself. Returns nil when there is none or when `node` is not an
  ## element.
  if node.kind != xnElement:
    return nil
  if node.tag == tag:
    return node
  for child in node.items:
    result = firstDescendant(child, tag)
    if not result.isNil:
      return
 proc descendantText(node: XmlNode, tag: string): string =
  ## Whitespace-normalized text of the first `tag` element at or below
  ## `node`, or "" when there is no such element.
  let target = firstDescendant(node, tag)
  if not target.isNil:
    result = normalizeWhitespace(textContent(target))
 proc descendantAttr(node: XmlNode, tag, attrName: string): string =
  ## Value of attribute `attrName` on the first `tag` element at or below
  ## `node`, or "" when there is no such element.
  let target = firstDescendant(node, tag)
  if not target.isNil:
    result = target.attr(attrName)
 proc bookCodeForLabel(label: string): string =
  ## Maps a table-of-contents label to a book code.
  ##
  ## Only the text before the first "(" is considered. The exact name
  ## "Solomon" maps to "SNG"; otherwise the name must equal the `name` of an
  ## entry in `CanonBooks`. Returns "" when nothing matches.
  let parenIdx = label.find('(')
  let bookName =
    if parenIdx < 0: label.strip
    else: label[0 ..< parenIdx].strip
  if bookName == "Solomon":
    return "SNG"
  for book in CanonBooks:
    if book.name == bookName:
      result = book.code
      break
 proc indexFromSplitFile(path: string): int =
  ## Extracts NNN from a path ending in "index_split_NNN.html"; any "#..."
  ## fragment and leading directories are ignored. Returns 0 when the file
  ## name does not follow that pattern. Raises `ValueError` when the part
  ## between prefix and suffix is not an integer.
  const
    prefix = "index_split_"
    suffix = ".html"
  let filename = path.split('#', maxsplit = 1)[0].extractFilename
  if filename.startsWith(prefix) and filename.endsWith(suffix):
    result = parseInt(filename[prefix.len ..< filename.len - suffix.len])
 proc parseTocEntries(epubPath: string): seq[TocEntry] =
  ## Reads `toc.ncx` from the EPUB and returns, in document order, one
  ## `TocEntry` for every `navPoint` whose content `src` names a split file
  ## with a positive index. Nested navPoints are visited as well.
  var found: seq[TocEntry] = @[]
  proc visit(node: XmlNode) =
    if node.kind != xnElement:
      return
    if node.tag == "navPoint":
      let label = node.descendantText("text")
      let fileIndex = indexFromSplitFile(node.descendantAttr("content", "src"))
      if fileIndex > 0:
        found.add(TocEntry(
          label: label,
          code: bookCodeForLabel(label),
          fileIndex: fileIndex))
    for child in node.items:
      visit(child)
  visit(parseXml(newStringStream(readEpubEntry(epubPath, "toc.ncx"))))
  found
 proc bookSources(entries: seq[TocEntry]): seq[BookSource] =
  ## Turns table-of-contents entries into per-book file ranges.
  ##
  ## Entries without a book code are skipped. A book runs from its own file
  ## index up to the file before the next entry (whether or not that entry
  ## is a book); the final entry covers only its own file.
  ##
  ## Raises `ValueError` unless the result lists exactly the books of
  ## `CanonBooks`, in the same order.
  for idx, entry in entries:
    if entry.code.len > 0:
      let lastFile =
        if idx == entries.high: entry.fileIndex
        else: entries[idx + 1].fileIndex - 1
      result.add(BookSource(
        code: entry.code,
        startIndex: entry.fileIndex,
        endIndex: lastFile))
  if result.len != CanonBooks.len:
    raise newException(ValueError,
      "expected " & $CanonBooks.len & " canonical books in EPUB TOC, found " &
        $result.len)
  for idx, book in CanonBooks:
    let foundCode = result[idx].code
    if foundCode != book.code:
      raise newException(ValueError,
        "expected " & book.code & " at position " & $idx & ", found " &
          foundCode)
 proc hasClass(node: XmlNode, className: string): bool =
  ## True when `node` is an element whose whitespace-separated `class`
  ## attribute contains exactly `className`.
  node.kind == xnElement and
    className in node.attr("class").splitWhitespace
 proc shouldSkipElement(node: XmlNode): bool =
  ## True for elements whose content is not verse text:
  ## - calibre_29: section headings
  ## - calibre_6:  parallel/cross-reference paragraphs
  ## - calibre_26: Psalm superscriptions/cross-references
  for skippedClass in ["calibre_29", "calibre_6", "calibre_26"]:
    if node.hasClass(skippedClass):
      return true
  false
 proc hasHref(node: XmlNode): bool =
  ## True when `node` or any element below it has a non-empty `href`
  ## attribute.
  if node.kind != xnElement:
    return false
  if node.attr("href").len > 0:
    return true
  for child in node.items:
    if hasHref(child):
      return true
  false
 proc isBlockElement(node: XmlNode): bool =
  ## True when `node` is one of the elements after which `walkPassageText`
  ## inserts a separating space.
  if node.kind != xnElement:
    return false
  case node.tag
  of "blockquote", "br", "div", "h1", "h2", "h3", "li", "p": true
  else: false
 proc chapterMarker(node: XmlNode): int =
  ## Returns the chapter number held by a `<span class="calibre1">` element
  ## whose text is a positive integer, and 0 for anything else.
  if node.kind != xnElement or node.tag != "span" or
      not node.hasClass("calibre1"):
    return 0
  let text = markerText(textContent(node))
  if text.isPositiveIntText:
    result = parseInt(text)
 proc headingChapterMarker(node: XmlNode, code: string): int =
  ## Returns the chapter number announced by a `<p>` heading, or 0.
  ##
  ## The paragraph text must start with "CHAPTER " followed by a number; for
  ## the book code "PSA" a "PSALM " prefix is accepted as well.
  if node.kind == xnElement and node.tag == "p":
    let text = textContent(node)
    result = numberAfterPrefix(text, "CHAPTER ")
    if result <= 0 and code == "PSA":
      result = numberAfterPrefix(text, "PSALM ")
 proc verseMarker(node: XmlNode): int =
  ## Returns the verse number held by a `<sup>` element that contains no
  ## link and whose text is a positive integer, and 0 for anything else.
  if node.kind != xnElement or node.tag != "sup" or node.hasHref:
    return 0
  let text = markerText(textContent(node))
  if text.isPositiveIntText:
    result = parseInt(text)
 proc leadingVerseText(s: string): tuple[verse: int, rest: string] =
  ## Splits text of the form "<spaces><number><spaces><rest>".
  ##
  ## UTF-8 no-break spaces count as spaces. Returns the number and the text
  ## that follows it, or (0, "") when `s` does not start with a number.
  let text = s.replace("\xC2\xA0", " ")
  var pos = 0
  while pos < text.len and text[pos].isSpaceAscii:
    inc pos
  let numStart = pos
  while pos < text.len and text[pos].isDigit:
    inc pos
  let numEnd = pos
  if numEnd > numStart:
    # Drop the whitespace that separates the number from the verse text.
    while pos < text.len and text[pos].isSpaceAscii:
      inc pos
    result.verse = parseInt(text[numStart ..< numEnd])
    if pos < text.len:
      result.rest = text[pos .. ^1]
 proc flushVerse(state: var ParseState) =
  ## Appends the pending verse to `state.rows` as a tab-separated row (code,
  ## chapter, verse, text) when a chapter and verse are set and the
  ## normalized text is not empty. The pending text is always cleared;
  ## `chapter` and `verse` are left untouched.
  let pending = state.verseText
  state.verseText = ""
  if state.chapter <= 0 or state.verse <= 0:
    return
  let text = normalizeWhitespace(pending).replace("\t", " ")
  if text.len == 0:
    return
  state.rows.add(
    state.code & "\t" & $state.chapter & "\t" & $state.verse & "\t" & text)
 proc walkPassageText(node: XmlNode, state: var ParseState) =
  ## Walks `node` depth-first and feeds its content into `state`.
  ##
  ## Chapter and verse markers flush the verse collected so far and update
  ## the position; text nodes are appended to the open verse. Nothing is
  ## collected before the first chapter is known.
  case node.kind
  of xnText:
    if state.chapter > 0:
      if state.verse == 0:
        # No verse is open: the text only counts if it starts with a verse
        # number, which then opens that verse.
        let leading = leadingVerseText(node.text)
        if leading.verse > 0:
          state.verse = leading.verse
          state.verseText.add(leading.rest)
      elif state.verse > 0:
        state.verseText.add(node.text)
  of xnElement:
    # "CHAPTER n" / "PSALM n" headings start a chapter with no open verse.
    # This is checked before the skip list below.
    let headingChapter = headingChapterMarker(node, state.code)
    if headingChapter > 0:
      state.flushVerse()
      state.chapter = headingChapter
      state.verse = 0
      return
    if node.shouldSkipElement:
      return
    # An inline chapter-number span starts the chapter and opens verse 1.
    let chapter = chapterMarker(node)
    if chapter > 0:
      state.flushVerse()
      state.chapter = chapter
      state.verse = 1
      return
    let verse = verseMarker(node)
    if verse > 0:
      state.flushVerse()
      state.verse = verse
      return
    # Any other <sup> (one that was not accepted as a verse marker) is
    # dropped together with its content.
    if node.tag == "sup":
      return
    for child in node.items:
      walkPassageText(child, state)
    # Keep words from adjacent block elements apart.
    if node.isBlockElement and state.chapter > 0 and state.verse > 0:
      state.verseText.add(' ')
  else:
    discard
 proc indexSplitFile(index: int): string =
  ## Builds the split-file name for `index`, left-padding the number with
  ## zeros to at least three characters: 7 -> "index_split_007.html".
  result = "index_split_"
  result.add(align($index, 3, '0'))
  result.add(".html")
 proc parseBook(epubPath: string, source: BookSource): seq[string] =
  ## Parses every split file of one book and returns its verse rows.
  ##
  ## Single-chapter books start in chapter 1; all others start with no
  ## chapter until a chapter marker is found.
  var state = ParseState(
    code: source.code,
    chapter: (if bookInfo(source.code).singleChapter: 1 else: 0))
  for fileNo in source.startIndex .. source.endIndex:
    let page = parseHtml(newStringStream(
      readEpubEntry(epubPath, indexSplitFile(fileNo))))
    walkPassageText(page, state)
  # Emit whatever verse was still open at the end of the last file.
  state.flushVerse()
  result = state.rows
 proc generate(epubPath, outputPath: string) =
  ## Extracts every book listed in the EPUB's table of contents and writes
  ## all verse rows to `outputPath`, one per line, creating the parent
  ## directory if necessary.
  var rows: seq[string]
  for source in bookSources(parseTocEntries(epubPath)):
    rows.add(parseBook(epubPath, source))
  createDir(outputPath.parentDir)
  writeFile(outputPath, rows.join("\n") & "\n")
 when isMainModule:
  # Command-line entry point: exactly two arguments are required.
  if paramCount() == 2:
    generate(paramStr(1), paramStr(2))
  else:
    quit("Usage: generate_mev_data <mev-epub> <output-tsv>", QuitFailure)
Author	SHA1	Message	Date
jdb	dbc39480f7	Prepare 1.1.0 release	2026-06-29 08:16:27 -05:00
jdb	176fa46816	Add translation-aware passage queries	2026-06-29 08:16:27 -05:00
jdb	42d2587704	Add private MEV embedded support	2026-06-29 08:16:27 -05:00
jdb	544062902b	Add embedded KJV support	2026-06-29 08:16:27 -05:00
jdb	2d78d8e5c0	Update .nimble file for nimble v0.22+	2026-06-23 22:19:06 -05:00
jdb	da6ba66e1c	Prepare 1.0.0 release	2026-06-13 07:15:50 -05:00
jdb	17f953882f	Add API.Bible translation support	2026-06-13 07:12:14 -05:00
jdb	ffe3118ddf	Migrate tool versions to mise	2026-06-13 06:44:42 -05:00
jdb	49594610dc	Rename CLI to bibleref	2026-06-13 06:42:33 -05:00
jdb	8f83c07693	Pin to Nim 1.6 with asdf.	2024-08-05 08:01:55 -05:00
jdb	8422199d7b	Add reading format, rework plain format.	2023-09-17 06:57:52 -05:00
jdb	a0c17bcad9	Rework Markdown formatting to handle quoted passages and poem structure.	2023-06-25 18:47:57 -05:00
jdb	d622d767a0	Better line match to avoid cutting off verses that contain line breaks.	2023-06-21 06:11:54 -05:00