Add embedded KJV support

2026-06-14 07:46:21 -05:00
parent 2d78d8e5c0
commit 544062902b
8 changed files with 31857 additions and 3 deletions
@@ -1,3 +1,5 @@
 esv_api
 bibleref
+tests/test_offline_kjv
+data/private/
 *.sw?
@@ -8,6 +8,7 @@ import cliutils, docopt, zero_functional

 import ./api_bible
 import ./esv
+import ./kjv

 proc formatMarkdown(raw, translation: string): string =
  var reference = ""
@@ -77,6 +78,8 @@ proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[str
      reference,
      cfg.getVal("esv-api-token"),
      cfg.getVal("esv-api-root", "https://api.esv.org"))
+  of "akjv", "kjv":
+    kjv.fetchPassages(reference)
  of "amp", "nkjv", "niv":
    api_bible.fetchPassages(
      reference,
@@ -89,7 +92,7 @@ proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[str
  else:
    raise newException(ValueError,
      "unsupported translation '" & translation &
-        "'; supported translations: amp, esv, nkjv, niv")
+        "'; supported translations: akjv, amp, esv, kjv, nkjv, niv")

 when isMainModule:
  const USAGE = """Usage:
@@ -107,8 +110,8 @@ Options:

  -t, --translation <translation>
                                Select a specific translation. Supported values
-                                are 'amp', 'esv', 'nkjv', and 'niv'. Defaults
-                                to 'esv'.
+                                are 'akjv', 'amp', 'esv', 'kjv', 'nkjv', and
+                                'niv'. Defaults to 'esv'.

  --esv-api-token <token>       Provide the API token on the command line. By
                                default this will be read either from the
@@ -0,0 +1,109 @@
+import std/[strutils, tables]
+
+import ./offline_data
+import ./reference_parser
+
+# Raw KJV verse rows, embedded into the binary at compile time by the
+# `embeddedTranslationData` template. `loadBibleIndex` expects each non-blank
+# line to hold four tab-separated columns: book code, chapter, verse, text.
+const kjvRows = embeddedTranslationData("kjv")
+
+# Lookup tables built once from the embedded verse rows by `loadBibleIndex`.
+type BibleIndex = object
+  # Verse text keyed by `verseKey(code, chapter, verse)`.
+  verses: Table[string, string]
+  # Highest verse number seen for each `chapterKey(code, chapter)`.
+  lastVerseByChapter: Table[string, int]
+  # Highest chapter number seen for each book code.
+  lastChapterByBook: Table[string, int]
+
+proc verseKey(code: string, chapter, verse: int): string =
+  ## Builds the table key for a single verse: the book code, chapter and
+  ## verse joined by tab characters.
+  [code, $chapter, $verse].join("\t")
+
+proc chapterKey(code: string, chapter: int): string =
+  ## Builds the table key for a chapter: the book code and chapter number
+  ## separated by a tab character.
+  result = code
+  result.add('\t')
+  result.add($chapter)
+
+proc loadBibleIndex(): BibleIndex =
+  ## Parses the embedded `kjvRows` text into a `BibleIndex`.
+  ##
+  ## Blank lines are ignored. Every other line must split into exactly four
+  ## tab-separated fields (code, chapter, verse, text); otherwise a
+  ## `ValueError` is raised. `parseInt` raises `ValueError` as well when the
+  ## chapter or verse field is not an integer.
+  for row in kjvRows.splitLines:
+    if row.strip.len > 0:
+      # maxsplit = 3 keeps any further tabs inside the text column.
+      let fields = row.split('\t', maxsplit = 3)
+      if fields.len != 4:
+        raise newException(ValueError, "invalid embedded KJV row: " & row)
+
+      let
+        book = fields[0]
+        chapterNo = parseInt(fields[1])
+        verseNo = parseInt(fields[2])
+
+      result.verses[verseKey(book, chapterNo, verseNo)] = fields[3]
+
+      # Track the highest verse seen in this chapter.
+      let chapterId = chapterKey(book, chapterNo)
+      if chapterId notin result.lastVerseByChapter or
+          result.lastVerseByChapter[chapterId] < verseNo:
+        result.lastVerseByChapter[chapterId] = verseNo
+
+      # Track the highest chapter seen in this book.
+      if book notin result.lastChapterByBook or
+          result.lastChapterByBook[book] < chapterNo:
+        result.lastChapterByBook[book] = chapterNo
+
+proc requireLastChapter(index: BibleIndex, code: string): int =
+  ## Returns the highest chapter number indexed for book `code`.
+  ## Raises `ValueError` when the index holds no rows for that book.
+  if code in index.lastChapterByBook:
+    return index.lastChapterByBook[code]
+
+  raise newException(ValueError, "no embedded KJV data for " & code)
+
+proc requireLastVerse(index: BibleIndex, code: string, chapter: int): int =
+  ## Returns the highest verse number indexed for `chapter` of book `code`.
+  ## Raises `ValueError` when the index holds no rows for that chapter.
+  let lookup = chapterKey(code, chapter)
+  if lookup in index.lastVerseByChapter:
+    return index.lastVerseByChapter[lookup]
+
+  raise newException(ValueError,
+    "no embedded KJV data for " & bookInfo(code).name & " " & $chapter)
+
+proc requireVerse(index: BibleIndex, code: string, chapter, verse: int): string =
+  ## Returns the indexed text of the given verse.
+  ## Raises `ValueError` when the index has no entry for that verse.
+  let lookup = verseKey(code, chapter, verse)
+  if lookup in index.verses:
+    return index.verses[lookup]
+
+  raise newException(ValueError,
+    "no embedded KJV data for " & bookInfo(code).name & " " &
+      $chapter & ":" & $verse)
+
+proc addVerseLines(
+    lines: var seq[string],
+    index: BibleIndex,
+    reference: PassageReference,
+    range: RefRange) =
+  ## Appends one formatted line per verse covered by `range` to `lines`.
+  ##
+  ## A verse number of 0 on either end of the range means "unbounded": the
+  ## first chapter then starts at verse 1 and the last chapter runs to its
+  ## final indexed verse. Raises `ValueError` when the book, a chapter or a
+  ## verse is missing from the index, or when a chapter's start verse lies
+  ## after its end verse.
+  let
+    book = reference.book.code
+    first = range.start
+    last = range.finish
+
+  # Fail up front if the index has no rows at all for this book.
+  discard index.requireLastChapter(book)
+
+  for ch in first.chapter .. last.chapter:
+    var fromVerse = 1
+    if ch == first.chapter and first.verse > 0:
+      fromVerse = first.verse
+
+    var toVerse = last.verse
+    if ch != last.chapter or last.verse <= 0:
+      toVerse = index.requireLastVerse(book, ch)
+
+    if fromVerse > toVerse:
+      raise newException(ValueError, "reference range starts after it ends")
+
+    for v in fromVerse .. toVerse:
+      lines.add("  [" & $v & "] " & index.requireVerse(book, ch, v))
+
+proc fetchReference(index: BibleIndex, reference: PassageReference): string =
+  ## Renders one parsed reference as text: a heading line (`$reference`)
+  ## followed by one line per verse, joined with newlines.
+  ##
+  ## A reference without ranges yields every indexed verse of the book.
+  var output = @[$reference]
+
+  if reference.ranges.len > 0:
+    for span in reference.ranges:
+      output.addVerseLines(index, reference, span)
+  else:
+    let book = reference.book.code
+    for ch in 1 .. index.requireLastChapter(book):
+      for v in 1 .. index.requireLastVerse(book, ch):
+        output.add("  [" & $v & "] " & index.requireVerse(book, ch, v))
+
+  output.join("\n")
+
+proc fetchPassages*(reference: string): seq[string] =
+  ## Resolves a reference string (possibly several references separated by
+  ## `;`) against the embedded KJV text and returns one rendered passage per
+  ## reference. Parsing and lookup failures surface as `ValueError`.
+  let
+    index = loadBibleIndex()
+    parsed = parseReferences(reference)
+
+  result = newSeqOfCap[string](parsed.len)
+  for item in parsed:
+    result.add(fetchReference(index, item))
@@ -0,0 +1,11 @@
+import std/os
+
+template embeddedTranslationData*(name: static[string]): string =
+  ## Expands to the contents of `<name>.tsv`, read at compile time with
+  ## `staticRead`.
+  ##
+  ## The file is looked up under a `data` directory located in the parent of
+  ## this source file's directory. `data/private/<name>.tsv` is used when it
+  ## exists at compile time; otherwise `data/public/<name>.tsv` is read.
+  const dataRoot = currentSourcePath().parentDir.parentDir / "data"
+  const privatePath = dataRoot / "private" / (name & ".tsv")
+  const publicPath = dataRoot / "public" / (name & ".tsv")
+
+  # `when` resolves the choice during compilation, so only one file is read.
+  when fileExists(privatePath):
+    staticRead(privatePath)
+  else:
+    staticRead(publicPath)
@@ -0,0 +1,399 @@
+import std/[strutils]
+
+type
+  # Static description of one Bible book.
+  BookInfo* = object
+    # Three-character book code, e.g. "GEN".
+    code*: string
+    # Display name, e.g. "Genesis".
+    name*: string
+    # When true, a bare number in a reference is parsed as a verse of
+    # chapter 1 rather than as a chapter (see `parsePoint`).
+    singleChapter*: bool
+
+  # One end of a range. `verse == 0` means no verse was given, i.e. the point
+  # refers to the chapter as a whole.
+  RefPoint* = object
+    chapter*: int
+    verse*: int
+
+  # Span from `start` to `finish`; a single point has both set to the same
+  # value.
+  RefRange* = object
+    start*: RefPoint
+    finish*: RefPoint
+
+  # A parsed reference: the book plus zero or more ranges. `ranges` stays
+  # empty when the input named only a book.
+  PassageReference* = object
+    book*: BookInfo
+    ranges*: seq[RefRange]
+
+# The 66 books known to the parser, listed from Genesis through Revelation.
+# `bookInfo` and `bookIndex` search this table by `code`; entries flagged
+# `singleChapter` (Obadiah, Philemon, 2 John, 3 John, Jude) get verse-only
+# reference parsing.
+const CanonBooks*: array[66, BookInfo] = [
+  BookInfo(code: "GEN", name: "Genesis"),
+  BookInfo(code: "EXO", name: "Exodus"),
+  BookInfo(code: "LEV", name: "Leviticus"),
+  BookInfo(code: "NUM", name: "Numbers"),
+  BookInfo(code: "DEU", name: "Deuteronomy"),
+  BookInfo(code: "JOS", name: "Joshua"),
+  BookInfo(code: "JDG", name: "Judges"),
+  BookInfo(code: "RUT", name: "Ruth"),
+  BookInfo(code: "1SA", name: "1 Samuel"),
+  BookInfo(code: "2SA", name: "2 Samuel"),
+  BookInfo(code: "1KI", name: "1 Kings"),
+  BookInfo(code: "2KI", name: "2 Kings"),
+  BookInfo(code: "1CH", name: "1 Chronicles"),
+  BookInfo(code: "2CH", name: "2 Chronicles"),
+  BookInfo(code: "EZR", name: "Ezra"),
+  BookInfo(code: "NEH", name: "Nehemiah"),
+  BookInfo(code: "EST", name: "Esther"),
+  BookInfo(code: "JOB", name: "Job"),
+  BookInfo(code: "PSA", name: "Psalms"),
+  BookInfo(code: "PRO", name: "Proverbs"),
+  BookInfo(code: "ECC", name: "Ecclesiastes"),
+  BookInfo(code: "SNG", name: "Song of Solomon"),
+  BookInfo(code: "ISA", name: "Isaiah"),
+  BookInfo(code: "JER", name: "Jeremiah"),
+  BookInfo(code: "LAM", name: "Lamentations"),
+  BookInfo(code: "EZK", name: "Ezekiel"),
+  BookInfo(code: "DAN", name: "Daniel"),
+  BookInfo(code: "HOS", name: "Hosea"),
+  BookInfo(code: "JOL", name: "Joel"),
+  BookInfo(code: "AMO", name: "Amos"),
+  BookInfo(code: "OBA", name: "Obadiah", singleChapter: true),
+  BookInfo(code: "JON", name: "Jonah"),
+  BookInfo(code: "MIC", name: "Micah"),
+  BookInfo(code: "NAM", name: "Nahum"),
+  BookInfo(code: "HAB", name: "Habakkuk"),
+  BookInfo(code: "ZEP", name: "Zephaniah"),
+  BookInfo(code: "HAG", name: "Haggai"),
+  BookInfo(code: "ZEC", name: "Zechariah"),
+  BookInfo(code: "MAL", name: "Malachi"),
+  BookInfo(code: "MAT", name: "Matthew"),
+  BookInfo(code: "MRK", name: "Mark"),
+  BookInfo(code: "LUK", name: "Luke"),
+  BookInfo(code: "JHN", name: "John"),
+  BookInfo(code: "ACT", name: "Acts"),
+  BookInfo(code: "ROM", name: "Romans"),
+  BookInfo(code: "1CO", name: "1 Corinthians"),
+  BookInfo(code: "2CO", name: "2 Corinthians"),
+  BookInfo(code: "GAL", name: "Galatians"),
+  BookInfo(code: "EPH", name: "Ephesians"),
+  BookInfo(code: "PHP", name: "Philippians"),
+  BookInfo(code: "COL", name: "Colossians"),
+  BookInfo(code: "1TH", name: "1 Thessalonians"),
+  BookInfo(code: "2TH", name: "2 Thessalonians"),
+  BookInfo(code: "1TI", name: "1 Timothy"),
+  BookInfo(code: "2TI", name: "2 Timothy"),
+  BookInfo(code: "TIT", name: "Titus"),
+  BookInfo(code: "PHM", name: "Philemon", singleChapter: true),
+  BookInfo(code: "HEB", name: "Hebrews"),
+  BookInfo(code: "JAS", name: "James"),
+  BookInfo(code: "1PE", name: "1 Peter"),
+  BookInfo(code: "2PE", name: "2 Peter"),
+  BookInfo(code: "1JN", name: "1 John"),
+  BookInfo(code: "2JN", name: "2 John", singleChapter: true),
+  BookInfo(code: "3JN", name: "3 John", singleChapter: true),
+  BookInfo(code: "JUD", name: "Jude", singleChapter: true),
+  BookInfo(code: "REV", name: "Revelation")
+]
+
+# (book code, alias) pairs consulted by `parseBook` when the input is not a
+# unique prefix of a canonical book name. Aliases are written in lower case;
+# `matchAlias` compares case-insensitively and treats a space in an alias as
+# "any run of spaces or dots".
+#
+# "phil" is deliberately absent: it must stay ambiguous between Philippians
+# and Philemon. Every three-letter book code that is not already a prefix of
+# its canonical name is listed here (including "nam" for Nahum) so the codes
+# themselves are accepted as input.
+const bookAliases = [
+  ("GEN", "genesis"), ("GEN", "gen"),
+  ("EXO", "exodus"), ("EXO", "exod"), ("EXO", "exo"),
+  ("LEV", "leviticus"), ("LEV", "lev"),
+  ("NUM", "numbers"), ("NUM", "num"), ("NUM", "numb"),
+  ("DEU", "deuteronomy"), ("DEU", "deut"), ("DEU", "deu"),
+  ("JOS", "joshua"), ("JOS", "josh"), ("JOS", "jos"),
+  ("JDG", "judges"), ("JDG", "judg"), ("JDG", "jdg"),
+  ("RUT", "ruth"), ("RUT", "rut"),
+  ("1SA", "1 samuel"), ("1SA", "1 sam"), ("1SA", "i samuel"), ("1SA", "first samuel"),
+  ("2SA", "2 samuel"), ("2SA", "2 sam"), ("2SA", "ii samuel"), ("2SA", "second samuel"),
+  ("1KI", "1 kings"), ("1KI", "1 kgs"), ("1KI", "1 kin"), ("1KI", "i kings"), ("1KI", "first kings"),
+  ("2KI", "2 kings"), ("2KI", "2 kgs"), ("2KI", "2 kin"), ("2KI", "ii kings"), ("2KI", "second kings"),
+  ("1CH", "1 chronicles"), ("1CH", "1 chron"), ("1CH", "1 chr"), ("1CH", "i chronicles"), ("1CH", "first chronicles"),
+  ("2CH", "2 chronicles"), ("2CH", "2 chron"), ("2CH", "2 chr"), ("2CH", "ii chronicles"), ("2CH", "second chronicles"),
+  ("EZR", "ezra"), ("EZR", "ezr"),
+  ("NEH", "nehemiah"), ("NEH", "neh"),
+  ("EST", "esther"), ("EST", "est"),
+  ("JOB", "job"),
+  ("PSA", "psalms"), ("PSA", "psalm"), ("PSA", "ps"), ("PSA", "psa"),
+  ("PRO", "proverbs"), ("PRO", "prov"), ("PRO", "pro"),
+  ("ECC", "ecclesiastes"), ("ECC", "eccl"), ("ECC", "ecc"),
+  ("SNG", "song of solomon"), ("SNG", "song"), ("SNG", "songs"), ("SNG", "canticles"), ("SNG", "sng"),
+  ("ISA", "isaiah"), ("ISA", "isa"),
+  ("JER", "jeremiah"), ("JER", "jer"),
+  ("LAM", "lamentations"), ("LAM", "lam"),
+  ("EZK", "ezekiel"), ("EZK", "ezek"), ("EZK", "ezk"),
+  ("DAN", "daniel"), ("DAN", "dan"),
+  ("HOS", "hosea"), ("HOS", "hos"),
+  ("JOL", "joel"), ("JOL", "jol"),
+  ("AMO", "amos"), ("AMO", "amo"),
+  ("OBA", "obadiah"), ("OBA", "obad"), ("OBA", "oba"),
+  ("JON", "jonah"), ("JON", "jon"),
+  ("MIC", "micah"), ("MIC", "mic"),
+  ("NAM", "nahum"), ("NAM", "nah"), ("NAM", "nam"),
+  ("HAB", "habakkuk"), ("HAB", "hab"),
+  ("ZEP", "zephaniah"), ("ZEP", "zeph"), ("ZEP", "zep"),
+  ("HAG", "haggai"), ("HAG", "hag"),
+  ("ZEC", "zechariah"), ("ZEC", "zech"), ("ZEC", "zec"),
+  ("MAL", "malachi"), ("MAL", "mal"),
+  ("MAT", "matthew"), ("MAT", "matt"), ("MAT", "mat"), ("MAT", "mt"),
+  ("MRK", "mark"), ("MRK", "mrk"), ("MRK", "mk"),
+  ("LUK", "luke"), ("LUK", "luk"), ("LUK", "lk"),
+  ("JHN", "john"), ("JHN", "jhn"), ("JHN", "jn"),
+  ("ACT", "acts"), ("ACT", "act"),
+  ("ROM", "romans"), ("ROM", "rom"),
+  ("1CO", "1 corinthians"), ("1CO", "1 cor"), ("1CO", "1 co"), ("1CO", "i corinthians"), ("1CO", "first corinthians"),
+  ("2CO", "2 corinthians"), ("2CO", "2 cor"), ("2CO", "2 co"), ("2CO", "ii corinthians"), ("2CO", "second corinthians"),
+  ("GAL", "galatians"), ("GAL", "gal"),
+  ("EPH", "ephesians"), ("EPH", "eph"),
+  ("PHP", "philippians"), ("PHP", "php"),
+  ("COL", "colossians"), ("COL", "col"),
+  ("1TH", "1 thessalonians"), ("1TH", "1 thess"), ("1TH", "1 thes"), ("1TH", "i thessalonians"), ("1TH", "first thessalonians"),
+  ("2TH", "2 thessalonians"), ("2TH", "2 thess"), ("2TH", "2 thes"), ("2TH", "ii thessalonians"), ("2TH", "second thessalonians"),
+  ("1TI", "1 timothy"), ("1TI", "1 tim"), ("1TI", "i timothy"), ("1TI", "first timothy"),
+  ("2TI", "2 timothy"), ("2TI", "2 tim"), ("2TI", "ii timothy"), ("2TI", "second timothy"),
+  ("TIT", "titus"), ("TIT", "tit"),
+  ("PHM", "philemon"), ("PHM", "philem"), ("PHM", "phm"),
+  ("HEB", "hebrews"), ("HEB", "heb"),
+  ("JAS", "james"), ("JAS", "jas"), ("JAS", "jam"),
+  ("1PE", "1 peter"), ("1PE", "1 pet"), ("1PE", "1 pe"), ("1PE", "i peter"), ("1PE", "first peter"),
+  ("2PE", "2 peter"), ("2PE", "2 pet"), ("2PE", "2 pe"), ("2PE", "ii peter"), ("2PE", "second peter"),
+  ("1JN", "1 john"), ("1JN", "1 jn"), ("1JN", "1 jhn"), ("1JN", "i john"), ("1JN", "first john"),
+  ("2JN", "2 john"), ("2JN", "2 jn"), ("2JN", "2 jhn"), ("2JN", "ii john"), ("2JN", "second john"),
+  ("3JN", "3 john"), ("3JN", "3 jn"), ("3JN", "3 jhn"), ("3JN", "iii john"), ("3JN", "third john"),
+  ("JUD", "jude"), ("JUD", "jud"),
+  ("REV", "revelation"), ("REV", "revelations"), ("REV", "rev"), ("REV", "apocalypse")
+]
+
+proc bookInfo*(code: string): BookInfo =
+  ## Returns the `CanonBooks` entry whose code equals `code` exactly.
+  ## Raises `ValueError` for a code that is not in the table.
+  for candidate in CanonBooks:
+    if candidate.code != code:
+      continue
+    return candidate
+
+  raise newException(ValueError, "unknown Bible book code '" & code & "'")
+
+proc bookIndex*(code: string): int =
+  ## Returns the zero-based position in `CanonBooks` of the book whose code
+  ## equals `code` exactly. Raises `ValueError` for an unknown code.
+  for position in CanonBooks.low .. CanonBooks.high:
+    if CanonBooks[position].code == code:
+      return position
+
+  raise newException(ValueError, "unknown Bible book code '" & code & "'")
+
+proc normalizeReferenceInput(s: string): string =
+  ## Replaces en dashes, em dashes and minus signs with an ASCII hyphen and
+  ## strips surrounding whitespace, so range parsing only has to look for '-'.
+  const dashVariants = ["–", "—", "−"]
+
+  result = s
+  for variant in dashVariants:
+    result = result.replace(variant, "-")
+  result = result.strip
+
+proc normalizeBookPrefix(s: string): string =
+  ## Keeps only ASCII letters (lower-cased) and ASCII digits from `s`,
+  ## dropping spaces, punctuation and everything else.
+  for ch in s:
+    case ch
+    of 'A'..'Z', 'a'..'z':
+      result.add(ch.toLowerAscii)
+    of '0'..'9':
+      result.add(ch)
+    else:
+      discard
+
+proc canonicalNamePrefixMatches(prefix: string): seq[BookInfo] =
+  ## Collects, in table order, every book whose normalized canonical name
+  ## starts with `prefix`. The caller passes an already-normalized prefix.
+  for entry in CanonBooks:
+    let comparable = normalizeBookPrefix(entry.name)
+    if not comparable.startsWith(prefix):
+      continue
+    result.add(entry)
+
+proc formatBookList(books: seq[BookInfo]): string =
+  ## Joins the display names of `books` with ", " for use in error messages.
+  for position, book in books:
+    if position > 0:
+      result.add(", ")
+    result.add(book.name)
+
+proc matchCanonicalBookPrefix(input: string): tuple[
+    matched: bool,
+    ambiguous: bool,
+    book: BookInfo,
+    consumed: int,
+    prefix: string,
+    matches: seq[BookInfo]] =
+  ## Tries to read a book name from the start of `input` as a prefix of one
+  ## of the canonical names in `CanonBooks`.
+  ##
+  ## Result fields:
+  ## * `matched`   - some leading slice matched exactly one book.
+  ## * `ambiguous` - a leading slice matched several books and no unique
+  ##   match was found before it.
+  ## * `book`      - the uniquely matched book (only set when `matched`).
+  ## * `consumed`  - number of input characters belonging to the book part.
+  ## * `prefix`    - the stripped input slice that produced the result.
+  ## * `matches`   - the candidate books for that slice.
+
+  # `idx` is the exclusive end of the candidate slice input[0 ..< idx].
+  for idx in 1 .. input.len:
+    # Never cut in the middle of a word: skip ends followed by a letter.
+    if idx < input.len and input[idx].isAlphaAscii:
+      continue
+
+    let prefix = normalizeBookPrefix(input[0 ..< idx])
+    if prefix.len == 0:
+      continue
+
+    let matches = canonicalNamePrefixMatches(prefix)
+    if matches.len == 1:
+      # Later (longer) unique matches overwrite earlier ones, so trailing
+      # separators after the name end up counted in `consumed`.
+      result.matched = true
+      result.ambiguous = false
+      result.book = matches[0]
+      result.consumed = idx
+      result.prefix = input[0 ..< idx].strip
+      result.matches = matches
+    elif matches.len > 1 and not result.matched:
+      # Remember the ambiguity only while nothing unique has been found.
+      result.ambiguous = true
+      result.consumed = idx
+      result.prefix = input[0 ..< idx].strip
+      result.matches = matches
+
+proc matchAlias(input, alias: string): int =
+  ## Matches `alias` against the start of `input`, ignoring letter case.
+  ##
+  ## Returns the number of input characters consumed, or -1 when the alias
+  ## does not match. A space or '.' in the alias matches any run (possibly
+  ## empty) of spaces and dots in the input; dots in the input are also
+  ## skipped before each alias letter and after the alias ends.
+  var i = 0 # cursor into `input`
+  var j = 0 # cursor into `alias`
+
+  while j < alias.len:
+    let aliasCh = alias[j]
+    if aliasCh.isSpaceAscii or aliasCh == '.':
+      # Separator in the alias: swallow any separators in the input.
+      while i < input.len and (input[i].isSpaceAscii or input[i] == '.'):
+        inc i
+      inc j
+    else:
+      # Tolerate dots sprinkled between the input's characters.
+      while i < input.len and input[i] == '.':
+        inc i
+
+      if i >= input.len or input[i].toLowerAscii != aliasCh.toLowerAscii:
+        return -1
+
+      inc i
+      inc j
+
+  # Consume dots directly after the alias, e.g. the '.' in "Gen.".
+  while i < input.len and input[i] == '.':
+    inc i
+
+  # The alias must end at a word boundary: reject "jon" against "jonah".
+  if i < input.len and input[i].isAlphaAscii:
+    return -1
+
+  i
+
+proc parseBook(input: string): tuple[book: BookInfo, rest: string] =
+  ## Splits `input` into its leading book name and the remaining text.
+  ##
+  ## A unique canonical-name prefix wins outright. Otherwise the alias that
+  ## consumes the most input is used. Raises `ValueError` when neither
+  ## approach identifies a book; the message lists the candidates when the
+  ## canonical prefix was ambiguous.
+  let canonical = matchCanonicalBookPrefix(input)
+  if canonical.matched:
+    return (canonical.book, input[canonical.consumed .. ^1].strip)
+
+  # Longest alias match; ties keep the entry listed first.
+  var
+    winner = ""
+    winnerLen = -1
+  for entry in bookAliases:
+    let used = matchAlias(input, entry[1])
+    if used > winnerLen:
+      winner = entry[0]
+      winnerLen = used
+
+  if winnerLen >= 0:
+    return (bookInfo(winner), input[winnerLen .. ^1].strip)
+
+  if canonical.ambiguous:
+    raise newException(ValueError,
+      "ambiguous Bible book prefix '" & canonical.prefix & "' in '" &
+        input & "'; matches " & canonical.matches.formatBookList)
+
+  raise newException(ValueError, "could not parse Bible book in '" & input & "'")
+
+proc parsePositiveInt(s, label: string): int =
+  ## Parses `s` as a strictly positive decimal integer.
+  ##
+  ## `label` names the value in error messages. Raises `ValueError` when `s`
+  ## is empty, contains anything but ASCII digits, or evaluates to zero.
+  let digitsOnly = s.len > 0 and s.allCharsInSet(Digits)
+  if not digitsOnly:
+    raise newException(ValueError, "invalid " & label & " '" & s & "'")
+
+  let value = parseInt(s)
+  if value < 1:
+    raise newException(ValueError, label & " must be positive")
+
+  value
+
+proc parsePoint(token: string, defaultChapter: int, singleChapter: bool): RefPoint =
+  ## Parses one end of a range: either "chapter:verse" or a bare number.
+  ##
+  ## A bare number is a verse of chapter 1 for single-chapter books, a verse
+  ## of `defaultChapter` when that is positive, and otherwise a chapter with
+  ## verse 0. Raises `ValueError` for empty or malformed input.
+  let text = token.strip
+  if text.len == 0:
+    raise newException(ValueError, "empty reference point")
+
+  # Split at the first colon only; anything after it is the verse part.
+  let parts = text.split(':', maxsplit = 1)
+  if parts.len == 2:
+    result.chapter = parsePositiveInt(parts[0], "chapter")
+    result.verse = parsePositiveInt(parts[1], "verse")
+    return
+
+  let number = parsePositiveInt(text, "reference number")
+  result =
+    if singleChapter:
+      RefPoint(chapter: 1, verse: number)
+    elif defaultChapter > 0:
+      RefPoint(chapter: defaultChapter, verse: number)
+    else:
+      RefPoint(chapter: number, verse: 0)
+
+proc parseRange(segment: string, defaultChapter: int, singleChapter: bool): RefRange =
+  ## Parses a single point or a "start-finish" range.
+  ##
+  ## When the start carries a verse, a bare number after the dash is read as
+  ## a verse in the start's chapter. Raises `ValueError` when the finish lies
+  ## before the start.
+  let text = segment.strip
+  let dash = text.find('-')
+
+  if dash < 0:
+    result.start = parsePoint(text, defaultChapter, singleChapter)
+    result.finish = result.start
+  else:
+    result.start = parsePoint(text[0 ..< dash], defaultChapter, singleChapter)
+    let inherited =
+      if result.start.verse > 0: result.start.chapter
+      else: 0
+    result.finish = parsePoint(text[dash + 1 .. ^1], inherited, singleChapter)
+
+  let backwards =
+    result.finish.chapter < result.start.chapter or
+    (result.finish.chapter == result.start.chapter and
+     result.start.verse > 0 and
+     result.finish.verse > 0 and
+     result.finish.verse < result.start.verse)
+
+  if backwards:
+    raise newException(ValueError, "range ends before it starts: '" & segment & "'")
+
+proc parsePassageSpec(spec: string, book: BookInfo): seq[RefRange] =
+  ## Parses the comma-separated part of a reference that follows the book
+  ## name, e.g. "3:16,20-21" or "3:16-4:2, 5".
+  ##
+  ## A bare number after a segment that ended on a verse is read as a verse
+  ## in the chapter where that segment ENDED, so "3:16-4:2, 5" yields 4:5.
+  ## After a chapter-only segment bare numbers are chapters again.
+  ## Raises `ValueError` for an empty segment or a malformed range.
+  var currentChapter = 0
+
+  for rawSegment in spec.split(','):
+    let segment = rawSegment.strip
+    if segment.len == 0:
+      raise newException(ValueError, "empty passage range in '" & spec & "'")
+
+    let range = parseRange(segment, currentChapter, book.singleChapter)
+    result.add(range)
+
+    if segment.contains(':') or (range.start.verse > 0 and range.finish.verse > 0):
+      # Carry the finishing chapter forward; for single-point and
+      # same-chapter segments this equals the start chapter.
+      currentChapter = range.finish.chapter
+    else:
+      currentChapter = 0
+
+proc parseReference*(input: string): PassageReference =
+  ## Parses one reference such as "John 3:16" or "Jude 3-4".
+  ##
+  ## `ranges` stays empty when only a book name is given. Raises `ValueError`
+  ## when the book or the passage part cannot be parsed.
+  let (book, remainder) = parseBook(normalizeReferenceInput(input))
+
+  result.book = book
+  if remainder.len != 0:
+    result.ranges = parsePassageSpec(remainder, book)
+
+proc parseReferences*(input: string): seq[PassageReference] =
+  ## Parses a `;`-separated list of references, skipping blank entries.
+  ## Raises `ValueError` when no reference remains or any entry is invalid.
+  for piece in input.split(';'):
+    let trimmed = piece.strip
+    if trimmed.len == 0:
+      continue
+    result.add(parseReference(trimmed))
+
+  if result.len < 1:
+    raise newException(ValueError, "empty Bible reference")
+
+proc `$`*(point: RefPoint): string =
+  ## Renders "chapter:verse", or just "chapter" when no verse is set.
+  result = $point.chapter
+  if point.verse > 0:
+    result.add(":" & $point.verse)
+
+proc `$`*(range: RefRange): string =
+  ## Renders a range: a single point as-is, a same-chapter verse span as
+  ## "chapter:first-last", and anything else as "start-finish".
+  let sameChapterVerses =
+    range.start.chapter == range.finish.chapter and
+    range.start.verse > 0 and
+    range.finish.verse > 0
+
+  if range.start == range.finish:
+    result = $range.start
+  elif sameChapterVerses:
+    result = $range.start.chapter & ":" & $range.start.verse & "-" & $range.finish.verse
+  else:
+    result = $range.start & "-" & $range.finish
+
+proc formatSingleChapterRange(range: RefRange): string =
+  ## Renders a range for a single-chapter book, omitting the chapter number
+  ## whenever both ends share a chapter.
+  if range.start == range.finish:
+    result = $range.start.verse
+  elif range.start.chapter == range.finish.chapter:
+    result = $range.start.verse & "-" & $range.finish.verse
+  else:
+    result = $range.start & "-" & $range.finish
+
+proc `$`*(reference: PassageReference): string =
+  ## Renders the book name followed by its ranges joined with ", ".
+  ## A reference without ranges renders as the book name alone.
+  result = reference.book.name
+  if reference.ranges.len == 0:
+    return
+
+  var pieces = newSeqOfCap[string](reference.ranges.len)
+  for span in reference.ranges:
+    let piece =
+      if reference.book.singleChapter: formatSingleChapterRange(span)
+      else: $span
+    pieces.add(piece)
+
+  result.add(" " & pieces.join(", "))
@@ -0,0 +1,84 @@
+import std/[strutils, unittest]
+
+import ../src/kjv
+import ../src/reference_parser
+
+suite "reference parser":
+  # Covers parseReference / parseReferences and the `$` rendering.
+  test "parses single verse references":
+    let parsed = parseReference("John 3:16")
+
+    check parsed.book.code == "JHN"
+    check parsed.ranges.len == 1
+
+    let only = parsed.ranges[0]
+    check only.start.chapter == 3
+    check only.start.verse == 16
+    check only.finish == only.start
+    check $parsed == "John 3:16"
+
+  test "parses verse lists using the previous chapter":
+    let parsed = parseReference("John 3:16,20-21")
+
+    check parsed.ranges.len == 2
+
+    let second = parsed.ranges[1]
+    check second.start.chapter == 3
+    check second.start.verse == 20
+    check second.finish.chapter == 3
+    check second.finish.verse == 21
+    check $parsed == "John 3:16, 3:20-21"
+
+  test "parses chapter ranges":
+    let parsed = parseReference("John 3-4")
+
+    check parsed.ranges.len == 1
+
+    let span = parsed.ranges[0]
+    check span.start.chapter == 3
+    check span.start.verse == 0
+    check span.finish.chapter == 4
+    check span.finish.verse == 0
+    check $parsed == "John 3-4"
+
+  test "parses abbreviated numbered books":
+    let parsed = parseReference("1 Jn 1:9")
+    let point = parsed.ranges[0].start
+
+    check parsed.book.code == "1JN"
+    check point.chapter == 1
+    check point.verse == 9
+    check $parsed == "1 John 1:9"
+
+  test "parses unique canonical book prefixes":
+    check parseReference("Gene 1:1").book.code == "GEN"
+    check parseReference("Phile 3").book.code == "PHM"
+    check parseReference("Phili 1:6").book.code == "PHP"
+
+  test "rejects ambiguous canonical book prefixes":
+    # "Phil" fits both Philippians and Philemon.
+    expect ValueError:
+      discard parseReference("Phil 1")
+
+  test "normalizes single-chapter book references":
+    let parsed = parseReference("Jude 3-4")
+    let span = parsed.ranges[0]
+
+    check parsed.book.code == "JUD"
+    check span.start.chapter == 1
+    check span.start.verse == 3
+    check span.finish.chapter == 1
+    check span.finish.verse == 4
+    check $parsed == "Jude 3-4"
+
+  test "parses semicolon-separated references":
+    let parsedList = parseReferences("Psalm 23; John 3:16")
+
+    check parsedList.len == 2
+    check parsedList[0].book.code == "PSA"
+    check parsedList[1].book.code == "JHN"
+
+suite "offline KJV backend":
+  # Smoke tests against the embedded data: heading line plus verse marker.
+  test "fetches a single embedded verse":
+    let fetched = kjv.fetchPassages("John 3:16")
+
+    check fetched.len == 1
+
+    let passage = fetched[0]
+    check passage.startsWith("John 3:16\n")
+    check passage.contains("  [16] ")
+
+  test "fetches a single-chapter embedded verse":
+    let fetched = kjv.fetchPassages("Jude 3")
+
+    check fetched.len == 1
+
+    let passage = fetched[0]
+    check passage.startsWith("Jude 3\n")
+    check passage.contains("  [3] ")
@@ -0,0 +1,144 @@
+import std/[os, strutils, tables]
+
+# Source archive: https://ebible.org/Scriptures/eng-kjv_usfm.zip
+
+# Book codes the generator exports, in output order. `findCanonFiles` keeps
+# only files whose code appears here, and `generate` requires a file for
+# every entry.
+const canonBookCodes = [
+  "GEN", "EXO", "LEV", "NUM", "DEU", "JOS", "JDG", "RUT",
+  "1SA", "2SA", "1KI", "2KI", "1CH", "2CH", "EZR", "NEH",
+  "EST", "JOB", "PSA", "PRO", "ECC", "SNG", "ISA", "JER",
+  "LAM", "EZK", "DAN", "HOS", "JOL", "AMO", "OBA", "JON",
+  "MIC", "NAM", "HAB", "ZEP", "HAG", "ZEC", "MAL", "MAT",
+  "MRK", "LUK", "JHN", "ACT", "ROM", "1CO", "2CO", "GAL",
+  "EPH", "PHP", "COL", "1TH", "2TH", "1TI", "2TI", "TIT",
+  "PHM", "HEB", "JAS", "1PE", "2PE", "1JN", "2JN", "3JN",
+  "JUD", "REV"
+]
+
+proc normalizeWhitespace(s: string): string =
+  ## Collapses every run of ASCII whitespace into a single space and removes
+  ## leading and trailing whitespace.
+  s.splitWhitespace.join(" ")
+
+proc removeFootnotes(s: string): string =
+  ## Copies `s` without its footnote spans, i.e. everything from a `\f `
+  ## opener through the matching `\f*` closer.
+  ##
+  ## When an opener has no closer, copying stops there and the remainder of
+  ## the input is dropped.
+  var pos = 0
+  while pos < s.len:
+    let startsFootnote =
+      s.continuesWith("\\f ", pos) or s.continuesWith("\\f +", pos)
+
+    if not startsFootnote:
+      result.add(s[pos])
+      inc pos
+      continue
+
+    let closeAt = s.find("\\f*", pos + 2)
+    if closeAt < 0:
+      break
+    # Resume right after the three-character closer.
+    pos = closeAt + 3
+
+proc stripUsfmMarkup(s: string): string =
+  ## Reduces a USFM text fragment to plain text: footnotes are removed,
+  ## backslash markers are dropped, `|...` spans are skipped, tabs become
+  ## spaces and whitespace is normalized.
+  let withoutFootnotes = removeFootnotes(s)
+  var i = 0
+
+  while i < withoutFootnotes.len:
+    case withoutFootnotes[i]
+    of '\\':
+      # Skip the marker itself: backslash, optional '+', then its name.
+      inc i
+      if i < withoutFootnotes.len and withoutFootnotes[i] == '+':
+        inc i
+
+      while i < withoutFootnotes.len and
+          (withoutFootnotes[i].isAlphaAscii or
+           withoutFootnotes[i].isDigit or
+           withoutFootnotes[i] == '-'):
+        inc i
+
+      # A '*' right after the name marks a closing marker such as `\wj*`.
+      let isClosingMarker = i < withoutFootnotes.len and withoutFootnotes[i] == '*'
+      if isClosingMarker:
+        inc i
+
+      # Only opening markers swallow the whitespace that follows them;
+      # whitespace after a closing marker is kept as part of the text.
+      while not isClosingMarker and
+          i < withoutFootnotes.len and
+          withoutFootnotes[i].isSpaceAscii:
+        inc i
+    of '|':
+      # Drop everything from '|' up to the next backslash
+      # (presumably marker attributes - TODO confirm against the USFM spec).
+      while i < withoutFootnotes.len and withoutFootnotes[i] != '\\':
+        inc i
+    of '\t':
+      # Tabs would clash with the TSV column separator.
+      result.add(' ')
+      inc i
+    else:
+      result.add(withoutFootnotes[i])
+      inc i
+
+  result = normalizeWhitespace(result)
+
+proc parseVerseLine(line: string): tuple[verse: int, text: string] =
+  ## Splits a `\v <number> <text>` line into the verse number and its
+  ## markup-free text. The first three characters of `line` are skipped
+  ## unconditionally. Raises `ValueError` when no text follows the number or
+  ## the number is not an integer.
+  let body = line[3..^1].strip
+  let gap = body.find(' ')
+  if gap < 0:
+    raise newException(ValueError, "verse marker without text: " & line)
+
+  (verse: parseInt(body[0 ..< gap]), text: stripUsfmMarkup(body[gap + 1 .. ^1]))
+
+proc findCanonFiles(inputDir: string): Table[string, string] =
+  ## Maps book codes to USFM file paths found directly in `inputDir`.
+  ##
+  ## The code is the text between the first '-' of the file name and the
+  ## "eng-kjv.usfm" suffix; files whose code is not in `canonBookCodes` are
+  ## ignored.
+  for path in walkFiles(inputDir / "*eng-kjv.usfm"):
+    let fileName = path.extractFilename
+    let dashAt = fileName.find('-')
+    let suffixAt = fileName.find("eng-kjv.usfm")
+    if dashAt < 0 or suffixAt <= dashAt:
+      continue
+
+    let code = fileName[dashAt + 1 ..< suffixAt]
+    if code in canonBookCodes:
+      result[code] = path
+
+proc generate(inputDir, outputPath: string) =
+  ## Converts the USFM files in `inputDir` into one TSV file at `outputPath`
+  ## with a `code<TAB>chapter<TAB>verse<TAB>text` row per verse, books in
+  ## `canonBookCodes` order. Raises `ValueError` when a book's file is
+  ## missing; the output's parent directory is created if needed.
+  let canonFiles = findCanonFiles(inputDir)
+  var rows: seq[string] = @[]
+
+  for code in canonBookCodes:
+    if not canonFiles.hasKey(code):
+      raise newException(ValueError, "missing USFM file for " & code)
+
+    # Parser state for the verse currently being accumulated.
+    var chapter = 0
+    var verse = 0
+    var verseText = ""
+
+    # Emits the pending verse (if any, and if it has text), then resets the
+    # verse state. The chapter number is left untouched.
+    proc flushVerse() =
+      if chapter > 0 and verse > 0:
+        let text = normalizeWhitespace(verseText).replace("\t", " ")
+        if text.len > 0:
+          rows.add([code, $chapter, $verse, text].join("\t"))
+      verse = 0
+      verseText = ""
+
+    for rawLine in canonFiles[code].lines:
+      let line = rawLine.strip
+
+      if line.startsWith("\\c "):
+        # New chapter: close the previous verse first.
+        flushVerse()
+        chapter = parseInt(line[3..^1].strip)
+      elif line.startsWith("\\v "):
+        # New verse: close the previous one and start accumulating.
+        flushVerse()
+        let parsed = parseVerseLine(line)
+        verse = parsed.verse
+        verseText = parsed.text
+      elif verse > 0:
+        # Any other line while a verse is open continues that verse's text.
+        let continued = stripUsfmMarkup(line)
+        if continued.len > 0:
+          if verseText.len > 0:
+            verseText.add(' ')
+          verseText.add(continued)
+
+    # Emit the last verse of the book.
+    flushVerse()
+
+  createDir(outputPath.parentDir)
+  writeFile(outputPath, rows.join("\n") & "\n")
+
+when isMainModule:
+  # Command-line entry point: exactly two arguments, the USFM directory and
+  # the TSV file to write; anything else prints the usage and fails.
+  if paramCount() == 2:
+    generate(paramStr(1), paramStr(2))
+  else:
+    quit("Usage: generate_kjv_data <usfm-dir> <output-tsv>", QuitFailure)