Add embedded KJV support

This commit is contained in:
2026-06-14 07:46:21 -05:00
parent 2d78d8e5c0
commit 544062902b
8 changed files with 31857 additions and 3 deletions
+2
View File
@@ -1,3 +1,5 @@
esv_api esv_api
bibleref bibleref
tests/test_offline_kjv
data/private/
*.sw? *.sw?
+31102
View File
File diff suppressed because it is too large Load Diff
+6 -3
View File
@@ -8,6 +8,7 @@ import cliutils, docopt, zero_functional
import ./api_bible import ./api_bible
import ./esv import ./esv
import ./kjv
proc formatMarkdown(raw, translation: string): string = proc formatMarkdown(raw, translation: string): string =
var reference = "" var reference = ""
@@ -77,6 +78,8 @@ proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[str
reference, reference,
cfg.getVal("esv-api-token"), cfg.getVal("esv-api-token"),
cfg.getVal("esv-api-root", "https://api.esv.org")) cfg.getVal("esv-api-root", "https://api.esv.org"))
of "akjv", "kjv":
kjv.fetchPassages(reference)
of "amp", "nkjv", "niv": of "amp", "nkjv", "niv":
api_bible.fetchPassages( api_bible.fetchPassages(
reference, reference,
@@ -89,7 +92,7 @@ proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[str
else: else:
raise newException(ValueError, raise newException(ValueError,
"unsupported translation '" & translation & "unsupported translation '" & translation &
"'; supported translations: amp, esv, nkjv, niv") "'; supported translations: akjv, amp, esv, kjv, nkjv, niv")
when isMainModule: when isMainModule:
const USAGE = """Usage: const USAGE = """Usage:
@@ -107,8 +110,8 @@ Options:
-t, --translation <translation> -t, --translation <translation>
Select a specific translation. Supported values Select a specific translation. Supported values
are 'amp', 'esv', 'nkjv', and 'niv'. Defaults are 'akjv', 'amp', 'esv', 'kjv', 'nkjv', and
to 'esv'. 'niv'. Defaults to 'esv'.
--esv-api-token <token> Provide the API token on the command line. By --esv-api-token <token> Provide the API token on the command line. By
default this will be read either from the default this will be read either from the
+109
View File
@@ -0,0 +1,109 @@
import std/[strutils, tables]
import ./offline_data
import ./reference_parser
const kjvRows = embeddedTranslationData("kjv")
type
  BibleIndex = object
    ## Lookup tables built from the embedded KJV rows.
    verses: Table[string, string]
      ## Verse text keyed by "code<TAB>chapter<TAB>verse".
    lastVerseByChapter: Table[string, int]
      ## Highest verse number seen, keyed by "code<TAB>chapter".
    lastChapterByBook: Table[string, int]
      ## Highest chapter number seen, keyed by book code.
proc verseKey(code: string, chapter, verse: int): string =
  ## Builds the table key for one verse: the book code, chapter number and
  ## verse number joined by tab characters.
  [code, $chapter, $verse].join("\t")
proc chapterKey(code: string, chapter: int): string =
  ## Builds the table key for one chapter: the book code and chapter number
  ## joined by a tab character.
  [code, $chapter].join("\t")
proc loadBibleIndex(): BibleIndex =
  ## Parses the embedded KJV rows into a `BibleIndex`.
  ##
  ## Each non-blank row must have four tab-separated fields: book code,
  ## chapter, verse and verse text (the text may itself contain tabs because
  ## the split stops after three separators).
  ##
  ## Raises `ValueError` for a row with fewer than four fields or with a
  ## chapter/verse that `parseInt` rejects.
  for row in kjvRows.splitLines:
    if row.strip.len > 0:
      let fields = row.split('\t', maxsplit = 3)
      if fields.len != 4:
        raise newException(ValueError, "invalid embedded KJV row: " & row)
      let
        book = fields[0]
        chapterNo = parseInt(fields[1])
        verseNo = parseInt(fields[2])
      result.verses[verseKey(book, chapterNo, verseNo)] = fields[3]

      # Track the running maximum verse per chapter and chapter per book.
      let chapterId = chapterKey(book, chapterNo)
      let highestVerse = max(
        verseNo, result.lastVerseByChapter.getOrDefault(chapterId, verseNo))
      result.lastVerseByChapter[chapterId] = highestVerse
      let highestChapter = max(
        chapterNo, result.lastChapterByBook.getOrDefault(book, chapterNo))
      result.lastChapterByBook[book] = highestChapter
proc requireLastChapter(index: BibleIndex, code: string): int =
  ## Returns the highest chapter number indexed for book `code`.
  ## Raises `ValueError` when the index holds no data for that book.
  if code in index.lastChapterByBook:
    return index.lastChapterByBook[code]
  raise newException(ValueError, "no embedded KJV data for " & code)
proc requireLastVerse(index: BibleIndex, code: string, chapter: int): int =
  ## Returns the highest verse number indexed for `chapter` of book `code`.
  ## Raises `ValueError` when the index holds no data for that chapter.
  let lookupKey = chapterKey(code, chapter)
  if lookupKey in index.lastVerseByChapter:
    return index.lastVerseByChapter[lookupKey]
  raise newException(ValueError,
    "no embedded KJV data for " & bookInfo(code).name & " " & $chapter)
proc requireVerse(index: BibleIndex, code: string, chapter, verse: int): string =
  ## Returns the indexed text of a single verse.
  ## Raises `ValueError` when the index holds no such verse.
  let lookupKey = verseKey(code, chapter, verse)
  if lookupKey in index.verses:
    return index.verses[lookupKey]
  raise newException(ValueError,
    "no embedded KJV data for " & bookInfo(code).name & " " &
    $chapter & ":" & $verse)
proc addVerseLines(
    lines: var seq[string],
    index: BibleIndex,
    reference: PassageReference,
    range: RefRange) =
  ## Appends one formatted line (`  [verse] text`) to `lines` for every verse
  ## covered by `range` in the book named by `reference`.
  ##
  ## A start verse of 0 means "from verse 1"; a finish verse of 0 means
  ## "through the last indexed verse of that chapter". Chapters strictly
  ## between the start and finish chapters are emitted in full.
  ##
  ## Raises `ValueError` when the book, a chapter or a verse is missing from
  ## the index, or when a chapter's first verse lies after its last verse.
  let bookCode = reference.book.code
  # Fail early if the book is absent from the index.
  discard index.requireLastChapter(bookCode)

  let (first, last) = (range.start, range.finish)
  for chapterNo in first.chapter .. last.chapter:
    let opensHere = chapterNo == first.chapter and first.verse > 0
    let fromVerse = if opensHere: first.verse else: 1

    let closesHere = chapterNo == last.chapter and last.verse > 0
    let toVerse =
      if closesHere: last.verse
      else: index.requireLastVerse(bookCode, chapterNo)

    if fromVerse > toVerse:
      raise newException(ValueError, "reference range starts after it ends")

    for verseNo in fromVerse .. toVerse:
      let verseText = index.requireVerse(bookCode, chapterNo, verseNo)
      lines.add("  [" & $verseNo & "] " & verseText)
proc fetchReference(index: BibleIndex, reference: PassageReference): string =
  ## Renders one parsed reference as text: the formatted reference on the
  ## first line followed by one `  [verse] text` line per verse, joined with
  ## newlines. A reference without ranges yields the whole book.
  var output = @[$reference]
  if reference.ranges.len > 0:
    for range in reference.ranges:
      output.addVerseLines(index, reference, range)
  else:
    # No chapter/verse given: walk every indexed chapter and verse.
    let bookCode = reference.book.code
    for chapterNo in 1 .. index.requireLastChapter(bookCode):
      for verseNo in 1 .. index.requireLastVerse(bookCode, chapterNo):
        let verseText = index.requireVerse(bookCode, chapterNo, verseNo)
        output.add("  [" & $verseNo & "] " & verseText)
  output.join("\n")
proc fetchPassages*(reference: string): seq[string] =
  ## Resolves every reference in `reference` (as split by `parseReferences`)
  ## against the embedded KJV data and returns one rendered passage per
  ## reference, in input order.
  ##
  ## The index is rebuilt from the embedded rows on every call.
  let bibleIndex = loadBibleIndex()
  for item in parseReferences(reference):
    result.add(bibleIndex.fetchReference(item))
+11
View File
@@ -0,0 +1,11 @@
import std/os
template embeddedTranslationData*(name: static[string]): string =
  ## Expands to the compile-time contents of `<name>.tsv`.
  ##
  ## The file is looked up under a `data` directory two levels above the
  ## path reported by `currentSourcePath()`. `data/private/<name>.tsv` is
  ## used when it exists at compile time; otherwise `data/public/<name>.tsv`
  ## is read.
  const
    dataRoot = currentSourcePath().parentDir.parentDir / "data"
    overridePath = dataRoot / "private" / (name & ".tsv")
    bundledPath = dataRoot / "public" / (name & ".tsv")
  when fileExists(overridePath):
    staticRead(overridePath)
  else:
    staticRead(bundledPath)
+399
View File
@@ -0,0 +1,399 @@
import std/[strutils]
type
  BookInfo* = object
    ## Static description of one Bible book.
    code*: string          ## Short book code, e.g. "GEN".
    name*: string          ## Display name, e.g. "Genesis".
    singleChapter*: bool   ## True for books that have only one chapter.

  RefPoint* = object
    ## One end of a range. `verse == 0` means "no verse given".
    chapter*: int
    verse*: int

  RefRange* = object
    ## Inclusive span between two points; a single point has
    ## `start == finish`.
    start*: RefPoint
    finish*: RefPoint

  PassageReference* = object
    ## A book plus zero or more ranges within it. An empty `ranges`
    ## sequence denotes the book with no chapter/verse part.
    book*: BookInfo
    ranges*: seq[RefRange]
# The 66 books, Genesis through Revelation, each with its short code and
# display name. `singleChapter` is set only for Obadiah, Philemon, 2 John,
# 3 John and Jude. `bookInfo` and `bookIndex` scan this table by code.
const CanonBooks*: array[66, BookInfo] = [
BookInfo(code: "GEN", name: "Genesis"),
BookInfo(code: "EXO", name: "Exodus"),
BookInfo(code: "LEV", name: "Leviticus"),
BookInfo(code: "NUM", name: "Numbers"),
BookInfo(code: "DEU", name: "Deuteronomy"),
BookInfo(code: "JOS", name: "Joshua"),
BookInfo(code: "JDG", name: "Judges"),
BookInfo(code: "RUT", name: "Ruth"),
BookInfo(code: "1SA", name: "1 Samuel"),
BookInfo(code: "2SA", name: "2 Samuel"),
BookInfo(code: "1KI", name: "1 Kings"),
BookInfo(code: "2KI", name: "2 Kings"),
BookInfo(code: "1CH", name: "1 Chronicles"),
BookInfo(code: "2CH", name: "2 Chronicles"),
BookInfo(code: "EZR", name: "Ezra"),
BookInfo(code: "NEH", name: "Nehemiah"),
BookInfo(code: "EST", name: "Esther"),
BookInfo(code: "JOB", name: "Job"),
BookInfo(code: "PSA", name: "Psalms"),
BookInfo(code: "PRO", name: "Proverbs"),
BookInfo(code: "ECC", name: "Ecclesiastes"),
BookInfo(code: "SNG", name: "Song of Solomon"),
BookInfo(code: "ISA", name: "Isaiah"),
BookInfo(code: "JER", name: "Jeremiah"),
BookInfo(code: "LAM", name: "Lamentations"),
BookInfo(code: "EZK", name: "Ezekiel"),
BookInfo(code: "DAN", name: "Daniel"),
BookInfo(code: "HOS", name: "Hosea"),
BookInfo(code: "JOL", name: "Joel"),
BookInfo(code: "AMO", name: "Amos"),
BookInfo(code: "OBA", name: "Obadiah", singleChapter: true),
BookInfo(code: "JON", name: "Jonah"),
BookInfo(code: "MIC", name: "Micah"),
BookInfo(code: "NAM", name: "Nahum"),
BookInfo(code: "HAB", name: "Habakkuk"),
BookInfo(code: "ZEP", name: "Zephaniah"),
BookInfo(code: "HAG", name: "Haggai"),
BookInfo(code: "ZEC", name: "Zechariah"),
BookInfo(code: "MAL", name: "Malachi"),
BookInfo(code: "MAT", name: "Matthew"),
BookInfo(code: "MRK", name: "Mark"),
BookInfo(code: "LUK", name: "Luke"),
BookInfo(code: "JHN", name: "John"),
BookInfo(code: "ACT", name: "Acts"),
BookInfo(code: "ROM", name: "Romans"),
BookInfo(code: "1CO", name: "1 Corinthians"),
BookInfo(code: "2CO", name: "2 Corinthians"),
BookInfo(code: "GAL", name: "Galatians"),
BookInfo(code: "EPH", name: "Ephesians"),
BookInfo(code: "PHP", name: "Philippians"),
BookInfo(code: "COL", name: "Colossians"),
BookInfo(code: "1TH", name: "1 Thessalonians"),
BookInfo(code: "2TH", name: "2 Thessalonians"),
BookInfo(code: "1TI", name: "1 Timothy"),
BookInfo(code: "2TI", name: "2 Timothy"),
BookInfo(code: "TIT", name: "Titus"),
BookInfo(code: "PHM", name: "Philemon", singleChapter: true),
BookInfo(code: "HEB", name: "Hebrews"),
BookInfo(code: "JAS", name: "James"),
BookInfo(code: "1PE", name: "1 Peter"),
BookInfo(code: "2PE", name: "2 Peter"),
BookInfo(code: "1JN", name: "1 John"),
BookInfo(code: "2JN", name: "2 John", singleChapter: true),
BookInfo(code: "3JN", name: "3 John", singleChapter: true),
BookInfo(code: "JUD", name: "Jude", singleChapter: true),
BookInfo(code: "REV", name: "Revelation")
]
# (book code, alias) pairs consulted by `parseBook` when no unique prefix of
# a canonical book name matches. Aliases are written in lowercase;
# `matchAlias` compares them case-insensitively and treats spaces and
# periods in the input as optional. "phil" is not listed, so that prefix
# (shared by Philippians and Philemon) is reported as ambiguous.
const bookAliases = [
("GEN", "genesis"), ("GEN", "gen"),
("EXO", "exodus"), ("EXO", "exod"), ("EXO", "exo"),
("LEV", "leviticus"), ("LEV", "lev"),
("NUM", "numbers"), ("NUM", "num"), ("NUM", "numb"),
("DEU", "deuteronomy"), ("DEU", "deut"), ("DEU", "deu"),
("JOS", "joshua"), ("JOS", "josh"), ("JOS", "jos"),
("JDG", "judges"), ("JDG", "judg"), ("JDG", "jdg"),
("RUT", "ruth"), ("RUT", "rut"),
("1SA", "1 samuel"), ("1SA", "1 sam"), ("1SA", "i samuel"), ("1SA", "first samuel"),
("2SA", "2 samuel"), ("2SA", "2 sam"), ("2SA", "ii samuel"), ("2SA", "second samuel"),
("1KI", "1 kings"), ("1KI", "1 kgs"), ("1KI", "1 kin"), ("1KI", "i kings"), ("1KI", "first kings"),
("2KI", "2 kings"), ("2KI", "2 kgs"), ("2KI", "2 kin"), ("2KI", "ii kings"), ("2KI", "second kings"),
("1CH", "1 chronicles"), ("1CH", "1 chron"), ("1CH", "1 chr"), ("1CH", "i chronicles"), ("1CH", "first chronicles"),
("2CH", "2 chronicles"), ("2CH", "2 chron"), ("2CH", "2 chr"), ("2CH", "ii chronicles"), ("2CH", "second chronicles"),
("EZR", "ezra"), ("EZR", "ezr"),
("NEH", "nehemiah"), ("NEH", "neh"),
("EST", "esther"), ("EST", "est"),
("JOB", "job"),
("PSA", "psalms"), ("PSA", "psalm"), ("PSA", "ps"), ("PSA", "psa"),
("PRO", "proverbs"), ("PRO", "prov"), ("PRO", "pro"),
("ECC", "ecclesiastes"), ("ECC", "eccl"), ("ECC", "ecc"),
("SNG", "song of solomon"), ("SNG", "song"), ("SNG", "songs"), ("SNG", "canticles"), ("SNG", "sng"),
("ISA", "isaiah"), ("ISA", "isa"),
("JER", "jeremiah"), ("JER", "jer"),
("LAM", "lamentations"), ("LAM", "lam"),
("EZK", "ezekiel"), ("EZK", "ezek"), ("EZK", "ezk"),
("DAN", "daniel"), ("DAN", "dan"),
("HOS", "hosea"), ("HOS", "hos"),
("JOL", "joel"), ("JOL", "jol"),
("AMO", "amos"), ("AMO", "amo"),
("OBA", "obadiah"), ("OBA", "obad"), ("OBA", "oba"),
("JON", "jonah"), ("JON", "jon"),
("MIC", "micah"), ("MIC", "mic"),
("NAM", "nahum"), ("NAM", "nah"),
("HAB", "habakkuk"), ("HAB", "hab"),
("ZEP", "zephaniah"), ("ZEP", "zeph"), ("ZEP", "zep"),
("HAG", "haggai"), ("HAG", "hag"),
("ZEC", "zechariah"), ("ZEC", "zech"), ("ZEC", "zec"),
("MAL", "malachi"), ("MAL", "mal"),
("MAT", "matthew"), ("MAT", "matt"), ("MAT", "mat"), ("MAT", "mt"),
("MRK", "mark"), ("MRK", "mrk"), ("MRK", "mk"),
("LUK", "luke"), ("LUK", "luk"), ("LUK", "lk"),
("JHN", "john"), ("JHN", "jhn"), ("JHN", "jn"),
("ACT", "acts"), ("ACT", "act"),
("ROM", "romans"), ("ROM", "rom"),
("1CO", "1 corinthians"), ("1CO", "1 cor"), ("1CO", "1 co"), ("1CO", "i corinthians"), ("1CO", "first corinthians"),
("2CO", "2 corinthians"), ("2CO", "2 cor"), ("2CO", "2 co"), ("2CO", "ii corinthians"), ("2CO", "second corinthians"),
("GAL", "galatians"), ("GAL", "gal"),
("EPH", "ephesians"), ("EPH", "eph"),
("PHP", "philippians"), ("PHP", "php"),
("COL", "colossians"), ("COL", "col"),
("1TH", "1 thessalonians"), ("1TH", "1 thess"), ("1TH", "1 thes"), ("1TH", "i thessalonians"), ("1TH", "first thessalonians"),
("2TH", "2 thessalonians"), ("2TH", "2 thess"), ("2TH", "2 thes"), ("2TH", "ii thessalonians"), ("2TH", "second thessalonians"),
("1TI", "1 timothy"), ("1TI", "1 tim"), ("1TI", "i timothy"), ("1TI", "first timothy"),
("2TI", "2 timothy"), ("2TI", "2 tim"), ("2TI", "ii timothy"), ("2TI", "second timothy"),
("TIT", "titus"), ("TIT", "tit"),
("PHM", "philemon"), ("PHM", "philem"), ("PHM", "phm"),
("HEB", "hebrews"), ("HEB", "heb"),
("JAS", "james"), ("JAS", "jas"), ("JAS", "jam"),
("1PE", "1 peter"), ("1PE", "1 pet"), ("1PE", "1 pe"), ("1PE", "i peter"), ("1PE", "first peter"),
("2PE", "2 peter"), ("2PE", "2 pet"), ("2PE", "2 pe"), ("2PE", "ii peter"), ("2PE", "second peter"),
("1JN", "1 john"), ("1JN", "1 jn"), ("1JN", "1 jhn"), ("1JN", "i john"), ("1JN", "first john"),
("2JN", "2 john"), ("2JN", "2 jn"), ("2JN", "2 jhn"), ("2JN", "ii john"), ("2JN", "second john"),
("3JN", "3 john"), ("3JN", "3 jn"), ("3JN", "3 jhn"), ("3JN", "iii john"), ("3JN", "third john"),
("JUD", "jude"), ("JUD", "jud"),
("REV", "revelation"), ("REV", "revelations"), ("REV", "rev"), ("REV", "apocalypse")
]
proc bookInfo*(code: string): BookInfo =
  ## Looks up the `CanonBooks` entry whose code equals `code` exactly.
  ## Raises `ValueError` when no entry has that code.
  for position in 0 .. CanonBooks.high:
    if CanonBooks[position].code == code:
      return CanonBooks[position]
  raise newException(ValueError, "unknown Bible book code '" & code & "'")
proc bookIndex*(code: string): int =
  ## Returns the zero-based position within `CanonBooks` of the entry whose
  ## code equals `code` exactly.
  ## Raises `ValueError` when no entry has that code.
  for position in 0 .. CanonBooks.high:
    if CanonBooks[position].code == code:
      return position
  raise newException(ValueError, "unknown Bible book code '" & code & "'")
proc normalizeReferenceInput(s: string): string =
  ## Replaces typographic dashes with an ASCII hyphen and trims surrounding
  ## whitespace, so ranges pasted from formatted text (e.g. "John 3:16–18")
  ## parse the same as hand-typed ones.
  ##
  ## The replacement sources are spelled as `\u` escapes so they cannot be
  ## silently dropped by an editor or transfer step: `multiReplace` ignores
  ## empty search strings, so a lost character would disable the
  ## normalization without any error.
  # NOTE(review): the three code points below are a best guess at the
  # intended set — confirm against the original source.
  s.multiReplace([
    ("\u2013", "-"), # en dash
    ("\u2014", "-"), # em dash
    ("\u2212", "-")  # minus sign
  ]).strip
proc normalizeBookPrefix(s: string): string =
  ## Reduces `s` to its ASCII letters (lower-cased) and ASCII digits,
  ## dropping every other character, so that "1 John" and "1john" compare
  ## equal.
  for ch in s:
    case ch
    of 'A' .. 'Z', 'a' .. 'z':
      result.add(ch.toLowerAscii)
    of '0' .. '9':
      result.add(ch)
    else:
      discard
proc canonicalNamePrefixMatches(prefix: string): seq[BookInfo] =
  ## Returns every `CanonBooks` entry whose normalized name starts with
  ## `prefix`, in table order. `prefix` is expected to be already
  ## normalized with `normalizeBookPrefix`.
  for candidate in CanonBooks:
    let normalizedName = normalizeBookPrefix(candidate.name)
    if normalizedName.startsWith(prefix):
      result.add(candidate)
proc formatBookList(books: seq[BookInfo]): string =
  ## Joins the display names of `books` with ", " for use in messages.
  for position, entry in books:
    if position > 0:
      result.add(", ")
    result.add(entry.name)
proc matchCanonicalBookPrefix(input: string): tuple[
    matched: bool,
    ambiguous: bool,
    book: BookInfo,
    consumed: int,
    prefix: string,
    matches: seq[BookInfo]] =
  ## Tries to read a book name from the front of `input` as a prefix of a
  ## canonical book name.
  ##
  ## Every cut position that does not fall inside a run of letters is tried
  ## in increasing order. The last cut that identifies exactly one book
  ## wins (`matched`). If no cut is unique but some cut matches several
  ## books, the last such cut is reported with `ambiguous` set and the
  ## candidate books in `matches`.
  for cut in 1 .. input.len:
    # Never split a word: the character after the cut must not be a letter.
    let insideWord = cut < input.len and input[cut].isAlphaAscii
    if insideWord:
      continue

    let head = input[0 ..< cut]
    let needle = normalizeBookPrefix(head)
    if needle.len == 0:
      continue

    let candidates = canonicalNamePrefixMatches(needle)
    let unique = candidates.len == 1
    let stillUndecided = candidates.len > 1 and not result.matched
    if unique or stillUndecided:
      if unique:
        result.matched = true
        result.ambiguous = false
        result.book = candidates[0]
      else:
        result.ambiguous = true
      result.consumed = cut
      result.prefix = head.strip
      result.matches = candidates
proc matchAlias(input, alias: string): int =
  ## Matches `alias` against the start of `input`, ignoring letter case.
  ##
  ## A space or period in the alias matches any run (possibly empty) of
  ## spaces and periods in the input; periods in the input are also skipped
  ## before each other alias character and after the last one.
  ##
  ## Returns the number of input characters consumed, or -1 when the alias
  ## does not match or the match is immediately followed by a letter.
  const separators = Whitespace + {'.'}
  var pos = 0
  for aliasCh in alias:
    if aliasCh in separators:
      while pos < input.len and input[pos] in separators:
        inc pos
    else:
      while pos < input.len and input[pos] == '.':
        inc pos
      if pos >= input.len or
          input[pos].toLowerAscii != aliasCh.toLowerAscii:
        return -1
      inc pos

  while pos < input.len and input[pos] == '.':
    inc pos
  # Reject matches that stop in the middle of a longer word.
  if pos < input.len and input[pos].isAlphaAscii:
    return -1
  pos
proc parseBook(input: string): tuple[book: BookInfo, rest: string] =
  ## Splits `input` into the book it names and the remaining text.
  ##
  ## A unique canonical-name prefix is preferred. Otherwise the alias that
  ## consumes the most input is used (the first listed wins ties).
  ##
  ## Raises `ValueError` when nothing matches; the message lists the
  ## candidate books when a canonical prefix was ambiguous.
  let canonical = matchCanonicalBookPrefix(input)
  if canonical.matched:
    result.book = canonical.book
    result.rest = input[canonical.consumed .. ^1].strip
    return

  var winningCode = ""
  var winningLen = -1
  for entry in bookAliases:
    let length = matchAlias(input, entry[1])
    if length > winningLen:
      winningCode = entry[0]
      winningLen = length

  if winningLen >= 0:
    result.book = bookInfo(winningCode)
    result.rest = input[winningLen .. ^1].strip
    return

  if canonical.ambiguous:
    raise newException(ValueError,
      "ambiguous Bible book prefix '" & canonical.prefix & "' in '" &
      input & "'; matches " & canonical.matches.formatBookList)
  raise newException(ValueError, "could not parse Bible book in '" & input & "'")
proc parsePositiveInt(s, label: string): int =
  ## Parses `s` as a strictly positive decimal integer.
  ##
  ## `label` names the value in error messages. Raises `ValueError` when
  ## `s` is empty, contains anything other than ASCII digits, or is zero.
  let digitsOnly = s.len > 0 and s.allCharsInSet(Digits)
  if not digitsOnly:
    raise newException(ValueError, "invalid " & label & " '" & s & "'")
  result = parseInt(s)
  if result < 1:
    raise newException(ValueError, label & " must be positive")
proc parsePoint(token: string, defaultChapter: int, singleChapter: bool): RefPoint =
  ## Parses one end of a range.
  ##
  ## "C:V" always yields chapter C, verse V. A bare number N is read as
  ## verse N of chapter 1 for single-chapter books, as verse N of
  ## `defaultChapter` when that is positive, and otherwise as chapter N
  ## with no verse (verse 0).
  ##
  ## Raises `ValueError` for an empty token or a non-positive/invalid number.
  let text = token.strip
  if text.len == 0:
    raise newException(ValueError, "empty reference point")

  # Split on the first colon only; anything after it is the verse part.
  let parts = text.split(':', maxsplit = 1)
  if parts.len == 2:
    return RefPoint(
      chapter: parsePositiveInt(parts[0], "chapter"),
      verse: parsePositiveInt(parts[1], "verse"))

  let number = parsePositiveInt(text, "reference number")
  let chapterIsImplied = singleChapter or defaultChapter > 0
  if not chapterIsImplied:
    return RefPoint(chapter: number, verse: 0)
  RefPoint(chapter: (if singleChapter: 1 else: defaultChapter), verse: number)
proc parseRange(segment: string, defaultChapter: int, singleChapter: bool): RefRange =
  ## Parses "A" or "A-B" into a range, splitting at the first hyphen.
  ##
  ## When the start point carries a verse, a bare number after the hyphen is
  ## read as a verse in the start point's chapter; otherwise it is read as a
  ## chapter.
  ##
  ## Raises `ValueError` when the range runs backwards or a point is invalid.
  let text = segment.strip
  let dashAt = text.find('-')
  if dashAt < 0:
    result.start = parsePoint(text, defaultChapter, singleChapter)
    result.finish = result.start
  else:
    result.start = parsePoint(text[0 ..< dashAt], defaultChapter, singleChapter)
    let inheritedChapter =
      if result.start.verse > 0: result.start.chapter
      else: 0
    result.finish = parsePoint(
      text[dashAt + 1 .. ^1], inheritedChapter, singleChapter)

  let (first, last) = (result.start, result.finish)
  let chapterBackwards = last.chapter < first.chapter
  let verseBackwards =
    last.chapter == first.chapter and
    first.verse > 0 and
    last.verse > 0 and
    last.verse < first.verse
  if chapterBackwards or verseBackwards:
    raise newException(ValueError, "range ends before it starts: '" & segment & "'")
proc parsePassageSpec(spec: string, book: BookInfo): seq[RefRange] =
  ## Parses the chapter/verse part of a reference: a comma-separated list of
  ## points or ranges such as "3:16,20-21" or "3-4".
  ##
  ## A segment that names verses sets the chapter context for the next
  ## segment, so a following bare number is read as a verse in that chapter.
  ## The context is the chapter the segment *ends* in: after "3:16-4:2" a
  ## following "5" means 4:5, not 3:5. A chapter-only segment clears the
  ## context, so a following bare number is read as a chapter.
  ##
  ## Raises `ValueError` for an empty segment or any invalid range.
  var currentChapter = 0
  for rawSegment in spec.split(','):
    let segment = rawSegment.strip
    if segment.len == 0:
      raise newException(ValueError, "empty passage range in '" & spec & "'")
    let range = parseRange(segment, currentChapter, book.singleChapter)
    result.add(range)
    let namesVerses =
      segment.contains(':') or
      (range.start.verse > 0 and range.finish.verse > 0)
    # Carry the finishing chapter forward; for same-chapter segments this is
    # identical to the starting chapter.
    currentChapter = if namesVerses: range.finish.chapter else: 0
proc parseReference*(input: string): PassageReference =
  ## Parses a single reference such as "John 3:16,20-21" into its book and
  ## ranges. A reference with no chapter/verse part yields empty `ranges`.
  ##
  ## Raises `ValueError` when the book or the passage part cannot be parsed.
  let (book, spec) = parseBook(normalizeReferenceInput(input))
  result.book = book
  if spec.len != 0:
    result.ranges = parsePassageSpec(spec, book)
proc parseReferences*(input: string): seq[PassageReference] =
  ## Parses a semicolon-separated list of references, skipping blank
  ## entries.
  ##
  ## Raises `ValueError` when no non-blank entry is present or any entry
  ## fails to parse.
  for piece in input.split(';'):
    let trimmed = piece.strip
    if trimmed.len == 0:
      continue
    result.add(parseReference(trimmed))
  if result.len < 1:
    raise newException(ValueError, "empty Bible reference")
proc `$`*(point: RefPoint): string =
  ## Formats a point as "chapter:verse", or just "chapter" when the point
  ## has no verse (verse 0).
  result = $point.chapter
  if point.verse > 0:
    result.add(":" & $point.verse)
proc `$`*(range: RefRange): string =
  ## Formats a range: a single point as that point, a verse span within one
  ## chapter as "C:V1-V2", and anything else as "start-finish".
  let (first, last) = (range.start, range.finish)
  let sameChapterVerses =
    first.chapter == last.chapter and first.verse > 0 and last.verse > 0
  if first == last:
    $first
  elif sameChapterVerses:
    $first.chapter & ":" & $first.verse & "-" & $last.verse
  else:
    $first & "-" & $last
proc formatSingleChapterRange(range: RefRange): string =
  ## Formats a range for a single-chapter book, where the chapter number is
  ## omitted: "V" for a single point and "V1-V2" within one chapter. Ranges
  ## whose ends differ in chapter fall back to "start-finish".
  let (first, last) = (range.start, range.finish)
  if first == last:
    $first.verse
  elif first.chapter == last.chapter:
    $first.verse & "-" & $last.verse
  else:
    $first & "-" & $last
proc `$`*(reference: PassageReference): string =
  ## Formats a reference as the book name followed, when ranges are present,
  ## by a space and the ranges joined with ", ". Single-chapter books use
  ## the verse-only range format.
  result = reference.book.name
  if reference.ranges.len == 0:
    return
  var parts = newSeqOfCap[string](reference.ranges.len)
  for range in reference.ranges:
    let formatted =
      if reference.book.singleChapter: formatSingleChapterRange(range)
      else: $range
    parts.add(formatted)
  result.add(" " & parts.join(", "))
+84
View File
@@ -0,0 +1,84 @@
import std/[strutils, unittest]
import ../src/kjv
import ../src/reference_parser
suite "reference parser":
  # Covers book recognition, range parsing and round-trip formatting.

  test "parses single verse references":
    let parsed = parseReference("John 3:16")
    check parsed.book.code == "JHN"
    check parsed.ranges.len == 1
    let only = parsed.ranges[0]
    check only.start.chapter == 3
    check only.start.verse == 16
    check only.finish == only.start
    check $parsed == "John 3:16"

  test "parses verse lists using the previous chapter":
    # The bare "20-21" inherits chapter 3 from the preceding "3:16".
    let parsed = parseReference("John 3:16,20-21")
    check parsed.ranges.len == 2
    let second = parsed.ranges[1]
    check second.start.chapter == 3
    check second.start.verse == 20
    check second.finish.chapter == 3
    check second.finish.verse == 21
    check $parsed == "John 3:16, 3:20-21"

  test "parses chapter ranges":
    let parsed = parseReference("John 3-4")
    check parsed.ranges.len == 1
    let only = parsed.ranges[0]
    check only.start.chapter == 3
    check only.start.verse == 0
    check only.finish.chapter == 4
    check only.finish.verse == 0
    check $parsed == "John 3-4"

  test "parses abbreviated numbered books":
    let parsed = parseReference("1 Jn 1:9")
    check parsed.book.code == "1JN"
    let startPoint = parsed.ranges[0].start
    check startPoint.chapter == 1
    check startPoint.verse == 9
    check $parsed == "1 John 1:9"

  test "parses unique canonical book prefixes":
    check parseReference("Gene 1:1").book.code == "GEN"
    check parseReference("Phile 3").book.code == "PHM"
    check parseReference("Phili 1:6").book.code == "PHP"

  test "rejects ambiguous canonical book prefixes":
    # "Phil" could be Philippians or Philemon and has no alias.
    expect ValueError:
      discard parseReference("Phil 1")

  test "normalizes single-chapter book references":
    # In a single-chapter book bare numbers are verses of chapter 1.
    let parsed = parseReference("Jude 3-4")
    check parsed.book.code == "JUD"
    let only = parsed.ranges[0]
    check only.start.chapter == 1
    check only.start.verse == 3
    check only.finish.chapter == 1
    check only.finish.verse == 4
    check $parsed == "Jude 3-4"

  test "parses semicolon-separated references":
    let parsedList = parseReferences("Psalm 23; John 3:16")
    check parsedList.len == 2
    check parsedList[0].book.code == "PSA"
    check parsedList[1].book.code == "JHN"
suite "offline KJV backend":
  # Exercises lookups against the KJV data embedded at compile time.

  test "fetches a single embedded verse":
    let fetched = kjv.fetchPassages("John 3:16")
    check fetched.len == 1
    let passage = fetched[0]
    check passage.startsWith("John 3:16\n")
    check passage.contains("  [16] ")

  test "fetches a single-chapter embedded verse":
    let fetched = kjv.fetchPassages("Jude 3")
    check fetched.len == 1
    let passage = fetched[0]
    check passage.startsWith("Jude 3\n")
    check passage.contains("  [3] ")
+144
View File
@@ -0,0 +1,144 @@
import std/[os, strutils, tables]
# Source archive: https://ebible.org/Scriptures/eng-kjv_usfm.zip
# Book codes of the 66 books to export, in output order. `findCanonFiles`
# keeps only files whose code appears here, and `generate` requires a file
# for every entry.
const canonBookCodes = [
"GEN", "EXO", "LEV", "NUM", "DEU", "JOS", "JDG", "RUT",
"1SA", "2SA", "1KI", "2KI", "1CH", "2CH", "EZR", "NEH",
"EST", "JOB", "PSA", "PRO", "ECC", "SNG", "ISA", "JER",
"LAM", "EZK", "DAN", "HOS", "JOL", "AMO", "OBA", "JON",
"MIC", "NAM", "HAB", "ZEP", "HAG", "ZEC", "MAL", "MAT",
"MRK", "LUK", "JHN", "ACT", "ROM", "1CO", "2CO", "GAL",
"EPH", "PHP", "COL", "1TH", "2TH", "1TI", "2TI", "TIT",
"PHM", "HEB", "JAS", "1PE", "2PE", "1JN", "2JN", "3JN",
"JUD", "REV"
]
proc normalizeWhitespace(s: string): string =
  ## Collapses every run of ASCII whitespace in `s` to a single space and
  ## removes leading and trailing whitespace.
  s.splitWhitespace.join(" ")
proc removeFootnotes(s: string): string =
  ## Removes USFM footnotes: everything from a `\f ` opener through the
  ## next `\f*` closer, inclusive. If an opener has no closer, the rest of
  ## the input from that opener onward is dropped.
  const
    opener = "\\f "   # also covers the "\f +" form, which starts the same way
    closer = "\\f*"
  var pos = 0
  while pos < s.len:
    if not s.continuesWith(opener, pos):
      result.add(s[pos])
      inc pos
      continue
    let closeAt = s.find(closer, pos + 2)
    if closeAt < 0:
      break
    pos = closeAt + closer.len
proc stripUsfmMarkup(s: string): string =
  ## Reduces a USFM text fragment to plain text.
  ##
  ## Footnotes are removed first. Then each backslash marker (an optional
  ## `+`, followed by letters, digits or hyphens) is dropped; a closing
  ## marker also drops its trailing `*`, while an opening marker drops the
  ## whitespace that follows it. Text from a `|` up to the next backslash is
  ## discarded, tabs become spaces, and whitespace is normalized at the end.
  const markerChars = Letters + Digits + {'-'}
  let text = removeFootnotes(s)
  var pos = 0
  while pos < text.len:
    let ch = text[pos]
    if ch == '\\':
      inc pos
      if pos < text.len and text[pos] == '+':
        inc pos
      while pos < text.len and text[pos] in markerChars:
        inc pos
      if pos < text.len and text[pos] == '*':
        # Closing marker: consume the asterisk, keep following whitespace.
        inc pos
      else:
        # Opening marker: swallow the whitespace separating it from content.
        while pos < text.len and text[pos] in Whitespace:
          inc pos
    elif ch == '|':
      # Skip everything from the pipe up to the next marker.
      while pos < text.len and text[pos] != '\\':
        inc pos
    else:
      result.add(if ch == '\t': ' ' else: ch)
      inc pos
  result = normalizeWhitespace(result)
proc parseVerseLine(line: string): tuple[verse: int, text: string] =
  ## Parses a USFM verse line into its verse number and plain text.
  ##
  ## The first three characters of `line` are skipped (callers pass lines
  ## that start with `\v `). The verse number is everything up to the first
  ## space of the remainder; the rest is cleaned with `stripUsfmMarkup`.
  ##
  ## Raises `ValueError` when no space follows the verse number or the
  ## number is not an integer.
  let payload = line[3 .. ^1].strip
  let spaceAt = payload.find(' ')
  if spaceAt < 0:
    raise newException(ValueError, "verse marker without text: " & line)
  let verseNo = parseInt(payload[0 ..< spaceAt])
  let verseText = stripUsfmMarkup(payload[spaceAt + 1 .. ^1])
  result.verse = verseNo
  result.text = verseText
proc findCanonFiles(inputDir: string): Table[string, string] =
  ## Maps each book code found in `inputDir` to the path of its USFM file.
  ##
  ## Only files matching `*eng-kjv.usfm` are considered. The code is the
  ## part of the file name between the first hyphen and the `eng-kjv.usfm`
  ## suffix; files whose code is not in `canonBookCodes` are ignored.
  const suffix = "eng-kjv.usfm"
  for usfmPath in walkFiles(inputDir / ("*" & suffix)):
    let fileName = usfmPath.extractFilename
    let dashAt = fileName.find('-')
    let suffixAt = fileName.find(suffix)
    if dashAt < 0 or suffixAt <= dashAt:
      continue
    let code = fileName[dashAt + 1 ..< suffixAt]
    if code in canonBookCodes:
      result[code] = usfmPath
proc generate(inputDir, outputPath: string) =
  ## Converts the USFM files in `inputDir` into a TSV file at `outputPath`
  ## with one `code<TAB>chapter<TAB>verse<TAB>text` row per verse, books in
  ## `canonBookCodes` order.
  ##
  ## `\c` lines set the current chapter and `\v` lines start a verse; other
  ## lines seen while a verse is open are appended to it. Verses whose text
  ## is empty after cleanup are skipped. The output directory is created if
  ## needed.
  ##
  ## Raises `ValueError` when a book has no USFM file or a chapter/verse
  ## number cannot be parsed.
  let usfmByCode = findCanonFiles(inputDir)
  var tsvRows: seq[string] = @[]

  for code in canonBookCodes:
    if code notin usfmByCode:
      raise newException(ValueError, "missing USFM file for " & code)

    var chapterNo = 0
    var verseNo = 0
    var pending = ""

    proc flushVerse() =
      # Emit the verse being accumulated (if any) and reset the buffer.
      if chapterNo > 0 and verseNo > 0:
        let cleaned = normalizeWhitespace(pending).replace("\t", " ")
        if cleaned.len > 0:
          tsvRows.add([code, $chapterNo, $verseNo, cleaned].join("\t"))
      verseNo = 0
      pending = ""

    for rawLine in lines(usfmByCode[code]):
      let current = rawLine.strip
      if current.startsWith("\\c "):
        flushVerse()
        chapterNo = parseInt(current[3 .. ^1].strip)
      elif current.startsWith("\\v "):
        flushVerse()
        let (number, body) = parseVerseLine(current)
        verseNo = number
        pending = body
      elif verseNo > 0:
        # Continuation of the open verse (e.g. a following paragraph line).
        let extra = stripUsfmMarkup(current)
        if extra.len > 0:
          if pending.len > 0:
            pending.add(' ')
          pending.add(extra)

    flushVerse()

  createDir(outputPath.parentDir)
  writeFile(outputPath, tsvRows.join("\n") & "\n")
when isMainModule:
  # Command-line entry point: expects the USFM input directory and the TSV
  # output path, and exits with a failure status on any other argument count.
  if paramCount() == 2:
    generate(paramStr(1), paramStr(2))
  else:
    quit("Usage: generate_kjv_data <usfm-dir> <output-tsv>", QuitFailure)