Add embedded KJV support
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
esv_api
|
||||
bibleref
|
||||
tests/test_offline_kjv
|
||||
data/private/
|
||||
*.sw?
|
||||
|
||||
+31102
File diff suppressed because it is too large
Load Diff
+6
-3
@@ -8,6 +8,7 @@ import cliutils, docopt, zero_functional
|
||||
|
||||
import ./api_bible
|
||||
import ./esv
|
||||
import ./kjv
|
||||
|
||||
proc formatMarkdown(raw, translation: string): string =
|
||||
var reference = ""
|
||||
@@ -77,6 +78,8 @@ proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[str
|
||||
reference,
|
||||
cfg.getVal("esv-api-token"),
|
||||
cfg.getVal("esv-api-root", "https://api.esv.org"))
|
||||
of "akjv", "kjv":
|
||||
kjv.fetchPassages(reference)
|
||||
of "amp", "nkjv", "niv":
|
||||
api_bible.fetchPassages(
|
||||
reference,
|
||||
@@ -89,7 +92,7 @@ proc fetchPassages(reference, translation: string, cfg: CombinedConfig): seq[str
|
||||
else:
|
||||
raise newException(ValueError,
|
||||
"unsupported translation '" & translation &
|
||||
"'; supported translations: amp, esv, nkjv, niv")
|
||||
"'; supported translations: akjv, amp, esv, kjv, nkjv, niv")
|
||||
|
||||
when isMainModule:
|
||||
const USAGE = """Usage:
|
||||
@@ -107,8 +110,8 @@ Options:
|
||||
|
||||
-t, --translation <translation>
|
||||
Select a specific translation. Supported values
|
||||
are 'amp', 'esv', 'nkjv', and 'niv'. Defaults
|
||||
to 'esv'.
|
||||
are 'akjv', 'amp', 'esv', 'kjv', 'nkjv', and
|
||||
'niv'. Defaults to 'esv'.
|
||||
|
||||
--esv-api-token <token> Provide the API token on the command line. By
|
||||
default this will be read either from the
|
||||
|
||||
+109
@@ -0,0 +1,109 @@
|
||||
import std/[strutils, tables]
|
||||
|
||||
import ./offline_data
|
||||
import ./reference_parser
|
||||
|
||||
const kjvRows = embeddedTranslationData("kjv")
|
||||
|
||||
type
  BibleIndex = object
    ## Lookup tables built from the embedded KJV rows.
    verses: Table[string, string]
      ## Verse text, keyed by `verseKey(code, chapter, verse)`.
    lastVerseByChapter: Table[string, int]
      ## Highest verse number seen, keyed by `chapterKey(code, chapter)`.
    lastChapterByBook: Table[string, int]
      ## Highest chapter number seen, keyed by book code.
|
||||
|
||||
proc verseKey(code: string, chapter, verse: int): string =
  ## Builds the tab-separated key `<code>\t<chapter>\t<verse>` used to look
  ## up a single verse.
  [code, $chapter, $verse].join("\t")
|
||||
|
||||
proc chapterKey(code: string, chapter: int): string =
  ## Builds the tab-separated key `<code>\t<chapter>` used to look up
  ## per-chapter data.
  [code, $chapter].join("\t")
|
||||
|
||||
proc loadBibleIndex(): BibleIndex =
  ## Parses the embedded `kjvRows` text into a `BibleIndex`.
  ##
  ## Each non-blank line must have four tab-separated fields:
  ## book code, chapter number, verse number and verse text. Only the first
  ## three tabs split the line, so the text field keeps any later tabs.
  ## Raises `ValueError` on a row with fewer than four fields or with a
  ## non-numeric chapter/verse.
  for row in kjvRows.splitLines:
    if row.strip.len > 0:
      let fields = row.split('\t', maxsplit = 3)
      if fields.len != 4:
        raise newException(ValueError, "invalid embedded KJV row: " & row)

      let
        bookCode = fields[0]
        chapterNo = parseInt(fields[1])
        verseNo = parseInt(fields[2])

      result.verses[verseKey(bookCode, chapterNo, verseNo)] = fields[3]

      # Track the highest verse seen in this chapter.
      let chapterId = chapterKey(bookCode, chapterNo)
      if chapterId notin result.lastVerseByChapter or
          result.lastVerseByChapter[chapterId] < verseNo:
        result.lastVerseByChapter[chapterId] = verseNo

      # Track the highest chapter seen in this book.
      if bookCode notin result.lastChapterByBook or
          result.lastChapterByBook[bookCode] < chapterNo:
        result.lastChapterByBook[bookCode] = chapterNo
|
||||
|
||||
proc requireLastChapter(index: BibleIndex, code: string): int =
  ## Returns the highest chapter number the index holds for book `code`.
  ##
  ## Raises `ValueError` when the index has no rows for that book. The
  ## message names the book by its display name, matching the wording used
  ## by `requireLastVerse` and `requireVerse` (`bookInfo` itself raises
  ## `ValueError` for a code that is not in the canon).
  if code notin index.lastChapterByBook:
    raise newException(ValueError,
      "no embedded KJV data for " & bookInfo(code).name)
  index.lastChapterByBook[code]
|
||||
|
||||
proc requireLastVerse(index: BibleIndex, code: string, chapter: int): int =
  ## Returns the highest verse number the index holds for `chapter` of book
  ## `code`; raises `ValueError` when that chapter has no rows.
  let key = chapterKey(code, chapter)
  if key in index.lastVerseByChapter:
    return index.lastVerseByChapter[key]

  raise newException(ValueError,
    "no embedded KJV data for " & bookInfo(code).name & " " & $chapter)
|
||||
|
||||
proc requireVerse(index: BibleIndex, code: string, chapter, verse: int): string =
  ## Returns the text of one verse; raises `ValueError` when the index has
  ## no row for that book/chapter/verse.
  let key = verseKey(code, chapter, verse)
  if key in index.verses:
    return index.verses[key]

  raise newException(ValueError,
    "no embedded KJV data for " & bookInfo(code).name & " " &
    $chapter & ":" & $verse)
|
||||
|
||||
proc addVerseLines(
    lines: var seq[string],
    index: BibleIndex,
    reference: PassageReference,
    range: RefRange) =
  ## Appends one formatted line (`" [<verse>] <text>"`) to `lines` for every
  ## verse covered by `range` in the book of `reference`.
  ##
  ## A verse of 0 on either end point means "whole chapter": the start
  ## defaults to verse 1 and the end to the chapter's last indexed verse.
  ## Raises `ValueError` when the book, a chapter or a verse is missing from
  ## the index, or when a chapter's start verse lies after its end verse.
  let bookCode = reference.book.code
  # Fail early when the index holds nothing at all for this book.
  discard index.requireLastChapter(bookCode)

  let
    startPoint = range.start
    endPoint = range.finish

  for chapterNo in startPoint.chapter .. endPoint.chapter:
    var fromVerse = 1
    if chapterNo == startPoint.chapter and startPoint.verse > 0:
      fromVerse = startPoint.verse

    var toVerse: int
    if chapterNo == endPoint.chapter and endPoint.verse > 0:
      toVerse = endPoint.verse
    else:
      toVerse = index.requireLastVerse(bookCode, chapterNo)

    if fromVerse > toVerse:
      raise newException(ValueError, "reference range starts after it ends")

    for verseNo in fromVerse .. toVerse:
      lines.add(" [" & $verseNo & "] " &
        index.requireVerse(bookCode, chapterNo, verseNo))
|
||||
|
||||
proc fetchReference(index: BibleIndex, reference: PassageReference): string =
  ## Renders one parsed reference as text: the reference heading on the
  ## first line, then one `" [<verse>] <text>"` line per verse, joined with
  ## newlines. A reference without ranges renders the whole book.
  var output = @[$reference]

  if reference.ranges.len > 0:
    for span in reference.ranges:
      output.addVerseLines(index, reference, span)
  else:
    let bookCode = reference.book.code
    for chapterNo in 1 .. index.requireLastChapter(bookCode):
      for verseNo in 1 .. index.requireLastVerse(bookCode, chapterNo):
        output.add(" [" & $verseNo & "] " &
          index.requireVerse(bookCode, chapterNo, verseNo))

  result = output.join("\n")
|
||||
|
||||
proc fetchPassages*(reference: string): seq[string] =
  ## Looks up every `;`-separated reference in `reference` in the embedded
  ## KJV data and returns one rendered passage per reference.
  ##
  ## The index is rebuilt from the embedded rows on each call.
  let bible = loadBibleIndex()
  let parsed = parseReferences(reference)
  for item in parsed:
    result.add(bible.fetchReference(item))
|
||||
@@ -0,0 +1,11 @@
|
||||
import std/os
|
||||
|
||||
template embeddedTranslationData*(name: static[string]): string =
  ## Expands to the compile-time contents of `<name>.tsv`.
  ##
  ## The file is looked up under a `data` directory two `parentDir` levels
  ## above `currentSourcePath()`. `data/private/<name>.tsv` is used when it
  ## exists at compile time; otherwise `data/public/<name>.tsv` is read.
  const
    dataRoot = currentSourcePath().parentDir.parentDir / "data"
    fileName = name & ".tsv"
    privatePath = dataRoot / "private" / fileName
    publicPath = dataRoot / "public" / fileName

  when not fileExists(privatePath):
    staticRead(publicPath)
  else:
    staticRead(privatePath)
|
||||
@@ -0,0 +1,399 @@
|
||||
import std/[strutils]
|
||||
|
||||
type
  BookInfo* = object
    ## One entry of the book table.
    code*: string         ## short book code, e.g. "GEN"
    name*: string         ## display name, e.g. "Genesis"
    singleChapter*: bool  ## true for books treated as having one chapter

  RefPoint* = object
    ## One end of a range. The parser uses `verse == 0` to mean
    ## "chapter only, no specific verse".
    chapter*: int
    verse*: int

  RefRange* = object
    ## An inclusive span from `start` to `finish`; a single point has
    ## `start == finish`.
    start*: RefPoint
    finish*: RefPoint

  PassageReference* = object
    ## A book plus the ranges requested within it. An empty `ranges` means
    ## no chapter/verse was given.
    book*: BookInfo
    ranges*: seq[RefRange]
|
||||
|
||||
const CanonBooks*: array[66, BookInfo] = [
|
||||
BookInfo(code: "GEN", name: "Genesis"),
|
||||
BookInfo(code: "EXO", name: "Exodus"),
|
||||
BookInfo(code: "LEV", name: "Leviticus"),
|
||||
BookInfo(code: "NUM", name: "Numbers"),
|
||||
BookInfo(code: "DEU", name: "Deuteronomy"),
|
||||
BookInfo(code: "JOS", name: "Joshua"),
|
||||
BookInfo(code: "JDG", name: "Judges"),
|
||||
BookInfo(code: "RUT", name: "Ruth"),
|
||||
BookInfo(code: "1SA", name: "1 Samuel"),
|
||||
BookInfo(code: "2SA", name: "2 Samuel"),
|
||||
BookInfo(code: "1KI", name: "1 Kings"),
|
||||
BookInfo(code: "2KI", name: "2 Kings"),
|
||||
BookInfo(code: "1CH", name: "1 Chronicles"),
|
||||
BookInfo(code: "2CH", name: "2 Chronicles"),
|
||||
BookInfo(code: "EZR", name: "Ezra"),
|
||||
BookInfo(code: "NEH", name: "Nehemiah"),
|
||||
BookInfo(code: "EST", name: "Esther"),
|
||||
BookInfo(code: "JOB", name: "Job"),
|
||||
BookInfo(code: "PSA", name: "Psalms"),
|
||||
BookInfo(code: "PRO", name: "Proverbs"),
|
||||
BookInfo(code: "ECC", name: "Ecclesiastes"),
|
||||
BookInfo(code: "SNG", name: "Song of Solomon"),
|
||||
BookInfo(code: "ISA", name: "Isaiah"),
|
||||
BookInfo(code: "JER", name: "Jeremiah"),
|
||||
BookInfo(code: "LAM", name: "Lamentations"),
|
||||
BookInfo(code: "EZK", name: "Ezekiel"),
|
||||
BookInfo(code: "DAN", name: "Daniel"),
|
||||
BookInfo(code: "HOS", name: "Hosea"),
|
||||
BookInfo(code: "JOL", name: "Joel"),
|
||||
BookInfo(code: "AMO", name: "Amos"),
|
||||
BookInfo(code: "OBA", name: "Obadiah", singleChapter: true),
|
||||
BookInfo(code: "JON", name: "Jonah"),
|
||||
BookInfo(code: "MIC", name: "Micah"),
|
||||
BookInfo(code: "NAM", name: "Nahum"),
|
||||
BookInfo(code: "HAB", name: "Habakkuk"),
|
||||
BookInfo(code: "ZEP", name: "Zephaniah"),
|
||||
BookInfo(code: "HAG", name: "Haggai"),
|
||||
BookInfo(code: "ZEC", name: "Zechariah"),
|
||||
BookInfo(code: "MAL", name: "Malachi"),
|
||||
BookInfo(code: "MAT", name: "Matthew"),
|
||||
BookInfo(code: "MRK", name: "Mark"),
|
||||
BookInfo(code: "LUK", name: "Luke"),
|
||||
BookInfo(code: "JHN", name: "John"),
|
||||
BookInfo(code: "ACT", name: "Acts"),
|
||||
BookInfo(code: "ROM", name: "Romans"),
|
||||
BookInfo(code: "1CO", name: "1 Corinthians"),
|
||||
BookInfo(code: "2CO", name: "2 Corinthians"),
|
||||
BookInfo(code: "GAL", name: "Galatians"),
|
||||
BookInfo(code: "EPH", name: "Ephesians"),
|
||||
BookInfo(code: "PHP", name: "Philippians"),
|
||||
BookInfo(code: "COL", name: "Colossians"),
|
||||
BookInfo(code: "1TH", name: "1 Thessalonians"),
|
||||
BookInfo(code: "2TH", name: "2 Thessalonians"),
|
||||
BookInfo(code: "1TI", name: "1 Timothy"),
|
||||
BookInfo(code: "2TI", name: "2 Timothy"),
|
||||
BookInfo(code: "TIT", name: "Titus"),
|
||||
BookInfo(code: "PHM", name: "Philemon", singleChapter: true),
|
||||
BookInfo(code: "HEB", name: "Hebrews"),
|
||||
BookInfo(code: "JAS", name: "James"),
|
||||
BookInfo(code: "1PE", name: "1 Peter"),
|
||||
BookInfo(code: "2PE", name: "2 Peter"),
|
||||
BookInfo(code: "1JN", name: "1 John"),
|
||||
BookInfo(code: "2JN", name: "2 John", singleChapter: true),
|
||||
BookInfo(code: "3JN", name: "3 John", singleChapter: true),
|
||||
BookInfo(code: "JUD", name: "Jude", singleChapter: true),
|
||||
BookInfo(code: "REV", name: "Revelation")
|
||||
]
|
||||
|
||||
const bookAliases = [
|
||||
("GEN", "genesis"), ("GEN", "gen"),
|
||||
("EXO", "exodus"), ("EXO", "exod"), ("EXO", "exo"),
|
||||
("LEV", "leviticus"), ("LEV", "lev"),
|
||||
("NUM", "numbers"), ("NUM", "num"), ("NUM", "numb"),
|
||||
("DEU", "deuteronomy"), ("DEU", "deut"), ("DEU", "deu"),
|
||||
("JOS", "joshua"), ("JOS", "josh"), ("JOS", "jos"),
|
||||
("JDG", "judges"), ("JDG", "judg"), ("JDG", "jdg"),
|
||||
("RUT", "ruth"), ("RUT", "rut"),
|
||||
("1SA", "1 samuel"), ("1SA", "1 sam"), ("1SA", "i samuel"), ("1SA", "first samuel"),
|
||||
("2SA", "2 samuel"), ("2SA", "2 sam"), ("2SA", "ii samuel"), ("2SA", "second samuel"),
|
||||
("1KI", "1 kings"), ("1KI", "1 kgs"), ("1KI", "1 kin"), ("1KI", "i kings"), ("1KI", "first kings"),
|
||||
("2KI", "2 kings"), ("2KI", "2 kgs"), ("2KI", "2 kin"), ("2KI", "ii kings"), ("2KI", "second kings"),
|
||||
("1CH", "1 chronicles"), ("1CH", "1 chron"), ("1CH", "1 chr"), ("1CH", "i chronicles"), ("1CH", "first chronicles"),
|
||||
("2CH", "2 chronicles"), ("2CH", "2 chron"), ("2CH", "2 chr"), ("2CH", "ii chronicles"), ("2CH", "second chronicles"),
|
||||
("EZR", "ezra"), ("EZR", "ezr"),
|
||||
("NEH", "nehemiah"), ("NEH", "neh"),
|
||||
("EST", "esther"), ("EST", "est"),
|
||||
("JOB", "job"),
|
||||
("PSA", "psalms"), ("PSA", "psalm"), ("PSA", "ps"), ("PSA", "psa"),
|
||||
("PRO", "proverbs"), ("PRO", "prov"), ("PRO", "pro"),
|
||||
("ECC", "ecclesiastes"), ("ECC", "eccl"), ("ECC", "ecc"),
|
||||
("SNG", "song of solomon"), ("SNG", "song"), ("SNG", "songs"), ("SNG", "canticles"), ("SNG", "sng"),
|
||||
("ISA", "isaiah"), ("ISA", "isa"),
|
||||
("JER", "jeremiah"), ("JER", "jer"),
|
||||
("LAM", "lamentations"), ("LAM", "lam"),
|
||||
("EZK", "ezekiel"), ("EZK", "ezek"), ("EZK", "ezk"),
|
||||
("DAN", "daniel"), ("DAN", "dan"),
|
||||
("HOS", "hosea"), ("HOS", "hos"),
|
||||
("JOL", "joel"), ("JOL", "jol"),
|
||||
("AMO", "amos"), ("AMO", "amo"),
|
||||
("OBA", "obadiah"), ("OBA", "obad"), ("OBA", "oba"),
|
||||
("JON", "jonah"), ("JON", "jon"),
|
||||
("MIC", "micah"), ("MIC", "mic"),
|
||||
("NAM", "nahum"), ("NAM", "nah"),
|
||||
("HAB", "habakkuk"), ("HAB", "hab"),
|
||||
("ZEP", "zephaniah"), ("ZEP", "zeph"), ("ZEP", "zep"),
|
||||
("HAG", "haggai"), ("HAG", "hag"),
|
||||
("ZEC", "zechariah"), ("ZEC", "zech"), ("ZEC", "zec"),
|
||||
("MAL", "malachi"), ("MAL", "mal"),
|
||||
("MAT", "matthew"), ("MAT", "matt"), ("MAT", "mat"), ("MAT", "mt"),
|
||||
("MRK", "mark"), ("MRK", "mrk"), ("MRK", "mk"),
|
||||
("LUK", "luke"), ("LUK", "luk"), ("LUK", "lk"),
|
||||
("JHN", "john"), ("JHN", "jhn"), ("JHN", "jn"),
|
||||
("ACT", "acts"), ("ACT", "act"),
|
||||
("ROM", "romans"), ("ROM", "rom"),
|
||||
("1CO", "1 corinthians"), ("1CO", "1 cor"), ("1CO", "1 co"), ("1CO", "i corinthians"), ("1CO", "first corinthians"),
|
||||
("2CO", "2 corinthians"), ("2CO", "2 cor"), ("2CO", "2 co"), ("2CO", "ii corinthians"), ("2CO", "second corinthians"),
|
||||
("GAL", "galatians"), ("GAL", "gal"),
|
||||
("EPH", "ephesians"), ("EPH", "eph"),
|
||||
("PHP", "philippians"), ("PHP", "php"),
|
||||
("COL", "colossians"), ("COL", "col"),
|
||||
("1TH", "1 thessalonians"), ("1TH", "1 thess"), ("1TH", "1 thes"), ("1TH", "i thessalonians"), ("1TH", "first thessalonians"),
|
||||
("2TH", "2 thessalonians"), ("2TH", "2 thess"), ("2TH", "2 thes"), ("2TH", "ii thessalonians"), ("2TH", "second thessalonians"),
|
||||
("1TI", "1 timothy"), ("1TI", "1 tim"), ("1TI", "i timothy"), ("1TI", "first timothy"),
|
||||
("2TI", "2 timothy"), ("2TI", "2 tim"), ("2TI", "ii timothy"), ("2TI", "second timothy"),
|
||||
("TIT", "titus"), ("TIT", "tit"),
|
||||
("PHM", "philemon"), ("PHM", "philem"), ("PHM", "phm"),
|
||||
("HEB", "hebrews"), ("HEB", "heb"),
|
||||
("JAS", "james"), ("JAS", "jas"), ("JAS", "jam"),
|
||||
("1PE", "1 peter"), ("1PE", "1 pet"), ("1PE", "1 pe"), ("1PE", "i peter"), ("1PE", "first peter"),
|
||||
("2PE", "2 peter"), ("2PE", "2 pet"), ("2PE", "2 pe"), ("2PE", "ii peter"), ("2PE", "second peter"),
|
||||
("1JN", "1 john"), ("1JN", "1 jn"), ("1JN", "1 jhn"), ("1JN", "i john"), ("1JN", "first john"),
|
||||
("2JN", "2 john"), ("2JN", "2 jn"), ("2JN", "2 jhn"), ("2JN", "ii john"), ("2JN", "second john"),
|
||||
("3JN", "3 john"), ("3JN", "3 jn"), ("3JN", "3 jhn"), ("3JN", "iii john"), ("3JN", "third john"),
|
||||
("JUD", "jude"), ("JUD", "jud"),
|
||||
("REV", "revelation"), ("REV", "revelations"), ("REV", "rev"), ("REV", "apocalypse")
|
||||
]
|
||||
|
||||
proc bookInfo*(code: string): BookInfo =
  ## Returns the `CanonBooks` entry whose code equals `code` exactly.
  ## Raises `ValueError` when no entry matches.
  for slot in CanonBooks.low .. CanonBooks.high:
    if CanonBooks[slot].code == code:
      return CanonBooks[slot]

  raise newException(ValueError, "unknown Bible book code '" & code & "'")
|
||||
|
||||
proc bookIndex*(code: string): int =
  ## Returns the zero-based position in `CanonBooks` of the book whose code
  ## equals `code` exactly. Raises `ValueError` when no entry matches.
  for position in 0 ..< CanonBooks.len:
    if CanonBooks[position].code == code:
      return position

  raise newException(ValueError, "unknown Bible book code '" & code & "'")
|
||||
|
||||
proc normalizeReferenceInput(s: string): string =
  ## Replaces en dash, em dash and the minus sign with an ASCII hyphen,
  ## then strips surrounding whitespace.
  const dashVariants = [
    ("–", "-"),
    ("—", "-"),
    ("−", "-")
  ]
  result = s.multiReplace(dashVariants).strip
|
||||
|
||||
proc normalizeBookPrefix(s: string): string =
  ## Keeps only ASCII letters (lower-cased) and ASCII digits from `s`;
  ## spaces, punctuation and everything else are dropped.
  for ch in s:
    case ch
    of 'a'..'z', 'A'..'Z':
      result.add(ch.toLowerAscii)
    of '0'..'9':
      result.add(ch)
    else:
      discard
|
||||
|
||||
proc canonicalNamePrefixMatches(prefix: string): seq[BookInfo] =
  ## Returns, in `CanonBooks` order, every book whose normalized display
  ## name begins with `prefix` (which is expected to be normalized already).
  for candidate in CanonBooks:
    let normalizedName = normalizeBookPrefix(candidate.name)
    if not normalizedName.startsWith(prefix):
      continue
    result.add(candidate)
|
||||
|
||||
proc formatBookList(books: seq[BookInfo]): string =
  ## Joins the display names of `books` with ", ".
  for position, book in books:
    if position > 0:
      result.add(", ")
    result.add(book.name)
|
||||
|
||||
proc matchCanonicalBookPrefix(input: string): tuple[
    matched: bool,
    ambiguous: bool,
    book: BookInfo,
    consumed: int,
    prefix: string,
    matches: seq[BookInfo]] =
  ## Tries to read a book name from the front of `input` by prefix-matching
  ## against the canonical display names.
  ##
  ## Every cut position that does not split a run of letters is tried, from
  ## shortest to longest. The longest cut whose normalized prefix matches
  ## exactly one book wins (`matched`, `book`, `consumed`). If no cut is
  ## unique, the longest cut matching several books is reported instead
  ## (`ambiguous`, `matches`). `prefix` is the stripped raw text of the
  ## reported cut.
  for cut in 1 .. input.len:
    # Never cut in the middle of a word: the next character must not be a letter.
    let atBoundary = cut >= input.len or not input[cut].isAlphaAscii
    if atBoundary:
      let rawPrefix = input[0 ..< cut]
      let key = normalizeBookPrefix(rawPrefix)
      if key.len > 0:
        let candidates = canonicalNamePrefixMatches(key)
        if candidates.len == 1:
          result.matched = true
          result.ambiguous = false
          result.book = candidates[0]
          result.consumed = cut
          result.prefix = rawPrefix.strip
          result.matches = candidates
        elif candidates.len > 1 and not result.matched:
          # Only remember ambiguity while no unique match has been found.
          result.ambiguous = true
          result.consumed = cut
          result.prefix = rawPrefix.strip
          result.matches = candidates
|
||||
|
||||
proc matchAlias(input, alias: string): int =
  ## Matches `alias` case-insensitively against the start of `input` and
  ## returns the number of input characters consumed, or -1 on no match.
  ##
  ## A space or '.' in the alias matches any run (possibly empty) of
  ## whitespace and dots in the input; dots in the input are also skipped
  ## before each letter and after the alias. The match is rejected when a
  ## letter follows directly, so an alias never matches part of a longer word.
  var pos = 0

  template skipWhile(cond: untyped) =
    while pos < input.len and cond:
      inc pos

  for aliasCh in alias:
    if aliasCh.isSpaceAscii or aliasCh == '.':
      skipWhile(input[pos].isSpaceAscii or input[pos] == '.')
    else:
      skipWhile(input[pos] == '.')
      if pos >= input.len or input[pos].toLowerAscii != aliasCh.toLowerAscii:
        return -1
      inc pos

  skipWhile(input[pos] == '.')

  if pos < input.len and input[pos].isAlphaAscii:
    return -1

  pos
|
||||
|
||||
proc parseBook(input: string): tuple[book: BookInfo, rest: string] =
  ## Splits `input` into the book it names and the remaining (stripped)
  ## chapter/verse text.
  ##
  ## A unique canonical-name prefix is preferred; otherwise the alias that
  ## consumes the most input wins. Raises `ValueError` when nothing matches,
  ## with a dedicated message if the canonical prefix was ambiguous.
  let canonical = matchCanonicalBookPrefix(input)
  if canonical.matched:
    return (book: canonical.book, rest: input[canonical.consumed .. ^1].strip)

  var
    winningCode = ""
    winningLen = -1

  for entry in bookAliases:
    let (code, alias) = entry
    let taken = matchAlias(input, alias)
    if taken > winningLen:
      winningCode = code
      winningLen = taken

  if winningLen >= 0:
    return (book: bookInfo(winningCode), rest: input[winningLen .. ^1].strip)

  if canonical.ambiguous:
    raise newException(ValueError,
      "ambiguous Bible book prefix '" & canonical.prefix & "' in '" &
      input & "'; matches " & canonical.matches.formatBookList)

  raise newException(ValueError, "could not parse Bible book in '" & input & "'")
|
||||
|
||||
proc parsePositiveInt(s, label: string): int =
  ## Parses `s` as a strictly positive decimal integer.
  ##
  ## `label` names the value in error messages. Raises `ValueError` when
  ## `s` is empty, contains anything but ASCII digits, or equals zero.
  let digitsOnly = s.len > 0 and s.allCharsInSet(Digits)
  if not digitsOnly:
    raise newException(ValueError, "invalid " & label & " '" & s & "'")

  result = parseInt(s)
  if result < 1:
    raise newException(ValueError, label & " must be positive")
|
||||
|
||||
proc parsePoint(token: string, defaultChapter: int, singleChapter: bool): RefPoint =
  ## Parses one end point of a range.
  ##
  ## * `"C:V"` gives chapter `C`, verse `V`. Whitespace around either
  ##   number is ignored, so `"3 : 16"` parses like `"3:16"`, in line with
  ##   the whitespace tolerance applied to the token as a whole.
  ## * A bare number is a verse of chapter 1 when `singleChapter` is set,
  ##   else a verse of `defaultChapter` when that is positive, else a whole
  ##   chapter (`verse == 0`).
  ##
  ## Raises `ValueError` for an empty token or a non-positive / non-numeric
  ## number.
  let normalized = token.strip
  if normalized.len == 0:
    raise newException(ValueError, "empty reference point")

  let colonIdx = normalized.find(':')
  if colonIdx >= 0:
    return RefPoint(
      chapter: parsePositiveInt(normalized[0 ..< colonIdx].strip, "chapter"),
      verse: parsePositiveInt(normalized[colonIdx + 1 .. ^1].strip, "verse"))

  let value = parsePositiveInt(normalized, "reference number")
  if singleChapter:
    RefPoint(chapter: 1, verse: value)
  elif defaultChapter > 0:
    RefPoint(chapter: defaultChapter, verse: value)
  else:
    RefPoint(chapter: value, verse: 0)
|
||||
|
||||
proc parseRange(segment: string, defaultChapter: int, singleChapter: bool): RefRange =
  ## Parses `"A"` or `"A-B"` into a range, splitting at the first '-'.
  ##
  ## When the start point names a verse, a bare number after the dash is
  ## read as a verse of the start's chapter; otherwise it is read as a
  ## chapter. Raises `ValueError` when the range runs backwards.
  let parts = segment.strip.split('-', maxsplit = 1)

  result.start = parsePoint(parts[0], defaultChapter, singleChapter)
  if parts.len == 2:
    let inheritedChapter =
      if result.start.verse > 0: result.start.chapter
      else: 0
    result.finish = parsePoint(parts[1], inheritedChapter, singleChapter)
  else:
    result.finish = result.start

  let
    chapterBackwards = result.finish.chapter < result.start.chapter
    verseBackwards =
      result.finish.chapter == result.start.chapter and
      result.start.verse > 0 and
      result.finish.verse > 0 and
      result.finish.verse < result.start.verse

  if chapterBackwards or verseBackwards:
    raise newException(ValueError, "range ends before it starts: '" & segment & "'")
|
||||
|
||||
proc parsePassageSpec(spec: string, book: BookInfo): seq[RefRange] =
  ## Parses the comma-separated chapter/verse part of a reference (e.g.
  ## `"3:16, 20-21"`) into ranges.
  ##
  ## A segment that names verses sets the chapter that later bare numbers
  ## belong to. After a cross-chapter range that chapter is the one the
  ## range *ends* in, so in `"3:16-4:2, 5"` the trailing `5` means 4:5.
  ## A chapter-only segment clears the carried chapter, so a following bare
  ## number is read as a chapter again.
  ##
  ## Raises `ValueError` on an empty segment or any malformed range.
  var currentChapter = 0

  for rawSegment in spec.split(','):
    let segment = rawSegment.strip
    if segment.len == 0:
      raise newException(ValueError, "empty passage range in '" & spec & "'")

    let range = parseRange(segment, currentChapter, book.singleChapter)
    result.add(range)

    if segment.contains(':') or (range.start.verse > 0 and range.finish.verse > 0):
      # Carry the chapter the range finished in, not the one it started in.
      currentChapter = range.finish.chapter
    else:
      currentChapter = 0
|
||||
|
||||
proc parseReference*(input: string): PassageReference =
  ## Parses a single reference such as `"John 3:16,20-21"`.
  ##
  ## Dash variants are normalized first. When nothing follows the book
  ## name, `ranges` is left empty. Raises `ValueError` on an unknown or
  ## ambiguous book or a malformed chapter/verse part.
  let (book, remainder) = parseBook(normalizeReferenceInput(input))

  result.book = book
  if remainder.len > 0:
    result.ranges = parsePassageSpec(remainder, book)
|
||||
|
||||
proc parseReferences*(input: string): seq[PassageReference] =
  ## Parses a `;`-separated list of references, skipping blank entries.
  ## Raises `ValueError` when no non-blank entry is present.
  for piece in input.split(';'):
    let trimmed = piece.strip
    if trimmed.len == 0:
      continue
    result.add(parseReference(trimmed))

  if result.len < 1:
    raise newException(ValueError, "empty Bible reference")
|
||||
|
||||
proc `$`*(point: RefPoint): string =
  ## Formats as `"chapter:verse"`, or just `"chapter"` when `verse` is 0
  ## or negative.
  result = $point.chapter
  if point.verse > 0:
    result.add(":" & $point.verse)
|
||||
|
||||
proc `$`*(range: RefRange): string =
  ## Formats a range: a single point as that point, a verse span within
  ## one chapter as `"C:V1-V2"`, anything else as `"<start>-<finish>"`.
  let
    first = range.start
    final = range.finish

  if first == final:
    result = $first
  elif first.chapter == final.chapter and first.verse > 0 and final.verse > 0:
    result = $first.chapter & ":" & $first.verse & "-" & $final.verse
  else:
    result = $first & "-" & $final
|
||||
|
||||
proc formatSingleChapterRange(range: RefRange): string =
  ## Formats a range for a single-chapter book, leaving out the chapter:
  ## `"V"` for a single point, `"V1-V2"` within one chapter. A range that
  ## spans chapters falls back to `"<start>-<finish>"`.
  if range.start == range.finish:
    result = $range.start.verse
  elif range.start.chapter == range.finish.chapter:
    result = $range.start.verse & "-" & $range.finish.verse
  else:
    result = $range.start & "-" & $range.finish
|
||||
|
||||
proc `$`*(reference: PassageReference): string =
  ## Formats as the book's display name, followed (when ranges are present)
  ## by a space and the ranges joined with ", ". Single-chapter books use
  ## the verse-only range format.
  result = reference.book.name
  for position, span in reference.ranges:
    let spanText =
      if reference.book.singleChapter: formatSingleChapterRange(span)
      else: $span
    result.add(if position == 0: " " else: ", ")
    result.add(spanText)
|
||||
@@ -0,0 +1,84 @@
|
||||
import std/[strutils, unittest]
|
||||
|
||||
import ../src/kjv
|
||||
import ../src/reference_parser
|
||||
|
||||
suite "reference parser":
  test "parses single verse references":
    let parsed = parseReference("John 3:16")

    check:
      parsed.book.code == "JHN"
      parsed.ranges.len == 1
      parsed.ranges[0].start.chapter == 3
      parsed.ranges[0].start.verse == 16
      parsed.ranges[0].finish == parsed.ranges[0].start
      $parsed == "John 3:16"

  test "parses verse lists using the previous chapter":
    # The bare "20-21" must inherit chapter 3 from the preceding "3:16".
    let parsed = parseReference("John 3:16,20-21")

    check:
      parsed.ranges.len == 2
      parsed.ranges[1].start.chapter == 3
      parsed.ranges[1].start.verse == 20
      parsed.ranges[1].finish.chapter == 3
      parsed.ranges[1].finish.verse == 21
      $parsed == "John 3:16, 3:20-21"

  test "parses chapter ranges":
    # Without a colon both numbers are chapters, so the verses stay 0.
    let parsed = parseReference("John 3-4")

    check:
      parsed.ranges.len == 1
      parsed.ranges[0].start.chapter == 3
      parsed.ranges[0].start.verse == 0
      parsed.ranges[0].finish.chapter == 4
      parsed.ranges[0].finish.verse == 0
      $parsed == "John 3-4"

  test "parses abbreviated numbered books":
    let parsed = parseReference("1 Jn 1:9")

    check:
      parsed.book.code == "1JN"
      parsed.ranges[0].start.chapter == 1
      parsed.ranges[0].start.verse == 9
      $parsed == "1 John 1:9"

  test "parses unique canonical book prefixes":
    check:
      parseReference("Gene 1:1").book.code == "GEN"
      parseReference("Phile 3").book.code == "PHM"
      parseReference("Phili 1:6").book.code == "PHP"

  test "rejects ambiguous canonical book prefixes":
    # "Phil" is a prefix of both Philippians and Philemon.
    expect ValueError:
      discard parseReference("Phil 1")

  test "normalizes single-chapter book references":
    # In a single-chapter book bare numbers are verses of chapter 1.
    let parsed = parseReference("Jude 3-4")

    check:
      parsed.book.code == "JUD"
      parsed.ranges[0].start.chapter == 1
      parsed.ranges[0].start.verse == 3
      parsed.ranges[0].finish.chapter == 1
      parsed.ranges[0].finish.verse == 4
      $parsed == "Jude 3-4"

  test "parses semicolon-separated references":
    let parsedList = parseReferences("Psalm 23; John 3:16")

    check:
      parsedList.len == 2
      parsedList[0].book.code == "PSA"
      parsedList[1].book.code == "JHN"
|
||||
|
||||
suite "offline KJV backend":
  test "fetches a single embedded verse":
    let fetched = kjv.fetchPassages("John 3:16")

    check:
      fetched.len == 1
      fetched[0].startsWith("John 3:16\n")
      fetched[0].contains(" [16] ")

  test "fetches a single-chapter embedded verse":
    # "Jude 3" is verse 3 of the one-chapter book, not chapter 3.
    let fetched = kjv.fetchPassages("Jude 3")

    check:
      fetched.len == 1
      fetched[0].startsWith("Jude 3\n")
      fetched[0].contains(" [3] ")
|
||||
@@ -0,0 +1,144 @@
|
||||
import std/[os, strutils, tables]
|
||||
|
||||
# Source archive: https://ebible.org/Scriptures/eng-kjv_usfm.zip
|
||||
|
||||
const canonBookCodes = [
|
||||
"GEN", "EXO", "LEV", "NUM", "DEU", "JOS", "JDG", "RUT",
|
||||
"1SA", "2SA", "1KI", "2KI", "1CH", "2CH", "EZR", "NEH",
|
||||
"EST", "JOB", "PSA", "PRO", "ECC", "SNG", "ISA", "JER",
|
||||
"LAM", "EZK", "DAN", "HOS", "JOL", "AMO", "OBA", "JON",
|
||||
"MIC", "NAM", "HAB", "ZEP", "HAG", "ZEC", "MAL", "MAT",
|
||||
"MRK", "LUK", "JHN", "ACT", "ROM", "1CO", "2CO", "GAL",
|
||||
"EPH", "PHP", "COL", "1TH", "2TH", "1TI", "2TI", "TIT",
|
||||
"PHM", "HEB", "JAS", "1PE", "2PE", "1JN", "2JN", "3JN",
|
||||
"JUD", "REV"
|
||||
]
|
||||
|
||||
proc normalizeWhitespace(s: string): string =
  ## Collapses every run of ASCII whitespace into a single space and drops
  ## leading and trailing whitespace.
  s.splitWhitespace.join(" ")
|
||||
|
||||
proc removeFootnotes(s: string): string =
  ## Removes every span that starts with `\f ` and runs through the next
  ## `\f*`. If a span is opened but never closed, the rest of `s` from that
  ## opening marker on is dropped.
  var pos = 0
  while pos < s.len:
    let openAt = s.find("\\f ", pos)
    if openAt < 0:
      # No further footnote: keep the remainder verbatim.
      result.add(s[pos .. ^1])
      break

    result.add(s[pos ..< openAt])

    let closeAt = s.find("\\f*", openAt + 2)
    if closeAt < 0:
      break
    pos = closeAt + 3
|
||||
|
||||
proc stripUsfmMarkup(s: string): string =
  ## Reduces a fragment of USFM text to plain text.
  ##
  ## Footnote spans are removed first. Then each backslash marker (with an
  ## optional `+` and a name made of letters, digits and `-`) is dropped:
  ## a closing marker loses its `*`, an opening marker loses the whitespace
  ## after it. Everything from a `|` up to the next backslash is discarded,
  ## tabs become spaces, and the result is whitespace-normalized.
  let src = removeFootnotes(s)
  var pos = 0

  while pos < src.len:
    let ch = src[pos]
    if ch == '\\':
      inc pos
      if pos < src.len and src[pos] == '+':
        inc pos

      # Skip the marker name.
      while pos < src.len and
          (src[pos].isAlphaAscii or src[pos].isDigit or src[pos] == '-'):
        inc pos

      if pos < src.len and src[pos] == '*':
        # Closing marker: drop only the '*', keep following whitespace.
        inc pos
      else:
        # Opening marker: drop the whitespace that separates it from the text.
        while pos < src.len and src[pos].isSpaceAscii:
          inc pos
    elif ch == '|':
      while pos < src.len and src[pos] != '\\':
        inc pos
    elif ch == '\t':
      result.add(' ')
      inc pos
    else:
      result.add(ch)
      inc pos

  result = normalizeWhitespace(result)
|
||||
|
||||
proc parseVerseLine(line: string): tuple[verse: int, text: string] =
  ## Splits a verse line into its verse number and plain verse text.
  ##
  ## The first three characters are skipped (the caller passes lines that
  ## start with `\v `). Raises `ValueError` when no space follows the verse
  ## number or the number is not an integer.
  let body = line[3..^1].strip
  let spaceAt = body.find(' ')
  if spaceAt < 0:
    raise newException(ValueError, "verse marker without text: " & line)

  (verse: parseInt(body[0 ..< spaceAt]),
   text: stripUsfmMarkup(body[spaceAt + 1 .. ^1]))
|
||||
|
||||
proc findCanonFiles(inputDir: string): Table[string, string] =
  ## Maps canonical book codes to the USFM files found in `inputDir`.
  ##
  ## Only files matching `*eng-kjv.usfm` are considered. The code is the
  ## text between the first '-' in the file name and the `eng-kjv.usfm`
  ## suffix; files whose code is not in `canonBookCodes` are ignored.
  const suffix = "eng-kjv.usfm"

  for path in walkFiles(inputDir / ("*" & suffix)):
    let
      fileName = path.extractFilename
      dashAt = fileName.find('-')
      suffixAt = fileName.find(suffix)

    if dashAt < 0 or suffixAt <= dashAt:
      continue

    let code = fileName[dashAt + 1 ..< suffixAt]
    if code in canonBookCodes:
      result[code] = path
|
||||
|
||||
proc generate(inputDir, outputPath: string) =
  ## Converts the USFM files in `inputDir` into a TSV file at `outputPath`
  ## with one `code<TAB>chapter<TAB>verse<TAB>text` row per verse, in
  ## `canonBookCodes` order.
  ##
  ## Raises `ValueError` when a canonical book has no USFM file. The parent
  ## directory of `outputPath` is created if needed.
  let usfmByBook = findCanonFiles(inputDir)
  var tsvRows: seq[string] = @[]

  for code in canonBookCodes:
    if code notin usfmByBook:
      raise newException(ValueError, "missing USFM file for " & code)

    var
      chapterNo = 0
      verseNo = 0
      pending = ""

    proc flushVerse() =
      # Emit the buffered verse, if there is one, then reset the buffer.
      if chapterNo > 0 and verseNo > 0:
        let cleaned = normalizeWhitespace(pending).replace("\t", " ")
        if cleaned.len > 0:
          tsvRows.add([code, $chapterNo, $verseNo, cleaned].join("\t"))
      verseNo = 0
      pending = ""

    for rawLine in usfmByBook[code].lines:
      let line = rawLine.strip

      if line.startsWith("\\c "):
        flushVerse()
        chapterNo = parseInt(line[3..^1].strip)
      elif line.startsWith("\\v "):
        flushVerse()
        let marker = parseVerseLine(line)
        verseNo = marker.verse
        pending = marker.text
      elif verseNo > 0:
        # Any other line while a verse is open is treated as a continuation.
        # NOTE(review): this includes lines that begin with other markers;
        # their marker names are stripped but their text is appended to the
        # current verse -- confirm that is intended for heading-style lines.
        let extra = stripUsfmMarkup(line)
        if extra.len > 0:
          if pending.len > 0:
            pending.add(' ')
          pending.add(extra)

    # Emit the last verse of the book.
    flushVerse()

  createDir(outputPath.parentDir)
  writeFile(outputPath, tsvRows.join("\n") & "\n")
|
||||
|
||||
when isMainModule:
  # Command-line entry point: expects exactly the USFM input directory and
  # the output TSV path; anything else prints the usage and exits with failure.
  if paramCount() == 2:
    generate(paramStr(1), paramStr(2))
  else:
    quit("Usage: generate_kjv_data <usfm-dir> <output-tsv>", QuitFailure)
|
||||
Reference in New Issue
Block a user