Re-organizing code in preparation for v4.0 implementation.
This commit is contained in:
parent
9d030132de
commit
8e58189a8b
@ -1,3 +1,66 @@
|
|||||||
import vcard/vcard3
|
# vCard 3.0 and 4.0 Nim implementation
|
||||||
|
# © 2022 Jonathan Bernard
|
||||||
|
|
||||||
export vcard3
|
## The `vcard` module implements a high-performance vCard parser for both
|
||||||
|
## versions 3.0 (defined by RFCs [2425][rfc2425] and [2426][rfc2426]) and 4.0
|
||||||
|
## (defined by RFC [6350][rfc6350])
|
||||||
|
##
|
||||||
|
## [rfc2425]: https://tools.ietf.org/html/rfc2425
|
||||||
|
## [rfc2426]: https://tools.ietf.org/html/rfc2426
|
||||||
|
## [rfc6350]: https://tools.ietf.org/html/rfc6350
|
||||||
|
import std/[streams, unicode]
|
||||||
|
|
||||||
|
import ./vcard/private/[common, lexer]
|
||||||
|
import ./vcard/[vcard3, vcard4]
|
||||||
|
|
||||||
|
export vcard3, vcard4
|
||||||
|
export common.VC_XParam,
|
||||||
|
common.VCardParsingError,
|
||||||
|
common.VCardVersion,
|
||||||
|
common.VCard
|
||||||
|
|
||||||
|
proc add[T](vc: VCard, content: varargs[T]): void =
  ## Append `content` to `vc` by forwarding to the `add` overload of the
  ## concrete card type selected by `vc.parsedVersion`.
  ##
  ## A checked type conversion is used instead of `cast`, so a card whose
  ## `parsedVersion` does not match its runtime type fails loudly instead of
  ## being reinterpreted as the wrong object layout.
  case vc.parsedVersion
  of VCardV3: add(VCard3(vc), content)
  of VCardV4: add(VCard4(vc), content)
|
||||||
|
|
||||||
|
proc readVCard*(p: var VCardParser): VCard =
  ## Read a single vCard starting at the parser's current position.
  ##
  ## The card must open with an (optionally group-prefixed) `BEGIN:VCARD`
  ## line. The line that follows is inspected, without being consumed, to
  ## choose the concrete type: `VERSION:4.0` yields a `VCard4`, anything else
  ## is treated as a `VCard3`. The content lines are then handed to the
  ## matching `parseContentLines` iterator and added to the result.

  # Read the preamble
  discard p.readGroup
  p.expect("begin:vcard" & CRLF)

  # Peek at the version tag, then rewind so the content-line parser sees it.
  p.setBookmark
  discard p.readGroup
  let isV4 = p.isNext("version:4.0")
  p.returnToBookmark

  if isV4:
    result = VCard4()
    result.parsedVersion = VCardV4
    for content in vcard4.parseContentLines(p): result.add(content)

  else:
    result = VCard3()
    result.parsedVersion = VCardV3

    # vCard 3.0 allows arbitrarily many empty lines after BEGIN and END
    while p.skip(CRLF, true): discard
    for content in vcard3.parseContentLines(p): result.add(content)
    while p.skip(CRLF, true): discard
|
||||||
|
|
||||||
|
proc parseVCards*(input: Stream, filename = "input"): seq[VCard] =
  ## Parse every vCard found in `input`. `filename` is stored on the parser
  ## so that it can be reported in parsing errors.
  var parser: VCardParser
  parser.filename = filename
  lexer.open(parser, input)

  result = @[]
  while true:
    # the lexer reports '\0' once the input is exhausted
    if parser.peek == '\0': break
    result.add(parser.readVCard)
|
||||||
|
|
||||||
|
proc parseVCards*(content: string, filename = "input"): seq[VCard] =
  ## Parse every vCard contained in the in-memory string `content`.
  ## `filename` is only used to label parsing errors.
  let source = newStringStream(content)
  result = parseVCards(source, filename)
|
||||||
|
|
||||||
|
proc parseVCardsFromFile*(filepath: string): seq[VCard] =
  ## Parse every vCard stored in the file at `filepath`.
  ##
  ## Raises `IOError` when the file cannot be opened. (`newFileStream` would
  ## instead hand back `nil`, which then fails far from the actual cause.)
  ## The stream is closed again once parsing finishes or fails.
  let input = openFileStream(filepath, fmRead)
  try:
    result = parseVCards(input, filepath)
  finally:
    input.close()
|
||||||
|
240
src/vcard/private/common.nim
Normal file
240
src/vcard/private/common.nim
Normal file
@ -0,0 +1,240 @@
|
|||||||
|
import std/[macros, options, strutils, unicode]
|
||||||
|
import zero_functional
|
||||||
|
from std/sequtils import toSeq
|
||||||
|
import ./lexer
|
||||||
|
|
||||||
|
type
  # The vCard specification versions this library distinguishes; the string
  # value of each member is the version number as written in a VERSION line.
  VCardVersion* = enum
    VCardV3 = "3.0"
    VCardV4 = "4.0"

  # Lexer state extended with the name of the input, used in error messages.
  VCardParser* = object of VCardLexer
    filename*: string

  # A content-line parameter: its name and every value given for it.
  VCParam* = tuple
    name: string
    values: seq[string]

  # Raised (via `error`) when the input is not a well-formed vCard.
  VCardParsingError* = object of ValueError

  # A single-valued name/value parameter pair.
  VC_XParam* = tuple
    name: string
    value: string

  # Common base of the version-specific card types; records which version
  # the card was parsed as.
  VCard* = ref object of RootObj
    parsedVersion*: VCardVersion
|
||||||
|
|
||||||
|
const
  # Line terminator used by the vCard grammar.
  CRLF* = "\r\n"

  # Character classes used by the lexer and parser.
  WSP* = {' ', '\t'}
  DIGIT* = {'0'..'9'}
  ALPHA_NUM* = {'a'..'z', 'A'..'Z'} + DIGIT
  NON_ASCII* = {'\x80'..'\xFF'}

  # Any printable ASCII character except the double quote.
  QSAFE_CHARS* = WSP + {'!', '#'..'~'} + NON_ASCII

  # Like QSAFE_CHARS but additionally without ',', ':' and ';'.
  SAFE_CHARS* = WSP + {'!', '#'..'+', '-'..'9', '<'..'~'} + NON_ASCII

  # Any printable ASCII character.
  VALUE_CHAR* = WSP + {'!'..'~'} + NON_ASCII
|
||||||
|
|
||||||
|
# Internal Utility/Implementation
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
template findAll*[T, VCT](c: openarray[VCT]): seq[T] =
  ## Collect, in order, every element of `c` whose runtime type is `T`,
  ## converted to `T`.
  ##
  ## Written as a plain loop so instantiating the template does not depend on
  ## `sequtils.filterIt`/`mapIt` being visible, and with a checked conversion
  ## rather than an unchecked `cast`.
  var matches: seq[T] = @[]
  for item in c:
    if item of T: matches.add(T(item))
  matches
|
||||||
|
|
||||||
|
template findFirst*[T, VCT](c: openarray[VCT]): Option[T] =
  ## Return the first element of `c` whose runtime type is `T` (converted to
  ## `T`), or `none` when there is no such element.
  ##
  ## The scan stops at the first match instead of filtering and converting
  ## the whole collection, and uses a checked conversion rather than `cast`.
  var first = none[T]()
  for item in c:
    if item of T:
      first = some(T(item))
      break
  first
|
||||||
|
|
||||||
|
macro assignFields*(assign: untyped, fields: varargs[untyped]): untyped =
  ## Append a `name: name` argument to the call/constructor expression
  ## `assign` for every identifier in `fields`, so that
  ## `assignFields(Foo(), a, b)` becomes `Foo(a: a, b: b)`.
  result = assign
  for field in fields:
    result.add(newTree(nnkExprColonExpr, field, field))
|
||||||
|
|
||||||
|
# Output
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
func serialize*(s: seq[VC_XParam]): string =
  ## Render each parameter as `;name=value` and concatenate the pieces in
  ## order. An empty list yields the empty string.
  for param in s:
    result.add(";")
    result.add(param.name)
    result.add("=")
    result.add(param.value)
|
||||||
|
|
||||||
|
# Parsing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
proc error*(p: VCardParser, msg: string) =
  ## Abort parsing by raising a `VCardParsingError`. The message has the form
  ## `filename(line, column) Error: msg`, built from the parser's file name
  ## and its current line and column.
  ##
  ## The stray `] ` that used to trail every message has been removed.
  raise newException(VCardParsingError, "$1($2, $3) Error: $4" %
    [p.filename, $p.lineNumber, $p.getColNumber(p.pos), msg])
|
||||||
|
|
||||||
|
proc isNext*[T](p: var T, expected: string, caseSensitive = false): bool =
  ## Report whether the upcoming input matches `expected`. The read position
  ## is always restored afterwards, whether or not the text matched.
  ##
  ## With `caseSensitive` the comparison is done char by char; otherwise it
  ## is done rune by rune on the lower-cased runes.
  p.setBookmark
  result = true

  block matching:
    if caseSensitive:
      for wanted in expected:
        if p.read != wanted:
          result = false
          break matching
    else:
      for wanted in expected.runes:
        if p.readRune.toLower != wanted.toLower:
          result = false
          break matching

  p.returnToBookmark
|
||||||
|
|
||||||
|
proc expect*[T](p: var T, expected: string, caseSensitive = false) =
  ## Consume `expected` from the input, or report a parsing error (through
  ## `p.error`) that quotes what was read since the start of the attempt.
  ##
  ## With `caseSensitive` the comparison is done char by char; otherwise it
  ## is done rune by rune on the lower-cased runes.
  p.setBookmark

  if not caseSensitive:
    for wanted in expected.runes:
      let got = p.readRune
      if got.toLower != wanted.toLower:
        p.error("expected '$1' but found '$2'" %
          [expected, p.readSinceBookmark])
  else:
    for wanted in expected:
      let got = p.read
      if got != wanted:
        p.error("expected '$1' but found '$2'" %
          [expected, p.readSinceBookmark])

  p.unsetBookmark
|
||||||
|
|
||||||
|
proc readGroup*[T](p: var T): Option[string] =
  ## All VCARD content items can be optionally prefixed with a group name.
  ## This scans the input to see if there is a group defined at the current
  ## read location. If there is a valid group, the group name is returned and
  ## the read position is advanced past the '.' to the start of the content
  ## type name. If there is not a valid group the read position is left
  ## unchanged.
  ##
  ## A group name is one or more letters, digits or hyphens; an empty name
  ## (a leading '.') is not treated as a group.
  const GroupChars = ALPHA_NUM + {'-'}

  p.setBookmark
  var nameLen = 0
  var ch = p.read
  while ch in GroupChars:
    inc nameLen
    ch = p.read

  if ch == '.' and nameLen > 0:
    # drop the trailing '.' that was consumed along with the name
    result = some(readSinceBookmark(p)[0..^2])
    p.unsetBookmark
  else:
    result = none[string]()
    p.returnToBookmark
|
||||||
|
|
||||||
|
proc readName*(p: var VCardParser): string =
  ## Read a name at the current read position and return it upper-cased, or
  ## report a parsing error if no name character is present. Content types
  ## and parameters share one definition of a valid name (letters, digits
  ## and '-'), so this is used for both.
  const NameChars = ALPHA_NUM + {'-'}

  p.setBookmark
  while p.peek in NameChars:
    discard p.read

  result = toUpper(p.readSinceBookmark)
  if result.len == 0:
    p.error("expected to read a name but found '$1'" % [$p.peek])
  p.unsetBookmark
|
||||||
|
|
||||||
|
proc readValue*(p: var VCardParser): string =
  ## Consume and return the run of `VALUE_CHAR` characters starting at the
  ## current read position. The result is empty when the next character is
  ## not a value character.
  p.setBookmark
  while true:
    if p.peek notin VALUE_CHAR: break
    discard p.read
  result = p.readSinceBookmark
  p.unsetBookmark
|
||||||
|
|
||||||
|
proc skip*(p: var VCardParser, count: int): bool =
  ## Unconditionally consume `count` characters from the input.
  ##
  ## Returns `true` once the characters have been consumed, in line with the
  ## string-matching `skip` overload. Previously the result was never
  ## assigned, so the proc always reported `false`.
  for _ in 0..<count: discard p.read
  result = true
|
||||||
|
|
||||||
|
proc skip*(p: var VCardParser, expected: string, caseSensitive = false): bool =
  ## Consume `expected` if it is next in the input and return `true`;
  ## otherwise restore the read position and return `false`.
  ##
  ## With `caseSensitive` the comparison is done char by char; otherwise it
  ## is done rune by rune on the lower-cased runes.
  p.setBookmark
  var matched = true

  if caseSensitive:
    for wanted in expected:
      if p.read != wanted:
        matched = false
        break
  else:
    for wanted in expected.runes:
      if p.readRune.toLower != wanted.toLower:
        matched = false
        break

  if matched: p.unsetBookmark
  else: p.returnToBookmark
  return matched
|
||||||
|
|
||||||
|
proc existsWithValue*(
    params: openarray[VCParam],
    name, value: string,
    caseSensitive = false
  ): bool =
  ## Report whether `params` contains a parameter called `name` that has
  ## exactly one value and that value equals `value`. Unless `caseSensitive`
  ## is set, the two values are compared after lower-casing both; the
  ## parameter name is always compared exactly.

  let wanted = if caseSensitive: value else: value.toLower

  for param in params:
    if param.name != name or param.values.len != 1: continue
    let actual =
      if caseSensitive: param.values[0]
      else: param.values[0].toLower
    if actual == wanted: return true

  return false
|
||||||
|
|
||||||
|
proc getMultipleValues*(
    params: openarray[VCParam],
    name: string
  ): seq[string] =
  ## Gather every value of every parameter called `name` into one list,
  ## preserving order. Multi-valued parameters may be written either as
  ##
  ## - TYPE=work,cell,voice
  ## - TYPE=work;TYPE=cell;TYPE=voice
  ##
  ## or as a mix of both; all of these produce the same flat list here.

  result = @[]
  for param in params:
    if param.name == name:
      result.add(param.values)
|
||||||
|
|
||||||
|
proc getSingleValue*(
    params: openarray[VCParam],
    name: string
  ): Option[string] =
  ## Return the first value of the first parameter called `name`, or `none`
  ## if there is no such parameter or it carries no values.
  #
  # Parameters are always stored with a list of values so that multi-valued
  # parameters can be represented; this is the convenience accessor for the
  # common single-valued case.

  result = none[string]()
  for param in params:
    if param.name != name: continue
    # only the first parameter with this name is ever considered
    if param.values.len > 0: result = some(param.values[0])
    break
|
||||||
|
|
||||||
|
proc validateNoParameters*(
    p: VCardParser,
    params: openarray[VCParam],
    name: string
  ) =
  ## Report a parsing error naming the content type `name` if any parameter
  ## was supplied; do nothing when `params` is empty.

  if params.len == 0: return
  p.error("no parameters allowed on the $1 content type" % [name])
|
||||||
|
|
||||||
|
proc validateRequiredParameters*(
    p: VCardParser,
    params: openarray[VCParam],
    expectations: openarray[tuple[name: string, value: string]]
  ) =
  ## Some content types only allow specific values for certain parameters.
  ## For example, the SOURCE content type requires that the VALUE parameter
  ## be set to "uri" if it is present. For each expectation this reports a
  ## parsing error when the parameter is present with a different value than
  ## expected; absent parameters are accepted.

  for expectation in expectations:
    let actual = params.getSingleValue(expectation.name)
    if actual.isNone or actual.get == expectation.value: continue
    p.error("parameter '$1' must have the value '$2'" %
      [expectation.name, expectation.value])
|
@ -1,40 +0,0 @@
|
|||||||
import options, strutils
|
|
||||||
import ./lexer
|
|
||||||
|
|
||||||
const WSP* = {' ', '\t'}
|
|
||||||
const ALPHA_NUM* = { 'a'..'z', 'A'..'Z', '0'..'9' }
|
|
||||||
|
|
||||||
proc expect*[T](p: var T, expected: string, caseSensitive = false) =
  ## Consume `expected` from the input, or report a parsing error (through
  ## `p.error`) that quotes what was read since the start of the attempt.
  ##
  ## With `caseSensitive` the comparison is done char by char; otherwise it
  ## is done rune by rune on the lower-cased runes.
  p.setBookmark

  if not caseSensitive:
    for wanted in expected.runes:
      let got = p.readRune
      if got.toLower != wanted.toLower:
        p.error("expected '$1' but found '$2'" %
          [expected, p.readSinceBookmark])
  else:
    for wanted in expected:
      let got = p.read
      if got != wanted:
        p.error("expected '$1' but found '$2'" %
          [expected, p.readSinceBookmark])

  p.unsetBookmark
|
|
||||||
|
|
||||||
proc readGroup*[T](p: var T): Option[string] =
  ## All VCARD content items can be optionally prefixed with a group name.
  ## This scans the input to see if there is a group defined at the current
  ## read location. If there is a valid group, the group name is returned and
  ## the read position is advanced past the '.' to the start of the content
  ## type name. If there is not a valid group the read position is left
  ## unchanged.
  ##
  ## A group name is one or more letters, digits or hyphens; an empty name
  ## (a leading '.') is not treated as a group.
  const GroupChars = ALPHA_NUM + {'-'}

  p.setBookmark
  var nameLen = 0
  var ch = p.read
  while ch in GroupChars:
    inc nameLen
    ch = p.read

  if ch == '.' and nameLen > 0:
    # drop the trailing '.' that was consumed along with the name
    result = some(readSinceBookmark(p)[0..^2])
    p.unsetBookmark
  else:
    result = none[string]()
    p.returnToBookmark
|
|
1039
src/vcard/vcard3.nim
1039
src/vcard/vcard3.nim
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,7 @@
|
|||||||
import options, unittest, zero_functional
|
import options, unittest, zero_functional
|
||||||
|
|
||||||
import ./vcard
|
import ./vcard
|
||||||
|
import ./vcard/vcard3
|
||||||
|
|
||||||
suite "vcard/vcard3":
|
suite "vcard/vcard3":
|
||||||
|
|
||||||
@ -8,7 +9,8 @@ suite "vcard/vcard3":
|
|||||||
runVcard3PrivateTests()
|
runVcard3PrivateTests()
|
||||||
|
|
||||||
let jdbVCard = readFile("tests/jdb.vcf")
|
let jdbVCard = readFile("tests/jdb.vcf")
|
||||||
let jdb = parseVCard3(jdbVCard)[0]
|
# TODO: remove cast after finishing VCard4 implementation
|
||||||
|
let jdb = cast[VCard3](parseVCards(jdbVCard)[0])
|
||||||
|
|
||||||
test "parseVCard3":
|
test "parseVCard3":
|
||||||
check:
|
check:
|
||||||
@ -17,7 +19,7 @@ suite "vcard/vcard3":
|
|||||||
jdb.fn.value == "Jonathan Bernard"
|
jdb.fn.value == "Jonathan Bernard"
|
||||||
|
|
||||||
test "parseVCard3File":
|
test "parseVCard3File":
|
||||||
let jdb = parseVCard3File("tests/jdb.vcf")[0]
|
let jdb = cast[VCard3](parseVCardsFromFile("tests/jdb.vcf")[0])
|
||||||
check:
|
check:
|
||||||
jdb.email.len == 7
|
jdb.email.len == 7
|
||||||
jdb.email[0].value == "jonathan@jdbernard.com"
|
jdb.email[0].value == "jonathan@jdbernard.com"
|
||||||
@ -70,9 +72,9 @@ suite "vcard/vcard3":
|
|||||||
"EMAIL;TYPE=INTERNET:howes@netscape.com\r\n" &
|
"EMAIL;TYPE=INTERNET:howes@netscape.com\r\n" &
|
||||||
"END:vCard\r\n"
|
"END:vCard\r\n"
|
||||||
|
|
||||||
let vcards = parseVCard3(vcardsStr)
|
let vcards = parseVCards(vcardsStr)
|
||||||
check:
|
check:
|
||||||
vcards.len == 2
|
vcards.len == 2
|
||||||
vcards[0].fn.value == "Frank Dawson"
|
cast[VCard3](vcards[0]).fn.value == "Frank Dawson"
|
||||||
vcards[0].email.len == 2
|
cast[VCard3](vcards[0]).email.len == 2
|
||||||
(vcards[0].email --> find(it.emailType.contains("PREF"))).isSome
|
(cast[VCard3](vcards[0]).email --> find(it.emailType.contains("PREF"))).isSome
|
||||||
|
Loading…
x
Reference in New Issue
Block a user