Initial VCard 3.0 implementation completed.
This commit is contained in:
parent
c322616747
commit
82dffc1154
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
||||
*.sw?
|
||||
tests/tlexer
|
||||
tests/tvcard3
|
||||
|
10
README.md
10
README.md
@ -0,0 +1,10 @@
|
||||
## Debugging
|
||||
|
||||
*Need to clean up and organize*
|
||||
|
||||
Run `tlexer` tests in gdb:
|
||||
|
||||
```sh
|
||||
$ cd tests
|
||||
$ nim --debuginfo --linedir:on c tlexer
|
||||
$ gdb --tui tlexer
```
|
@ -1,24 +1,32 @@
|
||||
import std/[streams, unicode, unittest]
|
||||
import std/[streams, unicode]
|
||||
|
||||
type VCardLexer* = object of RootObj
  # Buffered lexer state for reading content from a stream. Buffer indices are
  # reduced modulo `buffer.len` by the procs below, i.e. the buffer is used
  # circularly. Each field is declared exactly once.
  input: Stream         # stream the buffer is filled from

  buffer*: string       # buffer of bytes read
  bufStart: int         # starting boundary for the buffer
  bufEnd: int           # ending boundary for the buffer
  pos*: int             # current read position
  bookmark*: int        # bookmark to support rewind functionality (-1 = unset)
  bookmarkVal*: string  # value that has been read since the bookmark was set
  lineNumber*: int      # how many newlines have we seen so far
  lineStart: int        # index into the buffer for the start of the current line
|
||||
|
||||
proc skipUtf8Bom(vcl: var VCardLexer) =
  ## Advances `pos` by three when the buffer begins with the UTF-8 byte-order
  ## mark (EF BB BF); otherwise leaves the lexer untouched.
  # The buffer length is caller-controlled, so guard the three index reads:
  # a buffer shorter than the BOM cannot contain one.
  if vcl.buffer.len >= 3 and
     vcl.buffer[0] == '\xEF' and
     vcl.buffer[1] == '\xBB' and
     vcl.buffer[2] == '\xBF':
    inc(vcl.pos, 3)
|
||||
|
||||
# Maps an index expression onto the circular buffer by reducing it modulo the
# buffer length. Not self-contained: `vcl` is not a parameter, so it is
# resolved where the template is expanded and a lexer named `vcl` must be in
# scope at every use.
template wrappedIdx(idx: untyped): int = idx mod vcl.buffer.len
|
||||
|
||||
proc newStartIdx(vcl: VCardLexer): int =
  ## Index of the oldest byte that still has to be kept in the buffer: the
  ## bookmark when one is set, otherwise the current read position.
  # -1 is the "no bookmark" sentinel; index 0 is a legitimate bookmark and
  # must not be mistaken for "unset".
  if vcl.bookmark >= 0: vcl.bookmark else: vcl.pos
|
||||
|
||||
func isFull(vcl: VCardLexer): bool {.inline.} =
  ## True when the slot after `bufEnd` (wrapping around the end of the buffer)
  ## is the first index that still has to be preserved.
  let slotAfterEnd = wrappedIdx(vcl.bufEnd + 1)
  slotAfterEnd == newStartIdx(vcl)
|
||||
|
||||
func atEnd(vcl: VCardLexer): bool {.inline.} =
  ## True when the read position has caught up with the end of the buffered
  ## data.
  result = (vcl.bufEnd == vcl.pos)
|
||||
|
||||
proc doubleBuffer(vcl: var VCardLexer) =
|
||||
let oldBuf = vcl.buffer
|
||||
vcl.buffer = newString(oldBuf.len * 2)
|
||||
@ -42,21 +50,32 @@ proc fillBuffer(vcl: var VCardLexer) =
|
||||
var charsRead: int
|
||||
|
||||
# check to see if we have a full buffer
|
||||
if (vcl.bufStart == 0 and vcl.bufEnd == vcl.buffer.len) or
|
||||
vcl.bufEnd == vcl.bufStart - 1:
|
||||
vcl.doubleBuffer()
|
||||
if vcl.isFull: vcl.doubleBuffer()
|
||||
|
||||
# discard used portions of the buffer
|
||||
vcl.bufStart = vcl.newStartIdx
|
||||
|
||||
if vcl.bufEnd < vcl.bufStart:
|
||||
charsRead = vcl.input.readDataStr(vcl.buffer, vcl.bufEnd ..< vcl.bufStart)
|
||||
# e s
|
||||
# 0 1 2 3 4 5 6 7 8 9
|
||||
charsRead = vcl.input.readDataStr(vcl.buffer,
|
||||
vcl.bufEnd ..< (vcl.bufStart - 1))
|
||||
vcl.bufEnd += charsRead
|
||||
|
||||
elif vcl.bufStart == 0:
|
||||
# s e
|
||||
# 0 1 2 3 4 5 6 7 8 9
|
||||
charsRead = vcl.input.readDataStr(vcl.buffer,
|
||||
vcl.bufEnd ..< (vcl.buffer.len - 1))
|
||||
vcl.bufEnd = wrappedIdx(vcl.bufEnd + charsRead)
|
||||
|
||||
else:
|
||||
charsRead = vcl.input.readDataStr(vcl.buffer, vcl.bufEnd ..< vcl.buffer.len)
|
||||
vcl.bufEnd += charsRead
|
||||
# s e
|
||||
# 0 1 2 3 4 5 6 7 8 9
|
||||
charsRead = vcl.input.readDataStr(vcl.buffer, vcl.bufEnd..<vcl.buffer.len)
|
||||
if charsRead == vcl.buffer.len - vcl.bufEnd:
|
||||
vcl.bufEnd = vcl.input.readDataStr(vcl.buffer, 0 ..< vcl.bufStart)
|
||||
vcl.bufEnd = vcl.input.readDataStr(vcl.buffer, 0 ..< (vcl.bufStart - 1))
|
||||
|
||||
|
||||
proc close*(vcl: var VCardLexer) =
  ## Closes the input stream backing this lexer.
  close(vcl.input)
|
||||
|
||||
@ -67,6 +86,8 @@ proc open*(vcl: var VCardLexer, input: Stream, bufLen = 16384) =
|
||||
vcl.pos = 0
|
||||
vcl.bookmark = -1
|
||||
vcl.buffer = newString(bufLen)
|
||||
vcl.bufStart = 0
|
||||
vcl.bufEnd = 0
|
||||
vcl.lineNumber = 0
|
||||
vcl.lineStart = 0
|
||||
vcl.fillBuffer
|
||||
@ -74,6 +95,7 @@ proc open*(vcl: var VCardLexer, input: Stream, bufLen = 16384) =
|
||||
|
||||
proc setBookmark*(vcl: var VCardLexer) =
  ## Marks the current read position as the bookmark and starts a fresh,
  ## empty record of the text read from this point on.
  vcl.bookmarkVal = newStringOfCap(32)
  vcl.bookmark = vcl.pos
|
||||
|
||||
proc returnToBookmark*(vcl: var VCardLexer) =
|
||||
vcl.pos = vcl.bookmark
|
||||
@ -83,11 +105,21 @@ proc unsetBookmark*(vcl: var VCardLexer) =
|
||||
vcl.bookmark = -1
|
||||
|
||||
proc readSinceBookmark*(vcl: var VCardLexer): string =
  ## Returns `bookmarkVal`: the string that `setBookmark` resets and that
  ## `read` appends each consumed character to while a bookmark is set.
  # NOTE(review): confirm `readRune` also appends to `bookmarkVal`; that part
  # of the lexer is not visible from here.
  # A previous, disabled approach rebuilt this value by re-reading the buffer
  # from the bookmark up to the current position.
  result = vcl.bookmarkVal
|
||||
|
||||
proc isLineWrap(vcl: var VCardLexer, allowRefill = true): bool =
|
||||
if vcl.buffer[vcl.pos] != '\r': return false
|
||||
@ -105,29 +137,31 @@ proc isLineWrap(vcl: var VCardLexer, allowRefill = true): bool =
|
||||
vcl.buffer[wrappedIdx(vcl.pos + 2)] == ' '
|
||||
|
||||
proc read*(vcl: var VCardLexer, peek = false): char =
  ## Returns the character at the read position, transparently skipping a
  ## folded line break first. With `peek = true` the character is returned
  ## without being consumed and without being recorded for the bookmark.
  if vcl.atEnd: vcl.fillBuffer()

  if vcl.isLineWrap:
    # Step over the three-character fold sequence. The buffer is circular, so
    # the new position has to wrap like every other index update.
    vcl.pos = wrappedIdx(vcl.pos + 3)
    vcl.lineNumber += 1
    vcl.lineStart = vcl.pos
    if vcl.atEnd: vcl.fillBuffer()

  elif vcl.buffer[vcl.pos] == '\n' and not peek:
    # Count the newline only when it is actually consumed; counting it on a
    # peek as well would register the same line break twice.
    vcl.lineNumber += 1
    vcl.lineStart = wrappedIdx(vcl.pos + 1)

  result = vcl.buffer[vcl.pos]

  if not peek:
    # Record consumed text while a bookmark is active (-1 means unset), then
    # advance exactly once.
    if vcl.bookmark != -1: vcl.bookmarkVal.add(result)
    vcl.pos = wrappedIdx(vcl.pos + 1)
|
||||
|
||||
proc readRune*(vcl: var VCardLexer, peek = false): Rune =
|
||||
if vcl.pos == vcl.bufEnd: vcl.fillBuffer()
|
||||
if vcl.atEnd: vcl.fillBuffer()
|
||||
|
||||
if vcl.isLineWrap:
|
||||
vcl.pos += 3
|
||||
vcl.lineNumber += 1
|
||||
vcl.lineStart = vcl.pos
|
||||
if vcl.pos == vcl.bufEnd: vcl.fillBuffer()
|
||||
if vcl.atEnd: vcl.fillBuffer()
|
||||
|
||||
elif vcl.buffer[vcl.pos] == '\n':
|
||||
vcl.lineNumber += 1
|
||||
@ -146,16 +180,175 @@ proc getColNumber*(vcl: VCardLexer, pos: int): int =
|
||||
if vcl.lineStart < pos: return pos - vcl.lineStart
|
||||
else: return (vcl.buffer.len - vcl.lineStart) + pos
|
||||
|
||||
## Unit Tests
|
||||
## ============================================================================
|
||||
|
||||
import std/unittest
|
||||
|
||||
proc dumpLexerState*(l: VCardLexer): string =
  ## Renders the lexer's position fields and raw buffer as text, one
  ## `name = value` entry per line, for debugging.
  let fields = [
    ("pos", $l.pos),
    ("bookmark", $l.bookmark),
    ("lineNumber", $l.lineNumber),
    ("lineStart", $l.lineStart),
    ("bufStart", $l.bufStart),
    ("bufEnd", $l.bufEnd),
    ("buffer", l.buffer)]

  for (label, value) in fields:
    result.add(label & " = " & value & "\p")
|
||||
|
||||
suite "vcard/lexer":
|
||||
|
||||
const longTestString =
  "This is my test string. There are many like it but this one is mine."

proc bufferIs(vcl: VCardLexer, s: string): bool =
  ## True when the buffer contents, read circularly starting at `bufStart`,
  ## begin with `s`.
  # Uncomment to inspect the buffer when a comparison fails:
  # debugEcho vcl.buffer & " : " & $vcl.bufStart & "-" & $vcl.bufEnd
  for i in 0..<s.len:
    # Compare through wrappedIdx so data that wraps past the end of the
    # buffer is still matched in logical order.
    if s[i] != vcl.buffer[wrappedIdx(vcl.bufStart + i)]:
      return false
  return true
|
||||
|
||||
#test "fillBuffer doesn't double the buffer needlessly":
|
||||
# var l: VCardLexer
|
||||
|
||||
proc readExpected(vcl: var VCardLexer, s: string): bool =
  ## Consumes one character from the lexer per character of `s`, stopping at
  ## the first mismatch. True only when every character matched.
  result = true
  for wanted in s:
    if vcl.read != wanted:
      return false
|
||||
|
||||
test "can open and fill buffer":
  # With the default buffer size the whole input fits, so the buffer is not
  # full after opening.
  var l: VCardLexer
  l.open(newStringStream("test"))
  check:
    l.bufferIs("test")
    not l.isFull
    l.readExpected("test")
|
||||
|
||||
test "refills buffer when emptied":
  # A 3-byte buffer cannot hold all of "test", so reading past the first fill
  # has to trigger a refill.
  var lexer: VCardLexer
  lexer.open(newStringStream("test"), 3)
  check:
    lexer.bufferIs("te")
    lexer.isFull
    lexer.read == 't'
    lexer.read == 'e'
    lexer.read == 's'
    lexer.bufferIs("st")
    lexer.read == 't'
|
||||
|
||||
test "isFull correctness":
  # Legend for the diagrams: s = bufStart, e = bufEnd, p = pos.
  var lexer = VCardLexer(
    pos: 0,
    bookmark: -1,
    buffer: "0123456789",
    bufStart: 0,
    bufEnd: 9)

  # s                 e
  # 0 1 2 3 4 5 6 7 8 9
  check lexer.isFull

  # s p               e
  # 0 1 2 3 4 5 6 7 8 9
  discard lexer.read
  check not lexer.isFull

  #     e s
  # 0 1 2 3 4 5 6 7 8 9
  lexer.bufStart = 3
  lexer.pos = 3
  lexer.bufEnd = 2
  check lexer.isFull

  #     e s p
  # 0 1 2 3 4 5 6 7 8 9
  discard lexer.read
  check:
    lexer.pos == 4
    not lexer.isFull

  #                 e s
  # 0 1 2 3 4 5 6 7 8 9
  lexer.bufStart = 9
  lexer.pos = 9
  lexer.bufEnd = 8
  check lexer.isFull

  # p               e s
  # 0 1 2 3 4 5 6 7 8 9
  discard lexer.read
  check:
    lexer.pos == 0
    not lexer.isFull
|
||||
|
||||
test "handles wrapped lines":
  var l: VCardLexer
  l.open(newStringStream("line\r\n wrap\r\nline 2"), 3)

  # Unfolding drops the CRLF together with the single whitespace character
  # that follows it (read skips three characters on a fold), so "line" and
  # "wrap" are joined with no space between them.
  check l.readExpected("linewrap\r\nline 2")
|
||||
|
||||
test "fillBuffer correctness":
  var lexer: VCardLexer
  lexer.open(newStringStream(longTestString), 5)

  # State right after the initial fill, then after consuming two characters.
  check:
    lexer.bufferIs(longTestString[0..<4])
    lexer.isFull
    lexer.bufStart == 0
    lexer.bufEnd == 4
    lexer.pos == 0
    lexer.readExpected("Th")
    not lexer.isFull
    not lexer.atEnd
    lexer.pos == 2

  # An explicit refill moves bufStart up to the read position and wraps
  # bufEnd around to the front of the buffer.
  lexer.fillBuffer
  check:
    lexer.isFull
    lexer.bufEnd == 1
    lexer.pos == 2
    lexer.bufStart == 2
|
||||
|
||||
test "bookmark preserves the buffer":
  var lexer: VCardLexer
  lexer.open(newStringStream(longTestString), 7)

  # Consume five characters before placing the bookmark.
  check:
    lexer.buffer.len == 7
    lexer.bufferIs(longTestString[0..<6])
    lexer.isFull
    lexer.bufEnd == 6
    lexer.pos == 0
    lexer.bookmark == -1
    lexer.readExpected(longTestString[0..<5])
    not lexer.isFull
    not lexer.atEnd
    lexer.pos == 5

  lexer.setBookmark

  # Read enough to require a refill while the bookmark is set.
  check:
    lexer.bookmark == 5
    lexer.readExpected(longTestString[5..<10])
    lexer.pos == 3
    newStartIdx(lexer) == 5
    lexer.buffer.len == 7

  # Rewinding restores the bookmarked position and clears the bookmark.
  lexer.returnToBookmark
  check:
    lexer.bookmark == -1
    lexer.pos == 5
|
||||
|
||||
test "readRune":
  # Peeking must not consume: the first readRune still returns 'T'.
  var lexer: VCardLexer
  lexer.open(newStringStream("TEST"))
  check:
    lexer.bufferIs("TEST")
    lexer.peekRune == Rune('T')
    lexer.readRune == Rune('T')
    lexer.readRune == Rune('E')
    lexer.readRune == Rune('S')
    lexer.readRune == Rune('T')
|
||||
|
40
src/vcard/private/parsercommon.nim
Normal file
40
src/vcard/private/parsercommon.nim
Normal file
@ -0,0 +1,40 @@
|
||||
import options, strutils
|
||||
import ./lexer
|
||||
|
||||
const WSP* = {' ', '\t'}
|
||||
const ALPHA_NUM* = { 'a'..'z', 'A'..'Z', '0'..'9' }
|
||||
|
||||
proc expect*[T](p: var T, expected: string, caseSensitive = false) =
  ## Consumes `expected` from the input, reporting a mismatch through
  ## `p.error`. By default the comparison is rune-wise and ignores case; with
  ## `caseSensitive = true` it is an exact character-by-character comparison.
  ## The bookmark is set on entry so the error message can show what was
  ## actually read, and is cleared once the loop finishes.
  p.setBookmark

  if not caseSensitive:
    for wanted in expected.runes:
      let got = p.readRune
      if got.toLower != wanted.toLower:
        p.error("expected '$1' but found '$2'" %
          [expected, p.readSinceBookmark])

  else:
    for wanted in expected:
      let got = p.read
      if got != wanted:
        p.error("expected '$1' but found '$2'" %
          [expected, p.readSinceBookmark])

  p.unsetBookmark
|
||||
|
||||
proc readGroup*[T](p: var T): Option[string] =
  ## All VCARD content items can be optionally prefixed with a group name. This
  ## scans the input to see if there is a group defined at the current read
  ## location. If there is a valid group, the group name is returned and the
  ## read position is advanced past the '.' to the start of the content type
  ## name. If there is not a valid group the read position is left unchanged.
  ##
  ## A group name is one or more letters, digits, or hyphens (RFC 2426:
  ## `group = 1*(ALPHA / DIGIT / "-")`).

  p.setBookmark

  var nameLen = 0
  var ch = p.read
  while ch in ALPHA_NUM or ch == '-':
    inc nameLen
    ch = p.read

  # A lone '.' with nothing before it is not a group: at least one name
  # character is required.
  if ch == '.' and nameLen > 0:
    # The text since the bookmark ends with the '.', which is not part of the
    # group name.
    result = some(readSinceBookmark(p)[0..^2])
    p.unsetBookmark
  else:
    result = none[string]()
    p.returnToBookmark
|
@ -1,4 +1,39 @@
|
||||
import strutils
|
||||
import sequtils, strutils, times
|
||||
|
||||
const DATE_FMTS = [ "yyyy-MM-dd", "yyyyMMdd" ]
|
||||
const DATE_TIME_FMTS = [
|
||||
"yyyyMMdd'T'HHmmss",
|
||||
"yyyyMMdd'T'HHmmssz",
|
||||
"yyyyMMdd'T'HHmmsszzz",
|
||||
"yyyyMMdd'T'HHmmss'.'fffzzz",
|
||||
"yyyy-MM-dd'T'HH:mm:ss",
|
||||
"yyyy-MM-dd'T'HH:mm:ssz",
|
||||
"yyyy-MM-dd'T'HH:mm:sszzz",
|
||||
"yyyy-MM-dd'T'HH:mm:ss'.'fffzzz",
|
||||
]
|
||||
|
||||
const ALL_FMTS = DATE_FMTS.toSeq & DATE_TIME_FMTS.toSeq
|
||||
|
||||
proc parseDateTimeStr(
    dateStr: string,
    dateFmts: openarray[string]
  ): DateTime {.inline.} =
  ## Parses `dateStr` with the first format in `dateFmts` that accepts it.
  ##
  ## Raises `ValueError` when none of the formats match.

  for fmt in dateFmts:
    try:
      # Stop at the first format that parses; the remaining ones need not be
      # attempted.
      return parse(dateStr, fmt)
    except ValueError:
      # `parse` signals a mismatch with a ValueError subtype. Catch only that
      # so defects (programming errors) are not silently swallowed.
      discard

  raise newException(ValueError, "cannot parse date: " & dateStr )
|
||||
|
||||
proc parseDate*(dateStr: string): DateTime =
  ## Parses a date-only value using the formats listed in `DATE_FMTS`.
  result = parseDateTimeStr(dateStr, DATE_FMTS)
|
||||
|
||||
proc parseDateTime*(dateStr: string): DateTime =
  ## Parses a combined date and time value using the formats listed in
  ## `DATE_TIME_FMTS`.
  result = parseDateTimeStr(dateStr, DATE_TIME_FMTS)
|
||||
|
||||
proc parseDateOrDateTime*(dateStr: string): DateTime =
  ## Parses a value that may be either a date or a date-time, trying every
  ## format in `ALL_FMTS`.
  result = parseDateTimeStr(dateStr, ALL_FMTS)
|
||||
|
||||
func foldContentLine*(s: string): string =
|
||||
result = ""
|
||||
|
864
src/vcard3.nim
864
src/vcard3.nim
File diff suppressed because it is too large
Load Diff
55
tests/tvcard3.nim
Normal file
55
tests/tvcard3.nim
Normal file
@ -0,0 +1,55 @@
|
||||
import options, unittest, vcard3, zero_functional
|
||||
|
||||
suite "vcard/vcard3":
|
||||
|
||||
let testVCard =
|
||||
"BEGIN:VCARD\r\n" &
|
||||
"VERSION:3.0\r\n" &
|
||||
"FN:Mr. John Q. Public\\, Esq.\r\n" &
|
||||
"N:Public;John;Quinlan;Mr.;Esq.\r\n" &
|
||||
"END:VCARD\r\n"
|
||||
|
||||
test "minimal VCard":
  # Parse the shared fixture and spot-check the structured name and the
  # formatted name.
  let card = parseVCard3(testVCard)[0]
  check:
    card.n.family[0] == "Public"
    card.n.given[0] == "John"
    card.fn.value == "Mr. John Q. Public\\, Esq."
|
||||
|
||||
test "serialize minimal VCard":
  # Round trip: stringifying the parsed card must reproduce the input text.
  let card = parseVCard3(testVCard)[0]
  check $card == testVCard
|
||||
|
||||
test "RFC2426 Author's VCards":
  # Two cards separated by blank lines; both ADR properties use folded
  # (continued) lines.
  let authorCards =
    "BEGIN:vCard\r\n" &
    "VERSION:3.0\r\n" &
    "FN:Frank Dawson\r\n" &
    "ORG:Lotus Development Corporation\r\n" &
    "ADR;TYPE=WORK,POSTAL,PARCEL:;;6544 Battleford Drive\r\n" &
    " ;Raleigh;NC;27613-3502;U.S.A.\r\n" &
    "TEL;TYPE=VOICE,MSG,WORK:+1-919-676-9515\r\n" &
    "TEL;TYPE=FAX,WORK:+1-919-676-9564\r\n" &
    "EMAIL;TYPE=INTERNET,PREF:Frank_Dawson@Lotus.com\r\n" &
    "EMAIL;TYPE=INTERNET:fdawson@earthlink.net\r\n" &
    "URL:http://home.earthlink.net/~fdawson\r\n" &
    "END:vCard\r\n" &
    "\r\n" &
    "\r\n" &
    "BEGIN:vCard\r\n" &
    "VERSION:3.0\r\n" &
    "FN:Tim Howes\r\n" &
    "ORG:Netscape Communications Corp.\r\n" &
    "ADR;TYPE=WORK:;;501 E. Middlefield Rd.;Mountain View;\r\n" &
    " CA; 94043;U.S.A.\r\n" &
    "TEL;TYPE=VOICE,MSG,WORK:+1-415-937-3419\r\n" &
    "TEL;TYPE=FAX,WORK:+1-415-528-4164\r\n" &
    "EMAIL;TYPE=INTERNET:howes@netscape.com\r\n" &
    "END:vCard\r\n"

  let parsed = parseVCard3(authorCards)
  check:
    parsed.len == 2
    parsed[0].fn.value == "Frank Dawson"
    parsed[0].email.len == 2
    (parsed[0].email --> find(it.emailType.contains("PREF"))).isSome
|
@ -10,3 +10,4 @@ srcDir = "src"
|
||||
# Dependencies
|
||||
|
||||
requires "nim >= 1.6.6"
|
||||
requires @[ "zero_functional" ]
|
||||
|
Loading…
x
Reference in New Issue
Block a user