WIP vcard 3.0 lexer and parser.
This commit is contained in:
parent
118f7f5a45
commit
2bc5d5c74f
147
src/vcard/private/lexer.nim
Normal file
147
src/vcard/private/lexer.nim
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
import std/[streams, unicode]
|
||||||
|
|
||||||
|
type VCardLexer* = object of RootObj
  ## Buffered character source for the vCard parser. Bytes pulled from
  ## `input` are held in `buffer`, whose occupied region is delimited by
  ## `bufStart` and `bufEnd`.
  input: Stream       ## stream the bytes are read from

  # buffer storage and its boundaries
  buffer*: string     ## buffer of bytes read
  bufStart: int       ## starting boundary for the buffer
  bufEnd: int         ## ending boundary for the buffer

  # cursor state
  pos*: int           ## current read position
  bookmark*: int      ## bookmark to support rewind functionality

  # line bookkeeping
  lineNumber*: int    ## how many newlines have we seen so far
  lineStart: int      ## index into the buffer for the start of the current line
|
||||||
|
|
||||||
|
proc skipUtf8Bom(vcl: var VCardLexer) =
  ## Advance `pos` by three when the buffer begins with the UTF-8 byte order
  ## mark (EF BB BF); otherwise leave the lexer untouched.
  ##
  ## A buffer shorter than three bytes cannot hold the mark, so it is skipped
  ## without probing; this keeps a tiny `bufLen` from indexing out of bounds.
  const utf8Bom = "\xEF\xBB\xBF"
  if vcl.buffer.len >= utf8Bom.len and
      vcl.buffer[0] == utf8Bom[0] and
      vcl.buffer[1] == utf8Bom[1] and
      vcl.buffer[2] == utf8Bom[2]:
    inc(vcl.pos, utf8Bom.len)
|
||||||
|
|
||||||
|
proc newStartIdx(vcl: VCardLexer): int =
  ## Index of the oldest byte that still has to be kept in the buffer: the
  ## bookmark when one is set, the current read position otherwise.
  ##
  ## An unset bookmark is stored as -1, so 0 is a valid bookmark and must be
  ## honoured; otherwise the bookmarked bytes could be discarded.
  if vcl.bookmark >= 0: vcl.bookmark else: vcl.pos
|
||||||
|
|
||||||
|
proc doubleBuffer(vcl: var VCardLexer) =
  ## Replace `buffer` with one twice as large. The occupied bytes (from
  ## `bufStart` up to, but not including, `bufEnd`, continuing around the end
  ## of the old buffer when the region wraps) are copied to the front of the
  ## new buffer, and `pos`, `lineStart`, `bookmark`, `bufStart` and `bufEnd`
  ## are re-based to match.
  let
    oldBuf = vcl.buffer
    oldLen = oldBuf.len
    oldStart = vcl.bufStart
    oldEnd = vcl.bufEnd
    # The occupied region wraps when it ends at or before its own start.
    # This proc is only invoked on a full buffer, so start == end is read as
    # "completely full" (as the original copy loop did), never as "empty".
    wraps = oldEnd <= oldStart
    # Counting the bytes up front gives the copy loop a definite bound. The
    # old index-equality exit test could never be met when bufEnd == len,
    # because the source index is reduced modulo len.
    liveLen =
      if wraps: oldLen - oldStart + oldEnd
      else: oldEnd - oldStart

  vcl.buffer = newString(oldLen * 2)
  for newIdx in 0 ..< liveLen:
    vcl.buffer[newIdx] = oldBuf[(oldStart + newIdx) mod oldLen]

  # Translate an index in the old buffer to the matching index in the new
  # one. Indices in the wrapped-around part sit *after* the tail of the old
  # buffer in the new layout. Anything else is shifted by the old start; this
  # may go negative for a `lineStart` that was already discarded, which keeps
  # `pos - lineStart` intact.
  template rebased(idx: int): int =
    if wraps and idx < oldStart and idx <= oldEnd: idx + oldLen - oldStart
    else: idx - oldStart

  vcl.pos = rebased(vcl.pos)
  vcl.lineStart = rebased(vcl.lineStart)
  if vcl.bookmark >= 0: vcl.bookmark = rebased(vcl.bookmark)
  vcl.bufStart = 0
  vcl.bufEnd = liveLen
|
||||||
|
|
||||||
|
proc fillBuffer(vcl: var VCardLexer) =
  ## Pull more bytes from `input` into the buffer.
  ##
  ## The buffer is grown first when it is already full, then everything
  ## before the bookmark (or before `pos` when no bookmark is set) is marked
  ## reusable, and finally the free space is filled: the gap between `bufEnd`
  ## and `bufStart` when the occupied region wraps, otherwise the tail of the
  ## buffer followed by the space in front of `bufStart`.

  # check to see if we have a full buffer
  if (vcl.bufStart == 0 and vcl.bufEnd == vcl.buffer.len) or
      vcl.bufEnd == vcl.bufStart - 1:
    vcl.doubleBuffer()

  # discard used portions of the buffer
  vcl.bufStart = vcl.newStartIdx

  if vcl.bufEnd < vcl.bufStart:
    # Stop one slot short of bufStart: the "full" test above is
    # bufEnd == bufStart - 1, and bufEnd == bufStart would be
    # indistinguishable from an empty buffer.
    let charsRead = vcl.input.readDataStr(
      vcl.buffer, vcl.bufEnd ..< (vcl.bufStart - 1))
    vcl.bufEnd += charsRead

  else:
    # Measure the free tail *before* bufEnd moves; comparing against the
    # updated bufEnd made the wrap test fire on half-filled tails and
    # overwrite bufEnd with the size of an unrelated read.
    let tailLen = vcl.buffer.len - vcl.bufEnd
    var charsRead = 0
    if tailLen > 0:
      charsRead = vcl.input.readDataStr(
        vcl.buffer, vcl.bufEnd ..< vcl.buffer.len)
      vcl.bufEnd += charsRead

    # The tail is completely filled and there is reusable space in front of
    # bufStart: wrap around and keep reading there (again leaving one slot
    # free). With bufStart == 0 there is nothing to wrap into, so bufEnd
    # stays at buffer.len and the next call will grow the buffer.
    if charsRead == tailLen and vcl.bufStart > 0:
      vcl.bufEnd = vcl.input.readDataStr(
        vcl.buffer, 0 ..< (vcl.bufStart - 1))
|
||||||
|
|
||||||
|
proc close*(vcl: var VCardLexer) =
  ## Close the stream this lexer reads from.
  close(vcl.input)
|
||||||
|
|
||||||
|
proc open*(vcl: var VCardLexer, input: Stream, bufLen = 16384) =
  ## Attach the lexer to `input` and prime it for reading.
  ##
  ## All cursor and buffer state is reset (so a lexer value can be reused for
  ## a second stream), a fresh buffer of `bufLen` bytes is allocated and
  ## filled from the stream, and a leading UTF-8 byte order mark is skipped.
  ##
  ## `bufLen` must be positive and `input` must not be nil; both are checked
  ## with `doAssert` so the checks survive builds that strip plain `assert`.
  doAssert(bufLen > 0)
  doAssert(input != nil)

  vcl.input = input
  vcl.pos = 0
  vcl.bookmark = -1          # -1 marks "no bookmark set"
  vcl.buffer = newString(bufLen)
  vcl.bufStart = 0           # stale boundaries from an earlier stream would
  vcl.bufEnd = 0             # make fillBuffer treat the new buffer as occupied
  vcl.lineNumber = 0
  vcl.lineStart = 0

  vcl.fillBuffer
  vcl.skipUtf8Bom
|
||||||
|
|
||||||
|
proc setBookmark*(vcl: var VCardLexer) =
  ## Record the current read position as the bookmark.
  let here = vcl.pos
  vcl.bookmark = here
|
||||||
|
|
||||||
|
proc returnToBookmark*(vcl: var VCardLexer) =
  ## Move the read position back to the bookmark and clear the bookmark.
  let target = vcl.bookmark
  vcl.bookmark = -1
  vcl.pos = target
|
||||||
|
|
||||||
|
proc unsetBookmark*(vcl: var VCardLexer) =
  ## Clear the bookmark without moving the read position.
  const noBookmark = -1
  vcl.bookmark = noBookmark
|
||||||
|
|
||||||
|
proc readSinceBookmark*(vcl: var VCardLexer): string =
  ## Return the bytes from the bookmark (inclusive) up to the current read
  ## position (exclusive). The bookmark itself is left in place.
  if vcl.pos < vcl.bookmark:
    # The read position has wrapped around the end of the buffer: take the
    # tail starting at the bookmark, then the head up to the position.
    vcl.buffer[vcl.bookmark ..< vcl.buffer.len] & vcl.buffer[0 ..< vcl.pos]
  else:
    # Contiguous case: the slice runs bookmark -> pos. (It was previously
    # written pos -> bookmark, which is always empty.)
    vcl.buffer[vcl.bookmark ..< vcl.pos]
|
||||||
|
|
||||||
|
# Reduce `idx` modulo the length of the lexer's buffer. The template refers to
# `vcl` directly, so a lexer with that name must be in scope wherever it is
# expanded.
template wrappedIdx(idx: untyped): int =
  idx mod len(vcl.buffer)
|
||||||
|
|
||||||
|
proc isLineWrap(vcl: var VCardLexer, allowRefill = true): bool =
  ## Report whether the read position sits on a folded-line marker: a CR LF
  ## pair immediately followed by a single white space character.
  ##
  ## Both space and horizontal tab are accepted after the CR LF; previously
  ## only space was recognised, so tab-folded lines were not unfolded.
  ##
  ## When fewer than three characters appear to be buffered, the buffer is
  ## refilled once (unless `allowRefill` is false) and the test is repeated.
  if vcl.buffer[vcl.pos] != '\r': return false

  # less than three characters in the buffer
  if wrappedIdx(vcl.pos + 3) > vcl.bufEnd:
    if not allowRefill: return false
    vcl.fillBuffer()
    return vcl.isLineWrap(false)

  # at least three characters in the buffer
  return vcl.buffer[wrappedIdx(vcl.pos + 1)] == '\n' and
    vcl.buffer[wrappedIdx(vcl.pos + 2)] in {' ', '\t'}
|
||||||
|
|
||||||
|
proc read*(vcl: var VCardLexer, peek = false): char =
  ## Return the next character, transparently skipping a folded-line marker
  ## (CR LF + white space) if the read position is on one. With
  ## `peek = true` the character is returned without being consumed.
  ##
  ## Line bookkeeping for a plain `'\n'` is done only when the character is
  ## actually consumed. Doing it while peeking as well counted the same
  ## newline twice on a peek-then-read sequence.
  if vcl.pos == vcl.bufEnd: vcl.fillBuffer()

  if vcl.isLineWrap:
    # The fold itself is skipped even when peeking: it is not part of the
    # logical content, and once skipped it cannot be counted again.
    vcl.pos += 3
    vcl.lineNumber += 1
    vcl.lineStart = vcl.pos
    if vcl.pos == vcl.bufEnd: vcl.fillBuffer()

  elif vcl.buffer[vcl.pos] == '\n' and not peek:
    vcl.lineNumber += 1
    vcl.lineStart = wrappedIdx(vcl.pos + 1)

  result = vcl.buffer[vcl.pos]
  if not peek: vcl.pos = wrappedIdx(vcl.pos + 1)
|
||||||
|
|
||||||
|
proc readRune*(vcl: var VCardLexer, peek = false): Rune =
  ## Return the next UTF-8 rune, transparently skipping a folded-line marker
  ## (CR LF + white space) if the read position is on one. With
  ## `peek = true` the rune is returned without being consumed.
  ##
  ## Line bookkeeping for a plain `'\n'` is done only when the rune is
  ## actually consumed. Doing it while peeking as well counted the same
  ## newline twice on a peek-then-read sequence.
  if vcl.pos == vcl.bufEnd: vcl.fillBuffer()

  if vcl.isLineWrap:
    # The fold itself is skipped even when peeking: it is not part of the
    # logical content, and once skipped it cannot be counted again.
    vcl.pos += 3
    vcl.lineNumber += 1
    vcl.lineStart = vcl.pos
    if vcl.pos == vcl.bufEnd: vcl.fillBuffer()

  elif vcl.buffer[vcl.pos] == '\n' and not peek:
    vcl.lineNumber += 1
    vcl.lineStart = wrappedIdx(vcl.pos + 1)

  result = vcl.buffer.runeAt(vcl.pos)
  # NOTE(review): unlike `read`, the position is advanced without
  # `wrappedIdx`; confirm a rune can never straddle the end of the buffer.
  if not peek: vcl.pos += vcl.buffer.runeLenAt(vcl.pos)
|
||||||
|
|
||||||
|
proc peek*(vcl: var VCardLexer): char =
  ## Look at the next character without consuming it; delegates to `read`
  ## with `peek = true`.
  result = read(vcl, peek = true)
|
||||||
|
|
||||||
|
proc peekRune*(vcl: var VCardLexer): Rune =
  ## Look at the next rune without consuming it; delegates to `readRune`
  ## with `peek = true`.
  result = readRune(vcl, peek = true)
|
||||||
|
|
||||||
|
proc getColNumber*(vcl: VCardLexer, pos: int): int =
  ## Zero-based column of buffer index `pos`, measured from the start of the
  ## current line.
  ##
  ## When `pos` lies before `lineStart` the line is taken to have wrapped
  ## around the end of the buffer. `pos == lineStart` is the first column of
  ## the line (0), not a wrap of a whole buffer length.
  if vcl.lineStart <= pos: return pos - vcl.lineStart
  else: return (vcl.buffer.len - vcl.lineStart) + pos
|
251
src/vcard3.nim
251
src/vcard3.nim
@ -9,10 +9,10 @@
|
|||||||
## [rfc2426]: https://tools.ietf.org/html/rfc2426
|
## [rfc2426]: https://tools.ietf.org/html/rfc2426
|
||||||
## [rfc6350]: https://tools.ietf.org/html/rfc6350
|
## [rfc6350]: https://tools.ietf.org/html/rfc6350
|
||||||
|
|
||||||
import std/base64, std/lexbase, std/macros, std/options, std/sequtils,
|
import std/[base64, lexbase, macros, options, sequtils, streams, strutils,
|
||||||
std/streams, std/strutils, std/times
|
times, unicode]
|
||||||
|
|
||||||
import vcard/private/util
|
import vcard/private/[util, lexer]
|
||||||
|
|
||||||
type
|
type
|
||||||
#[
|
#[
|
||||||
@ -221,6 +221,7 @@ type
|
|||||||
nextContentId: int
|
nextContentId: int
|
||||||
content*: seq[VC3_Content]
|
content*: seq[VC3_Content]
|
||||||
|
|
||||||
|
const CRLF = "\r\n"
|
||||||
const DATE_FMT = "yyyy-MM-dd"
|
const DATE_FMT = "yyyy-MM-dd"
|
||||||
const DATETIME_FMT = "yyyy-MM-dd'T'HH:mm:sszz"
|
const DATETIME_FMT = "yyyy-MM-dd'T'HH:mm:sszz"
|
||||||
|
|
||||||
@ -252,11 +253,6 @@ macro assignFields(assign: untyped, fields: varargs[untyped]): untyped =
|
|||||||
# Initializers
|
# Initializers
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
func clone(vc3: VCard3): VCard3 =
|
|
||||||
result = VCard3(
|
|
||||||
nextContentId: vc3.nextContentId,
|
|
||||||
content: vc3.content)
|
|
||||||
|
|
||||||
func newVC3_Name*(value: string, group = none[string]()): VC3_Name =
|
func newVC3_Name*(value: string, group = none[string]()): VC3_Name =
|
||||||
return VC3_Name(name: "NAME", value: value, group: group)
|
return VC3_Name(name: "NAME", value: value, group: group)
|
||||||
|
|
||||||
@ -434,21 +430,23 @@ func newVC3_Org*(
|
|||||||
value: seq[string],
|
value: seq[string],
|
||||||
isPText = false,
|
isPText = false,
|
||||||
language = none[string](),
|
language = none[string](),
|
||||||
xParams: seq[VC3_XParam] = @[]): VC3_Org =
|
xParams: seq[VC3_XParam] = @[],
|
||||||
|
group = none[string]()): VC3_Org =
|
||||||
|
|
||||||
return assignFields(
|
return assignFields(
|
||||||
VC3_Org(name: "ORG"),
|
VC3_Org(name: "ORG"),
|
||||||
value, isPText, language, xParams)
|
value, isPText, language, xParams, group)
|
||||||
|
|
||||||
func newVC3_Categories*(
|
func newVC3_Categories*(
|
||||||
value: seq[string],
|
value: seq[string],
|
||||||
isPText = false,
|
isPText = false,
|
||||||
language = none[string](),
|
language = none[string](),
|
||||||
xParams: seq[VC3_XParam] = @[]): VC3_Categories =
|
xParams: seq[VC3_XParam] = @[],
|
||||||
|
group = none[string]()): VC3_Categories =
|
||||||
|
|
||||||
return assignFields(
|
return assignFields(
|
||||||
VC3_Categories(name: "CATEGORIES"),
|
VC3_Categories(name: "CATEGORIES"),
|
||||||
value, isPText, language, xParams)
|
value, isPText, language, xParams, group)
|
||||||
|
|
||||||
func newVC3_Note*(
|
func newVC3_Note*(
|
||||||
value: string,
|
value: string,
|
||||||
@ -688,6 +686,7 @@ func updateOrAdd*[T](vc3: var VCard3, content: seq[T]): VCard3 =
|
|||||||
if existingIdx < 0: vc3.content.add(c)
|
if existingIdx < 0: vc3.content.add(c)
|
||||||
else: c.content[existingIdx] = c
|
else: c.content[existingIdx] = c
|
||||||
|
|
||||||
|
#[
|
||||||
func setName*(vc3: var VCard3, name: string, group = none[string]()): void =
|
func setName*(vc3: var VCard3, name: string, group = none[string]()): void =
|
||||||
var name = newVC3_Name(name, group)
|
var name = newVC3_Name(name, group)
|
||||||
vc3.setContent(name)
|
vc3.setContent(name)
|
||||||
@ -1074,12 +1073,81 @@ func addLogo*(
|
|||||||
result = vc3
|
result = vc3
|
||||||
result.addLogo(logo, valueType, binaryType, isInline, group)
|
result.addLogo(logo, valueType, binaryType, isInline, group)
|
||||||
|
|
||||||
func setAgent
|
func setAgent*(
|
||||||
|
vc3: var VCard3,
|
||||||
|
agent: string,
|
||||||
|
isInline = true,
|
||||||
|
group = none[string]()): void =
|
||||||
|
|
||||||
|
var c = newVC3_Agent(agent, isInline, group)
|
||||||
|
vc3.add(c)
|
||||||
|
|
||||||
|
func setAgent*(
|
||||||
|
vc3: VCard3,
|
||||||
|
agent: string,
|
||||||
|
isInline = true,
|
||||||
|
group = none[string]()): VCard3 =
|
||||||
|
|
||||||
|
result = vc3
|
||||||
|
result.setAgent(agent, isInline, group)
|
||||||
|
|
||||||
|
func setOrg*(
|
||||||
|
vc3: var VCard3,
|
||||||
|
org: seq[string],
|
||||||
|
isPText = false,
|
||||||
|
language = none[string](),
|
||||||
|
xParams: seq[VC3_XParam] = @[],
|
||||||
|
group = none[string]()): void =
|
||||||
|
|
||||||
|
var c = newVC3_Org(org, isPText, language, xParams, group)
|
||||||
|
vc3.setContent(c)
|
||||||
|
|
||||||
|
func setOrg*(
|
||||||
|
vc3: VCard3,
|
||||||
|
org: seq[string],
|
||||||
|
isPText = false,
|
||||||
|
language = none[string](),
|
||||||
|
xParams: seq[VC3_XParam] = @[],
|
||||||
|
group = none[string]()): VCard3 =
|
||||||
|
|
||||||
|
result = vc3
|
||||||
|
result.setOrg(org, isPText, language, xParams, group)
|
||||||
|
|
||||||
|
func setCategories*(
|
||||||
|
vc3: var VCard3,
|
||||||
|
categories: seq[string],
|
||||||
|
isPText = false,
|
||||||
|
language = none[string](),
|
||||||
|
xParams: seq[VC3_XParam] = @[],
|
||||||
|
group = none[string]()): void =
|
||||||
|
|
||||||
|
var c = newVC3_Categories(categories, isPText, language, xParams, group)
|
||||||
|
vc3.setContent(c)
|
||||||
|
|
||||||
|
func setCategories*(
|
||||||
|
vc3: VCard3,
|
||||||
|
categories: seq[string],
|
||||||
|
isPText = false,
|
||||||
|
language = none[string](),
|
||||||
|
xParams: seq[VC3_XParam] = @[],
|
||||||
|
group = none[string]()): VCard3 =
|
||||||
|
|
||||||
|
result = vc3
|
||||||
|
result.setCategories(categories, isPText, language, xParams, group)
|
||||||
|
|
||||||
|
func addNote(
|
||||||
|
vc3: VCard3,
|
||||||
|
value: string,
|
||||||
|
language = none[string](),
|
||||||
|
isPText = false,
|
||||||
|
xParams: seq[VC3_XParam] = @[],
|
||||||
|
group = none[string]()): VCard3 =
|
||||||
|
|
||||||
|
var c = newVC3_Note(value, language, isPText, xParams, group)
|
||||||
|
vc3.add(c)
|
||||||
|
]#
|
||||||
#[
|
#[
|
||||||
# TODO
|
# TODO
|
||||||
agent
|
|
||||||
org
|
|
||||||
categories
|
|
||||||
note
|
note
|
||||||
prodid
|
prodid
|
||||||
rev
|
rev
|
||||||
@ -1241,8 +1309,151 @@ proc serialize(c: VC3_Content): string =
|
|||||||
return serialize(cast[VC3_BinaryContent](c))
|
return serialize(cast[VC3_BinaryContent](c))
|
||||||
|
|
||||||
proc `$`*(vc3: VCard3): string =
|
proc `$`*(vc3: VCard3): string =
|
||||||
result = "BEGIN:vCard\r\n"
|
result = "BEGIN:vCard" & CRLF
|
||||||
result &= "VERSION:3.0\r\n"
|
result &= "VERSION:3.0" & CRLF
|
||||||
for c in vc3.content.filterIt(not (it of VC3_Version)):
|
for c in vc3.content.filterIt(not (it of VC3_Version)):
|
||||||
result &= foldContentLine(serialize(c)) & "\r\n"
|
result &= foldContentLine(serialize(c)) & CRLF
|
||||||
result &= "END:vCard\r\n"
|
result &= "END:vCard" & CRLF
|
||||||
|
|
||||||
|
|
||||||
|
# Parsing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
type
  VC3_ParseEvent = enum
    ## States used to track where the parser is within a vCard.
    peStart,
    peContentLine,
    peName,
    peParam,
    peParamName,
    peParamValue,
    pePText,        # was spelled `prPText`, breaking the `pe` prefix
    peQuoted,
    peSafeChar,
    peQSafeChar,
    peValue,
    peEnd

  VC3Parser = object of VCardLexer
    ## Lexer extended with the state needed to parse vCard 3.0 content.
    filename: string              ## name shown in error messages
    state: seq[VC3_ParseEvent]    ## parse states; seeded with `peStart`

  VCard3ParsingError = object of ValueError
    ## Raised when the input cannot be parsed.
|
||||||
|
|
||||||
|
# Character classes used by the parser.
const NON_ASCII = { '\x80'..'\xFF' }
const WSP = {' ', '\t'}
# Printable characters except DQUOTE, ',', ':' and ';', plus white space and
# non-ASCII bytes.
const SAFE_CHARS = WSP + { '\x21', '\x23'..'\x2B', '\x2D'..'\x39', '\x3C'..'\x7E' } + NON_ASCII
# Printable characters except DQUOTE, plus white space and non-ASCII bytes.
const QSAFE_CHARS = WSP + { '\x21', '\x23'..'\x7E' } + NON_ASCII
const VALUE_CHAR = WSP + { '\x21'..'\x7E' } + NON_ASCII
const ALPHA_NUM = { 'a'..'z', 'A'..'Z', '0'..'9' }
# Content-line names are built from letters, digits and '-' (e.g. "X-FOO");
# the hyphen was missing from this set.
const NAME_CHARS = ALPHA_NUM + { '-' }
|
||||||
|
|
||||||
|
proc error(p: VC3Parser, msg: string) =
  ## Raise a `VCard3ParsingError` whose message is prefixed with the parser's
  ## file name, current line number and the column of the read position, in
  ## the form `file(line, col) Error: msg`.
  ##
  ## The format string previously ended in a stray `"] "`.
  raise newException(VCard3ParsingError, "$1($2, $3) Error: $4" %
    [ p.filename, $p.lineNumber, $p.getColNumber(p.pos), msg ])
|
||||||
|
|
||||||
|
proc readGroup(p: var VC3Parser): Option[string] =
  ## Try to read a group prefix: a run of letters, digits or '-' terminated
  ## by '.'.
  ##
  ## On success the group name (without the '.') is returned and the input
  ## stays consumed. Otherwise the lexer is rewound to where it started and
  ## `none` is returned.
  p.setBookmark

  var ch = p.read
  while ch in ALPHA_NUM + {'-'}: ch = p.read

  if ch == '.':
    # Capture the text while the bookmark is still set; clearing it first
    # left nothing meaningful to read back.
    let consumed = p.readSinceBookmark
    p.unsetBookmark
    # `consumed` ends with the '.' just read; drop it.
    return some(consumed[0..^2])
  else:
    p.returnToBookmark
    return none[string]()
|
||||||
|
|
||||||
|
proc readName(p: var VC3Parser): string =
  ## Read a content-line name: the run of `NAME_CHARS` starting at the read
  ## position. The first character that is not part of the name is left
  ## unread.
  ##
  ## Raises `VCard3ParsingError` (via `error`) when no name character is
  ## found, which also keeps callers that loop over content lines from
  ## spinning without consuming input.
  ##
  ## NOTE(review): the original body was truncated mid-statement
  ## (`while ALPHA_`); this is a reconstruction of the apparent intent.
  p.setBookmark
  while p.peek in NAME_CHARS: discard p.read
  result = p.readSinceBookmark
  p.unsetBookmark

  if result.len == 0:
    p.error("expected a content line name")
|
||||||
|
|
||||||
|
proc expect(p: var VC3Parser, expected: string, caseSensitive = false) =
  ## Consume `expected` from the input, raising through `error` at the first
  ## character that differs. The comparison is byte-wise when `caseSensitive`
  ## is true and on lower-cased runes otherwise. The error message quotes
  ## what has been read since the start of the attempt.
  template mismatch() =
    p.error("expected '$1' but found '$2'" % [expected, p.readSinceBookmark])

  p.setBookmark

  if not caseSensitive:
    for want in runes(expected):
      let got = p.readRune
      if toLower(got) != toLower(want): mismatch()
  else:
    for want in expected:
      let got = p.read
      if got != want: mismatch()

  p.unsetBookmark
|
||||||
|
|
||||||
|
proc skip(p: var VC3Parser, expected: string, caseSensitive = false): bool =
  ## Consume `expected` if the input continues with it and return true.
  ## Otherwise rewind to where the attempt started and return false. The
  ## comparison is byte-wise when `caseSensitive` is true and on lower-cased
  ## runes otherwise.
  p.setBookmark
  result = true

  block matching:
    if caseSensitive:
      for want in expected:
        if p.read != want:
          result = false
          break matching
    else:
      for want in expected.runes:
        if toLower(p.readRune) != toLower(want):
          result = false
          break matching

  if result: p.unsetBookmark
  else: p.returnToBookmark
|
||||||
|
|
||||||
|
proc parseContentLines(p: var VC3Parser): seq[VC3_Content] =
  ## Read `[group.]name` prefixes until a line named "end" (any case) is
  ## found, then require ":VCARD" followed by CR LF.
  ##
  ## Nothing is added to the result yet; group and name are read and
  ## otherwise ignored.
  var sawEnd = false
  while not sawEnd:
    discard p.readGroup
    let name = p.readName
    if name.toLower == "end":
      p.expect(":VCARD\r\n")
      sawEnd = true
|
||||||
|
|
||||||
|
|
||||||
|
proc parseVCard3*(input: Stream, filename = "input"): seq[VCard3] =
  ## Begin parsing vCard 3.0 data from `input`. `filename` is only used to
  ## label error messages.
  ##
  ## As written this consumes an optional group prefix, the "begin:vcard"
  ## marker (any case) and any CR LF sequences after it; the returned
  ## sequence is not populated.
  var p: VC3Parser
  # Store the name on the parser; without this `error` reported an empty
  # file name.
  p.filename = filename
  lexer.open(p, input)
  p.state = @[peStart]

  discard p.readGroup
  p.expect("begin:vcard")
  while (p.skip("\r\n", true)): discard
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
proc parseVCard3*(content: string, filename = "input"): seq[VCard3] =
  ## Parse vCard data held in a string by wrapping it in a string stream and
  ## delegating to the stream overload.
  let stream = newStringStream(content)
  result = parseVCard3(stream, filename)
|
||||||
|
|
||||||
|
proc parseVCard3File*(filepath: string): seq[VCard3] =
  ## Parse the vCard file at `filepath`, using the path as the name shown in
  ## error messages.
  ##
  ## Raises `IOError` when the file cannot be opened. `newFileStream` signals
  ## that by returning nil, which previously reached the lexer unchecked.
  let input = newFileStream(filepath, fmRead)
  if input.isNil:
    raise newException(IOError,
      "unable to open '" & filepath & "' for reading")
  parseVCard3(input, filepath)
|
||||||
|
|
||||||
|
#[
|
||||||
|
Simplified Parsing Diagram
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
stateDiagram-v2
|
||||||
|
[*] --> StartVCard
|
||||||
|
StartVCard --> ContentLine: "BEGIN VCARD" CRLF
|
||||||
|
ContentLine --> EndVCard: "END VCARD" CRLF
|
||||||
|
ContentLine --> Name
|
||||||
|
Name --> Name: 0-9/a-z/-/.
|
||||||
|
Name --> Param: SEMICOLON
|
||||||
|
Name --> Value: COLON
|
||||||
|
Param --> Value: COLON
|
||||||
|
Value --> ContentLine: CRLF
|
||||||
|
|
||||||
|
state Param {
|
||||||
|
[*] --> ParamName
|
||||||
|
ParamName --> ParamName: 0-9/a-z/-/.
|
||||||
|
ParamName --> ParamValue: "="
|
||||||
|
ParamValue --> ParamValue: ","
|
||||||
|
ParamValue --> PText
|
||||||
|
ParamValue --> Quoted
|
||||||
|
PText --> PText: SAFE-CHAR
|
||||||
|
PText --> [*]
|
||||||
|
Quoted --> Quoted: QSAFE-CHAR
|
||||||
|
Quoted --> [*]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
]#
|
||||||
|
Loading…
x
Reference in New Issue
Block a user