diff --git a/.gitignore b/.gitignore index bf97536..dfa1252 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.sw? tests/tlexer +tests/tvcard3 diff --git a/README.md b/README.md index e69de29..ffbd132 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,10 @@ +## Debugging + +*Need to clean up and organize* + +Run `tlexer` tests in gdb: + +```sh +$ cd tests +$ nim --debuginfo --linedir:on c tlexer +$ gdb --tui tlexer diff --git a/src/vcard/private/lexer.nim b/src/vcard/private/lexer.nim index 830c293..e95fdb4 100644 --- a/src/vcard/private/lexer.nim +++ b/src/vcard/private/lexer.nim @@ -1,24 +1,32 @@ -import std/[streams, unicode, unittest] +import std/[streams, unicode] type VCardLexer* = object of RootObj input: Stream - pos*: int # current read position - bookmark*: int # bookmark to support rewind functionality - buffer*: string # buffer of bytes read - lineNumber*: int # how many newlines have we seen so far - lineStart: int # index into the buffer for the start of the current line - - bufStart: int # starting boundary for the buffer - bufEnd: int # ending boundary for the buffer + buffer*: string # buffer of bytes read + bufStart: int # starting boundary for the buffer + bufEnd: int # ending boundary for the buffer + pos*: int # current read position + bookmark*: int # bookmark to support rewind functionality + bookmarkVal*: string # value that has been read since the bookmark was set + lineNumber*: int # how many newlines have we seen so far + lineStart: int # index into the buffer for the start of the current line proc skipUtf8Bom(vcl: var VCardLexer) = if (vcl.buffer[0] == '\xEF') and (vcl.buffer[1] == '\xBB') and (vcl.buffer[2] == '\xBF'): inc(vcl.pos, 3) +template wrappedIdx(idx: untyped): int = idx mod vcl.buffer.len + proc newStartIdx(vcl: VCardLexer): int = if vcl.bookmark > 0: vcl.bookmark else: vcl.pos +func isFull(vcl: VCardLexer): bool {.inline.} = + return wrappedIdx(vcl.bufEnd + 1) == vcl.newStartIdx + +func atEnd(vcl: VCardLexer): bool {.inline.} = + vcl.pos == vcl.bufEnd + proc doubleBuffer(vcl: var VCardLexer) = let oldBuf = vcl.buffer vcl.buffer = newString(oldBuf.len * 2) @@ -42,21 +50,32 @@ proc fillBuffer(vcl: var VCardLexer) = var charsRead: int # check to see if we have a full buffer - if (vcl.bufStart == 0 and vcl.bufEnd == vcl.buffer.len) or - vcl.bufEnd == vcl.bufStart - 1: - vcl.doubleBuffer() + if vcl.isFull: vcl.doubleBuffer() # discard used portions of the buffer vcl.bufStart = vcl.newStartIdx if vcl.bufEnd < vcl.bufStart: - charsRead = vcl.input.readDataStr(vcl.buffer, vcl.bufEnd ..< vcl.bufStart) + # e s + # 0 1 2 3 4 5 6 7 8 9 + charsRead = vcl.input.readDataStr(vcl.buffer, + vcl.bufEnd ..< (vcl.bufStart - 1)) vcl.bufEnd += charsRead + + elif vcl.bufStart == 0: + # s e + # 0 1 2 3 4 5 6 7 8 9 + charsRead = vcl.input.readDataStr(vcl.buffer, + vcl.bufEnd ..< (vcl.buffer.len - 1)) + vcl.bufEnd = wrappedIdx(vcl.bufEnd + charsRead) + else: - charsRead = vcl.input.readDataStr(vcl.buffer, vcl.bufEnd ..< vcl.buffer.len) - vcl.bufEnd += charsRead + # s e + # 0 1 2 3 4 5 6 7 8 9 + charsRead = vcl.input.readDataStr(vcl.buffer, vcl.bufEnd.. exists( + it.name == name and + it.values.len == 1 and + it.values[0] == value) + else: + ps --> exists( + it.name == name and + it.values.len == 1 and + it.values[0].toLower == value.toLower) + +proc getMultipleValues( + params: openarray[VC3Param], + name: string + ): seq[string] = + + ## Get all of the values for a given parameter in a single list. There are + ## two patterns for multi-valued parameters defined in the VCard RFCs: + ## + ## - TYPE=work,cell,voice + ## - TYPE=work;TYPE=cell;TYPE=voice + ## + ## Parameter values can often be specific using both patterns. This method + ## joins all defined values regardless of the pattern used to define them. + + let ps = params.toSeq + ps --> + filter(it.name == name). + map(it.values). + flatten() + +proc getSingleValue(params: openarray[VC3Param], name: string): Option[string] = + ## Get the first single value defined for a parameter. + # + # Many parameters only support a single value, depending on the content type. + # In order to support multi-valued parameters our implementation stores all + # parameters as seq[string]. This function is a convenience around that. + + let ps = params.toSeq + let foundParam = ps --> find(it.name == name) + + if foundParam.isSome and foundParam.get.values.len > 0: + return some(foundParam.get.values[0]) + else: + return none[string]() + +proc validateNoParameters( + p: VC3Parser, + params: openarray[VC3Param], + name: string + ) = + + ## Error unless there are no defined parameters + if params.len > 0: + p.error("no parameters allowed on the $1 content type" % [name]) + +proc validateRequiredParameters( + p: VC3Parser, + params: openarray[VC3Param], + expectations: openarray[tuple[name: string, value: string]] + ) = + + ## Some content types have specific allowed parameters. For example, the + ## SOURCE content type requires that the VALUE parameter be set to "uri" if + ## it is present. This will error if given parameters are present with + ## different values that expected. + + for (n, v) in expectations: + let pv = params.getSingleValue(n) + if pv.isSome and pv.get != v: + p.error("parameter '$1' must have the value '$2'" % [n, v]) + +proc getXParams(params: openarray[VC3Param]): seq[VC3_XParam] = + ## Filter out and return only the non-standard parameters starting with "x-" + + let ps = params.toSeq + return ps --> + filter(it.name.startsWith("x-")). + map((name: it.name, value: it.values.join(","))) + proc parseContentLines(p: var VC3Parser): seq[VC3_Content] = + result = @[] + + macro assignCommon(assign: untyped): untyped = + result = assign + result.add(newTree(nnkExprEqExpr, ident("group"), ident("group"))) + + result.add(newTree(nnkExprEqExpr, + ident("language"), + newCall(ident("getSingleValue"), + ident("params"), + newStrLitNode("LANGUAGE")))) + + result.add(newTree(nnkExprEqExpr, + ident("isPText"), + newCall(ident("existsWithValue"), + ident("params"), + newStrLitNode("VALUE"), + newTree(nnkPrefix, ident("$"), ident("vtPText"))))) + + result.add(newTree(nnkExprEqExpr, + ident("xParams"), + newCall(ident("getXParams"), ident("params")))) + while true: let group = p.readGroup let name = p.readName - if name.toLower == "end": + if name == "END": p.expect(":VCARD\r\n") break + let params = p.readParams + p.expect(":") + case name + + of $cnName: + p.validateNoParameters(params, "NAME") + result.add(newVC3_Name(p.readValue, group)) + + of $cnProfile: + if p.readValue.toUpper != "VCARD": + p.error("the value of the PROFILE content type must be \"$1\"" % + ["vcard"]) + p.validateNoParameters(params, "NAME") + result.add(VC3_Content(group: group, name: name)) + + of $cnSource: + p.validateRequiredParameters(params, + [("CONTEXT", "word"), ("VALUE", "uri")]) + + result.add(newVC3_Source( + group = group, + value = p.readValue, + inclContext = params.existsWithValue("CONTEXT", "WORD"), + inclValue = params.existsWithValue("VALUE", $vtUri), + xParams = params.getXParams)) + + of $cnFn: + result.add(assignCommon(newVC3_Fn(value = p.readValue))) + + of $cnN: + result.add(assignCommon(newVC3_N( + family = p.readTextValueList, + given = p.readTextValueList(ifPrefix = some(';')), + additional = p.readTextValueList(ifPrefix = some(';')), + prefixes = p.readTextValueList(ifPrefix = some(';')), + suffixes = p.readTextValueList(ifPrefix = some(';'))))) + + of $cnNickname: + result.add(assignCommon(newVC3_Nickname(value = p.readValue))) + + of $cnPhoto: + result.add(newVC3_Photo( + group = group, + value = p.readValue, + valueType = params.getSingleValue("VALUE"), + binaryType = params.getSingleValue("TYPE"), + isInline = params.existsWithValue("ENCODING", "B"))) + + of $cnBday: + let valueType = params.getSingleValue("VALUE") + let valueStr = p.readValue + var value: DateTime + + try: + if valueType.isSome and valueType.get == $vtDate: + value = parseDate(valueStr) + elif valueType.isSome and valueType.get == $vtDateTime: + value = parseDateTime(valueStr) + elif valueType.isSome: + p.error("invalid VALUE for BDAY content. " & + "Expected '" & $vtDate & "' or '" & $vtDateTime & "'") + else: + value = parseDateOrDateTime(valueStr) + except ValueError: + p.error("invalid date or date-time value: $1" % [valueStr]) + + result.add(newVC3_Bday( + group = group, + valueType = valueType, + value = value)) + + of $cnAdr: + result.add(assignCommon(newVC3_Adr( + adrType = params.getMultipleValues("TYPE"), + poBox = p.readTextValue, + extendedAdr = p.readTextValue(ignorePrefix = {';'}), + streetAdr = p.readTextValue(ignorePrefix = {';'}), + locality = p.readTextValue(ignorePrefix = {';'}), + region = p.readTextValue(ignorePrefix = {';'}), + postalCode = p.readTextValue(ignorePrefix = {';'}), + country = p.readTextValue(ignorePrefix = {';'})))) + + of $cnLabel: + result.add(assignCommon(newVC3_Label( + value = p.readValue, + adrType = params.getMultipleValues("TYPE")))) + + of $cnTel: + result.add(newVC3_Tel( + group = group, + value = p.readValue, + telType = params.getMultipleValues("TYPE"))) + + of $cnEmail: + result.add(newVC3_Email( + group = group, + value = p.readValue, + emailType = params.getMultipleValues("TYPE"))) + + of $cnMailer: + result.add(assignCommon(newVC3_Mailer(value = p.readValue))) + + of $cnTz: + result.add(newVC3_Tz( + value = p.readValue, + isText = params.existsWithValue("VALUE", "TEXT"))) + + of $cnGeo: + let rawValue = p.readValue + try: + let partsStr = rawValue.split(';') + result.add(newVC3_Geo( + group = group, + lat = parseFloat(partsStr[0]), + long = parseFloat(partsStr[1]) + )) + except: + p.error("expected two float values separated by ';' for the GEO " & + "content type but received '" & rawValue & "'") + + of $cnTitle: + result.add(assignCommon(newVC3_Title(value = p.readValue))) + + of $cnRole: + result.add(assignCommon(newVC3_Role(value = p.readValue))) + + of $cnLogo: + result.add(newVC3_Logo( + group = group, + value = p.readValue, + valueType = params.getSingleValue("VALUE"), + binaryType = params.getSingleValue("TYPE"), + isInline = params.existsWithValue("ENCODING", "B"))) + + of $cnAgent: + let valueParam = params.getSingleValue("VALUE") + if valueParam.isSome and valueParam.get != $vtUri: + p.error("the VALUE parameter must be set to '" & $vtUri & + "' if present on the AGENT content type, but it was '" & + valueParam.get & "'") + + result.add(newVC3_Agent( + group = group, + value = p.readValue, + isInline = valueParam.isNone)) + + of $cnOrg: + result.add(assignCommon(newVC3_Org( + value = p.readTextValueList(seps = {';'})))) + + of $cnCategories: + result.add(assignCommon(newVC3_Categories( + value = p.readTextValueList()))) + + of $cnNote: + result.add(assignCommon(newVC3_Note(value = p.readTextValue))) + + of $cnProdid: + result.add(assignCommon(newVC3_Prodid(value = p.readValue))) + + of $cnRev: + let valueType = params.getSingleValue("VALUE") + let valueStr = p.readValue + var value: DateTime + + try: + if valueType.isSome and valueType.get == $vtDate: + value = parseDate(valueStr) + elif valueType.isSome and valueType.get == $vtDateTime: + value = parseDateTime(valueStr) + elif valueType.isSome: + p.error("invalid VALUE for BDAY content. " & + "Expected '" & $vtDate & "' or '" & $vtDateTime & "'") + else: + value = parseDateOrDateTime(valueStr) + except ValueError: + p.error("invalid date or date-time value: $1" % [valueStr]) + + result.add(newVC3_Rev( + group = group, + value = value, + valueType = valueType + )) + + of $cnSortString: + result.add(assignCommon(newVC3_SortString(value = p.readValue))) + + of $cnSound: + result.add(newVC3_Sound( + group = group, + value = p.readValue, + valueType = params.getSingleValue("VALUE"), + binaryType = params.getSingleValue("TYPE"), + isInline = params.existsWithValue("ENCODING", "B"))) + + of $cnUid: + result.add(newVC3_Uid(group = group, value = p.readValue)) + + of $cnUrl: + result.add(newVC3_Url(group = group, value = p.readValue)) + + of $cnVersion: + p.expect("3.0") + p.validateNoParameters(params, "VERSION") + result.add(newVC3_Version(group = group)) + + of $cnClass: + result.add(newVC3_Class(group = group, value = p.readValue)) + + of $cnKey: + result.add(newVC3_Key( + group = group, + value = p.readValue, + valueType = params.getSingleValue("VALUE"), + keyType = params.getSingleValue("TYPE"), + isInline = params.existsWithValue("ENCODING", "B"))) + + else: + if not name.startsWith("x-"): + p.error("unrecognized content type: '$1'" % [name]) + + result.add(newVC3_XType( + name = name, + value = p.readValue, + language = params.getSingleValue("LANGUAGE"), + isPText = params.existsWithValue("VALUE", "PTEXT"), + group = group, + xParams = params --> + filter(not ["value", "language"].contains(it.name)). + map((name: it.name, value: it.values.join(","))))) + + p.expect("\r\n") proc parseVCard3*(input: Stream, filename = "input"): seq[VCard3] = var p: VC3Parser + p.filename = filename lexer.open(p, input) - p.state = @[peStart] - - discard p.readGroup - p.expect("begin:vcard") - while (p.skip("\r\n", true)): discard - + while p.peek != '\0': # until EOF? + var vcard = VCard3() + discard p.readGroup + p.expect("begin:vcard") + while (p.skip("\r\n", true)): discard + for content in p.parseContentLines: vcard.add(content) + while (p.skip("\r\n", true)): discard + result.add(vcard) proc parseVCard3*(content: string, filename = "input"): seq[VCard3] = parseVCard3(newStringStream(content), filename) @@ -1457,3 +1865,153 @@ stateDiagram-v2 } ``` ]# + +## Private Function Unit Tests +## ============================================================================ + +import std/unittest + +suite "vcard/vcard3/private": + + proc initParser(input: string): VC3Parser = + result = VC3Parser(filename: "private unittests") + lexer.open(result, newStringStream(input)) + + test "readGroup with group": + var p = initParser("mygroup.BEGIN:VCARD") + let g = p.readGroup + + check: + g.isSome + g.get == "mygroup" + + test "readGroup without group": + var p = initParser("BEGIN:VCARD") + check p.readGroup.isNone + + test "expect (case-sensitive)": + var p = initParser("BEGIN:VCARD") + p.expect("BEGIN", true) + + try: + p.expect(":vcard", true) + check "" == "expect should have raised an error" + except: discard + + test "expect (case-insensitive)": + var p = initParser("BEGIN:VCARD") + p.expect("begin") + + try: + p.expect("begin") + check "" == "expect should have raised an error" + except: discard + + test "readName": + var p = initParser("TEL;tel;x-Example;x-Are1+Name") + check: + p.readName == "TEL" + p.read == ';' + p.readName == "TEL" + p.read == ';' + p.readName == "X-EXAMPLE" + p.read == ';' + p.readName == "X-ARE1" + + try: + discard p.readName + check "" == "readName should have raised an error" + except: discard + + test "readParamValue": + var p = initParser("TEL;TYPE=WORK;TYPE=Fun&Games%:+15551234567") + check: + p.readName == "TEL" + p.read == ';' + p.readName == "TYPE" + p.read == '=' + p.readParamValue == "WORK" + p.read == ';' + p.readName == "TYPE" + p.read == '=' + p.readParamValue == "Fun&Games%" + + test "readParams": + var p = initParser("TEL;TYPE=WORK;TYPE=Fun&Games%,Extra:+15551234567") + check p.readName == "TEL" + let params = p.readParams + check: + params.len == 2 + params[0].name == "TYPE" + params[0].values.len == 1 + params[0].values[0] == "WORK" + params[1].name == "TYPE" + params[1].values.len == 2 + params[1].values[0] == "Fun&Games%" + params[1].values[1] == "Extra" + + test "readValue": + var p = initParser("TEL;TYPE=WORK:+15551234567\r\nFN:John Smith\r\n") + check p.skip("TEL") + discard p.readParams + check p.read == ':' + check p.readValue == "+15551234567" + p.expect("\r\n") + check p.readName == "FN" + discard p.readParams + check p.read == ':' + check p.readValue == "John Smith" + + test "readTextValueList": + var p = initParser("Public;John;Quincey,Adams;Rev.;Esq:limited\r\n") + check: + p.readTextValueList == @["Public"] + p.readTextValueList(ifPrefix = some(';')) == @["John"] + p.readTextValueList(ifPrefix = some(';')) == @["Quincey", "Adams"] + p.readTextValueList(ifPrefix = some(';')) == @["Rev."] + p.readTextValueList(ifPrefix = some(';')) == @["Esq:limited"] + p.readTextValueList(ifPrefix = some(';')) == newSeq[string]() + + test "existsWithValue": + var p = initParser(";TYPE=WORK;TYPE=VOICE;TYPE=CELL") + let params = p.readParams + check: + params.existsWithValue("TYPE", "WORK") + params.existsWithValue("TYPE", "CELL") + not params.existsWithValue("TYPE", "ISDN") + + test "getSingleValue": + var p = initParser(";TYPE=WORK;TYPE=VOICE;TYPE=CELL") + let params = p.readParams + let val = params.getSingleValue("TYPE") + check: + val.isSome + val.get == "WORK" + params.getSingleValue("VALUE").isNone + + test "getMultipleValues": + var p = initParser(";TYPE=WORK;TYPE=VOICE;TYPE=CELL") + let params = p.readParams + check: + params.getMultipleValues("TYPE") == @["WORK", "VOICE", "CELL"] + params.getMultipleValues("VALUE") == newSeq[string]() + + test "validateNoParameters": + var p = initParser(";TYPE=WORK;TYPE=VOICE;TYPE=CELL") + let params = p.readParams + p.validateNoParameters(@[], "TEST") + try: + p.validateNoParameters(params, "TEST") + check "" == "validateNoParameters should have errored" + except: discard + + test "validateRequredParameters": + var p = initParser(";CONTEXT=word;VALUE=uri;TYPE=CELL") + let params = p.readParams + p.validateRequiredParameters(params, + [("VALUE", "uri"), ("CONTEXT", "word")]) + + try: + p.validateRequiredParameters(params, [("TYPE", "VOICE")]) + check "" == "validateRequiredParameters should have errored" + except: discard diff --git a/tests/tvcard3.nim b/tests/tvcard3.nim new file mode 100644 index 0000000..885eebc --- /dev/null +++ b/tests/tvcard3.nim @@ -0,0 +1,55 @@ +import options, unittest, vcard3, zero_functional + +suite "vcard/vcard3": + + let testVCard = + "BEGIN:VCARD\r\n" & + "VERSION:3.0\r\n" & + "FN:Mr. John Q. Public\\, Esq.\r\n" & + "N:Public;John;Quinlan;Mr.;Esq.\r\n" & + "END:VCARD\r\n" + + test "minimal VCard": + let vc = parseVCard3(testVCard)[0] + check: + vc.n.family[0] == "Public" + vc.n.given[0] == "John" + vc.fn.value == "Mr. John Q. Public\\, Esq." + + test "serialize minimal VCard": + let vc = parseVCard3(testVCard)[0] + check $vc == testVCard + + test "RFC2426 Author's VCards": + let vcardsStr = + "BEGIN:vCard\r\n" & + "VERSION:3.0\r\n" & + "FN:Frank Dawson\r\n" & + "ORG:Lotus Development Corporation\r\n" & + "ADR;TYPE=WORK,POSTAL,PARCEL:;;6544 Battleford Drive\r\n" & + " ;Raleigh;NC;27613-3502;U.S.A.\r\n" & + "TEL;TYPE=VOICE,MSG,WORK:+1-919-676-9515\r\n" & + "TEL;TYPE=FAX,WORK:+1-919-676-9564\r\n" & + "EMAIL;TYPE=INTERNET,PREF:Frank_Dawson@Lotus.com\r\n" & + "EMAIL;TYPE=INTERNET:fdawson@earthlink.net\r\n" & + "URL:http://home.earthlink.net/~fdawson\r\n" & + "END:vCard\r\n" & + "\r\n" & + "\r\n" & + "BEGIN:vCard\r\n" & + "VERSION:3.0\r\n" & + "FN:Tim Howes\r\n" & + "ORG:Netscape Communications Corp.\r\n" & + "ADR;TYPE=WORK:;;501 E. Middlefield Rd.;Mountain View;\r\n" & + " CA; 94043;U.S.A.\r\n" & + "TEL;TYPE=VOICE,MSG,WORK:+1-415-937-3419\r\n" & + "TEL;TYPE=FAX,WORK:+1-415-528-4164\r\n" & + "EMAIL;TYPE=INTERNET:howes@netscape.com\r\n" & + "END:vCard\r\n" + + let vcards = parseVCard3(vcardsStr) + check: + vcards.len == 2 + vcards[0].fn.value == "Frank Dawson" + vcards[0].email.len == 2 + (vcards[0].email --> find(it.emailType.contains("PREF"))).isSome diff --git a/vcard.nimble b/vcard.nimble index 79eea98..2293df7 100644 --- a/vcard.nimble +++ b/vcard.nimble @@ -10,3 +10,4 @@ srcDir = "src" # Dependencies requires "nim >= 1.6.6" +requires @[ "zero_functional" ]