From 5565087359f5c2e5dc7a64b26b4439c89e7966c9 Mon Sep 17 00:00:00 2001 From: Jonathan Bernard Date: Wed, 3 May 2023 00:28:26 -0500 Subject: [PATCH] common: Reformat for clarity, documentation. - Consolidate `vcard/private/util` into `vcard/private/common`. --- src/vcard/private/common.nim | 268 +++++++++++++++++++++++++---------- src/vcard/private/util.nim | 55 ------- src/vcard/vcard3.nim | 2 +- src/vcard/vcard4.nim | 2 +- 4 files changed, 199 insertions(+), 128 deletions(-) delete mode 100644 src/vcard/private/util.nim diff --git a/src/vcard/private/common.nim b/src/vcard/private/common.nim index 9747944..dfbdfe0 100644 --- a/src/vcard/private/common.nim +++ b/src/vcard/private/common.nim @@ -1,29 +1,61 @@ -import std/[macros, options, strutils, unicode] +# Common Functionality +# © 2022-2023 Jonathan Bernard + +## This module contains type definitions, constant values, and func/proc +## definitions that are used in common between both the 3.0 and 4.0 parser +## implementations. +## +## This module is not intended to be exposed to library consumers. It is +## intended to be imported by the vcard3 and vcard4 implementations. There are +## a handful of functions (under the **Public Definitions** section) that will +## be re-exported by the root `vcard` module and available on that namespace to +## users of the library. + +import std/[macros, options, strutils, times, unicode] import zero_functional from std/sequtils import toSeq import ./lexer +## Internal Types (used by `vcard{3,4}`) +## ===================================== type - VCardVersion* = enum VCardV3 = "3.0", VCardV4 = "4.0" - - VCardParser* = object of VCardLexer - filename*: string - - VC_Param* = tuple[name: string, values: seq[string]] - - VCardParsingError* = object of ValueError - - VC_XParam* = tuple[name, value: string] - VC_PropCardinality* = enum + ## enum used to define the possible cardinalities of VCard properties. vpcAtMostOne, vpcExactlyOne, vpcAtLeastOne vpcAny +## External Types (exported on `vcard`) +## ==================================== +type + VC_Param* = tuple[name: string, values: seq[string]] + ## Representation of VCard parameter and its values. + + VCardVersion* = enum VCardV3 = "3.0", VCardV4 = "4.0" ## \ + ## enum used to differentiate VCard3 and VCard4 versions. + + VCardParser* = object of VCardLexer + ## Common VCard parser object + filename*: string + + VCardParsingError* = object of ValueError + ## Error raised when invalid input is detected while parsing a VCard + + VC_XParam* = tuple[name, value: string] + ## Representation of VCard extended parameters (starting with "X-"). + ## Because the meaning of these parameters is implementation-specific, no + ## parsing of the parameter value is performed, it is returned verbatim. + VCard* = ref object of RootObj + ## Abstract base class for all VCards. `parsedVersion` can be used to + ## interrogate any concrete instance of this class. `asVCard3` and + ## `asVCard4` exist as convenience functions to cast an instance to one of + ## the subclasses depending on the value of `parsedVersion`. parsedVersion*: VCardVersion +## Internal constants (used by `vcard{3,4}`) +## ========================================= const CRLF* = "\r\n" const WSP* = {' ', '\t'} const DIGIT* = { '0'..'9' } @@ -33,8 +65,51 @@ const QSAFE_CHARS* = WSP + { '\x21', '\x23'..'\x7E' } + NON_ASCII const SAFE_CHARS* = WSP + { '\x21', '\x23'..'\x2B', '\x2D'..'\x39', '\x3C'..'\x7E' } + NON_ASCII const VALUE_CHAR* = WSP + { '\x21'..'\x7E' } + NON_ASCII -# Internal Utility/Implementation -# ============================================================================= +const DATE_FMTS = [ "yyyy-MM-dd", "yyyyMMdd" ] +const DATE_TIME_FMTS = [ + "yyyyMMdd'T'HHmmss", + "yyyyMMdd'T'HHmmssz", + "yyyyMMdd'T'HHmmsszzz", + "yyyyMMdd'T'HHmmss'.'fffzzz", + "yyyy-MM-dd'T'HH:mm:ss", + "yyyy-MM-dd'T'HH:mm:ssz", + "yyyy-MM-dd'T'HH:mm:sszzz", + "yyyy-MM-dd'T'HH:mm:ss'.'fffzzz", +] + +const ALL_DATE_AND_OR_TIME_FMTS = DATE_TIME_FMTS.toSeq & DATE_FMTS.toSeq + +## Internal Utility/Implementation Functions +## ========================================= + +proc parseDateTimeStr( + dateStr: string, + dateFmts: openarray[string] + ): DateTime {.inline, raises:[ValueError].} = + + for fmt in dateFmts: + try: result = parse(dateStr, fmt) + except ValueError: discard + + if not result.isInitialized: + raise newException(ValueError, "cannot parse date: " & dateStr ) + +proc parseDate*(dateStr: string): DateTime = + parseDateTimeStr(dateStr, DATE_FMTS) + +proc parseDateTime*(dateStr: string): DateTime = + parseDateTimeStr(dateStr, DATE_TIME_FMTS) + +proc parseDateOrDateTime*(dateStr: string): DateTime = + parseDateTimeStr(dateStr, ALL_DATE_AND_OR_TIME_FMTS) + +template indexOfIt*(s, pred: untyped): int = + var result = -1 + for idx, it {.inject.} in pairs(s): + if pred: + result = idx + break + result template findAll*[T, VCT](c: openarray[VCT]): seq[T] = c.filterIt(it of typeof(T)).mapIt(cast[T](it)) @@ -44,8 +119,6 @@ template findFirst*[T, VCT](c: openarray[VCT]): Option[T] = if found.len > 0: some(found[0]) else: none[T]() -func allPropsOfType*[T, VC: VCard](vc: VC): seq[T] = findAll[T](vc) - macro assignFields*(assign: untyped, fields: varargs[untyped]): untyped = result = assign @@ -54,35 +127,23 @@ macro assignFields*(assign: untyped, fields: varargs[untyped]): untyped = exp.add(f, f) result.add(exp) -# Output -# ============================================================================= +## Internal Parsing Functionality +## ============================== -# Parsing -# ============================================================================= +proc getSingleValue*(params: openarray[VC_Param], name: string): Option[string]; proc error*(p: VCardParser, msg: string) = raise newException(VCardParsingError, "$1($2, $3) Error: $4" % [ p.filename, $p.lineNumber, $p.getColNumber(p.pos), msg ]) -proc isNext*[T](p: var T, expected: string, caseSensitive = false): bool = - result = true - p.setBookmark - - if caseSensitive: - for ch in expected: - if p.read != ch: - result = false - break - - else: - for rune in expected.runes: - if p.readRune.toLower != rune.toLower: - result = false - break - - p.returnToBookmark - -proc expect*[T](p: var T, expected: string, caseSensitive = false) = +proc expect*(p: var VCardParser, expected: string, caseSensitive = false) = + ## Read `expected.len` from the parser's input stream and determine if it + ## matches the expected value. If the parser is unable to supply + ## `expected.len` bytes or if the value read does not match the expected + ## value, raise a VCardParsingError via the `error` utility function + ## reporting to the user where the input failed to match the expected value. + ## If the expectation is met, the parser read position is advanced past the + ## expected value, on the next byte after the expected value that was read. try: p.setBookmark @@ -101,7 +162,31 @@ proc expect*[T](p: var T, expected: string, caseSensitive = false) = finally: p.unsetBookmark -proc readGroup*[T](p: var T): Option[string] = +proc isNext*(p: var VCardParser, expected: string, caseSensitive = false): bool = + ## Read `expected.len` from the parser's input stream and determine if it + ## matches the expected value. Assuming the parser did not fail to read + ## `expected.len` bytes, this will reset the parser read state to its initial + ## position (prior to calling `isNext`). This is similar to `expect` but + ## exhibits more "`peek`-like" behavior (doesn't disturb the read position, + ## and doesn't fail). + result = true + p.setBookmark + + if caseSensitive: + for ch in expected: + if p.read != ch: + result = false + break + + else: + for rune in expected.runes: + if p.readRune.toLower != rune.toLower: + result = false + break + + p.returnToBookmark + +proc readGroup*(p: var VCardParser): Option[string] = ## All VCARD content items can be optionally prefixed with a group name. This ## scans the input to see if there is a group defined at the current read ## location. If there is a valid group, the group name is returned and the @@ -139,9 +224,19 @@ proc readValue*(p: var VCardParser): string = p.unsetBookmark proc skip*(p: var VCardParser, count: int): bool = + ## Ignore the next `count` bytes of data from the parser's underlying input + ## stream. for _ in 0.. 0: + p.error("no parameters allowed on the $1 content type" % [name]) + +proc validateRequiredParameters*( + p: VCardParser, + params: openarray[VC_Param], + expectations: openarray[tuple[name: string, value: string]] + ) = + + ## Some content types have specific allowed parameters. For example, the + ## SOURCE content type requires that the VALUE parameter be set to "uri" if + ## it is present. This will raise a VCardParsingError if given parameters are + ## present with different values that expected. + + for (n, v) in expectations: + let pv = params.getSingleValue(n) + if pv.isSome and pv.get != v: + p.error("parameter '$1' must have the value '$2'" % [n, v]) + +## Internal Serialization Utilities +## ================================ + +func foldContentLine*(s: string): string = + result = "" + var rem = s + while rem.len > 75: # TODO: unicode multi-byte safe? + result &= rem[0..<75] & "\r\n " + rem = rem[75..^1] + result &= rem + + +## Publicly Exported Procedure and Functions +## ========================================= + proc getMultipleValues*( params: openarray[VC_Param], name: string @@ -192,8 +328,8 @@ proc getMultipleValues*( ## - TYPE=work,cell,voice ## - TYPE=work;TYPE=cell;TYPE=voice ## - ## Parameter values can often be specific using both patterns. This method - ## joins all defined values regardless of the pattern used to define them. + ## Parameter values can be specific using both patterns. This method joins + ## all defined values regardless of the pattern used to define them. let ps = params.toSeq ps --> @@ -206,10 +342,10 @@ proc getSingleValue*( name: string ): Option[string] = ## Get the first single value defined for a parameter. - # - # Many parameters only support a single value, depending on the content type. - # In order to support multi-valued parameters our implementation stores all - # parameters as seq[string]. This function is a convenience around that. + ## + ## Many parameters only support a single value, depending on the content type. + ## In order to support multi-valued parameters our implementation stores all + ## parameters as seq[string]. This function is a convenience around that. let ps = params.toSeq let foundParam = ps --> find(it.name == name) @@ -219,28 +355,18 @@ proc getSingleValue*( else: return none[string]() -proc validateNoParameters*( - p: VCardParser, - params: openarray[VC_Param], - name: string - ) = - - ## Error unless there are no defined parameters - if params.len > 0: - p.error("no parameters allowed on the $1 content type" % [name]) - -proc validateRequiredParameters*( - p: VCardParser, - params: openarray[VC_Param], - expectations: openarray[tuple[name: string, value: string]] - ) = - - ## Some content types have specific allowed parameters. For example, the - ## SOURCE content type requires that the VALUE parameter be set to "uri" if - ## it is present. This will error if given parameters are present with - ## different values that expected. - - for (n, v) in expectations: - let pv = params.getSingleValue(n) - if pv.isSome and pv.get != v: - p.error("parameter '$1' must have the value '$2'" % [n, v]) +func allPropsOfType*[T, VC: VCard](vc: VC): seq[T] = findAll[T](vc) + ## Get all instances of the requested property type present on the given + ## VCard. + ## + ## This can be useful when there is some logic that hides multiple instances + ## of a property, or returns a limited subset. For example, on 3.0 versions + ## of VCards, this library assumes that there will only be one instance of + ## the NAME property. The 3.0 spec implies that the NAME property should only + ## be present at most once, but does not explicitly state this. It is + ## possible for a 3.0 VCard to contain multiple NAME properties. using + ## `vc3.name` will only return the first. This function allows a caller to + ## retrieve all instances for any given property type. For example: + ## + ## let vc3 = parseVCards(...) + ## let allNames = allPropsOfType[VC3_Name](vc3) diff --git a/src/vcard/private/util.nim b/src/vcard/private/util.nim deleted file mode 100644 index 8a4c92f..0000000 --- a/src/vcard/private/util.nim +++ /dev/null @@ -1,55 +0,0 @@ -import sequtils, strutils, times - -const DATE_FMTS = [ "yyyy-MM-dd", "yyyyMMdd" ] -const DATE_TIME_FMTS = [ - "yyyyMMdd'T'HHmmss", - "yyyyMMdd'T'HHmmssz", - "yyyyMMdd'T'HHmmsszzz", - "yyyyMMdd'T'HHmmss'.'fffzzz", - "yyyy-MM-dd'T'HH:mm:ss", - "yyyy-MM-dd'T'HH:mm:ssz", - "yyyy-MM-dd'T'HH:mm:sszzz", - "yyyy-MM-dd'T'HH:mm:ss'.'fffzzz", -] - -const ALL_FMTS = DATE_TIME_FMTS.toSeq & DATE_FMTS.toSeq - -proc parseDateTimeStr( - dateStr: string, - dateFmts: openarray[string] - ): DateTime {.inline, raises:[ValueError].} = - - for fmt in dateFmts: - try: result = parse(dateStr, fmt) - except ValueError: discard - - if not result.isInitialized: - raise newException(ValueError, "cannot parse date: " & dateStr ) - -proc parseDate*(dateStr: string): DateTime = - parseDateTimeStr(dateStr, DATE_FMTS) - -proc parseDateTime*(dateStr: string): DateTime = - parseDateTimeStr(dateStr, DATE_TIME_FMTS) - -proc parseDateOrDateTime*(dateStr: string): DateTime = - parseDateTimeStr(dateStr, ALL_FMTS) - -func foldContentLine*(s: string): string = - result = "" - var rem = s - while rem.len > 75: # TODO: unicode multi-byte safe? - result &= rem[0..<75] & "\r\n " - rem = rem[75..^1] - result &= rem - -func unfoldContentLine*(s: string): string = - return s.multiReplace([("\r\n ", "")]) - -template indexOfIt*(s, pred: untyped): int = - var result = -1 - for idx, it {.inject.} in pairs(s): - if pred: - result = idx - break - result diff --git a/src/vcard/vcard3.nim b/src/vcard/vcard3.nim index 9160554..3223ea0 100644 --- a/src/vcard/vcard3.nim +++ b/src/vcard/vcard3.nim @@ -14,7 +14,7 @@ import std/[base64, genasts, macros, options, sequtils, streams, strutils, import zero_functional -import ./private/[common, lexer, util] +import ./private/[common, lexer] type VC3_ValueTypes = enum diff --git a/src/vcard/vcard4.nim b/src/vcard/vcard4.nim index b61d63a..45a84b2 100644 --- a/src/vcard/vcard4.nim +++ b/src/vcard/vcard4.nim @@ -4,7 +4,7 @@ import zero_functional #from std/sequtils import toSeq -import ./private/[common, lexer, util] +import ./private/[common, lexer] type VC4_ValueType* = enum