From 663ad994eb1486f93d34cc454162e41d9b04c026 Mon Sep 17 00:00:00 2001
From: Jonathan Bernard
Date: Sat, 11 Apr 2026 08:51:55 -0500
Subject: [PATCH] Implement generic identifier casing conventions.

AI-Assisted: yes
AI-Tool: OpenAI Codex / gpt-5.4 xhigh
---
 .gitignore             |  14 +++
 identcasing.nimble     |  12 +++
 src/identcasing.nim    | 177 +++++++++++++++++++++++++++++++++++++++++
 tests/config.nims      |   1 +
 tests/tidentcasing.nim |  61 ++++++++++++++
 5 files changed, 265 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 identcasing.nimble
 create mode 100644 src/identcasing.nim
 create mode 100644 tests/config.nims
 create mode 100644 tests/tidentcasing.nim

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..72edec6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+# ---> Vim
+.*sw?
+*.un~
+Session.vim
+.netrwhist
+*~
+
+# ---> Test binaries (exclude everything in the test directories except nim
+# source files and NimScript config such as tests/config.nims)
+/tests/*
+!/tests/*.nim
+!/tests/*.nims
+
+.codex
diff --git a/identcasing.nimble b/identcasing.nimble
new file mode 100644
index 0000000..40255a2
--- /dev/null
+++ b/identcasing.nimble
@@ -0,0 +1,12 @@
+# Package
+
+version = "0.1.0"
+author = "Jonathan Bernard"
+description = "Little library to convert between identifier casing conventions."
+license = "MIT"
+srcDir = "src"
+
+
+# Dependencies
+
+requires "nim >= 2.2.8"
diff --git a/src/identcasing.nim b/src/identcasing.nim
new file mode 100644
index 0000000..24266ae
--- /dev/null
+++ b/src/identcasing.nim
@@ -0,0 +1,177 @@
+import unicode
+
+## Utilities for parsing and converting identifier casing styles.
+##
+## The library normalizes supported identifiers into lowercase words and then
+## renders those words into a target style.
+##
+## For camel-style parsing, digits stay attached to the preceding word. This
+## preserves common technical identifiers such as ``oauth2Client``,
+## ``utf8String``, and ``ipv6Address``.
+##
+## Because of that rule, delimited identifiers with standalone numeric
+## segments do not always round-trip through camel case. For example:
+## ``PBM-123`` -> ``pbm123`` -> ``PBM123``.
+
+type
+  CaseStyle* = enum
+    upperSnakeCase,  ## ``API_VALUE``
+    lowerSnakeCase,  ## ``api_value``
+    titleSnakeCase,  ## ``Api_Value``
+    lowerKebabCase,  ## ``api-value``
+    upperKebabCase,  ## ``API-VALUE``
+    trainCase,       ## ``Api-Value``
+    dotCase,         ## ``api.value``
+    lowerCamelCase,  ## ``apiValue``
+    pascalCase       ## ``ApiValue``
+
+  WordTransform = enum
+    lowerWordTransform,  # whole word lowercased
+    upperWordTransform,  # whole word uppercased
+    titleWordTransform   # first rune uppercased, remainder lowercased
+
+const
+  headerCase* = trainCase  ## Alias: renders and parses exactly like trainCase.
+
+func isUpperish(rune: Rune): bool =  # true for upper- or titlecase runes
+  rune.isUpper or rune.isTitle
+
+func titleFirstRune(word: string): string =  # "wORD" -> "Word"
+  let normalized = word.toLower  # lowercase all; first rune is raised below
+  if normalized.len == 0:
+    return ""
+
+  var byteIndex = 0
+  var firstRune: Rune
+  fastRuneAt(normalized, byteIndex, firstRune, true)  # byteIndex -> 2nd rune
+
+  result = $firstRune.toUpper
+  if byteIndex < normalized.len:  # anything left after the first rune?
+    result.add normalized[byteIndex .. ^1]
+
+func transformWord(word: string, transform: WordTransform): string =
+  case transform  # apply one casing rule to a single word
+  of lowerWordTransform:
+    result = word.toLower
+  of upperWordTransform:
+    result = word.toUpper
+  of titleWordTransform:
+    result = titleFirstRune(word)
+
+func addWord(words: var seq[string], word: string) =
+  if word.len > 0:  # empty segments are dropped
+    words.add word.toLower  # parsed words are always stored lowercased
+
+func joinWords(
+  words: openArray[string],
+  separator: char,
+  transform: WordTransform
+): string =
+  for i, word in words:
+    if i > 0:  # separator goes between words, never before the first
+      result.add separator
+    result.add transformWord(word, transform)
+
+func squashWords(words: openArray[string], transform: WordTransform): string =
+  for word in words:  # concatenate with no separator (used for pascalCase)
+    result.add transformWord(word, transform)
+
+func parseDelimitedWords(value: string, separator: Rune): seq[string] =
+  for word in value.split(separator):  # addWord skips empty pieces
+    result.addWord(word)
+
+func startsNewCamelWord(runes: openArray[Rune], index: int): bool =
+  if index == 0:  # the first rune never closes a previous word
+    return false
+
+  let current = runes[index]
+  if not current.isUpperish:  # only upper/titlecase runes can open a word
+    return false
+
+  let previous = runes[index - 1]
+  if not previous.isUpperish:  # e.g. fooBar, version2Value
+    return true
+
+  if index + 1 < runes.len:
+    let next = runes[index + 1]
+    if next.isLower or next.isTitle:  # end of an acronym: URL|Value
+      return true
+
+  result = false  # still inside a run of uppercase runes
+
+func parseCamelWords(value: string): seq[string] =
+  # Camel parsing only splits on upper/titlecase boundaries. Digit runs remain
+  # attached to the preceding word so identifiers like "oauth2Client" render
+  # as "oauth2-client" rather than "oauth-2-client".
+  let runes = value.toRunes
+  var current = newStringOfCap(value.len)
+
+  for i, rune in runes:
+    if startsNewCamelWord(runes, i):
+      result.addWord(current)
+      current.setLen 0  # reuse the buffer for the next word
+    current.add rune
+
+  result.addWord(current)  # flush the final word
+
+func parseWords*(value: string, style: CaseStyle): seq[string] =
+  ## Parse a supported identifier into normalized lowercase words.
+  ##
+  ## For ``lowerCamelCase`` and ``pascalCase``, digits remain attached to the
+  ## preceding word.
+  if value.len == 0:
+    return @[]
+
+  case style
+  of upperSnakeCase, lowerSnakeCase, titleSnakeCase:
+    result = parseDelimitedWords(value, '_'.Rune)
+  of lowerKebabCase, upperKebabCase, trainCase:
+    result = parseDelimitedWords(value, '-'.Rune)
+  of dotCase:
+    result = parseDelimitedWords(value, '.'.Rune)
+  of lowerCamelCase, pascalCase:
+    result = parseCamelWords(value)
+
+func renderWords*(words: openArray[string], style: CaseStyle): string =
+  ## Render normalized words into a supported identifier style.
+  case style
+  of upperSnakeCase:
+    result = joinWords(words, '_', upperWordTransform)
+  of lowerSnakeCase:
+    result = joinWords(words, '_', lowerWordTransform)
+  of titleSnakeCase:
+    result = joinWords(words, '_', titleWordTransform)
+  of lowerKebabCase:
+    result = joinWords(words, '-', lowerWordTransform)
+  of upperKebabCase:
+    result = joinWords(words, '-', upperWordTransform)
+  of trainCase:
+    result = joinWords(words, '-', titleWordTransform)
+  of dotCase:
+    result = joinWords(words, '.', lowerWordTransform)
+  of lowerCamelCase:
+    if words.len == 0:  # guards the words[0] access below
+      return ""
+
+    result = transformWord(words[0], lowerWordTransform)
+    for i in 1 ..< words.len:
+      result.add transformWord(words[i], titleWordTransform)
+  of pascalCase:
+    result = squashWords(words, titleWordTransform)
+
+func convertCase*(
+  value: string,
+  sourceStyle: CaseStyle,
+  targetStyle: CaseStyle
+): string =
+  ## Convert an identifier from one supported style to another.
+  ##
+  ## Round-tripping through camel case is not guaranteed when a delimited input
+  ## uses standalone numeric segments, such as ``PBM-123``.
+  renderWords(parseWords(value, sourceStyle), targetStyle)
+
+func lowerKebabCaseToLowerCamelCase*(str: string): string =
+  convertCase(str, lowerKebabCase, lowerCamelCase)
+
+func lowerCamelCaseToLowerKebabCase*(str: string): string =
+  convertCase(str, lowerCamelCase, lowerKebabCase)
diff --git a/tests/config.nims b/tests/config.nims
new file mode 100644
index 0000000..3bb69f8
--- /dev/null
+++ b/tests/config.nims
@@ -0,0 +1 @@
+switch("path", "$projectDir/../src")
\ No newline at end of file
diff --git a/tests/tidentcasing.nim b/tests/tidentcasing.nim
new file mode 100644
index 0000000..c8a6f5d
--- /dev/null
+++ b/tests/tidentcasing.nim
@@ -0,0 +1,61 @@
+import unittest
+
+import identcasing
+
+let canonicalWords = @["naïve", "api", "value"]
+
+suite "identifier casing":
+  test "renders every supported style":
+    check renderWords(canonicalWords, upperSnakeCase) == "NAÏVE_API_VALUE"
+    check renderWords(canonicalWords, lowerSnakeCase) == "naïve_api_value"
+    check renderWords(canonicalWords, titleSnakeCase) == "Naïve_Api_Value"
+    check renderWords(canonicalWords, lowerKebabCase) == "naïve-api-value"
+    check renderWords(canonicalWords, upperKebabCase) == "NAÏVE-API-VALUE"
+    check renderWords(canonicalWords, trainCase) == "Naïve-Api-Value"
+    check renderWords(canonicalWords, headerCase) == "Naïve-Api-Value"
+    check renderWords(canonicalWords, dotCase) == "naïve.api.value"
+    check renderWords(canonicalWords, lowerCamelCase) == "naïveApiValue"
+    check renderWords(canonicalWords, pascalCase) == "NaïveApiValue"
+
+  test "parses every unambiguous style":
+    check parseWords("NAÏVE_API_VALUE", upperSnakeCase) == canonicalWords
+    check parseWords("naïve_api_value", lowerSnakeCase) == canonicalWords
+    check parseWords("Naïve_Api_Value", titleSnakeCase) == canonicalWords
+    check parseWords("naïve-api-value", lowerKebabCase) == canonicalWords
+    check parseWords("NAÏVE-API-VALUE", upperKebabCase) == canonicalWords
+    check parseWords("Naïve-Api-Value", trainCase) == canonicalWords
+    check parseWords("naïve.api.value", dotCase) == canonicalWords
+    check parseWords("naïveApiValue", lowerCamelCase) == canonicalWords
+    check parseWords("NaïveApiValue", pascalCase) == canonicalWords
+
+  test "splits acronym and digit boundaries in camel styles":
+    check parseWords("URLValue", pascalCase) == @["url", "value"]
+    check parseWords("version2Value", lowerCamelCase) == @["version2", "value"]
+    check convertCase("URLValue", pascalCase, lowerKebabCase) == "url-value"
+    check convertCase("version2Value", lowerCamelCase, lowerSnakeCase) == "version2_value"
+
+  test "keeps digits attached to the preceding camel word":
+    check parseWords("oauth2Client", lowerCamelCase) == @["oauth2", "client"]
+    check parseWords("ipv6Address", lowerCamelCase) == @["ipv6", "address"]
+    check convertCase("oauth2Client", lowerCamelCase, lowerKebabCase) == "oauth2-client"
+    check convertCase("ipv6Address", lowerCamelCase, lowerSnakeCase) == "ipv6_address"
+
+  test "does not round-trip standalone numeric segments through camel case":
+    check convertCase("PBM-123", upperKebabCase, lowerCamelCase) == "pbm123"
+    check convertCase("pbm123", lowerCamelCase, upperKebabCase) == "PBM123"
+
+  test "converts between parseable and rendered styles":
+    check convertCase("naïveApiValue", lowerCamelCase, upperSnakeCase) == "NAÏVE_API_VALUE"
+    check convertCase("NaïveApiValue", pascalCase, upperKebabCase) == "NAÏVE-API-VALUE"
+    check convertCase("naïve_api_value", lowerSnakeCase, trainCase) == "Naïve-Api-Value"
+
+  test "supports the existing pairwise helpers":
+    check lowerKebabCaseToLowerCamelCase("lower-kebab-case") == "lowerKebabCase"
+    check lowerCamelCaseToLowerKebabCase("lowerCamelCase") == "lower-camel-case"
+    check lowerKebabCaseToLowerCamelCase("lower-äbc") == "lowerÄbc"
+    check lowerCamelCaseToLowerKebabCase("lowerÄbc") == "lower-äbc"
+
+  test "handles empty values":
+    check parseWords("", lowerCamelCase) == newSeq[string]()
+    check renderWords([], lowerCamelCase) == ""
+    check convertCase("", lowerCamelCase, upperSnakeCase) == ""