Implement generic identifier casing conventions.

AI-Assisted: yes AI-Tool: OpenAI Codex / gpt-5.4 xhigh
2026-04-11 08:51:55 -05:00
commit 663ad994eb
5 changed files with 264 additions and 0 deletions
@@ -0,0 +1,13 @@
+# ---> Vim
+.*sw?
+*.un~
+Session.vim
+.netrwhist
+*~
+
+# ---> Test binaries (exclude everything in the test directories except nim
+#      source files)
+/tests/*
+!/tests/*.nim
+
+.codex
@@ -0,0 +1,12 @@
+# Package
+
+version       = "0.1.0"
+author        = "Jonathan Bernard"
+description   = "Little library to convert between identifier casing conventions."
+license       = "MIT"
+srcDir        = "src"
+
+
+# Dependencies
+
+requires "nim >= 2.2.8"
@@ -0,0 +1,177 @@
+import unicode
+
+## Utilities for parsing and converting identifier casing styles.
+##
+## The library normalizes supported identifiers into lowercase words and then
+## renders those words into a target style.
+##
+## For camel-style parsing, digits stay attached to the preceding word. This
+## preserves common technical identifiers such as ``oauth2Client``,
+## ``utf8String``, and ``ipv6Address``.
+##
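+## Because of that rule, delimited identifiers with standalone numeric
+## segments do not always round-trip through camel case. For example,
+## converting ``PBM-123`` from upper kebab case to lower camel case gives
+## ``pbm123``, and converting that back to upper kebab case gives ``PBM123``:
+## the separator in front of the digits is lost.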
+
+type
+  CaseStyle* = enum ## Supported identifier casing conventions.
+    upperSnakeCase ## ``UPPER_SNAKE_CASE``
+    lowerSnakeCase ## ``lower_snake_case``
+    titleSnakeCase ## ``Title_Snake_Case``
+    lowerKebabCase ## ``lower-kebab-case``
+    upperKebabCase ## ``UPPER-KEBAB-CASE``
+    trainCase      ## ``Train-Case``
+    dotCase        ## ``dot.case``
+    lowerCamelCase ## ``lowerCamelCase``
+    pascalCase     ## ``PascalCase``
+
+  WordTransform = enum ## Casing applied to one word while rendering.
+    lowerWordTransform ## whole word lowercased
+    upperWordTransform ## whole word uppercased
+    titleWordTransform ## first rune uppercased, remainder lowercased
+
+const headerCase* = trainCase ## Alias of ``trainCase``.
+
+func isUpperish(rune: Rune): bool =
+  ## Whether `rune` is an uppercase or a titlecase rune.
+  result = isUpper(rune) or isTitle(rune)
+
+func titleFirstRune(word: string): string =
+  ## Lowercase `word`, then uppercase only its first rune.
+  ##
+  ## An empty `word` yields an empty string.
+  let lowered = toLower(word)
+  if lowered.len > 0:
+    var
+      restStart = 0
+      head: Rune
+    # Decode the leading rune; afterwards `restStart` is the byte offset of
+    # whatever follows it.
+    fastRuneAt(lowered, restStart, head, true)
+
+    result = $toUpper(head)
+    if restStart < lowered.len:
+      result.add lowered[restStart .. ^1]
+
+func transformWord(word: string, transform: WordTransform): string =
+  ## Return `word` re-cased according to `transform`.
+  case transform
+  of lowerWordTransform: toLower(word)
+  of upperWordTransform: toUpper(word)
+  of titleWordTransform: titleFirstRune(word)
+
+func addWord(words: var seq[string], word: string) =
+  ## Append the lowercased `word` to `words`; an empty `word` is ignored.
+  if word.len == 0:
+    return
+  words.add toLower(word)
+
+func joinWords(
+  words: openArray[string],
+  separator: char,
+  transform: WordTransform
+): string =
+  ## Re-case every word with `transform` and join the results, placing
+  ## `separator` between consecutive words (never before the first or after
+  ## the last).
+  var needsSeparator = false
+  for word in words:
+    if needsSeparator:
+      result.add separator
+    result.add transformWord(word, transform)
+    needsSeparator = true
+
+func squashWords(words: openArray[string], transform: WordTransform): string =
+  ## Re-case every word with `transform` and concatenate the results with no
+  ## separator in between.
+  for i in 0 .. words.high:
+    result &= transformWord(words[i], transform)
+
+func parseDelimitedWords(value: string, separator: Rune): seq[string] =
+  ## Split `value` on `separator` and collect the pieces through `addWord`,
+  ## which lowercases each piece and skips empty ones.
+  for segment in split(value, separator):
+    addWord(result, segment)
+
+func startsNewCamelWord(runes: openArray[Rune], index: int): bool =
+  ## Decide whether the rune at `index` begins a new word in a camel-style
+  ## identifier.
+  ##
+  ## The first rune never starts a new word, and neither does any rune that is
+  ## not upper/titlecase. An upper/titlecase rune starts a word when the rune
+  ## before it is not upper/titlecase (``fooBar``), or when it closes a run of
+  ## capitals and is followed by a lowercase or titlecase rune (the ``V`` in
+  ## ``URLValue``).
+  if index == 0 or not isUpperish(runes[index]):
+    return false
+
+  if not isUpperish(runes[index - 1]):
+    return true
+
+  # Inside a run of capitals: only the last capital before a lowercase or
+  # titlecase rune opens a new word.
+  let following = index + 1
+  result = following < runes.len and
+    (isLower(runes[following]) or isTitle(runes[following]))
+
+func parseCamelWords(value: string): seq[string] =
+  ## Split a camel-style identifier into lowercase words.
+  ##
+  ## Word boundaries come only from `startsNewCamelWord`, i.e. from
+  ## upper/titlecase runes. Digits never open a word, so they stay glued to
+  ## the word before them: "oauth2Client" becomes "oauth2" + "client", not
+  ## "oauth" + "2" + "client".
+  let runes = toRunes(value)
+  var pending = newStringOfCap(value.len)
+
+  for idx in 0 ..< runes.len:
+    if startsNewCamelWord(runes, idx):
+      # Flush the word collected so far before starting the next one.
+      addWord(result, pending)
+      pending.setLen 0
+    pending.add runes[idx]
+
+  # Flush the trailing word.
+  addWord(result, pending)
+
+func parseWords*(value: string, style: CaseStyle): seq[string] =
+  ## Break an identifier written in `style` into normalized lowercase words.
+  ##
+  ## Snake, kebab/train and dot styles are split on ``_``, ``-`` and ``.``
+  ## respectively. ``lowerCamelCase`` and ``pascalCase`` are split on
+  ## upper/titlecase boundaries, with digits kept on the preceding word.
+  ## An empty `value` gives an empty sequence.
+  if value.len > 0:
+    result =
+      case style
+      of lowerCamelCase, pascalCase:
+        parseCamelWords(value)
+      of dotCase:
+        parseDelimitedWords(value, Rune('.'))
+      of lowerKebabCase, upperKebabCase, trainCase:
+        parseDelimitedWords(value, Rune('-'))
+      of upperSnakeCase, lowerSnakeCase, titleSnakeCase:
+        parseDelimitedWords(value, Rune('_'))
+
+func renderWords*(words: openArray[string], style: CaseStyle): string =
+  ## Render `words` as one identifier written in `style`.
+  ##
+  ## Every word is re-cased as the style requires before being joined. An
+  ## empty `words` renders as an empty string.
+  case style
+  # Fully lowercase, delimited styles.
+  of lowerSnakeCase:
+    result = joinWords(words, '_', lowerWordTransform)
+  of lowerKebabCase:
+    result = joinWords(words, '-', lowerWordTransform)
+  of dotCase:
+    result = joinWords(words, '.', lowerWordTransform)
+  # Fully uppercase, delimited styles.
+  of upperSnakeCase:
+    result = joinWords(words, '_', upperWordTransform)
+  of upperKebabCase:
+    result = joinWords(words, '-', upperWordTransform)
+  # Title-cased words, delimited.
+  of titleSnakeCase:
+    result = joinWords(words, '_', titleWordTransform)
+  of trainCase:
+    result = joinWords(words, '-', titleWordTransform)
+  # Undelimited styles.
+  of lowerCamelCase:
+    # Only the first word stays lowercase; every later word is title-cased.
+    for i, word in words:
+      let transform =
+        if i == 0: lowerWordTransform
+        else: titleWordTransform
+      result.add transformWord(word, transform)
+  of pascalCase:
+    result = squashWords(words, titleWordTransform)
+
+func convertCase*(
+  value: string,
+  sourceStyle: CaseStyle,
+  targetStyle: CaseStyle
+): string =
+  ## Re-write `value`, an identifier in `sourceStyle`, in `targetStyle`.
+  ##
+  ## The identifier is parsed into words and the words are rendered again, so
+  ## a delimited input with a standalone numeric segment (``PBM-123``) does
+  ## not survive a round trip through camel case.
+  let words = parseWords(value, sourceStyle)
+  result = renderWords(words, targetStyle)
+
+func lowerKebabCaseToLowerCamelCase*(str: string): string =
+  ## Shorthand for converting `str` from ``lowerKebabCase`` to
+  ## ``lowerCamelCase`` via `convertCase`.
+  result = str.convertCase(
+    sourceStyle = lowerKebabCase,
+    targetStyle = lowerCamelCase
+  )
+
+func lowerCamelCaseToLowerKebabCase*(str: string): string =
+  ## Shorthand for converting `str` from ``lowerCamelCase`` to
+  ## ``lowerKebabCase`` via `convertCase`.
+  result = str.convertCase(
+    sourceStyle = lowerCamelCase,
+    targetStyle = lowerKebabCase
+  )
@@ -0,0 +1 @@
+# Add ``../src`` (relative to the project directory) to the module search
+# path so modules located there can be imported directly.
+switch("path", "$projectDir/../src")
@@ -0,0 +1,61 @@
+import unittest
+
+import identcasing
+
+# Shared fixture: the normalized word list that every style below is rendered
+# from or parsed back into. "naïve" exercises non-ASCII case mapping.
+let canonicalWords = @["naïve", "api", "value"]
+
+suite "identifier casing":
+  # Words -> identifier, once per style (headerCase is an alias of trainCase).
+  test "renders every supported style":
+    check renderWords(canonicalWords, upperSnakeCase) == "NAÏVE_API_VALUE"
+    check renderWords(canonicalWords, lowerSnakeCase) == "naïve_api_value"
+    check renderWords(canonicalWords, titleSnakeCase) == "Naïve_Api_Value"
+    check renderWords(canonicalWords, lowerKebabCase) == "naïve-api-value"
+    check renderWords(canonicalWords, upperKebabCase) == "NAÏVE-API-VALUE"
+    check renderWords(canonicalWords, trainCase) == "Naïve-Api-Value"
+    check renderWords(canonicalWords, headerCase) == "Naïve-Api-Value"
+    check renderWords(canonicalWords, dotCase) == "naïve.api.value"
+    check renderWords(canonicalWords, lowerCamelCase) == "naïveApiValue"
+    check renderWords(canonicalWords, pascalCase) == "NaïveApiValue"
+
+  # Identifier -> words, once per style; the result is always lowercase.
+  test "parses every unambiguous style":
+    check parseWords("NAÏVE_API_VALUE", upperSnakeCase) == canonicalWords
+    check parseWords("naïve_api_value", lowerSnakeCase) == canonicalWords
+    check parseWords("Naïve_Api_Value", titleSnakeCase) == canonicalWords
+    check parseWords("naïve-api-value", lowerKebabCase) == canonicalWords
+    check parseWords("NAÏVE-API-VALUE", upperKebabCase) == canonicalWords
+    check parseWords("Naïve-Api-Value", trainCase) == canonicalWords
+    check parseWords("naïve.api.value", dotCase) == canonicalWords
+    check parseWords("naïveApiValue", lowerCamelCase) == canonicalWords
+    check parseWords("NaïveApiValue", pascalCase) == canonicalWords
+
+  # A run of capitals ends before the capital that is followed by a lowercase
+  # rune ("URL" + "Value"); a capital after a digit also opens a new word.
+  test "splits acronym and digit boundaries in camel styles":
+    check parseWords("URLValue", pascalCase) == @["url", "value"]
+    check parseWords("version2Value", lowerCamelCase) == @["version2", "value"]
+    check convertCase("URLValue", pascalCase, lowerKebabCase) == "url-value"
+    check convertCase("version2Value", lowerCamelCase, lowerSnakeCase) == "version2_value"
+
+  # Digits themselves never start a word in camel parsing.
+  test "keeps digits attached to the preceding camel word":
+    check parseWords("oauth2Client", lowerCamelCase) == @["oauth2", "client"]
+    check parseWords("ipv6Address", lowerCamelCase) == @["ipv6", "address"]
+    check convertCase("oauth2Client", lowerCamelCase, lowerKebabCase) == "oauth2-client"
+    check convertCase("ipv6Address", lowerCamelCase, lowerSnakeCase) == "ipv6_address"
+
+  # Documented limitation: "123" is glued onto "pbm" in camel case, so the
+  # original "-" boundary cannot be recovered on the way back.
+  test "does not round-trip standalone numeric segments through camel case":
+    check convertCase("PBM-123", upperKebabCase, lowerCamelCase) == "pbm123"
+    check convertCase("pbm123", lowerCamelCase, upperKebabCase) == "PBM123"
+
+  # End-to-end parse + render across different style families.
+  test "converts between parseable and rendered styles":
+    check convertCase("naïveApiValue", lowerCamelCase, upperSnakeCase) == "NAÏVE_API_VALUE"
+    check convertCase("NaïveApiValue", pascalCase, upperKebabCase) == "NAÏVE-API-VALUE"
+    check convertCase("naïve_api_value", lowerSnakeCase, trainCase) == "Naïve-Api-Value"
+
+  # The two named convenience wrappers, including a non-ASCII word boundary.
+  test "supports the existing pairwise helpers":
+    check lowerKebabCaseToLowerCamelCase("lower-kebab-case") == "lowerKebabCase"
+    check lowerCamelCaseToLowerKebabCase("lowerCamelCase") == "lower-camel-case"
+    check lowerKebabCaseToLowerCamelCase("lower-äbc") == "lowerÄbc"
+    check lowerCamelCaseToLowerKebabCase("lowerÄbc") == "lower-äbc"
+
+  # Empty input must give empty output at every layer of the API.
+  test "handles empty values":
+    check parseWords("", lowerCamelCase) == newSeq[string]()
+    check renderWords([], lowerCamelCase) == ""
+    check convertCase("", lowerCamelCase, upperSnakeCase) == ""