diff --git a/CHANGELOG.md b/CHANGELOG.md index d5f2dab..2047349 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,20 @@ All notable changes to this project are documented in this file. +## [2.1.0] - 2026-03-31 + +### Added + +- `is_mixed_case`: validation function to check if a string contains both uppercase and lowercase characters. +- Dedicated case converters: `camel_to_snake`, `pascal_to_snake`, `snake_to_camel`, `snake_to_pascal` that don't aggressively strip non-alphanumeric characters. + +### Changed + +- Enhanced `words` to handle additional Unicode whitespace characters (non-breaking space, en-space, em-space, thin space, zero-width space, ideographic space). +- Optimized the internal `repeat_str` helper used by padding operations: it now builds its result with `gleam/string_tree` instead of repeated `<>` concatenation, for better performance on large repetitions. + +--- + ## [2.0.1] - 2026-02-28 ### Fixed diff --git a/README.md b/README.md index e2bf827..aca95ff 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ pub fn main() { | `is_uppercase(text)` | `"HELLO123"` | `True` | | `is_lowercase(text)` | `"hello_world"` | `True` | | `is_title_case(text)` | `"Hello World"` | `True` | +| `is_mixed_case(text)` | `"helloWorld"` | `True` | ### ✂️ Grapheme Extraction @@ -239,6 +240,10 @@ str.to_camel_case("hello world") // → "helloWorld" str.to_pascal_case("hello world") // → "HelloWorld" str.to_kebab_case("Hello World") // → "hello-world" str.to_title_case("hello world") // → "Hello World" +str.camel_to_snake("camelCase") // → "camel_case" +str.snake_to_camel("snake_case") // → "snakeCase" +str.pascal_to_snake("PascalCase") // → "pascal_case" +str.snake_to_pascal("snake_case") // → "SnakeCase" ``` ### ASCII Folding (Deburr) diff --git a/gleam.toml b/gleam.toml index 0c5ec74..39523c0 100644 --- a/gleam.toml +++ b/gleam.toml @@ -1,5 +1,5 @@ name = "str" -version = "2.0.1" +version = "2.1.0" # Project metadata (fill or replace placeholders before publishing) description = "Unicode-aware string utilities 
for Gleam: grapheme-safe operations, pragmatic ASCII transliteration, and slug generation." diff --git a/src/str.gleam b/src/str.gleam index 7608230..ff92489 100644 --- a/src/str.gleam +++ b/src/str.gleam @@ -430,6 +430,11 @@ pub fn is_lowercase(text: String) -> Bool { core.is_lowercase(text) } +/// Checks if text contains both uppercase and lowercase characters. +pub fn is_mixed_case(text: String) -> Bool { + core.is_mixed_case(text) +} + /// Checks if text is in Title Case format. pub fn is_title_case(text: String) -> Bool { core.is_title_case(text) @@ -787,6 +792,26 @@ pub fn to_title_case(text: String) -> String { extra.to_title_case(text) } +/// Converts camelCase or PascalCase to snake_case. +pub fn camel_to_snake(text: String) -> String { + extra.camel_to_snake(text) +} + +/// Alias for camel_to_snake. +pub fn pascal_to_snake(text: String) -> String { + extra.pascal_to_snake(text) +} + +/// Converts snake_case to camelCase. +pub fn snake_to_camel(text: String) -> String { + extra.snake_to_camel(text) +} + +/// Converts snake_case to PascalCase. 
+pub fn snake_to_pascal(text: String) -> String { + extra.snake_to_pascal(text) +} + // ============================================================================ // GRAPHEME TOKENIZATION (from str/tokenize) // ============================================================================ diff --git a/src/str/internal/core.gleam b/src/str/internal/core.gleam index 3f6d837..2ba8134 100644 --- a/src/str/internal/core.gleam +++ b/src/str/internal/core.gleam @@ -13,6 +13,7 @@ import gleam/dict import gleam/int import gleam/list import gleam/string +import gleam/string_tree import houdini import odysseus import str/config @@ -154,6 +155,12 @@ pub fn words(text: String) -> List(String) { |> string.replace("\n", " ") |> string.replace("\r", " ") |> string.replace("\t", " ") + |> string.replace("\u{00A0}", " ") + |> string.replace("\u{2002}", " ") + |> string.replace("\u{2003}", " ") + |> string.replace("\u{2009}", " ") + |> string.replace("\u{200B}", " ") + |> string.replace("\u{3000}", " ") normalized |> string.split(" ") @@ -176,13 +183,20 @@ pub fn is_blank(text: String) -> Bool { /// /// Internal helper for padding operations. Returns empty string if n <= 0. fn repeat_str(s: String, n: Int) -> String { - repeat_str_loop(s, n, "") + case n <= 0 { + True -> "" + False -> repeat_str_loop(s, n, string_tree.new()) |> string_tree.to_string + } } -fn repeat_str_loop(s: String, n: Int, acc: String) -> String { +fn repeat_str_loop( + s: String, + n: Int, + acc: string_tree.StringTree, +) -> string_tree.StringTree { case n <= 0 { True -> acc - False -> repeat_str_loop(s, n - 1, acc <> s) + False -> repeat_str_loop(s, n - 1, string_tree.append(acc, s)) } } @@ -1646,6 +1660,24 @@ pub fn is_lowercase(text: String) -> Bool { } } +/// Checks if text contains both uppercase and lowercase characters. +/// Non-cased characters are ignored. +/// Returns False for empty strings or strings with no cased characters. 
+/// +/// is_mixed_case("Hello") -> True +/// is_mixed_case("hello") -> False +/// is_mixed_case("HELLO") -> False +/// is_mixed_case("Hello123") -> True +/// is_mixed_case("123") -> False +/// is_mixed_case("") -> False +/// +pub fn is_mixed_case(text: String) -> Bool { + let chars = string.to_graphemes(text) + let has_upper = list.any(chars, is_grapheme_uppercase) + let has_lower = list.any(chars, is_grapheme_lowercase) + has_upper && has_lower +} + /// Checks if text is in Title Case (first letter of each word is uppercase). /// Non-alphabetic characters are ignored. Empty strings return False. /// diff --git a/src/str/internal/extra.gleam b/src/str/internal/extra.gleam index 4a47ca9..09dbd20 100644 --- a/src/str/internal/extra.gleam +++ b/src/str/internal/extra.gleam @@ -394,6 +394,115 @@ pub fn to_title_case(s: String) -> String { }) string.join(capitalized, " ") } + +// ---------------------------------------------------------------------------- +// Dedicated, Non-Destructive Case Converters +// ---------------------------------------------------------------------------- + +fn is_upper_char(g: String) -> Bool { + case string.to_utf_codepoints(g) { + [cp] -> { + let code = string.utf_codepoint_to_int(cp) + code >= 0x41 && code <= 0x5A + } + _ -> False + } +} + +fn is_lower_char(g: String) -> Bool { + case string.to_utf_codepoints(g) { + [cp] -> { + let code = string.utf_codepoint_to_int(cp) + code >= 0x61 && code <= 0x7A + } + _ -> False + } +} + +fn camel_to_snake_loop( + chars: List(String), + acc: String, + prev_char: String, +) -> String { + case chars { + [] -> acc + [c, ..rest] -> { + let c_is_upper = is_upper_char(c) + let prev_is_lower = is_lower_char(prev_char) + let next_is_lower = case rest { + [n, ..] 
-> is_lower_char(n) + [] -> False + } + + let insert_underscore = + { prev_is_lower && c_is_upper } + || { is_upper_char(prev_char) && c_is_upper && next_is_lower } + + let new_acc = case + insert_underscore && acc != "" && !string.ends_with(acc, "_") + { + True -> acc <> "_" <> string.lowercase(c) + False -> acc <> string.lowercase(c) + } + camel_to_snake_loop(rest, new_acc, c) + } + } +} + +/// Converts camelCase or PascalCase to snake_case without aggressively stripping characters. +/// +/// camel_to_snake("camelCase") -> "camel_case" +/// camel_to_snake("XMLHttpRequest") -> "xml_http_request" +/// +pub fn camel_to_snake(s: String) -> String { + camel_to_snake_loop(string.to_graphemes(s), "", "") +} + +/// Alias for camel_to_snake. +pub fn pascal_to_snake(s: String) -> String { + camel_to_snake(s) +} + +/// Converts snake_case to camelCase without aggressively stripping characters. +/// +/// snake_to_camel("snake_case_name") -> "snakeCaseName" +/// +pub fn snake_to_camel(s: String) -> String { + let parts = string.split(s, "_") + case parts { + [] -> "" + [first, ..rest] -> { + let camel_rest = + list.fold(rest, "", fn(acc, part) { + case string.is_empty(part) { + True -> acc + False -> + acc + <> string.uppercase(string.slice(part, 0, 1)) + <> string.slice(part, 1, string.length(part) - 1) + } + }) + string.lowercase(first) <> camel_rest + } + } +} + +/// Converts snake_case to PascalCase without aggressively stripping characters. +/// +/// snake_to_pascal("snake_case_name") -> "SnakeCaseName" +/// +pub fn snake_to_pascal(s: String) -> String { + let parts = string.split(s, "_") + list.fold(parts, "", fn(acc, part) { + case string.is_empty(part) { + True -> acc + False -> + acc + <> string.uppercase(string.slice(part, 0, 1)) + <> string.slice(part, 1, string.length(part) - 1) + } + }) +} // Note: normalizer helpers (NFC/NFD/NFKC/NFKD) are intentionally not // exported by the `str` library to avoid introducing an OTP dependency. 
// If you need to use OTP normalization, define a small helper in your diff --git a/src/str/internal/translit.gleam b/src/str/internal/translit.gleam index e006287..32c86c9 100644 --- a/src/str/internal/translit.gleam +++ b/src/str/internal/translit.gleam @@ -157,6 +157,14 @@ pub fn replacements() -> List(#(String, String)) { // Czech/Slovak extras #("Ŕ", "R"), #("ŕ", "r"), + // Common Symbols + #("€", "EUR"), + #("£", "GBP"), + #("¥", "JPY"), + #("©", "(c)"), + #("®", "(r)"), + #("™", "tm"), + #("…", "..."), ] } diff --git a/test/str_new_converters_test.gleam b/test/str_new_converters_test.gleam new file mode 100644 index 0000000..a8bf16a --- /dev/null +++ b/test/str_new_converters_test.gleam @@ -0,0 +1,35 @@ +import gleeunit/should +import str + +pub fn is_mixed_case_test() { + str.is_mixed_case("Hello") |> should.be_true + str.is_mixed_case("hello") |> should.be_false + str.is_mixed_case("HELLO") |> should.be_false + str.is_mixed_case("Hello123") |> should.be_true + str.is_mixed_case("123") |> should.be_false + str.is_mixed_case("") |> should.be_false +} + +pub fn camel_to_snake_test() { + str.camel_to_snake("camelCase") |> should.equal("camel_case") + str.camel_to_snake("XMLHttpRequest") |> should.equal("xml_http_request") + str.camel_to_snake("simple") |> should.equal("simple") + str.camel_to_snake("Already_Snake") |> should.equal("already_snake") +} + +pub fn pascal_to_snake_test() { + str.pascal_to_snake("PascalCase") |> should.equal("pascal_case") + str.pascal_to_snake("XMLHttpRequest") |> should.equal("xml_http_request") +} + +pub fn snake_to_camel_test() { + str.snake_to_camel("snake_case_name") |> should.equal("snakeCaseName") + str.snake_to_camel("simple") |> should.equal("simple") + // Testing numbers and acronyms + str.snake_to_camel("xml_http_request") |> should.equal("xmlHttpRequest") +} + +pub fn snake_to_pascal_test() { + str.snake_to_pascal("snake_case_name") |> should.equal("SnakeCaseName") + str.snake_to_pascal("simple") |> 
should.equal("Simple") +}