diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam
index e530c8fa..c065d988 100644
--- a/src/gleam/string.gleam
+++ b/src/gleam/string.gleam
@@ -553,51 +553,123 @@ fn do_trim(string: String) -> String {
   erl_trim(string, Both)
 }
 
+/// Like `trim`, but removes the specified chars on both sides of a `String`
+///
+/// ## Examples
+///
+/// ```gleam
+/// trim_with("..,hats,..", ".,")
+/// // -> "hats"
+/// ```
+pub fn trim_with(string: String, charset: String) -> String {
+  do_trim_with(string, charset)
+}
+
+@external(javascript, "../gleam_stdlib.mjs", "trim_with")
+fn do_trim_with(string: String, charset: String) -> String {
+  erl_trim_with(string, Both, erl_to_graphemes(charset))
+}
+
 @external(erlang, "string", "trim")
 fn erl_trim(a: String, b: Direction) -> String
 
+@external(erlang, "string", "trim")
+fn erl_trim_with(a: String, b: Direction, c: ErlGraphemes) -> String
+
+@external(erlang, "string", "to_graphemes")
+fn erl_to_graphemes(a: String) -> ErlGraphemes
+
+// erlang's string:to_graphemes returns char() | [char()], which cannot be directly represented
+type ErlGraphemes
+
 type Direction {
   Leading
   Trailing
   Both
 }
 
-/// Removes whitespace on the left of a `String`.
+/// Removes whitespace at the start of a `String`.
 ///
 /// ## Examples
 ///
 /// ```gleam
-/// trim_left(" hats \n")
+/// trim_start(" hats \n")
 /// // -> "hats \n"
 /// ```
 ///
+pub fn trim_start(string: String) -> String {
+  do_trim_start(string)
+}
+
+/// An alias for trim_start
+@deprecated("Use trim_start. There is no behavior change")
 pub fn trim_left(string: String) -> String {
-  do_trim_left(string)
+  trim_start(string)
 }
 
-@external(javascript, "../gleam_stdlib.mjs", "trim_left")
-fn do_trim_left(string: String) -> String {
+@external(javascript, "../gleam_stdlib.mjs", "trim_start")
+fn do_trim_start(string: String) -> String {
   erl_trim(string, Leading)
 }
 
-/// Removes whitespace on the right of a `String`.
+/// Removes whitespace at the end of a `String`.
 ///
 /// ## Examples
 ///
 /// ```gleam
-/// trim_right(" hats \n")
+/// trim_end(" hats \n")
 /// // -> " hats"
 /// ```
 ///
+pub fn trim_end(string: String) -> String {
+  do_trim_end(string)
+}
+
+/// An alias for trim_end
+@deprecated("Use trim_end. There is no behavior change")
 pub fn trim_right(string: String) -> String {
-  do_trim_right(string)
+  trim_end(string)
 }
 
-@external(javascript, "../gleam_stdlib.mjs", "trim_right")
-fn do_trim_right(string: String) -> String {
+@external(javascript, "../gleam_stdlib.mjs", "trim_end")
+fn do_trim_end(string: String) -> String {
   erl_trim(string, Trailing)
 }
 
+/// Like `trim_start`, but removes the specified chars at the start of a `String`
+///
+/// ## Examples
+///
+/// ```gleam
+/// trim_start_with("..,hats,..", ".,")
+/// // -> "hats,.."
+/// ```
+pub fn trim_start_with(string: String, charset: String) -> String {
+  do_trim_start_with(string, charset)
+}
+
+@external(javascript, "../gleam_stdlib.mjs", "trim_start_with")
+fn do_trim_start_with(string: String, charset: String) -> String {
+  erl_trim_with(string, Leading, erl_to_graphemes(charset))
+}
+
+/// Like `trim_end`, but removes the specified chars at the end of a `String`
+///
+/// ## Examples
+///
+/// ```gleam
+/// trim_end_with("..,hats,..", ".,")
+/// // -> "..,hats"
+/// ```
+pub fn trim_end_with(string: String, charset: String) -> String {
+  do_trim_end_with(string, charset)
+}
+
+@external(javascript, "../gleam_stdlib.mjs", "trim_end_with")
+fn do_trim_end_with(string: String, charset: String) -> String {
+  erl_trim_with(string, Trailing, erl_to_graphemes(charset))
+}
+
 /// Splits a non-empty `String` into its first element (head) and rest (tail).
 /// This lets you pattern match on `String`s exactly as you would with lists.
 ///
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs
index 50ebb46f..44b39701 100644
--- a/src/gleam_stdlib.mjs
+++ b/src/gleam_stdlib.mjs
@@ -21,6 +21,12 @@ import Dict from "./dict.mjs";
 
 const Nil = undefined;
 const NOT_FOUND = {};
+// Characters that are special inside a RegExp and must be escaped before a
+// caller-supplied charset is placed in a character class. `-` is included so
+// that a charset such as "a-z" is not read as a range.
+// See license note in escape_regexp_chars
+const reRegExpChar = /[\\^$.*+?()[\]{}|-]/g;
+const reHasRegExpChar = RegExp(reRegExpChar.source);
 
 export function identity(x) {
   return x;
@@ -259,21 +265,41 @@ const unicode_whitespaces = [
   "\u2029", // Paragraph separator
 ].join("");
 
-const left_trim_regex = new RegExp(`^([${unicode_whitespaces}]*)`, "g");
-const right_trim_regex = new RegExp(`([${unicode_whitespaces}]*)$`, "g");
+const start_trim_regex = new_start_trim_regexp(unicode_whitespaces);
+const right_trim_regex = new_right_trim_regexp(unicode_whitespaces);
 
 export function trim(string) {
-  return trim_left(trim_right(string));
+  return trim_start(trim_end(string));
 }
 
-export function trim_left(string) {
-  return string.replace(left_trim_regex, "");
+export function trim_start(string) {
+  return string.replace(start_trim_regex, "");
 }
 
-export function trim_right(string) {
+export function trim_end(string) {
   return string.replace(right_trim_regex, "");
 }
 
+// Removes every leading and trailing character found in `charset`.
+export function trim_with(string, charset) {
+  const trimmed_right = trim_end_with(string, charset);
+  return trim_start_with(trimmed_right, charset);
+}
+
+// Removes every leading character found in `charset`.
+export function trim_start_with(string, charset) {
+  const trim_regexp = new_start_trim_regexp(charset);
+
+  return string.replace(trim_regexp, "");
+}
+
+// Removes every trailing character found in `charset`.
+export function trim_end_with(string, charset) {
+  const trim_regexp = new_right_trim_regexp(charset);
+
+  return string.replace(trim_regexp, "");
+}
+
 export function bit_array_from_string(string) {
   return toBitArray([stringBits(string)]);
 }
@@ -953,3 +979,78 @@ export function bit_array_compare(first, second) {
   }
   return new Lt(); // second has more items
 }
+
+// Builds a RegExp matching a leading run of characters from `charset`.
+// The charset is escaped so characters such as `]`, `^` and `\` are matched
+// literally, and the `u` flag keeps astral characters (e.g. emoji) whole
+// instead of matching their surrogate halves individually.
+function new_start_trim_regexp(charset) {
+  const escaped_charset = escape_regexp_chars(charset);
+  return new RegExp(`^([${escaped_charset}]*)`, "gu");
+}
+
+// Builds a RegExp matching a trailing run of characters from `charset`.
+// Escaping and flags mirror new_start_trim_regexp.
+function new_right_trim_regexp(charset) {
+  const escaped_charset = escape_regexp_chars(charset);
+  return new RegExp(`([${escaped_charset}]*)$`, "gu");
+}
+
+// Backslash-escapes every RegExp-special character in `string`; a falsy
+// input yields "".
+function escape_regexp_chars(string) {
+  /*
+   * The MIT License
+
+   * Copyright JS Foundation and other contributors
+   *
+   * Based on Underscore.js, copyright Jeremy Ashkenas,
+   * DocumentCloud and Investigative Reporters & Editors
+   *
+   * This software consists of voluntary contributions made by many
+   * individuals. For exact contribution history, see the revision history
+   * available at https://github.com/lodash/lodash
+   *
+   * The following license applies to all parts of this software except as
+   * documented below:
+   *
+   * ====
+   *
+   * Permission is hereby granted, free of charge, to any person obtaining
+   * a copy of this software and associated documentation files (the
+   * "Software"), to deal in the Software without restriction, including
+   * without limitation the rights to use, copy, modify, merge, publish,
+   * distribute, sublicense, and/or sell copies of the Software, and to
+   * permit persons to whom the Software is furnished to do so, subject to
+   * the following conditions:
+   *
+   * The above copyright notice and this permission notice shall be
+   * included in all copies or substantial portions of the Software.
+   *
+   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+   * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+   * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+   * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+   *
+   * ====
+   *
+   * Copyright and related rights for sample code are waived via CC0. Sample
+   * code is defined as all source code displayed within the prose of the
+   * documentation.
+   *
+   * CC0: http://creativecommons.org/publicdomain/zero/1.0/
+   *
+   * ====
+   *
+   * Files located in the node_modules and vendor directories are externally
+   * maintained libraries used by this software which have their own
+   * licenses; we recommend you read them, as their terms may differ from the
+   * terms above.
+   */
+  return string && reHasRegExpChar.test(string)
+    ? string.replace(reRegExpChar, '\\$&')
+    : string || '';
+}
diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam
index 6d3031e4..0484d694 100644
--- a/test/gleam/string_test.gleam
+++ b/test/gleam/string_test.gleam
@@ -164,18 +164,54 @@ pub fn trim_test() {
   |> should.equal("hats")
 }
 
-pub fn trim_left_test() {
+pub fn trim_start_test() {
   " hats \n"
-  |> string.trim_left
+  |> string.trim_start
   |> should.equal("hats \n")
 }
 
-pub fn trim_right_test() {
+pub fn trim_start_rtl_test() {
+  " עברית "
+  |> string.trim_start
+  |> should.equal("עברית ")
+}
+
+pub fn trim_end_rtl_test() {
+  " עברית "
+  |> string.trim_end
+  |> should.equal(" עברית")
+}
+
+pub fn trim_end_test() {
   " hats \n"
-  |> string.trim_right
+  |> string.trim_end
   |> should.equal(" hats")
 }
 
+pub fn trim_start_with_test() {
+  ",..hats..,"
+  |> string.trim_start_with(",.")
+  |> should.equal("hats..,")
+}
+
+pub fn trim_start_with_rtl_test() {
+  "שמש"
+  |> string.trim_start_with("ש")
+  |> should.equal("מש")
+}
+
+pub fn trim_end_with_test() {
+  ",..hats..,"
+  |> string.trim_end_with(",.")
+  |> should.equal(",..hats")
+}
+
+pub fn trim_end_with_rtl_test() {
+  "שמש"
+  |> string.trim_end_with("ש")
+  |> should.equal("שמ")
+}
+
 // unicode whitespaces
 pub fn trim_horizontal_tab_test() {
   "hats\u{0009}"
@@ -364,6 +400,48 @@ pub fn trim_comma_test() {
   |> should.equal("hats,")
 }
 
+pub fn trim_with_test() {
+  ",,hats,"
+  |> string.trim_with(",")
+  |> should.equal("hats")
+}
+
+pub fn trim_with_commas_and_periods_test() {
+  ",,hats,..."
+  |> string.trim_with(",.")
+  |> should.equal("hats")
+}
+
+pub fn trim_with_keeps_whitespace_not_in_charset_test() {
+  ",,hats ,..."
+  |> string.trim_with(",.")
+  |> should.equal("hats ")
+}
+
+pub fn trim_with_does_not_trim_from_middle_of_string_test() {
+  ",,hats,hats,hats,..."
+  |> string.trim_with(",.")
+  |> should.equal("hats,hats,hats")
+}
+
+pub fn trim_with_trims_complex_graphemes_test() {
+  "hats👍👍👍👍"
+  |> string.trim_with("👍")
+  |> should.equal("hats")
+}
+
+pub fn trim_start_with_regexp_special_chars_test() {
+  "^]-hats"
+  |> string.trim_start_with("^]-")
+  |> should.equal("hats")
+}
+
+pub fn trim_start_with_keeps_other_astral_chars_test() {
+  "👍hats"
+  |> string.trim_start_with("👎")
+  |> should.equal("👍hats")
+}
+
 pub fn starts_with_test() {
   "theory"
   |> string.starts_with("")