Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add charset based variants for trimming, rename from left/right to start/end #696

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 82 additions & 10 deletions src/gleam/string.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -553,51 +553,123 @@ fn do_trim(string: String) -> String {
erl_trim(string, Both)
}

/// Strips the characters listed in `charset` from both the start and the
/// end of a `String`. This is the `trim` counterpart for a caller-chosen
/// set of characters.
///
/// ## Examples
///
/// ```gleam
/// trim_chars("..,hats,..", ".,")
/// // -> "hats"
/// ```
pub fn trim_chars(string: String, charset: String) -> String {
  string
  |> do_trim_chars(charset)
}

// On the JavaScript target this is replaced by `trim_chars` from
// gleam_stdlib.mjs. Otherwise the charset is converted with
// `erl_to_graphemes` and handed to `erl_trim_chars` with `Both`.
@external(javascript, "../gleam_stdlib.mjs", "trim_chars")
fn do_trim_chars(string: String, charset: String) -> String {
  let separators = erl_to_graphemes(charset)
  erl_trim_chars(string, Both, separators)
}

// Binding to Erlang's two-argument `string:trim`; `direction` selects the
// side(s) of `string` that get trimmed.
@external(erlang, "string", "trim")
fn erl_trim(string: String, direction: Direction) -> String

// Binding to Erlang's three-argument `string:trim`; `separators` is the
// value produced by `erl_to_graphemes` for the charset to strip.
@external(erlang, "string", "trim")
fn erl_trim_chars(
  string: String,
  direction: Direction,
  separators: ErlGraphemes,
) -> String

// Binding to Erlang's `string:to_graphemes`. The result is kept opaque
// (`ErlGraphemes`) and is only meant to be forwarded to `erl_trim_chars`.
@external(erlang, "string", "to_graphemes")
fn erl_to_graphemes(string: String) -> ErlGraphemes

// Opaque stand-in for the value returned by Erlang's `string:to_graphemes`.
// Per the Erlang `string` module docs that value is a list of grapheme
// clusters, each typed `char() | [char()]`, which cannot be directly
// represented as a Gleam type. In the code visible here it is only ever
// produced by `erl_to_graphemes` and passed on to `erl_trim_chars`.
type ErlGraphemes

// Second argument of `erl_trim` / `erl_trim_chars`: which side(s) to trim.
// NOTE(review): presumably each variant reaches Erlang as the matching
// lowercase atom that `string:trim` expects - confirm before renaming any.
type Direction {
// Used by the `*_start` helpers: trim the start only.
Leading
// Used by the `*_end` helpers: trim the end only.
Trailing
// Used by `do_trim` and `do_trim_chars`: trim both sides.
Both
}

/// Removes whitespace on the left of a `String`.
/// Removes whitespace at the start of a `String`.
///
/// ## Examples
///
/// ```gleam
/// trim_left(" hats \n")
/// trim_start(" hats \n")
/// // -> "hats \n"
/// ```
///
pub fn trim_start(string: String) -> String {
do_trim_start(string)
}

/// An alias for trim_start
@deprecated("Use trim_start. There is no behavior change")
pub fn trim_left(string: String) -> String {
do_trim_left(string)
trim_start(string)
}

@external(javascript, "../gleam_stdlib.mjs", "trim_left")
fn do_trim_left(string: String) -> String {
@external(javascript, "../gleam_stdlib.mjs", "trim_start")
fn do_trim_start(string: String) -> String {
erl_trim(string, Leading)
}

/// Removes whitespace on the right of a `String`.
/// Removes whitespace at the end of a `String`.
///
/// ## Examples
///
/// ```gleam
/// trim_right(" hats \n")
/// trim_end(" hats \n")
/// // -> " hats"
/// ```
///
pub fn trim_end(string: String) -> String {
do_trim_end(string)
}

/// An alias for trim_end
@deprecated("Use trim_end. There is no behavior change")
pub fn trim_right(string: String) -> String {
do_trim_right(string)
trim_end(string)
}

@external(javascript, "../gleam_stdlib.mjs", "trim_right")
fn do_trim_right(string: String) -> String {
@external(javascript, "../gleam_stdlib.mjs", "trim_end")
fn do_trim_end(string: String) -> String {
erl_trim(string, Trailing)
}

/// Strips the characters listed in `charset` from the start of a `String`,
/// leaving the end untouched. This is the `trim_start` counterpart for a
/// caller-chosen set of characters.
///
/// ## Examples
///
/// ```gleam
/// trim_chars_start("..,hats,..", ".,")
/// // -> "hats,.."
/// ```
pub fn trim_chars_start(string: String, charset: String) -> String {
  string
  |> do_trim_chars_start(charset)
}

// On the JavaScript target this is replaced by `trim_chars_start` from
// gleam_stdlib.mjs. Otherwise the charset is converted with
// `erl_to_graphemes` and handed to `erl_trim_chars` with `Leading`.
@external(javascript, "../gleam_stdlib.mjs", "trim_chars_start")
fn do_trim_chars_start(string: String, charset: String) -> String {
  let separators = erl_to_graphemes(charset)
  erl_trim_chars(string, Leading, separators)
}

/// Strips the characters listed in `charset` from the end of a `String`,
/// leaving the start untouched. This is the `trim_end` counterpart for a
/// caller-chosen set of characters.
///
/// ## Examples
///
/// ```gleam
/// trim_chars_end("..,hats,..", ".,")
/// // -> "..,hats"
/// ```
pub fn trim_chars_end(string: String, charset: String) -> String {
  string
  |> do_trim_chars_end(charset)
}

// On the JavaScript target this is replaced by `trim_chars_end` from
// gleam_stdlib.mjs. Otherwise the charset is converted with
// `erl_to_graphemes` and handed to `erl_trim_chars` with `Trailing`.
@external(javascript, "../gleam_stdlib.mjs", "trim_chars_end")
fn do_trim_chars_end(string: String, charset: String) -> String {
  let separators = erl_to_graphemes(charset)
  erl_trim_chars(string, Trailing, separators)
}

/// Splits a non-empty `String` into its first element (head) and rest (tail).
/// This lets you pattern match on `String`s exactly as you would with lists.
///
Expand Down
100 changes: 93 additions & 7 deletions src/gleam_stdlib.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import Dict from "./dict.mjs";

const Nil = undefined;
// NOTE(review): a fresh object literal; presumably used as a unique
// "missing" sentinel compared by identity - confirm at its use sites.
const NOT_FOUND = {};
// See license note in escape_regexp_chars
// Matches each character that escape_regexp_chars prefixes with a backslash.
// Carries the `g` flag so that `String.prototype.replace` rewrites every
// occurrence rather than only the first.
const reRegExpChar = /[\\^$.*+?()[\]{}|]/g;
// Same pattern rebuilt without flags: used only with `.test()`, where a
// global regexp would carry `lastIndex` state from one call to the next.
const reHasRegExpChar = RegExp(reRegExpChar.source);

export function identity(x) {
return x;
Expand Down Expand Up @@ -259,21 +262,38 @@ const unicode_whitespaces = [
"\u2029", // Paragraph separator
].join("");

const left_trim_regex = new RegExp(`^([${unicode_whitespaces}]*)`, "g");
const right_trim_regex = new RegExp(`([${unicode_whitespaces}]*)$`, "g");
const start_trim_regex = new_start_trim_regexp(unicode_whitespaces);
const right_trim_regex = new_right_trim_regexp(unicode_whitespaces);

export function trim(string) {
return trim_left(trim_right(string));
return trim_start(trim_end(string));
}

export function trim_left(string) {
return string.replace(left_trim_regex, "");
export function trim_start(string) {
return string.replace(start_trim_regex, "");
}

export function trim_right(string) {
export function trim_end(string) {
return string.replace(right_trim_regex, "");
}

// Removes the characters of `charset` from both sides of `string`.
// The end is trimmed first and the start second.
export function trim_chars(string, charset) {
  return trim_chars_start(trim_chars_end(string, charset), charset);
}

// Removes from the start of `string` whatever the regexp built by
// new_start_trim_regexp(charset) matches. The regexp is rebuilt on each call.
export function trim_chars_start(string, charset) {
  return string.replace(new_start_trim_regexp(charset), "");
}

// Removes from the end of `string` whatever the regexp built by
// new_right_trim_regexp(charset) matches. The regexp is rebuilt on each call.
export function trim_chars_end(string, charset) {
  return string.replace(new_right_trim_regexp(charset), "");
}

// Passes `string` through stringBits, wraps the result in a one-element
// array, and returns what toBitArray builds from that array.
export function bit_array_from_string(string) {
  const segment = stringBits(string);
  return toBitArray([segment]);
}
Expand All @@ -296,7 +316,7 @@ export function crash(message) {

export function bit_array_to_string(bit_array) {
try {
const decoder = new TextDecoder("utf-8", { fatal: true });
const decoder = new TextDecoder("utf-8", { fatarl: true });
ollien marked this conversation as resolved.
Show resolved Hide resolved
return new Ok(decoder.decode(bit_array.buffer));
} catch {
return new Error(Nil);
Expand Down Expand Up @@ -953,3 +973,69 @@ export function bit_array_compare(first, second) {
}
return new Lt(); // second has more items
}

// Builds a global regexp that matches the longest run of characters from
// `charset` anchored at the start of the input.
//
// `charset` is spliced into a character class, so it has to be neutralised
// first (new_right_trim_regexp already did this; this function did not):
//   - escape_regexp_chars handles `\`, `]`, `^` and the other regexp
//     metacharacters, so a charset such as "]" or "\" no longer yields a
//     broken or different pattern;
//   - `-` is escaped separately because escape_regexp_chars leaves it alone,
//     and inside a class "0-9" would otherwise mean a range, not three chars.
// The `u` flag makes the class operate on code points, so an astral character
// in `charset` (e.g. an emoji) is matched as a whole instead of as two
// independent surrogate halves that could tear a different emoji apart.
//
// Limitation: the class still matches code point by code point, so a charset
// entry made of several code points is treated as several separate characters.
function new_start_trim_regexp(charset) {
  const escaped_charset = escape_regexp_chars(charset).replace(/-/g, "\\-");
  return new RegExp(`^([${escaped_charset}]*)`, "gu");
}

// Builds a global regexp that matches the longest run of characters from
// `charset` anchored at the end of the input.
//
// `charset` is spliced into a character class, so it is neutralised first:
//   - escape_regexp_chars handles `\`, `]`, `^` and the other regexp
//     metacharacters;
//   - `-` is escaped separately because escape_regexp_chars leaves it alone,
//     and inside a class "0-9" would otherwise mean a range, not three chars.
// The `u` flag makes the class operate on code points, so an astral character
// in `charset` (e.g. an emoji) is matched as a whole instead of as two
// independent surrogate halves.
//
// Limitation: the class still matches code point by code point, so a charset
// entry made of several code points is treated as several separate characters.
function new_right_trim_regexp(charset) {
  const escaped_charset = escape_regexp_chars(charset).replace(/-/g, "\\-");
  return new RegExp(`([${escaped_charset}]*)$`, "gu");
}

// Returns `string` with every character matched by reRegExpChar prefixed by a
// backslash. Falsy input yields "", and input without any such character is
// returned unchanged.
function escape_regexp_chars(string) {
  /*
   * The MIT License

   * Copyright JS Foundation and other contributors <https://js.foundation/>
   *
   * Based on Underscore.js, copyright Jeremy Ashkenas,
   * DocumentCloud and Investigative Reporters & Editors <http://underscorejs.org/>
   *
   * This software consists of voluntary contributions made by many
   * individuals. For exact contribution history, see the revision history
   * available at https://github.com/lodash/lodash
   *
   * The following license applies to all parts of this software except as
   * documented below:
   *
   * ====
   *
   * Permission is hereby granted, free of charge, to any person obtaining
   * a copy of this software and associated documentation files (the
   * "Software"), to deal in the Software without restriction, including
   * without limitation the rights to use, copy, modify, merge, publish,
   * distribute, sublicense, and/or sell copies of the Software, and to
   * permit persons to whom the Software is furnished to do so, subject to
   * the following conditions:
   *
   * The above copyright notice and this permission notice shall be
   * included in all copies or substantial portions of the Software.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
   * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
   * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
   * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   *
   * ====
   *
   * Copyright and related rights for sample code are waived via CC0. Sample
   * code is defined as all source code displayed within the prose of the
   * documentation.
   *
   * CC0: http://creativecommons.org/publicdomain/zero/1.0/
   *
   * ====
   *
   * Files located in the node_modules and vendor directories are externally
   * maintained libraries used by this software which have their own
   * licenses; we recommend you read them, as their terms may differ from the
   * terms above.
   */
  // Falsy input (empty string, undefined, null, ...) collapses to "".
  if (!string) {
    return '';
  }
  // Nothing to escape: hand the input back untouched.
  if (!reHasRegExpChar.test(string)) {
    return string;
  }
  return string.replace(reRegExpChar, '\\$&');
}
74 changes: 70 additions & 4 deletions test/gleam/string_test.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -164,18 +164,54 @@ pub fn trim_test() {
|> should.equal("hats")
}

pub fn trim_left_test() {
pub fn trim_start_test() {
" hats \n"
|> string.trim_left
|> string.trim_start
|> should.equal("hats \n")
}

pub fn trim_right_test() {
// Right-to-left text padded with a space on each side: `trim_start` drops
// only the first space and keeps the last one.
pub fn trim_start_rtl_test() {
  let input = " עברית "

  input
  |> string.trim_start
  |> should.equal("עברית ")
}

// Right-to-left text padded with a space on each side: `trim_end` drops
// only the last space and keeps the first one.
pub fn trim_end_rtl_test() {
  let input = " עברית "

  input
  |> string.trim_end
  |> should.equal(" עברית")
}
Comment on lines +173 to +183
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests surprised me, as a native English speaker (I do not speak any RTL languages), but the behaviour seems to match Rust's implementation, so I'd be surprised if it was wrong. Happy to be corrected!

https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=3c96c56a9a7da1a7c70898000531ba38

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Presumably it's because the RTL text is only in the middle, surrounded by LTR text.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah - I've gone looking and have not been able to find an example of RTL whitespace that I can use as a test case here. Maybe there is one and I'm just not finding it.


pub fn trim_end_test() {
" hats \n"
|> string.trim_right
|> string.trim_end
|> should.equal(" hats")
}

// Only the leading run of charset characters is removed; the trailing run
// is left in place.
pub fn trim_chars_start_test() {
  let input = ",..hats..,"

  input
  |> string.trim_chars_start(",.")
  |> should.equal("hats..,")
}

// Right-to-left word that begins and ends with the same letter: only the
// first occurrence is removed by `trim_chars_start`.
pub fn trim_chars_start_rtl_test() {
  let input = "שמש"

  input
  |> string.trim_chars_start("ש")
  |> should.equal("מש")
}

// Only the trailing run of charset characters is removed; the leading run
// is left in place.
pub fn trim_chars_end_test() {
  let input = ",..hats..,"

  input
  |> string.trim_chars_end(",.")
  |> should.equal(",..hats")
}

// Right-to-left word that begins and ends with the same letter: only the
// last occurrence is removed by `trim_chars_end`.
pub fn trim_chars_end_rtl_test() {
  let input = "שמש"

  input
  |> string.trim_chars_end("ש")
  |> should.equal("שמ")
}

// unicode whitespaces
pub fn trim_horizontal_tab_test() {
"hats\u{0009}"
Expand Down Expand Up @@ -364,6 +400,36 @@ pub fn trim_comma_test() {
|> should.equal("hats,")
}

// A single-character charset is stripped from both ends.
pub fn trim_chars_test() {
  let input = ",,hats,"

  input
  |> string.trim_chars(",")
  |> should.equal("hats")
}

// Every character of a multi-character charset is stripped, in any mix.
pub fn trim_chars_commas_and_periods_test() {
  let input = ",,hats,..."

  input
  |> string.trim_chars(",.")
  |> should.equal("hats")
}

// Whitespace is not trimmed unless it is part of the charset: the space
// after "hats" survives.
pub fn trim_chars_keeps_whitespace_not_in_charset_test() {
  let input = ",,hats ,..."

  input
  |> string.trim_chars(",.")
  |> should.equal("hats ")
}

// Charset characters in the interior of the string are left alone.
pub fn trim_chars_does_not_trim_from_middle_of_string_test() {
  let input = ",,hats,hats,hats,..."

  input
  |> string.trim_chars(",.")
  |> should.equal("hats,hats,hats")
}

// An emoji in the charset is stripped wherever it repeats at the end.
pub fn trim_chars_trims_complex_graphemes_test() {
  let input = "hats👍👍👍👍"

  input
  |> string.trim_chars("👍")
  |> should.equal("hats")
}

pub fn starts_with_test() {
"theory"
|> string.starts_with("")
Expand Down