Publish 0.2.0 with newlines treated as width 1 (#68)

* Revert "Treat newlines as width 0 in the 0.1 stream, publish 0.1.14 (#67)"

  This reverts commit 9eaafa5.

* Update readme

* Bump to 0.2
unicode-rs · Sep 19, 2024 · 79eab0d · 79eab0d
1 parent 9eaafa5
commit 79eab0d
Show file tree

Hide file tree

Showing 5 changed files with 22 additions and 32 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 
 name = "unicode-width"
-version = "0.1.14"
+version = "0.2.0"
 authors = [
     "kwantam <[email protected]>",
     "Manish Goregaokar <[email protected]>",

diff --git a/README.md b/README.md
@@ -55,3 +55,16 @@ to your `Cargo.toml`:
 [dependencies]
 unicode-width = "0.1.11"
 ```
+
+
+## Changelog
+
+
+### 0.2.0
+
+ - Treat `\n` as width 1 (#60)
+ - Treat ambiguous `Modifier_Letter`s as narrow (#63)
+ - Support `Grapheme_Cluster_Break=Prepend` (#62)
+ - Support lots of ligatures (#53)
+
+Note: If you are using `unicode-width` for line breaking, the change treating `\n` as width 1 _may cause behavior changes_. In such cases, it is recommended that you feed already line-segmented text to `unicode-width`. In other words, please apply higher-level, control-character-based line-breaking protocols before feeding text to `unicode-width`. Relying on any character producing a stable width in this crate is likely a sign of a bug.
diff --git a/scripts/unicode.py b/scripts/unicode.py
@@ -1281,10 +1281,7 @@ def lookup_fns(
     s += """
     if c <= '\\u{A0}' {
         match c {
-            // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering
-            // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates.
-            // https://github.com/unicode-rs/unicode-width/issues/60
-            '\\n' => (0, WidthInfo::LINE_FEED),
+            '\\n' => (1, WidthInfo::LINE_FEED),
             '\\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT),
             _ => (1, WidthInfo::DEFAULT),
         }

diff --git a/src/tables.rs b/src/tables.rs
@@ -215,10 +215,7 @@ fn width_in_str(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) {
     }
     if c <= '\u{A0}' {
         match c {
-            // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering
-            // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates.
-            // https://github.com/unicode-rs/unicode-width/issues/60
-            '\n' => (0, WidthInfo::LINE_FEED),
+            '\n' => (1, WidthInfo::LINE_FEED),
             '\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT),
             _ => (1, WidthInfo::DEFAULT),
         }
@@ -510,10 +507,7 @@ fn width_in_str_cjk(c: char, mut next_info: WidthInfo) -> (i8, WidthInfo) {
     }
     if c <= '\u{A0}' {
         match c {
-            // According to the spec, LF should be width 1, which is how it is often rendered when it is forced to have a single-line rendering
-            // However, this makes it harder to use this crate to calculate line breaks, and breaks assumptions of downstream crates.
-            // https://github.com/unicode-rs/unicode-width/issues/60
-            '\n' => (0, WidthInfo::LINE_FEED),
+            '\n' => (1, WidthInfo::LINE_FEED),
             '\r' if next_info == WidthInfo::LINE_FEED => (0, WidthInfo::DEFAULT),
             _ => (1, WidthInfo::DEFAULT),
         }

diff --git a/tests/tests.rs b/tests/tests.rs
@@ -214,23 +214,18 @@ fn test_control_line_break() {
     assert_width!('\r', None, None);
     assert_width!('\n', None, None);
     assert_width!("\r", 1, 1);
-    // This is 0 due to #60
-    assert_width!("\n", 0, 0);
-    assert_width!("\r\n", 0, 0);
+    assert_width!("\n", 1, 1);
+    assert_width!("\r\n", 1, 1);
     assert_width!("\0", 1, 1);
-    assert_width!("1\t2\r\n3\u{85}4", 6, 6);
-    assert_width!("\r\u{FE0F}\n", 1, 1);
-    assert_width!("\r\u{200D}\n", 1, 1);
+    assert_width!("1\t2\r\n3\u{85}4", 7, 7);
+    assert_width!("\r\u{FE0F}\n", 2, 2);
+    assert_width!("\r\u{200D}\n", 2, 2);
 }
 
 #[test]
 fn char_str_consistent() {
     let mut s = String::with_capacity(4);
     for c in '\0'..=char::MAX {
-        // Newlines are special cased (#60)
-        if c == '\n' {
-            continue;
-        }
         s.clear();
         s.push(c);
         assert_eq!(c.width().unwrap_or(1), s.width());
@@ -423,10 +418,6 @@ fn test_khmer_coeng() {
             assert_width!(format!("\u{17D2}{c}"), 0, 0);
             assert_width!(format!("\u{17D2}\u{200D}\u{200D}{c}"), 0, 0);
         } else {
-            // Newlines are special cased (#60)
-            if c == '\n' {
-                continue;
-            }
             assert_width!(
                 format!("\u{17D2}{c}"),
                 c.width().unwrap_or(1),
@@ -597,11 +588,6 @@ fn emoji_test_file() {
     }
 }
 
-#[test]
-fn test_newline_zero_issue_60() {
-    assert_width!("a\na", 2, 2);
-}
-
 // Test traits are unsealed
 
 #[cfg(feature = "cjk")]