Skip to content

Commit 90adbe2

Browse files
committed
refactor: make string formatting more readable
To make the final output code easier to read:

* Get rid of the line noise of calling `.unwrap()` on every `write!()` by moving the `.unwrap()` into a macro.
* Join consecutive `write!()` calls using a single multiline format string.
* Replace `.push()` and `.push_str(format!())` with `write!()`.
* If, after doing all of the above, there is only a single `write!()` call in the function, just construct the string directly with `format!()`.
1 parent 7b6ec46 commit 90adbe2

File tree

7 files changed

+232
-283
lines changed

7 files changed

+232
-283
lines changed

library/core/src/unicode/unicode_data.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// Total : 31911 bytes
1313

1414
pub const UNICODE_VERSION: (u8, u8, u8) = (17, 0, 0);
15+
1516
use super::rt::*;
1617

1718
pub mod alphabetic {
@@ -129,6 +130,7 @@ pub mod alphabetic {
129130
1, 10, 1, 17, 5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, 32, 0, 2, 0, 2, 0, 15, 0, 0, 0,
130131
0, 0, 5, 0, 0,
131132
];
133+
132134
#[inline]
133135
pub fn lookup(c: char) -> bool {
134136
debug_assert!(!c.is_ascii());
@@ -228,6 +230,7 @@ pub mod case_ignorable {
228230
1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, 0, 1, 61, 4, 0, 5, 254, 2, 243, 1, 2, 1, 7,
229231
2, 5, 1, 9, 1, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
230232
];
233+
231234
#[inline]
232235
pub fn lookup(c: char) -> bool {
233236
debug_assert!(!c.is_ascii());
@@ -291,6 +294,7 @@ pub mod cased {
291294
7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10,
292295
1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
293296
];
297+
294298
#[inline]
295299
pub fn lookup(c: char) -> bool {
296300
debug_assert!(!c.is_ascii());
@@ -381,6 +385,7 @@ pub mod grapheme_extend {
381385
4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 100, 1, 160, 7, 0, 1, 61,
382386
4, 0, 4, 254, 2, 243, 1, 2, 1, 7, 2, 5, 1, 0, 7, 109, 7, 0, 96, 128, 240, 0,
383387
];
388+
384389
#[inline]
385390
pub fn lookup(c: char) -> bool {
386391
debug_assert!(!c.is_ascii());
@@ -593,6 +598,7 @@ pub mod n {
593598
50, 0, 10, 0, 10, 0, 10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13,
594599
0, 10, 0,
595600
];
601+
596602
#[inline]
597603
pub fn lookup(c: char) -> bool {
598604
debug_assert!(!c.is_ascii());
@@ -741,6 +747,7 @@ pub mod white_space {
741747
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
742748
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
743749
];
750+
744751
#[inline]
745752
pub const fn lookup(c: char) -> bool {
746753
debug_assert!(!c.is_ascii());
@@ -755,7 +762,7 @@ pub mod white_space {
755762
}
756763

757764
pub mod conversions {
758-
const INDEX_MASK: u32 = 0x400000;
765+
const INDEX_MASK: u32 = 1 << 22;
759766

760767
pub fn to_lower(c: char) -> [char; 3] {
761768
if c.is_ascii() {
@@ -1152,7 +1159,6 @@ pub mod conversions {
11521159
('\u{1e91d}', 125247), ('\u{1e91e}', 125248), ('\u{1e91f}', 125249), ('\u{1e920}', 125250),
11531160
('\u{1e921}', 125251),
11541161
];
1155-
11561162
#[rustfmt::skip]
11571163
static LOWERCASE_TABLE_MULTI: &[[char; 3]; 1] = &[
11581164
['i', '\u{307}', '\u{0}'],
@@ -1538,7 +1544,6 @@ pub mod conversions {
15381544
('\u{1e93d}', 125211), ('\u{1e93e}', 125212), ('\u{1e93f}', 125213), ('\u{1e940}', 125214),
15391545
('\u{1e941}', 125215), ('\u{1e942}', 125216), ('\u{1e943}', 125217),
15401546
];
1541-
15421547
#[rustfmt::skip]
15431548
static UPPERCASE_TABLE_MULTI: &[[char; 3]; 102] = &[
15441549
['S', 'S', '\u{0}'], ['\u{2bc}', 'N', '\u{0}'], ['J', '\u{30c}', '\u{0}'],

src/tools/unicode-table-generator/src/cascading_map.rs

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
use std::collections::HashMap;
2-
use std::fmt::Write as _;
32
use std::ops::Range;
43

5-
use crate::fmt_list;
64
use crate::raw_emitter::RawEmitter;
5+
use crate::writeln;
76

87
impl RawEmitter {
98
pub fn emit_cascading_map(&mut self, ranges: &[Range<u32>]) -> bool {
@@ -24,8 +23,6 @@ impl RawEmitter {
2423
.flat_map(|r| (r.start..r.end).collect::<Vec<u32>>())
2524
.collect::<Vec<u32>>();
2625

27-
println!("there are {} points", points.len());
28-
2926
// how many distinct ranges need to be counted?
3027
let mut codepoints_by_high_bytes = HashMap::<usize, Vec<u32>>::new();
3128
for point in points {
@@ -37,41 +34,41 @@ impl RawEmitter {
3734
}
3835

3936
let mut bit_for_high_byte = 1u8;
40-
let mut arms = Vec::<String>::new();
37+
let mut arms = String::new();
4138

4239
let mut high_bytes: Vec<usize> = codepoints_by_high_bytes.keys().copied().collect();
4340
high_bytes.sort();
4441
for high_byte in high_bytes {
4542
let codepoints = codepoints_by_high_bytes.get_mut(&high_byte).unwrap();
4643
if codepoints.len() == 1 {
4744
let ch = codepoints.pop().unwrap();
48-
arms.push(format!("{high_byte} => c as u32 == {ch:#04x}"));
45+
writeln!(arms, "{high_byte} => c as u32 == {ch:#04x},");
4946
continue;
5047
}
5148
// more than 1 codepoint in this arm
5249
for codepoint in codepoints {
5350
map[(*codepoint & 0xff) as usize] |= bit_for_high_byte;
5451
}
55-
arms.push(format!(
56-
"{high_byte} => WHITESPACE_MAP[c as usize & 0xff] & {bit_for_high_byte} != 0"
57-
));
52+
writeln!(
53+
arms,
54+
"{high_byte} => WHITESPACE_MAP[c as usize & 0xff] & {bit_for_high_byte} != 0,"
55+
);
5856
bit_for_high_byte <<= 1;
5957
}
6058

61-
writeln!(&mut self.file, "static WHITESPACE_MAP: [u8; 256] = [{}];", fmt_list(map.iter()))
62-
.unwrap();
6359
self.bytes_used += 256;
60+
self.file = format!(
61+
"static WHITESPACE_MAP: [u8; 256] = {map:?};
6462
65-
writeln!(&mut self.file, "#[inline]").unwrap();
66-
writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
67-
writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap();
68-
writeln!(&mut self.file, " match c as u32 >> 8 {{").unwrap();
69-
for arm in arms {
70-
writeln!(&mut self.file, " {arm},").unwrap();
71-
}
72-
writeln!(&mut self.file, " _ => false,").unwrap();
73-
writeln!(&mut self.file, " }}").unwrap();
74-
writeln!(&mut self.file, "}}").unwrap();
63+
#[inline]
64+
pub const fn lookup(c: char) -> bool {{
65+
debug_assert!(!c.is_ascii());
66+
match c as u32 >> 8 {{
67+
{arms}\
68+
_ => false,
69+
}}
70+
}}"
71+
);
7572

7673
true
7774
}

src/tools/unicode-table-generator/src/case_mapping.rs

Lines changed: 20 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,18 @@
11
use std::char;
22
use std::collections::BTreeMap;
3-
use std::fmt::{self, Write};
43

5-
use crate::{UnicodeData, fmt_list};
4+
use crate::{CharEscape, UnicodeData, fmt_list};
65

76
const INDEX_MASK: u32 = 1 << 22;
87

98
pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [usize; 2]) {
10-
let mut file = String::new();
11-
12-
write!(file, "const INDEX_MASK: u32 = 0x{INDEX_MASK:x};").unwrap();
13-
file.push_str("\n\n");
14-
file.push_str(HEADER.trim_start());
15-
file.push('\n');
169
let (lower_tables, lower_size) = generate_tables("LOWER", &data.to_lower);
17-
file.push_str(&lower_tables);
18-
file.push_str("\n\n");
1910
let (upper_tables, upper_size) = generate_tables("UPPER", &data.to_upper);
20-
file.push_str(&upper_tables);
11+
let file = format!(
12+
"{HEADER}
13+
{lower_tables}
14+
{upper_tables}"
15+
);
2116
(file, [lower_size, upper_size])
2217
}
2318

@@ -47,45 +42,23 @@ fn generate_tables(case: &str, data: &BTreeMap<u32, [u32; 3]>) -> (String, usize
4742
mappings.push((CharEscape(key), value));
4843
}
4944

50-
let mut tables = String::new();
51-
let mut size = 0;
52-
53-
size += size_of_val(mappings.as_slice());
54-
writeln!(tables, "#[rustfmt::skip]").unwrap();
55-
write!(
56-
tables,
57-
"static {}CASE_TABLE: &[(char, u32); {}] = &[{}];",
58-
case,
59-
mappings.len(),
60-
fmt_list(mappings),
61-
)
62-
.unwrap();
63-
64-
tables.push_str("\n\n");
65-
66-
size += size_of_val(multis.as_slice());
67-
writeln!(tables, "#[rustfmt::skip]").unwrap();
68-
write!(
69-
tables,
70-
"static {}CASE_TABLE_MULTI: &[[char; 3]; {}] = &[{}];",
71-
case,
72-
multis.len(),
73-
fmt_list(multis),
74-
)
75-
.unwrap();
76-
77-
(tables, size)
78-
}
79-
80-
struct CharEscape(char);
81-
82-
impl fmt::Debug for CharEscape {
83-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
84-
write!(f, "'{}'", self.0.escape_default())
85-
}
45+
let size = size_of_val(mappings.as_slice()) + size_of_val(multis.as_slice());
46+
let file = format!(
47+
"
48+
#[rustfmt::skip]\nstatic {case}CASE_TABLE: &[(char, u32); {mappings_len}] = &[{mappings}];
49+
#[rustfmt::skip]\nstatic {case}CASE_TABLE_MULTI: &[[char; 3]; {multis_len}] = &[{multis}];",
50+
mappings = fmt_list(&mappings),
51+
mappings_len = mappings.len(),
52+
multis = fmt_list(&multis),
53+
multis_len = multis.len(),
54+
);
55+
56+
(file, size)
8657
}
8758

8859
static HEADER: &str = r"
60+
const INDEX_MASK: u32 = 1 << 22;
61+
8962
pub fn to_lower(c: char) -> [char; 3] {
9063
if c.is_ascii() {
9164
[(c as u8).to_ascii_lowercase() as char, '\0', '\0']
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
use std::fmt;
2+
3+
// Convenience macros for writing and unwrapping.
4+
/// Crate-local `writeln!` that forwards all of its arguments to `std::writeln!`
/// and calls `.unwrap()` on the result, so call sites need no trailing `.unwrap()`.
///
/// The expansion is a block ending in a statement, so it evaluates to `()`.
/// It panics if the underlying write returns an error.
#[macro_export]
5+
macro_rules! writeln {
6+
($($args:tt)*) => {{
7+
// Bring `fmt::Write` into scope anonymously so callers do not need to import it.
use std::fmt::Write as _;
8+
std::writeln!($($args)*).unwrap();
9+
}};
10+
}
11+
/// Crate-local `write!` that forwards all of its arguments to `std::write!`
/// and calls `.unwrap()` on the result, so call sites need no trailing `.unwrap()`.
///
/// The expansion is a block ending in a statement, so it evaluates to `()`;
/// code that must return the `fmt::Result` has to call `std::write!` directly.
/// It panics if the underlying write returns an error.
#[macro_export]
12+
macro_rules! write {
13+
($($args:tt)*) => {{
14+
// Bring `fmt::Write` into scope anonymously so callers do not need to import it.
use std::fmt::Write as _;
15+
std::write!($($args)*).unwrap();
16+
}};
17+
}
18+
19+
/// Formats `values` as a comma-separated list of their `Debug` renderings,
/// wrapped across multiple lines.
///
/// Each value is rendered as `{value:?}, ` and appended to the current line
/// while the line's byte length plus the piece's byte length stays below 98.
/// Otherwise the current line is flushed to the output and the piece starts a
/// new line. Every flushed line has its trailing whitespace trimmed and is
/// terminated with a newline.
///
/// The first line is seeded with a newline followed by leading whitespace, so a
/// non-empty result begins with a line break. For an empty input the seed is
/// trimmed away entirely and the result is a single newline.
pub fn fmt_list<V: fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
20+
// Lazily render every value, including its trailing ", " separator.
let pieces = values.into_iter().map(|b| format!("{b:?}, "));
21+
let mut out = String::new();
22+
// NOTE(review): the whitespace inside this literal and the one below may have
// been collapsed by the page rendering — confirm against the real file.
let mut line = String::from("\n ");
23+
for piece in pieces {
24+
if line.len() + piece.len() < 98 {
25+
line.push_str(&piece);
26+
} else {
27+
// `writeln!` here is this file's unwrapping macro, hence no `.unwrap()`.
writeln!(out, "{}", line.trim_end());
28+
// The piece that did not fit starts the next line.
line = format!(" {piece}");
29+
}
30+
}
31+
// Flush the final, partially filled line.
writeln!(out, "{}", line.trim_end());
32+
out
33+
}
34+
35+
/// Wrapper type for formatting a `T` using its `Binary` implementation.
///
/// Both the `Debug` and `Display` impls render the wrapped value as `0b`
/// followed by its binary digits, zero-padded to `size_of::<T>() * 8` digits.
36+
#[derive(Copy, Clone)]
37+
pub struct Bin<T>(pub T);
38+
39+
impl<T: fmt::Binary> fmt::Debug for Bin<T> {
40+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41+
// Pad to the full bit width of `T` so narrower values are left-filled with zeros.
let bits = size_of::<T>() * 8;
42+
// `std::write!` is named explicitly: this file's own `write!` macro unwraps
// the result, whereas `fmt` has to return it.
std::write!(f, "0b{:0bits$b}", self.0)
43+
}
44+
}
45+
46+
impl<T: fmt::Binary> fmt::Display for Bin<T> {
47+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48+
// Delegate to the `Debug` impl so both traits produce identical output.
fmt::Debug::fmt(self, f)
49+
}
50+
}
51+
52+
/// Wrapper type for formatting a `char` using `escape_default`.
///
/// Both the `Debug` and `Display` impls render the character's
/// `escape_default()` sequence wrapped in single quotes.
53+
#[derive(Copy, Clone)]
54+
pub struct CharEscape(pub char);
55+
56+
impl fmt::Debug for CharEscape {
57+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58+
// `std::write!` is named explicitly: this file's own `write!` macro unwraps
// the result, whereas `fmt` has to return it.
std::write!(f, "'{}'", self.0.escape_default())
59+
}
60+
}
61+
62+
impl fmt::Display for CharEscape {
63+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64+
// Delegate to the `Debug` impl so both traits produce identical output.
fmt::Debug::fmt(self, f)
65+
}
66+
}

0 commit comments

Comments
 (0)