Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 3f1d2aa

Browse files
committed Sep 28, 2020
Use more efficient scheme for display u128/i128
Add zero padding. Add benchmarks for fmt u128. These test both the case with the maximum amount of work (all characters used) and the case with the least amount of work (1 character used).
1 parent d62d3f7 commit 3f1d2aa

File tree

2 files changed

+283
-59
lines changed

2 files changed

+283
-59
lines changed
 

‎library/core/benches/fmt.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,32 @@ fn write_str_macro_debug(bh: &mut Bencher) {
108108
}
109109
});
110110
}
111+
112+
#[bench]
113+
fn write_u128_max(bh: &mut Bencher) {
114+
bh.iter(|| {
115+
std::hint::black_box(format!("{}", u128::MAX));
116+
});
117+
}
118+
119+
#[bench]
120+
fn write_u128_min(bh: &mut Bencher) {
121+
bh.iter(|| {
122+
let s = format!("{}", 0u128);
123+
std::hint::black_box(s);
124+
});
125+
}
126+
127+
#[bench]
128+
fn write_u64_max(bh: &mut Bencher) {
129+
bh.iter(|| {
130+
std::hint::black_box(format!("{}", u64::MAX));
131+
});
132+
}
133+
134+
#[bench]
135+
fn write_u64_min(bh: &mut Bencher) {
136+
bh.iter(|| {
137+
std::hint::black_box(format!("{}", 0u64));
138+
});
139+
}

‎library/core/src/fmt/num.rs

Lines changed: 254 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::slice;
99
use crate::str;
1010

1111
#[doc(hidden)]
12-
trait Int:
12+
trait DisplayInt:
1313
PartialEq + PartialOrd + Div<Output = Self> + Rem<Output = Self> + Sub<Output = Self> + Copy
1414
{
1515
fn zero() -> Self;
@@ -21,22 +21,39 @@ trait Int:
2121
fn to_u128(&self) -> u128;
2222
}
2323

24-
macro_rules! doit {
25-
($($t:ident)*) => ($(impl Int for $t {
26-
fn zero() -> Self { 0 }
27-
fn from_u8(u: u8) -> Self { u as Self }
28-
fn to_u8(&self) -> u8 { *self as u8 }
29-
fn to_u16(&self) -> u16 { *self as u16 }
30-
fn to_u32(&self) -> u32 { *self as u32 }
31-
fn to_u64(&self) -> u64 { *self as u64 }
32-
fn to_u128(&self) -> u128 { *self as u128 }
33-
})*)
24+
/// Implements `DisplayInt` for every integer type named in the invocation.
///
/// Each conversion method is a plain `as` cast to or from the named type.
macro_rules! impl_int {
    ($($ty:ident)*) => {
        $(
            impl DisplayInt for $ty {
                fn zero() -> Self {
                    0
                }
                fn from_u8(u: u8) -> Self {
                    u as Self
                }
                fn to_u8(&self) -> u8 {
                    *self as u8
                }
                fn to_u16(&self) -> u16 {
                    *self as u16
                }
                fn to_u32(&self) -> u32 {
                    *self as u32
                }
                fn to_u64(&self) -> u64 {
                    *self as u64
                }
                fn to_u128(&self) -> u128 {
                    *self as u128
                }
            }
        )*
    };
}
35-
doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
37+
/// Implements `DisplayInt` for every unsigned integer type named in the invocation.
///
/// The generated impl is exactly the one `impl_int!` produces (plain `as` casts), so this
/// macro forwards to it instead of carrying a second copy of the same body.
macro_rules! impl_uint {
    ($($t:ident)*) => (
        impl_int! { $($t)* }
    )
}
50+
51+
impl_int! { i8 i16 i32 i64 i128 isize }
52+
impl_uint! { u8 u16 u32 u64 u128 usize }
3653

3754
/// A type that represents a specific radix
3855
#[doc(hidden)]
39-
trait GenericRadix {
56+
trait GenericRadix: Sized {
4057
/// The number of digits.
4158
const BASE: u8;
4259

@@ -47,7 +64,7 @@ trait GenericRadix {
4764
fn digit(x: u8) -> u8;
4865

4966
/// Format an integer using the radix using a formatter.
50-
fn fmt_int<T: Int>(&self, mut x: T, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67+
fn fmt_int<T: DisplayInt>(&self, mut x: T, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5168
// The radix can be as low as 2, so we need a buffer of at least 128
5269
// characters for a base 2 number.
5370
let zero = T::zero();
@@ -127,13 +144,11 @@ macro_rules! radix {
127144

128145
radix! { Binary, 2, "0b", x @ 0 ..= 1 => b'0' + x }
129146
radix! { Octal, 8, "0o", x @ 0 ..= 7 => b'0' + x }
130-
radix! { LowerHex, 16, "0x", x @ 0 ..= 9 => b'0' + x,
131-
x @ 10 ..= 15 => b'a' + (x - 10) }
132-
radix! { UpperHex, 16, "0x", x @ 0 ..= 9 => b'0' + x,
133-
x @ 10 ..= 15 => b'A' + (x - 10) }
147+
radix! { LowerHex, 16, "0x", x @ 0 ..= 9 => b'0' + x, x @ 10 ..= 15 => b'a' + (x - 10) }
148+
radix! { UpperHex, 16, "0x", x @ 0 ..= 9 => b'0' + x, x @ 10 ..= 15 => b'A' + (x - 10) }
134149

135150
macro_rules! int_base {
136-
($Trait:ident for $T:ident as $U:ident -> $Radix:ident) => {
151+
(fmt::$Trait:ident for $T:ident as $U:ident -> $Radix:ident) => {
137152
#[stable(feature = "rust1", since = "1.0.0")]
138153
impl fmt::$Trait for $T {
139154
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -143,8 +158,27 @@ macro_rules! int_base {
143158
};
144159
}
145160

161+
/// Generates the `Binary`, `Octal`, `LowerHex` and `UpperHex` impls for a signed integer
/// type and its unsigned counterpart by invoking `int_base!`.
///
/// Both types are passed to `int_base!` with `as $Uint`.
macro_rules! integer {
    ($Int:ident, $Uint:ident) => {
        integer! { @radix_impls $Int as $Uint }
        integer! { @radix_impls $Uint as $Uint }
    };
    // Internal rule: the four radix impls for a single type `$T`.
    (@radix_impls $T:ident as $U:ident) => {
        int_base! { fmt::Binary for $T as $U -> Binary }
        int_base! { fmt::Octal for $T as $U -> Octal }
        int_base! { fmt::LowerHex for $T as $U -> LowerHex }
        int_base! { fmt::UpperHex for $T as $U -> UpperHex }
    };
}
174+
integer! { isize, usize }
175+
integer! { i8, u8 }
176+
integer! { i16, u16 }
177+
integer! { i32, u32 }
178+
integer! { i64, u64 }
179+
integer! { i128, u128 }
146180
macro_rules! debug {
147-
($T:ident) => {
181+
($($T:ident)*) => {$(
148182
#[stable(feature = "rust1", since = "1.0.0")]
149183
impl fmt::Debug for $T {
150184
#[inline]
@@ -158,31 +192,14 @@ macro_rules! debug {
158192
}
159193
}
160194
}
161-
};
195+
)*};
162196
}
163-
164-
macro_rules! integer {
165-
($Int:ident, $Uint:ident) => {
166-
int_base! { Binary for $Int as $Uint -> Binary }
167-
int_base! { Octal for $Int as $Uint -> Octal }
168-
int_base! { LowerHex for $Int as $Uint -> LowerHex }
169-
int_base! { UpperHex for $Int as $Uint -> UpperHex }
170-
debug! { $Int }
171-
172-
int_base! { Binary for $Uint as $Uint -> Binary }
173-
int_base! { Octal for $Uint as $Uint -> Octal }
174-
int_base! { LowerHex for $Uint as $Uint -> LowerHex }
175-
int_base! { UpperHex for $Uint as $Uint -> UpperHex }
176-
debug! { $Uint }
177-
};
197+
debug! {
198+
i8 i16 i32 i64 i128 isize
199+
u8 u16 u32 u64 u128 usize
178200
}
179-
integer! { isize, usize }
180-
integer! { i8, u8 }
181-
integer! { i16, u16 }
182-
integer! { i32, u32 }
183-
integer! { i64, u64 }
184-
integer! { i128, u128 }
185201

202+
// 2 digit decimal look up table
186203
static DEC_DIGITS_LUT: &[u8; 200] = b"0001020304050607080910111213141516171819\
187204
2021222324252627282930313233343536373839\
188205
4041424344454647484950515253545556575859\
@@ -256,21 +273,20 @@ macro_rules! impl_Display {
256273
f.pad_integral(is_nonnegative, "", buf_slice)
257274
}
258275

259-
$(
260-
#[stable(feature = "rust1", since = "1.0.0")]
261-
impl fmt::Display for $t {
262-
#[allow(unused_comparisons)]
263-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
264-
let is_nonnegative = *self >= 0;
265-
let n = if is_nonnegative {
266-
self.$conv_fn()
267-
} else {
268-
// convert the negative num to positive by summing 1 to it's 2 complement
269-
(!self.$conv_fn()).wrapping_add(1)
270-
};
271-
$name(n, is_nonnegative, f)
272-
}
273-
})*
276+
$(#[stable(feature = "rust1", since = "1.0.0")]
277+
impl fmt::Display for $t {
278+
#[allow(unused_comparisons)]
279+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
280+
let is_nonnegative = *self >= 0;
281+
let n = if is_nonnegative {
282+
self.$conv_fn()
283+
} else {
284+
// convert the negative num to positive by adding 1 to its one's complement
285+
(!self.$conv_fn()).wrapping_add(1)
286+
};
287+
$name(n, is_nonnegative, f)
288+
}
289+
})*
274290
};
275291
}
276292

@@ -461,6 +477,185 @@ mod imp {
461477
impl_Exp!(i8, u8, i16, u16, i32, u32, isize, usize as u32 via to_u32 named exp_u32);
462478
impl_Exp!(i64, u64 as u64 via to_u64 named exp_u64);
463479
}
464-
465-
impl_Display!(i128, u128 as u128 via to_u128 named fmt_u128);
466480
impl_Exp!(i128, u128 as u128 via to_u128 named exp_u128);
481+
482+
/// Helper function for writing a u64 into `buf` going from last to first, with `curr`.
483+
fn parse_u64_into<const N: usize>(mut n: u64, buf: &mut [MaybeUninit<u8>; N], curr: &mut isize) {
484+
let buf_ptr = MaybeUninit::slice_as_mut_ptr(buf);
485+
let lut_ptr = DEC_DIGITS_LUT.as_ptr();
486+
assert!(*curr > 19);
487+
488+
// SAFETY:
489+
// Writes at most 19 characters into the buffer. Guaranteed that any ptr into LUT is at most
490+
// 198, so will never OOB. There is a check above that there are at least 19 characters
491+
// remaining.
492+
unsafe {
493+
if n >= 1e16 as u64 {
494+
let to_parse = n % 1e16 as u64;
495+
n /= 1e16 as u64;
496+
497+
// Some of these are nops but it looks more elegant this way.
498+
let d1 = ((to_parse / 1e14 as u64) % 100) << 1;
499+
let d2 = ((to_parse / 1e12 as u64) % 100) << 1;
500+
let d3 = ((to_parse / 1e10 as u64) % 100) << 1;
501+
let d4 = ((to_parse / 1e8 as u64) % 100) << 1;
502+
let d5 = ((to_parse / 1e6 as u64) % 100) << 1;
503+
let d6 = ((to_parse / 1e4 as u64) % 100) << 1;
504+
let d7 = ((to_parse / 1e2 as u64) % 100) << 1;
505+
let d8 = ((to_parse / 1e0 as u64) % 100) << 1;
506+
507+
*curr -= 16;
508+
509+
ptr::copy_nonoverlapping(lut_ptr.offset(d1 as isize), buf_ptr.offset(*curr + 0), 2);
510+
ptr::copy_nonoverlapping(lut_ptr.offset(d2 as isize), buf_ptr.offset(*curr + 2), 2);
511+
ptr::copy_nonoverlapping(lut_ptr.offset(d3 as isize), buf_ptr.offset(*curr + 4), 2);
512+
ptr::copy_nonoverlapping(lut_ptr.offset(d4 as isize), buf_ptr.offset(*curr + 6), 2);
513+
ptr::copy_nonoverlapping(lut_ptr.offset(d5 as isize), buf_ptr.offset(*curr + 8), 2);
514+
ptr::copy_nonoverlapping(lut_ptr.offset(d6 as isize), buf_ptr.offset(*curr + 10), 2);
515+
ptr::copy_nonoverlapping(lut_ptr.offset(d7 as isize), buf_ptr.offset(*curr + 12), 2);
516+
ptr::copy_nonoverlapping(lut_ptr.offset(d8 as isize), buf_ptr.offset(*curr + 14), 2);
517+
}
518+
if n >= 1e8 as u64 {
519+
let to_parse = n % 1e8 as u64;
520+
n /= 1e8 as u64;
521+
522+
// Some of these are nops but it looks more elegant this way.
523+
let d1 = ((to_parse / 1e6 as u64) % 100) << 1;
524+
let d2 = ((to_parse / 1e4 as u64) % 100) << 1;
525+
let d3 = ((to_parse / 1e2 as u64) % 100) << 1;
526+
let d4 = ((to_parse / 1e0 as u64) % 100) << 1;
527+
*curr -= 8;
528+
529+
ptr::copy_nonoverlapping(lut_ptr.offset(d1 as isize), buf_ptr.offset(*curr + 0), 2);
530+
ptr::copy_nonoverlapping(lut_ptr.offset(d2 as isize), buf_ptr.offset(*curr + 2), 2);
531+
ptr::copy_nonoverlapping(lut_ptr.offset(d3 as isize), buf_ptr.offset(*curr + 4), 2);
532+
ptr::copy_nonoverlapping(lut_ptr.offset(d4 as isize), buf_ptr.offset(*curr + 6), 2);
533+
}
534+
// `n` < 1e8 < (1 << 32)
535+
let mut n = n as u32;
536+
if n >= 1e4 as u32 {
537+
let to_parse = n % 1e4 as u32;
538+
n /= 1e4 as u32;
539+
540+
let d1 = (to_parse / 100) << 1;
541+
let d2 = (to_parse % 100) << 1;
542+
*curr -= 4;
543+
544+
ptr::copy_nonoverlapping(lut_ptr.offset(d1 as isize), buf_ptr.offset(*curr + 0), 2);
545+
ptr::copy_nonoverlapping(lut_ptr.offset(d2 as isize), buf_ptr.offset(*curr + 2), 2);
546+
}
547+
548+
// `n` < 1e4 < (1 << 16)
549+
let mut n = n as u16;
550+
if n >= 100 {
551+
let d1 = (n % 100) << 1;
552+
n /= 100;
553+
*curr -= 2;
554+
ptr::copy_nonoverlapping(lut_ptr.offset(d1 as isize), buf_ptr.offset(*curr), 2);
555+
}
556+
557+
// decode last 1 or 2 chars
558+
if n < 10 {
559+
*curr -= 1;
560+
*buf_ptr.offset(*curr) = (n as u8) + b'0';
561+
} else {
562+
let d1 = n << 1;
563+
*curr -= 2;
564+
ptr::copy_nonoverlapping(lut_ptr.offset(d1 as isize), buf_ptr.offset(*curr), 2);
565+
}
566+
}
567+
}
568+
569+
#[stable(feature = "rust1", since = "1.0.0")]
570+
impl fmt::Display for u128 {
571+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
572+
fmt_u128(*self, true, f)
573+
}
574+
}
575+
576+
#[stable(feature = "rust1", since = "1.0.0")]
577+
impl fmt::Display for i128 {
578+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
579+
let is_nonnegative = *self >= 0;
580+
let n = if is_nonnegative {
581+
self.to_u128()
582+
} else {
583+
// convert the negative num to positive by summing 1 to it's 2 complement
584+
(!self.to_u128()).wrapping_add(1)
585+
};
586+
fmt_u128(n, is_nonnegative, f)
587+
}
588+
}
589+
590+
/// Specialized optimization for u128. Instead of taking two items at a time, it splits
591+
/// into at most 2 u64s, and then chunks by 10e16, 10e8, 10e4, 10e2, and then 10e1.
592+
/// It also has to handle 1 last item, as 10^40 > 2^128 > 10^39, whereas
593+
/// 10^20 > 2^64 > 10^19.
594+
fn fmt_u128(n: u128, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
595+
// 2^128 is about 3*10^38, so 39 gives an extra byte of space
596+
let mut buf = [MaybeUninit::<u8>::uninit(); 39];
597+
let mut curr = buf.len() as isize;
598+
let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
599+
600+
let (n, rem) = udiv_1e19(n);
601+
parse_u64_into(rem, &mut buf, &mut curr);
602+
603+
if n != 0 {
604+
// 0 pad up to point
605+
let target = (buf.len() - 19) as isize;
606+
// SAFETY: Guaranteed that we wrote at most 19 bytes, and there must be space
607+
// remaining since it has length 39
608+
unsafe {
609+
ptr::write_bytes(buf_ptr.offset(target), b'0', (curr - target) as usize);
610+
}
611+
curr = target;
612+
613+
let (n, rem) = udiv_1e19(n);
614+
parse_u64_into(rem, &mut buf, &mut curr);
615+
// Should this following branch be annotated with unlikely?
616+
if n != 0 {
617+
let target = (buf.len() - 38) as isize;
618+
// SAFETY: At this point we wrote at most 38 bytes, pad up to that point,
619+
// There can only be at most 1 digit remaining.
620+
unsafe {
621+
ptr::write_bytes(buf_ptr.offset(target), b'0', (curr - target) as usize);
622+
curr = target - 1;
623+
*buf_ptr.offset(curr) = (n as u8) + b'0';
624+
}
625+
}
626+
}
627+
628+
// SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid
629+
// UTF-8 since `DEC_DIGITS_LUT` is
630+
let buf_slice = unsafe {
631+
str::from_utf8_unchecked(slice::from_raw_parts(
632+
buf_ptr.offset(curr),
633+
buf.len() - curr as usize,
634+
))
635+
};
636+
f.pad_integral(is_nonnegative, "", buf_slice)
637+
}
638+
639+
/// Divides `n` by 10^19, returning `(quotient, remainder)`.
///
/// The remainder is always strictly less than 10^19 and is therefore returned as a `u64`.
/// When `n` fits in 64 bits a native `u64` division is used; otherwise the quotient is
/// computed with a bitwise shift-and-subtract long division.
fn udiv_1e19(n: u128) -> (u128, u64) {
    const DIV: u64 = 10_000_000_000_000_000_000;
    const DIV_WIDE: u128 = DIV as u128;

    let high = (n >> 64) as u64;
    if high == 0 {
        let low = n as u64;
        return ((low / DIV) as u128, low % DIV);
    }

    // `DIV` has its top bit set (10^19 > 2^63), so only the bits of `n` from one position
    // above the 64-bit boundary downwards can contribute quotient bits; `sr` is the number
    // of shift-and-subtract steps needed.
    let sr = 65 - high.leading_zeros();
    let mut q = n << (128 - sr);
    let mut r = n >> sr;
    let mut carry: u128 = 0;

    for _ in 0..sr {
        // Shift the next dividend bit into the running remainder and the previous
        // quotient bit into `q`.
        r = (r << 1) | (q >> 127);
        q = (q << 1) | carry;

        // Branch-free compare: `mask` is all ones when `r >= DIV` and zero otherwise.
        let mask = (DIV_WIDE.wrapping_sub(r).wrapping_sub(1) as i128 >> 127) as u128;
        carry = mask & 1;
        r -= DIV_WIDE & mask;
    }
    ((q << 1) | carry, r as u64)
}

0 commit comments

Comments
 (0)
Please sign in to comment.