Skip to content

Commit c4c4a3d

Browse files
committed
add char fast path
1 parent 224d35a commit c4c4a3d

File tree

3 files changed

+32
-38
lines changed

3 files changed

+32
-38
lines changed

library/alloc/src/str.rs

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use core::borrow::{Borrow, BorrowMut};
1111
use core::iter::FusedIterator;
1212
#[stable(feature = "rust1", since = "1.0.0")]
1313
pub use core::str::pattern;
14-
use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
14+
use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher, Utf8Pattern};
1515
#[stable(feature = "encode_utf16", since = "1.8.0")]
1616
pub use core::str::EncodeUtf16;
1717
#[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
@@ -268,35 +268,17 @@ impl str {
268268
#[stable(feature = "rust1", since = "1.0.0")]
269269
#[inline]
270270
pub fn replace<P: Pattern>(&self, from: P, to: &str) -> String {
271-
let mut capacity = 0;
272-
273-
#[inline]
274-
fn get_minimum_result_capacity(input_len: usize, from_len: usize, to_len: usize) -> usize {
275-
// check if output is going to be at least as long as input.
276-
if from_len <= to_len {
277-
return input_len;
271+
// Fast path: a single ASCII byte is being replaced by a single ASCII byte.
272+
if let Some(from_byte) = match from.as_utf8_pattern() {
273+
Some(Utf8Pattern::Slice([from_byte])) => Some(*from_byte),
274+
Some(Utf8Pattern::Char(c)) => c.as_ascii().map(|ascii_char| ascii_char.to_u8()),
275+
_ => None,
276+
} {
277+
if let [to_byte] = to.as_bytes() {
278+
return unsafe { replace_ascii(self.as_bytes(), from_byte, *to_byte) };
278279
}
279-
// lower bound where we have the maximum number of matches:
280-
// max_n_matches = len(input) / len(from)
281-
// capacity = max_n_matches * to_utf8_len
282-
let max_n_matches = input_len.checked_div(from_len).unwrap_or(0);
283-
return max_n_matches * to_len;
284280
}
285-
286-
// Path for patterns that can be represented as utf8 bytes (str, char etc.).
287-
if let Some(from_as_utf8) = from.as_utf8_bytes() {
288-
let from_utf8_len = from_as_utf8.len();
289-
let to_utf8_len = to.as_bytes().len();
290-
// Fast path for ascii
291-
if from_utf8_len == 1 && to_utf8_len == 1 {
292-
return unsafe {
293-
replace_ascii(&self.as_bytes(), from_as_utf8[0], to.as_bytes()[0])
294-
};
295-
}
296-
capacity = get_minimum_result_capacity(self.bytes().len(), from_utf8_len, to_utf8_len);
297-
}
298-
299-
let mut result = String::with_capacity(capacity);
281+
let mut result = String::new();
300282
let mut last_end = 0;
301283
for (start, part) in self.match_indices(from) {
302284
result.push_str(unsafe { self.get_unchecked(last_end..start) });
@@ -692,6 +674,8 @@ fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> Vec<u8> {
692674
#[inline]
693675
#[cfg(not(test))]
694676
#[cfg(not(no_global_oom_handling))]
677+
/// Faster implementation of string replacement for ASCII to ASCII cases.
678+
/// The simple byte-wise map is intended to be auto-vectorized by the compiler.
695679
unsafe fn replace_ascii(utf8_bytes: &[u8], from: u8, to: u8) -> String {
696680
let result: Vec<u8> = utf8_bytes.iter().map(|b| if *b == from { to } else { *b }).collect();
697681
// SAFETY: We replaced an ASCII byte with an ASCII byte in valid UTF-8, so the result is still valid UTF-8.

library/alloc/src/string.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ use core::ops::AddAssign;
5353
#[cfg(not(no_global_oom_handling))]
5454
use core::ops::Bound::{Excluded, Included, Unbounded};
5555
use core::ops::{self, Range, RangeBounds};
56-
use core::str::pattern::Pattern;
56+
use core::str::pattern::{Pattern, Utf8Pattern};
5757
use core::{fmt, hash, ptr, slice};
5858

5959
#[cfg(not(no_global_oom_handling))]
@@ -2321,8 +2321,8 @@ impl<'b> Pattern for &'b String {
23212321
}
23222322

23232323
#[inline]
2324-
fn as_utf8_bytes(&self) -> Option<&[u8]> {
2325-
Some(self.as_bytes())
2324+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
2325+
Some(Utf8Pattern::Slice(self.as_bytes()))
23262326
}
23272327
}
23282328

library/core/src/str/pattern.rs

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,17 @@ pub trait Pattern: Sized {
162162
}
163163

164164
/// Returns the pattern's UTF-8 representation, or `None` if it has none.
165-
fn as_utf8_bytes(&self) -> Option<&[u8]>;
165+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>>;
166+
}
167+
/// Result of calling [`Pattern::as_utf8_pattern()`].
168+
/// Can be used for inspecting the contents of a [`Pattern`] in cases
169+
/// where the pattern can be represented as UTF-8.
170+
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
171+
pub enum Utf8Pattern<'a> {
172+
/// Variant returned by `String` and `str` patterns: the pattern's UTF-8 bytes.
173+
Slice(&'a [u8]),
174+
/// Variant returned by `char` patterns: the character itself.
175+
Char(char),
166176
}
167177

168178
// Searcher
@@ -604,8 +614,8 @@ impl Pattern for char {
604614
}
605615

606616
#[inline]
607-
fn as_utf8_bytes(&self) -> Option<&[u8]> {
608-
None
617+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
618+
Some(Utf8Pattern::Char(*self))
609619
}
610620
}
611621

@@ -667,7 +677,7 @@ impl<C: MultiCharEq> Pattern for MultiCharEqPattern<C> {
667677
}
668678

669679
#[inline]
670-
fn as_utf8_bytes(&self) -> Option<&[u8]> {
680+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
671681
None
672682
}
673683
}
@@ -762,7 +772,7 @@ macro_rules! pattern_methods {
762772
}
763773

764774
#[inline]
765-
fn as_utf8_bytes(&self) -> Option<&[u8]> {
775+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
766776
None
767777
}
768778
};
@@ -1042,8 +1052,8 @@ impl<'b> Pattern for &'b str {
10421052
}
10431053

10441054
#[inline]
1045-
fn as_utf8_bytes(&self) -> Option<&[u8]> {
1046-
Some(self.as_bytes())
1055+
fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
1056+
Some(Utf8Pattern::Slice(self.as_bytes()))
10471057
}
10481058
}
10491059

0 commit comments

Comments
 (0)