From dc18aaec42b7d0573aa5f0ccda6c2c74a6e6e0a3 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Mon, 16 Jun 2025 14:10:29 -0700 Subject: [PATCH 1/5] library: Add a very simple microbenchmark for splitn --- library/coretests/benches/pattern.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/library/coretests/benches/pattern.rs b/library/coretests/benches/pattern.rs index b0f8b39c22e16..259519c6633f1 100644 --- a/library/coretests/benches/pattern.rs +++ b/library/coretests/benches/pattern.rs @@ -39,3 +39,27 @@ fn ends_with_str(b: &mut Bencher) { } }) } + +#[bench] +fn splitn_on_http_response(b: &mut Bencher) { + fn parse_http(s: &str) -> Result<(&str, &str, &str), &str> { + let mut parts = s.splitn(3, ' '); + let version = parts.next().ok_or("No version")?; + let code = parts.next().ok_or("No status code")?; + let description = parts.next().ok_or("No description")?; + Ok((version, code, description)) + } + + let response = String::from("HTTP/1.1 418 I'm a teapot\r\n"); + let mut res: (&str, &str, &str) = ("", "", ""); + b.iter(|| { + for _ in 0..1024 { + res = black_box(match parse_http(black_box(&response)) { + Ok(data) => data, + Err(_) => { + continue; + } + }) + } + }) +} From f02b53865cae6c6d1914116f760b4168273df15c Mon Sep 17 00:00:00 2001 From: bendn Date: Sat, 31 May 2025 16:47:03 +0700 Subject: [PATCH 2/5] faster charsearcher --- library/core/src/str/pattern.rs | 34 ++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index bcbbb11c83b2f..a9e5f55e90ebc 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -429,8 +429,24 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { SearchStep::Done } } - #[inline] + #[inline(always)] fn next_match(&mut self) -> Option<(usize, usize)> { + if self.utf8_size == 1 { + let find = |haystack: &[u8]| { + if haystack.len() < 32 { + haystack.iter().position(|&x| x == self.utf8_encoded[0]) + } else { + memchr::memchr(self.utf8_encoded[0], haystack) + } + }; + return match find(self.haystack.as_bytes().get(self.finger..self.finger_back)?) { + Some(x) => { + self.finger += x + 1; + Some((self.finger - 1, self.finger)) + } + None => None, + }; + } loop { // get the haystack after the last character found let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?; @@ -498,6 +514,22 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { } #[inline] fn next_match_back(&mut self) -> Option<(usize, usize)> { + if self.utf8_size == 1 { + let find = |haystack: &[u8]| { + if haystack.len() < 32 { + haystack.iter().rposition(|&x| x == self.utf8_encoded[0]) + } else { + memchr::memrchr(self.utf8_encoded[0], haystack) + } + }; + return match find(self.haystack.as_bytes().get(self.finger..self.finger_back)?) { + Some(x) => { + self.finger_back = self.finger + x; + Some((self.finger_back, self.finger_back + 1)) + } + None => None, + }; + } let haystack = self.haystack.as_bytes(); loop { // get the haystack up to but not including the last character searched From 667703fcbc27808c229c4476bfa8e6ef378d7375 Mon Sep 17 00:00:00 2001 From: bendn Date: Wed, 4 Jun 2025 00:29:56 +0700 Subject: [PATCH 3/5] dont ever position() --- library/core/src/str/pattern.rs | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index a9e5f55e90ebc..9bea4ca523b2a 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -432,13 +432,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { #[inline(always)] fn next_match(&mut self) -> Option<(usize, usize)> { if self.utf8_size == 1 { - let find = |haystack: &[u8]| { - if haystack.len() < 32 { - haystack.iter().position(|&x| x == self.utf8_encoded[0]) - } else { - memchr::memchr(self.utf8_encoded[0], haystack) - } - }; + let find = |haystack: &[u8]| memchr::memchr(self.utf8_encoded[0], haystack); return match find(self.haystack.as_bytes().get(self.finger..self.finger_back)?) { Some(x) => { self.finger += x + 1; @@ -515,13 +509,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { #[inline] fn next_match_back(&mut self) -> Option<(usize, usize)> { if self.utf8_size == 1 { - let find = |haystack: &[u8]| { - if haystack.len() < 32 { - haystack.iter().rposition(|&x| x == self.utf8_encoded[0]) - } else { - memchr::memrchr(self.utf8_encoded[0], haystack) - } - }; + let find = |haystack: &[u8]| memchr::memrchr(self.utf8_encoded[0], haystack); return match find(self.haystack.as_bytes().get(self.finger..self.finger_back)?) { Some(x) => { self.finger_back = self.finger + x; From b2d161f8017bd3026f58accd97630812b38af892 Mon Sep 17 00:00:00 2001 From: bendn Date: Wed, 4 Jun 2025 13:37:28 +0700 Subject: [PATCH 4/5] add more inline --- library/core/src/slice/memchr.rs | 1 + library/core/src/str/iter.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/library/core/src/slice/memchr.rs b/library/core/src/slice/memchr.rs index 1e1053583a617..edd67f58b7004 100644 --- a/library/core/src/slice/memchr.rs +++ b/library/core/src/slice/memchr.rs @@ -48,6 +48,7 @@ const fn memchr_naive(x: u8, text: &[u8]) -> Option { } #[rustc_allow_const_fn_unstable(const_eval_select)] // fallback impl has same behavior +#[inline] const fn memchr_aligned(x: u8, text: &[u8]) -> Option { // The runtime version behaves the same as the compiletime version, it's // just more optimized. diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 425c4eaee28ee..49c581f352eb3 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -656,7 +656,7 @@ impl<'a, P: Pattern> SplitInternal<'a, P> { None } - #[inline] + #[inline(always)] fn next(&mut self) -> Option<&'a str> { if self.finished { return None; From 7f0c41905d65c96140170efaf71bc8e288ae56a5 Mon Sep 17 00:00:00 2001 From: bendn Date: Thu, 5 Jun 2025 02:48:24 +0700 Subject: [PATCH 5/5] Revert "add more inline" This reverts commit 99e141c79b0defd9cae8e8fc44a3650609a28083. --- library/core/src/slice/memchr.rs | 1 - library/core/src/str/iter.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/library/core/src/slice/memchr.rs b/library/core/src/slice/memchr.rs index edd67f58b7004..1e1053583a617 100644 --- a/library/core/src/slice/memchr.rs +++ b/library/core/src/slice/memchr.rs @@ -48,7 +48,6 @@ const fn memchr_naive(x: u8, text: &[u8]) -> Option { } #[rustc_allow_const_fn_unstable(const_eval_select)] // fallback impl has same behavior -#[inline] const fn memchr_aligned(x: u8, text: &[u8]) -> Option { // The runtime version behaves the same as the compiletime version, it's // just more optimized. diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 49c581f352eb3..425c4eaee28ee 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -656,7 +656,7 @@ impl<'a, P: Pattern> SplitInternal<'a, P> { None } - #[inline(always)] + #[inline] fn next(&mut self) -> Option<&'a str> { if self.finished { return None;