Skip to content

Commit c1cdd77

Browse files
committed
Create Wtf8::code_point_indices iterator
1 parent d2b52c5 commit c1cdd77

File tree

1 file changed

+66
-5
lines changed

1 file changed

+66
-5
lines changed

library/std/src/sys_common/wtf8.rs

+66-5
Original file line numberDiff line numberDiff line change
@@ -665,8 +665,14 @@ impl Wtf8 {
665665

666666
/// Returns an iterator for the string’s code points.
667667
#[inline]
668-
pub fn code_points(&self) -> Wtf8CodePoints<'_> {
669-
Wtf8CodePoints { bytes: self.bytes.iter() }
668+
pub fn code_points(&self) -> CodePoints<'_> {
669+
CodePoints { bytes: self.bytes.iter() }
670+
}
671+
672+
/// Returns an iterator over the string’s code points and their byte positions.
673+
#[inline]
674+
pub fn code_point_indices(&self) -> CodePointIndices<'_> {
675+
CodePointIndices { front_offset: 0, iter: self.code_points() }
670676
}
671677

672678
/// Access raw bytes of WTF-8 data
@@ -984,11 +990,11 @@ pub fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! {
984990
///
985991
/// Created with the method `.code_points()`.
986992
#[derive(Clone)]
987-
pub struct Wtf8CodePoints<'a> {
993+
pub struct CodePoints<'a> {
988994
bytes: slice::Iter<'a, u8>,
989995
}
990996

991-
impl Iterator for Wtf8CodePoints<'_> {
997+
impl Iterator for CodePoints<'_> {
992998
type Item = CodePoint;
993999

9941000
#[inline]
@@ -1004,11 +1010,66 @@ impl Iterator for Wtf8CodePoints<'_> {
10041010
}
10051011
}
10061012

1013+
impl<'a> CodePoints<'a> {
1014+
/// Views the remaining (not yet iterated) data as a subslice of the original data.
1015+
#[inline]
1016+
pub fn as_slice(&self) -> &Wtf8 {
1017+
// SAFETY: `CodePoints` is only made from a `Wtf8`, which guarantees
1018+
// the iter is valid WTF-8.
1019+
unsafe { Wtf8::from_bytes_unchecked(self.bytes.as_slice()) }
1020+
}
1021+
}
1022+
1023+
/// An iterator over the code points of a WTF-8 string, and their positions.
1024+
///
1025+
/// Created with the method `.code_point_indices()`.
1026+
#[derive(Clone)]
1027+
pub struct CodePointIndices<'a> {
1028+
front_offset: usize,
1029+
iter: CodePoints<'a>,
1030+
}
1031+
1032+
impl Iterator for CodePointIndices<'_> {
1033+
type Item = (usize, CodePoint);
1034+
1035+
#[inline]
1036+
fn next(&mut self) -> Option<Self::Item> {
1037+
let pre_len = self.iter.bytes.len();
1038+
match self.iter.next() {
1039+
None => None,
1040+
Some(code_point) => {
1041+
let index = self.front_offset;
1042+
let len = self.iter.bytes.len();
1043+
self.front_offset += pre_len - len;
1044+
Some((index, code_point))
1045+
}
1046+
}
1047+
}
1048+
1049+
#[inline]
1050+
fn count(self) -> usize {
1051+
self.iter.count()
1052+
}
1053+
1054+
#[inline]
1055+
fn size_hint(&self) -> (usize, Option<usize>) {
1056+
self.iter.size_hint()
1057+
}
1058+
}
1059+
1060+
impl<'a> CodePointIndices<'a> {
1061+
/// Views the remaining (not yet iterated) data as a subslice of the original data.
1062+
#[inline]
1063+
pub fn as_slice(&self) -> &Wtf8 {
1064+
self.iter.as_slice()
1065+
}
1066+
}
1067+
10071068
/// Generates a wide character sequence for potentially ill-formed UTF-16.
10081069
#[stable(feature = "rust1", since = "1.0.0")]
10091070
#[derive(Clone)]
10101071
pub struct EncodeWide<'a> {
1011-
code_points: Wtf8CodePoints<'a>,
1072+
code_points: CodePoints<'a>,
10121073
extra: u16,
10131074
}
10141075

0 commit comments

Comments
 (0)