@@ -665,8 +665,14 @@ impl Wtf8 {
665
665
666
666
/// Returns an iterator over the string’s code points, each yielded as a `CodePoint`.
667
667
#[ inline]
668
- pub fn code_points ( & self ) -> Wtf8CodePoints < ' _ > {
669
- Wtf8CodePoints { bytes : self . bytes . iter ( ) }
668
+ pub fn code_points ( & self ) -> CodePoints < ' _ > {
669
+ CodePoints { bytes : self . bytes . iter ( ) }
670
+ }
671
+
672
+ /// Returns an iterator over the string’s code points and their byte positions, counted from zero.
673
+ #[ inline]
674
+ pub fn code_point_indices ( & self ) -> CodePointIndices < ' _ > {
675
+ CodePointIndices { front_offset : 0 , iter : self . code_points ( ) }
670
676
}
671
677
672
678
/// Access raw bytes of WTF-8 data
@@ -984,11 +990,11 @@ pub fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! {
984
990
///
985
991
/// Created with the method `.code_points()`; holds a byte iterator over the string’s data.
986
992
#[ derive( Clone ) ]
987
- pub struct Wtf8CodePoints < ' a > {
993
+ pub struct CodePoints < ' a > {
988
994
bytes : slice:: Iter < ' a , u8 > ,
989
995
}
990
996
991
- impl Iterator for Wtf8CodePoints < ' _ > {
997
+ impl Iterator for CodePoints < ' _ > {
992
998
type Item = CodePoint ;
993
999
994
1000
#[ inline]
@@ -1004,11 +1010,66 @@ impl Iterator for Wtf8CodePoints<'_> {
1004
1010
}
1005
1011
}
1006
1012
1013
+ impl < ' a > CodePoints < ' a > {
1014
+ /// Views the underlying data as a subslice of the original data.
1015
+ #[ inline]
1016
+ pub fn as_slice ( & self ) -> & Wtf8 {
1017
+ // SAFETY: `CodePoints` is only made from a `Wtf8Str`, which guarantees
1018
+ // the iter is valid WTF-8.
1019
+ unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . as_slice ( ) ) }
1020
+ }
1021
+ }
1022
+
1023
+ /// An iterator over the code points of a WTF-8 string, paired with the byte offset at which each one starts.
1024
+ ///
1025
+ /// Created with the method `.code_point_indices()`, which starts the offset at zero.
1026
+ #[ derive( Clone ) ]
1027
+ pub struct CodePointIndices < ' a > {
1028
+ front_offset : usize ,
1029
+ iter : CodePoints < ' a > ,
1030
+ }
1031
+
1032
+ impl Iterator for CodePointIndices < ' _ > {
1033
+ type Item = ( usize , CodePoint ) ;
1034
+
1035
+ #[ inline]
1036
+ fn next ( & mut self ) -> Option < Self :: Item > {
1037
+ let pre_len = self . iter . bytes . len ( ) ;
1038
+ match self . iter . next ( ) {
1039
+ None => None ,
1040
+ Some ( code_point) => {
1041
+ let index = self . front_offset ;
1042
+ let len = self . iter . bytes . len ( ) ;
1043
+ self . front_offset += pre_len - len;
1044
+ Some ( ( index, code_point) )
1045
+ }
1046
+ }
1047
+ }
1048
+
1049
+ #[ inline]
1050
+ fn count ( self ) -> usize {
1051
+ self . iter . count ( )
1052
+ }
1053
+
1054
+ #[ inline]
1055
+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
1056
+ self . iter . size_hint ( )
1057
+ }
1058
+ }
1059
+
1060
+ impl < ' a > CodePointIndices < ' a > {
1061
+ /// Views the underlying data as a subslice of the original data.
1062
+ #[ inline]
1063
+ pub fn as_slice ( & self ) -> & Wtf8 {
1064
+ self . iter . as_slice ( )
1065
+ }
1066
+ }
1067
+
1007
1068
/// Generates a wide character sequence for potentially ill-formed UTF-16.
1008
1069
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
1009
1070
#[ derive( Clone ) ]
1010
1071
pub struct EncodeWide < ' a > {
1011
- code_points : Wtf8CodePoints < ' a > ,
1072
+ code_points : CodePoints < ' a > ,
1012
1073
// NOTE(review): presumably a pending UTF-16 code unit carried between calls — confirm against the `Iterator` impl.
extra : u16 ,
1013
1074
}
1014
1075
0 commit comments