@@ -41,6 +41,7 @@ pub mod attributes;
4141use encoding_rs:: Encoding ;
4242use std:: borrow:: Cow ;
4343use std:: fmt:: { self , Debug , Formatter } ;
44+ use std:: iter:: FusedIterator ;
4445use std:: mem:: replace;
4546use std:: ops:: Deref ;
4647use std:: str:: from_utf8;
@@ -53,7 +54,7 @@ use crate::escape::{
5354use crate :: name:: { LocalName , QName } ;
5455#[ cfg( feature = "serialize" ) ]
5556use crate :: utils:: CowRef ;
56- use crate :: utils:: { name_len, trim_xml_end, trim_xml_start, write_cow_string} ;
57+ use crate :: utils:: { name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes } ;
5758use attributes:: { AttrError , Attribute , Attributes } ;
5859
5960/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
@@ -700,12 +701,53 @@ impl<'a> BytesCData<'a> {
///
/// # Warning
///
/// `content` must not contain the `]]>` sequence. You can use
/// [`BytesCData::escaped`] to escape the content instead.
704706 #[ inline]
705707 pub fn new < C : Into < Cow < ' a , str > > > ( content : C ) -> Self {
706708 Self :: wrap ( str_cow_to_bytes ( content) , Decoder :: utf8 ( ) )
707709 }
708710
711+ /// Creates an iterator of `BytesCData` from a string.
712+ ///
713+ /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
714+ /// sections, splitting the `]]` and `>` characters, because the CDATA closing
715+ /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
716+ /// for each of those sections.
717+ ///
718+ /// # Examples
719+ ///
720+ /// ```
721+ /// # use quick_xml::events::BytesCData;
722+ /// # use pretty_assertions::assert_eq;
723+ /// let content = "";
724+ /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
725+ /// assert_eq!(cdata, &[BytesCData::new("")]);
726+ ///
727+ /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
728+ /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
729+ /// assert_eq!(cdata, &[
730+ /// BytesCData::new("Certain tokens like ]]"),
731+ /// BytesCData::new("> can be difficult and <invalid>"),
732+ /// ]);
733+ ///
734+ /// let content = "foo]]>bar]]>baz]]>quux";
735+ /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
736+ /// assert_eq!(cdata, &[
737+ /// BytesCData::new("foo]]"),
738+ /// BytesCData::new(">bar]]"),
739+ /// BytesCData::new(">baz]]"),
740+ /// BytesCData::new(">quux"),
741+ /// ]);
742+ /// ```
743+ #[ inline]
744+ pub fn escaped ( content : & ' a str ) -> CDataIterator < ' a > {
745+ CDataIterator {
746+ unprocessed : content. as_bytes ( ) ,
747+ finished : false ,
748+ }
749+ }
750+
709751 /// Ensures that all data is owned to extend the object's lifetime if
710752 /// necessary.
711753 #[ inline]
@@ -833,6 +875,49 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
833875 }
834876}
835877
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
///
/// The iterator is lazy: nothing is split until it is advanced, which is why
/// discarding it without consuming it is flagged via `#[must_use]`.
#[derive(Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set once the last section has been yielded; from then on the
    /// iterator only returns `None`.
    finished: bool,
}
888+
889+ impl < ' a > Debug for CDataIterator < ' a > {
890+ fn fmt ( & self , f : & mut Formatter ) -> fmt:: Result {
891+ f. debug_struct ( "CDataIterator" )
892+ . field ( "unprocessed" , & Bytes ( self . unprocessed ) )
893+ . field ( "finished" , & self . finished )
894+ . finish ( )
895+ }
896+ }
897+
898+ impl < ' a > Iterator for CDataIterator < ' a > {
899+ type Item = BytesCData < ' a > ;
900+
901+ fn next ( & mut self ) -> Option < BytesCData < ' a > > {
902+ if self . finished {
903+ return None ;
904+ }
905+
906+ for gt in memchr:: memchr_iter ( b'>' , self . unprocessed ) {
907+ if self . unprocessed [ ..gt] . ends_with ( b"]]" ) {
908+ let ( slice, rest) = self . unprocessed . split_at ( gt) ;
909+ self . unprocessed = rest;
910+ return Some ( BytesCData :: wrap ( slice, Decoder :: utf8 ( ) ) ) ;
911+ }
912+ }
913+
914+ self . finished = true ;
915+ Some ( BytesCData :: wrap ( self . unprocessed , Decoder :: utf8 ( ) ) )
916+ }
917+ }
918+
919+ impl FusedIterator for CDataIterator < ' _ > { }
920+
836921////////////////////////////////////////////////////////////////////////////////////////////////////
837922
838923/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
0 commit comments