diff --git a/src/stanzafilter.rs b/src/stanzafilter.rs index 9700a27..38b78c9 100644 --- a/src/stanzafilter.rs +++ b/src/stanzafilter.rs @@ -9,6 +9,7 @@ enum StanzaState { StanzaFirstChar, InsideTagFirstChar, InsideTag, + InsideAttribute(u8), BetweenTags, ExclamationTag(usize), InsideCDATA, @@ -71,18 +72,18 @@ impl StanzaFilter { } StanzaFirstChar => match b { b'/' => self.state = EndStream, - b'!' => bail!("illegal stanza: {}", to_str(&self.buf[..(self.cnt + 1)])), + b'!' | b'>' | b'\'' | b'"' => bail!("illegal stanza: {}", to_str(&self.buf[..(self.cnt + 1)])), b'?' => self.state = QuestionTag(self.cnt + 4), // 4 is length of b"xml " _ => self.state = InsideTag, }, InsideTagFirstChar => match b { b'/' => self.tag_cnt -= 2, b'!' => self.state = ExclamationTag(self.cnt + 7), // 7 is length of b"[CDATA[" - b'?' => bail!("illegal stanza: {}", to_str(&self.buf[..(self.cnt + 1)])), + b'?' | b'>' | b'\'' | b'"' => bail!("illegal stanza: {}", to_str(&self.buf[..(self.cnt + 1)])), _ => self.state = InsideTag, }, - InsideTag => { - if b == b'>' { + InsideTag => match b { + b'>' => { if self.buf[self.cnt - 1] == b'/' { // state can't be InsideTag unless we are on at least the second character, so can't go out of range // self-closing tag @@ -97,6 +98,13 @@ impl StanzaFilter { } self.state = BetweenTags; } + b'\'' | b'"' => self.state = InsideAttribute(b), + _ => {} + }, + InsideAttribute(end) => { + if b == end { + self.state = InsideTag; + } } QuestionTag(idx) => { if idx == self.cnt { @@ -206,6 +214,7 @@ mod tests { async fn process_next_byte() -> std::result::Result<(), anyhow::Error> { let mut filter = StanzaFilter::new(262_144); + //todo: This is going to be fun. assert_eq!( StanzaReader(Cursor::new( br###" @@ -213,6 +222,11 @@ mod tests { inside b before cinside c blais]]>bloo + ]]> + ]]> + + This is going to be fun. + This is going to be fun. ]]]]> "###, )) @@ -225,6 +239,11 @@ mod tests { "inside b before cinside c", "", "blais]]>bloo", + " ]]>", + " ]]>", + "", + "This is going to be fun.", + "This is going to be fun.", "", "]]]]>", "",