diff --git a/release-notes/VERSION b/release-notes/VERSION index d1a9893..716794e 100644 --- a/release-notes/VERSION +++ b/release-notes/VERSION @@ -12,6 +12,8 @@ Project: aalto-xml #90: Update stax2-api dep to 4.2.2 (from 4.2) #91: Multi-byte characters are split in `writeComment()` if first byte sits right at the end of the buffer +#93: Multi-byte characters are split in `writePI()` if first byte sits + right at the end of the buffer 1.3.2 (25-Apr-2022) diff --git a/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java b/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java index 5334e28..d265d89 100644 --- a/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java +++ b/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java @@ -1193,7 +1193,7 @@ public int writeComment(String data) throws IOException, XMLStreamException /** * Note: the only way to fix comment contents is to inject a space - * to split up consequtive '--' (or '-' that ends a comment). + * to split up consecutive '--' (or '-' that ends a comment). */ protected int writeCommentContents(char[] cbuf, int offset, int len) throws IOException, XMLStreamException @@ -1293,6 +1293,14 @@ public void writeDTD(WName rootName, String systemId, String publicId, protected int writePIData(char[] cbuf, int offset, int len) throws IOException, XMLStreamException { + if (_surrogate != 0) { + outputSurrogates(_surrogate, cbuf[offset]); + // reset the temporary surrogate storage + _surrogate = 0; + ++offset; + --len; + } + // Unlike with writeCharacters() and fastWriteName(), let's not // worry about split buffers here: this is unlikely to become // performance bottleneck. This allows keeping it simple; and diff --git a/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java b/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java index 31ccc10..100e9d9 100644 --- a/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java +++ b/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java @@ -7,6 +7,19 @@ public class TestSaxWriter extends base.BaseTestCase { + private final String TEXT_WITH_SURROGATE; + { + StringBuilder testText = new StringBuilder(1025); + for (int i = 0; i < 511; i++) { + testText.append('x'); + } + testText.append("\uD835\uDFCE"); + for (int i = 0; i < 512; i++) { + testText.append('x'); + } + TEXT_WITH_SURROGATE = testText.toString(); + } + public void testSplitSurrogateWithAttributeValue() throws Exception { // This test aims to produce the @@ -16,19 +29,11 @@ public void testSplitSurrogateWithAttributeValue() throws Exception // to also fill the next two internal reading buffers. Then, the code would try to fuse the first byte // of the original multi-byte character with the first character in the third buffer because // ByteXmlWriter#_surrogate was not set back to 0 after writing the original multi-byte character. - StringBuilder testText = new StringBuilder(); - for (int i = 0; i < 511; i++) { - testText.append('x'); - } - testText.append("\uD835\uDFCE"); - for (int i = 0; i < 512; i++) { - testText.append('x'); - } WriterConfig writerConfig = new WriterConfig(); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream); writer.writeStartTagStart(writer.constructName("testelement")); - writer.writeAttribute(writer.constructName("testattr"), testText.toString()); + writer.writeAttribute(writer.constructName("testattr"), TEXT_WITH_SURROGATE); writer.writeStartTagEnd(); writer.writeEndTag(writer.constructName("testelement")); writer.close(false); @@ -61,43 +66,37 @@ public void testSplitSurrogateWithAttributeValue2() throws Exception public void testSplitSurrogateWithCData() throws Exception { // Modification of "testSplitSurrogateWithAttributeValue()" but for CDATA - StringBuilder testText = new StringBuilder(); - for (int i = 0; i < 511; i++) { - testText.append('x'); - } - testText.append("\uD835\uDFCE"); - for (int i = 0; i < 512; i++) { - testText.append('x'); - } - WriterConfig writerConfig = new WriterConfig(); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream); writer.writeStartTagStart(writer.constructName("testelement")); - writer.writeCData(testText.toString()); + writer.writeCData(TEXT_WITH_SURROGATE); writer.writeStartTagEnd(); writer.writeEndTag(writer.constructName("testelement")); writer.close(false); } - public void testSplitSurrogateWithComment() throws Exception { // Modification of "testSplitSurrogateWithAttributeValue()" but for Comment - StringBuilder testText = new StringBuilder(); - for (int i = 0; i < 511; i++) { - testText.append('x'); - } - testText.append("\uD835\uDFCE"); - for (int i = 0; i < 512; i++) { - testText.append('x'); - } + WriterConfig writerConfig = new WriterConfig(); + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream); + writer.writeStartTagStart(writer.constructName("testelement")); + writer.writeComment(TEXT_WITH_SURROGATE); + writer.writeStartTagEnd(); + writer.writeEndTag(writer.constructName("testelement")); + writer.close(false); + } + public void testSplitSurrogateWithPI() throws Exception + { + // Modification of "testSplitSurrogateWithAttributeValue()" but for Processing instructions WriterConfig writerConfig = new WriterConfig(); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream); writer.writeStartTagStart(writer.constructName("testelement")); - writer.writeComment(testText.toString()); + writer.writePI(writer.constructName("target"), TEXT_WITH_SURROGATE); writer.writeStartTagEnd(); writer.writeEndTag(writer.constructName("testelement")); writer.close(false);