Skip to content

Commit

Permalink
Fix #93: handle split surrogate for PI writes too
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Jun 7, 2024
1 parent 45eaad6 commit cfecd0f
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 30 deletions.
2 changes: 2 additions & 0 deletions release-notes/VERSION
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Project: aalto-xml
#90: Update stax2-api dep to 4.2.2 (from 4.2)
#91: Multi-byte characters are split in `writeComment()` if first byte sits
right at the end of the buffer
#93: Multi-byte characters are split in `writePI()` if first byte sits
right at the end of the buffer

1.3.2 (25-Apr-2022)

Expand Down
10 changes: 9 additions & 1 deletion src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -1193,7 +1193,7 @@ public int writeComment(String data) throws IOException, XMLStreamException

/**
* Note: the only way to fix comment contents is to inject a space
* to split up consequtive '--' (or '-' that ends a comment).
* to split up consecutive '--' (or '-' that ends a comment).
*/
protected int writeCommentContents(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException
Expand Down Expand Up @@ -1293,6 +1293,14 @@ public void writeDTD(WName rootName, String systemId, String publicId,
protected int writePIData(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException
{
if (_surrogate != 0) {
outputSurrogates(_surrogate, cbuf[offset]);
// reset the temporary surrogate storage
_surrogate = 0;
++offset;
--len;
}

// Unlike with writeCharacters() and fastWriteName(), let's not
// worry about split buffers here: this is unlikely to become a
// performance bottleneck. This allows keeping it simple; and
Expand Down
57 changes: 28 additions & 29 deletions src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@

public class TestSaxWriter extends base.BaseTestCase
{
private final String TEXT_WITH_SURROGATE;
{
StringBuilder testText = new StringBuilder(1025);
for (int i = 0; i < 511; i++) {
testText.append('x');
}
testText.append("\uD835\uDFCE");
for (int i = 0; i < 512; i++) {
testText.append('x');
}
TEXT_WITH_SURROGATE = testText.toString();
}

public void testSplitSurrogateWithAttributeValue() throws Exception
{
// This test aims to produce the
Expand All @@ -16,19 +29,11 @@ public void testSplitSurrogateWithAttributeValue() throws Exception
// to also fill the next two internal reading buffers. Then, the code would try to fuse the first byte
// of the original multi-byte character with the first character in the third buffer because
// ByteXmlWriter#_surrogate was not set back to 0 after writing the original multi-byte character.
StringBuilder testText = new StringBuilder();
for (int i = 0; i < 511; i++) {
testText.append('x');
}
testText.append("\uD835\uDFCE");
for (int i = 0; i < 512; i++) {
testText.append('x');
}
WriterConfig writerConfig = new WriterConfig();
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
writer.writeStartTagStart(writer.constructName("testelement"));
writer.writeAttribute(writer.constructName("testattr"), testText.toString());
writer.writeAttribute(writer.constructName("testattr"), TEXT_WITH_SURROGATE);
writer.writeStartTagEnd();
writer.writeEndTag(writer.constructName("testelement"));
writer.close(false);
Expand Down Expand Up @@ -61,43 +66,37 @@ public void testSplitSurrogateWithAttributeValue2() throws Exception
public void testSplitSurrogateWithCData() throws Exception
{
// Modification of "testSplitSurrogateWithAttributeValue()" but for CDATA
StringBuilder testText = new StringBuilder();
for (int i = 0; i < 511; i++) {
testText.append('x');
}
testText.append("\uD835\uDFCE");
for (int i = 0; i < 512; i++) {
testText.append('x');
}

WriterConfig writerConfig = new WriterConfig();
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
writer.writeStartTagStart(writer.constructName("testelement"));
writer.writeCData(testText.toString());
writer.writeCData(TEXT_WITH_SURROGATE);
writer.writeStartTagEnd();
writer.writeEndTag(writer.constructName("testelement"));
writer.close(false);
}


public void testSplitSurrogateWithComment() throws Exception
{
// Modification of "testSplitSurrogateWithAttributeValue()" but for Comment
StringBuilder testText = new StringBuilder();
for (int i = 0; i < 511; i++) {
testText.append('x');
}
testText.append("\uD835\uDFCE");
for (int i = 0; i < 512; i++) {
testText.append('x');
}
WriterConfig writerConfig = new WriterConfig();
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
writer.writeStartTagStart(writer.constructName("testelement"));
writer.writeComment(TEXT_WITH_SURROGATE);
writer.writeStartTagEnd();
writer.writeEndTag(writer.constructName("testelement"));
writer.close(false);
}

public void testSplitSurrogateWithPI() throws Exception
{
// Modification of "testSplitSurrogateWithAttributeValue()" but for Processing instructions
WriterConfig writerConfig = new WriterConfig();
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
writer.writeStartTagStart(writer.constructName("testelement"));
writer.writeComment(testText.toString());
writer.writePI(writer.constructName("target"), TEXT_WITH_SURROGATE);
writer.writeStartTagEnd();
writer.writeEndTag(writer.constructName("testelement"));
writer.close(false);
Expand Down

0 comments on commit cfecd0f

Please sign in to comment.