Skip to content

Commit

Permalink
Fixing a bug when multi-byte characters were split (#75)
Browse files Browse the repository at this point in the history
  • Loading branch information
khituras authored Jan 14, 2022
1 parent 3d8bafd commit c000880
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 8 deletions.
20 changes: 12 additions & 8 deletions src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ public final void writeStartTagEnd()
flushBuffer();
}
_outputBuffer[_outputPtr++] = BYTE_GT;
}
}

@Override
public void writeStartTagEmptyEnd()
Expand Down Expand Up @@ -435,7 +435,7 @@ public final void writeEndTag(WName name)
ptr += name.appendBytes(bbuf, ptr);
bbuf[ptr++] = BYTE_GT;
_outputPtr = ptr;
}
}

/*
/**********************************************************************
Expand Down Expand Up @@ -572,6 +572,8 @@ protected final void writeAttrValue(char[] vbuf, int offset, int len)
{
if (_surrogate != 0) {
outputSurrogates(_surrogate, vbuf[offset]);
// reset the temporary surrogate storage
_surrogate = 0;
++offset;
--len;
}
Expand Down Expand Up @@ -785,7 +787,7 @@ public int writeCData(char[] cbuf, int offset, int len)
writeCDataEnd(); // will check surrogates
}
return ix;
}
}

protected int writeCDataContents(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException
Expand Down Expand Up @@ -865,7 +867,7 @@ protected int writeCDataContents(char[] cbuf, int offset, int len)
}
}
return -1;
}
}

@Override
public final void writeCharacters(String text)
Expand Down Expand Up @@ -908,6 +910,8 @@ public final void writeCharacters(char[] cbuf, int offset, int len)
{
if (_surrogate != 0) {
outputSurrogates(_surrogate, cbuf[offset]);
// reset the temporary surrogate storage
_surrogate = 0;
++offset;
--len;
}
Expand Down Expand Up @@ -1088,7 +1092,7 @@ private final void writeSplitCharacters(char[] cbuf, int offset, int len)
}
_outputBuffer[_outputPtr++] = (byte)ch;
}
}
}

/*
/**********************************************************************
Expand Down Expand Up @@ -1439,7 +1443,7 @@ public void writeXmlDeclaration(String version, String encoding, String standalo
// !!! TBI: check validity
writeRaw(version, 0, version.length());
writeRaw(BYTE_APOS);

if (encoding != null && encoding.length() > 0) {
writeRaw(BYTES_XMLDECL_ENCODING);
// !!! TBI: check validity
Expand All @@ -1453,7 +1457,7 @@ public void writeXmlDeclaration(String version, String encoding, String standalo
writeRaw(BYTE_APOS);
}
writeRaw(BYTE_QMARK, BYTE_GT);
}
}

/*
/**********************************************************************
Expand Down Expand Up @@ -1594,7 +1598,7 @@ protected final void flushBuffer()
protected final void writeAsEntity(int c)
throws IOException
{
// Quickie check to avoid
// Quickie check to avoid

byte[] buf = _outputBuffer;
int ptr = _outputPtr;
Expand Down
57 changes: 57 additions & 0 deletions src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package com.fasterxml.aalto.sax;

import com.fasterxml.aalto.out.Utf8XmlWriter;
import com.fasterxml.aalto.out.WriterConfig;

import java.io.ByteArrayOutputStream;

/**
 * Regression tests for the handling of a surrogate pair inside an attribute
 * value written through {@link Utf8XmlWriter}.
 * <p>
 * Both tests place the high surrogate of a pair at char index 511 of the
 * attribute value. According to the original issue description this makes the
 * pair straddle an internal buffer boundary (presumably 512 chars -- not
 * visible from this test), which used to leave {@code ByteXmlWriter#_surrogate}
 * set after the pair had already been written.
 * <p>
 * Neither test makes explicit assertions: a test passes when the whole write
 * sequence completes without throwing.
 */
public class TestSaxWriter extends base.BaseTestCase {

    /** A single supplementary character as a Java surrogate pair (high 0xD835, low 0xDFCE). */
    private static final String SURROGATE_PAIR = "\uD835\uDFCE";

    /** Count of filler chars before the pair; puts the high surrogate at index 511. */
    private static final int FILLER_BEFORE_PAIR = 511;

    /** Count of filler chars after the pair in the first scenario. */
    private static final int FILLER_AFTER_PAIR = 512;

    /**
     * Split surrogate pair followed by a long run of further text.
     * <p>
     * Before the fix this failed with
     * {@code javax.xml.stream.XMLStreamException: Incomplete surrogate pair in content:
     * first char 0xd835, second 0x78}: the stale high surrogate was combined
     * with an ordinary {@code 'x'} (0x78) from the text that follows the pair.
     */
    public void testSurrogateMemory1() throws Exception {
        StringBuilder attrValue = new StringBuilder();
        appendFiller(attrValue, FILLER_BEFORE_PAIR);
        attrValue.append(SURROGATE_PAIR);
        appendFiller(attrValue, FILLER_AFTER_PAIR);

        writeElementWithAttribute(attrValue.toString());
    }

    /**
     * Split surrogate pair as the very last thing in the attribute value.
     * <p>
     * Before the fix this failed with
     * {@code java.io.IOException: Unpaired surrogate character (0xd835)}:
     * because the stored surrogate was never reset, the writer still expected
     * a second surrogate that never arrived.
     */
    public void testSurrogateMemory2() throws Exception {
        StringBuilder attrValue = new StringBuilder();
        appendFiller(attrValue, FILLER_BEFORE_PAIR);
        attrValue.append(SURROGATE_PAIR);

        writeElementWithAttribute(attrValue.toString());
    }

    /** Appends {@code count} copies of the filler character {@code 'x'} to {@code target}. */
    private static void appendFiller(StringBuilder target, int count) {
        int remaining = count;
        while (remaining > 0) {
            target.append('x');
            --remaining;
        }
    }

    /**
     * Writes {@code <testelement testattr="...">} followed by its end tag to an
     * in-memory stream using a fresh {@link Utf8XmlWriter}, then closes the
     * writer. The produced bytes are not inspected; any exception propagates
     * to the calling test.
     */
    private static void writeElementWithAttribute(String attrValue) throws Exception {
        WriterConfig config = new WriterConfig();
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        Utf8XmlWriter xmlWriter = new Utf8XmlWriter(config, sink);

        xmlWriter.writeStartTagStart(xmlWriter.constructName("testelement"));
        xmlWriter.writeAttribute(xmlWriter.constructName("testattr"), attrValue);
        xmlWriter.writeStartTagEnd();
        xmlWriter.writeEndTag(xmlWriter.constructName("testelement"));
        xmlWriter.close(false);
    }
}

0 comments on commit c000880

Please sign in to comment.