Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support charsets other than UTF-8 #150

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions core/src/main/java/com/tickaroo/tikxml/TikXml.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import com.tickaroo.tikxml.typeadapter.TypeAdapter;
import java.io.IOException;
import java.lang.reflect.Type;
import java.nio.charset.Charset;

import okio.BufferedSink;
import okio.BufferedSource;

Expand Down Expand Up @@ -64,6 +66,17 @@ public Builder writeDefaultXmlDeclaration(boolean writeDeclaration) {
return this;
}

/**
 * Specify the charset used when reading and writing xml documents.
 *
 * @param charset character encoding set to use when reading and writing the xml document;
 *     must not be {@code null}
 * @return The Builder itself
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public Builder charset(Charset charset) {
  if (charset == null) {
    // Fail at configuration time instead of later, when a reader or writer is created
    // from this config and the charset is first used.
    throw new NullPointerException("charset == null");
  }
  config.charset = charset;
  return this;
}

/**
* Adds a type converter for the given class
*
Expand Down Expand Up @@ -105,7 +118,7 @@ private TikXml(TikXmlConfig config) {

public <T> T read(BufferedSource source, Type clazz) throws IOException {

XmlReader reader = XmlReader.of(source);
XmlReader reader = XmlReader.of(source, config.charset);

reader.beginElement();
reader.nextElementName(); // We don't care about the name of the root tag
Expand All @@ -125,7 +138,7 @@ public <T> void write(BufferedSink sink, T valueToWrite) throws IOException {

public <T> void write(BufferedSink sink, T valueToWrite, Type typeOfValueToWrite) throws IOException {

XmlWriter writer = XmlWriter.of(sink);
XmlWriter writer = XmlWriter.of(sink, config.charset);

TypeAdapter<T> adapter = config.getTypeAdapter(typeOfValueToWrite);
if (config.writeDefaultXmlDeclaration()) {
Expand Down
12 changes: 12 additions & 0 deletions core/src/main/java/com/tickaroo/tikxml/TikXmlConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import com.tickaroo.tikxml.typeadapter.TypeAdapter;

import java.lang.reflect.Type;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
* Holds the config for parsing and writing xml via {@link TikXml}
Expand All @@ -34,6 +36,7 @@ public final class TikXmlConfig {
TypeConverters typeConverters = new TypeConverters();
TypeAdapters typeAdapters = new TypeAdapters();
boolean writeDefaultXmlDeclaration = true;
Charset charset = StandardCharsets.UTF_8;

TikXmlConfig() {
}
Expand All @@ -58,6 +61,15 @@ public boolean writeDefaultXmlDeclaration() {
return writeDefaultXmlDeclaration;
}

/**
 * Returns the configured charset.
 *
 * @return the character encoding set used when reading and writing the xml document
 */
public Charset charset() {
  return this.charset;
}

/**
* Query a {@link TypeConverter} for a given class
*
Expand Down
67 changes: 40 additions & 27 deletions core/src/main/java/com/tickaroo/tikxml/XmlReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
* A class to read and parse an xml stream.
Expand All @@ -37,14 +39,13 @@ public class XmlReader implements Closeable {

//private static final ByteString LINEFEED_OR_CARRIAGE_RETURN = ByteString.encodeUtf8("\n\r");

private static final ByteString UNQUOTED_STRING_TERMINALS
= ByteString.encodeUtf8(" >/=\n");
private final ByteString unquotedStringTerminals;

private static final ByteString CDATA_CLOSE = ByteString.encodeUtf8("]]>");
private static final ByteString CDATA_OPEN = ByteString.encodeUtf8("<![CDATA[");
private static final ByteString DOCTYPE_OPEN = ByteString.encodeUtf8("<!DOCTYPE");
private static final ByteString COMMENT_CLOSE = ByteString.encodeUtf8("-->");
private static final ByteString XML_DECLARATION_CLOSE = ByteString.encodeUtf8("?>");
private final ByteString cdataClose;
private final ByteString cdataOpen;
private final ByteString doctypeOpen;
private final ByteString commentClose;
private final ByteString xmlDeclarationClose;
private static final ByteString UTF8_BOM = ByteString.of((byte) 0xEF, (byte) 0xBB, (byte) 0xBF);

private static final byte DOUBLE_QUOTE = '"';
Expand Down Expand Up @@ -97,21 +98,33 @@ public class XmlReader implements Closeable {

private final BufferedSource source;
private final Buffer buffer;
private final Charset charset;
private String currentElementName;

// Diff view: the next line appears to be the removed single-argument signature; the line
// after it is its replacement, which additionally receives the charset.
private XmlReader(BufferedSource source) {
private XmlReader(BufferedSource source, Charset charset) {
// Reject a missing source up front.
if (source == null) {
throw new NullPointerException("source == null");
}
this.source = source;
this.buffer = source.buffer();
// NOTE(review): charset is not null-checked here, unlike source — confirm how
// ByteString.encodeString behaves when it is given a null charset.
this.charset = charset;
// Encode the marker sequences once per reader with the supplied charset; these instance
// fields stand in for the former static constants that were encoded as UTF-8.
// NOTE(review): other code visible in this diff still works with fixed byte counts and
// single-byte constants (e.g. buffer.skip(3) after CDATA content, DOUBLE_QUOTE), so a
// charset that does not encode these ASCII markers as one byte per character may not be
// parsed correctly — confirm.
unquotedStringTerminals = ByteString.encodeString(" >/=\n", charset);
cdataClose = ByteString.encodeString("]]>", charset);
cdataOpen = ByteString.encodeString("<![CDATA[", charset);
doctypeOpen = ByteString.encodeString("<!DOCTYPE", charset);
commentClose = ByteString.encodeString("-->", charset);
xmlDeclarationClose = ByteString.encodeString("?>", charset);
}

/**
* Returns a new instance that reads an XML-encoded stream from {@code source}.
*/
public static XmlReader of(BufferedSource source) {
// Diff view: the first return appears to be the removed line, the second its replacement.
return new XmlReader(source);
// The single-argument factory passes UTF-8 as the charset.
return new XmlReader(source, StandardCharsets.UTF_8);
}

/**
 * Returns a new instance that reads an XML stream from {@code source}, passing
 * {@code charset} on to the reader.
 */
public static XmlReader of(BufferedSource source, Charset charset) {
  final XmlReader reader = new XmlReader(source, charset);
  return reader;
}

/**
Expand Down Expand Up @@ -313,7 +326,7 @@ private int doPeek() throws IOException {
* @throws IOException
*/
private boolean isCDATA() throws IOException {
// Diff view: the first return appears to be the removed line (static CDATA_OPEN constant),
// the second its replacement (per-instance cdataOpen field).
return fillBuffer(CDATA_OPEN.size()) && buffer.rangeEquals(0, CDATA_OPEN);
// True only when fillBuffer returns true for the marker's length and the bytes at the start
// of the buffer equal the marker.
return fillBuffer(cdataOpen.size()) && buffer.rangeEquals(0, cdataOpen);
}

/**
Expand All @@ -324,8 +337,8 @@ private boolean isCDATA() throws IOException {
* @throws IOException
*/
private boolean isDocTypeDefinition() throws IOException {
// Diff view: the first statement appears to be the removed version (static DOCTYPE_OPEN
// constant), the second its replacement (per-instance doctypeOpen field).
return buffer.size() >= DOCTYPE_OPEN.size() &&
buffer.snapshot(DOCTYPE_OPEN.size()).toAsciiUppercase().equals(DOCTYPE_OPEN);
// Compares the first doctypeOpen.size() buffered bytes, ASCII-uppercased, with the marker,
// so a lower-case "<!doctype" matches as well. Unlike isCDATA there is no fillBuffer call:
// only bytes that are already buffered are considered.
return buffer.size() >= doctypeOpen.size() &&
buffer.snapshot(doctypeOpen.size()).toAsciiUppercase().equals(doctypeOpen);
}

/**
Expand Down Expand Up @@ -564,14 +577,14 @@ public String nextTextContent() throws IOException {
+ "> but haven't found");
}

return buffer.readUtf8(index);
return buffer.readString(index, charset);
} else if (p == PEEKED_CDATA) {
peeked = PEEKED_NONE;

// Search index of closing CDATA tag ]]>
long index = indexOfClosingCDATA();

String result = buffer.readUtf8(index);
String result = buffer.readString(index, charset);
buffer.skip(3); // consume ]]>
return result;
} else if (p == PEEKED_ELEMENT_END) {
Expand Down Expand Up @@ -673,7 +686,7 @@ public boolean nextTextContentAsBoolean() throws IOException {
* @throws IOException
*/
private long indexOfClosingCDATA() throws IOException {
long index = source.indexOf(CDATA_CLOSE);
long index = source.indexOf(cdataClose);
if (index == -1) {
throw new EOFException("<![CDATA[ at " + getPath() + " has never been closed with ]]>");
}
Expand Down Expand Up @@ -810,12 +823,12 @@ private int nextNonWhitespace(boolean throwOnEof, boolean isDocumentBeginning) t
int peekStack = stack[stackSize - 1];

if (peekStack == XmlScope.NONEMPTY_DOCUMENT && isDocTypeDefinition()) {
long index = source.indexOf(CLOSING_XML_ELEMENT, DOCTYPE_OPEN.size());
long index = source.indexOf(CLOSING_XML_ELEMENT, doctypeOpen.size());
if (index == -1) {
throw syntaxError("Unterminated <!DOCTYPE> . Inline DOCTYPE is not support at the moment.");
}
// check if doctype uses brackets
long bracketIndex = source.indexOf(OPENING_DOCTYPE_BRACKET, DOCTYPE_OPEN.size(), index);
long bracketIndex = source.indexOf(OPENING_DOCTYPE_BRACKET, doctypeOpen.size(), index);
if (bracketIndex != -1) {
index = source.indexOf(ByteString.of(CLOSING_DOCTYPE_BRACKET, CLOSING_XML_ELEMENT), index + bracketIndex);
if (index == -1) {
Expand All @@ -829,19 +842,19 @@ private int nextNonWhitespace(boolean throwOnEof, boolean isDocumentBeginning) t
p = 0;
continue;
} else if (peek == '!' && fillBuffer(4)) {
long index = source.indexOf(COMMENT_CLOSE, 4); // skip <!-- in comparison by offset 4
long index = source.indexOf(commentClose, 4); // skip <!-- in comparison by offset 4
if (index == -1) {
throw syntaxError("Unterminated comment");
}
source.skip(index + COMMENT_CLOSE.size()); // skip behind --!>
source.skip(index + commentClose.size()); // skip behind --!>
p = 0;
continue;
} else if (peek == '?') {
long index = source.indexOf(XML_DECLARATION_CLOSE, 2); // skip <? in comparison by offset 2
long index = source.indexOf(xmlDeclarationClose, 2); // skip <? in comparison by offset 2
if (index == -1) {
throw syntaxError("Unterminated xml declaration or processing instruction \"<?\"");
}
source.skip(index + XML_DECLARATION_CLOSE.size()); // skip behind ?>
source.skip(index + xmlDeclarationClose.size()); // skip behind ?>
p = 0;
continue;
}
Expand Down Expand Up @@ -896,8 +909,8 @@ public String getCurrentElementName() {

/** Returns an unquoted value as a string. */
private String nextUnquotedValue() throws IOException {
// Diff view: the first two statements appear to be the removed UTF-8-only version, the last
// two their charset-aware replacement.
long i = source.indexOfElement(UNQUOTED_STRING_TERMINALS);
return i != -1 ? buffer.readUtf8(i) : buffer.readUtf8();
// Reads up to the first terminal byte found, or everything in the buffer when none is found,
// decoding the bytes with the configured charset.
// NOTE(review): indexOfElement is documented as searching for any single byte of its
// argument. For a charset that encodes the terminals " >/=\n" as more than one byte each,
// the extra bytes would presumably also act as terminators — confirm.
long i = source.indexOfElement(unquotedStringTerminals);
return i != -1 ? buffer.readString(i, charset) : buffer.readString(charset);
}

/**
Expand All @@ -920,19 +933,19 @@ private String nextQuotedValue(byte runTerminator) throws IOException {
// If we've got an escape character, we're going to need a string builder.
if (buffer.getByte(index) == '\\') {
if (builder == null) builder = new StringBuilder();
builder.append(buffer.readUtf8(index));
builder.append(buffer.readString(index, charset));
buffer.readByte(); // '\'
builder.append(readEscapeCharacter());
continue;
}

// If it isn't the escape character, it's the quote. Return the string.
if (builder == null) {
String result = buffer.readUtf8(index);
String result = buffer.readString(index, charset);
buffer.readByte(); // Consume the quote character.
return result;
} else {
builder.append(buffer.readUtf8(index));
builder.append(buffer.readString(index, charset));
buffer.readByte(); // Consume the quote character.
return builder.toString();
}
Expand Down Expand Up @@ -988,7 +1001,7 @@ private char readEscapeCharacter() throws IOException {
} else if (c >= 'A' && c <= 'F') {
result += (c - 'A' + 10);
} else {
throw syntaxError("\\u" + buffer.readUtf8(4));
throw syntaxError("\\u" + buffer.readString(4, charset));
}
}
buffer.skip(4);
Expand Down
Loading