Skip to content

Commit

Permalink
fix: retain attribute entity references instead of choking when XMLIn…
Browse files Browse the repository at this point in the history
…pu… (#74)

Implement #65: Add AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES which allows skipping handling of general entity references in attributes.
  • Loading branch information
cjmamo authored Apr 26, 2021
1 parent 11ae44d commit 49dfd6a
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 20 deletions.
16 changes: 16 additions & 0 deletions src/main/java/com/fasterxml/aalto/AaltoInputProperties.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.fasterxml.aalto;

/**
* Class that contains constant for property names used to configure
* cursor and event readers produced by Aalto implementation of
* {@link javax.xml.stream.XMLInputFactory}.
*/
public final class AaltoInputProperties {

/**
* Feature controlling whether general entities in attributes are retained.
*
* @since 1.3
*/
public final static String P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES = "com.fasterxml.aalto.retainAttributeGeneralEntities";
}
16 changes: 13 additions & 3 deletions src/main/java/com/fasterxml/aalto/in/ReaderConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import javax.xml.stream.*;

import com.fasterxml.aalto.AaltoInputProperties;
import org.codehaus.stax2.XMLInputFactory2;

import com.fasterxml.aalto.impl.CommonConfig;
Expand Down Expand Up @@ -41,9 +42,10 @@ public final class ReaderConfig
final static int F_AUTO_CLOSE_INPUT = 0x2000;

// Custom flags:
final static int F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES = 0x4000;

/**
* These are the default settigs for XMLInputFactory.
* These are the default settings for XMLInputFactory.
*/
final static int DEFAULT_FLAGS =
F_NS_AWARE
Expand All @@ -55,7 +57,7 @@ public final class ReaderConfig
| F_INTERN_NS_URIS
// and will report CDATA as such (and not as CHARACTERS)
| F_REPORT_CDATA
| F_PRESERVE_LOCATION
| F_PRESERVE_LOCATION
;

private final static HashMap<String, Object> sProperties;
Expand Down Expand Up @@ -98,7 +100,8 @@ public final class ReaderConfig
// !!! Not really implemented, but let's recognize it
sProperties.put(XMLInputFactory2.P_DTD_OVERRIDE, null);

// Custom ones?
// Custom ones
sProperties.put(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, Integer.valueOf(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES));
}

/**
Expand Down Expand Up @@ -280,6 +283,10 @@ public void doReportCData(boolean state) {
setFlag(F_REPORT_CDATA, state);
}

public void doRetainAttributeGeneralEntities(boolean state) {
setFlag(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, state);
}

/*
/**********************************************************************
/* Common accessors from CommonConfig
Expand Down Expand Up @@ -412,6 +419,9 @@ public boolean willParseLazily() {

public boolean hasInternNsURIsBeenEnabled() { return hasExplicitFlag(F_INTERN_NS_URIS); }

// // // Custom properties

public boolean willRetainAttributeGeneralEntities() { return hasFlag(F_RETAIN_ATTRIBUTE_GENERAL_ENTITIES); }

/*
/**********************************************************************
Expand Down
28 changes: 15 additions & 13 deletions src/main/java/com/fasterxml/aalto/in/ReaderScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,7 @@ protected final int handleStartElement(char c)
* simplify main method, which makes code more maintainable
* and possibly easier for JIT/HotSpot to optimize.
*/
private final int collectValue(int attrPtr, char quoteChar, PName attrName)
private int collectValue(int attrPtr, char quoteChar, PName attrName)
throws XMLStreamException
{
char[] attrBuffer = _attrCollector.startNewValue(attrName, attrPtr);
Expand Down Expand Up @@ -896,20 +896,22 @@ private final int collectValue(int attrPtr, char quoteChar, PName attrName)
throwUnexpectedChar(c, "'<' not allowed in attribute value");
case XmlCharTypes.CT_AMP:
{
int d = handleEntityInText(false);
if (d == 0) { // unexpanded general entity... not good
reportUnexpandedEntityInAttr(attrName, false);
}
// Ok; does it need a surrogate though? (over 16 bits)
if ((d >> 16) != 0) {
d -= 0x10000;
attrBuffer[attrPtr++] = (char) (0xD800 | (d >> 10));
d = 0xDC00 | (d & 0x3FF);
if (attrPtr >= attrBuffer.length) {
attrBuffer = _attrCollector.valueBufferFull();
if (!_config.willRetainAttributeGeneralEntities()) {
int d = handleEntityInText(false);
if (d == 0) { // unexpanded general entity... not good
reportUnexpandedEntityInAttr(attrName, false);
}
// Ok; does it need a surrogate though? (over 16 bits)
if ((d >> 16) != 0) {
d -= 0x10000;
attrBuffer[attrPtr++] = (char) (0xD800 | (d >> 10));
d = 0xDC00 | (d & 0x3FF);
if (attrPtr >= attrBuffer.length) {
attrBuffer = _attrCollector.valueBufferFull();
}
}
c = (char) d;
}
c = (char) d;
}
break;
case XmlCharTypes.CT_ATTR_QUOTE:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import com.fasterxml.aalto.AaltoInputProperties;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;

Expand All @@ -36,7 +37,7 @@ public class SAXParserFactoryImpl
extends SAXParserFactory
{
final InputFactoryImpl mStaxFactory;

public SAXParserFactoryImpl()
{
// defaults should be fine...
Expand Down Expand Up @@ -68,6 +69,8 @@ public boolean getFeature(String name)
switch (stdFeat) {
case IS_STANDALONE: // read-only, but only during parsing
return true;
case EXTERNAL_GENERAL_ENTITIES:
return Boolean.FALSE.equals(mStaxFactory.getProperty(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES));
default:
}
} else {
Expand Down Expand Up @@ -95,7 +98,8 @@ public void setFeature(String name, boolean enabled)

switch (stdFeat) {
case EXTERNAL_GENERAL_ENTITIES:
ok = !enabled;
mStaxFactory.setProperty(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES, !enabled);
ok = true;
break;
case EXTERNAL_PARAMETER_ENTITIES:
ok = !enabled;
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/com/fasterxml/aalto/sax/SAXParserImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;

import com.fasterxml.aalto.AaltoInputProperties;
import org.xml.sax.*;
import org.xml.sax.ext.Attributes2;
import org.xml.sax.ext.DeclHandler;
Expand Down Expand Up @@ -262,6 +263,8 @@ public boolean getFeature(String name)
case IS_STANDALONE: // read-only, but only during parsing
// !!! TBI
return true;
case EXTERNAL_GENERAL_ENTITIES:
return Boolean.FALSE.equals(_staxFactory.getProperty(AaltoInputProperties.P_RETAIN_ATTRIBUTE_GENERAL_ENTITIES));
default:
}
} else {
Expand Down
2 changes: 0 additions & 2 deletions src/main/java/com/fasterxml/aalto/sax/SAXUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,6 @@ public static SAXProperty findStdProperty(String featURI)
public static Boolean getFixedStdFeatureValue(SAXFeature stdFeat)
{
switch (stdFeat) {
case EXTERNAL_GENERAL_ENTITIES: // not yet implemented
return Boolean.FALSE;
case EXTERNAL_PARAMETER_ENTITIES: // not yet implemented
return Boolean.FALSE;
case IS_STANDALONE: // read-only, but only during parsing
Expand Down
41 changes: 41 additions & 0 deletions src/test/java/com/fasterxml/aalto/sax/TestEntityResolver.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package com.fasterxml.aalto.sax;

import java.io.*;
import java.util.concurrent.CountDownLatch;

import javax.xml.parsers.SAXParser;
import javax.xml.stream.XMLInputFactory;

import com.fasterxml.aalto.stax.InputFactoryImpl;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;

Expand Down Expand Up @@ -47,6 +50,44 @@ public void testWithDummyExtSubset()
}
}

public void testRetainAttributeEntityReference()
throws Exception
{
final String XML =
"<!DOCTYPE root PUBLIC '//some//public//id' 'no-such-thing.dtd'>\n"
+"<root b=\"&replace-me;\" />";

SAXParserFactoryImpl spf = new SAXParserFactoryImpl();
SAXParser sp = spf.newSAXParser();
DefaultHandler h = new DefaultHandler();

try {
sp.parse(new InputSource(new StringReader(XML)), h);
fail();
} catch (SAXException e) {
verifyException(e, "General entity reference (&replace-me;) encountered in entity expanding mode: operation not (yet) implemented\n at [row,col {unknown-source}]: [2,22]");
}

SAXParserFactoryImpl spfKeepEntityReferences = new SAXParserFactoryImpl();
spfKeepEntityReferences.setFeature("http://xml.org/sax/features/external-general-entities", false);
SAXParser spKeepEntityReferences = spfKeepEntityReferences.newSAXParser();

final CountDownLatch countDownLatch = new CountDownLatch(1);
spKeepEntityReferences.parse(new InputSource(new StringReader(XML)), new DefaultHandler() {
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) {
assertEquals("root", localName);
assertEquals("root", qName);
assertEquals(1, attributes.getLength());
assertEquals("&replace-me;", attributes.getValue(0));

countDownLatch.countDown();
}
});

assertEquals(0, countDownLatch.getCount());
}

static class MyResolver
implements EntityResolver
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.fasterxml.aalto.sax;

import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;

public class TestSAXParserFactoryImpl extends base.BaseTestCase {

public void testSetGetFeatureExternalGeneralEntities() throws SAXNotRecognizedException, SAXNotSupportedException {
SAXParserFactoryImpl saxParserFactory = new SAXParserFactoryImpl();
saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
assertFalse(saxParserFactory.getFeature("http://xml.org/sax/features/external-general-entities"));

saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", true);
assertTrue(saxParserFactory.getFeature("http://xml.org/sax/features/external-general-entities"));
}

}

0 comments on commit 49dfd6a

Please sign in to comment.