diff --git a/poi-ooxml/src/main/java/org/apache/poi/ooxml/POIXMLRelation.java b/poi-ooxml/src/main/java/org/apache/poi/ooxml/POIXMLRelation.java index e4f6e08ef7e..ada90eb75dd 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/ooxml/POIXMLRelation.java +++ b/poi-ooxml/src/main/java/org/apache/poi/ooxml/POIXMLRelation.java @@ -198,6 +198,9 @@ public ParentPartConstructor getParentPartConstructor() { * @since 3.16-beta3 */ public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException { + if (corePart == null) { + throw new IllegalArgumentException("Core-Part cannot be empty"); + } PackageRelationshipCollection prc = corePart.getRelationshipsByType(getRelation()); Iterator it = prc.iterator(); diff --git a/poi-ooxml/src/main/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java b/poi-ooxml/src/main/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java index 2de39e36691..a648f26d9f3 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java +++ b/poi-ooxml/src/main/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java @@ -229,7 +229,7 @@ public POIXMLTextExtractor create(OPCPackage pkg) throws IOException { // Grab the core document part, and try to identify from that final PackagePart corePart = pkg.getPart(core.getRelationship(0)); - final String contentType = corePart.getContentType(); + final String contentType = corePart == null ? null : corePart.getContentType(); // Is it XSSF? for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) { diff --git a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/WordToTextConverter.java b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/WordToTextConverter.java index ddd7ff68d58..8cb75796210 100644 --- a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/WordToTextConverter.java +++ b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/WordToTextConverter.java @@ -22,8 +22,6 @@ Licensed to the Apache Software Foundation (ASF) under one or more import java.util.List; import java.util.concurrent.atomic.AtomicInteger; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.dom.DOMSource; @@ -53,10 +51,11 @@ Licensed to the Apache Software Foundation (ASF) under one or more import org.w3c.dom.Element; @Beta -public class WordToTextConverter extends AbstractWordConverter -{ +public class WordToTextConverter extends AbstractWordConverter { private static final Logger LOG = LogManager.getLogger(WordToTextConverter.class); + private static final int MAX_NESTED_CHILD_NODES = 500; + public static String getText( DirectoryNode root ) throws Exception { final HWPFDocumentCore wordDocument = AbstractWordUtils.loadDoc( root ); @@ -109,7 +108,7 @@ public static void main( String[] args ) throws Exception { serializer.transform( domSource, streamResult ); } - private static Document process( File docFile ) throws IOException, ParserConfigurationException { + private static Document process( File docFile ) throws IOException { try (final HWPFDocumentCore wordDocument = AbstractWordUtils.loadDoc( docFile )) { WordToTextConverter wordToTextConverter = new WordToTextConverter( XMLHelper.newDocumentBuilder().newDocument()); @@ -118,7 +117,7 @@ private static Document process( File docFile ) throws IOException, ParserConfig } } - private AtomicInteger noteCounters = new AtomicInteger( 1 ); + private final AtomicInteger noteCounters = new AtomicInteger( 1 ); private Element notes; @@ -130,11 +129,8 @@ private static Document process( File docFile ) throws IOException, ParserConfig * Creates new instance of {@link WordToTextConverter}. Can be used for * output several {@link HWPFDocument}s into single text document. * - * @throws ParserConfigurationException - * if an internal {@link DocumentBuilder} cannot be created */ - public WordToTextConverter() throws ParserConfigurationException - { + public WordToTextConverter() { this.textDocumentFacade = new TextDocumentFacade( XMLHelper.newDocumentBuilder().newDocument() ); } @@ -312,6 +308,12 @@ private void processNote( HWPFDocument wordDocument, Element block, Range noteTe Element note = textDocumentFacade.createBlock(); notes.appendChild( note ); + // avoid StackOverflowException with very deeply nested files (mostly synthetic test files via fuzzing) + // if this limit is reached in real-word documents, we can make this configurable + if (note.getParentNode() != null && note.getParentNode().getChildNodes().getLength() > MAX_NESTED_CHILD_NODES) { + throw new IllegalStateException("Had more than the limit of " + MAX_NESTED_CHILD_NODES + " nested child notes"); + } + note.appendChild( textDocumentFacade.createText( "^" + noteIndex + "\t " ) ); processCharacters( wordDocument, Integer.MIN_VALUE, noteTextRange, note ); diff --git a/poi-scratchpad/src/test/java/org/apache/poi/hslf/dev/BaseTestPPTIterating.java b/poi-scratchpad/src/test/java/org/apache/poi/hslf/dev/BaseTestPPTIterating.java index 7848f688cde..ef146dda238 100644 --- a/poi-scratchpad/src/test/java/org/apache/poi/hslf/dev/BaseTestPPTIterating.java +++ b/poi-scratchpad/src/test/java/org/apache/poi/hslf/dev/BaseTestPPTIterating.java @@ -35,6 +35,7 @@ Licensed to the Apache Software Foundation (ASF) under one or more import org.apache.poi.POIDataSamples; import org.apache.poi.hslf.exceptions.EncryptedPowerPointFileException; +import org.apache.poi.hslf.exceptions.HSLFException; import org.apache.poi.hslf.exceptions.OldPowerPointFormatException; import org.apache.poi.util.IOUtils; import org.apache.commons.io.output.NullPrintStream; @@ -65,6 +66,7 @@ public abstract class BaseTestPPTIterating { EXCLUDED.put("clusterfuzz-testcase-minimized-POIHSLFFuzzer-6710128412590080.ppt", RuntimeException.class); EXCLUDED.put("clusterfuzz-testcase-minimized-POIFuzzer-5429732352851968.ppt", FileNotFoundException.class); EXCLUDED.put("clusterfuzz-testcase-minimized-POIFuzzer-5681320547975168.ppt", FileNotFoundException.class); + EXCLUDED.put("clusterfuzz-testcase-minimized-POIHSLFFuzzer-5962760801091584.ppt", RuntimeException.class); } public static Stream files() { diff --git a/poi/src/main/java/org/apache/poi/ddf/EscherContainerRecord.java b/poi/src/main/java/org/apache/poi/ddf/EscherContainerRecord.java index 74df2761a6e..da4d2a32898 100644 --- a/poi/src/main/java/org/apache/poi/ddf/EscherContainerRecord.java +++ b/poi/src/main/java/org/apache/poi/ddf/EscherContainerRecord.java @@ -50,6 +50,8 @@ public final class EscherContainerRecord extends EscherRecord implements Iterabl private static final Logger LOGGER = LogManager.getLogger(EscherContainerRecord.class); + private static final int MAX_NESTED_CHILD_NODES = 1000; + /** * in case if document contains any charts we have such document structure: * BOF @@ -86,12 +88,26 @@ public EscherContainerRecord(EscherContainerRecord other) { @Override public int fillFields(byte[] data, int pOffset, EscherRecordFactory recordFactory) { + return fillFields(data, pOffset, recordFactory, 0); + } + + private int fillFields(byte[] data, int pOffset, EscherRecordFactory recordFactory, int nesting) { + if (nesting > MAX_NESTED_CHILD_NODES) { + throw new IllegalStateException("Had more than the limit of " + MAX_NESTED_CHILD_NODES + " nested child notes"); + } int bytesRemaining = readHeader(data, pOffset); int bytesWritten = 8; int offset = pOffset + 8; while (bytesRemaining > 0 && offset < data.length) { EscherRecord child = recordFactory.createRecord(data, offset); - int childBytesWritten = child.fillFields(data, offset, recordFactory); + + final int childBytesWritten; + if (child instanceof EscherContainerRecord) { + childBytesWritten = ((EscherContainerRecord)child).fillFields(data, offset, recordFactory, nesting + 1); + } else { + childBytesWritten = child.fillFields(data, offset, recordFactory); + } + bytesWritten += childBytesWritten; offset += childBytesWritten; bytesRemaining -= childBytesWritten; diff --git a/test-data/document/clusterfuzz-testcase-minimized-POIHWPFFuzzer-5074346559012864.doc b/test-data/document/clusterfuzz-testcase-minimized-POIHWPFFuzzer-5074346559012864.doc new file mode 100644 index 00000000000..add1da339f2 Binary files /dev/null and b/test-data/document/clusterfuzz-testcase-minimized-POIHWPFFuzzer-5074346559012864.doc differ diff --git a/test-data/document/clusterfuzz-testcase-minimized-POIXWPFFuzzer-6733884933668864.docx b/test-data/document/clusterfuzz-testcase-minimized-POIXWPFFuzzer-6733884933668864.docx new file mode 100644 index 00000000000..36132483c9b Binary files /dev/null and b/test-data/document/clusterfuzz-testcase-minimized-POIXWPFFuzzer-6733884933668864.docx differ diff --git a/test-data/slideshow/clusterfuzz-testcase-minimized-POIHSLFFuzzer-5962760801091584.ppt b/test-data/slideshow/clusterfuzz-testcase-minimized-POIHSLFFuzzer-5962760801091584.ppt new file mode 100644 index 00000000000..9bc426a2b20 Binary files /dev/null and b/test-data/slideshow/clusterfuzz-testcase-minimized-POIHSLFFuzzer-5962760801091584.ppt differ diff --git a/test-data/spreadsheet/clusterfuzz-testcase-minimized-XLSX2CSVFuzzer-6594557414080512.xlsx b/test-data/spreadsheet/clusterfuzz-testcase-minimized-XLSX2CSVFuzzer-6594557414080512.xlsx new file mode 100644 index 00000000000..b6447933802 Binary files /dev/null and b/test-data/spreadsheet/clusterfuzz-testcase-minimized-XLSX2CSVFuzzer-6594557414080512.xlsx differ diff --git a/test-data/spreadsheet/stress.xls b/test-data/spreadsheet/stress.xls index 0c0044cfc45..5841042e61a 100644 Binary files a/test-data/spreadsheet/stress.xls and b/test-data/spreadsheet/stress.xls differ