diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java index f59d887436..367a38c555 100644 --- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java +++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java @@ -200,7 +200,6 @@ public void process(InputStream stream, OutputStream output, Metadata metadata) */ private String password = System.getenv("TIKA_PASSWORD"); private DigestingParser.Digester digester = null; - private boolean asyncMode = false; private boolean pipeMode = true; private boolean fork = false; private boolean prettyPrint; @@ -340,9 +339,12 @@ private void configurePDFExtractSettings() { if (configFilePath == null && context.get(PDFParserConfig.class) == null) { PDFParserConfig pdfParserConfig = new PDFParserConfig(); pdfParserConfig.setExtractInlineImages(true); + pdfParserConfig.setExtractIncrementalUpdateInfo(true); pdfParserConfig.setParseIncrementalUpdates(true); - String warn = "As a convenience, TikaCLI has turned on extraction of\n" + "inline images and incremental updates for the PDFParser (TIKA-2374 and " + "TIKA-4017).\n" + - "Aside from the -z option, this is not the default behavior\n" + "in Tika generally or in tika-server."; + String warn = "As a convenience, TikaCLI has turned on extraction of\n" + + "inline images and incremental updates for the PDFParser (TIKA-2374, " + + "TIKA-4017 and TIKA-4354).\n" + + "This is not the default behavior in Tika generally or in tika-server."; LOG.info(warn); context.set(PDFParserConfig.class, pdfParserConfig); } @@ -401,8 +403,6 @@ public void process(String arg) throws Exception { // ignore, as container-aware detectors are now always used } else if (arg.equals("-f") || arg.equals("--fork")) { fork = true; - } else if (arg.equals("-a") || arg.equals("--async")) { - asyncMode = true; } else if (arg.startsWith("--config=")) { configFilePath = arg.substring("--config=".length()); } else if (arg.startsWith("--digest=")) { @@ -446,7 +446,6 @@ public void process(String arg) throws Exception { } extractDir = new File(dirPath); } else if (arg.equals("-z") || arg.equals("--extract")) { - configurePDFExtractSettings(); type = NO_OUTPUT; context.set(EmbeddedDocumentExtractor.class, new FileEmbeddedDocumentExtractor()); } else if (arg.equals("-r") || arg.equals("--pretty-print")) { @@ -475,6 +474,7 @@ public void process(String arg) throws Exception { } else { url = new URL(arg); } + configurePDFExtractSettings(); if (recursiveJSON) { handleRecursiveJson(url, System.out); } else { diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java index d0d7ce1003..072f2c7d7d 100644 --- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java +++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java @@ -120,7 +120,7 @@ public void testAsync() throws Exception { json++; } } - assertEquals(17, json); + assertEquals(18, json); } private void checkForPrettyPrint(File f) throws IOException { diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java index ec6e7df1a6..e318ff80b4 100644 --- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java +++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java @@ -241,6 +241,14 @@ public void testJsonMetadataPrettyPrintOutput() throws Exception { assertTrue(fb > -1 && title > -1 && fb > title); } + @Test + public void testDefaultPDFIncrementalUpdateSettings() throws Exception { + String json = getParamOutContent("-J", + resourcePrefix + "testPDF_incrementalUpdates.pdf"); + assertTrue(json.contains("pdf:incrementalUpdateCount\":\"2\"")); + assertTrue(json.contains("embeddedResourceType\":\"VERSION\"")); + } + /** * Tests -l option of the cli * diff --git a/tika-app/src/test/resources/test-data/testPDF_incrementalUpdates.pdf b/tika-app/src/test/resources/test-data/testPDF_incrementalUpdates.pdf new file mode 100644 index 0000000000..8494cc8396 Binary files /dev/null and b/tika-app/src/test/resources/test-data/testPDF_incrementalUpdates.pdf differ