Skip to content

Commit 45a16c4

Browse files
authored
TIKA-4358 -- turn on extraction of incremental update metadata as default (#2062)
* TIKA-4358 -- turn on extraction of incremental update metadata as default
1 parent ff9d722 commit 45a16c4

File tree

4 files changed

+6
-5
lines changed

4 files changed

+6
-5
lines changed

tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ private void configurePDFExtractSettings() {
342342
pdfParserConfig.setExtractIncrementalUpdateInfo(true);
343343
pdfParserConfig.setParseIncrementalUpdates(true);
344344
String warn = "As a convenience, TikaCLI has turned on extraction of\n" +
345-
"inline images and incremental updates for the PDFParser (TIKA-2374, " +
345+
"inline images and parsing of incremental updates for the PDFParser (TIKA-2374, " +
346346
"TIKA-4017 and TIKA-4354).\n" +
347347
"This is not the default behavior in Tika generally or in tika-server.";
348348
LOG.info(warn);

tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ public void testMetadataOutput() throws Exception {
219219
public void testJsonMetadataOutput() throws Exception {
220220
String json = getParamOutContent("--json", "--digest=MD2", resourcePrefix + "testJsonMultipleInts.html");
221221
//TIKA-1310
222-
assertTrue(json.contains("\"fb:admins\":\"1,2,3,4\","));
222+
assertTrue(json.contains("\"" + "fb:admins\":\"1,2,3,4\","));
223223
assertTrue(json.contains("\"X-TIKA:digest:MD2\":"));
224224
}
225225

tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ public ImageType getImageType() {
153153

154154
private Renderer renderer;
155155

156-
private boolean extractIncrementalUpdateInfo = false;
156+
private boolean extractIncrementalUpdateInfo = true;
157157

158158
private boolean parseIncrementalUpdates = false;
159159

@@ -876,7 +876,8 @@ public int getMaxIncrementalUpdates() {
876876
}
877877

878878
/**
879-
* The maximum number of incremental updates to parse.
879+
* The maximum number of incremental updates to parse if
880+
* {@link #setParseIncrementalUpdates(boolean)} is set to <code>true</code>.
880881
*
881882
* @param maxIncrementalUpdates the maximum number of incremental updates to parse
882883
*/

tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ public String getPassword(Metadata metadata) {
237237
assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
238238
assertEquals("true", metadata.get("pdf:encrypted"));
239239
//pdf:encrypted, X-Parsed-By and Content-Type, plus a few other keys (presumably the incremental update info -- confirm)
240-
assertEquals(5, metadata.names().length, "very little metadata should be parsed");
240+
assertEquals(8, metadata.names().length, "very little metadata should be parsed");
241241
assertEquals(0, handler.toString().length());
242242
}
243243

0 commit comments

Comments
 (0)