Skip to content

Commit

Permalink
TIKA-4358 -- turn on extraction of incremental update metadata as default (#2062)
Browse files Browse the repository at this point in the history

* TIKA-4358 -- turn on extraction of incremental update metadata as default
  • Loading branch information
tballison authored Nov 21, 2024
1 parent ff9d722 commit 45a16c4
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 5 deletions.
2 changes: 1 addition & 1 deletion tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ private void configurePDFExtractSettings() {
pdfParserConfig.setExtractIncrementalUpdateInfo(true);
pdfParserConfig.setParseIncrementalUpdates(true);
String warn = "As a convenience, TikaCLI has turned on extraction of\n" +
"inline images and incremental updates for the PDFParser (TIKA-2374, " +
"inline images and parsing of incremental updates for the PDFParser (TIKA-2374, " +
"TIKA-4017 and TIKA-4354).\n" +
"This is not the default behavior in Tika generally or in tika-server.";
LOG.info(warn);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ public void testMetadataOutput() throws Exception {
public void testJsonMetadataOutput() throws Exception {
String json = getParamOutContent("--json", "--digest=MD2", resourcePrefix + "testJsonMultipleInts.html");
//TIKA-1310
assertTrue(json.contains("\"fb:admins\":\"1,2,3,4\","));
assertTrue(json.contains("\"" + "fb:admins\":\"1,2,3,4\","));
assertTrue(json.contains("\"X-TIKA:digest:MD2\":"));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ public ImageType getImageType() {

private Renderer renderer;

private boolean extractIncrementalUpdateInfo = false;
private boolean extractIncrementalUpdateInfo = true;

private boolean parseIncrementalUpdates = false;

Expand Down Expand Up @@ -876,7 +876,8 @@ public int getMaxIncrementalUpdates() {
}

/**
* The maximum number of incremental updates to parse.
* The maximum number of incremental updates to parse if
* {@link #setParseIncrementalUpdates(boolean)} is set to <code>true</code>
*
* @param maxIncrementalUpdates
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ public String getPassword(Metadata metadata) {
assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("true", metadata.get("pdf:encrypted"));
//pdf:encrypted, X-Parsed-By and Content-Type
assertEquals(5, metadata.names().length, "very little metadata should be parsed");
assertEquals(8, metadata.names().length, "very little metadata should be parsed");
assertEquals(0, handler.toString().length());
}

Expand Down

0 comments on commit 45a16c4

Please sign in to comment.