Skip to content

Commit

Permalink
Age off modulation and new formatting utility (#2326)
Browse files Browse the repository at this point in the history
* Extracted code and resources only related to age-off to their own module. Removed unneeded dependencies and transitive dependencies from poms, and added a scope and comment for each

* Added CSV to Age off xml configuration file utilities

* Corrected compiler warnings and code inspection findings

* Use pom version variables when reused

* Correct javadoc formatting and remove star exclusions

* Began changes from peer review

* Remove XMLStreamWriter implementation

* MR feedback

* MR feedback

* MR feedback - removed Reader

* MR feedback: improved exception message

* MR feedback: survival food

* MR feedback: remove the remove the redundancy

* Updated poms based on dependency:analyze feedback

* Use Document Transformer to create XML

* Update pom versions in new modules post-rebase

* Remove typo from pom version

* Update pom versions in new modules post-rebase

---------

Co-authored-by: Matthew Peterson <[email protected]>
Co-authored-by: hgklohr <[email protected]>
  • Loading branch information
3 people committed May 6, 2024
1 parent b7d257f commit 7361007
Show file tree
Hide file tree
Showing 80 changed files with 3,163 additions and 292 deletions.
13 changes: 12 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,24 @@
<version.geotools>28.1</version.geotools>
<version.geowave>1.2.0</version.geowave>
<version.google-guava>31.1-jre</version.google-guava>
<version.google-guava-failure>1.0.1</version.google-guava-failure>
<version.googlecode-findbugs>2.0.3</version.googlecode-findbugs>
<version.googlecode-json-simple>1.1.1</version.googlecode-json-simple>
<version.hadoop>3.3.5</version.hadoop>
<version.hadoop-shaded>1.1.1</version.hadoop-shaded>
<version.hamcrest>1.3</version.hamcrest>
<version.httpcomponents-httpclient>4.5.13</version.httpcomponents-httpclient>
<version.httpcomponents-httpcore>4.4.8</version.httpcomponents-httpcore>
<version.in-memory-accumulo>3.0.1</version.in-memory-accumulo>
<version.infinispan>9.4.21.Final</version.infinispan>
<version.jackson>2.10.0.pr1</version.jackson>
<version.jackson-mapper-asl>1.9.13</version.jackson-mapper-asl>
<version.jakarta>2.3.3</version.jakarta>
<version.javassist>3.24.0-GA</version.javassist>
<version.javastatsd>3.1.0</version.javastatsd>
<version.javax-validation>2.0.1.Final</version.javax-validation>
<version.jaxb-api>2.3.1</version.jaxb-api>
<version.jaxb-impl>2.3.3</version.jaxb-impl>
<version.jcommander>1.72</version.jcommander>
<version.jetty>6.1.26</version.jetty>
<version.jgroups>4.0.19.Final</version.jgroups>
Expand Down Expand Up @@ -192,7 +197,7 @@
<!-- this is a runtime dependency of guava, no longer included with guava as of 27.1 -->
<groupId>com.google.guava</groupId>
<artifactId>failureaccess</artifactId>
<version>1.0.1</version>
<version>${version.google-guava-failure}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
Expand Down Expand Up @@ -1439,6 +1444,12 @@
</exclusion>
</exclusions>
</dependency>
<!-- Workaround for error: Execution default-cli of goal org.apache.maven.plugins:maven-dependency-plugin:3.1.1:analyze failed: Unsupported class file major version 55 -->
<dependency>
<groupId>org.apache.maven.shared</groupId>
<artifactId>maven-dependency-analyzer</artifactId>
<version>1.11.1</version>
</dependency>
</dependencies>
</plugin>
<plugin>
Expand Down
70 changes: 70 additions & 0 deletions warehouse/age-off-utils/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>gov.nsa.datawave</groupId>
<artifactId>datawave-warehouse-parent</artifactId>
<version>6.12.0-SNAPSHOT</version>
</parent>
<artifactId>datawave-age-off-utils</artifactId>
<name>${project.artifactId}</name>
<!-- Every dependency below states its scope explicitly. Dependencies without a version element presumably get their version from dependency management in a parent pom (TODO confirm). -->
<dependencies>
<!-- Needed for VisibleForTesting -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<scope>compile</scope>
</dependency>
<!-- For AppliedRule, etc. -->
<dependency>
<groupId>gov.nsa.datawave</groupId>
<artifactId>datawave-age-off</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
<exclusions>
<!-- xml-apis is excluded from this dependency and declared directly further down with an explicit version -->
<exclusion>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- NOTE(review): unlike the other compile dependencies, no purpose is recorded for accumulo-core; confirm which classes in this module need it -->
<dependency>
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-core</artifactId>
<scope>compile</scope>
</dependency>
<!-- Needed for org.slf4j imports -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<scope>compile</scope>
</dependency>
<!-- RuleConfigDocument -->
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>${version.xerces}</version>
<scope>compile</scope>
</dependency>
<!-- RuleConfigDocument -->
<!-- NOTE(review): this version is a literal rather than a version property like the xerces entry above; confirm whether it is used elsewhere and should become a property -->
<dependency>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
<version>1.4.01</version>
<scope>compile</scope>
</dependency>
<!-- Test jar for TestFilter, etc. -->
<dependency>
<groupId>gov.nsa.datawave</groupId>
<artifactId>datawave-age-off</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<!-- Test-scoped only; not part of the module's compile or runtime classpath -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package datawave.age.off.util;

import java.text.MessageFormat;
import java.util.Arrays;

/**
 * Records the zero-based position of each recognized column of an age-off csv header.
 * <p>
 * Header names are matched after trimming and lower-casing, so {@code " Pattern "} and {@code "PATTERN"} both match. The {@code pattern} and
 * {@code duration} columns are required; {@code label} and {@code override} are optional and stay at {@code -1} when absent. Unrecognized headers are
 * ignored. If a header name appears more than once, the last occurrence wins.
 */
public class AgeOffCsvColumnInformation {

    // Marks a column that did not appear in the parsed header
    private static final int COLUMN_NOT_FOUND = -1;

    // Column positions are package-private because the formatter in this package reads them directly
    int patternColumnNumber = COLUMN_NOT_FOUND;
    int durationColumnNumber = COLUMN_NOT_FOUND;
    int labelColumnNumber = COLUMN_NOT_FOUND;
    int overrideColumnNumber = COLUMN_NOT_FOUND;

    // required
    private static final String PATTERN_COLUMN_HEADER = "pattern";
    // required
    private static final String DURATION_COLUMN_HEADER = "duration";
    // optional
    private static final String LABEL_COLUMN_HEADER = "label";
    // optional - conditionally override duration
    private static final String DURATION_OVERRIDE_COLUMN_HEADER = "override";

    /**
     * Locates the recognized columns within the header tokens and stores their positions.
     *
     * @param headerTokens
     *            the header line split into its individual column names
     * @throws NullPointerException
     *             if headerTokens is null
     * @throws IllegalStateException
     *             if either the pattern or the duration column is missing
     */
    public void parseHeader(String[] headerTokens) {
        java.util.Objects.requireNonNull(headerTokens, "headerTokens");

        // Start from a clean slate so that re-parsing with a different header cannot leave stale positions behind
        this.patternColumnNumber = COLUMN_NOT_FOUND;
        this.durationColumnNumber = COLUMN_NOT_FOUND;
        this.labelColumnNumber = COLUMN_NOT_FOUND;
        this.overrideColumnNumber = COLUMN_NOT_FOUND;

        for (int columnNumber = 0; columnNumber < headerTokens.length; columnNumber++) {
            String headerToken = headerTokens[columnNumber];
            if (headerToken == null) {
                // a null entry cannot name a column; treat it like any other unrecognized header
                continue;
            }
            // Locale.ROOT keeps the match independent of the JVM default locale (e.g. Turkish lower-cases 'I' to a dotless i)
            switch (headerToken.trim().toLowerCase(java.util.Locale.ROOT)) {
                case DURATION_COLUMN_HEADER:
                    this.durationColumnNumber = columnNumber;
                    break;
                case LABEL_COLUMN_HEADER:
                    this.labelColumnNumber = columnNumber;
                    break;
                case PATTERN_COLUMN_HEADER:
                    this.patternColumnNumber = columnNumber;
                    break;
                case DURATION_OVERRIDE_COLUMN_HEADER:
                    this.overrideColumnNumber = columnNumber;
                    break;
                default:
                    // unrecognized headers are ignored
                    break;
            }
        }

        if (this.durationColumnNumber == COLUMN_NOT_FOUND || this.patternColumnNumber == COLUMN_NOT_FOUND) {
            throw new IllegalStateException(MessageFormat.format("Unable to find {0} or {1} in {2}", DURATION_COLUMN_HEADER, PATTERN_COLUMN_HEADER,
                            Arrays.toString(headerTokens)));
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
package datawave.age.off.util;

import java.io.IOException;
import java.io.Writer;
import java.util.Arrays;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;

/**
 * Reformats csv input into an age off match pattern. Expects a header to appear as the first line that's not a comment or whitespace-only. See
 * ConfigurableAgeOffFilter.
 * <p>
 * Each data line becomes {@code [label ]literal(=|:)value} followed by a newline. Whitespace-only lines are passed through, and lines starting with
 * {@code #} are rewritten as {@code <!-- ... -->} comments. The header line itself produces no output.
 */
public class AgeOffCsvToMatchPatternFormatter {
    private static final Logger log = LoggerFactory.getLogger(AgeOffCsvToMatchPatternFormatter.class);

    private static final String COMMA = ",";
    private static final char COLON = ':';
    private static final char EQUALS = '=';
    private static final char NEW_LINE = '\n';
    private static final char SPACE = ' ';
    private final AgeOffCsvToMatchPatternFormatterConfiguration configuration;
    // null until the header line has been seen; its presence is what distinguishes data lines from the header
    private AgeOffCsvColumnInformation columnInformation;

    /**
     * @param configuration
     *            supplies the input lines and all formatting options
     */
    public AgeOffCsvToMatchPatternFormatter(AgeOffCsvToMatchPatternFormatterConfiguration configuration) {
        this.configuration = configuration;
    }

    /**
     * Reformats each input line and outputs to writer
     *
     * @param writer
     *            output writer
     * @throws IOException
     *             i/o exception with writer
     * @throws IllegalStateException
     *             if the header or a data line cannot be processed
     */
    @VisibleForTesting
    void write(Writer writer) throws IOException {
        // NOTE(review): relies on getInputIterator() handing back the same iterator on every call - confirm in the configuration class
        while (configuration.getInputIterator().hasNext()) {
            String inputLine = configuration.getInputIterator().next();
            reformat(writer, inputLine);
        }
    }

    /**
     * Dispatches a single input line: blank lines are echoed, comments are converted, the first remaining line is parsed as the header, and every line
     * after that is written as a match pattern.
     */
    private void reformat(Writer writer, String inputLine) throws IOException {
        String trimmedLine = inputLine.trim();

        if (isWhitespaceOnly(trimmedLine)) {
            // echo the untrimmed line so the original whitespace is preserved
            writer.write(inputLine + "\n");
        } else if (isComment(trimmedLine)) {
            writer.write(createComment(trimmedLine));
        } else {
            // Use -1 to prevent chopping of empty tokens
            String[] tokens = inputLine.split(COMMA, -1);

            if (columnInformation == null) {
                log.debug("Attempting to parse header: {}", inputLine);
                initializeHeader(tokens);
            } else {
                writer.write(reformatLine(tokens));
            }
        }
    }

    private boolean isWhitespaceOnly(String trimmedLine) {
        return trimmedLine.isEmpty();
    }

    private void initializeHeader(String[] tokens) {
        columnInformation = new AgeOffCsvColumnInformation();
        columnInformation.parseHeader(tokens);
    }

    private boolean isComment(String trimmedLine) {
        return trimmedLine.startsWith("#");
    }

    /**
     * Drops the leading '#' and wraps the remainder in comment delimiters.
     */
    private String createComment(String trimmedLine) {
        // NOTE(review): the text is copied verbatim; if the output is embedded in XML, a comment containing "--" would not be well-formed - confirm
        // whether that needs escaping
        return "<!--" + trimmedLine.substring(1) + "-->\n";
    }

    /**
     * Builds one output line from the tokens of a data line: optional label, literal, equivalence symbol, value, newline.
     */
    private String reformatLine(String[] tokens) {
        StringBuilder sb = new StringBuilder();

        appendLabel(tokens, sb);

        appendLiteral(tokens, sb);

        appendEquivalenceSymbol(sb);

        appendValue(tokens, sb);

        sb.append(NEW_LINE);

        return sb.toString();
    }

    /**
     * Appends the override value when overrides are enabled and the line has one, otherwise the duration, after applying any configured value mapping.
     *
     * @throws IllegalStateException
     *             if overrides are enabled without an override column, the line is too short, or neither value is non-empty
     */
    private void appendValue(String[] tokens, StringBuilder sb) {
        String value = "";

        // use override value if it exists for this line (it might be empty)
        if (configuration.useOverrides()) {
            if (columnInformation.overrideColumnNumber < 0) {
                // the override column is optional in the header, so indexing with it unchecked would read tokens[-1]
                log.error("Unable to process override because the header has no override column {}", Arrays.toString(tokens));
                throw new IllegalStateException("Overrides are enabled but the header has no override column; unable to process " + Arrays.toString(tokens));
            }
            if (tokens.length <= columnInformation.overrideColumnNumber) {
                log.error("Unable to process override {}", Arrays.toString(tokens));
                throw new IllegalStateException("Unable to process override from " + Arrays.toString(tokens));
            }
            value = tokens[columnInformation.overrideColumnNumber].trim();
        }

        // if overrides are disabled or override was missing
        if (value.length() == 0) {
            if (tokens.length <= columnInformation.durationColumnNumber) {
                log.error("Unable to process duration {}", Arrays.toString(tokens));
                throw new IllegalStateException("Unable to process duration from " + Arrays.toString(tokens));
            }
            value = tokens[columnInformation.durationColumnNumber].trim();
        }

        if (value.length() == 0) {
            log.error("Unable to find non-empty override or duration {}", Arrays.toString(tokens));
            throw new IllegalStateException("Unable to find non-empty override or duration from tokens: " + Arrays.toString(tokens));
        }
        sb.append(attemptValueMapping(value));
    }

    /**
     * Returns the configured replacement for the value, or the value itself when there is no mapping or no entry for it.
     */
    private String attemptValueMapping(String originalValue) {
        if (null == configuration.getValueMapping()) {
            return originalValue;
        }

        String replacementValue = configuration.getValueMapping().get(originalValue);
        if (null == replacementValue) {
            return originalValue;
        }
        return replacementValue;
    }

    /**
     * Appends the label and a trailing space unless labels are disabled. A configured static label takes precedence over the label column.
     *
     * @throws IllegalStateException
     *             if the label has to come from the label column but the line is too short, or if no non-empty label is available
     */
    private void appendLabel(String[] tokens, StringBuilder sb) {
        if (configuration.shouldDisableLabel()) {
            return;
        }

        String label = "";

        String staticLabel = configuration.getStaticLabel();
        if (null != staticLabel) {
            label = staticLabel;
        } else if (columnInformation.labelColumnNumber != -1) {
            // only require the label column to be present when it is actually the source of the label
            if (tokens.length <= columnInformation.labelColumnNumber) {
                log.error("Unable to process label {}", Arrays.toString(tokens));
                throw new IllegalStateException("Unable to process label from " + Arrays.toString(tokens));
            }
            label = tokens[columnInformation.labelColumnNumber].trim();
        }

        if (label.length() == 0) {
            log.error("Unable to apply non-empty label {}", Arrays.toString(tokens));
            throw new IllegalStateException("Unable to apply non-empty label from " + Arrays.toString(tokens));
        }
        sb.append(label).append(SPACE);
    }

    /**
     * Appends the pattern column's value, optionally case-converted and optionally wrapped in the configured quote character.
     *
     * @throws IllegalStateException
     *             if the line is too short or the literal is empty
     */
    private void appendLiteral(String[] tokens, StringBuilder sb) {
        if (tokens.length <= columnInformation.patternColumnNumber) {
            log.error("Unable to process literal {}", Arrays.toString(tokens));
            throw new IllegalStateException("Not enough tokens");
        }

        if (configuration.shouldQuoteLiteral()) {
            sb.append(configuration.getQuoteCharacter());
        }

        String literal = tokens[columnInformation.patternColumnNumber].trim();
        if (literal.length() == 0) {
            log.error("Unable to find non-empty literal {}", Arrays.toString(tokens));
            throw new IllegalStateException("Unable to find non-empty literal from tokens: " + Arrays.toString(tokens));
        }

        // Locale.ROOT keeps the generated literals identical regardless of the JVM default locale; upper-casing wins if both options are set
        if (configuration.shouldUpperCaseLiterals()) {
            literal = literal.toUpperCase(java.util.Locale.ROOT);
        } else if (configuration.shouldLowerCaseLiterals()) {
            literal = literal.toLowerCase(java.util.Locale.ROOT);
        }
        sb.append(literal);

        if (configuration.shouldQuoteLiteral()) {
            sb.append(configuration.getQuoteCharacter());
        }
    }

    /**
     * Appends ':' or '=' depending on configuration, surrounded by single spaces when padding is enabled.
     */
    private void appendEquivalenceSymbol(StringBuilder sb) {
        if (configuration.shouldPadEquivalence()) {
            sb.append(SPACE);
        }

        sb.append(configuration.useColons() ? COLON : EQUALS);

        if (configuration.shouldPadEquivalence()) {
            sb.append(SPACE);
        }
    }
}
Loading

0 comments on commit 7361007

Please sign in to comment.