-
Notifications
You must be signed in to change notification settings - Fork 237
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Age-off modularization and new formatting utility (#2326)
* Extracted code and resources only related to age-off to their own module
  Removed unneeded dependencies and transitive dependencies from poms, added scope and comment for each
* Added CSV to age-off XML configuration file utilities
* Corrected compiler warnings and code inspection findings
* Use pom version variables when reused
* Correct javadoc formatting and remove star exclusions
* Began changes from peer review
* Remove XMLStreamWriter implementation
* MR feedback
* MR feedback
* MR feedback - removed Reader
* MR feedback: improved exception message
* MR feedback: survival food
* MR feedback: remove the remove the redundancy
* Updated poms based on dependency:analyze feedback
* Use Document Transformer to create XML
* Update pom versions in new modules post-rebase
* Remove typo from pom version
* Update pom versions in new modules post-rebase

---------

Co-authored-by: Matthew Peterson <[email protected]>
Co-authored-by: hgklohr <[email protected]>
- Loading branch information
1 parent
b7d257f
commit 7361007
Showing
80 changed files
with
3,163 additions
and
292 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<parent> | ||
<groupId>gov.nsa.datawave</groupId> | ||
<artifactId>datawave-warehouse-parent</artifactId> | ||
<version>6.12.0-SNAPSHOT</version> | ||
</parent> | ||
<artifactId>datawave-age-off-utils</artifactId> | ||
<name>${project.artifactId}</name> | ||
<dependencies> | ||
<!-- Needed for VisibleForTesting --> | ||
<dependency> | ||
<groupId>com.google.guava</groupId> | ||
<artifactId>guava</artifactId> | ||
<scope>compile</scope> | ||
</dependency> | ||
<!-- For AppliedRule, etc. --> | ||
<dependency> | ||
<groupId>gov.nsa.datawave</groupId> | ||
<artifactId>datawave-age-off</artifactId> | ||
<version>${project.version}</version> | ||
<scope>compile</scope> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>xml-apis</groupId> | ||
<artifactId>xml-apis</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.apache.accumulo</groupId> | ||
<artifactId>accumulo-core</artifactId> | ||
<scope>compile</scope> | ||
</dependency> | ||
<!-- Needed for org.slf4j imports --> | ||
<dependency> | ||
<groupId>org.slf4j</groupId> | ||
<artifactId>slf4j-api</artifactId> | ||
<scope>compile</scope> | ||
</dependency> | ||
<!-- RuleConfigDocument --> | ||
<dependency> | ||
<groupId>xerces</groupId> | ||
<artifactId>xercesImpl</artifactId> | ||
<version>${version.xerces}</version> | ||
<scope>compile</scope> | ||
</dependency> | ||
<!-- RuleConfigDocument --> | ||
<dependency> | ||
<groupId>xml-apis</groupId> | ||
<artifactId>xml-apis</artifactId> | ||
<version>1.4.01</version> | ||
<scope>compile</scope> | ||
</dependency> | ||
<!-- Test jar for TestFilter, etc. --> | ||
<dependency> | ||
<groupId>gov.nsa.datawave</groupId> | ||
<artifactId>datawave-age-off</artifactId> | ||
<version>${project.version}</version> | ||
<classifier>tests</classifier> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>junit</groupId> | ||
<artifactId>junit</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
</dependencies> | ||
</project> |
46 changes: 46 additions & 0 deletions
46
warehouse/age-off-utils/src/main/java/datawave/age/off/util/AgeOffCsvColumnInformation.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package datawave.age.off.util; | ||
|
||
import java.text.MessageFormat; | ||
import java.util.Arrays; | ||
|
||
/**
 * Records which zero-based column of a CSV header row holds each recognized age-off column. Header names are matched after trimming and lower-casing, so
 * {@code " Pattern "} and {@code "PATTERN"} both match {@code "pattern"}. Unrecognized columns are ignored.
 */
public class AgeOffCsvColumnInformation {

    // Zero-based column positions populated by parseHeader; -1 means the column was not found in the header
    int patternColumnNumber = -1;
    int durationColumnNumber = -1;
    int labelColumnNumber = -1;
    int overrideColumnNumber = -1;

    // required
    private static final String PATTERN_COLUMN_HEADER = "pattern";
    // required
    private static final String DURATION_COLUMN_HEADER = "duration";
    // optional
    private static final String LABEL_COLUMN_HEADER = "label";
    // optional - conditionally override duration
    private static final String DURATION_OVERRIDE_COLUMN_HEADER = "override";

    /**
     * Locates the recognized columns within the header row. If a header name appears more than once, the last occurrence wins.
     *
     * @param headerTokens
     *            the header row split into individual column names; null entries are treated as unrecognized columns
     * @throws IllegalStateException
     *             if either the duration or the pattern column is absent
     */
    public void parseHeader(String[] headerTokens) {
        // Forget positions from any previous header so a reused instance cannot report stale columns
        this.patternColumnNumber = -1;
        this.durationColumnNumber = -1;
        this.labelColumnNumber = -1;
        this.overrideColumnNumber = -1;

        int columnNumber = 0;
        for (String headerToken : headerTokens) {
            if (headerToken != null) {
                // Locale.ROOT keeps the match independent of the JVM default locale (e.g. Turkish lower-cases 'I' to a dotless 'ı')
                switch (headerToken.trim().toLowerCase(java.util.Locale.ROOT)) {
                    case DURATION_COLUMN_HEADER:
                        this.durationColumnNumber = columnNumber;
                        break;
                    case LABEL_COLUMN_HEADER:
                        this.labelColumnNumber = columnNumber;
                        break;
                    case PATTERN_COLUMN_HEADER:
                        this.patternColumnNumber = columnNumber;
                        break;
                    case DURATION_OVERRIDE_COLUMN_HEADER:
                        this.overrideColumnNumber = columnNumber;
                        break;
                    default:
                        // columns with unrecognized headers are intentionally ignored
                        break;
                }
            }
            columnNumber++;
        }
        if (this.durationColumnNumber == -1 || this.patternColumnNumber == -1) {
            throw new IllegalStateException(MessageFormat.format("Unable to find {0} or {1} in {2}", DURATION_COLUMN_HEADER, PATTERN_COLUMN_HEADER,
                            Arrays.toString(headerTokens)));
        }
    }
}
204 changes: 204 additions & 0 deletions
204
...e/age-off-utils/src/main/java/datawave/age/off/util/AgeOffCsvToMatchPatternFormatter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
package datawave.age.off.util; | ||
|
||
import java.io.IOException; | ||
import java.io.Writer; | ||
import java.util.Arrays; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
|
||
/** | ||
* Reformats csv input into an age off match pattern. Expects a header to appear as the first line that's not a comment or whitespace-only. See | ||
* ConfigurableAgeOffFilter. | ||
*/ | ||
public class AgeOffCsvToMatchPatternFormatter { | ||
private static final Logger log = LoggerFactory.getLogger(AgeOffCsvToMatchPatternFormatter.class); | ||
|
||
private static final String COMMA = ","; | ||
private static final char COLON = ':'; | ||
private static final char EQUALS = '='; | ||
private static final char NEW_LINE = '\n'; | ||
private static final char SPACE = ' '; | ||
private final AgeOffCsvToMatchPatternFormatterConfiguration configuration; | ||
private AgeOffCsvColumnInformation columnInformation; | ||
|
||
public AgeOffCsvToMatchPatternFormatter(AgeOffCsvToMatchPatternFormatterConfiguration configuration) { | ||
this.configuration = configuration; | ||
} | ||
|
||
/** | ||
* Reformats each input line and outputs to writer | ||
* | ||
* @param writer | ||
* output writer | ||
* @throws IOException | ||
* i/o exception with writer | ||
*/ | ||
@VisibleForTesting | ||
void write(Writer writer) throws IOException { | ||
while (configuration.getInputIterator().hasNext()) { | ||
String inputLine = configuration.getInputIterator().next(); | ||
reformat(writer, inputLine); | ||
} | ||
} | ||
|
||
private void reformat(Writer writer, String inputLine) throws IOException { | ||
String trimmedLine = inputLine.trim(); | ||
|
||
if (isWhitespaceOnly(trimmedLine)) { | ||
writer.write(inputLine + "\n"); | ||
} else if (isComment(trimmedLine)) { | ||
writer.write(createComment(trimmedLine)); | ||
} else { | ||
// Use -1 to prevent chopping of empty tokens | ||
String[] tokens = inputLine.split(COMMA, -1); | ||
|
||
if (columnInformation == null) { | ||
log.debug("Attempting to parse header: {}", inputLine); | ||
initializeHeader(tokens); | ||
} else { | ||
writer.write(reformatLine(tokens)); | ||
} | ||
} | ||
} | ||
|
||
private boolean isWhitespaceOnly(String trimmedLine) { | ||
return trimmedLine.equals(""); | ||
} | ||
|
||
private void initializeHeader(String[] tokens) { | ||
columnInformation = new AgeOffCsvColumnInformation(); | ||
columnInformation.parseHeader(tokens); | ||
} | ||
|
||
private boolean isComment(String trimmedLine) { | ||
return trimmedLine.startsWith("#"); | ||
} | ||
|
||
private String createComment(String trimmedLine) { | ||
return "<!--" + trimmedLine.substring(1) + "-->\n"; | ||
} | ||
|
||
private String reformatLine(String[] tokens) { | ||
StringBuilder sb = new StringBuilder(); | ||
|
||
appendLabel(tokens, sb); | ||
|
||
appendLiteral(tokens, sb); | ||
|
||
appendEquivalenceSymbol(sb); | ||
|
||
appendValue(tokens, sb); | ||
|
||
sb.append(NEW_LINE); | ||
|
||
return sb.toString(); | ||
} | ||
|
||
private void appendValue(String[] tokens, StringBuilder sb) { | ||
String value = ""; | ||
|
||
// use override value if it exists for this line (it might be empty) | ||
if (configuration.useOverrides()) { | ||
if (tokens.length <= columnInformation.overrideColumnNumber) { | ||
log.error("Unable to process override {}", Arrays.toString(tokens)); | ||
throw new IllegalStateException("Unable to process override from " + Arrays.toString(tokens)); | ||
} | ||
value = tokens[columnInformation.overrideColumnNumber].trim(); | ||
} | ||
|
||
// if overrides are disabled or override was missing | ||
if (value.length() == 0) { | ||
if (tokens.length <= columnInformation.durationColumnNumber) { | ||
log.error("Unable to process duration {}", Arrays.toString(tokens)); | ||
throw new IllegalStateException("Unable to process duration from " + Arrays.toString(tokens)); | ||
} | ||
value = tokens[columnInformation.durationColumnNumber].trim(); | ||
} | ||
|
||
if (value.length() == 0) { | ||
log.error("Unable to find non-empty override or duration {}", Arrays.toString(tokens)); | ||
throw new IllegalStateException("Unable to find non-empty override or duration from tokens: " + Arrays.toString(tokens)); | ||
} | ||
sb.append(attemptValueMapping(value)); | ||
} | ||
|
||
private String attemptValueMapping(String originalValue) { | ||
if (null == configuration.getValueMapping()) { | ||
return originalValue; | ||
} | ||
|
||
String replacementValue = configuration.getValueMapping().get(originalValue); | ||
if (null == replacementValue) { | ||
return originalValue; | ||
} | ||
return replacementValue; | ||
} | ||
|
||
private void appendLabel(String[] tokens, StringBuilder sb) { | ||
if (configuration.shouldDisableLabel()) { | ||
return; | ||
} | ||
|
||
if (tokens.length <= columnInformation.labelColumnNumber) { | ||
log.error("Unable to process label {}", Arrays.toString(tokens)); | ||
throw new IllegalStateException("Unable to process label from " + Arrays.toString(tokens)); | ||
} | ||
|
||
String label = ""; | ||
|
||
if (null != configuration.getStaticLabel()) { | ||
label = configuration.getStaticLabel(); | ||
} else if (columnInformation.labelColumnNumber != -1) { | ||
label = tokens[columnInformation.labelColumnNumber].trim(); | ||
} | ||
|
||
if (label.length() == 0) { | ||
log.error("Unable to apply non-empty label {}", Arrays.toString(tokens)); | ||
throw new IllegalStateException("Unable to apply non-empty label from " + Arrays.toString(tokens)); | ||
} | ||
sb.append(label).append(SPACE); | ||
} | ||
|
||
private void appendLiteral(String[] tokens, StringBuilder sb) { | ||
if (tokens.length <= columnInformation.patternColumnNumber) { | ||
log.error("Unable to process literal {}", Arrays.toString(tokens)); | ||
throw new IllegalStateException("Not enough tokens"); | ||
} | ||
|
||
if (configuration.shouldQuoteLiteral()) { | ||
sb.append(configuration.getQuoteCharacter()); | ||
} | ||
|
||
String literal = tokens[columnInformation.patternColumnNumber].trim(); | ||
if (literal.length() == 0) { | ||
log.error("Unable to find non-empty literal {}", Arrays.toString(tokens)); | ||
throw new IllegalStateException("Unable to find non-empty literal from tokens: " + Arrays.toString(tokens)); | ||
} | ||
|
||
if (configuration.shouldUpperCaseLiterals()) { | ||
literal = literal.toUpperCase(); | ||
} else if (configuration.shouldLowerCaseLiterals()) { | ||
literal = literal.toLowerCase(); | ||
} | ||
sb.append(literal); | ||
|
||
if (configuration.shouldQuoteLiteral()) { | ||
sb.append(configuration.getQuoteCharacter()); | ||
} | ||
} | ||
|
||
private void appendEquivalenceSymbol(StringBuilder sb) { | ||
if (configuration.shouldPadEquivalence()) { | ||
sb.append(SPACE); | ||
} | ||
|
||
sb.append(configuration.useColons() ? COLON : EQUALS); | ||
|
||
if (configuration.shouldPadEquivalence()) { | ||
sb.append(SPACE); | ||
} | ||
} | ||
} |
Oops, something went wrong.