Skip to content

Commit

Permalink
Run accumulo checks at WARN, and fix some easier spots (#2389)
Browse files Browse the repository at this point in the history
* run accumulo import checks

* Clean up some of the references to accumulo code

* Adding a few more easy ones

* Update missed reference to accumulo internal code

* Add root location prefix to fix sub-dir builds

Resolves the proper base directory for the import-control file so builds
from sub-directories will still work.

---------

Co-authored-by: Daniel Roberts <[email protected]>
  • Loading branch information
alerman and ddanielr committed May 28, 2024
1 parent 59be85b commit d51c4e9
Show file tree
Hide file tree
Showing 11 changed files with 67 additions and 27 deletions.
13 changes: 7 additions & 6 deletions checkstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
<!DOCTYPE module PUBLIC "-//Puppy Crawl//DTD Check Configuration 1.3//EN" "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<module name="Checker">
<property name="charset" value="UTF-8"/>
<property name="severity" value="info"/>
<property name="basedir" value="${basedir}"/>
<module name="TreeWalker">
<!--check that only Accumulo 1.9 public APIs are imported-->
<module name="RegexpSinglelineJava">
<!-- <property name="format" value="import\s+org\.apache\.accumulo\.(.*\.(impl|thrift|crypto)\..*|(?!core|minicluster|testing).*|core\.(?!client|conf|data|security|spi|iterators).*)"/>-->
<property name="format" value="import\s+org\.apache\.accumulo\.(?!(core\.(client|data|iterators|security)|minicluster|hadoop)\.).*" />
<property name="ignoreComments" value="true" />
<property name="message" value="Accumulo non-public classes imported" />
<!--check that only Accumulo 2.1.x public APIs are imported-->
<module name="ImportControl">
<!--Allows per-package enforcement for importing Accumulo 2.1.x public APIs -->
<property name="file" value="${basedir}/import-control-accumulo.xml"/>
<property name="severity" value="warning"/>
</module>
</module>
</module>
28 changes: 28 additions & 0 deletions import-control-accumulo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<!DOCTYPE import-control PUBLIC
"-//Checkstyle//DTD ImportControl Configuration 1.4//EN"
"https://checkstyle.org/dtds/import_control_1_4.dtd">

<!-- This checkstyle rule is configured to ensure only use of Accumulo API -->
<import-control
pkg="(|
|datawave.*|
|datawave\.common\.test.*|
|datawave\.test.*|
|gov.nsa.datawave.*)" regex="true" strategyOnMismatch="allowed">
<!-- API packages -->
<allow pkg="org.apache.accumulo.core.client"/>
<allow pkg="org.apache.accumulo.core.data"/>
<allow pkg="org.apache.accumulo.core.security"/>
<allow pkg="org.apache.accumulo.core.iterators"/>
<allow pkg="org.apache.accumulo.minicluster"/>
<allow pkg="org.apache.accumulo.hadoop.mapreduce"/>
<allow pkg="org.apache.accumulo.core.spi"/>

<!-- Temporarily allow some specific classes until accumulo
team seperates parts of that into public api -->
<allow class="org.apache.accumulo.core.conf.Property"/>
<allow class="org.apache.accumulo.core.clientImpl.TabletLocator"/>

<!-- disallow everything else coming from accumulo -->
<disallow pkg="org.apache.accumulo"/>
</import-control>
4 changes: 3 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1403,8 +1403,10 @@
<version>3.1.0</version>
<configuration>
<configLocation>checkstyle.xml</configLocation>
<failsOnError>false</failsOnError>
<failsOnError>true</failsOnError>
<failOnViolation>false</failOnViolation>
<consoleOutput>true</consoleOutput>
<propertyExpansion>basedir=${datawave.root}</propertyExpansion>
</configuration>
<dependencies>
<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import java.util.Set;
import java.util.StringTokenizer;
import java.util.UUID;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
Expand Down Expand Up @@ -54,8 +56,6 @@
import org.apache.accumulo.core.security.TablePermission;
import org.apache.accumulo.core.singletons.SingletonReservation;
import org.apache.accumulo.core.util.Pair;
import org.apache.accumulo.core.util.TextUtil;
import org.apache.accumulo.core.util.UtilWaitThread;
import org.apache.accumulo.core.util.format.DefaultFormatter;
import org.apache.accumulo.core.util.threads.Threads;
import org.apache.commons.codec.binary.Base64;
Expand Down Expand Up @@ -89,6 +89,7 @@
import datawave.mr.bulk.split.LocationStrategy;
import datawave.mr.bulk.split.RangeSplit;
import datawave.mr.bulk.split.SplitStrategy;
import datawave.util.TextUtil;

public class BulkInputFormat extends InputFormat<Key,Value> {

Expand Down Expand Up @@ -1115,7 +1116,7 @@ public List<InputSplit> getSplits(JobContext job) throws IOException {
binnedRanges = binOfflineTable(job, tableName, ranges);
while (binnedRanges == null) {
// Some tablets were still online, try again
UtilWaitThread.sleep(100L + (int) (Math.random() * 100)); // sleep randomly between 100 and 200 ms
TimeUnit.MILLISECONDS.sleep(ThreadLocalRandom.current().nextInt(100, 200));
binnedRanges = binOfflineTable(job, tableName, ranges);
}
} else {
Expand All @@ -1138,7 +1139,7 @@ public List<InputSplit> getSplits(JobContext job) throws IOException {
}
binnedRanges.clear();
log.warn("Unable to locate bins for specified ranges. Retrying.");
UtilWaitThread.sleep(100 + (int) (Math.random() * 100)); // sleep randomly between 100 and 200 ms
TimeUnit.MILLISECONDS.sleep(ThreadLocalRandom.current().nextInt(100, 200));
tl.invalidateCache();
}

Expand Down
9 changes: 9 additions & 0 deletions warehouse/core/src/main/java/datawave/util/TextUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,13 @@ public static String fromUtf8(byte[] bytes) {
throw new IllegalArgumentException(e);
}
}

public static byte[] getBytes(Text text) {
byte[] bytes = text.getBytes();
if (bytes.length != text.getLength()) {
bytes = new byte[text.getLength()];
System.arraycopy(text.getBytes(), 0, bytes, 0, bytes.length);
}
return bytes;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.hadoop.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.hadoop.mapreduce.OutputFormatBuilder;
import org.apache.accumulo.hadoopImpl.mapreduce.OutputFormatBuilderImpl;
import org.apache.accumulo.hadoopImpl.mapreduce.lib.OutputConfigurator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,10 @@
import java.util.Observer;
import java.util.Set;

import org.apache.accumulo.core.Constants;
import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.ColumnUpdate;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.KeyValue;
Expand Down Expand Up @@ -1582,6 +1579,8 @@ public static void verboseCounters(TaskInputOutputContext context, String locati
key.getKey().getColumnQualifier().getBytes(), key.getKey().getColumnVisibility(), value.get());
}

public final static int MAX_DATA_TO_PRINT = 64;

/**
* Output a verbose counter
*
Expand All @@ -1606,9 +1605,8 @@ public static void verboseCounters(TaskInputOutputContext context, String locati
public static void verboseCounter(TaskInputOutputContext context, String location, Text tableName, byte[] row, byte[] colFamily, byte[] colQualifier,
Text colVis, byte[] val) {
String labelString = new ColumnVisibility(colVis).toString();
String s = Key.toPrintableString(row, 0, row.length, Constants.MAX_DATA_TO_PRINT) + " "
+ Key.toPrintableString(colFamily, 0, colFamily.length, Constants.MAX_DATA_TO_PRINT) + ":"
+ Key.toPrintableString(colQualifier, 0, colQualifier.length, Constants.MAX_DATA_TO_PRINT) + " " + labelString + " "
String s = Key.toPrintableString(row, 0, row.length, MAX_DATA_TO_PRINT) + " " + Key.toPrintableString(colFamily, 0, colFamily.length, MAX_DATA_TO_PRINT)
+ ":" + Key.toPrintableString(colQualifier, 0, colQualifier.length, MAX_DATA_TO_PRINT) + " " + labelString + " "
+ (val == null ? "null" : String.valueOf(val.length) + " value bytes");

s = s.replace('\n', ' ');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.hadoopImpl.mapreduce.lib.InputConfigurator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.AccumuloException;
Expand All @@ -19,7 +20,6 @@
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.TabletId;
import org.apache.accumulo.core.util.UtilWaitThread;
import org.apache.commons.lang.time.DateUtils;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.hadoop.conf.Configuration;
Expand Down Expand Up @@ -355,7 +355,12 @@ public static Map<Text,String> getLocations(Logger log, AccumuloHelper accumuloH
keepRetrying = false;
} catch (Exception e) {
log.warn(e.getClass().getName() + ":" + e.getMessage() + " ... retrying ...", e);
UtilWaitThread.sleep(3000);
try {
TimeUnit.MILLISECONDS.sleep(3000);
} catch (InterruptedException ex) {
log.error(e.getMessage(), ex);
}

splitToLocation.clear();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.accumulo.core.Constants;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.ClientConfiguration;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.WrappingIterator;
import org.apache.accumulo.core.iteratorsImpl.system.IterationInterruptedException;
import org.apache.commons.pool.impl.GenericObjectPool;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
Expand Down Expand Up @@ -812,7 +811,7 @@ else if (key.compareTo(this.lastRangeSeeked.getStartKey()) < 0) {

if (this.setControl.isCancelledQuery()) {
log.debug("Ivarator query was cancelled");
throw new IterationInterruptedException("Ivarator query was cancelled");
throw new RuntimeException("Ivarator query was cancelled");
}

// if we have any persisted data or we have scanned a significant number of keys, then persist it completely
Expand All @@ -831,7 +830,7 @@ else if (key.compareTo(this.lastRangeSeeked.getStartKey()) < 0) {
throw new IvaratorException("Ivarator query timed out");
} else {
log.debug("Ivarator query was cancelled");
throw new IterationInterruptedException("Ivarator query was cancelled");
throw new RuntimeException("Ivarator query was cancelled");
}
}

Expand Down Expand Up @@ -1028,7 +1027,7 @@ protected SortedKeyValueIterator<Key,Value> takePoolSource() {
try {
source = ivaratorSourcePool.borrowObject();
} catch (Exception e) {
throw new IterationInterruptedException("Unable to borrow object from ivarator source pool. " + e.getMessage());
throw new RuntimeException("Unable to borrow object from ivarator source pool. " + e.getMessage());
}
return source;
}
Expand All @@ -1043,7 +1042,7 @@ protected void returnPoolSource(SortedKeyValueIterator<Key,Value> source) {
try {
ivaratorSourcePool.returnObject(source);
} catch (Exception e) {
throw new IterationInterruptedException("Unable to return object to ivarator source pool. " + e.getMessage());
throw new RuntimeException("Unable to return object to ivarator source pool. " + e.getMessage());
}
}

Expand Down

0 comments on commit d51c4e9

Please sign in to comment.