Skip to content

Commit 674fb29

Browse files
alerman, keith-ratcliffe, ddanielr
authored
Bulk v2 accumulo 4894 rebased (#3092)
Co-authored-by: keith-ratcliffe <[email protected]>
Co-authored-by: Daniel Roberts ddanielr <[email protected]>
1 parent 370d929 commit 674fb29

File tree

46 files changed

+1330
-160
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1330
-160
lines changed

common-test/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,10 @@
2525
<groupId>org.apache.zookeeper</groupId>
2626
<artifactId>zookeeper</artifactId>
2727
</exclusion>
28+
<exclusion>
29+
<groupId>org.apache.zookeeper</groupId>
30+
<artifactId>zookeeper-jute</artifactId>
31+
</exclusion>
2832
</exclusions>
2933
</dependency>
3034
<dependency>

contrib/datawave-quickstart/bin/common.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,8 @@ function verifyChecksum() {
9393
else
9494
error "------------------------------------------------------------------------"
9595
error "$(printRed "CHECKSUM MISMATCH") - Could not verify integrity of: ${tarballName}"
96+
error "Calculated: ${calculatedChecksum}"
97+
error "Expected: $3"
9698
error "------------------------------------------------------------------------"
9799
fatal "Checksum verification failed!"
98100
return 1
@@ -124,7 +126,6 @@ function downloadTarball() {
124126
fi
125127
}
126128

127-
# Needed for users who run locally instead of containers
128129
function downloadMavenTarball() {
129130
local pomFile="${DW_DATAWAVE_SOURCE_DIR:-$( cd "${DW_CLOUD_HOME}/../.." && pwd )}/pom.xml"
130131
local rootProject=":$1"

contrib/datawave-quickstart/bin/services/accumulo/install.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,8 @@ assertCreateDir "${DW_ACCUMULO_JVM_HEAPDUMP_DIR}" || exit 1
9797
sed -i'' -e "s~\(ACCUMULO_TSERVER_OPTS=\).*$~\1\"${DW_ACCUMULO_TSERVER_OPTS}\"~g" "${DW_ACCUMULO_CONF_DIR}/accumulo-env.sh"
9898
sed -i'' -e "s~\(export JAVA_HOME=\).*$~\1\"${JAVA_HOME}\"~g" "${DW_ACCUMULO_CONF_DIR}/accumulo-env.sh"
9999
sed -i'' -e "s~\(export ACCUMULO_MONITOR_OPTS=\).*$~\1\"\${POLICY} -Xmx2g -Xms512m\"~g" "${DW_ACCUMULO_CONF_DIR}/accumulo-env.sh"
100-
100+
echo 'JAVA_OPTS=('-Dcom.google.protobuf.use_unsafe_pre22_gencode' "${JAVA_OPTS[@]}")' >> "${DW_ACCUMULO_CONF_DIR}/accumulo-env.sh"
101+
cat "${DW_ACCUMULO_CONF_DIR}/accumulo-env.sh"
101102
# Update Accumulo bind host if it's not set to localhost
102103
if [ "${DW_ACCUMULO_BIND_HOST}" != "localhost" ] ; then
103104
sed -i'' -e "s/localhost/${DW_ACCUMULO_BIND_HOST}/g" ${DW_ACCUMULO_CONF_DIR}/cluster.yaml

contrib/datawave-quickstart/bin/services/datawave/bootstrap-ingest.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ DW_DATAWAVE_INGEST_HOME="${DW_CLOUD_HOME}/${DW_DATAWAVE_INGEST_SYMLINK}"
99
# ingest reducers. Set to 1 for standalone instance, but typically set to the first prime number that is less than the
1010
# number of available Accumulo tablet servers...
1111

12-
DW_DATAWAVE_INGEST_NUM_SHARDS=${DW_DATAWAVE_INGEST_NUM_SHARDS:-1}
12+
DW_DATAWAVE_INGEST_NUM_SHARDS=${DW_DATAWAVE_INGEST_NUM_SHARDS:-10}
1313

1414
# Ingest job logs, etc
1515

@@ -39,7 +39,7 @@ DW_DATAWAVE_INGEST_FLAGFILE_DIR="${DW_DATAWAVE_DATA_DIR}/flags"
3939

4040
# Comma-delimited list of configs for the FlagMaker process(es)
4141

42-
DW_DATAWAVE_INGEST_FLAGMAKER_CONFIGS=${DW_DATAWAVE_INGEST_FLAGMAKER_CONFIGS:-"${DW_DATAWAVE_INGEST_CONFIG_HOME}/flag-maker-live.xml"}
42+
DW_DATAWAVE_INGEST_FLAGMAKER_CONFIGS=${DW_DATAWAVE_INGEST_FLAGMAKER_CONFIGS:-"${DW_DATAWAVE_INGEST_CONFIG_HOME}/flag-maker-live.xml,${DW_DATAWAVE_INGEST_CONFIG_HOME}/flag-maker-bulk.xml"}
4343

4444
# Dir for ingest-related 'pid' files
4545

contrib/datawave-quickstart/bin/services/datawave/install-ingest.sh

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ if ! hadoopIsRunning ; then
4343
hadoopStart
4444
fi
4545

46-
# Create any Hadoop directories related to Datawave Ingest
46+
# Create any Hadoop directories needed for live (and optionally bulk) raw input
4747
if [[ -n "${DW_DATAWAVE_INGEST_LIVE_DATA_TYPES}" ]] ; then
4848

4949
OLD_IFS="${IFS}"
@@ -56,6 +56,20 @@ if [[ -n "${DW_DATAWAVE_INGEST_LIVE_DATA_TYPES}" ]] ; then
5656
done
5757
fi
5858

59+
# Create any Hadoop directories needed for bulk ingest input
60+
if [[ -n "${DW_DATAWAVE_INGEST_BULK_DATA_TYPES}" ]] ; then
61+
62+
OLD_IFS="${IFS}"
63+
IFS=","
64+
HDFS_RAW_INPUT_DIRS=( ${DW_DATAWAVE_INGEST_BULK_DATA_TYPES} )
65+
IFS="${OLD_IFS}"
66+
67+
for dir in "${HDFS_RAW_INPUT_DIRS[@]}" ; do
68+
# Dirs created here should be configured in your bulk flag maker config (e.g., in config/flag-maker-bulk.xml)
69+
hdfs dfs -mkdir -p "${DW_DATAWAVE_INGEST_HDFS_BASEDIR}/${dir}-bulk" || fatal "Failed to create HDFS directory: ${dir}-bulk"
70+
done
71+
fi
72+
5973
#----------------------------------------------------------
6074
# Configure/update Accumulo classpath, set auths, etc
6175
#----------------------------------------------------------

contrib/datawave-quickstart/docker/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
<dist.wildfly>wildfly-${version.quickstart.wildfly}.tar.gz</dist.wildfly>
1919
<dist.zookeeper>zookeeper-${version.quickstart.zookeeper}.tar.gz</dist.zookeeper>
2020

21-
<sha512.checksum.accumulo>1a27a144dc31f55ccc8e081b6c1bc6cc0362a8391838c53c166cb45291ff8f35867fd8e4729aa7b2c540f8b721f8c6953281bf589fc7fe320e4dc4d20b87abc4</sha512.checksum.accumulo>
21+
<sha512.checksum.accumulo>3a5e9ade2c84d4f8e0cb0551a9f6ea74a5cc2611afa141f4685f26431132c4cc60daeeedf22ab27c961ed7cd2df8b687e9fcaf00280093743e5c576fcdb53a52</sha512.checksum.accumulo>
2222
<sha512.checksum.hadoop>09cda6943625bc8e4307deca7a4df76d676a51aca1b9a0171938b793521dfe1ab5970fdb9a490bab34c12a2230ffdaed2992bad16458169ac51b281be1ab6741</sha512.checksum.hadoop>
2323
<sha512.checksum.wildfly>fcbdff4bc275f478c3bf5f665a83e62468a920e58fcddeaa2710272dd0f1ce3154cdc371d5011763a6be24ae1a5e0bca0218cceea63543edb4b5cf22de60b485</sha512.checksum.wildfly>
2424
<sha512.checksum.zookeeper>4d85d6f7644d5f36d9c4d65e78bd662ab35ebe1380d762c24c12b98af029027eee453437c9245dbdf2b9beb77cd6b690b69e26f91cf9d11b0a183a979c73fa43</sha512.checksum.zookeeper>
@@ -30,7 +30,7 @@
3030
<url.wildfly>file://${DW_CLOUD_PLUGINS}/datawave/${dist.wildfly}</url.wildfly>
3131
<url.zookeeper>file://${DW_CLOUD_PLUGINS}/accumulo/${dist.zookeeper}</url.zookeeper>
3232

33-
<version.quickstart.accumulo>2.1.3</version.quickstart.accumulo>
33+
<version.quickstart.accumulo>2.1.4-5792fed3-bulkv2</version.quickstart.accumulo>
3434
<version.quickstart.hadoop>${version.hadoop}</version.quickstart.hadoop>
3535
<version.quickstart.wildfly>${version.wildfly}</version.quickstart.wildfly>
3636
<version.quickstart.zookeeper>${version.zookeeper}</version.quickstart.zookeeper>

core/in-memory-accumulo/pom.xml

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
<maven.compiler.target>11</maven.compiler.target>
3232
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
3333
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
34-
<version.accumulo>2.1.1</version.accumulo>
34+
<version.accumulo>2.1.4-5792fed3-bulkv2</version.accumulo>
3535
<version.hadoop>3.3.4</version.hadoop>
3636
<version.junit5>5.12.0</version.junit5>
3737
<version.log4j.bom>2.17.2</version.log4j.bom>
@@ -106,6 +106,18 @@
106106
<artifactId>mockito-core</artifactId>
107107
</dependency>
108108
</dependencies>
109+
<repositories>
110+
<repository>
111+
<releases>
112+
<enabled>true</enabled>
113+
</releases>
114+
<snapshots>
115+
<enabled>false</enabled>
116+
</snapshots>
117+
<id>github-datawave</id>
118+
<url>https://maven.pkg.github.com/NationalSecurityAgency/datawave</url>
119+
</repository>
120+
</repositories>
109121
<pluginRepositories>
110122
<pluginRepository>
111123
<releases>

core/in-memory-accumulo/src/main/java/datawave/accumulo/inmemory/InMemoryInstanceOperations.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,11 @@ public List<String> getManagerLocations() {
7979
return null;
8080
}
8181

82+
@Override
83+
public Set<String> getCompactors() {
84+
return Set.of();
85+
}
86+
8287
@Override
8388
public List<String> getTabletServers() {
8489
return new ArrayList<>();

core/in-memory-accumulo/src/main/java/datawave/accumulo/inmemory/InMemoryScannerBase.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
import org.apache.accumulo.core.data.ByteSequence;
3434
import org.apache.accumulo.core.data.Column;
3535
import org.apache.accumulo.core.data.Key;
36+
import org.apache.accumulo.core.data.TableId;
3637
import org.apache.accumulo.core.data.Value;
3738
import org.apache.accumulo.core.iterators.IteratorEnvironment;
3839
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
@@ -45,6 +46,7 @@
4546
import org.apache.accumulo.core.iteratorsImpl.system.MultiIterator;
4647
import org.apache.accumulo.core.iteratorsImpl.system.VisibilityFilter;
4748
import org.apache.accumulo.core.security.Authorizations;
49+
import org.apache.accumulo.core.spi.common.ServiceEnvironment;
4850

4951
public class InMemoryScannerBase extends ScannerOptions {
5052

@@ -89,6 +91,11 @@ public PluginEnvironment getPluginEnv() {
8991
return MockPluginEnvironment.newInstance(getConfig());
9092
}
9193

94+
@Override
95+
public TableId getTableId() {
96+
return null;
97+
}
98+
9299
@Override
93100
public IteratorScope getIteratorScope() {
94101
return IteratorScope.scan;
@@ -103,6 +110,11 @@ public boolean isUserCompaction() {
103110
return false;
104111
}
105112

113+
@Override
114+
public ServiceEnvironment getServiceEnv() {
115+
return null;
116+
}
117+
106118
private ArrayList<SortedKeyValueIterator<Key,Value>> topLevelIterators = new ArrayList<>();
107119

108120
@Override

core/utils/accumulo-utils/pom.xml

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,10 +55,22 @@
5555
<dependency>
5656
<groupId>io.dropwizard.metrics</groupId>
5757
<artifactId>metrics-core</artifactId>
58+
<exclusions>
59+
<exclusion>
60+
<groupId>org.slf4j</groupId>
61+
<artifactId>slf4j-api</artifactId>
62+
</exclusion>
63+
</exclusions>
5864
</dependency>
5965
<dependency>
6066
<groupId>org.apache.accumulo</groupId>
6167
<artifactId>accumulo-core</artifactId>
68+
<exclusions>
69+
<exclusion>
70+
<groupId>org.slf4j</groupId>
71+
<artifactId>slf4j-api</artifactId>
72+
</exclusion>
73+
</exclusions>
6274
</dependency>
6375
<dependency>
6476
<groupId>org.apache.commons</groupId>
@@ -68,6 +80,11 @@
6880
<groupId>org.apache.zookeeper</groupId>
6981
<artifactId>zookeeper</artifactId>
7082
</dependency>
83+
<dependency>
84+
<groupId>org.slf4j</groupId>
85+
<artifactId>slf4j-api</artifactId>
86+
<version>${version.slf4j}</version>
87+
</dependency>
7188
<dependency>
7289
<groupId>org.springframework</groupId>
7390
<artifactId>spring-context-support</artifactId>

0 commit comments

Comments
 (0)