Develop version 2.2.1 #35

Merged
9 commits merged on Jun 26, 2018
4 changes: 2 additions & 2 deletions Makefile
@@ -1,9 +1,9 @@
default: build dockerize
default: build dockerize-all

build:
mvn clean package -U -Dmaven.test.skip=true

dockerize: dockerize-controller dockerize-datagen dockerize-taskgen dockerize-evalmodule dockerize-virtuoso-baseline
dockerize-all: dockerize-controller dockerize-datagen dockerize-taskgen dockerize-evalmodule dockerize-virtuoso-baseline

dockerize-controller:
docker build -f docker/versioningbenchmarkcontroller.docker -t git.project-hobbit.eu:4567/papv/versioningbenchmarkcontroller:$(tag) .
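
Note on the Makefile change above: the aggregate image-building target is renamed from dockerize to dockerize-all, keeping it clearly distinct from the per-component dockerize-* targets it fans out to. As a usage sketch (the tag value below is only an illustration, not something fixed by this PR), the default goal builds the Maven artifacts and then all benchmark images:

    # build the jars and every benchmark image in one go
    make tag=2.2.1

    # or rebuild a single image, e.g. the benchmark controller
    make dockerize-controller tag=2.2.1
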
72 changes: 7 additions & 65 deletions required_files/load_to_virtuoso.sh
Original file line number Diff line number Diff line change
@@ -4,9 +4,7 @@ VIRTUOSO_BIN=/usr/local/virtuoso-opensource/bin
GRAPH_NAME=http://graph.version.
DATASETS_PATH=/versioning/data
DATASETS_PATH_FINAL=/versioning/data/final
ONTOLOGIES_PATH=/versioning/ontologies
SERIALIZATION_FORMAT=$1
NUMBER_OF_VERSIONS=$2
NUMBER_OF_VERSIONS=$1
total_cores=$(cat /proc/cpuinfo | grep processor | wc -l)
rdf_loaders=$(awk "BEGIN {printf \"%d\", $total_cores/2.5}")

@@ -19,7 +17,7 @@ prll_rdf_loader_run() {
wait
$VIRTUOSO_BIN/isql-v 1111 dba dba exec="checkpoint;" > /dev/null

# if there are files that failed to be loaded reload them until the succeed
# if there are files that failed to be loaded reload them until they succeed
errors=$($VIRTUOSO_BIN/isql-v 1111 dba dba exec="select count(*) from load_list where ll_error is not null;" | sed -n 9p)
files=$($VIRTUOSO_BIN/isql-v 1111 dba dba exec="select ll_file from load_list where ll_error is not null;" | sed '1,8d' | head -n $errors)

@@ -42,69 +40,14 @@ prll_rdf_loader_run() {
echo "All data files loaded successfully"
}

# prepare cw data files for loading
# sort files
start_sort=$(($(date +%s%N)/1000000))
for f in $(find $DATASETS_PATH -name 'generatedCreativeWorks-*.nt'); do
sort "$f" -o "$f"
done
end_sort=$(($(date +%s%N)/1000000))
sorttime=$(($end_sort - $start_sort))
echo "Sorted generated Creative Works in $sorttime ms."

# copy and compute the addsets
start_prepare=$(($(date +%s%N)/1000000))
mkdir $DATASETS_PATH_FINAL
# prepare bulk load
for ((i=0; i<$NUMBER_OF_VERSIONS; i++)); do
echo "Constructing v$i..."
if [ "$i" = "0" ]; then
mkdir $DATASETS_PATH_FINAL/v$i
cp $DATASETS_PATH/v$i/generatedCreativeWorks*.nt $DATASETS_PATH_FINAL/v$i
cp $DATASETS_PATH/v$i/dbpedia_final/*.nt $DATASETS_PATH_FINAL/v$i
cp $ONTOLOGIES_PATH/*.nt $DATASETS_PATH_FINAL/v$i
else
mkdir $DATASETS_PATH_FINAL/v$i
cp $ONTOLOGIES_PATH/* $DATASETS_PATH_FINAL/v$i
prev=$((i-1))

# dbpedia
# if current version contains dbpedia copy the dbpedia version, else copy the previous version
if ls $DATASETS_PATH/c$i/dbpedia_final/dbpedia_*_1000_entities.nt 1> /dev/null 2>&1; then
# copy the current version
cp $DATASETS_PATH/c$i/dbpedia_final/dbpedia_*_1000_entities.nt $DATASETS_PATH_FINAL/v$i
else
cp $DATASETS_PATH_FINAL/v$prev/dbpedia_*.nt $DATASETS_PATH_FINAL/v$i
fi

# creative works
if ls $DATASETS_PATH/c$i/generatedCreativeWorks-*.deleted.nt 1> /dev/null 2>&1; then
# compute the old creative works that still exist
for f in $DATASETS_PATH_FINAL/v$prev/generatedCreativeWorks*.added.nt; do
comm_command="comm -23 $f "
for ff in $DATASETS_PATH/c$i/generatedCreativeWorks*.deleted.nt; do
comm_command+="$ff | comm -23 - "
done
filename=$(basename "$f")
comm_command=${comm_command::-14}
eval $comm_command > $DATASETS_PATH_FINAL/v$i/$filename &
done
wait
else
# copy the previous added
cp $DATASETS_PATH_FINAL/v$prev/generatedCreativeWorks*.added.nt $DATASETS_PATH_FINAL/v$i
fi
# copy the current added
cp $DATASETS_PATH/c$i/generatedCreativeWorks*.added.nt $DATASETS_PATH_FINAL/v$i
fi
end_compute=$(($(date +%s%N)/1000000))

# prepare bulk load
$VIRTUOSO_BIN/isql-v 1111 dba dba exec="ld_dir('$DATASETS_PATH_FINAL/v$i', '*', '$GRAPH_NAME$i');" > /dev/null
done
end_prepare=$(($(date +%s%N)/1000000))

# bulk load
echo "Loading data files into virtuoso using $rdf_loaders rdf loaders..."
start_load=$(($(date +%s%N)/1000000))
prll_rdf_loader_run $rdf_loaders
end_load=$(($(date +%s%N)/1000000))

@@ -114,10 +57,9 @@ for ((j=0; j<$NUMBER_OF_VERSIONS; j++)); do
done
end_size=$(($(date +%s%N)/1000000))

preptime=$(($end_prepare - $start_prepare))
loadingtime=$(($end_load - $end_prepare))
loadingtime=$(($end_load - $start_load))
sizetime=$(($end_size - $end_load))
overalltime=$(($end_size - $start_sort))
overalltime=$(($end_size - $start_load))

echo "Loading of all generated data to Virtuoso triple store completed successfully. Time: $overalltime ms (preparation: $preptime, loading: $loadingtime, size: $sizetime)"
echo "Loading of all generated data to Virtuoso triple store completed successfully. Time: $overalltime ms (loading: $loadingtime, size: $sizetime)"

67 changes: 67 additions & 0 deletions required_files/versions_construction.sh
@@ -0,0 +1,67 @@
#!/bin/bash

DATASETS_PATH=/versioning/data
DATASETS_PATH_FINAL=/versioning/data/final
ONTOLOGIES_PATH=/versioning/ontologies
NUMBER_OF_VERSIONS=$1

# prepare cw data files for loading
# sort files
start_sort=$(($(date +%s%N)/1000000))
for f in $(find $DATASETS_PATH -name 'generatedCreativeWorks-*.nt'); do
sort "$f" -o "$f"
done
end_sort=$(($(date +%s%N)/1000000))
sorttime=$(($end_sort - $start_sort))
echo "Generated creative works sorted successfully in $sorttime ms."

# copy and compute the addsets
start_prepare=$(($(date +%s%N)/1000000))
mkdir $DATASETS_PATH_FINAL
for ((i=0; i<$NUMBER_OF_VERSIONS; i++)); do
start_v_construction=$(($(date +%s%N)/1000000))
echo "Constructing v$i..."
if [ "$i" = "0" ]; then
mkdir $DATASETS_PATH_FINAL/v$i
cp $DATASETS_PATH/v$i/generatedCreativeWorks*.nt $DATASETS_PATH_FINAL/v$i
cp $DATASETS_PATH/v$i/dbpedia_final/*.nt $DATASETS_PATH_FINAL/v$i
cp $ONTOLOGIES_PATH/*.nt $DATASETS_PATH_FINAL/v$i
else
mkdir $DATASETS_PATH_FINAL/v$i
cp $ONTOLOGIES_PATH/* $DATASETS_PATH_FINAL/v$i
prev=$((i-1))

# dbpedia
# if current version contains dbpedia copy the dbpedia version, else copy the previous version
if ls $DATASETS_PATH/c$i/dbpedia_final/dbpedia_*_1000_entities.nt 1> /dev/null 2>&1; then
# copy the current version
cp $DATASETS_PATH/c$i/dbpedia_final/dbpedia_*_1000_entities.nt $DATASETS_PATH_FINAL/v$i
else
cp $DATASETS_PATH_FINAL/v$prev/dbpedia_*.nt $DATASETS_PATH_FINAL/v$i
fi

# creative works
if ls $DATASETS_PATH/c$i/generatedCreativeWorks-*.deleted.nt 1> /dev/null 2>&1; then
# compute the old creative works that still exist
for f in $DATASETS_PATH_FINAL/v$prev/generatedCreativeWorks*.added.nt; do
comm_command="comm -23 $f "
for ff in $DATASETS_PATH/c$i/generatedCreativeWorks*.deleted.nt; do
comm_command+="$ff | comm -23 - "
done
filename=$(basename "$f")
comm_command=${comm_command::-14}
eval $comm_command > $DATASETS_PATH_FINAL/v$i/$filename &
done
wait
else
# copy the previous added
cp $DATASETS_PATH_FINAL/v$prev/generatedCreativeWorks*.added.nt $DATASETS_PATH_FINAL/v$i
fi
# copy the current added
cp $DATASETS_PATH/c$i/generatedCreativeWorks*.added.nt $DATASETS_PATH_FINAL/v$i
fi
end_v_construction=$(($(date +%s%N)/1000000))
v_construction=$(($end_v_construction - $start_v_construction))
echo "v$i constructed successfully in $v_construction ms"
done
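
A note on the delete-set handling in the new versions_construction.sh above: the inner loop builds a pipeline of comm -23 steps (comm -23 keeps only lines unique to its first input, i.e. a set difference, which requires sorted input and is why the .nt files are sorted first) and strips the trailing " | comm -23 - " before eval. For a changeset with two delete files, the eval'd command expands to something like the following (the concrete file names are made up for illustration):

    # illustrative expansion of the eval'd pipeline (hypothetical file names)
    comm -23 /versioning/data/final/v0/generatedCreativeWorks-0.added.nt \
             /versioning/data/c1/generatedCreativeWorks-0.deleted.nt \
      | comm -23 - /versioning/data/c1/generatedCreativeWorks-1.deleted.nt \
      > /versioning/data/final/v1/generatedCreativeWorks-0.added.nt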

@@ -62,7 +62,7 @@ public void init() throws Exception {
int insRatio = (Integer) getPropertyOrDefault(PREFIX + "versionInsertionRatio", 5);
int delRatio = (Integer) getPropertyOrDefault(PREFIX + "versionDeletionRatio", 3);
String dataForm = (String) getPropertyOrDefault(PREFIX + "generatedDataForm", "ic");
String enabledQueries = (String) getPropertyOrDefault(PREFIX + "enableDisableQT", "QT1=1;QT2=1;QT3=1;QT4=1;QT5=1;QT6=1;QT7=1;QT8=1");
String enabledQueries = (String) getPropertyOrDefault(PREFIX + "enableDisableQT", "QT1=1, QT2=1, QT3=1, QT4=1, QT5=1, QT6=1, QT7=1, QT8=1");

loadingTimes = new long[numOfVersions];
triplesToBeAdded = new AtomicIntegerArray(numOfVersions);
@@ -242,23 +242,25 @@ protected void executeBenchmark() throws Exception {
// wait for the data generators to finish their work
LOGGER.info("Waiting for the data generators to finish their work.");
waitForDataGenToFinish();
LOGGER.info("Data generators finished.");

// wait for the task generators to finish their work
LOGGER.info("Waiting for the task generators to finish their work.");
waitForTaskGenToFinish();
LOGGER.info("Task generators finished.");
LOGGER.info("Data generators finished.");

LOGGER.info("Computing system's storage space overhead after data loading");
ResourceUsageInformation infoAfter = resUsageRequester.getSystemResourceUsage();
if (infoAfter.getDiskStats() != null) {
systemStorageSpaceCost = infoAfter.getDiskStats().getFsSizeSum() - systemInitialUsableSpace;
long systemFinalUsableSpace = infoAfter.getDiskStats().getFsSizeSum();
systemStorageSpaceCost = systemFinalUsableSpace - systemInitialUsableSpace;
LOGGER.info("System's usable space after data loading: " + systemFinalUsableSpace);
LOGGER.info("System's storage space overhead after data loading: " + systemStorageSpaceCost);
} else {
LOGGER.info(infoAfter.toString());
LOGGER.info("Got null as response.");
}

// wait for the task generators to finish their work
LOGGER.info("Waiting for the task generators to finish their work.");
waitForTaskGenToFinish();
LOGGER.info("Task generators finished.");

// wait for the system to terminate
LOGGER.info("Waiting for the system to terminate.");
waitForSystemToFinish(1000 * 60 * 25);