diff --git a/Makefile b/Makefile
index 5734754..70cd328 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,9 @@
-default: build dockerize
+default: build dockerize-all

 build:
 	mvn clean package -U -Dmaven.test.skip=true

-dockerize: dockerize-controller dockerize-datagen dockerize-taskgen dockerize-evalmodule dockerize-virtuoso-baseline
+dockerize-all: dockerize-controller dockerize-datagen dockerize-taskgen dockerize-evalmodule dockerize-virtuoso-baseline

 dockerize-controller:
 	docker build -f docker/versioningbenchmarkcontroller.docker -t git.project-hobbit.eu:4567/papv/versioningbenchmarkcontroller:$(tag) .
diff --git a/required_files/load_to_virtuoso.sh b/required_files/load_to_virtuoso.sh
index 20ef1b2..2bf4c3b 100644
--- a/required_files/load_to_virtuoso.sh
+++ b/required_files/load_to_virtuoso.sh
@@ -4,9 +4,7 @@ VIRTUOSO_BIN=/usr/local/virtuoso-opensource/bin
 GRAPH_NAME=http://graph.version.
 DATASETS_PATH=/versioning/data
 DATASETS_PATH_FINAL=/versioning/data/final
-ONTOLOGIES_PATH=/versioning/ontologies
-SERIALIZATION_FORMAT=$1
-NUMBER_OF_VERSIONS=$2
+NUMBER_OF_VERSIONS=$1

 total_cores=$(cat /proc/cpuinfo | grep processor | wc -l)
 rdf_loaders=$(awk "BEGIN {printf \"%d\", $total_cores/2.5}")
@@ -19,7 +17,7 @@ prll_rdf_loader_run() {
 	wait
 	$VIRTUOSO_BIN/isql-v 1111 dba dba exec="checkpoint;" > /dev/null

-	# if there are files that failed to be loaded reload them until the succeed
+	# if there are files that failed to be loaded reload them until they succeed
 	errors=$($VIRTUOSO_BIN/isql-v 1111 dba dba exec="select count(*) from load_list where ll_error is not null;" | sed -n 9p)
 	files=$($VIRTUOSO_BIN/isql-v 1111 dba dba exec="select ll_file from load_list where ll_error is not null;" | sed '1,8d' | head -n $errors)
@@ -42,69 +40,14 @@ prll_rdf_loader_run() {
 	echo "All data files loaded successfully"
 }

-# prepare cw data files for loading
-# sort files
-start_sort=$(($(date +%s%N)/1000000))
-for f in $(find $DATASETS_PATH -name 'generatedCreativeWorks-*.nt'); do
-	sort "$f" -o "$f"
-done
-end_sort=$(($(date +%s%N)/1000000))
-sorttime=$(($end_sort - $start_sort))
-echo "Sorted generated Creative Works in $sorttime ms."
-
-# copy and compute the addsets
-start_prepare=$(($(date +%s%N)/1000000))
-mkdir $DATASETS_PATH_FINAL
+# prepare bulk load
 for ((i=0; i<$NUMBER_OF_VERSIONS; i++)); do
-	echo "Constructing v$i..."
-	if [ "$i" = "0" ]; then
-		mkdir $DATASETS_PATH_FINAL/v$i
-		cp $DATASETS_PATH/v$i/generatedCreativeWorks*.nt $DATASETS_PATH_FINAL/v$i
-		cp $DATASETS_PATH/v$i/dbpedia_final/*.nt $DATASETS_PATH_FINAL/v$i
-		cp $ONTOLOGIES_PATH/*.nt $DATASETS_PATH_FINAL/v$i
-	else
-		mkdir $DATASETS_PATH_FINAL/v$i
-		cp $ONTOLOGIES_PATH/* $DATASETS_PATH_FINAL/v$i
-		prev=$((i-1))
-
-		# dbpedia
-		# if current version contains dbpedia copy the dbpedia version, else copy the previous version
-		if ls $DATASETS_PATH/c$i/dbpedia_final/dbpedia_*_1000_entities.nt 1> /dev/null 2>&1; then
-			# copy the current version
-			cp $DATASETS_PATH/c$i/dbpedia_final/dbpedia_*_1000_entities.nt $DATASETS_PATH_FINAL/v$i
-		else
-			cp $DATASETS_PATH_FINAL/v$prev/dbpedia_*.nt $DATASETS_PATH_FINAL/v$i
-		fi
-
-		# creative works
-		if ls $DATASETS_PATH/c$i/generatedCreativeWorks-*.deleted.nt 1> /dev/null 2>&1; then
-			# compute the old creative works that still exist
-			for f in $DATASETS_PATH_FINAL/v$prev/generatedCreativeWorks*.added.nt; do
-				comm_command="comm -23 $f "
-				for ff in $DATASETS_PATH/c$i/generatedCreativeWorks*.deleted.nt; do
-					comm_command+="$ff | comm -23 - "
-				done
-				filename=$(basename "$f")
-				comm_command=${comm_command::-14}
-				eval $comm_command > $DATASETS_PATH_FINAL/v$i/$filename &
-			done
-			wait
-		else
-			# copy the previous added
-			cp $DATASETS_PATH_FINAL/v$prev/generatedCreativeWorks*.added.nt $DATASETS_PATH_FINAL/v$i
-		fi
-		# copy the current added
-		cp $DATASETS_PATH/c$i/generatedCreativeWorks*.added.nt $DATASETS_PATH_FINAL/v$i
-	fi
-	end_compute=$(($(date +%s%N)/1000000))
-
-	# prepare bulk load
 	$VIRTUOSO_BIN/isql-v 1111 dba dba exec="ld_dir('$DATASETS_PATH_FINAL/v$i', '*', '$GRAPH_NAME$i');" > /dev/null
 done
-end_prepare=$(($(date +%s%N)/1000000))

 # bulk load
 echo "Loading data files into virtuoso using $rdf_loaders rdf loaders..."
+start_load=$(($(date +%s%N)/1000000))
 prll_rdf_loader_run $rdf_loaders
 end_load=$(($(date +%s%N)/1000000))
@@ -114,10 +57,9 @@ for ((j=0; j<$NUMBER_OF_VERSIONS; j++)); do
 done
 end_size=$(($(date +%s%N)/1000000))

-preptime=$(($end_prepare - $start_prepare))
-loadingtime=$(($end_load - $end_prepare))
+loadingtime=$(($end_load - $start_load))
 sizetime=$(($end_size - $end_load))
-overalltime=$(($end_size - $start_sort))
+overalltime=$(($end_size - $start_load))

-echo "Loading of all generated data to Virtuoso triple store completed successfully. Time: $overalltime ms (preparation: $preptime, loading: $loadingtime, size: $sizetime)"
+echo "Loading of all generated data to Virtuoso triple store completed successfully. Time: $overalltime ms (loading: $loadingtime, size: $sizetime)"
diff --git a/required_files/versions_construction.sh b/required_files/versions_construction.sh
new file mode 100644
index 0000000..10cafd5
--- /dev/null
+++ b/required_files/versions_construction.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+DATASETS_PATH=/versioning/data
+DATASETS_PATH_FINAL=/versioning/data/final
+ONTOLOGIES_PATH=/versioning/ontologies
+NUMBER_OF_VERSIONS=$1
+
+# prepare cw data files for loading
+# sort files
+start_sort=$(($(date +%s%N)/1000000))
+for f in $(find $DATASETS_PATH -name 'generatedCreativeWorks-*.nt'); do
+	sort "$f" -o "$f"
+done
+end_sort=$(($(date +%s%N)/1000000))
+sorttime=$(($end_sort - $start_sort))
+echo "Generated creative works sorted successfully in $sorttime ms."
+
+# copy and compute the addsets
+start_prepare=$(($(date +%s%N)/1000000))
+mkdir $DATASETS_PATH_FINAL
+for ((i=0; i<$NUMBER_OF_VERSIONS; i++)); do
+	start_v_construction=$(($(date +%s%N)/1000000))
+	echo "Constructing v$i..."
+	if [ "$i" = "0" ]; then
+		mkdir $DATASETS_PATH_FINAL/v$i
+		cp $DATASETS_PATH/v$i/generatedCreativeWorks*.nt $DATASETS_PATH_FINAL/v$i
+		cp $DATASETS_PATH/v$i/dbpedia_final/*.nt $DATASETS_PATH_FINAL/v$i
+		cp $ONTOLOGIES_PATH/*.nt $DATASETS_PATH_FINAL/v$i
+	else
+		mkdir $DATASETS_PATH_FINAL/v$i
+		cp $ONTOLOGIES_PATH/* $DATASETS_PATH_FINAL/v$i
+		prev=$((i-1))
+
+		# dbpedia
+		# if current version contains dbpedia copy the dbpedia version, else copy the previous version
+		if ls $DATASETS_PATH/c$i/dbpedia_final/dbpedia_*_1000_entities.nt 1> /dev/null 2>&1; then
+			# copy the current version
+			cp $DATASETS_PATH/c$i/dbpedia_final/dbpedia_*_1000_entities.nt $DATASETS_PATH_FINAL/v$i
+		else
+			cp $DATASETS_PATH_FINAL/v$prev/dbpedia_*.nt $DATASETS_PATH_FINAL/v$i
+		fi
+
+		# creative works
+		if ls $DATASETS_PATH/c$i/generatedCreativeWorks-*.deleted.nt 1> /dev/null 2>&1; then
+			# compute the old creative works that still exist
+			for f in $DATASETS_PATH_FINAL/v$prev/generatedCreativeWorks*.added.nt; do
+				comm_command="comm -23 $f "
+				for ff in $DATASETS_PATH/c$i/generatedCreativeWorks*.deleted.nt; do
+					comm_command+="$ff | comm -23 - "
+				done
+				filename=$(basename "$f")
+				comm_command=${comm_command::-14}
+				eval $comm_command > $DATASETS_PATH_FINAL/v$i/$filename &
+			done
+			wait
+		else
+			# copy the previous added
+			cp $DATASETS_PATH_FINAL/v$prev/generatedCreativeWorks*.added.nt $DATASETS_PATH_FINAL/v$i
+		fi
+		# copy the current added
+		cp $DATASETS_PATH/c$i/generatedCreativeWorks*.added.nt $DATASETS_PATH_FINAL/v$i
+	fi
+	end_v_construction=$(($(date +%s%N)/1000000))
+	v_construction=$(($end_v_construction - $start_v_construction))
+	echo "v$i constructed successfully in $v_construction ms"
+done
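The delete-set subtraction above leans on comm(1): with both inputs sorted, comm -23 A B prints only the lines unique to A, which is why the script sorts every generatedCreativeWorks-*.nt file first. The inner loop strings one comm per delete-set into a pipeline, and ${comm_command::-14} trims the dangling " | comm -23 - " (14 characters) left over after the last file. For two hypothetical delete-sets the evaluated command is equivalent to:

    comm -23 v1/generatedCreativeWorks-0.added.nt del-a.deleted.nt \
        | comm -23 - del-b.deleted.nt \
        > v2/generatedCreativeWorks-0.added.nt

i.e. the previous version's add-set minus every triple that any delete-set of the current changeset removed (filenames illustrative).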
finished."); LOGGER.info("Computing system's storage space overhead after data loading"); ResourceUsageInformation infoAfter = resUsageRequester.getSystemResourceUsage(); if (infoAfter.getDiskStats() != null) { - systemStorageSpaceCost = infoAfter.getDiskStats().getFsSizeSum() - systemInitialUsableSpace; + long systemFinalUsableSpace = infoAfter.getDiskStats().getFsSizeSum(); + systemStorageSpaceCost = systemFinalUsableSpace - systemInitialUsableSpace; + LOGGER.info("System's usable space after data loading: " + systemFinalUsableSpace); LOGGER.info("System's storage space overhead after data loading: " + systemStorageSpaceCost); } else { LOGGER.info(infoAfter.toString()); LOGGER.info("Got null as response."); } + // wait for the task generators to finish their work + LOGGER.info("Waiting for the task generators to finish their work."); + waitForTaskGenToFinish(); + LOGGER.info("Task generators finished."); + // wait for the system to terminate LOGGER.info("Waiting for the system to terminate."); waitForSystemToFinish(1000 * 60 * 25); diff --git a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java index 95b9b37..3f84d5e 100644 --- a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java +++ b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java @@ -38,7 +38,6 @@ import org.apache.jena.query.ResultSetFormatter; import org.apache.jena.query.ResultSetRewindable; import org.hobbit.benchmark.versioning.Task; -import org.hobbit.benchmark.versioning.properties.RDFUtils; import org.hobbit.benchmark.versioning.properties.VersioningConstants; import org.hobbit.benchmark.versioning.util.FTPUtils; import org.hobbit.benchmark.versioning.util.SystemAdapterConstants; @@ -98,7 +97,8 @@ public class VersioningDataGenerator extends AbstractDataGenerator { private int[] cwsToBeLoaded; private Properties enabledQueryTypes = new Properties(); - + private boolean allQueriesDisabled = true; + private AtomicInteger numberOfmessages = new AtomicInteger(0); private Configuration configuration = new Configuration(); @@ -157,6 +157,7 @@ public void init() throws Exception { Matcher matcher = pattern.matcher(enabledQueryTypesParam); String enabledQueryTypesParamProp = ""; while (matcher.find()) { + allQueriesDisabled = allQueriesDisabled ? !matcher.group(2).equals("1") : false; enabledQueryTypesParamProp += "QT" + matcher.group(1) + "=" + matcher.group(2) + "\n"; } enabledQueryTypes.load(new StringReader(enabledQueryTypesParamProp)); @@ -185,8 +186,11 @@ public void init() throws Exception { DataGenerator dataGenerator = new DataGenerator(randomGenerator, configuration, definitions, dataGeneratorWorkers, totalTriples, maxTriplesPerFile, initialVersionDataPath, serializationFormat); dataGenerator.produceData(); cwsToBeLoaded[0] = v0SizeInTriples; - triplesExpectedToBeAdded[0] = dataGenerator.getTriplesGeneratedSoFar().intValue(); + // ontologies triples included + triplesExpectedToBeAdded[0] = dataGenerator.getTriplesGeneratedSoFar().intValue() + VersioningConstants.ONTOLOGIES_TRIPLES; triplesExpectedToBeDeleted[0] = 0; + triplesExpectedToBeLoaded[0] = triplesExpectedToBeAdded[0]; + LOGGER.info("triplesExpectedToBeLoaded-0: " + triplesExpectedToBeLoaded[0]); // Generate the change sets. Additions and deletions are supported. 
 		// TODO: support changes
@@ -253,44 +257,57 @@ public void init() throws Exception {
 			preVersionDeletedCWs = currVersionDeletedCreativeWorks;
 			triplesExpectedToBeDeleted[i] = currVersionDeletedTriples;
 			long deleteSetEnd = System.currentTimeMillis();
-			LOGGER.info("Deleteset of total " + String.format(Locale.US, "%,d", preVersionDeletedCWs).replace(',', '.') + " Creative Works generated successfully. Triples: " + String.format(Locale.US, "%,d", currVersionDeletedTriples).replace(',', '.') + " . Target: " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples. Time: " + (deleteSetEnd - deleteSetStart) + " ms.");
+			LOGGER.info("Deleteset of total "
+					+ String.format(Locale.US, "%,d", preVersionDeletedCWs).replace(',', '.') + " creative works generated successfully. "
+					+ "Triples: " + String.format(Locale.US, "%,d", currVersionDeletedTriples).replace(',', '.') + ". "
+					+ "Target: " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples. "
+					+ "Time: " + (deleteSetEnd - deleteSetStart) + " ms.");

 			// produce the add set
 			LOGGER.info("Generating version " + i + " add-set.");
 			dataGenerator.produceAdded(destinationPath, triplesToBeAdded);
 			triplesExpectedToBeAdded[i] = dataGenerator.getTriplesGeneratedSoFar().intValue();
-
+
+			// for the generated version compute the number of triples (creative works + ontologies) that are expected
+			// to be loaded by the system, so the evaluation module can compute the ingestion and average changes speeds.
+			triplesExpectedToBeLoaded[i] = triplesExpectedToBeLoaded[i-1] + triplesExpectedToBeAdded[i] - triplesExpectedToBeDeleted[i];
+			LOGGER.info("triplesExpectedToBeLoaded-" + i + ": " + triplesExpectedToBeLoaded[i]);
 		}
 		long changeSetEnd = System.currentTimeMillis();
 		LOGGER.info("All changesets generated successfully. Time: " + (changeSetEnd - changeSetStart) + " ms.");

 		// Evenly distribute the 5 dbpedia versions to the total number of versions that were generated
 		distributeDBpediaVersions();
+
+		// construct all versions as independent copies
+		constructVersions();
+
+		// if all query types are disabled skip this part
+		if(!allQueriesDisabled) {
+			LOGGER.info("Generating tasks...");
+			// 3) Generate SPARQL query tasks
+			// generate benchmark tasks substitution parameters
+			String queriesPath = System.getProperty("user.dir") + File.separator + "query_templates";
+			versioningMustacheTemplatesHolder.loadFrom(queriesPath);
+			generateQuerySubstitutionParameters();

-		LOGGER.info("Generating tasks...");
-		// 3) Generate SPARQL query tasks
-		// generate benchmark tasks substitution parameters
-		String queriesPath = System.getProperty("user.dir") + File.separator + "query_templates";
-		versioningMustacheTemplatesHolder.loadFrom(queriesPath);
-		generateQuerySubstitutionParameters();
-
-		// initialize substitution parameters
-		String substitutionParametersPath = System.getProperty("user.dir") + File.separator + "substitution_parameters";
-		LOGGER.info("Initializing parameters for SPARQL query tasks...");
-		substitutionQueryParametersManager.initVersioningSubstitutionParameters(substitutionParametersPath, false, false);
-		LOGGER.info("Query parameters initialized successfully.");
-		// build mustache templates to create queries
-		LOGGER.info("Building SPRQL tasks...");
-		buildSPRQLQueries();
-		LOGGER.info("All SPRQL tasks built successfully.");
+			// initialize substitution parameters
+			String substitutionParametersPath = System.getProperty("user.dir") + File.separator + "substitution_parameters";
+			LOGGER.info("Initializing parameters for SPARQL query tasks...");
+			substitutionQueryParametersManager.initVersioningSubstitutionParameters(substitutionParametersPath, false, false);
+			LOGGER.info("Query parameters initialized successfully.");

-		LOGGER.info("Loading generating data, in order to compute gold standard...");
-		// load generated creative works to virtuoso, in order to compute the gold standard
-		loadFirstNVersions(numberOfVersions);
-
-		// compute expected answers for all tasks
-		LOGGER.info("Computing expected answers for generated SPARQL tasks...");
-		computeExpectedAnswers();
-		LOGGER.info("Expected answers have computed successfully for all generated SPRQL tasks.");
+			// build mustache templates to create queries
+			LOGGER.info("Building SPARQL tasks...");
+			buildSPRQLQueries();
+			LOGGER.info("All SPARQL tasks built successfully.");
+
+			// compute expected answers for all tasks
+			LOGGER.info("Computing expected answers for generated SPARQL tasks...");
+			computeExpectedAnswers();
+
+			LOGGER.info("Expected answers have been computed successfully for all generated SPARQL tasks.");
+		}

 		LOGGER.info("Data Generator initialized successfully.");
 	}
@@ -339,7 +356,7 @@ public void initFromEnv() {
 		Map<String, String> env = System.getenv();
 		// Assume that in v0Size the 40362 triples of DBpedia initial dataset
 		// plus the 8135 ontologies triples are included
-		v0SizeInTriples = (Integer) getFromEnv(env, VersioningConstants.V0_SIZE_IN_TRIPLES, 0) - 48497 ;
+		v0SizeInTriples = (Integer) getFromEnv(env, VersioningConstants.V0_SIZE_IN_TRIPLES, 0) - VersioningConstants.DBPEDIA_ADDED_TRIPLES_V0 - VersioningConstants.ONTOLOGIES_TRIPLES;
 		numberOfVersions = (Integer) getFromEnv(env, VersioningConstants.NUMBER_OF_VERSIONS, 0);
 		subGeneratorSeed = (Integer) getFromEnv(env, VersioningConstants.DATA_GENERATOR_SEED, 0) + getGeneratorId();
 		versionInsertionRatio = (Integer) getFromEnv(env, VersioningConstants.VERSION_INSERTION_RATIO, 0);
@@ -385,34 +402,29 @@ private void distributeDBpediaVersions() {
 		List<File> deletedDataFiles = (List<File>) FileUtils.listFiles(changesetsDbpediaPathFile, new String[] { "deleted.nt" }, false);
 		Collections.sort(deletedDataFiles);

-		// if the number of versions that have to be produced is larger than the total 5 of dbpedia
-		// determine in which versions the dbpedia ones will be assigned
-		dbPediaVersionsDistribution = new int[numberOfVersions];
+		// evenly distribute the 5 dbpedia versions over the total number of produced ones
+		dbPediaVersionsDistribution = new int[numberOfVersions];
 		if(numberOfVersions > 5) {
 			LOGGER.info("Distributing the 5 DBpedia versions to the total " + numberOfVersions + " produced...");
 			Arrays.fill(dbPediaVersionsDistribution, 0);
 			for(int dbpediaVersion = 0; dbpediaVersion < VersioningConstants.DBPEDIA_VERSIONS; dbpediaVersion++) {
 				int versionIndex = Math.round((numberOfVersions - 1) * (dbpediaVersion / 4f));
 				dbPediaVersionsDistribution[versionIndex] = 1;
-				triplesExpectedToBeAdded[versionIndex] += triplesToBeAdded[dbpediaVersion];
-				triplesExpectedToBeDeleted[versionIndex] += triplesToBeDeleted[dbpediaVersion];
 			}
 		} else {
 			LOGGER.info("Assigning the first " + numberOfVersions + " DBpedia versions to the total " + numberOfVersions + " produced...");
 			Arrays.fill(dbPediaVersionsDistribution, 1);
-			for(int dbpediaVersion = 0; dbpediaVersion < numberOfVersions; dbpediaVersion++) {
-				triplesExpectedToBeAdded[dbpediaVersion] += triplesToBeAdded[dbpediaVersion];
-				triplesExpectedToBeDeleted[dbpediaVersion] += triplesToBeDeleted[dbpediaVersion];
-			}
 		}
 		LOGGER.info("Distribution: " + Arrays.toString(dbPediaVersionsDistribution));

-		// copy the dbpedia file to the appropriate version dir, (when dbPediaVersionsDistribution[i] = 1)
-		for (int i = 0, dbpediaIndex = 0; i < dbPediaVersionsDistribution.length; i++) {
-			if (dbPediaVersionsDistribution[i] == 1) {
+		// Copy the dbpedia file to the appropriate version dir (when dbPediaVersionsDistribution[i] = 1).
+		// Also, for each version compute the total number of triples that have to be added, deleted or loaded,
+		// in order to let the evaluation module calculate the appropriate KPIs
+		for (int version = 0, dbpediaIndex = 0; version < dbPediaVersionsDistribution.length; version++) {
+			if (dbPediaVersionsDistribution[version] == 1) {
 				try {
-					String destinationParent = generatedDatasetPath + File.separator + (i == 0 ? "v" : "c") + i + File.separator;
+					String destinationParent = generatedDatasetPath + File.separator + (version == 0 ? "v" : "c") + version + File.separator;

 					// copy the final dbpedia file that will be used from the datagenerator
 					File finalFrom = finalDbpediaFiles.get(dbpediaIndex);
@@ -424,18 +436,28 @@ private void distributeDBpediaVersions() {
 					File addedTo = new File(destinationParent + addedFrom.getName());
 					FileUtils.copyFile(addedFrom, addedTo);

-					if(i > 0) {
+					if(version > 0) {
 						// copy the deletset that will be sent to the system
 						// dbpediaIndex-1 because for version 0 we do not have deleted triples
 						File deletedFrom = deletedDataFiles.get(dbpediaIndex - 1);
 						File deletedTo = new File(destinationParent + deletedFrom.getName());
 						FileUtils.copyFile(deletedFrom, deletedTo);
 					}
-					dbpediaIndex++;
+
+					// increase the version's total triples to be added by the dbpedia ones
+					triplesExpectedToBeAdded[version] += triplesToBeAdded[dbpediaIndex];
+
+					// increase the version's total triples to be deleted by the dbpedia ones
+					triplesExpectedToBeDeleted[version] += triplesToBeDeleted[dbpediaIndex];
 				} catch(IOException e) {
 					LOGGER.error("Exception caught during the copy of dbpedia files to the appropriate version dir", e);
+				} finally {
+					dbpediaIndex++;
 				}
 			}
+
+			// compute the version's total triples that have to be loaded
+			triplesExpectedToBeLoaded[version] += Arrays.stream(triplesToBeAdded, 0, dbpediaIndex).sum();
+			triplesExpectedToBeLoaded[version] -= Arrays.stream(triplesToBeDeleted, 0, dbpediaIndex).sum();
 		}
 	}
@@ -544,31 +566,8 @@ public String compileMustacheTemplate(int queryType, int queryIndex, int subsParameterIndex) {
 		}
 		return compiledQuery;
 	}
-
-	private int getVersionSize(int versionNum) {
-		String sparqlQueryString = ""
-				+ "SELECT (COUNT(*) AS ?cnt) "
-				+ "FROM <http://graph.version." + versionNum + "> "
-				+ "WHERE { ?s ?p ?o }";
-
-		try (QueryExecution qexec = QueryExecutionFactory.sparqlService("http://localhost:8890/sparql", sparqlQueryString)) {
-			ResultSet results = ResultSetFactory.makeRewindable(qexec.execSelect());
-			if(results.hasNext()) {
-				return results.next().getLiteral("cnt").getInt();
-			}
-		} catch (Exception e) {
-			LOGGER.error("Exception caught during the computation of version " + versionNum + " triples number.", e);
-		}
-		return 0;
-	}
-
-	public void computeExpectedAnswers() {
-		// compute the number of triples that are expected to be loaded by the system,
-		// so the evaluation module can compute the ingestion and average changes speeds
-		for (int version = 0; version < numberOfVersions; version++) {
-			triplesExpectedToBeLoaded[version] = getVersionSize(version);
-		}

+	public void computeExpectedAnswers() {
 		for (Task task : tasks) {
 			ResultSetRewindable results = null;
@@ -1056,6 +1055,12 @@ protected void generateData() throws Exception {
 			}
 		} catch (Exception e) {
 			LOGGER.error("Exception while sending generated data to System Adapter.", e);
+		} finally {
+			// Add a delay of 1 minute after the bulk loading phase has ended.
+			// This is required in order to get reliable results regarding the final system's resource
+			// usage, since in cAdvisor the period for disk stats collection is hard-coded at 1 minute.
+			LOGGER.info("Waiting one minute after the bulk loading phase has ended...");
+			Thread.sleep(1000 * 60);
 		}

 		try {
@@ -1070,6 +1075,33 @@ protected void generateData() throws Exception {
 			LOGGER.error("Exception while sending tasks to Task Generator.", e);
 		}
 	}
+
+	// method for constructing all versions (from change-sets to independent copies)
+	public void constructVersions() {
+		LOGGER.info("Constructing all versions as independent copies...");
+		try {
+			String scriptFilePath = System.getProperty("user.dir") + File.separator + "versions_construction.sh";
+			String[] command = {"/bin/bash", scriptFilePath, Integer.toString(numberOfVersions) };
+			Process p = new ProcessBuilder(command).start();
+			BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
+			BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
+			String line;
+			while ((line = stdInput.readLine()) != null) {
+				LOGGER.info(line);
+			}
+			while ((line = stdError.readLine()) != null) {
+				LOGGER.info(line);
+			}
+			p.waitFor();
+			LOGGER.info("All versions constructed successfully.");
+			stdInput.close();
+			stdError.close();
+		} catch (IOException e) {
+			LOGGER.error("Exception while executing script for constructing versions.", e);
+		} catch (InterruptedException e) {
+			LOGGER.error("Exception while executing script for constructing versions.", e);
+		}
+	}

 	// method for loading to virtuoso the first N versions. e.g. for first 2 versions
 	// v0 and v1 will be loaded into.
@@ -1078,7 +1110,7 @@ public void loadFirstNVersions(int n) {

 		try {
 			String scriptFilePath = System.getProperty("user.dir") + File.separator + "load_to_virtuoso.sh";
-			String[] command = {"/bin/bash", scriptFilePath, RDFUtils.getFileExtensionFromRdfFormat(serializationFormat), Integer.toString(n) };
+			String[] command = {"/bin/bash", scriptFilePath, Integer.toString(n) };
 			Process p = new ProcessBuilder(command).start();
 			BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
 			BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
@@ -1106,7 +1138,7 @@ public void loadVersion(int version) {

 		try {
 			String scriptFilePath = System.getProperty("user.dir") + File.separator + "load_version_to_virtuoso.sh";
-			String[] command = {"/bin/bash", scriptFilePath, RDFUtils.getFileExtensionFromRdfFormat(serializationFormat), Integer.toString(version) };
+			String[] command = {"/bin/bash", scriptFilePath, Integer.toString(version) };
 			Process p = new ProcessBuilder(command).start();
 			BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()));
 			String line;
diff --git a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningEvaluationModule.java b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningEvaluationModule.java
index 99bbc13..2292101 100644
--- a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningEvaluationModule.java
+++ b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningEvaluationModule.java
@@ -77,7 +77,7 @@ public void init() throws Exception {
 			int triplesToBeAdded = Integer.parseInt(env.get(String.format(VersioningConstants.VERSION_TRIPLES_TO_BE_ADDED, version)));
 			int triplesToBeDeleted = Integer.parseInt(env.get(String.format(VersioningConstants.VERSION_TRIPLES_TO_BE_DELETED, version)));
 			int triplesToBeLoaded = Integer.parseInt(env.get(String.format(VersioningConstants.VERSION_TRIPLES_TO_BE_LOADED, version)));
-			LOGGER.info("version " + version + " loaded in " + loadingTime + " ms (" + triplesToBeLoaded + " triples had to be loaded).");
+			LOGGER.info("version " + version + " (+" + triplesToBeAdded + ", -" + triplesToBeDeleted + ", total=" + triplesToBeLoaded + ") loaded in " + loadingTime + " ms.");
 			is.reportSuccess(version, triplesToBeAdded, triplesToBeDeleted, triplesToBeLoaded, loadingTime);
 		}
diff --git a/src/main/java/org/hobbit/benchmark/versioning/properties/VersioningConstants.java b/src/main/java/org/hobbit/benchmark/versioning/properties/VersioningConstants.java
index d0ffe47..ae438e0 100644
--- a/src/main/java/org/hobbit/benchmark/versioning/properties/VersioningConstants.java
+++ b/src/main/java/org/hobbit/benchmark/versioning/properties/VersioningConstants.java
@@ -98,5 +98,10 @@ public final class VersioningConstants {

 	public static final int DBPEDIA_ADDED_TRIPLES_V4 = 32884;
 	public static final int DBPEDIA_DELETED_TRIPLES_V4 = 21957;
+
+	// =============== ontologies data stats ===============
+
+	public static final int ONTOLOGIES_TRIPLES = 8134;
+
 }
diff --git a/system/run.sh b/system/run.sh
index 569c2d7..3821948 100755
--- a/system/run.sh
+++ b/system/run.sh
@@ -44,4 +44,4 @@ echo $(date +%H:%M:%S.%N | cut -b1-12)" : Virtuoso Server started successfully."

 # run the system adapter
 echo $(date +%H:%M:%S.%N | cut -b1-12)" : Running the System adapter..."
-java -cp /versioning/versioning.jar org.hobbit.core.run.ComponentStarter org.hobbit.benchmark.versioning.systems.VirtuosoSystemAdapter
+java -Xmx4g -cp /versioning/versioning.jar org.hobbit.core.run.ComponentStarter org.hobbit.benchmark.versioning.systems.VirtuosoSystemAdapter
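With the target rename in the Makefile, a full build is now invoked as, e.g. (tag value illustrative):

    make tag=latest                 # default: build + dockerize-all
    make tag=latest dockerize-all   # images only

and the baseline system adapter's JVM is now capped at a 4 GB heap via -Xmx4g.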