From 0d09660e15d89b856692779c2d853ab9a5072c0d Mon Sep 17 00:00:00 2001 From: papv Date: Fri, 8 Dec 2017 12:38:29 +0200 Subject: [PATCH 1/8] reset parameters --- system/virtuoso.ini.template | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/virtuoso.ini.template b/system/virtuoso.ini.template index 6a43294..5839461 100644 --- a/system/virtuoso.ini.template +++ b/system/virtuoso.ini.template @@ -55,7 +55,7 @@ DisableTcpSocket = 0 ;X509ClientVerify = 0 ;X509ClientVerifyDepth = 0 ;X509ClientVerifyCAFile = ca.pem -MaxClientConnections = 25 +MaxClientConnections = 10 CheckpointInterval = 60 O_DIRECT = 0 CaseMode = 2 @@ -126,7 +126,7 @@ EnabledDavVSP = 0 HTTPProxyEnabled = 0 TempASPXDir = 0 DefaultMailServer = localhost:25 -ServerThreads = 20 +ServerThreads = 10 MaxKeepAlives = 10 KeepAliveTimeout = 10 MaxCachedProxyConnections = 10 From 4a4424e153ea032f50aa38763ee444912f8fe747 Mon Sep 17 00:00:00 2001 From: papv Date: Fri, 8 Dec 2017 12:39:55 +0200 Subject: [PATCH 2/8] fix deletion bug. implement parallel deletion through java. --- required_files/export_cws_tbd.sh | 29 ++--- .../components/VersioningDataGenerator.java | 119 +++++++++++++++--- 2 files changed, 112 insertions(+), 36 deletions(-) diff --git a/required_files/export_cws_tbd.sh b/required_files/export_cws_tbd.sh index de7f76c..2652fee 100644 --- a/required_files/export_cws_tbd.sh +++ b/required_files/export_cws_tbd.sh @@ -1,25 +1,14 @@ #!/bin/bash -CURR_VERSION=$1 +CURR_VERSION_FILE=$1 CW_TBD=$2 DESTINATION_PATH=$3 -export_deleted() { - filename=$(basename "$1") - filename="${filename%added.*}" - tbd=$(grep -F -f $CW_TBD $1) - lines=$(echo "${tbd}" | wc -l) - if [[ "$lines" > "1" ]]; then - printf "%s\n" "${tbd}" >> $DESTINATION_PATH/$filename"deleted.nt" - echo $lines - fi -} - -# do it parallely in the background for improving performance -for ((i=0; i<$CURR_VERSION; i++)); do - SOURCE_PATH=$([ "$i" = "0" ] && echo $4"/v"$i || echo $4"/c"$i) - for f in $SOURCE_PATH/generatedCreativeWorks*.added.nt; do - export_deleted $f & - done -done -wait +filename=$(basename "$CURR_VERSION_FILE") +filename="${filename%added.*}" +tbd=$(grep -F -f $CW_TBD $CURR_VERSION_FILE) +lines=$(echo "${tbd}" | wc -l) +if [[ "$lines" > "1" ]]; then + printf "%s\n" "${tbd}" >> $DESTINATION_PATH/$filename"deleted.nt" + echo $lines +fi diff --git a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java index cc9fb3e..8cd7139 100644 --- a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java +++ b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java @@ -17,10 +17,15 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.io.FileUtils; +import org.apache.commons.io.filefilter.RegexFileFilter; import org.apache.commons.lang3.SerializationUtils; import org.apache.jena.query.Query; import org.apache.jena.query.QueryExecution; @@ -164,7 +169,8 @@ public void init() throws Exception { dataGenerator.produceData(); cwsToBeLoaded[0] = v0SizeInTriples; - + LOGGER.info("initial remainingRandomCreativeWorkIdsList: "+DataManager.remainingRandomCreativeWorkIdsList.size()); + // Generate the change sets. Only additions/deletions are supported. // TODO: support changes int preVersionDeletedCWs = 0; @@ -175,34 +181,59 @@ public void init() throws Exception { cwsToBeLoaded[i] = cwsToBeLoaded[i-1] + triplesToBeAdded - triplesToBeDeleted; LOGGER.info("Generating version " + i + " changeset. Target: " + "[+" + triplesToBeAdded + ", -" + triplesToBeDeleted + "]"); String destinationPath = generatedDatasetPath + File.separator + "c" + i; - - // produce the add set - LOGGER.info("Generating version " + i + " add-set."); - dataGenerator.produceAdded(destinationPath, triplesToBeAdded); + File theDir = new File(destinationPath); + theDir.mkdir(); // produce the delete set LOGGER.info("Generating version " + i + " delete-set."); long deleteSetStart = System.currentTimeMillis(); int currVersionDeletedCreativeWorks = 0; int currVersionDeletedTriples = 0; - int creativeWorkAvgTriples = DataManager.randomTriples.intValue() / DataManager.randomCreativeWorkIdsList.size(); + int creativeWorkAvgTriples = DataManager.randomCreativeWorkTriples.intValue() / DataManager.randomCreativeWorkIdsList.size(); // Estimate the total number of creative works that have to be deleted, using - // creative work average triples that have been generated so far. - int creativeWorksToBeDeleted = triplesToBeDeleted / creativeWorkAvgTriples; + // creative work average triples that have been generated so far plus 1 for safety reasons. + int creativeWorksToBeDeleted = triplesToBeDeleted / (creativeWorkAvgTriples + 1); LOGGER.info(creativeWorksToBeDeleted + " cworks estimated that have to be deleted from v" + (i - 1)); while (currVersionDeletedTriples < triplesToBeDeleted) { ArrayList cwToBeDeleted = new ArrayList(); - for(int c=0; c DataManager.randomCreativeWorkTriples.get()) { + LOGGER.info("Target of " + triplesToBeDeleted + " triples exceedes the already (random-model) existing ones (" + DataManager.randomCreativeWorkTriples.get() + "). Take all the existing."); + for (int cworkIdIdx = 0; cworkIdIdx < DataManager.remainingRandomCreativeWorkIdsList.size(); cworkIdIdx++) { + cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + DataManager.remainingRandomCreativeWorkIdsList.get(cworkIdIdx) + "#id"); + DataManager.remainingRandomCreativeWorkIdsList.remove(cworkIdIdx); + } + } else { + for(int c=0; c> list = new ArrayList>(); + for(int j = 0; j < i; j++) { + String sourcePath = generatedDatasetPath + File.separator + (j == 0 ? "v" : "c") + j + File.separator; + File sourcePathFile = new File(sourcePath); + List previousVersionAddedFiles = (List) FileUtils.listFiles(sourcePathFile, new RegexFileFilter("generatedCreativeWorks-[0-9]+-[0-9]+.added.nt"), null); + for (File f : previousVersionAddedFiles) { + Callable callable = new ExtractDeleted(f, "/versioning/creativeWorksToBeDeleted.txt", destinationPath); + Future future = executor.submit(callable); + list.add(future); + } + } + for(Future fut : list) { + currVersionDeletedTriples += fut.get(); + } + executor.shutdown(); currVersionDeletedCreativeWorks += creativeWorksToBeDeleted; // estimation of the remaining creative works that have to be extracted creativeWorksToBeDeleted = (int) Math.ceil((double) (triplesToBeDeleted - currVersionDeletedTriples) / creativeWorkAvgTriples); @@ -212,7 +243,14 @@ public void init() throws Exception { } preVersionDeletedCWs = currVersionDeletedCreativeWorks; long deleteSetEnd = System.currentTimeMillis(); - LOGGER.info("Deleteset of total " + preVersionDeletedCWs + " Creative Works generated successfully. Triples: " + currVersionDeletedTriples + ". Target: " + triplesToBeDeleted + " triples. Time: " + (deleteSetEnd - deleteSetStart) + " ms."); + LOGGER.info("Deleteset of total " + preVersionDeletedCWs + " Creative Works generated successfully. Triples: " + currVersionDeletedTriples + " . Target: " + triplesToBeDeleted + " triples. Time: " + (deleteSetEnd - deleteSetStart) + " ms."); + LOGGER.info("v"+i+" after delete remainingRandomCreativeWorkIdsList: "+DataManager.remainingRandomCreativeWorkIdsList.size()); + + // produce the add set + LOGGER.info("Generating version " + i + " add-set."); + dataGenerator.produceAdded(destinationPath, triplesToBeAdded); + LOGGER.info("v"+i+" after add remainingRandomCreativeWorkIdsList: "+DataManager.remainingRandomCreativeWorkIdsList.size()); + } long changeSetEnd = System.currentTimeMillis(); LOGGER.info("All changesets generated successfully. Time: " + (changeSetEnd - changeSetStart) + " ms."); @@ -249,6 +287,30 @@ public void init() throws Exception { LOGGER.info("Expected answers have computed successfully for all generated SPRQL tasks."); } + // class for implementing extraction of triples that have to be deleted concurrently + public static class ExtractDeleted implements Callable { + private File file; + private String cwTBD; + private String destinationPath; + private int numberOfDeletedTriples; + + ExtractDeleted(File file, String cwTBD, String destinationPath) { + this.file = file; + this.cwTBD = cwTBD; + this.destinationPath = destinationPath; + this.numberOfDeletedTriples = 0; + } + + public Integer call() { + try { + numberOfDeletedTriples = extractDeleted(file.getAbsolutePath(), cwTBD, destinationPath); + } catch (Exception e) { + LOGGER.error("Exception caught during the extraction of deleted triples from " + file, e); + } + return numberOfDeletedTriples; + } + } + public void initFromEnv() { LOGGER.info("Getting Data Generator's properites from the environment..."); @@ -947,6 +1009,31 @@ private int extractDeleted(int currentVersion, String cwIdsFile, String destPath return deletedTriples; } + private static int extractDeleted(String currentFile, String cwIdsFile, String destPath) { + int deletedTriples = 0; + try { + String scriptFilePath = System.getProperty("user.dir") + File.separator + "export_cws_tbd2.sh"; + String[] command = {"/bin/bash", scriptFilePath, currentFile, cwIdsFile, destPath }; + Process p = new ProcessBuilder(command).start(); + BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream())); + BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream())); + String line; + while ((line = in.readLine()) != null) { + deletedTriples += Integer.parseInt(line); + } + while ((line = stdError.readLine()) != null) { + LOGGER.info(line); + } + p.waitFor(); + in.close(); + stdError.close(); + } catch (IOException e) { + LOGGER.error("Exception while executing script for extracting creative works that have to be deleted.", e); + } catch (InterruptedException e) { + LOGGER.error("Exception while executing script for extracting creative works that have to be deleted.", e); + } + return deletedTriples; + } @Override public void receiveCommand(byte command, byte[] data) { if (command == VirtuosoSystemAdapterConstants.BULK_LOADING_DATA_FINISHED) { From aae3e405f303535c0caa9775f8132a9aef3d7f53 Mon Sep 17 00:00:00 2001 From: papv Date: Fri, 8 Dec 2017 12:42:47 +0200 Subject: [PATCH 3/8] fix export script name --- .../versioning/components/VersioningDataGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java index 8cd7139..3050b80 100644 --- a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java +++ b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java @@ -1012,7 +1012,7 @@ private int extractDeleted(int currentVersion, String cwIdsFile, String destPath private static int extractDeleted(String currentFile, String cwIdsFile, String destPath) { int deletedTriples = 0; try { - String scriptFilePath = System.getProperty("user.dir") + File.separator + "export_cws_tbd2.sh"; + String scriptFilePath = System.getProperty("user.dir") + File.separator + "export_cws_tbd.sh"; String[] command = {"/bin/bash", scriptFilePath, currentFile, cwIdsFile, destPath }; Process p = new ProcessBuilder(command).start(); BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream())); From 4c7bc45a26d2674954597b1f044fc60c46fa34a1 Mon Sep 17 00:00:00 2001 From: papv Date: Fri, 8 Dec 2017 17:04:40 +0200 Subject: [PATCH 4/8] handle cases when deleted triples exceed the total random --- .../components/VersioningDataGenerator.java | 173 ++++++++++-------- 1 file changed, 97 insertions(+), 76 deletions(-) diff --git a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java index 3050b80..502d96d 100644 --- a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java +++ b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java @@ -16,8 +16,10 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -176,75 +178,94 @@ public void init() throws Exception { int preVersionDeletedCWs = 0; long changeSetStart = System.currentTimeMillis(); for(int i = 1; i < numberOfVersions; i++) { - int triplesToBeAdded = Math.round(versionInsertionRatio / 100f * cwsToBeLoaded[i-1]); - int triplesToBeDeleted = Math.round(versionDeletionRatio / 100f * cwsToBeLoaded[i-1]); - cwsToBeLoaded[i] = cwsToBeLoaded[i-1] + triplesToBeAdded - triplesToBeDeleted; - LOGGER.info("Generating version " + i + " changeset. Target: " + "[+" + triplesToBeAdded + ", -" + triplesToBeDeleted + "]"); String destinationPath = generatedDatasetPath + File.separator + "c" + i; File theDir = new File(destinationPath); theDir.mkdir(); + int triplesToBeAdded = Math.round(versionInsertionRatio / 100f * cwsToBeLoaded[i-1]); + int triplesToBeDeleted = Math.round(versionDeletionRatio / 100f * cwsToBeLoaded[i-1]); + cwsToBeLoaded[i] = cwsToBeLoaded[i-1] + triplesToBeAdded - triplesToBeDeleted; + LOGGER.info("Generating version " + i + " changeset. Target: " + "[+" + String.format(Locale.US, "%,d", triplesToBeAdded).replace(',', '.') + " , -" + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + "]"); + // produce the delete set LOGGER.info("Generating version " + i + " delete-set."); long deleteSetStart = System.currentTimeMillis(); int currVersionDeletedCreativeWorks = 0; int currVersionDeletedTriples = 0; - int creativeWorkAvgTriples = DataManager.randomCreativeWorkTriples.intValue() / DataManager.randomCreativeWorkIdsList.size(); + int creativeWorksToBeDeleted = 0; + int totalRandomTriplesSoFar = DataManager.randomCreativeWorkTriples.intValue(); + LOGGER.info("totalRandomTriplesSoFar: " + totalRandomTriplesSoFar); - // Estimate the total number of creative works that have to be deleted, using - // creative work average triples that have been generated so far plus 1 for safety reasons. - int creativeWorksToBeDeleted = triplesToBeDeleted / (creativeWorkAvgTriples + 1); - LOGGER.info(creativeWorksToBeDeleted + " cworks estimated that have to be deleted from v" + (i - 1)); - while (currVersionDeletedTriples < triplesToBeDeleted) { - ArrayList cwToBeDeleted = new ArrayList(); - // if the estimated number of creative works is larger than the already existing - // random-model one, then take all the existing - if(triplesToBeDeleted > DataManager.randomCreativeWorkTriples.get()) { - LOGGER.info("Target of " + triplesToBeDeleted + " triples exceedes the already (random-model) existing ones (" + DataManager.randomCreativeWorkTriples.get() + "). Take all the existing."); - for (int cworkIdIdx = 0; cworkIdIdx < DataManager.remainingRandomCreativeWorkIdsList.size(); cworkIdIdx++) { - cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + DataManager.remainingRandomCreativeWorkIdsList.get(cworkIdIdx) + "#id"); - DataManager.remainingRandomCreativeWorkIdsList.remove(cworkIdIdx); + int randomCreativeWorkAvgTriples = totalRandomTriplesSoFar / DataManager.randomCreativeWorkIdsList.size(); + ArrayList cwToBeDeleted = new ArrayList(); + + // if the number of triples that have to be deleted is larger than the already existing + // random-model ones, take all the random and choose from other data-models (correlations, + // major/minor events) as well + if(triplesToBeDeleted > totalRandomTriplesSoFar) { + LOGGER.info("Target of " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples exceedes the already (random-model) existing ones (" + String.format(Locale.US, "%,d", totalRandomTriplesSoFar).replace(',', '.') + "). Will choose from other models as well."); + // take all the random + for (int cworkIdIdx = 0; cworkIdIdx < DataManager.remainingRandomCreativeWorkIdsList.size(); cworkIdIdx++) { + cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + DataManager.remainingRandomCreativeWorkIdsList.get(cworkIdIdx) + "#id"); + } + currVersionDeletedCreativeWorks = DataManager.remainingRandomCreativeWorkIdsList.size(); + DataManager.remainingRandomCreativeWorkIdsList.clear(); + currVersionDeletedTriples = totalRandomTriplesSoFar; + + // as delete-set target have not reached yet, choose the rest from correlations or major/minor + int totalCorrExpTriplesSoFar = DataManager.corExpCreativeWorkTriples.intValue(); + int corrExpCreativeWorkAvgTriples = totalCorrExpTriplesSoFar / DataManager.corrExpCreativeWorkIdsList.size(); + while (currVersionDeletedTriples < triplesToBeDeleted) { + cwToBeDeleted.clear(); + // Estimate the total number of creative works that have to be deleted, using + // creative work average triples that have been generated (using correlation + // or exponential decays) so far plus 1 for safety reasons. + creativeWorksToBeDeleted = (int) Math.ceil((double) (triplesToBeDeleted - currVersionDeletedTriples) / (corrExpCreativeWorkAvgTriples + 1)); + LOGGER.info(String.format(Locale.US, "%,d", creativeWorksToBeDeleted).replace(',', '.') + " more cworks (from correlations or exponential decays) estimated that have to be deleted from v" + (i - 1)); + + for(int c = 0; c < creativeWorksToBeDeleted; c++) { + int deletedCWIndex = randomGenerator.nextInt(DataManager.remainingCorrExpCreativeWorkIdsList.size()); + long creativeWorkToBeDeleted = DataManager.remainingCorrExpCreativeWorkIdsList.get(deletedCWIndex); + DataManager.remainingCorrExpCreativeWorkIdsList.remove(deletedCWIndex); + cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + creativeWorkToBeDeleted + "#id"); } - } else { - for(int c=0; c> list = new ArrayList>(); - for(int j = 0; j < i; j++) { - String sourcePath = generatedDatasetPath + File.separator + (j == 0 ? "v" : "c") + j + File.separator; - File sourcePathFile = new File(sourcePath); - List previousVersionAddedFiles = (List) FileUtils.listFiles(sourcePathFile, new RegexFileFilter("generatedCreativeWorks-[0-9]+-[0-9]+.added.nt"), null); - for (File f : previousVersionAddedFiles) { - Callable callable = new ExtractDeleted(f, "/versioning/creativeWorksToBeDeleted.txt", destinationPath); - Future future = executor.submit(callable); - list.add(future); - } - } - for(Future fut : list) { - currVersionDeletedTriples += fut.get(); - } - executor.shutdown(); - currVersionDeletedCreativeWorks += creativeWorksToBeDeleted; - // estimation of the remaining creative works that have to be extracted - creativeWorksToBeDeleted = (int) Math.ceil((double) (triplesToBeDeleted - currVersionDeletedTriples) / creativeWorkAvgTriples); - if(creativeWorksToBeDeleted > 0) { - LOGGER.info(creativeWorksToBeDeleted + " more cwork" + (creativeWorksToBeDeleted > 1 ? "s" : "") +" estimated that have to be deleted from v" + (i - 1)); + + // write down the creative work uris that are going to be deleted + FileUtils.writeLines(new File("/versioning/creativeWorksToBeDeleted.txt") , cwToBeDeleted, false); + + // extract all triples that have to be deleted using multiple threads + currVersionDeletedTriples += parallelyExtract(i, destinationPath); + currVersionDeletedCreativeWorks += creativeWorksToBeDeleted; } } + preVersionDeletedCWs = currVersionDeletedCreativeWorks; long deleteSetEnd = System.currentTimeMillis(); - LOGGER.info("Deleteset of total " + preVersionDeletedCWs + " Creative Works generated successfully. Triples: " + currVersionDeletedTriples + " . Target: " + triplesToBeDeleted + " triples. Time: " + (deleteSetEnd - deleteSetStart) + " ms."); - LOGGER.info("v"+i+" after delete remainingRandomCreativeWorkIdsList: "+DataManager.remainingRandomCreativeWorkIdsList.size()); + LOGGER.info("Deleteset of total " + String.format(Locale.US, "%,d", preVersionDeletedCWs).replace(',', '.') + " Creative Works generated successfully. Triples: " + String.format(Locale.US, "%,d", currVersionDeletedTriples).replace(',', '.') + " . Target: " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples. Time: " + (deleteSetEnd - deleteSetStart) + " ms."); + LOGGER.info("v"+i+" after delete remainingRandomCreativeWorkIdsList: " + DataManager.remainingRandomCreativeWorkIdsList.size()); // produce the add set LOGGER.info("Generating version " + i + " add-set."); @@ -287,6 +308,33 @@ public void init() throws Exception { LOGGER.info("Expected answers have computed successfully for all generated SPRQL tasks."); } + public int parallelyExtract(int currVersion, String destinationPath) { + int currVersionDeletedTriples = 0; + ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + List> list = new ArrayList>(); + for(int j = 0; j < currVersion; j++) { + String sourcePath = generatedDatasetPath + File.separator + (j == 0 ? "v" : "c") + j + File.separator; + File sourcePathFile = new File(sourcePath); + List previousVersionAddedFiles = (List) FileUtils.listFiles(sourcePathFile, new RegexFileFilter("generatedCreativeWorks-[0-9]+-[0-9]+.added.nt"), null); + for (File f : previousVersionAddedFiles) { + Callable callable = new ExtractDeleted(f, "/versioning/creativeWorksToBeDeleted.txt", destinationPath); + Future future = executor.submit(callable); + list.add(future); + } + } + for(Future fut : list) { + try { + currVersionDeletedTriples += fut.get(); + } catch (InterruptedException e) { + LOGGER.error("Exception caught during the extraction of deleted triples", e); + } catch (ExecutionException e) { + LOGGER.error("Exception caught during the extraction of deleted triples", e); + } + } + executor.shutdown(); + return currVersionDeletedTriples; + } + // class for implementing extraction of triples that have to be deleted concurrently public static class ExtractDeleted implements Callable { private File file; @@ -981,33 +1029,6 @@ public void loadVersion(int version) { LOGGER.error("Exception while executing script for loading data.", e); } } - - private int extractDeleted(int currentVersion, String cwIdsFile, String destPath, String sourcePath) { - int deletedTriples = 0; - try { - String scriptFilePath = System.getProperty("user.dir") + File.separator + "export_cws_tbd.sh"; - String[] command = {"/bin/bash", scriptFilePath, - Integer.toString(currentVersion), cwIdsFile, destPath, sourcePath }; - Process p = new ProcessBuilder(command).start(); - BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream())); - BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream())); - String line; - while ((line = in.readLine()) != null) { - deletedTriples += Integer.parseInt(line); - } - while ((line = stdError.readLine()) != null) { - LOGGER.info(line); - } - p.waitFor(); - in.close(); - stdError.close(); - } catch (IOException e) { - LOGGER.error("Exception while executing script for extracting creative works that have to be deleted.", e); - } catch (InterruptedException e) { - LOGGER.error("Exception while executing script for extracting creative works that have to be deleted.", e); - } - return deletedTriples; - } private static int extractDeleted(String currentFile, String cwIdsFile, String destPath) { int deletedTriples = 0; From 69e2200c05cebc4569dba36308c268f40e80335f Mon Sep 17 00:00:00 2001 From: papv Date: Mon, 11 Dec 2017 12:33:40 +0200 Subject: [PATCH 5/8] compute delete-set much more efficient based on statistics --- required_files/export_cws_tbd.sh | 7 +- .../components/VersioningDataGenerator.java | 130 ++++++------------ 2 files changed, 46 insertions(+), 91 deletions(-) diff --git a/required_files/export_cws_tbd.sh b/required_files/export_cws_tbd.sh index 2652fee..71712f7 100644 --- a/required_files/export_cws_tbd.sh +++ b/required_files/export_cws_tbd.sh @@ -6,9 +6,4 @@ DESTINATION_PATH=$3 filename=$(basename "$CURR_VERSION_FILE") filename="${filename%added.*}" -tbd=$(grep -F -f $CW_TBD $CURR_VERSION_FILE) -lines=$(echo "${tbd}" | wc -l) -if [[ "$lines" > "1" ]]; then - printf "%s\n" "${tbd}" >> $DESTINATION_PATH/$filename"deleted.nt" - echo $lines -fi +grep -F -f $CW_TBD $CURR_VERSION_FILE >> $DESTINATION_PATH/$filename"deleted.nt" diff --git a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java index 502d96d..35cda4f 100644 --- a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java +++ b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java @@ -15,15 +15,18 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Map.Entry; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.io.FileUtils; @@ -171,8 +174,6 @@ public void init() throws Exception { dataGenerator.produceData(); cwsToBeLoaded[0] = v0SizeInTriples; - LOGGER.info("initial remainingRandomCreativeWorkIdsList: "+DataManager.remainingRandomCreativeWorkIdsList.size()); - // Generate the change sets. Only additions/deletions are supported. // TODO: support changes int preVersionDeletedCWs = 0; @@ -192,86 +193,64 @@ public void init() throws Exception { long deleteSetStart = System.currentTimeMillis(); int currVersionDeletedCreativeWorks = 0; int currVersionDeletedTriples = 0; - int creativeWorksToBeDeleted = 0; int totalRandomTriplesSoFar = DataManager.randomCreativeWorkTriples.intValue(); LOGGER.info("totalRandomTriplesSoFar: " + totalRandomTriplesSoFar); - int randomCreativeWorkAvgTriples = totalRandomTriplesSoFar / DataManager.randomCreativeWorkIdsList.size(); ArrayList cwToBeDeleted = new ArrayList(); // if the number of triples that have to be deleted is larger than the already existing // random-model ones, take all the random and choose from other data-models (correlations, // major/minor events) as well + List randomCreativeWorkIds = new ArrayList(DataManager.randomCreativeWorkIdsList.keySet()); if(triplesToBeDeleted > totalRandomTriplesSoFar) { LOGGER.info("Target of " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples exceedes the already (random-model) existing ones (" + String.format(Locale.US, "%,d", totalRandomTriplesSoFar).replace(',', '.') + "). Will choose from other models as well."); // take all the random - for (int cworkIdIdx = 0; cworkIdIdx < DataManager.remainingRandomCreativeWorkIdsList.size(); cworkIdIdx++) { - cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + DataManager.remainingRandomCreativeWorkIdsList.get(cworkIdIdx) + "#id"); + for (long creativeWorkId : randomCreativeWorkIds) { + cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + DataManager.randomCreativeWorkIdsList.get(creativeWorkId) + "#id"); } - currVersionDeletedCreativeWorks = DataManager.remainingRandomCreativeWorkIdsList.size(); - DataManager.remainingRandomCreativeWorkIdsList.clear(); + DataManager.randomCreativeWorkIdsList.clear(); currVersionDeletedTriples = totalRandomTriplesSoFar; // as delete-set target have not reached yet, choose the rest from correlations or major/minor - int totalCorrExpTriplesSoFar = DataManager.corExpCreativeWorkTriples.intValue(); - int corrExpCreativeWorkAvgTriples = totalCorrExpTriplesSoFar / DataManager.corrExpCreativeWorkIdsList.size(); + List keys = new ArrayList(DataManager.corrExpCreativeWorkIdsList.keySet()); while (currVersionDeletedTriples < triplesToBeDeleted) { - cwToBeDeleted.clear(); - // Estimate the total number of creative works that have to be deleted, using - // creative work average triples that have been generated (using correlation - // or exponential decays) so far plus 1 for safety reasons. - creativeWorksToBeDeleted = (int) Math.ceil((double) (triplesToBeDeleted - currVersionDeletedTriples) / (corrExpCreativeWorkAvgTriples + 1)); - LOGGER.info(String.format(Locale.US, "%,d", creativeWorksToBeDeleted).replace(',', '.') + " more cworks (from correlations or exponential decays) estimated that have to be deleted from v" + (i - 1)); - - for(int c = 0; c < creativeWorksToBeDeleted; c++) { - int deletedCWIndex = randomGenerator.nextInt(DataManager.remainingCorrExpCreativeWorkIdsList.size()); - long creativeWorkToBeDeleted = DataManager.remainingCorrExpCreativeWorkIdsList.get(deletedCWIndex); - DataManager.remainingCorrExpCreativeWorkIdsList.remove(deletedCWIndex); - cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + creativeWorkToBeDeleted + "#id"); - } - - // write down the creative work uris that are going to be deleted - FileUtils.writeLines(new File("/versioning/creativeWorksToBeDeleted.txt") , cwToBeDeleted, false); - - // extract all triples that have to be deleted using multiple threads - currVersionDeletedTriples += parallelyExtract(i, destinationPath); - currVersionDeletedCreativeWorks += creativeWorksToBeDeleted; + int creativeWorkToBeDeletedIdx = randomGenerator.nextInt(keys.size()); + long creativeWorkToBeDeleted = keys.get(creativeWorkToBeDeletedIdx); + currVersionDeletedTriples += DataManager.corrExpCreativeWorkIdsList.get(creativeWorkToBeDeleted); + keys.remove(creativeWorkToBeDeletedIdx); + DataManager.corrExpCreativeWorkIdsList.remove(creativeWorkToBeDeleted); + cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + creativeWorkToBeDeleted + "#id"); } + // write down the creative work uris that are going to be deleted + FileUtils.writeLines(new File("/versioning/creativeWorksToBeDeleted.txt") , cwToBeDeleted, false); + + // extract all triples that have to be deleted using multiple threads + parallelyExtract(i, destinationPath); + currVersionDeletedCreativeWorks += cwToBeDeleted.size(); } else { while (currVersionDeletedTriples < triplesToBeDeleted) { - cwToBeDeleted.clear(); - // Estimate the total number of creative works that have to be deleted, using - // creative work average triples that have been generated (using random data - // model) so far plus 1 for safety reasons. - creativeWorksToBeDeleted = (int) Math.ceil((double) (triplesToBeDeleted - currVersionDeletedTriples) / (randomCreativeWorkAvgTriples + 1)); - LOGGER.info(String.format(Locale.US, "%,d", creativeWorksToBeDeleted).replace(',', '.') + " cworks (random model) estimated that have to be deleted from v" + (i - 1)); - - for(int c = 0; c < creativeWorksToBeDeleted; c++) { - int deletedCWIndex = randomGenerator.nextInt(DataManager.remainingRandomCreativeWorkIdsList.size()); - long creativeWorkToBeDeleted = DataManager.remainingRandomCreativeWorkIdsList.get(deletedCWIndex); - DataManager.remainingRandomCreativeWorkIdsList.remove(deletedCWIndex); - cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + creativeWorkToBeDeleted + "#id"); - } - - // write down the creative work uris that are going to be deleted - FileUtils.writeLines(new File("/versioning/creativeWorksToBeDeleted.txt") , cwToBeDeleted, false); - - // extract all triples that have to be deleted using multiple threads - currVersionDeletedTriples += parallelyExtract(i, destinationPath); - currVersionDeletedCreativeWorks += creativeWorksToBeDeleted; + int creativeWorkToBeDeletedIdx = randomGenerator.nextInt(randomCreativeWorkIds.size()); + long creativeWorkToBeDeleted = randomCreativeWorkIds.get(creativeWorkToBeDeletedIdx); + currVersionDeletedTriples += DataManager.randomCreativeWorkIdsList.get(creativeWorkToBeDeleted); + randomCreativeWorkIds.remove(creativeWorkToBeDeletedIdx); + DataManager.randomCreativeWorkIdsList.remove(creativeWorkToBeDeleted); + cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + creativeWorkToBeDeleted + "#id"); } - } + // write down the creative work uris that are going to be deleted + // in order to use it in grep -F -f + FileUtils.writeLines(new File("/versioning/creativeWorksToBeDeleted.txt") , cwToBeDeleted, false); + // extract all triples that have to be deleted using multiple threads + parallelyExtract(i, destinationPath); + currVersionDeletedCreativeWorks += cwToBeDeleted.size(); + } preVersionDeletedCWs = currVersionDeletedCreativeWorks; long deleteSetEnd = System.currentTimeMillis(); LOGGER.info("Deleteset of total " + String.format(Locale.US, "%,d", preVersionDeletedCWs).replace(',', '.') + " Creative Works generated successfully. Triples: " + String.format(Locale.US, "%,d", currVersionDeletedTriples).replace(',', '.') + " . Target: " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples. Time: " + (deleteSetEnd - deleteSetStart) + " ms."); - LOGGER.info("v"+i+" after delete remainingRandomCreativeWorkIdsList: " + DataManager.remainingRandomCreativeWorkIdsList.size()); // produce the add set LOGGER.info("Generating version " + i + " add-set."); dataGenerator.produceAdded(destinationPath, triplesToBeAdded); - LOGGER.info("v"+i+" after add remainingRandomCreativeWorkIdsList: "+DataManager.remainingRandomCreativeWorkIdsList.size()); - } long changeSetEnd = System.currentTimeMillis(); LOGGER.info("All changesets generated successfully. Time: " + (changeSetEnd - changeSetStart) + " ms."); @@ -308,54 +287,42 @@ public void init() throws Exception { LOGGER.info("Expected answers have computed successfully for all generated SPRQL tasks."); } - public int parallelyExtract(int currVersion, String destinationPath) { - int currVersionDeletedTriples = 0; + public void parallelyExtract(int currVersion, String destinationPath) { ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); - List> list = new ArrayList>(); for(int j = 0; j < currVersion; j++) { String sourcePath = generatedDatasetPath + File.separator + (j == 0 ? "v" : "c") + j + File.separator; File sourcePathFile = new File(sourcePath); List previousVersionAddedFiles = (List) FileUtils.listFiles(sourcePathFile, new RegexFileFilter("generatedCreativeWorks-[0-9]+-[0-9]+.added.nt"), null); for (File f : previousVersionAddedFiles) { - Callable callable = new ExtractDeleted(f, "/versioning/creativeWorksToBeDeleted.txt", destinationPath); - Future future = executor.submit(callable); - list.add(future); - } - } - for(Future fut : list) { - try { - currVersionDeletedTriples += fut.get(); - } catch (InterruptedException e) { - LOGGER.error("Exception caught during the extraction of deleted triples", e); - } catch (ExecutionException e) { - LOGGER.error("Exception caught during the extraction of deleted triples", e); + executor.execute(new ExtractDeleted(f, "/versioning/creativeWorksToBeDeleted.txt", destinationPath)); } } executor.shutdown(); - return currVersionDeletedTriples; + try { + executor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS); // no timeout + } catch (InterruptedException e) { + LOGGER.error("Exception caught while awaiting termination...", e); + } } // class for implementing extraction of triples that have to be deleted concurrently - public static class ExtractDeleted implements Callable { + public static class ExtractDeleted implements Runnable { private File file; private String cwTBD; private String destinationPath; - private int numberOfDeletedTriples; ExtractDeleted(File file, String cwTBD, String destinationPath) { this.file = file; this.cwTBD = cwTBD; this.destinationPath = destinationPath; - this.numberOfDeletedTriples = 0; } - public Integer call() { + public void run() { try { - numberOfDeletedTriples = extractDeleted(file.getAbsolutePath(), cwTBD, destinationPath); + extractDeleted(file.getAbsolutePath(), cwTBD, destinationPath); } catch (Exception e) { LOGGER.error("Exception caught during the extraction of deleted triples from " + file, e); } - return numberOfDeletedTriples; } } @@ -1030,30 +997,23 @@ public void loadVersion(int version) { } } - private static int extractDeleted(String currentFile, String cwIdsFile, String destPath) { - int deletedTriples = 0; + private static void extractDeleted(String currentFile, String cwIdsFile, String destPath) { try { String scriptFilePath = System.getProperty("user.dir") + File.separator + "export_cws_tbd.sh"; String[] command = {"/bin/bash", scriptFilePath, currentFile, cwIdsFile, destPath }; Process p = new ProcessBuilder(command).start(); - BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream())); BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream())); - String line; - while ((line = in.readLine()) != null) { - deletedTriples += Integer.parseInt(line); - } + String line = null; while ((line = stdError.readLine()) != null) { LOGGER.info(line); } p.waitFor(); - in.close(); stdError.close(); } catch (IOException e) { LOGGER.error("Exception while executing script for extracting creative works that have to be deleted.", e); } catch (InterruptedException e) { LOGGER.error("Exception while executing script for extracting creative works that have to be deleted.", e); } - return deletedTriples; } @Override public void receiveCommand(byte command, byte[] data) { From f1e537f81ed044d21d10a0af980914697a680457 Mon Sep 17 00:00:00 2001 From: papv Date: Tue, 12 Dec 2017 12:22:33 +0200 Subject: [PATCH 6/8] minor changes --- .../components/VersioningDataGenerator.java | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java index 35cda4f..322e96c 100644 --- a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java +++ b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java @@ -15,16 +15,11 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Map.Entry; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; @@ -47,8 +42,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.io.Files; - import eu.ldbc.semanticpublishing.generators.data.DataGenerator; import eu.ldbc.semanticpublishing.properties.Configuration; import eu.ldbc.semanticpublishing.properties.Definitions; @@ -194,7 +187,7 @@ public void init() throws Exception { int currVersionDeletedCreativeWorks = 0; int currVersionDeletedTriples = 0; int totalRandomTriplesSoFar = DataManager.randomCreativeWorkTriples.intValue(); - LOGGER.info("totalRandomTriplesSoFar: " + totalRandomTriplesSoFar); + LOGGER.info("totalRandomTriplesSoFar version " + i + ": " + totalRandomTriplesSoFar); ArrayList cwToBeDeleted = new ArrayList(); @@ -202,22 +195,26 @@ public void init() throws Exception { // random-model ones, take all the random and choose from other data-models (correlations, // major/minor events) as well List randomCreativeWorkIds = new ArrayList(DataManager.randomCreativeWorkIdsList.keySet()); + LOGGER.info("randomCreativeWorkIds size "+ randomCreativeWorkIds.size()); + if(triplesToBeDeleted > totalRandomTriplesSoFar) { - LOGGER.info("Target of " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples exceedes the already (random-model) existing ones (" + String.format(Locale.US, "%,d", totalRandomTriplesSoFar).replace(',', '.') + "). Will choose from other models as well."); + LOGGER.info("Target of " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples exceedes the already (random-model) existing ones (" + String.format(Locale.US, "%,d", totalRandomTriplesSoFar).replace(',', '.') + "). Will choose from clustering and correlation models as well."); // take all the random for (long creativeWorkId : randomCreativeWorkIds) { cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + DataManager.randomCreativeWorkIdsList.get(creativeWorkId) + "#id"); } DataManager.randomCreativeWorkIdsList.clear(); + DataManager.randomCreativeWorkTriples.set(0); currVersionDeletedTriples = totalRandomTriplesSoFar; // as delete-set target have not reached yet, choose the rest from correlations or major/minor - List keys = new ArrayList(DataManager.corrExpCreativeWorkIdsList.keySet()); + List corrExpCreativeWorkIds = new ArrayList(DataManager.corrExpCreativeWorkIdsList.keySet()); + LOGGER.info("corrExpCreativeWorkIds size: "+DataManager.corrExpCreativeWorkIdsList.size()); while (currVersionDeletedTriples < triplesToBeDeleted) { - int creativeWorkToBeDeletedIdx = randomGenerator.nextInt(keys.size()); - long creativeWorkToBeDeleted = keys.get(creativeWorkToBeDeletedIdx); + int creativeWorkToBeDeletedIdx = randomGenerator.nextInt(corrExpCreativeWorkIds.size()); + long creativeWorkToBeDeleted = corrExpCreativeWorkIds.get(creativeWorkToBeDeletedIdx); currVersionDeletedTriples += DataManager.corrExpCreativeWorkIdsList.get(creativeWorkToBeDeleted); - keys.remove(creativeWorkToBeDeletedIdx); + corrExpCreativeWorkIds.remove(creativeWorkToBeDeletedIdx); DataManager.corrExpCreativeWorkIdsList.remove(creativeWorkToBeDeleted); cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + creativeWorkToBeDeleted + "#id"); } @@ -225,7 +222,11 @@ public void init() throws Exception { FileUtils.writeLines(new File("/versioning/creativeWorksToBeDeleted.txt") , cwToBeDeleted, false); // extract all triples that have to be deleted using multiple threads + long start = System.currentTimeMillis(); parallelyExtract(i, destinationPath); + long end = System.currentTimeMillis(); + LOGGER.info("extract time: "+(end-start) + " ms"); + currVersionDeletedCreativeWorks += cwToBeDeleted.size(); } else { while (currVersionDeletedTriples < triplesToBeDeleted) { @@ -241,8 +242,10 @@ public void init() throws Exception { FileUtils.writeLines(new File("/versioning/creativeWorksToBeDeleted.txt") , cwToBeDeleted, false); // extract all triples that have to be deleted using multiple threads + long start = System.currentTimeMillis(); parallelyExtract(i, destinationPath); - currVersionDeletedCreativeWorks += cwToBeDeleted.size(); + long end = System.currentTimeMillis(); + LOGGER.info("extract time: "+(end-start) + " ms"); currVersionDeletedCreativeWorks += cwToBeDeleted.size(); } preVersionDeletedCWs = currVersionDeletedCreativeWorks; long deleteSetEnd = System.currentTimeMillis(); From cad5577cfbd999d18fc47e153253a375bb9f9efe Mon Sep 17 00:00:00 2001 From: papv Date: Tue, 12 Dec 2017 16:31:39 +0200 Subject: [PATCH 7/8] fix typo --- .../versioning/components/VersioningDataGenerator.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java index 322e96c..17c8270 100644 --- a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java +++ b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java @@ -201,7 +201,7 @@ public void init() throws Exception { LOGGER.info("Target of " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples exceedes the already (random-model) existing ones (" + String.format(Locale.US, "%,d", totalRandomTriplesSoFar).replace(',', '.') + "). Will choose from clustering and correlation models as well."); // take all the random for (long creativeWorkId : randomCreativeWorkIds) { - cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + DataManager.randomCreativeWorkIdsList.get(creativeWorkId) + "#id"); + cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + creativeWorkId + "#id"); } DataManager.randomCreativeWorkIdsList.clear(); DataManager.randomCreativeWorkTriples.set(0); @@ -245,6 +245,7 @@ public void init() throws Exception { long start = System.currentTimeMillis(); parallelyExtract(i, destinationPath); long end = System.currentTimeMillis(); + DataManager.randomCreativeWorkTriples.addAndGet(-currVersionDeletedTriples); LOGGER.info("extract time: "+(end-start) + " ms"); currVersionDeletedCreativeWorks += cwToBeDeleted.size(); } preVersionDeletedCWs = currVersionDeletedCreativeWorks; From 62f252c0bf8efee7c8d9d93cbfaec39f70a873f4 Mon Sep 17 00:00:00 2001 From: papv Date: Wed, 13 Dec 2017 14:57:53 +0200 Subject: [PATCH 8/8] remove debug messages --- .../components/VersioningDataGenerator.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java index 17c8270..2e71776 100644 --- a/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java +++ b/src/main/java/org/hobbit/benchmark/versioning/components/VersioningDataGenerator.java @@ -187,7 +187,6 @@ public void init() throws Exception { int currVersionDeletedCreativeWorks = 0; int currVersionDeletedTriples = 0; int totalRandomTriplesSoFar = DataManager.randomCreativeWorkTriples.intValue(); - LOGGER.info("totalRandomTriplesSoFar version " + i + ": " + totalRandomTriplesSoFar); ArrayList cwToBeDeleted = new ArrayList(); @@ -195,8 +194,6 @@ public void init() throws Exception { // random-model ones, take all the random and choose from other data-models (correlations, // major/minor events) as well List randomCreativeWorkIds = new ArrayList(DataManager.randomCreativeWorkIdsList.keySet()); - LOGGER.info("randomCreativeWorkIds size "+ randomCreativeWorkIds.size()); - if(triplesToBeDeleted > totalRandomTriplesSoFar) { LOGGER.info("Target of " + String.format(Locale.US, "%,d", triplesToBeDeleted).replace(',', '.') + " triples exceedes the already (random-model) existing ones (" + String.format(Locale.US, "%,d", totalRandomTriplesSoFar).replace(',', '.') + "). Will choose from clustering and correlation models as well."); // take all the random @@ -206,14 +203,15 @@ public void init() throws Exception { DataManager.randomCreativeWorkIdsList.clear(); DataManager.randomCreativeWorkTriples.set(0); currVersionDeletedTriples = totalRandomTriplesSoFar; - + // as delete-set target have not reached yet, choose the rest from correlations or major/minor List corrExpCreativeWorkIds = new ArrayList(DataManager.corrExpCreativeWorkIdsList.keySet()); - LOGGER.info("corrExpCreativeWorkIds size: "+DataManager.corrExpCreativeWorkIdsList.size()); + int corrExpTotalTriples = 0; while (currVersionDeletedTriples < triplesToBeDeleted) { int creativeWorkToBeDeletedIdx = randomGenerator.nextInt(corrExpCreativeWorkIds.size()); long creativeWorkToBeDeleted = corrExpCreativeWorkIds.get(creativeWorkToBeDeletedIdx); currVersionDeletedTriples += DataManager.corrExpCreativeWorkIdsList.get(creativeWorkToBeDeleted); + corrExpTotalTriples += DataManager.corrExpCreativeWorkIdsList.get(creativeWorkToBeDeleted); corrExpCreativeWorkIds.remove(creativeWorkToBeDeletedIdx); DataManager.corrExpCreativeWorkIdsList.remove(creativeWorkToBeDeleted); cwToBeDeleted.add("http://www.bbc.co.uk/things/" + getGeneratorId() + "-" + creativeWorkToBeDeleted + "#id"); @@ -225,7 +223,7 @@ public void init() throws Exception { long start = System.currentTimeMillis(); parallelyExtract(i, destinationPath); long end = System.currentTimeMillis(); - LOGGER.info("extract time: "+(end-start) + " ms"); + DataManager.corrExpCreativeWorkTriples.addAndGet(-corrExpTotalTriples); currVersionDeletedCreativeWorks += cwToBeDeleted.size(); } else { @@ -246,7 +244,7 @@ public void init() throws Exception { parallelyExtract(i, destinationPath); long end = System.currentTimeMillis(); DataManager.randomCreativeWorkTriples.addAndGet(-currVersionDeletedTriples); - LOGGER.info("extract time: "+(end-start) + " ms"); currVersionDeletedCreativeWorks += cwToBeDeleted.size(); + currVersionDeletedCreativeWorks += cwToBeDeleted.size(); } preVersionDeletedCWs = currVersionDeletedCreativeWorks; long deleteSetEnd = System.currentTimeMillis();