Skip to content

Commit

Permalink
Workflow + logging + docker compose
Browse files Browse the repository at this point in the history
  • Loading branch information
JPugetGil committed Oct 20, 2024
1 parent 68878e1 commit 44c4158
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 13 deletions.
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ services:
container_name: blazegraph
environment:
- "BLAZEGRAPH_QUADS=true"
- "BLAZEGRAPH_TIMEOUT=600000"
- "BLAZEGRAPH_MEMORY=12G"
deploy:
resources:
limits:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,25 @@ public Integer importModel(MultipartFile file) throws RiotException {
try (InputStream inputStream = file.getInputStream()) {
Long start = System.nanoTime();

Long startBatching = System.nanoTime();
getQuadsStreamRDF(inputStream, file.getOriginalFilename(), version.getIndexVersion())
.finish();
Long endBatching = System.nanoTime();
log.info("[Measure] (Batching): {} ns for file: {};", endBatching - startBatching, file.getOriginalFilename());

log.info("Saving quads to catalog");
Long startCatalog = System.nanoTime();
rdfResourceRepository.flatModelQuadsToCatalog();
Long endCatalog = System.nanoTime();
log.info("[Measure] (Catalog): {} ns for file: {};", endCatalog - startCatalog, file.getOriginalFilename());

log.info("Condensing quads to catalog");
Long startCondensing = System.nanoTime();
rdfVersionedQuadRepository.condenseModel();
rdfVersionedQuadRepository.updateValidityVersionedQuad();
Long endCondensing = System.nanoTime();
log.info("[Measure] (Condensing): {} ns for file: {};", endCondensing - startCondensing, file.getOriginalFilename());

flatModelQuadRepository.deleteAll();

Long end = System.nanoTime();
Expand Down
7 changes: 6 additions & 1 deletion workflows/bsbm/download-2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@ if [ "$#" -ne 4 ]; then
exit 1
fi

# Get the argument
# Get the argument for bsbm
versions_number=$1
products_number=$2
products_steps=$3
variability=$4

# Randomize another bsbm dataset
random_product_number=$(( ( RANDOM % 5 ) + 3 ))
random_product_step=$(( ( RANDOM % 3 ) + 1 ))

# Run the command versions_number times
docker run --name "bsbm-$versions_number-$products_number" -v "$PWD:/app/data" vcity/bsbm generate-n "$versions_number" "$products_number" "$products_steps" "$variability"
docker run --name "bsbm-alt-$versions_number-$products_number" -v "$PWD:/app/data" -e "DATA_DESTINATION=alt" vcity/bsbm generate-n "$versions_number" "$random_product_number" "$random_product_step" "$variability"

# Cleaning metadata
docker ps --filter name=bsbm-* -aq | xargs docker stop | xargs docker rm
Expand Down
45 changes: 45 additions & 0 deletions workflows/bsbm/import_relational-1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash

######################################################
# Import the data inside the Relational Database (through quads-loader import endpoint)
#
# Usage: import_relational-1.sh NUMBER_OF_VERSIONS
#
#   NUMBER_OF_VERSIONS  highest version number to import; files whose name
#                       carries a greater version number are skipped.
#
# Every "*.ttl.trig" file found under ../dataset/quads/alt/relational
# (relative to the caller's working directory) is sent as a multipart form
# to http://localhost:8080/import/version. The wall-clock duration of each
# request is printed as a "[Measure]" line, in milliseconds.
######################################################

number_of_versions=$1

# Reject a missing or non-numeric argument up front; otherwise the integer
# comparison inside the loop fails once per file with
# "integer expression expected" and nothing gets imported.
if ! [[ "$number_of_versions" =~ ^[0-9]+$ ]]; then
    echo "Usage: $0 NUMBER_OF_VERSIONS (non-negative integer)" >&2
    exit 1
fi

cd ../dataset/quads/alt/relational || exit

# Print a timestamped log entry: leading newline, no trailing newline.
# The message is passed as a printf argument rather than as the format string,
# so a file name containing '%' or '\' is printed verbatim.
log_line() {
    printf '\n%s - %s' "$(date +%FT%T)" "$1"
}

# Captures the version number of a file named like dataset-{version}.ttl.trig
version_pattern='-([0-9]+)\.ttl\.trig$'

echo "------------------------------------------------------------ [BEGIN IMPORT RELATIONAL] ------------------------------------------------------------"
log_line "[quads-loader] Dataset import started."

## BSBM tagged data
### Import the versions of BSBM
log_line "[quads-loader] Versions import started."

# NOTE(review): files are imported in the order `find` returns them, which is
# not guaranteed to be the numeric version order - confirm whether the import
# endpoint depends on versions arriving in ascending order.
find . -type f -name "*.ttl.trig" -print0 | while IFS= read -r -d '' file
do
    # Skip files whose name does not carry a version number
    [[ "$file" =~ $version_pattern ]] || continue
    version=${BASH_REMATCH[1]}

    # Check if the version is less than or equal to the specified number_of_versions
    if [ "$version" -le "$number_of_versions" ]; then
        log_line "[quads-loader] Version $file"
        # Millisecond timestamps (GNU date) taken around the upload
        start=$(date +%s%3N)
        curl --location 'http://localhost:8080/import/version' \
            --header 'Content-Type: multipart/form-data' \
            --connect-timeout 60 \
            --form file=@"$file"
        end=$(date +%s%3N)
        log_line "[Measure] (Import STS $file):$((end-start))ms;"
    fi
done

log_line "[quads-loader] Versions import completed."

### Import the version transitions
# Not implemented yet: no version transition is imported by this script.

log_line "[quads-loader] Dataset import completed."

echo "------------------------------------------------------------- [END IMPORT RELATIONAL] -------------------------------------------------------------"
8 changes: 4 additions & 4 deletions workflows/bsbm/import_relational-2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Import the data inside the Relational Database (through quads-loader import endpoint)
######################################################

cd ../dataset/quads/relational || exit
cd ../dataset/quads/data/relational || exit

echo "------------------------------------------------------------ [BEGIN IMPORT RELATIONAL] ------------------------------------------------------------"
printf "\n%s$(date +%FT%T) - [quads-loader] Dataset import started."
Expand All @@ -15,10 +15,10 @@ printf "\n%s$(date +%FT%T) - [quads-loader] Versions import started."

number_of_versions=$1

find . -type f -name "*.ttl.relational.trig" -print0 | while IFS= read -r -d '' file
find . -type f -name "*.ttl.trig" -print0 | while IFS= read -r -d '' file
do
# Extract version number from the file name (assuming the format dataset-{version}.ttl.relational.trig)
version=$(echo "$file" | grep -oP '(?<=-)\d+(?=.ttl\.relational\.trig)')
# Extract version number from the file name (assuming the format dataset-{version}.ttl.trig)
version=$(echo "$file" | grep -oP '(?<=-)\d+(?=.ttl\.trig)')

# Check if the version is less than or equal to the specified number_of_versions
if [ "$version" -le "$number_of_versions" ]; then
Expand Down
42 changes: 42 additions & 0 deletions workflows/bsbm/import_triple-1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash

######################################################
# Import the data inside the Triple Store
#
# Usage: import_triple-1.sh NUMBER_OF_VERSIONS
#
#   NUMBER_OF_VERSIONS  highest version number to import; files whose name
#                       carries a greater version number are skipped.
#
# Every versioned "*.trig" file found under ../dataset/quads/alt/theoretical
# (relative to the caller's working directory) is POSTed as application/x-trig
# to http://localhost:9999/blazegraph/sparql, followed by
# theoretical_annotations.trig. The wall-clock duration of each request is
# printed as a "[Measure]" line, in milliseconds.
######################################################

number_of_versions=$1

# Reject a missing or non-numeric argument up front; otherwise the integer
# comparison inside the loop fails once per file with
# "integer expression expected" and no version gets imported.
if ! [[ "$number_of_versions" =~ ^[0-9]+$ ]]; then
    echo "Usage: $0 NUMBER_OF_VERSIONS (non-negative integer)" >&2
    exit 1
fi

cd ../dataset/quads/alt/theoretical || exit

# Print a timestamped log entry: leading newline, no trailing newline.
# The message is passed as a printf argument rather than as the format string,
# so a file name containing '%' or '\' is printed verbatim.
log_line() {
    printf '\n%s - %s' "$(date +%FT%T)" "$1"
}

# POST one TriG file to the Blazegraph SPARQL endpoint and print how long the
# request took, in milliseconds (GNU date).
import_trig() {
    local trig_file=$1
    local start end
    start=$(date +%s%3N)
    curl -X POST --location 'http://localhost:9999/blazegraph/sparql' \
        --header 'Content-Type:application/x-trig' \
        --connect-timeout 60 \
        --data-binary @"$trig_file"
    end=$(date +%s%3N)
    log_line "[Measure] (Import BG $trig_file):$((end-start))ms;"
}

# Captures the version number of a file named like dataset-{version}.ttl.trig
version_pattern='-([0-9]+)\.ttl\.trig$'

echo "-------------------------------------------------------------- [BEGIN IMPORT TRIPLE] --------------------------------------------------------------"
log_line "[Triple Store] Dataset import started."

## BSBM tagged data
# NOTE(review): files are imported in the order `find` returns them, which is
# not guaranteed to be the numeric version order - confirm that this is fine.
find . -type f -name "*.trig" -print0 | while IFS= read -r -d '' file
do
    # "*.trig" also matches files without a version number (for instance
    # theoretical_annotations.trig, imported separately below): skip them
    # instead of feeding an empty value to the integer comparison.
    [[ "$file" =~ $version_pattern ]] || continue
    version=${BASH_REMATCH[1]}

    # Check if the version is less than or equal to the specified number_of_versions
    if [ "$version" -le "$number_of_versions" ]; then
        log_line "[Triple Store] $file."
        import_trig "$file"
    fi
done

# The annotations file is imported once, after the versioned files
import_trig "theoretical_annotations.trig"

log_line "[Triple Store] Dataset import completed."
echo "--------------------------------------------------------------- [END IMPORT TRIPLE] ---------------------------------------------------------------"
6 changes: 3 additions & 3 deletions workflows/bsbm/import_triple-2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Import the data inside the Triple Store
######################################################

cd ../dataset/quads/theoretical || exit
cd ../dataset/quads/data/theoretical || exit

echo "-------------------------------------------------------------- [BEGIN IMPORT TRIPLE] --------------------------------------------------------------"
printf "\n%s$(date +%FT%T) - [Triple Store] Dataset import started."
Expand All @@ -14,8 +14,8 @@ number_of_versions=$1
## BSBM tagged data
find . -type f -name "*.trig" -print0 | while IFS= read -r -d '' file
do
# Extract version number from the file name (assuming the format dataset-{version}.ttl.theoretical.trig)
version=$(echo "$file" | grep -oP '(?<=-)\d+(?=.ttl\.theoretical\.trig)')
# Extract version number from the file name (assuming the format dataset-{version}.ttl.trig)
version=$(echo "$file" | grep -oP '(?<=-)\d+(?=.ttl\.trig)')

# Check if the version is less than or equal to the specified number_of_versions
if [ "$version" -le "$number_of_versions" ]; then
Expand Down
11 changes: 6 additions & 5 deletions workflows/bsbm/transform-2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ cd ../dataset || exit

echo "---------------------------------------------------------------- [BEGIN TRANSFORM] ----------------------------------------------------------------"

mkdir -p quads/relational
mkdir -p quads/theoretical

## Transform data as quads
printf "\n%s$(date +%FT%T) - [Transformations] Version annotation started.\n"

docker run --name "annotate_graph-theoretical" -v "$PWD:/data" vcity/annotate_graph "/data/quads/theoretical" "/data/triples" "*" theoretical BSBM
docker run --name "annotate_graph-relational" -v "$PWD:/data" vcity/annotate_graph "/data/quads/relational" "/data/triples" "*" relational BSBM
docker run --name "annotate_graph-theoretical-data" -v "$PWD:/data" vcity/annotate_graph "/data/quads/data/theoretical" "/data/triples/data" "*" theoretical BSBM
docker run --name "annotate_graph-relational-data" -v "$PWD:/data" vcity/annotate_graph "/data/quads/data/relational" "/data/triples/data" "*" relational BSBM

docker run --name "annotate_graph-theoretical-alt" -v "$PWD:/data" vcity/annotate_graph "/data/quads/alt/theoretical" "/data/triples/alt" "*" theoretical BSBM-alt
docker run --name "annotate_graph-relational-alt" -v "$PWD:/data" vcity/annotate_graph "/data/quads/alt/relational" "/data/triples/alt" "*" relational BSBM-alt

docker ps --filter name=annotate_graph-* -aq | xargs docker stop | xargs docker rm

printf "\n%s$(date +%FT%T) - [Transformations] Version annotation completed."
Expand Down
2 changes: 2 additions & 0 deletions workflows/workflow-bsbm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ echo "----------------------------------------------------------------- [BEGIN W
/bin/bash ./bsbm/transform-2.sh

start_import_relational=$(date +%s%3N)
/bin/bash ./bsbm/import_relational-1.sh "$1"
/bin/bash ./bsbm/import_relational-2.sh "$1"
end_import_relational=$(date +%s%3N)
printf "[Measure] {Import relational} Import duration: %s ms\n" "$((end_import_relational-start_import_relational))"

start_import_triple=$(date +%s%3N)
/bin/bash ./bsbm/import_triple-1.sh "$1"
/bin/bash ./bsbm/import_triple-2.sh "$1"
end_import_triple=$(date +%s%3N)
printf "[Measure] {Import triple} Import duration: %s ms\n" "$((end_import_triple-start_import_triple))"
Expand Down

0 comments on commit 44c4158

Please sign in to comment.