diff --git a/.github/workflows/lint.yml b/.github/workflows/check_version.yml similarity index 79% rename from .github/workflows/lint.yml rename to .github/workflows/check_version.yml index f2fbcf424..770bf137c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/check_version.yml @@ -1,4 +1,5 @@ -name: lint +# Checks if version number has been updated +name: Version Check on: pull_request jobs: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index cd3373c97..000000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,99 +0,0 @@ -name: ci -on: - push: - branches: - - main -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 17 - uses: actions/setup-java@v3 - with: - java-version: '17' - distribution: 'adopt' - - name: Cache Maven packages - uses: actions/cache@v2 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 - - uses: actions/setup-python@v2 - with: - python-version: 3.x - - shell: bash - run: mvn help:evaluate -Dexpression=major.minor.version -q -DforceStdout > version.log - - shell: bash - run: mvn help:evaluate -Dexpression=project.artifactId -q -DforceStdout > artifactid.log - - name: Set env version - run: echo "MM_VERSION=$(cat version.log)" >> $GITHUB_ENV - - name: Set env version - run: echo "RELEASE_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)" >> $GITHUB_ENV - - name: Set env name - run: echo "RELEASE_ARTIFACTID=$(cat artifactid.log)" >> $GITHUB_ENV - - name: test - run: echo ${{ env.RELEASE_VERSION }} ${{ env.RELEASE_ARTIFACTID }} - - run: pip install mkdocs-material - - run: pip install mkdocs-macros-plugin - - run: sed -i "s/\$VERSION/$(cat version.log)/g" mkdocs.yml - - run: sed -i "s/\$RELEASE_VERSION/${{ env.RELEASE_VERSION }}/g" mkdocs.yml - - run: mkdocs build -d site/$(cat version.log) - - run: mvn install -Dmaven.test.skip=true - - run: mvn 
javadoc:javadoc - - run: sed -i "s/\$VERSION/$(cat version.log)/g" .github/pages/latest.html - - run: sed -i "s/\$VERSION/$(cat version.log)/g" .github/pages/javadoc-latest.html - - name: Deploy Site - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./site/${{ env.MM_VERSION }} - destination_dir: ./docs/${{ env.MM_VERSION }} - - name: Deploy Javadoc - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./javadoc/${{ env.MM_VERSION }} - destination_dir: ./javadoc/${{ env.MM_VERSION }} - - name: Deploy latest.html - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: .github/pages/ - keep_files: true - destination_dir: ./docs/ - - name: Deploy latest.html - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: .github/pages/ - keep_files: true - destination_dir: ./docs/ - - run: mkdir iguana - - run: cp target/start-iguana.sh iguana/ - - run: cp target/iguana-${{ env.RELEASE_VERSION }}.jar iguana/iguana-${{ env.RELEASE_VERSION }}.jar - - run: cp example-suite.yml iguana/ - - run: zip -r iguana-${{ env.RELEASE_VERSION }}.zip iguana/ - - name: Create Release - id: create_release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: v${{ env.RELEASE_VERSION }} - release_name: version ${{ env.RELEASE_VERSION }} - draft: false - prerelease: false - body: "" - - uses: actions/upload-release-asset@v1.0.1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: iguana-${{ env.RELEASE_VERSION }}.zip - asset_name: iguana-${{ env.RELEASE_VERSION }}.zip - asset_content_type: application/zip - - name: Publish package - run: mvn --batch-mode deploy -Dmaven.test.skip=true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff 
--git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 000000000..8e1912a05 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,156 @@ +name: Deployment + +on: + push: + branches: + - main + +jobs: + find_version: + name: Find Release Version + runs-on: ubuntu-latest + outputs: + RELEASE_VERSION: ${{ steps.step_find.outputs.RELEASE_VERSION }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'adopt' + cache: 'maven' + - name: 'Find release version' + run: echo "RELEASE_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)" >> $GITHUB_OUTPUT + id: step_find + + deploy_to_maven: + name: Deploy to Maven Repository + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'adopt' + cache: 'maven' + - name: Publish package + run: mvn --batch-mode deploy -Dmaven.test.skip=true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: 'Upload artifact' + uses: actions/upload-artifact@v4 + with: + if-no-files-found: error + name: 'iguana-jar' + path: 'target/' + + + compile_native: + name: Compile Native Executable + runs-on: ubuntu-latest + needs: find_version + steps: + - uses: actions/checkout@v4 + - name: Set up GraalVM + uses: graalvm/setup-graalvm@v1 + with: + java-version: '21' + cache: 'maven' + - name: 'Compile native-binary' + run: 'mvn -Dagent=true -Pnative package' + - name: 'Upload artifact' + uses: actions/upload-artifact@v4 + with: + name: 'iguana-native' + path: 'target/iguana' + if-no-files-found: error + + deploy_docs: + name: Deploy Documentation + runs-on: ubuntu-latest + needs: find_version + env: + RELEASE_VERSION: ${{ needs.find_version.outputs.RELEASE_VERSION }} + steps: + - uses: actions/checkout@v4 + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'adopt' + 
cache: 'maven' + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.x + cache: 'pip' + - run: pip install mkdocs-material + - run: pip install mkdocs-macros-plugin + - run: sed -i "s/\$VERSION/${{ env.RELEASE_VERSION }}/g" mkdocs.yml + - run: sed -i "s/\$RELEASE_VERSION/${{ env.RELEASE_VERSION }}/g" mkdocs.yml + - run: mkdocs build -d site/${{ env.RELEASE_VERSION }} + - run: mvn javadoc:javadoc + - run: sed -i "s/\$VERSION/${{ env.RELEASE_VERSION }}/g" .github/pages/latest.html + - run: sed -i "s/\$VERSION/${{ env.RELEASE_VERSION }}/g" .github/pages/javadoc-latest.html + - name: Deploy Site + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./site/${{ env.RELEASE_VERSION }} + destination_dir: ./docs/${{ env.RELEASE_VERSION }} + - name: Deploy Javadoc + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./javadoc/${{ env.RELEASE_VERSION }} + destination_dir: ./javadoc/${{ env.RELEASE_VERSION }} + - name: Deploy latest.html + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: .github/pages/ + keep_files: true + destination_dir: ./docs/ + - name: Deploy latest.html + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: .github/pages/ + keep_files: true + destination_dir: ./docs/ + + deploy_gh_release: + runs-on: ubuntu-latest + needs: [compile_native, deploy_to_maven, find_version] + env: + RELEASE_VERSION: ${{ needs.find_version.outputs.RELEASE_VERSION }} + + steps: + - name: Download artifacts from previous jobs + uses: actions/download-artifact@v4 + with: + path: artifacts/ + merge-multiple: true + - name: Prepare files + run: | + mkdir iguana + cp artifacts/start-iguana.sh iguana/ + cp artifacts/iguana.jar iguana/iguana.jar + cp artifacts/iguana iguana/iguana + cp example-suite.yml iguana/ + zip -r iguana-${{ env.RELEASE_VERSION 
}}.zip iguana/ + - name: Create Release + uses: softprops/action-gh-release@v2 + with: + tag_name: v${{ env.RELEASE_VERSION }} + name: version ${{ env.RELEASE_VERSION }} + draft: false + prerelease: false + body: "" + fail_on_unmatched_files: true + make_latest: true + token: ${{ secrets.GITHUB_TOKEN }} + files: | + iguana-${{ env.RELEASE_VERSION }}.zip + artifacts/iguana.jar + artifacts/iguana diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml deleted file mode 100644 index 07bc310a7..000000000 --- a/.github/workflows/maven.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: testing - -on: - push: - branches: - - develop - pull_request: - branches: - - develop - - main - -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 17 - uses: actions/setup-java@v3 - with: - java-version: '17' - distribution: 'adopt' - - name: Cache Maven packages - uses: actions/cache@v2 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 - - name: Testing the Java code - run: mvn install diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 000000000..400e0a527 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,45 @@ +name: Tests + +on: + push: + branches: + - develop + pull_request: + branches: + - develop + - main + +jobs: + tests: + name: Compile and Run Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'adopt' + - name: Cache Maven packages + uses: actions/cache@v2 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 + - name: Testing the Java code + run: mvn package + + # Only run for pull request on main or if pushed to develop + compile_native: + if: github.base_ref == 'main' || github.event_name == 'push' + name: Test Native 
Executable Compilation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up GraalVM + uses: graalvm/setup-graalvm@v1 + with: + java-version: '21' + cache: 'maven' + - name: 'Compile native-binary and run tests' + run: 'mvn -Pnative -Dagent=true package' diff --git a/README.md b/README.md deleted file mode 100644 index 4ff9dad36..000000000 --- a/README.md +++ /dev/null @@ -1,88 +0,0 @@ -

- IGUANA Logo -

- -# IGUANA -Iguana is a benchmarking framework for testing the read performances of HTTP endpoints. -It is mostly designed for benchmarking triplestores by using the SPARQL protocol. -Iguana stresstests endpoints by simulating users which send a set of queries independently of each other. - -Benchmarks are configured using a YAML-file, this allows them to be easily repeated and adjustable. -Results are stored in RDF-files and can also be exported as CSV-files. - -## Features -- Benchmarking of (SPARQL) HTTP endpoints -- Reusable configuration -- Calculation of various metrics for better comparisons -- Processing of HTTP responses (e.g., results counting) - -## Setup - -### Prerequisites -You need to have `Java 17` or higher installed. -On Ubuntu it can be installed by executing the following command: - -```bash -sudo apt install openjdk-17-jre -``` - -### Download -The latest release can be downloaded at https://github.com/dice-group/IGUANA/releases/latest. -The zip file contains three files: - -* `iguana-4.0.0.jar` -* `example-suite.yml` -* `start-iguana.sh` - -### Configuration -The `example-suite.yml` file contains an extensive configuration for a benchmark suite. -It can be used as a starting point for your own benchmark suite. -For a detailed explanation of the configuration, see the [configuration](./configuration/overview.md) documentation. - -## Usage -Start Iguana with a benchmark suite (e.g., the `example-suite.yml`) either by using the start script: - -```bash -./start-iguana.sh example-suite.yml -``` - -or by directly executing the jar-file: - -```bash -java -jar iguana-4.0.0.jar example-suite.yml -``` - -If you're using the script, you can use JVM arguments by setting the environment variable `IGUANA_JVM`. 
-For example, to let Iguana use 4GB of RAM you can set `IGUANA_JVM` as follows: - -```bash -export IGUANA_JVM=-Xmx4g -``` - -# How to Cite - -```bibtex -@InProceedings{10.1007/978-3-319-68204-4_5, -author="Conrads, Lixi -and Lehmann, Jens -and Saleem, Muhammad -and Morsey, Mohamed -and Ngonga Ngomo, Axel-Cyrille", -editor="d'Amato, Claudia -and Fernandez, Miriam -and Tamma, Valentina -and Lecue, Freddy -and Cudr{\'e}-Mauroux, Philippe -and Sequeda, Juan -and Lange, Christoph -and Heflin, Jeff", -title="Iguana: A Generic Framework for Benchmarking the Read-Write Performance of Triple Stores", -booktitle="The Semantic Web -- ISWC 2017", -year="2017", -publisher="Springer International Publishing", -address="Cham", -pages="48--65", -abstract="The performance of triples stores is crucial for applications driven by RDF. Several benchmarks have been proposed that assess the performance of triple stores. However, no integrated benchmark-independent execution framework for these benchmarks has yet been provided. We propose a novel SPARQL benchmark execution framework called Iguana. Our framework complements benchmarks by providing an execution environment which can measure the performance of triple stores during data loading, data updates as well as under different loads and parallel requests. Moreover, it allows a uniform comparison of results on different benchmarks. We execute the FEASIBLE and DBPSB benchmarks using the Iguana framework and measure the performance of popular triple stores under updates and parallel user requests. 
We compare our results (See https://doi.org/10.6084/m9.figshare.c.3767501.v1) with state-of-the-art benchmarking results and show that our benchmark execution framework can unveil new insights pertaining to the performance of triple stores.", -isbn="978-3-319-68204-4" -} -``` \ No newline at end of file diff --git a/README.md b/README.md new file mode 120000 index 000000000..0e01b4308 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +docs/README.md \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index 4ff9dad36..ee5544e03 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,88 +1,107 @@ -

- IGUANA Logo -

- -# IGUANA -Iguana is a benchmarking framework for testing the read performances of HTTP endpoints. -It is mostly designed for benchmarking triplestores by using the SPARQL protocol. -Iguana stresstests endpoints by simulating users which send a set of queries independently of each other. - -Benchmarks are configured using a YAML-file, this allows them to be easily repeated and adjustable. -Results are stored in RDF-files and can also be exported as CSV-files. - -## Features -- Benchmarking of (SPARQL) HTTP endpoints -- Reusable configuration -- Calculation of various metrics for better comparisons -- Processing of HTTP responses (e.g., results counting) - -## Setup - -### Prerequisites -You need to have `Java 17` or higher installed. -On Ubuntu it can be installed by executing the following command: - -```bash -sudo apt install openjdk-17-jre -``` - -### Download -The latest release can be downloaded at https://github.com/dice-group/IGUANA/releases/latest. -The zip file contains three files: - -* `iguana-4.0.0.jar` -* `example-suite.yml` -* `start-iguana.sh` - -### Configuration -The `example-suite.yml` file contains an extensive configuration for a benchmark suite. -It can be used as a starting point for your own benchmark suite. -For a detailed explanation of the configuration, see the [configuration](./configuration/overview.md) documentation. - -## Usage -Start Iguana with a benchmark suite (e.g., the `example-suite.yml`) either by using the start script: - -```bash -./start-iguana.sh example-suite.yml -``` - -or by directly executing the jar-file: - -```bash -java -jar iguana-4.0.0.jar example-suite.yml -``` - -If you're using the script, you can use JVM arguments by setting the environment variable `IGUANA_JVM`. 
-For example, to let Iguana use 4GB of RAM you can set `IGUANA_JVM` as follows: - -```bash -export IGUANA_JVM=-Xmx4g -``` - -# How to Cite - -```bibtex -@InProceedings{10.1007/978-3-319-68204-4_5, -author="Conrads, Lixi -and Lehmann, Jens -and Saleem, Muhammad -and Morsey, Mohamed -and Ngonga Ngomo, Axel-Cyrille", -editor="d'Amato, Claudia -and Fernandez, Miriam -and Tamma, Valentina -and Lecue, Freddy -and Cudr{\'e}-Mauroux, Philippe -and Sequeda, Juan -and Lange, Christoph -and Heflin, Jeff", -title="Iguana: A Generic Framework for Benchmarking the Read-Write Performance of Triple Stores", -booktitle="The Semantic Web -- ISWC 2017", -year="2017", -publisher="Springer International Publishing", -address="Cham", -pages="48--65", -abstract="The performance of triples stores is crucial for applications driven by RDF. Several benchmarks have been proposed that assess the performance of triple stores. However, no integrated benchmark-independent execution framework for these benchmarks has yet been provided. We propose a novel SPARQL benchmark execution framework called Iguana. Our framework complements benchmarks by providing an execution environment which can measure the performance of triple stores during data loading, data updates as well as under different loads and parallel requests. Moreover, it allows a uniform comparison of results on different benchmarks. We execute the FEASIBLE and DBPSB benchmarks using the Iguana framework and measure the performance of popular triple stores under updates and parallel user requests. We compare our results (See https://doi.org/10.6084/m9.figshare.c.3767501.v1) with state-of-the-art benchmarking results and show that our benchmark execution framework can unveil new insights pertaining to the performance of triple stores.", -isbn="978-3-319-68204-4" -} +

+ IGUANA Logo +

+ +# IGUANA +Iguana is a benchmarking framework for testing the read performances of HTTP endpoints. +It is mostly designed for benchmarking triplestores by using the SPARQL protocol. +Iguana stresstests endpoints by simulating users which send a set of queries independently of each other. + +Benchmarks are configured using a YAML-file, this allows them to be easily repeated and adjustable. +Results are stored in RDF-files and can also be exported as CSV-files. + +## Features +- Benchmarking of (SPARQL) HTTP endpoints +- Reusable configuration +- Calculation of various metrics for better comparisons +- Processing of HTTP responses (e.g., results counting) + +## Setup + +### Prerequisites + +If you're using the native version of IGUANA, you need to have at least a `x86-64-v3` (Intel Haswell and AMD Excavator or newer) system that is running Linux. + +If you're using the Java version of IGUANA, you need to have `Java 17` or higher installed. +On Ubuntu it can be installed by executing the following command: + +```bash +sudo apt install openjdk-17-jre +``` + +### Download +The latest release can be downloaded at https://github.com/dice-group/IGUANA/releases/latest. +The zip file contains four files: + +* `iguana` +* `iguana.jar` +* `example-suite.yml` +* `start-iguana.sh` + +The `iguana` file is a native executable for IGUANA that has been compiled with GraalVM. +The `iguana.jar` file is the standard Java executable for IGUANA. +The `start-iguana.sh` script is a helper script to start IGUANA with the `iguana.jar` file. + +### Configuration +The `example-suite.yml` file contains an extensive configuration for a benchmark suite. +It can be used as a starting point for your own benchmark suite. +For a detailed explanation of the configuration, see the [configuration](./configuration/overview.md) documentation. 
+ +## Usage + +### Native Version + +Start Iguana with a benchmark suite (e.g., the `example-suite.yml`) by executing the binary: + +```bash +./iguana example-suite.yml +``` + +### Java Version + +Start Iguana with a benchmark suite (e.g., the `example-suite.yml`) either by using the start script: + +```bash +./start-iguana.sh example-suite.yml +``` + +or by directly executing the jar-file: + +```bash +java -jar iguana.jar example-suite.yml +``` + +If you're using the script, you can use JVM arguments by setting the environment variable `IGUANA_JVM`. +For example, to let Iguana use 4GB of RAM you can set `IGUANA_JVM` as follows: + +```bash +export IGUANA_JVM=-Xmx4g +``` + +# How to Cite + +```bibtex +@InProceedings{10.1007/978-3-319-68204-4_5, +author="Conrads, Lixi +and Lehmann, Jens +and Saleem, Muhammad +and Morsey, Mohamed +and Ngonga Ngomo, Axel-Cyrille", +editor="d'Amato, Claudia +and Fernandez, Miriam +and Tamma, Valentina +and Lecue, Freddy +and Cudr{\'e}-Mauroux, Philippe +and Sequeda, Juan +and Lange, Christoph +and Heflin, Jeff", +title="Iguana: A Generic Framework for Benchmarking the Read-Write Performance of Triple Stores", +booktitle="The Semantic Web -- ISWC 2017", +year="2017", +publisher="Springer International Publishing", +address="Cham", +pages="48--65", +abstract="The performance of triples stores is crucial for applications driven by RDF. Several benchmarks have been proposed that assess the performance of triple stores. However, no integrated benchmark-independent execution framework for these benchmarks has yet been provided. We propose a novel SPARQL benchmark execution framework called Iguana. Our framework complements benchmarks by providing an execution environment which can measure the performance of triple stores during data loading, data updates as well as under different loads and parallel requests. Moreover, it allows a uniform comparison of results on different benchmarks. 
We execute the FEASIBLE and DBPSB benchmarks using the Iguana framework and measure the performance of popular triple stores under updates and parallel user requests. We compare our results (See https://doi.org/10.6084/m9.figshare.c.3767501.v1) with state-of-the-art benchmarking results and show that our benchmark execution framework can unveil new insights pertaining to the performance of triple stores.", +isbn="978-3-319-68204-4" +} ``` \ No newline at end of file diff --git a/docs/configuration/ahead-of-time-compilation.md b/docs/configuration/ahead-of-time-compilation.md new file mode 100644 index 000000000..3f54387c5 --- /dev/null +++ b/docs/configuration/ahead-of-time-compilation.md @@ -0,0 +1,37 @@ +# Ahead of Time Compilation + +Because IGUANA is written in Java, the benchmark results might become inaccurate due to the architecture of the JVM. +The benchmark results might appear to be slower at the beginning of the execution and faster at the end, even though the +benchmarked system's performance remains constant. + +To minimize this effect, IGUANA uses GraalVM's ahead-of-time compilation feature. +This feature compiles the Java code to a native executable, which can be run without the need for a JVM. + +This section explains how to compile IGUANA with GraalVM and how to use the compiled binary. + +## Prerequisites + +To compile IGUANA with GraalVM, you need to have [GraalVM](https://www.graalvm.org/) installed on your system. +The `native-image` tool also requires some additional libraries to be installed on your system. +The further prerequisites can be found [here](https://www.graalvm.org/latest/reference-manual/native-image/#prerequisites). + +The default target architecture for the native binary is `x86-64-v3` (Intel Haswell and AMD Excavator or newer). +This and other settings can be adjusted in the `pom.xml` file. 
+ +## Compilation + +To compile IGUANA with GraalVM, execute the following command: + +```bash +mvn -Pnative -Dagent=true package +``` + +This command creates a native binary named `iguana` in the `target/` directory. + +## Usage + +The compiled executable can be run like any other executable and behaves the same as the Java version. + +```bash +./iguana +``` diff --git a/graalvm/generate-config.sh b/graalvm/generate-config.sh new file mode 100755 index 000000000..fdd4625f2 --- /dev/null +++ b/graalvm/generate-config.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash + +if [ -z "$GRAALVM_HOME" ]; then + echo "The variable GRAALVM_HOME needs to be set to the GraalVM installation directory." + exit 1 +fi + +SUITE=./graalvm/suite.yml +TARGET_DIR=./target +while getopts ":hs:t:" opt; do + case ${opt} in + h) + echo "Usage: $0 [-h] [-s ]" + echo " -h: Display this help message." + echo " -s : The path to the suite.yml file. Default: ./graalvm/suite.yml" + echo " -t : The location of the maven target directory. Default: ./target/" + exit 0 + ;; + t) + TARGET_DIR=$OPTARG + ;; + s) + SUITE=$OPTARG + ;; + ?) + echo "Invalid option: ${opt}" 1>&2 + exit 1 + ;; + esac +done + +if [ ! -f "$TARGET_DIR"/iguana.jar ]; then + mvn -DskipTests package +fi + +if [ ! -d src/main/resources/META-INF/native-image/ ]; then + mkdir -p src/main/resources/META-INF/native-image/ +fi + +# Move generated configuration files from tests to the resources +if [ -f "$TARGET_DIR"/native/agent-output/test/resource-config.json ]; then + mv "$TARGET_DIR"/native/agent-output/test/* src/main/resources/META-INF/native-image/ +fi + +# Run through multiple different execution paths, so that the tracing agent can generate complete configuration files. 
+"$GRAALVM_HOME"/bin/java -agentlib:native-image-agent=config-merge-dir=src/main/resources/META-INF/native-image/ -jar "$TARGET_DIR"/iguana.jar --help > /dev/null +"$GRAALVM_HOME"/bin/java -agentlib:native-image-agent=config-merge-dir=src/main/resources/META-INF/native-image/ -jar "$TARGET_DIR"/iguana.jar --dry-run -is "$SUITE" > /dev/null +"$GRAALVM_HOME"/bin/java -agentlib:native-image-agent=config-merge-dir=src/main/resources/META-INF/native-image/ -jar "$TARGET_DIR"/iguana.jar --dry-run "$SUITE" > /dev/null + +# there is a bug in the tracing agent that outputs wrong formatted lines in the resource-config.json file (https://github.com/oracle/graal/issues/7985) +sed 's/\\\\E//g' src/main/resources/META-INF/native-image/resource-config.json | sed 's/\\\\Q//g' > src/main/resources/META-INF/native-image/resource-config.json.tmp +mv src/main/resources/META-INF/native-image/resource-config.json.tmp src/main/resources/META-INF/native-image/resource-config.json + +rm -r ./graalvm/results/ diff --git a/graalvm/generate-profile.sh b/graalvm/generate-profile.sh new file mode 100755 index 000000000..5960767ed --- /dev/null +++ b/graalvm/generate-profile.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +# Check if the GRAALVM_HOME variable is set +if [ -z "$GRAALVM_HOME" ]; then + echo "The variable GRAALVM_HOME needs to be set to the GraalVM installation directory." + exit 1 +fi + +# Default value for ARGUMENTS +ARGUMENTS="--gc=G1 -march=x86-64-v3" + +# Parse the command line arguments +while getopts ":hs:a:" opt; do + case ${opt} in + h) + echo "Usage: $0 [-h] [-s ]" + echo " -h: Display this help message." + echo " -s : The path to the suite.yml file" + echo " -a : The arguments to pass to the native-image command. Default: --gc=G1 -march=x86-64-v3" + exit 0 + ;; + s) + SUITE=$OPTARG + ;; + a) + ARGUMENTS="$OPTARG" + ;; + ?) 
+ echo "Invalid option: $OPTARG" 1>&2 + exit 1 + ;; + esac +done + +# Check if suite argument was given +printf "" +if [ -z "$SUITE" ]; then + echo "Argument -s is required." + exit 1 +fi + +# Instrument the application +"$GRAALVM_HOME"/bin/native-image --pgo-instrument $ARGUMENTS -jar ./target/iguana.jar -o "./target/iguana-4.0.0-instrumented" +if [ $? -ne 0 ]; then + echo "Error while instrumenting the application." + exit 1 +fi + +# Generate the profile +./target/iguana-4.0.0-instrumented -XX:ProfilesDumpFile=custom.iprof "$SUITE" +if [ $? -ne 0 ]; then + echo "Error while generating the profile." + exit 1 +fi + +# Compile the application with the profile +"$GRAALVM_HOME"/bin/native-image --pgo=custom.iprof $ARGUMENTS -jar ./target/iguana.jar -o "./target/iguana-4.0.0-pgo" +if [ $? -ne 0 ]; then + echo "Error while compiling the application." + exit 1 +fi diff --git a/graalvm/queries.txt b/graalvm/queries.txt new file mode 100644 index 000000000..b3a425249 --- /dev/null +++ b/graalvm/queries.txt @@ -0,0 +1 @@ +placeholder \ No newline at end of file diff --git a/graalvm/suite.yml b/graalvm/suite.yml new file mode 100644 index 000000000..243127d1f --- /dev/null +++ b/graalvm/suite.yml @@ -0,0 +1,88 @@ +datasets: + - name: "DatasetName" + file: "src/test/resources/dataset.txt" + +connections: + - name: "Blazegraph" + version: "1.1.1" + dataset: "DatasetName" + endpoint: "http://localhost:9999/blazegraph/sparql" + authentication: + user: "user" + password: "test" + updateEndpoint: "http://localhost:3030/ds/update" + updateAuthentication: + user: "updateUser" + password: "password" + +storages: + - type: "rdf file" + path: "graalvm/results/some.ttl" + - type: "csv file" + directory: "graalvm/results/" + - type: "triplestore" + endpoint: "http://localhost:9999/blazegraph/sparql" + user: "user" + password: "test" + baseUri: "http://example.org" + +responseBodyProcessors: + - contentType: "application/sparql-results+json" + threads: 1 + +metrics: + - type: "AES" 
- type: "EachQuery" + - type: "QPS" + - type: "AvgQPS" + - type: "NoQ" + - type: "NoQPH" + - type: "QMPH" + - type: "PAvgQPS" + penalty: 100 + - type: "PQPS" + penalty: 100 + + +tasks: + # 1 hour (time Limit is in ms) + - type: stresstest + warmupWorkers: + # 1 minutes (is in ms) + - type: SPARQLProtocolWorker + number: 1 + queries: + path: "./graalvm/queries.txt" + format: "separator" + separator: ";" + caching: true + order: "random" + seed: 123 + lang: "SPARQL" + timeout: 2s + connection: Blazegraph + completionTarget: + duration: 1s + acceptHeader: "application/sparql-results+json" + requestType: get query + parseResults: true + workers: + - type: "SPARQLProtocolWorker" + number: 1 + queries: + path: "./graalvm/queries.txt" + timeout: 3m + connection: Blazegraph + completionTarget: + duration: 1s + requestType: get query + acceptHeader: "application/sparql-results+json" + - number: 1 + type: "SPARQLProtocolWorker" + connection: Blazegraph + completionTarget: + number: 1 + queries: + path: "./graalvm/queries.txt" + timeout: 100s + acceptHeader: "application/sparql-results+json" diff --git a/pom.xml b/pom.xml index 6347fe757..24c2dd3bf 100644 --- a/pom.xml +++ b/pom.xml @@ -58,11 +58,6 @@ - - org.apache.jena - jena-iri - ${jena.version} - org.apache.jena jena-arq @@ -79,19 +74,9 @@ ${jena.version} - org.apache.httpcomponents - httpclient - 4.5.13 - - - org.apache.logging.log4j - log4j-slf4j-impl - ${log4j.version} - - - org.apache.logging.log4j - log4j-core - ${log4j.version} + ch.qos.logback + logback-classic + 1.4.14 com.fasterxml.jackson.dataformat @@ -145,30 +130,13 @@ 2.35.0 test - - org.apache.maven.plugins - maven-surefire-plugin - 3.1.2 - - - org.springframework.data - spring-data-commons - 3.1.2 - - - org.springframework - spring-context - 6.0.11 - org.apache.httpcomponents.client5 httpclient5 5.3 - - @@ -204,7 +172,7 @@ 3.4.1 false - iguana-${revision} + iguana @@ -262,4 +230,99 @@ + + + + native + + + + org.junit.platform + junit-platform-launcher + 
1.9.2 + test + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + run-script + generate-resources + + exec + + + ${project.basedir}/graalvm/generate-config.sh + + -t + ${project.build.directory} + + + + + cleanup-files + test + + exec + + + bash + + -c + if [ -f ${project.build.directory}/native/agent-output/test/*/resource-config.json ]; then sed "s/\\\\\\\\E//g" ${project.build.directory}/native/agent-output/test/*/resource-config.json | sed "s/\\\\\\\\Q//g" > ${project.build.directory}/resource-config.json.tmp && cp ${project.build.directory}/resource-config.json.tmp ${project.build.directory}/native/agent-output/test/*/resource-config.json; fi + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.1.2 + + + org.graalvm.buildtools + native-maven-plugin + 0.10.1 + true + + + build-native + + compile-no-fork + + package + + + test-native + + test + + test + + + + iguana + + --gc=G1 + -march=x86-64-v3 + --no-fallback + -O3 + -H:-UseCompressedReferences + + + true + + + + + + + + diff --git a/src/main/java/org/aksw/iguana/cc/controller/MainController.java b/src/main/java/org/aksw/iguana/cc/controller/MainController.java index b9291fc38..1190f84e3 100644 --- a/src/main/java/org/aksw/iguana/cc/controller/MainController.java +++ b/src/main/java/org/aksw/iguana/cc/controller/MainController.java @@ -3,12 +3,10 @@ import com.beust.jcommander.*; import org.aksw.iguana.cc.suite.IguanaSuiteParser; import org.aksw.iguana.cc.suite.Suite; -import org.apache.logging.log4j.core.config.Configurator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.net.URI; import java.nio.file.Path; @@ -30,6 +28,9 @@ public Path convert(String value) { @Parameter(names = {"--ignore-schema", "-is"}, description = "Do not check the schema before parsing the suite file.") private boolean ignoreShema = false; + @Parameter(names = {"--dry-run", "-d"}, hidden = true) + public static boolean dryRun = false; + @Parameter(names = 
"--help", help = true) private boolean help; @@ -37,7 +38,6 @@ public Path convert(String value) { private Path suitePath; } - private static final Logger LOGGER = LoggerFactory.getLogger(MainController.class); /** @@ -46,9 +46,7 @@ public Path convert(String value) { * @param argc The command line arguments that are passed to the program. */ public static void main(String[] argc) { - // Apparently, there is something weird going on, where the apache jena library already configures log4j2 for - // some reason. That's why you have to call reconfigure here. - Configurator.reconfigure(URI.create("log4j2.yml")); + // Configurator.reconfigure(URI.create("log4j2.yml")); var args = new Args(); JCommander jc = JCommander.newBuilder() diff --git a/src/main/java/org/aksw/iguana/cc/lang/LanguageProcessor.java b/src/main/java/org/aksw/iguana/cc/lang/LanguageProcessor.java index bd902dd82..ee8868528 100644 --- a/src/main/java/org/aksw/iguana/cc/lang/LanguageProcessor.java +++ b/src/main/java/org/aksw/iguana/cc/lang/LanguageProcessor.java @@ -3,7 +3,6 @@ import org.aksw.iguana.cc.storage.Storable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.data.util.AnnotatedTypeScanner; import java.io.InputStream; import java.lang.annotation.ElementType; @@ -17,6 +16,9 @@ /** * Interface for abstract language processors that work on InputStreams. + * LanguageProcessors are used to process the content of an InputStream and extract relevant information. + * They are used by the Worker to process the response of a request.
+ * LanguageProcessors must be registered in the static block of this class. */ public abstract class LanguageProcessor { @@ -40,17 +42,9 @@ public interface LanguageProcessingData extends Storable { final private static Logger LOGGER = LoggerFactory.getLogger(LanguageProcessor.class); + // Register all available LanguageProcessors here. static { - final var scanner = new AnnotatedTypeScanner(false, ContentType.class); - final var langProcessors = scanner.findTypes("org.aksw.iguana.cc.lang"); - for (Class langProcessor : langProcessors) { - String contentType = langProcessor.getAnnotation(ContentType.class).value(); - if (LanguageProcessor.class.isAssignableFrom(langProcessor)) { - processors.put(contentType, (Class) langProcessor); - } else { - LOGGER.error("Found a class with the ContentType annotation, that doesn't inherit from the class LanguageProcessor: {}", langProcessor.getName()); - } - } + processors.put("application/sparql-results+json", org.aksw.iguana.cc.lang.impl.SaxSparqlJsonResultCountingParser.class); } public static LanguageProcessor getInstance(String contentType) { diff --git a/src/main/java/org/aksw/iguana/cc/storage/impl/TriplestoreStorage.java b/src/main/java/org/aksw/iguana/cc/storage/impl/TriplestoreStorage.java index 994c24af2..d391d3b25 100644 --- a/src/main/java/org/aksw/iguana/cc/storage/impl/TriplestoreStorage.java +++ b/src/main/java/org/aksw/iguana/cc/storage/impl/TriplestoreStorage.java @@ -2,6 +2,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.aksw.iguana.cc.config.elements.StorageConfig; +import org.aksw.iguana.cc.controller.MainController; import org.aksw.iguana.cc.storage.Storage; import org.apache.http.auth.AuthScope; import org.apache.http.auth.Credentials; @@ -17,6 +18,9 @@ import org.apache.jena.update.UpdateFactory; import org.apache.jena.update.UpdateProcessor; import org.apache.jena.update.UpdateRequest; +import org.mortbay.jetty.Main; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
java.io.StringWriter; @@ -29,6 +33,8 @@ */ public class TriplestoreStorage implements Storage { + Logger logger = LoggerFactory.getLogger(TriplestoreStorage.class); + public record Config( @JsonProperty(required = true) String endpoint, String user, @@ -75,7 +81,20 @@ public void storeResult(Model data) { //submit Block to Triple Store UpdateProcessor processor = UpdateExecutionFactory .createRemote(blockRequest, endpoint, createHttpClient()); - processor.execute(); + + // If dry run is enabled, the data will not be sent to an existing triplestore, + // therefore we catch the exception and log it instead of letting the program crash. + // The dry run is used for generating the configuration files for the native compilation with GraalVM. + // For normal runs, exceptions will be thrown normally. + if (MainController.Args.dryRun) { + try { + processor.execute(); + } catch (Exception e) { + logger.error("Error while storing data in triplestore: " + e.getMessage()); + } + } else { + processor.execute(); + } blockRequest = new UpdateRequest(); } diff --git a/src/main/java/org/aksw/iguana/cc/tasks/impl/Stresstest.java b/src/main/java/org/aksw/iguana/cc/tasks/impl/Stresstest.java index 923a1683e..1e93882e1 100644 --- a/src/main/java/org/aksw/iguana/cc/tasks/impl/Stresstest.java +++ b/src/main/java/org/aksw/iguana/cc/tasks/impl/Stresstest.java @@ -43,8 +43,8 @@ public record Result( public Stresstest(String suiteID, long stresstestID, Config config, ResponseBodyProcessorInstances responseBodyProcessorInstances, List storages, List metrics) { // initialize workers - long workerId = 0; if (config.warmupWorkers() != null) { + long workerId = 0; for (HttpWorker.Config workerConfig : config.warmupWorkers()) { for (int i = 0; i < workerConfig.number(); i++) { var responseBodyProcessor = (workerConfig.parseResults()) ? 
responseBodyProcessorInstances.getProcessor(workerConfig.acceptHeader()) : null; @@ -54,6 +54,7 @@ public Stresstest(String suiteID, long stresstestID, Config config, ResponseBody } + long workerId = 0; // declared once for all worker configs so that worker IDs stay unique for (HttpWorker.Config workerConfig : config.workers()) { for (int i = 0; i < workerConfig.number(); i++) { var responseBodyProcessor = (workerConfig.parseResults()) ? responseBodyProcessorInstances.getProcessor(workerConfig.acceptHeader()) : null; workers.add(new SPARQLProtocolWorker(workerId++, responseBodyProcessor, (SPARQLProtocolWorker.Config) workerConfig)); diff --git a/src/main/java/org/aksw/iguana/cc/worker/ResponseBodyProcessor.java b/src/main/java/org/aksw/iguana/cc/worker/ResponseBodyProcessor.java index 6f44574c8..6dcec479d 100644 --- a/src/main/java/org/aksw/iguana/cc/worker/ResponseBodyProcessor.java +++ b/src/main/java/org/aksw/iguana/cc/worker/ResponseBodyProcessor.java @@ -1,11 +1,10 @@ package org.aksw.iguana.cc.worker; import org.aksw.iguana.cc.lang.LanguageProcessor; -import org.aksw.iguana.commons.io.BigByteArrayInputStream; -import org.aksw.iguana.commons.io.BigByteArrayOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.InputStream; import java.text.MessageFormat; import java.time.Duration; import java.util.ArrayList; @@ -46,18 +45,18 @@ public ResponseBodyProcessor(String contentType) { private final ThreadPoolExecutor executor; private final ScheduledExecutorService executorHandler = Executors.newScheduledThreadPool(1); - public boolean add(long contentLength, long xxh64, BigByteArrayOutputStream bbaos) { + public boolean add(long contentLength, long xxh64, InputStream responseBodyStream) { final var key = new Key(contentLength, xxh64); if (seenResponseBodies.add(key)) { - submit(key, bbaos); + submit(key, responseBodyStream); return true; } return false; } - private void submit(Key key, BigByteArrayOutputStream bigByteArrayOutputStream) { + private void submit(Key key, InputStream responseBodyStream) {
final var future = executor.submit(() -> { - var processingResult = languageProcessor.process(new BigByteArrayInputStream(bigByteArrayOutputStream), key.xxh64); + var processingResult = languageProcessor.process(responseBodyStream, key.xxh64); responseDataMetrics.add(processingResult); }); executorHandler.schedule(() -> { diff --git a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java index be90138b4..7745ddb96 100644 --- a/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java +++ b/src/main/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorker.java @@ -10,6 +10,8 @@ import org.aksw.iguana.cc.worker.ResponseBodyProcessor; import org.aksw.iguana.cc.worker.HttpWorker; import org.aksw.iguana.commons.io.BigByteArrayOutputStream; +import org.aksw.iguana.commons.io.ByteArrayListOutputStream; +import org.aksw.iguana.commons.io.ReversibleOutputStream; import org.apache.hc.client5.http.async.methods.AbstractBinResponseConsumer; import org.apache.hc.client5.http.config.RequestConfig; import org.apache.hc.client5.http.impl.DefaultConnectionKeepAliveStrategy; @@ -72,7 +74,7 @@ record HttpExecutionResult( Optional response, Instant requestStart, Duration duration, - Optional outputStream, + Optional outputStream, OptionalLong actualContentLength, OptionalLong hash, Optional exception @@ -98,9 +100,6 @@ public boolean successful() { private final ResponseBodyProcessor responseBodyProcessor; - // declared here, so it can be reused across multiple method calls - private BigByteArrayOutputStream responseBodybbaos = new BigByteArrayOutputStream(); - // used to read the http response body private final byte[] buffer = new byte[BUFFER_SIZE]; private static final int BUFFER_SIZE = 4096; @@ -127,12 +126,12 @@ public SPARQLProtocolWorker(long workerId, ResponseBodyProcessor responseBodyPro */ public static void initHttpClient(int threadCount) { connectionManager = 
PoolingAsyncClientConnectionManagerBuilder.create() - .setMaxConnTotal(threadCount) - .setMaxConnPerRoute(threadCount) + .setMaxConnTotal(threadCount * 1000) + .setMaxConnPerRoute(threadCount * 1000) .build(); final var ioReactorConfig = IOReactorConfig.custom() .setTcpNoDelay(true) - .setIoThreadCount(threadCount) + .setSoKeepAlive(true) .build(); httpClient = HttpAsyncClients.custom() .setConnectionManager(connectionManager) @@ -216,7 +215,6 @@ public CompletableFuture start() { executionStats.add(execution); } - // if ((++queryExecutionCount) >= queryMixSize) { queryExecutionCount = 0; queryMixExecutionCount++; @@ -254,17 +252,7 @@ private ExecutionStats executeQuery(Duration timeout, boolean discardOnFailure) } // process result - if (!responseBodyProcessor.add(result.actualContentLength().orElse(-1), result.hash().orElse(-1), result.outputStream().orElse(new BigByteArrayOutputStream()))) { - this.responseBodybbaos = result.outputStream().orElse(new BigByteArrayOutputStream()); - } else { - this.responseBodybbaos = new BigByteArrayOutputStream(); - } - } - - try { - this.responseBodybbaos.reset(); - } catch (IOException e) { - this.responseBodybbaos = new BigByteArrayOutputStream(); + responseBodyProcessor.add(result.actualContentLength().getAsLong(), result.hash().getAsLong(), result.outputStream.get().toInputStream()); } if (!result.successful() && discardOnFailure) { @@ -328,6 +316,7 @@ private HttpExecutionResult executeHttpRequest(Duration timeout) { private final StreamingXXHash64 hasher = hasherFactory.newStreamingHash64(0); private long responseSize = 0; // will be used if parseResults is false private long responseEnd = 0; // time in nanos + private ReversibleOutputStream responseBody = null; @Override public void releaseResources() {} // nothing to release @@ -345,27 +334,27 @@ protected int capacityIncrement() { */ @Override protected void data(ByteBuffer src, boolean endOfStream) throws IOException { - if (endOfStream) { + if (endOfStream) 
responseEnd = System.nanoTime(); - return; - } + if (responseBody == null) + responseBody = new ByteArrayListOutputStream(); + + responseSize += src.remaining(); if (config.parseResults()) { // if the buffer uses an array, use the array directly if (src.hasArray()) { hasher.update(src.array(), src.position() + src.arrayOffset(), src.remaining()); - responseBodybbaos.write(src.array(), src.position() + src.arrayOffset(), src.remaining()); + responseBody.write(src.array(), src.position() + src.arrayOffset(), src.remaining()); } else { // otherwise, copy the buffer to an array int readCount; while (src.hasRemaining()) { readCount = Math.min(BUFFER_SIZE, src.remaining()); src.get(buffer, 0, readCount); hasher.update(buffer, 0, readCount); - responseBodybbaos.write(buffer, 0, readCount); + responseBody.write(buffer, 0, readCount); } } - } else { - responseSize += src.remaining(); } } @@ -379,6 +368,12 @@ protected void data(ByteBuffer src, boolean endOfStream) throws IOException { @Override protected void start(HttpResponse response, ContentType contentType) { this.response = response; + final var contentLengthHeader = response.getFirstHeader("Content-Length"); + Long contentLength = contentLengthHeader != null ? Long.parseLong(contentLengthHeader.getValue()) : null; + // if the content length is known, create a BigByteArrayOutputStream with the known length + if (contentLength != null && responseBody == null && config.parseResults()) { + responseBody = new BigByteArrayOutputStream(contentLength); + } } /** @@ -405,8 +400,11 @@ protected HttpExecutionResult buildResult() { Long contentLength = contentLengthHeader != null ? 
Long.parseLong(contentLengthHeader.getValue()) : null; if (contentLength != null) { if ((!config.parseResults() && responseSize != contentLength) // if parseResults is false, the responseSize will be used - || (config.parseResults() && responseBodybbaos.size() != contentLength)) { // if parseResults is true, the size of the bbaos will be used - return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, new HttpException("Content-Length header value doesn't match actual content length.")); + || (config.parseResults() && responseBody.size() != contentLength)) { // if parseResults is true, the size of the bbaos will be used + if (responseSize != responseBody.size()) + LOGGER.error("Error during copying the response data. (expected written data size = {}, actual written data size = {}, Content-Length-Header = {})", responseSize, responseBody.size(), contentLengthHeader.getValue()); + final var exception = new HttpException(String.format("Content-Length header value doesn't match actual content length. (Content-Length-Header = %s, written data size = %s)", contentLength, config.parseResults() ? responseBody.size() : responseSize)); + return createFailedResultDuringResponse(queryIndex, response, timeStamp, duration, exception); } } @@ -421,8 +419,8 @@ protected HttpExecutionResult buildResult() { Optional.of(response), timeStamp, Duration.ofNanos(responseEnd - requestStart), - Optional.of(responseBodybbaos), - OptionalLong.of(config.parseResults() ? responseBodybbaos.size() : responseSize), + Optional.of(responseBody), + OptionalLong.of(config.parseResults() ? responseBody.size() : responseSize), OptionalLong.of(config.parseResults() ? hasher.getValue() : 0), Optional.empty() ); @@ -435,10 +433,22 @@ protected HttpExecutionResult buildResult() { // The timeout from the parameter might be reduced if the end of the time limit is near // and it might be so small that it causes issues. 
return future.get(config.timeout().toNanos(), TimeUnit.NANOSECONDS); - } catch (InterruptedException | ExecutionException | TimeoutException e) { + } catch (InterruptedException | ExecutionException e) { // This will close the connection and cancel the request if it's still running. future.cancel(true); return createFailedResultBeforeRequest(queryIndex, e); + } catch (TimeoutException e) { + if (future.isDone()) { + LOGGER.warn("Request finished immediately after timeout but will still be counted as timed out."); + try { + return future.get(); + } catch (InterruptedException | ExecutionException ex) { + return createFailedResultBeforeRequest(queryIndex, ex); + } + } else { + future.cancel(true); + return createFailedResultBeforeRequest(queryIndex, e); + } } } diff --git a/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java b/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java index 2085b4158..02ee4f446 100644 --- a/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java +++ b/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java @@ -3,7 +3,7 @@ import org.apache.hadoop.hbase.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.OutputStream; +import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -22,7 +22,7 @@ * the stream is cleared, all the internal ByteArrayOutputStreams are cleared and a new one is * added to the list. */ -public class BigByteArrayOutputStream extends OutputStream { +public class BigByteArrayOutputStream extends ReversibleOutputStream { /** * The maximum size limit for an array. This is no limit to the amount of bytes {@code BigByteArrayOutputStream} can consume. 
@@ -102,6 +102,7 @@ public void write(BigByteArrayOutputStream bbaos) throws IOException { write(bbaos.toByteArray()); } + @Override public long size() { return baosList.stream().mapToLong(ByteArrayOutputStream::size).sum(); } @@ -201,4 +202,9 @@ public void clear() throws IOException { public void close() throws IOException { this.closed = true; } + + @Override + public InputStream toInputStream() { + return new BigByteArrayInputStream(this); + } } \ No newline at end of file diff --git a/src/main/java/org/aksw/iguana/commons/io/ByteArrayListInputStream.java b/src/main/java/org/aksw/iguana/commons/io/ByteArrayListInputStream.java new file mode 100644 index 000000000..813e77161 --- /dev/null +++ b/src/main/java/org/aksw/iguana/commons/io/ByteArrayListInputStream.java @@ -0,0 +1,163 @@ +package org.aksw.iguana.commons.io; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; + +/** + * An InputStream that reads from a list of byte arrays. + */ +public class ByteArrayListInputStream extends InputStream { + + private final List data; + private Iterator iterator; + private ByteBuffer currentBuffer; + private boolean closed = false; + + /** + * Creates a new ByteArrayListInputStream that reads from the given list of byte arrays. + * The list is not copied, so it should not be modified while the stream is in use. 
+ * + * @param data the list of byte arrays to read from + */ + public ByteArrayListInputStream(List data) { + this.data = data; + this.iterator = data.iterator(); + if (iterator.hasNext()) { + this.currentBuffer = ByteBuffer.wrap(iterator.next()); + } else { + this.currentBuffer = null; + } + } + + private boolean checkBuffer() { + // skip exhausted and zero-length arrays until one with readable bytes is found + while (currentBuffer == null || !currentBuffer.hasRemaining()) { + if (!iterator.hasNext()) { + return false; + } + currentBuffer = ByteBuffer.wrap(iterator.next()); + } + return true; + } + + private void checkNotClosed() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + } + + private int read(byte[] b, int off, int len, int eofCode) throws IOException { + Objects.checkFromIndexSize(off, len, b.length); + if (!checkBuffer()) + return eofCode; + + int read = 0; + int remaining = len; + int bufferRemaining; + while (remaining > 0 && checkBuffer()) { + bufferRemaining = currentBuffer.remaining(); + + // current buffer has enough bytes + if (bufferRemaining >= remaining) { + currentBuffer.get(b, off + read, remaining); + read += remaining; + break; + } + + // else + currentBuffer.get(b, off + read, bufferRemaining); + currentBuffer = null; + read += bufferRemaining; + remaining -= bufferRemaining; + } + return read; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + checkNotClosed(); + return read(b, off, len, -1); + } + + @Override + public byte[] readAllBytes() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int readNBytes(byte[] b, int off, int len) throws IOException { + checkNotClosed(); + return read(b, off, len, 0); + } + + @Override + public long skip(long n) throws IOException { + checkNotClosed(); + long skipped = 0; + long remaining = n; + while (remaining > 0) { + if (!checkBuffer()) + break; + int bufferRemaining = currentBuffer.remaining(); + if (bufferRemaining >= remaining) { +
currentBuffer.position(currentBuffer.position() + (int) remaining); + skipped += remaining; + break; + } + currentBuffer = null; + skipped += bufferRemaining; + remaining -= bufferRemaining; + } + return skipped; + } + + @Override + public void skipNBytes(long n) throws IOException { + long skipped = skip(n); + if (skipped != n) { + throw new EOFException(); + } + } + + @Override + public int available() throws IOException { + return (int) Math.min(Integer.MAX_VALUE, availableLong()); + } + + public long availableLong() throws IOException { + checkNotClosed(); + if (!checkBuffer()) + return 0; + long sum = 0; + boolean foundCurrentBuffer = false; + for (byte[] arr : data) { + if (foundCurrentBuffer) { + sum += arr.length; + } else { + if (arr == currentBuffer.array()) { + foundCurrentBuffer = true; + } + } + } + sum += currentBuffer != null ? currentBuffer.remaining() : 0; + return sum; + } + + @Override + public void close() throws IOException { + closed = true; + } + + @Override + public int read() throws IOException { + checkNotClosed(); + if (!checkBuffer()) + return -1; + return currentBuffer.get() & 0xFF; + } +} diff --git a/src/main/java/org/aksw/iguana/commons/io/ByteArrayListOutputStream.java b/src/main/java/org/aksw/iguana/commons/io/ByteArrayListOutputStream.java new file mode 100644 index 000000000..74d00949b --- /dev/null +++ b/src/main/java/org/aksw/iguana/commons/io/ByteArrayListOutputStream.java @@ -0,0 +1,136 @@ +package org.aksw.iguana.commons.io; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; + +/** + * An OutputStream that writes to a list of byte arrays. + * The buffers have a minimum size. + * If a write operation is smaller than the minimum size, the data is stored in a separate buffer. + * This buffer will be filled up by subsequent writings to the minimum size before another buffer is created. 
+ */ +public class ByteArrayListOutputStream extends ReversibleOutputStream { + + private final int MIN_BUFFER_SIZE; + private ByteBuffer currentBuffer; + private final LinkedList bufferList = new LinkedList<>(); + private boolean closed = false; + + /** + * Creates a new ByteArrayListOutputStream with a minimum buffer size of 4096 bytes. + */ + public ByteArrayListOutputStream() { + MIN_BUFFER_SIZE = 4096; + } + + /** + * Creates a new ByteArrayListOutputStream with the given minimum buffer size. + * + * @param minBufferSize the minimum buffer size, must be at least 1 + */ + public ByteArrayListOutputStream(int minBufferSize) { + if (minBufferSize < 1) { + throw new IllegalArgumentException("minBufferSize must be at least 1"); + } + MIN_BUFFER_SIZE = minBufferSize; + } + + private void checkNotClosed() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + checkNotClosed(); + Objects.checkFromIndexSize(off, len, b.length); + if (currentBuffer == null) { + if (len < MIN_BUFFER_SIZE) { + currentBuffer = ByteBuffer.allocate(MIN_BUFFER_SIZE); + currentBuffer.put(b, off, len); + } else { + final var buffer = new byte[len]; + System.arraycopy(b, off, buffer, 0, len); + bufferList.add(buffer); + } + return; + } + + final var spaceRemaining = currentBuffer.remaining(); + if (spaceRemaining >= len) { + currentBuffer.put(b, off, len); + } else { + currentBuffer.put(b, off, spaceRemaining); + bufferList.add(currentBuffer.array()); + currentBuffer = null; + + if (len - spaceRemaining < MIN_BUFFER_SIZE) { + currentBuffer = ByteBuffer.allocate(MIN_BUFFER_SIZE); + currentBuffer.put(b, off + spaceRemaining, len - spaceRemaining); + } else { + final var buffer = new byte[len - spaceRemaining]; + System.arraycopy(b, off + spaceRemaining, buffer, 0, len - spaceRemaining); + bufferList.add(buffer); + } + } + } + + @Override + public void write(int b) throws IOException { +
checkNotClosed(); + if (currentBuffer == null) { + currentBuffer = ByteBuffer.allocate(MIN_BUFFER_SIZE); + } + if (currentBuffer.remaining() == 0) { + bufferList.add(currentBuffer.array()); + currentBuffer = ByteBuffer.allocate(MIN_BUFFER_SIZE); + } + currentBuffer.put((byte) b); + } + + @Override + public long size() { + long sum = 0; + for (var buffer : bufferList) { + sum += buffer.length; + } + return sum + (currentBuffer == null ? 0 : currentBuffer.position()); + } + + /** + * Returns the list of buffers. + * The list does not contain the current buffer. + * If the stream is closed, the current buffer is trimmed to the actual size and then added to the list. + * + * @return the list of buffers + */ + public List getBuffers() { + return bufferList; + } + + @Override + public void close() throws IOException { + closed = true; + if (currentBuffer != null) { + // trim buffer + final var temp = currentBuffer.array(); + final var buffer = new byte[currentBuffer.position()]; + System.arraycopy(temp, 0, buffer, 0, buffer.length); + bufferList.add(buffer); + currentBuffer = null; + } + } + + @Override + public InputStream toInputStream() { + try { + this.close(); + } catch (IOException ignored) {} // doesn't throw + return new ByteArrayListInputStream(bufferList); + } +} diff --git a/src/main/java/org/aksw/iguana/commons/io/ReversibleOutputStream.java b/src/main/java/org/aksw/iguana/commons/io/ReversibleOutputStream.java new file mode 100644 index 000000000..0a78acade --- /dev/null +++ b/src/main/java/org/aksw/iguana/commons/io/ReversibleOutputStream.java @@ -0,0 +1,13 @@ +package org.aksw.iguana.commons.io; + +import java.io.InputStream; +import java.io.OutputStream; + +/** + * An OutputStream that can be converted to an InputStream. + * The size of the data can be queried. 
+ */ +public abstract class ReversibleOutputStream extends OutputStream { + public abstract InputStream toInputStream(); + public abstract long size(); +} diff --git a/src/main/resources/log4j2.yml b/src/main/resources/log4j2.yml deleted file mode 100644 index 0b5f391be..000000000 --- a/src/main/resources/log4j2.yml +++ /dev/null @@ -1,56 +0,0 @@ -Configuration: - status: info - name: iguana - properties: - property: - name: filename - value: iguana.log - thresholdFilter: - level: debug - appenders: - Console: - name: STDOUT - target: SYSTEM_OUT - PatternLayout: - Pattern: "%highlight{%d [%t] \t %-5p [%c{1}] - <%m>%n}{FATAL=red blink, ERROR=red, WARN=yellow bold, INFO=green, DEBUG=green bold, TRACE=blue}" - disableAnsi: false - File: - name: File - fileName: ${filename} - PatternLayout: - Pattern: "%d [%t] %p [%c] - <%m>%n" - Filters: - ThresholdFilter: - level: warn - - Loggers: - logger: - - name: org.apache.http.client.protocol - level: error - additivity: true - AppenderRef: - - ref: STDOUT - - ref: File - - name: org.reflections.Reflections - level: error - additivity: true - AppenderRef: - - ref: STDOUT - - ref: File - - name: org.apache.http.impl - level: error - additivity: true - AppenderRef: - - ref: STDOUT - - ref: File - - name: org.apache.jena.riot - level: error - additivity: true - AppenderRef: - - ref: STDOUT - - ref: File - Root: - level: info - AppenderRef: - - ref: STDOUT - - ref: File \ No newline at end of file diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml new file mode 100644 index 000000000..d80cde084 --- /dev/null +++ b/src/main/resources/logback.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + %d{HH:mm:ss.SSS} %highlight(%-5level) [%thread] %logger{0} -- %msg%n + + + + + iguana.log + true + + %d{HH:mm:ss.SSS} %-5level [%thread] %logger{0} -- %msg%n + + + + + + + + \ No newline at end of file diff --git a/src/test/java/org/aksw/iguana/cc/storage/impl/CSVStorageTest.java 
b/src/test/java/org/aksw/iguana/cc/storage/impl/CSVStorageTest.java index db1d5ff5f..a77333377 100644 --- a/src/test/java/org/aksw/iguana/cc/storage/impl/CSVStorageTest.java +++ b/src/test/java/org/aksw/iguana/cc/storage/impl/CSVStorageTest.java @@ -23,6 +23,7 @@ public class CSVStorageTest extends StorageTest { private static final String EXPECTED_FILES_DIR = "src/test/resources/test-data/csv-storage-test/"; public static List data() { + resetDate(); final var workersTask1 = List.of( MockupWorker.createWorkers(0, 2, new MockupQueryHandler(0, 10), "test-connection-1", "v1.0.0", "test-dataset-1"), MockupWorker.createWorkers(2, 2, new MockupQueryHandler(1, 10), "test-connection-2", "v1.1.0", "test-dataset-2") diff --git a/src/test/java/org/aksw/iguana/cc/storage/impl/RDFFileStorageTest.java b/src/test/java/org/aksw/iguana/cc/storage/impl/RDFFileStorageTest.java index 8d094fcbc..e251b55b0 100644 --- a/src/test/java/org/aksw/iguana/cc/storage/impl/RDFFileStorageTest.java +++ b/src/test/java/org/aksw/iguana/cc/storage/impl/RDFFileStorageTest.java @@ -18,6 +18,7 @@ */ public class RDFFileStorageTest extends StorageTest { public static List data() { + resetDate(); final var arguments = new ArrayList(); final var paths = new ArrayList<>(List.of("rdf-file-storage-test1.ttl", "rdf-file-storage-test1.nt", "rdf-file-storage-test1.nt", "")); diff --git a/src/test/java/org/aksw/iguana/cc/storage/impl/StorageTest.java b/src/test/java/org/aksw/iguana/cc/storage/impl/StorageTest.java index 5ee40b7b5..c38586a4c 100644 --- a/src/test/java/org/aksw/iguana/cc/storage/impl/StorageTest.java +++ b/src/test/java/org/aksw/iguana/cc/storage/impl/StorageTest.java @@ -14,7 +14,6 @@ import org.apache.jena.rdf.model.ModelFactory; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import java.io.IOException; import java.nio.file.Files; @@ -56,8 +55,12 @@ public Model toRDF() { } } - @BeforeEach - public void resetDate() { + /** 
+ * This method resets the date to a fixed date. + * This is necessary to ensure that the tests are deterministic. + * The method needs to be called manually before retrieving the test data. + */ + public static void resetDate() { someDateTime = GregorianCalendar.from(ZonedDateTime.ofInstant(Instant.parse("2023-10-21T20:48:06.399Z"), ZoneId.of("Europe/Berlin"))); } diff --git a/src/test/java/org/aksw/iguana/cc/storage/impl/TriplestoreStorageTest.java b/src/test/java/org/aksw/iguana/cc/storage/impl/TriplestoreStorageTest.java index a33d135cf..7dc0694d3 100644 --- a/src/test/java/org/aksw/iguana/cc/storage/impl/TriplestoreStorageTest.java +++ b/src/test/java/org/aksw/iguana/cc/storage/impl/TriplestoreStorageTest.java @@ -12,6 +12,7 @@ import org.apache.jena.update.UpdateFactory; import org.apache.jena.update.UpdateRequest; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledInNativeImage; import org.junit.jupiter.api.extension.RegisterExtension; import java.io.StringWriter; @@ -24,6 +25,7 @@ import static com.github.tomakehurst.wiremock.client.WireMock.*; import static org.junit.jupiter.api.Assertions.assertTrue; +@DisabledInNativeImage // WireMock is not supported in native image public class TriplestoreStorageTest extends StorageTest { @RegisterExtension @@ -34,6 +36,7 @@ public class TriplestoreStorageTest extends StorageTest { @Test public void dataTest() throws URISyntaxException { + resetDate(); final var uuid = UUID.randomUUID(); wm.stubFor(post(urlEqualTo("/ds/sparql")) .willReturn(aResponse() diff --git a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java index 4df92a929..f7955b947 100644 --- a/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java +++ b/src/test/java/org/aksw/iguana/cc/worker/impl/SPARQLProtocolWorkerTest.java @@ -13,6 +13,7 @@ import org.aksw.iguana.cc.worker.HttpWorker; import 
org.aksw.iguana.cc.worker.ResponseBodyProcessor; import org.junit.jupiter.api.*; +import org.junit.jupiter.api.condition.DisabledInNativeImage; import org.junit.jupiter.api.extension.RegisterExtension; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -40,11 +41,18 @@ import static com.github.tomakehurst.wiremock.client.WireMock.*; import static org.junit.jupiter.api.Assertions.*; +@DisabledInNativeImage // WireMock is not supported in native image public class SPARQLProtocolWorkerTest { @RegisterExtension public static WireMockExtension wm = WireMockExtension.newInstance() - .options(new WireMockConfiguration().useChunkedTransferEncoding(Options.ChunkedEncodingPolicy.NEVER).dynamicPort().notifier(new ConsoleNotifier(false))) + .options(new WireMockConfiguration() + .useChunkedTransferEncoding(Options.ChunkedEncodingPolicy.NEVER) + .dynamicPort() + .notifier(new ConsoleNotifier(false)) + .jettyIdleTimeout(2000L) + .jettyStopTimeout(2000L) + .timeout(2000)) .failOnUnmatchedRequests(true) .build(); @@ -58,11 +66,11 @@ public class SPARQLProtocolWorkerTest { public static void setup() throws IOException { queryFile = Files.createTempFile("iguana-test-queries", ".tmp"); Files.writeString(queryFile, QUERY, StandardCharsets.UTF_8); - SPARQLProtocolWorker.initHttpClient(1); } @BeforeEach public void reset() { + SPARQLProtocolWorker.initHttpClient(1); wm.resetMappings(); // reset stubbing maps after each test } @@ -72,6 +80,12 @@ public static void cleanup() throws IOException { SPARQLProtocolWorker.closeHttpClient(); } + @AfterEach + public void verify() { + wm.resetAll(); + SPARQLProtocolWorker.closeHttpClient(); + } + public static Stream requestFactoryData() throws URISyntaxException { final var uri = new URI("http://localhost:" + wm.getPort() + "/ds/query"); @@ -95,7 +109,7 @@ public static Stream requestFactoryData() throws URISyntaxException { queryHandlderSupplier.apply(cached), new 
HttpWorker.QueryMixes(QUERY_MIXES), connection, - Duration.parse("PT100S"), + Duration.parse("PT6S"), "application/sparql-results+json", requestType, true @@ -108,7 +122,7 @@ public static Stream requestFactoryData() throws URISyntaxException { public static List completionTargets() { final var out = new ArrayList(); - final var queryMixesAmount = List.of(1, 2, 5, 10, 100, 1000); + final var queryMixesAmount = List.of(1, 2, 5, 10, 100, 200); final var timeDurations = List.of(Duration.of(1, ChronoUnit.SECONDS), Duration.of(5, ChronoUnit.SECONDS)); for (var queryMixes : queryMixesAmount) { @@ -226,7 +240,7 @@ public void testCompletionTargets(HttpWorker.CompletionTarget target) throws URI queryHandler, target, connection, - Duration.parse("PT20S"), + Duration.parse("PT5S"), "application/sparql-results+json", RequestFactory.RequestType.POST_URL_ENC_QUERY, false @@ -242,6 +256,8 @@ public void testCompletionTargets(HttpWorker.CompletionTarget target) throws URI final HttpWorker.Result result = worker.start().join(); for (var stat : result.executionStats()) { + if (stat.httpStatusCode().orElse(0) == 500) + continue; // ignore server errors stat.error().ifPresent(ex -> LOGGER.error(ex.getMessage(), ex)); assertTrue(stat.successful()); assertTrue(stat.error().isEmpty()); @@ -276,7 +292,7 @@ public void testTimeLimitExecutionCutoff() throws URISyntaxException, IOExceptio queryHandlder, new HttpWorker.TimeLimit(Duration.of(2, ChronoUnit.SECONDS)), connection, - Duration.parse("PT20S"), + Duration.parse("PT2S"), "application/sparql-results+json", RequestFactory.RequestType.POST_URL_ENC_QUERY, false diff --git a/src/test/java/org/aksw/iguana/commons/io/BigByteArrayInputStreamTest.java b/src/test/java/org/aksw/iguana/commons/io/BigByteArrayInputStreamTest.java index cb68b1b82..939328b75 100644 --- a/src/test/java/org/aksw/iguana/commons/io/BigByteArrayInputStreamTest.java +++ b/src/test/java/org/aksw/iguana/commons/io/BigByteArrayInputStreamTest.java @@ -1,9 +1,9 @@ package 
org.aksw.iguana.commons.io; import com.google.common.primitives.Bytes; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -14,7 +14,7 @@ import static org.junit.jupiter.api.Assertions.*; -@Disabled("This test takes a lot of time and resources.") +@EnabledIfEnvironmentVariable(named = "RUN_LARGE_TESTS", matches = "true") class BigByteArrayInputStreamTest { private static final int MAX_SINGLE_BUFFER_SIZE = Integer.MAX_VALUE - 8; @@ -27,7 +27,7 @@ class BigByteArrayInputStreamTest { * @param maxSingleBufferSize maximum size of a single array * @return 2d-array buffer */ - public static byte[][] getBigRandomBuffer(long size, int maxSingleBufferSize) { + private static byte[][] getBigRandomBuffer(long size, int maxSingleBufferSize) { if (size < 1) return new byte[0][0]; final var bufferField = new byte[(int) ((size - 1) / maxSingleBufferSize) + 1][]; diff --git a/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java b/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java index 5b49c0541..21104d80c 100644 --- a/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java +++ b/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java @@ -1,9 +1,9 @@ package org.aksw.iguana.commons.io; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Named; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -18,7 +18,7 @@ import static org.junit.jupiter.api.Assertions.*; -@Disabled("This test takes a lot of time and resources.") 
+@EnabledIfEnvironmentVariable(named = "RUN_LARGE_TESTS", matches = "true") class BigByteArrayOutputStreamTest { final static Random rng = new Random(0); diff --git a/src/test/java/org/aksw/iguana/commons/io/ByteArrayListInputStreamTest.java b/src/test/java/org/aksw/iguana/commons/io/ByteArrayListInputStreamTest.java new file mode 100644 index 000000000..bf841d0db --- /dev/null +++ b/src/test/java/org/aksw/iguana/commons/io/ByteArrayListInputStreamTest.java @@ -0,0 +1,174 @@ +package org.aksw.iguana.commons.io; + +import org.junit.jupiter.api.Test; + +import java.io.EOFException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.*; + +class ByteArrayListInputStreamTest { + + private final static int BUFFER_SIZE = 1024; + private final static int NUM_BUFFERS = 10; + + private static final Random rng = new Random(); + + private static List createByteArrayListInputStream(int arraySize, int numArrays) { + + List data = new ArrayList<>(numArrays); + for (int i = 0; i < numArrays; i++) { + final var temp = new byte[arraySize]; + rng.nextBytes(temp); + data.add(temp); + } + return data; + } + + + @Test + void testReadSingle() throws IOException { + final var data = createByteArrayListInputStream(1024, 10); + final var stream = new ByteArrayListInputStream(data); + for (int i = 0; i < BUFFER_SIZE * NUM_BUFFERS; i++) { + assertEquals(data.get(i / BUFFER_SIZE)[i % BUFFER_SIZE], (byte) stream.read(), String.format("Failed at index %d", i)); + } + assertEquals(-1, stream.read()); + } + + @Test + void testReadAllBytes() throws IOException { + final var data = createByteArrayListInputStream(BUFFER_SIZE, NUM_BUFFERS); + final var stream = new ByteArrayListInputStream(data); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.availableLong()); + assertThrows(UnsupportedOperationException.class, stream::readAllBytes); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, 
stream.availableLong()); + } + + @Test + void testReadMultiple() throws IOException { + // readNBytes + // test full read + var data = createByteArrayListInputStream(BUFFER_SIZE, NUM_BUFFERS); + var stream = new ByteArrayListInputStream(data); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.availableLong()); + byte[] buffer = new byte[BUFFER_SIZE * NUM_BUFFERS + 1]; + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.readNBytes(buffer, 0, BUFFER_SIZE * NUM_BUFFERS + 1)); + for (int i = 0; i < BUFFER_SIZE * NUM_BUFFERS; i++) { + assertEquals(data.get(i / BUFFER_SIZE)[i % BUFFER_SIZE], buffer[i], String.format("Failed at index %d", i)); + } + assertEquals(0, stream.availableLong()); + assertEquals(0, stream.readNBytes(buffer, 0, 1)); + + // test partial read with 3 bytes + data = createByteArrayListInputStream(BUFFER_SIZE, NUM_BUFFERS); + stream = new ByteArrayListInputStream(data); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.availableLong()); + buffer = new byte[3]; + for (int i = 0; i < BUFFER_SIZE * NUM_BUFFERS; i += 3) { + assertEquals(Math.min(BUFFER_SIZE * NUM_BUFFERS - i, 3), stream.readNBytes(buffer, 0, 3)); + for (int j = 0; j < Math.min(BUFFER_SIZE * NUM_BUFFERS - i, 3); j++) { + assertEquals(data.get((i + j) / BUFFER_SIZE)[(i + j) % BUFFER_SIZE], buffer[j], String.format("Failed at index %d", i + j)); + } + } + assertEquals(0, stream.availableLong()); + + // read + // test full read + data = createByteArrayListInputStream(BUFFER_SIZE, NUM_BUFFERS); + stream = new ByteArrayListInputStream(data); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.availableLong()); + buffer = new byte[BUFFER_SIZE * NUM_BUFFERS + 1]; + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.read(buffer, 0, BUFFER_SIZE * NUM_BUFFERS + 1)); + for (int i = 0; i < BUFFER_SIZE * NUM_BUFFERS; i++) { + assertEquals(data.get(i / BUFFER_SIZE)[i % BUFFER_SIZE], buffer[i], String.format("Failed at index %d", i)); + } + assertEquals(0, stream.availableLong()); + assertEquals(-1, 
stream.read(buffer, 0, 1)); + + // test partial read with 3 bytes + data = createByteArrayListInputStream(BUFFER_SIZE, NUM_BUFFERS); + stream = new ByteArrayListInputStream(data); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.availableLong()); + buffer = new byte[3]; + for (int i = 0; i < BUFFER_SIZE * NUM_BUFFERS; i += 3) { + assertEquals(Math.min(BUFFER_SIZE * NUM_BUFFERS - i, 3), stream.read(buffer, 0, 3)); + for (int j = 0; j < Math.min(BUFFER_SIZE * NUM_BUFFERS - i, 3); j++) { + assertEquals(data.get((i + j) / BUFFER_SIZE)[(i + j) % BUFFER_SIZE], buffer[j], String.format("Failed at index %d", i + j)); + } + } + assertEquals(0, stream.availableLong()); + assertEquals(-1, stream.read(buffer, 0, 1)); + } + + @Test + void testSkip() throws IOException { + // skip + final var data = createByteArrayListInputStream(BUFFER_SIZE, NUM_BUFFERS); + final var stream = new ByteArrayListInputStream(data); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.availableLong()); + for (int i = 0; i < BUFFER_SIZE * NUM_BUFFERS; i += 3) { + final var skip = stream.skip(3); + assertEquals(Math.min(3, BUFFER_SIZE * NUM_BUFFERS - i), skip); + assertEquals(BUFFER_SIZE * NUM_BUFFERS - i - skip, stream.availableLong()); + } + assertEquals(0, stream.availableLong()); + assertEquals(0, stream.skip(1)); + + // skipNBytes + final var data2 = createByteArrayListInputStream(BUFFER_SIZE, NUM_BUFFERS); + final var stream2 = new ByteArrayListInputStream(data2); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream2.availableLong()); + for (int i = 0; i < BUFFER_SIZE * NUM_BUFFERS; i += 3) { + try { + stream2.skipNBytes(3); + } catch (EOFException e) { + if (i <= BUFFER_SIZE * NUM_BUFFERS - 3) { + fail("EOFException thrown too early"); + } else { + break; + } + } + assertEquals(BUFFER_SIZE * NUM_BUFFERS - i - 3, stream2.availableLong()); + } + assertEquals(0, stream2.availableLong()); + assertThrows(EOFException.class, () -> stream2.skipNBytes(1)); + } + + @Test + void testAvailable() throws 
IOException { + final var data = createByteArrayListInputStream(BUFFER_SIZE, NUM_BUFFERS); + final var stream = new ByteArrayListInputStream(data); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.availableLong()); + assertEquals(BUFFER_SIZE * NUM_BUFFERS, stream.available()); + } + + @Test + void testClose() { + final var data = createByteArrayListInputStream(BUFFER_SIZE, NUM_BUFFERS); + final var stream = new ByteArrayListInputStream(data); + final var buffer = new byte[BUFFER_SIZE * NUM_BUFFERS]; + assertDoesNotThrow(stream::close); + assertThrows(IOException.class, stream::read); + assertThrows(IOException.class, () -> stream.read(buffer, 0, BUFFER_SIZE * NUM_BUFFERS)); + assertThrows(IOException.class, () -> stream.readNBytes(buffer, 0, BUFFER_SIZE * NUM_BUFFERS)); + assertThrows(IOException.class, () -> stream.skip(1)); + assertThrows(IOException.class, () -> stream.skipNBytes(1)); + assertThrows(IOException.class, stream::availableLong); + + } + + @Test + void testAvailableLong() throws IOException { + final var data1 = createByteArrayListInputStream(Integer.MAX_VALUE - 8, 1); + final var data2 = createByteArrayListInputStream(BUFFER_SIZE, 1); + final var combined = new ArrayList<>(data1); + combined.addAll(data2); + final var stream = new ByteArrayListInputStream(combined); + assertEquals(Integer.MAX_VALUE - 8 + (long) BUFFER_SIZE, stream.availableLong()); + assertEquals(Integer.MAX_VALUE, stream.available()); + } +} \ No newline at end of file diff --git a/src/test/java/org/aksw/iguana/commons/io/ByteArrayListOutputStreamTest.java b/src/test/java/org/aksw/iguana/commons/io/ByteArrayListOutputStreamTest.java new file mode 100644 index 000000000..007468cfc --- /dev/null +++ b/src/test/java/org/aksw/iguana/commons/io/ByteArrayListOutputStreamTest.java @@ -0,0 +1,96 @@ +package org.aksw.iguana.commons.io; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Random; + +import static 
org.junit.jupiter.api.Assertions.*; + +class ByteArrayListOutputStreamTest { + + private static final Random random = new Random(); + + private static byte[] getRandomData(int size) { + final var buffer = new byte[size]; + random.nextBytes(buffer); + return buffer; + } + + @Test + void testSingleWrite() throws IOException { + final var data = getRandomData(1024); + final var out = new ByteArrayListOutputStream(); + assertDoesNotThrow(() -> out.write(data)); + assertDoesNotThrow(out::close); + assertArrayEquals(data, out.getBuffers().get(0)); + assertEquals(1024, out.size()); + + final var out2 = new ByteArrayListOutputStream(1024 / 4); + assertDoesNotThrow(() -> out2.write(data)); + assertDoesNotThrow(out2::close); + assertArrayEquals(data, out2.getBuffers().get(0)); + assertEquals(1024, out2.size()); + } + + @Test + void testMultipleWrite() { + final var data = getRandomData(1024); + final var out = new ByteArrayListOutputStream(); + assertDoesNotThrow(() -> out.write(data)); + assertDoesNotThrow(() -> out.write(data)); + assertDoesNotThrow(out::close); + assertArrayEquals(data, Arrays.copyOfRange(out.getBuffers().get(0), 0, 1024)); + assertArrayEquals(data, Arrays.copyOfRange(out.getBuffers().get(0), 1024, 2048)); + assertEquals(2048, out.size()); + + final var out2 = new ByteArrayListOutputStream(1024 / 4); + assertDoesNotThrow(() -> out2.write(data)); + assertDoesNotThrow(() -> out2.write(data)); + assertDoesNotThrow(out2::close); + assertArrayEquals(data, out2.getBuffers().get(0)); + assertArrayEquals(data, out2.getBuffers().get(1)); + assertEquals(2048, out2.size()); + + final var out3 = new ByteArrayListOutputStream(1024 / 4); + for (int i = 0; i < 1024; i++) { + int finalI = i; + assertDoesNotThrow(() -> out3.write(data[finalI])); + } + assertDoesNotThrow(out3::close); + assertArrayEquals(Arrays.copyOfRange(data, 0, 256), out3.getBuffers().get(0)); + assertArrayEquals(Arrays.copyOfRange(data, 256, 512), out3.getBuffers().get(1)); + 
assertArrayEquals(Arrays.copyOfRange(data, 512, 768), out3.getBuffers().get(2)); + assertArrayEquals(Arrays.copyOfRange(data, 768, 1024), out3.getBuffers().get(3)); + assertEquals(1024, out3.size()); + } + + @Test + void testClose() { + final var out = new ByteArrayListOutputStream(); + final var data = getRandomData(1024); + assertDoesNotThrow(out::close); + assertDoesNotThrow(out::close); + assertThrows(IOException.class, () -> out.write(data)); + assertThrows(IOException.class, () -> out.write(data[0])); + } + + @Test + void testToInputStream() throws IOException { + final var data = getRandomData(1024); + final var out = new ByteArrayListOutputStream(); + assertDoesNotThrow(() -> out.write(data)); + final var in = out.toInputStream(); + + // stream should be closed + assertThrows(IOException.class, () -> out.write(data)); + + assertEquals(ByteArrayListInputStream.class, in.getClass()); + final var typedIn = (ByteArrayListInputStream) in; + final var buffer = new byte[1024]; + assertEquals(1024, typedIn.availableLong()); + assertEquals(1024, typedIn.read(buffer)); + assertArrayEquals(data, buffer); + } +}