Skip to content

Commit

Permalink
Add result aggregation for query templates (#283)
Browse files Browse the repository at this point in the history
* Add QueryData class

* Add test

* Check for update queries

* Move responsibility of QueryData to QueryHandler

* Remove unused methods

* Add tests

* Fix authentication

* Cleanup

* Fix StringListQueryList

* Modify QueryHandler and QueryData

* Add executable query count and representative query count to QueryHandler

* Update the saving template instances

* Fix individual template instances results

* Add some comments

* Update schema

* Change default behavior of query templates

* Update tests

* Fix configuration

* Update documentation

* Add some comments (to trigger GitHub actions)

* Fix minor bug that caused an infinite loop

* Change sparql endpoint for testing

* Add javadocs

* Refactor attribute name

* Refactor method name

* Add more javadocs

* Update src/main/java/org/aksw/iguana/cc/query/QueryData.java

Co-authored-by: Alexander Bigerl <[email protected]>

* Revert "Update src/main/java/org/aksw/iguana/cc/query/QueryData.java"

This reverts commit 8489f6e.

* Delegate handling of query templates to an extra class

* Trying to clarify comments

* Add more comments

* Change behavior of noOfQueries property in results

* Remove unused import

* Fix broken tests

* Rename TemplateHandler to QueryTemplateHandler

* Add javadoc string

---------

Co-authored-by: Alexander Bigerl <[email protected]>
  • Loading branch information
nck-mlcnv and bigerl authored Jan 24, 2025
1 parent c1dc6da commit 20e1bee
Show file tree
Hide file tree
Showing 23 changed files with 555 additions and 194 deletions.
16 changes: 11 additions & 5 deletions docs/configuration/queries.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,21 @@ The results may look like the following:
### Configuration
The `template` attribute has the following properties:

| property | required | default | description | example |
|----------|----------|---------|---------------------------------------------------------------------|-----------------------------|
| endpoint | yes | | The endpoint to query. | `http://dbpedia.org/sparql` |
| limit | no | `2000` | The maximum number of instances per query template. | `100` |
| save | no | `true` | If set to `true`, query instances will be saved in a separate file. | `false` |
| property | required | default | description | example |
|-------------------|----------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------|
| endpoint | yes | | The endpoint to query. | `http://dbpedia.org/sparql` |
| limit | no | `2000` | The maximum number of instances per query template. | `100` |
| save | no | `true` | If set to `true`, query instances will be saved in a separate file. | `false` |
| individualResults | no | `false` | If set to `true`, the results of each individual template instance are reported separately. If set to `false`, their results are aggregated and reported under the query template. | `true` |

If the `save` attribute is set to `true`,
the instances will be saved in a separate file in the same directory as the query templates.
If the query templates are stored in a folder, the instances will be saved in the parent directory.

If the `individualResults` attribute is set to `false`,
the results of the instances will be aggregated under their query template.
The query template will then be treated as an actual query in the results.

Example of query configuration with query templates:
```yaml
queries:
Expand All @@ -149,4 +154,5 @@ queries:
endpoint: "http://dbpedia.org/sparql"
limit: 100
save: true
individualResults: true
```
3 changes: 2 additions & 1 deletion example-suite.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,11 @@ tasks:
requestType: post query
queries:
path: "./example/query_pattern.txt"
pattern:
template:
endpoint: "https://dbpedia.org/sparql"
limit: 1000
save: false
individualResults: false
timeout: 180s
completionTarget:
duration: 1000s
Expand Down
9 changes: 5 additions & 4 deletions graalvm/suite.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ connections:
- name: "Blazegraph"
version: "1.1.1"
dataset: "DatasetName"
endpoint: "http://localhost:9999/blazegraph/sparql"
endpoint: "https://query.wikidata.org/"
authentication:
user: "user"
password: "test"
Expand Down Expand Up @@ -60,13 +60,14 @@ tasks:
seed: 123
lang: "SPARQL"
template:
endpoint: "http://dbpedia.org/sparql"
endpoint: "https://dbpedia.org/sparql"
limit: 1
save: false
individualResults: false
timeout: 2s
connection: Blazegraph
completionTarget:
duration: 1s
duration: 0.5s
acceptHeader: "application/sparql-results+json"
requestType: get query
parseResults: true
Expand All @@ -78,7 +79,7 @@ tasks:
timeout: 3m
connection: Blazegraph
completionTarget:
duration: 1s
duration: 0.5s
requestType: get query
acceptHeader: "application/sparql-results+json"
- number: 1
Expand Down
3 changes: 3 additions & 0 deletions schema/iguana-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,9 @@
},
"save": {
"type": "boolean"
},
"individualResults": {
"type": "boolean"
}
},
"required": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public AggregatedExecutionStatistics() {
public Model createMetricModel(List<HttpWorker> workers, List<HttpWorker.ExecutionStats>[][] data, IRES.Factory iresFactory) {
Model m = ModelFactory.createDefaultModel();
for (var worker : workers) {
for (int i = 0; i < worker.config().queries().getQueryCount(); i++) {
for (int i = 0; i < worker.config().queries().getRepresentedQueryCount(); i++) {
Resource queryRes = iresFactory.getWorkerQueryResource(worker, i);
m.add(createAggregatedModel(data[(int) worker.getWorkerID()][i], queryRes));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public EachExecutionStatistic() {
public Model createMetricModel(List<HttpWorker> workers, List<HttpWorker.ExecutionStats>[][] data, IRES.Factory iresFactory) {
Model m = ModelFactory.createDefaultModel();
for (var worker : workers) {
for (int i = 0; i < worker.config().queries().getQueryCount(); i++) {
for (int i = 0; i < worker.config().queries().getRepresentedQueryCount(); i++) {
Resource workerQueryResource = iresFactory.getWorkerQueryResource(worker, i);
Resource queryRes = IRES.getResource(worker.config().queries().getQueryId(i));
BigInteger run = BigInteger.ONE;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/aksw/iguana/cc/metrics/impl/QMPH.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public Number calculateTaskMetric(List<HttpWorker> workers, List<HttpWorker.Exec
@Override
public Number calculateWorkerMetric(HttpWorker.Config worker, List<HttpWorker.ExecutionStats>[] data) {
BigDecimal successes = BigDecimal.ZERO;
BigDecimal noq = BigDecimal.valueOf(worker.queries().getQueryCount());
BigDecimal noq = BigDecimal.valueOf(worker.queries().getExecutableQueryCount());
Duration totalTime = Duration.ZERO;
for (List<HttpWorker.ExecutionStats> datum : data) {
for (HttpWorker.ExecutionStats exec : datum) {
Expand Down
44 changes: 41 additions & 3 deletions src/main/java/org/aksw/iguana/cc/query/QueryData.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.apache.jena.update.UpdateFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
Expand All @@ -12,9 +13,23 @@
* At the moment, it stores the type of the query and an optional template id.
*
* @param queryId The id of the query
* @param update If the query is an update query
*/
public record QueryData(int queryId, boolean update) {
public record QueryData(int queryId, QueryType type, Integer templateId) {
public enum QueryType {
DEFAULT,
UPDATE,
TEMPLATE,
TEMPLATE_INSTANCE
}

/**
* Generates a list of QueryData objects for a collection of queries.
* The method uses the Jena library to check if the query is an update query.
* It only determines whether each query is an update query and assigns each query an index in the order the queries were given.
*
* @param queries collection of input streams of queries
* @return list of QueryData objects
*/
public static List<QueryData> generate(Collection<InputStream> queries) {
final var queryData = new ArrayList<QueryData>();
int i = 0;
Expand All @@ -25,8 +40,31 @@ public static List<QueryData> generate(Collection<InputStream> queries) {
} catch (Exception e) {
update = false;
}
queryData.add(new QueryData(i++, update));
queryData.add(new QueryData(i++, update ? QueryType.UPDATE : QueryType.DEFAULT, null));
try {
query.close();
} catch (IOException ignored) {}
}
return queryData;
}

/**
 * Determines whether the given query can be read as an update request.
 * The query is handed to Jena's {@code UpdateFactory.read}; if that call
 * completes, the query counts as an update query, and if it throws any
 * exception, the query counts as a non-update query.
 * This method does not explicitly close the given stream.
 *
 * @param query input stream of the query
 * @return true if the query is an update query, false otherwise
 */
public static boolean checkIfUpdate(InputStream query) {
    boolean readableAsUpdate;
    try {
        // The returned request is not needed; only success or failure of the read matters.
        UpdateFactory.read(query);
        readableAsUpdate = true;
    } catch (Exception e) {
        // Any failure while reading is interpreted as "not an update query".
        readableAsUpdate = false;
    }
    return readableAsUpdate;
}

/**
 * Reports whether this query is an update query.
 *
 * @return true if the type of this query is {@link QueryType#UPDATE}, false otherwise
 */
public boolean update() {
    final boolean isUpdate = QueryType.UPDATE == type;
    return isUpdate;
}
}
Loading

0 comments on commit 20e1bee

Please sign in to comment.