Skip to content

Commit a3832be

Browse files
committed
refactor PropertyComparisonProcessor (WIP)
1 parent 1759bf1 commit a3832be

File tree

7 files changed

+41
-31
lines changed

7 files changed

+41
-31
lines changed

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/ResourcePair.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ private ResourcePair(Resource first, Resource second) {
3232
this.second = second;
3333
}
3434

35-
public static Set<ResourcePair> getPairsOf(Set<Resource> resources) {
35+
public static Set<ResourcePair> getPairsWithoutRepetitionOf(Set<Resource> resources) {
3636
Set<ResourcePair> pairs = new HashSet<>();
3737
for (Resource first : resources) {
3838
for (Resource second : resources) {
@@ -44,6 +44,18 @@ public static Set<ResourcePair> getPairsOf(Set<Resource> resources) {
4444
return pairs;
4545
}
4646

47+
public static Set<ResourcePair> getPairsWithRepetitionOf(Set<Resource> resources) {
48+
Set<ResourcePair> pairs = new HashSet<>();
49+
for (Resource first : resources) {
50+
for (Resource second : resources) {
51+
if (first.equals(second) || validOrder(first, second)) {
52+
pairs.add(new ResourcePair(first, second));
53+
}
54+
}
55+
}
56+
return pairs;
57+
}
58+
4759
public static ResourcePair getPair(Resource first, Resource second) {
4860
if (validOrder(first, second)) {
4961
return new ResourcePair(first, second);

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/ResourceTupel.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public static Set<ResourceTupel> getTupelsOf(Set<Resource> resources) {
4545
}
4646

4747
public static ResourceTupel getTupel(Resource first, Resource second) {
48-
return new ResourceTupel(second, first);
48+
return new ResourceTupel(first, second);
4949
}
5050

5151
@Override

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/Count.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ public Count(Resource quantity, Resource unit) {
2626
super(quantity, unit);
2727
}
2828

29-
public void incrementByOne(K key) {
30-
incrementBy(key, 1);
29+
public void incrementByOrSetOne(K key) {
30+
incrementByOrSet(key, 1);
3131
}
3232

33-
public void incrementBy(K key, long increment) {
33+
public void incrementByOrSet(K key, long increment) {
3434
values.merge(key, increment, Long::sum);
3535
}
3636

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/measure/PerDatasetTupelRatio.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,14 @@ public void setRatioOf(PerDatasetPairCount numerators, PerDatasetCount denominat
4242
}
4343
}
4444

45-
void setRatioForTupel(BigDecimal numerator, PerDatasetCount denominators, Resource measuredResource, Resource otherResource) {
46-
if (denominators.contains(measuredResource)) {
47-
BigDecimal denominator = BigDecimal.valueOf(denominators.get(measuredResource));
45+
void setRatioForTupel(BigDecimal numerator, PerDatasetCount denominators, Resource asessedResource, Resource otherResource) {
46+
if (denominators.contains(otherResource)) {
47+
BigDecimal denominator = BigDecimal.valueOf(denominators.get(otherResource));
4848
if (!denominator.equals(BigDecimal.ZERO)) {
4949
BigDecimal value = numerator.divide(denominator, SCALE, ROUNDING_MODE);
50-
set(ResourceTupel.getTupel(measuredResource, otherResource), value);
50+
set(ResourceTupel.getTupel(asessedResource, otherResource), value);
5151
}
5252
}
53-
// TODO check direction
5453
}
5554

5655
public void storeInModel(Aspect aspect, Map<Resource, Model> outputModelsMap) {

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonProcessor.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,6 @@ public Stream<Resource> getResourceKeys(Aspect aspect, Resource dataset)
9797
* pattern of the given dataset in the given {@link Model}. If this aspect does
9898
* not cover the given dataset or the model does not contain values for the
9999
* given resource, {@code null} is returned.
100-
*
101-
* @param resource
102-
* @param dataset
103-
* @param aspect
104-
* @return
105100
*/
106101
public Map<String, Set<RDFNode>> selectResourceValues(Resource resource, Resource dataset,
107102
Aspect aspect, Collection<String> variables) {
@@ -199,7 +194,9 @@ PerDatasetRatio calculateCompleteness(Iterable<ResourcePair> datasetPairs, PerDa
199194
long calculateTotalPairwiseOverlap(Iterable<ResourcePair> datasetPairs, PerDatasetPairCount absoluteCoverage) {
200195
long totalPairwiseOverlap = 0L;
201196
for (ResourcePair datasetPair : datasetPairs) {
202-
totalPairwiseOverlap += absoluteCoverage.get(datasetPair);
197+
if (absoluteCoverage.contains(datasetPair)) {
198+
totalPairwiseOverlap += absoluteCoverage.get(datasetPair);
199+
}
203200
}
204201
return totalPairwiseOverlap;
205202
}

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessor.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
package de.uni_jena.cs.fusion.abecto.processor;
2020

2121
import java.math.BigDecimal;
22-
import java.math.RoundingMode;
2322
import java.util.*;
2423
import java.util.stream.Collectors;
2524
import java.util.stream.Stream;
@@ -108,7 +107,7 @@ private void setAspect(Aspect aspect) {
108107

109108
private void setAspectDatasets() {
110109
datasets = aspect.getDatasets();
111-
datasetPairs = ResourcePair.getPairsOf(datasets);
110+
datasetPairs = ResourcePair.getPairsWithoutRepetitionOf(datasets);
112111
datasetTupels = ResourceTupel.getTupelsOf(datasets);
113112
outputMetaModelByDataset = getOutputMetaModels(datasets);
114113
}
@@ -145,7 +144,7 @@ private void incrementAbsoluteCoverages(Map<Resource, Set<Resource>> correspondi
145144
for (ResourcePair datasetPair : datasetPairs) {
146145
if (!correspondingResourcesByDataset.get(datasetPair.first).isEmpty() &&
147146
!correspondingResourcesByDataset.get(datasetPair.second).isEmpty()) {
148-
absoluteCoverage.incrementByOne(datasetPair);
147+
absoluteCoverage.incrementByOrSetOne(datasetPair);
149148
}
150149
}
151150
}
@@ -171,7 +170,7 @@ private void incrementDuplicatesCount(Map<Resource, Set<Resource>> corresponding
171170
for (Resource dataset : datasets) {
172171
if (!correspondingResourcesByDataset.get(dataset).isEmpty()) {
173172
int occurrencesInDataset = correspondingResourcesByDataset.get(dataset).size();
174-
duplicateCount.incrementBy(dataset, occurrencesInDataset - 1);
173+
duplicateCount.incrementByOrSet(dataset, occurrencesInDataset - 1);
175174
}
176175
}
177176
}

abecto-core/src/main/java/de/uni_jena/cs/fusion/abecto/processor/PropertyComparisonProcessor.java

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ public class PropertyComparisonProcessor extends ComparisonProcessor<PropertyCom
7373
public boolean allowLangTagSkip;
7474
Aspect theAspect; // TODO rename to `aspect` after renaming the aspect parameter variable into `aspectIri`
7575
Set<Resource> datasets;
76-
Set<ResourcePair> datasetPairs;
76+
Set<ResourcePair> datasetPairsWithoutRepetition;
77+
Set<ResourcePair> datasetPairsWithRepetition;
7778
Set<ResourceTupel> datasetTupels;
7879
Map<Resource, Model> outputMetaModelByDataset;
7980
/**
@@ -123,7 +124,7 @@ public final void run() {
123124
}
124125
}
125126

126-
for (ResourcePair datasetPair : datasetPairs) {
127+
for (ResourcePair datasetPair : datasetPairsWithRepetition) {
127128
for (String variable : variables) {
128129
if (theAspect.getPattern(datasetPair.first).getResultVars().contains(variable)
129130
&& theAspect.getPattern(datasetPair.second).getResultVars().contains(variable)) {
@@ -147,7 +148,8 @@ private void setAspect(Resource aspect) {
147148

148149
private void setAspectDatasets() {
149150
datasets = theAspect.getDatasets();
150-
datasetPairs = ResourcePair.getPairsOf(datasets);
151+
datasetPairsWithoutRepetition = ResourcePair.getPairsWithoutRepetitionOf(datasets);
152+
datasetPairsWithRepetition = ResourcePair.getPairsWithRepetitionOf(datasets);
151153
datasetTupels = ResourceTupel.getTupelsOf(datasets);
152154
outputMetaModelByDataset = getOutputMetaModels(datasets);
153155
}
@@ -164,7 +166,6 @@ private void initializeCount() {
164166
count = new HashMap<>();
165167
for (String variable : variables) {
166168
PerDatasetCount countOfVariable = new PerDatasetCount(AV.count, OM.one);
167-
countOfVariable.reset(datasets, 0L);
168169
count.put(variable, countOfVariable);
169170
}
170171
}
@@ -173,7 +174,6 @@ private void initializeDeduplicatedCount() {
173174
deduplicatedCount = new HashMap<>();
174175
for (String variable : variables) {
175176
PerDatasetCount deduplicatedCountOfVariable = new PerDatasetCount(AV.deduplicatedCount, OM.one);
176-
deduplicatedCountOfVariable.reset(datasets, 0L);
177177
deduplicatedCount.put(variable, deduplicatedCountOfVariable);
178178
}
179179
}
@@ -182,7 +182,6 @@ private void initializeAbsoluteCoverage() {
182182
absoluteCoverage = new HashMap<>();
183183
for (String variable : variables) {
184184
PerDatasetPairCount absoluteCoverageOfVariable = new PerDatasetPairCount(AV.absoluteCoverage, OM.one);
185-
absoluteCoverageOfVariable.reset(datasetPairs, 0L);
186185
absoluteCoverage.put(variable, absoluteCoverageOfVariable);
187186
}
188187
}
@@ -281,14 +280,14 @@ private void countAndDeduplicateValuesOfUncoveredResource() {
281280
void measureCountAndDeduplicatedCount(Resource dataset, String variable, Collection<RDFNode> valuesOfVariable) {
282281
long valuesCountWithDuplicates = valuesOfVariable.size();
283282
long valuesCountWithoutDuplicates = deduplicate(valuesOfVariable).size();
284-
count.get(variable).incrementBy(dataset, valuesCountWithDuplicates);
285-
deduplicatedCount.get(variable).incrementBy(dataset, valuesCountWithoutDuplicates);
283+
count.get(variable).incrementByOrSet(dataset, valuesCountWithDuplicates);
284+
deduplicatedCount.get(variable).incrementByOrSet(dataset, valuesCountWithoutDuplicates);
286285
}
287286

288287
private void calculateCompleteness() {
289288
for (String variable : variables) {
290289
// TODO add value exclusion filter description to measurement description
291-
completeness.put(variable, calculateCompleteness(datasetPairs, absoluteCoverage.get(variable), deduplicatedCount.get(variable)));
290+
completeness.put(variable, calculateCompleteness(datasetPairsWithoutRepetition, absoluteCoverage.get(variable), deduplicatedCount.get(variable)));
292291
}
293292
}
294293

@@ -389,9 +388,13 @@ public void calculateDeviationsAndOmissions(String variable, ResourcePair datase
389388
mapResources(variable, resourcesByMappedValues, valuesByVariableByResource1);
390389
mapResources(variable, resourcesByMappedValues, valuesByVariableByResource2);
391390

391+
392392
// update measurements
393-
int pairwiseOverlap = getPairwiseOverlap(valuesByVariableByResource1.keySet(), valuesByVariableByResource2.keySet(), resourcesByMappedValues);
394-
absoluteCoverage.get(variable).incrementBy(datasetPair, pairwiseOverlap);
393+
if (!datasetPair.first.equals(datasetPair.second)) {// do not measure for first == second
394+
// TODO test that no absolute coverage exists for a dataset compared with itself
395+
int pairwiseOverlap = getPairwiseOverlap(valuesByVariableByResource1.keySet(), valuesByVariableByResource2.keySet(), resourcesByMappedValues);
396+
absoluteCoverage.get(variable).incrementByOrSet(datasetPair, pairwiseOverlap);
397+
}
395398

396399
// deviation: a pair of resources with each having a value not present in the
397400
// other resource

0 commit comments

Comments
 (0)