Skip to content

Commit

Permalink
[dbs-leipzig#1571] add degree variance operator
Browse files Browse the repository at this point in the history
  • Loading branch information
alwba committed Aug 9, 2022
1 parent 5f1b5d5 commit 88220a9
Show file tree
Hide file tree
Showing 6 changed files with 466 additions and 0 deletions.
19 changes: 19 additions & 0 deletions !
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[#1559] add unit-tests

# Please enter the commit message for your changes. Lines starting
# with '#' will be ignored, and an empty message aborts the commit.
#
# Date: Tue Aug 2 11:06:54 2022 +0200
#
# interactive rebase in progress; onto 5f1b5d5a004
# Last commands done (3 commands done):
# reword dc25f24377f add help functions
# reword 22c9104636f add unit-tests
# No commands remaining.
# You are currently editing a commit while rebasing branch '#1559_min_max_avg_degree_evolution' on '5f1b5d5a004'.
#
# Changes to be committed:
# new file: gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/AvgDegreeEvolutionTest.java
# new file: gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/MaxDegreeEvolutionTest.java
# new file: gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/MinDegreeEvolutionTest.java
#
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.temporal.model.impl.operators.metric;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.flink.model.api.operators.UnaryBaseGraphToValueOperator;
import org.gradoop.flink.model.impl.operators.sampling.functions.VertexDegree;
import org.gradoop.temporal.model.api.TimeDimension;
import org.gradoop.temporal.model.impl.TemporalGraph;
import org.gradoop.temporal.model.impl.operators.metric.functions.*;
import org.gradoop.temporal.model.impl.operators.metric.functions.ExtractAllTimePointsReduce;
import org.gradoop.temporal.model.impl.operators.metric.functions.GroupDegreeTreesToVariance;

import java.util.Objects;
import java.util.TreeMap;

/**
* Operator that calculates the degree variance evolution of a temporal graph for the
* whole lifetime of the graph.
*/
public class DegreeVarianceEvolution implements UnaryBaseGraphToValueOperator<TemporalGraph, DataSet<Tuple2<Long, Double>>> {
/**
* The time dimension that will be considered.
*/
private final TimeDimension dimension;

/**
* The degree type (IN, OUT, BOTH);
*/
private final VertexDegree degreeType;

/**
* Creates an instance of this average degree evolution operator.
*
* @param degreeType the degree type to use (IN, OUT, BOTH).
* @param dimension the time dimension to use (VALID_TIME, TRANSACTION_TIME).
*/
public DegreeVarianceEvolution(VertexDegree degreeType, TimeDimension dimension) {
this.degreeType = Objects.requireNonNull(degreeType);
this.dimension = Objects.requireNonNull(dimension);
}

@Override
public DataSet<Tuple2<Long, Double>> execute(TemporalGraph graph) {
DataSet<Tuple2<GradoopId, TreeMap<Long, Integer>>> absoluteDegreeTrees = graph.getEdges()
// 1) Extract vertex id(s) and corresponding time intervals
.flatMap(new FlatMapVertexIdEdgeInterval(dimension, degreeType))
// 2) Group them by the vertex id
.groupBy(0)
// 3) For each vertex id, build a degree tree data structure
.reduceGroup(new BuildTemporalDegreeTree())
// 4) Transform each tree to aggregated evolution
.map(new TransformDeltaToAbsoluteDegreeTree());

DataSet<Tuple1<Long>> timePoints = absoluteDegreeTrees
// 5) extract all timestamps where degree of any vertex changes
.reduceGroup(new ExtractAllTimePointsReduce())
.distinct();

return absoluteDegreeTrees
// join with interval degree mappings
// 6) Merge trees together and calculate aggregation
.reduceGroup(new GroupDegreeTreesToVariance(timePoints));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.temporal.model.impl.operators.metric.functions;

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.gradoop.common.model.impl.id.GradoopId;

import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

/**
* Reduce function to extract all timestamps where the degree of a vertex changes.
*/
public class ExtractAllTimePointsReduce implements GroupReduceFunction<Tuple2<GradoopId, TreeMap<Long, Integer>>, Tuple1<Long>> {

public ExtractAllTimePointsReduce() {
}

@Override
public void reduce(Iterable<Tuple2<GradoopId, TreeMap<Long, Integer>>> iterable, Collector<Tuple1<Long>> collector) throws Exception {
SortedSet<Long> timePoints = new TreeSet<>();

for (Tuple2<GradoopId, TreeMap<Long, Integer>> tuple : iterable) {
timePoints.addAll(tuple.f1.keySet());
}

for (Long timePoint: timePoints) {
collector.collect(new Tuple1<>(timePoint));
}

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.temporal.model.impl.operators.metric.functions;

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.gradoop.common.model.impl.id.GradoopId;

import java.util.*;
import java.util.stream.Stream;

/**
* A group reduce function that merges all Tuples (vId, degreeTree) to a dataset of tuples (time, aggDegree)
* that represents the aggregated degree value for the whole graph at the given time.
*/
public class GroupDegreeTreesToVariance
implements GroupReduceFunction<Tuple2<GradoopId, TreeMap<Long, Integer>>, Tuple2<Long, Double>> {

/**
* The timestamps where at least one vertex degree changes.
*/
private final SortedSet<Long> timePoints;

/**
* Creates an instance of this group reduce function.
*
* @param timePoints the timestamps where vertex degree changes.
*/
public GroupDegreeTreesToVariance(DataSet<Tuple1<Long>> timePoints) {

List<Tuple1<Long>> tuples;
try {
tuples = timePoints.collect();
this.timePoints = new TreeSet<>();

for (int i = 0; i < timePoints.count(); i = i + 1) {
this.timePoints.add(tuples.get(i).getField(0));
}
} catch (Exception e) {
throw new RuntimeException(e);
}

}

@Override
public void reduce(Iterable<Tuple2<GradoopId, TreeMap<Long, Integer>>> iterable,
Collector<Tuple2<Long, Double>> collector) throws Exception {

// init necessary maps and set
HashMap<GradoopId, TreeMap<Long, Integer>> degreeTrees = new HashMap<>();
HashMap<GradoopId, Integer> vertexDegrees = new HashMap<>();

// convert the iterables to a hashmap and remember all possible timestamps
for (Tuple2<GradoopId, TreeMap<Long, Integer>> tuple : iterable) {
degreeTrees.put(tuple.f0, tuple.f1);
}

int numberOfVertices = degreeTrees.size();

// Add default times
timePoints.add(Long.MIN_VALUE);

for (Long timePoint : timePoints) {
// skip last default time
if (Long.MAX_VALUE == timePoint) {
continue;
}
// Iterate over all vertices
for (Map.Entry<GradoopId, TreeMap<Long, Integer>> entry : degreeTrees.entrySet()) {
// Make sure the vertex is registered in the current vertexDegrees capture
if (!vertexDegrees.containsKey(entry.getKey())) {
vertexDegrees.put(entry.getKey(), 0);
}

// Check if timestamp is in tree, if not, take the lower key
if (entry.getValue().containsKey(timePoint)) {
vertexDegrees.put(entry.getKey(), entry.getValue().get(timePoint));
} else {
Long lowerKey = entry.getValue().lowerKey(timePoint);
if (lowerKey != null) {
vertexDegrees.put(entry.getKey(), entry.getValue().get(lowerKey));
}
}
}

Optional<Integer> opt = vertexDegrees.values().stream().reduce(Math::addExact);
Optional<Double> opt2 = Optional.empty();

double mean;

if (opt.isPresent()) {
mean = (double) opt.get() / (double) numberOfVertices;
opt2 = Optional.of(vertexDegrees.values().stream().mapToDouble(val -> (val - mean) * (val - mean)).sum());
}

opt2.ifPresent(val -> collector.collect(
new Tuple2<>(timePoint, val / (double) numberOfVertices)));
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright © 2014 - 2021 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.temporal.model.impl.operators.metric.functions;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.FunctionAnnotation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.gradoop.common.model.impl.id.GradoopId;

import java.util.Map;
import java.util.TreeMap;

/**
* Replaces the degree tree, that just stores the degree changes for each time, with a degree tree that
* stores the actual degree of the vertex at that time.
*/
@FunctionAnnotation.ForwardedFields("f0")
public class TransformDeltaToAbsoluteDegreeTree
implements MapFunction<Tuple2<GradoopId, TreeMap<Long, Integer>>,
Tuple2<GradoopId, TreeMap<Long, Integer>>> {

/**
* To reduce object instantiations.
*/
private TreeMap<Long, Integer> absoluteDegreeTree;

@Override
public Tuple2<GradoopId, TreeMap<Long, Integer>> map(
Tuple2<GradoopId, TreeMap<Long, Integer>> vIdTreeMapTuple) throws Exception {
// init the degree and the temporal tree
int degree = 0;
absoluteDegreeTree = new TreeMap<>();

// aggregate the degrees
for (Map.Entry<Long, Integer> entry : vIdTreeMapTuple.f1.entrySet()) {
degree += entry.getValue();
absoluteDegreeTree.put(entry.getKey(), degree);
}
vIdTreeMapTuple.f1 = absoluteDegreeTree;
return vIdTreeMapTuple;
}
}
Loading

0 comments on commit 88220a9

Please sign in to comment.