Merge branch 'develop' into dateline
EmileSonneveld committed Sep 16, 2024
2 parents 4896876 + 72856ef commit 5ef4a96
Showing 30 changed files with 876 additions and 208 deletions.
15 changes: 11 additions & 4 deletions Jenkinsfile
@@ -37,7 +37,9 @@ pipeline {
PACKAGE_NAME = "${package_name}"
WORKSPACE = "${env.WORKSPACE}"
}

parameters {
booleanParam(name: 'skip_tests', defaultValue: false, description: 'Check this if you want to skip running tests.')
}
stages {
stage('Checkout') {
steps {
@@ -55,7 +57,7 @@ pipeline {
steps {
script {
rel_version = getMavenVersion()
build()
build(!params.skip_tests)
utils.setWorkspacePermissions()
}
}
@@ -196,12 +198,17 @@ void build(tests = true){
sh "dnf install -y maven git java-11-openjdk-devel gdal-3.8.4"
def server = Artifactory.server('vitoartifactory')
def rtMaven = Artifactory.newMavenBuild()
rtMaven.deployer server: server, releaseRepo: 'libs-release-public', snapshotRepo: 'libs-snapshot-public'
def snapshotRepo = 'libs-snapshot-public'
if (!publishable_branches.contains(env.BRANCH_NAME)) {
snapshotRepo = 'openeo-branch-builds'
rtMaven.opts += " -Drevision=${env.BRANCH_NAME}"
}
rtMaven.deployer server: server, releaseRepo: 'libs-release-public', snapshotRepo: snapshotRepo
rtMaven.tool = maven
if (!tests) {
rtMaven.opts += ' -DskipTests=true'
}
rtMaven.deployer.deployArtifacts = publishable_branches.contains(env.BRANCH_NAME) || publishable_branches.contains(env.CHANGE_BRANCH)
rtMaven.deployer.deployArtifacts = true
//use '--projects StatisticsMapReduce' in 'goals' to build specific module
try {
withCredentials([
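Together with the ${revision} changes in the pom files below, this switches the build to Maven's CI-friendly versioning: branches outside publishable_branches now deploy to the openeo-branch-builds snapshot repository and pass their branch name as -Drevision, so each module's <version>${revision}</version> resolves to a branch-specific version per build (publishable branches presumably fall back to a default <revision> property in the parent pom, which is not among the files shown).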
2 changes: 1 addition & 1 deletion geopyspark-geotrellis/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion geotrellis-accumulo-extensions/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion geotrellis-benchmarks/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

6 changes: 3 additions & 3 deletions geotrellis-common/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@@ -55,13 +55,13 @@
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>5.3.2</version>
<version>5.10.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
<version>5.3.2</version>
<version>5.10.3</version>
<scope>test</scope>
</dependency>
<dependency>
@@ -8,7 +8,8 @@ import geotrellis.spark.partition.{PartitionerIndex, SpacePartitioner}
import geotrellis.spark.{MultibandTileLayerRDD, _}
import geotrellis.util.GetComponent
import geotrellis.vector.{Extent, MultiPolygon, ProjectedExtent}
import org.apache.spark.rdd.RDD
import org.apache.spark.Partitioner
import org.apache.spark.rdd.{CoGroupedRDD, RDD}
import org.slf4j.LoggerFactory

import java.time.ZonedDateTime
@@ -194,7 +195,25 @@ object DatacubeSupport {
ignoreKeysWithoutMask: Boolean = false,
): RDD[(K, MultibandTile)] with Metadata[M] = {
val joined = if (ignoreKeysWithoutMask) {
val tmpRdd = SpatialJoin.join(datacube, mask).mapValues(v => (v._1, Option(v._2)))
// inner join; try to preserve the partitioner so the result needs no extra shuffle
val tmpRdd: RDD[(K, (MultibandTile, Option[MultibandTile]))] =
if(datacube.partitioner.isDefined && datacube.partitioner.get.isInstanceOf[SpacePartitioner[K]]){
val part = datacube.partitioner.get.asInstanceOf[SpacePartitioner[K]]
new CoGroupedRDD[K](List(datacube, part(mask)), part)
.flatMapValues { case Array(l, r) =>
if (l.isEmpty) {
Seq.empty[(MultibandTile, Option[MultibandTile])]
}
else if (r.isEmpty)
Seq.empty[(MultibandTile, Option[MultibandTile])]
else
for (v <- l.iterator; w <- r.iterator) yield (v, Some(w))
}.asInstanceOf[RDD[(K, (MultibandTile, Option[MultibandTile]))]]
}else{
SpatialJoin.join(datacube, mask).mapValues(v => (v._1, Option(v._2)))
}


ContextRDD(tmpRdd, datacube.metadata)
} else {
SpatialJoin.leftOuterJoin(datacube, mask)
@@ -210,7 +229,8 @@
if (dataTile.bandCount == maskTile.bandCount) {
maskIndex = index
}
tile.dualCombine(maskTile.band(maskIndex))((v1, v2) => if (v2 != 0 && isData(v1)) replacementInt else v1)((v1, v2) => if (v2 != 0.0 && isData(v1)) replacementDouble else v1)
// tile has to be 'mutable': ConstantTile, for instance, implements dualCombine but does not convert the cell type correctly!
tile.mutable.dualCombine(maskTile.band(maskIndex))((v1, v2) => if (v2 != 0 && isData(v1)) replacementInt else v1)((v1, v2) => if (v2 != 0.0 && isData(v1)) replacementDouble else v1)
})

} else {
@@ -236,16 +256,8 @@
logger.debug(s"Spacetime mask is used to reduce input.")
}

val alignedMask: MultibandTileLayerRDD[SpaceTimeKey] =
if(spacetimeMask.metadata.crs.equals(metadata.crs) && spacetimeMask.metadata.layout.equals(metadata.layout)) {
spacetimeMask
}else{
logger.debug(s"mask: automatically resampling mask to match datacube: ${spacetimeMask.metadata}")
spacetimeMask.reproject(metadata.crs,metadata.layout,16,rdd.partitioner)._2
}

// retain only tiles where there is at least one valid pixel (mask value == 0), others will be fully removed
val filtered = alignedMask.withContext{_.filter(_._2.band(0).toArray().exists(pixel => pixel == 0))}
val partitioner = rdd.partitioner
val filtered = prepareMask(spacetimeMask, metadata, partitioner)

if (pixelwiseMasking) {
val spacetimeDataContextRDD = ContextRDD(rdd, metadata)
@@ -263,4 +275,23 @@
rdd
}
}

def prepareMask(spacetimeMask: MultibandTileLayerRDD[SpaceTimeKey], metadata: TileLayerMetadata[SpaceTimeKey], partitioner: Option[Partitioner]): ContextRDD[SpaceTimeKey, MultibandTile, TileLayerMetadata[SpaceTimeKey]] = {
val alignedMask: MultibandTileLayerRDD[SpaceTimeKey] =
if (spacetimeMask.metadata.crs.equals(metadata.crs) && spacetimeMask.metadata.layout.equals(metadata.layout)) {
spacetimeMask
} else {
logger.debug(s"mask: automatically resampling mask to match datacube: ${spacetimeMask.metadata}")
spacetimeMask.reproject(metadata.crs, metadata.layout, 16, partitioner)._2
}

val keyBounds = metadata.bounds.get
// retain only tiles where there is at least one valid pixel (mask value == 0), others will be fully removed
val filtered = alignedMask.withContext {
_.filter(t => {
keyBounds.includes(t._1) && t._2.band(0).toArray().exists(pixel => pixel == 0)
})
}
filtered
}
}
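The inner-join branch above leans on a general Spark property: cogrouping RDDs that already share a partitioner needs no shuffle, and flatMapValues keeps that partitioner on the result. A minimal, self-contained sketch of the same pattern in plain Spark (keys and values here are made up; no geotrellis types involved):

import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}
import org.apache.spark.rdd.CoGroupedRDD

object CoGroupJoinSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("cogroup-join-sketch"))
    val partitioner = new HashPartitioner(4)
    // both sides are pre-partitioned identically, so the cogroup below is shuffle-free
    val datacube = sc.parallelize(Seq(1 -> "tile1", 2 -> "tile2")).partitionBy(partitioner)
    val mask = sc.parallelize(Seq(1 -> "mask1", 3 -> "mask3")).partitionBy(partitioner)

    val joined = new CoGroupedRDD[Int](List(datacube, mask), partitioner)
      .flatMapValues { case Array(l, r) =>
        // emulate an inner join: drop keys that are missing on either side
        if (l.isEmpty || r.isEmpty) Iterator.empty
        else for (v <- l.iterator; w <- r.iterator) yield (v, w)
      }

    assert(joined.partitioner.contains(partitioner)) // the partitioner survives the join
    joined.collect().foreach(println) // only key 1 remains
    sc.stop()
  }
}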
@@ -231,16 +231,16 @@ package object geotrelliscommon {
import java.util.concurrent.TimeUnit


def retryForever[R](delay: Duration, retries: Int = 20, onAttemptFailed: Exception => Unit = _ => ())(f: => R): R = {
def retryForever[R](delay: Duration, attempts: Int = 20, onAttemptFailed: Exception => Unit = _ => ())(f: => R): R = {
var lastException: Exception = null
var countDown = retries
var countDown = attempts
while (countDown>0) {
try return f
catch {
case e: Exception =>
onAttemptFailed(e)
lastException = e
TimeUnit.SECONDS.sleep(delay.getSeconds)
if (countDown > 1) TimeUnit.SECONDS.sleep(delay.getSeconds)
}
countDown = countDown - 1
}
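A hypothetical call site, for illustration (fetchAccessToken stands in for any transiently failing operation): up to 5 attempts spaced 10 seconds apart, each failure logged, and after the 5th failure the exception propagates immediately, without the final sleep the old code performed.

import java.time.Duration
import org.openeo.geotrelliscommon.retryForever

def fetchAccessToken(): String = ??? // hypothetical operation that may fail transiently

val token: String = retryForever(
  delay = Duration.ofSeconds(10),
  attempts = 5,
  onAttemptFailed = e => println(s"attempt failed: ${e.getMessage}")
) {
  fetchAccessToken()
}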
@@ -0,0 +1,38 @@
package org.openeo.geotrelliscommon

import org.junit.jupiter.api.Assertions.{assertEquals, assertThrowsExactly, fail}
import org.junit.jupiter.api.{Test, Timeout}

import java.time.Duration

class PackageTest {
class FailedAttempt extends Exception

@Test
def retryForeverNumberOfAttempts(): Unit = {
var attempts = 0

try {
retryForever(delay = Duration.ZERO, attempts = 3, onAttemptFailed = _ => attempts += 1) {
println("attempting...")
throw new FailedAttempt
}

fail("should have thrown a FailedAttempt")
} catch {
case _: FailedAttempt =>
}

// count the number of failures to get the number of attempts
assertEquals(3, attempts)
}

@Test
@Timeout(5) // less than retryForever's delay below
def retryForeverNoDelayAfterFinalFailure(): Unit =
assertThrowsExactly(classOf[FailedAttempt], () =>
retryForever(delay = Duration.ofSeconds(60), attempts = 1) {
println("attempting...")
throw new FailedAttempt
})
}
2 changes: 1 addition & 1 deletion geotrellis-extensions/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion geotrellis-integrationtests/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion geotrellis-s3-extensions/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion geotrellis-seeder/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

2 changes: 1 addition & 1 deletion geotrellis-sentinelhub/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

18 changes: 17 additions & 1 deletion openeo-geotrellis/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>openeo-geotrellis-extensions</artifactId>
<groupId>org.openeo</groupId>
<version>2.4.0_2.12-SNAPSHOT</version>
<version>${revision}</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@@ -245,6 +245,22 @@
<version>1.19.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-sdk</artifactId>
<version>1.38.0</version>
</dependency>
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-sdk-metrics</artifactId>
<version>1.38.0</version>
</dependency>
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-api</artifactId>
<version>1.38.0</version>
</dependency>

</dependencies>
<build>
<plugins>
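The diff only adds the OpenTelemetry dependencies; as a sketch of what they enable (the meter scope and instrument name here are invented, not taken from this repository):

import io.opentelemetry.api.GlobalOpenTelemetry

// obtain a meter and count processed chunks; with only the API on the classpath
// this is a no-op, the opentelemetry-sdk artifacts above supply a real backend
val meter = GlobalOpenTelemetry.getMeter("org.openeo.geotrellis")
val chunkCounter = meter.counterBuilder("openeo.chunks.loaded").build()
chunkCounter.add(1)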
@@ -0,0 +1,66 @@
package org.openeo.sparklisteners

import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted, SparkListenerStageSubmitted}
import org.slf4j.LoggerFactory

import java.time.Duration

object BatchJobProgressListener {

val logger = LoggerFactory.getLogger(BatchJobProgressListener.getClass)

}

class BatchJobProgressListener extends SparkListener {

import BatchJobProgressListener.logger

override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = {
logger.info("Starting part of the process graph: " + stageSubmitted.stageInfo.name)
}

override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
val taskMetrics = stageCompleted.stageInfo.taskMetrics

if (stageCompleted.stageInfo.failureReason.isDefined) {
val message =
f"""
|A part of the process graph failed and will be retried; the reason was: ${stageCompleted.stageInfo.failureReason.get}
|Your job may still complete if the failure was caused by a transient error, but it will take more time. A common cause of transient errors is too little executor memory (overhead). Too low executor memory shows up as a high 'garbage collection' time, which was: ${taskMetrics.jvmGCTime / 1000.0} seconds.
|""".stripMargin
logger.warn(message)

} else {
val duration = Duration.ofMillis(taskMetrics.executorRunTime)
val timeString = if (duration.toSeconds > 60) {
duration.toMinutes + " minutes"
} else {
duration.toMillis.toFloat / 1000.0 + " seconds"
}
val megabytes = taskMetrics.shuffleWriteMetrics.bytesWritten.toFloat / (1024.0 * 1024.0)
val name = stageCompleted.stageInfo.name
logger.info(f"Finished part ${stageCompleted.stageInfo.stageId} of the process graph: ${name}.\n The total computing time was: $timeString. It produced: $megabytes%.2f MB of data.")

val accumulators = stageCompleted.stageInfo.accumulables
val chunkCounts = accumulators.filter(_._2.name.get.startsWith("ChunkCount"))
if (chunkCounts.nonEmpty) {
val totalChunks = chunkCounts.head._2.value
val megapixel = totalChunks.get.asInstanceOf[Long] * 256 * 256 / (1024 * 1024)
if (taskMetrics.executorRunTime > 0) {
logger.info(f"load_collection: data was loaded with an average speed of: ${megapixel.toFloat / duration.toSeconds.toFloat}%.3f Megapixel per second.")
}
}
}
}
}
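A minimal sketch of wiring the listener into a job (the local-mode SparkContext is illustrative only; in practice the driver that runs the batch job registers it, or names the class in spark.extraListeners):

import org.apache.spark.{SparkConf, SparkContext}
import org.openeo.sparklisteners.BatchJobProgressListener

val sc = new SparkContext(new SparkConf().setAppName("batch-job").setMaster("local[2]"))
sc.addSparkListener(new BatchJobProgressListener())
// ... run the job; stage submissions and completions now show up in the driver log ...
sc.stop()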