improve FileLayerProvider resilience
* retryForever: no delay after final failure

  eu-cdse/openeo-cdse-infra#196

* smaller job runs successfully locally

  eu-cdse/openeo-cdse-infra#196

* simple GDALRasterSource.read is also successful

  eu-cdse/openeo-cdse-infra#196

* optimize retryForever

  - remove outer retryForever in favor of more attempts for inner retryForever
  - optimization: implement with exponential back-off

  eu-cdse/openeo-cdse-infra#196

* disable test

  eu-cdse/openeo-cdse-infra#196

* restore retry of RasterSource.reproject() as it can fail:

      at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2785)
      at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2721)
      at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2720)
      at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
      at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
      at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
      at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2720)
      at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1206)
      at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1206)
      at scala.Option.foreach(Option.scala:407)
      at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1206)
      at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2984)
      at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2923)
      at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2912)
      at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
      at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:971)
      at org.apache.spark.SparkContext.runJob(SparkContext.scala:2263)
      at org.apache.spark.SparkContext.runJob(SparkContext.scala:2284)
      at org.apache.spark.SparkContext.runJob(SparkContext.scala:2303)
      at org.apache.spark.SparkContext.runJob(SparkContext.scala:2328)
      at org.apache.spark.rdd.RDD.count(RDD.scala:1266)
      at org.openeo.geotrellis.netcdf.NetCDFRDDWriter$.cacheAndRepartition(NetCDFRDDWriter.scala:267)
      at org.openeo.geotrellis.netcdf.NetCDFRDDWriter$.saveSingleNetCDFGeneric(NetCDFRDDWriter.scala:126)
      at org.openeo.geotrellis.netcdf.NetCDFRDDWriter$.saveSingleNetCDFGeneric(NetCDFRDDWriter.scala:108)
      at org.openeo.geotrellis.netcdf.NetCDFRDDWriter$.writeRasters(NetCDFRDDWriter.scala:80)
      at org.openeo.geotrellis.netcdf.NetCDFRDDWriter.writeRasters(NetCDFRDDWriter.scala)
      at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
      at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
      at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
      at java.base/java.lang.reflect.Method.invoke(Method.java:566)
      at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
      at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
      at py4j.Gateway.invoke(Gateway.java:282)
      at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
      at py4j.commands.CallCommand.execute(CallCommand.java:79)
      at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
      at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
      at java.base/java.lang.Thread.run(Thread.java:829)
  Caused by: java.io.IOException: load_collection/load_stac: error while reading from: /vsis3/EODATA/Sentinel-2/MSI/L2A_N0500/2018/03/27/S2A_MSIL2A_20180327T114351_N0500_R123_T29UNV_20230828T122340.SAFE/GRANULE/L2A_T29UNV_A014420_20180327T114351/IMG_DATA/R10m/T29UNV_20180327T114351_B08_10m.jp2. Detailed error: Unable to parse projection as CRS. GDAL Error Code: 4
      at org.openeo.geotrellis.layers.FileLayerProvider$.$anonfun$loadPartitionBySource$1(FileLayerProvider.scala:663)
      at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
      at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
      at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
      at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
      at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:101)
      at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
      at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
      at org.apache.spark.scheduler.Task.run(Task.scala:139)
      at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
      at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
      at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
      at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
      at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
      ... 1 more
  Caused by: geotrellis.raster.gdal.MalformedProjectionException: Unable to parse projection as CRS. GDAL Error Code: 4
      at geotrellis.raster.gdal.GDALDataset$.$anonfun$crs$1(GDALDataset.scala:293)
      at geotrellis.raster.gdal.GDALDataset$.$anonfun$crs$1$adapted(GDALDataset.scala:290)
      at geotrellis.raster.gdal.GDALDataset$.errorHandler$extension(GDALDataset.scala:422)
      at geotrellis.raster.gdal.GDALDataset$.crs$extension1(GDALDataset.scala:290)
      at geotrellis.raster.gdal.GDALDataset$.crs$extension0(GDALDataset.scala:282)
      at geotrellis.raster.gdal.GDALRasterSource.crs$lzycompute(GDALRasterSource.scala:84)
      at geotrellis.raster.gdal.GDALRasterSource.crs(GDALRasterSource.scala:84)
      at org.openeo.geotrellis.layers.ValueOffsetRasterSource.crs(ValueOffsetRasterSource.scala:93)
      at geotrellis.raster.RasterSource.reproject(RasterSource.scala:54)
      at org.openeo.geotrellis.layers.BandCompositeRasterSource.$anonfun$reprojectedSources$2(FileLayerProvider.scala:84)
      at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
      at scala.collection.Iterator.foreach(Iterator.scala:943)
      at scala.collection.Iterator.foreach$(Iterator.scala:943)
      at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
      at scala.collection.IterableLike.foreach(IterableLike.scala:74)
      at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
      at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
      at scala.collection.TraversableLike.map(TraversableLike.scala:286)
      at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
      at scala.collection.AbstractTraversable.map(Traversable.scala:108)
      at org.openeo.geotrellis.layers.BandCompositeRasterSource.reprojectedSources(FileLayerProvider.scala:84)
      at org.openeo.geotrellis.layers.BandCompositeRasterSource.read(FileLayerProvider.scala:129)
      at geotrellis.raster.RasterSource.read(RasterSource.scala:128)
      at org.openeo.geotrellis.layers.FileLayerProvider$.$anonfun$loadPartitionBySource$6(FileLayerProvider.scala:661)
      at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
      at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
      at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
      at scala.collection.Iterator.toStream(Iterator.scala:1417)
      at scala.collection.Iterator.toStream$(Iterator.scala:1416)
      at scala.collection.AbstractIterator.toStream(Iterator.scala:1431)
      at scala.collection.TraversableOnce.toSeq(TraversableOnce.scala:354)
      at scala.collection.TraversableOnce.toSeq$(TraversableOnce.scala:354)
      at scala.collection.AbstractIterator.toSeq(Iterator.scala:1431)
      at org.openeo.geotrellis.layers.FileLayerProvider$.$anonfun$loadPartitionBySource$1(FileLayerProvider.scala:661)
      ... 14 more

  eu-cdse/openeo-cdse-infra#196

* make GDALRasterSource fail with an error

  eu-cdse/openeo-cdse-infra#196

* add test

  eu-cdse/openeo-cdse-infra#196

* support soft errors

  eu-cdse/openeo-cdse-infra#196

* restore number-of-attempts and disable test

* make attempts argument explicit

  eu-cdse/openeo-cdse-infra#196

* cleanup

  eu-cdse/openeo-cdse-infra#196

* cleanup

  eu-cdse/openeo-cdse-infra#196

* cleanup

  eu-cdse/openeo-cdse-infra#196
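The "optimize retryForever", "no delay after final failure" and "make attempts argument explicit" commits describe the retry pattern this change converges on: a single retry helper with an explicit number of attempts, exponential back-off between attempts, and no sleep once the last attempt has failed. A minimal, self-contained Scala sketch of that pattern follows; the helper name retryWithBackoff, its signature and its defaults are illustrative assumptions, not the actual retryForever implementation in this repository.

    import scala.annotation.tailrec
    import scala.concurrent.duration._

    object RetrySketch {

      // Hypothetical stand-in for retryForever: explicit attempts argument,
      // exponential back-off between attempts, and no delay after the final
      // failure (the last exception simply propagates to the caller).
      @tailrec
      def retryWithBackoff[T](attempts: Int, delay: FiniteDuration = 1.second)(action: => T): T =
        try action
        catch {
          case _: Exception if attempts > 1 =>
            Thread.sleep(delay.toMillis) // back off only when another attempt follows
            retryWithBackoff(attempts - 1, delay * 2)(action)
        }

      def main(args: Array[String]): Unit = {
        // e.g. wrap a flaky call such as rasterSource.read(extent) or reproject(...)
        val result = retryWithBackoff(attempts = 5) { "ok" }
        println(result)
      }
    }

Replacing the outer retryForever with more attempts on the inner one, as the commit describes, keeps a single back-off schedule instead of multiplying two nested ones.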
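The "support soft errors" commit is about tolerating inputs that keep failing after all retries (such as the .jp2 file above that GDAL cannot parse a CRS from) instead of aborting the whole Spark job. A rough sketch of the idea, assuming a boolean softErrors flag and a generic readTile thunk, both hypothetical names rather than the real FileLayerProvider API:

    import scala.util.{Failure, Success, Try}

    object SoftErrorSketch {

      // Hypothetical helper: with softErrors enabled a failing read is logged
      // and skipped (None); with softErrors disabled the exception propagates
      // and the job fails as before.
      def readOrSkip[T](softErrors: Boolean)(readTile: => T): Option[T] =
        Try(readTile) match {
          case Success(tile) => Some(tile)
          case Failure(e) if softErrors =>
            println(s"soft error, skipping unreadable input: ${e.getMessage}")
            None
          case Failure(e) => throw e
        }

      def main(args: Array[String]): Unit = {
        val skipped = readOrSkip(softErrors = true) {
          throw new java.io.IOException("Unable to parse projection as CRS. GDAL Error Code: 4")
        }
        println(skipped) // None: the bad input is dropped instead of failing the job
      }
    }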